From 2d9d3ecfc72d14fb05f94e677c56fce85108fdb2 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Fri, 9 Mar 2018 14:31:14 +0100 Subject: [PATCH 001/405] two bug fixes --- src/casekit/HOSECodePredictor.java | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/casekit/HOSECodePredictor.java b/src/casekit/HOSECodePredictor.java index da6cbc2..7f59639 100644 --- a/src/casekit/HOSECodePredictor.java +++ b/src/casekit/HOSECodePredictor.java @@ -15,6 +15,7 @@ import java.text.DecimalFormat; import java.util.ArrayList; import java.util.Hashtable; +import java.util.List; import java.util.StringTokenizer; import org.apache.commons.cli.CommandLine; @@ -198,7 +199,7 @@ public void predict(IAtomContainer ac) throws Exception "'''''", "'''''", "''''''", - "'''''''" + "''''''''" }; fixExplicitHydrogens(ac); if (verbose) System.out.println("Entering prediction module"); @@ -269,6 +270,7 @@ public void generatePicture(IAtomContainer ac, String path) throws IOException, */ void fixExplicitHydrogens(IAtomContainer ac) { + List toRemoveList = new ArrayList<>(); IAtom atomB; for (IAtom atomA : ac.atoms()) { @@ -276,9 +278,12 @@ void fixExplicitHydrogens(IAtomContainer ac) { atomB = ac.getConnectedAtomsList(atomA).get(0); atomB.setImplicitHydrogenCount(atomB.getImplicitHydrogenCount() +1 ); - ac.removeAtom(atomA); + toRemoveList.add(atomA); } } + for (IAtom iAtom : toRemoveList) { + ac.removeAtom(iAtom); + } } private void parseArgs(String[] args) throws ParseException From 02dbd43bb41f753f8f152b103039e74657f7f939 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Fri, 9 Mar 2018 14:31:14 +0100 Subject: [PATCH 002/405] bug fixes in following two functions: - predict: addition of a missing stroke in last sphere - fixExplicitHydrogens: did not work properly, now corrected with additional remove list --- src/casekit/HOSECodePredictor.java | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/casekit/HOSECodePredictor.java 
b/src/casekit/HOSECodePredictor.java index da6cbc2..7f59639 100644 --- a/src/casekit/HOSECodePredictor.java +++ b/src/casekit/HOSECodePredictor.java @@ -15,6 +15,7 @@ import java.text.DecimalFormat; import java.util.ArrayList; import java.util.Hashtable; +import java.util.List; import java.util.StringTokenizer; import org.apache.commons.cli.CommandLine; @@ -198,7 +199,7 @@ public void predict(IAtomContainer ac) throws Exception "'''''", "'''''", "''''''", - "'''''''" + "''''''''" }; fixExplicitHydrogens(ac); if (verbose) System.out.println("Entering prediction module"); @@ -269,6 +270,7 @@ public void generatePicture(IAtomContainer ac, String path) throws IOException, */ void fixExplicitHydrogens(IAtomContainer ac) { + List toRemoveList = new ArrayList<>(); IAtom atomB; for (IAtom atomA : ac.atoms()) { @@ -276,9 +278,12 @@ void fixExplicitHydrogens(IAtomContainer ac) { atomB = ac.getConnectedAtomsList(atomA).get(0); atomB.setImplicitHydrogenCount(atomB.getImplicitHydrogenCount() +1 ); - ac.removeAtom(atomA); + toRemoveList.add(atomA); } } + for (IAtom iAtom : toRemoveList) { + ac.removeAtom(iAtom); + } } private void parseArgs(String[] args) throws ParseException From b6826016bf8dbd8a42bf32874122628cfc0a4f28 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Sun, 22 Jul 2018 15:23:52 +0200 Subject: [PATCH 003/405] NMR package added --- src/NMR/DB.java | 401 +++++++++++ src/NMR/ParseRawData.java | 942 ++++++++++++++++++++++++ src/NMR/Process.java | 647 +++++++++++++++++ src/NMR/ShiftMatcher.java | 339 +++++++++ src/NMR/Signal.java | 59 ++ src/NMR/Utils.java | 1436 +++++++++++++++++++++++++++++++++++++ 6 files changed, 3824 insertions(+) create mode 100644 src/NMR/DB.java create mode 100644 src/NMR/ParseRawData.java create mode 100644 src/NMR/Process.java create mode 100644 src/NMR/ShiftMatcher.java create mode 100644 src/NMR/Signal.java create mode 100644 src/NMR/Utils.java diff --git a/src/NMR/DB.java b/src/NMR/DB.java new file mode 100644 index 0000000..9450509 --- 
/dev/null +++ b/src/NMR/DB.java @@ -0,0 +1,401 @@ +/* + * The MIT License + * + * Copyright 2018 Michael Wenk [https://github.com/michaelwenk]. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +package NMR; + +import java.io.FileNotFoundException; +import java.io.FileReader; +import java.io.IOException; +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.Statement; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import org.openscience.cdk.exception.CDKException; +import org.openscience.cdk.fingerprint.Fingerprinter; +import org.openscience.cdk.fingerprint.IBitFingerprint; +import org.openscience.cdk.fingerprint.IFingerprinter; +import org.openscience.cdk.interfaces.IAtom; +import org.openscience.cdk.interfaces.IAtomContainer; +import org.openscience.cdk.interfaces.IAtomContainerSet; +import org.openscience.cdk.interfaces.IAtomType; +import org.openscience.cdk.io.iterator.IteratingSDFReader; +import org.openscience.cdk.qsar.descriptors.atomic.AtomHybridizationDescriptor; +import org.openscience.cdk.silent.AtomContainerSet; +import org.openscience.cdk.silent.SilentChemObjectBuilder; +import org.openscience.cdk.tools.manipulator.AtomContainerManipulator; + +/** + * + * @author Michael Wenk [https://github.com/michaelwenk] + */ +public class DB { + + /** + * Returns the molecules of a given NMRShiftDB file. + * This function sets the molecule aromaticity (with allowed exocyclic pi + * bonds) by using the + * {@link #setAromaticitiesInAtomContainer(org.openscience.cdk.interfaces.IAtomContainer, int)} + * function. 
+ * + * @param pathToNMRShiftDB path to NMRShiftDB file + * @param maxCycleSize maximum cycle size for setting the aromaticity in a + * molecule + * @return + * @throws FileNotFoundException + * @throws CDKException + */ + public static IAtomContainerSet getStructuresFromNMRShiftDBFile(final String pathToNMRShiftDB, final int maxCycleSize) throws FileNotFoundException, CDKException { + + final IAtomContainerSet acSet = new AtomContainerSet(); + final IteratingSDFReader iterator = new IteratingSDFReader( + new FileReader(pathToNMRShiftDB), + SilentChemObjectBuilder.getInstance() + ); + while (iterator.hasNext()) { + acSet.addAtomContainer(NMR.Utils.setAromaticitiesInAtomContainer(iterator.next(), maxCycleSize)); + } + + return acSet; + } + + public static Connection getDBConnection(final String server, final String options, final String user, final String pwd) throws SQLException { + + return DriverManager.getConnection(server + "?" + options, user, pwd); + } + + public static int[][] countNeighborhoodBonds(final Connection DBConnection, final String[] bondsSet, final String elem, String[] neighborElems, final int minShift, final int maxShift, final int stepSize) throws FileNotFoundException, IOException, SQLException { + + if (DBConnection == null || stepSize < 1) { + return null; + } + // creation of frequency counting matrix and shift indices holder + final int[][] neighborhoodCountsMatrix = new int[stepSize * (maxShift - minShift + 1)][3 + 4 + neighborElems.length * bondsSet.length]; + HashMap> signalAtomIndicesInNMRShiftDB = new HashMap<>(); // holding of all indices of each ac set (DB) entry [first value] and it's atom indices [second value] too + for (int i = 0; i < stepSize * maxShift; i++) { + for (int k = 0; k < 3 + 4 + neighborElems.length * bondsSet.length; k++) { + neighborhoodCountsMatrix[i][k] = 0; + } + signalAtomIndicesInNMRShiftDB.put(i, new ArrayList<>()); + } + + final Statement statement = DBConnection.createStatement(); + String multQuery = 
"SELECT (FLOOR(sh.VALUE*" + stepSize + ")/" + stepSize + ") AS shift, nmrsig.MULTIPLICITY AS mult, COUNT(FLOOR(sh.VALUE*" + stepSize + ")/" + stepSize + ") AS shiftCount \n" + + "FROM SHIFT AS sh, SIGNAL_ATOM AS sigatom, NMR_SIGNAL AS nmrsig, SPECTRUM AS spec, SPECTRUM_TYPE AS spectype \n" + + "WHERE sh.SIGNAL_ID = sigatom.SIGNAL_ID AND \n" + + " sigatom.SIGNAL_ID = nmrsig.SIGNAL_ID AND \n" + + " nmrsig.SPECTRUM_ID = spec.SPECTRUM_ID AND \n" + + " spec.REVIEW_FLAG = \"true\" AND \n" + + " spectype.SPECTRUM_TYPE_ID = spec.SPECTRUM_TYPE_ID AND \n" + + " spectype.NAME = \"" + NMR.Utils.getNMRIsotopeIdentifier(elem) + "\" AND \n" + + " nmrsig.MULTIPLICITY IS NOT NULL AND \n" + + " nmrsig.MULTIPLICITY != \"\"" + + "GROUP BY shift, mult \n" + + "HAVING shift >= " + minShift + " AND shift <= " + maxShift + ";"; + + double shiftDouble; + int shiftInt; + System.out.println("\n\nneighborhoods:\nQUERY: " + multQuery); + final ResultSet resultSet = statement.executeQuery(multQuery); + while (resultSet.next()) { + shiftDouble = Math.floor(resultSet.getDouble("shift") * stepSize) / (double) stepSize; + if (shiftDouble < minShift || shiftDouble > maxShift - 1) { + continue; + } + shiftInt = (int) (shiftDouble * stepSize); + neighborhoodCountsMatrix[shiftInt - minShift][0] += resultSet.getInt("shiftCount"); + switch (resultSet.getString("mult")) { + case "S": // for qC + neighborhoodCountsMatrix[shiftInt - minShift][3] += resultSet.getInt("shiftCount"); + break; + case "D": // for CH + neighborhoodCountsMatrix[shiftInt - minShift][4] += resultSet.getInt("shiftCount"); + break; + case "T": // for CH2 + neighborhoodCountsMatrix[shiftInt - minShift][5] += resultSet.getInt("shiftCount"); + break; + case "Q": // for CH3 + neighborhoodCountsMatrix[shiftInt - minShift][6] += resultSet.getInt("shiftCount"); + break; + } + + } + +// this.neighborhoodCountsMatrix[shiftDB - min][0] += 1; // increase number of this shift occurence +// this.neighborhoodCountsMatrix[shiftDB - min][1] += 
(acDB.getAtom(atomIndexDB).isInRing()) ? 1 : 0; // increase if atom is a ring member +// this.neighborhoodCountsMatrix[shiftDB - min][2] += ((acDB.getAtom(atomIndexDB).getImplicitHydrogenCount() != null) && (acDB.getAtom(atomIndexDB).getImplicitHydrogenCount() == 0)) ? 1 : 0; // qC count or equivalents, e.g. qN +// this.neighborhoodCountsMatrix[shiftDB - min][3] += ((acDB.getAtom(atomIndexDB).getImplicitHydrogenCount() != null) && (acDB.getAtom(atomIndexDB).getImplicitHydrogenCount() == 1)) ? 1 : 0; // CH count or equivalents, e.g. NH +// this.neighborhoodCountsMatrix[shiftDB - min][4] += ((acDB.getAtom(atomIndexDB).getImplicitHydrogenCount() != null) && (acDB.getAtom(atomIndexDB).getImplicitHydrogenCount() == 2)) ? 1 : 0; // CH2 count or equivalents, e.g. NH2 +// this.neighborhoodCountsMatrix[shiftDB - min][5] += ((acDB.getAtom(atomIndexDB).getImplicitHydrogenCount() != null) && (acDB.getAtom(atomIndexDB).getImplicitHydrogenCount() == 3)) ? 1 : 0; // CH3 count or equivalents, e.g. NH3 +// // add counts for a specific atom to matrix m +// int[] counts = NMR.Utils.getNeighborhoodBondsCount(acDB, atomIndexDB, bondsSet, neighborElems); +// for (int i = 0; i < counts.length; i++) { +// this.neighborhoodCountsMatrix[shiftDB - min][2 + 4 + i] += counts[i]; +// } +// // add this atom container index and atom index within it to belonging hash map +// this.shiftIndicesInACSet.get(shiftDB).add(new Integer[]{k, atomIndexDB}); + return neighborhoodCountsMatrix; + } + + + public static ResultSet getResultSet(final Connection DBConnection, final String query) throws SQLException{ + + if (DBConnection == null) { + return null; + } + + return DBConnection.createStatement().executeQuery(query); + } + + + /** + * + * @param DBConnection + * @param minShift + * @param maxShift + * @param mult + * @param minIntens + * @param maxIntens + * @param elem + * @return + * @throws SQLException + */ + public static ArrayList getSignalIDsFromNMRShiftDB(final Connection DBConnection, final 
double minShift, final double maxShift, final String mult, final Double minIntens, final Double maxIntens, final String elem) throws SQLException { + + final ArrayList spectraIDs = new ArrayList<>(); + String query = "SELECT nmrsig.SIGNAL_ID AS sigID" + + " FROM SHIFT AS sh, NMR_SIGNAL AS nmrsig, SPECTRUM AS spec, SPECTRUM_TYPE AS spectype \n" + + " WHERE sh.VALUE >= " + minShift + " AND sh.VALUE <= " + maxShift + " AND \n" // for filtering by means of shift values + + " sh.SIGNAL_ID = nmrsig.SIGNAL_ID AND \n" + + " nmrsig.SPECTRUM_ID = spec.SPECTRUM_ID AND \n" + + " spec.REVIEW_FLAG = \"true\" AND \n" // checks whether review flag is set to true + + " spectype.SPECTRUM_TYPE_ID = spec.SPECTRUM_TYPE_ID AND \n" + + " spectype.NAME = \"" + NMR.Utils.getNMRIsotopeIdentifier(elem) + "\" \n"; + if(mult != null && !mult.trim().isEmpty()){ + query += " AND nmrsig.MULTIPLICITY = \"" + mult + "\" \n"; + } else { + query += " AND nmrsig.MULTIPLICITY IS NOT NULL AND nmrsig.MULTIPLICITY != \"\" \n"; + } + if((minIntens != null && minIntens > 0.0) && (maxIntens != null && maxIntens > 0.0)){ + query += " AND nmrsig.INTENSITY >= " + minIntens + " AND nmrsig.INTENSITY <= " + maxIntens + " \n"; + } + query += " ;"; + System.out.println("\n\ngetSpectraIDs:\nQUERY: " + query); + final ResultSet resultSet = NMR.DB.getResultSet(DBConnection, query); + while (resultSet.next()) { + spectraIDs.add(resultSet.getInt("sigID")); + } + + return spectraIDs; + } + + + public static HashMap> matchSpectrumAgainstDB(final Connection DBConnection, final ArrayList spectrum, final double shiftDev, final Double intensDev, final int stepSize) throws SQLException{ + + final HashMap> hits = new HashMap<>(); double shift; + for (int i = 0; i < spectrum.size(); i++) { + hits.put(i, new ArrayList<>()); + shift = Math.floor(spectrum.get(i).getShift() * stepSize) / (double) stepSize; + if(spectrum.get(i).getIntensity() != null){ + hits.get(i).addAll(NMR.DB.getSignalIDsFromNMRShiftDB(DBConnection, shift - 
shiftDev, shift + shiftDev, spectrum.get(i).getMultiplicity(), spectrum.get(i).getIntensity() - intensDev, spectrum.get(i).getIntensity() + intensDev, spectrum.get(i).getElement())); + } else { + hits.get(i).addAll(NMR.DB.getSignalIDsFromNMRShiftDB(DBConnection, shift - shiftDev, shift + shiftDev, spectrum.get(i).getMultiplicity(), spectrum.get(i).getIntensity(), spectrum.get(i).getIntensity(), spectrum.get(i).getElement())); + } + } + + return hits; + } + + + /** + * + * @param DBConnection + * @param elem + * @return + * @throws SQLException + */ + public static HashMap> getLookupTableFromNMRShiftDB(final Connection DBConnection, final String elem) throws SQLException { + + if (DBConnection == null) { + return null; + } + final HashMap> lookup = new HashMap<>(); + final Statement statement = DBConnection.createStatement(); + final String query = "SELECT a.HOSE_CODE AS hose, sh.VALUE AS shift \n" + + " FROM ATOM AS a, SHIFT AS sh, SIGNAL_ATOM AS sigatom, NMR_SIGNAL AS nmrsig, SPECTRUM AS spec, SPECTRUM_TYPE AS spectype \n" + + " WHERE a.ATOM_ID = sigatom.ATOM_ID AND \n" // to get signals of each atom + + " sigatom.SIGNAL_ID = sh.SIGNAL_ID AND \n" // to get shift values + + " sigatom.SIGNAL_ID = nmrsig.SIGNAL_ID AND \n" + + " nmrsig.SPECTRUM_ID = spec.SPECTRUM_ID AND \n" + + " spec.REVIEW_FLAG = \"true\" AND \n" // checks whether review flag is set to true + + " spectype.SPECTRUM_TYPE_ID = spec.SPECTRUM_TYPE_ID AND \n" + + " spectype.NAME = \"" + NMR.Utils.getNMRIsotopeIdentifier(elem) + "\";"; + System.out.println("\n\ngetLookupTable:\nQUERY: " + query); + final ResultSet resultSet = statement.executeQuery(query); + while (resultSet.next()) { + if (!lookup.containsKey(resultSet.getString("hose"))) { + lookup.put(resultSet.getString("hose"), new ArrayList<>()); + } + lookup.get(resultSet.getString("hose")).add(resultSet.getDouble("shift")); + } + + return lookup; + } + + + /** + * + * @param DBConnection + * @param minShift + * @param maxShift + * @param elem + * 
@return + * @throws SQLException + */ + public static HashMap getRMS(final Connection DBConnection, final double minShift, final double maxShift, final String elem) throws SQLException { + + if (DBConnection == null) { + return null; + } + final HashMap rms = new HashMap<>(); + final Statement statement = DBConnection.createStatement(); + final String query = "SELECT a.HOSE_CODE AS hose, COUNT(sh.VALUE) AS shiftCount, AVG(sh.VALUE) AS mean, SQRT(SUM(POW(sh.VALUE, 2))/COUNT(sh.VALUE)) AS rms \n" + + " FROM ATOM AS a, SHIFT AS sh, SIGNAL_ATOM AS sigatom, NMR_SIGNAL AS nmrsig, SPECTRUM AS spec, SPECTRUM_TYPE AS spectype \n" + + " WHERE sh.VALUE >= " + minShift + " AND sh.VALUE <= " + maxShift + " AND \n" // for filtering by means of shift values + + " sh.SIGNAL_ID = sigatom.SIGNAL_ID AND \n" // to get shift values + + " a.ATOM_ID = sigatom.ATOM_ID AND \n" + + " sigatom.SIGNAL_ID = nmrsig.SIGNAL_ID AND \n" + + " nmrsig.SPECTRUM_ID = spec.SPECTRUM_ID AND \n" + + " spec.REVIEW_FLAG = \"true\" AND \n" // checks whether review flag is set to true + + " spectype.SPECTRUM_TYPE_ID = spec.SPECTRUM_TYPE_ID AND \n" + + " spectype.NAME = \"" + NMR.Utils.getNMRIsotopeIdentifier(elem) + "\" AND \n" + + " nmrsig.MULTIPLICITY IS NOT NULL AND \n" + + " nmrsig.MULTIPLICITY != \"\" \n" + + " GROUP BY hose;"; + System.out.println("\n\nRMS SQL:\nQUERY: " + query); + final ResultSet resultSet = statement.executeQuery(query); + while (resultSet.next()) { + rms.put(resultSet.getString("hose"), resultSet.getDouble("rms")); + } + + return rms; + } + + + public static ArrayList getSpectraFromNMRShiftDBEntry(final IAtomContainer ac, final String elem) { + + ArrayList props = (ArrayList) (ArrayList) (new ArrayList<>(ac.getProperties().keySet())); + final ArrayList spectra = new ArrayList<>(); + for (String prop : props) { + if (prop.contains("Spectrum " + NMR.Utils.getNMRIsotopeIdentifier(elem))) { + spectra.add(ac.getProperty(prop)); + } + } + + return spectra; + } + + + + + + + + + + + + + + + 
+ + + + + + + + // TRIALS + + + public static void findSubstructuresInNMRShiftDB(final IAtomContainer acQ, final String pathToNMRShiftDB) throws CDKException, FileNotFoundException, CloneNotSupportedException { + + final IAtomContainer acQcopy = acQ.clone(); + AtomContainerManipulator.convertImplicitToExplicitHydrogens(acQcopy); + + final IFingerprinter fingerprinter = new Fingerprinter(); + final IBitFingerprint fingerprintQ = fingerprinter.getBitFingerprint(acQcopy); +// System.out.println("Q: cardinality: " + fingerprintQ.cardinality() + ", bit set: " + Arrays.toString(fingerprintQ.getSetbits())); + IBitFingerprint fingerprintDB; + IAtomContainer acDB; + final IteratingSDFReader iterator = new IteratingSDFReader( + new FileReader(pathToNMRShiftDB), + SilentChemObjectBuilder.getInstance() + ); + final AtomHybridizationDescriptor hybridDesc = new AtomHybridizationDescriptor(); + int molCounter = 1; + while (iterator.hasNext()) { + acDB = iterator.next(); +// // skip structures which do not at least contain one carbon spectrum +// if (!acDB.getProperties().containsKey("Spectrum 13C 0")) { +// continue; +// } +// IAtomContainer acDBcopy = acDB.clone(); +// AtomContainerManipulator.convertImplicitToExplicitHydrogens(acDBcopy); +// fingerprintDB = fingerprinter.getBitFingerprint(acDBcopy); +//// System.out.println("DB: cardinality: " + fingerprintDB.cardinality() + ", bit set: " + Arrays.toString(fingerprintDB.getSetbits())); +// +//// fingerprintDB.and(fingerprintQ); +//// System.out.println("and: " + Arrays.toString(fingerprintDB.getSetbits())); +// +// if(Tanimoto.calculate(fingerprintQ, fingerprintDB) >= 0.25) +// System.out.println("similarity: " + Tanimoto.calculate(fingerprintQ, fingerprintDB) + " at " + acDB.getProperties()); + + int counter = 0; + for (int i = 0; i < acDB.getAtomCount(); i++) { + if (acDB.getAtom(i).getSymbol().equals("N")) { + for (IAtom neighbor : acDB.getConnectedAtomsList(acDB.getAtom(i))) { + if (neighbor.getSymbol().equals("C") && 
IAtomType.Hybridization.values()[Integer.parseInt(hybridDesc.calculate(neighbor, acDB).getValue().toString())].equals(IAtomType.Hybridization.SP2)) { + for (IAtom neighbor2 : acDB.getConnectedAtomsList(neighbor)) { + if (neighbor2.getSymbol().equals("C") && neighbor2.getImplicitHydrogenCount() == 3) { + counter++; + } + } + } + } + if (counter >= 2) { + System.out.println(molCounter); + } + break; + } + } + molCounter++; + } + + } +} diff --git a/src/NMR/ParseRawData.java b/src/NMR/ParseRawData.java new file mode 100644 index 0000000..7f4b3bf --- /dev/null +++ b/src/NMR/ParseRawData.java @@ -0,0 +1,942 @@ +/* + * The MIT License + * + * Copyright 2018 Michael Wenk [https://github.com/michaelwenk]. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +package NMR; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Scanner; +import javax.xml.parsers.ParserConfigurationException; +import org.apache.commons.lang3.ArrayUtils; +import org.openscience.cdk.Atom; +import org.openscience.cdk.CDKConstants; +import org.openscience.cdk.interfaces.IAtom; +import org.openscience.cdk.interfaces.IAtomContainer; +import org.openscience.cdk.interfaces.IElement; +import org.openscience.cdk.interfaces.IMolecularFormula; +import org.openscience.cdk.silent.SilentChemObjectBuilder; +import org.openscience.cdk.tools.manipulator.AtomContainerManipulator; +import org.openscience.cdk.tools.manipulator.MolecularFormulaManipulator; +import org.xml.sax.SAXException; + +/** + * + * @author Michael Wenk [https://github.com/michaelwenk] + */ +public class ParseRawData { + + final private IAtomContainer mol; + final private IMolecularFormula molFormula; + final private HashMap> atomTypeIndices = new HashMap<>(); + final public static String CONST_STRING_EQUIVALENCE = "Equivalence"; + final public static String CONST_STRING_HYDROGENSHIFTS = "HydrogenShifts"; + final public static String CONST_STRING_COSY = "COSY"; + final public static String CONST_STRING_HMBC = "HMBC"; + final public static String CONST_STRING_INADEQUATE = "INADEQUATE"; + + + public ParseRawData(){ + + this.molFormula = null; + this.mol = SilentChemObjectBuilder.getInstance().newAtomContainer(); + } + + + public ParseRawData(final IMolecularFormula molFormula){ + + this.molFormula = molFormula; + this.mol = Utils.removeAtoms(MolecularFormulaManipulator.getAtomContainer(this.molFormula), "H"); + this.setAtomTypeIndices(); + } + + + /** + * + * @return used IMolecularFormula object for this class instance + */ + public final IMolecularFormula getMolecularFormula() { + + return this.molFormula; + } + + + /** + * + * @return used IAtomContainer 
object for this class instance + */ + public final IAtomContainer getAtomContainer() { + + return this.mol; + } + + public final void setAtomTypeIndices() { + + final HashSet atomTypes = new HashSet<>(); + if(this.molFormula != null){ + for (final IElement heavyElem : MolecularFormulaManipulator.getHeavyElements(this.molFormula)) { + atomTypes.add(heavyElem.getSymbol()); + } + } else { + for (final IAtom heavyAtom : AtomContainerManipulator.getHeavyAtoms(this.mol)) { + atomTypes.add(heavyAtom.getSymbol()); + } + } + for (final String atomType : atomTypes) { + this.atomTypeIndices.put(atomType, Utils.getAtomTypeIndicesInAtomContainer(this.mol, atomType)); + } + } + + + /** + * Returns a HashMap object with the indices of all atoms for all atom types + * (elements) within the atom container of this class. + * + * @return + */ + public final HashMap> getAtomTypeIndices() { + + return this.atomTypeIndices; + } + + + /** + * Copies all up to here set properties from an atom in atom container to its + * linked atoms with equivalent shift values. + * + */ + public final void setEquivalentProperties() { + + Map properties; + for (int i = 0; i < this.mol.getAtomCount(); i++) { + if (this.mol.getAtom(i).getProperty(ParseRawData.CONST_STRING_EQUIVALENCE) != null) { + properties = this.mol.getAtom(i).getProperties(); + for (final Object prop: properties.keySet()) { + if (this.mol.getAtom(i).getProperty(prop) != null && !prop.equals(ParseRawData.CONST_STRING_EQUIVALENCE)) { + for (final int k : (ArrayList) this.mol.getAtom(i).getProperty(ParseRawData.CONST_STRING_EQUIVALENCE)) { + this.mol.getAtom(k).setProperty(prop, this.mol.getAtom(i).getProperty(prop)); + } + } + } + } + } + } + + + /** + * Wrapper function for automatically choosing which file format to take. 
+ * For more details see {@link NMR.ParseRawData#parse1DNMRviaPeakTable(String, String)} + * and {@link NMR.ParseRawData#parse1DNMRviaXML(String, String)} + * + * @param pathToPeakList + * @param atomType + * @return + * @throws IOException + * @throws ParserConfigurationException + * @throws SAXException + */ + public final boolean parse1DNMR(final String pathToPeakList, final String atomType) throws IOException, ParserConfigurationException, SAXException{ + + switch (NMR.Utils.getFileFormat(pathToPeakList)) { + case "csv": + return this.parse1DNMRviaPeakTable(pathToPeakList, atomType); + case "xml": + return this.parse1DNMRviaXML(pathToPeakList, atomType); + default: + return false; + } + } + + + /** + * Assigns shift values from 1D NMR peak list to atoms of an IAtomContainer. + * The shift values will be assigned sequentially. + * In case of a molecular formula is given in this class, the number of + * shifts must be equal to the number of atoms in this molecular formula. + * Otherwise this function will return a false value. + * In case of no molecular was given to class, new atom in the atom container + * will be created regarding to the inpout peak list. + * + * + * @param pathToPeakList Path to peak list (Bruker's TopSpin csv file + * format) + * @param atomType Element name (e.g. "C") which also occurrs in + * {@link testkit.Utils#getNMRShiftConstant(java.lang.String)} + * @return false if input shift list size greater than the number of atoms in + * molecular formula, if such was given to the class + * @throws java.io.IOException + */ + public final boolean parse1DNMRviaPeakTable(final String pathToPeakList, final String atomType) throws IOException { + + final ArrayList shifts = NMR.Utils.parsePeakTable(pathToPeakList, 4); + + return this.set1DNMRShifts(shifts, atomType); + } + + /** + * Assigns shift values from 1D NMR XML file to atoms of an IAtomContainer. + * The shift values will be assigned sequentially. 
+     * In case of a molecular formula is given in this class, the number of
+     * shifts must be equal to the number of atoms in this molecular formula.
+     * Otherwise this function will return a false value.
+     * In case no molecular formula was given to the class, new atoms will be
+     * created in the atom container according to the input peak list.
+     *
+     * @param pathToXML Path to XML file (Bruker's TopSpin XML file
+     * format)
+     * @param atomType Element name (e.g. "C") which also occurs in
+     * {@link testkit.Utils#getNMRShiftConstant(java.lang.String)}
+     * @return false if input shift list size greater than the number of atoms in
+     * molecular formula, if such was given to the class
+     * @throws java.io.IOException
+     * @throws javax.xml.parsers.ParserConfigurationException
+     * @throws org.xml.sax.SAXException
+     */
+    public final boolean parse1DNMRviaXML(final String pathToXML, final String atomType) throws IOException, ParserConfigurationException, SAXException {
+
+        final ArrayList<Double> shifts = NMR.Utils.parseXML(pathToXML, 1, 1);
+
+        return this.set1DNMRShifts(shifts, atomType);
+    }
+
+    /**
+     * Assigns the given 1D shift values to the atoms of the given atom type.
+     * If no atoms of that type are indexed yet, one new atom per shift value
+     * is added to the atom container first. If fewer shifts than atoms are
+     * given, the user is asked on the console which peaks are equivalent.
+     *
+     * @param shifts shift values to assign, in peak list order
+     * @param atomType element symbol of the atoms to assign the shifts to
+     * @return false, if the shift list is larger than the number of atoms in
+     * the atom container for the given atom type; otherwise true
+     */
+    private boolean set1DNMRShifts(final ArrayList<Double> shifts, final String atomType) {
+
+        final boolean atomTypeKnown = this.atomTypeIndices.containsKey(atomType);
+        if (atomTypeKnown && shifts.size() > this.atomTypeIndices.get(atomType).size()) {
+            // atoms of that type exist but too many picked peaks are to be assigned;
+            // the molecular formula may be absent when the atoms were created from an earlier peak list
+            System.err.println("Too many peaks in peak list for \"" + atomType + "\" and molecular formula \""
+                    + (this.molFormula != null ? MolecularFormulaManipulator.getString(this.molFormula) : "unknown") + "\"!!!");
+            return false;
+        }
+        if (!atomTypeKnown) {
+            // no atoms of that type yet: create one atom per peak of the given peak list
+            for (final double shift : shifts) {
+                final IAtom atom = new Atom(atomType);
+                atom.setProperty(NMR.Utils.getNMRShiftConstant(atomType), shift);
+                atom.setImplicitHydrogenCount(null);
+                this.mol.addAtom(atom);
+            }
+            this.setAtomTypeIndices();
+        }
+        // shift assignment: the first shifts.size() atoms of that type get a shift each
+        int assignedShiftCount = 0;
+        for (final int i : this.atomTypeIndices.get(atomType)) {
+            if (assignedShiftCount < shifts.size()) {
+                this.mol.getAtom(i).setProperty(NMR.Utils.getNMRShiftConstant(atomType), shifts.get(assignedShiftCount));
+            }
+            assignedShiftCount++;
+        }
+        // "fill up" the missing equivalent peaks if there are fewer signals than atoms of that type
+        if (shifts.size() < this.atomTypeIndices.get(atomType).size()) {
+            System.out.println("Not enough peaks in 1D peak list for \"" + atomType + "\"!!!");
+            this.askForEquivalentPeaks(atomType);
+        }
+
+        this.setAtomTypeIndices();
+
+        return true;
+    }
+
+
+    /**
+     * Interactively fills up missing shift values of atoms of the given atom
+     * type. For every atom without a shift, the already assigned atoms are
+     * listed on standard output and the index of the equivalent atom is read
+     * from standard input. The shift is copied and both atoms are registered
+     * in each other's equivalence list.
+     *
+     * @param atomType element symbol of the atoms to complete
+     */
+    @SuppressWarnings("unchecked") // equivalence lists are only ever created in this method
+    private void askForEquivalentPeaks(final String atomType) {
+
+        // Deliberately NOT closed: closing the Scanner would close System.in
+        // and make every later prompt (e.g. for another atom type) fail.
+        final Scanner reader = new Scanner(System.in);
+        final HashSet<Integer> validIndices = new HashSet<>();
+        for (final int i : this.atomTypeIndices.get(atomType)) {
+            if (this.mol.getAtom(i).getProperty(Utils.getNMRShiftConstant(atomType)) != null) {
+                continue;
+            }
+            System.out.println("\nThe " + i + "th shift value is missing!\nWhich shift value is not unique?");
+            for (final int k : this.atomTypeIndices.get(atomType)) {
+                if (this.mol.getAtom(k).getProperty(Utils.getNMRShiftConstant(atomType)) != null) {
+                    System.out.println(k + "\t: " + this.mol.getAtom(k).getProperty(Utils.getNMRShiftConstant(atomType)));
+                    validIndices.add(k);
+                }
+            }
+            int n = -1;
+            while (!validIndices.contains(n)) {
+                System.out.println("Enter the index: ");
+                if (reader.hasNextInt()) {
+                    n = reader.nextInt();
+                } else {
+                    // discard a non-numeric token instead of failing with InputMismatchException
+                    reader.next();
+                }
+            }
+            final IAtom missingAtom = this.mol.getAtom(i);
+            final IAtom equivalentAtom = this.mol.getAtom(n);
+            missingAtom.setProperty(Utils.getNMRShiftConstant(atomType), equivalentAtom.getProperty(Utils.getNMRShiftConstant(atomType)));
+            if (missingAtom.getProperty(ParseRawData.CONST_STRING_EQUIVALENCE) == null) {
+                missingAtom.setProperty(ParseRawData.CONST_STRING_EQUIVALENCE, new ArrayList<Integer>());
+            }
+            if (equivalentAtom.getProperty(ParseRawData.CONST_STRING_EQUIVALENCE) == null) {
+                equivalentAtom.setProperty(ParseRawData.CONST_STRING_EQUIVALENCE, new ArrayList<Integer>());
+            }
+            ((ArrayList<Integer>) missingAtom.getProperty(ParseRawData.CONST_STRING_EQUIVALENCE)).add(n);
+            ((ArrayList<Integer>) equivalentAtom.getProperty(ParseRawData.CONST_STRING_EQUIVALENCE)).add(i);
+        }
+    }
+
+
+    /**
+     * Wrapper function for automatically choosing which file format to take.
+     * For more details see
+     * {@link NMR.ParseRawData#parseDEPTviaPeakTable(java.lang.String, java.lang.String, double) }
+     * and {@link NMR.ParseRawData#parseDEPTviaXML(java.lang.String, java.lang.String, double) }
+     * @param pathToDEPT90 path to the DEPT90 file
+     * @param pathToDEPT135 path to the DEPT135 file
+     * @param tol tolerance value [ppm] when matching carbon shifts
+     * @return false if the two files are not both "csv" or both "xml";
+     * otherwise true
+     * @throws IOException
+     * @throws ParserConfigurationException
+     * @throws SAXException
+     */
+    public final boolean parseDEPT(final String pathToDEPT90, final String pathToDEPT135, final double tol) throws IOException, ParserConfigurationException, SAXException {
+
+        if (NMR.Utils.getFileFormat(pathToDEPT90).equals("csv") && NMR.Utils.getFileFormat(pathToDEPT135).equals("csv")) {
+            this.parseDEPTviaPeakTable(pathToDEPT90, pathToDEPT135, tol);
+        } else if (NMR.Utils.getFileFormat(pathToDEPT90).equals("xml") && NMR.Utils.getFileFormat(pathToDEPT135).equals("xml")) {
+            this.parseDEPTviaXML(pathToDEPT90, pathToDEPT135, tol);
+        } else {
+            return false;
+        }
+
+        return true;
+    }
+
+
+    /**
+     * Sets the number of implicit hydrogens from two carbon DEPT90 and DEPT135
+     * peak
+     * tables to carbon atoms. The meanwhile found matches are corrected,
+     * see
+     * {@link testkit.Utils#correctShiftMatches(IAtomContainer, ArrayList, ArrayList, double,String)}.
+     *
+     * @param pathToDEPT90 Path to DEPT90 peak list (Bruker's TopSpin csv file
+     * format)
+     * @param pathToDEPT135 Path to DEPT135 peak list (Bruker's TopSpin csv file
+     * format)
+     * @param tol Tolerance value [ppm] when matching carbon shifts
+     * @throws java.io.IOException
+     */
+    public final void parseDEPTviaPeakTable(final String pathToDEPT90, final String pathToDEPT135, final double tol) throws IOException {
+
+        // arguments are evaluated left to right: DEPT90 matches, DEPT135 matches, DEPT135 intensities
+        this.setImplicitHydrogenNumberFromDEPT(
+                NMR.Utils.matchShiftsFromPeakTable(this.mol, pathToDEPT90, "C", tol, 4),
+                NMR.Utils.matchShiftsFromPeakTable(this.mol, pathToDEPT135, "C", tol, 4),
+                NMR.Utils.parsePeakTable(pathToDEPT135, 6));
+    }
+
+    /**
+     * Sets the number of implicit hydrogens from two carbon DEPT90 and DEPT135
+     * XML files to carbon atoms. The meanwhile found matches are corrected, see
+     * {@link testkit.Utils#correctShiftMatches(IAtomContainer, ArrayList, ArrayList, double,String)}.
+     *
+     * @param pathToDEPT90 Path to DEPT90 peak list (Bruker's TopSpin XML file
+     * format)
+     * @param pathToDEPT135 Path to DEPT135 peak list (Bruker's TopSpin XML file
+     * format)
+     * @param tol Tolerance value [ppm] when matching carbon shifts
+     * @throws java.io.IOException
+     * @throws javax.xml.parsers.ParserConfigurationException
+     * @throws org.xml.sax.SAXException
+     */
+    public final void parseDEPTviaXML(final String pathToDEPT90, final String pathToDEPT135, final double tol) throws IOException, ParserConfigurationException, SAXException {
+
+        final ArrayList<Integer> matchesDEPT90 = NMR.Utils.matchShiftsFromXML(this.mol, pathToDEPT90, "C", tol, 1, 1);
+        final ArrayList<Integer> matchesDEPT135 = NMR.Utils.matchShiftsFromXML(this.mol, pathToDEPT135, "C", tol, 1, 1);
+        final ArrayList<Double> intensitiesDEPT135 = NMR.Utils.parseXML(pathToDEPT135, 1, 2);
+
+        this.setImplicitHydrogenNumberFromDEPT(matchesDEPT90, matchesDEPT135, intensitiesDEPT135);
+    }
+
+    /**
+     * Derives the implicit hydrogen count of every carbon that has a shift
+     * but no hydrogen count yet: a DEPT90 match means CH; otherwise a DEPT135
+     * match with negative intensity means CH2 and with positive intensity
+     * CH3; everything else is treated as quaternary carbon. The count is also
+     * copied to the atoms registered as equivalent. A summary of the assigned
+     * protons is printed to standard output.
+     *
+     * @param matchesDEPT90 matched atom index per DEPT90 peak
+     * @param matchesDEPT135 matched atom index per DEPT135 peak
+     * @param intensitiesDEPT135 intensity per DEPT135 peak, same order as
+     * matchesDEPT135
+     */
+    @SuppressWarnings("unchecked") // equivalence lists hold atom indices
+    private void setImplicitHydrogenNumberFromDEPT(final ArrayList<Integer> matchesDEPT90, final ArrayList<Integer> matchesDEPT135, final ArrayList<Double> intensitiesDEPT135) {
+
+        int hCountAll = 0;
+        // without any indexed carbon there is nothing to assign (avoids a NullPointerException)
+        if (this.atomTypeIndices.containsKey("C")) {
+            for (final int i : this.atomTypeIndices.get("C")) {
+                final IAtom carbon = this.mol.getAtom(i);
+                if ((carbon.getProperty(CDKConstants.NMRSHIFT_CARBON) == null) || (carbon.getImplicitHydrogenCount() != null)) {
+                    continue;
+                }
+                final int matchDEPT90 = matchesDEPT90.indexOf(i);
+                final int matchDEPT135 = matchesDEPT135.indexOf(i);
+                final int hCount;
+                if (matchDEPT90 >= 0) {
+                    // CH
+                    hCount = 1;
+                } else if (matchDEPT135 >= 0) {
+                    // CH2 or CH3, decided by the sign of the DEPT135 intensity
+                    final double intensity = intensitiesDEPT135.get(matchDEPT135);
+                    if (intensity < 0) {
+                        hCount = 2;
+                    } else if (intensity > 0) {
+                        hCount = 3;
+                    } else {
+                        // qC
+                        hCount = 0;
+                    }
+                } else {
+                    // qC
+                    hCount = 0;
+                }
+                carbon.setImplicitHydrogenCount(hCount);
+                hCountAll += hCount;
+                if (carbon.getProperty(ParseRawData.CONST_STRING_EQUIVALENCE) != null) {
+                    for (final Integer k : (ArrayList<Integer>) carbon.getProperty(ParseRawData.CONST_STRING_EQUIVALENCE)) {
+                        this.mol.getAtom(k).setImplicitHydrogenCount(hCount);
+                        hCountAll += hCount;
+                    }
+                }
+            }
+        }
+        if (this.molFormula != null) {
+            System.out.println("assigned protons to carbons: " + hCountAll + " (" + MolecularFormulaManipulator.getElementCount(this.molFormula, "H") + ") -> " + (MolecularFormulaManipulator.getElementCount(this.molFormula, "H") - hCountAll) + " protons to be attached on hetero atoms!!!");
+        } else {
+            System.out.println("assigned protons to carbons: " + hCountAll + "!!!");
+        }
+    }
+
+
+    /**
+     * Wrapper function for automatically choosing which file format to take.
+     * For more details see
+     * {@link NMR.ParseRawData#parseHSQCviaPeakTable(java.lang.String, java.lang.String, double)}
+     * and {@link NMR.ParseRawData#parseHSQCviaXML(java.lang.String, java.lang.String, double)}
+     *
+     * @param pathToPeakList path to the HSQC file
+     * @param atomType element symbol of the heavy atom dimension
+     * @param tol tolerance value [ppm] for heavy atom shift matching
+     * @return false if the file format is neither "csv" nor "xml";
+     * otherwise true
+     * @throws IOException
+     * @throws ParserConfigurationException
+     * @throws SAXException
+     */
+    public final boolean parseHSQC(final String pathToPeakList, final String atomType, final double tol) throws IOException, ParserConfigurationException, SAXException {
+
+        switch (NMR.Utils.getFileFormat(pathToPeakList)) {
+            case "csv":
+                parseHSQCviaPeakTable(pathToPeakList, atomType, tol);
+                break;
+            case "xml":
+                parseHSQCviaXML(pathToPeakList, atomType, tol);
+                break;
+            default:
+                return false;
+        }
+
+        return true;
+    }
+
+
+    /**
+     * Assigns shifts to implicit hydrogens of a given atom type from HSQC
+     * peak table, e.g. 1H,13C-HSQC or 1H,15N-HSQC. The implicit hydrogen
+     * number for an atom of the given atom type must be set beforehand.
+     * In case of 1H,13C-HSQC, this could be done by
+     * {@link ParseRawData#parseDEPT(String, String, double)} or
+     * {@link ParseRawData#parseDEPTviaPeakTable(String, String, double)} or
+     * {@link ParseRawData#parseDEPTviaXML(String, String, double) }.
+     *
+     * @param pathToPeakList path to HSQC peak table (Bruker's TopSpin csv file
+     * format)
+     * @param atomType Element name (e.g. "C") which also occurs in
+     * {@link testkit.Utils#getNMRShiftConstant(java.lang.String)}
+     * @param tol tolerance value [ppm] for matching the atoms of given atom
+     * type
+     * within the atom container
+     * @throws IOException
+     */
+    public final void parseHSQCviaPeakTable(final String pathToPeakList, final String atomType, final double tol) throws IOException {
+
+        final ArrayList<Double> hydrogenShifts = NMR.Utils.parsePeakTable(pathToPeakList, 5);
+        final ArrayList<Integer> matchesAtomType = NMR.Utils.matchShiftsFromPeakTable(this.mol, pathToPeakList, atomType, tol, 6);
+
+        this.setImplicitHydrogenShifts(hydrogenShifts, matchesAtomType);
+    }
+
+    /**
+     * Assigns shifts to implicit hydrogens of a given atom type from HSQC XML
+     * file, e.g. 1H,13C-HSQC or 1H,15N-HSQC. The implicit hydrogen
+     * number for an atom of the given atom type must be set beforehand.
+     * In case of 1H,13C-HSQC, this could be done by
+     * {@link ParseRawData#parseDEPT(String, String, double)} or
+     * {@link ParseRawData#parseDEPTviaPeakTable(String, String, double)} or
+     * {@link ParseRawData#parseDEPTviaXML(String, String, double) }.
+     *
+     * @param pathToXML path to HSQC XML file
+     * @param atomType Element name (e.g. "C") which also occurs in
+     * {@link testkit.Utils#getNMRShiftConstant(java.lang.String)}
+     * @param tol tolerance value [ppm] for matching the atoms of given atom
+     * type
+     * within the atom container
+     * @throws IOException
+     * @throws javax.xml.parsers.ParserConfigurationException
+     * @throws org.xml.sax.SAXException
+     */
+    public final void parseHSQCviaXML(final String pathToXML, final String atomType, final double tol) throws IOException, ParserConfigurationException, SAXException {
+
+        final ArrayList<Double> hydrogenShifts = NMR.Utils.parseXML(pathToXML, 2, 2);
+        final ArrayList<Integer> matchesAtomType = NMR.Utils.matchShiftsFromXML(this.mol, pathToXML, atomType, tol, 2, 1);
+
+        this.setImplicitHydrogenShifts(hydrogenShifts, matchesAtomType);
+    }
+
+    /**
+     * Stores hydrogen shifts on the matched heavy atoms. For every peak whose
+     * heavy atom was matched and carries at least one implicit hydrogen, the
+     * hydrogen shift is appended to that atom's hydrogen shift list, as long
+     * as the list holds fewer entries than the atom has implicit hydrogens.
+     *
+     * @param hydrogenShifts hydrogen shift per peak
+     * @param matchesAtomType matched heavy atom index per peak (negative if
+     * unmatched), same order as hydrogenShifts
+     */
+    private void setImplicitHydrogenShifts(final ArrayList<Double> hydrogenShifts, final ArrayList<Integer> matchesAtomType) {
+
+        for (int i = 0; i < matchesAtomType.size(); i++) {
+            if (matchesAtomType.get(i) < 0) {
+                continue;
+            }
+            final IAtom matchAtom = this.mol.getAtom(matchesAtomType.get(i));
+            // hydrogen count unknown or zero: no hydrogen shift can belong to this atom
+            if (matchAtom.getImplicitHydrogenCount() == null || matchAtom.getImplicitHydrogenCount() == 0) {
+                continue;
+            }
+            if (matchAtom.getProperty(ParseRawData.CONST_STRING_HYDROGENSHIFTS) == null) {
+                matchAtom.setProperty(ParseRawData.CONST_STRING_HYDROGENSHIFTS, new ArrayList<Double>(matchAtom.getImplicitHydrogenCount()));
+            }
+            final ArrayList<Double> assignedHydrogensShifts = matchAtom.getProperty(ParseRawData.CONST_STRING_HYDROGENSHIFTS);
+            if (assignedHydrogensShifts.size() < matchAtom.getImplicitHydrogenCount()) {
+                assignedHydrogensShifts.add(hydrogenShifts.get(i));
+            }
+        }
+    }
+
+    /**
+     * Finds the matches with the lowest deviations between a given hydrogen
+     * shift value set and implicit hydrogens of heavy atoms in the atom
+     * container.
+     *
+     * @param shiftList shift value list to match
+     * @param tol tolerance value [ppm]
+     * @return for every input shift the index of the matched heavy atom in
+     * the atom container, or -1 if no match was found
+     */
+    private ArrayList<Integer> findImplicitHydrogenShiftMatches(final ArrayList<Double> shiftList, final double tol) {
+
+        final ArrayList<Integer> matches = new ArrayList<>(shiftList.size());
+        for (final double shift : shiftList) {
+            matches.add(this.findSingleImplicitHydrogenShiftMatch(shift, tol)[0]);
+        }
+
+        return matches;
+    }
+
+    /**
+     * Finds a match with the lowest deviations between a given hydrogen
+     * shift value and implicit hydrogens of heavy atoms in the atom
+     * container.
+     *
+     * @param queryShift hydrogen shift value [ppm] to match
+     * @param tol tolerance value [ppm] for matching
+     * @return int array of two values: 1. index of matched heavy atom in
+     * atom container, 2. index of matched hydrogen in hydrogen shift list
+     * of corresponding found heavy atom; both are -1 if nothing matched
+     */
+    private int[] findSingleImplicitHydrogenShiftMatch(final double queryShift, final double tol) {
+
+        int matchIndexAtom = -1;
+        int matchIndexProton = -1;
+        // a candidate must deviate by strictly less than the best deviation so far (initially tol)
+        double minDiff = tol;
+        for (int i = 0; i < this.mol.getAtomCount(); i++) {
+            final ArrayList<Double> protonShiftList = this.mol.getAtom(i).getProperty(ParseRawData.CONST_STRING_HYDROGENSHIFTS);
+            // skip atoms without assigned hydrogen shifts
+            if (protonShiftList == null) {
+                continue;
+            }
+            for (int j = 0; j < protonShiftList.size(); j++) {
+                final double protonShift = protonShiftList.get(j);
+                final double diff = Math.abs(queryShift - protonShift);
+                // figure out the atom with lowest shift deviation
+                if ((queryShift - tol <= protonShift) && (protonShift <= queryShift + tol) && (diff < minDiff)) {
+                    minDiff = diff;
+                    matchIndexProton = j;
+                    matchIndexAtom = i;
+                }
+            }
+        }
+
+        return new int[]{matchIndexAtom, matchIndexProton};
+    }
+
+    /**
+     * Corrects a hydrogen match list regarding a given shift list and an atom
+     * container.
+     * This is useful when two or more hydrogen shift values match
+     * with the same hydrogen shift (actually heavy atom) in the atom container.
+     * So the purpose here is to enable more unambiguous matches. This method
+     * first looks for unambiguous matches and calculates the median of the
+     * difference values between the shift list values and the shifts of atom
+     * container. Then, all shift list values are adjusted (+/-) with this
+     * median value. Note that the given shift list is modified in place.
+     *
+     * @param shifts Shift value list to match; its values are shifted by the
+     * median deviation if at least one unambiguous match exists
+     * @param matches Match list to correct
+     * @param tol Tolerance value [ppm] for hydrogen rematching
+     * @return the match list after rematching, or the given match list if no
+     * unambiguous match was available
+     */
+    private ArrayList<Integer> correctHydrogenShiftMatches(final ArrayList<Double> shifts, ArrayList<Integer> matches, final double tol) {
+
+        final ArrayList<Double> diffs = new ArrayList<>();
+        for (final Integer matchedAtomIndex : new HashSet<>(matches)) {
+            // only successful matches that occur exactly once are unambiguous
+            if (matchedAtomIndex < 0 || Collections.frequency(matches, matchedAtomIndex) != 1) {
+                continue;
+            }
+            final int matchIndex = matches.indexOf(matchedAtomIndex);
+            final int[] singleMatchIndex = this.findSingleImplicitHydrogenShiftMatch(shifts.get(matchIndex), tol);
+            // the match list may stem from another tolerance; never index the atom container with -1
+            if (singleMatchIndex[0] < 0) {
+                continue;
+            }
+            final ArrayList<Double> singleMatchShifts = this.mol.getAtom(singleMatchIndex[0]).getProperty(ParseRawData.CONST_STRING_HYDROGENSHIFTS);
+            diffs.add(shifts.get(matchIndex) - singleMatchShifts.get(singleMatchIndex[1]));
+        }
+        if (!diffs.isEmpty()) {
+            // the median is only defined on ordered values
+            Collections.sort(diffs);
+            final int middle = diffs.size() / 2;
+            final double median;
+            if (diffs.size() % 2 == 1) {
+                median = diffs.get(middle);
+            } else {
+                median = (diffs.get(middle - 1) + diffs.get(middle)) / 2.0;
+            }
+            // subtract the median of the differences from all shift list values (input) and match again then
+            for (int i = 0; i < shifts.size(); i++) {
+                shifts.set(i, shifts.get(i) - median);
+            }
+            // rematch
+            matches = this.findImplicitHydrogenShiftMatches(shifts, tol);
+        }
+
+        return matches;
+    }
+
+
+    /**
+     * Wrapper function for automatically choosing which file format to take.
+     * For more details see
+     * {@link NMR.ParseRawData#parseCOSYviaPeakTable(java.lang.String, double)}
+     * and {@link NMR.ParseRawData#parseCOSYviaXML(java.lang.String, double)}
+     *
+     * @param pathToPeakList path to the H,H-COSY file
+     * @param tol tolerance value [ppm] for hydrogen shift matching
+     * @return false if the file format is neither "csv" nor "xml" or if the
+     * signals are not bidirectional (no links are set then); otherwise true
+     * @throws IOException
+     * @throws ParserConfigurationException
+     * @throws SAXException
+     */
+    public final boolean parseCOSY(final String pathToPeakList, final double tol) throws IOException, ParserConfigurationException, SAXException {
+
+        // propagate the result of the format specific parser instead of always reporting success
+        switch (NMR.Utils.getFileFormat(pathToPeakList)) {
+            case "csv":
+                return this.parseCOSYviaPeakTable(pathToPeakList, tol);
+            case "xml":
+                return this.parseCOSYviaXML(pathToPeakList, tol);
+            default:
+                return false;
+        }
+    }
+
+
+    /**
+     * Sets links between implicit hydrogens from H,H-COSY peak table to heavy
+     * atoms in the atom container. The implicit hydrogen number for
+     * a heavy atom, which is the corresponding heavy atom for an H shift value,
+     * must be set beforehand. In case of carbons, this could be done by parsing
+     * the DEPT information:
+     * {@link ParseRawData#parseDEPT(String, String, double)} or
+     * {@link ParseRawData#parseDEPTviaPeakTable(String, String, double)} or
+     * {@link ParseRawData#parseDEPTviaXML(String, String, double)}.
+     * Returns true if all signals are bidirectional, so that atom A has a
+     * signal according to atom B and vice versa.
+     *
+     * @param pathToPeakList path to H,H-COSY peak table (Bruker's TopSpin csv
+     * file
+     * format)
+     * @param tol tolerance value [ppm] for hydrogen shift matching
+     * @return the result of the COSY link assignment
+     * @throws IOException
+     */
+    public final boolean parseCOSYviaPeakTable(final String pathToPeakList, final double tol) throws IOException {
+
+        // columns 5 and 6 of the peak table hold the two hydrogen shift dimensions
+        return this.setCOSY(
+                NMR.Utils.parsePeakTable(pathToPeakList, 5),
+                NMR.Utils.parsePeakTable(pathToPeakList, 6),
+                tol);
+    }
+
+    /**
+     * Sets links between implicit hydrogens from H,H-COSY peak XML file to
+     * heavy
+     * atoms in the atom container. The implicit hydrogen number for a heavy
+     * atom, which is the corresponding heavy atom for an H shift value, must be
+     * set beforehand. In case of carbons, this could be done by parsing the
+     * DEPT information:
+     * {@link ParseRawData#parseDEPT(String, String, double)} or
+     * {@link ParseRawData#parseDEPTviaPeakTable(String, String, double)} or
+     * {@link ParseRawData#parseDEPTviaXML(String, String, double)}. Returns true if
+     * all signals are bidirectional, so that atom A has a signal according to
+     * atom B and vice versa.
+     *
+     * @param pathToXML path to H,H-COSY peak XML file (Bruker's TopSpin XML
+     * file format)
+     * @param tol tolerance value [ppm] for hydrogen shift matching
+     * @return true if all signals are bidirectional and the links were set;
+     * otherwise false
+     * @throws IOException
+     * @throws javax.xml.parsers.ParserConfigurationException
+     * @throws org.xml.sax.SAXException
+     */
+    public final boolean parseCOSYviaXML(final String pathToXML, final double tol) throws IOException, ParserConfigurationException, SAXException {
+
+        final ArrayList<Double> hydrogenShifts1 = NMR.Utils.parseXML(pathToXML, 2, 1);
+        final ArrayList<Double> hydrogenShifts2 = NMR.Utils.parseXML(pathToXML, 2, 2);
+
+        return this.setCOSY(hydrogenShifts1, hydrogenShifts2, tol);
+    }
+
+    /**
+     * Matches both hydrogen shift dimensions against the assigned implicit
+     * hydrogen shifts and, if the matches are bidirectional, stores the links
+     * under the COSY property.
+     *
+     * @param hydrogenShifts1 hydrogen shifts of the first dimension
+     * @param hydrogenShifts2 hydrogen shifts of the second dimension
+     * @param tol tolerance value [ppm] for hydrogen shift matching
+     * @return false if the signals are not bidirectional (no links are set
+     * then); otherwise true
+     */
+    private boolean setCOSY(final ArrayList<Double> hydrogenShifts1, final ArrayList<Double> hydrogenShifts2, final double tol) {
+
+        final ArrayList<Integer> hydrogenShiftMatches1 = this.findImplicitHydrogenShiftMatches(hydrogenShifts1, tol);
+        final ArrayList<Integer> hydrogenShiftMatches2 = this.findImplicitHydrogenShiftMatches(hydrogenShifts2, tol);
+        // are all signals bidirectional?
+        final boolean bidirectional = NMR.Utils.isBidirectional(hydrogenShiftMatches1, hydrogenShiftMatches2);
+        if (bidirectional) {
+            NMR.Utils.setBidirectionalLinks(this.mol, hydrogenShiftMatches1, hydrogenShiftMatches2, ParseRawData.CONST_STRING_COSY);
+        }
+
+        return bidirectional;
+    }
+
+
+    /**
+     * Wrapper function for automatically choosing which file format to take.
+     * For more details see
+     * {@link NMR.ParseRawData#parseINADEQUATEviaPeakTable(java.lang.String, double)}
+     * and {@link NMR.ParseRawData#parseINADEQUATEviaXML(java.lang.String, double)}
+     *
+     * @param pathToPeakList path to the INADEQUATE file
+     * @param tol tolerance value [ppm] for carbon shift matching
+     * @return false if the file format is neither "csv" nor "xml" or if the
+     * signals are not bidirectional (no links are set then); otherwise true
+     * @throws IOException
+     * @throws ParserConfigurationException
+     * @throws SAXException
+     */
+    public final boolean parseINADEQUATE(final String pathToPeakList, final double tol) throws IOException, ParserConfigurationException, SAXException {
+
+        // propagate the result of the format specific parser instead of always reporting success
+        switch (NMR.Utils.getFileFormat(pathToPeakList)) {
+            case "csv":
+                return this.parseINADEQUATEviaPeakTable(pathToPeakList, tol);
+            case "xml":
+                return this.parseINADEQUATEviaXML(pathToPeakList, tol);
+            default:
+                return false;
+        }
+    }
+
+
+    /**
+     * Sets links between carbons from INADEQUATE peak table in the atom
+     * container.
+     * To match the shift values, the carbon shifts must be set beforehand.
+     * This could be done by
+     * {@link ParseRawData#parse1DNMR(String, String)} or
+     * {@link ParseRawData#parse1DNMRviaPeakTable(String, String)} or
+     * {@link ParseRawData#parse1DNMRviaXML(String, String) }.
+     * Returns true if all signals are bidirectional, so that atom A has a
+     * signal according to atom B and vice versa.
+     *
+     * @param pathToPeakList path to INADEQUATE peak table (Bruker's TopSpin csv
+     * file format)
+     * @param tol tolerance value [ppm] for carbon shift matching
+     * @return true if all signals are bidirectional and the links were set;
+     * otherwise false
+     * @throws IOException
+     */
+    public final boolean parseINADEQUATEviaPeakTable(final String pathToPeakList, final double tol) throws IOException {
+
+        final ArrayList<Double> carbonShifts1 = NMR.Utils.parsePeakTable(pathToPeakList, 5);
+        final ArrayList<Double> carbonShifts2 = NMR.Utils.parsePeakTable(pathToPeakList, 6);
+
+        return this.setINADEQUATE(carbonShifts1, carbonShifts2, tol);
+    }
+
+    /**
+     * Sets links between carbons from INADEQUATE xml peak file in the atom
+     * container.
+     * To match the shift values, the carbon shifts must be set beforehand.
+     * This could be done by
+     * {@link ParseRawData#parse1DNMRviaPeakTable(String, String)} or
+     * {@link ParseRawData#parse1DNMRviaXML(String, String) }.
+     * Returns true if all signals are bidirectional, so that atom A has a
+     * signal according to atom B and vice versa.
+     *
+     * @param pathToXML path to INADEQUATE peak XML file (Bruker's TopSpin XML
+     * file format)
+     * @param tol tolerance value [ppm] for carbon shift matching
+     * @return true if all signals are bidirectional and the links were set;
+     * otherwise false
+     * @throws IOException
+     * @throws javax.xml.parsers.ParserConfigurationException
+     * @throws org.xml.sax.SAXException
+     */
+    public final boolean parseINADEQUATEviaXML(final String pathToXML, final double tol) throws IOException, ParserConfigurationException, SAXException {
+
+        final ArrayList<Double> carbonShifts1 = NMR.Utils.parseXML(pathToXML, 2, 1);
+        final ArrayList<Double> carbonShifts2 = NMR.Utils.parseXML(pathToXML, 2, 2);
+
+        return this.setINADEQUATE(carbonShifts1, carbonShifts2, tol);
+    }
+
+    /**
+     * Matches both carbon shift dimensions against the carbon shifts in the
+     * atom container and, if the matches are bidirectional, stores the links
+     * under the INADEQUATE property.
+     *
+     * @param carbonShifts1 carbon shifts of the first dimension
+     * @param carbonShifts2 carbon shifts of the second dimension
+     * @param tol tolerance value [ppm] for carbon shift matching
+     * @return false if the signals are not bidirectional (no links are set
+     * then); otherwise true
+     */
+    private boolean setINADEQUATE(final ArrayList<Double> carbonShifts1, final ArrayList<Double> carbonShifts2, final double tol) {
+
+        final ArrayList<Integer> carbonShiftMatches1 = NMR.Utils.findShiftMatches(this.mol, carbonShifts1, tol, "C");
+        final ArrayList<Integer> carbonShiftMatches2 = NMR.Utils.findShiftMatches(this.mol, carbonShifts2, tol, "C");
+        // are all signals bidirectional?
+        final boolean bidirectional = NMR.Utils.isBidirectional(carbonShiftMatches1, carbonShiftMatches2);
+        if (bidirectional) {
+            NMR.Utils.setBidirectionalLinks(this.mol, carbonShiftMatches1, carbonShiftMatches2, ParseRawData.CONST_STRING_INADEQUATE);
+        }
+
+        return bidirectional;
+    }
+
+
+    /**
+     * Wrapper function for automatically choosing which file format to take.
+     * For more details see
+     * {@link NMR.ParseRawData#parseHMBCviaPeakTable(String, String, double, double)}
+     * and
+     * {@link NMR.ParseRawData#parseHMBCviaXML(String, String, double, double)}
+     *
+     * @param pathToPeakList path to the HMBC file
+     * @param atomType element symbol of the heavy atom dimension
+     * @param tolHydrogen tolerance value [ppm] for hydrogen shift matching
+     * @param tolHeavy tolerance value [ppm] for heavy atom shift matching
+     * @return false if the file format is neither "csv" nor "xml";
+     * otherwise true
+     * @throws IOException
+     * @throws ParserConfigurationException
+     * @throws SAXException
+     */
+    public final boolean parseHMBC(final String pathToPeakList, final String atomType, final double tolHydrogen, final double tolHeavy) throws IOException, ParserConfigurationException, SAXException {
+
+        switch (NMR.Utils.getFileFormat(pathToPeakList)) {
+            case "csv":
+                parseHMBCviaPeakTable(pathToPeakList, atomType, tolHydrogen, tolHeavy);
+                break;
+            case "xml":
+                parseHMBCviaXML(pathToPeakList, atomType, tolHydrogen, tolHeavy);
+                break;
+            default:
+                return false;
+        }
+
+        return true;
+    }
+
+
+    /**
+     * Sets links between implicit hydrogens and heavy atoms from HMBC peak
+     * table in the atom container. The implicit hydrogen number for a heavy
+     * atom, which is the corresponding heavy atom for an H shift value, must be
+     * set beforehand. In case of carbon, this could be done by parsing the
+     * DEPT information:
+     * {@link ParseRawData#parseDEPT(String, String, double) } or
+     * {@link ParseRawData#parseDEPTviaPeakTable(String, String, double)} or
+     * {@link ParseRawData#parseDEPTviaXML(String, String, double)}.
+     *
+     * @param pathToPeakList path to HMBC peak table (Bruker's TopSpin csv
+     * file format)
+     * @param atomType Element name (e.g. "C") which also occurs in
+     * {@link testkit.Utils#getNMRShiftConstant(java.lang.String)}
+     * @param tolHydrogen tolerance value [ppm] for hydrogen shift matching
+     * @param tolHeavy tolerance value [ppm] for heavy atom shift matching
+     * @throws IOException
+     */
+    public final void parseHMBCviaPeakTable(final String pathToPeakList, final String atomType, final double tolHydrogen, final double tolHeavy) throws IOException {
+
+        final ArrayList<Double> hydrogenShifts = NMR.Utils.parsePeakTable(pathToPeakList, 5);
+        final ArrayList<Integer> hydrogenShiftMatches = this.correctHydrogenShiftMatches(hydrogenShifts, this.findImplicitHydrogenShiftMatches(hydrogenShifts, tolHydrogen), tolHydrogen);
+        final ArrayList<Integer> heavyAtomShiftMatches = NMR.Utils.matchShiftsFromPeakTable(this.mol, pathToPeakList, atomType, tolHeavy, 6);
+
+        this.setHMBC(hydrogenShiftMatches, heavyAtomShiftMatches);
+    }
+
+    /**
+     * Sets links between implicit hydrogens and heavy atoms from HMBC peak
+     * XML file in the atom container. The implicit hydrogen number for a heavy
+     * atom, which is the corresponding heavy atom for an H shift value, must be
+     * set beforehand. In case of carbon, this could be done by parsing the DEPT
+     * information:
+     * {@link ParseRawData#parseDEPT(String, String, double) } or
+     * {@link ParseRawData#parseDEPTviaPeakTable(String, String, double)} or
+     * {@link ParseRawData#parseDEPTviaXML(String, String, double)}.
+     *
+     * @param pathToXML path to HMBC peak XML file (Bruker's TopSpin XML file
+     * format)
+     * @param atomType Element name (e.g. "C") which also occurs in
+     * {@link testkit.Utils#getNMRShiftConstant(java.lang.String)}
+     * @param tolHydrogen tolerance value [ppm] for hydrogen shift matching
+     * @param tolHeavy tolerance value [ppm] for heavy atom shift matching
+     * @throws IOException
+     * @throws javax.xml.parsers.ParserConfigurationException
+     * @throws org.xml.sax.SAXException
+     */
+    public final void parseHMBCviaXML(final String pathToXML, final String atomType, final double tolHydrogen, final double tolHeavy) throws IOException, ParserConfigurationException, SAXException {
+
+        final ArrayList<Double> hydrogenShifts = NMR.Utils.parseXML(pathToXML, 2, 2);
+        final ArrayList<Integer> hydrogenShiftMatches = this.correctHydrogenShiftMatches(hydrogenShifts, this.findImplicitHydrogenShiftMatches(hydrogenShifts, tolHydrogen), tolHydrogen);
+        final ArrayList<Integer> heavyAtomShiftMatches = NMR.Utils.matchShiftsFromXML(this.mol, pathToXML, atomType, tolHeavy, 2, 1);
+
+        this.setHMBC(hydrogenShiftMatches, heavyAtomShiftMatches);
+    }
+
+    /**
+     * Stores the HMBC correlations in the atom container. For every peak
+     * where both dimensions were matched, the index of the matched heavy atom
+     * is added (once) to the HMBC list of the atom matched via its hydrogen
+     * shift.
+     *
+     * @param hydrogenShiftMatches per peak, index of the atom matched via its
+     * hydrogen shift (negative if unmatched)
+     * @param heavyAtomShiftMatches per peak, index of the atom matched via
+     * its heavy atom shift (negative if unmatched)
+     */
+    private void setHMBC(final ArrayList<Integer> hydrogenShiftMatches, final ArrayList<Integer> heavyAtomShiftMatches) {
+
+        for (int i = 0; i < hydrogenShiftMatches.size(); i++) {
+            // a link needs a match in both dimensions
+            if (hydrogenShiftMatches.get(i) < 0 || heavyAtomShiftMatches.get(i) < 0) {
+                continue;
+            }
+            final IAtom protonCarrier = this.mol.getAtom(hydrogenShiftMatches.get(i));
+            if (protonCarrier.getProperty(ParseRawData.CONST_STRING_HMBC) == null) {
+                protonCarrier.setProperty(ParseRawData.CONST_STRING_HMBC, new ArrayList<Integer>());
+            }
+            final ArrayList<Integer> HMBCList = protonCarrier.getProperty(ParseRawData.CONST_STRING_HMBC);
+            if (!HMBCList.contains(heavyAtomShiftMatches.get(i))) {
+                HMBCList.add(heavyAtomShiftMatches.get(i));
+            }
+        }
+    }
+}
diff --git a/src/NMR/Process.java b/src/NMR/Process.java
new file mode 100644
index 0000000..c0cbf50
--- /dev/null
+++ b/src/NMR/Process.java
@@ -0,0 +1,647 @@
+/*
+ * The MIT License
+ *
+ * Copyright 2018 Michael Wenk [https://github.com/michaelwenk].
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +package NMR; + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.PrintWriter; +import java.io.UnsupportedEncodingException; +import java.util.ArrayList; +import java.util.HashMap; +import org.openscience.cdk.Atom; +import org.openscience.cdk.interfaces.IAtom; +import org.openscience.cdk.interfaces.IAtomContainer; +import org.openscience.cdk.interfaces.IAtomContainerSet; +import org.openscience.cdk.interfaces.IAtomType; +import org.openscience.cdk.interfaces.IMolecularFormula; +import org.openscience.cdk.qsar.descriptors.atomic.AtomHybridizationDescriptor; +import org.openscience.cdk.tools.manipulator.MolecularFormulaManipulator; + +/** + * + * @author Michael Wenk [https://github.com/michaelwenk] + */ +public class Process extends ParseRawData { + + final private IAtomContainer mol; + final private IMolecularFormula molFormula; + private HashMap> atomTypeIndices = new HashMap<>(); + private int[][] neighborhoodCountsMatrix; + final private HashMap> shiftIndicesInACSet = new HashMap<>(); // holding of all indices of each ac set (DB) entry [first value] and it's atom indices [second value] too + + + public Process(){ + super(); + this.molFormula = super.getMolecularFormula(); + this.mol = super.getAtomContainer(); + } + + public Process(final IMolecularFormula molFormula){ + super(molFormula); + this.molFormula = molFormula; + this.mol = Utils.removeAtoms(MolecularFormulaManipulator.getAtomContainer(this.molFormula), "H"); + this.setAtomTypeIndices(); + } + + + + + +// /** +// * Sets the hybridization level of each heavy atom in the molecule which has +// * its own shift value (property), only if a frequency threshold value for one +// * hybridization level is reached. 
+// * For further details see {@link testkit.Utils#getHybridizationsFromNMRShiftDB(IAtomContainer, String, double, IMolecularFormula)} +// * Two threshold value are used to accept a hybridization level and a found +// * neighbor as real neighbor (see thrs parameter descriptions). +// * +// * @param pathToNMRShiftDB path to NMRShiftDB sdf file +// * @param tol tolerance value [ppm] for atom shift matching in DB +// * @param thrsHybrid threshold for accepting a hybridization frequency rate, e.g. +// * the value 0.9 means that 90% of all found hybridizations for given carbon +// * shift must be from the same hybridization level +// * @param thrsNeighbor threshold for accepting a found neighbor frequency rate +// * (atom type) in database as real neighbor for the unknown structure. E.g. +// * the value 0.9 means that 90% of all found neighbors for given carbon +// * shift must be from the same atom type, like nitrogen or oxygen. +// * +// * @throws IOException +// */ +// public void setHybridizationsFromNMRShiftDB(final String pathToNMRShiftDB, final double tol, final double thrsHybrid, final double thrsNeighbor) throws IOException { +// +// final HashMap>>> elementsHybridAndBondTypeCounter = testkit.Utils.getHybridizationsFromNMRShiftDB(this.mol, pathToNMRShiftDB, tol, this.molFormula); +// final HashMap>> elementsHybridCounter = elementsHybridAndBondTypeCounter.get("hybridCounter"); +// final HashMap>> elementsBondTypeCounter = elementsHybridAndBondTypeCounter.get("bondTypeCounter"); +// +// ArrayList hybrids; +// final HashMap probsNeighbors = new HashMap<>(); +// int idx = 0; +// Double[] temp; +// final HashMap elementsHybridCounterSum = new HashMap<>(); +// for (int qAtomIndex : elementsHybridCounter.keySet()) { +// for (String keyValue : elementsHybridCounter.get(qAtomIndex).keySet()) { +// if(keyValue.equals("query") || keyValue.equals("queryH")){ +// continue; +// } +// if(!probsNeighbors.containsKey(keyValue)){ +// probsNeighbors.put(keyValue, new 
Double[elementsHybridCounter.keySet().size()]); +// } +// hybrids = elementsHybridCounter.get(qAtomIndex).get(keyValue); +// temp = probsNeighbors.get(keyValue); +// temp[idx] = (double) hybrids.size(); +// probsNeighbors.put(keyValue, temp); +// +// if(!elementsHybridCounterSum.containsKey(keyValue)){ +// elementsHybridCounterSum.put(keyValue, 0); +// } +// elementsHybridCounterSum.put(keyValue, elementsHybridCounterSum.get(keyValue) + hybrids.size()); +// } +// idx++; +// } +// int sumQueryAtom; +// for (int i = 0; i < idx; i++) { +// sumQueryAtom = 0; +// for (String keyValue : probsNeighbors.keySet()) { +// sumQueryAtom += probsNeighbors.get(keyValue)[i]; +// } +// for (String keyValue : probsNeighbors.keySet()) { +// temp = probsNeighbors.get(keyValue); +// temp[i] = temp[i]/sumQueryAtom;//0.5 * (temp[i]/sumQueryAtom + temp[i]/elementsHybridCounterSum.get(keyValue)); +// probsNeighbors.put(keyValue, temp); +// } +// } +// +// +// HashMap hybridFreqs; +// int maxFreqHybridValue; +// double maxFreq; +// IAtom qAtom; +// // for all query atoms which have their own NMR shift value +// idx = 0; +// for (int qAtomIndex : elementsHybridCounter.keySet()) { +// qAtom = this.mol.getAtom(qAtomIndex); +// System.out.println("\nmain key: " + qAtomIndex + " -> H: " + qAtom.getImplicitHydrogenCount() + ", nmr shift: " + qAtom.getProperty(testkit.Utils.getNMRShiftConstant(qAtom.getSymbol()))); +// // for all possible neighbors +// for (String keyValue : elementsHybridCounter.get(qAtomIndex).keySet()) { +// hybrids = elementsHybridCounter.get(qAtomIndex).get(keyValue); +// if(hybrids.isEmpty()){ +// continue; +// } +// hybridFreqs = testkit.Utils.getValueFrequencies(hybrids); +// maxFreqHybridValue = -1; +// maxFreq = Collections.max(hybridFreqs.values()); +// for (int hybridValue : hybridFreqs.keySet()) { +// if(hybridFreqs.get(hybridValue) == maxFreq){ +// maxFreqHybridValue = hybridValue; +// break; +// } +// } +// +// // set hybridization for a query atom which has at 
least one match with an attached hydrogen shift +// // value for a matched heavy atom in DB; this method is preferred +// switch (keyValue) { +// case "queryH": +// System.out.println("queryH -> " + hybrids.size() + " -> " + IAtomType.Hybridization.values()[maxFreqHybridValue] + " (" + hybridFreqs.get(maxFreqHybridValue) + ")"); +// if(hybridFreqs.get(maxFreqHybridValue) >= thrsHybrid){ +// qAtom.setHybridization(IAtomType.Hybridization.values()[maxFreqHybridValue]); +//// System.out.println("queryH -> " + hybrids.size() + " -> " + qAtom.getHybridization() + " (" + hybridFreqs.get(maxFreqHybridValue) + ")"); +// } break; +// case "query": +// System.out.println("query -> " + hybrids.size() + " -> " + IAtomType.Hybridization.values()[maxFreqHybridValue] + " (" + hybridFreqs.get(maxFreqHybridValue) + ")"); +// if(qAtom.getHybridization() == null && (hybridFreqs.get(maxFreqHybridValue) >= thrsHybrid)){ +// // set hybridization from DB entries without attached hydrogen shift matches for an heavy atom +// qAtom.setHybridization(IAtomType.Hybridization.values()[maxFreqHybridValue]); +//// System.out.println("query -> " + hybrids.size() + " -> " + qAtom.getHybridization() + " (" + hybridFreqs.get(maxFreqHybridValue) + ")"); +// } break; +// default: +// System.out.println(idx + ": " + keyValue + ": " + probsNeighbors.get(keyValue)[idx] + " (" + IAtomType.Hybridization.values()[maxFreqHybridValue] + ", " + hybridFreqs.get(maxFreqHybridValue) + ")"); +// HashMap freqs = testkit.Utils.getValueFrequencies(elementsBondTypeCounter.get(qAtomIndex).get(keyValue)); +// for (Integer bondType : freqs.keySet()) { +// System.out.println(" -> " + IBond.Order.values()[bondType - 1] + " (" + freqs.get(bondType) + ")"); +// } break; +// } +// } +// idx++; +// } +// +// System.out.println("\n"); +// idx = 0; +// for (int qAtomIndex : elementsHybridCounter.keySet()) { +// qAtom = this.mol.getAtom(qAtomIndex); +// String output = qAtomIndex + "\t(" + String.format( "%.3f", (double) 
qAtom.getProperty(testkit.Utils.getNMRShiftConstant(qAtom.getSymbol()))) + ",\t" + qAtom.getHybridization() + ",\tH:" + qAtom.getImplicitHydrogenCount() + "):\t"; +// for (String keyValue : elementsHybridCounter.get(qAtomIndex).keySet()) { +// if (keyValue.equals("queryH") || keyValue.equals("query")) { +// continue; +// } +// if(probsNeighbors.get(keyValue)[idx] >= 0.1){ +// output += keyValue + ": " + String.format( "%.3f", probsNeighbors.get(keyValue)[idx]) + " "; +// } else { +// output += keyValue + ": ----- "; +// } +// } +// +//// for (IAtom neighbor : this.mol.getConnectedAtomsList(qAtom)) { +//// if(neighbor.getProperty(testkit.Utils.getNMRShiftConstant(neighbor.getSymbol())) == null){ +//// output += " -> " + neighbor.getSymbol(); +//// } +//// } +// +// +// System.out.println(output); +// idx++; +// +// testkit.Utils.getOpenBonds(this.mol, qAtomIndex); +// } +// } + + + /** + * Sets bonds from already set experiment information (H,H-COSY, INADEQUATE and HMBC). + * Additionally, this function is build for bond type recognition, + * for details see {@link testkit.Utils#getBondTypeFromHybridizations(java.lang.String, org.openscience.cdk.interfaces.IAtomType.Hybridization, java.lang.String, org.openscience.cdk.interfaces.IAtomType.Hybridization)}. + * + */ + public void setBonds(final String[] experiments){ + + String NMRSHIFT_ATOMTYPE; + ArrayList signalList; + for (int e = 0; e < experiments.length; e++) { + for (int i = 0; i < this.mol.getAtomCount(); i++) { + NMRSHIFT_ATOMTYPE = NMR.Utils.getNMRShiftConstant(this.mol.getAtom(i).getSymbol()); + // is the NMR shift constant defined and does the nmr shift property entry in an atom exist? 
+ if (NMRSHIFT_ATOMTYPE != null && this.mol.getAtom(i).getProperty(NMRSHIFT_ATOMTYPE) != null) { + if (this.mol.getAtom(i).getProperties().containsKey(experiments[e])) { + signalList = this.mol.getAtom(i).getProperty(experiments[e]); + for (int bondPartnerIndex : signalList) { + // no bonds on one and the same atom; ignore already set bonds if no override wanted + if ((i == bondPartnerIndex)) {// || (this.mol.getBond(this.mol.getAtom(i), this.mol.getAtom(bondPartnerIndex)) != null)) { + continue; + } + if(experiments[e].equals("HMBC")){ + System.out.println("HMBC bond setting: still to come!!!"); + } else { + this.setBond(i, bondPartnerIndex); + } + } + } + } + } + } + } + + + private void setBond(final int index1, final int index2){ + + if(this.mol.getBond(this.mol.getAtom(index1), this.mol.getAtom(index2)) != null){ + this.mol.removeBond(this.mol.getAtom(index1), this.mol.getAtom(index2)); + } + this.mol.addBond(index1, index2, NMR.Utils.getBondTypeFromHybridizations(this.mol.getAtom(index1), this.mol.getAtom(index2))); + } + + + /** + * Adds a bond manually after reading the experimental data and + * setting bonds from that automatically. + * + * @param atomType1 Element name (e.g. "C") for the first heavy atom + * which also occurrs in + * {@link testkit.Utils#getNMRShiftConstant(java.lang.String)} + * @param shift1 shift value [ppm] of the first heavy atom for matching + * @param tol1 tolerance value for first heavy atom matching + * @param atomType2 Element name (e.g. 
"C") for the second heavy atom + * which also occurrs in + * {@link testkit.Utils#getNMRShiftConstant(java.lang.String)} + * @param shift2 shift value [ppm] of the second heavy atom for matching + * @param tol2 tolerance value for second heavy atom matching + * @return returns false if no matches were found and no bond could + * be stored or the matched atom indices are the same, otherwise true + * @deprecated + */ + public boolean addBond(final String atomType1, final double shift1, final double tol1, final String atomType2, final double shift2, final double tol2) { + + final String NMRSHIFT_ATOMTYPE1 = NMR.Utils.getNMRShiftConstant(atomType1); + final String NMRSHIFT_ATOMTYPE2 = NMR.Utils.getNMRShiftConstant(atomType2); + if ((NMRSHIFT_ATOMTYPE1 == null) || (NMRSHIFT_ATOMTYPE2 == null)) { + return false; + } + int atomIndex1 = NMR.Utils.findSingleShiftMatch(this.mol, shift1, tol1, atomType1); + int atomIndex2 = NMR.Utils.findSingleShiftMatch(this.mol, shift2, tol2, atomType2); + if ((atomIndex1 < 0) || (atomIndex2 < 0) || (atomIndex1 == atomIndex2)) { + return false; + } + this.setBond(atomIndex1, atomIndex2); + + + return true; + } + + + /** + * Adds a H,H-COSY signal and bond between two heavy atoms. To add such a signal, + * at least the first heavy atom has to have a shift value match within + * the atom container of the unknown. + * For matching the second heavy atom and creating a (pseudo) HSQC signal, + * the function {@link #addHSQC(java.lang.String, double, double, double) } + * is used. + * + * @param atomType1 Element name (e.g. "C") for the first heavy atom + * which also occurrs in {@link testkit.Utils#getNMRShiftConstant(java.lang.String)} + * @param shift1 shift value [ppm] of the first heavy atom for matching + * @param tol1 tolerance value for first heavy atom matching + * @param atomType2 Element name (e.g. 
"C") for the second heavy atom + * which also occurrs in {@link testkit.Utils#getNMRShiftConstant(java.lang.String)} + * @param shift2 shift value [ppm] of the second heavy atom for matching + * @param tol2 tolerance value for second heavy atom matching + * @param shiftH proton shift value [ppm] to store + * @return returns false if no matches were found and no COSY signal could + * be stored or the matched atom indices are the same, otherwise true + * @deprecated + */ + public boolean addCOSY(final String atomType1, final double shift1, final double tol1, final String atomType2, final Double shift2, final double tol2, final Double shiftH){ + + final String NMRSHIFT_ATOMTYPE1 = NMR.Utils.getNMRShiftConstant(atomType1); + final String NMRSHIFT_ATOMTYPE2 = NMR.Utils.getNMRShiftConstant(atomType2); + if ((NMRSHIFT_ATOMTYPE1 == null) || (NMRSHIFT_ATOMTYPE2 == null)) { + return false; + } + int atomIndex1 = NMR.Utils.findSingleShiftMatch(this.mol, shift1, tol1, atomType1); + int atomIndex2 = this.addHSQC(atomType2, shift2, tol2, shiftH); + if ((atomIndex1 < 0) || (atomIndex2 < 0) || (atomIndex1 == atomIndex2)) { + return false; + } + if(this.mol.getAtom(atomIndex1).getProperty("COSY") == null){ + this.mol.getAtom(atomIndex1).setProperty("COSY", new ArrayList<>()); + } + if(this.mol.getAtom(atomIndex2).getProperty("COSY") == null){ + this.mol.getAtom(atomIndex2).setProperty("COSY", new ArrayList<>()); + } + + final ArrayList COSYList = this.mol.getAtom(atomIndex1).getProperty("COSY"); + final ArrayList COSYListX = this.mol.getAtom(atomIndex2).getProperty("COSY"); + COSYList.add(atomIndex2); + COSYListX.add(atomIndex1); + + this.setBond(atomIndex1, atomIndex2); + + // set new hybridization of the COSY partner + final AtomHybridizationDescriptor desc = new AtomHybridizationDescriptor(); + this.mol.getAtom(atomIndex1).setHybridization(IAtomType.Hybridization.values()[Integer.parseInt(desc.calculate(this.mol.getAtom(atomIndex1), this.mol).getValue().toString())]); + 
this.mol.getAtom(atomIndex2).setHybridization(IAtomType.Hybridization.values()[Integer.parseInt(desc.calculate(this.mol.getAtom(atomIndex2), this.mol).getValue().toString())]); + + return true; + } + + + /** + * Adds a HSQC signal manually after reading the experimental data and setting bonds from that automatically. + * If a shift value for a heavy atom is >0.0 then this shift value will be used + * to find a heavy atom match between this given shift value and atoms + * of the atom container of the unknown. Otherwise the first heavy atom without stored + * NMR shift entry and without stored proton shifts in the atom container + * is used for attaching a proton. Additionally, a given proton shift value >0.0 + * is used to store it into the matched heavy atom's proton shift list. + * + * @param atomType atom type used for matching + * @param shift shift valuen [ppm] of the heavy atom + * @param tol tolerance value [ppm] for matching + * @param shiftH proton shift value [ppm] to store + * @return index of matched heavy atom within the atom container; returns -1 if no heavy atom match was found + * @deprecated + */ + public int addHSQC(final String atomType, final Double shift, final double tol, final Double shiftH ){ + + int atomIndex = -1; + final String NMRSHIFT_ATOMTYPE = NMR.Utils.getNMRShiftConstant(atomType); + if ((NMRSHIFT_ATOMTYPE == null) || (this.atomTypeIndices.get(atomType) == null)) { + return -1; + } + // set additional HSQC for an atom with already set shift value + if(shift != null){ + atomIndex = NMR.Utils.findSingleShiftMatch(this.mol, shift, tol, atomType); + } else { + // set HSQC for the first atom of given atom type without a already set shift value and without attached proton shifts + for (Integer i : this.atomTypeIndices.get(atomType)) { + if ((this.mol.getAtom(i).getProperty(NMRSHIFT_ATOMTYPE) == null) && (this.mol.getAtom(i).getProperty("HydrogenShifts") == null)) { + atomIndex = i; + break; + } + } + } + // if no atom found to attach a 
proton + if (atomIndex < 0) { + return -1; + } + // add the proton shift value if it is higher than 0 + if(shiftH != null){ + if (this.mol.getAtom(atomIndex).getProperty("HydrogenShifts") == null) { + this.mol.getAtom(atomIndex).setProperty("HydrogenShifts", new ArrayList<>()); + } + final ArrayList protonShifts = this.mol.getAtom(atomIndex).getProperty("HydrogenShifts"); + protonShifts.add(shiftH); + } + // increase the implicit proton number + if(this.mol.getAtom(atomIndex).getImplicitHydrogenCount() == null){ + this.mol.getAtom(atomIndex).setImplicitHydrogenCount(0); + } + this.mol.getAtom(atomIndex).setImplicitHydrogenCount(this.mol.getAtom(atomIndex).getImplicitHydrogenCount() + 1); + // set the (new) hybridization + final AtomHybridizationDescriptor desc = new AtomHybridizationDescriptor(); + this.mol.getAtom(atomIndex).setHybridization(IAtomType.Hybridization.values()[Integer.parseInt(desc.calculate(this.mol.getAtom(atomIndex), this.mol).getValue().toString())]); + + + return atomIndex; + } + + /** + * + * @param atomType + * @param shift + * @deprecated + */ + public void addAtom(final String atomType, final Double shift){ + + this.mol.addAtom(new Atom(atomType)); + if(shift != null){ + this.mol.getAtom(this.mol.getAtomCount() - 1).setProperty(NMR.Utils.getNMRShiftConstant(atomType), shift); + } + this.setAtomTypeIndices(); + } + + + /** + * + * @param projectName + * @param pathToOutputFile + * @param pathsToFilters + * @throws FileNotFoundException + * @throws UnsupportedEncodingException + */ + public void createLSDfile(final String projectName, final String pathToOutputFile, final String[] pathsToFilters) throws FileNotFoundException, UnsupportedEncodingException{ + + PrintWriter writer = new PrintWriter(pathToOutputFile, "UTF-8"); + ArrayList idxs; + String hybrid, protons, MULT = "", HSQC = "", COSY = "", BOND = "", HMBC = ""; + final int[][] bondTable = new int[this.mol.getAtomCount()][this.mol.getAtomCount()]; + for (int i = 0; i < 
this.mol.getAtomCount(); i++) { + for (int j = 0; j < this.mol.getAtomCount(); j++) { + bondTable[i][j] = 0; + } + } + + writer.println("; " + projectName); + writer.println("; " + MolecularFormulaManipulator.getString(this.molFormula) + "\n\n"); + + for (int i = 0; i < this.mol.getAtomCount(); i++) { + // set MULT section in LSD input file + // set hybridization level + if(this.mol.getAtom(i).getHybridization() == null){ + hybrid = "X"; + } else { + switch (this.mol.getAtom(i).getHybridization()) { + case SP1: + case S: + hybrid = "1"; + break; + case SP2: + hybrid = "2"; + break; + default: + hybrid = "3"; + } + } + // set implicit proton number + if(this.mol.getAtom(i).getImplicitHydrogenCount() == null){ + protons = "X"; + } else { + protons = String.valueOf(this.mol.getAtom(i).getImplicitHydrogenCount()); + } + MULT += "MULT " + (i+1) + " " + this.mol.getAtom(i).getSymbol() + " " + hybrid + " " + protons; + if(this.mol.getAtom(i).getProperty(NMR.Utils.getNMRShiftConstant(this.mol.getAtom(i).getSymbol())) != null){ + MULT += ";\t" + this.mol.getAtom(i).getProperty(NMR.Utils.getNMRShiftConstant(this.mol.getAtom(i).getSymbol())); + } + MULT += "\n"; + // set HSQC section in LSD input file + if((this.mol.getAtom(i).getImplicitHydrogenCount() != null) && (this.mol.getAtom(i).getImplicitHydrogenCount() > 0)){ + HSQC += "HSQC " + (i+1) + " " + (i+1) + ";\t" + this.mol.getAtom(i).getSymbol() + "H" + this.mol.getAtom(i).getImplicitHydrogenCount() + "\n"; + } + // set BOND section in LSD input file from INADEQUATE + if (this.mol.getAtom(i).getProperty("INADEQUATE") != null) { + idxs = this.mol.getAtom(i).getProperty("INADEQUATE"); + for (Integer idx : idxs) { + if (bondTable[i][idx] == 0 && bondTable[idx][i] == 0) { + bondTable[i][idx] = 1; + BOND += "BOND " + (i+1) + " " + (idx+1) + ";\t" + this.mol.getAtom(i).getSymbol() + "H" + this.mol.getAtom(i).getImplicitHydrogenCount() + " - " + this.mol.getAtom(idx).getSymbol() + "H" + 
this.mol.getAtom(idx).getImplicitHydrogenCount() + "\n"; + } + } + } + // set BOND section in LSD input file from COSY + if(this.mol.getAtom(i).getProperty("COSY") != null){ + idxs = this.mol.getAtom(i).getProperty("COSY"); + for (Integer idx : idxs) { + if(bondTable[i][idx] == 0 && bondTable[idx][i] == 0){ + bondTable[i][idx] = 1; + COSY += "COSY " + (i+1) + " " + (idx+1) + ";\t" + this.mol.getAtom(i).getSymbol() + "H" + this.mol.getAtom(i).getImplicitHydrogenCount() + " - " + this.mol.getAtom(idx).getSymbol() + "H" + this.mol.getAtom(idx).getImplicitHydrogenCount() + "\n"; + } else { + COSY += ";COSY " + (i+1) + " " + (idx+1) + ";\t" + this.mol.getAtom(i).getSymbol() + "H" + this.mol.getAtom(i).getImplicitHydrogenCount() + " - " + this.mol.getAtom(idx).getSymbol() + "H" + this.mol.getAtom(idx).getImplicitHydrogenCount() + "\n"; + } + } + } + // set HMBC section in LSD input file + // sets only HMBC signals which are not represented by a bond + boolean test3JviaNextNeighborBond; + if (this.mol.getAtom(i).getProperty("HMBC") != null) { + idxs = this.mol.getAtom(i).getProperty("HMBC"); + for (Integer idx : idxs) { + if (bondTable[i][idx] == 0 && bondTable[idx][i] == 0) { + test3JviaNextNeighborBond = false; + for (IAtom neighbor : this.mol.getConnectedAtomsList(this.mol.getAtom(i))) { + if(this.mol.getBond(neighbor, this.mol.getAtom(idx)) != null){ + test3JviaNextNeighborBond = true; + break; + } + } + if(test3JviaNextNeighborBond){ + HMBC += ";HMBC " + (idx+1) + " " + (i+1) + "; 3J\t\n"; + } else { + HMBC += "HMBC " + (idx+1) + " " + (i+1) + ";\n"; + } + } else { + HMBC += ";HMBC " + (idx+1) + " " + (i+1) + "; 2J\t\n"; + } + } + } + } + writer.println(MULT); + writer.println(HSQC); + writer.println(BOND); + writer.println(COSY); + writer.println(HMBC); + + String DEFF = ""; + String FEXP = ""; + if(pathsToFilters.length > 0){ + int fragmentCounter = 1; + for (String pathToFilter : pathsToFilters) { + File folder = new File(pathToFilter); + File[] listOfFiles = 
folder.listFiles(); + for (File file : listOfFiles) { + if (file.isFile() && !file.getName().toLowerCase().contains(".")) { + DEFF += "DEFF F" + fragmentCounter + " \"" + file.getAbsolutePath() + "\"\n"; + fragmentCounter++; + } + } + } + FEXP = "FEXP \"NOT F1"; + for (int i = 2; i < fragmentCounter; i++) { + FEXP += " and NOT F" + i; + } + FEXP += "\""; + } + + writer.println(DEFF); + writer.println(FEXP); + writer.close(); + + } + + + public int[][] getNeighborhoodBondsCountMatrix(){ + + return this.neighborhoodCountsMatrix; + } + + + + + + public void countNeighborhoodBonds(final IAtomContainerSet acSet, final String[] bondsSet, final String elem, String[] neighborElems, final int minShift, final int maxShift, final int stepSize) throws FileNotFoundException, IOException{ + + if (stepSize < 1) { + System.err.println("stepSize < 1 not allowed!!!"); + return; + } + // creation of frequency counting matrix and shift indices holder + this.neighborhoodCountsMatrix = new int[stepSize * (maxShift - minShift + 1)][3 + 4 + neighborElems.length * bondsSet.length]; + this.shiftIndicesInACSet.clear(); + for (int i = 0; i < stepSize * maxShift; i++) { + for (int j = 0; j < 3 + 4 + neighborElems.length * bondsSet.length; j++) { + neighborhoodCountsMatrix[i][j] = 0; + } + this.shiftIndicesInACSet.put(i, new ArrayList<>()); + } + int atomIndexDB, shiftDBInt; double shiftDBDouble; IAtomContainer acDB; + // go through all molecules in DB + for (int k = 0; k < acSet.getAtomContainerCount(); k++) { + acDB = acSet.getAtomContainer(k); + // for all DB entries containing a spectrum for the current query atom type + for (final String shiftsDB : NMR.DB.getSpectraFromNMRShiftDBEntry(acDB, elem)) { + if (shiftsDB == null) { + continue; + } + String[][] shiftsDBvalues = NMR.Utils.parseShiftsNMRShiftDB(shiftsDB); + for (String[] shiftsDBvalue : shiftsDBvalues) { + atomIndexDB = Integer.parseInt(shiftsDBvalue[2]); + // sometimes the DB atom index is wrong and out of array range + if 
(atomIndexDB > acDB.getAtomCount() - 1) { + continue; + } + shiftDBDouble = Math.round(Double.parseDouble(shiftsDBvalue[0]) * stepSize) / (double) stepSize;; + // if DB shift value out of min-max-range then skip this shift + if(shiftDBDouble < minShift || shiftDBDouble > maxShift - 1){ + continue; + } + shiftDBInt = (int) (shiftDBDouble * stepSize); + this.neighborhoodCountsMatrix[shiftDBInt - minShift][0] += 1; // increase number of this shift occurence + this.neighborhoodCountsMatrix[shiftDBInt - minShift][1] += (acDB.getAtom(atomIndexDB).isInRing()) ? 1 : 0; // increase if atom is a ring member + this.neighborhoodCountsMatrix[shiftDBInt - minShift][2] += (acDB.getAtom(atomIndexDB).isAromatic()) ? 1 : 0; // increase if atom is aromatic + this.neighborhoodCountsMatrix[shiftDBInt - minShift][3] += ((acDB.getAtom(atomIndexDB).getImplicitHydrogenCount() != null) && (acDB.getAtom(atomIndexDB).getImplicitHydrogenCount() == 0)) ? 1 : 0; // qC count or equivalents, e.g. qN + this.neighborhoodCountsMatrix[shiftDBInt - minShift][4] += ((acDB.getAtom(atomIndexDB).getImplicitHydrogenCount() != null) && (acDB.getAtom(atomIndexDB).getImplicitHydrogenCount() == 1)) ? 1 : 0; // CH count or equivalents, e.g. NH + this.neighborhoodCountsMatrix[shiftDBInt - minShift][5] += ((acDB.getAtom(atomIndexDB).getImplicitHydrogenCount() != null) && (acDB.getAtom(atomIndexDB).getImplicitHydrogenCount() == 2)) ? 1 : 0; // CH2 count or equivalents, e.g. NH2 + this.neighborhoodCountsMatrix[shiftDBInt - minShift][6] += ((acDB.getAtom(atomIndexDB).getImplicitHydrogenCount() != null) && (acDB.getAtom(atomIndexDB).getImplicitHydrogenCount() == 3)) ? 1 : 0; // CH3 count or equivalents, e.g. 
NH3 + // add counts for a specific atom to matrix m + int[] counts = NMR.Utils.getNeighborhoodBondsCount(acDB, atomIndexDB, bondsSet, neighborElems); + for (int i = 0; i < counts.length; i++) { + this.neighborhoodCountsMatrix[shiftDBInt - minShift][3 + 4 + i] += counts[i]; + } + // add this atom container index and atom index within it to belonging hash map + this.shiftIndicesInACSet.get(shiftDBInt).add(new Integer[]{k, atomIndexDB}); + } + } + } + } + + + + + + + + + + + + + +} diff --git a/src/NMR/ShiftMatcher.java b/src/NMR/ShiftMatcher.java new file mode 100644 index 0000000..5e3647f --- /dev/null +++ b/src/NMR/ShiftMatcher.java @@ -0,0 +1,339 @@ +/* + * The MIT License + * + * Copyright 2018 Michael Wenk [https://github.com/michaelwenk]. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +package NMR; + + +import java.io.FileNotFoundException; +import java.io.FileReader; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.BitSet; +import org.apache.commons.lang3.ArrayUtils; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Hashtable; +import java.util.Set; +import java.util.StringTokenizer; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import org.openscience.cdk.CDKConstants; +import org.openscience.cdk.exception.CDKException; +import org.openscience.cdk.fingerprint.Fingerprinter; +import org.openscience.cdk.fingerprint.IBitFingerprint; +import org.openscience.cdk.interfaces.IAtom; +import org.openscience.cdk.interfaces.IAtomContainer; +import org.openscience.cdk.io.iterator.IteratingSDFReader; +import org.openscience.cdk.qsar.descriptors.atomic.AtomHybridizationDescriptor; +import org.openscience.cdk.silent.SilentChemObjectBuilder; +import org.openscience.cdk.similarity.Tanimoto; + +/** + * + * @author Michael Wenk [https://github.com/michaelwenk] + */ +public class ShiftMatcher { + + public ShiftMatcher(){ + this.HOSESymbolHash = new Hashtable(); + this.verbose = true; + +// this.HOSESymbolHash.put("H", "H"); +// this.HOSESymbolHash.put("C", "C"); + this.HOSESymbolHash.put("O", "O"); + this.HOSESymbolHash.put("N", "N"); + this.HOSESymbolHash.put("S", "S"); + this.HOSESymbolHash.put("P", "P"); + this.HOSESymbolHash.put("Si", "Q"); + this.HOSESymbolHash.put("B", "B"); + this.HOSESymbolHash.put("F", "F"); + this.HOSESymbolHash.put("Cl", "X"); + this.HOSESymbolHash.put("Br", "Y"); + this.HOSESymbolHash.put("I", "I"); + +// this.HOSESymbolHash.put("Se", "Se"); +// this.HOSESymbolHash.put("Sn", "Sn"); +// this.HOSESymbolHash.put("Ge", "Ge"); +// this.HOSESymbolHash.put("te", "Te"); +// this.HOSESymbolHash.put("Zn", "Zn"); +// this.HOSESymbolHash.put("As", "As"); +// this.HOSESymbolHash.put("Li", "Li"); +// 
this.HOSESymbolHash.put("Ti", "Ti"); +// this.HOSESymbolHash.put("Pb", "Pb"); +// this.HOSESymbolHash.put("Hg", "Hg"); +// this.HOSESymbolHash.put("Mg", "Mg"); + + } + + private final boolean verbose; + private Hashtable> hoseLookup; + private int carbonNMRCount; + private int hydrogenNMRCount; + private int molListDBCount; + private final Hashtable HOSESymbolHash; + private ArrayList molListDB; + + + + + + private ArrayList convertToHOSECodeTerm(final String molFormula){ + + final ArrayList convertedList = new ArrayList<>(); + Matcher m = Pattern.compile("[A-Z]").matcher(molFormula); + String s; + + while (m.find()) { + s = molFormula.substring(m.start(), m.end()); + if(m.end() + 1 <= molFormula.length()){ + Character c = molFormula.subSequence(m.end(), m.end() + 1).charAt(0); + if(Character.isLowerCase(c)){ + s = molFormula.substring(m.start(), m.end() + 1); + } + } + // ignore C and H atoms + if(s.equals("C") || s.equals("H")) continue; + + if(this.HOSESymbolHash.containsKey(s)) { + convertedList.add(this.HOSESymbolHash.get(s)); + } else { + convertedList.add(s); + } + } + + return convertedList; + } + + + public void matchHOSEShifts(final double[] nmrShiftValues, final String molFormula) throws Exception { + if (verbose) { + System.out.println("Starting shift matching (2) against hose code table"); + } + + Arrays.parallelSort(nmrShiftValues); + + final Set keys = this.hoseLookup.keySet(); + ArrayList shifts; + + final Hashtable hoseHeteroCount = new Hashtable<>(); + final Hashtable hoseHCount = new Hashtable<>(); + + StringTokenizer strtok; + StringTokenizer strtok2; + + final ArrayList atomTypesHetero = convertToHOSECodeTerm(molFormula); + System.err.println("found heavy atoms in molecular formula: " + molFormula + " -> " + atomTypesHetero.toString()); + + + Matcher m; + int heteroAtomCount; + int carbonAtomCount; + int HAtomCount; + int totalBindPartnCount; + int allAtomCount; + + int hitCounter; +// int middle; +// double mean; +// double median; +// double 
d1; +// double d2; +// double sum; +// double sd; + + String neighbors; + + + for (int i = 0; i < nmrShiftValues.length; i++) { + final ArrayList hoseMatch = new ArrayList<>(); + + hitCounter = 0; + + for (String hose : keys) { + + strtok = new StringTokenizer(hose, ";"); + totalBindPartnCount = Integer.parseInt(strtok.nextToken().substring(2, 3)); + strtok2 = new StringTokenizer(strtok.nextToken(), "("); + + String directNeighbors = strtok2.nextToken(); + + heteroAtomCount = 0; + for (String s : atomTypesHetero) { + if (s.length() == 1) { + m = Pattern.compile(s +"(?![a-z])").matcher(directNeighbors); + } else { + m = Pattern.compile(s).matcher(directNeighbors); + } + while (m.find()) { + heteroAtomCount++; + } + } + + m = Pattern.compile("C(?![a-z])").matcher(directNeighbors); + carbonAtomCount = 0; + while (m.find()) { + carbonAtomCount++; + } + + //check (count) whether other atom types than C or needed hetero atoms are present or not + m = Pattern.compile("[A-Z]").matcher(directNeighbors); + allAtomCount = 0; + while (m.find()) { + allAtomCount++; + } + if(allAtomCount - (carbonAtomCount + heteroAtomCount) != 0){ +// System.out.println(hose + " -> " + allAtomCount + " - (" + carbonAtomCount + " + " + heteroAtomCount + ")"); +// System.out.println(" -> REFUSED!!!"); + continue; + } + + HAtomCount = totalBindPartnCount - (carbonAtomCount + heteroAtomCount); + + hoseHeteroCount.put(hose, heteroAtomCount); + hoseHCount.put(hose, HAtomCount); + + + shifts = hoseLookup.get(hose); + if ((nmrShiftValues[i] >= shifts.get(shifts.indexOf(Collections.min(shifts)))) && nmrShiftValues[i] <= shifts.get(shifts.indexOf(Collections.max(shifts)))) { + + hitCounter++; + hoseMatch.add(hose); + } + } + System.out.println("\nThere are " + hitCounter + " hits for shift value " + nmrShiftValues[i]); + + for (int j = 4; j >= 0; j--) { + + System.out.println(j + ":"); + final ArrayList directNeighbors = new ArrayList<>(); + + for (String hose : hoseMatch) { + if 
(hoseHeteroCount.get(hose) == j) { +// System.out.println(hose + ": " + hoseHeteroAtoms.get(hose)); +// shifts = hoseLookup.get(hose); +// +// middle = shifts.size() / 2; +// if (shifts.size() % 2 == 1) { +// median = shifts.get(middle); +// } else { +// median = (shifts.get(middle - 1) + shifts.get(middle)) / 2.0; +// } +// +// sum = 0; +// for (int k = 0; k < shifts.size(); k++) { +// sum += shifts.get(k); +// } +// mean = (sum / shifts.size()); +// +// d1 = 0; +// d2 = 0; +// sum = 0; +// for (int k = 0; k < shifts.size(); k++) { +// d2 = (mean - shifts.get(k)) * (mean - shifts.get(k)); +// d1 = d2 + d1; +// } +// sd = 0; +// if (shifts.size() > 1) { +// sd = Math.sqrt((d1 / (shifts.size() - 1))); +// } +// +// System.out.println(hose + " [" + shifts.get(shifts.indexOf(Collections.min(shifts))) + ", " + shifts.get(shifts.indexOf(Collections.max(shifts))) + "] (" + shifts.size() + ") -> mean: " + mean + ", median: " + median + ", sd: " + sd + "\n"); + + strtok = new StringTokenizer(hose, ";"); + strtok.nextToken(); + strtok2 = new StringTokenizer(strtok.nextToken(), "("); + neighbors = strtok2.nextToken(); + + neighbors = neighbors + "\t\t -> het: " + j + ", H: " + hoseHCount.get(hose); + + directNeighbors.add(neighbors); + } + } + + //unique HOSE codes + HashSet hs = new HashSet(); + hs.addAll(directNeighbors); + directNeighbors.clear(); + directNeighbors.addAll(hs); + + for (String n : directNeighbors) { + System.out.println("\t" + n); + } + + } + } +// System.out.println("\n"); + + + } + + + public void matchDBBits(final IAtomContainer acQ, final String elementSymbol) throws CDKException{ + + final double[] nmrShiftValuesQ = new double[acQ.getAtomCount()]; + for (int i = 0; i < acQ.getAtomCount(); i++) { + nmrShiftValuesQ[i] = acQ.getAtom(i).getProperty(CDKConstants.NMRSHIFT_CARBON); + } + System.out.println("Q shifts: " + Arrays.toString(nmrShiftValuesQ)); + + Arrays.parallelSort(nmrShiftValuesQ); + + ArrayList nmrShiftValuesDBList; + double[] 
nmrShiftValuesDB; + Double[] temp; + for(int i = 0; i< this.molListDBCount; i++){ + IAtomContainer acDB = this.molListDB.get(i); + System.out.println("acDB atom count: " + acDB.getAtomCount()); + nmrShiftValuesDBList = new ArrayList<>(); + + Fingerprinter fp = new Fingerprinter(); + IBitFingerprint ifpDB = fp.getBitFingerprint(acDB); + IBitFingerprint ifpQ = fp.getBitFingerprint(acQ); + System.out.println(Arrays.toString(ifpQ.getSetbits())); + System.out.println(Arrays.toString(ifpDB.getSetbits()) + "\n"); + + for(int j = 0; j < acDB.getAtomCount(); j++) { + if(acDB.getAtom(j).getSymbol().equals(elementSymbol)){ + nmrShiftValuesDBList.add(acDB.getAtom(j).getProperty(CDKConstants.NMRSHIFT_CARBON)); + } + } + temp = nmrShiftValuesDBList.toArray(new Double[nmrShiftValuesDBList.size()]); + nmrShiftValuesDB = ArrayUtils.toPrimitive(temp); + Arrays.parallelSort(nmrShiftValuesDB); + + System.out.println("DB shifts: " + Arrays.toString(nmrShiftValuesDB)); + + double tanimoto_coefficient = Tanimoto.calculate(nmrShiftValuesQ, nmrShiftValuesDB); + System.out.println(i + ": Tanimo result: " + tanimoto_coefficient); + } + + } + + + + + + +} diff --git a/src/NMR/Signal.java b/src/NMR/Signal.java new file mode 100644 index 0000000..22964ef --- /dev/null +++ b/src/NMR/Signal.java @@ -0,0 +1,59 @@ +/* + * The MIT License + * + * Copyright 2018 Michael Wenk [https://github.com/michaelwenk]. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+package NMR;
+
+/**
+ * Read-only value holder for one NMR signal: element, shift, multiplicity
+ * and intensity. All values are fixed at construction time and are handed
+ * back unchanged by the getters; nothing is validated.
+ *
+ * @author Michael Wenk [https://github.com/michaelwenk]
+ */
+public class Signal {
+
+    // fields in constructor argument order
+    private final String element;
+    private final double shift;
+    private final String multiplicity;
+    // boxed type, so null can be stored (presumably "no intensity known" - confirm with callers)
+    private final Double intensity;
+
+    /**
+     * Stores the four values as given.
+     *
+     * @param elem element the signal belongs to
+     * @param shift shift value of the signal
+     * @param mult multiplicity of the signal
+     * @param intens intensity of the signal
+     */
+    public Signal(final String elem, final double shift, final String mult, final Double intens){
+        element = elem;
+        this.shift = shift;
+        multiplicity = mult;
+        intensity = intens;
+    }
+
+    /** @return the element passed to the constructor */
+    public String getElement(){
+        return element;
+    }
+
+    /** @return the shift passed to the constructor */
+    public double getShift(){
+        return shift;
+    }
+
+    /** @return the multiplicity passed to the constructor */
+    public String getMultiplicity(){
+        return multiplicity;
+    }
+
+    /** @return the intensity passed to the constructor */
+    public Double getIntensity(){
+        return intensity;
+    }
+}
diff --git a/src/NMR/Utils.java b/src/NMR/Utils.java
new file mode 100644
index 0000000..19280f1
--- /dev/null
+++ b/src/NMR/Utils.java
@@ -0,0 +1,1436 @@
+/*
+ * The MIT License
+ *
+ * Copyright 2018 Michael Wenk [https://github.com/michaelwenk].
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +package NMR; + + +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.FileReader; +import java.io.FileWriter; +import java.io.IOException; +import java.io.OutputStreamWriter; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Scanner; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import org.w3c.dom.Document; +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.parsers.ParserConfigurationException; +import org.apache.commons.lang3.StringUtils; +import org.openscience.cdk.Atom; +import org.openscience.cdk.CDKConstants; +import org.openscience.cdk.aromaticity.Aromaticity; +import org.openscience.cdk.aromaticity.ElectronDonation; +import org.openscience.cdk.exception.CDKException; +import org.openscience.cdk.fingerprint.IBitFingerprint; +import org.openscience.cdk.fingerprint.KlekotaRothFingerprinter; +import org.openscience.cdk.fingerprint.SubstructureFingerprinter; +import org.openscience.cdk.graph.CycleFinder; +import org.openscience.cdk.graph.Cycles; +import org.openscience.cdk.interfaces.IAtom; +import org.openscience.cdk.interfaces.IAtomContainer; +import org.openscience.cdk.interfaces.IAtomType; +import org.openscience.cdk.interfaces.IBond; +import org.openscience.cdk.io.SDFWriter; +import org.openscience.cdk.io.iterator.IteratingSDFReader; +import org.openscience.cdk.isomorphism.matchers.QueryAtomContainer; +import org.openscience.cdk.qsar.descriptors.atomic.AtomValenceDescriptor; +import org.openscience.cdk.silent.SilentChemObjectBuilder; +import org.openscience.cdk.smiles.smarts.parser.SMARTSParser; +import org.openscience.cdk.tools.manipulator.AtomContainerManipulator; +import org.w3c.dom.NodeList; +import 
org.xml.sax.SAXException;
+
+/**
+ * Static helper methods of the NMR package: file conversion, peak list
+ * parsing, shift matching and small lookups for NMR related constants.
+ *
+ * @author Michael Wenk [https://github.com/michaelwenk]
+ */
+public class Utils {
+
+
+    public Utils() throws Exception {
+
+    }
+
+    /**
+     * Splits an SDF into single molecular files and converts each of them into the LSD substructure format.
+     * Therefore, the mol2ab executable provided by LSD is required.
+     * <p>
+     * Every molecule is written to {@code <pathOut>/<n>.sdf}, where {@code n}
+     * is its 1-based position in the input file, and one line
+     * {@code "<n> <path of that file>"} is appended to the pilot file
+     * {@code <pathOut>/pilot}. Afterwards {@code <pathMol2ab>/mol2ab} is
+     * started with {@code pathOut} and the pilot file path as arguments. The
+     * method waits until mol2ab has finished; its output goes to the
+     * console of this process.
+     *
+     * @param pathSDF path to SDF to split
+     * @param pathOut path to directory which should contain the splitted and converted structure files
+     * @param pathMol2ab path to mol2ab executable provided by LSD
+     * @throws FileNotFoundException if the input SDF or one of the output files cannot be opened
+     * @throws CDKException if a molecule cannot be written
+     * @throws IOException if reading or writing fails, if mol2ab cannot be
+     * started or exits with a non-zero code, or if waiting for it is interrupted
+     */
+    public static void convertSDFtoLSD(final String pathSDF, final String pathOut, final String pathMol2ab) throws FileNotFoundException, CDKException, IOException{
+
+
+        System.out.println("Conversion from SDF format to LSD format... ");
+
+
+        final File foutPilot = new File(pathOut + "/pilot");
+        int i = 0;
+        // the input is opened first so that a missing SDF is reported before any output file is created;
+        // try-with-resources closes reader and writers on every exit path
+        try (IteratingSDFReader iterator = new IteratingSDFReader(
+                     new FileReader(pathSDF),
+                     SilentChemObjectBuilder.getInstance());
+             BufferedWriter bwPilot = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(foutPilot)))) {
+
+            while (iterator.hasNext()) {
+                i++;
+                final IAtomContainer ac = iterator.next();
+                // running number as molecule id
+                final String molID = String.valueOf(i);
+                final File fout = new File(pathOut + "/" + molID + ".sdf");
+
+                try (SDFWriter wtr = new SDFWriter(new BufferedWriter(new OutputStreamWriter(new FileOutputStream(fout))))) {
+                    wtr.write(ac);
+                }
+
+                bwPilot.write(molID + " " + fout.getPath());
+                bwPilot.newLine();
+            }
+        }
+        System.out.println("Input file contained " + i + " molecules!\nSingle files created!");
+
+
+        // separate arguments (no whitespace splitting of paths) and wait for the conversion to end
+        final Process mol2ab = new ProcessBuilder(pathMol2ab + "/mol2ab", pathOut, foutPilot.getPath())
+                .inheritIO()
+                .start();
+        try {
+            final int exitCode = mol2ab.waitFor();
+            if (exitCode != 0) {
+                throw new IOException("mol2ab exited with code " + exitCode);
+            }
+        } catch (InterruptedException e) {
+            Thread.currentThread().interrupt();
+            throw new IOException("Interrupted while waiting for mol2ab", e);
+        }
+
+
+        System.out.println("Conversion from SDF format to LSD format... DONE!");
+
+    }
+
+
+    /**
+     * Creates an IAtomContainer object containing atoms without any bond
+     * information, given by a molecular formula.
+     *
+     * @param molFormula Molecular Formula
+     * @return new atom container holding one atom per atom counted in the formula
+     * @deprecated
+     */
+    @Deprecated
+    public static IAtomContainer createAtomContainer(final String molFormula) {
+
+        final HashMap<String, Integer> atomCounts = NMR.Utils.getAtomCountsInMolecularFormula(molFormula);
+        IAtomContainer ac = SilentChemObjectBuilder.getInstance().newAtomContainer();
+
+        for (final String elem : atomCounts.keySet()) {
+            // add atoms of current element
+            ac = NMR.Utils.addAtoms(ac, elem, atomCounts.get(elem));
+        }
+
+        return ac;
+    }
+
+    /**
+     * Creates a HashMap with the number of atoms for each occurring atom type.
+     *
+     * An element symbol is one capital letter, optionally followed by one
+     * lower-case letter; the digits after it are the atom count (1 if there
+     * are none). A symbol that occurs several times (e.g. C, O and H in
+     * {@code CH3COOH}) is summed up.
+     *
+     * @deprecated
+     * @param molFormula molecular formula, e.g. {@code C6H12O6} or {@code CBr4}
+     * @return map from element symbol to the total number of atoms of that element
+     * @throws NumberFormatException if the text following a symbol is not a number
+     */
+    @Deprecated
+    public static HashMap<String, Integer> getAtomCountsInMolecularFormula(final String molFormula) {
+
+        final HashMap<String, Integer> hash = new HashMap<>();
+        // text between the capital letters; entry k follows the k-th capital letter
+        // (entry 0 is whatever stands in front of the first one)
+        final String[] molFormSplit = molFormula.split("[A-Z]");
+        final Matcher m = Pattern.compile("[A-Z]").matcher(molFormula);
+        int k = 1;
+
+        while (m.find()) {
+            // name of current element
+            String elem = m.group();
+            final int noAtoms;
+            if (k >= molFormSplit.length || molFormSplit[k].isEmpty()) {
+                // if no atom number is given then assume only one atom
+                noAtoms = 1;
+            } else if (Character.isLowerCase(molFormSplit[k].charAt(0))) {
+                // if element's name contains two letters then extend it
+                elem += molFormSplit[k].charAt(0);
+                // an atom number may follow the second letter
+                noAtoms = (molFormSplit[k].length() > 1) ? Integer.parseInt(molFormSplit[k].substring(1)) : 1;
+            } else {
+                // if atom number is given
+                noAtoms = Integer.parseInt(molFormSplit[k]);
+            }
+            // add up repeated symbols instead of overwriting the earlier count
+            hash.merge(elem, noAtoms, Integer::sum);
+
+            k++;
+        }
+
+        return hash;
+    }
+
+    /**
+     * Counts the atoms of an atom container whose symbol equals the given atom type.
+     *
+     * @param ac atom container to search
+     * @param atomType element symbol to count (e.g. C or Br)
+     * @return number of atoms with that symbol
+     * @deprecated
+     */
+    @Deprecated
+    public static int getAtomTypeCount(final IAtomContainer ac, final String atomType) {
+
+        int noAtoms = 0;
+        for (final IAtom atom : ac.atoms()) {
+            if (atom.getSymbol().equals(atomType)) {
+                noAtoms++;
+            }
+        }
+
+        return noAtoms;
+    }
+
+    /**
+     * Creates atoms of the same atom type and stores them in an atom container.
+     *
+     * @param ac Atom container
+     * @param noAtoms Number of atoms to create
+     * @param atomType Atom type (element's name, e.g.
C or Br)
+     * @return the atom container that was passed in, now holding the new atoms
+     */
+    public static IAtomContainer addAtoms(final IAtomContainer ac, final String atomType, final int noAtoms) throws IllegalArgumentException {
+
+        for (int created = 0; created < noAtoms; created++) {
+            ac.addAtom(new Atom(atomType));
+        }
+
+        return ac;
+    }
+
+
+    /**
+     * Removes all atoms of a given atom type from an atom container.
+     *
+     * @param ac IAtomContainer object where to remove the atoms
+     * @param atomType Atom type (element's name, e.g. C or Br)
+     * @return IAtomContainer where the atoms were removed (the instance that was passed in)
+     */
+    public static IAtomContainer removeAtoms(final IAtomContainer ac, final String atomType){
+
+        // collect first and remove afterwards, so the container is not changed while it is iterated
+        final List<IAtom> toRemoveList = new ArrayList<>();
+        for (final IAtom atom : ac.atoms()) {
+            if (atom.getSymbol().equals(atomType)) {
+                toRemoveList.add(atom);
+            }
+        }
+
+        for (final IAtom atom : toRemoveList) {
+            ac.removeAtom(atom);
+        }
+
+        return ac;
+    }
+
+
+    /**
+     * Returns a list of atom indices in an atom container for a given atom
+     * type (e.g. C or Br)
+     *
+     * @param ac IAtomContainer to use for search
+     * @param atomType Atom type to find in atom container
+     * @return indices, in ascending order, of all atoms whose symbol equals the atom type
+     */
+    public static ArrayList<Integer> getAtomTypeIndicesInAtomContainer(final IAtomContainer ac, final String atomType){
+
+        final ArrayList<Integer> indices = new ArrayList<>();
+        for (int i = 0; i < ac.getAtomCount(); i++) {
+            if (ac.getAtom(i).getSymbol().equals(atomType)) {
+                indices.add(i);
+            }
+        }
+
+        return indices;
+    }
+
+
+    /**
+     * Reads a specific column of a NMR peak table and stores it into an
+     * ArrayList object.
+     *
+     * The table is read line by line and split at commas. A value is taken
+     * only if it is a plain decimal number (optional sign, digits, optional
+     * decimal point); header cells and other text are skipped, and so are
+     * lines that have fewer cells than the requested column.
+     *
+     * @param pathToPeakList path to NMR peak table
+     * @param column column to select in peak table (0-based index into the comma separated cells)
+     * @return ArrayList of Double shift values
+     * @throws IOException if the file cannot be opened or read
+     */
+    public static ArrayList<Double> parsePeakTable(final String pathToPeakList, final int column) throws IOException {
+
+        final ArrayList<Double> shifts = new ArrayList<>();
+        // optional sign, digits, optional decimal point, optional fraction digits
+        final Pattern numberPattern = Pattern.compile("[+-]?\\d+\\.?\\d*");
+        try (BufferedReader fileReader = new BufferedReader(new FileReader(pathToPeakList))) {
+            String line;
+            while ((line = fileReader.readLine()) != null) {
+                final String[] tokens = line.split(",");
+                // blank or short line: there is no such cell
+                if (column >= tokens.length) {
+                    continue;
+                }
+                // get shift value
+                final String value = tokens[column].trim();
+                if (numberPattern.matcher(value).matches()) {
+                    shifts.add(Double.parseDouble(value));
+                }
+            }
+        }
+
+        return shifts;
+    }
+
+
+    /**
+     * Reads a NMR peak XML file and stores it into an
+     * ArrayList object.
+     * The XML file must be in Bruker's TopSpin format.
+     * <p>
+     * NOTE(review): the value is picked by attribute position
+     * ({@code getAttributes().item(attribute - 1)}); presumably that position
+     * follows the order in the file, but the DOM API does not promise any
+     * order for attribute maps - verify, or select the attribute by name.
+     *
+     * @param pathToXML Path to XML file
+     * @param ndim number of dimensions of given data 1 (1D) or 2 (2D)
+     * @param attribute which attribute index in XML peak nodes should be used:
+     * 1 (shift of 1st dimension), 2 (shift of 2nd dimension if 2D data,
+     * intensity if 1D data) or 3 (intensity if 2D data)
+     *
+     * @return ArrayList of Double values, one per {@code Peak<ndim>D} element
+     * @throws IOException
+     * @throws javax.xml.parsers.ParserConfigurationException
+     * @throws org.xml.sax.SAXException
+     */
+    public static ArrayList<Double> parseXML(final String pathToXML, final int ndim, final int attribute) throws IOException, ParserConfigurationException, SAXException {
+
+        final ArrayList<Double> shifts = new ArrayList<>();
+        final DocumentBuilder docBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
+        final Document doc = docBuilder.parse(new File(pathToXML));
+
+        // normalize text representation
+        doc.getDocumentElement().normalize();
+        final NodeList peakLists = doc.getElementsByTagName("Peak" + ndim + "D");
+        for (int i = 0; i < peakLists.getLength(); i++) {
+            final String value = peakLists.item(i).getAttributes().item(attribute - 1).getNodeValue();
+            shifts.add(Double.parseDouble(value));
+        }
+
+        return shifts;
+    }
+
+
+    /**
+     * Corrects a match list regarding a given shift list and an atom container.
+     * This is useful when two or more shift values (e.g. DEPT shifts) match
+     * with the same atom in the atom container. So the purpose here is to
+     * enable more unambiguous matches. This method first looks for unambiguous
+     * matches and calculates the median of the difference values between the
+     * shift list values and the shifts of atom container. Then, this median
+     * is subtracted from all shift list values and the matching is repeated.
+     * <p>
+     * The given shift list is modified in place. If there is no unambiguous
+     * match, the shift list stays untouched and the given match list is returned.
+     *
+     * @param ac IAtomContainer to search
+     * @param shifts Shift value list to match
+     * @param matches Match list to correct
+     * @param tol Tolerance value
+     * @param atomType Element name (e.g. "C") which also occurs in
+     * {@link #getNMRShiftConstant(String)}
+     * @return the new match list, or {@code matches} if nothing could be corrected
+     */
+    public static ArrayList<Integer> correctShiftMatches(final IAtomContainer ac, final ArrayList<Double> shifts, final ArrayList<Integer> matches, final double tol, final String atomType) {
+
+        // get differences of unique matches between query shift and ac shifts
+        final ArrayList<Double> diffs = new ArrayList<>();
+        for (final Integer atomIndex : new HashSet<>(matches)) {
+            // a negative index stands for "no match"; an index that occurs more than once is ambiguous
+            if (atomIndex < 0 || Collections.frequency(matches, atomIndex) != 1) {
+                continue;
+            }
+            final int matchIndex = matches.indexOf(atomIndex);
+            final double acShift = Double.parseDouble(ac.getAtom(atomIndex).getProperty(NMR.Utils.getNMRShiftConstant(atomType)).toString());
+            diffs.add(shifts.get(matchIndex) - acShift);
+        }
+        if (diffs.isEmpty()) {
+            return matches;
+        }
+        // shift all query values by the median of the found differences and match again
+        final double median = NMR.Utils.getMedian(diffs);
+        for (int i = 0; i < shifts.size(); i++) {
+            shifts.set(i, shifts.get(i) - median);
+        }
+
+        return NMR.Utils.findShiftMatches(ac, shifts, tol, atomType);
+    }
+
+
+    /**
+     * Finds the matches with the lowest deviations between a given shift value
+     * set and the atoms of an atom container. A tolerance value and NMRSHIFT
+     * constant must be set.
+     *
+     * @param ac IAtomContainer to search
+     * @param shiftList shift value list to match
+     * @param tol Tolerance value [ppm]
+     * @param atomType Element name (e.g. "C") which also occurs in
+     * {@link #getNMRShiftConstant(String)}
+     * @return List of match indices for every query shift within the IAtomContainer
+     * (-1 where no atom matched)
+     */
+    public static ArrayList<Integer> findShiftMatches(final IAtomContainer ac, final ArrayList<Double> shiftList, final double tol, final String atomType) {
+
+        final ArrayList<Integer> matches = new ArrayList<>(shiftList.size());
+        for (final Double shift : shiftList) {
+            matches.add(NMR.Utils.findSingleShiftMatch(ac, shift, tol, atomType));
+        }
+
+        return matches;
+    }
+
+    /**
+     * Finds the match with the lowest deviation between a given shift value and
+     * the atoms of an atom container. A tolerance value and NMRSHIFT constant
+     * must be set.
+     * <p>
+     * The tolerance is inclusive: an atom whose shift deviates by exactly
+     * {@code tol} still matches. If several atoms have the same lowest
+     * deviation, the one with the lowest index wins.
+     *
+     * @param ac IAtomContainer to search
+     * @param shift Shift value to match [ppm]
+     * @param tol Tolerance value [ppm]
+     * @param atomType Element name (e.g. "C") which also occurs in
+     * {@link #getNMRShiftConstant(String)}
+     * @return Match index of a query shift within the IAtomContainer, or -1 if no atom matches
+     */
+    public static int findSingleShiftMatch(final IAtomContainer ac, final double shift, final double tol, final String atomType) {
+
+        final String shiftConstant = NMR.Utils.getNMRShiftConstant(atomType);
+        int matchIndex = -1;
+        double minDiff = Double.POSITIVE_INFINITY;
+        for (int k = 0; k < ac.getAtomCount(); k++) {
+            // skip atoms without a shift for the given atom type
+            final Object acShiftProperty = ac.getAtom(k).getProperty(shiftConstant);
+            if (acShiftProperty == null) {
+                continue;
+            }
+            // figure out the atom with lowest shift deviation within the tolerance window
+            final double acShift = Double.parseDouble(acShiftProperty.toString());
+            final double diff = Math.abs(shift - acShift);
+            if ((shift - tol <= acShift) && (acShift <= shift + tol) && (diff < minDiff)) {
+                minDiff = diff;
+                matchIndex = k;
+            }
+        }
+
+        return matchIndex;
+    }
+
+    /**
+     * Finds match indices between a given shift list from a peak table and an atom container.
+     * Wrapper function for {@link #parsePeakTable(String, int)},
+     * {@link #findShiftMatches(IAtomContainer, ArrayList, double, String)}
+     * and
+     * {@link #correctShiftMatches(IAtomContainer, ArrayList, ArrayList, double, String)}.
+     *
+     * @param ac IAtomContainer to search for matches
+     * @param pathToPeakList Path to peak table
+     * @param atomType Element name (e.g. "C") which also occurs in
+     * {@link #getNMRShiftConstant(String)}
+     * @param tol Tolerance value [ppm]
+     * @param column Column number of shift values in peak table
+     * @return Indices of matches for each shift within the IAtomContainer
+     * @throws IOException
+     */
+    public static ArrayList<Integer> matchShiftsFromPeakTable(final IAtomContainer ac, final String pathToPeakList, final String atomType, final double tol, final int column) throws IOException {
+
+        final ArrayList<Double> shiftsAtomType = NMR.Utils.parsePeakTable(pathToPeakList, column);
+        final ArrayList<Integer> firstMatches = NMR.Utils.findShiftMatches(ac, shiftsAtomType, tol, atomType);
+
+        return NMR.Utils.correctShiftMatches(ac, shiftsAtomType, firstMatches, tol, atomType);
+    }
+
+
+    /**
+     * Finds match indices between a given shift list from a XML file and an
+     * atom container. Wrapper function for {@link #parseXML(String, int, int)},
+     * {@link #findShiftMatches(IAtomContainer, ArrayList, double, String)} and
+     * {@link #correctShiftMatches(IAtomContainer, ArrayList, ArrayList, double, String)}.
+     *
+     * @param ac IAtomContainer to search for matches
+     * @param pathToXML Path to XML file
+     * @param atomType Element name (e.g. "C") which also occurs in
+     * {@link #getNMRShiftConstant(String)}
+     * @param tol Tolerance value [ppm]
+     * @param ndim number of dimensions of given data 1 (1D) or 2 (2D)
+     * @param attribute which attribute index in XML peak nodes should be used:
+     * 1 (shift of 1st dimension), 2 (shift of 2nd dimension if 2D data,
+     * intensity if 1D data) or 3 (intensity if 2D data)
+     * @return Indices of matches for each shift within the IAtomContainer
+     * @throws IOException
+     * @throws javax.xml.parsers.ParserConfigurationException
+     * @throws org.xml.sax.SAXException
+     */
+    public static ArrayList<Integer> matchShiftsFromXML(final IAtomContainer ac, final String pathToXML, final String atomType, final double tol, final int ndim, final int attribute) throws IOException, ParserConfigurationException, SAXException {
+
+        final ArrayList<Double> shiftsAtomType = NMR.Utils.parseXML(pathToXML, ndim, attribute);
+        final ArrayList<Integer> firstMatches = NMR.Utils.findShiftMatches(ac, shiftsAtomType, tol, atomType);
+
+        return NMR.Utils.correctShiftMatches(ac, shiftsAtomType, firstMatches, tol, atomType);
+    }
+
+
+    /**
+     * Creates a two dimensional array of a given NMRShiftDB NMR entry
+     * with all shift values and atom indices.
+     *
+     * @param shiftsString
+     * @return two dimensional array:
+     * 1. dimension: shift entry (row);
+     * 2.
dimension: the first three fields of the entry as they are stored:
+     * shift value (index 0), second field (index 1, not interpreted here) and
+     * atom index in atom container (index 2). Entries are separated by "|",
+     * fields by ";". An empty or blank string gives an empty array.
+     * @throws IllegalArgumentException if an entry has fewer than three fields
+     */
+    public static String[][] parseShiftsNMRShiftDB(final String shiftsString){
+
+        if (shiftsString.trim().length() == 0) {
+            return new String[][]{};
+        }
+
+        final String[] shiftsSplit = shiftsString.split("\\|");
+        final String[][] values = new String[shiftsSplit.length][3];
+        for (int i = 0; i < shiftsSplit.length; i++) {
+            final String[] signalSplit = shiftsSplit[i].split(";");
+            if (signalSplit.length < 3) {
+                // name the broken entry instead of failing with a bare index error
+                throw new IllegalArgumentException("Malformed NMRShiftDB shift entry \"" + shiftsSplit[i] + "\": expected 3 fields separated by ';'");
+            }
+            System.arraycopy(signalSplit, 0, values[i], 0, 3);
+        }
+
+        return values;
+    }
+
+    /**
+     * Returns the NMR shift constant value for a given element. As far as
+     * it is defined, the value from CDKConstants.NMRSHIFT_* (e.g.
+     * {@link org.openscience.cdk.CDKConstants#NMRSHIFT_CARBON}) will be used.
+     * Otherwise the same format is used for other atom types.
+     * Elements defined so far: C, H, N, P, F, D, O, S, Si, B, Pt.
+     * @param element element's symbol (e.g. "C")
+     * @return the property key under which shifts of that element are stored,
+     * or null if the element is not defined here
+     */
+    public static String getNMRShiftConstant(final String element){
+        switch (element) {
+            // keys provided by CDK
+            case "C":
+                return CDKConstants.NMRSHIFT_CARBON;
+            case "H":
+                return CDKConstants.NMRSHIFT_HYDROGEN;
+            case "N":
+                return CDKConstants.NMRSHIFT_NITROGEN;
+            case "P":
+                return CDKConstants.NMRSHIFT_PHOSPORUS;
+            case "F":
+                return CDKConstants.NMRSHIFT_FLUORINE;
+            case "D":
+                return CDKConstants.NMRSHIFT_DEUTERIUM;
+            // keys defined by this class
+            case "O":
+                return "oxygen nmr shift";
+            case "S":
+                return "sulfur nmr shift";
+            case "Si":
+                return "silicon nmr shift";
+            case "B":
+                return "boron nmr shift";
+            case "Pt":
+                return "platinum nmr shift";
+            default:
+                return null;
+        }
+    }
+
+
+    /**
+     * Returns a bond type for two bond atoms from its hybridization.
+     * CURRENTLY ONLY SINGLE BOND DETECTION POSSIBLE!!!
+     * This function is meant to detect single, double and triple bonds and returns a
+     * bond order from {@link org.openscience.cdk.interfaces.IBond.Order}.
+     * If no bond type could be detected then
+     * {@link org.openscience.cdk.interfaces.IBond.Order#UNSET} will be returned.
+     * For single and double bond detection, the following elements are defined so far: C, O, N, S.
+     * For triple bond detection, the following elements are defined so far: C, N.
+     * <p>
+     * What the code does today: SINGLE is returned as soon as one of the two
+     * atoms is C, O, N or S and the name of its hybridization contains a "3";
+     * in every other case, and whenever a hybridization is missing, UNSET is returned.
+     *
+     * @param atom1 first bond atom
+     * @param atom2 second bond atom
+     * @return {@link org.openscience.cdk.interfaces.IBond.Order#SINGLE} or
+     * {@link org.openscience.cdk.interfaces.IBond.Order#UNSET}
+     */
+    public static IBond.Order getBondTypeFromHybridizations(final IAtom atom1, final IAtom atom2){
+
+        final IAtomType.Hybridization hybridization1 = atom1.getHybridization();
+        final IAtomType.Hybridization hybridization2 = atom2.getHybridization();
+
+        if (hybridization1 == null || hybridization2 == null) {
+            return IBond.Order.UNSET;
+        }
+        if (NMR.Utils.allowsSingleBondsOnly(atom1.getSymbol(), hybridization1)
+                || NMR.Utils.allowsSingleBondsOnly(atom2.getSymbol(), hybridization2)) {
+            return IBond.Order.SINGLE;
+        }
+        // Double and triple bond detection is not active. The disabled draft rules were:
+        // DOUBLE for C with SP1/SP2 or O/N/S with SP2; TRIPLE for C(SP1) bonded to N(SP1).
+
+        return IBond.Order.UNSET;
+    }
+
+    /**
+     * Tells whether the symbol is C, O, N or S and the hybridization name contains
+     * a "3" (the "3" means all SP3 hybridizations like SP3, SP3D2 or PLANAR3).
+     */
+    private static boolean allowsSingleBondsOnly(final String symbol, final IAtomType.Hybridization hybridization) {
+        switch (symbol) {
+            case "C":
+            case "O":
+            case "N":
+            case "S":
+                return hybridization.toString().contains("3");
+            default:
+                return false;
+        }
+    }
+
+    /**
+     * Returns a list of open bonds of an atom.
+     * <p>
+     * The number of electrons that are still free is taken as 8 minus the
+     * valence reported by AtomValenceDescriptor minus the implicit hydrogen
+     * count. For SP3 atoms every free electron left after the existing
+     * neighbors gives one SINGLE entry. For SP2 and SP1 atoms (C, N and, for
+     * SP2, O and S) a fixed set of bond orders is taken and the orders of the
+     * existing bonds are removed from it.
+     * <p>
+     * NOTE(review): an atom without an implicit hydrogen count presumably
+     * ends in a NullPointerException while unboxing - confirm that callers
+     * always set it.
+     *
+     * @param ac atom container
+     * @param atomIndex index of the atom to test
+     * @return list of bond orders that are still open (may be empty), or null
+     * if the atom has no hybridization set
+     */
+    public static ArrayList<IBond.Order> getOpenBonds(final IAtomContainer ac, final int atomIndex){
+
+        final IAtom atom = ac.getAtom(atomIndex);
+        final IAtomType.Hybridization hybridization = atom.getHybridization();
+        if (hybridization == null) {
+            return null;
+        }
+        final ArrayList<IBond.Order> bondOrderList = new ArrayList<>();
+        final int valence = Integer.parseInt(new AtomValenceDescriptor().calculate(atom, ac).getValue().toString());
+        int electronsLeft = 8 - (valence + atom.getImplicitHydrogenCount());
+
+        if (electronsLeft == 0) {
+            return bondOrderList;
+        }
+        // only one single bond left; possible at SP1, SP2 and SP3
+        if (electronsLeft == 1) {
+            bondOrderList.add(IBond.Order.SINGLE);
+            return bondOrderList;
+        }
+        // with SP3 are only single bonds possible
+        if (hybridization.equals(IAtomType.Hybridization.SP3)) {
+            // subtract the single bonded neighbor number
+            electronsLeft -= ac.getConnectedAtomsList(atom).size();
+            for (int k = 0; k < electronsLeft; k++) {
+                bondOrderList.add(IBond.Order.SINGLE);
+            }
+            return bondOrderList;
+        }
+
+        // theoretical bond order sets of SP2 and SP1 atoms
+        if (hybridization.equals(IAtomType.Hybridization.SP2)) {
+            switch (atom.getSymbol()) {
+                case "O":
+                case "S":
+                    // returned right away, existing bonds are not looked at
+                    bondOrderList.add(IBond.Order.DOUBLE);
+                    return bondOrderList;
+                case "C":
+                    bondOrderList.add(IBond.Order.SINGLE);
+                    bondOrderList.add(IBond.Order.SINGLE);
+                    bondOrderList.add(IBond.Order.DOUBLE);
+                    break;
+                case "N":
+                    bondOrderList.add(IBond.Order.SINGLE);
+                    bondOrderList.add(IBond.Order.DOUBLE);
+                    break;
+                default:
+                    break;
+            }
+        } else if (hybridization.equals(IAtomType.Hybridization.SP1)) {
+            switch (atom.getSymbol()) {
+                case "C":
+                    bondOrderList.add(IBond.Order.DOUBLE);
+                    bondOrderList.add(IBond.Order.DOUBLE);
+                    // or
+                    bondOrderList.add(IBond.Order.SINGLE);
+                    bondOrderList.add(IBond.Order.TRIPLE);
+                    break;
+                case "N":
+                    bondOrderList.add(IBond.Order.TRIPLE);
+                    break;
+                default:
+                    break;
+            }
+        }
+        // take out what is bonded already (first occurrence of each existing bond order)
+        for (final IAtom neighbor : ac.getConnectedAtomsList(atom)) {
+            final IBond.Order existingOrder = ac.getBond(atom, neighbor).getOrder();
+            bondOrderList.remove(existingOrder);
+            electronsLeft -= NMR.Utils.getElectronNumberByBondOrder(existingOrder);
+        }
+
+        int theoCounter = 0;
+        for (final IBond.Order order : bondOrderList) {
+            theoCounter += NMR.Utils.getElectronNumberByBondOrder(order);
+        }
+
+        // one electron too many or too few: drop one single bond; larger differences are not handled
+        if (Math.abs(theoCounter - electronsLeft) == 1) {
+            bondOrderList.remove(IBond.Order.SINGLE);
+        }
+
+        return bondOrderList;
+    }
+
+
+    /**
+     * Returns the number used for a bond order in the electron bookkeeping
+     * of {@link #getOpenBonds(IAtomContainer, int)}: 1 for SINGLE up to 6 for
+     * SEXTUPLE, 0 for every other order.
+     *
+     * @param order bond order
+     * @return 1 to 6, or 0
+     */
+    public static int getElectronNumberByBondOrder(final IBond.Order order) {
+        switch (order) {
+            case SINGLE:
+                return 1;
+            case DOUBLE:
+                return 2;
+            case TRIPLE:
+                return 3;
+            case QUADRUPLE:
+                return 4;
+            case QUINTUPLE:
+                return 5;
+            case SEXTUPLE:
+                return 6;
+            default:
+                return 0;
+        }
+    }
+
+
+    /**
+     * Returns the NMR isotope identifier for a given element, e.g. C -> 13C.
+     * Elements defined so far: C, H, N, P, F, D, O, S, Si, B, Pt.
+     *
+     * @param element element's symbol (e.g. "C")
+     * @return isotope identifier, or null if the element is not defined here
+     */
+    public static String getNMRIsotopeIdentifier(final String element) {
+        switch (element) {
+            case "C":
+                return "13C";
+            case "H":
+                return "1H";
+            case "N":
+                return "15N";
+            case "P":
+                return "31P";
+            case "F":
+                return "19F";
+            // deuterium is listed as defined above, so it needs an entry too
+            case "D":
+                return "2H";
+            case "O":
+                return "17O";
+            case "S":
+                return "33S";
+            case "Si":
+                return "29Si";
+            case "B":
+                return "11B";
+            case "Pt":
+                return "195Pt";
+            default:
+                return null;
+        }
+    }
+
+//    /**
+//     * Returns the hybridization level of each heavy atom in given molecule which has
+//     * its own shift value.
+//     * First it compares the number of attached (implicit) hydrogens and sets
+//     * the hybridization level from it directly. This is only possible for
+//     * carbons with three or four attached hydrogens (sp3). [CURRENTLY DISABLED]
+//     *
+//     * If less than three hydrogens are attached or in case of other heavy
+//     * atoms then a NMRShiftDB file will be used to obtain the
+//     * frequencies of the different hybridization levels from the database.
+//     * This happens for directly bonded neighbors too.
+// * +// * +// * @param ac +// * @param pathToNMRShiftDB +// * @param tol +// * @param molFormula +// * @return +// * @throws FileNotFoundException +// */ +// public static HashMap>>> getHybridizationsFromNMRShiftDB(final IAtomContainer ac, final String pathToNMRShiftDB, final double tol, final IMolecularFormula molFormula) throws FileNotFoundException{ +// +// final HashMap>> elementsHybridCounter = new HashMap<>(); +// final HashMap>> elementsBondTypeCounter = new HashMap<>(); +// final HashMap> expactedNeighbors = new HashMap<>(); +// String NMRSHIFT_ATOMTYPE; +// // initializations only +// for (int i = 0; i < ac.getAtomCount(); i++) { +// // sure case for carbon: 3 or 4 hydrogens -> sp3 +//// if (ac.getAtom(i).getSymbol().equals("C") && ac.getAtom(i).getImplicitHydrogenCount() >= 3) { +//// ac.getAtom(i).setHybridization(IAtomType.Hybridization.SP3); +//// continue; +//// } +// NMRSHIFT_ATOMTYPE = testkit.Utils.getNMRShiftConstant(ac.getAtom(i).getSymbol()); +// // is the NMR shift constant defined and does the nmr shift property entry in an atom exist? 
+// if ((NMRSHIFT_ATOMTYPE == null) || (ac.getAtom(i).getProperty(NMRSHIFT_ATOMTYPE) == null)) { +// continue; +// } +// elementsHybridCounter.put(i, new HashMap<>()); +// elementsBondTypeCounter.put(i, new HashMap<>()); +// elementsHybridCounter.get(i).put("query", new ArrayList<>()); +// elementsHybridCounter.get(i).put("queryH", new ArrayList<>()); +// // create an array list for each atom type in given molecular formula +// for (IElement elem : MolecularFormulaManipulator.getHeavyElements(molFormula)) { +// elementsHybridCounter.get(i).put(elem.getSymbol(), new ArrayList<>()); +// elementsBondTypeCounter.get(i).put(elem.getSymbol(), new ArrayList<>()); +// } +// +// expactedNeighbors.put(i, new HashMap<>()); +// for (IAtom expNeighbor : ac.getConnectedAtomsList(ac.getAtom(i))) { +// if (!expactedNeighbors.get(i).keySet().contains(expNeighbor.getSymbol())) { +// expactedNeighbors.get(i).put(expNeighbor.getSymbol(), 0); +// } +// expactedNeighbors.get(i).put(expNeighbor.getSymbol(), expactedNeighbors.get(i).get(expNeighbor.getSymbol()) + 1); +// } +// } +// // beginning of DB search +// String shiftsDB; +// double shiftDB, shiftQ; +// int atomIndexDB; +// boolean add, toContinue; +// final AtomHybridizationDescriptor hybridDesc = new AtomHybridizationDescriptor(); +// IAtom qAtom; +// IAtomContainer acDB; +// final IteratingSDFReader iterator = new IteratingSDFReader( +// new FileReader(pathToNMRShiftDB), +// SilentChemObjectBuilder.getInstance() +// ); +// while (iterator.hasNext()) { +// acDB = iterator.next(); +// ArrayList props = (ArrayList) (ArrayList) (new ArrayList<>(acDB.getProperties().keySet())); +// Collections.sort(props); +// // the DB entry should at least contain one carbon spectrum +// toContinue = false; +// for (String prop : props) { +// if (prop.contains("Spectrum " + testkit.Utils.getNMRIsotopeIdentifier("C"))) { +// toContinue = true; +// break; +// } +// } +// if (!toContinue) { +// continue; +// } +// +// for (int i : 
elementsHybridCounter.keySet()) { +// qAtom = ac.getAtom(i); +// // check wether the DB entry contains a spectrum for the current query atom type +// shiftsDB = null; +// for (String prop : props) { +// if (prop.contains("Spectrum " + testkit.Utils.getNMRIsotopeIdentifier(qAtom.getSymbol()))) { +// shiftsDB = acDB.getProperty(prop); +// break; +// } +// } +// if(shiftsDB == null){ +// continue; +// } +// // ignore the already set sp3 hybridizations at carbon atoms with at least 3 implicit hydrogens +//// if (qAtom.getSymbol().equals("C") && qAtom.getImplicitHydrogenCount() >= 3) { +//// continue; +//// } +// shiftQ = qAtom.getProperty(testkit.Utils.getNMRShiftConstant(ac.getAtom(i).getSymbol())); +// +// // check wether the DB entry contains a proton spectrum +// String shiftsDBHydrogen = null; +// for (String prop : props) { +// if (prop.contains("Spectrum " + testkit.Utils.getNMRIsotopeIdentifier("H"))) { +// shiftsDBHydrogen = acDB.getProperty(prop); +// break; +// } +// } +// +// String[][] shiftsDBvalues = testkit.Utils.parseShiftsNMRShiftDB(shiftsDB); +// for (String[] shiftsDBvalue : shiftsDBvalues) { +// shiftDB = Double.parseDouble(shiftsDBvalue[0]); +// atomIndexDB = Integer.parseInt(shiftsDBvalue[2]); +// add = true; +// // shift match within a shift tolerance range +// if ((shiftQ - tol <= shiftDB) && (shiftDB <= shiftQ + tol)) { +// // matched atom should have the same number of attached (implicit) hydrogens +// if (acDB.getAtom(atomIndexDB).getImplicitHydrogenCount().intValue() == qAtom.getImplicitHydrogenCount().intValue()) { +// // count next neighbors +// HashMap foundNeighbors = new HashMap<>(); +// for (IAtom neighborAtomDB : acDB.getConnectedAtomsList(acDB.getAtom(atomIndexDB))) { +// if (!foundNeighbors.keySet().contains(neighborAtomDB.getSymbol())) { +// foundNeighbors.put(neighborAtomDB.getSymbol(), 0); +// } +// foundNeighbors.put(neighborAtomDB.getSymbol(), foundNeighbors.get(neighborAtomDB.getSymbol()) + 1); +// } +// // check whether the 
number of expacted next neighbors is higher than the number of found next neighbor, if yes then skip this DB atom match +// for (String elemExpNeighbor : expactedNeighbors.get(i).keySet()) { +// if (foundNeighbors.get(elemExpNeighbor) == null || (expactedNeighbors.get(i).get(elemExpNeighbor) > foundNeighbors.get(elemExpNeighbor))) { +// add = false; +// } +// } +// if(!add){ +// continue; +// } +// // only elements which occur in molecular formula of the unknown are allowed, otherwise skip this matched DB atom +// for (IAtom neighborAtomDB : acDB.getConnectedAtomsList(acDB.getAtom(atomIndexDB))) { +// if (MolecularFormulaManipulator.getElementCount(molFormula, neighborAtomDB.getSymbol()) == 0) { +// add = false; +// break; +// } +// // ignore explicit protons; ignore query atoms here, add them as below -> otherwise multiple counting +// if (!neighborAtomDB.getSymbol().equals("H")){// && !neighborAtomDB.getSymbol().equals(qAtom.getSymbol())) { +// elementsHybridCounter.get(i).get(neighborAtomDB.getSymbol()).add(Integer.parseInt(hybridDesc.calculate(neighborAtomDB, acDB).getValue().toString())); +// elementsBondTypeCounter.get(i).get(neighborAtomDB.getSymbol()).add(acDB.getBond(acDB.getAtom(atomIndexDB), neighborAtomDB).getOrder().numeric()); +// } +// } +// if(!add){ +// continue; +// } +// // likely allowed to add hybridization for query atom +// // check whether the shifts of attached hydrogens are equal to hydrogen shifts of query atom -> higher priority at hybridization assignment step later +// boolean added = false; +// if(shiftsDBHydrogen != null){ +// String[][] shiftsDBvaluesHydrogen = testkit.Utils.parseShiftsNMRShiftDB(shiftsDBHydrogen); +// if(qAtom.getProperty("HydrogenShifts") != null){ +// ArrayList shiftsQAtomvaluesHydrogen = qAtom.getProperty("HydrogenShifts"); +// for (int j = 0; j < shiftsQAtomvaluesHydrogen.size(); j++) { +// for (String[] shiftsDBvalueHydrogen : shiftsDBvaluesHydrogen) { +// shiftDB = 
Double.parseDouble(shiftsDBvalueHydrogen[0]); +// if((shiftsQAtomvaluesHydrogen.get(j) - 0.1 <= shiftDB) && (shiftDB <= shiftsQAtomvaluesHydrogen.get(j) + 0.1)){ +// elementsHybridCounter.get(i).get("queryH").add(Integer.parseInt(hybridDesc.calculate(acDB.getAtom(atomIndexDB), acDB).getValue().toString())); +// added = true; +// break; +// } +// } +// if(added){ +// break; +// } +// } +// } +// +// } else { +// elementsHybridCounter.get(i).get("query").add(Integer.parseInt(hybridDesc.calculate(acDB.getAtom(atomIndexDB), acDB).getValue().toString())); +// } +// } +// } +// } +// } +// } +// final HashMap>>> toReturn = new HashMap<>(); +// toReturn.put("hybridCounter", elementsHybridCounter); +// toReturn.put("bondTypeCounter", elementsBondTypeCounter); +// +// return toReturn; +// } + + + + + + public static int[] getNeighborhoodBondsCount(final IAtomContainer ac, final int indexAC, final String[] bondsSet, final String[] neighborElems){ + final int[] counts = new int[neighborElems.length * bondsSet.length]; + String foundBonds; + // for all given neighbor element types + for (int n = 0; n < neighborElems.length; n++) { + foundBonds = ""; + // for all next neighbors of a specific element + for (IAtom neighborAtom : ac.getConnectedAtomsList(ac.getAtom(indexAC))) { + // skip if not the right neighborhood element or bond type is unknown/unset + if ((!neighborAtom.getSymbol().equals(neighborElems[n])) || (NMR.Utils.getStringFromBondOrder(ac.getBond(ac.getAtom(indexAC), neighborAtom).getOrder()) == null)) { + continue; + } + foundBonds += NMR.Utils.getStringFromBondOrder(ac.getBond(ac.getAtom(indexAC), neighborAtom).getOrder()); + } + for (int k = 0; k < bondsSet.length; k++) { + counts[n * bondsSet.length + k] = 0; + if (NMR.Utils.sortString(foundBonds).equals(NMR.Utils.sortString(bondsSet[k]))) { + counts[n * bondsSet.length + k] = 1; + break; + } + } + } + + return counts; + } + + + public static void writeNeighborhoodBondsCountMatrix(final String pathToOutput, final 
int[][] m, final String[] bondsSet, final String elem, String[] neighborElems, final int min, final int max, final int stepSize) throws IOException{ + + if(stepSize < 1){ + System.err.println("stepSize < 1 not allowed!!!"); + return; + } + + final StringBuilder sb = new StringBuilder(); + sb.append("shift [" + elem + "] (" + stepSize + "),nTotal,inRing,isArom,q" + elem + "," + elem + "H," + elem + "H2," + elem + "H3,"); + for (int i = 0; i < neighborElems.length; i++) { + for (int j = 0; j < bondsSet.length; j++) { + sb.append(bondsSet[j] + "[" + neighborElems[i] + "]"); + if (j < bondsSet.length - 1) { + sb.append(","); + } + } + if (i < neighborElems.length - 1) { + sb.append(","); + } + } + sb.append("\n"); + for (int i = 0; i < stepSize * (max - min) + 1; i++) { + sb.append((i + min) + ","); + for (int j = 0; j < 3 + 4 + neighborElems.length * bondsSet.length; j++) { + sb.append(m[i][j]); + if (j < 3 + 4 + neighborElems.length * bondsSet.length - 1) { + sb.append(","); + } + } + sb.append("\n"); + } + + final FileWriter writer = new FileWriter(pathToOutput); + writer.append(sb.toString()); + writer.flush(); + writer.close(); + } + + + public static String sortString(final String s) { + final char[] c = s.toCharArray(); + Arrays.sort(c); + return new String(c); + } + + + public static ArrayList> getBondOrderSets(final String[] valences) { + + final ArrayList> bondOrderSets = new ArrayList<>(); + for (int i = 0; i < valences.length; i++) { + bondOrderSets.add(new ArrayList<>()); + for (int k = 0; k < StringUtils.countMatches(valences[i], "-"); k++) { + bondOrderSets.get(i).add(IBond.Order.SINGLE); + } + for (int k = 0; k < StringUtils.countMatches(valences[i], "="); k++) { + bondOrderSets.get(i).add(IBond.Order.DOUBLE); + } + for (int k = 0; k < StringUtils.countMatches(valences[i], "%"); k++) { + bondOrderSets.get(i).add(IBond.Order.TRIPLE); + } + } + + return bondOrderSets; + } + + + public static String getStringFromBondOrder(final IBond.Order order) { + 
switch (order) { + case SINGLE: + return "-"; + case DOUBLE: + return "="; + case TRIPLE: + return "%"; + default: + return null; + } + } + + + public static IBond.Order getBondOrderFromString(final String order){ + switch(order){ + case "-": return IBond.Order.SINGLE; + case "=": return IBond.Order.DOUBLE; + case "%": return IBond.Order.TRIPLE; + default: return null; + } + } + + + public static void writeCSV(final String pathToOutput, final String table) throws IOException { + FileWriter fr = new FileWriter(new File(pathToOutput)); + BufferedWriter br = new BufferedWriter(fr); + br.write(table); + br.close(); + } + + + /** + * + * @param input + * @return + */ + public static ArrayList getOutliers(ArrayList input) { + final ArrayList outliers = new ArrayList<>(); + if(input.size() <= 1){ + return outliers; + } + Collections.sort(input); + final List data1 = input.subList(0, input.size() / 2); + final List data2; + if (input.size() % 2 == 0) { + data2 = input.subList(input.size() / 2, input.size()); + } else { + data2 = input.subList(input.size() / 2 + 1, input.size()); + } + final double q1 = getMedian(data1); + final double q3 = getMedian(data2); + final double iqr = q3 - q1; + final double lowerFence = q1 - 1.5 * iqr; + final double upperFence = q3 + 1.5 * iqr; + for (int i = 0; i < input.size(); i++) { + if ((input.get(i) < lowerFence) || (input.get(i) > upperFence)) { + outliers.add(input.get(i)); + } + } +// System.out.println("input size: " + input.size()); +// System.out.println("output size: " + outliers.size()); + return outliers; + } + + /** + * + * @param data + * @return + */ + public static double getMedian(final List data) { + if(data.size() == 1){ + return data.get(0); + } + Collections.sort(data); + if (data.size() % 2 == 1) { + return data.get(data.size() / 2); + } else { + return (data.get(data.size() / 2 - 1) + data.get(data.size() / 2)) / 2.0; + } + } + + + /** + * + * @param data + * @return + */ + public static double getMedian(final 
ArrayList data) { + if(data.size() == 1){ + return data.get(0); + } + Collections.sort(data); + if (data.size() % 2 == 1) { + return data.get(data.size() / 2); + } else { + return (data.get(data.size() / 2 - 1) + data.get(data.size() / 2)) / 2.0; + } + } + + + /** + * + * @param data + * @return + */ + public static double getRMS(final ArrayList data) { + if(data.size() == 1){ + return data.get(0); + } + double qSum = 0; + for (final Double d : data) { + qSum += d*d; + } + + return Math.sqrt(qSum/data.size()); + } + + + /** + * + * @param data + * @return + */ + public static double getMean(final ArrayList data) { + double sum = 0; + for (Double d : data) { + sum += d; + } + return sum/data.size(); + } + + + /** + * Tests whether two array lists of integers are equal which also means + * bidirectional values to each other. + * + * @param shiftMatches1 + * @param shiftMatches2 + * @return + */ + public static boolean isBidirectional(final ArrayList shiftMatches1, final ArrayList shiftMatches2) { + final ArrayList temp1 = new ArrayList<>(shiftMatches1); + final ArrayList temp2 = new ArrayList<>(shiftMatches2); + Collections.sort(temp1); + Collections.sort(temp2); + + return temp1.equals(temp2); + } + + /** + * + * @param ac + * @param shiftMatches1 + * @param shiftMatches2 + * @param prop + */ + public static void setBidirectionalLinks(final IAtomContainer ac, final ArrayList shiftMatches1, final ArrayList shiftMatches2, final String prop) { + + ArrayList propList1, propList2; + for (int i = 0; i < shiftMatches1.size(); i++) { + if (shiftMatches1.get(i) >= 0 && shiftMatches2.get(i) >= 0) { + if (ac.getAtom(shiftMatches1.get(i)).getProperty(prop) == null) { + ac.getAtom(shiftMatches1.get(i)).setProperty(prop, new ArrayList<>()); + } + if (ac.getAtom(shiftMatches2.get(i)).getProperty(prop) == null) { + ac.getAtom(shiftMatches2.get(i)).setProperty(prop, new ArrayList<>()); + } + propList1 = ac.getAtom(shiftMatches1.get(i)).getProperty(prop); + propList2 = 
ac.getAtom(shiftMatches2.get(i)).getProperty(prop); + if (!propList1.contains(shiftMatches2.get(i))) { + propList1.add(shiftMatches2.get(i)); + } + if (!propList2.contains(shiftMatches1.get(i))) { + propList2.add(shiftMatches1.get(i)); + } + } + } + } + + + public static ArrayList countSetShiftInAtomContainer(final IAtomContainer ac, final ArrayList indices){ + + final ArrayList shifts = new ArrayList<>(); + for (final Integer index : indices) { + shifts.add(ac.getAtom(index).getProperty(Utils.getNMRShiftConstant(ac.getAtom(index).getSymbol()))); + } + return shifts; + } + + + + public static String getFileFormat(final String pathToFile) { + + if(pathToFile == null || pathToFile.trim().isEmpty()){ + return ""; + } + final String[] split = pathToFile.split("\\."); + + return split[split.length - 1]; + } +// +// public static IAtomContainer getStructureFromINCHICode(final String inchi) throws CDKException { +// final InChIToStructure intostruct = InChIGeneratorFactory.getInstance().getInChIToStructure( +// inchi, SilentChemObjectBuilder.getInstance() +// ); +// +// INCHI_RET ret = intostruct.getReturnStatus(); +// if (ret == INCHI_RET.WARNING) { +// // Structure generated, but with warning message +// System.out.println("InChI warning: " + intostruct.getMessage()); +// } else if (ret != INCHI_RET.OKAY) { +// // Structure generation failed +// throw new CDKException("Structure generation failed: " + ret.toString() +// + " [" + intostruct.getMessage() + "]"); +// } +// +// System.out.println("inchi ac: " + intostruct.getAtomContainer().getAtomCount()); +// +// return SilentChemObjectBuilder.getInstance().newAtomContainer();//intostruct.getAtomContainer(); +// +// } + + + + + + /** + * + * @param lookup + * @return + */ + public static HashMap getRMS(final HashMap> lookup){ + + final HashMap rms = new HashMap<>(); + for (final String key : lookup.keySet()) { + rms.put(key, NMR.Utils.getRMS(lookup.get(key))); +// System.out.println("count: " + lookup.get(key).size() + ", 
mean: " + NMR.Utils.getMean(lookup.get(key)) + ", rms: " + rms.get(key) + ", median: " + NMR.Utils.getMedian(lookup.get(key))); + } + + return rms; + } + + + + + + + + + public static IAtomContainer setAromaticitiesInAtomContainer(final IAtomContainer ac, final int maxCycleSize) throws CDKException { + + AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(ac); + final ElectronDonation model = ElectronDonation.cdkAllowingExocyclic(); + final CycleFinder cycles = Cycles.or(Cycles.all(), Cycles.all(maxCycleSize)); + final Aromaticity aromaticity = new Aromaticity(model, cycles); + aromaticity.apply(ac); + + return ac; + } + + + public static double getTanimotoCoefficient(final IAtomContainer a, final IAtomContainer b) throws CDKException, IOException, CloneNotSupportedException{ + + // pubchem fingerprinter expects + // 1. explicit hydrogens +// AtomContainerManipulator.convertImplicitToExplicitHydrogens(a); + // 2. set atom type names -> done during setting of aromaticities + // 3. 
set aromaticity -> done during DB scanning + + SubstructureFingerprinter substructfp = new SubstructureFingerprinter(); + IBitFingerprint fingerprint = substructfp.getBitFingerprint(a); + System.out.println("\n\ndefault substructure bitstring: " + fingerprint.asBitSet()); + for (int setbit : fingerprint.getSetbits()) { + System.out.println("default substructure of index " + setbit + ": " + substructfp.getSubstructure(setbit));// + " -> " + SMARTSParser.parse(substructfp.getSubstructure(setbit), SilentChemObjectBuilder.getInstance())); + } + +// DepictionGenerator dg = new DepictionGenerator().withSize(800, 800).withAtomColors().withAtomValues().withMolTitle().withFillToFit(); + IAtomContainer ac = SilentChemObjectBuilder.getInstance().newAtomContainer(); + QueryAtomContainer qac = SMARTSParser.parse(substructfp.getSubstructure(fingerprint.getSetbits()[1]), SilentChemObjectBuilder.getInstance()); + + System.out.println("qac: " + qac.getAtomCount() + ", " + qac.getBondCount() + " -> " + qac.getProperties()); + for (IAtom atom : qac.atoms()) { + ac.addAtom(atom); + System.out.println("qac atom: "); + } + for (IBond bond : qac.bonds()) { + ac.addBond(bond); + System.out.println("qac bond: " + bond); + } + System.out.println("ac: " + ac.getAtomCount() + ", " + ac.getBondCount() + " -> " + ac.getProperties()); + + +// dg.depict(ac).writeTo("/Users/mwenk/Downloads/test.png"); + + System.out.println("\n\n"); + SubstructureFingerprinter klekotasubstructfp = new KlekotaRothFingerprinter(); + fingerprint = klekotasubstructfp.getBitFingerprint(a); + System.out.println("Klekota substructure bitstring: " + fingerprint.asBitSet()); + for (int setbit : fingerprint.getSetbits()) { + System.out.println("Klekota substructure of index " + setbit + ": " + klekotasubstructfp.getSubstructure(setbit)); + } + + + return 0.0;//Tanimoto.calculate(pubchemfp.getBitFingerprint(a), pubchemfp.getBitFingerprint(b)); + } + + + + + + + + + + + + + + + + + + + + /** + * Returns + * + * @param values 
+ * @return + */ + public static HashMap getValueFrequencies(final ArrayList values) { + + final HashMap freqs = new HashMap<>(); + final HashSet valueLevels = new HashSet<>(values); + int sum = 0; + for (int value : valueLevels) { + sum += Collections.frequency(values, value); + } + for (int value : valueLevels) { + freqs.put(value, (Collections.frequency(values, value) / (double) sum)); + } + + return freqs; + } + + + + + + +} From 10906544184fc1687e0e4daee29945ada967a57c Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Sun, 22 Jul 2018 15:27:29 +0200 Subject: [PATCH 004/405] slightly changes --- src/casekit/HOSECodePredictor.java | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/casekit/HOSECodePredictor.java b/src/casekit/HOSECodePredictor.java index 517887d..b1ae19a 100644 --- a/src/casekit/HOSECodePredictor.java +++ b/src/casekit/HOSECodePredictor.java @@ -9,12 +9,11 @@ import java.io.BufferedReader; import java.io.File; -import java.io.FileNotFoundException; import java.io.FileReader; import java.io.IOException; import java.text.DecimalFormat; import java.util.ArrayList; -import java.util.Hashtable; +import java.util.HashMap; import java.util.List; import java.util.StringTokenizer; @@ -53,7 +52,7 @@ */ public class HOSECodePredictor { - Hashtable> hoseLookup; + HashMap> hoseLookup; public boolean verbose = false; int maxSpheres = 6; //How many maximum spheres to use for the prediction boolean generatePictures = false; @@ -139,7 +138,7 @@ public void readHOSECodeTable(String hoseTSVfile) throws Exception if (verbose) System.out.println("Start reading HOSE codes from " + hoseTSVfile); BufferedReader br = new BufferedReader(new FileReader(hoseTSVfile)); - hoseLookup = new Hashtable>(); + hoseLookup = new HashMap>(); while((line = br.readLine()) != null) { strtok = new StringTokenizer(line, "\t"); @@ -165,6 +164,11 @@ public void readHOSECodeTable(String hoseTSVfile) throws Exception if (verbose) System.out.println("Finished 
reading " + linecounter + " lines of HOSE codes."); } + + public HashMap> getHOSELookup(){ + + return this.hoseLookup; + } /** * Predicts NMR chemical shifts based on a given HOSE code table read by the @@ -356,7 +360,7 @@ private Options setupOptions(String[] args) options.addOption(maxspheres); return options; } - + public static void main(String[] args) { // TODO Auto-generated method stub HOSECodePredictor hcp = null; From c5e108b196e10c87bd9abf7a2d3929239cc51e9e Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Sun, 22 Jul 2018 15:28:03 +0200 Subject: [PATCH 005/405] slightly changes --- src/casekit/model/NMRSignal.java | 2 +- src/casekit/model/NMRSpectrum.java | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/src/casekit/model/NMRSignal.java b/src/casekit/model/NMRSignal.java index 5639865..fb28b7b 100644 --- a/src/casekit/model/NMRSignal.java +++ b/src/casekit/model/NMRSignal.java @@ -16,7 +16,7 @@ public class NMRSignal { - int dim; + int dim; /** * Am array of doubles to store the chemical shift of diff --git a/src/casekit/model/NMRSpectrum.java b/src/casekit/model/NMRSpectrum.java index 27959b2..3ca1a55 100644 --- a/src/casekit/model/NMRSpectrum.java +++ b/src/casekit/model/NMRSpectrum.java @@ -14,8 +14,6 @@ * */ -import javax.swing.event.ChangeEvent; -import javax.swing.event.ChangeListener; import javax.swing.event.EventListenerList; import java.util.ArrayList; import java.util.List; From 8d9bbb1cc2b9d9ab825282e6587e5d2c3db76d0b Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Sun, 22 Jul 2018 15:28:39 +0200 Subject: [PATCH 006/405] NMR package added --- .gitignore | 3 +++ nbactions.xml | 17 +++++++++++++ pom.xml | 66 ++++++++++++++++++++++++++++++++++----------------- 3 files changed, 64 insertions(+), 22 deletions(-) create mode 100644 nbactions.xml diff --git a/.gitignore b/.gitignore index b83d222..35dce1c 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,4 @@ /target/ +/src/casekit/test.java +/src/NMR/remarks +/src/NMR/test.java \ 
No newline at end of file diff --git a/nbactions.xml b/nbactions.xml new file mode 100644 index 0000000..7f1dc6a --- /dev/null +++ b/nbactions.xml @@ -0,0 +1,17 @@ + + + + run + + jar + + + process-classes + org.codehaus.mojo:exec-maven-plugin:1.2.1:exec + + + -classpath %classpath NMR.test + java + + + diff --git a/pom.xml b/pom.xml index 92d3954..6f56bd4 100644 --- a/pom.xml +++ b/pom.xml @@ -6,28 +6,6 @@ casekit 1.0-SNAPSHOT spectra - - - org.openscience.cdk - cdk-silent - 2.1-SNAPSHOT - - - org.openscience.cdk - cdk-ctab - 2.1-SNAPSHOT - - - org.openscience.cdk - cdk-depict - 2.1-SNAPSHOT - - - commons-cli - commons-cli - 1.3.1 - - src @@ -59,4 +37,48 @@ + + + commons-cli + commons-cli + 1.3.1 + jar + + + org.openscience.cdk + cdk-core + 2.2-SNAPSHOT + jar + + + org.openscience.cdk + cdk-legacy + 2.2-SNAPSHOT + jar + + + org.openscience.cdk + cdk-depict + 2.2-SNAPSHOT + jar + + + org.openscience.cdk + cdk-qsaratomic + 2.2-SNAPSHOT + jar + + + org.apache.commons + commons-lang3 + 3.5 + jar + + + org.openscience.cdk + cdk-fingerprint + 2.2-SNAPSHOT + jar + + From 5183cbd86f4290ada4f8f3e7bf8193fe3161ecca Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Mon, 23 Jul 2018 23:31:00 +0200 Subject: [PATCH 007/405] matchSpectrumAgainstDB: - added the Spectrum lass object --- src/NMR/DB.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/NMR/DB.java b/src/NMR/DB.java index 9450509..aa84151 100644 --- a/src/NMR/DB.java +++ b/src/NMR/DB.java @@ -213,17 +213,17 @@ public static ArrayList getSignalIDsFromNMRShiftDB(final Connection DBC return spectraIDs; } - - public static HashMap> matchSpectrumAgainstDB(final Connection DBConnection, final ArrayList spectrum, final double shiftDev, final Double intensDev, final int stepSize) throws SQLException{ + // currently only for 1D spectra + public static HashMap> matchSpectrumAgainstDB(final Connection DBConnection, final Spectrum spectrum, final double shiftDev, final Double intensDev, final int 
stepSize, final int dim) throws SQLException{ final HashMap> hits = new HashMap<>(); double shift; for (int i = 0; i < spectrum.size(); i++) { hits.put(i, new ArrayList<>()); - shift = Math.floor(spectrum.get(i).getShift() * stepSize) / (double) stepSize; + shift = Math.floor(spectrum.get(i).getShift(dim) * stepSize) / (double) stepSize; if(spectrum.get(i).getIntensity() != null){ - hits.get(i).addAll(NMR.DB.getSignalIDsFromNMRShiftDB(DBConnection, shift - shiftDev, shift + shiftDev, spectrum.get(i).getMultiplicity(), spectrum.get(i).getIntensity() - intensDev, spectrum.get(i).getIntensity() + intensDev, spectrum.get(i).getElement())); + hits.get(i).addAll(NMR.DB.getSignalIDsFromNMRShiftDB(DBConnection, shift - shiftDev, shift + shiftDev, spectrum.get(i).getMultiplicity(), spectrum.get(i).getIntensity() - intensDev, spectrum.get(i).getIntensity() + intensDev, spectrum.get(i).getNuclei()[dim])); } else { - hits.get(i).addAll(NMR.DB.getSignalIDsFromNMRShiftDB(DBConnection, shift - shiftDev, shift + shiftDev, spectrum.get(i).getMultiplicity(), spectrum.get(i).getIntensity(), spectrum.get(i).getIntensity(), spectrum.get(i).getElement())); + hits.get(i).addAll(NMR.DB.getSignalIDsFromNMRShiftDB(DBConnection, shift - shiftDev, shift + shiftDev, spectrum.get(i).getMultiplicity(), spectrum.get(i).getIntensity(), spectrum.get(i).getIntensity(), spectrum.get(i).getNuclei()[dim])); } } From 180a9037555737572da1fa33fbd1baa5fe4eb9cd Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Mon, 23 Jul 2018 23:35:07 +0200 Subject: [PATCH 008/405] - usage of Spectrum class objects is now possible - new public functions for those objects to set/assign 1D/2D shifts without using using the XML or peak list parser directly - other small changes --- src/NMR/ParseRawData.java | 284 +++++++++++++++++++++++++++----------- 1 file changed, 200 insertions(+), 84 deletions(-) diff --git a/src/NMR/ParseRawData.java b/src/NMR/ParseRawData.java index 7f4b3bf..1cbd6c3 100644 --- 
a/src/NMR/ParseRawData.java +++ b/src/NMR/ParseRawData.java @@ -25,13 +25,13 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.Map; import java.util.Scanner; import javax.xml.parsers.ParserConfigurationException; -import org.apache.commons.lang3.ArrayUtils; import org.openscience.cdk.Atom; import org.openscience.cdk.CDKConstants; import org.openscience.cdk.interfaces.IAtom; @@ -52,22 +52,20 @@ public class ParseRawData { final private IAtomContainer mol; final private IMolecularFormula molFormula; final private HashMap> atomTypeIndices = new HashMap<>(); - final public static String CONST_STRING_EQUIVALENCE = "Equivalence"; - final public static String CONST_STRING_HYDROGENSHIFTS = "HydrogenShifts"; - final public static String CONST_STRING_COSY = "COSY"; - final public static String CONST_STRING_HMBC = "HMBC"; - final public static String CONST_STRING_INADEQUATE = "INADEQUATE"; + final public static String CONST_PROP_EQUIVALENCE = "Equivalence"; + final public static String CONST_PROP_PROTONSHIFTS = "ProtonShifts"; + final public static String CONST_PROP_HHCOSY = "HHCOSY"; + final public static String CONST_PROP_HMBC = "HMBC"; + final public static String CONST_PROP_INADEQUATE = "INADEQUATE"; public ParseRawData(){ - this.molFormula = null; this.mol = SilentChemObjectBuilder.getInstance().newAtomContainer(); } public ParseRawData(final IMolecularFormula molFormula){ - this.molFormula = molFormula; this.mol = Utils.removeAtoms(MolecularFormulaManipulator.getAtomContainer(this.molFormula), "H"); this.setAtomTypeIndices(); @@ -132,11 +130,11 @@ public final void setEquivalentProperties() { Map properties; for (int i = 0; i < this.mol.getAtomCount(); i++) { - if (this.mol.getAtom(i).getProperty(ParseRawData.CONST_STRING_EQUIVALENCE) != null) { + if (this.mol.getAtom(i).getProperty(ParseRawData.CONST_PROP_EQUIVALENCE) != null) { 
properties = this.mol.getAtom(i).getProperties(); for (final Object prop: properties.keySet()) { - if (this.mol.getAtom(i).getProperty(prop) != null && !prop.equals(ParseRawData.CONST_STRING_EQUIVALENCE)) { - for (final int k : (ArrayList) this.mol.getAtom(i).getProperty(ParseRawData.CONST_STRING_EQUIVALENCE)) { + if (this.mol.getAtom(i).getProperty(prop) != null && !prop.equals(ParseRawData.CONST_PROP_EQUIVALENCE)) { + for (final int k : (ArrayList) this.mol.getAtom(i).getProperty(ParseRawData.CONST_PROP_EQUIVALENCE)) { this.mol.getAtom(k).setProperty(prop, this.mol.getAtom(i).getProperty(prop)); } } @@ -153,7 +151,7 @@ public final void setEquivalentProperties() { * * @param pathToPeakList * @param atomType - * @return + * @return true if a known file extension was given * @throws IOException * @throws ParserConfigurationException * @throws SAXException @@ -174,11 +172,15 @@ public final boolean parse1DNMR(final String pathToPeakList, final String atomTy /** * Assigns shift values from 1D NMR peak list to atoms of an IAtomContainer. * The shift values will be assigned sequentially. - * In case of a molecular formula is given in this class, the number of + * In case of a molecular formula is given in this class, the number of * shifts must be equal to the number of atoms in this molecular formula. + * For less shifts in shift list you will be asked for entering equivalences. * Otherwise this function will return a false value. - * In case of no molecular was given to class, new atom in the atom container - * will be created regarding to the inpout peak list. + * In case of no molecular was given to this class, a new atom in the atom container + * will be created regarding to the input shift list. + * Each shift value is set to {@link IAtomContainer#setProperty(java.lang.Object, java.lang.Object) + * as result of Utils#getNMRShiftConstant(java.lang.String)}, depending on + * the specified atom type (element). 
* * * @param pathToPeakList Path to peak list (Bruker's TopSpin csv file @@ -190,20 +192,24 @@ public final boolean parse1DNMR(final String pathToPeakList, final String atomTy * @throws java.io.IOException */ public final boolean parse1DNMRviaPeakTable(final String pathToPeakList, final String atomType) throws IOException { - - final ArrayList shifts = NMR.Utils.parsePeakTable(pathToPeakList, 4); - - return this.set1DNMRShifts(shifts, atomType); + + final Spectrum spectrum = new Spectrum(new String[]{Utils.getNMRIsotopeIdentifier(atomType)}, new ArrayList[]{NMR.Utils.parsePeakTable(pathToPeakList, 4)}); + + return this.set1DNMRShifts(spectrum); } /** * Assigns shift values from 1D NMR XML file to atoms of an IAtomContainer. * The shift values will be assigned sequentially. - * In case of a molecular formula is given in this class, the number of + * In case of a molecular formula is given in this class, the number of * shifts must be equal to the number of atoms in this molecular formula. + * For less shifts in shift list you will be asked for entering equivalences. * Otherwise this function will return a false value. - * In case of no molecular was given to class, new atom in the atom container - * will be created regarding to the inpout peak list. + * In case of no molecular was given to this class, a new atom in the atom container + * will be created regarding to the input shift list. + * Each shift value is set to {@link IAtomContainer#setProperty(java.lang.Object, java.lang.Object) + * as result of Utils#getNMRShiftConstant(java.lang.String)}, depending on + * the specified atom type (element). 
* * @param pathToXML Path to XML file (Bruker's TopSpin XML file * format) @@ -217,20 +223,31 @@ public final boolean parse1DNMRviaPeakTable(final String pathToPeakList, final S */ public final boolean parse1DNMRviaXML(final String pathToXML, final String atomType) throws IOException, ParserConfigurationException, SAXException { - final ArrayList shifts = NMR.Utils.parseXML(pathToXML, 1, 1); + final Spectrum spectrum = new Spectrum(new String[]{Utils.getNMRIsotopeIdentifier(atomType)}, new ArrayList[]{NMR.Utils.parseXML(pathToXML, 1, 1)}); - return this.set1DNMRShifts(shifts, atomType); + return this.set1DNMRShifts(spectrum); } + /** - * - * @param shifts - * @param atomType - * @return false, if shift list size is not equal to number of atoms in the - * atom container for the given atom type; otherwise true + * Sets the 1D NMR shift values for given Spectrum object to atoms of the class IAtomContainer. + * The shift values will be assigned sequentially. + * In case of a molecular formula is given in this class, the number of + * shifts must be equal to the number of atoms in this molecular formula. + * For less shifts in shift list you will be asked for entering equivalences. + * Otherwise this function will return a false value. + * In case of no molecular was given to this class, a new atom in the atom container + * will be created regarding to the input shift list. + * Each shift value is set to {@link IAtomContainer#setProperty(java.lang.Object, java.lang.Object) + * as result of Utils#getNMRShiftConstant(java.lang.String)}, depending on + * the specified atom type (element). 
+ * + * @param spectrum Spectrum class object containing the 1D shift information + * @return */ - private boolean set1DNMRShifts(final ArrayList shifts, final String atomType) { - + public final boolean set1DNMRShifts(final Spectrum spectrum){ + final String atomType = Utils.getElementIdentifier(spectrum.getNuclei()[0]); + final ArrayList shifts = spectrum.getShiftsByDim(0); // check whether indices for that atom type exist or the number of input signals are greater than the atom number in atom container for that atom type if (!this.atomTypeIndices.containsKey(atomType) || shifts.size() > this.atomTypeIndices.get(atomType).size()) { // if molecular formula is known and too much picked peak are to be assigned @@ -259,7 +276,7 @@ private boolean set1DNMRShifts(final ArrayList shifts, final String atom } // "fill up" the missing equivalent peaks // check whether the number of input signals is smaller than the number of atoms in atom container from that atom type - if (shifts.size() < this.atomTypeIndices.get(atomType).size()) { + if (spectrum.size() < this.atomTypeIndices.get(atomType).size()) { System.out.println("Not enough peaks in 1D peak list for \"" + atomType + "\"!!!"); this.askForEquivalentPeaks(atomType); } @@ -291,14 +308,14 @@ private void askForEquivalentPeaks(final String atomType) { n = reader.nextInt(); } this.mol.getAtom(i).setProperty(Utils.getNMRShiftConstant(atomType), this.mol.getAtom(n).getProperty(Utils.getNMRShiftConstant(atomType))); - if(this.mol.getAtom(i).getProperty(ParseRawData.CONST_STRING_EQUIVALENCE) == null){ - this.mol.getAtom(i).setProperty(ParseRawData.CONST_STRING_EQUIVALENCE, new ArrayList<>()); + if(this.mol.getAtom(i).getProperty(ParseRawData.CONST_PROP_EQUIVALENCE) == null){ + this.mol.getAtom(i).setProperty(ParseRawData.CONST_PROP_EQUIVALENCE, new ArrayList<>()); } - if(this.mol.getAtom(n).getProperty(ParseRawData.CONST_STRING_EQUIVALENCE) == null){ - this.mol.getAtom(n).setProperty(ParseRawData.CONST_STRING_EQUIVALENCE, new 
ArrayList<>()); + if(this.mol.getAtom(n).getProperty(ParseRawData.CONST_PROP_EQUIVALENCE) == null){ + this.mol.getAtom(n).setProperty(ParseRawData.CONST_PROP_EQUIVALENCE, new ArrayList<>()); } - ((ArrayList) this.mol.getAtom(i).getProperty(ParseRawData.CONST_STRING_EQUIVALENCE)).add(n); - ((ArrayList) this.mol.getAtom(n).getProperty(ParseRawData.CONST_STRING_EQUIVALENCE)).add(i); + ((ArrayList) this.mol.getAtom(i).getProperty(ParseRawData.CONST_PROP_EQUIVALENCE)).add(n); + ((ArrayList) this.mol.getAtom(n).getProperty(ParseRawData.CONST_PROP_EQUIVALENCE)).add(i); } reader.close(); } @@ -409,8 +426,8 @@ private void setImplicitHydrogenNumberFromDEPT(final ArrayList matchesD } this.mol.getAtom(i).setImplicitHydrogenCount(hCount); hCountAll += hCount; - if (this.mol.getAtom(i).getProperty(ParseRawData.CONST_STRING_EQUIVALENCE) != null) { - for (Integer k : (ArrayList) this.mol.getAtom(i).getProperty(ParseRawData.CONST_STRING_EQUIVALENCE)) { + if (this.mol.getAtom(i).getProperty(ParseRawData.CONST_PROP_EQUIVALENCE) != null) { + for (Integer k : (ArrayList) this.mol.getAtom(i).getProperty(ParseRawData.CONST_PROP_EQUIVALENCE)) { this.mol.getAtom(k).setImplicitHydrogenCount(hCount); hCountAll += hCount; } @@ -465,6 +482,8 @@ public final boolean parseHSQC(final String pathToPeakList, final String atomTyp * {@link ParseRawData#parseDEPT(String, String, double)} or * {@link ParseRawData#parseDEPTviaPeakTable(String, String, double)} or * {@link ParseRawData#parseDEPTviaXML(String, String, double) }. + * The property is then set to {@link #CONST_PROP_PROTONSHIFTS} in + * {@link IAtomContainer#setProperty(java.lang.Object, java.lang.Object)}. 
* * @param pathToPeakList path to HSQC peak table (Bruker's TopSpin csv file * format) @@ -477,10 +496,10 @@ public final boolean parseHSQC(final String pathToPeakList, final String atomTyp */ public final void parseHSQCviaPeakTable(final String pathToPeakList, final String atomType, final double tol) throws IOException { - final ArrayList hydrogenShifts = NMR.Utils.parsePeakTable(pathToPeakList, 5); + final Spectrum spectrum = new Spectrum(new String[]{Utils.getNMRIsotopeIdentifier("H")}, new ArrayList[]{NMR.Utils.parsePeakTable(pathToPeakList, 5)}); final ArrayList matchesAtomType = NMR.Utils.matchShiftsFromPeakTable(this.mol, pathToPeakList, atomType, tol, 6); - this.setImplicitHydrogenShifts(hydrogenShifts, matchesAtomType); + this.setImplicitHydrogenShifts(spectrum, matchesAtomType); } /** @@ -491,6 +510,8 @@ public final void parseHSQCviaPeakTable(final String pathToPeakList, final Strin * {@link ParseRawData#parseDEPT(String, String, double)} or * {@link ParseRawData#parseDEPTviaPeakTable(String, String, double)} or * {@link ParseRawData#parseDEPTviaXML(String, String, double) }. + * The property is then set to {@link #CONST_PROP_PROTONSHIFTS} in + * {@link IAtomContainer#setProperty(java.lang.Object, java.lang.Object)}. * * @param pathToXML path to HSQC XML file * @param atomType Element name (e.g. 
"C") which also occurrs in @@ -504,14 +525,14 @@ public final void parseHSQCviaPeakTable(final String pathToPeakList, final Strin */ public final void parseHSQCviaXML(final String pathToXML, final String atomType, final double tol) throws IOException, ParserConfigurationException, SAXException { - final ArrayList hydrogenShifts = NMR.Utils.parseXML(pathToXML, 2, 2); + final Spectrum spectrum = new Spectrum(new String[]{Utils.getNMRIsotopeIdentifier("H")}, new ArrayList[]{NMR.Utils.parseXML(pathToXML, 2, 2)}); final ArrayList matchesAtomType = NMR.Utils.matchShiftsFromXML(this.mol, pathToXML, atomType, tol, 2, 1); - this.setImplicitHydrogenShifts(hydrogenShifts, matchesAtomType); + this.setImplicitHydrogenShifts(spectrum, matchesAtomType); } - private void setImplicitHydrogenShifts(final ArrayList hydrogenShifts, final ArrayList matchesAtomType) { - + private void setImplicitHydrogenShifts(final Spectrum spectrum, final ArrayList matchesAtomType) { + final ArrayList hydrogenShifts = spectrum.getShiftsByDim(0); IAtom matchAtom; ArrayList assignedHydrogensShifts; for (int i = 0; i < matchesAtomType.size(); i++) { @@ -520,16 +541,52 @@ private void setImplicitHydrogenShifts(final ArrayList hydrogenShifts, f if (matchAtom.getImplicitHydrogenCount() == null || matchAtom.getImplicitHydrogenCount() == 0) { continue; } - if (matchAtom.getProperty(ParseRawData.CONST_STRING_HYDROGENSHIFTS) == null) { - matchAtom.setProperty(ParseRawData.CONST_STRING_HYDROGENSHIFTS, new ArrayList<>(matchAtom.getImplicitHydrogenCount())); + if (matchAtom.getProperty(ParseRawData.CONST_PROP_PROTONSHIFTS) == null) { + matchAtom.setProperty(ParseRawData.CONST_PROP_PROTONSHIFTS, new ArrayList<>(matchAtom.getImplicitHydrogenCount())); + } + assignedHydrogensShifts = matchAtom.getProperty(ParseRawData.CONST_PROP_PROTONSHIFTS); + if (assignedHydrogensShifts.size() < matchAtom.getImplicitHydrogenCount()) { + assignedHydrogensShifts.add(hydrogenShifts.get(i)); + } + } + } + } + + /** + * Sets the proton 
shift(s) as list to belonging heavy atoms of an + * HSQC signal relationship. + * The property is then set to {@link #CONST_PROP_PROTONSHIFTS} in + * {@link IAtomContainer#setProperty(java.lang.Object, java.lang.Object)}. + * + * @param spectrum Spectrum class object consisting of Signal class objects + * where the heavy atom is given first and the protons value as the second. + * @param tolHeavy tolerance value [ppm] for heavy atom shift matching + */ + public final void setHSQC(final Spectrum spectrum, final double tolHeavy) { + final ArrayList hydrogenShifts = spectrum.getShiftsByDim(1); + final ArrayList shiftsHeavy = spectrum.getShiftsByDim(0); + ArrayList matchesHeavyAtom = NMR.Utils.findShiftMatches(this.mol, shiftsHeavy, tolHeavy, Utils.getElementIdentifier(spectrum.getNuclei()[0])); + matchesHeavyAtom = NMR.Utils.correctShiftMatches(this.mol, shiftsHeavy, matchesHeavyAtom, tolHeavy, Utils.getElementIdentifier(spectrum.getNuclei()[0])); + + IAtom matchAtom; + ArrayList assignedHydrogensShifts; + for (int i = 0; i < matchesHeavyAtom.size(); i++) { + if (matchesHeavyAtom.get(i) >= 0) { + matchAtom = this.mol.getAtom(matchesHeavyAtom.get(i)); + if (matchAtom.getImplicitHydrogenCount() == null || matchAtom.getImplicitHydrogenCount() == 0) { + continue; + } + if (matchAtom.getProperty(ParseRawData.CONST_PROP_PROTONSHIFTS) == null) { + matchAtom.setProperty(ParseRawData.CONST_PROP_PROTONSHIFTS, new ArrayList<>(matchAtom.getImplicitHydrogenCount())); } - assignedHydrogensShifts = matchAtom.getProperty(ParseRawData.CONST_STRING_HYDROGENSHIFTS); + assignedHydrogensShifts = matchAtom.getProperty(ParseRawData.CONST_PROP_PROTONSHIFTS); if (assignedHydrogensShifts.size() < matchAtom.getImplicitHydrogenCount()) { assignedHydrogensShifts.add(hydrogenShifts.get(i)); } } } } + /** * Finds the matches with the lowest deviations between a given hydrogen @@ -569,10 +626,10 @@ private int[] findSingleImplicitHydrogenShiftMatch(final double queryShift, fina ArrayList 
protonShiftList; for (int i = 0; i < this.mol.getAtomCount(); i++) { // skip atoms without implicit hydrogens - if (this.mol.getAtom(i).getProperty(ParseRawData.CONST_STRING_HYDROGENSHIFTS) == null) { + if (this.mol.getAtom(i).getProperty(ParseRawData.CONST_PROP_PROTONSHIFTS) == null) { continue; } - protonShiftList = this.mol.getAtom(i).getProperty(ParseRawData.CONST_STRING_HYDROGENSHIFTS); + protonShiftList = this.mol.getAtom(i).getProperty(ParseRawData.CONST_PROP_PROTONSHIFTS); for (int j = 0; j < protonShiftList.size(); j++) { // figure out the atom with lowest shift deviation if ((queryShift - tol <= protonShiftList.get(j)) && (protonShiftList.get(j) <= queryShift + tol) && (Math.abs(queryShift - protonShiftList.get(j)) < minDiff)) { @@ -615,7 +672,7 @@ private ArrayList correctHydrogenShiftMatches(final ArrayList s matchIndex = matches.indexOf(matchIndexAtomContainer); if (matches.get(matchIndex) >= 0) { singleMatchIndex = this.findSingleImplicitHydrogenShiftMatch(shifts.get(matchIndex), tol); - singleMatchShifts = this.mol.getAtom(singleMatchIndex[0]).getProperty(ParseRawData.CONST_STRING_HYDROGENSHIFTS); + singleMatchShifts = this.mol.getAtom(singleMatchIndex[0]).getProperty(ParseRawData.CONST_PROP_PROTONSHIFTS); diff = shifts.get(matchIndex) - singleMatchShifts.get(singleMatchIndex[1]); diffs.add(diff); } @@ -653,14 +710,14 @@ private ArrayList correctHydrogenShiftMatches(final ArrayList s * @throws ParserConfigurationException * @throws SAXException */ - public final boolean parseCOSY(final String pathToPeakList, final double tol) throws IOException, ParserConfigurationException, SAXException { + public final boolean parseHHCOSY(final String pathToPeakList, final double tol) throws IOException, ParserConfigurationException, SAXException { switch (NMR.Utils.getFileFormat(pathToPeakList)) { case "csv": - parseCOSYviaPeakTable(pathToPeakList, tol); + parseHHCOSYviaPeakTable(pathToPeakList, tol); break; case "xml": - parseCOSYviaXML(pathToPeakList, tol); + 
parseHHCOSYviaXML(pathToPeakList, tol); break; default: return false; @@ -681,6 +738,8 @@ public final boolean parseCOSY(final String pathToPeakList, final double tol) th * {@link ParseRawData#parseDEPTviaXML(String, String, double)}. * Returns true if all signals are bidirectional, so that atom A has a * signal according to atom B and vice versa. + * The property is then set to {@link #CONST_PROP_HHCOSY} in + * {@link IAtomContainer#setProperty(java.lang.Object, java.lang.Object)}. * * @param pathToPeakList path to H,H-COSY peak table (Bruker's TopSpin csv * file @@ -689,12 +748,11 @@ public final boolean parseCOSY(final String pathToPeakList, final double tol) th * @return * @throws IOException */ - public final boolean parseCOSYviaPeakTable(final String pathToPeakList, final double tol) throws IOException { + public final boolean parseHHCOSYviaPeakTable(final String pathToPeakList, final double tol) throws IOException { - final ArrayList hydrogenShifts1 = NMR.Utils.parsePeakTable(pathToPeakList, 5); - final ArrayList hydrogenShifts2 = NMR.Utils.parsePeakTable(pathToPeakList, 6); + final Spectrum spectrum = new Spectrum(new String[]{Utils.getNMRIsotopeIdentifier("H"), Utils.getNMRIsotopeIdentifier("H")}, new ArrayList[]{NMR.Utils.parsePeakTable(pathToPeakList, 5), NMR.Utils.parsePeakTable(pathToPeakList, 6)}); - return this.setCOSY(hydrogenShifts1, hydrogenShifts2, tol); + return this.setHHCOSY(spectrum, tol); } /** @@ -709,6 +767,8 @@ public final boolean parseCOSYviaPeakTable(final String pathToPeakList, final do * {@link ParseRawData#parseDEPTviaXML(String, String, double)}. Returns true if * all signals are bidirectional, so that atom A has a signal according to * atom B and vice versa. + * The property is then set to {@link #CONST_PROP_HHCOSY} in + * {@link IAtomContainer#setProperty(java.lang.Object, java.lang.Object)}. 
* * @param pathToXML path to H,H-COSY peak XML file (Bruker's TopSpin XML * file format) @@ -718,23 +778,32 @@ public final boolean parseCOSYviaPeakTable(final String pathToPeakList, final do * @throws javax.xml.parsers.ParserConfigurationException * @throws org.xml.sax.SAXException */ - public final boolean parseCOSYviaXML(final String pathToXML, final double tol) throws IOException, ParserConfigurationException, SAXException { + public final boolean parseHHCOSYviaXML(final String pathToXML, final double tol) throws IOException, ParserConfigurationException, SAXException { - final ArrayList hydrogenShifts1 = NMR.Utils.parseXML(pathToXML, 2, 1); - final ArrayList hydrogenShifts2 = NMR.Utils.parseXML(pathToXML, 2, 2); + final Spectrum spectrum = new Spectrum(new String[]{Utils.getNMRIsotopeIdentifier("H"), Utils.getNMRIsotopeIdentifier("H")}, new ArrayList[]{NMR.Utils.parseXML(pathToXML, 2, 1), NMR.Utils.parseXML(pathToXML, 2, 2)}); - return this.setCOSY(hydrogenShifts1, hydrogenShifts2, tol); + return this.setHHCOSY(spectrum, tol); } - private boolean setCOSY(final ArrayList hydrogenShifts1, final ArrayList hydrogenShifts2, final double tol) { + /** + * Sets links between two heavy atoms of H,H-COSY signals. 
The property + * is then set to {@link #CONST_PROP_HHCOSY} in + * {@link IAtomContainer#setProperty(java.lang.Object, java.lang.Object)} + * + * @param spectrum Spectrum class object containing the 2D spectrum proton shift information + * @param tol tolerance value [ppm] for matching belonging protons + * of heavy atom + * @return true if the links could be set; otherwise false + */ + public final boolean setHHCOSY(final Spectrum spectrum, final double tol) { - final ArrayList hydrogenShiftMatches1 = this.findImplicitHydrogenShiftMatches(hydrogenShifts1, tol); - final ArrayList hydrogenShiftMatches2 = this.findImplicitHydrogenShiftMatches(hydrogenShifts2, tol); + final ArrayList hydrogenShiftMatches1 = this.findImplicitHydrogenShiftMatches(spectrum.getShiftsByDim(0), tol); + final ArrayList hydrogenShiftMatches2 = this.findImplicitHydrogenShiftMatches(spectrum.getShiftsByDim(1), tol); // are all signals bidirectional? if (!NMR.Utils.isBidirectional(hydrogenShiftMatches1, hydrogenShiftMatches2)) { return false; } - NMR.Utils.setBidirectionalLinks(this.mol, hydrogenShiftMatches1, hydrogenShiftMatches2, ParseRawData.CONST_STRING_COSY); + NMR.Utils.setBidirectionalLinks(this.mol, hydrogenShiftMatches1, hydrogenShiftMatches2, ParseRawData.CONST_PROP_HHCOSY); return true; } @@ -789,10 +858,9 @@ public final boolean parseINADEQUATE(final String pathToPeakList, final double t */ public final boolean parseINADEQUATEviaPeakTable(final String pathToPeakList, final double tol) throws IOException { - final ArrayList carbonShifts1 = NMR.Utils.parsePeakTable(pathToPeakList, 5); - final ArrayList carbonShifts2 = NMR.Utils.parsePeakTable(pathToPeakList, 6); - - return this.setINADEQUATE(carbonShifts1, carbonShifts2, tol); + final Spectrum spectrum = new Spectrum(new String[]{Utils.getNMRIsotopeIdentifier("C"), Utils.getNMRIsotopeIdentifier("C")}, new ArrayList[]{NMR.Utils.parsePeakTable(pathToPeakList, 5), NMR.Utils.parsePeakTable(pathToPeakList, 6)}); + + return 
this.setINADEQUATE(spectrum, tol); } /** @@ -815,21 +883,32 @@ public final boolean parseINADEQUATEviaPeakTable(final String pathToPeakList, fi */ public final boolean parseINADEQUATEviaXML(final String pathToXML, final double tol) throws IOException, ParserConfigurationException, SAXException { - final ArrayList carbonShifts1 = NMR.Utils.parseXML(pathToXML, 2, 1); - final ArrayList carbonShifts2 = NMR.Utils.parseXML(pathToXML, 2, 2); - - return this.setINADEQUATE(carbonShifts1, carbonShifts2, tol); + final Spectrum spectrum = new Spectrum(new String[]{Utils.getNMRIsotopeIdentifier("C"), Utils.getNMRIsotopeIdentifier("C")}, new ArrayList[]{NMR.Utils.parseXML(pathToXML, 2, 1), NMR.Utils.parseXML(pathToXML, 2, 2)}); + + return this.setINADEQUATE(spectrum, tol); } - private boolean setINADEQUATE(final ArrayList carbonShifts1, final ArrayList carbonShifts2, final double tol) { + + /** + * Sets links between two carbon atoms in an INADEQUATE signal relationship. + * The property is then set to {@link #CONST_PROP_INADEQUATE} in + * {@link IAtomContainer#setProperty(java.lang.Object, java.lang.Object)}. + * Returns true if all signals are bidirectional, so that atom A has a + * signal according to atom B and vice versa. + * + * @param spectrum Spectrum class object consisting of Signal class objects + * @param tol tolerance value [ppm] for carbon atom shift matching + * @return + */ + public final boolean setINADEQUATE(final Spectrum spectrum, final double tol) { - final ArrayList carbonShiftMatches1 = NMR.Utils.findShiftMatches(this.mol, carbonShifts1, tol, "C"); - final ArrayList carbonShiftMatches2 = NMR.Utils.findShiftMatches(this.mol, carbonShifts2, tol, "C"); + final ArrayList carbonShiftMatches1 = NMR.Utils.findShiftMatches(this.mol, spectrum.getShiftsByDim(0), tol, "C"); + final ArrayList carbonShiftMatches2 = NMR.Utils.findShiftMatches(this.mol, spectrum.getShiftsByDim(1), tol, "C"); // are all signals bidirectional? 
if (!NMR.Utils.isBidirectional(carbonShiftMatches1, carbonShiftMatches2)) { return false; } - NMR.Utils.setBidirectionalLinks(this.mol, carbonShiftMatches1, carbonShiftMatches2, ParseRawData.CONST_STRING_INADEQUATE); + NMR.Utils.setBidirectionalLinks(this.mol, carbonShiftMatches1, carbonShiftMatches2, ParseRawData.CONST_PROP_INADEQUATE); return true; } @@ -877,6 +956,8 @@ public final boolean parseHMBC(final String pathToPeakList, final String atomTyp * {@link ParseRawData#parseDEPT(String, String, double) } or * {@link ParseRawData#parseDEPTviaPeakTable(String, String, double)} or * {@link ParseRawData#parseDEPTviaXML(String, String, double)}. + * The property is then set to {@link #CONST_PROP_HMBC} in + * {@link IAtomContainer#setProperty(java.lang.Object, java.lang.Object)}. * * @param pathToPeakList path to HMBC peak table (Bruker's TopSpin csv * file format) @@ -887,7 +968,7 @@ public final boolean parseHMBC(final String pathToPeakList, final String atomTyp * @throws IOException */ public final void parseHMBCviaPeakTable(final String pathToPeakList, final String atomType, final double tolHydrogen, final double tolHeavy) throws IOException { - + final ArrayList hydrogenShifts = NMR.Utils.parsePeakTable(pathToPeakList, 5); final ArrayList hydrogenShiftMatches = this.correctHydrogenShiftMatches(hydrogenShifts, this.findImplicitHydrogenShiftMatches(hydrogenShifts, tolHydrogen), tolHydrogen); final ArrayList heavyAtomShiftMatches = NMR.Utils.matchShiftsFromPeakTable(this.mol, pathToPeakList, atomType, tolHeavy, 6); @@ -904,7 +985,9 @@ public final void parseHMBCviaPeakTable(final String pathToPeakList, final Strin * {@link ParseRawData#parseDEPT(String, String, double) } or * {@link ParseRawData#parseDEPTviaPeakTable(String, String, double)} or * {@link ParseRawData#parseDEPTviaXML(String, String, double)}. - * + * The property is then set to {@link #CONST_PROP_HMBC} in + * {@link IAtomContainer#setProperty(java.lang.Object, java.lang.Object)}. 
+ * * @param pathToXML path to HMBC peak XML file (Bruker's TopSpin XML file * format) * @param atomType Element name (e.g. "C") which also occurrs in @@ -929,14 +1012,47 @@ private void setHMBC(final ArrayList hydrogenShiftMatches, final ArrayL ArrayList HMBCList; for (int i = 0; i < hydrogenShiftMatches.size(); i++) { if (hydrogenShiftMatches.get(i) >= 0 && heavyAtomShiftMatches.get(i) >= 0) { - if (this.mol.getAtom(hydrogenShiftMatches.get(i)).getProperty(ParseRawData.CONST_STRING_HMBC) == null) { - this.mol.getAtom(hydrogenShiftMatches.get(i)).setProperty(ParseRawData.CONST_STRING_HMBC, new ArrayList<>()); + if (this.mol.getAtom(hydrogenShiftMatches.get(i)).getProperty(ParseRawData.CONST_PROP_HMBC) == null) { + this.mol.getAtom(hydrogenShiftMatches.get(i)).setProperty(ParseRawData.CONST_PROP_HMBC, new ArrayList<>()); } - HMBCList = this.mol.getAtom(hydrogenShiftMatches.get(i)).getProperty(ParseRawData.CONST_STRING_HMBC); + HMBCList = this.mol.getAtom(hydrogenShiftMatches.get(i)).getProperty(ParseRawData.CONST_PROP_HMBC); if (!HMBCList.contains(heavyAtomShiftMatches.get(i))) { HMBCList.add(heavyAtomShiftMatches.get(i)); } } } } + + + /** + * Sets links between heavy atoms which are in HMBC signal relationship. + * The property is then set to {@link #CONST_PROP_HMBC} in + * {@link IAtomContainer#setProperty(java.lang.Object, java.lang.Object)}. + * + * @param spectrum Spectrum class object consisting of Signal class objects + * where the heavy atom is given first and the protons value as the second. 
+ * @param tolHeavy tolerance value [ppm] for heavy atom shift matching + * @param tolHydrogen tolerance value [ppm] for hydrogen shift matching + */ + public final void setHMBC(final Spectrum spectrum, final double tolHeavy, final double tolHydrogen) { + + final ArrayList shiftsHydrogen = spectrum.getShiftsByDim(1); + final ArrayList matchesHydrogen = this.correctHydrogenShiftMatches(shiftsHydrogen, this.findImplicitHydrogenShiftMatches(shiftsHydrogen, tolHydrogen), tolHydrogen); + final ArrayList shiftsHeavy = spectrum.getShiftsByDim(0); + ArrayList matchesHeavyAtom = NMR.Utils.findShiftMatches(this.mol, shiftsHeavy, tolHeavy, Utils.getElementIdentifier(spectrum.getNuclei()[0])); + matchesHeavyAtom = NMR.Utils.correctShiftMatches(this.mol, shiftsHeavy, matchesHeavyAtom, tolHeavy, Utils.getElementIdentifier(spectrum.getNuclei()[0])); + + ArrayList HMBCList; + for (int i = 0; i < matchesHydrogen.size(); i++) { + if (matchesHydrogen.get(i) >= 0 && matchesHeavyAtom.get(i) >= 0) { + if (this.mol.getAtom(matchesHydrogen.get(i)).getProperty(ParseRawData.CONST_PROP_HMBC) == null) { + this.mol.getAtom(matchesHydrogen.get(i)).setProperty(ParseRawData.CONST_PROP_HMBC, new ArrayList<>()); + } + HMBCList = this.mol.getAtom(matchesHydrogen.get(i)).getProperty(ParseRawData.CONST_PROP_HMBC); + if (!HMBCList.contains(matchesHeavyAtom.get(i))) { + HMBCList.add(matchesHeavyAtom.get(i)); + } + } + } + } } From c32d9026ddcd7c66f0f02a8ff295c31e39a29ff6 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Mon, 23 Jul 2018 23:36:17 +0200 Subject: [PATCH 009/405] - new added and modified by casekit.model - now in usage at Spectrum class --- src/NMR/Signal.java | 86 ++++++++++++++++++++++++++++++++++++--------- 1 file changed, 70 insertions(+), 16 deletions(-) diff --git a/src/NMR/Signal.java b/src/NMR/Signal.java index 22964ef..b585d6e 100644 --- a/src/NMR/Signal.java +++ b/src/NMR/Signal.java @@ -29,31 +29,85 @@ */ public class Signal { - private final String element; - private final String 
multiplicity; - private final double shift; - private final Double intensity; - - public Signal(final String elem, final double shift, final String mult, final Double intens){ - this.element = elem; + private final int ndim; + + /** + * Am array of doubles to store the chemical shift of + */ + private final Double shift[]; + private final String[] nuclei; + + /* Signal intensity in arbitrary values */ + private Double intensity; + private String multiplicity; + + private Integer phase; + public final static int DIM_ONE = 1, DIM_TWO = 2, DIM_THREE = 3, DIM_FOUR = 4; + public final static int SHIFT_PROTON = 0, SHIFT_HETERO = 1; + public final static int PHASE_NONE = 0, PHASE_POSITIVE = 1, PHASE_NEGATIVE = 2; + public final static String[] PHASENAMES = {"NONE", "POSITIVE", "NEGATIVE"}; + + public Signal(final String[] nuclei, final Double[] shift, final Double intensity, final Integer phase, final String multiplicity) { + this.nuclei = nuclei; + this.ndim = this.nuclei.length; this.shift = shift; - this.multiplicity = mult; - this.intensity = intens; + this.intensity = intensity; + this.phase = phase; + this.multiplicity = multiplicity; } - public String getElement(){ - return this.element; + public int getDim(){ + return this.ndim; } - public double getShift(){ - return this.shift; + public void setShift(final Double shift, final int dim) { + this.shift[dim] = shift; + } + + public Double getShift(final int dim) { + return this.shift[dim]; } - public String getMultiplicity(){ + public void setIntensity(final Double intensity) { + this.intensity = intensity; + } + + public Double getIntensity() { + return this.intensity; + } + + public void setMultiplicity(final String multiplicity) { + this.multiplicity = multiplicity; + } + + public String getMultiplicity() { return this.multiplicity; } + + public void setPhase(final int phase) { + this.phase = phase; + } - public Double getIntensity(){ - return this.intensity; + public Integer getPhase() { + return this.phase; + } 
+ + @Override + public String toString() { + String s = ""; + s += ndim + " -dimensional NMRSignal for nuclei "; + for (int f = 0; f < this.nuclei.length; f++) { + s += this.nuclei[f] + "; "; + } + s += "\nShiftlist: "; + for (int f = 0; f < this.shift.length; f++) { + s += this.shift[f] + "; "; + } + s += "\n\n"; + return s; + } + + public String[] getNuclei(){ + return this.nuclei; } } From 1c5bc9a6c17c8c00a7500cf32c06c330545ad0a3 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Mon, 23 Jul 2018 23:37:46 +0200 Subject: [PATCH 010/405] - new added and modified from casekit.model - now in usage by means of Signal class in several functions in Utils class and in ParseRawData class --- src/NMR/Spectrum.java | 275 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 275 insertions(+) create mode 100644 src/NMR/Spectrum.java diff --git a/src/NMR/Spectrum.java b/src/NMR/Spectrum.java new file mode 100644 index 0000000..a3f78a4 --- /dev/null +++ b/src/NMR/Spectrum.java @@ -0,0 +1,275 @@ +/* + * The MIT License + * + * Copyright 2018 Michael Wenk [https://github.com/michaelwenk]. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +package NMR; + +import java.util.ArrayList; + +/** + * + * @author Michael Wenk [https://github.com/michaelwenk] + */ +public class Spectrum extends ArrayList{ + + /** + * An arbitrary name that can be assigned to this spectrum for identification purposes. + */ + private String name; + /** + * An arbitrary name to identify the type of this spectrum, like COSY, NOESY, HSQC, etc. I + * decided not to provide static Strings with given experiment type since the there are + * numerous experiments yielding basically identical information having different names + */ + private String specType; + +// /** +// * Not yet clear if this is needed. +// */ +// private float[] pickPrecision; + /** + * Declares how many axes are in involved in this spectrum. + */ + private final int ndim; + /** + * The nuclei of the different axes. + */ + private final String nuclei[]; + /** + * The proton frequency of the spectrometer used to record this spectrum. 
+ */ + private Float spectrometerFrequency; + private String solvent; + private String standard; +// protected transient EventListenerList changeListeners = new EventListenerList(); + + public Spectrum(final String[] nuclei, final ArrayList[] shiftLists) { + this.nuclei = nuclei; + this.ndim = this.nuclei.length; + this.setSignals(shiftLists); + } + + public Spectrum(final String[] nuclei) { + this.nuclei = nuclei; + this.ndim = this.nuclei.length; + } + + + public void setSignals(final ArrayList[] shiftLists){ + if(shiftLists.length != this.ndim){ + System.err.println("Unequal number of nuclei and shift lists!!!"); + return; + } + Double[] shifts; + // assumes that the shift lists have the same number of entries + for (int row = 0; row < shiftLists[0].size(); row++) { + shifts = new Double[this.ndim]; + for (int col = 0; col < this.ndim; col++) { + shifts[col] = shiftLists[col].get(row); + } + this.add(new NMR.Signal(this.nuclei, shifts, null, null, null)); + } + } + + + /** + * Return the number of individual frequencies in the heteroatom shift list, which should be + * equal or smaller than the number of respective atoms + * @return + */ + public int getSignalNumber() { + return this.size(); + } + + /** + * Adds an NMRSignal to the NMRSpectrum. 
+ * @param signal + */ + public void addSignal(Signal signal) { + this.add(signal); +// this.updateShiftLists();s + } + +// /** +// * Creates an empty signal with correct dimension +// */ +// public void newSignal() { +// System.out.println("nucleus: " + nucleus.length + nucleus[0]); +// add(new NMRSignal(nucleus)); +// updateShiftLists(); +// } + + /** + * Returns an NMRSignal at position number in the List + * @param signalIndex + * @return + */ + public Signal getSignal(final int signalIndex) { + return this.get(signalIndex); + } + + public ArrayList getShiftsByDim(final int dim){ + final ArrayList signals = new ArrayList<>(); + for (final Signal sig : this) { + signals.add(sig.getShift(dim)); + } + + return signals; + } + + /** + * Returns the position of an NMRSignal the List + * @param signal + * @return + */ + public int getSignalIndex(final Signal signal) { + for (int f = 0; f < this.size(); f++) { + if (this.get(f) == signal) { + return f; + } + } + return -1; + } + + public void setSpectrometerFrequency(final Float sf) { + this.spectrometerFrequency = sf; + } + + public float getSpectrometerFrequency() { + return spectrometerFrequency; + } + + public void setSolvent(final String solvent) { + this.solvent = solvent; + } + + public String getSolvent() { + return solvent; + } + + public void setStandard(final String standard) { + this.standard = standard; + } + + public String getStandard() { + return standard; + } + + /** + * Returns the signal closest to the shift sought. If no Signal is found within the interval + * defined by pickprecision, null is returned. + * @param shift + * @param dim + * @param pickPrecision + * @return + */ + public Signal pickClosestSignal(final Double shift, final int dim, final double pickPrecision) { + + int thisPosition = -1; + double diff = pickPrecision; + /* + * Now we search dimension dim for the chemical shift. 
+ */ + for (int f = 0; f < this.size(); f++) { + if (Math.abs((this.get(f)).getShift(dim) - shift) < diff) { + diff = Math.abs((this.get(f)).getShift(dim) - shift); + thisPosition = f; + } + } + if(thisPosition >= 0){ + this.get(thisPosition); + } + + return null; + } + + /** + * Returns a List with signals within the interval defined by pickPrecision. If none is found + * an empty List is returned. + * @param shift + * @param dim + * @param pickPrecision + * @return + */ + public ArrayList pickSignals(final double shift, final int dim, final double pickPrecision) { + final ArrayList pickedSignals = new ArrayList<>(); + /* + * Now we search dimension dim for the chemical shift. + */ + for (final Signal sig : this) { + if (Math.abs(sig.getShift(dim) - shift) < pickPrecision) { + pickedSignals.add(sig); + } + } + return pickedSignals; + } + +// /** +// * Extracts a list of unique shifts from the list of cross signals and sorts them. This is to +// * define the column and row headers for tables. +// */ +// protected void updateShiftLists() { +// Double shift; NMR.Signal nmrSignal; +// for (int i = 0; i < this.size(); i++) { +// nmrSignal = this.get(i); +// for (int d = 0; d < nmrSignal.getDim(); d++) { +// shift = nmrSignal.getShift(d); +// if (!this.get(d).contains(shift)) { +// shiftList.get(d).add(shift); +// } +// } +// } +// } + + /** + * Creates a 2D matrix of booleans, that models the set of crosspeaks in the 2D NMR spectrum. + * The dimensions are taken from hetAtomShiftList and protonShiftList, which again are + * produced by updateShiftLists based a collection of 2D nmrSignals + *

+ * private void createMatrix(){ boolean found; float het, prot; int hetPos, protPos; + * hetCorMatrix = new boolean[hetAtomShiftList.length][protonShiftList.length]; for (int f = + * 0; f < size(); f++){ HetCorNMRSignal hetCorSignal = (HetCorNMRSignal)elementAt(f); prot = + * hetCorSignal.shift[NMRSignal.SHIFT_PROTON]; het = + * hetCorSignal.shift[NMRSignal.SHIFT_HETERO]; found = false; hetPos = + * isInShiftList(hetAtomShiftList, het, hetAtomShiftList.length); if (hetPos >= 0){ protPos = + * isInShiftList(protonShiftList, prot, protonShiftList.length); if ( protPos >= 0){ found = + * true; hetCorMatrix[hetPos][protPos] = true; } } } } + */ + public void report() { + String s = ""; + System.out.println("Report for nmr spectrum " + name + " of type " + + specType + ": "); + for (int i = 0; i < this.size(); i++) { + System.out.println("ShiftList for dimension " + (i + 1) + ":"); + for (int d = 0; d < this.get(i).getDim(); d++) { + s += this.get(i).getShift(d) + "; "; + } + System.out.println(s + "\n"); + s = ""; + } + + } + + public String[] getNuclei(){ + return this.nuclei; + } +} From da4820df019fa905f3cda4ca8022ba2a002fe946 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Mon, 23 Jul 2018 23:39:46 +0200 Subject: [PATCH 011/405] - supports now the Spectra class - new peak parsing functions parseXMLs and parsepeakTables to get Spectrum class objects from it --- src/NMR/Utils.java | 115 ++++++++++++++++++++++++++++++++++++--------- 1 file changed, 92 insertions(+), 23 deletions(-) diff --git a/src/NMR/Utils.java b/src/NMR/Utils.java index 19280f1..c3ca051 100644 --- a/src/NMR/Utils.java +++ b/src/NMR/Utils.java @@ -39,7 +39,6 @@ import java.util.HashMap; import java.util.HashSet; import java.util.List; -import java.util.Scanner; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.w3c.dom.Document; @@ -76,12 +75,7 @@ * @author Michael Wenk [https://github.com/michaelwenk] */ public class Utils { - - - public Utils() throws Exception { - - } - 
+ /** * Splits an SDF into single molecular files and converts each of them into the LSD substructure format. * Therefore, the mol2ab executable provided by LSD is required. @@ -310,7 +304,7 @@ public static ArrayList getAtomTypeIndicesInAtomContainer(final IAtomCo return indices; } - + /** * Reads a specific column of a NMR peak table and stores it into an * ArrayList object. @@ -338,6 +332,57 @@ public static ArrayList parsePeakTable(final String pathToPeakList, fina return shifts; } + + /** + * Reads specific columns of NMR peak tables to obtain a Spectrum class + * object. + * + * @param pathsToPeakLists paths to NMR peak tables + * @param columns columns to select in each peak table + * @param atomTypes atom types (element) for each dimension + * @return Spectrum class object containing the peak lists + * @throws IOException + */ + public static Spectrum parsePeakTables(final String[] pathsToPeakLists, final int[] columns, final String[] atomTypes) throws IOException { + + final ArrayList[] shiftsList = new ArrayList[pathsToPeakLists.length]; + final String[] nuclei = new String[pathsToPeakLists.length]; + for (int i = 0; i < pathsToPeakLists.length; i++) { + shiftsList[i] = Utils.parsePeakTable(pathsToPeakLists[i], columns[i]); + nuclei[i] = Utils.getNMRIsotopeIdentifier(atomTypes[i]); + } + + return new Spectrum(nuclei, shiftsList); + } + + + /** + * Reads specific columns of NMR XML files to obtain a Spectrum class + * object. 
+ * + * @param pathsToXMLs paths to NMR XML files + * @param dims array of dimensions of given data 1 (1D) or 2 (2D) + * @param attributes which attribute indices in XML peak nodes should be used: + * 1 (shift of 1st dimension), 2 (shift of 2nd dimension if 2D data, + * intensity if 1D data) or 3 (intensity if 2D data) + * @param atomTypes atom types (element) for each dimension + * @return Spectrum class object containing the peak lists + * @throws IOException + * @throws javax.xml.parsers.ParserConfigurationException + * @throws org.xml.sax.SAXException + */ + public static Spectrum parseXMLs(final String[] pathsToXMLs, final int[] dims, final int[] attributes, final String[] atomTypes) throws IOException, ParserConfigurationException, SAXException { + + final ArrayList[] shiftsList = new ArrayList[pathsToXMLs.length]; + final String[] nuclei = new String[pathsToXMLs.length]; + for (int i = 0; i < pathsToXMLs.length; i++) { + shiftsList[i] = Utils.parseXML(pathsToXMLs[i], dims[i], attributes[i]); + nuclei[i] = Utils.getNMRIsotopeIdentifier(atomTypes[i]); + } + + return new Spectrum(nuclei, shiftsList); + } + /** * Reads a NMR peak XML file and stores it into an @@ -345,7 +390,7 @@ public static ArrayList parsePeakTable(final String pathToPeakList, fina * The XML file must be in Bruker's TopSpin format. 
* * @param pathToXML Path to XML file - * @param ndim number of dimensions of given data 1 (1D) or 2 (2D) + * @param dim number of dimensions of given data 1 (1D) or 2 (2D) * @param attribute which attribute index in XML peak nodes should be used: * 1 (shift of 1st dimension), 2 (shift of 2nd dimension if 2D data, * intensity if 1D data) or 3 (intensity if 2D data) @@ -355,7 +400,7 @@ public static ArrayList parsePeakTable(final String pathToPeakList, fina * @throws javax.xml.parsers.ParserConfigurationException * @throws org.xml.sax.SAXException */ - public static ArrayList parseXML(final String pathToXML, final int ndim, final int attribute) throws IOException, ParserConfigurationException, SAXException { + public static ArrayList parseXML(final String pathToXML, final int dim, final int attribute) throws IOException, ParserConfigurationException, SAXException { final ArrayList shifts = new ArrayList<>(); final DocumentBuilderFactory docBuilderFactory = DocumentBuilderFactory.newInstance(); @@ -364,7 +409,7 @@ public static ArrayList parseXML(final String pathToXML, final int ndim, // normalize text representation doc.getDocumentElement().normalize(); - final NodeList peakLists = doc.getElementsByTagName("Peak" + ndim + "D"); + final NodeList peakLists = doc.getElementsByTagName("Peak" + dim + "D"); for (int i = 0; i < peakLists.getLength(); i++) { shifts.add(Double.parseDouble(peakLists.item(i).getAttributes().item(attribute - 1).getNodeValue())); } @@ -387,7 +432,7 @@ public static ArrayList parseXML(final String pathToXML, final int ndim, * @param matches Match list to correct * @param tol Tolerance value * @param atomType Element name (e.g. 
"C") which also occurrs in - * {@link testkit.Utils#getNMRShiftConstant(java.lang.String)} + * {@link Utils#getNMRShiftConstant(java.lang.String)} * @return */ public static ArrayList correctShiftMatches(final IAtomContainer ac, final ArrayList shifts, final ArrayList matches, final double tol, final String atomType) { @@ -428,7 +473,7 @@ public static ArrayList correctShiftMatches(final IAtomContainer ac, fi * @param shiftList shift value list to match * @param tol Tolerance value [ppm] * @param atomType Element name (e.g. "C") which also occurrs in - * {@link testkit.Utils#getNMRShiftConstant(java.lang.String)} + * {@link Utils#getNMRShiftConstant(java.lang.String)} * @return List of match indices for every query shift within the IAtomContainer */ public static ArrayList findShiftMatches(final IAtomContainer ac, final ArrayList shiftList, final double tol, final String atomType) { @@ -450,7 +495,7 @@ public static ArrayList findShiftMatches(final IAtomContainer ac, final * @param shift Shift value to match [ppm] * @param tol Tolerance value [ppm] * @param atomType Element name (e.g. "C") which also occurrs in - * {@link testkit.Utils#getNMRShiftConstant(java.lang.String)} + * {@link Utils#getNMRShiftConstant(java.lang.String)} * @return Match index of a query shift within the IAtomContainer */ public static int findSingleShiftMatch(final IAtomContainer ac, final double shift, final double tol, final String atomType) { @@ -483,7 +528,7 @@ public static int findSingleShiftMatch(final IAtomContainer ac, final double shi * @param ac IAtomContainer to search for matches * @param pathToPeakList Path to peak table * @param atomType Element name (e.g. 
"C") which also occurrs in - * {@link testkit.Utils#getNMRShiftConstant(java.lang.String)} + * {@link Utils#getNMRShiftConstant(java.lang.String)} * @param tol Tolerance value [ppm] * @param column Column number of shift values in peak table * @return Indices of matches for each shift within the IAtomContainer @@ -508,7 +553,7 @@ public static ArrayList matchShiftsFromPeakTable(final IAtomContainer a * @param ac IAtomContainer to search for matches * @param pathToXML * @param atomType Element name (e.g. "C") which also occurrs in - * {@link testkit.Utils#getNMRShiftConstant(java.lang.String)} + * {@link Utils#getNMRShiftConstant(java.lang.String)} * @param tol Tolerance value [ppm] * @param ndim number of dimensions of given data 1 (1D) or 2 (2D) * @param attribute which attribute index in XML peak nodes should be used: @@ -795,6 +840,30 @@ public static String getNMRIsotopeIdentifier(final String element) { } } + /** + * Returns the element identifier for a given isotope, e.g. 13C -> C. + * Elements defined so far: C, H, N, P, F, D, O, S, Si, B, Pt. + * + * @param isotope isotope's symbol (e.g. "13C") + * @return + */ + public static String getElementIdentifier(final String isotope) { + switch (isotope) { + case "13C": return "C"; + case "1H": return "H"; + case "15N": return "N"; + case "31P": return "P"; + case "19F": return "F"; + case "17O": return "O"; + case "33S": return "S"; + case "29Si": return "Si"; + case "11B": return "B"; + case "195Pt": return "Pt"; + default: + return null; + } + } + // /** // * Returns the hybridization level of each heavy atom in given molecule which has // * its own shift value. 
@@ -828,7 +897,7 @@ public static String getNMRIsotopeIdentifier(final String element) { //// ac.getAtom(i).setHybridization(IAtomType.Hybridization.SP3); //// continue; //// } -// NMRSHIFT_ATOMTYPE = testkit.Utils.getNMRShiftConstant(ac.getAtom(i).getSymbol()); +// NMRSHIFT_ATOMTYPE = Utils.getNMRShiftConstant(ac.getAtom(i).getSymbol()); // // is the NMR shift constant defined and does the nmr shift property entry in an atom exist? // if ((NMRSHIFT_ATOMTYPE == null) || (ac.getAtom(i).getProperty(NMRSHIFT_ATOMTYPE) == null)) { // continue; @@ -870,7 +939,7 @@ public static String getNMRIsotopeIdentifier(final String element) { // // the DB entry should at least contain one carbon spectrum // toContinue = false; // for (String prop : props) { -// if (prop.contains("Spectrum " + testkit.Utils.getNMRIsotopeIdentifier("C"))) { +// if (prop.contains("Spectrum " + Utils.getNMRIsotopeIdentifier("C"))) { // toContinue = true; // break; // } @@ -884,7 +953,7 @@ public static String getNMRIsotopeIdentifier(final String element) { // // check wether the DB entry contains a spectrum for the current query atom type // shiftsDB = null; // for (String prop : props) { -// if (prop.contains("Spectrum " + testkit.Utils.getNMRIsotopeIdentifier(qAtom.getSymbol()))) { +// if (prop.contains("Spectrum " + Utils.getNMRIsotopeIdentifier(qAtom.getSymbol()))) { // shiftsDB = acDB.getProperty(prop); // break; // } @@ -896,18 +965,18 @@ public static String getNMRIsotopeIdentifier(final String element) { //// if (qAtom.getSymbol().equals("C") && qAtom.getImplicitHydrogenCount() >= 3) { //// continue; //// } -// shiftQ = qAtom.getProperty(testkit.Utils.getNMRShiftConstant(ac.getAtom(i).getSymbol())); +// shiftQ = qAtom.getProperty(Utils.getNMRShiftConstant(ac.getAtom(i).getSymbol())); // // // check wether the DB entry contains a proton spectrum // String shiftsDBHydrogen = null; // for (String prop : props) { -// if (prop.contains("Spectrum " + testkit.Utils.getNMRIsotopeIdentifier("H"))) { 
+// if (prop.contains("Spectrum " + Utils.getNMRIsotopeIdentifier("H"))) { // shiftsDBHydrogen = acDB.getProperty(prop); // break; // } // } // -// String[][] shiftsDBvalues = testkit.Utils.parseShiftsNMRShiftDB(shiftsDB); +// String[][] shiftsDBvalues = Utils.parseShiftsNMRShiftDB(shiftsDB); // for (String[] shiftsDBvalue : shiftsDBvalues) { // shiftDB = Double.parseDouble(shiftsDBvalue[0]); // atomIndexDB = Integer.parseInt(shiftsDBvalue[2]); @@ -952,7 +1021,7 @@ public static String getNMRIsotopeIdentifier(final String element) { // // check whether the shifts of attached hydrogens are equal to hydrogen shifts of query atom -> higher priority at hybridization assignment step later // boolean added = false; // if(shiftsDBHydrogen != null){ -// String[][] shiftsDBvaluesHydrogen = testkit.Utils.parseShiftsNMRShiftDB(shiftsDBHydrogen); +// String[][] shiftsDBvaluesHydrogen = Utils.parseShiftsNMRShiftDB(shiftsDBHydrogen); // if(qAtom.getProperty("HydrogenShifts") != null){ // ArrayList shiftsQAtomvaluesHydrogen = qAtom.getProperty("HydrogenShifts"); // for (int j = 0; j < shiftsQAtomvaluesHydrogen.size(); j++) { From f4cf92719be4e76ee64ae831525d64614b9f9b72 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Mon, 23 Jul 2018 23:40:44 +0200 Subject: [PATCH 012/405] - small changes in createLSDFile function --- src/NMR/Process.java | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/NMR/Process.java b/src/NMR/Process.java index c0cbf50..3226fa3 100644 --- a/src/NMR/Process.java +++ b/src/NMR/Process.java @@ -449,10 +449,12 @@ public void createLSDfile(final String projectName, final String pathToOutputFil bondTable[i][j] = 0; } } - writer.println("; " + projectName); - writer.println("; " + MolecularFormulaManipulator.getString(this.molFormula) + "\n\n"); - + if(this.molFormula != null){ + writer.println("; " + MolecularFormulaManipulator.getString(this.molFormula) + "\n\n"); + } else { + writer.println("; unknown molecular formula"); + } for 
(int i = 0; i < this.mol.getAtomCount(); i++) { // set MULT section in LSD input file // set hybridization level From 80d8afb24da12f60ee3dfe96fbfaec03d16d9418 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Fri, 27 Jul 2018 15:01:50 +0200 Subject: [PATCH 013/405] - some changes to use Spectrum and Signal class objects for all existing NMR experiment assignment methods --- src/NMR/ParseRawData.java | 279 +++--- src/NMR/Process.java | 36 +- src/NMR/Signal.java | 4 + src/NMR/Spectrum.java | 193 ++-- src/NMR/Utils.java | 1814 +++++++++++++++++++------------------ 5 files changed, 1235 insertions(+), 1091 deletions(-) diff --git a/src/NMR/ParseRawData.java b/src/NMR/ParseRawData.java index 1cbd6c3..6e9fdf7 100644 --- a/src/NMR/ParseRawData.java +++ b/src/NMR/ParseRawData.java @@ -25,7 +25,6 @@ import java.io.IOException; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; @@ -36,10 +35,8 @@ import org.openscience.cdk.CDKConstants; import org.openscience.cdk.interfaces.IAtom; import org.openscience.cdk.interfaces.IAtomContainer; -import org.openscience.cdk.interfaces.IElement; import org.openscience.cdk.interfaces.IMolecularFormula; import org.openscience.cdk.silent.SilentChemObjectBuilder; -import org.openscience.cdk.tools.manipulator.AtomContainerManipulator; import org.openscience.cdk.tools.manipulator.MolecularFormulaManipulator; import org.xml.sax.SAXException; @@ -51,20 +48,25 @@ public class ParseRawData { final private IAtomContainer mol; final private IMolecularFormula molFormula; - final private HashMap> atomTypeIndices = new HashMap<>(); - final public static String CONST_PROP_EQUIVALENCE = "Equivalence"; - final public static String CONST_PROP_PROTONSHIFTS = "ProtonShifts"; - final public static String CONST_PROP_HHCOSY = "HHCOSY"; - final public static String CONST_PROP_HMBC = "HMBC"; - final public static String CONST_PROP_INADEQUATE = "INADEQUATE"; - + private HashMap> 
atomTypeIndices; + final public static String PROP_EQUIVALENCE = "Equivalence"; + + /** + * Creates an instances of this class with an empty class atom container. + */ public ParseRawData(){ this.molFormula = null; this.mol = SilentChemObjectBuilder.getInstance().newAtomContainer(); + this.setAtomTypeIndices(); } - + /** + * Creates an instances of this class with a class atom container consisting + * of all heavy atoms in given molecular formula. + * + * @param molFormula IMolecularFormula object for IAtomContainer creation + */ public ParseRawData(final IMolecularFormula molFormula){ this.molFormula = molFormula; this.mol = Utils.removeAtoms(MolecularFormulaManipulator.getAtomContainer(this.molFormula), "H"); @@ -73,8 +75,9 @@ public ParseRawData(final IMolecularFormula molFormula){ /** - * - * @return used IMolecularFormula object for this class instance + * Returns used IMolecularFormula object for this class instance. + * + * @return */ public final IMolecularFormula getMolecularFormula() { @@ -83,31 +86,15 @@ public final IMolecularFormula getMolecularFormula() { /** + * Returns used IAtomContainer object for this class instance. 
* - * @return used IAtomContainer object for this class instance + * @return */ public final IAtomContainer getAtomContainer() { return this.mol; } - public final void setAtomTypeIndices() { - - final HashSet atomTypes = new HashSet<>(); - if(this.molFormula != null){ - for (final IElement heavyElem : MolecularFormulaManipulator.getHeavyElements(this.molFormula)) { - atomTypes.add(heavyElem.getSymbol()); - } - } else { - for (final IAtom heavyAtom : AtomContainerManipulator.getHeavyAtoms(this.mol)) { - atomTypes.add(heavyAtom.getSymbol()); - } - } - for (final String atomType : atomTypes) { - this.atomTypeIndices.put(atomType, Utils.getAtomTypeIndicesInAtomContainer(this.mol, atomType)); - } - } - /** * Returns a HashMap object with the indices of all atoms for all atom types @@ -121,6 +108,17 @@ public final HashMap> getAtomTypeIndices() { } + /** + * Sets the indices of all atoms in this class atom container. + * @see Utils#getAtomTypeIndices(org.openscience.cdk.interfaces.IAtomContainer) + * + */ + public final void setAtomTypeIndices(){ + + this.atomTypeIndices = Utils.getAtomTypeIndices(this.mol); + } + + /** * Copies all up to here set properties from an atom in atom container to its * linked atoms with equivalent shift values. 
@@ -130,11 +128,11 @@ public final void setEquivalentProperties() { Map properties; for (int i = 0; i < this.mol.getAtomCount(); i++) { - if (this.mol.getAtom(i).getProperty(ParseRawData.CONST_PROP_EQUIVALENCE) != null) { + if (this.mol.getAtom(i).getProperty(ParseRawData.PROP_EQUIVALENCE) != null) { properties = this.mol.getAtom(i).getProperties(); for (final Object prop: properties.keySet()) { - if (this.mol.getAtom(i).getProperty(prop) != null && !prop.equals(ParseRawData.CONST_PROP_EQUIVALENCE)) { - for (final int k : (ArrayList) this.mol.getAtom(i).getProperty(ParseRawData.CONST_PROP_EQUIVALENCE)) { + if (this.mol.getAtom(i).getProperty(prop) != null && !prop.equals(ParseRawData.PROP_EQUIVALENCE)) { + for (final int k : (ArrayList) this.mol.getAtom(i).getProperty(ParseRawData.PROP_EQUIVALENCE)) { this.mol.getAtom(k).setProperty(prop, this.mol.getAtom(i).getProperty(prop)); } } @@ -186,14 +184,14 @@ public final boolean parse1DNMR(final String pathToPeakList, final String atomTy * @param pathToPeakList Path to peak list (Bruker's TopSpin csv file * format) * @param atomType Element name (e.g. 
"C") which also occurrs in - * {@link testkit.Utils#getNMRShiftConstant(java.lang.String)} + * {@link Utils#getNMRShiftConstant(java.lang.String)} * @return false if input shift list size greater than the number of atoms in * molecular formula, if such was given to the class * @throws java.io.IOException */ public final boolean parse1DNMRviaPeakTable(final String pathToPeakList, final String atomType) throws IOException { - final Spectrum spectrum = new Spectrum(new String[]{Utils.getNMRIsotopeIdentifier(atomType)}, new ArrayList[]{NMR.Utils.parsePeakTable(pathToPeakList, 4)}); + final Spectrum spectrum = Utils.parsePeakTable(pathToPeakList, new int[]{4}, new String[]{atomType}, 6); return this.set1DNMRShifts(spectrum); } @@ -214,7 +212,7 @@ public final boolean parse1DNMRviaPeakTable(final String pathToPeakList, final S * @param pathToXML Path to XML file (Bruker's TopSpin XML file * format) * @param atomType Element name (e.g. "C") which also occurrs in - * {@link testkit.Utils#getNMRShiftConstant(java.lang.String)} + * {@link Utils#getNMRShiftConstant(java.lang.String)} * @return false if input shift list size greater than the number of atoms in * molecular formula, if such was given to the class * @throws java.io.IOException @@ -223,7 +221,7 @@ public final boolean parse1DNMRviaPeakTable(final String pathToPeakList, final S */ public final boolean parse1DNMRviaXML(final String pathToXML, final String atomType) throws IOException, ParserConfigurationException, SAXException { - final Spectrum spectrum = new Spectrum(new String[]{Utils.getNMRIsotopeIdentifier(atomType)}, new ArrayList[]{NMR.Utils.parseXML(pathToXML, 1, 1)}); + final Spectrum spectrum = Utils.parseXML(pathToXML, 1, new int[]{1}, new String[]{atomType}); return this.set1DNMRShifts(spectrum); } @@ -308,14 +306,14 @@ private void askForEquivalentPeaks(final String atomType) { n = reader.nextInt(); } this.mol.getAtom(i).setProperty(Utils.getNMRShiftConstant(atomType), 
this.mol.getAtom(n).getProperty(Utils.getNMRShiftConstant(atomType))); - if(this.mol.getAtom(i).getProperty(ParseRawData.CONST_PROP_EQUIVALENCE) == null){ - this.mol.getAtom(i).setProperty(ParseRawData.CONST_PROP_EQUIVALENCE, new ArrayList<>()); + if(this.mol.getAtom(i).getProperty(ParseRawData.PROP_EQUIVALENCE) == null){ + this.mol.getAtom(i).setProperty(ParseRawData.PROP_EQUIVALENCE, new ArrayList<>()); } - if(this.mol.getAtom(n).getProperty(ParseRawData.CONST_PROP_EQUIVALENCE) == null){ - this.mol.getAtom(n).setProperty(ParseRawData.CONST_PROP_EQUIVALENCE, new ArrayList<>()); + if(this.mol.getAtom(n).getProperty(ParseRawData.PROP_EQUIVALENCE) == null){ + this.mol.getAtom(n).setProperty(ParseRawData.PROP_EQUIVALENCE, new ArrayList<>()); } - ((ArrayList) this.mol.getAtom(i).getProperty(ParseRawData.CONST_PROP_EQUIVALENCE)).add(n); - ((ArrayList) this.mol.getAtom(n).getProperty(ParseRawData.CONST_PROP_EQUIVALENCE)).add(i); + ((ArrayList) this.mol.getAtom(i).getProperty(ParseRawData.PROP_EQUIVALENCE)).add(n); + ((ArrayList) this.mol.getAtom(n).getProperty(ParseRawData.PROP_EQUIVALENCE)).add(i); } reader.close(); } @@ -334,17 +332,15 @@ private void askForEquivalentPeaks(final String atomType) { * @throws ParserConfigurationException * @throws SAXException */ - public final boolean parseDEPT(final String pathToDEPT90, final String pathToDEPT135, final double tol) throws IOException, ParserConfigurationException, SAXException { + public final int parseDEPT(final String pathToDEPT90, final String pathToDEPT135, final double tol) throws IOException, ParserConfigurationException, SAXException { if(NMR.Utils.getFileFormat(pathToDEPT90).equals("csv") && NMR.Utils.getFileFormat(pathToDEPT135).equals("csv")) { - this.parseDEPTviaPeakTable(pathToDEPT90, pathToDEPT135, tol); + return this.parseDEPTviaPeakTable(pathToDEPT90, pathToDEPT135, tol); } else if(NMR.Utils.getFileFormat(pathToDEPT90).equals("xml") && NMR.Utils.getFileFormat(pathToDEPT135).equals("xml")) { - 
this.parseDEPTviaXML(pathToDEPT90, pathToDEPT135, tol); - } else { - return false; - } + return this.parseDEPTviaXML(pathToDEPT90, pathToDEPT135, tol); + } - return true; + return 0; } @@ -353,46 +349,76 @@ public final boolean parseDEPT(final String pathToDEPT90, final String pathToDEP * peak * tables to carbon atoms. The meanwhile found matches are corrected, * see - * {@link testkit.Utils#correctShiftMatches(IAtomContainer, ArrayList, ArrayList, double,String)}. + * {@link Utils#correctShiftMatches(IAtomContainer, ArrayList, ArrayList, double,String)}. * * @param pathToDEPT90 Path to DEPT90 peak list (Bruker's TopSpin csv file * format) * @param pathToDEPT135 Path to DEPT135 peak list (Bruker's TopSpin csv file * format) * @param tol Tolance value [ppm] when matching carbon shifts + * @return * @throws java.io.IOException */ - public final void parseDEPTviaPeakTable(final String pathToDEPT90, final String pathToDEPT135, final double tol) throws IOException { + public final int parseDEPTviaPeakTable(final String pathToDEPT90, final String pathToDEPT135, final double tol) throws IOException { final ArrayList matchesDEPT90 = NMR.Utils.matchShiftsFromPeakTable(this.mol, pathToDEPT90, "C", tol, 4); final ArrayList matchesDEPT135 = NMR.Utils.matchShiftsFromPeakTable(this.mol, pathToDEPT135, "C", tol, 4); final ArrayList intensitiesDEPT135 = NMR.Utils.parsePeakTable(pathToDEPT135, 6); - this.setImplicitHydrogenNumberFromDEPT(matchesDEPT90, matchesDEPT135, intensitiesDEPT135); + return this.setImplicitHydrogenNumberFromDEPT(matchesDEPT90, matchesDEPT135, intensitiesDEPT135); } /** * Sets the number of implicit hydrogens from two carbon DEPT90 and DEPT135 * XML files to carbon atoms. The meanwhile found matches are corrected, see - * {@link testkit.Utils#correctShiftMatches(IAtomContainer, ArrayList, ArrayList, double,String)}. + * {@link Utils#correctShiftMatches(IAtomContainer, ArrayList, ArrayList, double,String)}. 
* * @param pathToDEPT90 Path to DEPT90 peak list (Bruker's TopSpin XML file * format) * @param pathToDEPT135 Path to DEPT135 peak list (Bruker's TopSpin XML file * format) - * @param tol Tolance value [ppm] when matching carbon shifts + * @param tol Tolance value [ppm] for matching carbon shifts + * @return * @throws java.io.IOException * @throws javax.xml.parsers.ParserConfigurationException * @throws org.xml.sax.SAXException */ - public final void parseDEPTviaXML(final String pathToDEPT90, final String pathToDEPT135, final double tol) throws IOException, ParserConfigurationException, SAXException { + public final int parseDEPTviaXML(final String pathToDEPT90, final String pathToDEPT135, final double tol) throws IOException, ParserConfigurationException, SAXException { final ArrayList matchesDEPT90 = NMR.Utils.matchShiftsFromXML(this.mol, pathToDEPT90, "C", tol, 1, 1); final ArrayList matchesDEPT135 = NMR.Utils.matchShiftsFromXML(this.mol, pathToDEPT135, "C", tol, 1, 1); final ArrayList intensitiesDEPT135 = NMR.Utils.parseXML(pathToDEPT135, 1, 2); - this.setImplicitHydrogenNumberFromDEPT(matchesDEPT90, matchesDEPT135, intensitiesDEPT135); + return this.setImplicitHydrogenNumberFromDEPT(matchesDEPT90, matchesDEPT135, intensitiesDEPT135); } + + /** + * Sets the hydrogen count information of carbon atoms in atom conatiner + * by usage of DEPT90 and DEPT135 information. 
+ * + * @param spectrumDEPT90 DEPT90 spectrum + * @param spectrumDEPT135 DEPT135 spectrum which has to contain intensity + * information + * @param tol tolerance value [ppm] for carbon shift matching + * @return false if one of the spectra is not set or the intensities are missing + */ + public final int setDEPT(final Spectrum spectrumDEPT90, final Spectrum spectrumDEPT135, final double tol){ + + if(spectrumDEPT90 == null || spectrumDEPT135 == null || spectrumDEPT135.getIntensities() == null){ + return 0; + } + final ArrayList shiftsDEPT90 = spectrumDEPT90.getShiftsByDim(0); + final ArrayList shiftsDEPT135 = spectrumDEPT135.getShiftsByDim(0); + final ArrayList intensitiesDEPT135 = spectrumDEPT135.getIntensities(); + ArrayList matchesDEPT90 = NMR.Utils.findShiftMatches(this.mol, shiftsDEPT90, tol, "C"); + matchesDEPT90 = NMR.Utils.correctShiftMatches(this.mol, shiftsDEPT90, matchesDEPT90, tol, "C"); + ArrayList matchesDEPT135 = NMR.Utils.findShiftMatches(this.mol, shiftsDEPT135, tol, "C"); + matchesDEPT135 = NMR.Utils.correctShiftMatches(this.mol, shiftsDEPT135, matchesDEPT135, tol, "C"); + + + return this.setImplicitHydrogenNumberFromDEPT(matchesDEPT90, matchesDEPT135, intensitiesDEPT135); + } + /** * @@ -400,7 +426,7 @@ public final void parseDEPTviaXML(final String pathToDEPT90, final String pathTo * @param matchesDEPT135 * @param intensitiesDEPT135 */ - private void setImplicitHydrogenNumberFromDEPT(final ArrayList matchesDEPT90, final ArrayList matchesDEPT135, final ArrayList intensitiesDEPT135) { + private int setImplicitHydrogenNumberFromDEPT(final ArrayList matchesDEPT90, final ArrayList matchesDEPT135, final ArrayList intensitiesDEPT135) { int matchDEPT90, matchDEPT135, hCount, hCountAll = 0; for (int i : this.atomTypeIndices.get("C")) { @@ -426,13 +452,12 @@ private void setImplicitHydrogenNumberFromDEPT(final ArrayList matchesD } this.mol.getAtom(i).setImplicitHydrogenCount(hCount); hCountAll += hCount; - if 
(this.mol.getAtom(i).getProperty(ParseRawData.CONST_PROP_EQUIVALENCE) != null) { - for (Integer k : (ArrayList) this.mol.getAtom(i).getProperty(ParseRawData.CONST_PROP_EQUIVALENCE)) { + if (this.mol.getAtom(i).getProperty(ParseRawData.PROP_EQUIVALENCE) != null) { + for (Integer k : (ArrayList) this.mol.getAtom(i).getProperty(ParseRawData.PROP_EQUIVALENCE)) { this.mol.getAtom(k).setImplicitHydrogenCount(hCount); hCountAll += hCount; } } - } } if(this.molFormula != null){ @@ -440,6 +465,8 @@ private void setImplicitHydrogenNumberFromDEPT(final ArrayList matchesD } else { System.out.println("assigned protons to carbons: " + hCountAll+ "!!!"); } + + return hCountAll; } @@ -450,7 +477,7 @@ private void setImplicitHydrogenNumberFromDEPT(final ArrayList matchesD * and {@link NMR.ParseRawData#parseHSQCviaXML(java.lang.String, java.lang.String, double)} * * @param pathToPeakList - * @param atomType + * @param atomType * @param tol * @return * @throws IOException @@ -487,19 +514,19 @@ public final boolean parseHSQC(final String pathToPeakList, final String atomTyp * * @param pathToPeakList path to HSQC peak table (Bruker's TopSpin csv file * format) - * @param atomType Element name (e.g. "C") which also occurrs in - * {@link testkit.Utils#getNMRShiftConstant(java.lang.String)} + * @param heavyAtomType Element name of H bonded heavy atom (e.g. 
"C") which also occurrs in + * {@link Utils#getNMRShiftConstant(java.lang.String)} * @param tol tolerance value [ppm] for matching the atoms of given atom * type * within the atom container * @throws IOException */ - public final void parseHSQCviaPeakTable(final String pathToPeakList, final String atomType, final double tol) throws IOException { + public final void parseHSQCviaPeakTable(final String pathToPeakList, final String heavyAtomType, final double tol) throws IOException { - final Spectrum spectrum = new Spectrum(new String[]{Utils.getNMRIsotopeIdentifier("H")}, new ArrayList[]{NMR.Utils.parsePeakTable(pathToPeakList, 5)}); - final ArrayList matchesAtomType = NMR.Utils.matchShiftsFromPeakTable(this.mol, pathToPeakList, atomType, tol, 6); + final ArrayList shiftsHydrogen = NMR.Utils.parsePeakTable(pathToPeakList, 5); + final ArrayList matchesHeavyAtomType = NMR.Utils.matchShiftsFromPeakTable(this.mol, pathToPeakList, heavyAtomType, tol, 6); - this.setImplicitHydrogenShifts(spectrum, matchesAtomType); + this.setImplicitHydrogenShifts(shiftsHydrogen, matchesHeavyAtomType); } /** @@ -514,8 +541,8 @@ public final void parseHSQCviaPeakTable(final String pathToPeakList, final Strin * {@link IAtomContainer#setProperty(java.lang.Object, java.lang.Object)}. * * @param pathToXML path to HSQC XML file - * @param atomType Element name (e.g. "C") which also occurrs in - * {@link testkit.Utils#getNMRShiftConstant(java.lang.String)} + * @param heavyAtomType Element name of H bonded heavy atom (e.g. 
"C") which also occurrs in + * {@link Utils#getNMRShiftConstant(java.lang.String)} * @param tol tolerance value [ppm] for matching the atoms of given atom * type * within the atom container @@ -523,30 +550,31 @@ public final void parseHSQCviaPeakTable(final String pathToPeakList, final Strin * @throws javax.xml.parsers.ParserConfigurationException * @throws org.xml.sax.SAXException */ - public final void parseHSQCviaXML(final String pathToXML, final String atomType, final double tol) throws IOException, ParserConfigurationException, SAXException { + public final void parseHSQCviaXML(final String pathToXML, final String heavyAtomType, final double tol) throws IOException, ParserConfigurationException, SAXException { - final Spectrum spectrum = new Spectrum(new String[]{Utils.getNMRIsotopeIdentifier("H")}, new ArrayList[]{NMR.Utils.parseXML(pathToXML, 2, 2)}); - final ArrayList matchesAtomType = NMR.Utils.matchShiftsFromXML(this.mol, pathToXML, atomType, tol, 2, 1); + final ArrayList shiftsHydrogen = NMR.Utils.parseXML(pathToXML, 2, 2); + final ArrayList matchesHeavyAtomType = NMR.Utils.matchShiftsFromXML(this.mol, pathToXML, heavyAtomType, tol, 2, 1); - this.setImplicitHydrogenShifts(spectrum, matchesAtomType); + this.setImplicitHydrogenShifts(shiftsHydrogen, matchesHeavyAtomType); } + - private void setImplicitHydrogenShifts(final Spectrum spectrum, final ArrayList matchesAtomType) { - final ArrayList hydrogenShifts = spectrum.getShiftsByDim(0); + private void setImplicitHydrogenShifts(final ArrayList shiftsHydrogen, final ArrayList matchesHeavyAtomType) { + IAtom matchAtom; ArrayList assignedHydrogensShifts; - for (int i = 0; i < matchesAtomType.size(); i++) { - if (matchesAtomType.get(i) >= 0) { - matchAtom = this.mol.getAtom(matchesAtomType.get(i)); + for (int i = 0; i < matchesHeavyAtomType.size(); i++) { + if (matchesHeavyAtomType.get(i) >= 0) { + matchAtom = this.mol.getAtom(matchesHeavyAtomType.get(i)); if (matchAtom.getImplicitHydrogenCount() == null || 
matchAtom.getImplicitHydrogenCount() == 0) { continue; } - if (matchAtom.getProperty(ParseRawData.CONST_PROP_PROTONSHIFTS) == null) { - matchAtom.setProperty(ParseRawData.CONST_PROP_PROTONSHIFTS, new ArrayList<>(matchAtom.getImplicitHydrogenCount())); + if (matchAtom.getProperty(Spectrum.SPECTYPE_HSQC) == null) { + matchAtom.setProperty(Spectrum.SPECTYPE_HSQC, new ArrayList<>(matchAtom.getImplicitHydrogenCount())); } - assignedHydrogensShifts = matchAtom.getProperty(ParseRawData.CONST_PROP_PROTONSHIFTS); + assignedHydrogensShifts = matchAtom.getProperty(Spectrum.SPECTYPE_HSQC); if (assignedHydrogensShifts.size() < matchAtom.getImplicitHydrogenCount()) { - assignedHydrogensShifts.add(hydrogenShifts.get(i)); + assignedHydrogensShifts.add(shiftsHydrogen.get(i)); } } } @@ -559,14 +587,15 @@ private void setImplicitHydrogenShifts(final Spectrum spectrum, final ArrayList< * {@link IAtomContainer#setProperty(java.lang.Object, java.lang.Object)}. * * @param spectrum Spectrum class object consisting of Signal class objects - * where the heavy atom is given first and the protons value as the second. + * where the proton values are given first and the heavy atom values as the second. 
* @param tolHeavy tolerance value [ppm] for heavy atom shift matching */ public final void setHSQC(final Spectrum spectrum, final double tolHeavy) { - final ArrayList hydrogenShifts = spectrum.getShiftsByDim(1); - final ArrayList shiftsHeavy = spectrum.getShiftsByDim(0); - ArrayList matchesHeavyAtom = NMR.Utils.findShiftMatches(this.mol, shiftsHeavy, tolHeavy, Utils.getElementIdentifier(spectrum.getNuclei()[0])); - matchesHeavyAtom = NMR.Utils.correctShiftMatches(this.mol, shiftsHeavy, matchesHeavyAtom, tolHeavy, Utils.getElementIdentifier(spectrum.getNuclei()[0])); + + final ArrayList shiftsHydrogen = spectrum.getShiftsByDim(0); + final ArrayList shiftsHeavyAtom = spectrum.getShiftsByDim(1); + ArrayList matchesHeavyAtom = NMR.Utils.findShiftMatches(this.mol, shiftsHeavyAtom, tolHeavy, Utils.getElementIdentifier(spectrum.getNuclei()[1])); + matchesHeavyAtom = NMR.Utils.correctShiftMatches(this.mol, shiftsHeavyAtom, matchesHeavyAtom, tolHeavy, Utils.getElementIdentifier(spectrum.getNuclei()[1])); IAtom matchAtom; ArrayList assignedHydrogensShifts; @@ -576,12 +605,12 @@ public final void setHSQC(final Spectrum spectrum, final double tolHeavy) { if (matchAtom.getImplicitHydrogenCount() == null || matchAtom.getImplicitHydrogenCount() == 0) { continue; } - if (matchAtom.getProperty(ParseRawData.CONST_PROP_PROTONSHIFTS) == null) { - matchAtom.setProperty(ParseRawData.CONST_PROP_PROTONSHIFTS, new ArrayList<>(matchAtom.getImplicitHydrogenCount())); + if (matchAtom.getProperty(Spectrum.SPECTYPE_HSQC) == null) { + matchAtom.setProperty(Spectrum.SPECTYPE_HSQC, new ArrayList<>(matchAtom.getImplicitHydrogenCount())); } - assignedHydrogensShifts = matchAtom.getProperty(ParseRawData.CONST_PROP_PROTONSHIFTS); + assignedHydrogensShifts = matchAtom.getProperty(Spectrum.SPECTYPE_HSQC); if (assignedHydrogensShifts.size() < matchAtom.getImplicitHydrogenCount()) { - assignedHydrogensShifts.add(hydrogenShifts.get(i)); + assignedHydrogensShifts.add(shiftsHydrogen.get(i)); } } } @@ -626,10 
+655,10 @@ private int[] findSingleImplicitHydrogenShiftMatch(final double queryShift, fina ArrayList protonShiftList; for (int i = 0; i < this.mol.getAtomCount(); i++) { // skip atoms without implicit hydrogens - if (this.mol.getAtom(i).getProperty(ParseRawData.CONST_PROP_PROTONSHIFTS) == null) { + if (this.mol.getAtom(i).getProperty(Spectrum.SPECTYPE_HSQC) == null) { continue; } - protonShiftList = this.mol.getAtom(i).getProperty(ParseRawData.CONST_PROP_PROTONSHIFTS); + protonShiftList = this.mol.getAtom(i).getProperty(Spectrum.SPECTYPE_HSQC); for (int j = 0; j < protonShiftList.size(); j++) { // figure out the atom with lowest shift deviation if ((queryShift - tol <= protonShiftList.get(j)) && (protonShiftList.get(j) <= queryShift + tol) && (Math.abs(queryShift - protonShiftList.get(j)) < minDiff)) { @@ -672,7 +701,7 @@ private ArrayList correctHydrogenShiftMatches(final ArrayList s matchIndex = matches.indexOf(matchIndexAtomContainer); if (matches.get(matchIndex) >= 0) { singleMatchIndex = this.findSingleImplicitHydrogenShiftMatch(shifts.get(matchIndex), tol); - singleMatchShifts = this.mol.getAtom(singleMatchIndex[0]).getProperty(ParseRawData.CONST_PROP_PROTONSHIFTS); + singleMatchShifts = this.mol.getAtom(singleMatchIndex[0]).getProperty(Spectrum.SPECTYPE_HSQC); diff = shifts.get(matchIndex) - singleMatchShifts.get(singleMatchIndex[1]); diffs.add(diff); } @@ -750,7 +779,11 @@ public final boolean parseHHCOSY(final String pathToPeakList, final double tol) */ public final boolean parseHHCOSYviaPeakTable(final String pathToPeakList, final double tol) throws IOException { - final Spectrum spectrum = new Spectrum(new String[]{Utils.getNMRIsotopeIdentifier("H"), Utils.getNMRIsotopeIdentifier("H")}, new ArrayList[]{NMR.Utils.parsePeakTable(pathToPeakList, 5), NMR.Utils.parsePeakTable(pathToPeakList, 6)}); + final Spectrum spectrum = new Spectrum( new String[]{ Utils.getNMRIsotopeIdentifier("H"), + Utils.getNMRIsotopeIdentifier("H")}, + new 
ArrayList[]{NMR.Utils.parsePeakTable(pathToPeakList, 5), + NMR.Utils.parsePeakTable(pathToPeakList, 6)}, + NMR.Utils.parsePeakTable(pathToPeakList, 9)); return this.setHHCOSY(spectrum, tol); } @@ -780,7 +813,11 @@ public final boolean parseHHCOSYviaPeakTable(final String pathToPeakList, final */ public final boolean parseHHCOSYviaXML(final String pathToXML, final double tol) throws IOException, ParserConfigurationException, SAXException { - final Spectrum spectrum = new Spectrum(new String[]{Utils.getNMRIsotopeIdentifier("H"), Utils.getNMRIsotopeIdentifier("H")}, new ArrayList[]{NMR.Utils.parseXML(pathToXML, 2, 1), NMR.Utils.parseXML(pathToXML, 2, 2)}); + final Spectrum spectrum = new Spectrum( new String[]{ Utils.getNMRIsotopeIdentifier("H"), + Utils.getNMRIsotopeIdentifier("H")}, + new ArrayList[]{NMR.Utils.parseXML(pathToXML, 2, 1), + NMR.Utils.parseXML(pathToXML, 2, 2)}, + NMR.Utils.parseXML(pathToXML, 2, 3)); return this.setHHCOSY(spectrum, tol); } @@ -803,7 +840,7 @@ public final boolean setHHCOSY(final Spectrum spectrum, final double tol) { if (!NMR.Utils.isBidirectional(hydrogenShiftMatches1, hydrogenShiftMatches2)) { return false; } - NMR.Utils.setBidirectionalLinks(this.mol, hydrogenShiftMatches1, hydrogenShiftMatches2, ParseRawData.CONST_PROP_HHCOSY); + NMR.Utils.setBidirectionalLinks(this.mol, hydrogenShiftMatches1, hydrogenShiftMatches2, Spectrum.SPECTYPE_HHCOSY); return true; } @@ -858,7 +895,11 @@ public final boolean parseINADEQUATE(final String pathToPeakList, final double t */ public final boolean parseINADEQUATEviaPeakTable(final String pathToPeakList, final double tol) throws IOException { - final Spectrum spectrum = new Spectrum(new String[]{Utils.getNMRIsotopeIdentifier("C"), Utils.getNMRIsotopeIdentifier("C")}, new ArrayList[]{NMR.Utils.parsePeakTable(pathToPeakList, 5), NMR.Utils.parsePeakTable(pathToPeakList, 6)}); + final Spectrum spectrum = new Spectrum( new String[]{ Utils.getNMRIsotopeIdentifier("C"), + 
Utils.getNMRIsotopeIdentifier("C")}, + new ArrayList[]{NMR.Utils.parsePeakTable(pathToPeakList, 5), + NMR.Utils.parsePeakTable(pathToPeakList, 6)}, + NMR.Utils.parsePeakTable(pathToPeakList, 9)); return this.setINADEQUATE(spectrum, tol); } @@ -883,7 +924,11 @@ public final boolean parseINADEQUATEviaPeakTable(final String pathToPeakList, fi */ public final boolean parseINADEQUATEviaXML(final String pathToXML, final double tol) throws IOException, ParserConfigurationException, SAXException { - final Spectrum spectrum = new Spectrum(new String[]{Utils.getNMRIsotopeIdentifier("C"), Utils.getNMRIsotopeIdentifier("C")}, new ArrayList[]{NMR.Utils.parseXML(pathToXML, 2, 1), NMR.Utils.parseXML(pathToXML, 2, 2)}); + final Spectrum spectrum = new Spectrum( new String[]{ Utils.getNMRIsotopeIdentifier("C"), + Utils.getNMRIsotopeIdentifier("C")}, + new ArrayList[]{NMR.Utils.parseXML(pathToXML, 2, 1), + NMR.Utils.parseXML(pathToXML, 2, 2)}, + NMR.Utils.parseXML(pathToXML, 2, 3)); return this.setINADEQUATE(spectrum, tol); } @@ -908,7 +953,7 @@ public final boolean setINADEQUATE(final Spectrum spectrum, final double tol) { if (!NMR.Utils.isBidirectional(carbonShiftMatches1, carbonShiftMatches2)) { return false; } - NMR.Utils.setBidirectionalLinks(this.mol, carbonShiftMatches1, carbonShiftMatches2, ParseRawData.CONST_PROP_INADEQUATE); + NMR.Utils.setBidirectionalLinks(this.mol, carbonShiftMatches1, carbonShiftMatches2, Spectrum.SPECTYPE_INADEQUATE); return true; } @@ -962,7 +1007,7 @@ public final boolean parseHMBC(final String pathToPeakList, final String atomTyp * @param pathToPeakList path to HMBC peak table (Bruker's TopSpin csv * file format) * @param atomType Element name (e.g. 
"C") which also occurrs in - * {@link testkit.Utils#getNMRShiftConstant(java.lang.String)} + * {@link Utils#getNMRShiftConstant(java.lang.String)} * @param tolHydrogen tolerance value [ppm] for hydrogen shift matching * @param tolHeavy tolerance value [ppm] for heavy atom shift matching * @throws IOException @@ -991,7 +1036,7 @@ public final void parseHMBCviaPeakTable(final String pathToPeakList, final Strin * @param pathToXML path to HMBC peak XML file (Bruker's TopSpin XML file * format) * @param atomType Element name (e.g. "C") which also occurrs in - * {@link testkit.Utils#getNMRShiftConstant(java.lang.String)} + * {@link Utils#getNMRShiftConstant(java.lang.String)} * @param tolHydrogen tolerance value [ppm] for hydrogen shift matching * @param tolHeavy tolerance value [ppm] for heavy atom shift matching * @throws IOException @@ -1012,10 +1057,10 @@ private void setHMBC(final ArrayList hydrogenShiftMatches, final ArrayL ArrayList HMBCList; for (int i = 0; i < hydrogenShiftMatches.size(); i++) { if (hydrogenShiftMatches.get(i) >= 0 && heavyAtomShiftMatches.get(i) >= 0) { - if (this.mol.getAtom(hydrogenShiftMatches.get(i)).getProperty(ParseRawData.CONST_PROP_HMBC) == null) { - this.mol.getAtom(hydrogenShiftMatches.get(i)).setProperty(ParseRawData.CONST_PROP_HMBC, new ArrayList<>()); + if (this.mol.getAtom(hydrogenShiftMatches.get(i)).getProperty(Spectrum.SPECTYPE_HMBC) == null) { + this.mol.getAtom(hydrogenShiftMatches.get(i)).setProperty(Spectrum.SPECTYPE_HMBC, new ArrayList<>()); } - HMBCList = this.mol.getAtom(hydrogenShiftMatches.get(i)).getProperty(ParseRawData.CONST_PROP_HMBC); + HMBCList = this.mol.getAtom(hydrogenShiftMatches.get(i)).getProperty(Spectrum.SPECTYPE_HMBC); if (!HMBCList.contains(heavyAtomShiftMatches.get(i))) { HMBCList.add(heavyAtomShiftMatches.get(i)); } @@ -1030,25 +1075,25 @@ private void setHMBC(final ArrayList hydrogenShiftMatches, final ArrayL * {@link IAtomContainer#setProperty(java.lang.Object, java.lang.Object)}. 
* * @param spectrum Spectrum class object consisting of Signal class objects - * where the heavy atom is given first and the protons value as the second. - * @param tolHeavy tolerance value [ppm] for heavy atom shift matching + * where the proton shift values is given first and the heavy atom shifts as the second. * @param tolHydrogen tolerance value [ppm] for hydrogen shift matching + * @param tolHeavy tolerance value [ppm] for heavy atom shift matching */ - public final void setHMBC(final Spectrum spectrum, final double tolHeavy, final double tolHydrogen) { + public final void setHMBC(final Spectrum spectrum, final double tolHydrogen, final double tolHeavy) { - final ArrayList shiftsHydrogen = spectrum.getShiftsByDim(1); + final ArrayList shiftsHydrogen = spectrum.getShiftsByDim(0); final ArrayList matchesHydrogen = this.correctHydrogenShiftMatches(shiftsHydrogen, this.findImplicitHydrogenShiftMatches(shiftsHydrogen, tolHydrogen), tolHydrogen); - final ArrayList shiftsHeavy = spectrum.getShiftsByDim(0); - ArrayList matchesHeavyAtom = NMR.Utils.findShiftMatches(this.mol, shiftsHeavy, tolHeavy, Utils.getElementIdentifier(spectrum.getNuclei()[0])); - matchesHeavyAtom = NMR.Utils.correctShiftMatches(this.mol, shiftsHeavy, matchesHeavyAtom, tolHeavy, Utils.getElementIdentifier(spectrum.getNuclei()[0])); + final ArrayList shiftsHeavyAtom = spectrum.getShiftsByDim(1); + ArrayList matchesHeavyAtom = NMR.Utils.findShiftMatches(this.mol, shiftsHeavyAtom, tolHeavy, Utils.getElementIdentifier(spectrum.getNuclei()[1])); + matchesHeavyAtom = NMR.Utils.correctShiftMatches(this.mol, shiftsHeavyAtom, matchesHeavyAtom, tolHeavy, Utils.getElementIdentifier(spectrum.getNuclei()[1])); ArrayList HMBCList; for (int i = 0; i < matchesHydrogen.size(); i++) { if (matchesHydrogen.get(i) >= 0 && matchesHeavyAtom.get(i) >= 0) { - if (this.mol.getAtom(matchesHydrogen.get(i)).getProperty(ParseRawData.CONST_PROP_HMBC) == null) { - 
this.mol.getAtom(matchesHydrogen.get(i)).setProperty(ParseRawData.CONST_PROP_HMBC, new ArrayList<>()); + if (this.mol.getAtom(matchesHydrogen.get(i)).getProperty(Spectrum.SPECTYPE_HMBC) == null) { + this.mol.getAtom(matchesHydrogen.get(i)).setProperty(Spectrum.SPECTYPE_HMBC, new ArrayList<>()); } - HMBCList = this.mol.getAtom(matchesHydrogen.get(i)).getProperty(ParseRawData.CONST_PROP_HMBC); + HMBCList = this.mol.getAtom(matchesHydrogen.get(i)).getProperty(Spectrum.SPECTYPE_HMBC); if (!HMBCList.contains(matchesHeavyAtom.get(i))) { HMBCList.add(matchesHeavyAtom.get(i)); } diff --git a/src/NMR/Process.java b/src/NMR/Process.java index 3226fa3..eece28f 100644 --- a/src/NMR/Process.java +++ b/src/NMR/Process.java @@ -239,7 +239,7 @@ public void setBonds(final String[] experiments){ if ((i == bondPartnerIndex)) {// || (this.mol.getBond(this.mol.getAtom(i), this.mol.getAtom(bondPartnerIndex)) != null)) { continue; } - if(experiments[e].equals("HMBC")){ + if(experiments[e].equals(Spectrum.SPECTYPE_HMBC)){ System.out.println("HMBC bond setting: still to come!!!"); } else { this.setBond(i, bondPartnerIndex); @@ -331,15 +331,15 @@ public boolean addCOSY(final String atomType1, final double shift1, final double if ((atomIndex1 < 0) || (atomIndex2 < 0) || (atomIndex1 == atomIndex2)) { return false; } - if(this.mol.getAtom(atomIndex1).getProperty("COSY") == null){ - this.mol.getAtom(atomIndex1).setProperty("COSY", new ArrayList<>()); + if(this.mol.getAtom(atomIndex1).getProperty(Spectrum.SPECTYPE_HHCOSY) == null){ + this.mol.getAtom(atomIndex1).setProperty(Spectrum.SPECTYPE_HHCOSY, new ArrayList<>()); } - if(this.mol.getAtom(atomIndex2).getProperty("COSY") == null){ - this.mol.getAtom(atomIndex2).setProperty("COSY", new ArrayList<>()); + if(this.mol.getAtom(atomIndex2).getProperty(Spectrum.SPECTYPE_HHCOSY) == null){ + this.mol.getAtom(atomIndex2).setProperty(Spectrum.SPECTYPE_HHCOSY, new ArrayList<>()); } - final ArrayList COSYList = 
this.mol.getAtom(atomIndex1).getProperty("COSY"); - final ArrayList COSYListX = this.mol.getAtom(atomIndex2).getProperty("COSY"); + final ArrayList COSYList = this.mol.getAtom(atomIndex1).getProperty(Spectrum.SPECTYPE_HHCOSY); + final ArrayList COSYListX = this.mol.getAtom(atomIndex2).getProperty(Spectrum.SPECTYPE_HHCOSY); COSYList.add(atomIndex2); COSYListX.add(atomIndex1); @@ -383,7 +383,7 @@ public int addHSQC(final String atomType, final Double shift, final double tol, } else { // set HSQC for the first atom of given atom type without a already set shift value and without attached proton shifts for (Integer i : this.atomTypeIndices.get(atomType)) { - if ((this.mol.getAtom(i).getProperty(NMRSHIFT_ATOMTYPE) == null) && (this.mol.getAtom(i).getProperty("HydrogenShifts") == null)) { + if ((this.mol.getAtom(i).getProperty(NMRSHIFT_ATOMTYPE) == null) && (this.mol.getAtom(i).getProperty(Spectrum.SPECTYPE_HSQC) == null)) { atomIndex = i; break; } @@ -395,10 +395,10 @@ public int addHSQC(final String atomType, final Double shift, final double tol, } // add the proton shift value if it is higher than 0 if(shiftH != null){ - if (this.mol.getAtom(atomIndex).getProperty("HydrogenShifts") == null) { - this.mol.getAtom(atomIndex).setProperty("HydrogenShifts", new ArrayList<>()); + if (this.mol.getAtom(atomIndex).getProperty(Spectrum.SPECTYPE_HSQC) == null) { + this.mol.getAtom(atomIndex).setProperty(Spectrum.SPECTYPE_HSQC, new ArrayList<>()); } - final ArrayList protonShifts = this.mol.getAtom(atomIndex).getProperty("HydrogenShifts"); + final ArrayList protonShifts = this.mol.getAtom(atomIndex).getProperty(Spectrum.SPECTYPE_HSQC); protonShifts.add(shiftH); } // increase the implicit proton number @@ -438,7 +438,7 @@ public void addAtom(final String atomType, final Double shift){ * @throws FileNotFoundException * @throws UnsupportedEncodingException */ - public void createLSDfile(final String projectName, final String pathToOutputFile, final String[] pathsToFilters) throws 
FileNotFoundException, UnsupportedEncodingException{ + public void createLSDFile(final String projectName, final String pathToOutputFile, final String[] pathsToFilters) throws FileNotFoundException, UnsupportedEncodingException{ PrintWriter writer = new PrintWriter(pathToOutputFile, "UTF-8"); ArrayList idxs; @@ -489,8 +489,8 @@ public void createLSDfile(final String projectName, final String pathToOutputFil HSQC += "HSQC " + (i+1) + " " + (i+1) + ";\t" + this.mol.getAtom(i).getSymbol() + "H" + this.mol.getAtom(i).getImplicitHydrogenCount() + "\n"; } // set BOND section in LSD input file from INADEQUATE - if (this.mol.getAtom(i).getProperty("INADEQUATE") != null) { - idxs = this.mol.getAtom(i).getProperty("INADEQUATE"); + if (this.mol.getAtom(i).getProperty(Spectrum.SPECTYPE_INADEQUATE) != null) { + idxs = this.mol.getAtom(i).getProperty(Spectrum.SPECTYPE_INADEQUATE); for (Integer idx : idxs) { if (bondTable[i][idx] == 0 && bondTable[idx][i] == 0) { bondTable[i][idx] = 1; @@ -499,8 +499,8 @@ public void createLSDfile(final String projectName, final String pathToOutputFil } } // set BOND section in LSD input file from COSY - if(this.mol.getAtom(i).getProperty("COSY") != null){ - idxs = this.mol.getAtom(i).getProperty("COSY"); + if(this.mol.getAtom(i).getProperty(Spectrum.SPECTYPE_HHCOSY) != null){ + idxs = this.mol.getAtom(i).getProperty(Spectrum.SPECTYPE_HHCOSY); for (Integer idx : idxs) { if(bondTable[i][idx] == 0 && bondTable[idx][i] == 0){ bondTable[i][idx] = 1; @@ -513,8 +513,8 @@ public void createLSDfile(final String projectName, final String pathToOutputFil // set HMBC section in LSD input file // sets only HMBC signals which are not represented by a bond boolean test3JviaNextNeighborBond; - if (this.mol.getAtom(i).getProperty("HMBC") != null) { - idxs = this.mol.getAtom(i).getProperty("HMBC"); + if (this.mol.getAtom(i).getProperty(Spectrum.SPECTYPE_HMBC) != null) { + idxs = this.mol.getAtom(i).getProperty(Spectrum.SPECTYPE_HMBC); for (Integer idx : idxs) { 
if (bondTable[i][idx] == 0 && bondTable[idx][i] == 0) { test3JviaNextNeighborBond = false; diff --git a/src/NMR/Signal.java b/src/NMR/Signal.java index b585d6e..97cf6cf 100644 --- a/src/NMR/Signal.java +++ b/src/NMR/Signal.java @@ -1,3 +1,7 @@ +/* +* This class was copied and modified from NMRSignal class in casekit.model package (by Christoph Steinbeck) +*/ + /* * The MIT License * diff --git a/src/NMR/Spectrum.java b/src/NMR/Spectrum.java index a3f78a4..d248516 100644 --- a/src/NMR/Spectrum.java +++ b/src/NMR/Spectrum.java @@ -1,3 +1,8 @@ +/* +* This class was copied and modified from NMRSpectrum class in casekit.model package (by Christoph Steinbeck) +*/ + + /* * The MIT License * @@ -24,6 +29,7 @@ package NMR; import java.util.ArrayList; +import java.util.Arrays; /** * @@ -31,10 +37,23 @@ */ public class Spectrum extends ArrayList{ + static public final String SPECTYPE_1D = "1D"; + static public final String SPECTYPE_DEPT90 = "DEPT90"; + static public final String SPECTYPE_DEPT135 = "DEPT135"; + static public final String SPECTYPE_HSQC = "HSQC"; + static public final String SPECTYPE_HHCOSY = "HHCOSY"; + static public final String SPECTYPE_INADEQUATE = "INADEQUATE"; + static public final String SPECTYPE_HMBC = "HMBC"; + static private final String[] SPECTYPES = new String[]{ SPECTYPE_1D, SPECTYPE_DEPT90, + SPECTYPE_DEPT135, SPECTYPE_HSQC, + SPECTYPE_HHCOSY, SPECTYPE_INADEQUATE, + SPECTYPE_HMBC}; + + /** - * An arbitrary name that can be assigned to this spectrum for identification purposes. + * An arbitrary name or description that can be assigned to this spectrum for identification purposes. */ - private String name; + private String description; /** * An arbitrary name to identify the type of this spectrum, like COSY, NOESY, HSQC, etc. 
I * decided not to provide static Strings with given experiment type since the there are @@ -60,12 +79,18 @@ public class Spectrum extends ArrayList{ private Float spectrometerFrequency; private String solvent; private String standard; -// protected transient EventListenerList changeListeners = new EventListenerList(); + + /** + * This holds sorted list of Chemical Shifts of all axes. The first dimension addresses the + * axes, the second the shift values in this axis, starting from the highest value. + */ + private final ArrayList> shiftList = new ArrayList<>(); + - public Spectrum(final String[] nuclei, final ArrayList[] shiftLists) { + public Spectrum(final String[] nuclei, final ArrayList[] shiftLists, final ArrayList intensities) { this.nuclei = nuclei; this.ndim = this.nuclei.length; - this.setSignals(shiftLists); + this.addSignals(shiftLists, intensities); } public Spectrum(final String[] nuclei) { @@ -73,21 +98,66 @@ public Spectrum(final String[] nuclei) { this.ndim = this.nuclei.length; } + public String[] getNuclei(){ + return this.nuclei; + } + + public int getDimCount(){ + return this.ndim; + } + + public void setSpecType(final String specType){ + for (final String stype : SPECTYPES) { + if(specType.equals(stype)){ + this.specType = specType; + break; + } + } + } + + public String getSpecType(){ + return this.specType; + } + + public void setSpecDescription(final String description){ + this.description = description; + } + + public String getSpecDescription(){ + return this.description; + } - public void setSignals(final ArrayList[] shiftLists){ + public final void addSignals(final ArrayList[] shiftLists, final ArrayList intensities){ + // assumes the same number of shift lists as set dimension number if(shiftLists.length != this.ndim){ - System.err.println("Unequal number of nuclei and shift lists!!!"); + System.err.println("Unequal number of nuclei (dimension) and shift lists!!!"); return; } - Double[] shifts; // assumes that the shift lists have the 
same number of entries + int prevShiftListSize = shiftLists[0].size(); + for (int i = 0; i < shiftLists.length; i++) { + if(shiftLists[i].size() != prevShiftListSize){ + System.err.println("Unequal number of shifts in " + (i+1) + " shift list!!!"); + return; + } + if(intensities != null && shiftLists[i].size() != intensities.size()){ + System.err.println("Unequal number of shifts in shift list " + (i+1) + " and intensities number!!!"); + return; + } + } + Double[] shifts; for (int row = 0; row < shiftLists[0].size(); row++) { shifts = new Double[this.ndim]; for (int col = 0; col < this.ndim; col++) { shifts[col] = shiftLists[col].get(row); } - this.add(new NMR.Signal(this.nuclei, shifts, null, null, null)); + if(intensities != null){ + this.add(new NMR.Signal(this.nuclei, shifts, intensities.get(row), null, null)); + } else { + this.add(new NMR.Signal(this.nuclei, shifts, null, null, null)); + } } + this.updateShiftLists(); } @@ -96,28 +166,29 @@ public void setSignals(final ArrayList[] shiftLists){ * equal or smaller than the number of respective atoms * @return */ - public int getSignalNumber() { + public int getSignalCount() { return this.size(); } /** - * Adds an NMRSignal to the NMRSpectrum. + * Adds a Signal ({@link NMR.Signal}) to this Spectrum class object. 
* @param signal */ - public void addSignal(Signal signal) { + public void addSignal(final Signal signal) { this.add(signal); -// this.updateShiftLists();s + this.updateShiftLists(); } -// /** -// * Creates an empty signal with correct dimension -// */ -// public void newSignal() { -// System.out.println("nucleus: " + nucleus.length + nucleus[0]); -// add(new NMRSignal(nucleus)); -// updateShiftLists(); -// } - + public void removeSignal(final Signal signal){ + this.remove(this.getSignalIndex(signal)); + this.updateShiftLists(); + } + + public void removeSignal(final int signalIndex){ + this.remove(signalIndex); + this.updateShiftLists(); + } + /** * Returns an NMRSignal at position number in the List * @param signalIndex @@ -127,13 +198,22 @@ public Signal getSignal(final int signalIndex) { return this.get(signalIndex); } + public ArrayList getIntensities(){ + final ArrayList intensities = new ArrayList<>(); + for (Signal sig : this) { + intensities.add(sig.getIntensity()); + } + + return intensities; + } + public ArrayList getShiftsByDim(final int dim){ - final ArrayList signals = new ArrayList<>(); + final ArrayList shifts = new ArrayList<>(); for (final Signal sig : this) { - signals.add(sig.getShift(dim)); + shifts.add(sig.getShift(dim)); } - return signals; + return shifts; } /** @@ -223,22 +303,26 @@ public ArrayList pickSignals(final double shift, final int dim, final do return pickedSignals; } -// /** -// * Extracts a list of unique shifts from the list of cross signals and sorts them. This is to -// * define the column and row headers for tables. -// */ -// protected void updateShiftLists() { -// Double shift; NMR.Signal nmrSignal; -// for (int i = 0; i < this.size(); i++) { -// nmrSignal = this.get(i); -// for (int d = 0; d < nmrSignal.getDim(); d++) { -// shift = nmrSignal.getShift(d); -// if (!this.get(d).contains(shift)) { -// shiftList.get(d).add(shift); -// } -// } -// } -// } + /** + * Extracts a list of unique shifts from the list of cross signals. 
This is to + * define the column and row headers for tables. + */ + private void updateShiftLists() { + this.shiftList.clear(); + for (int dim = 0; dim < this.getDimCount(); dim++) { + this.shiftList.add(dim, new ArrayList<>()); + } + Double shift; NMR.Signal nmrSignal; + for (int i = 0; i < this.size(); i++) { + nmrSignal = this.get(i); + for (int d = 0; d < this.getDimCount(); d++) { + shift = nmrSignal.getShift(d); + if (!this.shiftList.get(d).contains(shift)) { + this.shiftList.get(d).add(shift); + } + } + } + } /** * Creates a 2D matrix of booleans, that models the set of crosspeaks in the 2D NMR spectrum. @@ -253,23 +337,22 @@ public ArrayList pickSignals(final double shift, final int dim, final do * isInShiftList(hetAtomShiftList, het, hetAtomShiftList.length); if (hetPos >= 0){ protPos = * isInShiftList(protonShiftList, prot, protonShiftList.length); if ( protPos >= 0){ found = * true; hetCorMatrix[hetPos][protPos] = true; } } } } + * @return */ - public void report() { + public String report() { String s = ""; - System.out.println("Report for nmr spectrum " + name + " of type " - + specType + ": "); - for (int i = 0; i < this.size(); i++) { - System.out.println("ShiftList for dimension " + (i + 1) + ":"); - for (int d = 0; d < this.get(i).getDim(); d++) { - s += this.get(i).getShift(d) + "; "; +// s+= "Report for nmr spectrum " + name + " of type " +// + specType + ":\n\n"; + for (int i = 0; i < this.shiftList.size(); i++) { + s += "\nShiftList for dimension " + (i + 1) + ":\n"; + for (int d = 0; d < this.shiftList.get(i).size(); d++) { + s += this.shiftList.get(i).get(d) + "; "; } - System.out.println(s + "\n"); - s = ""; } - - } - - public String[] getNuclei(){ - return this.nuclei; + s += "\nBelonging intensities:\n"; + for (Signal sig : this) { + s += sig.getIntensity() + "; "; + } + return s; } } diff --git a/src/NMR/Utils.java b/src/NMR/Utils.java index c3ca051..a6ec7d4 100644 --- a/src/NMR/Utils.java +++ b/src/NMR/Utils.java @@ -60,6 +60,8 @@ import 
org.openscience.cdk.interfaces.IAtomContainer; import org.openscience.cdk.interfaces.IAtomType; import org.openscience.cdk.interfaces.IBond; +import org.openscience.cdk.interfaces.IElement; +import org.openscience.cdk.interfaces.IMolecularFormula; import org.openscience.cdk.io.SDFWriter; import org.openscience.cdk.io.iterator.IteratingSDFReader; import org.openscience.cdk.isomorphism.matchers.QueryAtomContainer; @@ -67,6 +69,7 @@ import org.openscience.cdk.silent.SilentChemObjectBuilder; import org.openscience.cdk.smiles.smarts.parser.SMARTSParser; import org.openscience.cdk.tools.manipulator.AtomContainerManipulator; +import org.openscience.cdk.tools.manipulator.MolecularFormulaManipulator; import org.w3c.dom.NodeList; import org.xml.sax.SAXException; @@ -149,138 +152,27 @@ public static void convertSDFtoLSD(final String pathSDF, final String pathOut, f } - - /** - * Creates an IAtomContainer object containing atoms without any bond - * information, given by a molecular formula. - * - * @param molFormula Molecular Formula - * @return - * @deprecated - */ - @Deprecated - public static IAtomContainer createAtomContainer(final String molFormula) { - - HashMap hash = NMR.Utils.getAtomCountsInMolecularFormula(molFormula); - IAtomContainer ac = SilentChemObjectBuilder.getInstance().newAtomContainer(); - - for (String elem : hash.keySet()) { - // add atoms of current element - ac = NMR.Utils.addAtoms(ac, elem, hash.get(elem)); - } - - return ac; - } - - /** - * Creates a HashMap with the number of atoms for each occurring atom type. 
- * - * @deprecated - * @param molFormula - * @return - */ - @Deprecated - public static HashMap getAtomCountsInMolecularFormula(final String molFormula) { - - HashMap hash = new HashMap<>(); - String[] molFormSplit = molFormula.split("[A-Z]"); - Matcher m = Pattern.compile("[A-Z]").matcher(molFormula); - String elem; - int noAtoms; - int k = 1; - - while (m.find()) { - // name of current element - elem = molFormula.substring(m.start(), m.end()); - if (k >= molFormSplit.length || molFormSplit[k].isEmpty()) { - // if no atom number is given then assume only one atom - noAtoms = 1; - } else if (Character.isLowerCase(molFormSplit[k].charAt(0))) { - // if element's name contains two letters then extend it - elem += molFormSplit[k].charAt(0); - // if more than one atoms of that element with two letters exist - if (molFormSplit[k].length() > 1) { - // check given atom number - noAtoms = Integer.parseInt(molFormSplit[k].substring(1)); - } else { - noAtoms = 1; - } - } else { - // if atom number is given - noAtoms = Integer.parseInt(molFormSplit[k].substring(0)); - } - try { - // add atom type and frequency to class hashmap - hash.put(elem, noAtoms); - } catch (Exception e) { - System.err.println("Illegal element \"" + elem + "\" will be ignored!!!"); - } - - k++; - } - - return hash; - } - - /** - * - * @param ac - * @param atomType - * @return - * @deprecated - */ - @Deprecated - public static int getAtomTypeCount(final IAtomContainer ac, final String atomType) { - - int noAtoms = 0; - for (int i = 0; i < ac.getAtomCount(); i++) { - if (ac.getAtom(i).getSymbol().equals(atomType)) { - noAtoms++; - } - } - - return noAtoms; - } - - /** - * Creates atoms of the same atom type and store it into an atom container. - * - * @param ac Atom container - * @param noAtoms Number of atoms to create - * @param atomType Atom type (element's name, e.g. 
C or Br) - * @return - */ - public static IAtomContainer addAtoms(final IAtomContainer ac, final String atomType, final int noAtoms) throws IllegalArgumentException { - - for (int i = 0; i < noAtoms; i++) { - ac.addAtom(new Atom(atomType)); - } - - return ac; - } - /** - * Removes atoms from a given atom type from an atom container. + * Returns a hashmap constisting of lists of atom indices in an atom container. + * This is done for all atom types (e.g. C or Br) in given atom container. * - * @param ac IAtomContainer object where to remove the atoms - * @param atomType Atom type (element's name, e.g. C or Br) - * @return IAtomContainer where the atoms were removed + * @param ac IAtomContainer to look in + * @return + * @see #getAtomTypeIndicesByElement(org.openscience.cdk.interfaces.IAtomContainer, java.lang.String) */ - public static IAtomContainer removeAtoms(final IAtomContainer ac, final String atomType){ + public static HashMap> getAtomTypeIndices(final IAtomContainer ac) { - List toRemoveList = new ArrayList<>(); - for (IAtom atomA : ac.atoms()) { - if (atomA.getSymbol().equals(atomType)){// detect wether the current atom A is a from the given atom type - toRemoveList.add(atomA); - } + final HashMap> atomTypeIndices = new HashMap<>(); + final HashSet atomTypes = new HashSet<>(); + for (final IAtom heavyAtom : AtomContainerManipulator.getHeavyAtoms(ac)) { + atomTypes.add(heavyAtom.getSymbol()); } - - for (IAtom iAtom : toRemoveList) { - ac.removeAtom(iAtom); + for (final String atomType : atomTypes) { + atomTypeIndices.put(atomType, Utils.getAtomTypeIndicesByElement(ac, atomType)); } - return ac; + return atomTypeIndices; } @@ -292,7 +184,7 @@ public static IAtomContainer removeAtoms(final IAtomContainer ac, final String a * @param atomType Atom type to find in atom container * @return */ - public static ArrayList getAtomTypeIndicesInAtomContainer(final IAtomContainer ac, final String atomType){ + public static ArrayList getAtomTypeIndicesByElement(final 
IAtomContainer ac, final String atomType){ final ArrayList indices = new ArrayList<>(); for (int i = 0; i < ac.getAtomCount(); i++) { @@ -334,58 +226,38 @@ public static ArrayList parsePeakTable(final String pathToPeakList, fina /** - * Reads specific columns of NMR peak tables to obtain a Spectrum class - * object. + * Reads specific columns of one NMR peak table to obtain a Spectrum class + * object and set intensitiy values. + * The number of columns and atom types has to be the same and defines the + * dimension of the returning spectrum. * - * @param pathsToPeakLists paths to NMR peak tables + * @param pathToPeakList path to NMR peak table * @param columns columns to select in each peak table * @param atomTypes atom types (element) for each dimension + * @param intensityColumnIndex column index for intensity values * @return Spectrum class object containing the peak lists * @throws IOException */ - public static Spectrum parsePeakTables(final String[] pathsToPeakLists, final int[] columns, final String[] atomTypes) throws IOException { + public static Spectrum parsePeakTable(final String pathToPeakList, final int[] columns, final String[] atomTypes, final int intensityColumnIndex) throws IOException { - final ArrayList[] shiftsList = new ArrayList[pathsToPeakLists.length]; - final String[] nuclei = new String[pathsToPeakLists.length]; - for (int i = 0; i < pathsToPeakLists.length; i++) { - shiftsList[i] = Utils.parsePeakTable(pathsToPeakLists[i], columns[i]); - nuclei[i] = Utils.getNMRIsotopeIdentifier(atomTypes[i]); + // assumes the same number of selected columns and atom types + if(columns.length != atomTypes.length){ + return null; } - - return new Spectrum(nuclei, shiftsList); - } - - - /** - * Reads specific columns of NMR XML files to obtain a Spectrum class - * object. 
- * - * @param pathsToXMLs paths to NMR XML files - * @param dims array of dimensions of given data 1 (1D) or 2 (2D) - * @param attributes which attribute indices in XML peak nodes should be used: - * 1 (shift of 1st dimension), 2 (shift of 2nd dimension if 2D data, - * intensity if 1D data) or 3 (intensity if 2D data) - * @param atomTypes atom types (element) for each dimension - * @return Spectrum class object containing the peak lists - * @throws IOException - * @throws javax.xml.parsers.ParserConfigurationException - * @throws org.xml.sax.SAXException - */ - public static Spectrum parseXMLs(final String[] pathsToXMLs, final int[] dims, final int[] attributes, final String[] atomTypes) throws IOException, ParserConfigurationException, SAXException { - - final ArrayList[] shiftsList = new ArrayList[pathsToXMLs.length]; - final String[] nuclei = new String[pathsToXMLs.length]; - for (int i = 0; i < pathsToXMLs.length; i++) { - shiftsList[i] = Utils.parseXML(pathsToXMLs[i], dims[i], attributes[i]); - nuclei[i] = Utils.getNMRIsotopeIdentifier(atomTypes[i]); + final ArrayList[] shiftsList = new ArrayList[columns.length]; + final String[] nuclei = new String[columns.length]; + for (int col = 0; col < columns.length; col++) { + shiftsList[col] = Utils.parsePeakTable(pathToPeakList, columns[col]); + nuclei[col] = Utils.getNMRIsotopeIdentifier(atomTypes[col]); } + final ArrayList intensities = parsePeakTable(pathToPeakList, intensityColumnIndex); - return new Spectrum(nuclei, shiftsList); + return new Spectrum(nuclei, shiftsList, intensities); } /** - * Reads a NMR peak XML file and stores it into an + * Reads a NMR peak XML file and returns one attribute of nodes (column) into an * ArrayList object. * The XML file must be in Bruker's TopSpin format. 
* @@ -402,6 +274,11 @@ public static Spectrum parseXMLs(final String[] pathsToXMLs, final int[] dims, f */ public static ArrayList parseXML(final String pathToXML, final int dim, final int attribute) throws IOException, ParserConfigurationException, SAXException { + // assumes a attribute value between 1 and 3 + if(attribute < 1 || attribute > 3){ + return null; + } + final ArrayList shifts = new ArrayList<>(); final DocumentBuilderFactory docBuilderFactory = DocumentBuilderFactory.newInstance(); final DocumentBuilder docBuilder = docBuilderFactory.newDocumentBuilder(); @@ -418,6 +295,40 @@ public static ArrayList parseXML(final String pathToXML, final int dim, } + /** + * Reads specific columns of NMR XML files to obtain a Spectrum class + * object. + * The XML file must be in Bruker's TopSpin format. + * + * @param pathToXML path to NMR XML file in Bruker's XML file format + * @param ndim number of dimensions: 1 (1D) or 2 (2D) + * @param attributes which attribute indices in XML peak nodes should be used: + * 1 (shift of 1st dimension), 2 (shift of 2nd dimension if 2D data, + * intensity if 1D data) or 3 (intensity if 2D data) + * @param atomTypes atom types (element) for each dimension + * @return Spectrum class object containing the selected peak lists + * @throws IOException + * @throws javax.xml.parsers.ParserConfigurationException + * @throws org.xml.sax.SAXException + */ + public static Spectrum parseXML(final String pathToXML, final int ndim, final int[] attributes, final String[] atomTypes) throws IOException, ParserConfigurationException, SAXException { + + // assumes the same number of dims, attributes and atom types and a maximum number of dims of 2 + if((ndim != attributes.length) || (ndim != atomTypes.length) || (attributes.length != atomTypes.length) + || (ndim < 1 || ndim > 2)){ + return null; + } + final ArrayList[] shiftLists = new ArrayList[ndim]; + final String[] nuclei = new String[ndim]; + for (int nucl = 0; nucl < ndim; nucl++) { + 
nuclei[nucl] = Utils.getNMRIsotopeIdentifier(atomTypes[nucl]); + shiftLists[nucl] = Utils.parseXML(pathToXML, ndim, attributes[nucl]); + } + + return new Spectrum(nuclei, shiftLists, Utils.parseXML(pathToXML, ndim, ndim + 1)); + } + + /** * Corrects a match list regarding a given shift list and an atom container. * This is useful when two ore more shift values (e.g. DEPT shifts) match @@ -630,876 +541,977 @@ public static String getNMRShiftConstant(final String element){ } + public static int getElectronNumberByBondOrder(final IBond.Order order) { + switch (order) { + case SINGLE: + return 1; + case DOUBLE: + return 2; + case TRIPLE: + return 3; + case QUADRUPLE: + return 4; + case QUINTUPLE: + return 5; + case SEXTUPLE: + return 6; + default: + return 0; + } + } + + /** - * Returns a bond type for two bond atoms from its hybridization. - * CURRENTLY ONLY SINGLE BOND DETECTION POSSIBLE!!! - * This function detects single, double and triple bonds and returns a - * bond order from {@link org.openscience.cdk.interfaces.IBond.Order}. - * If no bond type could be detected then - * {@link org.openscience.cdk.interfaces.IBond.Order#UNSET} will be returned. - * For single and double bond detection, the following elements are defined so far: C, O, N, S. - * For triple bond detection, the following elements are defined so far: C, N. + * Returns the NMR isotope identifier for a given element, e.g. C -> 13C. + * Elements defined so far: C, H, N, P, F, D, O, S, Si, B, Pt. * - * - * @param atom1 - * @param atom2 + * @param element element's symbol (e.g. 
"C") * @return */ - public static IBond.Order getBondTypeFromHybridizations(final IAtom atom1, final IAtom atom2){ - - final String atomType1 = atom1.getSymbol(); - final IAtomType.Hybridization hybridization1 = atom1.getHybridization(); - final String atomType2 = atom2.getSymbol(); - final IAtomType.Hybridization hybridization2 = atom2.getHybridization(); - - if(hybridization1 == null || hybridization2 == null){ - return IBond.Order.UNSET; - } - IBond.Order bondOrder1 = IBond.Order.UNSET; - IBond.Order bondOrder2 = IBond.Order.UNSET; - // single bond detection, the "3" means all SP3 hybrdidizations like SP3, SP3D2 or PLANAR3 - if ((atomType1.equals("C") || atomType1.equals("O") || atomType1.equals("N") || atomType1.equals("S")) - && hybridization1.toString().contains("3")) { - return IBond.Order.SINGLE; - } - if ((atomType2.equals("C") || atomType2.equals("O") || atomType2.equals("N") || atomType2.equals("S")) - && hybridization2.toString().contains("3")) { - return IBond.Order.SINGLE; - } -// // double bond detection -// if ((atomType1.equals("C") && (hybridization1.equals(IAtomType.Hybridization.SP1) || hybridization1.equals(IAtomType.Hybridization.SP2))) -// || ((atomType1.equals("O") || atomType1.equals("N") || atomType1.equals("S")) && (hybridization1.equals(IAtomType.Hybridization.SP2)))) { -// bondOrder1 = IBond.Order.DOUBLE; -// } -// if ((atomType2.equals("C") && (hybridization2.equals(IAtomType.Hybridization.SP1) || hybridization2.equals(IAtomType.Hybridization.SP2))) -// || ((atomType2.equals("O") || atomType2.equals("N") || atomType2.equals("S")) && hybridization2.equals(IAtomType.Hybridization.SP2))) { -// bondOrder2 = IBond.Order.DOUBLE; -// } -// // triple bond detection -// if ((atomType1.equals("C") && (hybridization1.equals(IAtomType.Hybridization.SP1))) -// && (atomType2.equals("N") && hybridization2.equals(IAtomType.Hybridization.SP1))) { -// bondOrder1 = IBond.Order.TRIPLE; -// } -// if ((atomType2.equals("N") && 
(hybridization2.equals(IAtomType.Hybridization.SP1))) -// && (atomType1.equals("C") && hybridization1.equals(IAtomType.Hybridization.SP1))) { -// bondOrder2 = IBond.Order.TRIPLE; -// } - - if (bondOrder1.equals(bondOrder2)) { - return bondOrder1; + public static String getNMRIsotopeIdentifier(final String element) { + switch(element){ + case "C": return "13C"; + case "H": return "1H"; + case "N": return "15N"; + case "P": return "31P"; + case "F": return "19F"; + case "O": return "17O"; + case "S": return "33S"; + case "Si": return "29Si"; + case "B": return "11B"; + case "Pt": return "195Pt"; + default: + return null; } - - return IBond.Order.UNSET; } /** - * Returns a list of open bonds of an atom. + * Returns the element identifier for a given isotope, e.g. 13C -> C. + * Elements defined so far: C, H, N, P, F, D, O, S, Si, B, Pt. * - * @param ac atom container - * @param atomIndex index of the atom to test - * @return + * @param isotope isotope's symbol (e.g. "13C") + * @return */ - public static ArrayList getOpenBonds(final IAtomContainer ac, final int atomIndex){ - - final IAtom atom = ac.getAtom(atomIndex); - if(atom.getHybridization() == null){ - return null; - } - final ArrayList bondOrderList = new ArrayList<>(); - final AtomValenceDescriptor valenceDesc = new AtomValenceDescriptor(); - final int valence = Integer.valueOf(valenceDesc.calculate(atom, ac).getValue().toString()); - int electronsLeft = (8 - (valence + atom.getImplicitHydrogenCount())); - - if (electronsLeft == 0) { -// System.out.println(atom.getSymbol() + ": " + atomIndex + " (" + atom.getHybridization() + "): " + bondOrderList); - return bondOrderList; - } - // only one single bond left; possible at SP1, SP2 and SP3 - if (electronsLeft == 1) { - bondOrderList.add(IBond.Order.SINGLE); -// System.out.println(atom.getSymbol() + ": " + atomIndex + " (" + atom.getHybridization() + "): " + bondOrderList); - return bondOrderList; - } - // with SP3 are only single bonds possible - if 
(atom.getHybridization().equals(IAtomType.Hybridization.SP3)) { - // subtract the single bonded neighbor number - electronsLeft -= ac.getConnectedAtomsList(atom).size(); - for (int k = 0; k < electronsLeft; k++) { - bondOrderList.add(IBond.Order.SINGLE); - } -// System.out.println(atom.getSymbol() + ": " + atomIndex + " (" + atom.getHybridization() + "): " + bondOrderList); - return bondOrderList; + public static String getElementIdentifier(final String isotope) { + switch (isotope) { + case "13C": return "C"; + case "1H": return "H"; + case "15N": return "N"; + case "31P": return "P"; + case "19F": return "F"; + case "17O": return "O"; + case "33S": return "S"; + case "29Si": return "Si"; + case "11B": return "B"; + case "195Pt": return "Pt"; + default: + return null; } - - if (atom.getHybridization().equals(IAtomType.Hybridization.SP2)) { - switch (atom.getSymbol()) { - case "O": - case "S": - bondOrderList.add(IBond.Order.DOUBLE); - return bondOrderList; - case "C": - bondOrderList.add(IBond.Order.SINGLE); - bondOrderList.add(IBond.Order.SINGLE); - bondOrderList.add(IBond.Order.DOUBLE); - break; - case "N": - bondOrderList.add(IBond.Order.SINGLE); - bondOrderList.add(IBond.Order.DOUBLE); - break; - default: - break; + } + + + public static int[] getNeighborhoodBondsCount(final IAtomContainer ac, final int indexAC, final String[] bondsSet, final String[] neighborElems){ + final int[] counts = new int[neighborElems.length * bondsSet.length]; + String foundBonds; + // for all given neighbor element types + for (int n = 0; n < neighborElems.length; n++) { + foundBonds = ""; + // for all next neighbors of a specific element + for (IAtom neighborAtom : ac.getConnectedAtomsList(ac.getAtom(indexAC))) { + // skip if not the right neighborhood element or bond type is unknown/unset + if ((!neighborAtom.getSymbol().equals(neighborElems[n])) || (NMR.Utils.getStringFromBondOrder(ac.getBond(ac.getAtom(indexAC), neighborAtom).getOrder()) == null)) { + continue; + } + foundBonds 
+= NMR.Utils.getStringFromBondOrder(ac.getBond(ac.getAtom(indexAC), neighborAtom).getOrder()); } - } else if (atom.getHybridization().equals(IAtomType.Hybridization.SP1)) { - switch (atom.getSymbol()) { - case "C": - bondOrderList.add(IBond.Order.DOUBLE); - bondOrderList.add(IBond.Order.DOUBLE); - // or - bondOrderList.add(IBond.Order.SINGLE); - bondOrderList.add(IBond.Order.TRIPLE); - break; - case "N": - bondOrderList.add(IBond.Order.TRIPLE); - break; - default: + for (int k = 0; k < bondsSet.length; k++) { + counts[n * bondsSet.length + k] = 0; + if (NMR.Utils.sortString(foundBonds).equals(NMR.Utils.sortString(bondsSet[k]))) { + counts[n * bondsSet.length + k] = 1; break; + } } } - for (IAtom neighbor : ac.getConnectedAtomsList(atom)) { - bondOrderList.remove(ac.getBond(atom, neighbor).getOrder()); - electronsLeft -= NMR.Utils.getElectronNumberByBondOrder(ac.getBond(atom, neighbor).getOrder()); - } - int theoCounter = 0; - for (IBond.Order order : bondOrderList) { - theoCounter += NMR.Utils.getElectronNumberByBondOrder(order); + return counts; + } + + + public static void writeNeighborhoodBondsCountMatrix(final String pathToOutput, final int[][] m, final String[] bondsSet, final String elem, String[] neighborElems, final int min, final int max, final int stepSize) throws IOException{ + + if(stepSize < 1){ + System.err.println("stepSize < 1 not allowed!!!"); + return; } - switch(Math.abs(theoCounter - electronsLeft)){ - case 1: - bondOrderList.remove(IBond.Order.SINGLE); - theoCounter -= 1; - break; - case 2: - - break; - case 3: - - break; + final StringBuilder sb = new StringBuilder(); + sb.append("shift [" + elem + "] (" + stepSize + "),nTotal,inRing,isArom,q" + elem + "," + elem + "H," + elem + "H2," + elem + "H3,"); + for (int i = 0; i < neighborElems.length; i++) { + for (int j = 0; j < bondsSet.length; j++) { + sb.append(bondsSet[j] + "[" + neighborElems[i] + "]"); + if (j < bondsSet.length - 1) { + sb.append(","); + } + } + if (i < neighborElems.length - 
1) { + sb.append(","); + } + } + sb.append("\n"); + for (int i = 0; i < stepSize * (max - min) + 1; i++) { + sb.append((i + min) + ","); + for (int j = 0; j < 3 + 4 + neighborElems.length * bondsSet.length; j++) { + sb.append(m[i][j]); + if (j < 3 + 4 + neighborElems.length * bondsSet.length - 1) { + sb.append(","); + } + } + sb.append("\n"); } + final FileWriter writer = new FileWriter(pathToOutput); + writer.append(sb.toString()); + writer.flush(); + writer.close(); + } + + + public static String sortString(final String s) { + final char[] c = s.toCharArray(); + Arrays.sort(c); + return new String(c); + } + + + public static ArrayList> getBondOrderSets(final String[] valences) { -// System.out.println(atom.getSymbol() + ": " + atomIndex + " (" + atom.getHybridization() + "): " + bondOrderList + " -> e: " + theoCounter + " (theo) vs. " + electronsLeft + " (real), bond counter: " + ac.getConnectedAtomsList(atom).size() + " (+" + atom.getImplicitHydrogenCount() + "H)"); + final ArrayList> bondOrderSets = new ArrayList<>(); + for (int i = 0; i < valences.length; i++) { + bondOrderSets.add(new ArrayList<>()); + for (int k = 0; k < StringUtils.countMatches(valences[i], "-"); k++) { + bondOrderSets.get(i).add(IBond.Order.SINGLE); + } + for (int k = 0; k < StringUtils.countMatches(valences[i], "="); k++) { + bondOrderSets.get(i).add(IBond.Order.DOUBLE); + } + for (int k = 0; k < StringUtils.countMatches(valences[i], "%"); k++) { + bondOrderSets.get(i).add(IBond.Order.TRIPLE); + } + } - - return bondOrderList; + return bondOrderSets; } - public static int getElectronNumberByBondOrder(final IBond.Order order) { + public static String getStringFromBondOrder(final IBond.Order order) { switch (order) { case SINGLE: - return 1; + return "-"; case DOUBLE: - return 2; + return "="; case TRIPLE: - return 3; - case QUADRUPLE: - return 4; - case QUINTUPLE: - return 5; - case SEXTUPLE: - return 6; + return "%"; default: - return 0; + return null; } } - /** - * Returns the NMR 
isotope identifier for a given element, e.g. C -> 13C. - * Elements defined so far: C, H, N, P, F, D, O, S, Si, B, Pt. - * - * @param element element's symbol (e.g. "C") - * @return - */ - public static String getNMRIsotopeIdentifier(final String element) { - switch(element){ - case "C": return "13C"; - case "H": return "1H"; - case "N": return "15N"; - case "P": return "31P"; - case "F": return "19F"; - case "O": return "17O"; - case "S": return "33S"; - case "Si": return "29Si"; - case "B": return "11B"; - case "Pt": return "195Pt"; - default: - return null; + public static IBond.Order getBondOrderFromString(final String order){ + switch(order){ + case "-": return IBond.Order.SINGLE; + case "=": return IBond.Order.DOUBLE; + case "%": return IBond.Order.TRIPLE; + default: return null; } } + + public static void writeCSV(final String pathToOutput, final String table) throws IOException { + FileWriter fr = new FileWriter(new File(pathToOutput)); + BufferedWriter br = new BufferedWriter(fr); + br.write(table); + br.close(); + } + + /** - * Returns the element identifier for a given isotope, e.g. 13C -> C. - * Elements defined so far: C, H, N, P, F, D, O, S, Si, B, Pt. * - * @param isotope isotope's symbol (e.g. "13C") + * @param input * @return */ - public static String getElementIdentifier(final String isotope) { - switch (isotope) { - case "13C": return "C"; - case "1H": return "H"; - case "15N": return "N"; - case "31P": return "P"; - case "19F": return "F"; - case "17O": return "O"; - case "33S": return "S"; - case "29Si": return "Si"; - case "11B": return "B"; - case "195Pt": return "Pt"; - default: - return null; + public static ArrayList getOutliers(ArrayList input) { + final ArrayList outliers = new ArrayList<>(); + if(input.size() <= 1){ + return outliers; } - } - -// /** -// * Returns the hybridization level of each heavy atom in given molecule which has -// * its own shift value. 
-// * First it compares the number of attached (implicit) hydrogens and sets -// * the hybridization level from it directly. This is only possible for -// * carbons with three or four attached hydrogens (sp3). [CURRENTLY DISABLED] -// * -// * If less than three hydrogens are attached or in case of other heavy -// * atoms then a NMRShiftDB file will be used to obtain the -// * frequencies of the different hybridization levels from the database. -// * This happens for directly bonded neighbors too. -// * -// * -// * @param ac -// * @param pathToNMRShiftDB -// * @param tol -// * @param molFormula -// * @return -// * @throws FileNotFoundException -// */ -// public static HashMap>>> getHybridizationsFromNMRShiftDB(final IAtomContainer ac, final String pathToNMRShiftDB, final double tol, final IMolecularFormula molFormula) throws FileNotFoundException{ -// -// final HashMap>> elementsHybridCounter = new HashMap<>(); -// final HashMap>> elementsBondTypeCounter = new HashMap<>(); -// final HashMap> expactedNeighbors = new HashMap<>(); -// String NMRSHIFT_ATOMTYPE; -// // initializations only -// for (int i = 0; i < ac.getAtomCount(); i++) { -// // sure case for carbon: 3 or 4 hydrogens -> sp3 -//// if (ac.getAtom(i).getSymbol().equals("C") && ac.getAtom(i).getImplicitHydrogenCount() >= 3) { -//// ac.getAtom(i).setHybridization(IAtomType.Hybridization.SP3); -//// continue; -//// } -// NMRSHIFT_ATOMTYPE = Utils.getNMRShiftConstant(ac.getAtom(i).getSymbol()); -// // is the NMR shift constant defined and does the nmr shift property entry in an atom exist? 
-// if ((NMRSHIFT_ATOMTYPE == null) || (ac.getAtom(i).getProperty(NMRSHIFT_ATOMTYPE) == null)) { -// continue; -// } -// elementsHybridCounter.put(i, new HashMap<>()); -// elementsBondTypeCounter.put(i, new HashMap<>()); -// elementsHybridCounter.get(i).put("query", new ArrayList<>()); -// elementsHybridCounter.get(i).put("queryH", new ArrayList<>()); -// // create an array list for each atom type in given molecular formula -// for (IElement elem : MolecularFormulaManipulator.getHeavyElements(molFormula)) { -// elementsHybridCounter.get(i).put(elem.getSymbol(), new ArrayList<>()); -// elementsBondTypeCounter.get(i).put(elem.getSymbol(), new ArrayList<>()); -// } -// -// expactedNeighbors.put(i, new HashMap<>()); -// for (IAtom expNeighbor : ac.getConnectedAtomsList(ac.getAtom(i))) { -// if (!expactedNeighbors.get(i).keySet().contains(expNeighbor.getSymbol())) { -// expactedNeighbors.get(i).put(expNeighbor.getSymbol(), 0); -// } -// expactedNeighbors.get(i).put(expNeighbor.getSymbol(), expactedNeighbors.get(i).get(expNeighbor.getSymbol()) + 1); -// } -// } -// // beginning of DB search -// String shiftsDB; -// double shiftDB, shiftQ; -// int atomIndexDB; -// boolean add, toContinue; -// final AtomHybridizationDescriptor hybridDesc = new AtomHybridizationDescriptor(); -// IAtom qAtom; -// IAtomContainer acDB; -// final IteratingSDFReader iterator = new IteratingSDFReader( -// new FileReader(pathToNMRShiftDB), -// SilentChemObjectBuilder.getInstance() -// ); -// while (iterator.hasNext()) { -// acDB = iterator.next(); -// ArrayList props = (ArrayList) (ArrayList) (new ArrayList<>(acDB.getProperties().keySet())); -// Collections.sort(props); -// // the DB entry should at least contain one carbon spectrum -// toContinue = false; -// for (String prop : props) { -// if (prop.contains("Spectrum " + Utils.getNMRIsotopeIdentifier("C"))) { -// toContinue = true; -// break; -// } -// } -// if (!toContinue) { -// continue; -// } -// -// for (int i : 
elementsHybridCounter.keySet()) { -// qAtom = ac.getAtom(i); -// // check wether the DB entry contains a spectrum for the current query atom type -// shiftsDB = null; -// for (String prop : props) { -// if (prop.contains("Spectrum " + Utils.getNMRIsotopeIdentifier(qAtom.getSymbol()))) { -// shiftsDB = acDB.getProperty(prop); -// break; -// } -// } -// if(shiftsDB == null){ -// continue; -// } -// // ignore the already set sp3 hybridizations at carbon atoms with at least 3 implicit hydrogens -//// if (qAtom.getSymbol().equals("C") && qAtom.getImplicitHydrogenCount() >= 3) { -//// continue; -//// } -// shiftQ = qAtom.getProperty(Utils.getNMRShiftConstant(ac.getAtom(i).getSymbol())); -// -// // check wether the DB entry contains a proton spectrum -// String shiftsDBHydrogen = null; -// for (String prop : props) { -// if (prop.contains("Spectrum " + Utils.getNMRIsotopeIdentifier("H"))) { -// shiftsDBHydrogen = acDB.getProperty(prop); -// break; -// } -// } -// -// String[][] shiftsDBvalues = Utils.parseShiftsNMRShiftDB(shiftsDB); -// for (String[] shiftsDBvalue : shiftsDBvalues) { -// shiftDB = Double.parseDouble(shiftsDBvalue[0]); -// atomIndexDB = Integer.parseInt(shiftsDBvalue[2]); -// add = true; -// // shift match within a shift tolerance range -// if ((shiftQ - tol <= shiftDB) && (shiftDB <= shiftQ + tol)) { -// // matched atom should have the same number of attached (implicit) hydrogens -// if (acDB.getAtom(atomIndexDB).getImplicitHydrogenCount().intValue() == qAtom.getImplicitHydrogenCount().intValue()) { -// // count next neighbors -// HashMap foundNeighbors = new HashMap<>(); -// for (IAtom neighborAtomDB : acDB.getConnectedAtomsList(acDB.getAtom(atomIndexDB))) { -// if (!foundNeighbors.keySet().contains(neighborAtomDB.getSymbol())) { -// foundNeighbors.put(neighborAtomDB.getSymbol(), 0); -// } -// foundNeighbors.put(neighborAtomDB.getSymbol(), foundNeighbors.get(neighborAtomDB.getSymbol()) + 1); -// } -// // check whether the number of expacted next 
neighbors is higher than the number of found next neighbor, if yes then skip this DB atom match -// for (String elemExpNeighbor : expactedNeighbors.get(i).keySet()) { -// if (foundNeighbors.get(elemExpNeighbor) == null || (expactedNeighbors.get(i).get(elemExpNeighbor) > foundNeighbors.get(elemExpNeighbor))) { -// add = false; -// } -// } -// if(!add){ -// continue; -// } -// // only elements which occur in molecular formula of the unknown are allowed, otherwise skip this matched DB atom -// for (IAtom neighborAtomDB : acDB.getConnectedAtomsList(acDB.getAtom(atomIndexDB))) { -// if (MolecularFormulaManipulator.getElementCount(molFormula, neighborAtomDB.getSymbol()) == 0) { -// add = false; -// break; -// } -// // ignore explicit protons; ignore query atoms here, add them as below -> otherwise multiple counting -// if (!neighborAtomDB.getSymbol().equals("H")){// && !neighborAtomDB.getSymbol().equals(qAtom.getSymbol())) { -// elementsHybridCounter.get(i).get(neighborAtomDB.getSymbol()).add(Integer.parseInt(hybridDesc.calculate(neighborAtomDB, acDB).getValue().toString())); -// elementsBondTypeCounter.get(i).get(neighborAtomDB.getSymbol()).add(acDB.getBond(acDB.getAtom(atomIndexDB), neighborAtomDB).getOrder().numeric()); -// } -// } -// if(!add){ -// continue; -// } -// // likely allowed to add hybridization for query atom -// // check whether the shifts of attached hydrogens are equal to hydrogen shifts of query atom -> higher priority at hybridization assignment step later -// boolean added = false; -// if(shiftsDBHydrogen != null){ -// String[][] shiftsDBvaluesHydrogen = Utils.parseShiftsNMRShiftDB(shiftsDBHydrogen); -// if(qAtom.getProperty("HydrogenShifts") != null){ -// ArrayList shiftsQAtomvaluesHydrogen = qAtom.getProperty("HydrogenShifts"); -// for (int j = 0; j < shiftsQAtomvaluesHydrogen.size(); j++) { -// for (String[] shiftsDBvalueHydrogen : shiftsDBvaluesHydrogen) { -// shiftDB = Double.parseDouble(shiftsDBvalueHydrogen[0]); -// 
if((shiftsQAtomvaluesHydrogen.get(j) - 0.1 <= shiftDB) && (shiftDB <= shiftsQAtomvaluesHydrogen.get(j) + 0.1)){ -// elementsHybridCounter.get(i).get("queryH").add(Integer.parseInt(hybridDesc.calculate(acDB.getAtom(atomIndexDB), acDB).getValue().toString())); -// added = true; -// break; -// } -// } -// if(added){ -// break; -// } -// } -// } -// -// } else { -// elementsHybridCounter.get(i).get("query").add(Integer.parseInt(hybridDesc.calculate(acDB.getAtom(atomIndexDB), acDB).getValue().toString())); -// } -// } -// } -// } -// } -// } -// final HashMap>>> toReturn = new HashMap<>(); -// toReturn.put("hybridCounter", elementsHybridCounter); -// toReturn.put("bondTypeCounter", elementsBondTypeCounter); -// -// return toReturn; -// } - + Collections.sort(input); + final List data1 = input.subList(0, input.size() / 2); + final List data2; + if (input.size() % 2 == 0) { + data2 = input.subList(input.size() / 2, input.size()); + } else { + data2 = input.subList(input.size() / 2 + 1, input.size()); + } + final double q1 = getMedian(data1); + final double q3 = getMedian(data2); + final double iqr = q3 - q1; + final double lowerFence = q1 - 1.5 * iqr; + final double upperFence = q3 + 1.5 * iqr; + for (int i = 0; i < input.size(); i++) { + if ((input.get(i) < lowerFence) || (input.get(i) > upperFence)) { + outliers.add(input.get(i)); + } + } +// System.out.println("input size: " + input.size()); +// System.out.println("output size: " + outliers.size()); + return outliers; + } + + /** + * + * @param data + * @return + */ + public static double getMedian(final List data) { + if(data.size() == 1){ + return data.get(0); + } + Collections.sort(data); + if (data.size() % 2 == 1) { + return data.get(data.size() / 2); + } else { + return (data.get(data.size() / 2 - 1) + data.get(data.size() / 2)) / 2.0; + } + } + /** + * + * @param data + * @return + */ + public static double getMedian(final ArrayList data) { + if(data.size() == 1){ + return data.get(0); + } + 
Collections.sort(data); + if (data.size() % 2 == 1) { + return data.get(data.size() / 2); + } else { + return (data.get(data.size() / 2 - 1) + data.get(data.size() / 2)) / 2.0; + } + } - public static int[] getNeighborhoodBondsCount(final IAtomContainer ac, final int indexAC, final String[] bondsSet, final String[] neighborElems){ - final int[] counts = new int[neighborElems.length * bondsSet.length]; - String foundBonds; - // for all given neighbor element types - for (int n = 0; n < neighborElems.length; n++) { - foundBonds = ""; - // for all next neighbors of a specific element - for (IAtom neighborAtom : ac.getConnectedAtomsList(ac.getAtom(indexAC))) { - // skip if not the right neighborhood element or bond type is unknown/unset - if ((!neighborAtom.getSymbol().equals(neighborElems[n])) || (NMR.Utils.getStringFromBondOrder(ac.getBond(ac.getAtom(indexAC), neighborAtom).getOrder()) == null)) { - continue; - } - foundBonds += NMR.Utils.getStringFromBondOrder(ac.getBond(ac.getAtom(indexAC), neighborAtom).getOrder()); - } - for (int k = 0; k < bondsSet.length; k++) { - counts[n * bondsSet.length + k] = 0; - if (NMR.Utils.sortString(foundBonds).equals(NMR.Utils.sortString(bondsSet[k]))) { - counts[n * bondsSet.length + k] = 1; - break; - } - } + /** + * + * @param data + * @return + */ + public static double getRMS(final ArrayList data) { + if(data.size() == 1){ + return data.get(0); + } + double qSum = 0; + for (final Double d : data) { + qSum += d*d; } - return counts; + return Math.sqrt(qSum/data.size()); } - public static void writeNeighborhoodBondsCountMatrix(final String pathToOutput, final int[][] m, final String[] bondsSet, final String elem, String[] neighborElems, final int min, final int max, final int stepSize) throws IOException{ - - if(stepSize < 1){ - System.err.println("stepSize < 1 not allowed!!!"); - return; + /** + * + * @param data + * @return + */ + public static double getMean(final ArrayList data) { + double sum = 0; + for (Double d : data) { + 
sum += d; } - - final StringBuilder sb = new StringBuilder(); - sb.append("shift [" + elem + "] (" + stepSize + "),nTotal,inRing,isArom,q" + elem + "," + elem + "H," + elem + "H2," + elem + "H3,"); - for (int i = 0; i < neighborElems.length; i++) { - for (int j = 0; j < bondsSet.length; j++) { - sb.append(bondsSet[j] + "[" + neighborElems[i] + "]"); - if (j < bondsSet.length - 1) { - sb.append(","); + return sum/data.size(); + } + + + /** + * Tests whether two array lists of integers are equal which also means + * bidirectional values to each other. + * + * @param shiftMatches1 + * @param shiftMatches2 + * @return + */ + public static boolean isBidirectional(final ArrayList shiftMatches1, final ArrayList shiftMatches2) { + final ArrayList temp1 = new ArrayList<>(shiftMatches1); + final ArrayList temp2 = new ArrayList<>(shiftMatches2); + Collections.sort(temp1); + Collections.sort(temp2); + + return temp1.equals(temp2); + } + + /** + * + * @param ac + * @param shiftMatches1 + * @param shiftMatches2 + * @param prop + */ + public static void setBidirectionalLinks(final IAtomContainer ac, final ArrayList shiftMatches1, final ArrayList shiftMatches2, final String prop) { + + ArrayList propList1, propList2; + for (int i = 0; i < shiftMatches1.size(); i++) { + if (shiftMatches1.get(i) >= 0 && shiftMatches2.get(i) >= 0) { + if (ac.getAtom(shiftMatches1.get(i)).getProperty(prop) == null) { + ac.getAtom(shiftMatches1.get(i)).setProperty(prop, new ArrayList<>()); } - } - if (i < neighborElems.length - 1) { - sb.append(","); - } - } - sb.append("\n"); - for (int i = 0; i < stepSize * (max - min) + 1; i++) { - sb.append((i + min) + ","); - for (int j = 0; j < 3 + 4 + neighborElems.length * bondsSet.length; j++) { - sb.append(m[i][j]); - if (j < 3 + 4 + neighborElems.length * bondsSet.length - 1) { - sb.append(","); + if (ac.getAtom(shiftMatches2.get(i)).getProperty(prop) == null) { + ac.getAtom(shiftMatches2.get(i)).setProperty(prop, new ArrayList<>()); + } + propList1 = 
ac.getAtom(shiftMatches1.get(i)).getProperty(prop); + propList2 = ac.getAtom(shiftMatches2.get(i)).getProperty(prop); + if (!propList1.contains(shiftMatches2.get(i))) { + propList1.add(shiftMatches2.get(i)); + } + if (!propList2.contains(shiftMatches1.get(i))) { + propList2.add(shiftMatches1.get(i)); } } - sb.append("\n"); } + } + + + public static ArrayList countSetShiftInAtomContainer(final IAtomContainer ac, final ArrayList indices){ - final FileWriter writer = new FileWriter(pathToOutput); - writer.append(sb.toString()); - writer.flush(); - writer.close(); + final ArrayList shifts = new ArrayList<>(); + for (final Integer index : indices) { + shifts.add(ac.getAtom(index).getProperty(Utils.getNMRShiftConstant(ac.getAtom(index).getSymbol()))); + } + return shifts; } - public static String sortString(final String s) { - final char[] c = s.toCharArray(); - Arrays.sort(c); - return new String(c); + + public static String getFileFormat(final String pathToFile) { + + if(pathToFile == null || pathToFile.trim().isEmpty()){ + return ""; + } + final String[] split = pathToFile.split("\\."); + + return split[split.length - 1]; } + - public static ArrayList> getBondOrderSets(final String[] valences) { + + /** + * + * @param lookup + * @return + */ + public static HashMap getRMS(final HashMap> lookup){ - final ArrayList> bondOrderSets = new ArrayList<>(); - for (int i = 0; i < valences.length; i++) { - bondOrderSets.add(new ArrayList<>()); - for (int k = 0; k < StringUtils.countMatches(valences[i], "-"); k++) { - bondOrderSets.get(i).add(IBond.Order.SINGLE); - } - for (int k = 0; k < StringUtils.countMatches(valences[i], "="); k++) { - bondOrderSets.get(i).add(IBond.Order.DOUBLE); - } - for (int k = 0; k < StringUtils.countMatches(valences[i], "%"); k++) { - bondOrderSets.get(i).add(IBond.Order.TRIPLE); - } + final HashMap rms = new HashMap<>(); + for (final String key : lookup.keySet()) { + rms.put(key, NMR.Utils.getRMS(lookup.get(key))); +// System.out.println("count: " + 
lookup.get(key).size() + ", mean: " + NMR.Utils.getMean(lookup.get(key)) + ", rms: " + rms.get(key) + ", median: " + NMR.Utils.getMedian(lookup.get(key))); } + + return rms; + } + + public static IAtomContainer setAromaticitiesInAtomContainer(final IAtomContainer ac, final int maxCycleSize) throws CDKException { - return bondOrderSets; + AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(ac); + final ElectronDonation model = ElectronDonation.cdkAllowingExocyclic(); + final CycleFinder cycles = Cycles.or(Cycles.all(), Cycles.all(maxCycleSize)); + final Aromaticity aromaticity = new Aromaticity(model, cycles); + aromaticity.apply(ac); + + return ac; } - public static String getStringFromBondOrder(final IBond.Order order) { - switch (order) { - case SINGLE: - return "-"; - case DOUBLE: - return "="; - case TRIPLE: - return "%"; - default: - return null; + /** + * Removes atoms from a given atom type from an atom container. + * + * @param ac IAtomContainer object where to remove the atoms + * @param atomType Atom type (element's name, e.g. 
C or Br) + * @return IAtomContainer where the atoms were removed + */ + public static IAtomContainer removeAtoms(final IAtomContainer ac, final String atomType) { + + ArrayList toRemoveList = new ArrayList<>(); + for (IAtom atomA : ac.atoms()) { + if (atomA.getSymbol().equals(atomType)) {// detect wether the current atom A is a from the given atom type + toRemoveList.add(atomA); + } + } + + for (IAtom iAtom : toRemoveList) { + ac.removeAtom(iAtom); } + + return ac; } - public static IBond.Order getBondOrderFromString(final String order){ - switch(order){ - case "-": return IBond.Order.SINGLE; - case "=": return IBond.Order.DOUBLE; - case "%": return IBond.Order.TRIPLE; - default: return null; - } - } - public static void writeCSV(final String pathToOutput, final String table) throws IOException { - FileWriter fr = new FileWriter(new File(pathToOutput)); - BufferedWriter br = new BufferedWriter(fr); - br.write(table); - br.close(); - } - /** + + + + + // deprecated functions + + /** + * Creates an IAtomContainer object containing atoms without any bond + * information, given by a molecular formula. 
* - * @param input + * @param molFormula Molecular Formula * @return + * @deprecated */ - public static ArrayList getOutliers(ArrayList input) { - final ArrayList outliers = new ArrayList<>(); - if(input.size() <= 1){ - return outliers; - } - Collections.sort(input); - final List data1 = input.subList(0, input.size() / 2); - final List data2; - if (input.size() % 2 == 0) { - data2 = input.subList(input.size() / 2, input.size()); - } else { - data2 = input.subList(input.size() / 2 + 1, input.size()); - } - final double q1 = getMedian(data1); - final double q3 = getMedian(data2); - final double iqr = q3 - q1; - final double lowerFence = q1 - 1.5 * iqr; - final double upperFence = q3 + 1.5 * iqr; - for (int i = 0; i < input.size(); i++) { - if ((input.get(i) < lowerFence) || (input.get(i) > upperFence)) { - outliers.add(input.get(i)); - } + public static IAtomContainer createAtomContainer(final String molFormula) { + + HashMap hash = NMR.Utils.getAtomCountsInMolecularFormula(molFormula); + IAtomContainer ac = SilentChemObjectBuilder.getInstance().newAtomContainer(); + + for (String elem : hash.keySet()) { + // add atoms of current element + ac = NMR.Utils.addAtoms(ac, elem, hash.get(elem)); } -// System.out.println("input size: " + input.size()); -// System.out.println("output size: " + outliers.size()); - return outliers; + + return ac; } /** + * Creates a HashMap with the number of atoms for each occurring atom type. 
* - * @param data + * @deprecated + * @param molFormula * @return */ - public static double getMedian(final List data) { - if(data.size() == 1){ - return data.get(0); - } - Collections.sort(data); - if (data.size() % 2 == 1) { - return data.get(data.size() / 2); - } else { - return (data.get(data.size() / 2 - 1) + data.get(data.size() / 2)) / 2.0; + public static HashMap getAtomCountsInMolecularFormula(final String molFormula) { + + HashMap hash = new HashMap<>(); + String[] molFormSplit = molFormula.split("[A-Z]"); + Matcher m = Pattern.compile("[A-Z]").matcher(molFormula); + String elem; + int noAtoms; + int k = 1; + + while (m.find()) { + // name of current element + elem = molFormula.substring(m.start(), m.end()); + if (k >= molFormSplit.length || molFormSplit[k].isEmpty()) { + // if no atom number is given then assume only one atom + noAtoms = 1; + } else if (Character.isLowerCase(molFormSplit[k].charAt(0))) { + // if element's name contains two letters then extend it + elem += molFormSplit[k].charAt(0); + // if more than one atoms of that element with two letters exist + if (molFormSplit[k].length() > 1) { + // check given atom number + noAtoms = Integer.parseInt(molFormSplit[k].substring(1)); + } else { + noAtoms = 1; + } + } else { + // if atom number is given + noAtoms = Integer.parseInt(molFormSplit[k].substring(0)); + } + try { + // add atom type and frequency to class hashmap + hash.put(elem, noAtoms); + } catch (Exception e) { + System.err.println("Illegal element \"" + elem + "\" will be ignored!!!"); + } + + k++; } + + return hash; } - - + /** * - * @param data + * @param ac + * @param atomType * @return + * @deprecated */ - public static double getMedian(final ArrayList data) { - if(data.size() == 1){ - return data.get(0); - } - Collections.sort(data); - if (data.size() % 2 == 1) { - return data.get(data.size() / 2); - } else { - return (data.get(data.size() / 2 - 1) + data.get(data.size() / 2)) / 2.0; + public static int getAtomTypeCount(final 
IAtomContainer ac, final String atomType) { + + int noAtoms = 0; + for (int i = 0; i < ac.getAtomCount(); i++) { + if (ac.getAtom(i).getSymbol().equals(atomType)) { + noAtoms++; + } } + + return noAtoms; } - - + /** + * Creates atoms of the same atom type and store it into an atom container. * - * @param data + * @param ac Atom container + * @param noAtoms Number of atoms to create + * @param atomType Atom type (element's name, e.g. C or Br) * @return + * @deprecated */ - public static double getRMS(final ArrayList data) { - if(data.size() == 1){ - return data.get(0); - } - double qSum = 0; - for (final Double d : data) { - qSum += d*d; + public static IAtomContainer addAtoms(final IAtomContainer ac, final String atomType, final int noAtoms) throws IllegalArgumentException { + + for (int i = 0; i < noAtoms; i++) { + ac.addAtom(new Atom(atomType)); } - - return Math.sqrt(qSum/data.size()); + + return ac; } + - /** - * - * @param data - * @return - */ - public static double getMean(final ArrayList data) { - double sum = 0; - for (Double d : data) { - sum += d; + + + + + + + + + + + // test functions -> not ready to use + + + public static double getTanimotoCoefficient(final IAtomContainer a, final IAtomContainer b) throws CDKException, IOException, CloneNotSupportedException{ + + // pubchem fingerprinter expects + // 1. explicit hydrogens +// AtomContainerManipulator.convertImplicitToExplicitHydrogens(a); + // 2. set atom type names -> done during setting of aromaticities + // 3. 
set aromaticity -> done during DB scanning + + SubstructureFingerprinter substructfp = new SubstructureFingerprinter(); + IBitFingerprint fingerprint = substructfp.getBitFingerprint(a); + System.out.println("\n\ndefault substructure bitstring: " + fingerprint.asBitSet()); + for (int setbit : fingerprint.getSetbits()) { + System.out.println("default substructure of index " + setbit + ": " + substructfp.getSubstructure(setbit));// + " -> " + SMARTSParser.parse(substructfp.getSubstructure(setbit), SilentChemObjectBuilder.getInstance())); + } + +// DepictionGenerator dg = new DepictionGenerator().withSize(800, 800).withAtomColors().withAtomValues().withMolTitle().withFillToFit(); + IAtomContainer ac = SilentChemObjectBuilder.getInstance().newAtomContainer(); + QueryAtomContainer qac = SMARTSParser.parse(substructfp.getSubstructure(fingerprint.getSetbits()[1]), SilentChemObjectBuilder.getInstance()); + + System.out.println("qac: " + qac.getAtomCount() + ", " + qac.getBondCount() + " -> " + qac.getProperties()); + for (IAtom atom : qac.atoms()) { + ac.addAtom(atom); + System.out.println("qac atom: "); + } + for (IBond bond : qac.bonds()) { + ac.addBond(bond); + System.out.println("qac bond: " + bond); + } + System.out.println("ac: " + ac.getAtomCount() + ", " + ac.getBondCount() + " -> " + ac.getProperties()); + + +// dg.depict(ac).writeTo("/Users/mwenk/Downloads/test.png"); + + System.out.println("\n\n"); + SubstructureFingerprinter klekotasubstructfp = new KlekotaRothFingerprinter(); + fingerprint = klekotasubstructfp.getBitFingerprint(a); + System.out.println("Klekota substructure bitstring: " + fingerprint.asBitSet()); + for (int setbit : fingerprint.getSetbits()) { + System.out.println("Klekota substructure of index " + setbit + ": " + klekotasubstructfp.getSubstructure(setbit)); } - return sum/data.size(); + + + return 0.0;//Tanimoto.calculate(pubchemfp.getBitFingerprint(a), pubchemfp.getBitFingerprint(b)); } - /** - * Tests whether two array lists of integers are 
equal which also means - * bidirectional values to each other. + /** + * Returns * - * @param shiftMatches1 - * @param shiftMatches2 + * @param values * @return */ - public static boolean isBidirectional(final ArrayList shiftMatches1, final ArrayList shiftMatches2) { - final ArrayList temp1 = new ArrayList<>(shiftMatches1); - final ArrayList temp2 = new ArrayList<>(shiftMatches2); - Collections.sort(temp1); - Collections.sort(temp2); - - return temp1.equals(temp2); - } - - /** - * - * @param ac - * @param shiftMatches1 - * @param shiftMatches2 - * @param prop - */ - public static void setBidirectionalLinks(final IAtomContainer ac, final ArrayList shiftMatches1, final ArrayList shiftMatches2, final String prop) { - - ArrayList propList1, propList2; - for (int i = 0; i < shiftMatches1.size(); i++) { - if (shiftMatches1.get(i) >= 0 && shiftMatches2.get(i) >= 0) { - if (ac.getAtom(shiftMatches1.get(i)).getProperty(prop) == null) { - ac.getAtom(shiftMatches1.get(i)).setProperty(prop, new ArrayList<>()); - } - if (ac.getAtom(shiftMatches2.get(i)).getProperty(prop) == null) { - ac.getAtom(shiftMatches2.get(i)).setProperty(prop, new ArrayList<>()); - } - propList1 = ac.getAtom(shiftMatches1.get(i)).getProperty(prop); - propList2 = ac.getAtom(shiftMatches2.get(i)).getProperty(prop); - if (!propList1.contains(shiftMatches2.get(i))) { - propList1.add(shiftMatches2.get(i)); - } - if (!propList2.contains(shiftMatches1.get(i))) { - propList2.add(shiftMatches1.get(i)); - } - } - } - } - - - public static ArrayList countSetShiftInAtomContainer(final IAtomContainer ac, final ArrayList indices){ + public static HashMap getValueFrequencies(final ArrayList values) { - final ArrayList shifts = new ArrayList<>(); - for (final Integer index : indices) { - shifts.add(ac.getAtom(index).getProperty(Utils.getNMRShiftConstant(ac.getAtom(index).getSymbol()))); + final HashMap freqs = new HashMap<>(); + final HashSet valueLevels = new HashSet<>(values); + int sum = 0; + for (int value : 
valueLevels) { + sum += Collections.frequency(values, value); } - return shifts; + for (int value : valueLevels) { + freqs.put(value, (Collections.frequency(values, value) / (double) sum)); + } + + return freqs; } - - public static String getFileFormat(final String pathToFile) { - - if(pathToFile == null || pathToFile.trim().isEmpty()){ - return ""; - } - final String[] split = pathToFile.split("\\."); - - return split[split.length - 1]; - } -// -// public static IAtomContainer getStructureFromINCHICode(final String inchi) throws CDKException { -// final InChIToStructure intostruct = InChIGeneratorFactory.getInstance().getInChIToStructure( -// inchi, SilentChemObjectBuilder.getInstance() + // /** +// * Returns the hybridization level of each heavy atom in given molecule which has +// * its own shift value. +// * First it compares the number of attached (implicit) hydrogens and sets +// * the hybridization level from it directly. This is only possible for +// * carbons with three or four attached hydrogens (sp3). [CURRENTLY DISABLED] +// * +// * If less than three hydrogens are attached or in case of other heavy +// * atoms then a NMRShiftDB file will be used to obtain the +// * frequencies of the different hybridization levels from the database. +// * This happens for directly bonded neighbors too. 
+// * +// * +// * @param ac +// * @param pathToNMRShiftDB +// * @param tol +// * @param molFormula +// * @return +// * @throws FileNotFoundException +// */ +// public static HashMap>>> getHybridizationsFromNMRShiftDB(final IAtomContainer ac, final String pathToNMRShiftDB, final double tol, final IMolecularFormula molFormula) throws FileNotFoundException{ +// +// final HashMap>> elementsHybridCounter = new HashMap<>(); +// final HashMap>> elementsBondTypeCounter = new HashMap<>(); +// final HashMap> expactedNeighbors = new HashMap<>(); +// String NMRSHIFT_ATOMTYPE; +// // initializations only +// for (int i = 0; i < ac.getAtomCount(); i++) { +// // sure case for carbon: 3 or 4 hydrogens -> sp3 +//// if (ac.getAtom(i).getSymbol().equals("C") && ac.getAtom(i).getImplicitHydrogenCount() >= 3) { +//// ac.getAtom(i).setHybridization(IAtomType.Hybridization.SP3); +//// continue; +//// } +// NMRSHIFT_ATOMTYPE = Utils.getNMRShiftConstant(ac.getAtom(i).getSymbol()); +// // is the NMR shift constant defined and does the nmr shift property entry in an atom exist? 
+// if ((NMRSHIFT_ATOMTYPE == null) || (ac.getAtom(i).getProperty(NMRSHIFT_ATOMTYPE) == null)) { +// continue; +// } +// elementsHybridCounter.put(i, new HashMap<>()); +// elementsBondTypeCounter.put(i, new HashMap<>()); +// elementsHybridCounter.get(i).put("query", new ArrayList<>()); +// elementsHybridCounter.get(i).put("queryH", new ArrayList<>()); +// // create an array list for each atom type in given molecular formula +// for (IElement elem : MolecularFormulaManipulator.getHeavyElements(molFormula)) { +// elementsHybridCounter.get(i).put(elem.getSymbol(), new ArrayList<>()); +// elementsBondTypeCounter.get(i).put(elem.getSymbol(), new ArrayList<>()); +// } +// +// expactedNeighbors.put(i, new HashMap<>()); +// for (IAtom expNeighbor : ac.getConnectedAtomsList(ac.getAtom(i))) { +// if (!expactedNeighbors.get(i).keySet().contains(expNeighbor.getSymbol())) { +// expactedNeighbors.get(i).put(expNeighbor.getSymbol(), 0); +// } +// expactedNeighbors.get(i).put(expNeighbor.getSymbol(), expactedNeighbors.get(i).get(expNeighbor.getSymbol()) + 1); +// } +// } +// // beginning of DB search +// String shiftsDB; +// double shiftDB, shiftQ; +// int atomIndexDB; +// boolean add, toContinue; +// final AtomHybridizationDescriptor hybridDesc = new AtomHybridizationDescriptor(); +// IAtom qAtom; +// IAtomContainer acDB; +// final IteratingSDFReader iterator = new IteratingSDFReader( +// new FileReader(pathToNMRShiftDB), +// SilentChemObjectBuilder.getInstance() // ); +// while (iterator.hasNext()) { +// acDB = iterator.next(); +// ArrayList props = (ArrayList) (ArrayList) (new ArrayList<>(acDB.getProperties().keySet())); +// Collections.sort(props); +// // the DB entry should at least contain one carbon spectrum +// toContinue = false; +// for (String prop : props) { +// if (prop.contains("Spectrum " + Utils.getNMRIsotopeIdentifier("C"))) { +// toContinue = true; +// break; +// } +// } +// if (!toContinue) { +// continue; +// } +// +// for (int i : 
elementsHybridCounter.keySet()) { +// qAtom = ac.getAtom(i); +// // check wether the DB entry contains a spectrum for the current query atom type +// shiftsDB = null; +// for (String prop : props) { +// if (prop.contains("Spectrum " + Utils.getNMRIsotopeIdentifier(qAtom.getSymbol()))) { +// shiftsDB = acDB.getProperty(prop); +// break; +// } +// } +// if(shiftsDB == null){ +// continue; +// } +// // ignore the already set sp3 hybridizations at carbon atoms with at least 3 implicit hydrogens +//// if (qAtom.getSymbol().equals("C") && qAtom.getImplicitHydrogenCount() >= 3) { +//// continue; +//// } +// shiftQ = qAtom.getProperty(Utils.getNMRShiftConstant(ac.getAtom(i).getSymbol())); +// +// // check wether the DB entry contains a proton spectrum +// String shiftsDBHydrogen = null; +// for (String prop : props) { +// if (prop.contains("Spectrum " + Utils.getNMRIsotopeIdentifier("H"))) { +// shiftsDBHydrogen = acDB.getProperty(prop); +// break; +// } +// } +// +// String[][] shiftsDBvalues = Utils.parseShiftsNMRShiftDB(shiftsDB); +// for (String[] shiftsDBvalue : shiftsDBvalues) { +// shiftDB = Double.parseDouble(shiftsDBvalue[0]); +// atomIndexDB = Integer.parseInt(shiftsDBvalue[2]); +// add = true; +// // shift match within a shift tolerance range +// if ((shiftQ - tol <= shiftDB) && (shiftDB <= shiftQ + tol)) { +// // matched atom should have the same number of attached (implicit) hydrogens +// if (acDB.getAtom(atomIndexDB).getImplicitHydrogenCount().intValue() == qAtom.getImplicitHydrogenCount().intValue()) { +// // count next neighbors +// HashMap foundNeighbors = new HashMap<>(); +// for (IAtom neighborAtomDB : acDB.getConnectedAtomsList(acDB.getAtom(atomIndexDB))) { +// if (!foundNeighbors.keySet().contains(neighborAtomDB.getSymbol())) { +// foundNeighbors.put(neighborAtomDB.getSymbol(), 0); +// } +// foundNeighbors.put(neighborAtomDB.getSymbol(), foundNeighbors.get(neighborAtomDB.getSymbol()) + 1); +// } +// // check whether the number of expacted next 
neighbors is higher than the number of found next neighbor, if yes then skip this DB atom match +// for (String elemExpNeighbor : expactedNeighbors.get(i).keySet()) { +// if (foundNeighbors.get(elemExpNeighbor) == null || (expactedNeighbors.get(i).get(elemExpNeighbor) > foundNeighbors.get(elemExpNeighbor))) { +// add = false; +// } +// } +// if(!add){ +// continue; +// } +// // only elements which occur in molecular formula of the unknown are allowed, otherwise skip this matched DB atom +// for (IAtom neighborAtomDB : acDB.getConnectedAtomsList(acDB.getAtom(atomIndexDB))) { +// if (MolecularFormulaManipulator.getElementCount(molFormula, neighborAtomDB.getSymbol()) == 0) { +// add = false; +// break; +// } +// // ignore explicit protons; ignore query atoms here, add them as below -> otherwise multiple counting +// if (!neighborAtomDB.getSymbol().equals("H")){// && !neighborAtomDB.getSymbol().equals(qAtom.getSymbol())) { +// elementsHybridCounter.get(i).get(neighborAtomDB.getSymbol()).add(Integer.parseInt(hybridDesc.calculate(neighborAtomDB, acDB).getValue().toString())); +// elementsBondTypeCounter.get(i).get(neighborAtomDB.getSymbol()).add(acDB.getBond(acDB.getAtom(atomIndexDB), neighborAtomDB).getOrder().numeric()); +// } +// } +// if(!add){ +// continue; +// } +// // likely allowed to add hybridization for query atom +// // check whether the shifts of attached hydrogens are equal to hydrogen shifts of query atom -> higher priority at hybridization assignment step later +// boolean added = false; +// if(shiftsDBHydrogen != null){ +// String[][] shiftsDBvaluesHydrogen = Utils.parseShiftsNMRShiftDB(shiftsDBHydrogen); +// if(qAtom.getProperty("HydrogenShifts") != null){ +// ArrayList shiftsQAtomvaluesHydrogen = qAtom.getProperty("HydrogenShifts"); +// for (int j = 0; j < shiftsQAtomvaluesHydrogen.size(); j++) { +// for (String[] shiftsDBvalueHydrogen : shiftsDBvaluesHydrogen) { +// shiftDB = Double.parseDouble(shiftsDBvalueHydrogen[0]); +// 
if((shiftsQAtomvaluesHydrogen.get(j) - 0.1 <= shiftDB) && (shiftDB <= shiftsQAtomvaluesHydrogen.get(j) + 0.1)){ +// elementsHybridCounter.get(i).get("queryH").add(Integer.parseInt(hybridDesc.calculate(acDB.getAtom(atomIndexDB), acDB).getValue().toString())); +// added = true; +// break; +// } +// } +// if(added){ +// break; +// } +// } +// } // -// INCHI_RET ret = intostruct.getReturnStatus(); -// if (ret == INCHI_RET.WARNING) { -// // Structure generated, but with warning message -// System.out.println("InChI warning: " + intostruct.getMessage()); -// } else if (ret != INCHI_RET.OKAY) { -// // Structure generation failed -// throw new CDKException("Structure generation failed: " + ret.toString() -// + " [" + intostruct.getMessage() + "]"); +// } else { +// elementsHybridCounter.get(i).get("query").add(Integer.parseInt(hybridDesc.calculate(acDB.getAtom(atomIndexDB), acDB).getValue().toString())); +// } +// } +// } +// } +// } // } -// -// System.out.println("inchi ac: " + intostruct.getAtomContainer().getAtomCount()); -// -// return SilentChemObjectBuilder.getInstance().newAtomContainer();//intostruct.getAtomContainer(); -// +// final HashMap>>> toReturn = new HashMap<>(); +// toReturn.put("hybridCounter", elementsHybridCounter); +// toReturn.put("bondTypeCounter", elementsBondTypeCounter); +// +// return toReturn; // } - - - - - - /** + + /** + * Returns a list of open bonds of an atom. 
* - * @param lookup + * @param ac atom container + * @param atomIndex index of the atom to test * @return */ - public static HashMap getRMS(final HashMap> lookup){ - - final HashMap rms = new HashMap<>(); - for (final String key : lookup.keySet()) { - rms.put(key, NMR.Utils.getRMS(lookup.get(key))); -// System.out.println("count: " + lookup.get(key).size() + ", mean: " + NMR.Utils.getMean(lookup.get(key)) + ", rms: " + rms.get(key) + ", median: " + NMR.Utils.getMedian(lookup.get(key))); + public static ArrayList getOpenBonds(final IAtomContainer ac, final int atomIndex) { + + final IAtom atom = ac.getAtom(atomIndex); + if (atom.getHybridization() == null) { + return null; } - - return rms; - } - - - - - - - - - public static IAtomContainer setAromaticitiesInAtomContainer(final IAtomContainer ac, final int maxCycleSize) throws CDKException { - - AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(ac); - final ElectronDonation model = ElectronDonation.cdkAllowingExocyclic(); - final CycleFinder cycles = Cycles.or(Cycles.all(), Cycles.all(maxCycleSize)); - final Aromaticity aromaticity = new Aromaticity(model, cycles); - aromaticity.apply(ac); - - return ac; - } - - - public static double getTanimotoCoefficient(final IAtomContainer a, final IAtomContainer b) throws CDKException, IOException, CloneNotSupportedException{ - - // pubchem fingerprinter expects - // 1. explicit hydrogens -// AtomContainerManipulator.convertImplicitToExplicitHydrogens(a); - // 2. set atom type names -> done during setting of aromaticities - // 3. 
set aromaticity -> done during DB scanning - - SubstructureFingerprinter substructfp = new SubstructureFingerprinter(); - IBitFingerprint fingerprint = substructfp.getBitFingerprint(a); - System.out.println("\n\ndefault substructure bitstring: " + fingerprint.asBitSet()); - for (int setbit : fingerprint.getSetbits()) { - System.out.println("default substructure of index " + setbit + ": " + substructfp.getSubstructure(setbit));// + " -> " + SMARTSParser.parse(substructfp.getSubstructure(setbit), SilentChemObjectBuilder.getInstance())); + final ArrayList bondOrderList = new ArrayList<>(); + final AtomValenceDescriptor valenceDesc = new AtomValenceDescriptor(); + final int valence = Integer.valueOf(valenceDesc.calculate(atom, ac).getValue().toString()); + int electronsLeft = (8 - (valence + atom.getImplicitHydrogenCount())); + + if (electronsLeft == 0) { +// System.out.println(atom.getSymbol() + ": " + atomIndex + " (" + atom.getHybridization() + "): " + bondOrderList); + return bondOrderList; } - -// DepictionGenerator dg = new DepictionGenerator().withSize(800, 800).withAtomColors().withAtomValues().withMolTitle().withFillToFit(); - IAtomContainer ac = SilentChemObjectBuilder.getInstance().newAtomContainer(); - QueryAtomContainer qac = SMARTSParser.parse(substructfp.getSubstructure(fingerprint.getSetbits()[1]), SilentChemObjectBuilder.getInstance()); - - System.out.println("qac: " + qac.getAtomCount() + ", " + qac.getBondCount() + " -> " + qac.getProperties()); - for (IAtom atom : qac.atoms()) { - ac.addAtom(atom); - System.out.println("qac atom: "); + // only one single bond left; possible at SP1, SP2 and SP3 + if (electronsLeft == 1) { + bondOrderList.add(IBond.Order.SINGLE); +// System.out.println(atom.getSymbol() + ": " + atomIndex + " (" + atom.getHybridization() + "): " + bondOrderList); + return bondOrderList; } - for (IBond bond : qac.bonds()) { - ac.addBond(bond); - System.out.println("qac bond: " + bond); + // with SP3 are only single bonds possible + if 
(atom.getHybridization().equals(IAtomType.Hybridization.SP3)) { + // subtract the single bonded neighbor number + electronsLeft -= ac.getConnectedAtomsList(atom).size(); + for (int k = 0; k < electronsLeft; k++) { + bondOrderList.add(IBond.Order.SINGLE); + } +// System.out.println(atom.getSymbol() + ": " + atomIndex + " (" + atom.getHybridization() + "): " + bondOrderList); + return bondOrderList; } - System.out.println("ac: " + ac.getAtomCount() + ", " + ac.getBondCount() + " -> " + ac.getProperties()); - - -// dg.depict(ac).writeTo("/Users/mwenk/Downloads/test.png"); - - System.out.println("\n\n"); - SubstructureFingerprinter klekotasubstructfp = new KlekotaRothFingerprinter(); - fingerprint = klekotasubstructfp.getBitFingerprint(a); - System.out.println("Klekota substructure bitstring: " + fingerprint.asBitSet()); - for (int setbit : fingerprint.getSetbits()) { - System.out.println("Klekota substructure of index " + setbit + ": " + klekotasubstructfp.getSubstructure(setbit)); + + if (atom.getHybridization().equals(IAtomType.Hybridization.SP2)) { + switch (atom.getSymbol()) { + case "O": + case "S": + bondOrderList.add(IBond.Order.DOUBLE); + return bondOrderList; + case "C": + bondOrderList.add(IBond.Order.SINGLE); + bondOrderList.add(IBond.Order.SINGLE); + bondOrderList.add(IBond.Order.DOUBLE); + break; + case "N": + bondOrderList.add(IBond.Order.SINGLE); + bondOrderList.add(IBond.Order.DOUBLE); + break; + default: + break; + } + } else if (atom.getHybridization().equals(IAtomType.Hybridization.SP1)) { + switch (atom.getSymbol()) { + case "C": + bondOrderList.add(IBond.Order.DOUBLE); + bondOrderList.add(IBond.Order.DOUBLE); + // or + bondOrderList.add(IBond.Order.SINGLE); + bondOrderList.add(IBond.Order.TRIPLE); + break; + case "N": + bondOrderList.add(IBond.Order.TRIPLE); + break; + default: + break; + } } - - - return 0.0;//Tanimoto.calculate(pubchemfp.getBitFingerprint(a), pubchemfp.getBitFingerprint(b)); + for (IAtom neighbor : 
ac.getConnectedAtomsList(atom)) { + bondOrderList.remove(ac.getBond(atom, neighbor).getOrder()); + electronsLeft -= NMR.Utils.getElectronNumberByBondOrder(ac.getBond(atom, neighbor).getOrder()); + } + + int theoCounter = 0; + for (IBond.Order order : bondOrderList) { + theoCounter += NMR.Utils.getElectronNumberByBondOrder(order); + } + + switch (Math.abs(theoCounter - electronsLeft)) { + case 1: + bondOrderList.remove(IBond.Order.SINGLE); + theoCounter -= 1; + break; + case 2: + + break; + case 3: + + break; + } + +// System.out.println(atom.getSymbol() + ": " + atomIndex + " (" + atom.getHybridization() + "): " + bondOrderList + " -> e: " + theoCounter + " (theo) vs. " + electronsLeft + " (real), bond counter: " + ac.getConnectedAtomsList(atom).size() + " (+" + atom.getImplicitHydrogenCount() + "H)"); + return bondOrderList; } - - - - - - - - - - - - - - - - - - - - /** - * Returns + + /** + * Returns a bond type for two bond atoms from its hybridization. + * CURRENTLY ONLY SINGLE BOND DETECTION POSSIBLE!!! + * This function detects single, double and triple bonds and returns a + * bond order from {@link org.openscience.cdk.interfaces.IBond.Order}. + * If no bond type could be detected then + * {@link org.openscience.cdk.interfaces.IBond.Order#UNSET} will be + * returned. + * For single and double bond detection, the following elements are defined + * so far: C, O, N, S. + * For triple bond detection, the following elements are defined so far: C, + * N. 
* - * @param values + * + * @param atom1 + * @param atom2 * @return */ - public static HashMap getValueFrequencies(final ArrayList values) { - - final HashMap freqs = new HashMap<>(); - final HashSet valueLevels = new HashSet<>(values); - int sum = 0; - for (int value : valueLevels) { - sum += Collections.frequency(values, value); + public static IBond.Order getBondTypeFromHybridizations(final IAtom atom1, final IAtom atom2) { + + final String atomType1 = atom1.getSymbol(); + final IAtomType.Hybridization hybridization1 = atom1.getHybridization(); + final String atomType2 = atom2.getSymbol(); + final IAtomType.Hybridization hybridization2 = atom2.getHybridization(); + + if (hybridization1 == null || hybridization2 == null) { + return IBond.Order.UNSET; } - for (int value : valueLevels) { - freqs.put(value, (Collections.frequency(values, value) / (double) sum)); + IBond.Order bondOrder1 = IBond.Order.UNSET; + IBond.Order bondOrder2 = IBond.Order.UNSET; + // single bond detection, the "3" means all SP3 hybrdidizations like SP3, SP3D2 or PLANAR3 + if ((atomType1.equals("C") || atomType1.equals("O") || atomType1.equals("N") || atomType1.equals("S")) + && hybridization1.toString().contains("3")) { + return IBond.Order.SINGLE; } - - return freqs; + if ((atomType2.equals("C") || atomType2.equals("O") || atomType2.equals("N") || atomType2.equals("S")) + && hybridization2.toString().contains("3")) { + return IBond.Order.SINGLE; + } +// // double bond detection +// if ((atomType1.equals("C") && (hybridization1.equals(IAtomType.Hybridization.SP1) || hybridization1.equals(IAtomType.Hybridization.SP2))) +// || ((atomType1.equals("O") || atomType1.equals("N") || atomType1.equals("S")) && (hybridization1.equals(IAtomType.Hybridization.SP2)))) { +// bondOrder1 = IBond.Order.DOUBLE; +// } +// if ((atomType2.equals("C") && (hybridization2.equals(IAtomType.Hybridization.SP1) || hybridization2.equals(IAtomType.Hybridization.SP2))) +// || ((atomType2.equals("O") || 
atomType2.equals("N") || atomType2.equals("S")) && hybridization2.equals(IAtomType.Hybridization.SP2))) { +// bondOrder2 = IBond.Order.DOUBLE; +// } +// // triple bond detection +// if ((atomType1.equals("C") && (hybridization1.equals(IAtomType.Hybridization.SP1))) +// && (atomType2.equals("N") && hybridization2.equals(IAtomType.Hybridization.SP1))) { +// bondOrder1 = IBond.Order.TRIPLE; +// } +// if ((atomType2.equals("N") && (hybridization2.equals(IAtomType.Hybridization.SP1))) +// && (atomType1.equals("C") && hybridization1.equals(IAtomType.Hybridization.SP1))) { +// bondOrder2 = IBond.Order.TRIPLE; +// } + + if (bondOrder1.equals(bondOrder2)) { + return bondOrder1; + } + + return IBond.Order.UNSET; } - - - - - - } From 3ef91d3d45281d3184d08937d631bf3a0eb552a2 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Tue, 31 Jul 2018 16:08:11 +0200 Subject: [PATCH 014/405] - small bugfixes --- nb-configuration.xml | 18 ++++++++++++++++++ pom.xml | 3 ++- src/NMR/Spectrum.java | 1 - 3 files changed, 20 insertions(+), 2 deletions(-) create mode 100644 nb-configuration.xml diff --git a/nb-configuration.xml b/nb-configuration.xml new file mode 100644 index 0000000..2dc4557 --- /dev/null +++ b/nb-configuration.xml @@ -0,0 +1,18 @@ + + + + + + mit + + diff --git a/pom.xml b/pom.xml index 6f56bd4..224baad 100644 --- a/pom.xml +++ b/pom.xml @@ -5,7 +5,7 @@ casekit casekit 1.0-SNAPSHOT - spectra + CASEkit src @@ -15,6 +15,7 @@ 1.8 1.8 + true diff --git a/src/NMR/Spectrum.java b/src/NMR/Spectrum.java index d248516..3d4e247 100644 --- a/src/NMR/Spectrum.java +++ b/src/NMR/Spectrum.java @@ -29,7 +29,6 @@ package NMR; import java.util.ArrayList; -import java.util.Arrays; /** * From f19e5887c0dea1409cf1d0f24831bcde0b8b886a Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Thu, 2 Aug 2018 14:16:03 +0200 Subject: [PATCH 015/405] - just to copy it --- nbactions.xml | 31 ++- pom.xml | 2 +- src/{ => casekit}/NMR/DB.java | 21 +- src/{ => casekit}/NMR/ParseRawData.java | 149 +++++------ src/{ 
=> casekit}/NMR/Process.java | 72 +++--- src/{ => casekit}/NMR/ShiftMatcher.java | 2 +- src/{ => casekit}/NMR/Utils.java | 62 +++-- src/{NMR => casekit/NMR/model}/Signal.java | 75 +++++- src/{NMR => casekit/NMR/model}/Spectrum.java | 37 +-- src/casekit/NMR/remarks | 21 ++ src/casekit/NMR/test.java | 179 +++++++++++++ src/casekit/Signal.java | 45 ---- src/casekit/SimilarityRanker.java | 36 +-- src/casekit/model/NMRSignal.java | 95 ------- src/casekit/model/NMRSpectrum.java | 258 ------------------- 15 files changed, 474 insertions(+), 611 deletions(-) rename src/{ => casekit}/NMR/DB.java (93%) rename src/{ => casekit}/NMR/ParseRawData.java (87%) rename src/{ => casekit}/NMR/Process.java (90%) rename src/{ => casekit}/NMR/ShiftMatcher.java (99%) rename src/{ => casekit}/NMR/Utils.java (95%) rename src/{NMR => casekit/NMR/model}/Signal.java (62%) rename src/{NMR => casekit/NMR/model}/Spectrum.java (88%) create mode 100644 src/casekit/NMR/remarks create mode 100644 src/casekit/NMR/test.java delete mode 100644 src/casekit/Signal.java delete mode 100644 src/casekit/model/NMRSignal.java delete mode 100644 src/casekit/model/NMRSpectrum.java diff --git a/nbactions.xml b/nbactions.xml index 7f1dc6a..9db7bbf 100644 --- a/nbactions.xml +++ b/nbactions.xml @@ -10,7 +10,36 @@ org.codehaus.mojo:exec-maven-plugin:1.2.1:exec - -classpath %classpath NMR.test + -classpath %classpath casekit.NMR.test + java + + + + debug + + jar + + + process-classes + org.codehaus.mojo:exec-maven-plugin:1.2.1:exec + + + -Xdebug -Xrunjdwp:transport=dt_socket,server=n,address=${jpda.address} -classpath %classpath casekit.NMR.test + java + true + + + + profile + + jar + + + process-classes + org.codehaus.mojo:exec-maven-plugin:1.2.1:exec + + + -classpath %classpath casekit.NMR.test java diff --git a/pom.xml b/pom.xml index 224baad..b705ffe 100644 --- a/pom.xml +++ b/pom.xml @@ -5,7 +5,7 @@ casekit casekit 1.0-SNAPSHOT - CASEkit + casekit src diff --git a/src/NMR/DB.java b/src/casekit/NMR/DB.java 
similarity index 93% rename from src/NMR/DB.java rename to src/casekit/NMR/DB.java index aa84151..681f97a 100644 --- a/src/NMR/DB.java +++ b/src/casekit/NMR/DB.java @@ -21,8 +21,9 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ -package NMR; +package casekit.NMR; +import casekit.NMR.model.Spectrum; import java.io.FileNotFoundException; import java.io.FileReader; import java.io.IOException; @@ -77,7 +78,7 @@ public static IAtomContainerSet getStructuresFromNMRShiftDBFile(final String pat SilentChemObjectBuilder.getInstance() ); while (iterator.hasNext()) { - acSet.addAtomContainer(NMR.Utils.setAromaticitiesInAtomContainer(iterator.next(), maxCycleSize)); + acSet.addAtomContainer(casekit.NMR.Utils.setAromaticitiesInAtomContainer(iterator.next(), maxCycleSize)); } return acSet; @@ -111,7 +112,7 @@ public static int[][] countNeighborhoodBonds(final Connection DBConnection, fina + " nmrsig.SPECTRUM_ID = spec.SPECTRUM_ID AND \n" + " spec.REVIEW_FLAG = \"true\" AND \n" + " spectype.SPECTRUM_TYPE_ID = spec.SPECTRUM_TYPE_ID AND \n" - + " spectype.NAME = \"" + NMR.Utils.getNMRIsotopeIdentifier(elem) + "\" AND \n" + + " spectype.NAME = \"" + casekit.NMR.Utils.getIsotopeIdentifier(elem) + "\" AND \n" + " nmrsig.MULTIPLICITY IS NOT NULL AND \n" + " nmrsig.MULTIPLICITY != \"\"" + "GROUP BY shift, mult \n" @@ -194,7 +195,7 @@ public static ArrayList getSignalIDsFromNMRShiftDB(final Connection DBC + " nmrsig.SPECTRUM_ID = spec.SPECTRUM_ID AND \n" + " spec.REVIEW_FLAG = \"true\" AND \n" // checks whether review flag is set to true + " spectype.SPECTRUM_TYPE_ID = spec.SPECTRUM_TYPE_ID AND \n" - + " spectype.NAME = \"" + NMR.Utils.getNMRIsotopeIdentifier(elem) + "\" \n"; + + " spectype.NAME = \"" + casekit.NMR.Utils.getIsotopeIdentifier(elem) + "\" \n"; if(mult != null && !mult.trim().isEmpty()){ query += " AND nmrsig.MULTIPLICITY = \"" + mult + "\" \n"; } else { @@ -205,7 +206,7 @@ public static ArrayList 
getSignalIDsFromNMRShiftDB(final Connection DBC } query += " ;"; System.out.println("\n\ngetSpectraIDs:\nQUERY: " + query); - final ResultSet resultSet = NMR.DB.getResultSet(DBConnection, query); + final ResultSet resultSet = casekit.NMR.DB.getResultSet(DBConnection, query); while (resultSet.next()) { spectraIDs.add(resultSet.getInt("sigID")); } @@ -221,9 +222,9 @@ public static HashMap> matchSpectrumAgainstDB(final hits.put(i, new ArrayList<>()); shift = Math.floor(spectrum.get(i).getShift(dim) * stepSize) / (double) stepSize; if(spectrum.get(i).getIntensity() != null){ - hits.get(i).addAll(NMR.DB.getSignalIDsFromNMRShiftDB(DBConnection, shift - shiftDev, shift + shiftDev, spectrum.get(i).getMultiplicity(), spectrum.get(i).getIntensity() - intensDev, spectrum.get(i).getIntensity() + intensDev, spectrum.get(i).getNuclei()[dim])); + hits.get(i).addAll(casekit.NMR.DB.getSignalIDsFromNMRShiftDB(DBConnection, shift - shiftDev, shift + shiftDev, spectrum.get(i).getMultiplicity(), spectrum.get(i).getIntensity() - intensDev, spectrum.get(i).getIntensity() + intensDev, spectrum.get(i).getNuclei()[dim])); } else { - hits.get(i).addAll(NMR.DB.getSignalIDsFromNMRShiftDB(DBConnection, shift - shiftDev, shift + shiftDev, spectrum.get(i).getMultiplicity(), spectrum.get(i).getIntensity(), spectrum.get(i).getIntensity(), spectrum.get(i).getNuclei()[dim])); + hits.get(i).addAll(casekit.NMR.DB.getSignalIDsFromNMRShiftDB(DBConnection, shift - shiftDev, shift + shiftDev, spectrum.get(i).getMultiplicity(), spectrum.get(i).getIntensity(), spectrum.get(i).getIntensity(), spectrum.get(i).getNuclei()[dim])); } } @@ -253,7 +254,7 @@ public static HashMap> getLookupTableFromNMRShiftDB(fi + " nmrsig.SPECTRUM_ID = spec.SPECTRUM_ID AND \n" + " spec.REVIEW_FLAG = \"true\" AND \n" // checks whether review flag is set to true + " spectype.SPECTRUM_TYPE_ID = spec.SPECTRUM_TYPE_ID AND \n" - + " spectype.NAME = \"" + NMR.Utils.getNMRIsotopeIdentifier(elem) + "\";"; + + " spectype.NAME = \"" + 
casekit.NMR.Utils.getIsotopeIdentifier(elem) + "\";"; System.out.println("\n\ngetLookupTable:\nQUERY: " + query); final ResultSet resultSet = statement.executeQuery(query); while (resultSet.next()) { @@ -292,7 +293,7 @@ public static HashMap getRMS(final Connection DBConnection, fina + " nmrsig.SPECTRUM_ID = spec.SPECTRUM_ID AND \n" + " spec.REVIEW_FLAG = \"true\" AND \n" // checks whether review flag is set to true + " spectype.SPECTRUM_TYPE_ID = spec.SPECTRUM_TYPE_ID AND \n" - + " spectype.NAME = \"" + NMR.Utils.getNMRIsotopeIdentifier(elem) + "\" AND \n" + + " spectype.NAME = \"" + casekit.NMR.Utils.getIsotopeIdentifier(elem) + "\" AND \n" + " nmrsig.MULTIPLICITY IS NOT NULL AND \n" + " nmrsig.MULTIPLICITY != \"\" \n" + " GROUP BY hose;"; @@ -311,7 +312,7 @@ public static ArrayList getSpectraFromNMRShiftDBEntry(final IAtomContain ArrayList props = (ArrayList) (ArrayList) (new ArrayList<>(ac.getProperties().keySet())); final ArrayList spectra = new ArrayList<>(); for (String prop : props) { - if (prop.contains("Spectrum " + NMR.Utils.getNMRIsotopeIdentifier(elem))) { + if (prop.contains("Spectrum " + casekit.NMR.Utils.getIsotopeIdentifier(elem))) { spectra.add(ac.getProperty(prop)); } } diff --git a/src/NMR/ParseRawData.java b/src/casekit/NMR/ParseRawData.java similarity index 87% rename from src/NMR/ParseRawData.java rename to src/casekit/NMR/ParseRawData.java index 6e9fdf7..11336c4 100644 --- a/src/NMR/ParseRawData.java +++ b/src/casekit/NMR/ParseRawData.java @@ -21,8 +21,9 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. 
*/ -package NMR; +package casekit.NMR; +import casekit.NMR.model.Spectrum; import java.io.IOException; import java.util.ArrayList; import java.util.Collections; @@ -156,7 +157,7 @@ public final void setEquivalentProperties() { */ public final boolean parse1DNMR(final String pathToPeakList, final String atomType) throws IOException, ParserConfigurationException, SAXException{ - switch (NMR.Utils.getFileFormat(pathToPeakList)) { + switch (casekit.NMR.Utils.getFileFormat(pathToPeakList)) { case "csv": return this.parse1DNMRviaPeakTable(pathToPeakList, atomType); case "xml": @@ -248,7 +249,7 @@ public final boolean set1DNMRShifts(final Spectrum spectrum){ final ArrayList shifts = spectrum.getShiftsByDim(0); // check whether indices for that atom type exist or the number of input signals are greater than the atom number in atom container for that atom type if (!this.atomTypeIndices.containsKey(atomType) || shifts.size() > this.atomTypeIndices.get(atomType).size()) { - // if molecular formula is known and too much picked peak are to be assigned + // if molecular formula is known and too much picked peaks are to be assigned if(this.atomTypeIndices.containsKey(atomType)){ System.err.println("Too many peaks in peak list for \"" + atomType + "\" and molecular formula \"" + MolecularFormulaManipulator.getString(this.molFormula) + "\"!!!"); return false; @@ -257,7 +258,7 @@ public final boolean set1DNMRShifts(final Spectrum spectrum){ IAtom atom; for (final double shift : shifts) { atom = new Atom(atomType); - atom.setProperty(NMR.Utils.getNMRShiftConstant(atomType), shift); + atom.setProperty(casekit.NMR.Utils.getNMRShiftConstant(atomType), shift); atom.setImplicitHydrogenCount(null); this.mol.addAtom(atom); } @@ -268,7 +269,7 @@ public final boolean set1DNMRShifts(final Spectrum spectrum){ for (final int i : this.atomTypeIndices.get(atomType)) { if(assignedShiftCount < shifts.size()){ // shift assignment - 
this.mol.getAtom(i).setProperty(NMR.Utils.getNMRShiftConstant(atomType), shifts.get(assignedShiftCount)); + this.mol.getAtom(i).setProperty(casekit.NMR.Utils.getNMRShiftConstant(atomType), shifts.get(assignedShiftCount)); } assignedShiftCount++; } @@ -334,9 +335,9 @@ private void askForEquivalentPeaks(final String atomType) { */ public final int parseDEPT(final String pathToDEPT90, final String pathToDEPT135, final double tol) throws IOException, ParserConfigurationException, SAXException { - if(NMR.Utils.getFileFormat(pathToDEPT90).equals("csv") && NMR.Utils.getFileFormat(pathToDEPT135).equals("csv")) { + if(casekit.NMR.Utils.getFileFormat(pathToDEPT90).equals("csv") && casekit.NMR.Utils.getFileFormat(pathToDEPT135).equals("csv")) { return this.parseDEPTviaPeakTable(pathToDEPT90, pathToDEPT135, tol); - } else if(NMR.Utils.getFileFormat(pathToDEPT90).equals("xml") && NMR.Utils.getFileFormat(pathToDEPT135).equals("xml")) { + } else if(casekit.NMR.Utils.getFileFormat(pathToDEPT90).equals("xml") && casekit.NMR.Utils.getFileFormat(pathToDEPT135).equals("xml")) { return this.parseDEPTviaXML(pathToDEPT90, pathToDEPT135, tol); } @@ -361,9 +362,9 @@ public final int parseDEPT(final String pathToDEPT90, final String pathToDEPT135 */ public final int parseDEPTviaPeakTable(final String pathToDEPT90, final String pathToDEPT135, final double tol) throws IOException { - final ArrayList matchesDEPT90 = NMR.Utils.matchShiftsFromPeakTable(this.mol, pathToDEPT90, "C", tol, 4); - final ArrayList matchesDEPT135 = NMR.Utils.matchShiftsFromPeakTable(this.mol, pathToDEPT135, "C", tol, 4); - final ArrayList intensitiesDEPT135 = NMR.Utils.parsePeakTable(pathToDEPT135, 6); + final ArrayList matchesDEPT90 = casekit.NMR.Utils.matchShiftsFromPeakTable(this.mol, pathToDEPT90, "C", tol, 4); + final ArrayList matchesDEPT135 = casekit.NMR.Utils.matchShiftsFromPeakTable(this.mol, pathToDEPT135, "C", tol, 4); + final ArrayList intensitiesDEPT135 = casekit.NMR.Utils.parsePeakTable(pathToDEPT135, 6); 
return this.setImplicitHydrogenNumberFromDEPT(matchesDEPT90, matchesDEPT135, intensitiesDEPT135); } @@ -385,9 +386,9 @@ public final int parseDEPTviaPeakTable(final String pathToDEPT90, final String p */ public final int parseDEPTviaXML(final String pathToDEPT90, final String pathToDEPT135, final double tol) throws IOException, ParserConfigurationException, SAXException { - final ArrayList matchesDEPT90 = NMR.Utils.matchShiftsFromXML(this.mol, pathToDEPT90, "C", tol, 1, 1); - final ArrayList matchesDEPT135 = NMR.Utils.matchShiftsFromXML(this.mol, pathToDEPT135, "C", tol, 1, 1); - final ArrayList intensitiesDEPT135 = NMR.Utils.parseXML(pathToDEPT135, 1, 2); + final ArrayList matchesDEPT90 = casekit.NMR.Utils.matchShiftsFromXML(this.mol, pathToDEPT90, "C", tol, 1, 1); + final ArrayList matchesDEPT135 = casekit.NMR.Utils.matchShiftsFromXML(this.mol, pathToDEPT135, "C", tol, 1, 1); + final ArrayList intensitiesDEPT135 = casekit.NMR.Utils.parseXML(pathToDEPT135, 1, 2); return this.setImplicitHydrogenNumberFromDEPT(matchesDEPT90, matchesDEPT135, intensitiesDEPT135); } @@ -410,10 +411,10 @@ public final int setDEPT(final Spectrum spectrumDEPT90, final Spectrum spectrumD final ArrayList shiftsDEPT90 = spectrumDEPT90.getShiftsByDim(0); final ArrayList shiftsDEPT135 = spectrumDEPT135.getShiftsByDim(0); final ArrayList intensitiesDEPT135 = spectrumDEPT135.getIntensities(); - ArrayList matchesDEPT90 = NMR.Utils.findShiftMatches(this.mol, shiftsDEPT90, tol, "C"); - matchesDEPT90 = NMR.Utils.correctShiftMatches(this.mol, shiftsDEPT90, matchesDEPT90, tol, "C"); - ArrayList matchesDEPT135 = NMR.Utils.findShiftMatches(this.mol, shiftsDEPT135, tol, "C"); - matchesDEPT135 = NMR.Utils.correctShiftMatches(this.mol, shiftsDEPT135, matchesDEPT135, tol, "C"); + ArrayList matchesDEPT90 = casekit.NMR.Utils.findShiftMatches(this.mol, shiftsDEPT90, tol, "C"); + matchesDEPT90 = casekit.NMR.Utils.correctShiftMatches(this.mol, shiftsDEPT90, matchesDEPT90, tol, "C"); + ArrayList matchesDEPT135 = 
casekit.NMR.Utils.findShiftMatches(this.mol, shiftsDEPT135, tol, "C"); + matchesDEPT135 = casekit.NMR.Utils.correctShiftMatches(this.mol, shiftsDEPT135, matchesDEPT135, tol, "C"); return this.setImplicitHydrogenNumberFromDEPT(matchesDEPT90, matchesDEPT135, intensitiesDEPT135); @@ -486,7 +487,7 @@ private int setImplicitHydrogenNumberFromDEPT(final ArrayList matchesDE */ public final boolean parseHSQC(final String pathToPeakList, final String atomType, final double tol) throws IOException, ParserConfigurationException, SAXException { - switch (NMR.Utils.getFileFormat(pathToPeakList)) { + switch (casekit.NMR.Utils.getFileFormat(pathToPeakList)) { case "csv": parseHSQCviaPeakTable(pathToPeakList, atomType, tol); break; @@ -523,8 +524,8 @@ public final boolean parseHSQC(final String pathToPeakList, final String atomTyp */ public final void parseHSQCviaPeakTable(final String pathToPeakList, final String heavyAtomType, final double tol) throws IOException { - final ArrayList shiftsHydrogen = NMR.Utils.parsePeakTable(pathToPeakList, 5); - final ArrayList matchesHeavyAtomType = NMR.Utils.matchShiftsFromPeakTable(this.mol, pathToPeakList, heavyAtomType, tol, 6); + final ArrayList shiftsHydrogen = casekit.NMR.Utils.parsePeakTable(pathToPeakList, 5); + final ArrayList matchesHeavyAtomType = casekit.NMR.Utils.matchShiftsFromPeakTable(this.mol, pathToPeakList, heavyAtomType, tol, 6); this.setImplicitHydrogenShifts(shiftsHydrogen, matchesHeavyAtomType); } @@ -552,8 +553,8 @@ public final void parseHSQCviaPeakTable(final String pathToPeakList, final Strin */ public final void parseHSQCviaXML(final String pathToXML, final String heavyAtomType, final double tol) throws IOException, ParserConfigurationException, SAXException { - final ArrayList shiftsHydrogen = NMR.Utils.parseXML(pathToXML, 2, 2); - final ArrayList matchesHeavyAtomType = NMR.Utils.matchShiftsFromXML(this.mol, pathToXML, heavyAtomType, tol, 2, 1); + final ArrayList shiftsHydrogen = 
casekit.NMR.Utils.parseXML(pathToXML, 2, 2); + final ArrayList matchesHeavyAtomType = casekit.NMR.Utils.matchShiftsFromXML(this.mol, pathToXML, heavyAtomType, tol, 2, 1); this.setImplicitHydrogenShifts(shiftsHydrogen, matchesHeavyAtomType); } @@ -569,10 +570,10 @@ private void setImplicitHydrogenShifts(final ArrayList shiftsHydrogen, f if (matchAtom.getImplicitHydrogenCount() == null || matchAtom.getImplicitHydrogenCount() == 0) { continue; } - if (matchAtom.getProperty(Spectrum.SPECTYPE_HSQC) == null) { - matchAtom.setProperty(Spectrum.SPECTYPE_HSQC, new ArrayList<>(matchAtom.getImplicitHydrogenCount())); + if (matchAtom.getProperty(CDKConstants.NMRSPECTYPE_2D_HSQC) == null) { + matchAtom.setProperty(CDKConstants.NMRSPECTYPE_2D_HSQC, new ArrayList<>(matchAtom.getImplicitHydrogenCount())); } - assignedHydrogensShifts = matchAtom.getProperty(Spectrum.SPECTYPE_HSQC); + assignedHydrogensShifts = matchAtom.getProperty(CDKConstants.NMRSPECTYPE_2D_HSQC); if (assignedHydrogensShifts.size() < matchAtom.getImplicitHydrogenCount()) { assignedHydrogensShifts.add(shiftsHydrogen.get(i)); } @@ -594,8 +595,8 @@ public final void setHSQC(final Spectrum spectrum, final double tolHeavy) { final ArrayList shiftsHydrogen = spectrum.getShiftsByDim(0); final ArrayList shiftsHeavyAtom = spectrum.getShiftsByDim(1); - ArrayList matchesHeavyAtom = NMR.Utils.findShiftMatches(this.mol, shiftsHeavyAtom, tolHeavy, Utils.getElementIdentifier(spectrum.getNuclei()[1])); - matchesHeavyAtom = NMR.Utils.correctShiftMatches(this.mol, shiftsHeavyAtom, matchesHeavyAtom, tolHeavy, Utils.getElementIdentifier(spectrum.getNuclei()[1])); + ArrayList matchesHeavyAtom = casekit.NMR.Utils.findShiftMatches(this.mol, shiftsHeavyAtom, tolHeavy, Utils.getElementIdentifier(spectrum.getNuclei()[1])); + matchesHeavyAtom = casekit.NMR.Utils.correctShiftMatches(this.mol, shiftsHeavyAtom, matchesHeavyAtom, tolHeavy, Utils.getElementIdentifier(spectrum.getNuclei()[1])); IAtom matchAtom; ArrayList assignedHydrogensShifts; 
@@ -605,10 +606,10 @@ public final void setHSQC(final Spectrum spectrum, final double tolHeavy) { if (matchAtom.getImplicitHydrogenCount() == null || matchAtom.getImplicitHydrogenCount() == 0) { continue; } - if (matchAtom.getProperty(Spectrum.SPECTYPE_HSQC) == null) { - matchAtom.setProperty(Spectrum.SPECTYPE_HSQC, new ArrayList<>(matchAtom.getImplicitHydrogenCount())); + if (matchAtom.getProperty(CDKConstants.NMRSPECTYPE_2D_HSQC) == null) { + matchAtom.setProperty(CDKConstants.NMRSPECTYPE_2D_HSQC, new ArrayList<>(matchAtom.getImplicitHydrogenCount())); } - assignedHydrogensShifts = matchAtom.getProperty(Spectrum.SPECTYPE_HSQC); + assignedHydrogensShifts = matchAtom.getProperty(CDKConstants.NMRSPECTYPE_2D_HSQC); if (assignedHydrogensShifts.size() < matchAtom.getImplicitHydrogenCount()) { assignedHydrogensShifts.add(shiftsHydrogen.get(i)); } @@ -655,10 +656,10 @@ private int[] findSingleImplicitHydrogenShiftMatch(final double queryShift, fina ArrayList protonShiftList; for (int i = 0; i < this.mol.getAtomCount(); i++) { // skip atoms without implicit hydrogens - if (this.mol.getAtom(i).getProperty(Spectrum.SPECTYPE_HSQC) == null) { + if (this.mol.getAtom(i).getProperty(CDKConstants.NMRSPECTYPE_2D_HSQC) == null) { continue; } - protonShiftList = this.mol.getAtom(i).getProperty(Spectrum.SPECTYPE_HSQC); + protonShiftList = this.mol.getAtom(i).getProperty(CDKConstants.NMRSPECTYPE_2D_HSQC); for (int j = 0; j < protonShiftList.size(); j++) { // figure out the atom with lowest shift deviation if ((queryShift - tol <= protonShiftList.get(j)) && (protonShiftList.get(j) <= queryShift + tol) && (Math.abs(queryShift - protonShiftList.get(j)) < minDiff)) { @@ -701,7 +702,7 @@ private ArrayList correctHydrogenShiftMatches(final ArrayList s matchIndex = matches.indexOf(matchIndexAtomContainer); if (matches.get(matchIndex) >= 0) { singleMatchIndex = this.findSingleImplicitHydrogenShiftMatch(shifts.get(matchIndex), tol); - singleMatchShifts = 
this.mol.getAtom(singleMatchIndex[0]).getProperty(Spectrum.SPECTYPE_HSQC); + singleMatchShifts = this.mol.getAtom(singleMatchIndex[0]).getProperty(CDKConstants.NMRSPECTYPE_2D_HSQC); diff = shifts.get(matchIndex) - singleMatchShifts.get(singleMatchIndex[1]); diffs.add(diff); } @@ -741,7 +742,7 @@ private ArrayList correctHydrogenShiftMatches(final ArrayList s */ public final boolean parseHHCOSY(final String pathToPeakList, final double tol) throws IOException, ParserConfigurationException, SAXException { - switch (NMR.Utils.getFileFormat(pathToPeakList)) { + switch (casekit.NMR.Utils.getFileFormat(pathToPeakList)) { case "csv": parseHHCOSYviaPeakTable(pathToPeakList, tol); break; @@ -779,11 +780,11 @@ public final boolean parseHHCOSY(final String pathToPeakList, final double tol) */ public final boolean parseHHCOSYviaPeakTable(final String pathToPeakList, final double tol) throws IOException { - final Spectrum spectrum = new Spectrum( new String[]{ Utils.getNMRIsotopeIdentifier("H"), - Utils.getNMRIsotopeIdentifier("H")}, - new ArrayList[]{NMR.Utils.parsePeakTable(pathToPeakList, 5), - NMR.Utils.parsePeakTable(pathToPeakList, 6)}, - NMR.Utils.parsePeakTable(pathToPeakList, 9)); + final Spectrum spectrum = new Spectrum( new String[]{ Utils.getIsotopeIdentifier("H"), + Utils.getIsotopeIdentifier("H")}, + new ArrayList[]{casekit.NMR.Utils.parsePeakTable(pathToPeakList, 5), + casekit.NMR.Utils.parsePeakTable(pathToPeakList, 6)}, + casekit.NMR.Utils.parsePeakTable(pathToPeakList, 9)); return this.setHHCOSY(spectrum, tol); } @@ -813,11 +814,11 @@ public final boolean parseHHCOSYviaPeakTable(final String pathToPeakList, final */ public final boolean parseHHCOSYviaXML(final String pathToXML, final double tol) throws IOException, ParserConfigurationException, SAXException { - final Spectrum spectrum = new Spectrum( new String[]{ Utils.getNMRIsotopeIdentifier("H"), - Utils.getNMRIsotopeIdentifier("H")}, - new ArrayList[]{NMR.Utils.parseXML(pathToXML, 2, 1), - 
NMR.Utils.parseXML(pathToXML, 2, 2)}, - NMR.Utils.parseXML(pathToXML, 2, 3)); + final Spectrum spectrum = new Spectrum( new String[]{ Utils.getIsotopeIdentifier("H"), + Utils.getIsotopeIdentifier("H")}, + new ArrayList[]{casekit.NMR.Utils.parseXML(pathToXML, 2, 1), + casekit.NMR.Utils.parseXML(pathToXML, 2, 2)}, + casekit.NMR.Utils.parseXML(pathToXML, 2, 3)); return this.setHHCOSY(spectrum, tol); } @@ -837,10 +838,10 @@ public final boolean setHHCOSY(final Spectrum spectrum, final double tol) { final ArrayList hydrogenShiftMatches1 = this.findImplicitHydrogenShiftMatches(spectrum.getShiftsByDim(0), tol); final ArrayList hydrogenShiftMatches2 = this.findImplicitHydrogenShiftMatches(spectrum.getShiftsByDim(1), tol); // are all signals bidirectional? - if (!NMR.Utils.isBidirectional(hydrogenShiftMatches1, hydrogenShiftMatches2)) { + if (!casekit.NMR.Utils.isBidirectional(hydrogenShiftMatches1, hydrogenShiftMatches2)) { return false; } - NMR.Utils.setBidirectionalLinks(this.mol, hydrogenShiftMatches1, hydrogenShiftMatches2, Spectrum.SPECTYPE_HHCOSY); + casekit.NMR.Utils.setBidirectionalLinks(this.mol, hydrogenShiftMatches1, hydrogenShiftMatches2, CDKConstants.NMRSPECTYPE_2D_HHCOSY); return true; } @@ -861,7 +862,7 @@ public final boolean setHHCOSY(final Spectrum spectrum, final double tol) { */ public final boolean parseINADEQUATE(final String pathToPeakList, final double tol) throws IOException, ParserConfigurationException, SAXException { - switch (NMR.Utils.getFileFormat(pathToPeakList)) { + switch (casekit.NMR.Utils.getFileFormat(pathToPeakList)) { case "csv": parseINADEQUATEviaPeakTable(pathToPeakList, tol); break; @@ -895,11 +896,11 @@ public final boolean parseINADEQUATE(final String pathToPeakList, final double t */ public final boolean parseINADEQUATEviaPeakTable(final String pathToPeakList, final double tol) throws IOException { - final Spectrum spectrum = new Spectrum( new String[]{ Utils.getNMRIsotopeIdentifier("C"), - Utils.getNMRIsotopeIdentifier("C")}, - 
new ArrayList[]{NMR.Utils.parsePeakTable(pathToPeakList, 5), - NMR.Utils.parsePeakTable(pathToPeakList, 6)}, - NMR.Utils.parsePeakTable(pathToPeakList, 9)); + final Spectrum spectrum = new Spectrum( new String[]{ Utils.getIsotopeIdentifier("C"), + Utils.getIsotopeIdentifier("C")}, + new ArrayList[]{casekit.NMR.Utils.parsePeakTable(pathToPeakList, 5), + casekit.NMR.Utils.parsePeakTable(pathToPeakList, 6)}, + casekit.NMR.Utils.parsePeakTable(pathToPeakList, 9)); return this.setINADEQUATE(spectrum, tol); } @@ -924,11 +925,11 @@ public final boolean parseINADEQUATEviaPeakTable(final String pathToPeakList, fi */ public final boolean parseINADEQUATEviaXML(final String pathToXML, final double tol) throws IOException, ParserConfigurationException, SAXException { - final Spectrum spectrum = new Spectrum( new String[]{ Utils.getNMRIsotopeIdentifier("C"), - Utils.getNMRIsotopeIdentifier("C")}, - new ArrayList[]{NMR.Utils.parseXML(pathToXML, 2, 1), - NMR.Utils.parseXML(pathToXML, 2, 2)}, - NMR.Utils.parseXML(pathToXML, 2, 3)); + final Spectrum spectrum = new Spectrum( new String[]{ Utils.getIsotopeIdentifier("C"), + Utils.getIsotopeIdentifier("C")}, + new ArrayList[]{casekit.NMR.Utils.parseXML(pathToXML, 2, 1), + casekit.NMR.Utils.parseXML(pathToXML, 2, 2)}, + casekit.NMR.Utils.parseXML(pathToXML, 2, 3)); return this.setINADEQUATE(spectrum, tol); } @@ -947,13 +948,13 @@ public final boolean parseINADEQUATEviaXML(final String pathToXML, final double */ public final boolean setINADEQUATE(final Spectrum spectrum, final double tol) { - final ArrayList carbonShiftMatches1 = NMR.Utils.findShiftMatches(this.mol, spectrum.getShiftsByDim(0), tol, "C"); - final ArrayList carbonShiftMatches2 = NMR.Utils.findShiftMatches(this.mol, spectrum.getShiftsByDim(1), tol, "C"); + final ArrayList carbonShiftMatches1 = casekit.NMR.Utils.findShiftMatches(this.mol, spectrum.getShiftsByDim(0), tol, "C"); + final ArrayList carbonShiftMatches2 = casekit.NMR.Utils.findShiftMatches(this.mol, 
spectrum.getShiftsByDim(1), tol, "C"); // are all signals bidirectional? - if (!NMR.Utils.isBidirectional(carbonShiftMatches1, carbonShiftMatches2)) { + if (!casekit.NMR.Utils.isBidirectional(carbonShiftMatches1, carbonShiftMatches2)) { return false; } - NMR.Utils.setBidirectionalLinks(this.mol, carbonShiftMatches1, carbonShiftMatches2, Spectrum.SPECTYPE_INADEQUATE); + casekit.NMR.Utils.setBidirectionalLinks(this.mol, carbonShiftMatches1, carbonShiftMatches2, CDKConstants.NMRSPECTYPE_2D_INADEQUATE); return true; } @@ -977,7 +978,7 @@ public final boolean setINADEQUATE(final Spectrum spectrum, final double tol) { */ public final boolean parseHMBC(final String pathToPeakList, final String atomType, final double tolHydrogen, final double tolHeavy) throws IOException, ParserConfigurationException, SAXException { - switch (NMR.Utils.getFileFormat(pathToPeakList)) { + switch (casekit.NMR.Utils.getFileFormat(pathToPeakList)) { case "csv": parseHMBCviaPeakTable(pathToPeakList, atomType, tolHydrogen, tolHeavy); break; @@ -1014,9 +1015,9 @@ public final boolean parseHMBC(final String pathToPeakList, final String atomTyp */ public final void parseHMBCviaPeakTable(final String pathToPeakList, final String atomType, final double tolHydrogen, final double tolHeavy) throws IOException { - final ArrayList hydrogenShifts = NMR.Utils.parsePeakTable(pathToPeakList, 5); + final ArrayList hydrogenShifts = casekit.NMR.Utils.parsePeakTable(pathToPeakList, 5); final ArrayList hydrogenShiftMatches = this.correctHydrogenShiftMatches(hydrogenShifts, this.findImplicitHydrogenShiftMatches(hydrogenShifts, tolHydrogen), tolHydrogen); - final ArrayList heavyAtomShiftMatches = NMR.Utils.matchShiftsFromPeakTable(this.mol, pathToPeakList, atomType, tolHeavy, 6); + final ArrayList heavyAtomShiftMatches = casekit.NMR.Utils.matchShiftsFromPeakTable(this.mol, pathToPeakList, atomType, tolHeavy, 6); this.setHMBC(hydrogenShiftMatches, heavyAtomShiftMatches); } @@ -1045,9 +1046,9 @@ public final void 
parseHMBCviaPeakTable(final String pathToPeakList, final Strin */ public final void parseHMBCviaXML(final String pathToXML, final String atomType, final double tolHydrogen, final double tolHeavy) throws IOException, ParserConfigurationException, SAXException { - final ArrayList hydrogenShifts = NMR.Utils.parseXML(pathToXML, 2, 2); + final ArrayList hydrogenShifts = casekit.NMR.Utils.parseXML(pathToXML, 2, 2); final ArrayList hydrogenShiftMatches = this.correctHydrogenShiftMatches(hydrogenShifts, this.findImplicitHydrogenShiftMatches(hydrogenShifts, tolHydrogen), tolHydrogen); - final ArrayList heavyAtomShiftMatches = NMR.Utils.matchShiftsFromXML(this.mol, pathToXML, atomType, tolHeavy, 2, 1); + final ArrayList heavyAtomShiftMatches = casekit.NMR.Utils.matchShiftsFromXML(this.mol, pathToXML, atomType, tolHeavy, 2, 1); this.setHMBC(hydrogenShiftMatches, heavyAtomShiftMatches); } @@ -1057,10 +1058,10 @@ private void setHMBC(final ArrayList hydrogenShiftMatches, final ArrayL ArrayList HMBCList; for (int i = 0; i < hydrogenShiftMatches.size(); i++) { if (hydrogenShiftMatches.get(i) >= 0 && heavyAtomShiftMatches.get(i) >= 0) { - if (this.mol.getAtom(hydrogenShiftMatches.get(i)).getProperty(Spectrum.SPECTYPE_HMBC) == null) { - this.mol.getAtom(hydrogenShiftMatches.get(i)).setProperty(Spectrum.SPECTYPE_HMBC, new ArrayList<>()); + if (this.mol.getAtom(hydrogenShiftMatches.get(i)).getProperty(CDKConstants.NMRSPECTYPE_2D_HMBC) == null) { + this.mol.getAtom(hydrogenShiftMatches.get(i)).setProperty(CDKConstants.NMRSPECTYPE_2D_HMBC, new ArrayList<>()); } - HMBCList = this.mol.getAtom(hydrogenShiftMatches.get(i)).getProperty(Spectrum.SPECTYPE_HMBC); + HMBCList = this.mol.getAtom(hydrogenShiftMatches.get(i)).getProperty(CDKConstants.NMRSPECTYPE_2D_HMBC); if (!HMBCList.contains(heavyAtomShiftMatches.get(i))) { HMBCList.add(heavyAtomShiftMatches.get(i)); } @@ -1084,16 +1085,16 @@ public final void setHMBC(final Spectrum spectrum, final double tolHydrogen, fin final ArrayList 
shiftsHydrogen = spectrum.getShiftsByDim(0); final ArrayList matchesHydrogen = this.correctHydrogenShiftMatches(shiftsHydrogen, this.findImplicitHydrogenShiftMatches(shiftsHydrogen, tolHydrogen), tolHydrogen); final ArrayList shiftsHeavyAtom = spectrum.getShiftsByDim(1); - ArrayList matchesHeavyAtom = NMR.Utils.findShiftMatches(this.mol, shiftsHeavyAtom, tolHeavy, Utils.getElementIdentifier(spectrum.getNuclei()[1])); - matchesHeavyAtom = NMR.Utils.correctShiftMatches(this.mol, shiftsHeavyAtom, matchesHeavyAtom, tolHeavy, Utils.getElementIdentifier(spectrum.getNuclei()[1])); + ArrayList matchesHeavyAtom = casekit.NMR.Utils.findShiftMatches(this.mol, shiftsHeavyAtom, tolHeavy, Utils.getElementIdentifier(spectrum.getNuclei()[1])); + matchesHeavyAtom = casekit.NMR.Utils.correctShiftMatches(this.mol, shiftsHeavyAtom, matchesHeavyAtom, tolHeavy, Utils.getElementIdentifier(spectrum.getNuclei()[1])); ArrayList HMBCList; for (int i = 0; i < matchesHydrogen.size(); i++) { if (matchesHydrogen.get(i) >= 0 && matchesHeavyAtom.get(i) >= 0) { - if (this.mol.getAtom(matchesHydrogen.get(i)).getProperty(Spectrum.SPECTYPE_HMBC) == null) { - this.mol.getAtom(matchesHydrogen.get(i)).setProperty(Spectrum.SPECTYPE_HMBC, new ArrayList<>()); + if (this.mol.getAtom(matchesHydrogen.get(i)).getProperty(CDKConstants.NMRSPECTYPE_2D_HMBC) == null) { + this.mol.getAtom(matchesHydrogen.get(i)).setProperty(CDKConstants.NMRSPECTYPE_2D_HMBC, new ArrayList<>()); } - HMBCList = this.mol.getAtom(matchesHydrogen.get(i)).getProperty(Spectrum.SPECTYPE_HMBC); + HMBCList = this.mol.getAtom(matchesHydrogen.get(i)).getProperty(CDKConstants.NMRSPECTYPE_2D_HMBC); if (!HMBCList.contains(matchesHeavyAtom.get(i))) { HMBCList.add(matchesHeavyAtom.get(i)); } diff --git a/src/NMR/Process.java b/src/casekit/NMR/Process.java similarity index 90% rename from src/NMR/Process.java rename to src/casekit/NMR/Process.java index eece28f..fed1511 100644 --- a/src/NMR/Process.java +++ b/src/casekit/NMR/Process.java @@ -21,8 
+21,9 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ -package NMR; +package casekit.NMR; +import casekit.NMR.model.Spectrum; import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; @@ -31,6 +32,7 @@ import java.util.ArrayList; import java.util.HashMap; import org.openscience.cdk.Atom; +import org.openscience.cdk.CDKConstants; import org.openscience.cdk.interfaces.IAtom; import org.openscience.cdk.interfaces.IAtomContainer; import org.openscience.cdk.interfaces.IAtomContainerSet; @@ -229,7 +231,7 @@ public void setBonds(final String[] experiments){ ArrayList signalList; for (int e = 0; e < experiments.length; e++) { for (int i = 0; i < this.mol.getAtomCount(); i++) { - NMRSHIFT_ATOMTYPE = NMR.Utils.getNMRShiftConstant(this.mol.getAtom(i).getSymbol()); + NMRSHIFT_ATOMTYPE = casekit.NMR.Utils.getNMRShiftConstant(this.mol.getAtom(i).getSymbol()); // is the NMR shift constant defined and does the nmr shift property entry in an atom exist? 
if (NMRSHIFT_ATOMTYPE != null && this.mol.getAtom(i).getProperty(NMRSHIFT_ATOMTYPE) != null) { if (this.mol.getAtom(i).getProperties().containsKey(experiments[e])) { @@ -239,7 +241,7 @@ public void setBonds(final String[] experiments){ if ((i == bondPartnerIndex)) {// || (this.mol.getBond(this.mol.getAtom(i), this.mol.getAtom(bondPartnerIndex)) != null)) { continue; } - if(experiments[e].equals(Spectrum.SPECTYPE_HMBC)){ + if(experiments[e].equals(CDKConstants.NMRSPECTYPE_2D_HMBC)){ System.out.println("HMBC bond setting: still to come!!!"); } else { this.setBond(i, bondPartnerIndex); @@ -257,7 +259,7 @@ private void setBond(final int index1, final int index2){ if(this.mol.getBond(this.mol.getAtom(index1), this.mol.getAtom(index2)) != null){ this.mol.removeBond(this.mol.getAtom(index1), this.mol.getAtom(index2)); } - this.mol.addBond(index1, index2, NMR.Utils.getBondTypeFromHybridizations(this.mol.getAtom(index1), this.mol.getAtom(index2))); + this.mol.addBond(index1, index2, casekit.NMR.Utils.getBondTypeFromHybridizations(this.mol.getAtom(index1), this.mol.getAtom(index2))); } @@ -281,13 +283,13 @@ private void setBond(final int index1, final int index2){ */ public boolean addBond(final String atomType1, final double shift1, final double tol1, final String atomType2, final double shift2, final double tol2) { - final String NMRSHIFT_ATOMTYPE1 = NMR.Utils.getNMRShiftConstant(atomType1); - final String NMRSHIFT_ATOMTYPE2 = NMR.Utils.getNMRShiftConstant(atomType2); + final String NMRSHIFT_ATOMTYPE1 = casekit.NMR.Utils.getNMRShiftConstant(atomType1); + final String NMRSHIFT_ATOMTYPE2 = casekit.NMR.Utils.getNMRShiftConstant(atomType2); if ((NMRSHIFT_ATOMTYPE1 == null) || (NMRSHIFT_ATOMTYPE2 == null)) { return false; } - int atomIndex1 = NMR.Utils.findSingleShiftMatch(this.mol, shift1, tol1, atomType1); - int atomIndex2 = NMR.Utils.findSingleShiftMatch(this.mol, shift2, tol2, atomType2); + int atomIndex1 = casekit.NMR.Utils.findSingleShiftMatch(this.mol, shift1, tol1, 
atomType1); + int atomIndex2 = casekit.NMR.Utils.findSingleShiftMatch(this.mol, shift2, tol2, atomType2); if ((atomIndex1 < 0) || (atomIndex2 < 0) || (atomIndex1 == atomIndex2)) { return false; } @@ -321,25 +323,25 @@ public boolean addBond(final String atomType1, final double shift1, final double */ public boolean addCOSY(final String atomType1, final double shift1, final double tol1, final String atomType2, final Double shift2, final double tol2, final Double shiftH){ - final String NMRSHIFT_ATOMTYPE1 = NMR.Utils.getNMRShiftConstant(atomType1); - final String NMRSHIFT_ATOMTYPE2 = NMR.Utils.getNMRShiftConstant(atomType2); + final String NMRSHIFT_ATOMTYPE1 = casekit.NMR.Utils.getNMRShiftConstant(atomType1); + final String NMRSHIFT_ATOMTYPE2 = casekit.NMR.Utils.getNMRShiftConstant(atomType2); if ((NMRSHIFT_ATOMTYPE1 == null) || (NMRSHIFT_ATOMTYPE2 == null)) { return false; } - int atomIndex1 = NMR.Utils.findSingleShiftMatch(this.mol, shift1, tol1, atomType1); + int atomIndex1 = casekit.NMR.Utils.findSingleShiftMatch(this.mol, shift1, tol1, atomType1); int atomIndex2 = this.addHSQC(atomType2, shift2, tol2, shiftH); if ((atomIndex1 < 0) || (atomIndex2 < 0) || (atomIndex1 == atomIndex2)) { return false; } - if(this.mol.getAtom(atomIndex1).getProperty(Spectrum.SPECTYPE_HHCOSY) == null){ - this.mol.getAtom(atomIndex1).setProperty(Spectrum.SPECTYPE_HHCOSY, new ArrayList<>()); + if(this.mol.getAtom(atomIndex1).getProperty(CDKConstants.NMRSPECTYPE_2D_HHCOSY) == null){ + this.mol.getAtom(atomIndex1).setProperty(CDKConstants.NMRSPECTYPE_2D_HHCOSY, new ArrayList<>()); } - if(this.mol.getAtom(atomIndex2).getProperty(Spectrum.SPECTYPE_HHCOSY) == null){ - this.mol.getAtom(atomIndex2).setProperty(Spectrum.SPECTYPE_HHCOSY, new ArrayList<>()); + if(this.mol.getAtom(atomIndex2).getProperty(CDKConstants.NMRSPECTYPE_2D_HHCOSY) == null){ + this.mol.getAtom(atomIndex2).setProperty(CDKConstants.NMRSPECTYPE_2D_HHCOSY, new ArrayList<>()); } - final ArrayList COSYList = 
this.mol.getAtom(atomIndex1).getProperty(Spectrum.SPECTYPE_HHCOSY); - final ArrayList COSYListX = this.mol.getAtom(atomIndex2).getProperty(Spectrum.SPECTYPE_HHCOSY); + final ArrayList COSYList = this.mol.getAtom(atomIndex1).getProperty(CDKConstants.NMRSPECTYPE_2D_HHCOSY); + final ArrayList COSYListX = this.mol.getAtom(atomIndex2).getProperty(CDKConstants.NMRSPECTYPE_2D_HHCOSY); COSYList.add(atomIndex2); COSYListX.add(atomIndex1); @@ -373,17 +375,17 @@ public boolean addCOSY(final String atomType1, final double shift1, final double public int addHSQC(final String atomType, final Double shift, final double tol, final Double shiftH ){ int atomIndex = -1; - final String NMRSHIFT_ATOMTYPE = NMR.Utils.getNMRShiftConstant(atomType); + final String NMRSHIFT_ATOMTYPE = casekit.NMR.Utils.getNMRShiftConstant(atomType); if ((NMRSHIFT_ATOMTYPE == null) || (this.atomTypeIndices.get(atomType) == null)) { return -1; } // set additional HSQC for an atom with already set shift value if(shift != null){ - atomIndex = NMR.Utils.findSingleShiftMatch(this.mol, shift, tol, atomType); + atomIndex = casekit.NMR.Utils.findSingleShiftMatch(this.mol, shift, tol, atomType); } else { // set HSQC for the first atom of given atom type without a already set shift value and without attached proton shifts for (Integer i : this.atomTypeIndices.get(atomType)) { - if ((this.mol.getAtom(i).getProperty(NMRSHIFT_ATOMTYPE) == null) && (this.mol.getAtom(i).getProperty(Spectrum.SPECTYPE_HSQC) == null)) { + if ((this.mol.getAtom(i).getProperty(NMRSHIFT_ATOMTYPE) == null) && (this.mol.getAtom(i).getProperty(CDKConstants.NMRSPECTYPE_2D_HSQC) == null)) { atomIndex = i; break; } @@ -395,10 +397,10 @@ public int addHSQC(final String atomType, final Double shift, final double tol, } // add the proton shift value if it is higher than 0 if(shiftH != null){ - if (this.mol.getAtom(atomIndex).getProperty(Spectrum.SPECTYPE_HSQC) == null) { - this.mol.getAtom(atomIndex).setProperty(Spectrum.SPECTYPE_HSQC, new 
ArrayList<>()); + if (this.mol.getAtom(atomIndex).getProperty(CDKConstants.NMRSPECTYPE_2D_HSQC) == null) { + this.mol.getAtom(atomIndex).setProperty(CDKConstants.NMRSPECTYPE_2D_HSQC, new ArrayList<>()); } - final ArrayList protonShifts = this.mol.getAtom(atomIndex).getProperty(Spectrum.SPECTYPE_HSQC); + final ArrayList protonShifts = this.mol.getAtom(atomIndex).getProperty(CDKConstants.NMRSPECTYPE_2D_HSQC); protonShifts.add(shiftH); } // increase the implicit proton number @@ -424,7 +426,7 @@ public void addAtom(final String atomType, final Double shift){ this.mol.addAtom(new Atom(atomType)); if(shift != null){ - this.mol.getAtom(this.mol.getAtomCount() - 1).setProperty(NMR.Utils.getNMRShiftConstant(atomType), shift); + this.mol.getAtom(this.mol.getAtomCount() - 1).setProperty(casekit.NMR.Utils.getNMRShiftConstant(atomType), shift); } this.setAtomTypeIndices(); } @@ -480,8 +482,8 @@ public void createLSDFile(final String projectName, final String pathToOutputFil protons = String.valueOf(this.mol.getAtom(i).getImplicitHydrogenCount()); } MULT += "MULT " + (i+1) + " " + this.mol.getAtom(i).getSymbol() + " " + hybrid + " " + protons; - if(this.mol.getAtom(i).getProperty(NMR.Utils.getNMRShiftConstant(this.mol.getAtom(i).getSymbol())) != null){ - MULT += ";\t" + this.mol.getAtom(i).getProperty(NMR.Utils.getNMRShiftConstant(this.mol.getAtom(i).getSymbol())); + if(this.mol.getAtom(i).getProperty(casekit.NMR.Utils.getNMRShiftConstant(this.mol.getAtom(i).getSymbol())) != null){ + MULT += ";\t" + this.mol.getAtom(i).getProperty(casekit.NMR.Utils.getNMRShiftConstant(this.mol.getAtom(i).getSymbol())); } MULT += "\n"; // set HSQC section in LSD input file @@ -489,8 +491,8 @@ public void createLSDFile(final String projectName, final String pathToOutputFil HSQC += "HSQC " + (i+1) + " " + (i+1) + ";\t" + this.mol.getAtom(i).getSymbol() + "H" + this.mol.getAtom(i).getImplicitHydrogenCount() + "\n"; } // set BOND section in LSD input file from INADEQUATE - if 
(this.mol.getAtom(i).getProperty(Spectrum.SPECTYPE_INADEQUATE) != null) { - idxs = this.mol.getAtom(i).getProperty(Spectrum.SPECTYPE_INADEQUATE); + if (this.mol.getAtom(i).getProperty(CDKConstants.NMRSPECTYPE_2D_INADEQUATE) != null) { + idxs = this.mol.getAtom(i).getProperty(CDKConstants.NMRSPECTYPE_2D_INADEQUATE); for (Integer idx : idxs) { if (bondTable[i][idx] == 0 && bondTable[idx][i] == 0) { bondTable[i][idx] = 1; @@ -499,8 +501,8 @@ public void createLSDFile(final String projectName, final String pathToOutputFil } } // set BOND section in LSD input file from COSY - if(this.mol.getAtom(i).getProperty(Spectrum.SPECTYPE_HHCOSY) != null){ - idxs = this.mol.getAtom(i).getProperty(Spectrum.SPECTYPE_HHCOSY); + if(this.mol.getAtom(i).getProperty(CDKConstants.NMRSPECTYPE_2D_HHCOSY) != null){ + idxs = this.mol.getAtom(i).getProperty(CDKConstants.NMRSPECTYPE_2D_HHCOSY); for (Integer idx : idxs) { if(bondTable[i][idx] == 0 && bondTable[idx][i] == 0){ bondTable[i][idx] = 1; @@ -513,8 +515,8 @@ public void createLSDFile(final String projectName, final String pathToOutputFil // set HMBC section in LSD input file // sets only HMBC signals which are not represented by a bond boolean test3JviaNextNeighborBond; - if (this.mol.getAtom(i).getProperty(Spectrum.SPECTYPE_HMBC) != null) { - idxs = this.mol.getAtom(i).getProperty(Spectrum.SPECTYPE_HMBC); + if (this.mol.getAtom(i).getProperty(CDKConstants.NMRSPECTYPE_2D_HMBC) != null) { + idxs = this.mol.getAtom(i).getProperty(CDKConstants.NMRSPECTYPE_2D_HMBC); for (Integer idx : idxs) { if (bondTable[i][idx] == 0 && bondTable[idx][i] == 0) { test3JviaNextNeighborBond = false; @@ -598,11 +600,11 @@ public void countNeighborhoodBonds(final IAtomContainerSet acSet, final String[] for (int k = 0; k < acSet.getAtomContainerCount(); k++) { acDB = acSet.getAtomContainer(k); // for all DB entries containing a spectrum for the current query atom type - for (final String shiftsDB : NMR.DB.getSpectraFromNMRShiftDBEntry(acDB, elem)) { + for 
(final String shiftsDB : casekit.NMR.DB.getSpectraFromNMRShiftDBEntry(acDB, elem)) { if (shiftsDB == null) { continue; } - String[][] shiftsDBvalues = NMR.Utils.parseShiftsNMRShiftDB(shiftsDB); + String[][] shiftsDBvalues = casekit.NMR.Utils.parseShiftsNMRShiftDB(shiftsDB); for (String[] shiftsDBvalue : shiftsDBvalues) { atomIndexDB = Integer.parseInt(shiftsDBvalue[2]); // sometimes the DB atom index is wrong and out of array range @@ -623,7 +625,7 @@ public void countNeighborhoodBonds(final IAtomContainerSet acSet, final String[] this.neighborhoodCountsMatrix[shiftDBInt - minShift][5] += ((acDB.getAtom(atomIndexDB).getImplicitHydrogenCount() != null) && (acDB.getAtom(atomIndexDB).getImplicitHydrogenCount() == 2)) ? 1 : 0; // CH2 count or equivalents, e.g. NH2 this.neighborhoodCountsMatrix[shiftDBInt - minShift][6] += ((acDB.getAtom(atomIndexDB).getImplicitHydrogenCount() != null) && (acDB.getAtom(atomIndexDB).getImplicitHydrogenCount() == 3)) ? 1 : 0; // CH3 count or equivalents, e.g. NH3 // add counts for a specific atom to matrix m - int[] counts = NMR.Utils.getNeighborhoodBondsCount(acDB, atomIndexDB, bondsSet, neighborElems); + int[] counts = casekit.NMR.Utils.getNeighborhoodBondsCount(acDB, atomIndexDB, bondsSet, neighborElems); for (int i = 0; i < counts.length; i++) { this.neighborhoodCountsMatrix[shiftDBInt - minShift][3 + 4 + i] += counts[i]; } diff --git a/src/NMR/ShiftMatcher.java b/src/casekit/NMR/ShiftMatcher.java similarity index 99% rename from src/NMR/ShiftMatcher.java rename to src/casekit/NMR/ShiftMatcher.java index 5e3647f..a46fe32 100644 --- a/src/NMR/ShiftMatcher.java +++ b/src/casekit/NMR/ShiftMatcher.java @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. 
*/ -package NMR; +package casekit.NMR; import java.io.FileNotFoundException; diff --git a/src/NMR/Utils.java b/src/casekit/NMR/Utils.java similarity index 95% rename from src/NMR/Utils.java rename to src/casekit/NMR/Utils.java index a6ec7d4..ba775cd 100644 --- a/src/NMR/Utils.java +++ b/src/casekit/NMR/Utils.java @@ -21,9 +21,10 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ -package NMR; +package casekit.NMR; +import casekit.NMR.model.Spectrum; import java.io.BufferedReader; import java.io.BufferedWriter; import java.io.File; @@ -248,7 +249,7 @@ public static Spectrum parsePeakTable(final String pathToPeakList, final int[] c final String[] nuclei = new String[columns.length]; for (int col = 0; col < columns.length; col++) { shiftsList[col] = Utils.parsePeakTable(pathToPeakList, columns[col]); - nuclei[col] = Utils.getNMRIsotopeIdentifier(atomTypes[col]); + nuclei[col] = Utils.getIsotopeIdentifier(atomTypes[col]); } final ArrayList intensities = parsePeakTable(pathToPeakList, intensityColumnIndex); @@ -321,7 +322,7 @@ public static Spectrum parseXML(final String pathToXML, final int ndim, final in final ArrayList[] shiftLists = new ArrayList[ndim]; final String[] nuclei = new String[ndim]; for (int nucl = 0; nucl < ndim; nucl++) { - nuclei[nucl] = Utils.getNMRIsotopeIdentifier(atomTypes[nucl]); + nuclei[nucl] = Utils.getIsotopeIdentifier(atomTypes[nucl]); shiftLists[nucl] = Utils.parseXML(pathToXML, ndim, attributes[nucl]); } @@ -356,19 +357,19 @@ public static ArrayList correctShiftMatches(final IAtomContainer ac, fi if (Collections.frequency(matches, matchIndexAtomContainer) == 1) { matchIndex = matches.indexOf(matchIndexAtomContainer); if (matches.get(matchIndex) >= 0) { - diffs.add(shifts.get(matchIndex) - Double.parseDouble(ac.getAtom(matches.get(matchIndex)).getProperty(NMR.Utils.getNMRShiftConstant(atomType)).toString())); + diffs.add(shifts.get(matchIndex) - 
Double.parseDouble(ac.getAtom(matches.get(matchIndex)).getProperty(casekit.NMR.Utils.getNMRShiftConstant(atomType)).toString())); } } } // calculate the median of found unique match differences if (diffs.size() > 0) { - final double median = NMR.Utils.getMedian(diffs); + final double median = casekit.NMR.Utils.getMedian(diffs); // add or subtract the median of the differences to all shift list values (input) and match again then for (int i = 0; i < shifts.size(); i++) { shifts.set(i, shifts.get(i) - median); } // rematch - return NMR.Utils.findShiftMatches(ac, shifts, tol, atomType); + return casekit.NMR.Utils.findShiftMatches(ac, shifts, tol, atomType); } return matches; @@ -391,7 +392,7 @@ public static ArrayList findShiftMatches(final IAtomContainer ac, final final ArrayList matches = new ArrayList<>(); for (int i = 0; i < shiftList.size(); i++) { - matches.add(NMR.Utils.findSingleShiftMatch(ac, shiftList.get(i), tol, atomType)); + matches.add(casekit.NMR.Utils.findSingleShiftMatch(ac, shiftList.get(i), tol, atomType)); } return matches; @@ -415,11 +416,11 @@ public static int findSingleShiftMatch(final IAtomContainer ac, final double shi double minDiff = tol, acShift; for (int k = 0; k < ac.getAtomCount(); k++) { // skip other atom types than given - if (ac.getAtom(k).getProperty(NMR.Utils.getNMRShiftConstant(atomType)) == null) { + if (ac.getAtom(k).getProperty(casekit.NMR.Utils.getNMRShiftConstant(atomType)) == null) { continue; } // figure out the atom with lowest shift deviation - acShift = Double.parseDouble(ac.getAtom(k).getProperty(NMR.Utils.getNMRShiftConstant(atomType)).toString()); + acShift = Double.parseDouble(ac.getAtom(k).getProperty(casekit.NMR.Utils.getNMRShiftConstant(atomType)).toString()); if ((shift - tol <= acShift) && (acShift <= shift + tol) && (Math.abs(shift - acShift) < minDiff)) { minDiff = Math.abs(shift - acShift); matchIndex = k; @@ -447,9 +448,9 @@ public static int findSingleShiftMatch(final IAtomContainer ac, final double shi */ 
public static ArrayList matchShiftsFromPeakTable(final IAtomContainer ac, final String pathToPeakList, final String atomType, final double tol, final int column) throws IOException { - final ArrayList shiftsAtomType = NMR.Utils.parsePeakTable(pathToPeakList, column); - ArrayList matchesAtomType = NMR.Utils.findShiftMatches(ac, shiftsAtomType, tol, atomType); - matchesAtomType = NMR.Utils.correctShiftMatches(ac, shiftsAtomType, matchesAtomType, tol, atomType); + final ArrayList shiftsAtomType = casekit.NMR.Utils.parsePeakTable(pathToPeakList, column); + ArrayList matchesAtomType = casekit.NMR.Utils.findShiftMatches(ac, shiftsAtomType, tol, atomType); + matchesAtomType = casekit.NMR.Utils.correctShiftMatches(ac, shiftsAtomType, matchesAtomType, tol, atomType); return matchesAtomType; } @@ -477,9 +478,9 @@ public static ArrayList matchShiftsFromPeakTable(final IAtomContainer a */ public static ArrayList matchShiftsFromXML(final IAtomContainer ac, final String pathToXML, final String atomType, final double tol, final int ndim, final int attribute) throws IOException, ParserConfigurationException, SAXException { - final ArrayList shiftsAtomType = NMR.Utils.parseXML(pathToXML, ndim, attribute); - ArrayList matchesAtomType = NMR.Utils.findShiftMatches(ac, shiftsAtomType, tol, atomType); - matchesAtomType = NMR.Utils.correctShiftMatches(ac, shiftsAtomType, matchesAtomType, tol, atomType); + final ArrayList shiftsAtomType = casekit.NMR.Utils.parseXML(pathToXML, ndim, attribute); + ArrayList matchesAtomType = casekit.NMR.Utils.findShiftMatches(ac, shiftsAtomType, tol, atomType); + matchesAtomType = casekit.NMR.Utils.correctShiftMatches(ac, shiftsAtomType, matchesAtomType, tol, atomType); return matchesAtomType; } @@ -529,12 +530,7 @@ public static String getNMRShiftConstant(final String element){ case "N": return CDKConstants.NMRSHIFT_NITROGEN; case "P": return CDKConstants.NMRSHIFT_PHOSPORUS; case "F": return CDKConstants.NMRSHIFT_FLUORINE; - case "D": return 
CDKConstants.NMRSHIFT_DEUTERIUM; - case "O": return "oxygen nmr shift"; - case "S": return "sulfur nmr shift"; - case "Si": return "silicon nmr shift"; - case "B": return "boron nmr shift"; - case "Pt": return "platinum nmr shift"; + case "S": return CDKConstants.NMRSHIFT_SULFUR; default: return null; } @@ -568,7 +564,7 @@ public static int getElectronNumberByBondOrder(final IBond.Order order) { * @param element element's symbol (e.g. "C") * @return */ - public static String getNMRIsotopeIdentifier(final String element) { + public static String getIsotopeIdentifier(final String element) { switch(element){ case "C": return "13C"; case "H": return "1H"; @@ -619,14 +615,14 @@ public static int[] getNeighborhoodBondsCount(final IAtomContainer ac, final int // for all next neighbors of a specific element for (IAtom neighborAtom : ac.getConnectedAtomsList(ac.getAtom(indexAC))) { // skip if not the right neighborhood element or bond type is unknown/unset - if ((!neighborAtom.getSymbol().equals(neighborElems[n])) || (NMR.Utils.getStringFromBondOrder(ac.getBond(ac.getAtom(indexAC), neighborAtom).getOrder()) == null)) { + if ((!neighborAtom.getSymbol().equals(neighborElems[n])) || (casekit.NMR.Utils.getStringFromBondOrder(ac.getBond(ac.getAtom(indexAC), neighborAtom).getOrder()) == null)) { continue; } - foundBonds += NMR.Utils.getStringFromBondOrder(ac.getBond(ac.getAtom(indexAC), neighborAtom).getOrder()); + foundBonds += casekit.NMR.Utils.getStringFromBondOrder(ac.getBond(ac.getAtom(indexAC), neighborAtom).getOrder()); } for (int k = 0; k < bondsSet.length; k++) { counts[n * bondsSet.length + k] = 0; - if (NMR.Utils.sortString(foundBonds).equals(NMR.Utils.sortString(bondsSet[k]))) { + if (casekit.NMR.Utils.sortString(foundBonds).equals(casekit.NMR.Utils.sortString(bondsSet[k]))) { counts[n * bondsSet.length + k] = 1; break; } @@ -917,7 +913,7 @@ public static HashMap getRMS(final HashMap rms = new HashMap<>(); for (final String key : lookup.keySet()) { - rms.put(key, 
NMR.Utils.getRMS(lookup.get(key))); + rms.put(key, casekit.NMR.Utils.getRMS(lookup.get(key))); // System.out.println("count: " + lookup.get(key).size() + ", mean: " + NMR.Utils.getMean(lookup.get(key)) + ", rms: " + rms.get(key) + ", median: " + NMR.Utils.getMedian(lookup.get(key))); } @@ -980,12 +976,12 @@ public static IAtomContainer removeAtoms(final IAtomContainer ac, final String a */ public static IAtomContainer createAtomContainer(final String molFormula) { - HashMap hash = NMR.Utils.getAtomCountsInMolecularFormula(molFormula); + HashMap hash = casekit.NMR.Utils.getAtomCountsInMolecularFormula(molFormula); IAtomContainer ac = SilentChemObjectBuilder.getInstance().newAtomContainer(); for (String elem : hash.keySet()) { // add atoms of current element - ac = NMR.Utils.addAtoms(ac, elem, hash.get(elem)); + ac = casekit.NMR.Utils.addAtoms(ac, elem, hash.get(elem)); } return ac; @@ -1235,7 +1231,7 @@ public static HashMap getValueFrequencies(final ArrayList getValueFrequencies(final ArrayList getValueFrequencies(final ArrayList getOpenBonds(final IAtomContainer ac, final } for (IAtom neighbor : ac.getConnectedAtomsList(atom)) { bondOrderList.remove(ac.getBond(atom, neighbor).getOrder()); - electronsLeft -= NMR.Utils.getElectronNumberByBondOrder(ac.getBond(atom, neighbor).getOrder()); + electronsLeft -= casekit.NMR.Utils.getElectronNumberByBondOrder(ac.getBond(atom, neighbor).getOrder()); } int theoCounter = 0; for (IBond.Order order : bondOrderList) { - theoCounter += NMR.Utils.getElectronNumberByBondOrder(order); + theoCounter += casekit.NMR.Utils.getElectronNumberByBondOrder(order); } switch (Math.abs(theoCounter - electronsLeft)) { diff --git a/src/NMR/Signal.java b/src/casekit/NMR/model/Signal.java similarity index 62% rename from src/NMR/Signal.java rename to src/casekit/NMR/model/Signal.java index 97cf6cf..1714647 100644 --- a/src/NMR/Signal.java +++ b/src/casekit/NMR/model/Signal.java @@ -25,7 +25,9 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 
OR OTHER DEALINGS IN * THE SOFTWARE. */ -package NMR; +package casekit.NMR.model; + +import java.util.ArrayList; /** * @@ -38,38 +40,45 @@ public class Signal { /** * Am array of doubles to store the chemical shift of */ - private final Double shift[]; + private final Double shifts[]; private final String[] nuclei; + private int[] assignedAtomIndices; /* Signal intensity in arbitrary values */ private Double intensity; private String multiplicity; private Integer phase; - public final static int DIM_ONE = 1, DIM_TWO = 2, DIM_THREE = 3, DIM_FOUR = 4; - public final static int SHIFT_PROTON = 0, SHIFT_HETERO = 1; public final static int PHASE_NONE = 0, PHASE_POSITIVE = 1, PHASE_NEGATIVE = 2; public final static String[] PHASENAMES = {"NONE", "POSITIVE", "NEGATIVE"}; - public Signal(final String[] nuclei, final Double[] shift, final Double intensity, final Integer phase, final String multiplicity) { + public Signal(final String[] nuclei, final Double[] shifts) { + this.nuclei = nuclei; + this.ndim = this.nuclei.length; + this.shifts = shifts; + } + + public Signal(final String[] nuclei, final Double[] shifts, final Double intensity) { this.nuclei = nuclei; this.ndim = this.nuclei.length; - this.shift = shift; + this.shifts = shifts; this.intensity = intensity; - this.phase = phase; - this.multiplicity = multiplicity; } public int getDim(){ return this.ndim; } + public String[] getNuclei(){ + return this.nuclei; + } + public void setShift(final Double shift, final int dim) { - this.shift[dim] = shift; + this.shifts[dim] = shift; } public Double getShift(final int dim) { - return this.shift[dim]; + return this.shifts[dim]; } public void setIntensity(final Double intensity) { @@ -87,6 +96,38 @@ public void setMultiplicity(final String multiplicity) { public String getMultiplicity() { return this.multiplicity; } + + public boolean setAssignedAtomIndices(final int[] indices){ + if(indices.length != this.ndim){ + return false; + } + this.assignedAtomIndices = indices; + + 
return true; + } + + public int[] getAssignedIndices(){ + return this.assignedAtomIndices; + } + + public boolean setAssignedAtomIndex(final int index, final int dim){ + if(dim < 0 || dim >= this.ndim){ + return false; + } + if(this.assignedAtomIndices == null){ + this.assignedAtomIndices = new int[this.ndim]; + for (int i = 0; i < this.ndim; i++) { + this.assignedAtomIndices[i] = -1; + } + } + this.assignedAtomIndices[dim] = index; + + return true; + } + + public int getAssignedAtomIndices(final int dim){ + return this.assignedAtomIndices[dim]; + } public void setPhase(final int phase) { this.phase = phase; @@ -104,14 +145,20 @@ public String toString() { s += this.nuclei[f] + "; "; } s += "\nShiftlist: "; - for (int f = 0; f < this.shift.length; f++) { - s += this.shift[f] + "; "; + for (int f = 0; f < this.shifts.length; f++) { + s += this.shifts[f] + "; "; } s += "\n\n"; return s; } - public String[] getNuclei(){ - return this.nuclei; + public Signal getClone(){ + final Signal signalClone = new Signal(this.nuclei, this.shifts); + signalClone.setIntensity(this.intensity); + signalClone.setMultiplicity(this.multiplicity); + signalClone.setPhase(this.phase); + signalClone.setAssignedAtomIndices(this.assignedAtomIndices); + + return signalClone; } } diff --git a/src/NMR/Spectrum.java b/src/casekit/NMR/model/Spectrum.java similarity index 88% rename from src/NMR/Spectrum.java rename to src/casekit/NMR/model/Spectrum.java index 3d4e247..576645a 100644 --- a/src/NMR/Spectrum.java +++ b/src/casekit/NMR/model/Spectrum.java @@ -26,7 +26,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. 
*/ -package NMR; +package casekit.NMR.model; import java.util.ArrayList; @@ -34,21 +34,8 @@ * * @author Michael Wenk [https://github.com/michaelwenk] */ -public class Spectrum extends ArrayList{ - - static public final String SPECTYPE_1D = "1D"; - static public final String SPECTYPE_DEPT90 = "DEPT90"; - static public final String SPECTYPE_DEPT135 = "DEPT135"; - static public final String SPECTYPE_HSQC = "HSQC"; - static public final String SPECTYPE_HHCOSY = "HHCOSY"; - static public final String SPECTYPE_INADEQUATE = "INADEQUATE"; - static public final String SPECTYPE_HMBC = "HMBC"; - static private final String[] SPECTYPES = new String[]{ SPECTYPE_1D, SPECTYPE_DEPT90, - SPECTYPE_DEPT135, SPECTYPE_HSQC, - SPECTYPE_HHCOSY, SPECTYPE_INADEQUATE, - SPECTYPE_HMBC}; - - +public class Spectrum extends ArrayList{ + /** * An arbitrary name or description that can be assigned to this spectrum for identification purposes. */ @@ -106,12 +93,8 @@ public int getDimCount(){ } public void setSpecType(final String specType){ - for (final String stype : SPECTYPES) { - if(specType.equals(stype)){ - this.specType = specType; - break; - } - } + + this.specType = specType; } public String getSpecType(){ @@ -151,9 +134,9 @@ public final void addSignals(final ArrayList[] shiftLists, final ArrayLi shifts[col] = shiftLists[col].get(row); } if(intensities != null){ - this.add(new NMR.Signal(this.nuclei, shifts, intensities.get(row), null, null)); + this.add(new casekit.NMR.model.Signal(this.nuclei, shifts, intensities.get(row))); } else { - this.add(new NMR.Signal(this.nuclei, shifts, null, null, null)); + this.add(new casekit.NMR.model.Signal(this.nuclei, shifts)); } } this.updateShiftLists(); @@ -170,7 +153,7 @@ public int getSignalCount() { } /** - * Adds a Signal ({@link NMR.Signal}) to this Spectrum class object. + * Adds a Signal ({@link casekit.NMR.model.Signal}) to this Spectrum class object. 
* @param signal */ public void addSignal(final Signal signal) { @@ -290,7 +273,7 @@ public Signal pickClosestSignal(final Double shift, final int dim, final double * @return */ public ArrayList pickSignals(final double shift, final int dim, final double pickPrecision) { - final ArrayList pickedSignals = new ArrayList<>(); + final ArrayList pickedSignals = new ArrayList<>(); /* * Now we search dimension dim for the chemical shift. */ @@ -311,7 +294,7 @@ private void updateShiftLists() { for (int dim = 0; dim < this.getDimCount(); dim++) { this.shiftList.add(dim, new ArrayList<>()); } - Double shift; NMR.Signal nmrSignal; + Double shift; casekit.NMR.model.Signal nmrSignal; for (int i = 0; i < this.size(); i++) { nmrSignal = this.get(i); for (int d = 0; d < this.getDimCount(); d++) { diff --git a/src/casekit/NMR/remarks b/src/casekit/NMR/remarks new file mode 100644 index 0000000..11a5071 --- /dev/null +++ b/src/casekit/NMR/remarks @@ -0,0 +1,21 @@ +MIT license + +- JUnit +- PMD: CDK GitHub PMD example +- JCoCo +- yourkit (license received) + + +MolSpec: +- [SEMI] determine hybridization via CDK (#H >= 3) and DB lookup (max. freq, TEMPORARILY)? +- setHybridizations function: parallization? + +Utils: +- write a parse function to read nmrML files +- in getHybridizations function: +1. parallization? +2. comparing of all attached/used hydrogens with molecular formula -> helps for bond type decisions +- in getBondTypeFromHybridizations: +1. can sulfur only have 1x double bond or 2x single bond in metabolomic systems? +2. as 1. but for nitrogen and phosphorus +3. is S hybridization always single bond? \ No newline at end of file diff --git a/src/casekit/NMR/test.java b/src/casekit/NMR/test.java new file mode 100644 index 0000000..5a5ffb4 --- /dev/null +++ b/src/casekit/NMR/test.java @@ -0,0 +1,179 @@ +/* + * To change this license header, choose License Headers in Project Properties. 
+ * To change this template file, choose Tools | Templates + * and open the template in the editor. + */ + +package casekit.NMR; + +import casekit.NMR.model.Spectrum; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.sql.Connection; +import java.sql.SQLException; +import java.util.ArrayList; +import java.util.logging.Level; +import java.util.logging.Logger; +import javax.xml.parsers.ParserConfigurationException; +import org.openscience.cdk.CDKConstants; +import org.openscience.cdk.DefaultChemObjectBuilder; +import org.openscience.cdk.interfaces.IAtomContainer; +import org.openscience.cdk.interfaces.IBond; +import org.openscience.cdk.interfaces.IMolecularFormula; +import org.openscience.cdk.silent.SilentChemObjectBuilder; +import org.openscience.cdk.tools.manipulator.MolecularFormulaManipulator; +import org.xml.sax.SAXException; + +/** + * + * @author Michael Wenk [https://github.com/michaelwenk] + */ +public class test { + + public static void main(String[] args) throws ParserConfigurationException, SAXException, CloneNotSupportedException, FileNotFoundException, SQLException, ClassNotFoundException { + + final int maxSpheres = 1; +// final String[] args2 = new String[]{"-i", "/Users/mwenk/Downloads/nmrshiftdb2withsignals.sd", "-o", "/Users/mwenk/Downloads/hose" + maxSpheres + ".tsv", "-m", String.valueOf(maxSpheres), "-v"}; +// try { +// final NMRShiftDBSDFParser parser = new NMRShiftDBSDFParser(args2); +// } catch (Exception ex) { +// Logger.getLogger(test.class.getName()).log(Level.SEVERE, null, ex); +// } + + + final String Peaks13C_HJ555 = "/Users/mwenk/work/research/Beemelmanns-HKI-HJ555/13C/50/pdata/1/peaklist.xml"; + final String Peaks13C_HJ777 = "/Users/mwenk/work/research/Beemelmanns-HKI-HJ777/HJ777_13C_NMR.csv"; + final String PeaksH1_HJ555 = "/Users/mwenk/work/research/Beemelmanns-HKI-HJ555/1H/1/pdata/1/peaklist.xml"; + final String Peaks1H_HJ777 = "/Users/mwenk/work/research/Beemelmanns-HKI-HJ777/HJ777_1H_NMR.csv"; + 
final String PeaksDEPT90_HJ555 = "/Users/mwenk/work/research/Beemelmanns-HKI-HJ555/HJ555_DEPT90_pseudo.xml"; + final String PeaksDEPT90_HJ777 = "/Users/mwenk/work/research/Beemelmanns-HKI-HJ777/HJ777_DEPT90_NMR_pseudo.csv"; + final String PeaksDEPT135_HJ555 = "/Users/mwenk/work/research/Beemelmanns-HKI-HJ555/DEPT135/5/pdata/1/peaklist.xml"; + final String PeaksDEPT135_HJ777 = "/Users/mwenk/work/research/Beemelmanns-HKI-HJ777/HJ777_DEPT135_NMR.csv"; + final String PeaksHSQC_HJ555 = "/Users/mwenk/work/research/Beemelmanns-HKI-HJ555/HSQC/3/pdata/1/peaklist.xml"; + final String PeaksINADEQUATE_HJ555 = "/Users/mwenk/work/research/Beemelmanns-HKI-HJ555/HJ555_INADEQUATE_pseudo.xml"; + final String PeaksHSQC_HJ777 = "/Users/mwenk/work/research/Beemelmanns-HKI-HJ777/HJ777_HSQC_NMR.csv"; + final String PeaksHMBC_HJ555 = "/Users/mwenk/work/research/Beemelmanns-HKI-HJ555/HMBC/4/pdata/1/peaklist.xml"; + final String PeaksHMBC_HJ777 = "/Users/mwenk/work/research/Beemelmanns-HKI-HJ777/HJ777_HMBC_NMR.csv"; + final String PeaksCOSY_HJ555 = "/Users/mwenk/work/research/Beemelmanns-HKI-HJ555/COSY/2/pdata/1/peaklist.xml"; + final String PeaksCOSY_HJ777 = "/Users/mwenk/work/research/Beemelmanns-HKI-HJ777/HJ777_COSY_NMR.csv"; + + final String pathToNMRShiftDB = "/Users/mwenk/Downloads/nmrshiftdb2withsignals.sd"; + final String pathToNMRShiftDBTest = "/Users/mwenk/Downloads/test.sdf"; + final String pathToNMRShiftDBHOSE = "/Users/mwenk/Downloads/hose" + maxSpheres + ".tsv"; + + + + final double tolC = 0.5; + final double tolH = 0.2; + final String molFormulaString_HJ555 = "C21H19NO8"; + final String molFormulaString_HJ777 = "C28H25NO11S"; + final IMolecularFormula molFormula_HJ555 = MolecularFormulaManipulator.getMolecularFormula(molFormulaString_HJ555, DefaultChemObjectBuilder.getInstance()); + final IMolecularFormula molFormula_HJ777 = MolecularFormulaManipulator.getMolecularFormula(molFormulaString_HJ777, DefaultChemObjectBuilder.getInstance()); + String projectName = ""; + 
casekit.NMR.Process process = null; + Spectrum spec; + + + IAtomContainer ac; + try { + // HJ555 + projectName = "HJ555"; + process = new Process(molFormula_HJ555); + spec = Utils.parseXML(Peaks13C_HJ555, 1, new int[]{1}, new String[]{"C"}); + process.set1DNMRShifts(spec); +// process.parse1DNMR(Peaks13C_HJ555, "C"); + + spec = Utils.parseXML(PeaksDEPT90_HJ555, 1, new int[]{1}, new String[]{"C"}); + Spectrum spec135 = Utils.parseXML(PeaksDEPT135_HJ555, 1, new int[]{1}, new String[]{"C"}); + int assignedHAtoms = process.setDEPT(spec, spec135, tolC); +// int assignedHAtoms = process.parseDEPT(PeaksDEPT90_HJ555, PeaksDEPT135_HJ555, tolC); + System.out.println("assigned protons: " + assignedHAtoms); + + spec = Utils.parseXML(PeaksHSQC_HJ555, 2, new int[]{2, 1}, new String[]{"H", "C"}); + process.setHSQC(spec, tolC); +// process.parseHSQC(PeaksHSQC_HJ555, "C", tolH); + + spec = Utils.parseXML(PeaksCOSY_HJ555, 2, new int[]{2, 1}, new String[]{"H", "H"}); + process.setHHCOSY(spec, tolH); +// process.parseHHCOSY(PeaksCOSY_HJ555, tolH); + + spec = Utils.parseXML(PeaksINADEQUATE_HJ555, 2, new int[]{2, 1}, new String[]{"C", "C"}); + process.setINADEQUATE(spec, tolC); +// process.parseINADEQUATE(PeaksINADEQUATE_HJ555, tolC); + + spec = Utils.parseXML(PeaksHMBC_HJ555, 2, new int[]{2, 1}, new String[]{"H", "C"}); + process.setHMBC(spec, tolH, tolC); +// process.parseHMBC(PeaksHMBC_HJ555, "C", tolH, tolC); + + process.setEquivalentProperties(); + process.setBonds(new String[]{CDKConstants.NMRSPECTYPE_2D_HHCOSY, CDKConstants.NMRSPECTYPE_2D_INADEQUATE, CDKConstants.NMRSPECTYPE_2D_HMBC}); // without hybridizations + process.createLSDFile(projectName, "/Users/mwenk/Downloads/testLSD", new String[]{"/Users/mwenk/work/software/LSD-3.4.9/Filters/", "/Users/mwenk/work/software/LSD-3.4.9/Filters/MOLGEN/badlist1/"}); + + +// // definition of all possible bond combinations up to 6 valences +// final String[] bondsSet = {"-", "--", "---", "----", "=", "==", "=-", "=--", "%", "%-"}; // up to 
4 valences (carbon) +// //"-----", "------", "=---", "=----", "==-", "==--", "===", "%%", "%--", "%---", "%=", "%=-"}; // up to 6 valences (e.g. sulfur) +// final String[] neighborElems = new String[]{"C", "O", "N", "S", "P", "Br", "Cl"}; +//// final IAtomContainerSet acSet = NMR.DB.getStructuresFromNMRShiftDBFile(pathToNMRShiftDB, 10); // ring size of 10 in aromaticity search (pubchem txt file) +// final Connection DBConnection = NMR.DB.getDBConnection("jdbc:mysql://localhost/nmrshiftdb", "useUnicode=true&useJDBCCompliantTimezoneShift=true&useLegacyDatetimeCode=false&serverTimezone=UTC&useSSL=false", "root", "jmd2017a"); +//// NMR.Utils.getSpectraIDsFromNMRShiftDB(DBConnection, 155.0, 156.0, "C"); +//// final HashMap> lookup = NMR.Utils.getLookupTableFromNMRShiftDB(DBConnection, "C"); +//// NMR.Utils.getRMS(lookup); +// final int minShift = 0, maxShift = 220, stepSize = 10; +// final String elem = "C"; +// NMR.DB.getRMS(DBConnection, minShift, maxShift, elem); +// final int[][] neighborhoodCountsMatrix = NMR.DB.countNeighborhoodBonds(DBConnection, bondsSet, elem, neighborElems, minShift, maxShift, stepSize); +// NMR.Utils.writeNeighborhoodBondsCountMatrix("/Users/mwenk/Downloads/countMatrix_" + elem + "_SQL.csv", neighborhoodCountsMatrix, bondsSet, elem, neighborElems, minShift, maxShift, stepSize); + + // create 1D spectrum + // coffein: 27.8;0.0Q;9|29.6;0.0Q;10|33.5;0.0Q;11|107.8;0.0S;5|144.3;0.0D;7|147.5;0.0S;4|151.6;0.0S;2|155.3;0.0S;0| +// final ArrayList spectrum = new ArrayList<>(); +// spectrum.add(new Signal(elem, 27.8, "Q", null)); +// spectrum.add(new Signal(elem, 29.6, "Q", null)); +// spectrum.add(new Signal(elem, 33.5, "Q", null)); +// spectrum.add(new Signal(elem, 107.8, "S", null)); +// spectrum.add(new Signal(elem, 144.3, "D", null)); +// spectrum.add(new Signal(elem, 147.5, "S", null)); +// spectrum.add(new Signal(elem, 151.6, "S", null)); +// spectrum.add(new Signal(elem, 155.3, "S", null)); +// NMR.DB.matchSpectrumAgainstDB(DBConnection, 
spectrum, 0.1, null, stepSize); +// proc.countNeighborhoodBonds(acSet, bondsSet, elem, neighborElems, minShift, maxShift, stepSize); +// NMR.Utils.writeNeighborhoodBondsCountMatrix("/Users/mwenk/Downloads/countMatrix_" + elem + ".csv", proc.getNeighborhoodBondsCountMatrix(), bondsSet, elem, neighborElems, minShift, maxShift, stepSize); + + + + } catch (IOException ex) { + Logger.getLogger(test.class.getName()).log(Level.SEVERE, null, ex); + } + + ac = process.getAtomContainer(); +// final HashMap> atomTypeIndices = proc.getAtomTypeIndices(); + System.out.println("\n"); + System.out.println(process.getAtomTypeIndices()); + for (int i = 0; i< ac.getAtomCount(); i++) { + System.out.println("i: " + i + " -> atom: " + ac.getAtom(i).getSymbol() + ", shift: " + ac.getAtom(i).getProperty(casekit.NMR.Utils.getNMRShiftConstant("C")) + ", #H: " + ac.getAtom(i).getImplicitHydrogenCount() + + ", H shifts: " + ac.getAtom(i).getProperty(CDKConstants.NMRSPECTYPE_2D_HSQC) + ", Hybrid.: " + ac.getAtom(i).getHybridization() + ", HHCOSY: " + ac.getAtom(i).getProperty(CDKConstants.NMRSPECTYPE_2D_HHCOSY) + + ", INADEQUATE: " + ac.getAtom(i).getProperty(CDKConstants.NMRSPECTYPE_2D_INADEQUATE) + ", HMBC: " + ac.getAtom(i).getProperty(CDKConstants.NMRSPECTYPE_2D_HMBC) + ", EQUAL: " + ac.getAtom(i).getProperty(casekit.NMR.ParseRawData.PROP_EQUIVALENCE)); + } + System.out.println("\nbond count: " + ac.getBondCount() + ":"); + for (IBond bond : ac.bonds()) { + System.out.println("bond: " + bond); + } + + +// System.out.println("\n\nOpen Bonds:\n"); +// for (int i = 0; i < ac.getAtomCount(); i++) { +// Utils.getOpenBonds(ac, i); +// } + + +// try { +// Utils.convertSDFtoLSD("/Users/mwenk/work/software/molgen5.02/badlist2.sdf", "/Users/mwenk/Downloads/", "/Users/mwenk/work/software/LSD-3.4.9/Mol2abSrc"); +// } catch (CDKException ex) { +// Logger.getLogger(test.class.getName()).log(Level.SEVERE, null, ex); +// } catch (IOException ex) { +// 
Logger.getLogger(test.class.getName()).log(Level.SEVERE, null, ex); +// } + + } +} diff --git a/src/casekit/Signal.java b/src/casekit/Signal.java deleted file mode 100644 index 1e87f1f..0000000 --- a/src/casekit/Signal.java +++ /dev/null @@ -1,45 +0,0 @@ -/* -* This Open Source Software is provided to you under the MIT License - * Refer to doc/mit.license or https://opensource.org/licenses/MIT for more information - * - * Copyright (c) 2017, Christoph Steinbeck - */ - -package casekit; - -public class Signal { - - Double shift = null; - Integer mult = null; - - public Signal() { - // TODO Auto-generated constructor stub - } - - Signal(double shift) - { - setShift(shift); - } - - Signal(double shift, int mult) - { - setShift(shift); - setMult(mult); - } - - public Integer getMult() { - return mult; - } - public void setMult(Integer mult) { - this.mult = mult; - } - - public Double getShift() { - return shift; - } - - public void setShift(Double shift) { - this.shift = shift; - } - -} diff --git a/src/casekit/SimilarityRanker.java b/src/casekit/SimilarityRanker.java index a85d1b9..6ace636 100644 --- a/src/casekit/SimilarityRanker.java +++ b/src/casekit/SimilarityRanker.java @@ -7,6 +7,8 @@ */ package casekit; +import casekit.NMR.model.Signal; +import casekit.NMR.model.Spectrum; import java.io.BufferedReader; import java.io.File; import java.io.FileReader; @@ -85,23 +87,23 @@ public void readSpectrum() throws NumberFormatException, IOException Integer mult = null; Signal signal; String tempString; - ArrayList spectrum = new ArrayList(); + Spectrum spectrum = new Spectrum(null); BufferedReader br = new BufferedReader(new FileReader(spectrumFile)); if (verbose) System.out.println("Start reading spectrum from " + spectrumFile); - while((line = br.readLine()) != null) - { - if (!line.startsWith("#") && line.trim().length() > 0) - { - strtok = new StringTokenizer(line, ";"); - if (verbose) System.out.println(line); - linecounter++; - - shift = 
Double.parseDouble(strtok.nextToken().trim()); - mult = Integer.parseInt(strtok.nextToken().trim()); - signal = new Signal(shift, mult); - spectrum.add(signal); - } - } +// while((line = br.readLine()) != null) +// { +// if (!line.startsWith("#") && line.trim().length() > 0) +// { +// strtok = new StringTokenizer(line, ";"); +// if (verbose) System.out.println(line); +// linecounter++; +// +// shift = Double.parseDouble(strtok.nextToken().trim()); +// mult = Integer.parseInt(strtok.nextToken().trim()); +// signal = new Signal(); +// spectrum.addSignal(signal); +// } +// } br.close(); if (verbose) System.out.println("Read " + linecounter + " signals from spectrum in file " + spectrumFile); @@ -177,8 +179,8 @@ public double calculateSimilarity(IAtomContainer ac, ArrayList spectrum) matchFound = false; for (int g = 0; g < spectrum.size(); g++) { - if (shifts[f] > spectrum.get(g).getShift().doubleValue()) diff = shifts[f] - spectrum.get(g).getShift().doubleValue(); - else diff = spectrum.get(g).getShift().doubleValue() - shifts[f]; +// if (shifts[f] > spectrum.get(g).getShift().doubleValue()) diff = shifts[f] - spectrum.get(g).getShift().doubleValue(); +// else diff = spectrum.get(g).getShift().doubleValue() - shifts[f]; df.format(diff); if (diff < lastDiff) { diff --git a/src/casekit/model/NMRSignal.java b/src/casekit/model/NMRSignal.java deleted file mode 100644 index fb28b7b..0000000 --- a/src/casekit/model/NMRSignal.java +++ /dev/null @@ -1,95 +0,0 @@ -package casekit.model; - -/* NMRSignal.java -* -* Copyright (C) Dr. 
Christoph Steinbeck -* -* Contact: christoph.steinbeck@uni-jena.de -* -* This software is published and distributed under MIT license -*/ - - -/** -* A class to store the properties of a single N-dimensional NMR signal -*/ - -public class NMRSignal { - - int dim; - - /** - * Am array of doubles to store the chemical shift of - */ - public float shift[]; - public String[] nucleus; - - /* Signal intensity in arbitrary values */ - public float intensity; - - public int phase; - public static int DIM_ONE = 1, DIM_TWO = 2, DIM_THREE = 3, DIM_FOUR = 4; - public static int SHIFT_PROTON = 0, SHIFT_HETERO = 1; - public static int PHASE_NEGATIVE = 2, PHASE_POSITIVE = 1, PHASE_NONE = 0; - public static String[] PHASENAMES = {"NONE", "POSITIVE", "NEGATIVE"}; - - public NMRSignal(String[] nucleus) { - this.dim = nucleus.length; - this.shift = new float[dim]; - this.nucleus = nucleus; - for (int f = 0; f < dim; f++) - shift[f] = 0; - intensity = 1; - phase = PHASE_POSITIVE; - } - - public NMRSignal(String[] nucleus, float[] shift, float intensity, int phase) { - this.dim = nucleus.length; - this.shift = shift; - this.nucleus = nucleus; - this.intensity = intensity; - this.phase = phase; - } - - public void setShift(float sshift, String nnucleus) { - for (int f = 0; f < nucleus.length; f++) { - if (nucleus[f].equals(nnucleus)) { - shift[f] = sshift; - break; - } - } - } - - public void setShift(float sshift, int dim) { - shift[dim] = sshift; - } - - public float getShift(String nnucleus) { - for (int f = 0; f < nucleus.length; f++) { - if (nucleus[f].equals(nnucleus)) { - return shift[f]; - } - } - - return Float.MAX_VALUE; - - } - - public float getShift(int dim) { - return shift[dim]; - } - - - public String toString() { - String s = ""; - s += dim + " -dimensional NMRSignal for nuclei "; - for (int f = 0; f < nucleus.length; f++) - s += nucleus[f] + "; "; - s += "\nShiftlist: "; - for (int f = 0; f < shift.length; f++) - s += shift[f] + "; "; - s += "\n\n"; - return s; - } - 
-} \ No newline at end of file diff --git a/src/casekit/model/NMRSpectrum.java b/src/casekit/model/NMRSpectrum.java deleted file mode 100644 index 3ca1a55..0000000 --- a/src/casekit/model/NMRSpectrum.java +++ /dev/null @@ -1,258 +0,0 @@ -package casekit.model; - -/* NMRSpectrum.java -* -* Copyright (C) 1997-2007 Christoph Steinbeck -* -* Contact: christoph.steinbeck@uni-jena.de -* -* This software is published and distributed under MIT License -*/ - -/** -* A Class to model an n-dimensional NMR spectrum, -* -*/ - -import javax.swing.event.EventListenerList; -import java.util.ArrayList; -import java.util.List; - -public class NMRSpectrum extends ArrayList{ - - /** - * An arbitrary name that can be assigned to this spectrum for identification purposes. - */ - public String name = ""; - /** - * An arbitrary name to identify the type of this spectrum, like COSY, NOESY, HSQC, etc. I - * decided not to provide static Strings with given experiment type since the there are - * numerous experiments yielding basically identical information having different names - */ - public String specType = ""; - /** - * The actual spectrum, i.e. a collection of nmrSignals - */ - // protected NMRSignal[] nmrSignals; - /** - * This holds sorted list of Chemical Shifts of all axes. The first dimension addresses the - * axes, the second the shift values in this axis, starting from the highest value. - */ - public List shiftList; - /** - * Not yet clear if this is needed. - */ - public float[] pickPrecision; - /** - * Declares how many axes are in involved in this spectrum. - */ - public int dim = 1; - /** - * The nuclei of the different axes. - */ - public String nucleus[]; - /** - * The proton frequency of the spectrometer used to record this spectrum. - */ - public float spectrometerFrequency; - public String solvent = ""; - public String standard = ""; - /** - * Some standard nulcei for the 'nucleus' field. 
- */ - public static String NUC_PROTON = "1H"; - public static String NUC_CARBON = "13C"; - public static String NUC_NITROGEN = "15N"; - public static String NUC_PHOSPHORUS = "31P"; - // ... to be continued... - public static String[] SPECTYPE_BB = {NUC_CARBON}; - public static String[] SPECTYPE_DEPT = {NUC_CARBON}; - public static String[] SPECTYPE_HMQC = {NUC_PROTON, NUC_CARBON}; - public static String[] SPECTYPE_HSQC = {NUC_PROTON, NUC_CARBON}; - public static String[] SPECTYPE_NHCORR = {NUC_PROTON, NUC_NITROGEN}; - public static String[] SPECTYPE_HMBC = {NUC_PROTON, NUC_CARBON}; - public static String[] SPECTYPE_HHCOSY = {NUC_PROTON, NUC_PROTON}; - public static String[] SPECTYPE_NOESY = {NUC_PROTON, NUC_PROTON}; - protected transient EventListenerList changeListeners = new EventListenerList(); - - public NMRSpectrum(String[] nucleus, String name) { - this.dim = nucleus.length; // redundant, I know :-) - this.nucleus = nucleus; - shiftList = new ArrayList(dim); - for (int f = 0; f < dim; f++) { - shiftList.add(f, new ArrayList()); - } - this.name = name; - } - - /** - * Return the number of individual frequencies in the heteroatom shift list, which should be - * equal or smaller than the number of respective atoms - */ - public int getSignalNumber(int axis) { - return shiftList.get(axis).size(); - } - - /** - * Adds an NMRSignal to the NMRSpectrum. 
- */ - public void addSignal(NMRSignal thisSignal) { - add(thisSignal); - updateShiftLists(); - } - - /** - * Creates an empty signal with correct dimension - */ - public void newSignal() { - System.out.println("nucleus: " + nucleus.length + nucleus[0]); - add(new NMRSignal(nucleus)); - updateShiftLists(); - } - - /** - * Returns an NMRSignal at position number in the List - */ - public Object getSignal(int number) { - return get(number); - } - - /** - * Returns the position of an NMRSignal the List - */ - public int getSignalNumber(NMRSignal signal) { - for (int f = 0; f < size(); f++) { - if (((NMRSignal) get(f)) == signal) { - return f; - } - } - return -1; - } - - public void setSpectrometerFrequency(float sf) { - this.spectrometerFrequency = sf; - } - - public float getSpectrometerFrequency() { - return spectrometerFrequency; - } - - public void setSolvent(String solvent) { - this.solvent = solvent; - } - - public String getSolvent() { - return solvent; - } - - public void setStandard(String standard) { - this.standard = standard; - } - - public String getStandard() { - return standard; - } - - /** - * Returns the signal closest to the shift sought. If no Signal is found within the interval - * defined by pickprecision, null is returned. - */ - public Object pickClosestSignal(float shift, String nnucleus, - float pickprecision) { - int dim = -1, thisPosition = -1; - float diff = Float.MAX_VALUE; - for (int f = 0; f < nucleus.length; f++) { - if (nucleus[f].equals(nnucleus)) { - dim = f; - break; - } - } - - /* - * Now we search dimension dim for the chemical shift. 
- */ - for (int f = 0; f < size(); f++) { - if (diff > Math.abs(((NMRSignal) get(f)).shift[dim] - shift)) { - diff = Math.abs(((NMRSignal) get(f)).shift[dim] - shift); - diff = (float) Math.ceil(diff * 2) / 2; - thisPosition = f; - } - } - if (diff < pickprecision) { - return get(thisPosition); - } - return null; - } - - /** - * Returns a List with signals within the interval defined by pickprecision. If none is found - * an empty List is returned. - */ - public List pickSignals(float shift, String nnucleus, float pickprecision) { - int dim = -1; - List pickedSignals = new ArrayList(); - for (int f = 0; f < nucleus.length; f++) { - if (nucleus[f].equals(nnucleus)) { - dim = f; - break; - } - } - /* - * Now we search dimension dim for the chemical shift. - */ - for (int f = 0; f < size(); f++) { - if (pickprecision > Math.abs(((NMRSignal) get(f)).shift[dim] - - shift)) { - pickedSignals.add(get(f)); - } - } - return pickedSignals; - } - - /** - * Extracts a list of unique shifts from the list of cross signals and sorts them. This is to - * define the column and row headers for tables. - */ - protected void updateShiftLists() { - Float shift; - for (int i = 0; i < size(); i++) { - NMRSignal nmrSignal = (NMRSignal) get(i); - for (int j = 0; j < nmrSignal.shift.length; j++) { - shift = new Float(nmrSignal.shift[j]); - if (!shiftList.get(j).contains(shift)) { - shiftList.get(j).add(shift); - } - } - } - } - - /** - * Creates a 2D matrix of booleans, that models the set of crosspeaks in the 2D NMR spectrum. - * The dimensions are taken from hetAtomShiftList and protonShiftList, which again are - * produced by updateShiftLists based a collection of 2D nmrSignals - *

- * private void createMatrix(){ boolean found; float het, prot; int hetPos, protPos; - * hetCorMatrix = new boolean[hetAtomShiftList.length][protonShiftList.length]; for (int f = - * 0; f < size(); f++){ HetCorNMRSignal hetCorSignal = (HetCorNMRSignal)elementAt(f); prot = - * hetCorSignal.shift[NMRSignal.SHIFT_PROTON]; het = - * hetCorSignal.shift[NMRSignal.SHIFT_HETERO]; found = false; hetPos = - * isInShiftList(hetAtomShiftList, het, hetAtomShiftList.length); if (hetPos >= 0){ protPos = - * isInShiftList(protonShiftList, prot, protonShiftList.length); if ( protPos >= 0){ found = - * true; hetCorMatrix[hetPos][protPos] = true; } } } } - */ - public void report() { - String s = ""; - System.out.println("Report for nmr spectrum " + name + " of type " - + specType + ": "); - for (int i = 0; i < shiftList.size(); i++) { - System.out.println("ShiftList for dimension " + (i + 1) + ":"); - for (int j = 0; j < shiftList.get(i).size(); j++) { - s += shiftList.get(i).get(j) + "; "; - } - System.out.println(s + "\n"); - s = ""; - } - - } - -} From 3b6621690144779cdfb7a2eca1ba779da18b7fe4 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Mon, 6 Aug 2018 18:44:38 +0200 Subject: [PATCH 016/405] - only changes to avoid build or run errors - this class is not ready to use properly --- src/casekit/SimilarityRanker.java | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/casekit/SimilarityRanker.java b/src/casekit/SimilarityRanker.java index 6ace636..024a58e 100644 --- a/src/casekit/SimilarityRanker.java +++ b/src/casekit/SimilarityRanker.java @@ -53,7 +53,7 @@ public class SimilarityRanker { public String outPath = null; public String spectrumFile = null; public String hoseTSVFile = null; - ArrayList spectrum = null; + Spectrum spectrum = null; ArrayList results = null; public boolean isVerbose() { @@ -154,7 +154,7 @@ public ArrayList rank() throws Exception return results; } - public double calculateSimilarity(IAtomContainer ac, ArrayList spectrum) 
+ public double calculateSimilarity(IAtomContainer ac, Spectrum spectrum) { double similarity = 0.0; double lastDiff = 0.0; @@ -162,7 +162,7 @@ public double calculateSimilarity(IAtomContainer ac, ArrayList spectrum) String shift = null; boolean matchFound = false; double diff = 0.0; - double shifts[] = new double[spectrum.size()]; + double shifts[] = new double[spectrum.getSignalCount()]; for (IAtom atom : ac.atoms()) { if (atom.getAtomicNumber() == 6) @@ -173,11 +173,11 @@ public double calculateSimilarity(IAtomContainer ac, ArrayList spectrum) counter ++; } } - for (int f = 0; f < spectrum.size(); f++) + for (int f = 0; f < spectrum.getSignalCount(); f++) { lastDiff = 10000000000.0; matchFound = false; - for (int g = 0; g < spectrum.size(); g++) + for (int g = 0; g < spectrum.getSignalCount(); g++) { // if (shifts[f] > spectrum.get(g).getShift().doubleValue()) diff = shifts[f] - spectrum.get(g).getShift().doubleValue(); // else diff = spectrum.get(g).getShift().doubleValue() - shifts[f]; @@ -190,7 +190,7 @@ public double calculateSimilarity(IAtomContainer ac, ArrayList spectrum) } if (matchFound) similarity += lastDiff; } - return similarity/spectrum.size(); + return similarity/spectrum.getSignalCount(); } public void reportResults() throws Exception From 1a45997265cee4a4b7c4b94ee5fb1a0dc10fafaf Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Mon, 6 Aug 2018 18:46:16 +0200 Subject: [PATCH 017/405] - removed: some unnecessary lines --- src/casekit/NMR/DB.java | 99 +++-------------------------------------- 1 file changed, 5 insertions(+), 94 deletions(-) diff --git a/src/casekit/NMR/DB.java b/src/casekit/NMR/DB.java index 681f97a..6a0c484 100644 --- a/src/casekit/NMR/DB.java +++ b/src/casekit/NMR/DB.java @@ -34,21 +34,12 @@ import java.sql.Statement; import java.util.ArrayList; import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; import org.openscience.cdk.exception.CDKException; -import org.openscience.cdk.fingerprint.Fingerprinter; 
-import org.openscience.cdk.fingerprint.IBitFingerprint; -import org.openscience.cdk.fingerprint.IFingerprinter; -import org.openscience.cdk.interfaces.IAtom; import org.openscience.cdk.interfaces.IAtomContainer; import org.openscience.cdk.interfaces.IAtomContainerSet; -import org.openscience.cdk.interfaces.IAtomType; import org.openscience.cdk.io.iterator.IteratingSDFReader; -import org.openscience.cdk.qsar.descriptors.atomic.AtomHybridizationDescriptor; import org.openscience.cdk.silent.AtomContainerSet; import org.openscience.cdk.silent.SilentChemObjectBuilder; -import org.openscience.cdk.tools.manipulator.AtomContainerManipulator; /** * @@ -218,13 +209,13 @@ public static ArrayList getSignalIDsFromNMRShiftDB(final Connection DBC public static HashMap> matchSpectrumAgainstDB(final Connection DBConnection, final Spectrum spectrum, final double shiftDev, final Double intensDev, final int stepSize, final int dim) throws SQLException{ final HashMap> hits = new HashMap<>(); double shift; - for (int i = 0; i < spectrum.size(); i++) { + for (int i = 0; i < spectrum.getSignalCount(); i++) { hits.put(i, new ArrayList<>()); - shift = Math.floor(spectrum.get(i).getShift(dim) * stepSize) / (double) stepSize; - if(spectrum.get(i).getIntensity() != null){ - hits.get(i).addAll(casekit.NMR.DB.getSignalIDsFromNMRShiftDB(DBConnection, shift - shiftDev, shift + shiftDev, spectrum.get(i).getMultiplicity(), spectrum.get(i).getIntensity() - intensDev, spectrum.get(i).getIntensity() + intensDev, spectrum.get(i).getNuclei()[dim])); + shift = Math.floor(spectrum.getSignal(i).getShift(dim) * stepSize) / (double) stepSize; + if(spectrum.getSignal(i).getIntensity() != null){ + hits.get(i).addAll(casekit.NMR.DB.getSignalIDsFromNMRShiftDB(DBConnection, shift - shiftDev, shift + shiftDev, spectrum.getSignal(i).getMultiplicity(), spectrum.getSignal(i).getIntensity() - intensDev, spectrum.getSignal(i).getIntensity() + intensDev, spectrum.getSignal(i).getNuclei()[dim])); } else { - 
hits.get(i).addAll(casekit.NMR.DB.getSignalIDsFromNMRShiftDB(DBConnection, shift - shiftDev, shift + shiftDev, spectrum.get(i).getMultiplicity(), spectrum.get(i).getIntensity(), spectrum.get(i).getIntensity(), spectrum.get(i).getNuclei()[dim])); + hits.get(i).addAll(casekit.NMR.DB.getSignalIDsFromNMRShiftDB(DBConnection, shift - shiftDev, shift + shiftDev, spectrum.getSignal(i).getMultiplicity(), spectrum.getSignal(i).getIntensity(), spectrum.getSignal(i).getIntensity(), spectrum.getSignal(i).getNuclei()[dim])); } } @@ -319,84 +310,4 @@ public static ArrayList getSpectraFromNMRShiftDBEntry(final IAtomContain return spectra; } - - - - - - - - - - - - - - - - - - - - - - - // TRIALS - - - public static void findSubstructuresInNMRShiftDB(final IAtomContainer acQ, final String pathToNMRShiftDB) throws CDKException, FileNotFoundException, CloneNotSupportedException { - - final IAtomContainer acQcopy = acQ.clone(); - AtomContainerManipulator.convertImplicitToExplicitHydrogens(acQcopy); - - final IFingerprinter fingerprinter = new Fingerprinter(); - final IBitFingerprint fingerprintQ = fingerprinter.getBitFingerprint(acQcopy); -// System.out.println("Q: cardinality: " + fingerprintQ.cardinality() + ", bit set: " + Arrays.toString(fingerprintQ.getSetbits())); - IBitFingerprint fingerprintDB; - IAtomContainer acDB; - final IteratingSDFReader iterator = new IteratingSDFReader( - new FileReader(pathToNMRShiftDB), - SilentChemObjectBuilder.getInstance() - ); - final AtomHybridizationDescriptor hybridDesc = new AtomHybridizationDescriptor(); - int molCounter = 1; - while (iterator.hasNext()) { - acDB = iterator.next(); -// // skip structures which do not at least contain one carbon spectrum -// if (!acDB.getProperties().containsKey("Spectrum 13C 0")) { -// continue; -// } -// IAtomContainer acDBcopy = acDB.clone(); -// AtomContainerManipulator.convertImplicitToExplicitHydrogens(acDBcopy); -// fingerprintDB = fingerprinter.getBitFingerprint(acDBcopy); -//// 
System.out.println("DB: cardinality: " + fingerprintDB.cardinality() + ", bit set: " + Arrays.toString(fingerprintDB.getSetbits())); -// -//// fingerprintDB.and(fingerprintQ); -//// System.out.println("and: " + Arrays.toString(fingerprintDB.getSetbits())); -// -// if(Tanimoto.calculate(fingerprintQ, fingerprintDB) >= 0.25) -// System.out.println("similarity: " + Tanimoto.calculate(fingerprintQ, fingerprintDB) + " at " + acDB.getProperties()); - - int counter = 0; - for (int i = 0; i < acDB.getAtomCount(); i++) { - if (acDB.getAtom(i).getSymbol().equals("N")) { - for (IAtom neighbor : acDB.getConnectedAtomsList(acDB.getAtom(i))) { - if (neighbor.getSymbol().equals("C") && IAtomType.Hybridization.values()[Integer.parseInt(hybridDesc.calculate(neighbor, acDB).getValue().toString())].equals(IAtomType.Hybridization.SP2)) { - for (IAtom neighbor2 : acDB.getConnectedAtomsList(neighbor)) { - if (neighbor2.getSymbol().equals("C") && neighbor2.getImplicitHydrogenCount() == 3) { - counter++; - } - } - } - } - if (counter >= 2) { - System.out.println(molCounter); - } - break; - } - } - molCounter++; - } - - } } From e3ce9733856ce88bc1c69e455419231cffabe222 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Mon, 6 Aug 2018 18:47:51 +0200 Subject: [PATCH 018/405] - saving atom matching indices in atom container in spectrum class objects directly --- src/casekit/NMR/ParseRawData.java | 580 ++++++++---------------------- 1 file changed, 150 insertions(+), 430 deletions(-) diff --git a/src/casekit/NMR/ParseRawData.java b/src/casekit/NMR/ParseRawData.java index 11336c4..d659e46 100644 --- a/src/casekit/NMR/ParseRawData.java +++ b/src/casekit/NMR/ParseRawData.java @@ -24,13 +24,14 @@ package casekit.NMR; import casekit.NMR.model.Spectrum; +import java.io.BufferedReader; import java.io.IOException; +import java.io.InputStreamReader; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.Map; -import 
java.util.Scanner; import javax.xml.parsers.ParserConfigurationException; import org.openscience.cdk.Atom; import org.openscience.cdk.CDKConstants; @@ -50,7 +51,8 @@ public class ParseRawData { final private IAtomContainer mol; final private IMolecularFormula molFormula; private HashMap> atomTypeIndices; - final public static String PROP_EQUIVALENCE = "Equivalence"; + + public final static String PROP_EQUIVALENCE = "equivalences"; /** @@ -144,87 +146,40 @@ public final void setEquivalentProperties() { /** - * Wrapper function for automatically choosing which file format to take. - * For more details see {@link NMR.ParseRawData#parse1DNMRviaPeakTable(String, String)} - * and {@link NMR.ParseRawData#parse1DNMRviaXML(String, String)} + * Creates a Spectrum class object from 1D NMR peak list in CSV file format. * - * @param pathToPeakList - * @param atomType - * @return true if a known file extension was given - * @throws IOException - * @throws ParserConfigurationException - * @throws SAXException - */ - public final boolean parse1DNMR(final String pathToPeakList, final String atomType) throws IOException, ParserConfigurationException, SAXException{ - - switch (casekit.NMR.Utils.getFileFormat(pathToPeakList)) { - case "csv": - return this.parse1DNMRviaPeakTable(pathToPeakList, atomType); - case "xml": - return this.parse1DNMRviaXML(pathToPeakList, atomType); - default: - return false; - } - } - - - /** - * Assigns shift values from 1D NMR peak list to atoms of an IAtomContainer. - * The shift values will be assigned sequentially. - * In case of a molecular formula is given in this class, the number of - * shifts must be equal to the number of atoms in this molecular formula. - * For less shifts in shift list you will be asked for entering equivalences. - * Otherwise this function will return a false value. - * In case of no molecular was given to this class, a new atom in the atom container - * will be created regarding to the input shift list. 
- * Each shift value is set to {@link IAtomContainer#setProperty(java.lang.Object, java.lang.Object) - * as result of Utils#getNMRShiftConstant(java.lang.String)}, depending on - * the specified atom type (element). - * - * - * @param pathToPeakList Path to peak list (Bruker's TopSpin csv file + * @param pathToCSV Path to peak list (Bruker's TopSpin csv file * format) * @param atomType Element name (e.g. "C") which also occurrs in * {@link Utils#getNMRShiftConstant(java.lang.String)} - * @return false if input shift list size greater than the number of atoms in - * molecular formula, if such was given to the class + * @return Spectrum class object from given input file * @throws java.io.IOException */ - public final boolean parse1DNMRviaPeakTable(final String pathToPeakList, final String atomType) throws IOException { + public final Spectrum parse1DNMRviaCSV(final String pathToCSV, final String atomType) throws IOException { - final Spectrum spectrum = Utils.parsePeakTable(pathToPeakList, new int[]{4}, new String[]{atomType}, 6); + final Spectrum spectrum = Utils.CSVtoSpectrum(pathToCSV, new int[]{4}, new String[]{atomType}, 6); - return this.set1DNMRShifts(spectrum); + return spectrum; } + /** - * Assigns shift values from 1D NMR XML file to atoms of an IAtomContainer. - * The shift values will be assigned sequentially. - * In case of a molecular formula is given in this class, the number of - * shifts must be equal to the number of atoms in this molecular formula. - * For less shifts in shift list you will be asked for entering equivalences. - * Otherwise this function will return a false value. - * In case of no molecular was given to this class, a new atom in the atom container - * will be created regarding to the input shift list. - * Each shift value is set to {@link IAtomContainer#setProperty(java.lang.Object, java.lang.Object) - * as result of Utils#getNMRShiftConstant(java.lang.String)}, depending on - * the specified atom type (element). 
- * - * @param pathToXML Path to XML file (Bruker's TopSpin XML file + * Creates a Spectrum class object from 1D NMR peak list in XML file format. + * + * @param pathToXML Path to peak list (Bruker's TopSpin csv file * format) * @param atomType Element name (e.g. "C") which also occurrs in * {@link Utils#getNMRShiftConstant(java.lang.String)} - * @return false if input shift list size greater than the number of atoms in - * molecular formula, if such was given to the class + * @return Spectrum class object from given input file * @throws java.io.IOException * @throws javax.xml.parsers.ParserConfigurationException * @throws org.xml.sax.SAXException */ - public final boolean parse1DNMRviaXML(final String pathToXML, final String atomType) throws IOException, ParserConfigurationException, SAXException { + public final Spectrum parse1DNMRviaXML(final String pathToXML, final String atomType) throws IOException, ParserConfigurationException, SAXException { - final Spectrum spectrum = Utils.parseXML(pathToXML, 1, new int[]{1}, new String[]{atomType}); + final Spectrum spectrum = Utils.XMLtoSpectrum(pathToXML, 1, new int[]{1}, new String[]{atomType}); - return this.set1DNMRShifts(spectrum); + return spectrum; } @@ -240,17 +195,21 @@ public final boolean parse1DNMRviaXML(final String pathToXML, final String atomT * Each shift value is set to {@link IAtomContainer#setProperty(java.lang.Object, java.lang.Object) * as result of Utils#getNMRShiftConstant(java.lang.String)}, depending on * the specified atom type (element). + * After usage of this function, the input Spectrum class object might be extended during + * equivalent signal selection by user and contains the signal assignment indices + * in atom container. 
* * @param spectrum Spectrum class object containing the 1D shift information - * @return + * @return + * @throws java.io.IOException */ - public final boolean set1DNMRShifts(final Spectrum spectrum){ + public final boolean set1DNMR(final Spectrum spectrum) throws IOException{ final String atomType = Utils.getElementIdentifier(spectrum.getNuclei()[0]); - final ArrayList shifts = spectrum.getShiftsByDim(0); + ArrayList shifts = spectrum.getShiftsByDim(0); // check whether indices for that atom type exist or the number of input signals are greater than the atom number in atom container for that atom type if (!this.atomTypeIndices.containsKey(atomType) || shifts.size() > this.atomTypeIndices.get(atomType).size()) { // if molecular formula is known and too much picked peaks are to be assigned - if(this.atomTypeIndices.containsKey(atomType)){ + if(this.atomTypeIndices.containsKey(atomType) || MolecularFormulaManipulator.getElementCount(this.molFormula, atomType) == 0){ System.err.println("Too many peaks in peak list for \"" + atomType + "\" and molecular formula \"" + MolecularFormulaManipulator.getString(this.molFormula) + "\"!!!"); return false; } else { // @@ -265,35 +224,38 @@ public final boolean set1DNMRShifts(final Spectrum spectrum){ this.setAtomTypeIndices(); } } + // assign shift values to atoms sequentially int assignedShiftCount = 0; for (final int i : this.atomTypeIndices.get(atomType)) { if(assignedShiftCount < shifts.size()){ - // shift assignment + // shift assignment in atom this.mol.getAtom(i).setProperty(casekit.NMR.Utils.getNMRShiftConstant(atomType), shifts.get(assignedShiftCount)); + spectrum.getSignal(assignedShiftCount).setAssignedAtomIndex(i, 0); } assignedShiftCount++; } // "fill up" the missing equivalent peaks // check whether the number of input signals is smaller than the number of atoms in atom container from that atom type - if (spectrum.size() < this.atomTypeIndices.get(atomType).size()) { + if (shifts.size() < 
this.atomTypeIndices.get(atomType).size()) { System.out.println("Not enough peaks in 1D peak list for \"" + atomType + "\"!!!"); - this.askForEquivalentPeaks(atomType); + this.askForEquivalentPeaks(spectrum, atomType); } - this.setAtomTypeIndices(); - return true; } - private void askForEquivalentPeaks(final String atomType) { + private void askForEquivalentPeaks(final Spectrum spectrum, final String atomType) throws IOException { - final Scanner reader = new Scanner(System.in); int n = -1; - final HashSet validIndices = new HashSet<>(); + BufferedReader br = new BufferedReader(new InputStreamReader(System.in)); int n; + final ArrayList validIndices = new ArrayList<>(); + // walk through all atoms of given atom type for (final int i : this.atomTypeIndices.get(atomType)) { + // ignore atoms with already set NMR shift value if (this.mol.getAtom(i).getProperty(Utils.getNMRShiftConstant(atomType)) != null) { continue; } + // display all selectable atom indices in atom container System.out.println("\nThe " + i + "th shift value is missing!\nWhich shift value is not unique?"); for (final int k : this.atomTypeIndices.get(atomType)) { if(this.mol.getAtom(k).getProperty(Utils.getNMRShiftConstant(atomType)) != null){ @@ -301,12 +263,18 @@ private void askForEquivalentPeaks(final String atomType) { validIndices.add(k); } } + // get selected index by user input n = -1; while(!validIndices.contains(n)){ System.out.println("Enter the index: "); - n = reader.nextInt(); + n = Integer.parseInt(br.readLine()); } + + spectrum.addSignal(spectrum.getSignal(validIndices.indexOf(n))); + // copy NMR shift value from already set value in selected atom n to unset value in atom i this.mol.getAtom(i).setProperty(Utils.getNMRShiftConstant(atomType), this.mol.getAtom(n).getProperty(Utils.getNMRShiftConstant(atomType))); + + if(this.mol.getAtom(i).getProperty(ParseRawData.PROP_EQUIVALENCE) == null){ this.mol.getAtom(i).setProperty(ParseRawData.PROP_EQUIVALENCE, new ArrayList<>()); } @@ -316,35 
+284,9 @@ private void askForEquivalentPeaks(final String atomType) { ((ArrayList) this.mol.getAtom(i).getProperty(ParseRawData.PROP_EQUIVALENCE)).add(n); ((ArrayList) this.mol.getAtom(n).getProperty(ParseRawData.PROP_EQUIVALENCE)).add(i); } - reader.close(); } - /** - * Wrapper function for automatically choosing which file format to take. - * For more details see - * {@link NMR.ParseRawData#parseDEPTviaPeakTable(java.lang.String, java.lang.String, double) } - * and {@link NMR.ParseRawData#parseDEPTviaXML(java.lang.String, java.lang.String, double) } - * @param pathToDEPT90 - * @param pathToDEPT135 - * @param tol - * @return - * @throws IOException - * @throws ParserConfigurationException - * @throws SAXException - */ - public final int parseDEPT(final String pathToDEPT90, final String pathToDEPT135, final double tol) throws IOException, ParserConfigurationException, SAXException { - - if(casekit.NMR.Utils.getFileFormat(pathToDEPT90).equals("csv") && casekit.NMR.Utils.getFileFormat(pathToDEPT135).equals("csv")) { - return this.parseDEPTviaPeakTable(pathToDEPT90, pathToDEPT135, tol); - } else if(casekit.NMR.Utils.getFileFormat(pathToDEPT90).equals("xml") && casekit.NMR.Utils.getFileFormat(pathToDEPT135).equals("xml")) { - return this.parseDEPTviaXML(pathToDEPT90, pathToDEPT135, tol); - } - - return 0; - } - - /** * Sets the number of implicit hydrogens from two carbon DEPT90 and DEPT135 * peak @@ -352,21 +294,16 @@ public final int parseDEPT(final String pathToDEPT90, final String pathToDEPT135 * see * {@link Utils#correctShiftMatches(IAtomContainer, ArrayList, ArrayList, double,String)}. 
* - * @param pathToDEPT90 Path to DEPT90 peak list (Bruker's TopSpin csv file + * @param pathToDEPT Path to one DEPT peak list (Bruker's TopSpin csv file * format) - * @param pathToDEPT135 Path to DEPT135 peak list (Bruker's TopSpin csv file - * format) - * @param tol Tolance value [ppm] when matching carbon shifts * @return * @throws java.io.IOException */ - public final int parseDEPTviaPeakTable(final String pathToDEPT90, final String pathToDEPT135, final double tol) throws IOException { - - final ArrayList matchesDEPT90 = casekit.NMR.Utils.matchShiftsFromPeakTable(this.mol, pathToDEPT90, "C", tol, 4); - final ArrayList matchesDEPT135 = casekit.NMR.Utils.matchShiftsFromPeakTable(this.mol, pathToDEPT135, "C", tol, 4); - final ArrayList intensitiesDEPT135 = casekit.NMR.Utils.parsePeakTable(pathToDEPT135, 6); + public final Spectrum parseDEPTviaCSV(final String pathToDEPT) throws IOException { + + final Spectrum spectrum = Utils.CSVtoSpectrum(pathToDEPT, new int[]{4}, new String[]{"C"}, 6); - return this.setImplicitHydrogenNumberFromDEPT(matchesDEPT90, matchesDEPT135, intensitiesDEPT135); + return spectrum; } /** @@ -374,23 +311,18 @@ public final int parseDEPTviaPeakTable(final String pathToDEPT90, final String p * XML files to carbon atoms. The meanwhile found matches are corrected, see * {@link Utils#correctShiftMatches(IAtomContainer, ArrayList, ArrayList, double,String)}. 
* - * @param pathToDEPT90 Path to DEPT90 peak list (Bruker's TopSpin XML file - * format) - * @param pathToDEPT135 Path to DEPT135 peak list (Bruker's TopSpin XML file + * @param pathToDEPT Path to one DEPT peak list (Bruker's TopSpin XML file * format) - * @param tol Tolance value [ppm] for matching carbon shifts * @return * @throws java.io.IOException * @throws javax.xml.parsers.ParserConfigurationException * @throws org.xml.sax.SAXException */ - public final int parseDEPTviaXML(final String pathToDEPT90, final String pathToDEPT135, final double tol) throws IOException, ParserConfigurationException, SAXException { + public final Spectrum parseDEPTviaXML(final String pathToDEPT) throws IOException, ParserConfigurationException, SAXException { - final ArrayList matchesDEPT90 = casekit.NMR.Utils.matchShiftsFromXML(this.mol, pathToDEPT90, "C", tol, 1, 1); - final ArrayList matchesDEPT135 = casekit.NMR.Utils.matchShiftsFromXML(this.mol, pathToDEPT135, "C", tol, 1, 1); - final ArrayList intensitiesDEPT135 = casekit.NMR.Utils.parseXML(pathToDEPT135, 1, 2); + final Spectrum spectrum = Utils.XMLtoSpectrum(pathToDEPT, 1, new int[]{1}, new String[]{"C"}); - return this.setImplicitHydrogenNumberFromDEPT(matchesDEPT90, matchesDEPT135, intensitiesDEPT135); + return spectrum; } /** @@ -416,6 +348,8 @@ public final int setDEPT(final Spectrum spectrumDEPT90, final Spectrum spectrumD ArrayList matchesDEPT135 = casekit.NMR.Utils.findShiftMatches(this.mol, shiftsDEPT135, tol, "C"); matchesDEPT135 = casekit.NMR.Utils.correctShiftMatches(this.mol, shiftsDEPT135, matchesDEPT135, tol, "C"); + spectrumDEPT90.setAssignedAtomIndicesByDim(matchesDEPT90, 0); + spectrumDEPT135.setAssignedAtomIndicesByDim(matchesDEPT135, 0); return this.setImplicitHydrogenNumberFromDEPT(matchesDEPT90, matchesDEPT135, intensitiesDEPT135); } @@ -472,116 +406,50 @@ private int setImplicitHydrogenNumberFromDEPT(final ArrayList matchesDE /** - * Wrapper function for automatically choosing which file format to take. 
- * For more details see - * {@link NMR.ParseRawData#parseHSQCviaPeakTable(java.lang.String, java.lang.String, double)} - * and {@link NMR.ParseRawData#parseHSQCviaXML(java.lang.String, java.lang.String, double)} - * - * @param pathToPeakList - * @param atomType - * @param tol - * @return - * @throws IOException - * @throws ParserConfigurationException - * @throws SAXException - */ - public final boolean parseHSQC(final String pathToPeakList, final String atomType, final double tol) throws IOException, ParserConfigurationException, SAXException { - - switch (casekit.NMR.Utils.getFileFormat(pathToPeakList)) { - case "csv": - parseHSQCviaPeakTable(pathToPeakList, atomType, tol); - break; - case "xml": - parseHSQCviaXML(pathToPeakList, atomType, tol); - break; - default: - return false; - } - - return true; - } - - - /** - * Assigns shifts to implicit hydrogens of a given atom type from HSQC - * peak table, e.g. 1H,13C-HSQC or 1H,15N-HSQC. The implicit hydrogen - * number for an atom of the given atom type must be set beforehand. - * In case of 1H,13C-HSQC, this could be done by - * {@link ParseRawData#parseDEPT(String, String, double)} or - * {@link ParseRawData#parseDEPTviaPeakTable(String, String, double)} or - * {@link ParseRawData#parseDEPTviaXML(String, String, double) }. - * The property is then set to {@link #CONST_PROP_PROTONSHIFTS} in - * {@link IAtomContainer#setProperty(java.lang.Object, java.lang.Object)}. + * Creates a Spectrum class object from given HSQC input file in CSV format. * - * @param pathToPeakList path to HSQC peak table (Bruker's TopSpin csv file + * @param pathToCSV path to HSQC peak table (Bruker's TopSpin csv file * format) * @param heavyAtomType Element name of H bonded heavy atom (e.g. 
"C") which also occurrs in * {@link Utils#getNMRShiftConstant(java.lang.String)} - * @param tol tolerance value [ppm] for matching the atoms of given atom - * type - * within the atom container + * @return * @throws IOException */ - public final void parseHSQCviaPeakTable(final String pathToPeakList, final String heavyAtomType, final double tol) throws IOException { - - final ArrayList shiftsHydrogen = casekit.NMR.Utils.parsePeakTable(pathToPeakList, 5); - final ArrayList matchesHeavyAtomType = casekit.NMR.Utils.matchShiftsFromPeakTable(this.mol, pathToPeakList, heavyAtomType, tol, 6); + public final Spectrum parseHSQCviaCSV(final String pathToCSV, final String heavyAtomType) throws IOException { - this.setImplicitHydrogenShifts(shiftsHydrogen, matchesHeavyAtomType); + final Spectrum spectrum = new Spectrum( new String[]{Utils.getIsotopeIdentifier("H"), + Utils.getIsotopeIdentifier(heavyAtomType)}, + new ArrayList[]{casekit.NMR.Utils.parseCSV(pathToCSV, 5), + casekit.NMR.Utils.parseCSV(pathToCSV, 6)}, + casekit.NMR.Utils.parseCSV(pathToCSV, 9)); + + return spectrum; } /** - * Assigns shifts to implicit hydrogens of a given atom type from HSQC XML - * file, e.g. 1H,13C-HSQC or 1H,15N-HSQC. The implicit hydrogen - * number for an atom of the given atom type must be set beforehand. - * In case of 1H,13C-HSQC, this could be done by - * {@link ParseRawData#parseDEPT(String, String, double)} or - * {@link ParseRawData#parseDEPTviaPeakTable(String, String, double)} or - * {@link ParseRawData#parseDEPTviaXML(String, String, double) }. - * The property is then set to {@link #CONST_PROP_PROTONSHIFTS} in - * {@link IAtomContainer#setProperty(java.lang.Object, java.lang.Object)}. + * Creates a Spectrum class object from given HSQC input file in XML format. * * @param pathToXML path to HSQC XML file * @param heavyAtomType Element name of H bonded heavy atom (e.g. 
"C") which also occurrs in * {@link Utils#getNMRShiftConstant(java.lang.String)} - * @param tol tolerance value [ppm] for matching the atoms of given atom - * type - * within the atom container + * @return * @throws IOException * @throws javax.xml.parsers.ParserConfigurationException * @throws org.xml.sax.SAXException */ - public final void parseHSQCviaXML(final String pathToXML, final String heavyAtomType, final double tol) throws IOException, ParserConfigurationException, SAXException { - - final ArrayList shiftsHydrogen = casekit.NMR.Utils.parseXML(pathToXML, 2, 2); - final ArrayList matchesHeavyAtomType = casekit.NMR.Utils.matchShiftsFromXML(this.mol, pathToXML, heavyAtomType, tol, 2, 1); - - this.setImplicitHydrogenShifts(shiftsHydrogen, matchesHeavyAtomType); - } - + public final Spectrum parseHSQCviaXML(final String pathToXML, final String heavyAtomType) throws IOException, ParserConfigurationException, SAXException { - private void setImplicitHydrogenShifts(final ArrayList shiftsHydrogen, final ArrayList matchesHeavyAtomType) { + final Spectrum spectrum = new Spectrum( new String[]{ Utils.getIsotopeIdentifier("H"), + Utils.getIsotopeIdentifier(heavyAtomType)}, + new ArrayList[]{casekit.NMR.Utils.parseXML(pathToXML, 2, 2), + casekit.NMR.Utils.parseXML(pathToXML, 2, 1)}, + casekit.NMR.Utils.parseXML(pathToXML, 2, 3)); - IAtom matchAtom; - ArrayList assignedHydrogensShifts; - for (int i = 0; i < matchesHeavyAtomType.size(); i++) { - if (matchesHeavyAtomType.get(i) >= 0) { - matchAtom = this.mol.getAtom(matchesHeavyAtomType.get(i)); - if (matchAtom.getImplicitHydrogenCount() == null || matchAtom.getImplicitHydrogenCount() == 0) { - continue; - } - if (matchAtom.getProperty(CDKConstants.NMRSPECTYPE_2D_HSQC) == null) { - matchAtom.setProperty(CDKConstants.NMRSPECTYPE_2D_HSQC, new ArrayList<>(matchAtom.getImplicitHydrogenCount())); - } - assignedHydrogensShifts = matchAtom.getProperty(CDKConstants.NMRSPECTYPE_2D_HSQC); - if (assignedHydrogensShifts.size() < 
matchAtom.getImplicitHydrogenCount()) { - assignedHydrogensShifts.add(shiftsHydrogen.get(i)); - } - } - } + return spectrum; } - /** + + /** * Sets the proton shift(s) as list to belonging heavy atoms of an * HSQC signal relationship. * The property is then set to {@link #CONST_PROP_PROTONSHIFTS} in @@ -589,20 +457,29 @@ private void setImplicitHydrogenShifts(final ArrayList shiftsHydrogen, f * * @param spectrum Spectrum class object consisting of Signal class objects * where the proton values are given first and the heavy atom values as the second. - * @param tolHeavy tolerance value [ppm] for heavy atom shift matching + * @param tolHeavyAtom tolerance value [ppm] for heavy atom shift matching */ - public final void setHSQC(final Spectrum spectrum, final double tolHeavy) { + public final void setHSQC(final Spectrum spectrum, final double tolHeavyAtom) { final ArrayList shiftsHydrogen = spectrum.getShiftsByDim(0); final ArrayList shiftsHeavyAtom = spectrum.getShiftsByDim(1); - ArrayList matchesHeavyAtom = casekit.NMR.Utils.findShiftMatches(this.mol, shiftsHeavyAtom, tolHeavy, Utils.getElementIdentifier(spectrum.getNuclei()[1])); - matchesHeavyAtom = casekit.NMR.Utils.correctShiftMatches(this.mol, shiftsHeavyAtom, matchesHeavyAtom, tolHeavy, Utils.getElementIdentifier(spectrum.getNuclei()[1])); + ArrayList matchesHeavyAtom = casekit.NMR.Utils.findShiftMatches(this.mol, shiftsHeavyAtom, tolHeavyAtom, Utils.getElementIdentifier(spectrum.getNuclei()[1])); + matchesHeavyAtom = casekit.NMR.Utils.correctShiftMatches(this.mol, shiftsHeavyAtom, matchesHeavyAtom, tolHeavyAtom, Utils.getElementIdentifier(spectrum.getNuclei()[1])); + + spectrum.setAssignedAtomIndicesByDim(matchesHeavyAtom, 1); + + this.setImplicitHydrogenShifts(shiftsHydrogen, matchesHeavyAtom); + + } + + + private void setImplicitHydrogenShifts(final ArrayList shiftsHydrogen, final ArrayList matchesHeavyAtomType) { IAtom matchAtom; ArrayList assignedHydrogensShifts; - for (int i = 0; i < 
matchesHeavyAtom.size(); i++) { - if (matchesHeavyAtom.get(i) >= 0) { - matchAtom = this.mol.getAtom(matchesHeavyAtom.get(i)); + for (int i = 0; i < matchesHeavyAtomType.size(); i++) { + if (matchesHeavyAtomType.get(i) >= 0) { + matchAtom = this.mol.getAtom(matchesHeavyAtomType.get(i)); if (matchAtom.getImplicitHydrogenCount() == null || matchAtom.getImplicitHydrogenCount() == 0) { continue; } @@ -726,101 +603,46 @@ private ArrayList correctHydrogenShiftMatches(final ArrayList s return matches; } - - /** - * Wrapper function for automatically choosing which file format to take. - * For more details see - * {@link NMR.ParseRawData#parseCOSYviaPeakTable(java.lang.String, double)} - * and {@link NMR.ParseRawData#parseCOSYviaXML(java.lang.String, double)} - * - * @param pathToPeakList - * @param tol - * @return - * @throws IOException - * @throws ParserConfigurationException - * @throws SAXException - */ - public final boolean parseHHCOSY(final String pathToPeakList, final double tol) throws IOException, ParserConfigurationException, SAXException { - - switch (casekit.NMR.Utils.getFileFormat(pathToPeakList)) { - case "csv": - parseHHCOSYviaPeakTable(pathToPeakList, tol); - break; - case "xml": - parseHHCOSYviaXML(pathToPeakList, tol); - break; - default: - return false; - } - - return true; - } - /** - * Sets links between implicit hydrogens from H,H-COSY peak table to heavy - * atoms in the atom container. The implicit hydrogen number for - * a heavy atom, which is the corresponding heavy atom for an H shift value, - * must be set beforehand. In case of carbons, this could be done by parsing - * the DEPT information: - * {@link ParseRawData#parseDEPT(String, String, double)} or - * {@link ParseRawData#parseDEPTviaPeakTable(String, String, double)} or - * {@link ParseRawData#parseDEPTviaXML(String, String, double)}. - * Returns true if all signals are bidirectional, so that atom A has a - * signal according to atom B and vice versa. 
- * The property is then set to {@link #CONST_PROP_HHCOSY} in - * {@link IAtomContainer#setProperty(java.lang.Object, java.lang.Object)}. + * Creates a Spectrum class object from given H,H-COSY input file in CSV format. * - * @param pathToPeakList path to H,H-COSY peak table (Bruker's TopSpin csv + * @param pathToCSV path to H,H-COSY peak table (Bruker's TopSpin csv * file * format) - * @param tol tolerance value [ppm] for hydrogen shift matching * @return * @throws IOException */ - public final boolean parseHHCOSYviaPeakTable(final String pathToPeakList, final double tol) throws IOException { + public final Spectrum parseHHCOSYviaCSV(final String pathToCSV) throws IOException { final Spectrum spectrum = new Spectrum( new String[]{ Utils.getIsotopeIdentifier("H"), Utils.getIsotopeIdentifier("H")}, - new ArrayList[]{casekit.NMR.Utils.parsePeakTable(pathToPeakList, 5), - casekit.NMR.Utils.parsePeakTable(pathToPeakList, 6)}, - casekit.NMR.Utils.parsePeakTable(pathToPeakList, 9)); + new ArrayList[]{casekit.NMR.Utils.parseCSV(pathToCSV, 5), + casekit.NMR.Utils.parseCSV(pathToCSV, 6)}, + casekit.NMR.Utils.parseCSV(pathToCSV, 9)); - return this.setHHCOSY(spectrum, tol); + return spectrum; } /** - * Sets links between implicit hydrogens from H,H-COSY peak XML file to - * heavy - * atoms in the atom container. The implicit hydrogen number for a heavy - * atom, which is the corresponding heavy atom for an H shift value, must be - * set beforehand. In case of carbons, this could be done by parsing the - * DEPT information: - * {@link ParseRawData#parseDEPT(String, String, double)} or - * {@link ParseRawData#parseDEPTviaPeakTable(String, String, double)} or - * {@link ParseRawData#parseDEPTviaXML(String, String, double)}. Returns true if - * all signals are bidirectional, so that atom A has a signal according to - * atom B and vice versa. - * The property is then set to {@link #CONST_PROP_HHCOSY} in - * {@link IAtomContainer#setProperty(java.lang.Object, java.lang.Object)}. 
+ * Creates a Spectrum class object from given H,H-COSY input file in XML format. * * @param pathToXML path to H,H-COSY peak XML file (Bruker's TopSpin XML * file format) - * @param tol tolerance value [ppm] for hydrogen shift matching * @return * @throws IOException * @throws javax.xml.parsers.ParserConfigurationException * @throws org.xml.sax.SAXException */ - public final boolean parseHHCOSYviaXML(final String pathToXML, final double tol) throws IOException, ParserConfigurationException, SAXException { + public final Spectrum parseHHCOSYviaXML(final String pathToXML) throws IOException, ParserConfigurationException, SAXException { final Spectrum spectrum = new Spectrum( new String[]{ Utils.getIsotopeIdentifier("H"), Utils.getIsotopeIdentifier("H")}, - new ArrayList[]{casekit.NMR.Utils.parseXML(pathToXML, 2, 1), - casekit.NMR.Utils.parseXML(pathToXML, 2, 2)}, + new ArrayList[]{casekit.NMR.Utils.parseXML(pathToXML, 2, 2), + casekit.NMR.Utils.parseXML(pathToXML, 2, 1)}, casekit.NMR.Utils.parseXML(pathToXML, 2, 3)); - return this.setHHCOSY(spectrum, tol); + return spectrum; } /** @@ -843,95 +665,51 @@ public final boolean setHHCOSY(final Spectrum spectrum, final double tol) { } casekit.NMR.Utils.setBidirectionalLinks(this.mol, hydrogenShiftMatches1, hydrogenShiftMatches2, CDKConstants.NMRSPECTYPE_2D_HHCOSY); + spectrum.setAssignedAtomIndicesByDim(hydrogenShiftMatches1, 0); + spectrum.setAssignedAtomIndicesByDim(hydrogenShiftMatches2, 1); + return true; } /** - * Wrapper function for automatically choosing which file format to take. 
- * For more details see - * {@link NMR.ParseRawData#parseINADEQUATEviaPeakTable(java.lang.String, double)} - * and {@link NMR.ParseRawData#parseINADEQUATEviaXML(java.lang.String, double)} - * - * @param pathToPeakList - * @param tol - * @return - * @throws IOException - * @throws ParserConfigurationException - * @throws SAXException - */ - public final boolean parseINADEQUATE(final String pathToPeakList, final double tol) throws IOException, ParserConfigurationException, SAXException { - - switch (casekit.NMR.Utils.getFileFormat(pathToPeakList)) { - case "csv": - parseINADEQUATEviaPeakTable(pathToPeakList, tol); - break; - case "xml": - parseINADEQUATEviaXML(pathToPeakList, tol); - break; - default: - return false; - } - - return true; - } - - - /** - * Sets links between carbons from INADEQUATE peak table in the atom - * container. - * To match the shift values, the carbon shifts must be set beforehand. - * This could be done by - * {@link ParseRawData#parse1DNMR(String, String)} or - * {@link ParseRawData#parse1DNMRviaPeakTable(String, String)} or - * {@link ParseRawData#parse1DNMRviaXML(String, String) }. - * Returns true if all signals are bidirectional, so that atom A has a - * signal according to atom B and vice versa. + * Creates a Spectrum class object from given INADEQUATE input file in CSV format. 
* - * @param pathToPeakList path to INADEQUATE peak table (Bruker's TopSpin csv + * @param pathToCSV path to INADEQUATE peak table (Bruker's TopSpin csv * file format) - * @param tol tolerance value [ppm] for carbon shift matching * @return * @throws IOException */ - public final boolean parseINADEQUATEviaPeakTable(final String pathToPeakList, final double tol) throws IOException { + public final Spectrum parseINADEQUATEviaCSV(final String pathToCSV) throws IOException { final Spectrum spectrum = new Spectrum( new String[]{ Utils.getIsotopeIdentifier("C"), Utils.getIsotopeIdentifier("C")}, - new ArrayList[]{casekit.NMR.Utils.parsePeakTable(pathToPeakList, 5), - casekit.NMR.Utils.parsePeakTable(pathToPeakList, 6)}, - casekit.NMR.Utils.parsePeakTable(pathToPeakList, 9)); + new ArrayList[]{casekit.NMR.Utils.parseCSV(pathToCSV, 5), + casekit.NMR.Utils.parseCSV(pathToCSV, 6)}, + casekit.NMR.Utils.parseCSV(pathToCSV, 9)); - return this.setINADEQUATE(spectrum, tol); + return spectrum; } /** - * Sets links between carbons from INADEQUATE xml peak file in the atom - * container. - * To match the shift values, the carbon shifts must be set beforehand. - * This could be done by - * {@link ParseRawData#parse1DNMRviaPeakTable(String, String)} or - * {@link ParseRawData#parse1DNMRviaXML(String, String) }. - * Returns true if all signals are bidirectional, so that atom A has a - * signal according to atom B and vice versa. + * Creates a Spectrum class object from given INADEQUATE input file in XML format. 
* * @param pathToXML path to INADEQUATE peak XML file (Bruker's TopSpin XML * file format) - * @param tol tolerance value [ppm] for hydrogen shift matching * @return * @throws IOException * @throws javax.xml.parsers.ParserConfigurationException * @throws org.xml.sax.SAXException */ - public final boolean parseINADEQUATEviaXML(final String pathToXML, final double tol) throws IOException, ParserConfigurationException, SAXException { + public final Spectrum parseINADEQUATEviaXML(final String pathToXML) throws IOException, ParserConfigurationException, SAXException { final Spectrum spectrum = new Spectrum( new String[]{ Utils.getIsotopeIdentifier("C"), Utils.getIsotopeIdentifier("C")}, - new ArrayList[]{casekit.NMR.Utils.parseXML(pathToXML, 2, 1), - casekit.NMR.Utils.parseXML(pathToXML, 2, 2)}, + new ArrayList[]{casekit.NMR.Utils.parseXML(pathToXML, 2, 2), + casekit.NMR.Utils.parseXML(pathToXML, 2, 1)}, casekit.NMR.Utils.parseXML(pathToXML, 2, 3)); - return this.setINADEQUATE(spectrum, tol); + return spectrum; } @@ -956,117 +734,56 @@ public final boolean setINADEQUATE(final Spectrum spectrum, final double tol) { } casekit.NMR.Utils.setBidirectionalLinks(this.mol, carbonShiftMatches1, carbonShiftMatches2, CDKConstants.NMRSPECTYPE_2D_INADEQUATE); - return true; - } - - - /** - * Wrapper function for automatically choosing which file format to take. 
- * For more details see - * {@link NMR.ParseRawData#parseHMBCviaPeakTable(String, String, double, double)} - * and - * {@link NMR.ParseRawData#parseHMBCviaXML(String, String, double, double)} - * - * @param pathToPeakList - * @param atomType - * @param tolHydrogen - * @param tolHeavy - * @return - * @throws IOException - * @throws ParserConfigurationException - * @throws SAXException - */ - public final boolean parseHMBC(final String pathToPeakList, final String atomType, final double tolHydrogen, final double tolHeavy) throws IOException, ParserConfigurationException, SAXException { - - switch (casekit.NMR.Utils.getFileFormat(pathToPeakList)) { - case "csv": - parseHMBCviaPeakTable(pathToPeakList, atomType, tolHydrogen, tolHeavy); - break; - case "xml": - parseHMBCviaXML(pathToPeakList, atomType, tolHydrogen, tolHeavy); - break; - default: - return false; - } - + spectrum.setAssignedAtomIndicesByDim(carbonShiftMatches1, 0); + spectrum.setAssignedAtomIndicesByDim(carbonShiftMatches2, 1); + return true; } /** - * Sets links between implicit hydrogens and heavy atoms from HMBC peak - * tablein the atom container. The implicit hydrogen number for a heavy - * atom, which is the corresponding heavy atom for an H shift value, must be - * set beforehand. In case of carbon, this could be done by parsing the - * DEPT information: - * {@link ParseRawData#parseDEPT(String, String, double) } or - * {@link ParseRawData#parseDEPTviaPeakTable(String, String, double)} or - * {@link ParseRawData#parseDEPTviaXML(String, String, double)}. - * The property is then set to {@link #CONST_PROP_HMBC} in - * {@link IAtomContainer#setProperty(java.lang.Object, java.lang.Object)}. + * Creates a Spectrum class object from given HMBC input file in CSV format. * - * @param pathToPeakList path to HMBC peak table (Bruker's TopSpin csv + * @param pathToCSV path to HMBC peak table (Bruker's TopSpin csv * file format) - * @param atomType Element name (e.g. 
"C") which also occurrs in + * @param heavyAtomType Element name (e.g. "C") which also occurrs in * {@link Utils#getNMRShiftConstant(java.lang.String)} - * @param tolHydrogen tolerance value [ppm] for hydrogen shift matching - * @param tolHeavy tolerance value [ppm] for heavy atom shift matching + * @return * @throws IOException */ - public final void parseHMBCviaPeakTable(final String pathToPeakList, final String atomType, final double tolHydrogen, final double tolHeavy) throws IOException { + public final Spectrum parseHMBCviaCSV(final String pathToCSV, final String heavyAtomType) throws IOException { - final ArrayList hydrogenShifts = casekit.NMR.Utils.parsePeakTable(pathToPeakList, 5); - final ArrayList hydrogenShiftMatches = this.correctHydrogenShiftMatches(hydrogenShifts, this.findImplicitHydrogenShiftMatches(hydrogenShifts, tolHydrogen), tolHydrogen); - final ArrayList heavyAtomShiftMatches = casekit.NMR.Utils.matchShiftsFromPeakTable(this.mol, pathToPeakList, atomType, tolHeavy, 6); - - this.setHMBC(hydrogenShiftMatches, heavyAtomShiftMatches); + final Spectrum spectrum = new Spectrum( new String[]{Utils.getIsotopeIdentifier("H"), + Utils.getIsotopeIdentifier(heavyAtomType)}, + new ArrayList[]{casekit.NMR.Utils.parseCSV(pathToCSV, 5), + casekit.NMR.Utils.parseCSV(pathToCSV, 6)}, + casekit.NMR.Utils.parseCSV(pathToCSV, 9)); + + return spectrum; } + /** - * Sets links between implicit hydrogens and heavy atoms from HMBC peak - * XML file in the atom container. The implicit hydrogen number for a heavy - * atom, which is the corresponding heavy atom for an H shift value, must be - * set beforehand. In case of carbon, this could be done by parsing the DEPT - * information: - * {@link ParseRawData#parseDEPT(String, String, double) } or - * {@link ParseRawData#parseDEPTviaPeakTable(String, String, double)} or - * {@link ParseRawData#parseDEPTviaXML(String, String, double)}. 
- * The property is then set to {@link #CONST_PROP_HMBC} in - * {@link IAtomContainer#setProperty(java.lang.Object, java.lang.Object)}. + * Creates a Spectrum class object from given HMBC input file in XML format. * * @param pathToXML path to HMBC peak XML file (Bruker's TopSpin XML file * format) - * @param atomType Element name (e.g. "C") which also occurrs in + * @param heavyAtomType Element name (e.g. "C") which also occurrs in * {@link Utils#getNMRShiftConstant(java.lang.String)} - * @param tolHydrogen tolerance value [ppm] for hydrogen shift matching - * @param tolHeavy tolerance value [ppm] for heavy atom shift matching + * @return * @throws IOException * @throws javax.xml.parsers.ParserConfigurationException * @throws org.xml.sax.SAXException */ - public final void parseHMBCviaXML(final String pathToXML, final String atomType, final double tolHydrogen, final double tolHeavy) throws IOException, ParserConfigurationException, SAXException { - - final ArrayList hydrogenShifts = casekit.NMR.Utils.parseXML(pathToXML, 2, 2); - final ArrayList hydrogenShiftMatches = this.correctHydrogenShiftMatches(hydrogenShifts, this.findImplicitHydrogenShiftMatches(hydrogenShifts, tolHydrogen), tolHydrogen); - final ArrayList heavyAtomShiftMatches = casekit.NMR.Utils.matchShiftsFromXML(this.mol, pathToXML, atomType, tolHeavy, 2, 1); + public final Spectrum parseHMBCviaXML(final String pathToXML, final String heavyAtomType) throws IOException, ParserConfigurationException, SAXException { - this.setHMBC(hydrogenShiftMatches, heavyAtomShiftMatches); - } - - private void setHMBC(final ArrayList hydrogenShiftMatches, final ArrayList heavyAtomShiftMatches) { - - ArrayList HMBCList; - for (int i = 0; i < hydrogenShiftMatches.size(); i++) { - if (hydrogenShiftMatches.get(i) >= 0 && heavyAtomShiftMatches.get(i) >= 0) { - if (this.mol.getAtom(hydrogenShiftMatches.get(i)).getProperty(CDKConstants.NMRSPECTYPE_2D_HMBC) == null) { - 
this.mol.getAtom(hydrogenShiftMatches.get(i)).setProperty(CDKConstants.NMRSPECTYPE_2D_HMBC, new ArrayList<>()); - } - HMBCList = this.mol.getAtom(hydrogenShiftMatches.get(i)).getProperty(CDKConstants.NMRSPECTYPE_2D_HMBC); - if (!HMBCList.contains(heavyAtomShiftMatches.get(i))) { - HMBCList.add(heavyAtomShiftMatches.get(i)); - } - } - } + final Spectrum spectrum = new Spectrum( new String[]{ Utils.getIsotopeIdentifier("H"), + Utils.getIsotopeIdentifier(heavyAtomType)}, + new ArrayList[]{casekit.NMR.Utils.parseXML(pathToXML, 2, 2), + casekit.NMR.Utils.parseXML(pathToXML, 2, 1)}, + casekit.NMR.Utils.parseXML(pathToXML, 2, 3)); + + return spectrum; } @@ -1100,5 +817,8 @@ public final void setHMBC(final Spectrum spectrum, final double tolHydrogen, fin } } } + + spectrum.setAssignedAtomIndicesByDim(matchesHydrogen, 0); + spectrum.setAssignedAtomIndicesByDim(matchesHeavyAtom, 1); } } From 51e752e335c98ab031b82b161eee54cab6f4d054 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Mon, 6 Aug 2018 18:49:05 +0200 Subject: [PATCH 019/405] - adjustments for ParseRawData class --- src/casekit/NMR/Process.java | 325 +---------------------------------- 1 file changed, 1 insertion(+), 324 deletions(-) diff --git a/src/casekit/NMR/Process.java b/src/casekit/NMR/Process.java index fed1511..f845b85 100644 --- a/src/casekit/NMR/Process.java +++ b/src/casekit/NMR/Process.java @@ -23,7 +23,6 @@ */ package casekit.NMR; -import casekit.NMR.model.Spectrum; import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; @@ -31,14 +30,11 @@ import java.io.UnsupportedEncodingException; import java.util.ArrayList; import java.util.HashMap; -import org.openscience.cdk.Atom; import org.openscience.cdk.CDKConstants; import org.openscience.cdk.interfaces.IAtom; import org.openscience.cdk.interfaces.IAtomContainer; import org.openscience.cdk.interfaces.IAtomContainerSet; -import org.openscience.cdk.interfaces.IAtomType; import org.openscience.cdk.interfaces.IMolecularFormula; 
-import org.openscience.cdk.qsar.descriptors.atomic.AtomHybridizationDescriptor; import org.openscience.cdk.tools.manipulator.MolecularFormulaManipulator; /** @@ -68,162 +64,12 @@ public Process(final IMolecularFormula molFormula){ } - - - -// /** -// * Sets the hybridization level of each heavy atom in the molecule which has -// * its own shift value (property), only if a frequency threshold value for one -// * hybridization level is reached. -// * For further details see {@link testkit.Utils#getHybridizationsFromNMRShiftDB(IAtomContainer, String, double, IMolecularFormula)} -// * Two threshold value are used to accept a hybridization level and a found -// * neighbor as real neighbor (see thrs parameter descriptions). -// * -// * @param pathToNMRShiftDB path to NMRShiftDB sdf file -// * @param tol tolerance value [ppm] for atom shift matching in DB -// * @param thrsHybrid threshold for accepting a hybridization frequency rate, e.g. -// * the value 0.9 means that 90% of all found hybridizations for given carbon -// * shift must be from the same hybridization level -// * @param thrsNeighbor threshold for accepting a found neighbor frequency rate -// * (atom type) in database as real neighbor for the unknown structure. E.g. -// * the value 0.9 means that 90% of all found neighbors for given carbon -// * shift must be from the same atom type, like nitrogen or oxygen. 
-// * -// * @throws IOException -// */ -// public void setHybridizationsFromNMRShiftDB(final String pathToNMRShiftDB, final double tol, final double thrsHybrid, final double thrsNeighbor) throws IOException { -// -// final HashMap>>> elementsHybridAndBondTypeCounter = testkit.Utils.getHybridizationsFromNMRShiftDB(this.mol, pathToNMRShiftDB, tol, this.molFormula); -// final HashMap>> elementsHybridCounter = elementsHybridAndBondTypeCounter.get("hybridCounter"); -// final HashMap>> elementsBondTypeCounter = elementsHybridAndBondTypeCounter.get("bondTypeCounter"); -// -// ArrayList hybrids; -// final HashMap probsNeighbors = new HashMap<>(); -// int idx = 0; -// Double[] temp; -// final HashMap elementsHybridCounterSum = new HashMap<>(); -// for (int qAtomIndex : elementsHybridCounter.keySet()) { -// for (String keyValue : elementsHybridCounter.get(qAtomIndex).keySet()) { -// if(keyValue.equals("query") || keyValue.equals("queryH")){ -// continue; -// } -// if(!probsNeighbors.containsKey(keyValue)){ -// probsNeighbors.put(keyValue, new Double[elementsHybridCounter.keySet().size()]); -// } -// hybrids = elementsHybridCounter.get(qAtomIndex).get(keyValue); -// temp = probsNeighbors.get(keyValue); -// temp[idx] = (double) hybrids.size(); -// probsNeighbors.put(keyValue, temp); -// -// if(!elementsHybridCounterSum.containsKey(keyValue)){ -// elementsHybridCounterSum.put(keyValue, 0); -// } -// elementsHybridCounterSum.put(keyValue, elementsHybridCounterSum.get(keyValue) + hybrids.size()); -// } -// idx++; -// } -// int sumQueryAtom; -// for (int i = 0; i < idx; i++) { -// sumQueryAtom = 0; -// for (String keyValue : probsNeighbors.keySet()) { -// sumQueryAtom += probsNeighbors.get(keyValue)[i]; -// } -// for (String keyValue : probsNeighbors.keySet()) { -// temp = probsNeighbors.get(keyValue); -// temp[i] = temp[i]/sumQueryAtom;//0.5 * (temp[i]/sumQueryAtom + temp[i]/elementsHybridCounterSum.get(keyValue)); -// probsNeighbors.put(keyValue, temp); -// } -// } -// -// -// 
HashMap hybridFreqs; -// int maxFreqHybridValue; -// double maxFreq; -// IAtom qAtom; -// // for all query atoms which have their own NMR shift value -// idx = 0; -// for (int qAtomIndex : elementsHybridCounter.keySet()) { -// qAtom = this.mol.getAtom(qAtomIndex); -// System.out.println("\nmain key: " + qAtomIndex + " -> H: " + qAtom.getImplicitHydrogenCount() + ", nmr shift: " + qAtom.getProperty(testkit.Utils.getNMRShiftConstant(qAtom.getSymbol()))); -// // for all possible neighbors -// for (String keyValue : elementsHybridCounter.get(qAtomIndex).keySet()) { -// hybrids = elementsHybridCounter.get(qAtomIndex).get(keyValue); -// if(hybrids.isEmpty()){ -// continue; -// } -// hybridFreqs = testkit.Utils.getValueFrequencies(hybrids); -// maxFreqHybridValue = -1; -// maxFreq = Collections.max(hybridFreqs.values()); -// for (int hybridValue : hybridFreqs.keySet()) { -// if(hybridFreqs.get(hybridValue) == maxFreq){ -// maxFreqHybridValue = hybridValue; -// break; -// } -// } -// -// // set hybridization for a query atom which has at least one match with an attached hydrogen shift -// // value for a matched heavy atom in DB; this method is preferred -// switch (keyValue) { -// case "queryH": -// System.out.println("queryH -> " + hybrids.size() + " -> " + IAtomType.Hybridization.values()[maxFreqHybridValue] + " (" + hybridFreqs.get(maxFreqHybridValue) + ")"); -// if(hybridFreqs.get(maxFreqHybridValue) >= thrsHybrid){ -// qAtom.setHybridization(IAtomType.Hybridization.values()[maxFreqHybridValue]); -//// System.out.println("queryH -> " + hybrids.size() + " -> " + qAtom.getHybridization() + " (" + hybridFreqs.get(maxFreqHybridValue) + ")"); -// } break; -// case "query": -// System.out.println("query -> " + hybrids.size() + " -> " + IAtomType.Hybridization.values()[maxFreqHybridValue] + " (" + hybridFreqs.get(maxFreqHybridValue) + ")"); -// if(qAtom.getHybridization() == null && (hybridFreqs.get(maxFreqHybridValue) >= thrsHybrid)){ -// // set hybridization from DB entries 
without attached hydrogen shift matches for an heavy atom -// qAtom.setHybridization(IAtomType.Hybridization.values()[maxFreqHybridValue]); -//// System.out.println("query -> " + hybrids.size() + " -> " + qAtom.getHybridization() + " (" + hybridFreqs.get(maxFreqHybridValue) + ")"); -// } break; -// default: -// System.out.println(idx + ": " + keyValue + ": " + probsNeighbors.get(keyValue)[idx] + " (" + IAtomType.Hybridization.values()[maxFreqHybridValue] + ", " + hybridFreqs.get(maxFreqHybridValue) + ")"); -// HashMap freqs = testkit.Utils.getValueFrequencies(elementsBondTypeCounter.get(qAtomIndex).get(keyValue)); -// for (Integer bondType : freqs.keySet()) { -// System.out.println(" -> " + IBond.Order.values()[bondType - 1] + " (" + freqs.get(bondType) + ")"); -// } break; -// } -// } -// idx++; -// } -// -// System.out.println("\n"); -// idx = 0; -// for (int qAtomIndex : elementsHybridCounter.keySet()) { -// qAtom = this.mol.getAtom(qAtomIndex); -// String output = qAtomIndex + "\t(" + String.format( "%.3f", (double) qAtom.getProperty(testkit.Utils.getNMRShiftConstant(qAtom.getSymbol()))) + ",\t" + qAtom.getHybridization() + ",\tH:" + qAtom.getImplicitHydrogenCount() + "):\t"; -// for (String keyValue : elementsHybridCounter.get(qAtomIndex).keySet()) { -// if (keyValue.equals("queryH") || keyValue.equals("query")) { -// continue; -// } -// if(probsNeighbors.get(keyValue)[idx] >= 0.1){ -// output += keyValue + ": " + String.format( "%.3f", probsNeighbors.get(keyValue)[idx]) + " "; -// } else { -// output += keyValue + ": ----- "; -// } -// } -// -//// for (IAtom neighbor : this.mol.getConnectedAtomsList(qAtom)) { -//// if(neighbor.getProperty(testkit.Utils.getNMRShiftConstant(neighbor.getSymbol())) == null){ -//// output += " -> " + neighbor.getSymbol(); -//// } -//// } -// -// -// System.out.println(output); -// idx++; -// -// testkit.Utils.getOpenBonds(this.mol, qAtomIndex); -// } -// } - - /** * Sets bonds from already set experiment information (H,H-COSY, 
INADEQUATE and HMBC). * Additionally, this function is build for bond type recognition, * for details see {@link testkit.Utils#getBondTypeFromHybridizations(java.lang.String, org.openscience.cdk.interfaces.IAtomType.Hybridization, java.lang.String, org.openscience.cdk.interfaces.IAtomType.Hybridization)}. * + * @param experiments */ public void setBonds(final String[] experiments){ @@ -263,175 +109,6 @@ private void setBond(final int index1, final int index2){ } - /** - * Adds a bond manually after reading the experimental data and - * setting bonds from that automatically. - * - * @param atomType1 Element name (e.g. "C") for the first heavy atom - * which also occurrs in - * {@link testkit.Utils#getNMRShiftConstant(java.lang.String)} - * @param shift1 shift value [ppm] of the first heavy atom for matching - * @param tol1 tolerance value for first heavy atom matching - * @param atomType2 Element name (e.g. "C") for the second heavy atom - * which also occurrs in - * {@link testkit.Utils#getNMRShiftConstant(java.lang.String)} - * @param shift2 shift value [ppm] of the second heavy atom for matching - * @param tol2 tolerance value for second heavy atom matching - * @return returns false if no matches were found and no bond could - * be stored or the matched atom indices are the same, otherwise true - * @deprecated - */ - public boolean addBond(final String atomType1, final double shift1, final double tol1, final String atomType2, final double shift2, final double tol2) { - - final String NMRSHIFT_ATOMTYPE1 = casekit.NMR.Utils.getNMRShiftConstant(atomType1); - final String NMRSHIFT_ATOMTYPE2 = casekit.NMR.Utils.getNMRShiftConstant(atomType2); - if ((NMRSHIFT_ATOMTYPE1 == null) || (NMRSHIFT_ATOMTYPE2 == null)) { - return false; - } - int atomIndex1 = casekit.NMR.Utils.findSingleShiftMatch(this.mol, shift1, tol1, atomType1); - int atomIndex2 = casekit.NMR.Utils.findSingleShiftMatch(this.mol, shift2, tol2, atomType2); - if ((atomIndex1 < 0) || (atomIndex2 < 0) || 
(atomIndex1 == atomIndex2)) { - return false; - } - this.setBond(atomIndex1, atomIndex2); - - - return true; - } - - - /** - * Adds a H,H-COSY signal and bond between two heavy atoms. To add such a signal, - * at least the first heavy atom has to have a shift value match within - * the atom container of the unknown. - * For matching the second heavy atom and creating a (pseudo) HSQC signal, - * the function {@link #addHSQC(java.lang.String, double, double, double) } - * is used. - * - * @param atomType1 Element name (e.g. "C") for the first heavy atom - * which also occurrs in {@link testkit.Utils#getNMRShiftConstant(java.lang.String)} - * @param shift1 shift value [ppm] of the first heavy atom for matching - * @param tol1 tolerance value for first heavy atom matching - * @param atomType2 Element name (e.g. "C") for the second heavy atom - * which also occurrs in {@link testkit.Utils#getNMRShiftConstant(java.lang.String)} - * @param shift2 shift value [ppm] of the second heavy atom for matching - * @param tol2 tolerance value for second heavy atom matching - * @param shiftH proton shift value [ppm] to store - * @return returns false if no matches were found and no COSY signal could - * be stored or the matched atom indices are the same, otherwise true - * @deprecated - */ - public boolean addCOSY(final String atomType1, final double shift1, final double tol1, final String atomType2, final Double shift2, final double tol2, final Double shiftH){ - - final String NMRSHIFT_ATOMTYPE1 = casekit.NMR.Utils.getNMRShiftConstant(atomType1); - final String NMRSHIFT_ATOMTYPE2 = casekit.NMR.Utils.getNMRShiftConstant(atomType2); - if ((NMRSHIFT_ATOMTYPE1 == null) || (NMRSHIFT_ATOMTYPE2 == null)) { - return false; - } - int atomIndex1 = casekit.NMR.Utils.findSingleShiftMatch(this.mol, shift1, tol1, atomType1); - int atomIndex2 = this.addHSQC(atomType2, shift2, tol2, shiftH); - if ((atomIndex1 < 0) || (atomIndex2 < 0) || (atomIndex1 == atomIndex2)) { - return false; - } - 
if(this.mol.getAtom(atomIndex1).getProperty(CDKConstants.NMRSPECTYPE_2D_HHCOSY) == null){ - this.mol.getAtom(atomIndex1).setProperty(CDKConstants.NMRSPECTYPE_2D_HHCOSY, new ArrayList<>()); - } - if(this.mol.getAtom(atomIndex2).getProperty(CDKConstants.NMRSPECTYPE_2D_HHCOSY) == null){ - this.mol.getAtom(atomIndex2).setProperty(CDKConstants.NMRSPECTYPE_2D_HHCOSY, new ArrayList<>()); - } - - final ArrayList COSYList = this.mol.getAtom(atomIndex1).getProperty(CDKConstants.NMRSPECTYPE_2D_HHCOSY); - final ArrayList COSYListX = this.mol.getAtom(atomIndex2).getProperty(CDKConstants.NMRSPECTYPE_2D_HHCOSY); - COSYList.add(atomIndex2); - COSYListX.add(atomIndex1); - - this.setBond(atomIndex1, atomIndex2); - - // set new hybridization of the COSY partner - final AtomHybridizationDescriptor desc = new AtomHybridizationDescriptor(); - this.mol.getAtom(atomIndex1).setHybridization(IAtomType.Hybridization.values()[Integer.parseInt(desc.calculate(this.mol.getAtom(atomIndex1), this.mol).getValue().toString())]); - this.mol.getAtom(atomIndex2).setHybridization(IAtomType.Hybridization.values()[Integer.parseInt(desc.calculate(this.mol.getAtom(atomIndex2), this.mol).getValue().toString())]); - - return true; - } - - - /** - * Adds a HSQC signal manually after reading the experimental data and setting bonds from that automatically. - * If a shift value for a heavy atom is >0.0 then this shift value will be used - * to find a heavy atom match between this given shift value and atoms - * of the atom container of the unknown. Otherwise the first heavy atom without stored - * NMR shift entry and without stored proton shifts in the atom container - * is used for attaching a proton. Additionally, a given proton shift value >0.0 - * is used to store it into the matched heavy atom's proton shift list. 
- * - * @param atomType atom type used for matching - * @param shift shift valuen [ppm] of the heavy atom - * @param tol tolerance value [ppm] for matching - * @param shiftH proton shift value [ppm] to store - * @return index of matched heavy atom within the atom container; returns -1 if no heavy atom match was found - * @deprecated - */ - public int addHSQC(final String atomType, final Double shift, final double tol, final Double shiftH ){ - - int atomIndex = -1; - final String NMRSHIFT_ATOMTYPE = casekit.NMR.Utils.getNMRShiftConstant(atomType); - if ((NMRSHIFT_ATOMTYPE == null) || (this.atomTypeIndices.get(atomType) == null)) { - return -1; - } - // set additional HSQC for an atom with already set shift value - if(shift != null){ - atomIndex = casekit.NMR.Utils.findSingleShiftMatch(this.mol, shift, tol, atomType); - } else { - // set HSQC for the first atom of given atom type without a already set shift value and without attached proton shifts - for (Integer i : this.atomTypeIndices.get(atomType)) { - if ((this.mol.getAtom(i).getProperty(NMRSHIFT_ATOMTYPE) == null) && (this.mol.getAtom(i).getProperty(CDKConstants.NMRSPECTYPE_2D_HSQC) == null)) { - atomIndex = i; - break; - } - } - } - // if no atom found to attach a proton - if (atomIndex < 0) { - return -1; - } - // add the proton shift value if it is higher than 0 - if(shiftH != null){ - if (this.mol.getAtom(atomIndex).getProperty(CDKConstants.NMRSPECTYPE_2D_HSQC) == null) { - this.mol.getAtom(atomIndex).setProperty(CDKConstants.NMRSPECTYPE_2D_HSQC, new ArrayList<>()); - } - final ArrayList protonShifts = this.mol.getAtom(atomIndex).getProperty(CDKConstants.NMRSPECTYPE_2D_HSQC); - protonShifts.add(shiftH); - } - // increase the implicit proton number - if(this.mol.getAtom(atomIndex).getImplicitHydrogenCount() == null){ - this.mol.getAtom(atomIndex).setImplicitHydrogenCount(0); - } - this.mol.getAtom(atomIndex).setImplicitHydrogenCount(this.mol.getAtom(atomIndex).getImplicitHydrogenCount() + 1); - // set the 
(new) hybridization - final AtomHybridizationDescriptor desc = new AtomHybridizationDescriptor(); - this.mol.getAtom(atomIndex).setHybridization(IAtomType.Hybridization.values()[Integer.parseInt(desc.calculate(this.mol.getAtom(atomIndex), this.mol).getValue().toString())]); - - - return atomIndex; - } - - /** - * - * @param atomType - * @param shift - * @deprecated - */ - public void addAtom(final String atomType, final Double shift){ - - this.mol.addAtom(new Atom(atomType)); - if(shift != null){ - this.mol.getAtom(this.mol.getAtomCount() - 1).setProperty(casekit.NMR.Utils.getNMRShiftConstant(atomType), shift); - } - this.setAtomTypeIndices(); - } - - /** * * @param projectName From e9f44d188ae7113293b8d496ae2611cbf6aef1a1 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Mon, 6 Aug 2018 18:49:49 +0200 Subject: [PATCH 020/405] - some unnecessary functions are removed --- src/casekit/NMR/Utils.java | 383 +------------------------------------ 1 file changed, 9 insertions(+), 374 deletions(-) diff --git a/src/casekit/NMR/Utils.java b/src/casekit/NMR/Utils.java index ba775cd..40f38eb 100644 --- a/src/casekit/NMR/Utils.java +++ b/src/casekit/NMR/Utils.java @@ -61,8 +61,6 @@ import org.openscience.cdk.interfaces.IAtomContainer; import org.openscience.cdk.interfaces.IAtomType; import org.openscience.cdk.interfaces.IBond; -import org.openscience.cdk.interfaces.IElement; -import org.openscience.cdk.interfaces.IMolecularFormula; import org.openscience.cdk.io.SDFWriter; import org.openscience.cdk.io.iterator.IteratingSDFReader; import org.openscience.cdk.isomorphism.matchers.QueryAtomContainer; @@ -70,7 +68,6 @@ import org.openscience.cdk.silent.SilentChemObjectBuilder; import org.openscience.cdk.smiles.smarts.parser.SMARTSParser; import org.openscience.cdk.tools.manipulator.AtomContainerManipulator; -import org.openscience.cdk.tools.manipulator.MolecularFormulaManipulator; import org.w3c.dom.NodeList; import org.xml.sax.SAXException; @@ -207,7 +204,7 @@ public static 
ArrayList getAtomTypeIndicesByElement(final IAtomContaine * @return ArrayList of Double shift values * @throws IOException */ - public static ArrayList parsePeakTable(final String pathToPeakList, final int column) throws IOException { + public static ArrayList parseCSV(final String pathToPeakList, final int column) throws IOException { final ArrayList shifts = new ArrayList<>(); String line; @@ -239,7 +236,7 @@ public static ArrayList parsePeakTable(final String pathToPeakList, fina * @return Spectrum class object containing the peak lists * @throws IOException */ - public static Spectrum parsePeakTable(final String pathToPeakList, final int[] columns, final String[] atomTypes, final int intensityColumnIndex) throws IOException { + public static Spectrum CSVtoSpectrum(final String pathToPeakList, final int[] columns, final String[] atomTypes, final int intensityColumnIndex) throws IOException { // assumes the same number of selected columns and atom types if(columns.length != atomTypes.length){ @@ -248,10 +245,10 @@ public static Spectrum parsePeakTable(final String pathToPeakList, final int[] c final ArrayList[] shiftsList = new ArrayList[columns.length]; final String[] nuclei = new String[columns.length]; for (int col = 0; col < columns.length; col++) { - shiftsList[col] = Utils.parsePeakTable(pathToPeakList, columns[col]); + shiftsList[col] = Utils.parseCSV(pathToPeakList, columns[col]); nuclei[col] = Utils.getIsotopeIdentifier(atomTypes[col]); } - final ArrayList intensities = parsePeakTable(pathToPeakList, intensityColumnIndex); + final ArrayList intensities = parseCSV(pathToPeakList, intensityColumnIndex); return new Spectrum(nuclei, shiftsList, intensities); } @@ -301,18 +298,17 @@ public static ArrayList parseXML(final String pathToXML, final int dim, * object. * The XML file must be in Bruker's TopSpin format. 
* - * @param pathToXML path to NMR XML file in Bruker's XML file format + * @param pathToXML path to NMR XML file in Bruker's TopSpin XML file format * @param ndim number of dimensions: 1 (1D) or 2 (2D) * @param attributes which attribute indices in XML peak nodes should be used: - * 1 (shift of 1st dimension), 2 (shift of 2nd dimension if 2D data, - * intensity if 1D data) or 3 (intensity if 2D data) + * 1 (shift of 1st dimension), 2 (shift of 2nd dimension if 2D data) * @param atomTypes atom types (element) for each dimension * @return Spectrum class object containing the selected peak lists * @throws IOException * @throws javax.xml.parsers.ParserConfigurationException * @throws org.xml.sax.SAXException */ - public static Spectrum parseXML(final String pathToXML, final int ndim, final int[] attributes, final String[] atomTypes) throws IOException, ParserConfigurationException, SAXException { + public static Spectrum XMLtoSpectrum(final String pathToXML, final int ndim, final int[] attributes, final String[] atomTypes) throws IOException, ParserConfigurationException, SAXException { // assumes the same number of dims, attributes and atom types and a maximum number of dims of 2 if((ndim != attributes.length) || (ndim != atomTypes.length) || (attributes.length != atomTypes.length) @@ -448,7 +444,7 @@ public static int findSingleShiftMatch(final IAtomContainer ac, final double shi */ public static ArrayList matchShiftsFromPeakTable(final IAtomContainer ac, final String pathToPeakList, final String atomType, final double tol, final int column) throws IOException { - final ArrayList shiftsAtomType = casekit.NMR.Utils.parsePeakTable(pathToPeakList, column); + final ArrayList shiftsAtomType = casekit.NMR.Utils.parseCSV(pathToPeakList, column); ArrayList matchesAtomType = casekit.NMR.Utils.findShiftMatches(ac, shiftsAtomType, tol, atomType); matchesAtomType = casekit.NMR.Utils.correctShiftMatches(ac, shiftsAtomType, matchesAtomType, tol, atomType); @@ -957,181 +953,10 @@ 
public static IAtomContainer removeAtoms(final IAtomContainer ac, final String a - - - - - - - - // deprecated functions - - /** - * Creates an IAtomContainer object containing atoms without any bond - * information, given by a molecular formula. - * - * @param molFormula Molecular Formula - * @return - * @deprecated - */ - public static IAtomContainer createAtomContainer(final String molFormula) { - - HashMap hash = casekit.NMR.Utils.getAtomCountsInMolecularFormula(molFormula); - IAtomContainer ac = SilentChemObjectBuilder.getInstance().newAtomContainer(); - - for (String elem : hash.keySet()) { - // add atoms of current element - ac = casekit.NMR.Utils.addAtoms(ac, elem, hash.get(elem)); - } - - return ac; - } - - /** - * Creates a HashMap with the number of atoms for each occurring atom type. - * - * @deprecated - * @param molFormula - * @return - */ - public static HashMap getAtomCountsInMolecularFormula(final String molFormula) { - - HashMap hash = new HashMap<>(); - String[] molFormSplit = molFormula.split("[A-Z]"); - Matcher m = Pattern.compile("[A-Z]").matcher(molFormula); - String elem; - int noAtoms; - int k = 1; - - while (m.find()) { - // name of current element - elem = molFormula.substring(m.start(), m.end()); - if (k >= molFormSplit.length || molFormSplit[k].isEmpty()) { - // if no atom number is given then assume only one atom - noAtoms = 1; - } else if (Character.isLowerCase(molFormSplit[k].charAt(0))) { - // if element's name contains two letters then extend it - elem += molFormSplit[k].charAt(0); - // if more than one atoms of that element with two letters exist - if (molFormSplit[k].length() > 1) { - // check given atom number - noAtoms = Integer.parseInt(molFormSplit[k].substring(1)); - } else { - noAtoms = 1; - } - } else { - // if atom number is given - noAtoms = Integer.parseInt(molFormSplit[k].substring(0)); - } - try { - // add atom type and frequency to class hashmap - hash.put(elem, noAtoms); - } catch (Exception e) { - 
System.err.println("Illegal element \"" + elem + "\" will be ignored!!!"); - } - - k++; - } - - return hash; - } - - /** - * - * @param ac - * @param atomType - * @return - * @deprecated - */ - public static int getAtomTypeCount(final IAtomContainer ac, final String atomType) { - - int noAtoms = 0; - for (int i = 0; i < ac.getAtomCount(); i++) { - if (ac.getAtom(i).getSymbol().equals(atomType)) { - noAtoms++; - } - } - - return noAtoms; - } - - /** - * Creates atoms of the same atom type and store it into an atom container. - * - * @param ac Atom container - * @param noAtoms Number of atoms to create - * @param atomType Atom type (element's name, e.g. C or Br) - * @return - * @deprecated - */ - public static IAtomContainer addAtoms(final IAtomContainer ac, final String atomType, final int noAtoms) throws IllegalArgumentException { - - for (int i = 0; i < noAtoms; i++) { - ac.addAtom(new Atom(atomType)); - } - - return ac; - } - - - - - - - - - - - - - + // ######################################################################################################## // test functions -> not ready to use - public static double getTanimotoCoefficient(final IAtomContainer a, final IAtomContainer b) throws CDKException, IOException, CloneNotSupportedException{ - - // pubchem fingerprinter expects - // 1. explicit hydrogens -// AtomContainerManipulator.convertImplicitToExplicitHydrogens(a); - // 2. set atom type names -> done during setting of aromaticities - // 3. 
set aromaticity -> done during DB scanning - - SubstructureFingerprinter substructfp = new SubstructureFingerprinter(); - IBitFingerprint fingerprint = substructfp.getBitFingerprint(a); - System.out.println("\n\ndefault substructure bitstring: " + fingerprint.asBitSet()); - for (int setbit : fingerprint.getSetbits()) { - System.out.println("default substructure of index " + setbit + ": " + substructfp.getSubstructure(setbit));// + " -> " + SMARTSParser.parse(substructfp.getSubstructure(setbit), SilentChemObjectBuilder.getInstance())); - } - -// DepictionGenerator dg = new DepictionGenerator().withSize(800, 800).withAtomColors().withAtomValues().withMolTitle().withFillToFit(); - IAtomContainer ac = SilentChemObjectBuilder.getInstance().newAtomContainer(); - QueryAtomContainer qac = SMARTSParser.parse(substructfp.getSubstructure(fingerprint.getSetbits()[1]), SilentChemObjectBuilder.getInstance()); - - System.out.println("qac: " + qac.getAtomCount() + ", " + qac.getBondCount() + " -> " + qac.getProperties()); - for (IAtom atom : qac.atoms()) { - ac.addAtom(atom); - System.out.println("qac atom: "); - } - for (IBond bond : qac.bonds()) { - ac.addBond(bond); - System.out.println("qac bond: " + bond); - } - System.out.println("ac: " + ac.getAtomCount() + ", " + ac.getBondCount() + " -> " + ac.getProperties()); - - -// dg.depict(ac).writeTo("/Users/mwenk/Downloads/test.png"); - - System.out.println("\n\n"); - SubstructureFingerprinter klekotasubstructfp = new KlekotaRothFingerprinter(); - fingerprint = klekotasubstructfp.getBitFingerprint(a); - System.out.println("Klekota substructure bitstring: " + fingerprint.asBitSet()); - for (int setbit : fingerprint.getSetbits()) { - System.out.println("Klekota substructure of index " + setbit + ": " + klekotasubstructfp.getSubstructure(setbit)); - } - - - return 0.0;//Tanimoto.calculate(pubchemfp.getBitFingerprint(a), pubchemfp.getBitFingerprint(b)); - } /** @@ -1155,196 +980,6 @@ public static HashMap getValueFrequencies(final 
ArrayList>>> getHybridizationsFromNMRShiftDB(final IAtomContainer ac, final String pathToNMRShiftDB, final double tol, final IMolecularFormula molFormula) throws FileNotFoundException{ -// -// final HashMap>> elementsHybridCounter = new HashMap<>(); -// final HashMap>> elementsBondTypeCounter = new HashMap<>(); -// final HashMap> expactedNeighbors = new HashMap<>(); -// String NMRSHIFT_ATOMTYPE; -// // initializations only -// for (int i = 0; i < ac.getAtomCount(); i++) { -// // sure case for carbon: 3 or 4 hydrogens -> sp3 -//// if (ac.getAtom(i).getSymbol().equals("C") && ac.getAtom(i).getImplicitHydrogenCount() >= 3) { -//// ac.getAtom(i).setHybridization(IAtomType.Hybridization.SP3); -//// continue; -//// } -// NMRSHIFT_ATOMTYPE = Utils.getNMRShiftConstant(ac.getAtom(i).getSymbol()); -// // is the NMR shift constant defined and does the nmr shift property entry in an atom exist? -// if ((NMRSHIFT_ATOMTYPE == null) || (ac.getAtom(i).getProperty(NMRSHIFT_ATOMTYPE) == null)) { -// continue; -// } -// elementsHybridCounter.put(i, new HashMap<>()); -// elementsBondTypeCounter.put(i, new HashMap<>()); -// elementsHybridCounter.get(i).put("query", new ArrayList<>()); -// elementsHybridCounter.get(i).put("queryH", new ArrayList<>()); -// // create an array list for each atom type in given molecular formula -// for (IElement elem : MolecularFormulaManipulator.getHeavyElements(molFormula)) { -// elementsHybridCounter.get(i).put(elem.getSymbol(), new ArrayList<>()); -// elementsBondTypeCounter.get(i).put(elem.getSymbol(), new ArrayList<>()); -// } -// -// expactedNeighbors.put(i, new HashMap<>()); -// for (IAtom expNeighbor : ac.getConnectedAtomsList(ac.getAtom(i))) { -// if (!expactedNeighbors.get(i).keySet().contains(expNeighbor.getSymbol())) { -// expactedNeighbors.get(i).put(expNeighbor.getSymbol(), 0); -// } -// expactedNeighbors.get(i).put(expNeighbor.getSymbol(), expactedNeighbors.get(i).get(expNeighbor.getSymbol()) + 1); -// } -// } -// // beginning of DB search 
-// String shiftsDB; -// double shiftDB, shiftQ; -// int atomIndexDB; -// boolean add, toContinue; -// final AtomHybridizationDescriptor hybridDesc = new AtomHybridizationDescriptor(); -// IAtom qAtom; -// IAtomContainer acDB; -// final IteratingSDFReader iterator = new IteratingSDFReader( -// new FileReader(pathToNMRShiftDB), -// SilentChemObjectBuilder.getInstance() -// ); -// while (iterator.hasNext()) { -// acDB = iterator.next(); -// ArrayList props = (ArrayList) (ArrayList) (new ArrayList<>(acDB.getProperties().keySet())); -// Collections.sort(props); -// // the DB entry should at least contain one carbon spectrum -// toContinue = false; -// for (String prop : props) { -// if (prop.contains("Spectrum " + Utils.getIsotopeIdentifier("C"))) { -// toContinue = true; -// break; -// } -// } -// if (!toContinue) { -// continue; -// } -// -// for (int i : elementsHybridCounter.keySet()) { -// qAtom = ac.getAtom(i); -// // check wether the DB entry contains a spectrum for the current query atom type -// shiftsDB = null; -// for (String prop : props) { -// if (prop.contains("Spectrum " + Utils.getIsotopeIdentifier(qAtom.getSymbol()))) { -// shiftsDB = acDB.getProperty(prop); -// break; -// } -// } -// if(shiftsDB == null){ -// continue; -// } -// // ignore the already set sp3 hybridizations at carbon atoms with at least 3 implicit hydrogens -//// if (qAtom.getSymbol().equals("C") && qAtom.getImplicitHydrogenCount() >= 3) { -//// continue; -//// } -// shiftQ = qAtom.getProperty(Utils.getNMRShiftConstant(ac.getAtom(i).getSymbol())); -// -// // check wether the DB entry contains a proton spectrum -// String shiftsDBHydrogen = null; -// for (String prop : props) { -// if (prop.contains("Spectrum " + Utils.getIsotopeIdentifier("H"))) { -// shiftsDBHydrogen = acDB.getProperty(prop); -// break; -// } -// } -// -// String[][] shiftsDBvalues = Utils.parseShiftsNMRShiftDB(shiftsDB); -// for (String[] shiftsDBvalue : shiftsDBvalues) { -// shiftDB = 
Double.parseDouble(shiftsDBvalue[0]); -// atomIndexDB = Integer.parseInt(shiftsDBvalue[2]); -// add = true; -// // shift match within a shift tolerance range -// if ((shiftQ - tol <= shiftDB) && (shiftDB <= shiftQ + tol)) { -// // matched atom should have the same number of attached (implicit) hydrogens -// if (acDB.getAtom(atomIndexDB).getImplicitHydrogenCount().intValue() == qAtom.getImplicitHydrogenCount().intValue()) { -// // count next neighbors -// HashMap foundNeighbors = new HashMap<>(); -// for (IAtom neighborAtomDB : acDB.getConnectedAtomsList(acDB.getAtom(atomIndexDB))) { -// if (!foundNeighbors.keySet().contains(neighborAtomDB.getSymbol())) { -// foundNeighbors.put(neighborAtomDB.getSymbol(), 0); -// } -// foundNeighbors.put(neighborAtomDB.getSymbol(), foundNeighbors.get(neighborAtomDB.getSymbol()) + 1); -// } -// // check whether the number of expacted next neighbors is higher than the number of found next neighbor, if yes then skip this DB atom match -// for (String elemExpNeighbor : expactedNeighbors.get(i).keySet()) { -// if (foundNeighbors.get(elemExpNeighbor) == null || (expactedNeighbors.get(i).get(elemExpNeighbor) > foundNeighbors.get(elemExpNeighbor))) { -// add = false; -// } -// } -// if(!add){ -// continue; -// } -// // only elements which occur in molecular formula of the unknown are allowed, otherwise skip this matched DB atom -// for (IAtom neighborAtomDB : acDB.getConnectedAtomsList(acDB.getAtom(atomIndexDB))) { -// if (MolecularFormulaManipulator.getElementCount(molFormula, neighborAtomDB.getSymbol()) == 0) { -// add = false; -// break; -// } -// // ignore explicit protons; ignore query atoms here, add them as below -> otherwise multiple counting -// if (!neighborAtomDB.getSymbol().equals("H")){// && !neighborAtomDB.getSymbol().equals(qAtom.getSymbol())) { -// elementsHybridCounter.get(i).get(neighborAtomDB.getSymbol()).add(Integer.parseInt(hybridDesc.calculate(neighborAtomDB, acDB).getValue().toString())); -// 
elementsBondTypeCounter.get(i).get(neighborAtomDB.getSymbol()).add(acDB.getBond(acDB.getAtom(atomIndexDB), neighborAtomDB).getOrder().numeric()); -// } -// } -// if(!add){ -// continue; -// } -// // likely allowed to add hybridization for query atom -// // check whether the shifts of attached hydrogens are equal to hydrogen shifts of query atom -> higher priority at hybridization assignment step later -// boolean added = false; -// if(shiftsDBHydrogen != null){ -// String[][] shiftsDBvaluesHydrogen = Utils.parseShiftsNMRShiftDB(shiftsDBHydrogen); -// if(qAtom.getProperty("HydrogenShifts") != null){ -// ArrayList shiftsQAtomvaluesHydrogen = qAtom.getProperty("HydrogenShifts"); -// for (int j = 0; j < shiftsQAtomvaluesHydrogen.size(); j++) { -// for (String[] shiftsDBvalueHydrogen : shiftsDBvaluesHydrogen) { -// shiftDB = Double.parseDouble(shiftsDBvalueHydrogen[0]); -// if((shiftsQAtomvaluesHydrogen.get(j) - 0.1 <= shiftDB) && (shiftDB <= shiftsQAtomvaluesHydrogen.get(j) + 0.1)){ -// elementsHybridCounter.get(i).get("queryH").add(Integer.parseInt(hybridDesc.calculate(acDB.getAtom(atomIndexDB), acDB).getValue().toString())); -// added = true; -// break; -// } -// } -// if(added){ -// break; -// } -// } -// } -// -// } else { -// elementsHybridCounter.get(i).get("query").add(Integer.parseInt(hybridDesc.calculate(acDB.getAtom(atomIndexDB), acDB).getValue().toString())); -// } -// } -// } -// } -// } -// } -// final HashMap>>> toReturn = new HashMap<>(); -// toReturn.put("hybridCounter", elementsHybridCounter); -// toReturn.put("bondTypeCounter", elementsBondTypeCounter); -// -// return toReturn; -// } /** * Returns a list of open bonds of an atom. 
From b061089ce9f30d7e14f98846d5e6bd7a345f0d9f Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Mon, 6 Aug 2018 18:52:42 +0200 Subject: [PATCH 021/405] - supports now the saving of assigment atom indices in Signal class objects - therefore in both classes some functions were added - Spectrum class no longer extends ArrayList object --- src/casekit/NMR/model/Signal.java | 46 +++++++--- src/casekit/NMR/model/Spectrum.java | 132 +++++++++++----------------- 2 files changed, 83 insertions(+), 95 deletions(-) diff --git a/src/casekit/NMR/model/Signal.java b/src/casekit/NMR/model/Signal.java index 1714647..aaf3e14 100644 --- a/src/casekit/NMR/model/Signal.java +++ b/src/casekit/NMR/model/Signal.java @@ -27,8 +27,6 @@ */ package casekit.NMR.model; -import java.util.ArrayList; - /** * * @author Michael Wenk [https://github.com/michaelwenk] @@ -40,9 +38,9 @@ public class Signal { /** * Am array of doubles to store the chemical shift of */ - private final Double shifts[]; + private final Double[] shifts; private final String[] nuclei; - private int[] assignedAtomIndices; + private final int[] assignedAtomIndices; /* Signal intensity in arbitrary values */ private Double intensity; @@ -56,15 +54,35 @@ public Signal(final String[] nuclei, final Double[] shifts) { this.nuclei = nuclei; this.ndim = this.nuclei.length; this.shifts = shifts; + this.assignedAtomIndices = this.initAssignedAtomIndices(this.ndim); } public Signal(final String[] nuclei, final Double[] shifts, final Double intensity) { this.nuclei = nuclei; this.ndim = this.nuclei.length; - this.shifts = shifts; + this.shifts = this.initShifts(shifts, this.ndim); + this.assignedAtomIndices = this.initAssignedAtomIndices(this.ndim); this.intensity = intensity; } + private Double[] initShifts(final Double[] shifts, final int ndim){ + final Double[] tempShifts = new Double[ndim]; + for (int d = 0; d < ndim; d++) { + tempShifts[d] = shifts[d]; + } + + return tempShifts; + } + + private int[] initAssignedAtomIndices(final int 
ndim){ + final int[] tempAssignedAtomIndices = new int[ndim]; + for (int d = 0; d < this.ndim; d++) { + tempAssignedAtomIndices[d] = -1; + } + + return tempAssignedAtomIndices; + } + public int getDim(){ return this.ndim; } @@ -101,7 +119,9 @@ public boolean setAssignedAtomIndices(final int[] indices){ if(indices.length != this.ndim){ return false; } - this.assignedAtomIndices = indices; + for (int d = 0; d < this.ndim; d++) { + this.assignedAtomIndices[d] = indices[d]; + } return true; } @@ -114,18 +134,12 @@ public boolean setAssignedAtomIndex(final int index, final int dim){ if(dim < 0 || dim >= this.ndim){ return false; } - if(this.assignedAtomIndices == null){ - this.assignedAtomIndices = new int[this.ndim]; - for (int i = 0; i < this.ndim; i++) { - this.assignedAtomIndices[i] = -1; - } - } this.assignedAtomIndices[dim] = index; return true; } - public int getAssignedAtomIndices(final int dim){ + public int getAssignedAtomIndex(final int dim){ return this.assignedAtomIndices[dim]; } @@ -152,6 +166,11 @@ public String toString() { return s; } + /** + * + * @return + * @deprecated + */ public Signal getClone(){ final Signal signalClone = new Signal(this.nuclei, this.shifts); signalClone.setIntensity(this.intensity); @@ -161,4 +180,5 @@ public Signal getClone(){ return signalClone; } + } diff --git a/src/casekit/NMR/model/Spectrum.java b/src/casekit/NMR/model/Spectrum.java index 576645a..6c821f4 100644 --- a/src/casekit/NMR/model/Spectrum.java +++ b/src/casekit/NMR/model/Spectrum.java @@ -34,7 +34,7 @@ * * @author Michael Wenk [https://github.com/michaelwenk] */ -public class Spectrum extends ArrayList{ +public class Spectrum { /** * An arbitrary name or description that can be assigned to this spectrum for identification purposes. @@ -47,10 +47,6 @@ public class Spectrum extends ArrayList{ */ private String specType; -// /** -// * Not yet clear if this is needed. 
-// */ -// private float[] pickPrecision; /** * Declares how many axes are in involved in this spectrum. */ @@ -66,11 +62,7 @@ public class Spectrum extends ArrayList{ private String solvent; private String standard; - /** - * This holds sorted list of Chemical Shifts of all axes. The first dimension addresses the - * axes, the second the shift values in this axis, starting from the highest value. - */ - private final ArrayList> shiftList = new ArrayList<>(); + private final ArrayList signals = new ArrayList<>(); public Spectrum(final String[] nuclei, final ArrayList[] shiftLists, final ArrayList intensities) { @@ -134,12 +126,11 @@ public final void addSignals(final ArrayList[] shiftLists, final ArrayLi shifts[col] = shiftLists[col].get(row); } if(intensities != null){ - this.add(new casekit.NMR.model.Signal(this.nuclei, shifts, intensities.get(row))); + this.addSignal(new casekit.NMR.model.Signal(this.nuclei, shifts, intensities.get(row))); } else { - this.add(new casekit.NMR.model.Signal(this.nuclei, shifts)); + this.addSignal(new casekit.NMR.model.Signal(this.nuclei, shifts)); } } - this.updateShiftLists(); } @@ -149,7 +140,7 @@ public final void addSignals(final ArrayList[] shiftLists, final ArrayLi * @return */ public int getSignalCount() { - return this.size(); + return this.signals.size(); } /** @@ -157,18 +148,15 @@ public int getSignalCount() { * @param signal */ public void addSignal(final Signal signal) { - this.add(signal); - this.updateShiftLists(); + this.signals.add(signal); } public void removeSignal(final Signal signal){ - this.remove(this.getSignalIndex(signal)); - this.updateShiftLists(); + this.signals.remove(this.getSignalIndex(signal)); } public void removeSignal(final int signalIndex){ - this.remove(signalIndex); - this.updateShiftLists(); + this.signals.remove(signalIndex); } /** @@ -177,12 +165,12 @@ public void removeSignal(final int signalIndex){ * @return */ public Signal getSignal(final int signalIndex) { - return 
this.get(signalIndex); + return this.signals.get(signalIndex); } public ArrayList getIntensities(){ final ArrayList intensities = new ArrayList<>(); - for (Signal sig : this) { + for (Signal sig : this.signals) { intensities.add(sig.getIntensity()); } @@ -191,12 +179,25 @@ public ArrayList getIntensities(){ public ArrayList getShiftsByDim(final int dim){ final ArrayList shifts = new ArrayList<>(); - for (final Signal sig : this) { + for (final Signal sig : this.signals) { shifts.add(sig.getShift(dim)); } return shifts; } + + public int getAssignmentAtomIndexByDim(final int signalIndex, final int dim){ + return this.getSignal(signalIndex).getAssignedAtomIndex(dim); + } + + public ArrayList getAssignmentAtomIndicesByDim(final int dim){ + final ArrayList indices = new ArrayList<>(); + for (final Signal sig : this.signals) { + indices.add(sig.getAssignedAtomIndex(dim)); + } + + return indices; + } /** * Returns the position of an NMRSignal the List @@ -204,8 +205,8 @@ public ArrayList getShiftsByDim(final int dim){ * @return */ public int getSignalIndex(final Signal signal) { - for (int f = 0; f < this.size(); f++) { - if (this.get(f) == signal) { + for (int f = 0; f < this.signals.size(); f++) { + if (this.signals.get(f) == signal) { return f; } } @@ -235,6 +236,15 @@ public void setStandard(final String standard) { public String getStandard() { return standard; } + + public void setAssignedAtomIndicesByDim(final ArrayList indices, final int dim){ + if(indices.size() != this.getShiftsByDim(0).size()){ + return; + } + for (int i = 0; i < indices.size(); i++) { + this.getSignal(i).setAssignedAtomIndex(indices.get(i), dim); + } + } /** * Returns the signal closest to the shift sought. If no Signal is found within the interval @@ -251,14 +261,14 @@ public Signal pickClosestSignal(final Double shift, final int dim, final double /* * Now we search dimension dim for the chemical shift. 
*/ - for (int f = 0; f < this.size(); f++) { - if (Math.abs((this.get(f)).getShift(dim) - shift) < diff) { - diff = Math.abs((this.get(f)).getShift(dim) - shift); + for (int f = 0; f < this.signals.size(); f++) { + if (Math.abs((this.signals.get(f)).getShift(dim) - shift) < diff) { + diff = Math.abs((this.signals.get(f)).getShift(dim) - shift); thisPosition = f; } } if(thisPosition >= 0){ - this.get(thisPosition); + this.signals.get(thisPosition); } return null; @@ -266,7 +276,7 @@ public Signal pickClosestSignal(final Double shift, final int dim, final double /** * Returns a List with signals within the interval defined by pickPrecision. If none is found - * an empty List is returned. + * an empty ArrayList is returned. * @param shift * @param dim * @param pickPrecision @@ -277,64 +287,22 @@ public ArrayList pickSignals(final double shift, final int dim, final do /* * Now we search dimension dim for the chemical shift. */ - for (final Signal sig : this) { + for (final Signal sig : this.signals) { if (Math.abs(sig.getShift(dim) - shift) < pickPrecision) { pickedSignals.add(sig); } } return pickedSignals; } - - /** - * Extracts a list of unique shifts from the list of cross signals. This is to - * define the column and row headers for tables. - */ - private void updateShiftLists() { - this.shiftList.clear(); - for (int dim = 0; dim < this.getDimCount(); dim++) { - this.shiftList.add(dim, new ArrayList<>()); - } - Double shift; casekit.NMR.model.Signal nmrSignal; - for (int i = 0; i < this.size(); i++) { - nmrSignal = this.get(i); - for (int d = 0; d < this.getDimCount(); d++) { - shift = nmrSignal.getShift(d); - if (!this.shiftList.get(d).contains(shift)) { - this.shiftList.get(d).add(shift); - } - } - } - } - - /** - * Creates a 2D matrix of booleans, that models the set of crosspeaks in the 2D NMR spectrum. - * The dimensions are taken from hetAtomShiftList and protonShiftList, which again are - * produced by updateShiftLists based a collection of 2D nmrSignals - *

- * private void createMatrix(){ boolean found; float het, prot; int hetPos, protPos; - * hetCorMatrix = new boolean[hetAtomShiftList.length][protonShiftList.length]; for (int f = - * 0; f < size(); f++){ HetCorNMRSignal hetCorSignal = (HetCorNMRSignal)elementAt(f); prot = - * hetCorSignal.shift[NMRSignal.SHIFT_PROTON]; het = - * hetCorSignal.shift[NMRSignal.SHIFT_HETERO]; found = false; hetPos = - * isInShiftList(hetAtomShiftList, het, hetAtomShiftList.length); if (hetPos >= 0){ protPos = - * isInShiftList(protonShiftList, prot, protonShiftList.length); if ( protPos >= 0){ found = - * true; hetCorMatrix[hetPos][protPos] = true; } } } } - * @return - */ - public String report() { - String s = ""; -// s+= "Report for nmr spectrum " + name + " of type " -// + specType + ":\n\n"; - for (int i = 0; i < this.shiftList.size(); i++) { - s += "\nShiftList for dimension " + (i + 1) + ":\n"; - for (int d = 0; d < this.shiftList.get(i).size(); d++) { - s += this.shiftList.get(i).get(d) + "; "; - } - } - s += "\nBelonging intensities:\n"; - for (Signal sig : this) { - s += sig.getIntensity() + "; "; + + public ArrayList[] getShifts(){ + final ArrayList[] shiftsList = new ArrayList[this.ndim]; + for (int d = 0; d < this.ndim; d++) { + shiftsList[d] = this.getShiftsByDim(d); } - return s; + + return shiftsList; } + + } From c3f3b28dc30958ddf04be48a960599ff0cbbb49c Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Mon, 6 Aug 2018 19:00:23 +0200 Subject: [PATCH 022/405] - some changes --- src/casekit/NMR/test.java | 42 +++++++++++++++++++++++++++++---------- 1 file changed, 32 insertions(+), 10 deletions(-) diff --git a/src/casekit/NMR/test.java b/src/casekit/NMR/test.java index 5a5ffb4..57a4b3e 100644 --- a/src/casekit/NMR/test.java +++ b/src/casekit/NMR/test.java @@ -12,11 +12,14 @@ import java.sql.Connection; import java.sql.SQLException; import java.util.ArrayList; +import java.util.Arrays; import java.util.logging.Level; import java.util.logging.Logger; import 
javax.xml.parsers.ParserConfigurationException; +import org.openscience.cdk.Atom; import org.openscience.cdk.CDKConstants; import org.openscience.cdk.DefaultChemObjectBuilder; +import org.openscience.cdk.interfaces.IAtom; import org.openscience.cdk.interfaces.IAtomContainer; import org.openscience.cdk.interfaces.IBond; import org.openscience.cdk.interfaces.IMolecularFormula; @@ -71,7 +74,7 @@ public static void main(String[] args) throws ParserConfigurationException, SAXE final IMolecularFormula molFormula_HJ777 = MolecularFormulaManipulator.getMolecularFormula(molFormulaString_HJ777, DefaultChemObjectBuilder.getInstance()); String projectName = ""; casekit.NMR.Process process = null; - Spectrum spec; + Spectrum spec = null; IAtomContainer ac; @@ -79,32 +82,51 @@ public static void main(String[] args) throws ParserConfigurationException, SAXE // HJ555 projectName = "HJ555"; process = new Process(molFormula_HJ555); - spec = Utils.parseXML(Peaks13C_HJ555, 1, new int[]{1}, new String[]{"C"}); - process.set1DNMRShifts(spec); + spec = process.parse1DNMRviaXML(Peaks13C_HJ555, "C"); +// spec.setSpecType(CDKConstants.NMRSPECTYPE_1D_ARBITRARY); + process.set1DNMR(spec); + System.out.println("assignments spectrum 13C: " + spec.getAssignmentAtomIndicesByDim(0)); // process.parse1DNMR(Peaks13C_HJ555, "C"); - spec = Utils.parseXML(PeaksDEPT90_HJ555, 1, new int[]{1}, new String[]{"C"}); - Spectrum spec135 = Utils.parseXML(PeaksDEPT135_HJ555, 1, new int[]{1}, new String[]{"C"}); + spec = process.parseDEPTviaXML(PeaksDEPT90_HJ555); + spec.setSpecType(CDKConstants.NMRSPECTYPE_1D_DEPT90); + Spectrum spec135 = process.parseDEPTviaXML(PeaksDEPT135_HJ555); + spec135.setSpecType(CDKConstants.NMRSPECTYPE_1D_DEPT135); int assignedHAtoms = process.setDEPT(spec, spec135, tolC); // int assignedHAtoms = process.parseDEPT(PeaksDEPT90_HJ555, PeaksDEPT135_HJ555, tolC); System.out.println("assigned protons: " + assignedHAtoms); + System.out.println("assignments spectrum DEPT90: " + 
spec.getAssignmentAtomIndicesByDim(0)); + System.out.println("assignments spectrum DEPT135: " + spec135.getAssignmentAtomIndicesByDim(0)); - spec = Utils.parseXML(PeaksHSQC_HJ555, 2, new int[]{2, 1}, new String[]{"H", "C"}); + spec = process.parseHSQCviaXML(PeaksHSQC_HJ555, "C"); + spec.setSpecType(CDKConstants.NMRSPECTYPE_2D_HSQC); process.setHSQC(spec, tolC); // process.parseHSQC(PeaksHSQC_HJ555, "C", tolH); + System.out.println("assignments spectrum HSQC 1: " + spec.getAssignmentAtomIndicesByDim(0)); + System.out.println("assignments spectrum HSQC 2: " + spec.getAssignmentAtomIndicesByDim(1)); - spec = Utils.parseXML(PeaksCOSY_HJ555, 2, new int[]{2, 1}, new String[]{"H", "H"}); + spec = process.parseHHCOSYviaXML(PeaksCOSY_HJ555); + spec.setSpecType(CDKConstants.NMRSPECTYPE_2D_HHCOSY); process.setHHCOSY(spec, tolH); // process.parseHHCOSY(PeaksCOSY_HJ555, tolH); + System.out.println("assignments spectrum H,H-COSY 1: " + spec.getAssignmentAtomIndicesByDim(0)); + System.out.println("assignments spectrum H,H-COSY 2: " + spec.getAssignmentAtomIndicesByDim(1)); - spec = Utils.parseXML(PeaksINADEQUATE_HJ555, 2, new int[]{2, 1}, new String[]{"C", "C"}); + spec = process.parseINADEQUATEviaXML(PeaksINADEQUATE_HJ555); + spec.setSpecType(CDKConstants.NMRSPECTYPE_2D_INADEQUATE); process.setINADEQUATE(spec, tolC); // process.parseINADEQUATE(PeaksINADEQUATE_HJ555, tolC); + System.out.println("assignments spectrum INADEQUATE 1: " + spec.getAssignmentAtomIndicesByDim(0)); + System.out.println("assignments spectrum INADEQUATE 2: " + spec.getAssignmentAtomIndicesByDim(1)); - spec = Utils.parseXML(PeaksHMBC_HJ555, 2, new int[]{2, 1}, new String[]{"H", "C"}); + spec = process.parseHMBCviaXML(PeaksHMBC_HJ555, "C"); + spec.setSpecType(CDKConstants.NMRSPECTYPE_2D_HMBC); process.setHMBC(spec, tolH, tolC); // process.parseHMBC(PeaksHMBC_HJ555, "C", tolH, tolC); - + System.out.println("assignments spectrum HMBC 1: " + spec.getAssignmentAtomIndicesByDim(0)); + 
System.out.println("assignments spectrum HMBC 2: " + spec.getAssignmentAtomIndicesByDim(1)); + + process.setEquivalentProperties(); process.setBonds(new String[]{CDKConstants.NMRSPECTYPE_2D_HHCOSY, CDKConstants.NMRSPECTYPE_2D_INADEQUATE, CDKConstants.NMRSPECTYPE_2D_HMBC}); // without hybridizations process.createLSDFile(projectName, "/Users/mwenk/Downloads/testLSD", new String[]{"/Users/mwenk/work/software/LSD-3.4.9/Filters/", "/Users/mwenk/work/software/LSD-3.4.9/Filters/MOLGEN/badlist1/"}); From c1bc5cbcfc2a10cced7644f21481757e4847cf88 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Mon, 6 Aug 2018 19:40:31 +0200 Subject: [PATCH 023/405] - made the parsing functions static --- src/casekit/NMR/ParseRawData.java | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/casekit/NMR/ParseRawData.java b/src/casekit/NMR/ParseRawData.java index d659e46..72f6655 100644 --- a/src/casekit/NMR/ParseRawData.java +++ b/src/casekit/NMR/ParseRawData.java @@ -155,7 +155,7 @@ public final void setEquivalentProperties() { * @return Spectrum class object from given input file * @throws java.io.IOException */ - public final Spectrum parse1DNMRviaCSV(final String pathToCSV, final String atomType) throws IOException { + public static final Spectrum parse1DNMRviaCSV(final String pathToCSV, final String atomType) throws IOException { final Spectrum spectrum = Utils.CSVtoSpectrum(pathToCSV, new int[]{4}, new String[]{atomType}, 6); @@ -175,7 +175,7 @@ public final Spectrum parse1DNMRviaCSV(final String pathToCSV, final String atom * @throws javax.xml.parsers.ParserConfigurationException * @throws org.xml.sax.SAXException */ - public final Spectrum parse1DNMRviaXML(final String pathToXML, final String atomType) throws IOException, ParserConfigurationException, SAXException { + public static final Spectrum parse1DNMRviaXML(final String pathToXML, final String atomType) throws IOException, ParserConfigurationException, SAXException { final Spectrum 
spectrum = Utils.XMLtoSpectrum(pathToXML, 1, new int[]{1}, new String[]{atomType}); @@ -299,7 +299,7 @@ private void askForEquivalentPeaks(final Spectrum spectrum, final String atomTyp * @return * @throws java.io.IOException */ - public final Spectrum parseDEPTviaCSV(final String pathToDEPT) throws IOException { + public static final Spectrum parseDEPTviaCSV(final String pathToDEPT) throws IOException { final Spectrum spectrum = Utils.CSVtoSpectrum(pathToDEPT, new int[]{4}, new String[]{"C"}, 6); @@ -318,7 +318,7 @@ public final Spectrum parseDEPTviaCSV(final String pathToDEPT) throws IOExceptio * @throws javax.xml.parsers.ParserConfigurationException * @throws org.xml.sax.SAXException */ - public final Spectrum parseDEPTviaXML(final String pathToDEPT) throws IOException, ParserConfigurationException, SAXException { + public static final Spectrum parseDEPTviaXML(final String pathToDEPT) throws IOException, ParserConfigurationException, SAXException { final Spectrum spectrum = Utils.XMLtoSpectrum(pathToDEPT, 1, new int[]{1}, new String[]{"C"}); @@ -415,7 +415,7 @@ private int setImplicitHydrogenNumberFromDEPT(final ArrayList matchesDE * @return * @throws IOException */ - public final Spectrum parseHSQCviaCSV(final String pathToCSV, final String heavyAtomType) throws IOException { + public static final Spectrum parseHSQCviaCSV(final String pathToCSV, final String heavyAtomType) throws IOException { final Spectrum spectrum = new Spectrum( new String[]{Utils.getIsotopeIdentifier("H"), Utils.getIsotopeIdentifier(heavyAtomType)}, @@ -437,7 +437,7 @@ public final Spectrum parseHSQCviaCSV(final String pathToCSV, final String heavy * @throws javax.xml.parsers.ParserConfigurationException * @throws org.xml.sax.SAXException */ - public final Spectrum parseHSQCviaXML(final String pathToXML, final String heavyAtomType) throws IOException, ParserConfigurationException, SAXException { + public static final Spectrum parseHSQCviaXML(final String pathToXML, final String 
heavyAtomType) throws IOException, ParserConfigurationException, SAXException { final Spectrum spectrum = new Spectrum( new String[]{ Utils.getIsotopeIdentifier("H"), Utils.getIsotopeIdentifier(heavyAtomType)}, @@ -613,7 +613,7 @@ private ArrayList correctHydrogenShiftMatches(final ArrayList s * @return * @throws IOException */ - public final Spectrum parseHHCOSYviaCSV(final String pathToCSV) throws IOException { + public static final Spectrum parseHHCOSYviaCSV(final String pathToCSV) throws IOException { final Spectrum spectrum = new Spectrum( new String[]{ Utils.getIsotopeIdentifier("H"), Utils.getIsotopeIdentifier("H")}, @@ -634,7 +634,7 @@ public final Spectrum parseHHCOSYviaCSV(final String pathToCSV) throws IOExcepti * @throws javax.xml.parsers.ParserConfigurationException * @throws org.xml.sax.SAXException */ - public final Spectrum parseHHCOSYviaXML(final String pathToXML) throws IOException, ParserConfigurationException, SAXException { + public static final Spectrum parseHHCOSYviaXML(final String pathToXML) throws IOException, ParserConfigurationException, SAXException { final Spectrum spectrum = new Spectrum( new String[]{ Utils.getIsotopeIdentifier("H"), Utils.getIsotopeIdentifier("H")}, @@ -680,7 +680,7 @@ public final boolean setHHCOSY(final Spectrum spectrum, final double tol) { * @return * @throws IOException */ - public final Spectrum parseINADEQUATEviaCSV(final String pathToCSV) throws IOException { + public static final Spectrum parseINADEQUATEviaCSV(final String pathToCSV) throws IOException { final Spectrum spectrum = new Spectrum( new String[]{ Utils.getIsotopeIdentifier("C"), Utils.getIsotopeIdentifier("C")}, @@ -701,7 +701,7 @@ public final Spectrum parseINADEQUATEviaCSV(final String pathToCSV) throws IOExc * @throws javax.xml.parsers.ParserConfigurationException * @throws org.xml.sax.SAXException */ - public final Spectrum parseINADEQUATEviaXML(final String pathToXML) throws IOException, ParserConfigurationException, SAXException { + public 
static final Spectrum parseINADEQUATEviaXML(final String pathToXML) throws IOException, ParserConfigurationException, SAXException { final Spectrum spectrum = new Spectrum( new String[]{ Utils.getIsotopeIdentifier("C"), Utils.getIsotopeIdentifier("C")}, @@ -751,7 +751,7 @@ public final boolean setINADEQUATE(final Spectrum spectrum, final double tol) { * @return * @throws IOException */ - public final Spectrum parseHMBCviaCSV(final String pathToCSV, final String heavyAtomType) throws IOException { + public static final Spectrum parseHMBCviaCSV(final String pathToCSV, final String heavyAtomType) throws IOException { final Spectrum spectrum = new Spectrum( new String[]{Utils.getIsotopeIdentifier("H"), Utils.getIsotopeIdentifier(heavyAtomType)}, @@ -775,7 +775,7 @@ public final Spectrum parseHMBCviaCSV(final String pathToCSV, final String heavy * @throws javax.xml.parsers.ParserConfigurationException * @throws org.xml.sax.SAXException */ - public final Spectrum parseHMBCviaXML(final String pathToXML, final String heavyAtomType) throws IOException, ParserConfigurationException, SAXException { + public static final Spectrum parseHMBCviaXML(final String pathToXML, final String heavyAtomType) throws IOException, ParserConfigurationException, SAXException { final Spectrum spectrum = new Spectrum( new String[]{ Utils.getIsotopeIdentifier("H"), Utils.getIsotopeIdentifier(heavyAtomType)}, From bb5d4e7ad2765255726d20520ac5993a485a93e1 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Tue, 7 Aug 2018 15:25:31 +0200 Subject: [PATCH 024/405] - new class constructor added --- src/casekit/NMR/model/Spectrum.java | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/casekit/NMR/model/Spectrum.java b/src/casekit/NMR/model/Spectrum.java index 6c821f4..7227c08 100644 --- a/src/casekit/NMR/model/Spectrum.java +++ b/src/casekit/NMR/model/Spectrum.java @@ -63,7 +63,7 @@ public class Spectrum { private String standard; private final ArrayList signals = new 
ArrayList<>(); - + public Spectrum(final String[] nuclei, final ArrayList[] shiftLists, final ArrayList intensities) { this.nuclei = nuclei; @@ -71,6 +71,12 @@ public Spectrum(final String[] nuclei, final ArrayList[] shiftLists, fin this.addSignals(shiftLists, intensities); } + public Spectrum(final String[] nuclei, final ArrayList[] shiftLists) { + this.nuclei = nuclei; + this.ndim = this.nuclei.length; + this.addSignals(shiftLists, null); + } + public Spectrum(final String[] nuclei) { this.nuclei = nuclei; this.ndim = this.nuclei.length; @@ -133,12 +139,6 @@ public final void addSignals(final ArrayList[] shiftLists, final ArrayLi } } - - /** - * Return the number of individual frequencies in the heteroatom shift list, which should be - * equal or smaller than the number of respective atoms - * @return - */ public int getSignalCount() { return this.signals.size(); } From 5cf99bf60f723c5eb831f194749262b99fca228e Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Fri, 10 Aug 2018 20:42:01 +0200 Subject: [PATCH 025/405] - changes in Set1D/2D functions: to get match indices in atom container to use it for new developed Assignment class objects --- src/casekit/NMR/ParseRawData.java | 347 +++++++++++------------------- 1 file changed, 130 insertions(+), 217 deletions(-) diff --git a/src/casekit/NMR/ParseRawData.java b/src/casekit/NMR/ParseRawData.java index 72f6655..79aef9b 100644 --- a/src/casekit/NMR/ParseRawData.java +++ b/src/casekit/NMR/ParseRawData.java @@ -24,14 +24,11 @@ package casekit.NMR; import casekit.NMR.model.Spectrum; -import java.io.BufferedReader; import java.io.IOException; -import java.io.InputStreamReader; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; -import java.util.Map; import javax.xml.parsers.ParserConfigurationException; import org.openscience.cdk.Atom; import org.openscience.cdk.CDKConstants; @@ -51,9 +48,7 @@ public class ParseRawData { final private IAtomContainer mol; final 
private IMolecularFormula molFormula; private HashMap> atomTypeIndices; - - public final static String PROP_EQUIVALENCE = "equivalences"; - + /** * Creates an instances of this class with an empty class atom container. @@ -122,29 +117,6 @@ public final void setAtomTypeIndices(){ } - /** - * Copies all up to here set properties from an atom in atom container to its - * linked atoms with equivalent shift values. - * - */ - public final void setEquivalentProperties() { - - Map properties; - for (int i = 0; i < this.mol.getAtomCount(); i++) { - if (this.mol.getAtom(i).getProperty(ParseRawData.PROP_EQUIVALENCE) != null) { - properties = this.mol.getAtom(i).getProperties(); - for (final Object prop: properties.keySet()) { - if (this.mol.getAtom(i).getProperty(prop) != null && !prop.equals(ParseRawData.PROP_EQUIVALENCE)) { - for (final int k : (ArrayList) this.mol.getAtom(i).getProperty(ParseRawData.PROP_EQUIVALENCE)) { - this.mol.getAtom(k).setProperty(prop, this.mol.getAtom(i).getProperty(prop)); - } - } - } - } - } - } - - /** * Creates a Spectrum class object from 1D NMR peak list in CSV file format. 
* @@ -156,10 +128,8 @@ public final void setEquivalentProperties() { * @throws java.io.IOException */ public static final Spectrum parse1DNMRviaCSV(final String pathToCSV, final String atomType) throws IOException { - - final Spectrum spectrum = Utils.CSVtoSpectrum(pathToCSV, new int[]{4}, new String[]{atomType}, 6); - - return spectrum; + + return Utils.CSVtoSpectrum(pathToCSV, new int[]{4}, new String[]{atomType}, 6); } @@ -177,9 +147,7 @@ public static final Spectrum parse1DNMRviaCSV(final String pathToCSV, final Stri */ public static final Spectrum parse1DNMRviaXML(final String pathToXML, final String atomType) throws IOException, ParserConfigurationException, SAXException { - final Spectrum spectrum = Utils.XMLtoSpectrum(pathToXML, 1, new int[]{1}, new String[]{atomType}); - - return spectrum; + return Utils.XMLtoSpectrum(pathToXML, 1, new int[]{1}, new String[]{atomType}); } @@ -196,94 +164,88 @@ public static final Spectrum parse1DNMRviaXML(final String pathToXML, final Stri * as result of Utils#getNMRShiftConstant(java.lang.String)}, depending on * the specified atom type (element). * After usage of this function, the input Spectrum class object might be extended during - * equivalent signal selection by user and contains the signal assignment indices - * in atom container. + * equivalent signal selection by user. 
* * @param spectrum Spectrum class object containing the 1D shift information * @return * @throws java.io.IOException */ - public final boolean set1DNMR(final Spectrum spectrum) throws IOException{ - final String atomType = Utils.getElementIdentifier(spectrum.getNuclei()[0]); - ArrayList shifts = spectrum.getShiftsByDim(0); - // check whether indices for that atom type exist or the number of input signals are greater than the atom number in atom container for that atom type - if (!this.atomTypeIndices.containsKey(atomType) || shifts.size() > this.atomTypeIndices.get(atomType).size()) { - // if molecular formula is known and too much picked peaks are to be assigned - if(this.atomTypeIndices.containsKey(atomType) || MolecularFormulaManipulator.getElementCount(this.molFormula, atomType) == 0){ - System.err.println("Too many peaks in peak list for \"" + atomType + "\" and molecular formula \"" + MolecularFormulaManipulator.getString(this.molFormula) + "\"!!!"); - return false; - } else { // - // "fill up" the first peaks for that atom type from given peak list - IAtom atom; - for (final double shift : shifts) { - atom = new Atom(atomType); - atom.setProperty(casekit.NMR.Utils.getNMRShiftConstant(atomType), shift); - atom.setImplicitHydrogenCount(null); - this.mol.addAtom(atom); - } - this.setAtomTypeIndices(); + public final ArrayList set1DNMR(final Spectrum spectrum) throws IOException{ + + // checks whether number of signals is equal to molecular formula if given + // if not equal then edit signal list in spectrum + this.check1DSpectrum(spectrum); + // assign shift values to atoms sequentially + this.assignShiftValues(spectrum); + + return this.atomTypeIndices.get(Utils.getAtomTypeFromSpectrum(spectrum, 0)); + } + + /** + * Checks the number of signals in a spectrum against the number of atoms + * in molecular formula of class, if given. In case of different numbers, + * a user input will be requested. 
+ * + * @param spectrum + * @throws IOException + * @see Utils#editSignalsInSpectrum(casekit.NMR.model.Spectrum, org.openscience.cdk.interfaces.IMolecularFormula) + */ + public void check1DSpectrum(final Spectrum spectrum) throws IOException{ + if(this.molFormula != null){ + final int diff = Utils.getDifferenceSpectrumSizeAndMolecularFormulaCount(spectrum, this.molFormula); + if (diff != 0) { + // adjust Spectrum size by user + Utils.editSignalsInSpectrum(spectrum, this.molFormula); } } - // assign shift values to atoms sequentially + } + + + private void assignShiftValues(final Spectrum spectrum){ + final String atomType = Utils.getAtomTypeFromSpectrum(spectrum, 0); + final ArrayList shifts = spectrum.getShifts(0); + if(this.molFormula == null){ + this.removeAtoms(atomType); + // fill up the peaks for that atom type from given peak list in spectrum + IAtom atom; + for (final double shift : shifts) { + atom = new Atom(atomType); + atom.setProperty(casekit.NMR.Utils.getNMRShiftConstant(atomType), shift); + atom.setImplicitHydrogenCount(null); + this.mol.addAtom(atom); + } + this.setAtomTypeIndices(); + } int assignedShiftCount = 0; for (final int i : this.atomTypeIndices.get(atomType)) { - if(assignedShiftCount < shifts.size()){ + if (assignedShiftCount < shifts.size()) { // shift assignment in atom this.mol.getAtom(i).setProperty(casekit.NMR.Utils.getNMRShiftConstant(atomType), shifts.get(assignedShiftCount)); - spectrum.getSignal(assignedShiftCount).setAssignedAtomIndex(i, 0); } assignedShiftCount++; } - // "fill up" the missing equivalent peaks - // check whether the number of input signals is smaller than the number of atoms in atom container from that atom type - if (shifts.size() < this.atomTypeIndices.get(atomType).size()) { - System.out.println("Not enough peaks in 1D peak list for \"" + atomType + "\"!!!"); - this.askForEquivalentPeaks(spectrum, atomType); - } - - return true; } - private void askForEquivalentPeaks(final Spectrum spectrum, final String 
atomType) throws IOException { - - BufferedReader br = new BufferedReader(new InputStreamReader(System.in)); int n; - final ArrayList validIndices = new ArrayList<>(); - // walk through all atoms of given atom type - for (final int i : this.atomTypeIndices.get(atomType)) { - // ignore atoms with already set NMR shift value - if (this.mol.getAtom(i).getProperty(Utils.getNMRShiftConstant(atomType)) != null) { - continue; - } - // display all selectable atom indices in atom container - System.out.println("\nThe " + i + "th shift value is missing!\nWhich shift value is not unique?"); - for (final int k : this.atomTypeIndices.get(atomType)) { - if(this.mol.getAtom(k).getProperty(Utils.getNMRShiftConstant(atomType)) != null){ - System.out.println(k + "\t: " + this.mol.getAtom(k).getProperty(Utils.getNMRShiftConstant(atomType))); - validIndices.add(k); - } - } - // get selected index by user input - n = -1; - while(!validIndices.contains(n)){ - System.out.println("Enter the index: "); - n = Integer.parseInt(br.readLine()); - } - - spectrum.addSignal(spectrum.getSignal(validIndices.indexOf(n))); - // copy NMR shift value from already set value in selected atom n to unset value in atom i - this.mol.getAtom(i).setProperty(Utils.getNMRShiftConstant(atomType), this.mol.getAtom(n).getProperty(Utils.getNMRShiftConstant(atomType))); - - - if(this.mol.getAtom(i).getProperty(ParseRawData.PROP_EQUIVALENCE) == null){ - this.mol.getAtom(i).setProperty(ParseRawData.PROP_EQUIVALENCE, new ArrayList<>()); - } - if(this.mol.getAtom(n).getProperty(ParseRawData.PROP_EQUIVALENCE) == null){ - this.mol.getAtom(n).setProperty(ParseRawData.PROP_EQUIVALENCE, new ArrayList<>()); - } - ((ArrayList) this.mol.getAtom(i).getProperty(ParseRawData.PROP_EQUIVALENCE)).add(n); - ((ArrayList) this.mol.getAtom(n).getProperty(ParseRawData.PROP_EQUIVALENCE)).add(i); + /** + * Removes atoms from a given atom type from the class' atom container. + * + * @param atomType Atom type (element's name, e.g. 
C or Br) + * @return IAtomContainer where the atoms were removed + */ + private void removeAtoms(final String atomType) { + if(this.getAtomTypeIndices().get(atomType) == null){ + return; } + final ArrayList toRemoveList = new ArrayList<>(); + for (final int i: this.getAtomTypeIndices().get(atomType)) { + toRemoveList.add(this.mol.getAtom(i)); + } + for (IAtom iAtom : toRemoveList) { + this.mol.removeAtom(iAtom); + } + + this.setAtomTypeIndices(); } @@ -294,16 +256,14 @@ private void askForEquivalentPeaks(final Spectrum spectrum, final String atomTyp * see * {@link Utils#correctShiftMatches(IAtomContainer, ArrayList, ArrayList, double,String)}. * - * @param pathToDEPT Path to one DEPT peak list (Bruker's TopSpin csv file + * @param pathToCSV Path to one DEPT peak list (Bruker's TopSpin csv file * format) * @return * @throws java.io.IOException */ - public static final Spectrum parseDEPTviaCSV(final String pathToDEPT) throws IOException { + public static final Spectrum parseDEPTviaCSV(final String pathToCSV) throws IOException { - final Spectrum spectrum = Utils.CSVtoSpectrum(pathToDEPT, new int[]{4}, new String[]{"C"}, 6); - - return spectrum; + return Utils.CSVtoSpectrum(pathToCSV, new int[]{4}, new String[]{"C"}, 6); } /** @@ -311,18 +271,16 @@ public static final Spectrum parseDEPTviaCSV(final String pathToDEPT) throws IOE * XML files to carbon atoms. The meanwhile found matches are corrected, see * {@link Utils#correctShiftMatches(IAtomContainer, ArrayList, ArrayList, double,String)}. 
* - * @param pathToDEPT Path to one DEPT peak list (Bruker's TopSpin XML file + * @param pathToXML Path to one DEPT peak list (Bruker's TopSpin XML file * format) * @return * @throws java.io.IOException * @throws javax.xml.parsers.ParserConfigurationException * @throws org.xml.sax.SAXException */ - public static final Spectrum parseDEPTviaXML(final String pathToDEPT) throws IOException, ParserConfigurationException, SAXException { + public static final Spectrum parseDEPTviaXML(final String pathToXML) throws IOException, ParserConfigurationException, SAXException { - final Spectrum spectrum = Utils.XMLtoSpectrum(pathToDEPT, 1, new int[]{1}, new String[]{"C"}); - - return spectrum; + return Utils.XMLtoSpectrum(pathToXML, 1, new int[]{1}, new String[]{"C"}); } /** @@ -333,25 +291,27 @@ public static final Spectrum parseDEPTviaXML(final String pathToDEPT) throws IOE * @param spectrumDEPT135 DEPT135 spectrum which has to contain intensity * information * @param tol tolerance value [ppm] for carbon shift matching - * @return false if one of the spectra is not set or the intensities are missing + * @return false if one of the spectra is not set or the intensities in + * DEPT135 are missing */ - public final int setDEPT(final Spectrum spectrumDEPT90, final Spectrum spectrumDEPT135, final double tol){ - + public final HashMap> setDEPT(final Spectrum spectrumDEPT90, final Spectrum spectrumDEPT135, final double tol){ + final HashMap> matches = new HashMap<>(); if(spectrumDEPT90 == null || spectrumDEPT135 == null || spectrumDEPT135.getIntensities() == null){ - return 0; + return null; } - final ArrayList shiftsDEPT90 = spectrumDEPT90.getShiftsByDim(0); - final ArrayList shiftsDEPT135 = spectrumDEPT135.getShiftsByDim(0); + final ArrayList shiftsDEPT90 = spectrumDEPT90.getShifts(0); + final ArrayList shiftsDEPT135 = spectrumDEPT135.getShifts(0); final ArrayList intensitiesDEPT135 = spectrumDEPT135.getIntensities(); ArrayList matchesDEPT90 = 
casekit.NMR.Utils.findShiftMatches(this.mol, shiftsDEPT90, tol, "C"); matchesDEPT90 = casekit.NMR.Utils.correctShiftMatches(this.mol, shiftsDEPT90, matchesDEPT90, tol, "C"); + matches.put("DEPT90", matchesDEPT90); ArrayList matchesDEPT135 = casekit.NMR.Utils.findShiftMatches(this.mol, shiftsDEPT135, tol, "C"); matchesDEPT135 = casekit.NMR.Utils.correctShiftMatches(this.mol, shiftsDEPT135, matchesDEPT135, tol, "C"); + matches.put("DEPT135", matchesDEPT135); - spectrumDEPT90.setAssignedAtomIndicesByDim(matchesDEPT90, 0); - spectrumDEPT135.setAssignedAtomIndicesByDim(matchesDEPT135, 0); + this.setImplicitHydrogenNumberFromDEPT(matchesDEPT90, matchesDEPT135, intensitiesDEPT135); - return this.setImplicitHydrogenNumberFromDEPT(matchesDEPT90, matchesDEPT135, intensitiesDEPT135); + return matches; } @@ -361,7 +321,7 @@ public final int setDEPT(final Spectrum spectrumDEPT90, final Spectrum spectrumD * @param matchesDEPT135 * @param intensitiesDEPT135 */ - private int setImplicitHydrogenNumberFromDEPT(final ArrayList matchesDEPT90, final ArrayList matchesDEPT135, final ArrayList intensitiesDEPT135) { + private void setImplicitHydrogenNumberFromDEPT(final ArrayList matchesDEPT90, final ArrayList matchesDEPT135, final ArrayList intensitiesDEPT135) { int matchDEPT90, matchDEPT135, hCount, hCountAll = 0; for (int i : this.atomTypeIndices.get("C")) { @@ -387,12 +347,6 @@ private int setImplicitHydrogenNumberFromDEPT(final ArrayList matchesDE } this.mol.getAtom(i).setImplicitHydrogenCount(hCount); hCountAll += hCount; - if (this.mol.getAtom(i).getProperty(ParseRawData.PROP_EQUIVALENCE) != null) { - for (Integer k : (ArrayList) this.mol.getAtom(i).getProperty(ParseRawData.PROP_EQUIVALENCE)) { - this.mol.getAtom(k).setImplicitHydrogenCount(hCount); - hCountAll += hCount; - } - } } } if(this.molFormula != null){ @@ -401,7 +355,6 @@ private int setImplicitHydrogenNumberFromDEPT(final ArrayList matchesDE System.out.println("assigned protons to carbons: " + hCountAll+ "!!!"); } - 
return hCountAll; } @@ -417,13 +370,7 @@ private int setImplicitHydrogenNumberFromDEPT(final ArrayList matchesDE */ public static final Spectrum parseHSQCviaCSV(final String pathToCSV, final String heavyAtomType) throws IOException { - final Spectrum spectrum = new Spectrum( new String[]{Utils.getIsotopeIdentifier("H"), - Utils.getIsotopeIdentifier(heavyAtomType)}, - new ArrayList[]{casekit.NMR.Utils.parseCSV(pathToCSV, 5), - casekit.NMR.Utils.parseCSV(pathToCSV, 6)}, - casekit.NMR.Utils.parseCSV(pathToCSV, 9)); - - return spectrum; + return Utils.CSVtoSpectrum(pathToCSV, new int[]{5, 6}, new String[]{"H", heavyAtomType}, 9); } /** @@ -438,14 +385,8 @@ public static final Spectrum parseHSQCviaCSV(final String pathToCSV, final Strin * @throws org.xml.sax.SAXException */ public static final Spectrum parseHSQCviaXML(final String pathToXML, final String heavyAtomType) throws IOException, ParserConfigurationException, SAXException { - - final Spectrum spectrum = new Spectrum( new String[]{ Utils.getIsotopeIdentifier("H"), - Utils.getIsotopeIdentifier(heavyAtomType)}, - new ArrayList[]{casekit.NMR.Utils.parseXML(pathToXML, 2, 2), - casekit.NMR.Utils.parseXML(pathToXML, 2, 1)}, - casekit.NMR.Utils.parseXML(pathToXML, 2, 3)); - return spectrum; + return Utils.XMLtoSpectrum(pathToXML, 2, new int[]{2, 1}, new String[]{"H", heavyAtomType}); } @@ -458,18 +399,20 @@ public static final Spectrum parseHSQCviaXML(final String pathToXML, final Strin * @param spectrum Spectrum class object consisting of Signal class objects * where the proton values are given first and the heavy atom values as the second. 
* @param tolHeavyAtom tolerance value [ppm] for heavy atom shift matching + * @return */ - public final void setHSQC(final Spectrum spectrum, final double tolHeavyAtom) { - - final ArrayList shiftsHydrogen = spectrum.getShiftsByDim(0); - final ArrayList shiftsHeavyAtom = spectrum.getShiftsByDim(1); - ArrayList matchesHeavyAtom = casekit.NMR.Utils.findShiftMatches(this.mol, shiftsHeavyAtom, tolHeavyAtom, Utils.getElementIdentifier(spectrum.getNuclei()[1])); - matchesHeavyAtom = casekit.NMR.Utils.correctShiftMatches(this.mol, shiftsHeavyAtom, matchesHeavyAtom, tolHeavyAtom, Utils.getElementIdentifier(spectrum.getNuclei()[1])); - - spectrum.setAssignedAtomIndicesByDim(matchesHeavyAtom, 1); + public final HashMap> setHSQC(final Spectrum spectrum, final double tolHeavyAtom) { + final HashMap> matches = new HashMap<>(); + final ArrayList shiftsHydrogen = spectrum.getShifts(0); + final ArrayList shiftsHeavyAtom = spectrum.getShifts(1); + ArrayList matchesHeavyAtom = Utils.findShiftMatches(this.mol, shiftsHeavyAtom, tolHeavyAtom, Utils.getElementIdentifier(spectrum.getNuclei()[1])); + matchesHeavyAtom = Utils.correctShiftMatches(this.mol, shiftsHeavyAtom, matchesHeavyAtom, tolHeavyAtom, Utils.getElementIdentifier(spectrum.getNuclei()[1])); + matches.put(Utils.getAtomTypeFromSpectrum(spectrum, 0), matchesHeavyAtom); + matches.put(Utils.getAtomTypeFromSpectrum(spectrum, 1), matchesHeavyAtom); this.setImplicitHydrogenShifts(shiftsHydrogen, matchesHeavyAtom); + return matches; } @@ -615,13 +558,7 @@ private ArrayList correctHydrogenShiftMatches(final ArrayList s */ public static final Spectrum parseHHCOSYviaCSV(final String pathToCSV) throws IOException { - final Spectrum spectrum = new Spectrum( new String[]{ Utils.getIsotopeIdentifier("H"), - Utils.getIsotopeIdentifier("H")}, - new ArrayList[]{casekit.NMR.Utils.parseCSV(pathToCSV, 5), - casekit.NMR.Utils.parseCSV(pathToCSV, 6)}, - casekit.NMR.Utils.parseCSV(pathToCSV, 9)); - - return spectrum; + return 
Utils.CSVtoSpectrum(pathToCSV, new int[]{5, 6}, new String[]{"H", "H"}, 9); } /** @@ -636,13 +573,7 @@ public static final Spectrum parseHHCOSYviaCSV(final String pathToCSV) throws IO */ public static final Spectrum parseHHCOSYviaXML(final String pathToXML) throws IOException, ParserConfigurationException, SAXException { - final Spectrum spectrum = new Spectrum( new String[]{ Utils.getIsotopeIdentifier("H"), - Utils.getIsotopeIdentifier("H")}, - new ArrayList[]{casekit.NMR.Utils.parseXML(pathToXML, 2, 2), - casekit.NMR.Utils.parseXML(pathToXML, 2, 1)}, - casekit.NMR.Utils.parseXML(pathToXML, 2, 3)); - - return spectrum; + return Utils.XMLtoSpectrum(pathToXML, 2, new int[]{2, 1}, new String[]{"H", "H"}); } /** @@ -655,20 +586,21 @@ public static final Spectrum parseHHCOSYviaXML(final String pathToXML) throws IO * of heavy atom * @return true if the links could be set; otherwise false */ - public final boolean setHHCOSY(final Spectrum spectrum, final double tol) { - - final ArrayList hydrogenShiftMatches1 = this.findImplicitHydrogenShiftMatches(spectrum.getShiftsByDim(0), tol); - final ArrayList hydrogenShiftMatches2 = this.findImplicitHydrogenShiftMatches(spectrum.getShiftsByDim(1), tol); + public final HashMap> setHHCOSY(final Spectrum spectrum, final double tol) { + + final ArrayList hydrogenShiftMatches1 = this.findImplicitHydrogenShiftMatches(spectrum.getShifts(0), tol); + final ArrayList hydrogenShiftMatches2 = this.findImplicitHydrogenShiftMatches(spectrum.getShifts(1), tol); // are all signals bidirectional? 
if (!casekit.NMR.Utils.isBidirectional(hydrogenShiftMatches1, hydrogenShiftMatches2)) { - return false; + return null; } casekit.NMR.Utils.setBidirectionalLinks(this.mol, hydrogenShiftMatches1, hydrogenShiftMatches2, CDKConstants.NMRSPECTYPE_2D_HHCOSY); - spectrum.setAssignedAtomIndicesByDim(hydrogenShiftMatches1, 0); - spectrum.setAssignedAtomIndicesByDim(hydrogenShiftMatches2, 1); + final HashMap> matches = new HashMap<>(); + matches.put(Utils.getAtomTypeFromSpectrum(spectrum, 0), hydrogenShiftMatches1); + matches.put(Utils.getAtomTypeFromSpectrum(spectrum, 1), hydrogenShiftMatches2); - return true; + return matches; } @@ -682,13 +614,7 @@ public final boolean setHHCOSY(final Spectrum spectrum, final double tol) { */ public static final Spectrum parseINADEQUATEviaCSV(final String pathToCSV) throws IOException { - final Spectrum spectrum = new Spectrum( new String[]{ Utils.getIsotopeIdentifier("C"), - Utils.getIsotopeIdentifier("C")}, - new ArrayList[]{casekit.NMR.Utils.parseCSV(pathToCSV, 5), - casekit.NMR.Utils.parseCSV(pathToCSV, 6)}, - casekit.NMR.Utils.parseCSV(pathToCSV, 9)); - - return spectrum; + return Utils.CSVtoSpectrum(pathToCSV, new int[]{5, 6}, new String[]{"C", "C"}, 9); } /** @@ -702,14 +628,8 @@ public static final Spectrum parseINADEQUATEviaCSV(final String pathToCSV) throw * @throws org.xml.sax.SAXException */ public static final Spectrum parseINADEQUATEviaXML(final String pathToXML) throws IOException, ParserConfigurationException, SAXException { - - final Spectrum spectrum = new Spectrum( new String[]{ Utils.getIsotopeIdentifier("C"), - Utils.getIsotopeIdentifier("C")}, - new ArrayList[]{casekit.NMR.Utils.parseXML(pathToXML, 2, 2), - casekit.NMR.Utils.parseXML(pathToXML, 2, 1)}, - casekit.NMR.Utils.parseXML(pathToXML, 2, 3)); - return spectrum; + return Utils.XMLtoSpectrum(pathToXML, 2, new int[]{2, 1}, new String[]{"C", "C"}); } @@ -724,20 +644,21 @@ public static final Spectrum parseINADEQUATEviaXML(final String pathToXML) throw * @param tol 
tolerance value [ppm] for carbon atom shift matching * @return */ - public final boolean setINADEQUATE(final Spectrum spectrum, final double tol) { + public final HashMap> setINADEQUATE(final Spectrum spectrum, final double tol) { - final ArrayList carbonShiftMatches1 = casekit.NMR.Utils.findShiftMatches(this.mol, spectrum.getShiftsByDim(0), tol, "C"); - final ArrayList carbonShiftMatches2 = casekit.NMR.Utils.findShiftMatches(this.mol, spectrum.getShiftsByDim(1), tol, "C"); + final ArrayList carbonShiftMatches1 = casekit.NMR.Utils.findShiftMatches(this.mol, spectrum.getShifts(0), tol, "C"); + final ArrayList carbonShiftMatches2 = casekit.NMR.Utils.findShiftMatches(this.mol, spectrum.getShifts(1), tol, "C"); // are all signals bidirectional? if (!casekit.NMR.Utils.isBidirectional(carbonShiftMatches1, carbonShiftMatches2)) { - return false; + return null; } casekit.NMR.Utils.setBidirectionalLinks(this.mol, carbonShiftMatches1, carbonShiftMatches2, CDKConstants.NMRSPECTYPE_2D_INADEQUATE); - spectrum.setAssignedAtomIndicesByDim(carbonShiftMatches1, 0); - spectrum.setAssignedAtomIndicesByDim(carbonShiftMatches2, 1); + final HashMap> matches = new HashMap<>(); + matches.put(Utils.getAtomTypeFromSpectrum(spectrum, 0), carbonShiftMatches1); + matches.put(Utils.getAtomTypeFromSpectrum(spectrum, 1), carbonShiftMatches2); - return true; + return matches; } @@ -753,13 +674,7 @@ public final boolean setINADEQUATE(final Spectrum spectrum, final double tol) { */ public static final Spectrum parseHMBCviaCSV(final String pathToCSV, final String heavyAtomType) throws IOException { - final Spectrum spectrum = new Spectrum( new String[]{Utils.getIsotopeIdentifier("H"), - Utils.getIsotopeIdentifier(heavyAtomType)}, - new ArrayList[]{casekit.NMR.Utils.parseCSV(pathToCSV, 5), - casekit.NMR.Utils.parseCSV(pathToCSV, 6)}, - casekit.NMR.Utils.parseCSV(pathToCSV, 9)); - - return spectrum; + return Utils.CSVtoSpectrum(pathToCSV, new int[]{5, 6}, new String[]{"H", heavyAtomType}, 9); } @@ 
-776,14 +691,8 @@ public static final Spectrum parseHMBCviaCSV(final String pathToCSV, final Strin * @throws org.xml.sax.SAXException */ public static final Spectrum parseHMBCviaXML(final String pathToXML, final String heavyAtomType) throws IOException, ParserConfigurationException, SAXException { - - final Spectrum spectrum = new Spectrum( new String[]{ Utils.getIsotopeIdentifier("H"), - Utils.getIsotopeIdentifier(heavyAtomType)}, - new ArrayList[]{casekit.NMR.Utils.parseXML(pathToXML, 2, 2), - casekit.NMR.Utils.parseXML(pathToXML, 2, 1)}, - casekit.NMR.Utils.parseXML(pathToXML, 2, 3)); - return spectrum; + return Utils.XMLtoSpectrum(pathToXML, 2, new int[]{2, 1}, new String[]{"H", heavyAtomType}); } @@ -796,15 +705,20 @@ public static final Spectrum parseHMBCviaXML(final String pathToXML, final Strin * where the proton shift values is given first and the heavy atom shifts as the second. * @param tolHydrogen tolerance value [ppm] for hydrogen shift matching * @param tolHeavy tolerance value [ppm] for heavy atom shift matching + * @return */ - public final void setHMBC(final Spectrum spectrum, final double tolHydrogen, final double tolHeavy) { + public final HashMap> setHMBC(final Spectrum spectrum, final double tolHydrogen, final double tolHeavy) { - final ArrayList shiftsHydrogen = spectrum.getShiftsByDim(0); + final ArrayList shiftsHydrogen = spectrum.getShifts(0); final ArrayList matchesHydrogen = this.correctHydrogenShiftMatches(shiftsHydrogen, this.findImplicitHydrogenShiftMatches(shiftsHydrogen, tolHydrogen), tolHydrogen); - final ArrayList shiftsHeavyAtom = spectrum.getShiftsByDim(1); + final ArrayList shiftsHeavyAtom = spectrum.getShifts(1); ArrayList matchesHeavyAtom = casekit.NMR.Utils.findShiftMatches(this.mol, shiftsHeavyAtom, tolHeavy, Utils.getElementIdentifier(spectrum.getNuclei()[1])); matchesHeavyAtom = casekit.NMR.Utils.correctShiftMatches(this.mol, shiftsHeavyAtom, matchesHeavyAtom, tolHeavy, Utils.getElementIdentifier(spectrum.getNuclei()[1])); 
+ final HashMap> matches = new HashMap<>(); + matches.put(Utils.getAtomTypeFromSpectrum(spectrum, 0), matchesHydrogen); + matches.put(Utils.getAtomTypeFromSpectrum(spectrum, 1), matchesHeavyAtom); + ArrayList HMBCList; for (int i = 0; i < matchesHydrogen.size(); i++) { if (matchesHydrogen.get(i) >= 0 && matchesHeavyAtom.get(i) >= 0) { @@ -818,7 +732,6 @@ public final void setHMBC(final Spectrum spectrum, final double tolHydrogen, fin } } - spectrum.setAssignedAtomIndicesByDim(matchesHydrogen, 0); - spectrum.setAssignedAtomIndicesByDim(matchesHeavyAtom, 1); + return matches; } } From 9272c3c2d5cbbc570f8bba7a1fdcd726a408463e Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Fri, 10 Aug 2018 20:43:08 +0200 Subject: [PATCH 026/405] - bugfix in second constuctor --- src/casekit/NMR/Process.java | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/src/casekit/NMR/Process.java b/src/casekit/NMR/Process.java index f845b85..648b224 100644 --- a/src/casekit/NMR/Process.java +++ b/src/casekit/NMR/Process.java @@ -45,7 +45,7 @@ public class Process extends ParseRawData { final private IAtomContainer mol; final private IMolecularFormula molFormula; - private HashMap> atomTypeIndices = new HashMap<>(); + private final HashMap> atomTypeIndices = new HashMap<>(); private int[][] neighborhoodCountsMatrix; final private HashMap> shiftIndicesInACSet = new HashMap<>(); // holding of all indices of each ac set (DB) entry [first value] and it's atom indices [second value] too @@ -58,21 +58,20 @@ public Process(){ public Process(final IMolecularFormula molFormula){ super(molFormula); - this.molFormula = molFormula; - this.mol = Utils.removeAtoms(MolecularFormulaManipulator.getAtomContainer(this.molFormula), "H"); + this.molFormula = super.getMolecularFormula(); + this.mol = super.getAtomContainer(); this.setAtomTypeIndices(); } - /** - * Sets bonds from already set experiment information (H,H-COSY, INADEQUATE and HMBC). 
- * Additionally, this function is build for bond type recognition, - * for details see {@link testkit.Utils#getBondTypeFromHybridizations(java.lang.String, org.openscience.cdk.interfaces.IAtomType.Hybridization, java.lang.String, org.openscience.cdk.interfaces.IAtomType.Hybridization)}. + /** + * Sets bonds from already set experiment information (H,H-COSY, INADEQUATE + * and HMBC). * * @param experiments */ - public void setBonds(final String[] experiments){ - + public void setBonds(final String[] experiments) { + String NMRSHIFT_ATOMTYPE; ArrayList signalList; for (int e = 0; e < experiments.length; e++) { @@ -87,7 +86,7 @@ public void setBonds(final String[] experiments){ if ((i == bondPartnerIndex)) {// || (this.mol.getBond(this.mol.getAtom(i), this.mol.getAtom(bondPartnerIndex)) != null)) { continue; } - if(experiments[e].equals(CDKConstants.NMRSPECTYPE_2D_HMBC)){ + if (experiments[e].equals(CDKConstants.NMRSPECTYPE_2D_HMBC)) { System.out.println("HMBC bond setting: still to come!!!"); } else { this.setBond(i, bondPartnerIndex); @@ -98,12 +97,11 @@ public void setBonds(final String[] experiments){ } } } - - - private void setBond(final int index1, final int index2){ - - if(this.mol.getBond(this.mol.getAtom(index1), this.mol.getAtom(index2)) != null){ - this.mol.removeBond(this.mol.getAtom(index1), this.mol.getAtom(index2)); + + private void setBond(final int index1, final int index2) { + + if (this.mol.getBond(this.mol.getAtom(index1), this.mol.getAtom(index2)) != null) { + this.mol.removeBond(this.mol.getAtom(index1), this.mol.getAtom(index2)); } this.mol.addBond(index1, index2, casekit.NMR.Utils.getBondTypeFromHybridizations(this.mol.getAtom(index1), this.mol.getAtom(index2))); } From 3198591c65b204db9812cc1b78520d2e4f9f5a5f Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Fri, 10 Aug 2018 20:44:45 +0200 Subject: [PATCH 027/405] - some functions added - some bufixes --- src/casekit/NMR/Utils.java | 197 ++++++++++++++++++++++++++++++++++--- 1 file changed, 
183 insertions(+), 14 deletions(-) diff --git a/src/casekit/NMR/Utils.java b/src/casekit/NMR/Utils.java index 40f38eb..29437eb 100644 --- a/src/casekit/NMR/Utils.java +++ b/src/casekit/NMR/Utils.java @@ -24,6 +24,7 @@ package casekit.NMR; +import casekit.NMR.model.Signal; import casekit.NMR.model.Spectrum; import java.io.BufferedReader; import java.io.BufferedWriter; @@ -33,6 +34,7 @@ import java.io.FileReader; import java.io.FileWriter; import java.io.IOException; +import java.io.InputStreamReader; import java.io.OutputStreamWriter; import java.util.ArrayList; import java.util.Arrays; @@ -61,6 +63,7 @@ import org.openscience.cdk.interfaces.IAtomContainer; import org.openscience.cdk.interfaces.IAtomType; import org.openscience.cdk.interfaces.IBond; +import org.openscience.cdk.interfaces.IMolecularFormula; import org.openscience.cdk.io.SDFWriter; import org.openscience.cdk.io.iterator.IteratingSDFReader; import org.openscience.cdk.isomorphism.matchers.QueryAtomContainer; @@ -68,6 +71,7 @@ import org.openscience.cdk.silent.SilentChemObjectBuilder; import org.openscience.cdk.smiles.smarts.parser.SMARTSParser; import org.openscience.cdk.tools.manipulator.AtomContainerManipulator; +import org.openscience.cdk.tools.manipulator.MolecularFormulaManipulator; import org.w3c.dom.NodeList; import org.xml.sax.SAXException; @@ -238,19 +242,30 @@ public static ArrayList parseCSV(final String pathToPeakList, final int */ public static Spectrum CSVtoSpectrum(final String pathToPeakList, final int[] columns, final String[] atomTypes, final int intensityColumnIndex) throws IOException { - // assumes the same number of selected columns and atom types + // assumes the same number of selected columns (dimensions) and atom types if(columns.length != atomTypes.length){ return null; } - final ArrayList[] shiftsList = new ArrayList[columns.length]; final String[] nuclei = new String[columns.length]; for (int col = 0; col < columns.length; col++) { - shiftsList[col] = 
Utils.parseCSV(pathToPeakList, columns[col]); nuclei[col] = Utils.getIsotopeIdentifier(atomTypes[col]); } - final ArrayList intensities = parseCSV(pathToPeakList, intensityColumnIndex); + final Spectrum spectrum = new Spectrum(nuclei); + ArrayList shiftList; + for (int col = 0; col < columns.length; col++) { + shiftList = Utils.parseCSV(pathToPeakList, columns[col]); + if(col == 0){ + for (int i = 0; i < shiftList.size(); i++) { + spectrum.addSignal(new Signal(spectrum.getNuclei())); + } + } + if(!spectrum.setShifts(shiftList, col)){ + return null; + } + } + spectrum.setIntensities(parseCSV(pathToPeakList, intensityColumnIndex)); - return new Spectrum(nuclei, shiftsList, intensities); + return spectrum; } @@ -315,16 +330,81 @@ public static Spectrum XMLtoSpectrum(final String pathToXML, final int ndim, fin || (ndim < 1 || ndim > 2)){ return null; } - final ArrayList[] shiftLists = new ArrayList[ndim]; final String[] nuclei = new String[ndim]; - for (int nucl = 0; nucl < ndim; nucl++) { - nuclei[nucl] = Utils.getIsotopeIdentifier(atomTypes[nucl]); - shiftLists[nucl] = Utils.parseXML(pathToXML, ndim, attributes[nucl]); + for (int dim = 0; dim < ndim; dim++) { + nuclei[dim] = Utils.getIsotopeIdentifier(atomTypes[dim]); } - - return new Spectrum(nuclei, shiftLists, Utils.parseXML(pathToXML, ndim, ndim + 1)); + final Spectrum spectrum = new Spectrum(nuclei); + ArrayList shiftList; + for (int dim = 0; dim < ndim; dim++) { + shiftList = Utils.parseXML(pathToXML, ndim, attributes[dim]); + if(dim == 0){ + for (int i = 0; i < shiftList.size(); i++) { + spectrum.addSignal(new Signal(spectrum.getNuclei())); + } + } + if(!spectrum.setShifts(shiftList, dim)){ + return null; + } + } + spectrum.setIntensities(Utils.parseXML(pathToXML, ndim, ndim + 1)); + + return spectrum; } + public static String getAtomTypeFromSpectrum(final Spectrum spectrum, final int dim){ + if(spectrum.checkDimension(dim)){ + return Utils.getElementIdentifier(spectrum.getNuclei()[dim]); + } + + return null; 
+ } + + public static int getDifferenceSpectrumSizeAndMolecularFormulaCount(final Spectrum spectrum, final IMolecularFormula molformula){ + final String atomType = Utils.getAtomTypeFromSpectrum(spectrum, 0); + int atomsInMolFormula = 0; + if(molformula != null){ + atomsInMolFormula = MolecularFormulaManipulator.getElementCount(molformula, atomType); + } + return atomsInMolFormula - spectrum.getSignalCount(); + } + + public static void editSignalsInSpectrum(final Spectrum spectrum, final IMolecularFormula molFormula) throws IOException { + BufferedReader br = new BufferedReader(new InputStreamReader(System.in)); int n; + final ArrayList validIndices = new ArrayList<>(); + int diff = Utils.getDifferenceSpectrumSizeAndMolecularFormulaCount(spectrum, molFormula); + // walk through all signals in spectrum add missing or to remove signals + while (diff != 0) { + // display all selectable signal indices in spectrum + if(diff > 0){ + System.out.println("\n" + diff + " signals are missing!\nWhich signal is not unique?"); + } else { + System.out.println("\n" + (-1 * diff) + " signals are to be removed!\nWhich signal is to remove?"); + } + for (int s = 0; s < spectrum.getSignalCount(); s++) { + System.out.print("index: " + s); + for (int d = 0; d < spectrum.getDimCount(); d++) { + System.out.print(", shift dim " + (d+1) + ": " + spectrum.getShift(s, d)); + } + System.out.println(""); + validIndices.add(s); + } + // get selected index by user input + n = -1; + while(!validIndices.contains(n)){ + System.out.println("Enter the index: "); + n = Integer.parseInt(br.readLine()); + } + // add/remove signals in spectrum + if(diff > 0){ + spectrum.addSignal(spectrum.getSignal(validIndices.indexOf(n)).getClone()); + spectrum.setEquivalence(spectrum.getSignalCount() - 1, validIndices.indexOf(n)); + } else { + spectrum.removeSignal(validIndices.indexOf(n)); + } + diff = Utils.getDifferenceSpectrumSizeAndMolecularFormulaCount(spectrum, molFormula); + } + } /** * Corrects a match list 
regarding a given shift list and an atom container. @@ -371,6 +451,50 @@ public static ArrayList correctShiftMatches(final IAtomContainer ac, fi return matches; } + + /** + * Corrects a match list regarding a given shift list and an atom container. + * This is useful when two ore more shift values (e.g. DEPT shifts) match + * with the same atom in the atom container. So the purpose here is to + * enable more unambiguous matches. This method first looks for unambiguous + * matches and calculates the median of the difference values between the + * shift list values and the shifts of atom container. Then, all shift list + * values are adjusted (+/-) with this median value. + * + * @param shiftList1 Shift value list to search in + * @param shiftList2 Shift value list to match in shiftList1 + * @param matchesInshiftList1 Match list to correct + * @param tol Tolerance value + * @return + */ + public static ArrayList correctShiftMatches(final ArrayList shiftList1, final ArrayList shiftList2, final ArrayList matchesInshiftList1, final double tol) { + + int matchIndex; + // get differences of unique matches between query shift and ac shifts + ArrayList diffs = new ArrayList<>(); + final HashSet uniqueMatchIndicesSet = new HashSet<>(matchesInshiftList1); + for (final int uniqueMatchIndex : uniqueMatchIndicesSet) { + if (Collections.frequency(matchesInshiftList1, uniqueMatchIndex) == 1) { + matchIndex = matchesInshiftList1.indexOf(uniqueMatchIndex); + if (matchesInshiftList1.get(matchIndex) >= 0) { + diffs.add(shiftList2.get(matchIndex) - shiftList1.get(matchesInshiftList1.get(matchIndex))); + } + } + } + // calculate the median of found unique match differences + if (diffs.size() > 0) { + final double median = casekit.NMR.Utils.getMedian(diffs); + // add or subtract the median of the differences to all shift list values (input) and match again then + for (int i = 0; i < shiftList2.size(); i++) { + shiftList2.set(i, shiftList2.get(i) - median); + } + // rematch + return 
casekit.NMR.Utils.findShiftMatches(shiftList1, shiftList2, tol); + } + + return matchesInshiftList1; + } + /** * Finds the matches with the lowest deviations between a given shift value @@ -425,6 +549,52 @@ public static int findSingleShiftMatch(final IAtomContainer ac, final double shi return matchIndex; } + + + /** + * Finds the matches with the lowest deviations between two given shift value + * lists. + * + * @param shiftList1 shift value list to search in + * @param shiftList2 shift value list to match in shiftList1 + * @param tol Tolerance value [ppm] + * @return List of match indices within shiftList1 + */ + public static ArrayList findShiftMatches(final ArrayList shiftList1, final ArrayList shiftList2, final double tol) { + + final ArrayList matchesInShiftList1 = new ArrayList<>(); + for (int i = 0; i < shiftList2.size(); i++) { + matchesInShiftList1.add(casekit.NMR.Utils.findSingleShiftMatch(shiftList1, shiftList2.get(i), tol)); + } + + return matchesInShiftList1; + } + + + /** + * Finds the match with the lowest deviation between a given shift value and + * a shift list. + * + * @param shiftList Shift list to search in + * @param shift Shift value [ppm] to find in ShiftList + * @param tol Tolerance value [ppm] + * @return Match index of a query shift within shiftList + */ + public static int findSingleShiftMatch(final ArrayList shiftList, final double shift, final double tol) { + + int matchIndex = -1; + double minDiff = tol; + for (int k = 0; k < shiftList.size(); k++) { + // figure out the shift with lowest deviation + if ((shift - tol <= shiftList.get(k)) && (shiftList.get(k) <= shift + tol) && (Math.abs(shift - shiftList.get(k)) < minDiff)) { + minDiff = Math.abs(shift - shiftList.get(k)); + matchIndex = k; + } + } + + return matchIndex; + } + /** * Finds match indices between a given shift list from a peak table and an atom container. 
@@ -573,7 +743,7 @@ public static String getIsotopeIdentifier(final String element) { case "B": return "11B"; case "Pt": return "195Pt"; default: - return null; + return element; } } @@ -937,13 +1107,12 @@ public static IAtomContainer setAromaticitiesInAtomContainer(final IAtomContaine */ public static IAtomContainer removeAtoms(final IAtomContainer ac, final String atomType) { - ArrayList toRemoveList = new ArrayList<>(); + final ArrayList toRemoveList = new ArrayList<>(); for (IAtom atomA : ac.atoms()) { if (atomA.getSymbol().equals(atomType)) {// detect wether the current atom A is a from the given atom type toRemoveList.add(atomA); } } - for (IAtom iAtom : toRemoveList) { ac.removeAtom(iAtom); } From 28052da1b5251fb1cd73a3c4dac6e0c02ad834af Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Fri, 10 Aug 2018 20:46:29 +0200 Subject: [PATCH 028/405] - equivalences list added to set equivalent signal indices within a spectrum --- src/casekit/NMR/model/Spectrum.java | 293 ++++++++++++++++++---------- 1 file changed, 192 insertions(+), 101 deletions(-) diff --git a/src/casekit/NMR/model/Spectrum.java b/src/casekit/NMR/model/Spectrum.java index 7227c08..4af803e 100644 --- a/src/casekit/NMR/model/Spectrum.java +++ b/src/casekit/NMR/model/Spectrum.java @@ -50,7 +50,7 @@ public class Spectrum { /** * Declares how many axes are in involved in this spectrum. */ - private final int ndim; + private final int nDim; /** * The nuclei of the different axes. 
*/ @@ -63,36 +63,25 @@ public class Spectrum { private String standard; private final ArrayList signals = new ArrayList<>(); + private final ArrayList equivalences = new ArrayList<>(); - public Spectrum(final String[] nuclei, final ArrayList[] shiftLists, final ArrayList intensities) { - this.nuclei = nuclei; - this.ndim = this.nuclei.length; - this.addSignals(shiftLists, intensities); - } - - public Spectrum(final String[] nuclei, final ArrayList[] shiftLists) { - this.nuclei = nuclei; - this.ndim = this.nuclei.length; - this.addSignals(shiftLists, null); - } - public Spectrum(final String[] nuclei) { this.nuclei = nuclei; - this.ndim = this.nuclei.length; + this.nDim = this.nuclei.length; } + public String[] getNuclei(){ return this.nuclei; } public int getDimCount(){ - return this.ndim; + return this.nDim; } public void setSpecType(final String specType){ - - this.specType = specType; + this.specType = specType; } public String getSpecType(){ @@ -107,36 +96,24 @@ public String getSpecDescription(){ return this.description; } - public final void addSignals(final ArrayList[] shiftLists, final ArrayList intensities){ - // assumes the same number of shift lists as set dimension number - if(shiftLists.length != this.ndim){ - System.err.println("Unequal number of nuclei (dimension) and shift lists!!!"); - return; - } - // assumes that the shift lists have the same number of entries - int prevShiftListSize = shiftLists[0].size(); - for (int i = 0; i < shiftLists.length; i++) { - if(shiftLists[i].size() != prevShiftListSize){ - System.err.println("Unequal number of shifts in " + (i+1) + " shift list!!!"); - return; - } - if(intensities != null && shiftLists[i].size() != intensities.size()){ - System.err.println("Unequal number of shifts in shift list " + (i+1) + " and intensities number!!!"); - return; - } - } - Double[] shifts; - for (int row = 0; row < shiftLists[0].size(); row++) { - shifts = new Double[this.ndim]; - for (int col = 0; col < this.ndim; col++) { - 
shifts[col] = shiftLists[col].get(row); - } - if(intensities != null){ - this.addSignal(new casekit.NMR.model.Signal(this.nuclei, shifts, intensities.get(row))); - } else { - this.addSignal(new casekit.NMR.model.Signal(this.nuclei, shifts)); - } - } + public final boolean setShifts(final ArrayList shiftList, final int dim){ + if(!this.checkDimension(dim) || (!this.checkInputListSize(shiftList.size()))){ + return false; + } + for (int i = 0; i < shiftList.size(); i++) { + this.setShift(shiftList.get(i), dim, i); + } + + return true; + } + + public final boolean setShift(final double shift, final int dim, final int signalIndex){ + if(!this.checkDimension(dim) || !this.checkSignalIndex(signalIndex)){ + return false; + } + this.getSignal(signalIndex).setShift(shift, dim); + + return true; } public int getSignalCount() { @@ -144,19 +121,77 @@ public int getSignalCount() { } /** - * Adds a Signal ({@link casekit.NMR.model.Signal}) to this Spectrum class object. + * Adds a Signal ({@link casekit.NMR.model.Signal}) to this Spectrum class object at the end. + * @param signal + * @return + */ + public boolean addSignal(final Signal signal) { + if(!this.checkDimCount(signal.getDimCount()) || !this.checkNuclei(signal.getNuclei())){ + return false; + } + this.addSignal(signal, null); + + return true; + } + + /** + * Adds a Signal ({@link casekit.NMR.model.Signal}) to this Spectrum class object at given index. * @param signal + * @param index index where to insert the signal, if null the signal will be added at the end of signal list + * @return */ - public void addSignal(final Signal signal) { - this.signals.add(signal); + public boolean addSignal(final Signal signal, final Integer index) { + if(!this.checkDimCount(signal.getDimCount()) || !this.checkNuclei(signal.getNuclei())){ + return false; + } + // is index valid? 
if yes then insert it there + if(this.checkSignalIndex(index)){ + this.signals.add(index, signal); + this.equivalences.add(index, -1); + // if not then check for null value and add signal at the end + } else if(index == null){ + this.signals.add(signal); + this.equivalences.add(-1); + // no valid index value, nothing to insert or add in spectrum + } else { + return false; + } + + return true; } - public void removeSignal(final Signal signal){ - this.signals.remove(this.getSignalIndex(signal)); + public boolean removeSignal(final Signal signal){ + return this.removeSignal(this.getSignalIndex(signal)); } - public void removeSignal(final int signalIndex){ + public boolean removeSignal(final int signalIndex){ + if(!this.checkSignalIndex(signalIndex)){ + return false; + } this.signals.remove(signalIndex); + this.equivalences.remove(signalIndex); + + return true; + } + + private boolean checkSignalIndex(final Integer signalIndex){ + return (signalIndex != null) && (signalIndex >= 0) && (signalIndex < this.getSignalCount()); + } + + public boolean checkDimension(final int dim){ + return (dim >= 0) && (dim < this.nDim); + } + + private boolean checkInputListSize(final int size){ + return (size == this.getSignalCount()); + } + + private boolean checkDimCount(final int ndim){ + return ndim == this.getDimCount(); + } + + private boolean checkNuclei(final String[] nuclei){ + return nuclei == this.getNuclei(); } /** @@ -165,6 +200,10 @@ public void removeSignal(final int signalIndex){ * @return */ public Signal getSignal(final int signalIndex) { + if(!this.checkSignalIndex(signalIndex)){ + return null; + } + return this.signals.get(signalIndex); } @@ -177,8 +216,39 @@ public ArrayList getIntensities(){ return intensities; } - public ArrayList getShiftsByDim(final int dim){ + public Double getIntensity(final int signalIndex){ + if(!this.checkSignalIndex(signalIndex)){ + return null; + } + + return this.getSignal(signalIndex).getIntensity(); + } + + public boolean 
setIntensities(final ArrayList intensities){ + if(!this.checkInputListSize(intensities.size())){ + return false; + } + for (int s = 0; s < this.getSignalCount(); s++) { + this.setIntensity(intensities.get(s), s); + } + + return true; + } + + public boolean setIntensity(final double intensity, final int signalIndex){ + if(!this.checkSignalIndex(signalIndex)){ + return false; + } + this.getSignal(signalIndex).setIntensity(intensity); + + return true; + } + + public ArrayList getShifts(final int dim){ final ArrayList shifts = new ArrayList<>(); + if(!this.checkDimension(dim)){ + return shifts; + } for (final Signal sig : this.signals) { shifts.add(sig.getShift(dim)); } @@ -186,17 +256,57 @@ public ArrayList getShiftsByDim(final int dim){ return shifts; } - public int getAssignmentAtomIndexByDim(final int signalIndex, final int dim){ - return this.getSignal(signalIndex).getAssignedAtomIndex(dim); + public Double getShift(final int SignalIndex, final int dim){ + if(!this.checkSignalIndex(SignalIndex)){ + return null; + } + + return this.getSignal(SignalIndex).getShift(dim); } - public ArrayList getAssignmentAtomIndicesByDim(final int dim){ - final ArrayList indices = new ArrayList<>(); - for (final Signal sig : this.signals) { - indices.add(sig.getAssignedAtomIndex(dim)); + public boolean setMultiplicities(final ArrayList multiplicities){ + if(!this.checkInputListSize(multiplicities.size())){ + return false; + } + for (int s = 0; s < this.getSignalCount(); s++) { + this.setMultiplicity(multiplicities.get(s), s); } - return indices; + return true; + } + + public boolean setMultiplicity(final String multiplicity, final int signalIndex){ + if(!this.checkSignalIndex(signalIndex)){ + return false; + } + this.getSignal(signalIndex).setMultiplicity(multiplicity); + + return true; + } + + public ArrayList getSignals(){ + return this.signals; + } + + public ArrayList getEquivalences(){ + return this.equivalences; + } + + public Integer getEquivalence(final int signalIndex){ + 
if(!this.checkSignalIndex(signalIndex)){ + return null; + } + + return this.equivalences.get(signalIndex); + } + + public boolean setEquivalence(final int signalIndex, final int equivalentSignalIndex){ + if(!this.checkSignalIndex(signalIndex) || !this.checkSignalIndex(equivalentSignalIndex)){ + return false; + } + this.equivalences.set(signalIndex, equivalentSignalIndex); + + return true; } /** @@ -237,14 +347,6 @@ public String getStandard() { return standard; } - public void setAssignedAtomIndicesByDim(final ArrayList indices, final int dim){ - if(indices.size() != this.getShiftsByDim(0).size()){ - return; - } - for (int i = 0; i < indices.size(); i++) { - this.getSignal(i).setAssignedAtomIndex(indices.get(i), dim); - } - } /** * Returns the signal closest to the shift sought. If no Signal is found within the interval @@ -254,55 +356,44 @@ public void setAssignedAtomIndicesByDim(final ArrayList indices, final * @param pickPrecision * @return */ - public Signal pickClosestSignal(final Double shift, final int dim, final double pickPrecision) { - - int thisPosition = -1; + public int pickClosestSignal(final double shift, final int dim, final double pickPrecision) { + int matchIndex = -1; + if(!this.checkDimension(dim)){ + return matchIndex; + } double diff = pickPrecision; - /* - * Now we search dimension dim for the chemical shift. - */ - for (int f = 0; f < this.signals.size(); f++) { - if (Math.abs((this.signals.get(f)).getShift(dim) - shift) < diff) { - diff = Math.abs((this.signals.get(f)).getShift(dim) - shift); - thisPosition = f; + for (int s = 0; s < this.getSignalCount(); s++) { + if (Math.abs(this.getShift(s, dim) - shift) < diff) { + diff = Math.abs(this.getShift(s, dim) - shift); + matchIndex = s; } - } - if(thisPosition >= 0){ - this.signals.get(thisPosition); - } + } - return null; + return matchIndex; } /** - * Returns a List with signals within the interval defined by pickPrecision. 
If none is found + * Returns a List with signal indices within the interval defined by pickPrecision. If none is found * an empty ArrayList is returned. * @param shift * @param dim * @param pickPrecision * @return */ - public ArrayList pickSignals(final double shift, final int dim, final double pickPrecision) { - final ArrayList pickedSignals = new ArrayList<>(); - /* - * Now we search dimension dim for the chemical shift. - */ - for (final Signal sig : this.signals) { - if (Math.abs(sig.getShift(dim) - shift) < pickPrecision) { - pickedSignals.add(sig); + public ArrayList pickSignals(final double shift, final int dim, final double pickPrecision) { + final ArrayList pickedSignals = new ArrayList<>(); + if(!this.checkDimension(dim)){ + return pickedSignals; + } + for (int s = 0; s < this.getSignalCount(); s++) { + if (Math.abs(this.getShift(s, dim) - shift) < pickPrecision) { + pickedSignals.add(s); } } + return pickedSignals; } - public ArrayList[] getShifts(){ - final ArrayList[] shiftsList = new ArrayList[this.ndim]; - for (int d = 0; d < this.ndim; d++) { - shiftsList[d] = this.getShiftsByDim(d); - } - - return shiftsList; - } } From 2c91c193102380135e92385da0fcd9d0629870f7 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Fri, 10 Aug 2018 20:49:39 +0200 Subject: [PATCH 029/405] - new Assignment class --- .gitignore | 3 +- nb-configuration.xml | 11 +++ nbactions-release-profile.xml | 60 ++++++++++++++ pom.xml | 6 ++ src/casekit/NMR/model/Assignment.java | 113 ++++++++++++++++++++++++++ src/casekit/NMR/model/Signal.java | 106 ++++++++---------------- src/casekit/NMR/test.java | 88 +++++++++++--------- 7 files changed, 277 insertions(+), 110 deletions(-) create mode 100644 nbactions-release-profile.xml create mode 100644 src/casekit/NMR/model/Assignment.java diff --git a/.gitignore b/.gitignore index 35dce1c..4c01fbb 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ /target/ /src/casekit/test.java /src/NMR/remarks -/src/NMR/test.java \ No newline at end of 
file +/src/NMR/test.java +/nbproject/ \ No newline at end of file diff --git a/nb-configuration.xml b/nb-configuration.xml index 2dc4557..11a0109 100644 --- a/nb-configuration.xml +++ b/nb-configuration.xml @@ -14,5 +14,16 @@ That way multiple projects can share the same settings (useful for formatting ru Any value defined here will override the pom.xml file value but is only applicable to the current project. --> mit + false + true + + + true + + true + false + false + + false diff --git a/nbactions-release-profile.xml b/nbactions-release-profile.xml new file mode 100644 index 0000000..64d6f0b --- /dev/null +++ b/nbactions-release-profile.xml @@ -0,0 +1,60 @@ + + + + run + + jar + + + process-classes + org.codehaus.mojo:exec-maven-plugin:1.2.1:exec + + + -classpath %classpath casekit.NMR.test + java + + + + debug + + jar + + + process-classes + org.codehaus.mojo:exec-maven-plugin:1.2.1:exec + + + -Xdebug -Xrunjdwp:transport=dt_socket,server=n,address=${jpda.address} -classpath %classpath casekit.NMR.test + java + true + + + + profile + + jar + + + process-classes + org.codehaus.mojo:exec-maven-plugin:1.2.1:exec + + + -classpath %classpath casekit.NMR.test + java + + + + rebuild + build-with-dependencies + + * + + + clean + install + + + true + + + diff --git a/pom.xml b/pom.xml index b705ffe..5b3bafe 100644 --- a/pom.xml +++ b/pom.xml @@ -6,6 +6,7 @@ casekit 1.0-SNAPSHOT casekit + src @@ -81,5 +82,10 @@ 2.2-SNAPSHOT jar + + org.openscience.cdk + cdk-silent + 2.2-SNAPSHOT + diff --git a/src/casekit/NMR/model/Assignment.java b/src/casekit/NMR/model/Assignment.java new file mode 100644 index 0000000..b319637 --- /dev/null +++ b/src/casekit/NMR/model/Assignment.java @@ -0,0 +1,113 @@ +/* + * The MIT License + * + * Copyright 2018 Michael Wenk [https://github.com/michaelwenk]. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +package casekit.NMR.model; + +import java.util.ArrayList; + +/** + * + * @author Michael Wenk [https://github.com/michaelwenk] + */ +public class Assignment { + + final int nDim; + final String[] nuclei; + final int[][] assignments; + + public Assignment(final Spectrum spectrum) { + this.nuclei = spectrum.getNuclei(); + this.nDim = this.nuclei.length; + this.assignments = this.initAssignments(this.nDim, spectrum.getSignalCount()); + } + + private int[][] initAssignments(final int nDim, final int nSignal){ + final int[][] temp = new int[nDim][nSignal]; + for (int i = 0; i < nDim; i++) { + for (int j = 0; j < nSignal; j++) { + temp[i][j] = -1; + } + } + + return temp; + } + + public boolean setAssignment(final int dim, final int indexInSpectrum, final int indexInAtomContainer){ + if(!this.checkDimension(dim) || !this.checkSpectrumIndex(dim, indexInSpectrum)){ + return false; + } + this.assignments[dim][indexInSpectrum] = indexInAtomContainer; + + return true; + } + + public boolean setAssignments(final int dim, final ArrayList indicesInAtomContainer){ + if(!this.checkDimension(dim) || !this.checkInputListSize(indicesInAtomContainer.size())){ + return false; + } + for (int i = 0; i < this.getAssignmentsCount(); i++) { + this.setAssignment(dim, i, indicesInAtomContainer.get(i)); + } + + return true; + } + + public Integer getAssignment(final int dim, final int indexInSpectrum){ + if(!this.checkDimension(dim) || !this.checkSpectrumIndex(dim, indexInSpectrum)){ + return null; + } + + return this.assignments[dim][indexInSpectrum]; + } + + public int[] getAssignments(final int dim){ + if(!this.checkDimension(dim)){ + return null; + } + + return this.assignments[dim]; + } + + public int getDimCount(){ + return this.nDim; + } + + public int getAssignmentsCount(){ + if(this.getDimCount() > 0){ + return this.assignments[0].length; + } + return 0; + } + + public boolean checkDimension(final int dim){ + return (dim >= 0) && (dim < this.nDim); + } + + private boolean 
checkSpectrumIndex(final int dim, final int indexInSpectrum){ + return (indexInSpectrum >= 0) && (indexInSpectrum < this.assignments[dim].length); + } + + private boolean checkInputListSize(final int size){ + return (size == this.getAssignmentsCount()); + } +} diff --git a/src/casekit/NMR/model/Signal.java b/src/casekit/NMR/model/Signal.java index aaf3e14..38d4ae7 100644 --- a/src/casekit/NMR/model/Signal.java +++ b/src/casekit/NMR/model/Signal.java @@ -33,14 +33,13 @@ */ public class Signal { - private final int ndim; + private final int nDim; /** * Am array of doubles to store the chemical shift of */ - private final Double[] shifts; + private Double[] shifts; private final String[] nuclei; - private final int[] assignedAtomIndices; /* Signal intensity in arbitrary values */ private Double intensity; @@ -50,52 +49,58 @@ public class Signal { public final static int PHASE_NONE = 0, PHASE_POSITIVE = 1, PHASE_NEGATIVE = 2; public final static String[] PHASENAMES = {"NONE", "POSITIVE", "NEGATIVE"}; + + public Signal(final String[] nuclei) { + this.nuclei = nuclei; + this.nDim = this.nuclei.length; + this.shifts = this.initShifts(null, this.nDim); + } + public Signal(final String[] nuclei, final Double[] shifts) { this.nuclei = nuclei; - this.ndim = this.nuclei.length; - this.shifts = shifts; - this.assignedAtomIndices = this.initAssignedAtomIndices(this.ndim); + this.nDim = this.nuclei.length; + this.shifts = this.initShifts(shifts, this.nDim); } public Signal(final String[] nuclei, final Double[] shifts, final Double intensity) { - this.nuclei = nuclei; - this.ndim = this.nuclei.length; - this.shifts = this.initShifts(shifts, this.ndim); - this.assignedAtomIndices = this.initAssignedAtomIndices(this.ndim); + this(nuclei, shifts); this.intensity = intensity; } - private Double[] initShifts(final Double[] shifts, final int ndim){ - final Double[] tempShifts = new Double[ndim]; - for (int d = 0; d < ndim; d++) { - tempShifts[d] = shifts[d]; + private Double[] 
initShifts(final Double[] shifts, final int nDim){ + final Double[] tempShifts = new Double[nDim]; + for (int d = 0; d < nDim; d++) { + if((shifts != null) && (shifts.length == nDim)){ + tempShifts[d] = shifts[d]; + } else { + tempShifts[d] = null; + } } return tempShifts; } - private int[] initAssignedAtomIndices(final int ndim){ - final int[] tempAssignedAtomIndices = new int[ndim]; - for (int d = 0; d < this.ndim; d++) { - tempAssignedAtomIndices[d] = -1; - } - - return tempAssignedAtomIndices; - } - - public int getDim(){ - return this.ndim; + public int getDimCount(){ + return this.nDim; } public String[] getNuclei(){ return this.nuclei; } - public void setShift(final Double shift, final int dim) { + public boolean setShift(final Double shift, final int dim) { + if(!this.checkDimension(dim)){ + return false; + } this.shifts[dim] = shift; + + return true; } public Double getShift(final int dim) { + if(!this.checkDimension(dim)){ + return null; + } return this.shifts[dim]; } @@ -114,36 +119,8 @@ public void setMultiplicity(final String multiplicity) { public String getMultiplicity() { return this.multiplicity; } - - public boolean setAssignedAtomIndices(final int[] indices){ - if(indices.length != this.ndim){ - return false; - } - for (int d = 0; d < this.ndim; d++) { - this.assignedAtomIndices[d] = indices[d]; - } - - return true; - } - - public int[] getAssignedIndices(){ - return this.assignedAtomIndices; - } - - public boolean setAssignedAtomIndex(final int index, final int dim){ - if(dim < 0 || dim >= this.ndim){ - return false; - } - this.assignedAtomIndices[dim] = index; - - return true; - } - - public int getAssignedAtomIndex(final int dim){ - return this.assignedAtomIndices[dim]; - } - public void setPhase(final int phase) { + public void setPhase(final Integer phase) { this.phase = phase; } @@ -151,32 +128,19 @@ public Integer getPhase() { return this.phase; } - @Override - public String toString() { - String s = ""; - s += ndim + " -dimensional 
NMRSignal for nuclei "; - for (int f = 0; f < this.nuclei.length; f++) { - s += this.nuclei[f] + "; "; - } - s += "\nShiftlist: "; - for (int f = 0; f < this.shifts.length; f++) { - s += this.shifts[f] + "; "; - } - s += "\n\n"; - return s; - } + public boolean checkDimension(final int dim){ + return (dim >= 0) && (dim < this.nDim); + } /** * * @return - * @deprecated */ public Signal getClone(){ final Signal signalClone = new Signal(this.nuclei, this.shifts); signalClone.setIntensity(this.intensity); signalClone.setMultiplicity(this.multiplicity); signalClone.setPhase(this.phase); - signalClone.setAssignedAtomIndices(this.assignedAtomIndices); return signalClone; } diff --git a/src/casekit/NMR/test.java b/src/casekit/NMR/test.java index 57a4b3e..6f5bf5d 100644 --- a/src/casekit/NMR/test.java +++ b/src/casekit/NMR/test.java @@ -6,6 +6,7 @@ package casekit.NMR; +import casekit.NMR.model.Assignment; import casekit.NMR.model.Spectrum; import java.io.FileNotFoundException; import java.io.IOException; @@ -13,6 +14,7 @@ import java.sql.SQLException; import java.util.ArrayList; import java.util.Arrays; +import java.util.HashMap; import java.util.logging.Level; import java.util.logging.Logger; import javax.xml.parsers.ParserConfigurationException; @@ -82,52 +84,62 @@ public static void main(String[] args) throws ParserConfigurationException, SAXE // HJ555 projectName = "HJ555"; process = new Process(molFormula_HJ555); - spec = process.parse1DNMRviaXML(Peaks13C_HJ555, "C"); -// spec.setSpecType(CDKConstants.NMRSPECTYPE_1D_ARBITRARY); - process.set1DNMR(spec); - System.out.println("assignments spectrum 13C: " + spec.getAssignmentAtomIndicesByDim(0)); -// process.parse1DNMR(Peaks13C_HJ555, "C"); - - spec = process.parseDEPTviaXML(PeaksDEPT90_HJ555); + spec = Process.parse1DNMRviaXML(Peaks13C_HJ555, "C"); + spec.setSpecType(CDKConstants.NMRSPECTYPE_1D_ARBITRARY); + final ArrayList indicesInAtomContainer1D_13C = process.set1DNMR(spec); + final Assignment assignments1D_13C = new 
Assignment(spec); + assignments1D_13C.setAssignments(0, indicesInAtomContainer1D_13C); + System.out.println("assignments spectrum 13C: " + Arrays.toString(assignments1D_13C.getAssignments(0))); + System.out.println("equivalences: " + spec.getEquivalences()); + + spec = Process.parseDEPTviaXML(PeaksDEPT90_HJ555); spec.setSpecType(CDKConstants.NMRSPECTYPE_1D_DEPT90); - Spectrum spec135 = process.parseDEPTviaXML(PeaksDEPT135_HJ555); + Spectrum spec135 = Process.parseDEPTviaXML(PeaksDEPT135_HJ555); spec135.setSpecType(CDKConstants.NMRSPECTYPE_1D_DEPT135); - int assignedHAtoms = process.setDEPT(spec, spec135, tolC); -// int assignedHAtoms = process.parseDEPT(PeaksDEPT90_HJ555, PeaksDEPT135_HJ555, tolC); - System.out.println("assigned protons: " + assignedHAtoms); - System.out.println("assignments spectrum DEPT90: " + spec.getAssignmentAtomIndicesByDim(0)); - System.out.println("assignments spectrum DEPT135: " + spec135.getAssignmentAtomIndicesByDim(0)); + final HashMap> matches1D_DEPT = process.setDEPT(spec, spec135, tolC); + final Assignment assignments1D_DEPT90 = new Assignment(spec); + final Assignment assignments1D_DEPT135 = new Assignment(spec135); + assignments1D_DEPT90.setAssignments(0, matches1D_DEPT.get("DEPT90")); + assignments1D_DEPT135.setAssignments(0, matches1D_DEPT.get("DEPT135")); + System.out.println("assignments spectrum DEPT90: " + Arrays.toString(assignments1D_DEPT90.getAssignments(0))); + System.out.println("assignments spectrum DEPT135: " + Arrays.toString(assignments1D_DEPT135.getAssignments(0))); - spec = process.parseHSQCviaXML(PeaksHSQC_HJ555, "C"); + spec = Process.parseHSQCviaXML(PeaksHSQC_HJ555, "C"); spec.setSpecType(CDKConstants.NMRSPECTYPE_2D_HSQC); - process.setHSQC(spec, tolC); -// process.parseHSQC(PeaksHSQC_HJ555, "C", tolH); - System.out.println("assignments spectrum HSQC 1: " + spec.getAssignmentAtomIndicesByDim(0)); - System.out.println("assignments spectrum HSQC 2: " + spec.getAssignmentAtomIndicesByDim(1)); - - spec = 
process.parseHHCOSYviaXML(PeaksCOSY_HJ555); + final HashMap> matches2D_HSQC = process.setHSQC(spec, tolC); + final Assignment assignments2D_HSQC = new Assignment(spec); + assignments2D_HSQC.setAssignments(0, matches2D_HSQC.get(Utils.getAtomTypeFromSpectrum(spec, 0))); + assignments2D_HSQC.setAssignments(1, matches2D_HSQC.get(Utils.getAtomTypeFromSpectrum(spec, 1))); + System.out.println("assignments spectrum HSQC dim1: " + Arrays.toString(assignments2D_HSQC.getAssignments(0))); + System.out.println("assignments spectrum HSQC dim2: " + Arrays.toString(assignments2D_HSQC.getAssignments(1))); + + spec = Process.parseHHCOSYviaXML(PeaksCOSY_HJ555); spec.setSpecType(CDKConstants.NMRSPECTYPE_2D_HHCOSY); - process.setHHCOSY(spec, tolH); -// process.parseHHCOSY(PeaksCOSY_HJ555, tolH); - System.out.println("assignments spectrum H,H-COSY 1: " + spec.getAssignmentAtomIndicesByDim(0)); - System.out.println("assignments spectrum H,H-COSY 2: " + spec.getAssignmentAtomIndicesByDim(1)); + final HashMap> matches2D_HHCOSY = process.setHHCOSY(spec, tolH); + final Assignment assignments2D_HHCOSY = new Assignment(spec); + assignments2D_HHCOSY.setAssignments(0, matches2D_HHCOSY.get(Utils.getAtomTypeFromSpectrum(spec, 0))); + assignments2D_HHCOSY.setAssignments(1, matches2D_HHCOSY.get(Utils.getAtomTypeFromSpectrum(spec, 1))); + System.out.println("assignments spectrum HHCOSY dim1: " + Arrays.toString(assignments2D_HHCOSY.getAssignments(0))); + System.out.println("assignments spectrum HHCOSY dim2: " + Arrays.toString(assignments2D_HHCOSY.getAssignments(1))); - spec = process.parseINADEQUATEviaXML(PeaksINADEQUATE_HJ555); - spec.setSpecType(CDKConstants.NMRSPECTYPE_2D_INADEQUATE); - process.setINADEQUATE(spec, tolC); -// process.parseINADEQUATE(PeaksINADEQUATE_HJ555, tolC); - System.out.println("assignments spectrum INADEQUATE 1: " + spec.getAssignmentAtomIndicesByDim(0)); - System.out.println("assignments spectrum INADEQUATE 2: " + spec.getAssignmentAtomIndicesByDim(1)); + spec = 
Process.parseINADEQUATEviaXML(PeaksINADEQUATE_HJ555); + spec.setSpecType(CDKConstants.NMRSPECTYPE_2D_INADEQUATE); + final HashMap> matches2D_INADEQUATE = process.setINADEQUATE(spec, tolC); + final Assignment assignments2D_INADEQUATE = new Assignment(spec); + assignments2D_INADEQUATE.setAssignments(0, matches2D_INADEQUATE.get(Utils.getAtomTypeFromSpectrum(spec, 0))); + assignments2D_INADEQUATE.setAssignments(1, matches2D_INADEQUATE.get(Utils.getAtomTypeFromSpectrum(spec, 1))); + System.out.println("assignments spectrum INADEQUATE dim1: " + Arrays.toString(assignments2D_INADEQUATE.getAssignments(0))); + System.out.println("assignments spectrum INADEQUATE dim2: " + Arrays.toString(assignments2D_INADEQUATE.getAssignments(1))); - spec = process.parseHMBCviaXML(PeaksHMBC_HJ555, "C"); + spec = Process.parseHMBCviaXML(PeaksHMBC_HJ555, "C"); spec.setSpecType(CDKConstants.NMRSPECTYPE_2D_HMBC); - process.setHMBC(spec, tolH, tolC); -// process.parseHMBC(PeaksHMBC_HJ555, "C", tolH, tolC); - System.out.println("assignments spectrum HMBC 1: " + spec.getAssignmentAtomIndicesByDim(0)); - System.out.println("assignments spectrum HMBC 2: " + spec.getAssignmentAtomIndicesByDim(1)); - + final HashMap> matches2D_HMBC = process.setHMBC(spec, tolH, tolC); + final Assignment assignments2D_HMBC = new Assignment(spec); + assignments2D_HMBC.setAssignments(0, matches2D_HMBC.get(Utils.getAtomTypeFromSpectrum(spec, 0))); + assignments2D_HMBC.setAssignments(1, matches2D_HMBC.get(Utils.getAtomTypeFromSpectrum(spec, 1))); + System.out.println("assignments spectrum HMBC dim1: " + Arrays.toString(assignments2D_HMBC.getAssignments(0))); + System.out.println("assignments spectrum HMBC dim2: " + Arrays.toString(assignments2D_HMBC.getAssignments(1))); - process.setEquivalentProperties(); process.setBonds(new String[]{CDKConstants.NMRSPECTYPE_2D_HHCOSY, CDKConstants.NMRSPECTYPE_2D_INADEQUATE, CDKConstants.NMRSPECTYPE_2D_HMBC}); // without hybridizations process.createLSDFile(projectName, 
"/Users/mwenk/Downloads/testLSD", new String[]{"/Users/mwenk/work/software/LSD-3.4.9/Filters/", "/Users/mwenk/work/software/LSD-3.4.9/Filters/MOLGEN/badlist1/"}); @@ -175,7 +187,7 @@ public static void main(String[] args) throws ParserConfigurationException, SAXE for (int i = 0; i< ac.getAtomCount(); i++) { System.out.println("i: " + i + " -> atom: " + ac.getAtom(i).getSymbol() + ", shift: " + ac.getAtom(i).getProperty(casekit.NMR.Utils.getNMRShiftConstant("C")) + ", #H: " + ac.getAtom(i).getImplicitHydrogenCount() + ", H shifts: " + ac.getAtom(i).getProperty(CDKConstants.NMRSPECTYPE_2D_HSQC) + ", Hybrid.: " + ac.getAtom(i).getHybridization() + ", HHCOSY: " + ac.getAtom(i).getProperty(CDKConstants.NMRSPECTYPE_2D_HHCOSY) + - ", INADEQUATE: " + ac.getAtom(i).getProperty(CDKConstants.NMRSPECTYPE_2D_INADEQUATE) + ", HMBC: " + ac.getAtom(i).getProperty(CDKConstants.NMRSPECTYPE_2D_HMBC) + ", EQUAL: " + ac.getAtom(i).getProperty(casekit.NMR.ParseRawData.PROP_EQUIVALENCE)); + ", INADEQUATE: " + ac.getAtom(i).getProperty(CDKConstants.NMRSPECTYPE_2D_INADEQUATE) + ", HMBC: " + ac.getAtom(i).getProperty(CDKConstants.NMRSPECTYPE_2D_HMBC)); } System.out.println("\nbond count: " + ac.getBondCount() + ":"); for (IBond bond : ac.bonds()) { From 86e0dd2dc1b0a7463c8abc0c7995b92ceaed08b8 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Fri, 10 Aug 2018 20:57:39 +0200 Subject: [PATCH 030/405] - small unimportant changes --- src/casekit/NMR/remarks | 15 --------------- src/casekit/NMR/test.java | 11 ----------- 2 files changed, 26 deletions(-) diff --git a/src/casekit/NMR/remarks b/src/casekit/NMR/remarks index 11a5071..7abe905 100644 --- a/src/casekit/NMR/remarks +++ b/src/casekit/NMR/remarks @@ -4,18 +4,3 @@ MIT license - PMD: CDK GitHub PMD example - JCoCo - yourkit (license received) - - -MolSpec: -- [SEMI] determine hybridization via CDK (#H >= 3) and DB lookup (max. freq, TEMPORARILY)? -- setHybridizations function: parallization? 
- -Utils: -- write a parse function to read nmrML files -- in getHybridizations function: -1. parallization? -2. comparing of all attached/used hydrogens with molecular formula -> helps for bond type decisions -- in getBondTypeFromHybridizations: -1. can sulfur only have 1x double bond or 2x single bond in metabolomic systems? -2. as 1. but for nitrogen and phosphorus -3. is S hybridization always single bond? \ No newline at end of file diff --git a/src/casekit/NMR/test.java b/src/casekit/NMR/test.java index 6f5bf5d..64377ff 100644 --- a/src/casekit/NMR/test.java +++ b/src/casekit/NMR/test.java @@ -10,7 +10,6 @@ import casekit.NMR.model.Spectrum; import java.io.FileNotFoundException; import java.io.IOException; -import java.sql.Connection; import java.sql.SQLException; import java.util.ArrayList; import java.util.Arrays; @@ -18,14 +17,11 @@ import java.util.logging.Level; import java.util.logging.Logger; import javax.xml.parsers.ParserConfigurationException; -import org.openscience.cdk.Atom; import org.openscience.cdk.CDKConstants; import org.openscience.cdk.DefaultChemObjectBuilder; -import org.openscience.cdk.interfaces.IAtom; import org.openscience.cdk.interfaces.IAtomContainer; import org.openscience.cdk.interfaces.IBond; import org.openscience.cdk.interfaces.IMolecularFormula; -import org.openscience.cdk.silent.SilentChemObjectBuilder; import org.openscience.cdk.tools.manipulator.MolecularFormulaManipulator; import org.xml.sax.SAXException; @@ -38,13 +34,6 @@ public class test { public static void main(String[] args) throws ParserConfigurationException, SAXException, CloneNotSupportedException, FileNotFoundException, SQLException, ClassNotFoundException { final int maxSpheres = 1; -// final String[] args2 = new String[]{"-i", "/Users/mwenk/Downloads/nmrshiftdb2withsignals.sd", "-o", "/Users/mwenk/Downloads/hose" + maxSpheres + ".tsv", "-m", String.valueOf(maxSpheres), "-v"}; -// try { -// final NMRShiftDBSDFParser parser = new NMRShiftDBSDFParser(args2); -// 
} catch (Exception ex) { -// Logger.getLogger(test.class.getName()).log(Level.SEVERE, null, ex); -// } - final String Peaks13C_HJ555 = "/Users/mwenk/work/research/Beemelmanns-HKI-HJ555/13C/50/pdata/1/peaklist.xml"; final String Peaks13C_HJ777 = "/Users/mwenk/work/research/Beemelmanns-HKI-HJ777/HJ777_13C_NMR.csv"; From 1863c04d51387bd11014324ac821ce69e4d8b4fb Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Tue, 18 Sep 2018 23:16:04 +0200 Subject: [PATCH 031/405] - added function: getAtomTypesInDB --- src/casekit/NMR/DB.java | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/src/casekit/NMR/DB.java b/src/casekit/NMR/DB.java index 6a0c484..ecf7ed7 100644 --- a/src/casekit/NMR/DB.java +++ b/src/casekit/NMR/DB.java @@ -34,7 +34,9 @@ import java.sql.Statement; import java.util.ArrayList; import java.util.HashMap; +import java.util.HashSet; import org.openscience.cdk.exception.CDKException; +import org.openscience.cdk.interfaces.IAtom; import org.openscience.cdk.interfaces.IAtomContainer; import org.openscience.cdk.interfaces.IAtomContainerSet; import org.openscience.cdk.io.iterator.IteratingSDFReader; @@ -61,7 +63,7 @@ public class DB { * @throws FileNotFoundException * @throws CDKException */ - public static IAtomContainerSet getStructuresFromNMRShiftDBFile(final String pathToNMRShiftDB, final int maxCycleSize) throws FileNotFoundException, CDKException { + public static IAtomContainerSet getStructuresFromSDFile(final String pathToNMRShiftDB, final int maxCycleSize) throws FileNotFoundException, CDKException { final IAtomContainerSet acSet = new AtomContainerSet(); final IteratingSDFReader iterator = new IteratingSDFReader( @@ -75,6 +77,20 @@ public static IAtomContainerSet getStructuresFromNMRShiftDBFile(final String pat return acSet; } + public static HashSet getAtomTypesInDB(final String pathToDB) throws FileNotFoundException{ + final HashSet atomTypes = new HashSet<>(); + final IteratingSDFReader iterator = new IteratingSDFReader( 
+ new FileReader(pathToDB), + SilentChemObjectBuilder.getInstance() + ); + while (iterator.hasNext()) { + atomTypes.addAll(Utils.getAtomTypesInAtomContainer(iterator.next())); + } + + return atomTypes; + } + + public static Connection getDBConnection(final String server, final String options, final String user, final String pwd) throws SQLException { return DriverManager.getConnection(server + "?" + options, user, pwd); From 80dd6477306b3f499319cd45b01287ca8c8c9aa5 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Tue, 18 Sep 2018 23:23:45 +0200 Subject: [PATCH 032/405] - now usage of Assignment class objects: - in each assignment functions - for every 1D or 2D spectrum - some functions were added to get assignments and indices of atoms in class atom container - still to do: - check function for proton assignments (counts) when using multiple spectra to avoid multiple assignments of same protons to different heavy atoms --- src/casekit/NMR/ParseRawData.java | 541 +++++++++++++++--------------- 1 file changed, 278 insertions(+), 263 deletions(-) diff --git a/src/casekit/NMR/ParseRawData.java b/src/casekit/NMR/ParseRawData.java index 79aef9b..4066176 100644 --- a/src/casekit/NMR/ParseRawData.java +++ b/src/casekit/NMR/ParseRawData.java @@ -23,6 +23,7 @@ */ package casekit.NMR; +import casekit.NMR.model.Assignment; import casekit.NMR.model.Spectrum; import java.io.IOException; import java.util.ArrayList; @@ -48,7 +49,8 @@ public class ParseRawData { final private IAtomContainer mol; final private IMolecularFormula molFormula; private HashMap> atomTypeIndices; - + final private HashMap spectra = new HashMap<>(); + final private HashMap assignments = new HashMap<>(); /** * Creates an instances of this class with an empty class atom container. 
@@ -111,11 +113,51 @@ public final HashMap> getAtomTypeIndices() { * @see Utils#getAtomTypeIndices(org.openscience.cdk.interfaces.IAtomContainer) * */ - public final void setAtomTypeIndices(){ + private void setAtomTypeIndices(){ this.atomTypeIndices = Utils.getAtomTypeIndices(this.mol); } + /** + * Returns all given and used spectra. + * + * @return + */ + public final HashMap getSpectra(){ + + return this.spectra; + } + + + /** + * Returns all created and used Assignment objects. The assigned indices + * refer to atom indices in class atom container. + * + * @return + */ + public final HashMap getAssignments(){ + + return this.assignments; + } + + + /** + * Returns one specific created and used Assignment object. + * The assigned indices refer to atom indices in class atom container. + * + * @param spectrum + * @return + */ + public final Assignment getAssignment(final Spectrum spectrum){ + + if (spectrum.getSpecType().equals(CDKConstants.NMRSPECTYPE_1D_DEPT90) || spectrum.getSpecType().equals(CDKConstants.NMRSPECTYPE_1D_DEPT135)) { + + return this.getAssignments().get(spectrum.getSpecType()); + } + + return this.assignments.get(spectrum.getSpecType() + "_" + Utils.getSpectrumNucleiAsString(spectrum)); + } + /** * Creates a Spectrum class object from 1D NMR peak list in CSV file format. 
@@ -128,8 +170,10 @@ public final void setAtomTypeIndices(){ * @throws java.io.IOException */ public static final Spectrum parse1DNMRviaCSV(final String pathToCSV, final String atomType) throws IOException { - - return Utils.CSVtoSpectrum(pathToCSV, new int[]{4}, new String[]{atomType}, 6); + final Spectrum spectrum = Utils.CSVtoSpectrum(pathToCSV, new int[]{4}, new String[]{atomType}, 6); + spectrum.setSpecType(CDKConstants.NMRSPECTYPE_1D); + + return spectrum; } @@ -146,8 +190,10 @@ public static final Spectrum parse1DNMRviaCSV(final String pathToCSV, final Stri * @throws org.xml.sax.SAXException */ public static final Spectrum parse1DNMRviaXML(final String pathToXML, final String atomType) throws IOException, ParserConfigurationException, SAXException { - - return Utils.XMLtoSpectrum(pathToXML, 1, new int[]{1}, new String[]{atomType}); + final Spectrum spectrum = Utils.XMLtoSpectrum(pathToXML, 1, new int[]{1}, new String[]{atomType}); + spectrum.setSpecType(CDKConstants.NMRSPECTYPE_1D); + + return spectrum; } @@ -167,62 +213,76 @@ public static final Spectrum parse1DNMRviaXML(final String pathToXML, final Stri * equivalent signal selection by user. 
* * @param spectrum Spectrum class object containing the 1D shift information - * @return * @throws java.io.IOException */ - public final ArrayList set1DNMR(final Spectrum spectrum) throws IOException{ - + public final void assign1DSpectrum(final Spectrum spectrum) throws IOException{ // checks whether number of signals is equal to molecular formula if given // if not equal then edit signal list in spectrum this.check1DSpectrum(spectrum); // assign shift values to atoms sequentially - this.assignShiftValues(spectrum); + this.assignShiftValuesToAtoms(spectrum); - return this.atomTypeIndices.get(Utils.getAtomTypeFromSpectrum(spectrum, 0)); + final Assignment assignment = new Assignment(spectrum); + if(this.atomTypeIndices.get(Utils.getAtomTypeFromSpectrum(spectrum, 0)) != null){ + assignment.setAssignments(0, this.atomTypeIndices.get(Utils.getAtomTypeFromSpectrum(spectrum, 0))); + } + + this.spectra.put(CDKConstants.NMRSPECTYPE_1D + "_" + Utils.getSpectrumNucleiAsString(spectrum), spectrum); + this.assignments.put(CDKConstants.NMRSPECTYPE_1D + "_" + Utils.getSpectrumNucleiAsString(spectrum), assignment); } /** * Checks the number of signals in a spectrum against the number of atoms * in molecular formula of class, if given. In case of different numbers, - * a user input will be requested. + * a user input for spectrum editing will be requested. 
* * @param spectrum * @throws IOException * @see Utils#editSignalsInSpectrum(casekit.NMR.model.Spectrum, org.openscience.cdk.interfaces.IMolecularFormula) */ - public void check1DSpectrum(final Spectrum spectrum) throws IOException{ + private void check1DSpectrum(final Spectrum spectrum) throws IOException{ if(this.molFormula != null){ final int diff = Utils.getDifferenceSpectrumSizeAndMolecularFormulaCount(spectrum, this.molFormula); if (diff != 0) { // adjust Spectrum size by user Utils.editSignalsInSpectrum(spectrum, this.molFormula); } - } + } } - private void assignShiftValues(final Spectrum spectrum){ + /** + * Sets shift values in atoms of class atom container as property (see below), sequentially. + * + * @param spectrum Spectrum class object which contains shifts in first + * dimension + * @see Utils#getNMRShiftConstant(java.lang.String) + */ + private void assignShiftValuesToAtoms(final Spectrum spectrum){ final String atomType = Utils.getAtomTypeFromSpectrum(spectrum, 0); final ArrayList shifts = spectrum.getShifts(0); - if(this.molFormula == null){ - this.removeAtoms(atomType); - // fill up the peaks for that atom type from given peak list in spectrum + if((this.molFormula == null) && !atomType.equals("H")){ + // (re-)filling up of peaks for that atom type from given peak list in spectrum + this.removeAtoms(atomType); IAtom atom; - for (final double shift : shifts) { + for (final double shift : shifts) { atom = new Atom(atomType); - atom.setProperty(casekit.NMR.Utils.getNMRShiftConstant(atomType), shift); + atom.setProperty(Utils.getNMRShiftConstant(atomType), shift); atom.setImplicitHydrogenCount(null); - this.mol.addAtom(atom); + this.mol.addAtom(atom); } this.setAtomTypeIndices(); } - int assignedShiftCount = 0; - for (final int i : this.atomTypeIndices.get(atomType)) { - if (assignedShiftCount < shifts.size()) { - // shift assignment in atom - this.mol.getAtom(i).setProperty(casekit.NMR.Utils.getNMRShiftConstant(atomType), 
shifts.get(assignedShiftCount)); + // assign shifts to atoms as property + if(this.atomTypeIndices.get(atomType) != null){ + int assignedShiftCount = 0; + for (final int i : this.atomTypeIndices.get(atomType)) { + if (assignedShiftCount < shifts.size()) { + // shift assignment in atom + this.mol.getAtom(i).setProperty(Utils.getNMRShiftConstant(atomType), shifts.get(assignedShiftCount)); + } + assignedShiftCount++; } - assignedShiftCount++; } } @@ -258,12 +318,19 @@ private void removeAtoms(final String atomType) { * * @param pathToCSV Path to one DEPT peak list (Bruker's TopSpin csv file * format) + * @param mode used angle: either 90° [0] or 135° [1] * @return * @throws java.io.IOException */ - public static final Spectrum parseDEPTviaCSV(final String pathToCSV) throws IOException { + public static final Spectrum parseDEPTviaCSV(final String pathToCSV, final int mode) throws IOException { + final Spectrum spectrum = Utils.CSVtoSpectrum(pathToCSV, new int[]{4}, new String[]{"C"}, 6); + if(mode == 0){ + spectrum.setSpecType(CDKConstants.NMRSPECTYPE_1D_DEPT90); + } else if(mode == 1){ + spectrum.setSpecType(CDKConstants.NMRSPECTYPE_1D_DEPT135); + } - return Utils.CSVtoSpectrum(pathToCSV, new int[]{4}, new String[]{"C"}, 6); + return spectrum; } /** @@ -273,61 +340,88 @@ public static final Spectrum parseDEPTviaCSV(final String pathToCSV) throws IOEx * * @param pathToXML Path to one DEPT peak list (Bruker's TopSpin XML file * format) + * @param mode used angle: either 90° [0] or 135° [1] * @return * @throws java.io.IOException * @throws javax.xml.parsers.ParserConfigurationException * @throws org.xml.sax.SAXException */ - public static final Spectrum parseDEPTviaXML(final String pathToXML) throws IOException, ParserConfigurationException, SAXException { - - return Utils.XMLtoSpectrum(pathToXML, 1, new int[]{1}, new String[]{"C"}); + public static final Spectrum parseDEPTviaXML(final String pathToXML, final int mode) throws IOException, ParserConfigurationException, 
SAXException { + final Spectrum spectrum = Utils.XMLtoSpectrum(pathToXML, 1, new int[]{1}, new String[]{"C"}); + if(mode == 0){ + spectrum.setSpecType(CDKConstants.NMRSPECTYPE_1D_DEPT90); + } else if(mode == 1){ + spectrum.setSpecType(CDKConstants.NMRSPECTYPE_1D_DEPT135); + } + + return spectrum; } /** - * Sets the hydrogen count information of carbon atoms in atom conatiner - * by usage of DEPT90 and DEPT135 information. - * - * @param spectrumDEPT90 DEPT90 spectrum - * @param spectrumDEPT135 DEPT135 spectrum which has to contain intensity + * Sets the assignments of carbon atoms in class atom container + * by usage of DEPT90 and DEPT135 information. The implicit hydrogen count + * property is set too. + * + * @see ParseRawData#setImplicitHydrogenCountsFromDEPT() + * + * @param spectrum1D_DEPT90 DEPT90 spectrum + * @param spectrum1D_DEPT135 DEPT135 spectrum which has to contain intensity * information * @param tol tolerance value [ppm] for carbon shift matching - * @return false if one of the spectra is not set or the intensities in - * DEPT135 are missing + * @return false if 1-dimensional 13C spectrum is missing (not set beforehand) + * or something is missing in one of the two input spectra + * */ - public final HashMap> setDEPT(final Spectrum spectrumDEPT90, final Spectrum spectrumDEPT135, final double tol){ - final HashMap> matches = new HashMap<>(); - if(spectrumDEPT90 == null || spectrumDEPT135 == null || spectrumDEPT135.getIntensities() == null){ - return null; + public final boolean assignDEPT(final Spectrum spectrum1D_DEPT90, final Spectrum spectrum1D_DEPT135, final double tol){ + if((spectrum1D_DEPT90 == null) || (spectrum1D_DEPT135 == null) || (spectrum1D_DEPT135.getIntensities() == null) + || (this.getSpectra().get(CDKConstants.NMRSPECTYPE_1D + "_13C") == null)){ + return false; + } + + final Assignment assignment1D_DEPT90 = new Assignment(spectrum1D_DEPT90); + final Assignment assignment1D_DEPT135 = new Assignment(spectrum1D_DEPT135); + final 
ArrayList matchesIn1DSpectrum_DEPT90 = this.findMatchesIn1DSpectra(spectrum1D_DEPT90, 0, tol); + final ArrayList matchesIn1DSpectrum_DEPT135 = this.findMatchesIn1DSpectra(spectrum1D_DEPT135, 0, tol); + final Assignment assignment1D_13C = this.getAssignment(this.getSpectra().get(CDKConstants.NMRSPECTYPE_1D + "_13C")); + + for (int i = 0; i < assignment1D_DEPT90.getAssignmentsCount(); i++) { + if (assignment1D_13C.getAssignment(0, matchesIn1DSpectrum_DEPT90.get(i)) >= 0) { + assignment1D_DEPT90.setAssignment(0, i, assignment1D_13C.getAssignment(0, matchesIn1DSpectrum_DEPT90.get(i))); + } } - final ArrayList shiftsDEPT90 = spectrumDEPT90.getShifts(0); - final ArrayList shiftsDEPT135 = spectrumDEPT135.getShifts(0); - final ArrayList intensitiesDEPT135 = spectrumDEPT135.getIntensities(); - ArrayList matchesDEPT90 = casekit.NMR.Utils.findShiftMatches(this.mol, shiftsDEPT90, tol, "C"); - matchesDEPT90 = casekit.NMR.Utils.correctShiftMatches(this.mol, shiftsDEPT90, matchesDEPT90, tol, "C"); - matches.put("DEPT90", matchesDEPT90); - ArrayList matchesDEPT135 = casekit.NMR.Utils.findShiftMatches(this.mol, shiftsDEPT135, tol, "C"); - matchesDEPT135 = casekit.NMR.Utils.correctShiftMatches(this.mol, shiftsDEPT135, matchesDEPT135, tol, "C"); - matches.put("DEPT135", matchesDEPT135); - - this.setImplicitHydrogenNumberFromDEPT(matchesDEPT90, matchesDEPT135, intensitiesDEPT135); - - return matches; + for (int i = 0; i < assignment1D_DEPT135.getAssignmentsCount(); i++) { + if (assignment1D_13C.getAssignment(0, matchesIn1DSpectrum_DEPT135.get(i)) >= 0) { + assignment1D_DEPT135.setAssignment(0, i, assignment1D_13C.getAssignment(0, matchesIn1DSpectrum_DEPT135.get(i))); + } + } + + this.spectra.put(CDKConstants.NMRSPECTYPE_1D_DEPT90, spectrum1D_DEPT90); + this.assignments.put(CDKConstants.NMRSPECTYPE_1D_DEPT90, assignment1D_DEPT90); + this.spectra.put(CDKConstants.NMRSPECTYPE_1D_DEPT135, spectrum1D_DEPT135); + this.assignments.put(CDKConstants.NMRSPECTYPE_1D_DEPT135, 
assignment1D_DEPT135); + + this.setImplicitHydrogenCountsFromDEPT(); + + return true; } - + /** - * - * @param matchesDEPT90 - * @param matchesDEPT135 - * @param intensitiesDEPT135 + * Sets the implicitHydrogenCount() property in atoms of class atom container + * by using the already set DEPT information. + * @see ParseRawData#assignDEPT(casekit.NMR.model.Spectrum, casekit.NMR.model.Spectrum, double) */ - private void setImplicitHydrogenNumberFromDEPT(final ArrayList matchesDEPT90, final ArrayList matchesDEPT135, final ArrayList intensitiesDEPT135) { - + private void setImplicitHydrogenCountsFromDEPT() { + + final ArrayList intensitiesDEPT135 = this.getSpectra().get(CDKConstants.NMRSPECTYPE_1D_DEPT135).getIntensities(); + final ArrayList matchesDEPT90InAtomContainer = this.getAssignedAtomIndices(this.getSpectra().get(CDKConstants.NMRSPECTYPE_1D_DEPT90), 0); + final ArrayList matchesDEPT135InAtomContainer = this.getAssignedAtomIndices(this.getSpectra().get(CDKConstants.NMRSPECTYPE_1D_DEPT135), 0); + int matchDEPT90, matchDEPT135, hCount, hCountAll = 0; for (int i : this.atomTypeIndices.get("C")) { if ((this.mol.getAtom(i).getProperty(CDKConstants.NMRSHIFT_CARBON) != null) && (this.mol.getAtom(i).getImplicitHydrogenCount() == null)) { - matchDEPT90 = matchesDEPT90.indexOf(i); - matchDEPT135 = matchesDEPT135.indexOf(i); + matchDEPT90 = matchesDEPT90InAtomContainer.indexOf(i); + matchDEPT135 = matchesDEPT135InAtomContainer.indexOf(i); if (matchDEPT90 >= 0) { // CH hCount = 1; @@ -369,8 +463,10 @@ private void setImplicitHydrogenNumberFromDEPT(final ArrayList matchesD * @throws IOException */ public static final Spectrum parseHSQCviaCSV(final String pathToCSV, final String heavyAtomType) throws IOException { - - return Utils.CSVtoSpectrum(pathToCSV, new int[]{5, 6}, new String[]{"H", heavyAtomType}, 9); + final Spectrum spectrum = Utils.CSVtoSpectrum(pathToCSV, new int[]{5, 6}, new String[]{"H", heavyAtomType}, 9); + 
spectrum.setSpecType(CDKConstants.NMRSPECTYPE_2D_HSQC); + + return spectrum; } /** @@ -385,166 +481,104 @@ public static final Spectrum parseHSQCviaCSV(final String pathToCSV, final Strin * @throws org.xml.sax.SAXException */ public static final Spectrum parseHSQCviaXML(final String pathToXML, final String heavyAtomType) throws IOException, ParserConfigurationException, SAXException { + final Spectrum spectrum = Utils.XMLtoSpectrum(pathToXML, 2, new int[]{2, 1}, new String[]{"H", heavyAtomType}); + spectrum.setSpecType(CDKConstants.NMRSPECTYPE_2D_HSQC); - return Utils.XMLtoSpectrum(pathToXML, 2, new int[]{2, 1}, new String[]{"H", heavyAtomType}); + return spectrum; } /** - * Sets the proton shift(s) as list to belonging heavy atoms of an - * HSQC signal relationship. - * The property is then set to {@link #CONST_PROP_PROTONSHIFTS} in - * {@link IAtomContainer#setProperty(java.lang.Object, java.lang.Object)}. - * + * * @param spectrum Spectrum class object consisting of Signal class objects - * where the proton values are given first and the heavy atom values as the second. + * where the proton shifts values are given in first dimension and the + * heavy atom shifts in the second. 
+ * @param tolProton tolerance value [ppm] for proton shift matching * @param tolHeavyAtom tolerance value [ppm] for heavy atom shift matching - * @return */ - public final HashMap> setHSQC(final Spectrum spectrum, final double tolHeavyAtom) { - final HashMap> matches = new HashMap<>(); - final ArrayList shiftsHydrogen = spectrum.getShifts(0); - final ArrayList shiftsHeavyAtom = spectrum.getShifts(1); - ArrayList matchesHeavyAtom = Utils.findShiftMatches(this.mol, shiftsHeavyAtom, tolHeavyAtom, Utils.getElementIdentifier(spectrum.getNuclei()[1])); - matchesHeavyAtom = Utils.correctShiftMatches(this.mol, shiftsHeavyAtom, matchesHeavyAtom, tolHeavyAtom, Utils.getElementIdentifier(spectrum.getNuclei()[1])); - matches.put(Utils.getAtomTypeFromSpectrum(spectrum, 0), matchesHeavyAtom); - matches.put(Utils.getAtomTypeFromSpectrum(spectrum, 1), matchesHeavyAtom); + public final void assignHSQC(final Spectrum spectrum, final double tolProton, final double tolHeavyAtom) { - this.setImplicitHydrogenShifts(shiftsHydrogen, matchesHeavyAtom); - - return matches; - } - - - private void setImplicitHydrogenShifts(final ArrayList shiftsHydrogen, final ArrayList matchesHeavyAtomType) { - - IAtom matchAtom; - ArrayList assignedHydrogensShifts; - for (int i = 0; i < matchesHeavyAtomType.size(); i++) { - if (matchesHeavyAtomType.get(i) >= 0) { - matchAtom = this.mol.getAtom(matchesHeavyAtomType.get(i)); - if (matchAtom.getImplicitHydrogenCount() == null || matchAtom.getImplicitHydrogenCount() == 0) { - continue; - } - if (matchAtom.getProperty(CDKConstants.NMRSPECTYPE_2D_HSQC) == null) { - matchAtom.setProperty(CDKConstants.NMRSPECTYPE_2D_HSQC, new ArrayList<>(matchAtom.getImplicitHydrogenCount())); - } - assignedHydrogensShifts = matchAtom.getProperty(CDKConstants.NMRSPECTYPE_2D_HSQC); - if (assignedHydrogensShifts.size() < matchAtom.getImplicitHydrogenCount()) { - assignedHydrogensShifts.add(shiftsHydrogen.get(i)); + this.assign2DSpectrum(spectrum, tolProton, tolHeavyAtom); + // in 
case the 1H spectrum is given, then assign protons to same indices from belonging carbon atoms + if(this.getAssignments().get(CDKConstants.NMRSPECTYPE_1D + "_1H") != null){ + final Assignment assignment1D_1H = this.getAssignments().get(CDKConstants.NMRSPECTYPE_1D + "_1H"); + final Assignment assignment2D_HSQC = this.getAssignments().get(spectrum.getSpecType() + "_" + Utils.getSpectrumNucleiAsString(spectrum)); + final ArrayList matchesIn1DSpectrum_1H = this.findMatchesIn1DSpectra(spectrum, 0, tolProton); + + for (int i = 0; i < assignment2D_HSQC.getAssignmentsCount(); i++) { + // if heavy atom i has an assignment in class atom container then assign that index i to belonging protons as index + if (assignment2D_HSQC.getAssignment(1, i) >= 0) { + assignment1D_1H.setAssignment(0, matchesIn1DSpectrum_1H.get(i), assignment2D_HSQC.getAssignment(1, i)); + assignment2D_HSQC.setAssignment(0, i, assignment1D_1H.getAssignment(0, matchesIn1DSpectrum_1H.get(i))); } } } + + // implement control counter for no. of attached protons (by DEPT) on carbons ?!? } - - /** - * Finds the matches with the lowest deviations between a given hydrogen - * shift value set and implicit hydrogens of heavy atoms in the atom - * container. 
- * - * @param shiftList shift value list to match - * @param tol tolerance value [ppm] - * @return - */ - private ArrayList findImplicitHydrogenShiftMatches(final ArrayList shiftList, final double tol) { - - final ArrayList matches = new ArrayList<>(); - for (int i = 0; i < shiftList.size(); i++) { - matches.add(this.findSingleImplicitHydrogenShiftMatch(shiftList.get(i), tol)[0]); + + private void assign2DSpectrum(final Spectrum spectrum, final double tolDim1, final double tolDim2){ + + final ArrayList matchesQueryIn1DSpectrumDim1 = this.findMatchesIn1DSpectra(spectrum, 0, tolDim1); + final ArrayList matchesQueryIn1DSpectrumDim2 = this.findMatchesIn1DSpectra(spectrum, 1, tolDim2); + final ArrayList matches1DInAtomContainerDim1 = this.getAssignedAtomIndices(this.getSpectra().get(CDKConstants.NMRSPECTYPE_1D + "_" + spectrum.getNuclei()[0]), 0); + final ArrayList matches1DInAtomContainerDim2 = this.getAssignedAtomIndices(this.getSpectra().get(CDKConstants.NMRSPECTYPE_1D + "_" + spectrum.getNuclei()[1]), 0); + + final Assignment assignment = new Assignment(spectrum); + for (int i = 0; i < matchesQueryIn1DSpectrumDim1.size(); i++) { + if((matches1DInAtomContainerDim1 != null) && (matchesQueryIn1DSpectrumDim1.get(i) >= 0)){ + assignment.setAssignment(0, i, matches1DInAtomContainerDim1.get(matchesQueryIn1DSpectrumDim1.get(i))); + } + if((matches1DInAtomContainerDim2 != null) && (matchesQueryIn1DSpectrumDim2.get(i) >= 0)){ + assignment.setAssignment(1, i, matches1DInAtomContainerDim2.get(matchesQueryIn1DSpectrumDim2.get(i))); + } } - - return matches; + + this.spectra.put(spectrum.getSpecType() + "_" + Utils.getSpectrumNucleiAsString(spectrum), spectrum); + this.assignments.put(spectrum.getSpecType() + "_" + Utils.getSpectrumNucleiAsString(spectrum), assignment); } - - /** - * Finds a match with the lowest deviations between a given hydrogen - * shift value and implicit hydrogens of heavy atoms in the atom - * container. 
- * - * @param queryShift hydrogen shift value [ppm] to match - * @param tol tolerance value [ppm] for matching - * @return int array of two values: 1. index of matched heavy atom in - * atom container, 2. index of matched hydrogen in hydrogen shift list - * of corresponding found heavy atom - */ - private int[] findSingleImplicitHydrogenShiftMatch(final double queryShift, final double tol) { - - int matchIndexAtom = -1; - int matchIndexProton = -1; - double minDiff = tol; - ArrayList protonShiftList; - for (int i = 0; i < this.mol.getAtomCount(); i++) { - // skip atoms without implicit hydrogens - if (this.mol.getAtom(i).getProperty(CDKConstants.NMRSPECTYPE_2D_HSQC) == null) { - continue; - } - protonShiftList = this.mol.getAtom(i).getProperty(CDKConstants.NMRSPECTYPE_2D_HSQC); - for (int j = 0; j < protonShiftList.size(); j++) { - // figure out the atom with lowest shift deviation - if ((queryShift - tol <= protonShiftList.get(j)) && (protonShiftList.get(j) <= queryShift + tol) && (Math.abs(queryShift - protonShiftList.get(j)) < minDiff)) { - minDiff = Math.abs(queryShift - protonShiftList.get(j)); - matchIndexProton = j; - matchIndexAtom = i; - } + + + private ArrayList findMatchesIn1DSpectra(final Spectrum spectrum, final int dim, final double tol){ + + ArrayList matchesQueryInOrigin1DSpectrum = new ArrayList<>(); + final ArrayList shiftsQuery = spectrum.getShifts(dim); + if(this.getSpectra().get(CDKConstants.NMRSPECTYPE_1D + "_" + spectrum.getNuclei()[dim]) != null){ + final ArrayList shiftsOrigin1DSpectrum = this.getSpectra().get(CDKConstants.NMRSPECTYPE_1D + "_" + spectrum.getNuclei()[dim]).getShifts(0); + matchesQueryInOrigin1DSpectrum = Utils.findShiftMatches(shiftsOrigin1DSpectrum, shiftsQuery, tol); + matchesQueryInOrigin1DSpectrum = Utils.correctShiftMatches(shiftsOrigin1DSpectrum, shiftsQuery, matchesQueryInOrigin1DSpectrum, tol); + } else { + for (int i = 0; i < spectrum.getSignalCount(); i++) { + matchesQueryInOrigin1DSpectrum.add(-1); } } - - return 
new int[]{matchIndexAtom, matchIndexProton}; + + return matchesQueryInOrigin1DSpectrum; } - + /** - * Corrects a hydrogen match list regarding a given shift list and an atom - * container. - * This is useful when two ore more hydrogen shift values match - * with the same hydrogen shift (actually heavy atom) in the atom container. - * So the purpose here is to enable more unambiguous matches. This method - * first looks for unambiguous matches and calculates the median of the - * difference values between the shift list values and the shifts of atom - * container. Then, all shift list values are adjusted (+/-) with this - * median value. + * Returns the indices of atoms within the class atom container which match + * to the shifts of given spectrum and dimension. * - * @param shifts Shift value list to match - * @param matches Match list to correct - * @param tol Tolerance value [ppm] for hydrogen rematching + * @param spectrum + * @param dim * @return */ - private ArrayList correctHydrogenShiftMatches(final ArrayList shifts, ArrayList matches, final double tol) { - - int matchIndex, middle; - double diff, median; - int[] singleMatchIndex; - ArrayList singleMatchShifts; - ArrayList diffs = new ArrayList<>(); - final HashSet uniqueMatchIndicesSet = new HashSet<>(matches); - for (Integer matchIndexAtomContainer : uniqueMatchIndicesSet) { - if (Collections.frequency(matches, matchIndexAtomContainer) == 1) { - matchIndex = matches.indexOf(matchIndexAtomContainer); - if (matches.get(matchIndex) >= 0) { - singleMatchIndex = this.findSingleImplicitHydrogenShiftMatch(shifts.get(matchIndex), tol); - singleMatchShifts = this.mol.getAtom(singleMatchIndex[0]).getProperty(CDKConstants.NMRSPECTYPE_2D_HSQC); - diff = shifts.get(matchIndex) - singleMatchShifts.get(singleMatchIndex[1]); - diffs.add(diff); - } - } - } - if (diffs.size() > 0) { - middle = diffs.size() / 2; - if (diffs.size() % 2 == 1) { - median = diffs.get(middle); - } else { - median = (diffs.get(middle - 1) + 
diffs.get(middle)) / 2.0; - } - // add or subtract the median of the differences to all shift list values (input) and match again then - for (int i = 0; i < shifts.size(); i++) { - shifts.set(i, shifts.get(i) - median); + public final ArrayList getAssignedAtomIndices(final Spectrum spectrum, final int dim){ + + if(spectrum == null){ + return null; + } else if(this.getAssignment(spectrum) == null){ + final ArrayList atomIndices = new ArrayList<>(); + for (int i = 0; i < spectrum.getSignalCount(); i++) { + atomIndices.add(-1); } - // rematch - matches = this.findImplicitHydrogenShiftMatches(shifts, tol); + return atomIndices; } - - return matches; - } + + return Utils.ArrayToArrayList(this.getAssignment(spectrum).getAssignments(dim)); + } /** @@ -557,8 +591,10 @@ private ArrayList correctHydrogenShiftMatches(final ArrayList s * @throws IOException */ public static final Spectrum parseHHCOSYviaCSV(final String pathToCSV) throws IOException { - - return Utils.CSVtoSpectrum(pathToCSV, new int[]{5, 6}, new String[]{"H", "H"}, 9); + final Spectrum spectrum = Utils.CSVtoSpectrum(pathToCSV, new int[]{5, 6}, new String[]{"H", "H"}, 9); + spectrum.setSpecType(CDKConstants.NMRSPECTYPE_2D_HHCOSY); + + return spectrum; } /** @@ -572,35 +608,34 @@ public static final Spectrum parseHHCOSYviaCSV(final String pathToCSV) throws IO * @throws org.xml.sax.SAXException */ public static final Spectrum parseHHCOSYviaXML(final String pathToXML) throws IOException, ParserConfigurationException, SAXException { - - return Utils.XMLtoSpectrum(pathToXML, 2, new int[]{2, 1}, new String[]{"H", "H"}); + final Spectrum spectrum = Utils.XMLtoSpectrum(pathToXML, 2, new int[]{2, 1}, new String[]{"H", "H"}); + spectrum.setSpecType(CDKConstants.NMRSPECTYPE_2D_HHCOSY); + + return spectrum; } + /** * Sets links between two heavy atoms of H,H-COSY signals. 
The property * is then set to {@link #CONST_PROP_HHCOSY} in * {@link IAtomContainer#setProperty(java.lang.Object, java.lang.Object)} * * @param spectrum Spectrum class object containing the 2D spectrum proton shift information - * @param tol tolerance value [ppm] for matching belonging protons + * @param tolProton tolerance value [ppm] for matching belonging protons * of heavy atom - * @return true if the links could be set; otherwise false + * @return */ - public final HashMap> setHHCOSY(final Spectrum spectrum, final double tol) { + public final boolean assignHHCOSY(final Spectrum spectrum, final double tolProton) { - final ArrayList hydrogenShiftMatches1 = this.findImplicitHydrogenShiftMatches(spectrum.getShifts(0), tol); - final ArrayList hydrogenShiftMatches2 = this.findImplicitHydrogenShiftMatches(spectrum.getShifts(1), tol); + final ArrayList protonShiftMatches1 = this.findMatchesIn1DSpectra(spectrum, 0, tolProton); + final ArrayList protonShiftMatches2 = this.findMatchesIn1DSpectra(spectrum, 1, tolProton); // are all signals bidirectional? 
- if (!casekit.NMR.Utils.isBidirectional(hydrogenShiftMatches1, hydrogenShiftMatches2)) { - return null; + if (!Utils.isBidirectional(protonShiftMatches1, protonShiftMatches2)) { + return false; } - casekit.NMR.Utils.setBidirectionalLinks(this.mol, hydrogenShiftMatches1, hydrogenShiftMatches2, CDKConstants.NMRSPECTYPE_2D_HHCOSY); - - final HashMap> matches = new HashMap<>(); - matches.put(Utils.getAtomTypeFromSpectrum(spectrum, 0), hydrogenShiftMatches1); - matches.put(Utils.getAtomTypeFromSpectrum(spectrum, 1), hydrogenShiftMatches2); + this.assign2DSpectrum(spectrum, tolProton, tolProton); - return matches; + return true; } @@ -613,8 +648,10 @@ public final HashMap> setHHCOSY(final Spectrum spectr * @throws IOException */ public static final Spectrum parseINADEQUATEviaCSV(final String pathToCSV) throws IOException { - - return Utils.CSVtoSpectrum(pathToCSV, new int[]{5, 6}, new String[]{"C", "C"}, 9); + final Spectrum spectrum = Utils.CSVtoSpectrum(pathToCSV, new int[]{5, 6}, new String[]{"C", "C"}, 9); + spectrum.setSpecType(CDKConstants.NMRSPECTYPE_2D_INADEQUATE); + + return spectrum; } /** @@ -628,8 +665,10 @@ public static final Spectrum parseINADEQUATEviaCSV(final String pathToCSV) throw * @throws org.xml.sax.SAXException */ public static final Spectrum parseINADEQUATEviaXML(final String pathToXML) throws IOException, ParserConfigurationException, SAXException { + final Spectrum spectrum = Utils.XMLtoSpectrum(pathToXML, 2, new int[]{2, 1}, new String[]{"C", "C"}); + spectrum.setSpecType(CDKConstants.NMRSPECTYPE_2D_INADEQUATE); - return Utils.XMLtoSpectrum(pathToXML, 2, new int[]{2, 1}, new String[]{"C", "C"}); + return spectrum; } @@ -641,24 +680,20 @@ public static final Spectrum parseINADEQUATEviaXML(final String pathToXML) throw * signal according to atom B and vice versa. 
* * @param spectrum Spectrum class object consisting of Signal class objects - * @param tol tolerance value [ppm] for carbon atom shift matching + * @param tolCarbon tolerance value [ppm] for carbon atom shift matching * @return */ - public final HashMap> setINADEQUATE(final Spectrum spectrum, final double tol) { + public final boolean assignINADEQUATE(final Spectrum spectrum, final double tolCarbon) { - final ArrayList carbonShiftMatches1 = casekit.NMR.Utils.findShiftMatches(this.mol, spectrum.getShifts(0), tol, "C"); - final ArrayList carbonShiftMatches2 = casekit.NMR.Utils.findShiftMatches(this.mol, spectrum.getShifts(1), tol, "C"); + final ArrayList carbonShiftMatches1 = this.findMatchesIn1DSpectra(spectrum, 0, tolCarbon); + final ArrayList carbonShiftMatches2 = this.findMatchesIn1DSpectra(spectrum, 1, tolCarbon); // are all signals bidirectional? if (!casekit.NMR.Utils.isBidirectional(carbonShiftMatches1, carbonShiftMatches2)) { - return null; + return false; } - casekit.NMR.Utils.setBidirectionalLinks(this.mol, carbonShiftMatches1, carbonShiftMatches2, CDKConstants.NMRSPECTYPE_2D_INADEQUATE); - - final HashMap> matches = new HashMap<>(); - matches.put(Utils.getAtomTypeFromSpectrum(spectrum, 0), carbonShiftMatches1); - matches.put(Utils.getAtomTypeFromSpectrum(spectrum, 1), carbonShiftMatches2); + this.assign2DSpectrum(spectrum, tolCarbon, tolCarbon); - return matches; + return true; } @@ -673,8 +708,10 @@ public final HashMap> setINADEQUATE(final Spectrum sp * @throws IOException */ public static final Spectrum parseHMBCviaCSV(final String pathToCSV, final String heavyAtomType) throws IOException { + final Spectrum spectrum = Utils.CSVtoSpectrum(pathToCSV, new int[]{5, 6}, new String[]{"H", heavyAtomType}, 9); + spectrum.setSpecType(CDKConstants.NMRSPECTYPE_2D_HMBC); - return Utils.CSVtoSpectrum(pathToCSV, new int[]{5, 6}, new String[]{"H", heavyAtomType}, 9); + return spectrum; } @@ -691,8 +728,10 @@ public static final Spectrum parseHMBCviaCSV(final String 
pathToCSV, final Strin * @throws org.xml.sax.SAXException */ public static final Spectrum parseHMBCviaXML(final String pathToXML, final String heavyAtomType) throws IOException, ParserConfigurationException, SAXException { + final Spectrum spectrum = Utils.XMLtoSpectrum(pathToXML, 2, new int[]{2, 1}, new String[]{"H", heavyAtomType}); + spectrum.setSpecType(CDKConstants.NMRSPECTYPE_2D_HMBC); - return Utils.XMLtoSpectrum(pathToXML, 2, new int[]{2, 1}, new String[]{"H", heavyAtomType}); + return spectrum; } @@ -703,35 +742,11 @@ public static final Spectrum parseHMBCviaXML(final String pathToXML, final Strin * * @param spectrum Spectrum class object consisting of Signal class objects * where the proton shift values is given first and the heavy atom shifts as the second. - * @param tolHydrogen tolerance value [ppm] for hydrogen shift matching + * @param tolProton tolerance value [ppm] for hydrogen shift matching * @param tolHeavy tolerance value [ppm] for heavy atom shift matching - * @return */ - public final HashMap> setHMBC(final Spectrum spectrum, final double tolHydrogen, final double tolHeavy) { - - final ArrayList shiftsHydrogen = spectrum.getShifts(0); - final ArrayList matchesHydrogen = this.correctHydrogenShiftMatches(shiftsHydrogen, this.findImplicitHydrogenShiftMatches(shiftsHydrogen, tolHydrogen), tolHydrogen); - final ArrayList shiftsHeavyAtom = spectrum.getShifts(1); - ArrayList matchesHeavyAtom = casekit.NMR.Utils.findShiftMatches(this.mol, shiftsHeavyAtom, tolHeavy, Utils.getElementIdentifier(spectrum.getNuclei()[1])); - matchesHeavyAtom = casekit.NMR.Utils.correctShiftMatches(this.mol, shiftsHeavyAtom, matchesHeavyAtom, tolHeavy, Utils.getElementIdentifier(spectrum.getNuclei()[1])); - - final HashMap> matches = new HashMap<>(); - matches.put(Utils.getAtomTypeFromSpectrum(spectrum, 0), matchesHydrogen); - matches.put(Utils.getAtomTypeFromSpectrum(spectrum, 1), matchesHeavyAtom); - - ArrayList HMBCList; - for (int i = 0; i < matchesHydrogen.size(); 
i++) { - if (matchesHydrogen.get(i) >= 0 && matchesHeavyAtom.get(i) >= 0) { - if (this.mol.getAtom(matchesHydrogen.get(i)).getProperty(CDKConstants.NMRSPECTYPE_2D_HMBC) == null) { - this.mol.getAtom(matchesHydrogen.get(i)).setProperty(CDKConstants.NMRSPECTYPE_2D_HMBC, new ArrayList<>()); - } - HMBCList = this.mol.getAtom(matchesHydrogen.get(i)).getProperty(CDKConstants.NMRSPECTYPE_2D_HMBC); - if (!HMBCList.contains(matchesHeavyAtom.get(i))) { - HMBCList.add(matchesHeavyAtom.get(i)); - } - } - } - - return matches; + public final void assignHMBC(final Spectrum spectrum, final double tolProton, final double tolHeavy) { + + this.assign2DSpectrum(spectrum, tolProton, tolHeavy); } } From 80d3966550dd9ed41de6c5ea041d95d1f14fff7f Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Tue, 18 Sep 2018 23:24:44 +0200 Subject: [PATCH 033/405] - small changes in constructors --- src/casekit/NMR/Process.java | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/casekit/NMR/Process.java b/src/casekit/NMR/Process.java index 648b224..e4a48dd 100644 --- a/src/casekit/NMR/Process.java +++ b/src/casekit/NMR/Process.java @@ -43,24 +43,25 @@ */ public class Process extends ParseRawData { - final private IAtomContainer mol; - final private IMolecularFormula molFormula; - private final HashMap> atomTypeIndices = new HashMap<>(); + private final IAtomContainer mol; + private final IMolecularFormula molFormula; + private final HashMap> atomTypeIndices; private int[][] neighborhoodCountsMatrix; - final private HashMap> shiftIndicesInACSet = new HashMap<>(); // holding of all indices of each ac set (DB) entry [first value] and it's atom indices [second value] too + private final HashMap> shiftIndicesInACSet = new HashMap<>(); // holding of all indices of each ac set (DB) entry [first value] and it's atom indices [second value] too public Process(){ super(); this.molFormula = super.getMolecularFormula(); this.mol = super.getAtomContainer(); + 
this.atomTypeIndices = super.getAtomTypeIndices(); } public Process(final IMolecularFormula molFormula){ super(molFormula); this.molFormula = super.getMolecularFormula(); this.mol = super.getAtomContainer(); - this.setAtomTypeIndices(); + this.atomTypeIndices = super.getAtomTypeIndices(); } @@ -117,7 +118,7 @@ private void setBond(final int index1, final int index2) { */ public void createLSDFile(final String projectName, final String pathToOutputFile, final String[] pathsToFilters) throws FileNotFoundException, UnsupportedEncodingException{ - PrintWriter writer = new PrintWriter(pathToOutputFile, "UTF-8"); + final PrintWriter writer = new PrintWriter(pathToOutputFile, "UTF-8"); ArrayList idxs; String hybrid, protons, MULT = "", HSQC = "", COSY = "", BOND = "", HMBC = ""; final int[][] bondTable = new int[this.mol.getAtomCount()][this.mol.getAtomCount()]; @@ -255,17 +256,17 @@ public int[][] getNeighborhoodBondsCountMatrix(){ - public void countNeighborhoodBonds(final IAtomContainerSet acSet, final String[] bondsSet, final String elem, String[] neighborElems, final int minShift, final int maxShift, final int stepSize) throws FileNotFoundException, IOException{ + public void countNeighborhoodBonds(final IAtomContainerSet acSet, final String[] bondsSet, final String elem, final ArrayList neighborElems, final int minShift, final int maxShift, final int stepSize) throws FileNotFoundException, IOException{ if (stepSize < 1) { System.err.println("stepSize < 1 not allowed!!!"); return; } // creation of frequency counting matrix and shift indices holder - this.neighborhoodCountsMatrix = new int[stepSize * (maxShift - minShift + 1)][3 + 4 + neighborElems.length * bondsSet.length]; + this.neighborhoodCountsMatrix = new int[stepSize * (maxShift - minShift + 1)][3 + 4 + neighborElems.size() * bondsSet.length]; this.shiftIndicesInACSet.clear(); for (int i = 0; i < stepSize * maxShift; i++) { - for (int j = 0; j < 3 + 4 + neighborElems.length * bondsSet.length; j++) { + for (int 
j = 0; j < 3 + 4 + neighborElems.size() * bondsSet.length; j++) { neighborhoodCountsMatrix[i][j] = 0; } this.shiftIndicesInACSet.put(i, new ArrayList<>()); From 6076e9a85ba339d55fe315524c7f62c9c8f3a872 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Tue, 18 Sep 2018 23:27:44 +0200 Subject: [PATCH 034/405] - some function were edited/added/removed --- src/casekit/NMR/Utils.java | 103 ++++++++++++++++++++++--------------- 1 file changed, 61 insertions(+), 42 deletions(-) diff --git a/src/casekit/NMR/Utils.java b/src/casekit/NMR/Utils.java index 29437eb..cc5ee7b 100644 --- a/src/casekit/NMR/Utils.java +++ b/src/casekit/NMR/Utils.java @@ -42,21 +42,15 @@ import java.util.HashMap; import java.util.HashSet; import java.util.List; -import java.util.regex.Matcher; -import java.util.regex.Pattern; import org.w3c.dom.Document; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; import org.apache.commons.lang3.StringUtils; -import org.openscience.cdk.Atom; import org.openscience.cdk.CDKConstants; import org.openscience.cdk.aromaticity.Aromaticity; import org.openscience.cdk.aromaticity.ElectronDonation; import org.openscience.cdk.exception.CDKException; -import org.openscience.cdk.fingerprint.IBitFingerprint; -import org.openscience.cdk.fingerprint.KlekotaRothFingerprinter; -import org.openscience.cdk.fingerprint.SubstructureFingerprinter; import org.openscience.cdk.graph.CycleFinder; import org.openscience.cdk.graph.Cycles; import org.openscience.cdk.interfaces.IAtom; @@ -66,10 +60,8 @@ import org.openscience.cdk.interfaces.IMolecularFormula; import org.openscience.cdk.io.SDFWriter; import org.openscience.cdk.io.iterator.IteratingSDFReader; -import org.openscience.cdk.isomorphism.matchers.QueryAtomContainer; import org.openscience.cdk.qsar.descriptors.atomic.AtomValenceDescriptor; import org.openscience.cdk.silent.SilentChemObjectBuilder; -import 
org.openscience.cdk.smiles.smarts.parser.SMARTSParser; import org.openscience.cdk.tools.manipulator.AtomContainerManipulator; import org.openscience.cdk.tools.manipulator.MolecularFormulaManipulator; import org.w3c.dom.NodeList; @@ -92,7 +84,7 @@ public class Utils { * @throws CDKException * @throws IOException */ - public static void convertSDFtoLSD(final String pathSDF, final String pathOut, final String pathMol2ab) throws FileNotFoundException, CDKException, IOException{ + public static void SDFtoLSD(final String pathSDF, final String pathOut, final String pathMol2ab) throws FileNotFoundException, CDKException, IOException{ System.out.println("Conversion from SDF format to LSD format... "); @@ -203,17 +195,17 @@ public static ArrayList getAtomTypeIndicesByElement(final IAtomContaine * Reads a specific column of a NMR peak table and stores it into an * ArrayList object. * - * @param pathToPeakList path to NMR peak table - * @param column column to select in peak table + * @param pathToCSV path to NMR peak table in CSV file format + * @param column column index to select in peak table * @return ArrayList of Double shift values * @throws IOException */ - public static ArrayList parseCSV(final String pathToPeakList, final int column) throws IOException { + public static ArrayList parseCSV(final String pathToCSV, final int column) throws IOException { final ArrayList shifts = new ArrayList<>(); String line; String[] tokens; - BufferedReader fileReader = new BufferedReader(new FileReader(pathToPeakList)); + BufferedReader fileReader = new BufferedReader(new FileReader(pathToCSV)); while ((line = fileReader.readLine()) != null) { tokens = line.split(","); // get shift value @@ -233,14 +225,14 @@ public static ArrayList parseCSV(final String pathToPeakList, final int * The number of columns and atom types has to be the same and defines the * dimension of the returning spectrum. 
* - * @param pathToPeakList path to NMR peak table - * @param columns columns to select in each peak table + * @param pathToCSV path to NMR peak table in CSV file format + * @param columns column indices to select in peak table * @param atomTypes atom types (element) for each dimension * @param intensityColumnIndex column index for intensity values * @return Spectrum class object containing the peak lists * @throws IOException */ - public static Spectrum CSVtoSpectrum(final String pathToPeakList, final int[] columns, final String[] atomTypes, final int intensityColumnIndex) throws IOException { + public static Spectrum CSVtoSpectrum(final String pathToCSV, final int[] columns, final String[] atomTypes, final int intensityColumnIndex) throws IOException { // assumes the same number of selected columns (dimensions) and atom types if(columns.length != atomTypes.length){ @@ -253,7 +245,7 @@ public static Spectrum CSVtoSpectrum(final String pathToPeakList, final int[] co final Spectrum spectrum = new Spectrum(nuclei); ArrayList shiftList; for (int col = 0; col < columns.length; col++) { - shiftList = Utils.parseCSV(pathToPeakList, columns[col]); + shiftList = Utils.parseCSV(pathToCSV, columns[col]); if(col == 0){ for (int i = 0; i < shiftList.size(); i++) { spectrum.addSignal(new Signal(spectrum.getNuclei())); @@ -263,7 +255,7 @@ public static Spectrum CSVtoSpectrum(final String pathToPeakList, final int[] co return null; } } - spectrum.setIntensities(parseCSV(pathToPeakList, intensityColumnIndex)); + spectrum.setIntensities(parseCSV(pathToCSV, intensityColumnIndex)); return spectrum; } @@ -377,9 +369,9 @@ public static void editSignalsInSpectrum(final Spectrum spectrum, final IMolecul while (diff != 0) { // display all selectable signal indices in spectrum if(diff > 0){ - System.out.println("\n" + diff + " signals are missing!\nWhich signal is not unique?"); + System.out.println("\n" + diff + " " + spectrum.getNuclei()[0] + " signals are missing!\nWhich signal is not 
unique?"); } else { - System.out.println("\n" + (-1 * diff) + " signals are to be removed!\nWhich signal is to remove?"); + System.out.println("\n" + (-1 * diff) + " " + spectrum.getNuclei()[0] + " signals are to be removed!\nWhich signal is to remove?"); } for (int s = 0; s < spectrum.getSignalCount(); s++) { System.out.print("index: " + s); @@ -772,16 +764,26 @@ public static String getElementIdentifier(final String isotope) { } - public static int[] getNeighborhoodBondsCount(final IAtomContainer ac, final int indexAC, final String[] bondsSet, final String[] neighborElems){ - final int[] counts = new int[neighborElems.length * bondsSet.length]; + public static HashSet getAtomTypesInAtomContainer(final IAtomContainer ac) { + final HashSet atomTypes = new HashSet<>(); + for (IAtom atom : ac.atoms()) { + atomTypes.add(atom.getSymbol()); + } + + return atomTypes; + } + + + public static int[] getNeighborhoodBondsCount(final IAtomContainer ac, final int indexAC, final String[] bondsSet, final ArrayList neighborElems){ + final int[] counts = new int[neighborElems.size() * bondsSet.length]; String foundBonds; // for all given neighbor element types - for (int n = 0; n < neighborElems.length; n++) { + for (int n = 0; n < neighborElems.size(); n++) { foundBonds = ""; // for all next neighbors of a specific element for (IAtom neighborAtom : ac.getConnectedAtomsList(ac.getAtom(indexAC))) { // skip if not the right neighborhood element or bond type is unknown/unset - if ((!neighborAtom.getSymbol().equals(neighborElems[n])) || (casekit.NMR.Utils.getStringFromBondOrder(ac.getBond(ac.getAtom(indexAC), neighborAtom).getOrder()) == null)) { + if ((!neighborAtom.getSymbol().equals(neighborElems.get(n))) || (casekit.NMR.Utils.getStringFromBondOrder(ac.getBond(ac.getAtom(indexAC), neighborAtom).getOrder()) == null)) { continue; } foundBonds += casekit.NMR.Utils.getStringFromBondOrder(ac.getBond(ac.getAtom(indexAC), neighborAtom).getOrder()); @@ -799,32 +801,31 @@ public static int[] 
getNeighborhoodBondsCount(final IAtomContainer ac, final int } - public static void writeNeighborhoodBondsCountMatrix(final String pathToOutput, final int[][] m, final String[] bondsSet, final String elem, String[] neighborElems, final int min, final int max, final int stepSize) throws IOException{ + public static void writeNeighborhoodBondsCountMatrix(final String pathToOutput, final int[][] m, final String[] bondsSet, final String elem, final ArrayList neighborElems, final int min, final int max, final int stepSize) throws IOException{ if(stepSize < 1){ System.err.println("stepSize < 1 not allowed!!!"); return; } - final StringBuilder sb = new StringBuilder(); sb.append("shift [" + elem + "] (" + stepSize + "),nTotal,inRing,isArom,q" + elem + "," + elem + "H," + elem + "H2," + elem + "H3,"); - for (int i = 0; i < neighborElems.length; i++) { + for (int i = 0; i < neighborElems.size(); i++) { for (int j = 0; j < bondsSet.length; j++) { - sb.append(bondsSet[j] + "[" + neighborElems[i] + "]"); + sb.append(bondsSet[j] + "[" + neighborElems.get(i) + "]"); if (j < bondsSet.length - 1) { sb.append(","); } } - if (i < neighborElems.length - 1) { + if (i < neighborElems.size() - 1) { sb.append(","); } } sb.append("\n"); for (int i = 0; i < stepSize * (max - min) + 1; i++) { sb.append((i + min) + ","); - for (int j = 0; j < 3 + 4 + neighborElems.length * bondsSet.length; j++) { + for (int j = 0; j < 3 + 4 + neighborElems.size() * bondsSet.length; j++) { sb.append(m[i][j]); - if (j < 3 + 4 + neighborElems.length * bondsSet.length - 1) { + if (j < 3 + 4 + neighborElems.size() * bondsSet.length - 1) { sb.append(","); } } @@ -1021,6 +1022,7 @@ public static boolean isBidirectional(final ArrayList shiftMatches1, fi * @param shiftMatches1 * @param shiftMatches2 * @param prop + * @deprecated */ public static void setBidirectionalLinks(final IAtomContainer ac, final ArrayList shiftMatches1, final ArrayList shiftMatches2, final String prop) { @@ -1046,17 +1048,11 @@ public static 
void setBidirectionalLinks(final IAtomContainer ac, final ArrayLis } - public static ArrayList countSetShiftInAtomContainer(final IAtomContainer ac, final ArrayList indices){ - - final ArrayList shifts = new ArrayList<>(); - for (final Integer index : indices) { - shifts.add(ac.getAtom(index).getProperty(Utils.getNMRShiftConstant(ac.getAtom(index).getSymbol()))); - } - return shifts; - } - - - + /** + * + * @param pathToFile + * @return + */ public static String getFileFormat(final String pathToFile) { if(pathToFile == null || pathToFile.trim().isEmpty()){ @@ -1121,6 +1117,29 @@ public static IAtomContainer removeAtoms(final IAtomContainer ac, final String a } + public static ArrayList ArrayToArrayList(final int[] array){ + + final ArrayList list = new ArrayList<>(); + for (int i = 0; i < array.length; i++) { + list.add(array[i]); + } + + return list; + } + + + public static String getSpectrumNucleiAsString(final Spectrum spectrum){ + String specID = ""; + for (int i = 0; i < spectrum.getDimCount(); i++) { + specID += spectrum.getNuclei()[i]; + if(i < spectrum.getDimCount()-1){ + specID += "-"; + } + } + + return specID; + } + // ######################################################################################################## // test functions -> not ready to use From 7bd1a2f2be1d2cae8479205cada60e25105e5214 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Tue, 18 Sep 2018 23:28:54 +0200 Subject: [PATCH 035/405] - ideas/still open tasks edited --- src/casekit/NMR/remarks | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/casekit/NMR/remarks b/src/casekit/NMR/remarks index 7abe905..618860f 100644 --- a/src/casekit/NMR/remarks +++ b/src/casekit/NMR/remarks @@ -1,6 +1,15 @@ -MIT license - +general things: - JUnit - PMD: CDK GitHub PMD example - JCoCo - yourkit (license received) + +coding: + +- in parseRawData class: + - now usage of Assignment class objects + - control function should be implemented + -> assigned protons 
(HSQC, HMBC, ...) vs. DEPT information + +- in Assignment class: + - addition of counts array as class member and functions for that \ No newline at end of file From 274bd1f4c27af30d58c7b462061da69b0ae54861 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Tue, 18 Sep 2018 23:32:58 +0200 Subject: [PATCH 036/405] - class member "counts" and belonging functions added: - to set a counter value for each assignment index and dimension, e.g. for occurrences or number of atom matches --- src/casekit/NMR/model/Assignment.java | 38 +++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/src/casekit/NMR/model/Assignment.java b/src/casekit/NMR/model/Assignment.java index b319637..5160884 100644 --- a/src/casekit/NMR/model/Assignment.java +++ b/src/casekit/NMR/model/Assignment.java @@ -34,11 +34,13 @@ public class Assignment { final int nDim; final String[] nuclei; final int[][] assignments; + final int[][] counts; public Assignment(final Spectrum spectrum) { this.nuclei = spectrum.getNuclei(); this.nDim = this.nuclei.length; this.assignments = this.initAssignments(this.nDim, spectrum.getSignalCount()); + this.counts = this.initCounts(this.nDim, spectrum.getSignalCount()); } private int[][] initAssignments(final int nDim, final int nSignal){ @@ -52,6 +54,42 @@ private int[][] initAssignments(final int nDim, final int nSignal){ return temp; } + private int[][] initCounts(final int nDim, final int nSignal){ + final int[][] temp = new int[nDim][nSignal]; + for (int i = 0; i < nDim; i++) { + for (int j = 0; j < nSignal; j++) { + temp[i][j] = 0; + } + } + + return temp; + } + + public boolean setCount(final int dim, final int indexInSpectrum, final int newCountValue){ + if(!this.checkDimension(dim) || !this.checkSpectrumIndex(dim, indexInSpectrum)){ + return false; + } + this.counts[dim][indexInSpectrum] = newCountValue; + + return true; + } + + public Integer getCount(final int dim, final int indexInSpectrum){ + if(!this.checkDimension(dim) || 
!this.checkSpectrumIndex(dim, indexInSpectrum)){ + return null; + } + + return this.counts[dim][indexInSpectrum]; + } + + public int[] getCounts(final int dim){ + if(!this.checkDimension(dim)){ + return null; + } + + return this.counts[dim]; + } + public boolean setAssignment(final int dim, final int indexInSpectrum, final int indexInAtomContainer){ if(!this.checkDimension(dim) || !this.checkSpectrumIndex(dim, indexInSpectrum)){ return false; From fd50cc8d52174a5991fbfef45786b3146eaa89b9 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Tue, 18 Sep 2018 23:34:13 +0200 Subject: [PATCH 037/405] - editing of first comment content --- src/casekit/NMR/model/Signal.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/casekit/NMR/model/Signal.java b/src/casekit/NMR/model/Signal.java index 38d4ae7..54d310e 100644 --- a/src/casekit/NMR/model/Signal.java +++ b/src/casekit/NMR/model/Signal.java @@ -1,5 +1,5 @@ /* -* This class was copied and modified from NMRSignal class in casekit.model package (by Christoph Steinbeck) +* This class was adopted and modified from an earlier version by Christoph Steinbeck */ /* From 9f9cc505522521e7c916b5000a9e817938206a51 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Tue, 18 Sep 2018 23:35:17 +0200 Subject: [PATCH 038/405] - editing of first comment content --- src/casekit/NMR/model/Spectrum.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/casekit/NMR/model/Spectrum.java b/src/casekit/NMR/model/Spectrum.java index 4af803e..171ea64 100644 --- a/src/casekit/NMR/model/Spectrum.java +++ b/src/casekit/NMR/model/Spectrum.java @@ -1,5 +1,5 @@ /* -* This class was copied and modified from NMRSpectrum class in casekit.model package (by Christoph Steinbeck) +* This class was adopted and modified from an earlier version by Christoph Steinbeck */ @@ -58,7 +58,7 @@ public class Spectrum { /** * The proton frequency of the spectrometer used to record this spectrum. 
*/ - private Float spectrometerFrequency; + private Double spectrometerFrequency; private String solvent; private String standard; @@ -323,11 +323,11 @@ public int getSignalIndex(final Signal signal) { return -1; } - public void setSpectrometerFrequency(final Float sf) { + public void setSpectrometerFrequency(final Double sf) { this.spectrometerFrequency = sf; } - public float getSpectrometerFrequency() { + public Double getSpectrometerFrequency() { return spectrometerFrequency; } From 7dae9e0c54b39ddb7cb951cc3b5e6ce9bd7bb76a Mon Sep 17 00:00:00 2001 From: Michael Wenk Date: Tue, 18 Sep 2018 23:39:27 +0200 Subject: [PATCH 039/405] Delete test.java --- src/casekit/NMR/test.java | 202 -------------------------------------- 1 file changed, 202 deletions(-) delete mode 100644 src/casekit/NMR/test.java diff --git a/src/casekit/NMR/test.java b/src/casekit/NMR/test.java deleted file mode 100644 index 64377ff..0000000 --- a/src/casekit/NMR/test.java +++ /dev/null @@ -1,202 +0,0 @@ -/* - * To change this license header, choose License Headers in Project Properties. - * To change this template file, choose Tools | Templates - * and open the template in the editor. 
- */ - -package casekit.NMR; - -import casekit.NMR.model.Assignment; -import casekit.NMR.model.Spectrum; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.sql.SQLException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; -import java.util.logging.Level; -import java.util.logging.Logger; -import javax.xml.parsers.ParserConfigurationException; -import org.openscience.cdk.CDKConstants; -import org.openscience.cdk.DefaultChemObjectBuilder; -import org.openscience.cdk.interfaces.IAtomContainer; -import org.openscience.cdk.interfaces.IBond; -import org.openscience.cdk.interfaces.IMolecularFormula; -import org.openscience.cdk.tools.manipulator.MolecularFormulaManipulator; -import org.xml.sax.SAXException; - -/** - * - * @author Michael Wenk [https://github.com/michaelwenk] - */ -public class test { - - public static void main(String[] args) throws ParserConfigurationException, SAXException, CloneNotSupportedException, FileNotFoundException, SQLException, ClassNotFoundException { - - final int maxSpheres = 1; - - final String Peaks13C_HJ555 = "/Users/mwenk/work/research/Beemelmanns-HKI-HJ555/13C/50/pdata/1/peaklist.xml"; - final String Peaks13C_HJ777 = "/Users/mwenk/work/research/Beemelmanns-HKI-HJ777/HJ777_13C_NMR.csv"; - final String PeaksH1_HJ555 = "/Users/mwenk/work/research/Beemelmanns-HKI-HJ555/1H/1/pdata/1/peaklist.xml"; - final String Peaks1H_HJ777 = "/Users/mwenk/work/research/Beemelmanns-HKI-HJ777/HJ777_1H_NMR.csv"; - final String PeaksDEPT90_HJ555 = "/Users/mwenk/work/research/Beemelmanns-HKI-HJ555/HJ555_DEPT90_pseudo.xml"; - final String PeaksDEPT90_HJ777 = "/Users/mwenk/work/research/Beemelmanns-HKI-HJ777/HJ777_DEPT90_NMR_pseudo.csv"; - final String PeaksDEPT135_HJ555 = "/Users/mwenk/work/research/Beemelmanns-HKI-HJ555/DEPT135/5/pdata/1/peaklist.xml"; - final String PeaksDEPT135_HJ777 = "/Users/mwenk/work/research/Beemelmanns-HKI-HJ777/HJ777_DEPT135_NMR.csv"; - final String PeaksHSQC_HJ555 = 
"/Users/mwenk/work/research/Beemelmanns-HKI-HJ555/HSQC/3/pdata/1/peaklist.xml"; - final String PeaksINADEQUATE_HJ555 = "/Users/mwenk/work/research/Beemelmanns-HKI-HJ555/HJ555_INADEQUATE_pseudo.xml"; - final String PeaksHSQC_HJ777 = "/Users/mwenk/work/research/Beemelmanns-HKI-HJ777/HJ777_HSQC_NMR.csv"; - final String PeaksHMBC_HJ555 = "/Users/mwenk/work/research/Beemelmanns-HKI-HJ555/HMBC/4/pdata/1/peaklist.xml"; - final String PeaksHMBC_HJ777 = "/Users/mwenk/work/research/Beemelmanns-HKI-HJ777/HJ777_HMBC_NMR.csv"; - final String PeaksCOSY_HJ555 = "/Users/mwenk/work/research/Beemelmanns-HKI-HJ555/COSY/2/pdata/1/peaklist.xml"; - final String PeaksCOSY_HJ777 = "/Users/mwenk/work/research/Beemelmanns-HKI-HJ777/HJ777_COSY_NMR.csv"; - - final String pathToNMRShiftDB = "/Users/mwenk/Downloads/nmrshiftdb2withsignals.sd"; - final String pathToNMRShiftDBTest = "/Users/mwenk/Downloads/test.sdf"; - final String pathToNMRShiftDBHOSE = "/Users/mwenk/Downloads/hose" + maxSpheres + ".tsv"; - - - - final double tolC = 0.5; - final double tolH = 0.2; - final String molFormulaString_HJ555 = "C21H19NO8"; - final String molFormulaString_HJ777 = "C28H25NO11S"; - final IMolecularFormula molFormula_HJ555 = MolecularFormulaManipulator.getMolecularFormula(molFormulaString_HJ555, DefaultChemObjectBuilder.getInstance()); - final IMolecularFormula molFormula_HJ777 = MolecularFormulaManipulator.getMolecularFormula(molFormulaString_HJ777, DefaultChemObjectBuilder.getInstance()); - String projectName = ""; - casekit.NMR.Process process = null; - Spectrum spec = null; - - - IAtomContainer ac; - try { - // HJ555 - projectName = "HJ555"; - process = new Process(molFormula_HJ555); - spec = Process.parse1DNMRviaXML(Peaks13C_HJ555, "C"); - spec.setSpecType(CDKConstants.NMRSPECTYPE_1D_ARBITRARY); - final ArrayList indicesInAtomContainer1D_13C = process.set1DNMR(spec); - final Assignment assignments1D_13C = new Assignment(spec); - assignments1D_13C.setAssignments(0, indicesInAtomContainer1D_13C); - 
System.out.println("assignments spectrum 13C: " + Arrays.toString(assignments1D_13C.getAssignments(0))); - System.out.println("equivalences: " + spec.getEquivalences()); - - spec = Process.parseDEPTviaXML(PeaksDEPT90_HJ555); - spec.setSpecType(CDKConstants.NMRSPECTYPE_1D_DEPT90); - Spectrum spec135 = Process.parseDEPTviaXML(PeaksDEPT135_HJ555); - spec135.setSpecType(CDKConstants.NMRSPECTYPE_1D_DEPT135); - final HashMap> matches1D_DEPT = process.setDEPT(spec, spec135, tolC); - final Assignment assignments1D_DEPT90 = new Assignment(spec); - final Assignment assignments1D_DEPT135 = new Assignment(spec135); - assignments1D_DEPT90.setAssignments(0, matches1D_DEPT.get("DEPT90")); - assignments1D_DEPT135.setAssignments(0, matches1D_DEPT.get("DEPT135")); - System.out.println("assignments spectrum DEPT90: " + Arrays.toString(assignments1D_DEPT90.getAssignments(0))); - System.out.println("assignments spectrum DEPT135: " + Arrays.toString(assignments1D_DEPT135.getAssignments(0))); - - spec = Process.parseHSQCviaXML(PeaksHSQC_HJ555, "C"); - spec.setSpecType(CDKConstants.NMRSPECTYPE_2D_HSQC); - final HashMap> matches2D_HSQC = process.setHSQC(spec, tolC); - final Assignment assignments2D_HSQC = new Assignment(spec); - assignments2D_HSQC.setAssignments(0, matches2D_HSQC.get(Utils.getAtomTypeFromSpectrum(spec, 0))); - assignments2D_HSQC.setAssignments(1, matches2D_HSQC.get(Utils.getAtomTypeFromSpectrum(spec, 1))); - System.out.println("assignments spectrum HSQC dim1: " + Arrays.toString(assignments2D_HSQC.getAssignments(0))); - System.out.println("assignments spectrum HSQC dim2: " + Arrays.toString(assignments2D_HSQC.getAssignments(1))); - - spec = Process.parseHHCOSYviaXML(PeaksCOSY_HJ555); - spec.setSpecType(CDKConstants.NMRSPECTYPE_2D_HHCOSY); - final HashMap> matches2D_HHCOSY = process.setHHCOSY(spec, tolH); - final Assignment assignments2D_HHCOSY = new Assignment(spec); - assignments2D_HHCOSY.setAssignments(0, matches2D_HHCOSY.get(Utils.getAtomTypeFromSpectrum(spec, 0))); - 
assignments2D_HHCOSY.setAssignments(1, matches2D_HHCOSY.get(Utils.getAtomTypeFromSpectrum(spec, 1))); - System.out.println("assignments spectrum HHCOSY dim1: " + Arrays.toString(assignments2D_HHCOSY.getAssignments(0))); - System.out.println("assignments spectrum HHCOSY dim2: " + Arrays.toString(assignments2D_HHCOSY.getAssignments(1))); - - spec = Process.parseINADEQUATEviaXML(PeaksINADEQUATE_HJ555); - spec.setSpecType(CDKConstants.NMRSPECTYPE_2D_INADEQUATE); - final HashMap> matches2D_INADEQUATE = process.setINADEQUATE(spec, tolC); - final Assignment assignments2D_INADEQUATE = new Assignment(spec); - assignments2D_INADEQUATE.setAssignments(0, matches2D_INADEQUATE.get(Utils.getAtomTypeFromSpectrum(spec, 0))); - assignments2D_INADEQUATE.setAssignments(1, matches2D_INADEQUATE.get(Utils.getAtomTypeFromSpectrum(spec, 1))); - System.out.println("assignments spectrum INADEQUATE dim1: " + Arrays.toString(assignments2D_INADEQUATE.getAssignments(0))); - System.out.println("assignments spectrum INADEQUATE dim2: " + Arrays.toString(assignments2D_INADEQUATE.getAssignments(1))); - - spec = Process.parseHMBCviaXML(PeaksHMBC_HJ555, "C"); - spec.setSpecType(CDKConstants.NMRSPECTYPE_2D_HMBC); - final HashMap> matches2D_HMBC = process.setHMBC(spec, tolH, tolC); - final Assignment assignments2D_HMBC = new Assignment(spec); - assignments2D_HMBC.setAssignments(0, matches2D_HMBC.get(Utils.getAtomTypeFromSpectrum(spec, 0))); - assignments2D_HMBC.setAssignments(1, matches2D_HMBC.get(Utils.getAtomTypeFromSpectrum(spec, 1))); - System.out.println("assignments spectrum HMBC dim1: " + Arrays.toString(assignments2D_HMBC.getAssignments(0))); - System.out.println("assignments spectrum HMBC dim2: " + Arrays.toString(assignments2D_HMBC.getAssignments(1))); - - process.setBonds(new String[]{CDKConstants.NMRSPECTYPE_2D_HHCOSY, CDKConstants.NMRSPECTYPE_2D_INADEQUATE, CDKConstants.NMRSPECTYPE_2D_HMBC}); // without hybridizations - process.createLSDFile(projectName, "/Users/mwenk/Downloads/testLSD", new 
String[]{"/Users/mwenk/work/software/LSD-3.4.9/Filters/", "/Users/mwenk/work/software/LSD-3.4.9/Filters/MOLGEN/badlist1/"}); - - -// // definition of all possible bond combinations up to 6 valences -// final String[] bondsSet = {"-", "--", "---", "----", "=", "==", "=-", "=--", "%", "%-"}; // up to 4 valences (carbon) -// //"-----", "------", "=---", "=----", "==-", "==--", "===", "%%", "%--", "%---", "%=", "%=-"}; // up to 6 valences (e.g. sulfur) -// final String[] neighborElems = new String[]{"C", "O", "N", "S", "P", "Br", "Cl"}; -//// final IAtomContainerSet acSet = NMR.DB.getStructuresFromNMRShiftDBFile(pathToNMRShiftDB, 10); // ring size of 10 in aromaticity search (pubchem txt file) -// final Connection DBConnection = NMR.DB.getDBConnection("jdbc:mysql://localhost/nmrshiftdb", "useUnicode=true&useJDBCCompliantTimezoneShift=true&useLegacyDatetimeCode=false&serverTimezone=UTC&useSSL=false", "root", "jmd2017a"); -//// NMR.Utils.getSpectraIDsFromNMRShiftDB(DBConnection, 155.0, 156.0, "C"); -//// final HashMap> lookup = NMR.Utils.getLookupTableFromNMRShiftDB(DBConnection, "C"); -//// NMR.Utils.getRMS(lookup); -// final int minShift = 0, maxShift = 220, stepSize = 10; -// final String elem = "C"; -// NMR.DB.getRMS(DBConnection, minShift, maxShift, elem); -// final int[][] neighborhoodCountsMatrix = NMR.DB.countNeighborhoodBonds(DBConnection, bondsSet, elem, neighborElems, minShift, maxShift, stepSize); -// NMR.Utils.writeNeighborhoodBondsCountMatrix("/Users/mwenk/Downloads/countMatrix_" + elem + "_SQL.csv", neighborhoodCountsMatrix, bondsSet, elem, neighborElems, minShift, maxShift, stepSize); - - // create 1D spectrum - // coffein: 27.8;0.0Q;9|29.6;0.0Q;10|33.5;0.0Q;11|107.8;0.0S;5|144.3;0.0D;7|147.5;0.0S;4|151.6;0.0S;2|155.3;0.0S;0| -// final ArrayList spectrum = new ArrayList<>(); -// spectrum.add(new Signal(elem, 27.8, "Q", null)); -// spectrum.add(new Signal(elem, 29.6, "Q", null)); -// spectrum.add(new Signal(elem, 33.5, "Q", null)); -// spectrum.add(new 
Signal(elem, 107.8, "S", null)); -// spectrum.add(new Signal(elem, 144.3, "D", null)); -// spectrum.add(new Signal(elem, 147.5, "S", null)); -// spectrum.add(new Signal(elem, 151.6, "S", null)); -// spectrum.add(new Signal(elem, 155.3, "S", null)); -// NMR.DB.matchSpectrumAgainstDB(DBConnection, spectrum, 0.1, null, stepSize); -// proc.countNeighborhoodBonds(acSet, bondsSet, elem, neighborElems, minShift, maxShift, stepSize); -// NMR.Utils.writeNeighborhoodBondsCountMatrix("/Users/mwenk/Downloads/countMatrix_" + elem + ".csv", proc.getNeighborhoodBondsCountMatrix(), bondsSet, elem, neighborElems, minShift, maxShift, stepSize); - - - - } catch (IOException ex) { - Logger.getLogger(test.class.getName()).log(Level.SEVERE, null, ex); - } - - ac = process.getAtomContainer(); -// final HashMap> atomTypeIndices = proc.getAtomTypeIndices(); - System.out.println("\n"); - System.out.println(process.getAtomTypeIndices()); - for (int i = 0; i< ac.getAtomCount(); i++) { - System.out.println("i: " + i + " -> atom: " + ac.getAtom(i).getSymbol() + ", shift: " + ac.getAtom(i).getProperty(casekit.NMR.Utils.getNMRShiftConstant("C")) + ", #H: " + ac.getAtom(i).getImplicitHydrogenCount() + - ", H shifts: " + ac.getAtom(i).getProperty(CDKConstants.NMRSPECTYPE_2D_HSQC) + ", Hybrid.: " + ac.getAtom(i).getHybridization() + ", HHCOSY: " + ac.getAtom(i).getProperty(CDKConstants.NMRSPECTYPE_2D_HHCOSY) + - ", INADEQUATE: " + ac.getAtom(i).getProperty(CDKConstants.NMRSPECTYPE_2D_INADEQUATE) + ", HMBC: " + ac.getAtom(i).getProperty(CDKConstants.NMRSPECTYPE_2D_HMBC)); - } - System.out.println("\nbond count: " + ac.getBondCount() + ":"); - for (IBond bond : ac.bonds()) { - System.out.println("bond: " + bond); - } - - -// System.out.println("\n\nOpen Bonds:\n"); -// for (int i = 0; i < ac.getAtomCount(); i++) { -// Utils.getOpenBonds(ac, i); -// } - - -// try { -// Utils.convertSDFtoLSD("/Users/mwenk/work/software/molgen5.02/badlist2.sdf", "/Users/mwenk/Downloads/", 
"/Users/mwenk/work/software/LSD-3.4.9/Mol2abSrc"); -// } catch (CDKException ex) { -// Logger.getLogger(test.class.getName()).log(Level.SEVERE, null, ex); -// } catch (IOException ex) { -// Logger.getLogger(test.class.getName()).log(Level.SEVERE, null, ex); -// } - - } -} From 84eefe42e66fe073c42d37c9c008e3d04192063b Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Thu, 20 Sep 2018 14:06:19 +0200 Subject: [PATCH 040/405] - in assignDEPT function: - setting of hybridization level to SP3 if three protons are attached to a carbon - in assignHSQC function: - setting/counting of implicit hydrogen number for other heavy atoms than carbons via HSQC information - in assignINADEQUATE function: - sets now bonds between INADEQUATE signal atoms via setBond function --- src/casekit/NMR/ParseRawData.java | 40 +++++++++++++++++++++++++++---- 1 file changed, 36 insertions(+), 4 deletions(-) diff --git a/src/casekit/NMR/ParseRawData.java b/src/casekit/NMR/ParseRawData.java index 4066176..7068d21 100644 --- a/src/casekit/NMR/ParseRawData.java +++ b/src/casekit/NMR/ParseRawData.java @@ -35,7 +35,9 @@ import org.openscience.cdk.CDKConstants; import org.openscience.cdk.interfaces.IAtom; import org.openscience.cdk.interfaces.IAtomContainer; +import org.openscience.cdk.interfaces.IAtomType; import org.openscience.cdk.interfaces.IMolecularFormula; +import org.openscience.cdk.qsar.descriptors.atomic.AtomHybridizationDescriptor; import org.openscience.cdk.silent.SilentChemObjectBuilder; import org.openscience.cdk.tools.manipulator.MolecularFormulaManipulator; import org.xml.sax.SAXException; @@ -440,6 +442,9 @@ private void setImplicitHydrogenCountsFromDEPT() { hCount = 0; } this.mol.getAtom(i).setImplicitHydrogenCount(hCount); + if( this.mol.getAtom(i).getImplicitHydrogenCount() >= 3){ + this.mol.getAtom(i).setHybridization(IAtomType.Hybridization.SP3); + } hCountAll += hCount; } } @@ -497,7 +502,7 @@ public static final Spectrum parseHSQCviaXML(final String pathToXML, final Strin * @param 
tolHeavyAtom tolerance value [ppm] for heavy atom shift matching */ public final void assignHSQC(final Spectrum spectrum, final double tolProton, final double tolHeavyAtom) { - + // assign index of matching atoms to both dimensions and save the Spectrum and Assignment objects in class this.assign2DSpectrum(spectrum, tolProton, tolHeavyAtom); // in case the 1H spectrum is given, then assign protons to same indices from belonging carbon atoms if(this.getAssignments().get(CDKConstants.NMRSPECTYPE_1D + "_1H") != null){ @@ -513,8 +518,18 @@ public final void assignHSQC(final Spectrum spectrum, final double tolProton, fi } } } - - // implement control counter for no. of attached protons (by DEPT) on carbons ?!? + // attach protons on other heavy atoms than carbons via HSQC assignment counting + if(!spectrum.getNuclei()[1].equals("13C")){ + final Assignment assignment2D_HSQC = this.getAssignments().get(spectrum.getSpecType() + "_" + Utils.getSpectrumNucleiAsString(spectrum)); + for (int i = 0; i < assignment2D_HSQC.getAssignmentsCount(); i++) { + if((assignment2D_HSQC.getAssignment(1, i) > -1)){ + if(this.mol.getAtom(assignment2D_HSQC.getAssignment(1, i)).getImplicitHydrogenCount() == null){ + this.mol.getAtom(assignment2D_HSQC.getAssignment(1, i)).setImplicitHydrogenCount(0); + } + this.mol.getAtom(assignment2D_HSQC.getAssignment(1, i)).setImplicitHydrogenCount(this.mol.getAtom(assignment2D_HSQC.getAssignment(1, i)).getImplicitHydrogenCount() + 1); + } + } + } } @@ -627,7 +642,7 @@ public static final Spectrum parseHHCOSYviaXML(final String pathToXML) throws IO */ public final boolean assignHHCOSY(final Spectrum spectrum, final double tolProton) { - final ArrayList protonShiftMatches1 = this.findMatchesIn1DSpectra(spectrum, 0, tolProton); + final ArrayList protonShiftMatches1 = this.findMatchesIn1DSpectra(spectrum, 0, tolProton); final ArrayList protonShiftMatches2 = this.findMatchesIn1DSpectra(spectrum, 1, tolProton); // are all signals bidirectional? 
if (!Utils.isBidirectional(protonShiftMatches1, protonShiftMatches2)) { @@ -693,10 +708,27 @@ public final boolean assignINADEQUATE(final Spectrum spectrum, final double tolC } this.assign2DSpectrum(spectrum, tolCarbon, tolCarbon); + final ArrayList indicesInAtomContainerDim1 = this.getAssignedAtomIndices(spectrum, 0); + final ArrayList indicesInAtomContainerDim2 = this.getAssignedAtomIndices(spectrum, 1); + for (int i = 0; i < spectrum.getSignalCount(); i++) { + if((indicesInAtomContainerDim1.get(i) > -1) && (indicesInAtomContainerDim2.get(i) > -1)){ + this.setBond(indicesInAtomContainerDim1.get(i), indicesInAtomContainerDim2.get(i)); + } + } + return true; } + private void setBond(final int index1, final int index2) { + + if (this.mol.getBond(this.mol.getAtom(index1), this.mol.getAtom(index2)) != null) { + this.mol.removeBond(this.mol.getAtom(index1), this.mol.getAtom(index2)); + } + this.mol.addBond(index1, index2, Utils.getBondTypeFromHybridizations(this.mol.getAtom(index1), this.mol.getAtom(index2))); + } + + /** * Creates a Spectrum class object from given HMBC input file in CSV format. 
* From 624d669c2659145af37df1bfa71d76e8d0d5f8d7 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Thu, 20 Sep 2018 14:07:57 +0200 Subject: [PATCH 041/405] - setBonds function moved to ParseRawData class - adjustment of createLSDFile function for using Assignment objects --- src/casekit/NMR/Process.java | 185 ++++++++++++++--------------------- 1 file changed, 74 insertions(+), 111 deletions(-) diff --git a/src/casekit/NMR/Process.java b/src/casekit/NMR/Process.java index e4a48dd..7acdea7 100644 --- a/src/casekit/NMR/Process.java +++ b/src/casekit/NMR/Process.java @@ -23,6 +23,7 @@ */ package casekit.NMR; +import casekit.NMR.model.Spectrum; import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; @@ -34,6 +35,7 @@ import org.openscience.cdk.interfaces.IAtom; import org.openscience.cdk.interfaces.IAtomContainer; import org.openscience.cdk.interfaces.IAtomContainerSet; +import org.openscience.cdk.interfaces.IBond; import org.openscience.cdk.interfaces.IMolecularFormula; import org.openscience.cdk.tools.manipulator.MolecularFormulaManipulator; @@ -63,49 +65,6 @@ public Process(final IMolecularFormula molFormula){ this.mol = super.getAtomContainer(); this.atomTypeIndices = super.getAtomTypeIndices(); } - - - /** - * Sets bonds from already set experiment information (H,H-COSY, INADEQUATE - * and HMBC). - * - * @param experiments - */ - public void setBonds(final String[] experiments) { - - String NMRSHIFT_ATOMTYPE; - ArrayList signalList; - for (int e = 0; e < experiments.length; e++) { - for (int i = 0; i < this.mol.getAtomCount(); i++) { - NMRSHIFT_ATOMTYPE = casekit.NMR.Utils.getNMRShiftConstant(this.mol.getAtom(i).getSymbol()); - // is the NMR shift constant defined and does the nmr shift property entry in an atom exist? 
- if (NMRSHIFT_ATOMTYPE != null && this.mol.getAtom(i).getProperty(NMRSHIFT_ATOMTYPE) != null) { - if (this.mol.getAtom(i).getProperties().containsKey(experiments[e])) { - signalList = this.mol.getAtom(i).getProperty(experiments[e]); - for (int bondPartnerIndex : signalList) { - // no bonds on one and the same atom; ignore already set bonds if no override wanted - if ((i == bondPartnerIndex)) {// || (this.mol.getBond(this.mol.getAtom(i), this.mol.getAtom(bondPartnerIndex)) != null)) { - continue; - } - if (experiments[e].equals(CDKConstants.NMRSPECTYPE_2D_HMBC)) { - System.out.println("HMBC bond setting: still to come!!!"); - } else { - this.setBond(i, bondPartnerIndex); - } - } - } - } - } - } - } - - private void setBond(final int index1, final int index2) { - - if (this.mol.getBond(this.mol.getAtom(index1), this.mol.getAtom(index2)) != null) { - this.mol.removeBond(this.mol.getAtom(index1), this.mol.getAtom(index2)); - } - this.mol.addBond(index1, index2, casekit.NMR.Utils.getBondTypeFromHybridizations(this.mol.getAtom(index1), this.mol.getAtom(index2))); - } /** @@ -116,109 +75,113 @@ private void setBond(final int index1, final int index2) { * @throws FileNotFoundException * @throws UnsupportedEncodingException */ - public void createLSDFile(final String projectName, final String pathToOutputFile, final String[] pathsToFilters) throws FileNotFoundException, UnsupportedEncodingException{ - + public void createLSDFile(final String projectName, final String pathToOutputFile, final String[] pathsToFilters) throws FileNotFoundException, UnsupportedEncodingException{ + final PrintWriter writer = new PrintWriter(pathToOutputFile, "UTF-8"); - ArrayList idxs; - String hybrid, protons, MULT = "", HSQC = "", COSY = "", BOND = "", HMBC = ""; - final int[][] bondTable = new int[this.mol.getAtomCount()][this.mol.getAtomCount()]; - for (int i = 0; i < this.mol.getAtomCount(); i++) { - for (int j = 0; j < this.mol.getAtomCount(); j++) { - bondTable[i][j] = 0; - } - } + String 
hybrid, protons, MULT = "", HSQC = "", BOND = "", HMBC = "", COSY = ""; writer.println("; " + projectName); if(this.molFormula != null){ - writer.println("; " + MolecularFormulaManipulator.getString(this.molFormula) + "\n\n"); + writer.println("; molecular formula: " + MolecularFormulaManipulator.getString(this.molFormula) + "\n\n"); } else { - writer.println("; unknown molecular formula"); + writer.println("; molecular formula: unknown \n\n"); } for (int i = 0; i < this.mol.getAtomCount(); i++) { // set MULT section in LSD input file // set hybridization level if(this.mol.getAtom(i).getHybridization() == null){ - hybrid = "X"; + hybrid = "-"; } else { switch (this.mol.getAtom(i).getHybridization()) { case SP1: case S: - hybrid = "1"; - break; + hybrid = "1"; break; case SP2: - hybrid = "2"; - break; + hybrid = "2"; break; default: hybrid = "3"; } } // set implicit proton number if(this.mol.getAtom(i).getImplicitHydrogenCount() == null){ - protons = "X"; + protons = "-"; } else { protons = String.valueOf(this.mol.getAtom(i).getImplicitHydrogenCount()); } MULT += "MULT " + (i+1) + " " + this.mol.getAtom(i).getSymbol() + " " + hybrid + " " + protons; - if(this.mol.getAtom(i).getProperty(casekit.NMR.Utils.getNMRShiftConstant(this.mol.getAtom(i).getSymbol())) != null){ - MULT += ";\t" + this.mol.getAtom(i).getProperty(casekit.NMR.Utils.getNMRShiftConstant(this.mol.getAtom(i).getSymbol())); + if(this.mol.getAtom(i).getProperty(Utils.getNMRShiftConstant(this.mol.getAtom(i).getSymbol())) != null){ + String hCount; + if(this.mol.getAtom(i).getImplicitHydrogenCount() == null){ + hCount = "x"; + } else { + hCount = String.valueOf(this.mol.getAtom(i).getImplicitHydrogenCount()); + } + MULT += ";\t" + this.mol.getAtom(i).getProperty(Utils.getNMRShiftConstant(this.mol.getAtom(i).getSymbol())) + ",\t" + this.mol.getAtom(i).getSymbol() + "H" + hCount; } MULT += "\n"; // set HSQC section in LSD input file if((this.mol.getAtom(i).getImplicitHydrogenCount() != null) && 
(this.mol.getAtom(i).getImplicitHydrogenCount() > 0)){ HSQC += "HSQC " + (i+1) + " " + (i+1) + ";\t" + this.mol.getAtom(i).getSymbol() + "H" + this.mol.getAtom(i).getImplicitHydrogenCount() + "\n"; + } + } + writer.println(MULT); + writer.println(HSQC); + + // set BOND information in LSD input file by INADEQUATE + for (IBond bond : this.mol.bonds()) { + BOND += "BOND " + (bond.getAtom(0).getIndex()+1) + " " + (bond.getAtom(1).getIndex()+1) + ";\t" + this.mol.getAtom(bond.getAtom(0).getIndex()).getSymbol() + "H" + this.mol.getAtom(bond.getAtom(0).getIndex()).getImplicitHydrogenCount() + " - " + this.mol.getAtom(bond.getAtom(1).getIndex()).getSymbol() + "H" + this.mol.getAtom(bond.getAtom(1).getIndex()).getImplicitHydrogenCount() + "\n"; + } + writer.println(BOND); + + // set HMBC information to LSD input file + ArrayList indicesInAtomContainerDim1; + ArrayList indicesInAtomContainerDim2; + final boolean [][] HMBCTable = new boolean[this.mol.getAtomCount()][this.mol.getAtomCount()]; + for (int i = 0; i < this.mol.getAtomCount(); i++) { + for (int j = 0; j < this.mol.getAtomCount(); j++) { + HMBCTable[i][j] = false; } - // set BOND section in LSD input file from INADEQUATE - if (this.mol.getAtom(i).getProperty(CDKConstants.NMRSPECTYPE_2D_INADEQUATE) != null) { - idxs = this.mol.getAtom(i).getProperty(CDKConstants.NMRSPECTYPE_2D_INADEQUATE); - for (Integer idx : idxs) { - if (bondTable[i][idx] == 0 && bondTable[idx][i] == 0) { - bondTable[i][idx] = 1; - BOND += "BOND " + (i+1) + " " + (idx+1) + ";\t" + this.mol.getAtom(i).getSymbol() + "H" + this.mol.getAtom(i).getImplicitHydrogenCount() + " - " + this.mol.getAtom(idx).getSymbol() + "H" + this.mol.getAtom(idx).getImplicitHydrogenCount() + "\n"; - } - } + } + for (final Spectrum spectrum : this.getSpectra().values()) { + if((spectrum.getDimCount() != 2) || !spectrum.getSpecType().startsWith(CDKConstants.NMRSPECTYPE_2D_HMBC)){ + continue; } - // set BOND section in LSD input file from COSY - 
if(this.mol.getAtom(i).getProperty(CDKConstants.NMRSPECTYPE_2D_HHCOSY) != null){ - idxs = this.mol.getAtom(i).getProperty(CDKConstants.NMRSPECTYPE_2D_HHCOSY); - for (Integer idx : idxs) { - if(bondTable[i][idx] == 0 && bondTable[idx][i] == 0){ - bondTable[i][idx] = 1; - COSY += "COSY " + (i+1) + " " + (idx+1) + ";\t" + this.mol.getAtom(i).getSymbol() + "H" + this.mol.getAtom(i).getImplicitHydrogenCount() + " - " + this.mol.getAtom(idx).getSymbol() + "H" + this.mol.getAtom(idx).getImplicitHydrogenCount() + "\n"; - } else { - COSY += ";COSY " + (i+1) + " " + (idx+1) + ";\t" + this.mol.getAtom(i).getSymbol() + "H" + this.mol.getAtom(i).getImplicitHydrogenCount() + " - " + this.mol.getAtom(idx).getSymbol() + "H" + this.mol.getAtom(idx).getImplicitHydrogenCount() + "\n"; + indicesInAtomContainerDim1 = this.getAssignedAtomIndices(spectrum, 0); + indicesInAtomContainerDim2 = this.getAssignedAtomIndices(spectrum, 1); + HMBC += ";\t " + spectrum.getSpecType() + " " + Utils.getSpectrumNucleiAsString(spectrum) + "\n"; + for (int i = 0; i < spectrum.getSignalCount(); i++) { + if((indicesInAtomContainerDim1.get(i) > -1) && (indicesInAtomContainerDim2.get(i) > -1)){ + // set signal only if it is not already covered by BOND + // here reversed order (see LSD manual page): 1. heavy atom, 2. 
proton + if(this.mol.getBond(this.mol.getAtom(indicesInAtomContainerDim2.get(i)), this.mol.getAtom(indicesInAtomContainerDim1.get(i))) != null){ + HMBC += ";"; } - } + HMBC += "HMBC " + (indicesInAtomContainerDim2.get(i) + 1) + " " + (indicesInAtomContainerDim1.get(i) + 1) + ";\t" + this.mol.getAtom(indicesInAtomContainerDim2.get(i)).getSymbol() + "H" + this.mol.getAtom(indicesInAtomContainerDim2.get(i)).getImplicitHydrogenCount() + " - " + this.mol.getAtom(indicesInAtomContainerDim1.get(i)).getSymbol() + "H" + this.mol.getAtom(indicesInAtomContainerDim1.get(i)).getImplicitHydrogenCount() + "\n"; + HMBCTable[indicesInAtomContainerDim2.get(i)][indicesInAtomContainerDim1.get(i)] = true; + } + } + } + writer.println(HMBC); + // set COSY information to LSD input file + for (final Spectrum spectrum : this.getSpectra().values()) { + if((spectrum.getDimCount() != 2) || !spectrum.getSpecType().startsWith(CDKConstants.NMRSPECTYPE_2D_HHCOSY)){ + continue; } - // set HMBC section in LSD input file - // sets only HMBC signals which are not represented by a bond - boolean test3JviaNextNeighborBond; - if (this.mol.getAtom(i).getProperty(CDKConstants.NMRSPECTYPE_2D_HMBC) != null) { - idxs = this.mol.getAtom(i).getProperty(CDKConstants.NMRSPECTYPE_2D_HMBC); - for (Integer idx : idxs) { - if (bondTable[i][idx] == 0 && bondTable[idx][i] == 0) { - test3JviaNextNeighborBond = false; - for (IAtom neighbor : this.mol.getConnectedAtomsList(this.mol.getAtom(i))) { - if(this.mol.getBond(neighbor, this.mol.getAtom(idx)) != null){ - test3JviaNextNeighborBond = true; - break; - } - } - if(test3JviaNextNeighborBond){ - HMBC += ";HMBC " + (idx+1) + " " + (i+1) + "; 3J\t\n"; - } else { - HMBC += "HMBC " + (idx+1) + " " + (i+1) + ";\n"; - } - } else { - HMBC += ";HMBC " + (idx+1) + " " + (i+1) + "; 2J\t\n"; + indicesInAtomContainerDim1 = this.getAssignedAtomIndices(spectrum, 0); + indicesInAtomContainerDim2 = this.getAssignedAtomIndices(spectrum, 1); + COSY += ";\t " + spectrum.getSpecType() + " 
" + Utils.getSpectrumNucleiAsString(spectrum) + "\n"; + for (int i = 0; i < spectrum.getSignalCount(); i++) { + if((indicesInAtomContainerDim1.get(i) > -1) && (indicesInAtomContainerDim2.get(i) > -1)){ + // set signal only if it is not already covered by BOND or HMBC + if((this.mol.getBond(this.mol.getAtom(indicesInAtomContainerDim1.get(i)), this.mol.getAtom(indicesInAtomContainerDim2.get(i))) != null) + || HMBCTable[indicesInAtomContainerDim1.get(i)][indicesInAtomContainerDim2.get(i)]){ + COSY += ";"; } - } - } - } - writer.println(MULT); - writer.println(HSQC); - writer.println(BOND); + COSY += "COSY " + (indicesInAtomContainerDim1.get(i) + 1) + " " + (indicesInAtomContainerDim2.get(i) + 1) + ";\t" + this.mol.getAtom(indicesInAtomContainerDim1.get(i)).getSymbol() + "H" + this.mol.getAtom(indicesInAtomContainerDim1.get(i)).getImplicitHydrogenCount() + " - " + this.mol.getAtom(indicesInAtomContainerDim2.get(i)).getSymbol() + "H" + this.mol.getAtom(indicesInAtomContainerDim2.get(i)).getImplicitHydrogenCount() + "\n"; + } + } + } writer.println(COSY); - writer.println(HMBC); - + // set filter definitions String DEFF = ""; String FEXP = ""; if(pathsToFilters.length > 0){ From c070cf7b559b45fe7c435e6171d1fef31f7558b1 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Thu, 20 Sep 2018 14:10:49 +0200 Subject: [PATCH 042/405] - small changes in getBondTypeFromHybridizations function --- src/casekit/NMR/Utils.java | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/src/casekit/NMR/Utils.java b/src/casekit/NMR/Utils.java index cc5ee7b..7dab4e4 100644 --- a/src/casekit/NMR/Utils.java +++ b/src/casekit/NMR/Utils.java @@ -1289,22 +1289,24 @@ public static ArrayList getOpenBonds(final IAtomContainer ac, final */ public static IBond.Order getBondTypeFromHybridizations(final IAtom atom1, final IAtom atom2) { - final String atomType1 = atom1.getSymbol(); +// final String atomType1 = atom1.getSymbol(); final IAtomType.Hybridization 
hybridization1 = atom1.getHybridization(); - final String atomType2 = atom2.getSymbol(); +// final String atomType2 = atom2.getSymbol(); final IAtomType.Hybridization hybridization2 = atom2.getHybridization(); - if (hybridization1 == null || hybridization2 == null) { + if (hybridization1 == null && hybridization2 == null) { return IBond.Order.UNSET; - } - IBond.Order bondOrder1 = IBond.Order.UNSET; - IBond.Order bondOrder2 = IBond.Order.UNSET; + } + +// IBond.Order bondOrder1 = IBond.Order.UNSET; +// IBond.Order bondOrder2 = IBond.Order.UNSET; + // single bond detection, the "3" means all SP3 hybrdidizations like SP3, SP3D2 or PLANAR3 - if ((atomType1.equals("C") || atomType1.equals("O") || atomType1.equals("N") || atomType1.equals("S")) + if ((hybridization1 != null) //&& (atomType1.equals("C") || atomType1.equals("O") || atomType1.equals("N") || atomType1.equals("S")) && hybridization1.toString().contains("3")) { return IBond.Order.SINGLE; } - if ((atomType2.equals("C") || atomType2.equals("O") || atomType2.equals("N") || atomType2.equals("S")) + if ((hybridization2 != null) //&& (atomType2.equals("C") || atomType2.equals("O") || atomType2.equals("N") || atomType2.equals("S")) && hybridization2.toString().contains("3")) { return IBond.Order.SINGLE; } @@ -1327,9 +1329,9 @@ public static IBond.Order getBondTypeFromHybridizations(final IAtom atom1, final // bondOrder2 = IBond.Order.TRIPLE; // } - if (bondOrder1.equals(bondOrder2)) { - return bondOrder1; - } +// if (bondOrder1.equals(bondOrder2)) { +// return bondOrder1; +// } return IBond.Order.UNSET; } From 93894685c289d6a156422158f3eaa1145212d4f4 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Thu, 20 Sep 2018 14:16:00 +0200 Subject: [PATCH 043/405] - notes updated --- src/casekit/NMR/remarks | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/casekit/NMR/remarks b/src/casekit/NMR/remarks index 618860f..b28ac52 100644 --- a/src/casekit/NMR/remarks +++ b/src/casekit/NMR/remarks @@ 
-7,9 +7,11 @@ general things: coding: - in parseRawData class: - - now usage of Assignment class objects - - control function should be implemented - -> assigned protons (HSQC, HMBC, ...) vs. DEPT information - + - new: usage of Assignment class objects: + -> atom properties for signals, e.g. HMBC, are not longer set + - control function should be implemented for assigning of protons: + -> no. of assigned protons (HSQC, HMBC, ...) vs. DEPT information and for other heavy atoms too + -> how many protons for one shift value (carbons or other heavy atoms) are still left? + - some function descriptions (javadoc) are deprecated and have to be updated - in Assignment class: - - addition of counts array as class member and functions for that \ No newline at end of file + - new: addition of counts array as class member and functions for that \ No newline at end of file From 00e2baf3f9b2f1a9493c99841f4955c15de2ba07 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Thu, 20 Sep 2018 22:21:28 +0200 Subject: [PATCH 044/405] - small changes in writeTextFile function --- src/casekit/NMR/Utils.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/casekit/NMR/Utils.java b/src/casekit/NMR/Utils.java index 7dab4e4..95e9708 100644 --- a/src/casekit/NMR/Utils.java +++ b/src/casekit/NMR/Utils.java @@ -890,10 +890,10 @@ public static IBond.Order getBondOrderFromString(final String order){ } - public static void writeCSV(final String pathToOutput, final String table) throws IOException { + public static void writeTextFile(final String pathToOutput, final String content) throws IOException { FileWriter fr = new FileWriter(new File(pathToOutput)); BufferedWriter br = new BufferedWriter(fr); - br.write(table); + br.write(content); br.close(); } From f02eb89a5eaa711bea31826851b41835e94bd0ba Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Thu, 20 Sep 2018 22:25:18 +0200 Subject: [PATCH 045/405] - renaming from createLSDFile to createLSDInputFile - createLSDInputFile: - 
now using Utils.writeTextFile function to write content to file --- src/casekit/NMR/Process.java | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/src/casekit/NMR/Process.java b/src/casekit/NMR/Process.java index 7acdea7..b02dace 100644 --- a/src/casekit/NMR/Process.java +++ b/src/casekit/NMR/Process.java @@ -75,15 +75,14 @@ public Process(final IMolecularFormula molFormula){ * @throws FileNotFoundException * @throws UnsupportedEncodingException */ - public void createLSDFile(final String projectName, final String pathToOutputFile, final String[] pathsToFilters) throws FileNotFoundException, UnsupportedEncodingException{ + public void createLSDInputFile(final String projectName, final String pathToOutputFile, final String[] pathsToFilters) throws FileNotFoundException, UnsupportedEncodingException, IOException{ - final PrintWriter writer = new PrintWriter(pathToOutputFile, "UTF-8"); - String hybrid, protons, MULT = "", HSQC = "", BOND = "", HMBC = "", COSY = ""; - writer.println("; " + projectName); + String wholeContent, hybrid, protons, MULT = "", HSQC = "", BOND = "", HMBC = "", COSY = ""; + wholeContent = "; project name: " + projectName + "\n"; if(this.molFormula != null){ - writer.println("; molecular formula: " + MolecularFormulaManipulator.getString(this.molFormula) + "\n\n"); + wholeContent += "; molecular formula: " + MolecularFormulaManipulator.getString(this.molFormula) + "\n\n"; } else { - writer.println("; molecular formula: unknown \n\n"); + wholeContent += "; molecular formula: unknown \n\n"; } for (int i = 0; i < this.mol.getAtomCount(); i++) { // set MULT section in LSD input file @@ -123,14 +122,14 @@ public void createLSDFile(final String projectName, final String pathToOutputFil HSQC += "HSQC " + (i+1) + " " + (i+1) + ";\t" + this.mol.getAtom(i).getSymbol() + "H" + this.mol.getAtom(i).getImplicitHydrogenCount() + "\n"; } } - writer.println(MULT); - writer.println(HSQC); + wholeContent += MULT + 
"\n"; + wholeContent += HSQC + "\n"; // set BOND information in LSD input file by INADEQUATE for (IBond bond : this.mol.bonds()) { BOND += "BOND " + (bond.getAtom(0).getIndex()+1) + " " + (bond.getAtom(1).getIndex()+1) + ";\t" + this.mol.getAtom(bond.getAtom(0).getIndex()).getSymbol() + "H" + this.mol.getAtom(bond.getAtom(0).getIndex()).getImplicitHydrogenCount() + " - " + this.mol.getAtom(bond.getAtom(1).getIndex()).getSymbol() + "H" + this.mol.getAtom(bond.getAtom(1).getIndex()).getImplicitHydrogenCount() + "\n"; } - writer.println(BOND); + wholeContent += BOND + "\n"; // set HMBC information to LSD input file ArrayList indicesInAtomContainerDim1; @@ -160,7 +159,7 @@ public void createLSDFile(final String projectName, final String pathToOutputFil } } } - writer.println(HMBC); + wholeContent += HMBC + "\n"; // set COSY information to LSD input file for (final Spectrum spectrum : this.getSpectra().values()) { if((spectrum.getDimCount() != 2) || !spectrum.getSpecType().startsWith(CDKConstants.NMRSPECTYPE_2D_HHCOSY)){ @@ -180,7 +179,7 @@ public void createLSDFile(final String projectName, final String pathToOutputFil } } } - writer.println(COSY); + wholeContent += COSY + "\n"; // set filter definitions String DEFF = ""; String FEXP = ""; @@ -203,10 +202,10 @@ public void createLSDFile(final String projectName, final String pathToOutputFil FEXP += "\""; } - writer.println(DEFF); - writer.println(FEXP); - writer.close(); + wholeContent += DEFF + "\n"; + wholeContent += FEXP + "\n"; + Utils.writeTextFile(pathToOutputFile, wholeContent); } From 005d31d7572e9691613efa9145a66ba01ca6ccdc Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Wed, 26 Sep 2018 03:14:47 +0200 Subject: [PATCH 046/405] - bugfix checkNuclei function: - usage of Arrays.equals() for comparison --- src/casekit/NMR/model/Spectrum.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/casekit/NMR/model/Spectrum.java b/src/casekit/NMR/model/Spectrum.java index 171ea64..1c23d28 100644 
--- a/src/casekit/NMR/model/Spectrum.java +++ b/src/casekit/NMR/model/Spectrum.java @@ -29,6 +29,7 @@ package casekit.NMR.model; import java.util.ArrayList; +import java.util.Arrays; /** * @@ -191,7 +192,7 @@ private boolean checkDimCount(final int ndim){ } private boolean checkNuclei(final String[] nuclei){ - return nuclei == this.getNuclei(); + return Arrays.equals(nuclei, this.getNuclei()); } /** From bc51b73d08f0f07593ee4fbe1f4628c3bf83d4ba Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Wed, 26 Sep 2018 03:23:27 +0200 Subject: [PATCH 047/405] - functions added: - assignNMRShiftDBShiftsToAtomContainer - generatePicture --- src/casekit/NMR/Utils.java | 42 +++++++++++++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/src/casekit/NMR/Utils.java b/src/casekit/NMR/Utils.java index 95e9708..1065a7e 100644 --- a/src/casekit/NMR/Utils.java +++ b/src/casekit/NMR/Utils.java @@ -50,6 +50,7 @@ import org.openscience.cdk.CDKConstants; import org.openscience.cdk.aromaticity.Aromaticity; import org.openscience.cdk.aromaticity.ElectronDonation; +import org.openscience.cdk.depict.DepictionGenerator; import org.openscience.cdk.exception.CDKException; import org.openscience.cdk.graph.CycleFinder; import org.openscience.cdk.graph.Cycles; @@ -653,7 +654,7 @@ public static ArrayList matchShiftsFromXML(final IAtomContainer ac, fin * 1. dimension: shift entry (row); * 2. 
dimension: shift value (column 1), atom index in atom container (column 2) */ - public static String[][] parseShiftsNMRShiftDB(final String shiftsString){ + public static String[][] parseShiftsInNMRShiftDBEntry(final String shiftsString){ if(shiftsString.trim().length() == 0){ return new String[][]{}; @@ -672,6 +673,31 @@ public static String[][] parseShiftsNMRShiftDB(final String shiftsString){ return values; } + + public static void assignNMRShiftDBShiftsToAtomContainer(final IAtomContainer ac, final String spectrumPropertyString){ + + // property string has to be changed for general case and not only "Spectrum 13C 0" + if (ac.getProperty(spectrumPropertyString) == null) { + return; + } + final String[][] spectrumStringArray = Utils.parseShiftsInNMRShiftDBEntry(ac.getProperty(spectrumPropertyString)); + + int atomIndexSpectrumDB; + Double shiftDB; + for (int k = 0; k < ac.getAtomCount(); k++) { + shiftDB = null; + for (int i = 0; i < spectrumStringArray.length; i++) { + atomIndexSpectrumDB = Integer.parseInt(spectrumStringArray[i][2]); + if (atomIndexSpectrumDB == k) { + shiftDB = Double.parseDouble(spectrumStringArray[i][0]); + break; + } + } + ac.getAtom(k).setProperty(Utils.getNMRShiftConstant(ac.getAtom(k).getSymbol()), shiftDB); + } + } + + /** * Returns the NMR shift constant value for a given element. As far as * it is defined, the value from CDKConstants.NMRSHIFT_* (e.g. @@ -897,6 +923,20 @@ public static void writeTextFile(final String pathToOutput, final String content br.close(); } + /** + * Simple function without any settings to generate a picture from a structure + * given as IAtomcontainer. 
+ * + * @param ac Atom container + * @param path Path to file for storing + * @throws IOException + * @throws CDKException + */ + public static void generatePicture(IAtomContainer ac, String path) throws IOException, CDKException { + final DepictionGenerator dg = new DepictionGenerator().withSize(1200, 1200).withAtomColors().withAtomValues().withFillToFit(); + dg.depict(ac).writeTo(path); + } + /** * From 3393c9ff2db273b8e84ca79ca13e8e39d7012b6c Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Wed, 26 Sep 2018 03:23:57 +0200 Subject: [PATCH 048/405] - small changes --- src/casekit/NMR/Process.java | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/casekit/NMR/Process.java b/src/casekit/NMR/Process.java index b02dace..c301d7f 100644 --- a/src/casekit/NMR/Process.java +++ b/src/casekit/NMR/Process.java @@ -27,12 +27,10 @@ import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; -import java.io.PrintWriter; import java.io.UnsupportedEncodingException; import java.util.ArrayList; import java.util.HashMap; import org.openscience.cdk.CDKConstants; -import org.openscience.cdk.interfaces.IAtom; import org.openscience.cdk.interfaces.IAtomContainer; import org.openscience.cdk.interfaces.IAtomContainerSet; import org.openscience.cdk.interfaces.IBond; @@ -242,14 +240,14 @@ public void countNeighborhoodBonds(final IAtomContainerSet acSet, final String[] if (shiftsDB == null) { continue; } - String[][] shiftsDBvalues = casekit.NMR.Utils.parseShiftsNMRShiftDB(shiftsDB); + String[][] shiftsDBvalues = casekit.NMR.Utils.parseShiftsInNMRShiftDBEntry(shiftsDB); for (String[] shiftsDBvalue : shiftsDBvalues) { atomIndexDB = Integer.parseInt(shiftsDBvalue[2]); // sometimes the DB atom index is wrong and out of array range if (atomIndexDB > acDB.getAtomCount() - 1) { continue; } - shiftDBDouble = Math.round(Double.parseDouble(shiftsDBvalue[0]) * stepSize) / (double) stepSize;; + shiftDBDouble = 
Math.round(Double.parseDouble(shiftsDBvalue[0]) * stepSize) / (double) stepSize; // if DB shift value out of min-max-range then skip this shift if(shiftDBDouble < minShift || shiftDBDouble > maxShift - 1){ continue; From 04ef26c410180676ee84a112e7610305d8154a63 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Sat, 6 Oct 2018 01:41:10 +0200 Subject: [PATCH 049/405] - further constructor added --- src/casekit/NMR/model/Signal.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/casekit/NMR/model/Signal.java b/src/casekit/NMR/model/Signal.java index 54d310e..0e67226 100644 --- a/src/casekit/NMR/model/Signal.java +++ b/src/casekit/NMR/model/Signal.java @@ -67,6 +67,11 @@ public Signal(final String[] nuclei, final Double[] shifts, final Double intensi this.intensity = intensity; } + public Signal(final String[] nuclei, final Double[] shifts, final Double intensity, final String multiplicity) { + this(nuclei, shifts, intensity); + this.multiplicity = multiplicity; + } + private Double[] initShifts(final Double[] shifts, final int nDim){ final Double[] tempShifts = new Double[nDim]; for (int d = 0; d < nDim; d++) { From c141b70890fc9171498a28d9c8e845e7d607ae23 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Sat, 6 Oct 2018 01:47:02 +0200 Subject: [PATCH 050/405] - rename of function call --- src/casekit/NMR/Process.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/casekit/NMR/Process.java b/src/casekit/NMR/Process.java index c301d7f..6b0583c 100644 --- a/src/casekit/NMR/Process.java +++ b/src/casekit/NMR/Process.java @@ -240,7 +240,7 @@ public void countNeighborhoodBonds(final IAtomContainerSet acSet, final String[] if (shiftsDB == null) { continue; } - String[][] shiftsDBvalues = casekit.NMR.Utils.parseShiftsInNMRShiftDBEntry(shiftsDB); + String[][] shiftsDBvalues = casekit.NMR.DB.parseNMRShiftDBSpectrum(shiftsDB); for (String[] shiftsDBvalue : shiftsDBvalues) { atomIndexDB = Integer.parseInt(shiftsDBvalue[2]); // sometimes the DB 
atom index is wrong and out of array range From c658168145f59a5e0ff19dd04dfd36f3a55fe0f6 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Sat, 6 Oct 2018 01:56:05 +0200 Subject: [PATCH 051/405] - two functions to get multiplicities added --- src/casekit/NMR/model/Spectrum.java | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/casekit/NMR/model/Spectrum.java b/src/casekit/NMR/model/Spectrum.java index 1c23d28..017da99 100644 --- a/src/casekit/NMR/model/Spectrum.java +++ b/src/casekit/NMR/model/Spectrum.java @@ -285,6 +285,23 @@ public boolean setMultiplicity(final String multiplicity, final int signalIndex) return true; } + public ArrayList getMultiplicities() { + final ArrayList multiplicities = new ArrayList<>(); + for (final Signal sig : this.signals) { + multiplicities.add(sig.getMultiplicity()); + } + + return multiplicities; + } + + public String getMultiplicity(final int SignalIndex) { + if (!this.checkSignalIndex(SignalIndex)) { + return null; + } + + return this.getSignal(SignalIndex).getMultiplicity(); + } + public ArrayList getSignals(){ return this.signals; } From b3a731c7c53042e1434a3d45e5308385a38f2173 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Sat, 6 Oct 2018 01:59:34 +0200 Subject: [PATCH 052/405] - three new functions for parsing, setting and converting NMRShiftDB spectrum strings added --- src/casekit/NMR/DB.java | 89 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 87 insertions(+), 2 deletions(-) diff --git a/src/casekit/NMR/DB.java b/src/casekit/NMR/DB.java index ecf7ed7..335807d 100644 --- a/src/casekit/NMR/DB.java +++ b/src/casekit/NMR/DB.java @@ -23,6 +23,7 @@ */ package casekit.NMR; +import casekit.NMR.model.Signal; import casekit.NMR.model.Spectrum; import java.io.FileNotFoundException; import java.io.FileReader; @@ -36,7 +37,6 @@ import java.util.HashMap; import java.util.HashSet; import org.openscience.cdk.exception.CDKException; -import org.openscience.cdk.interfaces.IAtom; import 
org.openscience.cdk.interfaces.IAtomContainer; import org.openscience.cdk.interfaces.IAtomContainerSet; import org.openscience.cdk.io.iterator.IteratingSDFReader; @@ -50,7 +50,7 @@ public class DB { /** - * Returns the molecules of a given NMRShiftDB file. + * Returns the molecules of a given MOL/SDF file. * This function sets the molecule aromaticity (with allowed exocyclic pi * bonds) by using the * {@link #setAromaticitiesInAtomContainer(org.openscience.cdk.interfaces.IAtomContainer, int)} @@ -326,4 +326,89 @@ public static ArrayList getSpectraFromNMRShiftDBEntry(final IAtomContain return spectra; } + + + /** + * Creates a two dimensional array of a given NMRShiftDB NMR entry + * with all shift values and atom indices. + * + * @param shiftsString + * @return two dimensional array: + * 1. dimension: shift entry (row); + * 2. dimension: shift value (column 1), atom index in atom container (column 2) + */ + public static String[][] parseNMRShiftDBSpectrum(final String shiftsString){ + + if(shiftsString.trim().isEmpty()){ + return new String[][]{}; + } + + String[] signalSplit; + final String[] shiftsSplit = shiftsString.split("\\|"); + final String[][] values = new String[shiftsSplit.length][3]; + for (int i = 0; i < shiftsSplit.length; i++) { + signalSplit = shiftsSplit[i].split(";"); + values[i][0] = signalSplit[0]; + values[i][1] = signalSplit[1]; + values[i][2] = signalSplit[2]; + } + + return values; + } + + /** + * Sets shifts and implicit hydrogen counts in atoms of an atom container + * by means of given spectrum property string. + * + * @param ac IAtomContainer to set + * @param spectrumPropertyString Property string of spectrum in NMRShiftDB format. 
+ * @return + */ + public static boolean setNMRShiftDBShiftsToAtomContainer(final IAtomContainer ac, final String spectrumPropertyString){ + + if (ac.getProperty(spectrumPropertyString) == null) { + return false; + } + final String[][] spectrumStringArray = DB.parseNMRShiftDBSpectrum(ac.getProperty(spectrumPropertyString)); + + int atomIndexSpectrumDB; + Integer multiplicity; + Double shift; + for (int k = 0; k < ac.getAtomCount(); k++) { + shift = null; + multiplicity = null; + for (int i = 0; i < spectrumStringArray.length; i++) { + atomIndexSpectrumDB = Integer.parseInt(spectrumStringArray[i][2]); + if (atomIndexSpectrumDB == k) { + shift = Double.parseDouble(spectrumStringArray[i][0]); + multiplicity = Utils.getHydrogenCountFromMultiplicity(spectrumStringArray[i][1].substring(spectrumStringArray[i][1].length() - 1)); + break; + } + } + ac.getAtom(k).setProperty(Utils.getNMRShiftConstant(ac.getAtom(k).getSymbol()), shift); + ac.getAtom(k).setImplicitHydrogenCount(multiplicity); + } + + return true; + } + + + public static Spectrum NMRShiftDBSpectrumToSpectrum(final String NMRShiftDBSpectrum, final String atomType){ + if ((NMRShiftDBSpectrum == null) || NMRShiftDBSpectrum.trim().isEmpty()) { + return null; + } + final String[][] spectrumStringArray = DB.parseNMRShiftDBSpectrum(NMRShiftDBSpectrum); + + final Spectrum spectrum = new Spectrum(new String[]{Utils.getIsotopeIdentifier(atomType)}); + String multiplicity; + Double shift, intensity; + for (int i = 0; i < spectrumStringArray.length; i++) { + shift = Double.parseDouble(spectrumStringArray[i][0]); + multiplicity = spectrumStringArray[i][1].substring(spectrumStringArray[i][1].length() - 1); + intensity = Double.parseDouble(spectrumStringArray[i][1].substring(0, spectrumStringArray[i][1].length() - 1)); + spectrum.addSignal(new Signal(new String[]{Utils.getIsotopeIdentifier(atomType)}, new Double[]{shift}, intensity, multiplicity)); + } + + return spectrum; + } } From 96571e18d29c5337304d8cba7d5353ee35596a7b 
Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Sat, 6 Oct 2018 02:01:43 +0200 Subject: [PATCH 053/405] - some general functions added --- src/casekit/NMR/Utils.java | 172 ++++++++++++++++++++++--------------- 1 file changed, 105 insertions(+), 67 deletions(-) diff --git a/src/casekit/NMR/Utils.java b/src/casekit/NMR/Utils.java index 1065a7e..3fb13ca 100644 --- a/src/casekit/NMR/Utils.java +++ b/src/casekit/NMR/Utils.java @@ -42,6 +42,8 @@ import java.util.HashMap; import java.util.HashSet; import java.util.List; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; import org.w3c.dom.Document; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; @@ -604,6 +606,7 @@ public static int findSingleShiftMatch(final ArrayList shiftList, final * @param column Column number of shift values in peak table * @return Indices of matches for each shift within the IAtomContainer * @throws IOException + * @deprecated */ public static ArrayList matchShiftsFromPeakTable(final IAtomContainer ac, final String pathToPeakList, final String atomType, final double tol, final int column) throws IOException { @@ -634,6 +637,7 @@ public static ArrayList matchShiftsFromPeakTable(final IAtomContainer a * @throws IOException * @throws javax.xml.parsers.ParserConfigurationException * @throws org.xml.sax.SAXException + * @deprecated */ public static ArrayList matchShiftsFromXML(final IAtomContainer ac, final String pathToXML, final String atomType, final double tol, final int ndim, final int attribute) throws IOException, ParserConfigurationException, SAXException { @@ -645,56 +649,36 @@ public static ArrayList matchShiftsFromXML(final IAtomContainer ac, fin } - /** - * Creates a two dimensional array of a given NMRShiftDB NMR entry - * with all shift values and atom indices. - * - * @param shiftsString - * @return two dimensional array: - * 1. dimension: shift entry (row); - * 2. 
dimension: shift value (column 1), atom index in atom container (column 2) - */ - public static String[][] parseShiftsInNMRShiftDBEntry(final String shiftsString){ - - if(shiftsString.trim().length() == 0){ - return new String[][]{}; - } + public static Integer getHydrogenCountFromMultiplicity(final String mult){ - String[] signalSplit; - final String[] shiftsSplit = shiftsString.split("\\|"); - final String[][] values = new String[shiftsSplit.length][3]; - for (int i = 0; i < shiftsSplit.length; i++) { - signalSplit = shiftsSplit[i].split(";"); - values[i][0] = signalSplit[0]; - values[i][1] = signalSplit[1]; - values[i][2] = signalSplit[2]; + switch(mult){ + case "Q": + return 3; + case "T": + return 2; + case "D": + return 1; + case "S": + return 0; + default: + return null; } - - return values; } - public static void assignNMRShiftDBShiftsToAtomContainer(final IAtomContainer ac, final String spectrumPropertyString){ - - // property string has to be changed for general case and not only "Spectrum 13C 0" - if (ac.getProperty(spectrumPropertyString) == null) { - return; + public static String getMultiplicityFromHydrogenCount(final int hCount) { + switch (hCount) { + case 0: + return "S"; + case 1: + return "D"; + case 2: + return "T"; + case 3: + return "Q"; + default: + return null; } - final String[][] spectrumStringArray = Utils.parseShiftsInNMRShiftDBEntry(ac.getProperty(spectrumPropertyString)); - - int atomIndexSpectrumDB; - Double shiftDB; - for (int k = 0; k < ac.getAtomCount(); k++) { - shiftDB = null; - for (int i = 0; i < spectrumStringArray.length; i++) { - atomIndexSpectrumDB = Integer.parseInt(spectrumStringArray[i][2]); - if (atomIndexSpectrumDB == k) { - shiftDB = Double.parseDouble(spectrumStringArray[i][0]); - break; - } - } - ac.getAtom(k).setProperty(Utils.getNMRShiftConstant(ac.getAtom(k).getSymbol()), shiftDB); - } } @@ -714,7 +698,7 @@ public static String getNMRShiftConstant(final String element){ case "N": return 
CDKConstants.NMRSHIFT_NITROGEN; case "P": return CDKConstants.NMRSHIFT_PHOSPORUS; case "F": return CDKConstants.NMRSHIFT_FLUORINE; - case "S": return CDKConstants.NMRSHIFT_SULFUR; +// case "S": return CDKConstants.NMRSHIFT_SULFUR; default: return null; } @@ -800,6 +784,12 @@ public static HashSet getAtomTypesInAtomContainer(final IAtomContainer a } + public static boolean checkMinMaxValue(final int min, final int max, final int value){ + + return (value >= min || value <= max); + } + + public static int[] getNeighborhoodBondsCount(final IAtomContainer ac, final int indexAC, final String[] bondsSet, final ArrayList neighborElems){ final int[] counts = new int[neighborElems.size() * bondsSet.length]; String foundBonds; @@ -825,7 +815,7 @@ public static int[] getNeighborhoodBondsCount(final IAtomContainer ac, final int return counts; } - + public static void writeNeighborhoodBondsCountMatrix(final String pathToOutput, final int[][] m, final String[] bondsSet, final String elem, final ArrayList neighborElems, final int min, final int max, final int stepSize) throws IOException{ @@ -933,7 +923,7 @@ public static void writeTextFile(final String pathToOutput, final String content * @throws CDKException */ public static void generatePicture(IAtomContainer ac, String path) throws IOException, CDKException { - final DepictionGenerator dg = new DepictionGenerator().withSize(1200, 1200).withAtomColors().withAtomValues().withFillToFit(); + final DepictionGenerator dg = new DepictionGenerator().withSize(1200, 1200).withAtomColors().withFillToFit().withAtomNumbers(); dg.depict(ac).writeTo(path); } @@ -1007,24 +997,6 @@ public static double getMedian(final ArrayList data) { } - /** - * - * @param data - * @return - */ - public static double getRMS(final ArrayList data) { - if(data.size() == 1){ - return data.get(0); - } - double qSum = 0; - for (final Double d : data) { - qSum += d*d; - } - - return Math.sqrt(qSum/data.size()); - } - - /** * * @param data @@ -1103,7 +1075,26 @@ 
public static String getFileFormat(final String pathToFile) { return split[split.length - 1]; } - + + /** + * + * @param data + * @return + */ + public static Double getRMS(final ArrayList data) { + if(data.isEmpty()){ + return null; + } + if (data.size() == 1) { + return data.get(0); + } + double qSum = 0; + for (final Double d : data) { + qSum += d * d; + } + + return Math.sqrt(qSum / data.size()); + } /** @@ -1116,12 +1107,46 @@ public static HashMap getRMS(final HashMap rms = new HashMap<>(); for (final String key : lookup.keySet()) { rms.put(key, casekit.NMR.Utils.getRMS(lookup.get(key))); -// System.out.println("count: " + lookup.get(key).size() + ", mean: " + NMR.Utils.getMean(lookup.get(key)) + ", rms: " + rms.get(key) + ", median: " + NMR.Utils.getMedian(lookup.get(key))); } return rms; } + + public static void combineHashMaps(final HashMap> hoseLookupToKeep, final HashMap> hoseLookup){ + + for (String hose : hoseLookup.keySet()) { + if(!hoseLookupToKeep.containsKey(hose)){ + hoseLookupToKeep.put(hose, new ArrayList<>()); + } + hoseLookupToKeep.get(hose).addAll(hoseLookup.get(hose)); + } + } + + + /** + * + * @param ac + */ + public static void setExplicitToImplicitHydrogens(final IAtomContainer ac){ + final List toRemoveList = new ArrayList<>(); + IAtom atomB; + for (final IAtom atomA : ac.atoms()) { + if (atomA.getAtomicNumber() == 1) { + atomB = ac.getConnectedAtomsList(atomA).get(0); + if(atomB.getImplicitHydrogenCount() == null){ + atomB.setImplicitHydrogenCount(0); + } + atomB.setImplicitHydrogenCount(atomB.getImplicitHydrogenCount() + 1); + toRemoveList.add(atomA); + } + } + for (final IAtom iAtom : toRemoveList) { + ac.removeAtom(iAtom); + } + } + + public static IAtomContainer setAromaticitiesInAtomContainer(final IAtomContainer ac, final int maxCycleSize) throws CDKException { AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(ac); @@ -1181,6 +1206,19 @@ public static String getSpectrumNucleiAsString(final Spectrum spectrum){ } + public 
static ExecutorService initExecuter(final int nThreads) { + return Executors.newFixedThreadPool(nThreads); + } + + public static void stopExecuter(final ExecutorService executor) { + executor.shutdown(); + if (!executor.isTerminated()) { + System.err.println("killing non-finished tasks"); + } + executor.shutdownNow(); + } + + // ######################################################################################################## // test functions -> not ready to use From 2cd1b5ffd5c764616d9862f8c901f946ae3f95fb Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Sat, 6 Oct 2018 02:03:06 +0200 Subject: [PATCH 054/405] - usage of casekit.NMR.Utils class --- src/casekit/HOSECodePredictor.java | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) diff --git a/src/casekit/HOSECodePredictor.java b/src/casekit/HOSECodePredictor.java index b1ae19a..c93d8e0 100644 --- a/src/casekit/HOSECodePredictor.java +++ b/src/casekit/HOSECodePredictor.java @@ -7,6 +7,7 @@ package casekit; +import casekit.NMR.Utils; import java.io.BufferedReader; import java.io.File; import java.io.FileReader; @@ -14,7 +15,6 @@ import java.text.DecimalFormat; import java.util.ArrayList; import java.util.HashMap; -import java.util.List; import java.util.StringTokenizer; import org.apache.commons.cli.CommandLine; @@ -274,20 +274,7 @@ public void generatePicture(IAtomContainer ac, String path) throws IOException, */ void fixExplicitHydrogens(IAtomContainer ac) { - List toRemoveList = new ArrayList<>(); - IAtom atomB; - for (IAtom atomA : ac.atoms()) - { - if (atomA.getAtomicNumber() == 1) - { - atomB = ac.getConnectedAtomsList(atomA).get(0); - atomB.setImplicitHydrogenCount(atomB.getImplicitHydrogenCount() +1 ); - toRemoveList.add(atomA); - } - } - for (IAtom iAtom : toRemoveList) { - ac.removeAtom(iAtom); - } + Utils.setExplicitToImplicitHydrogens(ac); } private void parseArgs(String[] args) throws ParseException From 97e525d9f65cdff84b6eaeb2c2fcc8fa80d4a8ad Mon Sep 17 00:00:00 2001 
From: michaelwenk Date: Fri, 12 Oct 2018 01:17:38 +0200 Subject: [PATCH 055/405] - some functions modified/added --- src/casekit/NMR/Utils.java | 195 +++++++++++-------------------------- 1 file changed, 57 insertions(+), 138 deletions(-) diff --git a/src/casekit/NMR/Utils.java b/src/casekit/NMR/Utils.java index 3fb13ca..f74218d 100644 --- a/src/casekit/NMR/Utils.java +++ b/src/casekit/NMR/Utils.java @@ -44,6 +44,9 @@ import java.util.List; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; +import java.util.logging.Level; +import java.util.logging.Logger; import org.w3c.dom.Document; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; @@ -784,9 +787,9 @@ public static HashSet getAtomTypesInAtomContainer(final IAtomContainer a } - public static boolean checkMinMaxValue(final int min, final int max, final int value){ + public static boolean checkMinMaxValue(final double min, final double max, final double value){ - return (value >= min || value <= max); + return (value >= min && value <= max); } @@ -1002,12 +1005,36 @@ public static double getMedian(final ArrayList data) { * @param data * @return */ - public static double getMean(final ArrayList data) { + public static Double getMean(final ArrayList data) { double sum = 0; - for (Double d : data) { - sum += d; + int nullCounter = 0; + for (final Double d : data) { + if(d != null){ + sum += d; + } else { + nullCounter++; + } } - return sum/data.size(); + return ((data.size() - nullCounter) != 0) ? (sum/(data.size() - nullCounter)) : null; + } + + + /** + * + * @param data + * @return + */ + public static Double getMean(final Double[] data) { + double sum = 0; + int nullCounter = 0; + for (final Double d : data) { + if(d != null){ + sum += d; + } else { + nullCounter++; + } + } + return ((data.length - nullCounter) != 0) ? 
(sum/(data.length - nullCounter)) : null; } @@ -1088,12 +1115,17 @@ public static Double getRMS(final ArrayList data) { if (data.size() == 1) { return data.get(0); } + int nullCounter = 0; double qSum = 0; for (final Double d : data) { - qSum += d * d; + if(d != null){ + qSum += d * d; + } else { + nullCounter++; + } } - return Math.sqrt(qSum / data.size()); + return ((data.size() - nullCounter) != 0) ? Math.sqrt(qSum / (data.size() - nullCounter)) : null; } @@ -1105,15 +1137,19 @@ public static Double getRMS(final ArrayList data) { public static HashMap getRMS(final HashMap> lookup){ final HashMap rms = new HashMap<>(); + Double rmsInList; for (final String key : lookup.keySet()) { - rms.put(key, casekit.NMR.Utils.getRMS(lookup.get(key))); + rmsInList = Utils.getRMS(lookup.get(key)); + if(rmsInList != null) { + rms.put(key, rmsInList); + } } return rms; } - public static void combineHashMaps(final HashMap> hoseLookupToKeep, final HashMap> hoseLookup){ + public static Void combineHashMaps(final HashMap> hoseLookupToKeep, final HashMap> hoseLookup){ for (String hose : hoseLookup.keySet()) { if(!hoseLookupToKeep.containsKey(hose)){ @@ -1121,6 +1157,7 @@ public static void combineHashMaps(final HashMap> hose } hoseLookupToKeep.get(hose).addAll(hoseLookup.get(hose)); } + return null; } @@ -1211,11 +1248,17 @@ public static ExecutorService initExecuter(final int nThreads) { } public static void stopExecuter(final ExecutorService executor) { - executor.shutdown(); - if (!executor.isTerminated()) { - System.err.println("killing non-finished tasks"); + try { + executor.shutdown(); + executor.awaitTermination(10, TimeUnit.SECONDS); + } catch (InterruptedException e) { + System.err.println("termination interrupted"); + } finally { + if (!executor.isTerminated()) { + System.err.println("killing non-finished tasks"); + } + executor.shutdownNow(); } - executor.shutdownNow(); } @@ -1223,130 +1266,6 @@ public static void stopExecuter(final ExecutorService executor) { // test 
functions -> not ready to use - - - /** - * Returns - * - * @param values - * @return - */ - public static HashMap getValueFrequencies(final ArrayList values) { - - final HashMap freqs = new HashMap<>(); - final HashSet valueLevels = new HashSet<>(values); - int sum = 0; - for (int value : valueLevels) { - sum += Collections.frequency(values, value); - } - for (int value : valueLevels) { - freqs.put(value, (Collections.frequency(values, value) / (double) sum)); - } - - return freqs; - } - - - /** - * Returns a list of open bonds of an atom. - * - * @param ac atom container - * @param atomIndex index of the atom to test - * @return - */ - public static ArrayList getOpenBonds(final IAtomContainer ac, final int atomIndex) { - - final IAtom atom = ac.getAtom(atomIndex); - if (atom.getHybridization() == null) { - return null; - } - final ArrayList bondOrderList = new ArrayList<>(); - final AtomValenceDescriptor valenceDesc = new AtomValenceDescriptor(); - final int valence = Integer.valueOf(valenceDesc.calculate(atom, ac).getValue().toString()); - int electronsLeft = (8 - (valence + atom.getImplicitHydrogenCount())); - - if (electronsLeft == 0) { -// System.out.println(atom.getSymbol() + ": " + atomIndex + " (" + atom.getHybridization() + "): " + bondOrderList); - return bondOrderList; - } - // only one single bond left; possible at SP1, SP2 and SP3 - if (electronsLeft == 1) { - bondOrderList.add(IBond.Order.SINGLE); -// System.out.println(atom.getSymbol() + ": " + atomIndex + " (" + atom.getHybridization() + "): " + bondOrderList); - return bondOrderList; - } - // with SP3 are only single bonds possible - if (atom.getHybridization().equals(IAtomType.Hybridization.SP3)) { - // subtract the single bonded neighbor number - electronsLeft -= ac.getConnectedAtomsList(atom).size(); - for (int k = 0; k < electronsLeft; k++) { - bondOrderList.add(IBond.Order.SINGLE); - } -// System.out.println(atom.getSymbol() + ": " + atomIndex + " (" + atom.getHybridization() + "): " + 
bondOrderList); - return bondOrderList; - } - - if (atom.getHybridization().equals(IAtomType.Hybridization.SP2)) { - switch (atom.getSymbol()) { - case "O": - case "S": - bondOrderList.add(IBond.Order.DOUBLE); - return bondOrderList; - case "C": - bondOrderList.add(IBond.Order.SINGLE); - bondOrderList.add(IBond.Order.SINGLE); - bondOrderList.add(IBond.Order.DOUBLE); - break; - case "N": - bondOrderList.add(IBond.Order.SINGLE); - bondOrderList.add(IBond.Order.DOUBLE); - break; - default: - break; - } - } else if (atom.getHybridization().equals(IAtomType.Hybridization.SP1)) { - switch (atom.getSymbol()) { - case "C": - bondOrderList.add(IBond.Order.DOUBLE); - bondOrderList.add(IBond.Order.DOUBLE); - // or - bondOrderList.add(IBond.Order.SINGLE); - bondOrderList.add(IBond.Order.TRIPLE); - break; - case "N": - bondOrderList.add(IBond.Order.TRIPLE); - break; - default: - break; - } - } - for (IAtom neighbor : ac.getConnectedAtomsList(atom)) { - bondOrderList.remove(ac.getBond(atom, neighbor).getOrder()); - electronsLeft -= casekit.NMR.Utils.getElectronNumberByBondOrder(ac.getBond(atom, neighbor).getOrder()); - } - - int theoCounter = 0; - for (IBond.Order order : bondOrderList) { - theoCounter += casekit.NMR.Utils.getElectronNumberByBondOrder(order); - } - - switch (Math.abs(theoCounter - electronsLeft)) { - case 1: - bondOrderList.remove(IBond.Order.SINGLE); - theoCounter -= 1; - break; - case 2: - - break; - case 3: - - break; - } - -// System.out.println(atom.getSymbol() + ": " + atomIndex + " (" + atom.getHybridization() + "): " + bondOrderList + " -> e: " + theoCounter + " (theo) vs. " + electronsLeft + " (real), bond counter: " + ac.getConnectedAtomsList(atom).size() + " (+" + atom.getImplicitHydrogenCount() + "H)"); - return bondOrderList; - } - /** * Returns a bond type for two bond atoms from its hybridization. * CURRENTLY ONLY SINGLE BOND DETECTION POSSIBLE!!! 
From f37fe64f6accc42f7971b1d4d3c2775f5af1b40e Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Tue, 23 Oct 2018 13:53:04 +0200 Subject: [PATCH 056/405] - deleted some unused imports --- src/casekit/NMR/ParseRawData.java | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/casekit/NMR/ParseRawData.java b/src/casekit/NMR/ParseRawData.java index 7068d21..5c8d92a 100644 --- a/src/casekit/NMR/ParseRawData.java +++ b/src/casekit/NMR/ParseRawData.java @@ -27,9 +27,7 @@ import casekit.NMR.model.Spectrum; import java.io.IOException; import java.util.ArrayList; -import java.util.Collections; import java.util.HashMap; -import java.util.HashSet; import javax.xml.parsers.ParserConfigurationException; import org.openscience.cdk.Atom; import org.openscience.cdk.CDKConstants; @@ -37,7 +35,6 @@ import org.openscience.cdk.interfaces.IAtomContainer; import org.openscience.cdk.interfaces.IAtomType; import org.openscience.cdk.interfaces.IMolecularFormula; -import org.openscience.cdk.qsar.descriptors.atomic.AtomHybridizationDescriptor; import org.openscience.cdk.silent.SilentChemObjectBuilder; import org.openscience.cdk.tools.manipulator.MolecularFormulaManipulator; import org.xml.sax.SAXException; From 9038f116f64d2c4e68d556a0cfb3cf0737dcb11d Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Tue, 23 Oct 2018 13:55:18 +0200 Subject: [PATCH 057/405] - small changes --- src/casekit/NMR/Utils.java | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/src/casekit/NMR/Utils.java b/src/casekit/NMR/Utils.java index f74218d..520a367 100644 --- a/src/casekit/NMR/Utils.java +++ b/src/casekit/NMR/Utils.java @@ -651,7 +651,12 @@ public static ArrayList matchShiftsFromXML(final IAtomContainer ac, fin return matchesAtomType; } - + /** + * Specified for carbons only -> not generic!!! 
+ * + * @param mult + * @return + */ public static Integer getHydrogenCountFromMultiplicity(final String mult){ switch(mult){ @@ -668,7 +673,12 @@ public static Integer getHydrogenCountFromMultiplicity(final String mult){ } } - + /** + * Specified for carbons only -> not generic!!! + * + * @param hCount + * @return + */ public static String getMultiplicityFromHydrogenCount(final int hCount) { switch (hCount) { case 0: @@ -1247,16 +1257,15 @@ public static ExecutorService initExecuter(final int nThreads) { return Executors.newFixedThreadPool(nThreads); } - public static void stopExecuter(final ExecutorService executor) { + public static void stopExecuter(final ExecutorService executor, final long seconds) { + executor.shutdown(); try { - executor.shutdown(); - executor.awaitTermination(10, TimeUnit.SECONDS); - } catch (InterruptedException e) { - System.err.println("termination interrupted"); - } finally { - if (!executor.isTerminated()) { - System.err.println("killing non-finished tasks"); + if (!executor.awaitTermination(seconds, TimeUnit.SECONDS)) { + System.err.println("killing non-finished tasks!"); + executor.shutdownNow(); } + } catch (InterruptedException e) { + System.err.println("killing non-finished tasks!"); executor.shutdownNow(); } } From b728f7c26441a1e0b61473bfb3ef1d66bfcf9e08 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Tue, 23 Oct 2018 15:03:46 +0200 Subject: [PATCH 058/405] - removed the counts class member and its functions --- src/casekit/NMR/model/Assignment.java | 40 +-------------------------- 1 file changed, 1 insertion(+), 39 deletions(-) diff --git a/src/casekit/NMR/model/Assignment.java b/src/casekit/NMR/model/Assignment.java index 5160884..3219a1c 100644 --- a/src/casekit/NMR/model/Assignment.java +++ b/src/casekit/NMR/model/Assignment.java @@ -34,13 +34,11 @@ public class Assignment { final int nDim; final String[] nuclei; final int[][] assignments; - final int[][] counts; public Assignment(final Spectrum spectrum) { this.nuclei = 
spectrum.getNuclei(); this.nDim = this.nuclei.length; this.assignments = this.initAssignments(this.nDim, spectrum.getSignalCount()); - this.counts = this.initCounts(this.nDim, spectrum.getSignalCount()); } private int[][] initAssignments(final int nDim, final int nSignal){ @@ -53,43 +51,7 @@ private int[][] initAssignments(final int nDim, final int nSignal){ return temp; } - - private int[][] initCounts(final int nDim, final int nSignal){ - final int[][] temp = new int[nDim][nSignal]; - for (int i = 0; i < nDim; i++) { - for (int j = 0; j < nSignal; j++) { - temp[i][j] = 0; - } - } - - return temp; - } - - public boolean setCount(final int dim, final int indexInSpectrum, final int newCountValue){ - if(!this.checkDimension(dim) || !this.checkSpectrumIndex(dim, indexInSpectrum)){ - return false; - } - this.counts[dim][indexInSpectrum] = newCountValue; - - return true; - } - - public Integer getCount(final int dim, final int indexInSpectrum){ - if(!this.checkDimension(dim) || !this.checkSpectrumIndex(dim, indexInSpectrum)){ - return null; - } - - return this.counts[dim][indexInSpectrum]; - } - - public int[] getCounts(final int dim){ - if(!this.checkDimension(dim)){ - return null; - } - - return this.counts[dim]; - } - + public boolean setAssignment(final int dim, final int indexInSpectrum, final int indexInAtomContainer){ if(!this.checkDimension(dim) || !this.checkSpectrumIndex(dim, indexInSpectrum)){ return false; From 9c8071ba829a4755cf28047f89bb8de0972dfb06 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Tue, 23 Oct 2018 15:39:05 +0200 Subject: [PATCH 059/405] - only renamings --- src/casekit/NMR/DB.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/casekit/NMR/DB.java b/src/casekit/NMR/DB.java index 335807d..e31efc9 100644 --- a/src/casekit/NMR/DB.java +++ b/src/casekit/NMR/DB.java @@ -314,7 +314,7 @@ public static HashMap getRMS(final Connection DBConnection, fina } - public static ArrayList getSpectraFromNMRShiftDBEntry(final 
IAtomContainer ac, final String elem) { + public static ArrayList getNMRShiftDBSpectra(final IAtomContainer ac, final String elem) { ArrayList props = (ArrayList) (ArrayList) (new ArrayList<>(ac.getProperties().keySet())); final ArrayList spectra = new ArrayList<>(); @@ -361,15 +361,15 @@ public static String[][] parseNMRShiftDBSpectrum(final String shiftsString){ * by means of given spectrum property string. * * @param ac IAtomContainer to set - * @param spectrumPropertyString Property string of spectrum in NMRShiftDB format. + * @param NMRShiftDBSpectrum Property string of spectrum in NMRShiftDB format. * @return */ - public static boolean setNMRShiftDBShiftsToAtomContainer(final IAtomContainer ac, final String spectrumPropertyString){ + public static boolean setNMRShiftDBShiftsToAtomContainer(final IAtomContainer ac, final String NMRShiftDBSpectrum){ - if (ac.getProperty(spectrumPropertyString) == null) { + if (ac.getProperty(NMRShiftDBSpectrum) == null) { return false; } - final String[][] spectrumStringArray = DB.parseNMRShiftDBSpectrum(ac.getProperty(spectrumPropertyString)); + final String[][] spectrumStringArray = DB.parseNMRShiftDBSpectrum(ac.getProperty(NMRShiftDBSpectrum)); int atomIndexSpectrumDB; Integer multiplicity; From d8ee763877005764913e57ff78ca7e5263946e7a Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Tue, 23 Oct 2018 15:59:00 +0200 Subject: [PATCH 060/405] - added an exception in check1DSpectrum --- src/casekit/NMR/ParseRawData.java | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/casekit/NMR/ParseRawData.java b/src/casekit/NMR/ParseRawData.java index 5c8d92a..fdece02 100644 --- a/src/casekit/NMR/ParseRawData.java +++ b/src/casekit/NMR/ParseRawData.java @@ -31,6 +31,7 @@ import javax.xml.parsers.ParserConfigurationException; import org.openscience.cdk.Atom; import org.openscience.cdk.CDKConstants; +import org.openscience.cdk.exception.CDKException; import org.openscience.cdk.interfaces.IAtom; import 
org.openscience.cdk.interfaces.IAtomContainer; import org.openscience.cdk.interfaces.IAtomType; @@ -213,8 +214,9 @@ public static final Spectrum parse1DNMRviaXML(final String pathToXML, final Stri * * @param spectrum Spectrum class object containing the 1D shift information * @throws java.io.IOException + * @throws org.openscience.cdk.exception.CDKException */ - public final void assign1DSpectrum(final Spectrum spectrum) throws IOException{ + public final void assign1DSpectrum(final Spectrum spectrum) throws IOException, CDKException{ // checks whether number of signals is equal to molecular formula if given // if not equal then edit signal list in spectrum this.check1DSpectrum(spectrum); @@ -239,12 +241,12 @@ public final void assign1DSpectrum(final Spectrum spectrum) throws IOException{ * @throws IOException * @see Utils#editSignalsInSpectrum(casekit.NMR.model.Spectrum, org.openscience.cdk.interfaces.IMolecularFormula) */ - private void check1DSpectrum(final Spectrum spectrum) throws IOException{ + private void check1DSpectrum(final Spectrum spectrum) throws IOException, CDKException{ if(this.molFormula != null){ - final int diff = Utils.getDifferenceSpectrumSizeAndMolecularFormulaCount(spectrum, this.molFormula); + final int diff = Utils.getDifferenceSpectrumSizeAndMolecularFormulaCount(spectrum, this.molFormula, 0); if (diff != 0) { // adjust Spectrum size by user - Utils.editSignalsInSpectrum(spectrum, this.molFormula); + Utils.editSignalsInSpectrum(spectrum, this.molFormula, 0); } } } From d9342039b27c071a3378de4c38e59b6cff4942c1 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Tue, 23 Oct 2018 16:00:02 +0200 Subject: [PATCH 061/405] - function renaming in Utils class adapted --- src/casekit/NMR/Process.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/casekit/NMR/Process.java b/src/casekit/NMR/Process.java index 6b0583c..80f8328 100644 --- a/src/casekit/NMR/Process.java +++ b/src/casekit/NMR/Process.java @@ -236,7 +236,7 @@ public 
void countNeighborhoodBonds(final IAtomContainerSet acSet, final String[] for (int k = 0; k < acSet.getAtomContainerCount(); k++) { acDB = acSet.getAtomContainer(k); // for all DB entries containing a spectrum for the current query atom type - for (final String shiftsDB : casekit.NMR.DB.getSpectraFromNMRShiftDBEntry(acDB, elem)) { + for (final String shiftsDB : DB.getNMRShiftDBSpectra(acDB, elem)) { if (shiftsDB == null) { continue; } From 01827ae94c9706f195cd7ab206df33842a50a421 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Tue, 23 Oct 2018 16:01:16 +0200 Subject: [PATCH 062/405] - exception added in getDifferenceSpectrumSizeAndMolecularFormulaCount() --- src/casekit/NMR/Utils.java | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/src/casekit/NMR/Utils.java b/src/casekit/NMR/Utils.java index 520a367..9796919 100644 --- a/src/casekit/NMR/Utils.java +++ b/src/casekit/NMR/Utils.java @@ -358,19 +358,22 @@ public static String getAtomTypeFromSpectrum(final Spectrum spectrum, final int return null; } - public static int getDifferenceSpectrumSizeAndMolecularFormulaCount(final Spectrum spectrum, final IMolecularFormula molformula){ - final String atomType = Utils.getAtomTypeFromSpectrum(spectrum, 0); + public static int getDifferenceSpectrumSizeAndMolecularFormulaCount(final Spectrum spectrum, final IMolecularFormula molFormula, final int dim) throws CDKException{ + if(!spectrum.checkDimension(dim)){ + throw new CDKException(Thread.currentThread().getStackTrace()[2].getClassName() + "." 
+ Thread.currentThread().getStackTrace()[2].getMethodName() + ": invalid dimension in spectrum given"); + } + final String atomType = Utils.getAtomTypeFromSpectrum(spectrum, dim); int atomsInMolFormula = 0; - if(molformula != null){ - atomsInMolFormula = MolecularFormulaManipulator.getElementCount(molformula, atomType); + if(molFormula != null){ + atomsInMolFormula = MolecularFormulaManipulator.getElementCount(molFormula, atomType); } return atomsInMolFormula - spectrum.getSignalCount(); } - public static void editSignalsInSpectrum(final Spectrum spectrum, final IMolecularFormula molFormula) throws IOException { + public static void editSignalsInSpectrum(final Spectrum spectrum, final IMolecularFormula molFormula, final int dim) throws IOException, CDKException { BufferedReader br = new BufferedReader(new InputStreamReader(System.in)); int n; final ArrayList validIndices = new ArrayList<>(); - int diff = Utils.getDifferenceSpectrumSizeAndMolecularFormulaCount(spectrum, molFormula); + int diff = Utils.getDifferenceSpectrumSizeAndMolecularFormulaCount(spectrum, molFormula, dim); // walk through all signals in spectrum add missing or to remove signals while (diff != 0) { // display all selectable signal indices in spectrum @@ -400,7 +403,7 @@ public static void editSignalsInSpectrum(final Spectrum spectrum, final IMolecul } else { spectrum.removeSignal(validIndices.indexOf(n)); } - diff = Utils.getDifferenceSpectrumSizeAndMolecularFormulaCount(spectrum, molFormula); + diff = Utils.getDifferenceSpectrumSizeAndMolecularFormulaCount(spectrum, molFormula, dim); } } From db41e7b113616e46cd544ebd5f727e992413077e Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Tue, 23 Oct 2018 16:01:34 +0200 Subject: [PATCH 063/405] - updated --- src/casekit/NMR/remarks | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/casekit/NMR/remarks b/src/casekit/NMR/remarks index b28ac52..dccc0b3 100644 --- a/src/casekit/NMR/remarks +++ b/src/casekit/NMR/remarks @@ -13,5 +13,4 
@@ coding: -> no. of assigned protons (HSQC, HMBC, ...) vs. DEPT information and for other heavy atoms too -> how many protons for one shift value (carbons or other heavy atoms) are still left? - some function descriptions (javadoc) are deprecated and have to be updated -- in Assignment class: - - new: addition of counts array as class member and functions for that \ No newline at end of file +- add exceptions where needed \ No newline at end of file From f37ca9136ef88f37fd9dbe07bd9c806d9b89dafb Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Tue, 23 Oct 2018 16:05:01 +0200 Subject: [PATCH 064/405] - a Java-Doc added --- src/casekit/NMR/model/Spectrum.java | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/casekit/NMR/model/Spectrum.java b/src/casekit/NMR/model/Spectrum.java index 017da99..a41548a 100644 --- a/src/casekit/NMR/model/Spectrum.java +++ b/src/casekit/NMR/model/Spectrum.java @@ -127,12 +127,7 @@ public int getSignalCount() { * @return */ public boolean addSignal(final Signal signal) { - if(!this.checkDimCount(signal.getDimCount()) || !this.checkNuclei(signal.getNuclei())){ - return false; - } - this.addSignal(signal, null); - - return true; + return this.addSignal(signal, null); } /** @@ -179,6 +174,12 @@ private boolean checkSignalIndex(final Integer signalIndex){ return (signalIndex != null) && (signalIndex >= 0) && (signalIndex < this.getSignalCount()); } + /** + * Checks whether the input dimension exists in this spectrum or not. 
+ * + * @param dim + * @return + */ public boolean checkDimension(final int dim){ return (dim >= 0) && (dim < this.nDim); } @@ -318,11 +319,11 @@ public Integer getEquivalence(final int signalIndex){ return this.equivalences.get(signalIndex); } - public boolean setEquivalence(final int signalIndex, final int equivalentSignalIndex){ - if(!this.checkSignalIndex(signalIndex) || !this.checkSignalIndex(equivalentSignalIndex)){ + public boolean setEquivalence(final int signalIndex, final int isEquivalentToSignalIndex){ + if(!this.checkSignalIndex(signalIndex) || !this.checkSignalIndex(isEquivalentToSignalIndex)){ return false; } - this.equivalences.set(signalIndex, equivalentSignalIndex); + this.equivalences.set(signalIndex, isEquivalentToSignalIndex); return true; } From 89a1c7da3604dba3e2ed584d615279d65082e9e3 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Tue, 23 Oct 2018 16:07:11 +0200 Subject: [PATCH 065/405] - changes in pathes --- nbactions.xml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/nbactions.xml b/nbactions.xml index 9db7bbf..2a4cbf2 100644 --- a/nbactions.xml +++ b/nbactions.xml @@ -10,7 +10,7 @@ org.codehaus.mojo:exec-maven-plugin:1.2.1:exec - -classpath %classpath casekit.NMR.test + -classpath %classpath casekit.NMR.Start java @@ -24,7 +24,7 @@ org.codehaus.mojo:exec-maven-plugin:1.2.1:exec - -Xdebug -Xrunjdwp:transport=dt_socket,server=n,address=${jpda.address} -classpath %classpath casekit.NMR.test + -Xdebug -Xrunjdwp:transport=dt_socket,server=n,address=${jpda.address} -classpath %classpath casekit.NMR.Start java true @@ -39,7 +39,7 @@ org.codehaus.mojo:exec-maven-plugin:1.2.1:exec - -classpath %classpath casekit.NMR.test + -classpath %classpath casekit.NMR.Start java From e788750c562389dcc0cdb4a7f69d3691b798b369 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Tue, 23 Oct 2018 16:07:40 +0200 Subject: [PATCH 066/405] - "cdk-standard" added as dependency --- pom.xml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git 
a/pom.xml b/pom.xml index 5b3bafe..5f2866b 100644 --- a/pom.xml +++ b/pom.xml @@ -87,5 +87,11 @@ cdk-silent 2.2-SNAPSHOT + + org.openscience.cdk + cdk-standard + 2.2-SNAPSHOT + jar + From b56d3cbe146984d3cfd5536ebb02b1bbd4efc580 Mon Sep 17 00:00:00 2001 From: Michael Wenk Date: Tue, 23 Oct 2018 16:09:37 +0200 Subject: [PATCH 067/405] Delete ShiftMatcher.java --- src/casekit/NMR/ShiftMatcher.java | 339 ------------------------------ 1 file changed, 339 deletions(-) delete mode 100644 src/casekit/NMR/ShiftMatcher.java diff --git a/src/casekit/NMR/ShiftMatcher.java b/src/casekit/NMR/ShiftMatcher.java deleted file mode 100644 index a46fe32..0000000 --- a/src/casekit/NMR/ShiftMatcher.java +++ /dev/null @@ -1,339 +0,0 @@ -/* - * The MIT License - * - * Copyright 2018 Michael Wenk [https://github.com/michaelwenk]. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. 
- */ -package casekit.NMR; - - -import java.io.FileNotFoundException; -import java.io.FileReader; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.BitSet; -import org.apache.commons.lang3.ArrayUtils; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Hashtable; -import java.util.Set; -import java.util.StringTokenizer; -import java.util.regex.Matcher; -import java.util.regex.Pattern; -import org.openscience.cdk.CDKConstants; -import org.openscience.cdk.exception.CDKException; -import org.openscience.cdk.fingerprint.Fingerprinter; -import org.openscience.cdk.fingerprint.IBitFingerprint; -import org.openscience.cdk.interfaces.IAtom; -import org.openscience.cdk.interfaces.IAtomContainer; -import org.openscience.cdk.io.iterator.IteratingSDFReader; -import org.openscience.cdk.qsar.descriptors.atomic.AtomHybridizationDescriptor; -import org.openscience.cdk.silent.SilentChemObjectBuilder; -import org.openscience.cdk.similarity.Tanimoto; - -/** - * - * @author Michael Wenk [https://github.com/michaelwenk] - */ -public class ShiftMatcher { - - public ShiftMatcher(){ - this.HOSESymbolHash = new Hashtable(); - this.verbose = true; - -// this.HOSESymbolHash.put("H", "H"); -// this.HOSESymbolHash.put("C", "C"); - this.HOSESymbolHash.put("O", "O"); - this.HOSESymbolHash.put("N", "N"); - this.HOSESymbolHash.put("S", "S"); - this.HOSESymbolHash.put("P", "P"); - this.HOSESymbolHash.put("Si", "Q"); - this.HOSESymbolHash.put("B", "B"); - this.HOSESymbolHash.put("F", "F"); - this.HOSESymbolHash.put("Cl", "X"); - this.HOSESymbolHash.put("Br", "Y"); - this.HOSESymbolHash.put("I", "I"); - -// this.HOSESymbolHash.put("Se", "Se"); -// this.HOSESymbolHash.put("Sn", "Sn"); -// this.HOSESymbolHash.put("Ge", "Ge"); -// this.HOSESymbolHash.put("te", "Te"); -// this.HOSESymbolHash.put("Zn", "Zn"); -// this.HOSESymbolHash.put("As", "As"); -// this.HOSESymbolHash.put("Li", "Li"); -// 
this.HOSESymbolHash.put("Ti", "Ti"); -// this.HOSESymbolHash.put("Pb", "Pb"); -// this.HOSESymbolHash.put("Hg", "Hg"); -// this.HOSESymbolHash.put("Mg", "Mg"); - - } - - private final boolean verbose; - private Hashtable> hoseLookup; - private int carbonNMRCount; - private int hydrogenNMRCount; - private int molListDBCount; - private final Hashtable HOSESymbolHash; - private ArrayList molListDB; - - - - - - private ArrayList convertToHOSECodeTerm(final String molFormula){ - - final ArrayList convertedList = new ArrayList<>(); - Matcher m = Pattern.compile("[A-Z]").matcher(molFormula); - String s; - - while (m.find()) { - s = molFormula.substring(m.start(), m.end()); - if(m.end() + 1 <= molFormula.length()){ - Character c = molFormula.subSequence(m.end(), m.end() + 1).charAt(0); - if(Character.isLowerCase(c)){ - s = molFormula.substring(m.start(), m.end() + 1); - } - } - // ignore C and H atoms - if(s.equals("C") || s.equals("H")) continue; - - if(this.HOSESymbolHash.containsKey(s)) { - convertedList.add(this.HOSESymbolHash.get(s)); - } else { - convertedList.add(s); - } - } - - return convertedList; - } - - - public void matchHOSEShifts(final double[] nmrShiftValues, final String molFormula) throws Exception { - if (verbose) { - System.out.println("Starting shift matching (2) against hose code table"); - } - - Arrays.parallelSort(nmrShiftValues); - - final Set keys = this.hoseLookup.keySet(); - ArrayList shifts; - - final Hashtable hoseHeteroCount = new Hashtable<>(); - final Hashtable hoseHCount = new Hashtable<>(); - - StringTokenizer strtok; - StringTokenizer strtok2; - - final ArrayList atomTypesHetero = convertToHOSECodeTerm(molFormula); - System.err.println("found heavy atoms in molecular formula: " + molFormula + " -> " + atomTypesHetero.toString()); - - - Matcher m; - int heteroAtomCount; - int carbonAtomCount; - int HAtomCount; - int totalBindPartnCount; - int allAtomCount; - - int hitCounter; -// int middle; -// double mean; -// double median; -// double 
d1; -// double d2; -// double sum; -// double sd; - - String neighbors; - - - for (int i = 0; i < nmrShiftValues.length; i++) { - final ArrayList hoseMatch = new ArrayList<>(); - - hitCounter = 0; - - for (String hose : keys) { - - strtok = new StringTokenizer(hose, ";"); - totalBindPartnCount = Integer.parseInt(strtok.nextToken().substring(2, 3)); - strtok2 = new StringTokenizer(strtok.nextToken(), "("); - - String directNeighbors = strtok2.nextToken(); - - heteroAtomCount = 0; - for (String s : atomTypesHetero) { - if (s.length() == 1) { - m = Pattern.compile(s +"(?![a-z])").matcher(directNeighbors); - } else { - m = Pattern.compile(s).matcher(directNeighbors); - } - while (m.find()) { - heteroAtomCount++; - } - } - - m = Pattern.compile("C(?![a-z])").matcher(directNeighbors); - carbonAtomCount = 0; - while (m.find()) { - carbonAtomCount++; - } - - //check (count) whether other atom types than C or needed hetero atoms are present or not - m = Pattern.compile("[A-Z]").matcher(directNeighbors); - allAtomCount = 0; - while (m.find()) { - allAtomCount++; - } - if(allAtomCount - (carbonAtomCount + heteroAtomCount) != 0){ -// System.out.println(hose + " -> " + allAtomCount + " - (" + carbonAtomCount + " + " + heteroAtomCount + ")"); -// System.out.println(" -> REFUSED!!!"); - continue; - } - - HAtomCount = totalBindPartnCount - (carbonAtomCount + heteroAtomCount); - - hoseHeteroCount.put(hose, heteroAtomCount); - hoseHCount.put(hose, HAtomCount); - - - shifts = hoseLookup.get(hose); - if ((nmrShiftValues[i] >= shifts.get(shifts.indexOf(Collections.min(shifts)))) && nmrShiftValues[i] <= shifts.get(shifts.indexOf(Collections.max(shifts)))) { - - hitCounter++; - hoseMatch.add(hose); - } - } - System.out.println("\nThere are " + hitCounter + " hits for shift value " + nmrShiftValues[i]); - - for (int j = 4; j >= 0; j--) { - - System.out.println(j + ":"); - final ArrayList directNeighbors = new ArrayList<>(); - - for (String hose : hoseMatch) { - if 
(hoseHeteroCount.get(hose) == j) { -// System.out.println(hose + ": " + hoseHeteroAtoms.get(hose)); -// shifts = hoseLookup.get(hose); -// -// middle = shifts.size() / 2; -// if (shifts.size() % 2 == 1) { -// median = shifts.get(middle); -// } else { -// median = (shifts.get(middle - 1) + shifts.get(middle)) / 2.0; -// } -// -// sum = 0; -// for (int k = 0; k < shifts.size(); k++) { -// sum += shifts.get(k); -// } -// mean = (sum / shifts.size()); -// -// d1 = 0; -// d2 = 0; -// sum = 0; -// for (int k = 0; k < shifts.size(); k++) { -// d2 = (mean - shifts.get(k)) * (mean - shifts.get(k)); -// d1 = d2 + d1; -// } -// sd = 0; -// if (shifts.size() > 1) { -// sd = Math.sqrt((d1 / (shifts.size() - 1))); -// } -// -// System.out.println(hose + " [" + shifts.get(shifts.indexOf(Collections.min(shifts))) + ", " + shifts.get(shifts.indexOf(Collections.max(shifts))) + "] (" + shifts.size() + ") -> mean: " + mean + ", median: " + median + ", sd: " + sd + "\n"); - - strtok = new StringTokenizer(hose, ";"); - strtok.nextToken(); - strtok2 = new StringTokenizer(strtok.nextToken(), "("); - neighbors = strtok2.nextToken(); - - neighbors = neighbors + "\t\t -> het: " + j + ", H: " + hoseHCount.get(hose); - - directNeighbors.add(neighbors); - } - } - - //unique HOSE codes - HashSet hs = new HashSet(); - hs.addAll(directNeighbors); - directNeighbors.clear(); - directNeighbors.addAll(hs); - - for (String n : directNeighbors) { - System.out.println("\t" + n); - } - - } - } -// System.out.println("\n"); - - - } - - - public void matchDBBits(final IAtomContainer acQ, final String elementSymbol) throws CDKException{ - - final double[] nmrShiftValuesQ = new double[acQ.getAtomCount()]; - for (int i = 0; i < acQ.getAtomCount(); i++) { - nmrShiftValuesQ[i] = acQ.getAtom(i).getProperty(CDKConstants.NMRSHIFT_CARBON); - } - System.out.println("Q shifts: " + Arrays.toString(nmrShiftValuesQ)); - - Arrays.parallelSort(nmrShiftValuesQ); - - ArrayList nmrShiftValuesDBList; - double[] 
nmrShiftValuesDB; - Double[] temp; - for(int i = 0; i< this.molListDBCount; i++){ - IAtomContainer acDB = this.molListDB.get(i); - System.out.println("acDB atom count: " + acDB.getAtomCount()); - nmrShiftValuesDBList = new ArrayList<>(); - - Fingerprinter fp = new Fingerprinter(); - IBitFingerprint ifpDB = fp.getBitFingerprint(acDB); - IBitFingerprint ifpQ = fp.getBitFingerprint(acQ); - System.out.println(Arrays.toString(ifpQ.getSetbits())); - System.out.println(Arrays.toString(ifpDB.getSetbits()) + "\n"); - - for(int j = 0; j < acDB.getAtomCount(); j++) { - if(acDB.getAtom(j).getSymbol().equals(elementSymbol)){ - nmrShiftValuesDBList.add(acDB.getAtom(j).getProperty(CDKConstants.NMRSHIFT_CARBON)); - } - } - temp = nmrShiftValuesDBList.toArray(new Double[nmrShiftValuesDBList.size()]); - nmrShiftValuesDB = ArrayUtils.toPrimitive(temp); - Arrays.parallelSort(nmrShiftValuesDB); - - System.out.println("DB shifts: " + Arrays.toString(nmrShiftValuesDB)); - - double tanimoto_coefficient = Tanimoto.calculate(nmrShiftValuesQ, nmrShiftValuesDB); - System.out.println(i + ": Tanimo result: " + tanimoto_coefficient); - } - - } - - - - - - -} From 7c35cf12a658a9338939684e45620b27c0ac8314 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Fri, 9 Nov 2018 18:07:19 +0100 Subject: [PATCH 068/405] - clone methode added - adding an assignment is possible - getting both type of indices is now possible: - index of atom in structure - index of signal in spectrum --- src/casekit/NMR/model/Assignment.java | 62 +++++++++++++++++++++++++-- 1 file changed, 58 insertions(+), 4 deletions(-) diff --git a/src/casekit/NMR/model/Assignment.java b/src/casekit/NMR/model/Assignment.java index 3219a1c..1a287f0 100644 --- a/src/casekit/NMR/model/Assignment.java +++ b/src/casekit/NMR/model/Assignment.java @@ -29,11 +29,11 @@ * * @author Michael Wenk [https://github.com/michaelwenk] */ -public class Assignment { +public class Assignment implements Cloneable { final int nDim; final String[] nuclei; - final 
int[][] assignments; + int[][] assignments; public Assignment(final Spectrum spectrum) { this.nuclei = spectrum.getNuclei(); @@ -52,6 +52,14 @@ private int[][] initAssignments(final int nDim, final int nSignal){ return temp; } + /** + * Sets an assignment as atom index for a signal position. + * + * @param dim + * @param indexInSpectrum + * @param indexInAtomContainer + * @return + */ public boolean setAssignment(final int dim, final int indexInSpectrum, final int indexInAtomContainer){ if(!this.checkDimension(dim) || !this.checkSpectrumIndex(dim, indexInSpectrum)){ return false; @@ -72,7 +80,7 @@ public boolean setAssignments(final int dim, final ArrayList indicesInA return true; } - public Integer getAssignment(final int dim, final int indexInSpectrum){ + public Integer getAtomIndex(final int dim, final int indexInSpectrum){ if(!this.checkDimension(dim) || !this.checkSpectrumIndex(dim, indexInSpectrum)){ return null; } @@ -80,7 +88,20 @@ public Integer getAssignment(final int dim, final int indexInSpectrum){ return this.assignments[dim][indexInSpectrum]; } - public int[] getAssignments(final int dim){ + public Integer getSignalIndex(final int dim, final int atomIndexInStructure){ + if(!this.checkDimension(dim)){ + return null; + } + for (int signalIndex = 0; signalIndex < this.assignments[dim].length; signalIndex++) { + if(this.getAtomIndex(dim, signalIndex) == atomIndexInStructure){ + return signalIndex; + } + } + + return null; + } + + public int[] getAtomIndices(final int dim){ if(!this.checkDimension(dim)){ return null; } @@ -99,6 +120,34 @@ public int getAssignmentsCount(){ return 0; } + public int getSetAssignmentsCount(final int dim){ + int setAssignmentsCounter = 0; + if((this.getDimCount() > 0) && (this.checkDimension(dim))){ + for (int j = 0; j < this.assignments[dim].length; j++) { + if(this.assignments[dim][j] != -1){ + setAssignmentsCounter++; + } + } + } + return setAssignmentsCounter; + } + + public boolean addAssignment(final int[] indices){ + 
if(indices.length != this.nDim){ + return false; + } + final int[][] extendedAssignments = new int[this.nDim][this.getAssignmentsCount()+1]; + for (int dim = 0; dim < this.nDim; dim++) { + for (int i = 0; i < this.getAssignmentsCount(); i++) { + extendedAssignments[dim][i] = this.getAtomIndex(dim, i); + } + extendedAssignments[dim][this.getAssignmentsCount()] = indices[dim]; + } + this.assignments = extendedAssignments; + + return true; + } + public boolean checkDimension(final int dim){ return (dim >= 0) && (dim < this.nDim); } @@ -110,4 +159,9 @@ private boolean checkSpectrumIndex(final int dim, final int indexInSpectrum){ private boolean checkInputListSize(final int size){ return (size == this.getAssignmentsCount()); } + + @Override + public Assignment clone() throws CloneNotSupportedException{ + return (Assignment) super.clone(); + } } From dc6c3741048841086d78e1056224db3e23da885e Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Fri, 9 Nov 2018 18:07:54 +0100 Subject: [PATCH 069/405] - clone method added --- src/casekit/NMR/model/Signal.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/casekit/NMR/model/Signal.java b/src/casekit/NMR/model/Signal.java index 0e67226..5c37489 100644 --- a/src/casekit/NMR/model/Signal.java +++ b/src/casekit/NMR/model/Signal.java @@ -31,7 +31,7 @@ * * @author Michael Wenk [https://github.com/michaelwenk] */ -public class Signal { +public class Signal implements Cloneable { private final int nDim; @@ -150,4 +150,8 @@ public Signal getClone(){ return signalClone; } + @Override + public Signal clone() throws CloneNotSupportedException{ + return (Signal) super.clone(); + } } From 9cbae8c15a410949dedfa75a54625e8380bc5639 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Fri, 9 Nov 2018 18:08:11 +0100 Subject: [PATCH 070/405] - clone method added --- src/casekit/NMR/model/Spectrum.java | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/casekit/NMR/model/Spectrum.java 
b/src/casekit/NMR/model/Spectrum.java index a41548a..c90f3f0 100644 --- a/src/casekit/NMR/model/Spectrum.java +++ b/src/casekit/NMR/model/Spectrum.java @@ -35,7 +35,7 @@ * * @author Michael Wenk [https://github.com/michaelwenk] */ -public class Spectrum { +public class Spectrum implements Cloneable { /** * An arbitrary name or description that can be assigned to this spectrum for identification purposes. @@ -413,6 +413,9 @@ public ArrayList pickSignals(final double shift, final int dim, final d return pickedSignals; } - + @Override + public Spectrum clone() throws CloneNotSupportedException{ + return (Spectrum) super.clone(); + } } From 57499ec1704f1338abbced18cf48fd7c8c0765ba Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Fri, 9 Nov 2018 18:10:03 +0100 Subject: [PATCH 071/405] - added function: checkIndexInAtomContainer --- src/casekit/NMR/Utils.java | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/casekit/NMR/Utils.java b/src/casekit/NMR/Utils.java index 9796919..d9fc661 100644 --- a/src/casekit/NMR/Utils.java +++ b/src/casekit/NMR/Utils.java @@ -67,6 +67,7 @@ import org.openscience.cdk.io.SDFWriter; import org.openscience.cdk.io.iterator.IteratingSDFReader; import org.openscience.cdk.qsar.descriptors.atomic.AtomValenceDescriptor; +import org.openscience.cdk.silent.Atom; import org.openscience.cdk.silent.SilentChemObjectBuilder; import org.openscience.cdk.tools.manipulator.AtomContainerManipulator; import org.openscience.cdk.tools.manipulator.MolecularFormulaManipulator; @@ -662,6 +663,10 @@ public static ArrayList matchShiftsFromXML(final IAtomContainer ac, fin */ public static Integer getHydrogenCountFromMultiplicity(final String mult){ + if(mult == null){ + System.out.println("null!!!"); + return null; + } switch(mult){ case "Q": return 3; @@ -672,6 +677,7 @@ public static Integer getHydrogenCountFromMultiplicity(final String mult){ case "S": return 0; default: + System.out.println("unknown symbol!!"); return null; } } @@ -1255,6 +1261,9 @@ 
public static String getSpectrumNucleiAsString(final Spectrum spectrum){ return specID; } + public static boolean checkIndexInAtomContainer(final IAtomContainer ac, final int atomIndex){ + return ((atomIndex >= 0) && atomIndex < ac.getAtomCount()); + } public static ExecutorService initExecuter(final int nThreads) { return Executors.newFixedThreadPool(nThreads); From f92c4fa7f5cf8f6a82b6658bf359b57a8bb41dbb Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Fri, 9 Nov 2018 18:11:35 +0100 Subject: [PATCH 072/405] - followed the renaming of methods in Assignment class --- src/casekit/NMR/ParseRawData.java | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/casekit/NMR/ParseRawData.java b/src/casekit/NMR/ParseRawData.java index fdece02..7e72562 100644 --- a/src/casekit/NMR/ParseRawData.java +++ b/src/casekit/NMR/ParseRawData.java @@ -386,13 +386,13 @@ public final boolean assignDEPT(final Spectrum spectrum1D_DEPT90, final Spectrum final Assignment assignment1D_13C = this.getAssignment(this.getSpectra().get(CDKConstants.NMRSPECTYPE_1D + "_13C")); for (int i = 0; i < assignment1D_DEPT90.getAssignmentsCount(); i++) { - if (assignment1D_13C.getAssignment(0, matchesIn1DSpectrum_DEPT90.get(i)) >= 0) { - assignment1D_DEPT90.setAssignment(0, i, assignment1D_13C.getAssignment(0, matchesIn1DSpectrum_DEPT90.get(i))); + if (assignment1D_13C.getAtomIndex(0, matchesIn1DSpectrum_DEPT90.get(i)) >= 0) { + assignment1D_DEPT90.setAssignment(0, i, assignment1D_13C.getAtomIndex(0, matchesIn1DSpectrum_DEPT90.get(i))); } } for (int i = 0; i < assignment1D_DEPT135.getAssignmentsCount(); i++) { - if (assignment1D_13C.getAssignment(0, matchesIn1DSpectrum_DEPT135.get(i)) >= 0) { - assignment1D_DEPT135.setAssignment(0, i, assignment1D_13C.getAssignment(0, matchesIn1DSpectrum_DEPT135.get(i))); + if (assignment1D_13C.getAtomIndex(0, matchesIn1DSpectrum_DEPT135.get(i)) >= 0) { + assignment1D_DEPT135.setAssignment(0, i, assignment1D_13C.getAtomIndex(0, 
matchesIn1DSpectrum_DEPT135.get(i))); } } @@ -511,9 +511,9 @@ public final void assignHSQC(final Spectrum spectrum, final double tolProton, fi for (int i = 0; i < assignment2D_HSQC.getAssignmentsCount(); i++) { // if heavy atom i has an assignment in class atom container then assign that index i to belonging protons as index - if (assignment2D_HSQC.getAssignment(1, i) >= 0) { - assignment1D_1H.setAssignment(0, matchesIn1DSpectrum_1H.get(i), assignment2D_HSQC.getAssignment(1, i)); - assignment2D_HSQC.setAssignment(0, i, assignment1D_1H.getAssignment(0, matchesIn1DSpectrum_1H.get(i))); + if (assignment2D_HSQC.getAtomIndex(1, i) >= 0) { + assignment1D_1H.setAssignment(0, matchesIn1DSpectrum_1H.get(i), assignment2D_HSQC.getAtomIndex(1, i)); + assignment2D_HSQC.setAssignment(0, i, assignment1D_1H.getAtomIndex(0, matchesIn1DSpectrum_1H.get(i))); } } } @@ -521,11 +521,11 @@ public final void assignHSQC(final Spectrum spectrum, final double tolProton, fi if(!spectrum.getNuclei()[1].equals("13C")){ final Assignment assignment2D_HSQC = this.getAssignments().get(spectrum.getSpecType() + "_" + Utils.getSpectrumNucleiAsString(spectrum)); for (int i = 0; i < assignment2D_HSQC.getAssignmentsCount(); i++) { - if((assignment2D_HSQC.getAssignment(1, i) > -1)){ - if(this.mol.getAtom(assignment2D_HSQC.getAssignment(1, i)).getImplicitHydrogenCount() == null){ - this.mol.getAtom(assignment2D_HSQC.getAssignment(1, i)).setImplicitHydrogenCount(0); + if((assignment2D_HSQC.getAtomIndex(1, i) > -1)){ + if(this.mol.getAtom(assignment2D_HSQC.getAtomIndex(1, i)).getImplicitHydrogenCount() == null){ + this.mol.getAtom(assignment2D_HSQC.getAtomIndex(1, i)).setImplicitHydrogenCount(0); } - this.mol.getAtom(assignment2D_HSQC.getAssignment(1, i)).setImplicitHydrogenCount(this.mol.getAtom(assignment2D_HSQC.getAssignment(1, i)).getImplicitHydrogenCount() + 1); + this.mol.getAtom(assignment2D_HSQC.getAtomIndex(1, i)).setImplicitHydrogenCount(this.mol.getAtom(assignment2D_HSQC.getAtomIndex(1, 
i)).getImplicitHydrogenCount() + 1); } } } @@ -591,7 +591,7 @@ public final ArrayList getAssignedAtomIndices(final Spectrum spectrum, return atomIndices; } - return Utils.ArrayToArrayList(this.getAssignment(spectrum).getAssignments(dim)); + return Utils.ArrayToArrayList(this.getAssignment(spectrum).getAtomIndices(dim)); } From fcdd8d457a0d8d8f6a6287d80285bc5d80579144 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Fri, 9 Nov 2018 18:13:19 +0100 Subject: [PATCH 073/405] - small changes --- src/casekit/NMR/DB.java | 35 +++++++++++++++++------------------ 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/src/casekit/NMR/DB.java b/src/casekit/NMR/DB.java index e31efc9..90b1b45 100644 --- a/src/casekit/NMR/DB.java +++ b/src/casekit/NMR/DB.java @@ -36,6 +36,7 @@ import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; +import org.openscience.cdk.CDKConstants; import org.openscience.cdk.exception.CDKException; import org.openscience.cdk.interfaces.IAtomContainer; import org.openscience.cdk.interfaces.IAtomContainerSet; @@ -357,12 +358,15 @@ public static String[][] parseNMRShiftDBSpectrum(final String shiftsString){ } /** - * Sets shifts and implicit hydrogen counts in atoms of an atom container + * Sets shifts, intensities and implicit hydrogen counts in atoms of an atom container * by means of given spectrum property string. * * @param ac IAtomContainer to set * @param NMRShiftDBSpectrum Property string of spectrum in NMRShiftDB format. 
* @return + * + * @see DB#parseNMRShiftDBSpectrum(java.lang.String) + * @see Utils#getHydrogenCountFromMultiplicity(java.lang.String) */ public static boolean setNMRShiftDBShiftsToAtomContainer(final IAtomContainer ac, final String NMRShiftDBSpectrum){ @@ -371,23 +375,19 @@ public static boolean setNMRShiftDBShiftsToAtomContainer(final IAtomContainer ac } final String[][] spectrumStringArray = DB.parseNMRShiftDBSpectrum(ac.getProperty(NMRShiftDBSpectrum)); - int atomIndexSpectrumDB; - Integer multiplicity; + Integer atomIndexSpectrum; + String multiplicity; Double shift; - for (int k = 0; k < ac.getAtomCount(); k++) { - shift = null; - multiplicity = null; - for (int i = 0; i < spectrumStringArray.length; i++) { - atomIndexSpectrumDB = Integer.parseInt(spectrumStringArray[i][2]); - if (atomIndexSpectrumDB == k) { - shift = Double.parseDouble(spectrumStringArray[i][0]); - multiplicity = Utils.getHydrogenCountFromMultiplicity(spectrumStringArray[i][1].substring(spectrumStringArray[i][1].length() - 1)); - break; - } - } - ac.getAtom(k).setProperty(Utils.getNMRShiftConstant(ac.getAtom(k).getSymbol()), shift); - ac.getAtom(k).setImplicitHydrogenCount(multiplicity); - } + + for (int i = 0; i < spectrumStringArray.length; i++) { + atomIndexSpectrum = Integer.parseInt(spectrumStringArray[i][2]); + shift = Double.parseDouble(spectrumStringArray[i][0]); + multiplicity = spectrumStringArray[i][1].substring(spectrumStringArray[i][1].length() - 1); + if(Utils.checkIndexInAtomContainer(ac, atomIndexSpectrum)){ + ac.getAtom(atomIndexSpectrum).setProperty(Utils.getNMRShiftConstant(ac.getAtom(atomIndexSpectrum).getSymbol()), shift); + ac.getAtom(atomIndexSpectrum).setImplicitHydrogenCount(Utils.getHydrogenCountFromMultiplicity(multiplicity)); + } + } return true; } @@ -398,7 +398,6 @@ public static Spectrum NMRShiftDBSpectrumToSpectrum(final String NMRShiftDBSpect return null; } final String[][] spectrumStringArray = DB.parseNMRShiftDBSpectrum(NMRShiftDBSpectrum); - final Spectrum 
spectrum = new Spectrum(new String[]{Utils.getIsotopeIdentifier(atomType)}); String multiplicity; Double shift, intensity; From 3130b16c76231a048f3e5d39d007ad1c69ea00d2 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Wed, 13 Feb 2019 01:37:59 +0100 Subject: [PATCH 074/405] - small change in function call --- src/casekit/HOSECodePredictor.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/casekit/HOSECodePredictor.java b/src/casekit/HOSECodePredictor.java index c93d8e0..d7661c4 100644 --- a/src/casekit/HOSECodePredictor.java +++ b/src/casekit/HOSECodePredictor.java @@ -274,7 +274,7 @@ public void generatePicture(IAtomContainer ac, String path) throws IOException, */ void fixExplicitHydrogens(IAtomContainer ac) { - Utils.setExplicitToImplicitHydrogens(ac); + Utils.convertExplicitToImplicitHydrogens(ac); } private void parseArgs(String[] args) throws ParseException From 3c48043b24b6407ef2f22dcdb4cadac1c4ebb227 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Wed, 13 Feb 2019 01:41:58 +0100 Subject: [PATCH 075/405] - most methods were marked as deprecated - SQL methods for NMRShiftDB - obsolet methods to get SSC information separately - methods for getting and preparing SSC information from NMRShiftDB edited/added --- src/casekit/NMR/DB.java | 197 ++++++++++++++++++++++++++++++++++------ 1 file changed, 168 insertions(+), 29 deletions(-) diff --git a/src/casekit/NMR/DB.java b/src/casekit/NMR/DB.java index 90b1b45..ded2fd5 100644 --- a/src/casekit/NMR/DB.java +++ b/src/casekit/NMR/DB.java @@ -23,6 +23,7 @@ */ package casekit.NMR; +import casekit.NMR.model.Assignment; import casekit.NMR.model.Signal; import casekit.NMR.model.Spectrum; import java.io.FileNotFoundException; @@ -36,7 +37,6 @@ import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; -import org.openscience.cdk.CDKConstants; import org.openscience.cdk.exception.CDKException; import org.openscience.cdk.interfaces.IAtomContainer; import 
org.openscience.cdk.interfaces.IAtomContainerSet; @@ -54,30 +54,107 @@ public class DB { * Returns the molecules of a given MOL/SDF file. * This function sets the molecule aromaticity (with allowed exocyclic pi * bonds) by using the - * {@link #setAromaticitiesInAtomContainer(org.openscience.cdk.interfaces.IAtomContainer, int)} + * {@link Utils#setAromaticitiesInAtomContainer(org.openscience.cdk.interfaces.IAtomContainer)} * function. * * @param pathToNMRShiftDB path to NMRShiftDB file - * @param maxCycleSize maximum cycle size for setting the aromaticity in a - * molecule + * @param setAromaticity whether to set aromaticities in structures or not * @return * @throws FileNotFoundException * @throws CDKException + * @deprecated */ - public static IAtomContainerSet getStructuresFromSDFile(final String pathToNMRShiftDB, final int maxCycleSize) throws FileNotFoundException, CDKException { - + public static IAtomContainerSet getStructuresFromSDFile(final String pathToNMRShiftDB, final boolean setAromaticity) throws FileNotFoundException, CDKException { final IAtomContainerSet acSet = new AtomContainerSet(); final IteratingSDFReader iterator = new IteratingSDFReader( new FileReader(pathToNMRShiftDB), SilentChemObjectBuilder.getInstance() ); + IAtomContainer ac; while (iterator.hasNext()) { - acSet.addAtomContainer(casekit.NMR.Utils.setAromaticitiesInAtomContainer(iterator.next(), maxCycleSize)); + ac = iterator.next(); + if(setAromaticity){ + Utils.setAromaticitiesInAtomContainer(ac); + } + acSet.addAtomContainer(ac); } return acSet; } + + + /** + * Returns the spectra of a given MOL/SDF file containing NMRShiftDB properties. 
+ * + * @param pathToNMRShiftDB path to NMRShiftDB file + * @param NMRShiftDBSpectrumProperty spectrum property name to use + * @param atomType atomType of requested spectra + * @return + * @throws FileNotFoundException + * @throws CDKException + * @deprecated + */ + public static ArrayList getSpectraFromSDFile(final String pathToNMRShiftDB, final String NMRShiftDBSpectrumProperty , final String atomType) throws FileNotFoundException, CDKException { + final ArrayList spectrumSet = new ArrayList<>(); + final IteratingSDFReader iterator = new IteratingSDFReader( + new FileReader(pathToNMRShiftDB), + SilentChemObjectBuilder.getInstance() + ); + IAtomContainer ac; + while (iterator.hasNext()) { + ac = iterator.next(); + if((ac == null) || (ac.getProperty(NMRShiftDBSpectrumProperty) == null)){ + spectrumSet.add(null); + } else { + spectrumSet.add(DB.NMRShiftDBSpectrumToSpectrum(ac.getProperty(NMRShiftDBSpectrumProperty), atomType)); + } + } + + return spectrumSet; + } + + /** + * Returns 3-tuples consisting of structure, spectrum and assignments + * for each molecule in the given NMRShiftDB file. 
+ * + * @param pathToNMRShiftDB path to NMRShiftDB file + * @param NMRShiftDBSpectrumProperty spectrum property string to use + * @param atomType atomType of requested spectra + * @return + * @throws FileNotFoundException + * @throws CDKException + */ + public static HashMap getSSCComponentsFromNMRShiftDB(final String pathToNMRShiftDB, final String NMRShiftDBSpectrumProperty, final String atomType) throws FileNotFoundException, CDKException { + final HashMap structureSetWithSpectra = new HashMap<>(); + final IteratingSDFReader iterator = new IteratingSDFReader( + new FileReader(pathToNMRShiftDB), + SilentChemObjectBuilder.getInstance() + ); + IAtomContainer ac; + Spectrum spectrum; + Assignment assignment; + while (iterator.hasNext()) { + ac = iterator.next(); + Utils.setAromaticitiesInAtomContainer(ac); + + spectrum = DB.NMRShiftDBSpectrumToSpectrum(ac.getProperty(NMRShiftDBSpectrumProperty), atomType); + assignment = DB.NMRShiftDBSpectrumToAssignment(ac.getProperty(NMRShiftDBSpectrumProperty), atomType); + if ((ac != null) && (spectrum != null)) { + structureSetWithSpectra.put(structureSetWithSpectra.size(), new Object[]{ac, spectrum, assignment}); + } + } + + return structureSetWithSpectra; + } + + /** + * + * @param pathToDB + * @return + * @throws FileNotFoundException + * @deprecated + */ public static HashSet getAtomTypesInDB(final String pathToDB) throws FileNotFoundException{ final HashSet atomTypes = new HashSet<>(); final IteratingSDFReader iterator = new IteratingSDFReader( @@ -91,12 +168,36 @@ public static HashSet getAtomTypesInDB(final String pathToDB) throws Fil return atomTypes; } - + /** + * + * @param server + * @param options + * @param user + * @param pwd + * @return + * @throws SQLException + * @deprecated + */ public static Connection getDBConnection(final String server, final String options, final String user, final String pwd) throws SQLException { return DriverManager.getConnection(server + "?" 
+ options, user, pwd); } + /** + * + * @param DBConnection + * @param bondsSet + * @param elem + * @param neighborElems + * @param minShift + * @param maxShift + * @param stepSize + * @return + * @throws FileNotFoundException + * @throws IOException + * @throws SQLException + * @deprecated + */ public static int[][] countNeighborhoodBonds(final Connection DBConnection, final String[] bondsSet, final String elem, String[] neighborElems, final int minShift, final int maxShift, final int stepSize) throws FileNotFoundException, IOException, SQLException { if (DBConnection == null || stepSize < 1) { @@ -170,7 +271,14 @@ public static int[][] countNeighborhoodBonds(final Connection DBConnection, fina return neighborhoodCountsMatrix; } - + /** + * + * @param DBConnection + * @param query + * @return + * @throws SQLException + * @deprecated + */ public static ResultSet getResultSet(final Connection DBConnection, final String query) throws SQLException{ if (DBConnection == null) { @@ -192,6 +300,7 @@ public static ResultSet getResultSet(final Connection DBConnection, final String * @param elem * @return * @throws SQLException + * @deprecated */ public static ArrayList getSignalIDsFromNMRShiftDB(final Connection DBConnection, final double minShift, final double maxShift, final String mult, final Double minIntens, final Double maxIntens, final String elem) throws SQLException { @@ -223,6 +332,19 @@ public static ArrayList getSignalIDsFromNMRShiftDB(final Connection DBC } // currently only for 1D spectra + + /** + * + * @param DBConnection + * @param spectrum + * @param shiftDev + * @param intensDev + * @param stepSize + * @param dim + * @return + * @throws SQLException + * @deprecated + */ public static HashMap> matchSpectrumAgainstDB(final Connection DBConnection, final Spectrum spectrum, final double shiftDev, final Double intensDev, final int stepSize, final int dim) throws SQLException{ final HashMap> hits = new HashMap<>(); double shift; @@ -246,6 +368,7 @@ public static 
HashMap> matchSpectrumAgainstDB(final * @param elem * @return * @throws SQLException + * @deprecated */ public static HashMap> getLookupTableFromNMRShiftDB(final Connection DBConnection, final String elem) throws SQLException { @@ -284,6 +407,7 @@ public static HashMap> getLookupTableFromNMRShiftDB(fi * @param elem * @return * @throws SQLException + * @deprecated */ public static HashMap getRMS(final Connection DBConnection, final double minShift, final double maxShift, final String elem) throws SQLException { @@ -331,27 +455,27 @@ public static ArrayList getNMRShiftDBSpectra(final IAtomContainer ac, fi /** * Creates a two dimensional array of a given NMRShiftDB NMR entry - * with all shift values and atom indices. + * with all signal shift values, intensities, multiplicities and atom indices. * - * @param shiftsString + * @param NMRShiftDBSpectrum * @return two dimensional array: - * 1. dimension: shift entry (row); - * 2. dimension: shift value (column 1), atom index in atom container (column 2) + * 1. dimension: signal index (row); + * 2. 
dimension: signal shift value (column 1), signal intensity (column 2), + * signal multiplicity (column 3), atom index in structure (column 4) */ - public static String[][] parseNMRShiftDBSpectrum(final String shiftsString){ - - if(shiftsString.trim().isEmpty()){ + public static String[][] parseNMRShiftDBSpectrum(final String NMRShiftDBSpectrum){ + if(NMRShiftDBSpectrum.trim().isEmpty()){ return new String[][]{}; } - String[] signalSplit; - final String[] shiftsSplit = shiftsString.split("\\|"); - final String[][] values = new String[shiftsSplit.length][3]; + final String[] shiftsSplit = NMRShiftDBSpectrum.split("\\|"); + final String[][] values = new String[shiftsSplit.length][4]; for (int i = 0; i < shiftsSplit.length; i++) { signalSplit = shiftsSplit[i].split(";"); - values[i][0] = signalSplit[0]; - values[i][1] = signalSplit[1]; - values[i][2] = signalSplit[2]; + values[i][0] = signalSplit[0]; // shift value + values[i][1] = signalSplit[1].substring(0, signalSplit[1].length() - 1); // intensity + values[i][2] = signalSplit[1].substring(signalSplit[1].length() - 1); // multiplicity + values[i][3] = signalSplit[2]; // atom index } return values; @@ -367,25 +491,25 @@ public static String[][] parseNMRShiftDBSpectrum(final String shiftsString){ * * @see DB#parseNMRShiftDBSpectrum(java.lang.String) * @see Utils#getHydrogenCountFromMultiplicity(java.lang.String) + * @deprecated */ public static boolean setNMRShiftDBShiftsToAtomContainer(final IAtomContainer ac, final String NMRShiftDBSpectrum){ - if (ac.getProperty(NMRShiftDBSpectrum) == null) { return false; } final String[][] spectrumStringArray = DB.parseNMRShiftDBSpectrum(ac.getProperty(NMRShiftDBSpectrum)); Integer atomIndexSpectrum; - String multiplicity; +// String multiplicity; Double shift; for (int i = 0; i < spectrumStringArray.length; i++) { - atomIndexSpectrum = Integer.parseInt(spectrumStringArray[i][2]); + atomIndexSpectrum = Integer.parseInt(spectrumStringArray[i][3]); shift = 
Double.parseDouble(spectrumStringArray[i][0]); - multiplicity = spectrumStringArray[i][1].substring(spectrumStringArray[i][1].length() - 1); +// multiplicity = spectrumStringArray[i][3]; if(Utils.checkIndexInAtomContainer(ac, atomIndexSpectrum)){ ac.getAtom(atomIndexSpectrum).setProperty(Utils.getNMRShiftConstant(ac.getAtom(atomIndexSpectrum).getSymbol()), shift); - ac.getAtom(atomIndexSpectrum).setImplicitHydrogenCount(Utils.getHydrogenCountFromMultiplicity(multiplicity)); +// ac.getAtom(atomIndexSpectrum).setImplicitHydrogenCount(Utils.getHydrogenCountFromMultiplicity(multiplicity)); } } @@ -403,11 +527,26 @@ public static Spectrum NMRShiftDBSpectrumToSpectrum(final String NMRShiftDBSpect Double shift, intensity; for (int i = 0; i < spectrumStringArray.length; i++) { shift = Double.parseDouble(spectrumStringArray[i][0]); - multiplicity = spectrumStringArray[i][1].substring(spectrumStringArray[i][1].length() - 1); - intensity = Double.parseDouble(spectrumStringArray[i][1].substring(0, spectrumStringArray[i][1].length() - 1)); + intensity = Double.parseDouble(spectrumStringArray[i][1]); + multiplicity = spectrumStringArray[i][2]; spectrum.addSignal(new Signal(new String[]{Utils.getIsotopeIdentifier(atomType)}, new Double[]{shift}, intensity, multiplicity)); } + Utils.setSpectrumEquivalences(spectrum); return spectrum; } + + public static Assignment NMRShiftDBSpectrumToAssignment(final String NMRShiftDBSpectrum, final String atomType) { + if ((NMRShiftDBSpectrum == null) || NMRShiftDBSpectrum.trim().isEmpty()) { + return null; + } + final String[][] NMRShiftDBSpectrumStringArray = DB.parseNMRShiftDBSpectrum(NMRShiftDBSpectrum); + final Spectrum spectrum = DB.NMRShiftDBSpectrumToSpectrum(NMRShiftDBSpectrum, atomType); + final Assignment assignment = new Assignment(spectrum); + for (int i = 0; i < NMRShiftDBSpectrumStringArray.length; i++) { + assignment.setAssignment(0, i, new Integer(NMRShiftDBSpectrumStringArray[i][3])); + } + + return assignment; + } } From 
7823f1a81d7c8661251082cea50d3a1e93081262 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Wed, 13 Feb 2019 01:42:49 +0100 Subject: [PATCH 076/405] - smal changes --- src/casekit/NMR/ParseRawData.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/casekit/NMR/ParseRawData.java b/src/casekit/NMR/ParseRawData.java index 7e72562..89c8623 100644 --- a/src/casekit/NMR/ParseRawData.java +++ b/src/casekit/NMR/ParseRawData.java @@ -35,6 +35,7 @@ import org.openscience.cdk.interfaces.IAtom; import org.openscience.cdk.interfaces.IAtomContainer; import org.openscience.cdk.interfaces.IAtomType; +import org.openscience.cdk.interfaces.IBond; import org.openscience.cdk.interfaces.IMolecularFormula; import org.openscience.cdk.silent.SilentChemObjectBuilder; import org.openscience.cdk.tools.manipulator.MolecularFormulaManipulator; @@ -724,7 +725,7 @@ private void setBond(final int index1, final int index2) { if (this.mol.getBond(this.mol.getAtom(index1), this.mol.getAtom(index2)) != null) { this.mol.removeBond(this.mol.getAtom(index1), this.mol.getAtom(index2)); } - this.mol.addBond(index1, index2, Utils.getBondTypeFromHybridizations(this.mol.getAtom(index1), this.mol.getAtom(index2))); + this.mol.addBond(index1, index2, IBond.Order.UNSET); } From 0262e032ae7f40d1661c01bdb9594fb8bdce7811 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Wed, 13 Feb 2019 01:44:52 +0100 Subject: [PATCH 077/405] - methods edited/added --- src/casekit/NMR/Utils.java | 294 +++++++++++++++++++++++++------------ 1 file changed, 199 insertions(+), 95 deletions(-) diff --git a/src/casekit/NMR/Utils.java b/src/casekit/NMR/Utils.java index d9fc661..33b3f9a 100644 --- a/src/casekit/NMR/Utils.java +++ b/src/casekit/NMR/Utils.java @@ -45,8 +45,6 @@ import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.TimeUnit; -import java.util.logging.Level; -import java.util.logging.Logger; import org.w3c.dom.Document; import 
javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; @@ -55,6 +53,7 @@ import org.openscience.cdk.CDKConstants; import org.openscience.cdk.aromaticity.Aromaticity; import org.openscience.cdk.aromaticity.ElectronDonation; +import org.openscience.cdk.atomtype.CDKAtomTypeMatcher; import org.openscience.cdk.depict.DepictionGenerator; import org.openscience.cdk.exception.CDKException; import org.openscience.cdk.graph.CycleFinder; @@ -66,10 +65,10 @@ import org.openscience.cdk.interfaces.IMolecularFormula; import org.openscience.cdk.io.SDFWriter; import org.openscience.cdk.io.iterator.IteratingSDFReader; -import org.openscience.cdk.qsar.descriptors.atomic.AtomValenceDescriptor; -import org.openscience.cdk.silent.Atom; import org.openscience.cdk.silent.SilentChemObjectBuilder; +import org.openscience.cdk.tools.CDKHydrogenAdder; import org.openscience.cdk.tools.manipulator.AtomContainerManipulator; +import org.openscience.cdk.tools.manipulator.AtomTypeManipulator; import org.openscience.cdk.tools.manipulator.MolecularFormulaManipulator; import org.w3c.dom.NodeList; import org.xml.sax.SAXException; @@ -903,7 +902,6 @@ public static ArrayList> getBondOrderSets(final String[] return bondOrderSets; } - public static String getStringFromBondOrder(final IBond.Order order) { switch (order) { case SINGLE: @@ -915,8 +913,7 @@ public static String getStringFromBondOrder(final IBond.Order order) { default: return null; } - } - + } public static IBond.Order getBondOrderFromString(final String order){ switch(order){ @@ -944,7 +941,7 @@ public static void writeTextFile(final String pathToOutput, final String content * @throws IOException * @throws CDKException */ - public static void generatePicture(IAtomContainer ac, String path) throws IOException, CDKException { + public static void generatePicture(final IAtomContainer ac, final String path) throws IOException, CDKException { final DepictionGenerator dg = new DepictionGenerator().withSize(1200, 
1200).withAtomColors().withFillToFit().withAtomNumbers(); dg.depict(ac).writeTo(path); } @@ -988,13 +985,16 @@ public static ArrayList getOutliers(ArrayList input) { * @param data * @return */ - public static double getMedian(final List data) { + public static Double getMedian(final List data) { + if(data == null){ + return null; + } if(data.size() == 1){ - return data.get(0); + return data.get(0).doubleValue(); } Collections.sort(data); if (data.size() % 2 == 1) { - return data.get(data.size() / 2); + return data.get(data.size() / 2).doubleValue(); } else { return (data.get(data.size() / 2 - 1) + data.get(data.size() / 2)) / 2.0; } @@ -1006,7 +1006,10 @@ public static double getMedian(final List data) { * @param data * @return */ - public static double getMedian(final ArrayList data) { + public static Double getMedian(final ArrayList data) { + if ((data == null) || data.isEmpty()) { + return null; + } if(data.size() == 1){ return data.get(0); } @@ -1016,7 +1019,7 @@ public static double getMedian(final ArrayList data) { } else { return (data.get(data.size() / 2 - 1) + data.get(data.size() / 2)) / 2.0; } - } + } /** @@ -1025,6 +1028,9 @@ public static double getMedian(final ArrayList data) { * @return */ public static Double getMean(final ArrayList data) { + if(data == null){ + return null; + } double sum = 0; int nullCounter = 0; for (final Double d : data) { @@ -1044,6 +1050,9 @@ public static Double getMean(final ArrayList data) { * @return */ public static Double getMean(final Double[] data) { + if(data == null){ + return null; + } double sum = 0; int nullCounter = 0; for (final Double d : data) { @@ -1128,7 +1137,7 @@ public static String getFileFormat(final String pathToFile) { * @return */ public static Double getRMS(final ArrayList data) { - if(data.isEmpty()){ + if((data == null) || data.isEmpty()){ return null; } if (data.size() == 1) { @@ -1154,7 +1163,6 @@ public static Double getRMS(final ArrayList data) { * @return */ public static HashMap 
getRMS(final HashMap> lookup){ - final HashMap rms = new HashMap<>(); Double rmsInList; for (final String key : lookup.keySet()) { @@ -1167,51 +1175,178 @@ public static HashMap getRMS(final HashMap " + existingBondsOrderSum + " + " + implicitHydrogenCount + " = " + (existingBondsOrderSum + implicitHydrogenCount) + " <= " + atom.getValency() + " ? "); + + return (existingBondsOrderSum + implicitHydrogenCount) >= atom.getValency(); + } - public static Void combineHashMaps(final HashMap> hoseLookupToKeep, final HashMap> hoseLookup){ + public static void addImplicitHydrogens(final IAtomContainer ac) throws CDKException{ + final CDKAtomTypeMatcher matcher = CDKAtomTypeMatcher.getInstance(ac.getBuilder()); + for (IAtom atom : ac.atoms()) { + IAtomType type = matcher.findMatchingAtomType(ac, atom); + AtomTypeManipulator.configure(atom, type); + } + CDKHydrogenAdder adder = CDKHydrogenAdder.getInstance(ac.getBuilder()); + adder.addImplicitHydrogens(ac); + } + + public static int countElements(final String input){ + int counter = 0; + for (int k = 0; k < input.length(); k++) { + // Check for uppercase letters + if (Character.isLetter(input.charAt(k)) && Character.isUpperCase(input.charAt(k))) { + counter++; + } + } - for (String hose : hoseLookup.keySet()) { - if(!hoseLookupToKeep.containsKey(hose)){ - hoseLookupToKeep.put(hose, new ArrayList<>()); + return counter; + } + + public static ArrayList getComponents(final String symbols){ + final ArrayList components = new ArrayList<>(); + for (int i = 0; i < symbols.length(); i++) { + if ((i + 1 < symbols.length()) + && Character.isLowerCase(symbols.charAt(i + 1))) { + components.add(symbols.substring(i, i + 2)); + i++; + } else { + components.add(symbols.substring(i, i + 1)); } - hoseLookupToKeep.get(hose).addAll(hoseLookup.get(hose)); + } + + return components; + } + + /** + * + * @param lookup + * @return + */ + public static HashMap getMedian(final HashMap> lookup) { + + final HashMap medians = new HashMap<>(); + Double 
medianInList; + for (final String key : lookup.keySet()) { + medianInList = Utils.getMedian(lookup.get(key)); + if (medianInList != null) { + medians.put(key, medianInList); + } + } + + return medians; + } + + + public static void combineHashMaps(final HashMap> hoseLookupToExtend, final HashMap> hoseLookup){ + for (final String hose : hoseLookup.keySet()) { + if(!hoseLookupToExtend.containsKey(hose)){ + hoseLookupToExtend.put(hose, new ArrayList<>()); + } + hoseLookupToExtend.get(hose).addAll(hoseLookup.get(hose)); } - return null; } + /** + * Checks whether a structure contains explicit hydrogen atoms or not. + * + * @param ac structure to check + * @return + */ + public static boolean containsExplicitHydrogens(final IAtomContainer ac){ + for (final IAtom atomA : ac.atoms()) { + // check each atom whether it is an hydrogen + if (atomA.getSymbol().equals("H")) { + return true; + } + } + + return false; + } /** + * Stores all explicit hydrogens as implicit counter for the bonded heavy + * atoms and removes those from the atom container. Also, a HashMap + * containing non-hydrogen atoms and its indices + * before the removals will be returned which one can use for atom index + * comparison (before and after the removals) later. * - * @param ac + * @param ac the structure to convert + * @return + * + * @see #containsExplicitHydrogens(org.openscience.cdk.interfaces.IAtomContainer) */ - public static void setExplicitToImplicitHydrogens(final IAtomContainer ac){ + public static HashMap convertExplicitToImplicitHydrogens(final IAtomContainer ac){ + // create a list of atom indices which one can use for index comparison (before vs. 
after) after removing the explict hydrogens + final HashMap atomIndices = new HashMap<>(); final List toRemoveList = new ArrayList<>(); IAtom atomB; for (final IAtom atomA : ac.atoms()) { - if (atomA.getAtomicNumber() == 1) { + // check each atom whether it is an hydrogen; + // if yes then store (increase) the number of implicit hydrogens + // for its bonded heavy atom + if (atomA.getSymbol().equals("H")) { atomB = ac.getConnectedAtomsList(atomA).get(0); if(atomB.getImplicitHydrogenCount() == null){ atomB.setImplicitHydrogenCount(0); } atomB.setImplicitHydrogenCount(atomB.getImplicitHydrogenCount() + 1); - toRemoveList.add(atomA); + toRemoveList.add(atomA); + } else { + // store all non-hydrogen atoms and their indices + atomIndices.put(atomA, atomA.getIndex()); } + } + // remove all explicit hydrogen atoms for (final IAtom iAtom : toRemoveList) { ac.removeAtom(iAtom); } + + return atomIndices; } - - public static IAtomContainer setAromaticitiesInAtomContainer(final IAtomContainer ac, final int maxCycleSize) throws CDKException { + /** + * + * @param ac + * @return + */ + public static int getExplicitHydrogenCount(final IAtomContainer ac){ + final List toRemoveList = new ArrayList<>(); + IAtom atomB; + for (final IAtom atomA : ac.atoms()) { + if (atomA.getAtomicNumber() == 1) { + atomB = ac.getConnectedAtomsList(atomA).get(0); + if(atomB.getImplicitHydrogenCount() == null){ + atomB.setImplicitHydrogenCount(0); + } + atomB.setImplicitHydrogenCount(atomB.getImplicitHydrogenCount() + 1); + toRemoveList.add(atomA); + } + } + return toRemoveList.size(); + } + + + public static void setAromaticitiesInAtomContainer(final IAtomContainer ac) throws CDKException { AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(ac); final ElectronDonation model = ElectronDonation.cdkAllowingExocyclic(); - final CycleFinder cycles = Cycles.or(Cycles.all(), Cycles.all(maxCycleSize)); + final CycleFinder cycles = Cycles.all(ac.getAtomCount()); final Aromaticity aromaticity = new 
Aromaticity(model, cycles); aromaticity.apply(ac); - - return ac; } @@ -1261,6 +1396,25 @@ public static String getSpectrumNucleiAsString(final Spectrum spectrum){ return specID; } + public static Spectrum setSpectrumEquivalences(final Spectrum spectrum){ + int equivalentSignalIndex; + for (final Signal signal : spectrum.getSignals()) { + equivalentSignalIndex = -1; + for (final int closestSignalIndex : spectrum.pickSignals(signal.getShift(0), 0, 0.0)) { + if (spectrum.getSignalIndex(signal) <= closestSignalIndex) { + continue; + } + if (signal.getMultiplicity().equals(spectrum.getSignal(closestSignalIndex).getMultiplicity())) { + equivalentSignalIndex = closestSignalIndex; + break; + } + } + spectrum.setEquivalence(spectrum.getSignalIndex(signal), equivalentSignalIndex); + } + + return spectrum; + } + public static boolean checkIndexInAtomContainer(final IAtomContainer ac, final int atomIndex){ return ((atomIndex >= 0) && atomIndex < ac.getAtomCount()); } @@ -1282,75 +1436,25 @@ public static void stopExecuter(final ExecutorService executor, final long secon } } - - // ######################################################################################################## - // test functions -> not ready to use - - - /** - * Returns a bond type for two bond atoms from its hybridization. - * CURRENTLY ONLY SINGLE BOND DETECTION POSSIBLE!!! - * This function detects single, double and triple bonds and returns a - * bond order from {@link org.openscience.cdk.interfaces.IBond.Order}. - * If no bond type could be detected then - * {@link org.openscience.cdk.interfaces.IBond.Order#UNSET} will be - * returned. - * For single and double bond detection, the following elements are defined - * so far: C, O, N, S. - * For triple bond detection, the following elements are defined so far: C, - * N. - * + /** + * Returns the bond order for a numeric order value. 
* - * @param atom1 - * @param atom2 + * @param orderNumber * @return */ - public static IBond.Order getBondTypeFromHybridizations(final IAtom atom1, final IAtom atom2) { - -// final String atomType1 = atom1.getSymbol(); - final IAtomType.Hybridization hybridization1 = atom1.getHybridization(); -// final String atomType2 = atom2.getSymbol(); - final IAtomType.Hybridization hybridization2 = atom2.getHybridization(); - - if (hybridization1 == null && hybridization2 == null) { - return IBond.Order.UNSET; - } + public static IBond.Order getBondOrder(final int orderNumber) { + for (IBond.Order order : IBond.Order.values()){ + if(order.numeric() == orderNumber){ + return order; + } + } -// IBond.Order bondOrder1 = IBond.Order.UNSET; -// IBond.Order bondOrder2 = IBond.Order.UNSET; - - // single bond detection, the "3" means all SP3 hybrdidizations like SP3, SP3D2 or PLANAR3 - if ((hybridization1 != null) //&& (atomType1.equals("C") || atomType1.equals("O") || atomType1.equals("N") || atomType1.equals("S")) - && hybridization1.toString().contains("3")) { - return IBond.Order.SINGLE; - } - if ((hybridization2 != null) //&& (atomType2.equals("C") || atomType2.equals("O") || atomType2.equals("N") || atomType2.equals("S")) - && hybridization2.toString().contains("3")) { - return IBond.Order.SINGLE; - } -// // double bond detection -// if ((atomType1.equals("C") && (hybridization1.equals(IAtomType.Hybridization.SP1) || hybridization1.equals(IAtomType.Hybridization.SP2))) -// || ((atomType1.equals("O") || atomType1.equals("N") || atomType1.equals("S")) && (hybridization1.equals(IAtomType.Hybridization.SP2)))) { -// bondOrder1 = IBond.Order.DOUBLE; -// } -// if ((atomType2.equals("C") && (hybridization2.equals(IAtomType.Hybridization.SP1) || hybridization2.equals(IAtomType.Hybridization.SP2))) -// || ((atomType2.equals("O") || atomType2.equals("N") || atomType2.equals("S")) && hybridization2.equals(IAtomType.Hybridization.SP2))) { -// bondOrder2 = IBond.Order.DOUBLE; -// } -// // 
triple bond detection -// if ((atomType1.equals("C") && (hybridization1.equals(IAtomType.Hybridization.SP1))) -// && (atomType2.equals("N") && hybridization2.equals(IAtomType.Hybridization.SP1))) { -// bondOrder1 = IBond.Order.TRIPLE; -// } -// if ((atomType2.equals("N") && (hybridization2.equals(IAtomType.Hybridization.SP1))) -// && (atomType1.equals("C") && hybridization1.equals(IAtomType.Hybridization.SP1))) { -// bondOrder2 = IBond.Order.TRIPLE; -// } - -// if (bondOrder1.equals(bondOrder2)) { -// return bondOrder1; -// } + return null; + } - return IBond.Order.UNSET; + public static Integer getBondOrderInteger(final IBond.Order order) { + return (order != null) ? order.numeric() : null; } + + } From 9aa13675267444d0fad0b034fc07028fa01643bf Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Wed, 13 Feb 2019 01:46:01 +0100 Subject: [PATCH 078/405] - description changes and bugfix in addAssignment() --- src/casekit/NMR/model/Assignment.java | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/casekit/NMR/model/Assignment.java b/src/casekit/NMR/model/Assignment.java index 1a287f0..f15887b 100644 --- a/src/casekit/NMR/model/Assignment.java +++ b/src/casekit/NMR/model/Assignment.java @@ -98,7 +98,7 @@ public Integer getSignalIndex(final int dim, final int atomIndexInStructure){ } } - return null; + return -1; } public int[] getAtomIndices(final int dim){ @@ -132,8 +132,15 @@ public int getSetAssignmentsCount(final int dim){ return setAssignmentsCounter; } - public boolean addAssignment(final int[] indices){ - if(indices.length != this.nDim){ + /** + * Adds a new assignment entry for a further signal. The given atom indices + * will be stored as atom index for each dimension of the signal/spectrum. 
+ * + * @param atomIndicesInStructure + * @return + */ + public boolean addAssignment(final int[] atomIndicesInStructure){ + if(atomIndicesInStructure.length != this.nDim){ return false; } final int[][] extendedAssignments = new int[this.nDim][this.getAssignmentsCount()+1]; @@ -141,7 +148,7 @@ public boolean addAssignment(final int[] indices){ for (int i = 0; i < this.getAssignmentsCount(); i++) { extendedAssignments[dim][i] = this.getAtomIndex(dim, i); } - extendedAssignments[dim][this.getAssignmentsCount()] = indices[dim]; + extendedAssignments[dim][this.getAssignmentsCount()] = atomIndicesInStructure[dim]; } this.assignments = extendedAssignments; From a61d800002c74c11e531e58aef75307f91ada97f Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Wed, 13 Feb 2019 01:48:51 +0100 Subject: [PATCH 079/405] - changes/additions in/of methods for peak picking in spectrum and equivalent signal(s) classes --- src/casekit/NMR/model/Spectrum.java | 223 ++++++++++++++++++++++------ 1 file changed, 176 insertions(+), 47 deletions(-) diff --git a/src/casekit/NMR/model/Spectrum.java b/src/casekit/NMR/model/Spectrum.java index c90f3f0..4fb2d52 100644 --- a/src/casekit/NMR/model/Spectrum.java +++ b/src/casekit/NMR/model/Spectrum.java @@ -30,12 +30,15 @@ import java.util.ArrayList; import java.util.Arrays; +import java.util.Comparator; +import java.util.HashMap; +import java.util.HashSet; /** * * @author Michael Wenk [https://github.com/michaelwenk] */ -public class Spectrum implements Cloneable { +public class Spectrum { /** * An arbitrary name or description that can be assigned to this spectrum for identification purposes. 
@@ -62,9 +65,9 @@ public class Spectrum implements Cloneable { private Double spectrometerFrequency; private String solvent; private String standard; - private final ArrayList signals = new ArrayList<>(); private final ArrayList equivalences = new ArrayList<>(); + private ArrayList[] equivalentSignals; public Spectrum(final String[] nuclei) { @@ -72,7 +75,6 @@ public Spectrum(final String[] nuclei) { this.nDim = this.nuclei.length; } - public String[] getNuclei(){ return this.nuclei; } @@ -120,38 +122,59 @@ public final boolean setShift(final double shift, final int dim, final int signa public int getSignalCount() { return this.signals.size(); } - + + /** + * Adds a list of signals to this spectrum. + * + * @param signals list of signals to add + * @return + */ + public boolean addSignals(final ArrayList signals){ + for (final Signal signal : signals) { + if (!this.checkDimCount(signal.getDimCount()) || !this.checkNuclei(signal.getNuclei())) { + return false; + } + } + for (final Signal signal : signals) { + this.addSignal(signal); + } + + return true; + } + /** - * Adds a Signal ({@link casekit.NMR.model.Signal}) to this Spectrum class object at the end. - * @param signal - * @return + * Adds a signal to this spectrum. + * + * @param signal signal to add + * @return */ public boolean addSignal(final Signal signal) { - return this.addSignal(signal, null); + if(!this.checkDimCount(signal.getDimCount()) || !this.checkNuclei(signal.getNuclei())){ + return false; + } + // add signal at the end of signal list + this.signals.add(signal); + this.equivalences.add(-1); + this.updateEquivalentSignalClasses(); + + return true; } /** - * Adds a Signal ({@link casekit.NMR.model.Signal}) to this Spectrum class object at given index. - * @param signal - * @param index index where to insert the signal, if null the signal will be added at the end of signal list - * @return + * Adds a signal to this spectrum and stores an equivalent signal index. 
+ * + * @param signal signal to add + * @param equivalentSignalIndex index of equivalent signal in this spectrum + * @return */ - public boolean addSignal(final Signal signal, final Integer index) { + public boolean addSignal(final Signal signal, final int equivalentSignalIndex) { if(!this.checkDimCount(signal.getDimCount()) || !this.checkNuclei(signal.getNuclei())){ return false; } - // is index valid? if yes then insert it there - if(this.checkSignalIndex(index)){ - this.signals.add(index, signal); - this.equivalences.add(index, -1); - // if not then check for null value and add signal at the end - } else if(index == null){ - this.signals.add(signal); - this.equivalences.add(-1); - // no valid index value, nothing to insert or add in spectrum - } else { - return false; - } + // add signal at the end of signal list + this.signals.add(signal); + this.equivalences.add(equivalentSignalIndex); + this.updateEquivalentSignalClasses(); return true; } @@ -166,6 +189,7 @@ public boolean removeSignal(final int signalIndex){ } this.signals.remove(signalIndex); this.equivalences.remove(signalIndex); + this.updateEquivalentSignalClasses(); return true; } @@ -307,6 +331,93 @@ public ArrayList getSignals(){ return this.signals; } + public Boolean hasEquivalences(final int signalIndex){ + if(!this.checkSignalIndex(signalIndex)){ + return null; + } + + return (this.getEquivalence(signalIndex) != -1) || (this.getEquivalences().contains(signalIndex)); + } + + private ArrayList searchEquivalentSignals(final int signalIndex){ + if(!this.checkSignalIndex(signalIndex)){ + return null; + } + final ArrayList equivalentSignalIndices = new ArrayList<>(); + // case 1: signal was first input signal (root) of an equivalence class and is actually not knowing any of its equivalences; collect all equivalent signals + if(this.getEquivalence(signalIndex) == -1){ + for (int i = 0; i < this.getEquivalences().size(); i++) { + if((this.getEquivalences().get(i) != -1) && (this.getEquivalences().get(i) 
== signalIndex)) { + equivalentSignalIndices.add(i); + } + } + } else { + // case 2: signal was not the first input signal of that equivalent class; store the class root signal + equivalentSignalIndices.add(this.getEquivalences().get(signalIndex)); + } + // check all stored signals for further equivalent signals (i.e. for the added root signal in case 2) + for (int i = 0; i < equivalentSignalIndices.size(); i++) { + for (int j = 0; j < this.getEquivalences().size(); j++) { + // do not store the own signal index in own equ. signal class + if(j == signalIndex){ + continue; + } + if ((this.getEquivalences().get(j) != -1) + && (Integer.compare(this.getEquivalences().get(j), equivalentSignalIndices.get(i)) == 0) + && !equivalentSignalIndices.contains(j)) { + equivalentSignalIndices.add(j); + } + } + } + + return equivalentSignalIndices; + } + + private void updateEquivalentSignalClasses(){ + this.equivalentSignals = new ArrayList[this.getSignalCount()]; + for(int i = 0; i < this.getSignalCount(); i++) { + this.equivalentSignals[i] = this.searchEquivalentSignals(i); +// this.equivalentSignals.put(i, this.searchEquivalentSignals(i)); + } + } + + /** + * Returns equivalent signals for requested signal. + * + * @param signalIndex + * @return + */ + public ArrayList getEquivalentSignals(final int signalIndex){ + if(!this.checkSignalIndex(signalIndex)){ + return null; + } + + return this.equivalentSignals[signalIndex]; + } + + /** + * Returns a hashmap of equivalent signal classes. + * The key set of that hashmap is just a numerical class index and is not + * belonging to any signal. 
+ * + * @return + */ + public HashMap> getEquivalentSignalClasses(){ + this.updateEquivalentSignalClasses(); + // create a new HashMap object to return, containing the key signal index to have a full equivalent signal class + final HashMap> equivalentSignalClasses = new HashMap<>(); + final HashSet storedSignalIndices = new HashSet<>(); + for (int i = 0; i < this.getSignalCount(); i++) { + if (!storedSignalIndices.contains(i)) { + equivalentSignalClasses.put(equivalentSignalClasses.size(), new ArrayList<>(this.equivalentSignals[i])); + equivalentSignalClasses.get(equivalentSignalClasses.size() - 1).add(i); + storedSignalIndices.addAll(equivalentSignalClasses.get(equivalentSignalClasses.size() - 1)); + } + } + + return equivalentSignalClasses; + } + public ArrayList getEquivalences(){ return this.equivalences; } @@ -324,19 +435,20 @@ public boolean setEquivalence(final int signalIndex, final int isEquivalentToSig return false; } this.equivalences.set(signalIndex, isEquivalentToSignalIndex); + this.updateEquivalentSignalClasses(); return true; } /** * Returns the position of an NMRSignal the List - * @param signal - * @return + * @param signal + * @return */ public int getSignalIndex(final Signal signal) { - for (int f = 0; f < this.signals.size(); f++) { - if (this.signals.get(f) == signal) { - return f; + for (int s = 0; s < this.signals.size(); s++) { + if (this.signals.get(s) == signal) { + return s; } } return -1; @@ -368,12 +480,12 @@ public String getStandard() { /** - * Returns the signal closest to the shift sought. If no Signal is found within the interval - * defined by pickprecision, null is returned. - * @param shift - * @param dim - * @param pickPrecision - * @return + * Returns the signal index closest to the given shift. If no Signal is found within the interval + * defined by pickprecision, -1 is returned. 
+ * @param shift query shift + * @param dim dimension in spectrum to look in + * @param pickPrecision tolerance value for search window + * @return */ public int pickClosestSignal(final double shift, final int dim, final double pickPrecision) { int matchIndex = -1; @@ -392,30 +504,47 @@ public int pickClosestSignal(final double shift, final int dim, final double pic } /** - * Returns a List with signal indices within the interval defined by pickPrecision. If none is found - * an empty ArrayList is returned. - * @param shift - * @param dim - * @param pickPrecision - * @return + * Returns a list of signal indices within the interval defined by + * pickPrecision. That list is sorted by the distances to the query shift. + * If none is found an empty ArrayList is returned. + * @param shift query shift + * @param dim dimension in spectrum to look in + * @param pickPrecision tolerance value for search window + * @return */ - public ArrayList pickSignals(final double shift, final int dim, final double pickPrecision) { + public ArrayList pickSignals(final Double shift, final int dim, final double pickPrecision) { final ArrayList pickedSignals = new ArrayList<>(); if(!this.checkDimension(dim)){ return pickedSignals; } for (int s = 0; s < this.getSignalCount(); s++) { - if (Math.abs(this.getShift(s, dim) - shift) < pickPrecision) { + if (Math.abs(this.getShift(s, dim) - shift) <= pickPrecision) { pickedSignals.add(s); } } + // sort signal indices by distance to query shift + pickedSignals.sort(new Comparator() { + @Override + public int compare(final Integer pickedSignalIndex1, final Integer pickedSignalIndex2) { + return Double.compare(Math.abs(shift - getShift(pickedSignalIndex1, dim)), Math.abs(shift - getShift(pickedSignalIndex2, dim))); + } + }); return pickedSignals; } - @Override - public Spectrum clone() throws CloneNotSupportedException{ - return (Spectrum) super.clone(); + public Spectrum getClone() { + final Spectrum clone = new Spectrum(this.nuclei); + for (int i 
= 0; i < this.getSignalCount(); i++) { + clone.addSignal(this.getSignal(i), this.getEquivalence(i)); + } + clone.setSpecDescription(this.description); + clone.setSolvent(this.solvent); + clone.setSpecType(this.specType); + clone.setSpectrometerFrequency(this.spectrometerFrequency); + clone.setStandard(this.standard); + + return clone; } } From 16d44633b2ce9d1d083b80c0b57c9bc8fcf01ced Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Sat, 13 Apr 2019 00:37:36 +0200 Subject: [PATCH 080/405] - added functions for MongoDB services - changes in SSC or spectra retrievals from NMRShiftDB --- src/casekit/NMR/DB.java | 296 ++++++++++++++++++++++++++++++++++------ 1 file changed, 257 insertions(+), 39 deletions(-) diff --git a/src/casekit/NMR/DB.java b/src/casekit/NMR/DB.java index ded2fd5..73e41df 100644 --- a/src/casekit/NMR/DB.java +++ b/src/casekit/NMR/DB.java @@ -26,6 +26,12 @@ import casekit.NMR.model.Assignment; import casekit.NMR.model.Signal; import casekit.NMR.model.Spectrum; +import com.mongodb.MongoClient; +import com.mongodb.MongoClientOptions; +import com.mongodb.MongoCredential; +import com.mongodb.ServerAddress; +import com.mongodb.client.MongoCollection; +import com.mongodb.client.MongoDatabase; import java.io.FileNotFoundException; import java.io.FileReader; import java.io.IOException; @@ -35,8 +41,11 @@ import java.sql.SQLException; import java.sql.Statement; import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; import java.util.HashMap; import java.util.HashSet; +import org.bson.Document; import org.openscience.cdk.exception.CDKException; import org.openscience.cdk.interfaces.IAtomContainer; import org.openscience.cdk.interfaces.IAtomContainerSet; @@ -84,48 +93,96 @@ public static IAtomContainerSet getStructuresFromSDFile(final String pathToNMRSh /** - * Returns the spectra of a given MOL/SDF file containing NMRShiftDB properties. 
+ * Returns all spectra for each molecule and a given nucleus which exist as + * property in a NMRSHiftDB SDF. * * @param pathToNMRShiftDB path to NMRShiftDB file - * @param NMRShiftDBSpectrumProperty spectrum property name to use - * @param atomType atomType of requested spectra + * @param nucleus nucleus of requested spectra * @return * @throws FileNotFoundException * @throws CDKException - * @deprecated + * */ - public static ArrayList getSpectraFromSDFile(final String pathToNMRShiftDB, final String NMRShiftDBSpectrumProperty , final String atomType) throws FileNotFoundException, CDKException { - - final ArrayList spectrumSet = new ArrayList<>(); + public static ArrayList> getSpectraFromNMRShiftDB(final String pathToNMRShiftDB, final String nucleus) throws FileNotFoundException, CDKException { + final ArrayList> spectraSet = new ArrayList<>(); final IteratingSDFReader iterator = new IteratingSDFReader( new FileReader(pathToNMRShiftDB), SilentChemObjectBuilder.getInstance() ); IAtomContainer ac; + Spectrum spectrum; + ArrayList spectra; + HashMap spectraStrings; + String spectrumIndexInRecord, solvent; while (iterator.hasNext()) { ac = iterator.next(); - if((ac == null) || (ac.getProperty(NMRShiftDBSpectrumProperty) == null)){ - spectrumSet.add(null); - } else { - spectrumSet.add(DB.NMRShiftDBSpectrumToSpectrum(ac.getProperty(NMRShiftDBSpectrumProperty), atomType)); + if(ac == null){ + continue; + } + spectraStrings = DB.getSpectraStrings(ac, nucleus); + if(spectraStrings.isEmpty() || (ac.getProperty("Solvent") == null)){ + continue; + } + spectra = new ArrayList<>(); + for (final String spectrumPropertyString : spectraStrings.keySet()) { + spectrum = DB.NMRShiftDBSpectrumToSpectrum(spectraStrings.get(spectrumPropertyString), nucleus); + if(spectrum == null){ + continue; + } + spectrumIndexInRecord = spectrumPropertyString.split("\\s")[spectrumPropertyString.split("\\s").length - 1]; + solvent = DB.getSolvent(ac.getProperty("Solvent"), spectrumIndexInRecord); + 
if(solvent == null){ + continue; + } + spectrum.setSolvent(solvent); + + if(Utils.getAtomTypeIndicesByElement(ac, nucleus.replaceAll("\\d", "")).size() != spectrum.getSignalCount()){ + continue; + } + + spectra.add(new Object[]{spectrum, DB.NMRShiftDBSpectrumToAssignment(spectraStrings.get(spectrumPropertyString), nucleus)}); } + spectraSet.add(spectra); } - return spectrumSet; + return spectraSet; + } + + public static String getSolvent(final String solventPropertyString, final String spectrumIndexInRecord){ + final String[] solventPropertyStringSplit = solventPropertyString.split(":"); + String solvent; + for (int i = 0; i < solventPropertyStringSplit.length; i++) { + if (solventPropertyStringSplit[i].endsWith(spectrumIndexInRecord)) { + solvent = solventPropertyStringSplit[i + 1]; + if(solvent.substring(solvent.length() - 1).matches("\\d")){ + solvent = solvent.substring(0, solvent.length() - 1); + } + if(solvent.substring(solvent.length() - 1).matches("\\d")){ + solvent = solvent.substring(0, solvent.length() - 1); + } + solvent = solvent.substring(0, solvent.length() - 1); + + return solvent; + } + } + + return null; } /** * Returns 3-tuples consisting of structure, spectrum and assignments - * for each molecule in the given NMRShiftDB file. + * for each valid molecule record in the given NMRShiftDB file. Valid means + * here that each molecule record has to contain the given spectrum + * property string as well as the number of signals in that spectrum has to + * be the same as atoms of that atom type in molecule. 
* * @param pathToNMRShiftDB path to NMRShiftDB file * @param NMRShiftDBSpectrumProperty spectrum property string to use - * @param atomType atomType of requested spectra * @return * @throws FileNotFoundException * @throws CDKException */ - public static HashMap getSSCComponentsFromNMRShiftDB(final String pathToNMRShiftDB, final String NMRShiftDBSpectrumProperty, final String atomType) throws FileNotFoundException, CDKException { + public static HashMap getSSCComponentsFromNMRShiftDB(final String pathToNMRShiftDB, final String NMRShiftDBSpectrumProperty) throws FileNotFoundException, CDKException { final HashMap structureSetWithSpectra = new HashMap<>(); final IteratingSDFReader iterator = new IteratingSDFReader( new FileReader(pathToNMRShiftDB), @@ -134,20 +191,130 @@ public static HashMap getSSCComponentsFromNMRShiftDB(final St IAtomContainer ac; Spectrum spectrum; Assignment assignment; + final String nucleus = DB.getNucleusFromNMRShiftDBSpectrumProperty(NMRShiftDBSpectrumProperty); + final String spectrumIndexInRecord = NMRShiftDBSpectrumProperty.split("\\s")[NMRShiftDBSpectrumProperty.split("\\s").length - 1]; while (iterator.hasNext()) { ac = iterator.next(); - Utils.setAromaticitiesInAtomContainer(ac); + // skip molecules which not contain any of requested spectrum information + if(ac.getProperty(NMRShiftDBSpectrumProperty) == null){ + continue; + } + spectrum = DB.NMRShiftDBSpectrumToSpectrum(ac.getProperty(NMRShiftDBSpectrumProperty), nucleus); + // if no spectrum could be built or the number of signals in spectrum is different than the atom number in molecule + if((spectrum == null) || Utils.getAtomTypeIndicesByElement(ac, nucleus.replaceAll("\\d", "")).size() != spectrum.getSignalCount()){ + continue; + } + if(ac.getProperty("Solvent") != null){ + spectrum.setSolvent(DB.getSolvent(ac.getProperty("Solvent"), spectrumIndexInRecord)); + } + if(ac.getProperty("Field Strength [MHz]") != null){ + for (final String fieldStrength : ac.getProperty("Field Strength 
[MHz]").toString().split("\\s")) { + if (fieldStrength.startsWith(spectrumIndexInRecord + ":")) { + try { + spectrum.setSpectrometerFrequency(Double.parseDouble(fieldStrength.split(spectrumIndexInRecord + ":")[1])); + } catch (NumberFormatException e) { +// spectrum.setSpectrometerFrequency(null); + } + break; + } + } + } - spectrum = DB.NMRShiftDBSpectrumToSpectrum(ac.getProperty(NMRShiftDBSpectrumProperty), atomType); - assignment = DB.NMRShiftDBSpectrumToAssignment(ac.getProperty(NMRShiftDBSpectrumProperty), atomType); - if ((ac != null) && (spectrum != null)) { + assignment = DB.NMRShiftDBSpectrumToAssignment(ac.getProperty(NMRShiftDBSpectrumProperty), nucleus); +// if ((ac != null) && (spectrum != null)) { structureSetWithSpectra.put(structureSetWithSpectra.size(), new Object[]{ac, spectrum, assignment}); - } +// } + + Utils.setAromaticitiesInAtomContainer(ac); } return structureSetWithSpectra; } + /** + * Returns a hashmap containing combined keys (by "_") of solvents + * and lists of calculated deviations between all given spectra for a + * nucleus in molecule record as values.
+ * Here, only molecule records in NMRShiftDB file are considered which have + * at least two different spectra for same nucleus.
+ * Example: "Spectrum 13C 0", "Spectrum 13C 1" will be used for given + * nucleus 13C. + * + * + * @param pathToNMRShiftDB + * @param nucleus + * @return + * @throws FileNotFoundException + * @throws CDKException + */ + public static HashMap> getSolventDeviations(final String pathToNMRShiftDB, final String nucleus) throws FileNotFoundException, CDKException{ + int signalCount; + Spectrum spectrum; + Assignment assignment; + final ArrayList> spectraSets = DB.getSpectraFromNMRShiftDB(pathToNMRShiftDB, nucleus); + HashMap> shiftsPerAtom; + HashMap> solventsPerAtom; + ArrayList solvents; + String[] solventsToSort; + + final HashMap> deviations = new HashMap<>(); + String combiKey; + + for (final ArrayList spectraSetInRecord : spectraSets) { + shiftsPerAtom = new HashMap<>(); + solventsPerAtom = new HashMap<>(); + signalCount = -1; + for (final Object[] spectrumAndAssignment : spectraSetInRecord) { + spectrum = (Spectrum) spectrumAndAssignment[0]; + assignment = (Assignment) spectrumAndAssignment[1]; + if (signalCount == -1) { + signalCount = spectrum.getSignalCount(); + } else if (signalCount != spectrum.getSignalCount()) { + continue; + } + for (final int atomIndex : assignment.getAtomIndices(0)) { + if (!shiftsPerAtom.containsKey(atomIndex)) { + shiftsPerAtom.put(atomIndex, new ArrayList<>()); + solventsPerAtom.put(atomIndex, new ArrayList<>()); + } + shiftsPerAtom.get(atomIndex).add(spectrum.getSignal(assignment.getSignalIndex(0, atomIndex)).getShift(0)); + solventsPerAtom.get(atomIndex).add(spectrum.getSolvent()); + } + } + if (shiftsPerAtom.isEmpty() || (shiftsPerAtom.get(Collections.min(shiftsPerAtom.keySet())).size() < 2)) { + continue; + } + solvents = new ArrayList<>(solventsPerAtom.get(Collections.min(solventsPerAtom.keySet()))); +// if(Collections.frequency(solvents, "Unreported") + Collections.frequency(solvents, "Unknown") > solvents.size() - 2){ +// continue; +// } + + for (final int atomIndex : shiftsPerAtom.keySet()) { + for (int s1 = 0; s1 < 
solvents.size(); s1++) { +// if(solvents.get(s1).equals("Unreported") || solvents.get(s1).equals("Unknown")){ +// continue; +// } + for (int s2 = s1 + 1; s2 < solvents.size(); s2++) { +// if (solvents.get(s2).equals("Unreported") || solvents.get(s2).equals("Unknown")) { +// continue; +// } + solventsToSort = new String[2]; + solventsToSort[0] = solvents.get(s1); + solventsToSort[1] = solvents.get(s2); + Arrays.sort(solventsToSort); + combiKey = solventsToSort[0] + "_" + solventsToSort[1]; + if (!deviations.containsKey(combiKey)) { + deviations.put(combiKey, new ArrayList<>()); + } + deviations.get(combiKey).add(Math.abs(shiftsPerAtom.get(atomIndex).get(s1) - shiftsPerAtom.get(atomIndex).get(s2))); + } + } + } + } + + return deviations; + } + /** * * @param pathToDB @@ -167,7 +334,7 @@ public static HashSet getAtomTypesInDB(final String pathToDB) throws Fil return atomTypes; } - + /** * * @param server @@ -439,13 +606,12 @@ public static HashMap getRMS(final Connection DBConnection, fina } - public static ArrayList getNMRShiftDBSpectra(final IAtomContainer ac, final String elem) { - - ArrayList props = (ArrayList) (ArrayList) (new ArrayList<>(ac.getProperties().keySet())); - final ArrayList spectra = new ArrayList<>(); - for (String prop : props) { - if (prop.contains("Spectrum " + casekit.NMR.Utils.getIsotopeIdentifier(elem))) { - spectra.add(ac.getProperty(prop)); + public static HashMap getSpectraStrings(final IAtomContainer ac, final String nucleus) { + final ArrayList props = (ArrayList) (ArrayList) (new ArrayList<>(ac.getProperties().keySet())); + final HashMap spectra = new HashMap<>(); + for (final String prop : props) { + if (prop.startsWith("Spectrum " + nucleus)) { + spectra.put(prop, ac.getProperty(prop)); } } @@ -491,7 +657,7 @@ public static String[][] parseNMRShiftDBSpectrum(final String NMRShiftDBSpectrum * * @see DB#parseNMRShiftDBSpectrum(java.lang.String) * @see Utils#getHydrogenCountFromMultiplicity(java.lang.String) - * @deprecated + * 
@deprecated */ public static boolean setNMRShiftDBShiftsToAtomContainer(final IAtomContainer ac, final String NMRShiftDBSpectrum){ if (ac.getProperty(NMRShiftDBSpectrum) == null) { @@ -516,37 +682,89 @@ public static boolean setNMRShiftDBShiftsToAtomContainer(final IAtomContainer ac return true; } + public static String getNucleusFromNMRShiftDBSpectrumProperty(final String NMRShiftDBSpectrumProperty){ + return NMRShiftDBSpectrumProperty.split(" ")[1]; + } - public static Spectrum NMRShiftDBSpectrumToSpectrum(final String NMRShiftDBSpectrum, final String atomType){ + public static Spectrum NMRShiftDBSpectrumToSpectrum(final String NMRShiftDBSpectrum, final String nucleus){ if ((NMRShiftDBSpectrum == null) || NMRShiftDBSpectrum.trim().isEmpty()) { return null; } final String[][] spectrumStringArray = DB.parseNMRShiftDBSpectrum(NMRShiftDBSpectrum); - final Spectrum spectrum = new Spectrum(new String[]{Utils.getIsotopeIdentifier(atomType)}); + final Spectrum spectrum = new Spectrum(new String[]{nucleus}); String multiplicity; Double shift, intensity; - for (int i = 0; i < spectrumStringArray.length; i++) { - shift = Double.parseDouble(spectrumStringArray[i][0]); - intensity = Double.parseDouble(spectrumStringArray[i][1]); - multiplicity = spectrumStringArray[i][2]; - spectrum.addSignal(new Signal(new String[]{Utils.getIsotopeIdentifier(atomType)}, new Double[]{shift}, intensity, multiplicity)); + try { + for (int i = 0; i < spectrumStringArray.length; i++) { + shift = Double.parseDouble(spectrumStringArray[i][0]); + intensity = Double.parseDouble(spectrumStringArray[i][1]); + multiplicity = spectrumStringArray[i][2]; + spectrum.addSignal(new Signal(new String[]{nucleus}, new Double[]{shift}, multiplicity, intensity)); + } + Utils.setSpectrumEquivalences(spectrum); + } catch (Exception e) { + + return null; } - Utils.setSpectrumEquivalences(spectrum); return spectrum; } - public static Assignment NMRShiftDBSpectrumToAssignment(final String NMRShiftDBSpectrum, final 
String atomType) { + public static Assignment NMRShiftDBSpectrumToAssignment(final String NMRShiftDBSpectrum, final String nucleus) { if ((NMRShiftDBSpectrum == null) || NMRShiftDBSpectrum.trim().isEmpty()) { return null; } final String[][] NMRShiftDBSpectrumStringArray = DB.parseNMRShiftDBSpectrum(NMRShiftDBSpectrum); - final Spectrum spectrum = DB.NMRShiftDBSpectrumToSpectrum(NMRShiftDBSpectrum, atomType); + final Spectrum spectrum = DB.NMRShiftDBSpectrumToSpectrum(NMRShiftDBSpectrum, nucleus); final Assignment assignment = new Assignment(spectrum); for (int i = 0; i < NMRShiftDBSpectrumStringArray.length; i++) { assignment.setAssignment(0, i, new Integer(NMRShiftDBSpectrumStringArray[i][3])); } return assignment; + } + + public static MongoClient login(final String mongoUser, final String mongoPassword, final String mongoAuthDB) throws CDKException { + MongoClient mongo; + try { + // Creating a Mongo client + mongo = new MongoClient( + new ServerAddress("127.0.0.1", 27017), + MongoCredential.createCredential( + mongoUser, + mongoAuthDB, + mongoPassword.toCharArray()), + MongoClientOptions.builder().build()); + System.out.println("Login to MongoDB was successfull"); + // Accessing the database + } catch (Exception e) { + e.printStackTrace(); + System.err.println(Thread.currentThread().getStackTrace()[1].getMethodName() + ": could not connect to MongoDB!"); + + return null; + } + + return mongo; + } + + public static MongoDatabase getDatabase(final MongoClient mongo, final String mongoDBName){ + return mongo.getDatabase(mongoDBName); + } + + public static MongoCollection getCollection(final MongoClient mongo, final String mongoDBName, final String mongoDBCollection) { + final MongoDatabase database = DB.getDatabase(mongo, mongoDBName); + if (database == null) { + return null; + } + System.out.println("Access to database \"" + mongoDBName + "\" was successfull"); + // Retrieving a collection + final MongoCollection collection = 
database.getCollection(mongoDBCollection); + System.out.println("Retrieval of collection \"" + mongoDBCollection + "\" was successfull -> size: " + collection.countDocuments()); + + return collection; + } + + public static void logout(final MongoClient mongo) { + mongo.close(); } } From e76d8af24a622a60779d55eb70c20673030ec4ee Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Sat, 13 Apr 2019 00:38:11 +0200 Subject: [PATCH 081/405] - disabled countNeighborhoodBonds() method --- src/casekit/NMR/Process.java | 110 +++++++++++++++++------------------ 1 file changed, 55 insertions(+), 55 deletions(-) diff --git a/src/casekit/NMR/Process.java b/src/casekit/NMR/Process.java index 80f8328..5f82120 100644 --- a/src/casekit/NMR/Process.java +++ b/src/casekit/NMR/Process.java @@ -216,61 +216,61 @@ public int[][] getNeighborhoodBondsCountMatrix(){ - public void countNeighborhoodBonds(final IAtomContainerSet acSet, final String[] bondsSet, final String elem, final ArrayList neighborElems, final int minShift, final int maxShift, final int stepSize) throws FileNotFoundException, IOException{ - - if (stepSize < 1) { - System.err.println("stepSize < 1 not allowed!!!"); - return; - } - // creation of frequency counting matrix and shift indices holder - this.neighborhoodCountsMatrix = new int[stepSize * (maxShift - minShift + 1)][3 + 4 + neighborElems.size() * bondsSet.length]; - this.shiftIndicesInACSet.clear(); - for (int i = 0; i < stepSize * maxShift; i++) { - for (int j = 0; j < 3 + 4 + neighborElems.size() * bondsSet.length; j++) { - neighborhoodCountsMatrix[i][j] = 0; - } - this.shiftIndicesInACSet.put(i, new ArrayList<>()); - } - int atomIndexDB, shiftDBInt; double shiftDBDouble; IAtomContainer acDB; - // go through all molecules in DB - for (int k = 0; k < acSet.getAtomContainerCount(); k++) { - acDB = acSet.getAtomContainer(k); - // for all DB entries containing a spectrum for the current query atom type - for (final String shiftsDB : DB.getNMRShiftDBSpectra(acDB, elem)) { 
- if (shiftsDB == null) { - continue; - } - String[][] shiftsDBvalues = casekit.NMR.DB.parseNMRShiftDBSpectrum(shiftsDB); - for (String[] shiftsDBvalue : shiftsDBvalues) { - atomIndexDB = Integer.parseInt(shiftsDBvalue[2]); - // sometimes the DB atom index is wrong and out of array range - if (atomIndexDB > acDB.getAtomCount() - 1) { - continue; - } - shiftDBDouble = Math.round(Double.parseDouble(shiftsDBvalue[0]) * stepSize) / (double) stepSize; - // if DB shift value out of min-max-range then skip this shift - if(shiftDBDouble < minShift || shiftDBDouble > maxShift - 1){ - continue; - } - shiftDBInt = (int) (shiftDBDouble * stepSize); - this.neighborhoodCountsMatrix[shiftDBInt - minShift][0] += 1; // increase number of this shift occurence - this.neighborhoodCountsMatrix[shiftDBInt - minShift][1] += (acDB.getAtom(atomIndexDB).isInRing()) ? 1 : 0; // increase if atom is a ring member - this.neighborhoodCountsMatrix[shiftDBInt - minShift][2] += (acDB.getAtom(atomIndexDB).isAromatic()) ? 1 : 0; // increase if atom is aromatic - this.neighborhoodCountsMatrix[shiftDBInt - minShift][3] += ((acDB.getAtom(atomIndexDB).getImplicitHydrogenCount() != null) && (acDB.getAtom(atomIndexDB).getImplicitHydrogenCount() == 0)) ? 1 : 0; // qC count or equivalents, e.g. qN - this.neighborhoodCountsMatrix[shiftDBInt - minShift][4] += ((acDB.getAtom(atomIndexDB).getImplicitHydrogenCount() != null) && (acDB.getAtom(atomIndexDB).getImplicitHydrogenCount() == 1)) ? 1 : 0; // CH count or equivalents, e.g. NH - this.neighborhoodCountsMatrix[shiftDBInt - minShift][5] += ((acDB.getAtom(atomIndexDB).getImplicitHydrogenCount() != null) && (acDB.getAtom(atomIndexDB).getImplicitHydrogenCount() == 2)) ? 1 : 0; // CH2 count or equivalents, e.g. NH2 - this.neighborhoodCountsMatrix[shiftDBInt - minShift][6] += ((acDB.getAtom(atomIndexDB).getImplicitHydrogenCount() != null) && (acDB.getAtom(atomIndexDB).getImplicitHydrogenCount() == 3)) ? 1 : 0; // CH3 count or equivalents, e.g. 
NH3 - // add counts for a specific atom to matrix m - int[] counts = casekit.NMR.Utils.getNeighborhoodBondsCount(acDB, atomIndexDB, bondsSet, neighborElems); - for (int i = 0; i < counts.length; i++) { - this.neighborhoodCountsMatrix[shiftDBInt - minShift][3 + 4 + i] += counts[i]; - } - // add this atom container index and atom index within it to belonging hash map - this.shiftIndicesInACSet.get(shiftDBInt).add(new Integer[]{k, atomIndexDB}); - } - } - } - } +// public void countNeighborhoodBonds(final IAtomContainerSet acSet, final String[] bondsSet, final String elem, final ArrayList neighborElems, final int minShift, final int maxShift, final int stepSize) throws FileNotFoundException, IOException{ +// +// if (stepSize < 1) { +// System.err.println("stepSize < 1 not allowed!!!"); +// return; +// } +// // creation of frequency counting matrix and shift indices holder +// this.neighborhoodCountsMatrix = new int[stepSize * (maxShift - minShift + 1)][3 + 4 + neighborElems.size() * bondsSet.length]; +// this.shiftIndicesInACSet.clear(); +// for (int i = 0; i < stepSize * maxShift; i++) { +// for (int j = 0; j < 3 + 4 + neighborElems.size() * bondsSet.length; j++) { +// neighborhoodCountsMatrix[i][j] = 0; +// } +// this.shiftIndicesInACSet.put(i, new ArrayList<>()); +// } +// int atomIndexDB, shiftDBInt; double shiftDBDouble; IAtomContainer acDB; +// // go through all molecules in DB +// for (int k = 0; k < acSet.getAtomContainerCount(); k++) { +// acDB = acSet.getAtomContainer(k); +// // for all DB entries containing a spectrum for the current query atom type +// for (final String shiftsDB : DB.getNMRShiftDBSpectra(acDB, elem)) { +// if (shiftsDB == null) { +// continue; +// } +// String[][] shiftsDBvalues = casekit.NMR.DB.parseNMRShiftDBSpectrum(shiftsDB); +// for (String[] shiftsDBvalue : shiftsDBvalues) { +// atomIndexDB = Integer.parseInt(shiftsDBvalue[2]); +// // sometimes the DB atom index is wrong and out of array range +// if (atomIndexDB > 
acDB.getAtomCount() - 1) { +// continue; +// } +// shiftDBDouble = Math.round(Double.parseDouble(shiftsDBvalue[0]) * stepSize) / (double) stepSize; +// // if DB shift value out of min-max-range then skip this shift +// if(shiftDBDouble < minShift || shiftDBDouble > maxShift - 1){ +// continue; +// } +// shiftDBInt = (int) (shiftDBDouble * stepSize); +// this.neighborhoodCountsMatrix[shiftDBInt - minShift][0] += 1; // increase number of this shift occurence +// this.neighborhoodCountsMatrix[shiftDBInt - minShift][1] += (acDB.getAtom(atomIndexDB).isInRing()) ? 1 : 0; // increase if atom is a ring member +// this.neighborhoodCountsMatrix[shiftDBInt - minShift][2] += (acDB.getAtom(atomIndexDB).isAromatic()) ? 1 : 0; // increase if atom is aromatic +// this.neighborhoodCountsMatrix[shiftDBInt - minShift][3] += ((acDB.getAtom(atomIndexDB).getImplicitHydrogenCount() != null) && (acDB.getAtom(atomIndexDB).getImplicitHydrogenCount() == 0)) ? 1 : 0; // qC count or equivalents, e.g. qN +// this.neighborhoodCountsMatrix[shiftDBInt - minShift][4] += ((acDB.getAtom(atomIndexDB).getImplicitHydrogenCount() != null) && (acDB.getAtom(atomIndexDB).getImplicitHydrogenCount() == 1)) ? 1 : 0; // CH count or equivalents, e.g. NH +// this.neighborhoodCountsMatrix[shiftDBInt - minShift][5] += ((acDB.getAtom(atomIndexDB).getImplicitHydrogenCount() != null) && (acDB.getAtom(atomIndexDB).getImplicitHydrogenCount() == 2)) ? 1 : 0; // CH2 count or equivalents, e.g. NH2 +// this.neighborhoodCountsMatrix[shiftDBInt - minShift][6] += ((acDB.getAtom(atomIndexDB).getImplicitHydrogenCount() != null) && (acDB.getAtom(atomIndexDB).getImplicitHydrogenCount() == 3)) ? 1 : 0; // CH3 count or equivalents, e.g. 
NH3 +// // add counts for a specific atom to matrix m +// int[] counts = casekit.NMR.Utils.getNeighborhoodBondsCount(acDB, atomIndexDB, bondsSet, neighborElems); +// for (int i = 0; i < counts.length; i++) { +// this.neighborhoodCountsMatrix[shiftDBInt - minShift][3 + 4 + i] += counts[i]; +// } +// // add this atom container index and atom index within it to belonging hash map +// this.shiftIndicesInACSet.get(shiftDBInt).add(new Integer[]{k, atomIndexDB}); +// } +// } +// } +// } From 5d8fdbbc032dd5d4692d3844bcc9e51e1d4338a0 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Sat, 13 Apr 2019 00:38:56 +0200 Subject: [PATCH 082/405] - changes and additions --- src/casekit/NMR/Utils.java | 148 +++++++++++++++++++++++++------------ 1 file changed, 102 insertions(+), 46 deletions(-) diff --git a/src/casekit/NMR/Utils.java b/src/casekit/NMR/Utils.java index 33b3f9a..ea48fec 100644 --- a/src/casekit/NMR/Utils.java +++ b/src/casekit/NMR/Utils.java @@ -38,6 +38,7 @@ import java.io.OutputStreamWriter; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; @@ -49,6 +50,7 @@ import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; +import org.apache.commons.lang3.ArrayUtils; import org.apache.commons.lang3.StringUtils; import org.openscience.cdk.CDKConstants; import org.openscience.cdk.aromaticity.Aromaticity; @@ -66,6 +68,7 @@ import org.openscience.cdk.io.SDFWriter; import org.openscience.cdk.io.iterator.IteratingSDFReader; import org.openscience.cdk.silent.SilentChemObjectBuilder; +import org.openscience.cdk.similarity.Tanimoto; import org.openscience.cdk.tools.CDKHydrogenAdder; import org.openscience.cdk.tools.manipulator.AtomContainerManipulator; import org.openscience.cdk.tools.manipulator.AtomTypeManipulator; @@ -152,7 +155,6 @@ public static void SDFtoLSD(final String 
pathSDF, final String pathOut, final St } - /** * Returns a hashmap constisting of lists of atom indices in an atom container. * This is done for all atom types (e.g. C or Br) in given atom container. @@ -925,8 +927,8 @@ public static IBond.Order getBondOrderFromString(final String order){ } - public static void writeTextFile(final String pathToOutput, final String content) throws IOException { - FileWriter fr = new FileWriter(new File(pathToOutput)); + public static void writeTextFile(final String pathToOutputFile, final String content) throws IOException { + FileWriter fr = new FileWriter(new File(pathToOutputFile)); BufferedWriter br = new BufferedWriter(fr); br.write(content); br.close(); @@ -986,7 +988,7 @@ public static ArrayList getOutliers(ArrayList input) { * @return */ public static Double getMedian(final List data) { - if(data == null){ + if((data == null) || data.isEmpty()){ return null; } if(data.size() == 1){ @@ -1007,7 +1009,7 @@ public static Double getMedian(final List data) { * @return */ public static Double getMedian(final ArrayList data) { - if ((data == null) || data.isEmpty()) { + if((data == null) || data.isEmpty()) { return null; } if(data.size() == 1){ @@ -1027,8 +1029,8 @@ public static Double getMedian(final ArrayList data) { * @param data * @return */ - public static Double getMean(final ArrayList data) { - if(data == null){ + public static Double getMean(final Collection data) { + if((data == null) || data.isEmpty()){ return null; } double sum = 0; @@ -1043,6 +1045,39 @@ public static Double getMean(final ArrayList data) { return ((data.size() - nullCounter) != 0) ? (sum/(data.size() - nullCounter)) : null; } + /** + * + * @param data + * @return + */ + public static Double getStandardDeviation(final ArrayList data) { + if ((data == null) || data.isEmpty()) { + return null; + } + final Double variance = Utils.getVariance(data); + + return (variance != null) ? 
Math.sqrt(variance) : null; + } + + public static Double getVariance(final Collection data) { + if ((data == null) || data.isEmpty()) { + return null; + } + final int nullCounter = Collections.frequency(data, null); + double quadrSum = 0.0; + final Double mean = Utils.getMean(data); + if(mean == null){ + return null; + } + for (final Double d : data) { + if (d != null) { + quadrSum += Math.pow(d - mean, 2); + } + } + + return ((data.size() - nullCounter) != 0) ? (quadrSum / (data.size() - nullCounter)) : null; + } + /** * @@ -1050,7 +1085,7 @@ public static Double getMean(final ArrayList data) { * @return */ public static Double getMean(final Double[] data) { - if(data == null){ + if((data == null) || (data.length == 0)){ return null; } double sum = 0; @@ -1062,7 +1097,7 @@ public static Double getMean(final Double[] data) { nullCounter++; } } - return ((data.length - nullCounter) != 0) ? (sum/(data.length - nullCounter)) : null; + return ((data.length - nullCounter) != 0) ? (sum / (data.length - nullCounter)) : null; } @@ -1175,22 +1210,8 @@ public static HashMap getRMS(final HashMap " + existingBondsOrderSum + " + " + implicitHydrogenCount + " = " + (existingBondsOrderSum + implicitHydrogenCount) + " <= " + atom.getValency() + " ? 
"); - - return (existingBondsOrderSum + implicitHydrogenCount) >= atom.getValency(); + public static boolean isSaturated(final IAtomContainer ac, final int atomIndex) throws CDKException { + return Utils.getBondOrderSum(ac, atomIndex, true).intValue() >= ac.getAtom(atomIndex).getValency(); } public static void addImplicitHydrogens(final IAtomContainer ac) throws CDKException{ @@ -1215,25 +1236,27 @@ public static int countElements(final String input){ return counter; } - public static ArrayList getComponents(final String symbols){ - final ArrayList components = new ArrayList<>(); - for (int i = 0; i < symbols.length(); i++) { - if ((i + 1 < symbols.length()) - && Character.isLowerCase(symbols.charAt(i + 1))) { - components.add(symbols.substring(i, i + 2)); - i++; - } else { - components.add(symbols.substring(i, i + 1)); - } - } - - return components; - } +// public static ArrayList getComponents(final String symbols){ +// final ArrayList components = new ArrayList<>(); +// for (int i = 0; i < symbols.length(); i++) { +// if ((i + 1 < symbols.length()) +// && Character.isLowerCase(symbols.charAt(i + 1))) { +// components.add(symbols.substring(i, i + 2)); +// i++; +// } else { +// components.add(symbols.substring(i, i + 1)); +// } +// } +// +// return components; +// } /** * * @param lookup * @return + * + * @deprecated */ public static HashMap getMedian(final HashMap> lookup) { @@ -1249,7 +1272,6 @@ public static HashMap getMedian(final HashMap> hoseLookupToExtend, final HashMap> hoseLookup){ for (final String hose : hoseLookup.keySet()) { if(!hoseLookupToExtend.containsKey(hose)){ @@ -1259,6 +1281,15 @@ public static void combineHashMaps(final HashMap> hose } } + public static Double roundDouble(final Double value, final int decimalPlaces){ + if(value == null){ + return null; + } + final int decimalFactor = (int) (Math.pow(10, decimalPlaces)); + + return (Math.round(value * decimalFactor) / (double) decimalFactor); + } + /** * Checks whether a structure contains 
explicit hydrogen atoms or not. * @@ -1434,17 +1465,17 @@ public static void stopExecuter(final ExecutorService executor, final long secon System.err.println("killing non-finished tasks!"); executor.shutdownNow(); } - } + } /** * Returns the bond order for a numeric order value. * - * @param orderNumber + * @param orderAsNumeric * @return */ - public static IBond.Order getBondOrder(final int orderNumber) { + public static IBond.Order getBondOrder(final int orderAsNumeric) { for (IBond.Order order : IBond.Order.values()){ - if(order.numeric() == orderNumber){ + if(order.numeric() == orderAsNumeric){ return order; } } @@ -1452,9 +1483,34 @@ public static IBond.Order getBondOrder(final int orderNumber) { return null; } - public static Integer getBondOrderInteger(final IBond.Order order) { - return (order != null) ? order.numeric() : null; + public static Float getBondOrderAsNumeric(final IBond bond) { + if(bond == null){ + return null; + } + float bondOrderAsNumeric; + if (bond.isAromatic()) { + bondOrderAsNumeric = (float) 1.5; + } else { + bondOrderAsNumeric = bond.getOrder().numeric(); + } + + return bondOrderAsNumeric; } + public static Float getBondOrderSum(final IAtomContainer ac, final int atomIndex, final boolean includeImplicitHydrogenCount) { + if(!Utils.checkIndexInAtomContainer(ac, atomIndex)){ + return null; + } + float bondsOrderSum = 0; + final IAtom atom = ac.getAtom(atomIndex); + for (final IBond bond : ac.getConnectedBondsList(atom)) { + bondsOrderSum += Utils.getBondOrderAsNumeric(bond); + } + if(includeImplicitHydrogenCount && (atom.getImplicitHydrogenCount() != null)){ + bondsOrderSum += atom.getImplicitHydrogenCount(); + } + + return bondsOrderSum; + } } From 8be53a69de7817d1fb3b61c1c594239810140c13 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Sat, 13 Apr 2019 00:39:59 +0200 Subject: [PATCH 083/405] - new method isFullyAssigned() to check whether all signals could be assigned --- src/casekit/NMR/model/Assignment.java | 8 ++++++++ 1 file 
changed, 8 insertions(+) diff --git a/src/casekit/NMR/model/Assignment.java b/src/casekit/NMR/model/Assignment.java index f15887b..dcb78f3 100644 --- a/src/casekit/NMR/model/Assignment.java +++ b/src/casekit/NMR/model/Assignment.java @@ -132,6 +132,14 @@ public int getSetAssignmentsCount(final int dim){ return setAssignmentsCounter; } + public Boolean isFullyAssigned(final int dim){ + if(!this.checkDimension(dim)){ + return null; + } + + return this.getSetAssignmentsCount(dim) == this.getAssignmentsCount(); + } + /** * Adds a new assignment entry for a further signal. The given atom indices * will be stored as atom index for each dimension of the signal/spectrum. From 5b51ecf46955b02265de016899e52ae54ce8780a Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Sat, 13 Apr 2019 00:41:23 +0200 Subject: [PATCH 084/405] - minor changes in constructors and getClone() function --- src/casekit/NMR/model/Signal.java | 32 ++++++++++--------------------- 1 file changed, 10 insertions(+), 22 deletions(-) diff --git a/src/casekit/NMR/model/Signal.java b/src/casekit/NMR/model/Signal.java index 5c37489..72818e7 100644 --- a/src/casekit/NMR/model/Signal.java +++ b/src/casekit/NMR/model/Signal.java @@ -31,7 +31,7 @@ * * @author Michael Wenk [https://github.com/michaelwenk] */ -public class Signal implements Cloneable { +public class Signal { private final int nDim; @@ -51,25 +51,19 @@ public class Signal implements Cloneable { public Signal(final String[] nuclei) { - this.nuclei = nuclei; - this.nDim = this.nuclei.length; - this.shifts = this.initShifts(null, this.nDim); + this(nuclei, null); } public Signal(final String[] nuclei, final Double[] shifts) { + this(nuclei, shifts, null, null); + } + + public Signal(final String[] nuclei, final Double[] shifts, final String multiplicity, final Double intensity) { this.nuclei = nuclei; this.nDim = this.nuclei.length; this.shifts = this.initShifts(shifts, this.nDim); - } - - public Signal(final String[] nuclei, final Double[] shifts, final 
Double intensity) { - this(nuclei, shifts); - this.intensity = intensity; - } - - public Signal(final String[] nuclei, final Double[] shifts, final Double intensity, final String multiplicity) { - this(nuclei, shifts, intensity); this.multiplicity = multiplicity; + this.intensity = intensity; } private Double[] initShifts(final Double[] shifts, final int nDim){ @@ -142,16 +136,10 @@ public boolean checkDimension(final int dim){ * @return */ public Signal getClone(){ - final Signal signalClone = new Signal(this.nuclei, this.shifts); - signalClone.setIntensity(this.intensity); - signalClone.setMultiplicity(this.multiplicity); - signalClone.setPhase(this.phase); + final Signal clone = new Signal(this.nuclei, this.shifts, this.multiplicity, this.intensity); + clone.setPhase(this.phase); - return signalClone; + return clone; } - @Override - public Signal clone() throws CloneNotSupportedException{ - return (Signal) super.clone(); - } } From 93566f12f51f98eb2d98df2b80d5635dac66c66f Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Sat, 13 Apr 2019 00:42:45 +0200 Subject: [PATCH 085/405] - return of boolean values if signals are added or removed --- src/casekit/NMR/model/Spectrum.java | 70 ++++++++++++++++------------- 1 file changed, 39 insertions(+), 31 deletions(-) diff --git a/src/casekit/NMR/model/Spectrum.java b/src/casekit/NMR/model/Spectrum.java index 4fb2d52..7db3482 100644 --- a/src/casekit/NMR/model/Spectrum.java +++ b/src/casekit/NMR/model/Spectrum.java @@ -65,14 +65,19 @@ public class Spectrum { private Double spectrometerFrequency; private String solvent; private String standard; - private final ArrayList signals = new ArrayList<>(); - private final ArrayList equivalences = new ArrayList<>(); + private final ArrayList signals; + private int signalCount; + private final ArrayList equivalences; private ArrayList[] equivalentSignals; public Spectrum(final String[] nuclei) { this.nuclei = nuclei; this.nDim = this.nuclei.length; + this.signals = new ArrayList<>(); 
+ this.signalCount = 0; + this.equivalences = new ArrayList<>(); + this.equivalentSignals = new ArrayList[]{}; } public String[] getNuclei(){ @@ -110,7 +115,7 @@ public final boolean setShifts(final ArrayList shiftList, final int dim) return true; } - public final boolean setShift(final double shift, final int dim, final int signalIndex){ + public final boolean setShift(final Double shift, final int dim, final int signalIndex){ if(!this.checkDimension(dim) || !this.checkSignalIndex(signalIndex)){ return false; } @@ -120,7 +125,7 @@ public final boolean setShift(final double shift, final int dim, final int signa } public int getSignalCount() { - return this.signals.size(); + return this.signalCount; } /** @@ -148,16 +153,8 @@ public boolean addSignals(final ArrayList signals){ * @param signal signal to add * @return */ - public boolean addSignal(final Signal signal) { - if(!this.checkDimCount(signal.getDimCount()) || !this.checkNuclei(signal.getNuclei())){ - return false; - } - // add signal at the end of signal list - this.signals.add(signal); - this.equivalences.add(-1); - this.updateEquivalentSignalClasses(); - - return true; + public boolean addSignal(final Signal signal) { + return this.addSignal(signal, -1); } /** @@ -172,11 +169,15 @@ public boolean addSignal(final Signal signal, final int equivalentSignalIndex) { return false; } // add signal at the end of signal list - this.signals.add(signal); - this.equivalences.add(equivalentSignalIndex); - this.updateEquivalentSignalClasses(); + if(this.signals.add(signal)){ + this.signalCount++; + this.equivalences.add(equivalentSignalIndex); + this.updateEquivalentSignalClasses(); + + return true; + } - return true; + return false; } public boolean removeSignal(final Signal signal){ @@ -184,17 +185,21 @@ public boolean removeSignal(final Signal signal){ } public boolean removeSignal(final int signalIndex){ - if(!this.checkSignalIndex(signalIndex)){ + if(!this.checkSignalIndex(signalIndex)){ return false; - } - 
this.signals.remove(signalIndex); - this.equivalences.remove(signalIndex); - this.updateEquivalentSignalClasses(); - - return true; - } - - private boolean checkSignalIndex(final Integer signalIndex){ + } + if(this.signals.remove(signalIndex) != null){ + this.signalCount--; + this.equivalences.remove(signalIndex); + this.updateEquivalentSignalClasses(); + + return true; + } + + return false; + } + + private boolean checkSignalIndex(final Integer signalIndex){ return (signalIndex != null) && (signalIndex >= 0) && (signalIndex < this.getSignalCount()); } @@ -230,7 +235,11 @@ public Signal getSignal(final int signalIndex) { return null; } - return this.signals.get(signalIndex); + try { + return this.signals.get(signalIndex); + } catch (Exception e) { + return null; + } } public ArrayList getIntensities(){ @@ -377,7 +386,6 @@ private void updateEquivalentSignalClasses(){ this.equivalentSignals = new ArrayList[this.getSignalCount()]; for(int i = 0; i < this.getSignalCount(); i++) { this.equivalentSignals[i] = this.searchEquivalentSignals(i); -// this.equivalentSignals.put(i, this.searchEquivalentSignals(i)); } } @@ -536,7 +544,7 @@ public int compare(final Integer pickedSignalIndex1, final Integer pickedSignalI public Spectrum getClone() { final Spectrum clone = new Spectrum(this.nuclei); for (int i = 0; i < this.getSignalCount(); i++) { - clone.addSignal(this.getSignal(i), this.getEquivalence(i)); + clone.addSignal(this.getSignal(i).getClone(), this.getEquivalence(i)); } clone.setSpecDescription(this.description); clone.setSolvent(this.solvent); From a6ed506f6df71b21b6048a9056b7e56e9b52aa11 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Mon, 15 Apr 2019 23:29:07 +0200 Subject: [PATCH 086/405] - added a null check in addSignal() --- src/casekit/NMR/model/Spectrum.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/casekit/NMR/model/Spectrum.java b/src/casekit/NMR/model/Spectrum.java index 7db3482..93a0cf1 100644 --- 
a/src/casekit/NMR/model/Spectrum.java +++ b/src/casekit/NMR/model/Spectrum.java @@ -165,7 +165,7 @@ public boolean addSignal(final Signal signal) { * @return */ public boolean addSignal(final Signal signal, final int equivalentSignalIndex) { - if(!this.checkDimCount(signal.getDimCount()) || !this.checkNuclei(signal.getNuclei())){ + if((signal == null) || !this.checkDimCount(signal.getDimCount()) || !this.checkNuclei(signal.getNuclei())){ return false; } // add signal at the end of signal list From fefc2d224f3788fd95a9b921af8a219c1380c1d9 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Mon, 15 Apr 2019 23:47:10 +0200 Subject: [PATCH 087/405] - new class for predicting chemical shifts, signals and spectra based on HOSE code --- src/casekit/NMR/predict/Predict.java | 128 +++++++++++++++++++++++++++ 1 file changed, 128 insertions(+) create mode 100644 src/casekit/NMR/predict/Predict.java diff --git a/src/casekit/NMR/predict/Predict.java b/src/casekit/NMR/predict/Predict.java new file mode 100644 index 0000000..5bcfeb5 --- /dev/null +++ b/src/casekit/NMR/predict/Predict.java @@ -0,0 +1,128 @@ +/* + * The MIT License + * + * Copyright 2019 Michael Wenk [https://github.com/michaelwenk]. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +package casekit.NMR.predict; + +import casekit.NMR.Utils; +import casekit.NMR.model.Signal; +import casekit.NMR.model.Spectrum; +import hose.HOSECodeBuilder; +import java.util.ArrayList; +import java.util.HashMap; +import org.openscience.cdk.exception.CDKException; +import org.openscience.cdk.interfaces.IAtom; +import org.openscience.cdk.interfaces.IAtomContainer; + +/** + * + * @author Michael Wenk [https://github.com/michaelwenk] + */ +public class Predict { + + /** + * Predicts a shift value for a central atom based on its HOSE code and a + * given HOSE code lookup table. The prediction is done by using the mean + * of all occurring shifts in lookup table for the given HOSE code. + * + * @param HOSECodeLookupTable + * @param HOSECode + * @return null if HOSE code does not exist in lookup table + * + * @see casekit.NMR.Utils#getRMS(java.util.ArrayList) + * + */ + public static Double predictShift(final HashMap> HOSECodeLookupTable, final String HOSECode) { + if (HOSECodeLookupTable.containsKey(HOSECode)) { + return Utils.getMean(HOSECodeLookupTable.get(HOSECode)); + } + + return null; + } + + /** + * Specified for carbons (13C) only. Not generic at the moment because of + * usage of {@link casekit.NMR.Utils#getMultiplicityFromHydrogenCount(int)} + * with {@code hCount}. 
+ * + * @param HOSECodeLookupTable + * @param ac + * @param atomIndex + * @param maxSphere + * @param nucleus + * @param hCount + * @return null if HOSE code of selected atom does not exist in lookup table + * + * @throws CDKException + * + * @see #predictShift(java.util.HashMap, java.lang.String) + * + */ + public static Signal predictSignal(final HashMap> HOSECodeLookupTable, final IAtomContainer ac, final int atomIndex, final Integer maxSphere, final String nucleus, final Integer hCount) throws CDKException { + if (!Utils.checkIndexInAtomContainer(ac, atomIndex) || (hCount == null)) { + return null; + } + final Double predictedShift = Predict.predictShift(HOSECodeLookupTable, HOSECodeBuilder.buildHOSECode(ac, atomIndex, maxSphere, false)); + if (predictedShift == null) { + return null; + } + return new Signal( + new String[]{nucleus}, + new Double[]{predictedShift}, + Utils.getMultiplicityFromHydrogenCount(hCount), + null + ); + } + + /** + * Specified for carbons (13C) only. Not generic at the moment because of + * {@link casekit.NMR.Utils#getMultiplicityFromHydrogenCount(int)}. 
+ * + * @param HOSECodeLookupTable + * @param ac + * @param maxSphere + * @param nucleus + * @return null if a HOSE code of one atom does not exist in lookup table + * + * @throws org.openscience.cdk.exception.CDKException + * + * @see #predictSignal(java.util.HashMap, + * org.openscience.cdk.interfaces.IAtomContainer, int, java.lang.Integer, + * java.lang.String, java.lang.Integer) + * + */ + public static Spectrum predictSpectrum(final HashMap> HOSECodeLookupTable, final IAtomContainer ac, final Integer maxSphere, final String nucleus) throws CDKException { + final Spectrum predictedSpectrum = new Spectrum(new String[]{nucleus}); + Signal signal; + for (final IAtom atom : ac.atoms()) { + if (atom.getSymbol().equals(Utils.getAtomTypeFromSpectrum(predictedSpectrum, 0))) { + signal = Predict.predictSignal(HOSECodeLookupTable, ac, atom.getIndex(), maxSphere, nucleus, atom.getImplicitHydrogenCount()); + if(signal == null){ + return null; + } + predictedSpectrum.addSignal(signal); + } + } + + return predictedSpectrum; + } +} From a44afa50303afe20f6f25a4f04c65194fc658cc4 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Tue, 16 Apr 2019 00:07:35 +0200 Subject: [PATCH 088/405] - checked functions for deprecated ones - methods belonging to HOSE code symbols moved to FragAssembler --- src/casekit/NMR/Utils.java | 123 ++++++++++++++++++------------------- 1 file changed, 61 insertions(+), 62 deletions(-) diff --git a/src/casekit/NMR/Utils.java b/src/casekit/NMR/Utils.java index ea48fec..d7076b5 100644 --- a/src/casekit/NMR/Utils.java +++ b/src/casekit/NMR/Utils.java @@ -354,12 +354,17 @@ public static Spectrum XMLtoSpectrum(final String pathToXML, final int ndim, fin public static String getAtomTypeFromSpectrum(final Spectrum spectrum, final int dim){ if(spectrum.checkDimension(dim)){ - return Utils.getElementIdentifier(spectrum.getNuclei()[dim]); + return Utils.getAtomTypeFromNucleus(spectrum.getNuclei()[dim]); } return null; } + public static String 
getAtomTypeFromNucleus(final String nucleus){ + final String[] nucleusSplit = nucleus.split("\\d"); + return nucleusSplit[nucleusSplit.length - 1]; + } + public static int getDifferenceSpectrumSizeAndMolecularFormulaCount(final Spectrum spectrum, final IMolecularFormula molFormula, final int dim) throws CDKException{ if(!spectrum.checkDimension(dim)){ throw new CDKException(Thread.currentThread().getStackTrace()[2].getClassName() + "." + Thread.currentThread().getStackTrace()[2].getMethodName() + ": invalid dimension in spectrum given"); @@ -727,27 +732,6 @@ public static String getNMRShiftConstant(final String element){ } } - - public static int getElectronNumberByBondOrder(final IBond.Order order) { - switch (order) { - case SINGLE: - return 1; - case DOUBLE: - return 2; - case TRIPLE: - return 3; - case QUADRUPLE: - return 4; - case QUINTUPLE: - return 5; - case SEXTUPLE: - return 6; - default: - return 0; - } - } - - /** * Returns the NMR isotope identifier for a given element, e.g. C -> 13C. * Elements defined so far: C, H, N, P, F, D, O, S, Si, B, Pt. @@ -770,31 +754,7 @@ public static String getIsotopeIdentifier(final String element) { default: return element; } - } - - /** - * Returns the element identifier for a given isotope, e.g. 13C -> C. - * Elements defined so far: C, H, N, P, F, D, O, S, Si, B, Pt. - * - * @param isotope isotope's symbol (e.g. 
"13C") - * @return - */ - public static String getElementIdentifier(final String isotope) { - switch (isotope) { - case "13C": return "C"; - case "1H": return "H"; - case "15N": return "N"; - case "31P": return "P"; - case "19F": return "F"; - case "17O": return "O"; - case "33S": return "S"; - case "29Si": return "Si"; - case "11B": return "B"; - case "195Pt": return "Pt"; - default: - return null; - } - } + } public static HashSet getAtomTypesInAtomContainer(final IAtomContainer ac) { @@ -812,7 +772,16 @@ public static boolean checkMinMaxValue(final double min, final double max, final return (value >= min && value <= max); } - + /** + * + * @param ac + * @param indexAC + * @param bondsSet + * @param neighborElems + * @return + * + * @deprecated + */ public static int[] getNeighborhoodBondsCount(final IAtomContainer ac, final int indexAC, final String[] bondsSet, final ArrayList neighborElems){ final int[] counts = new int[neighborElems.size() * bondsSet.length]; String foundBonds; @@ -839,7 +808,21 @@ public static int[] getNeighborhoodBondsCount(final IAtomContainer ac, final int return counts; } - + /** + * + * @param pathToOutput + * @param m + * @param bondsSet + * @param elem + * @param neighborElems + * @param min + * @param max + * @param stepSize + * + * @throws IOException + * + * @deprecated + */ public static void writeNeighborhoodBondsCountMatrix(final String pathToOutput, final int[][] m, final String[] bondsSet, final String elem, final ArrayList neighborElems, final int min, final int max, final int stepSize) throws IOException{ if(stepSize < 1){ @@ -877,14 +860,26 @@ public static void writeNeighborhoodBondsCountMatrix(final String pathToOutput, writer.close(); } - + /** + * + * @param s + * @return + * + * @deprecated + */ public static String sortString(final String s) { final char[] c = s.toCharArray(); Arrays.sort(c); return new String(c); } - + /** + * + * @param valences + * @return + * + * @deprecated + */ public static ArrayList> 
getBondOrderSets(final String[] valences) { final ArrayList> bondOrderSets = new ArrayList<>(); @@ -904,6 +899,13 @@ public static ArrayList> getBondOrderSets(final String[] return bondOrderSets; } + /** + * + * @param order + * @return + * + * @deprecated + */ public static String getStringFromBondOrder(final IBond.Order order) { switch (order) { case SINGLE: @@ -915,16 +917,7 @@ public static String getStringFromBondOrder(final IBond.Order order) { default: return null; } - } - - public static IBond.Order getBondOrderFromString(final String order){ - switch(order){ - case "-": return IBond.Order.SINGLE; - case "=": return IBond.Order.DOUBLE; - case "%": return IBond.Order.TRIPLE; - default: return null; - } - } + } public static void writeTextFile(final String pathToOutputFile, final String content) throws IOException { @@ -1403,7 +1396,13 @@ public static IAtomContainer removeAtoms(final IAtomContainer ac, final String a return ac; } - + /** + * + * @param array + * @return + * + * @deprecated + */ public static ArrayList ArrayToArrayList(final int[] array){ final ArrayList list = new ArrayList<>(); From fa99738cc3afee7819a12cf6a8a4e80faa9b014d Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Tue, 16 Apr 2019 00:08:42 +0200 Subject: [PATCH 089/405] - updated --- pom.xml | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/pom.xml b/pom.xml index 5f2866b..2d90761 100644 --- a/pom.xml +++ b/pom.xml @@ -93,5 +93,16 @@ 2.2-SNAPSHOT jar + + org.mongodb + mongo-java-driver + 3.10.0 + + + org.openscience + FragAssembler + 1.0-SNAPSHOT + jar + From e833d504c35060ffe4b93b5210261fad5a436036 Mon Sep 17 00:00:00 2001 From: Michael Wenk Date: Tue, 16 Apr 2019 00:10:50 +0200 Subject: [PATCH 090/405] Delete nb-configuration.xml --- nb-configuration.xml | 29 ----------------------------- 1 file changed, 29 deletions(-) delete mode 100644 nb-configuration.xml diff --git a/nb-configuration.xml b/nb-configuration.xml deleted file mode 100644 index 11a0109..0000000 --- 
a/nb-configuration.xml +++ /dev/null @@ -1,29 +0,0 @@ - - - - - - mit - false - true - - - true - - true - false - false - - false - - From 3189bc0ce773962a785c2c521688305076c24030 Mon Sep 17 00:00:00 2001 From: Michael Wenk Date: Tue, 16 Apr 2019 00:10:58 +0200 Subject: [PATCH 091/405] Delete nbactions-release-profile.xml --- nbactions-release-profile.xml | 60 ----------------------------------- 1 file changed, 60 deletions(-) delete mode 100644 nbactions-release-profile.xml diff --git a/nbactions-release-profile.xml b/nbactions-release-profile.xml deleted file mode 100644 index 64d6f0b..0000000 --- a/nbactions-release-profile.xml +++ /dev/null @@ -1,60 +0,0 @@ - - - - run - - jar - - - process-classes - org.codehaus.mojo:exec-maven-plugin:1.2.1:exec - - - -classpath %classpath casekit.NMR.test - java - - - - debug - - jar - - - process-classes - org.codehaus.mojo:exec-maven-plugin:1.2.1:exec - - - -Xdebug -Xrunjdwp:transport=dt_socket,server=n,address=${jpda.address} -classpath %classpath casekit.NMR.test - java - true - - - - profile - - jar - - - process-classes - org.codehaus.mojo:exec-maven-plugin:1.2.1:exec - - - -classpath %classpath casekit.NMR.test - java - - - - rebuild - build-with-dependencies - - * - - - clean - install - - - true - - - From d7ef8756766da3ef344f21e7e4cb32e0d0b83d1c Mon Sep 17 00:00:00 2001 From: Michael Wenk Date: Tue, 16 Apr 2019 00:11:08 +0200 Subject: [PATCH 092/405] Delete nbactions.xml --- nbactions.xml | 46 ---------------------------------------------- 1 file changed, 46 deletions(-) delete mode 100644 nbactions.xml diff --git a/nbactions.xml b/nbactions.xml deleted file mode 100644 index 2a4cbf2..0000000 --- a/nbactions.xml +++ /dev/null @@ -1,46 +0,0 @@ - - - - run - - jar - - - process-classes - org.codehaus.mojo:exec-maven-plugin:1.2.1:exec - - - -classpath %classpath casekit.NMR.Start - java - - - - debug - - jar - - - process-classes - org.codehaus.mojo:exec-maven-plugin:1.2.1:exec - - - -Xdebug 
-Xrunjdwp:transport=dt_socket,server=n,address=${jpda.address} -classpath %classpath casekit.NMR.Start - java - true - - - - profile - - jar - - - process-classes - org.codehaus.mojo:exec-maven-plugin:1.2.1:exec - - - -classpath %classpath casekit.NMR.Start - java - - - From af2b809cf8d1407dc3c5e3caf9573eb8c76b9bd4 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Thu, 11 Jul 2019 13:44:08 +0200 Subject: [PATCH 093/405] - new class for making Assignment, Signal and Spectrum classes as dimensional NMR classes with certain properties and functionalities --- .../NMR/model/dimensional/DimensionalNMR.java | 31 +++++++ src/casekit/model/Dimensional.java | 86 +++++++++++++++++++ 2 files changed, 117 insertions(+) create mode 100644 src/casekit/NMR/model/dimensional/DimensionalNMR.java create mode 100644 src/casekit/model/Dimensional.java diff --git a/src/casekit/NMR/model/dimensional/DimensionalNMR.java b/src/casekit/NMR/model/dimensional/DimensionalNMR.java new file mode 100644 index 0000000..e7e117e --- /dev/null +++ b/src/casekit/NMR/model/dimensional/DimensionalNMR.java @@ -0,0 +1,31 @@ +/* + * The MIT License + * + * Copyright 2019 Michael Wenk [https://github.com/michaelwenk] + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package casekit.NMR.model.dimensional; + +import casekit.model.Dimensional; + +public class DimensionalNMR extends Dimensional { + + protected DimensionalNMR(String[] dimNames) throws IndexOutOfBoundsException { + super(dimNames); + } + + public final String[] getNuclei(){ + return this.getDimNames(); + } + + public boolean compareNuclei(final String[] nuclei){ + return this.compareDimNames(nuclei); + } + +} diff --git a/src/casekit/model/Dimensional.java b/src/casekit/model/Dimensional.java new file mode 100644 index 0000000..bc3da90 --- /dev/null +++ b/src/casekit/model/Dimensional.java @@ -0,0 +1,86 @@ +/* + * The MIT License + * + * Copyright 2019 Michael Wenk [https://github.com/michaelwenk] + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package casekit.model; + +import java.util.Arrays; + +public class Dimensional { + + private final int nDim; + private final String[] dimNames; + + /** + * Creates a new object of that class by given dimension names. + * + * @param dimNames names for all dimensions to store. + * @throws IndexOutOfBoundsException + */ + protected Dimensional(final String[] dimNames) throws IndexOutOfBoundsException { + if(dimNames.length == 0){ + throw new IndexOutOfBoundsException("Number of given dimensions (" + dimNames.length + ") is not valid: must be >= 1"); + } + + this.dimNames = dimNames; + this.nDim = dimNames.length; + } + + /** + * Returns the dimension names. + * + * @return + */ + protected final String[] getDimNames() { + return dimNames; + } + + /** + * Checks whether the input dimension names are equal to the dimension names of + * this object and in same order. + * + * @param dimNames names of dimensions to check + * @return + */ + protected final boolean compareDimNames(final String[] dimNames){ + return Arrays.equals(this.getDimNames(), dimNames); + } + + /** + * Returns the number of dimensions. + * + * @return + */ + public final int getNDim() { + return this.nDim; + } + + /** + * Checks whether the input dimension exists by dimension number. The dimension + * indexing starts at 0. + * + * @param dim input dimension number + * @return + */ + public final boolean containsDim(final int dim){ + return (dim >= 0) && (dim < this.getNDim()); + } + + /** + * Checks whether the input dimension count is equal to the number of dimensions of this object. 
+ * + * @param nDim number of input dimensions + * @return + */ + public final boolean compareNDim(final int nDim){ + return nDim == this.getNDim(); + } +} From 2e28daccab282652f618592ed1abf691c7e29f66 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Thu, 11 Jul 2019 13:45:40 +0200 Subject: [PATCH 094/405] - new class for parsing text and xml files --- src/casekit/io/FileParser.java | 38 ++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 src/casekit/io/FileParser.java diff --git a/src/casekit/io/FileParser.java b/src/casekit/io/FileParser.java new file mode 100644 index 0000000..2cf669e --- /dev/null +++ b/src/casekit/io/FileParser.java @@ -0,0 +1,38 @@ +/* + * The MIT License + * + * Copyright 2019 Michael Wenk [https://github.com/michaelwenk] + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +package casekit.io; + +import org.w3c.dom.Document; +import org.xml.sax.SAXException; + +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.parsers.ParserConfigurationException; +import java.io.*; + +public class FileParser { + + public static BufferedReader parseText(final String pathToTextFile) throws FileNotFoundException { + return new BufferedReader(new FileReader(pathToTextFile)); + } + + public static Document parseXML(final String pathToXML) throws IOException, SAXException, ParserConfigurationException { + final DocumentBuilderFactory docBuilderFactory = DocumentBuilderFactory.newInstance(); + final DocumentBuilder docBuilder = docBuilderFactory.newDocumentBuilder(); + final Document doc = docBuilder.parse(new File(pathToXML)); + // normalize text representation + doc.getDocumentElement().normalize(); + + return doc; + } +} From 92fc4b1b3ff7eb2f887a6505ffca4517e6fd8d46 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Thu, 11 Jul 2019 14:01:06 +0200 Subject: [PATCH 095/405] - new class for calculations or comparisons between spectra or combining them --- src/casekit/NMR/match/Matcher.java | 280 +++++++++++++++++++++++++++++ 1 file changed, 280 insertions(+) create mode 100644 src/casekit/NMR/match/Matcher.java diff --git a/src/casekit/NMR/match/Matcher.java b/src/casekit/NMR/match/Matcher.java new file mode 100644 index 0000000..90b5b9d --- /dev/null +++ b/src/casekit/NMR/match/Matcher.java @@ -0,0 +1,280 @@ +/* + * The MIT License + * + * Copyright 2019 Michael Wenk [https://github.com/michaelwenk] + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do 
so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package casekit.NMR.match; + +import casekit.NMR.Utils; +import casekit.NMR.model.Assignment; +import casekit.NMR.model.Signal; +import casekit.NMR.model.Spectrum; +import org.apache.commons.lang3.ArrayUtils; +import org.openscience.cdk.exception.CDKException; +import org.openscience.cdk.similarity.Tanimoto; + +import java.util.Arrays; +import java.util.HashSet; + +public class Matcher { + + + /** + * Checks whether two spectra contain given dimensions. + * + * @param spectrum1 first spectrum + * @param spectrum2 second spectrum + * @param dim1 dimension to select in first spectrum + * @param dim2 dimension to select in second spectrum + * @return true if both spectra contain the selected dimension + */ + public static boolean checkDimensions(final Spectrum spectrum1, final Spectrum spectrum2, final int dim1, final int dim2){ + return spectrum1.containsDim(dim1) && spectrum2.containsDim(dim2); + } + + /** + * Combines selected dimensions of two spectra while considering possible equivalent signals + * via the {@code pickPrecision} parameter and multiplicity comparison. + * In {@code spectrum1}, the equivalent signals have to be set. + * + * + * @param spectrum1 first spectrum, incl. 
equivalent signals + * @param spectrum2 second spectrum + * @param pickPrecision tolerance value used for signal shift matching to + * find equivalent signals + * @param dim1 dimension of first spectrum to combine + * @param dim2 dimension of second spectrum to combine + * @return null if one spectrum does not contain the selected dimension + * + */ + public static Spectrum combineSpectra(final Spectrum spectrum1, final Spectrum spectrum2, final int dim1, final int dim2, final double pickPrecision) throws Exception { + if (!Matcher.checkDimensions(spectrum1, spectrum2, dim1, dim2)) { + return null; + } + int equivalentSignalIndex; + // create new spectra which is to fill with signals of both spectra + final Spectrum combinedSpectrum = spectrum1.getClone(); + // fill in signals from spectrum2 + // consider the possibility of potential equivalent signals here + for (final Signal signalSpectrum2 : spectrum2.getSignals()) { + equivalentSignalIndex = -1; + for (final int closestSignalIndex : combinedSpectrum.pickSignals(signalSpectrum2.getShift(dim2), dim1, pickPrecision)) { + if (signalSpectrum2.getMultiplicity().equals(combinedSpectrum.getSignal(closestSignalIndex).getMultiplicity())) { + equivalentSignalIndex = closestSignalIndex; + } + } + combinedSpectrum.addSignal(signalSpectrum2.getClone(), equivalentSignalIndex); + } + return combinedSpectrum; + } + + /** + * Calculates the Tanimoto coefficient between two spectra in given dimensions. 
+ * + * @param spectrum1 first spectrum + * @param spectrum2 second spectrum + * @param dim1 dimension in first spectrum to take the shifts from + * @param dim2 dimension in second spectrum to take the shifts from + * @return + * @throws CDKException + */ + public static Float calculateTanimotoCoefficient(final Spectrum spectrum1, final Spectrum spectrum2, final int dim1, final int dim2) throws CDKException { + if (!Matcher.checkDimensions(spectrum1, spectrum2, dim1, dim2)) { + return null; + } + final double[] shiftsSpectrum1 = ArrayUtils.toPrimitive(spectrum1.getShifts(dim1).toArray(new Double[spectrum1.getSignalCount()])); + Arrays.parallelSort(shiftsSpectrum1); + final double[] shiftsSpectrum2 = ArrayUtils.toPrimitive(spectrum2.getShifts(dim2).toArray(new Double[spectrum2.getSignalCount()])); + Arrays.parallelSort(shiftsSpectrum2); + + return Tanimoto.calculate(shiftsSpectrum1, shiftsSpectrum2); + } + + /** + * Returns deviations between matched shifts in SSC and query spectrum. + * The matching procedure is already included here. 
+ * + * @param spectrum1 first spectrum + * @param spectrum2 second spectrum + * @param dim1 dimension in first spectrum to take the shifts from + * @param dim2 dimension in second spectrum to take the shifts from + * @param shiftTol + * @return + * + * @see #matchSpectra(Spectrum, Spectrum, int, int, double) + */ + public static Double[] getDeviations(final Spectrum spectrum1, final Spectrum spectrum2, final int dim1, final int dim2, final double shiftTol) { + final Double[] deviations = new Double[spectrum1.getSignalCount()]; + final Assignment matchAssignments = Matcher.matchSpectra(spectrum1, spectrum2, dim1, dim2, shiftTol); + Signal matchedSignalInSpectrum2; + for (int i = 0; i < spectrum1.getSignalCount(); i++) { + if (matchAssignments.getAtomIndex(0, i) == -1) { + deviations[i] = null; + } else { + matchedSignalInSpectrum2 = spectrum2.getSignal(matchAssignments.getAtomIndex(0, i)); + deviations[i] = Math.abs(spectrum1.getSignal(i).getShift(dim1) - matchedSignalInSpectrum2.getShift(dim2)); + } + } + return deviations; + } + + /** + * Returns the average of all deviations within a given input array. + * + * @param deviations array of deviations + * @return + * + * + */ + public static Double calculateAverageDeviation(final Double[] deviations) { + // every signal has to have a match + for (final Double deviation : deviations) { + if (deviation == null) { + return null; + } + } + + return Utils.getMean(deviations); + } + + /** + * Returns the average of all deviations of matched shifts between two + * spectra. 
+ * + * @param spectrum1 first spectrum + * @param spectrum2 second spectrum + * @param dim1 dimension in first spectrum to take the shifts from + * @param dim2 dimension in second spectrum to take the shifts from + * @param shiftTol Tolerance value [ppm] used during peak picking in + * shift comparison + * @return + * + * @see #getDeviations(Spectrum, Spectrum, int, int, double) + * @see #calculateAverageDeviation(Double[]) + */ + public static Double calculateAverageDeviation(final Spectrum spectrum1, final Spectrum spectrum2, final int dim1, final int dim2, final double shiftTol) { + return Matcher.calculateAverageDeviation(Matcher.getDeviations(spectrum1, spectrum2, dim1, dim2, shiftTol)); + } + + /** + * Returns the closest shift matches between two spectra in selected dimensions + * as an Assignment object with one set dimension only.
+ * Although intensities are expected, they are still not considered here. + * + * @param spectrum1 first spectrum + * @param spectrum2 second spectrum + * @param dim1 dimension in first spectrum to take the shifts from + * @param dim2 dimension in second spectrum to take the shifts from + * @param shiftTol Tolerance value [ppm] used during spectra shift + * comparison + * @return Assignments with signal indices of spectrum1 and matched indices + * in spectrum2; null if one of the spectra does not + * contain the selected dimension + */ + public static Assignment matchSpectra(final Spectrum spectrum1, final Spectrum spectrum2, final int dim1, final int dim2, final double shiftTol) { + if (!Matcher.checkDimensions(spectrum1, spectrum2, dim1, dim2)) { + return null; + } + final Assignment matchAssignments = new Assignment(spectrum1); + final HashSet pickedSignalIndices = new HashSet<>(); + int pickedSignalIndexSpectrum2; + int pickedSignalIndexSpectrum2Prev; + for (int i = 0; i < spectrum1.getSignalCount(); i++) { + if (spectrum1.getShift(i, dim1) == null) { + pickedSignalIndexSpectrum2 = -1; + } else { + pickedSignalIndexSpectrum2 = spectrum2.pickClosestSignal(spectrum1.getShift(i, dim1), dim2, shiftTol); + // if matched signal is already assigned, then consider symmetries (equiv. 
signals) + if (pickedSignalIndices.contains(pickedSignalIndexSpectrum2)) { + // symmetry exists + if (spectrum2.hasEquivalences(pickedSignalIndexSpectrum2)) { + pickedSignalIndexSpectrum2Prev = pickedSignalIndexSpectrum2; + // assign the next signal in equivalence list + for (final int equivalentSignalIndexSpectrum2 : spectrum2.getEquivalentSignals(pickedSignalIndexSpectrum2)) { + if (!pickedSignalIndices.contains(equivalentSignalIndexSpectrum2)) { + pickedSignalIndexSpectrum2 = equivalentSignalIndexSpectrum2; + break; + } + } + // if no further equivalent signal exists then that match is not valid + if (pickedSignalIndexSpectrum2 == pickedSignalIndexSpectrum2Prev) { + pickedSignalIndexSpectrum2 = -1; + } + } else { + // not symmetric signals but the same (predicted) or very similar shifts and multiple assignments to catch + // -> still open + pickedSignalIndexSpectrum2 = -1; + } + } + // check multiplicity + if ((spectrum1.getMultiplicity(i) == null) || (spectrum2.getMultiplicity(pickedSignalIndexSpectrum2) == null) || !spectrum1.getMultiplicity(i).equals(spectrum2.getMultiplicity(pickedSignalIndexSpectrum2))) { + pickedSignalIndexSpectrum2 = -1; + } + } + // add only truly assigned signal to list of already assigned signals + if (pickedSignalIndexSpectrum2 != -1) { + pickedSignalIndices.add(pickedSignalIndexSpectrum2); + } + // set picked signal index in assignment object + matchAssignments.setAssignment(0, i, pickedSignalIndexSpectrum2); + } +// // try to assign the still unassigned shifts in spectrum1 to shifts in spectrum2 +// System.out.println("--> assignments before:\t" + Utils.ArrayToArrayList(matchAssignments.getAtomIndices(0))); +// ArrayList pickedSignalIndicesInSpectrum2; +// for (int i = 0; i < matchAssignments.getAssignmentsCount(); i++) { +// final Double queryShiftSpectrum1 = spectrum1.getShift(i, 0); +// if ((matchAssignments.getAtomIndex(0, i) == -1) && (queryShiftSpectrum1 != null)) { +// pickedSignalIndicesInSpectrum2 = 
spectrum2.pickSignals(queryShiftSpectrum1, 0, shiftTol); +// for (final int pickedSignalIndexInSpectrum2 : pickedSignalIndicesInSpectrum2) { +// if (!pickedSignalIndices.contains(pickedSignalIndexInSpectrum2) +// && (spectrum1.getMultiplicity(i) != null) +// && (spectrum2.getMultiplicity(pickedSignalIndexInSpectrum2) != null) +// && spectrum1.getMultiplicity(i).equals(spectrum2.getMultiplicity(pickedSignalIndexInSpectrum2))) { +// matchAssignments.setAssignment(0, i, pickedSignalIndexInSpectrum2); +// pickedSignalIndices.add(pickedSignalIndexInSpectrum2); +// break; +// } +// } +// } +// } +// System.out.println("--> assignments after:\t" + Utils.ArrayToArrayList(matchAssignments.getAtomIndices(0))); + return matchAssignments; + } + + /** + * Returns the closest shift matches between two spectra in all dimensions + * as one Assignment object with N set dimensions. + * N here means the number of dimensions in both spectra.
+ * Although intensities are expected, they are still not considered here. + * + * @param spectrum1 first spectrum + * @param spectrum2 second spectrum + * @param shiftTols tolerance values [ppm] per each dimension used during spectra shift + * comparisons + * @return Assignments with signal indices of spectrum1 and matched indices + * in spectrum2 for each dimension; null if the number of + * dimensions in both spectra is not the same or is different than the number of given + * shift tolerances + * + * @see #matchSpectra(Spectrum, Spectrum, int, int, double) + * + */ + public static Assignment matchSpectra(final Spectrum spectrum1, final Spectrum spectrum2, final double[] shiftTols) { + if((spectrum1.getNDim() != spectrum2.getNDim()) || (spectrum1.getNDim() != shiftTols.length)){ + return null; + } + final Assignment matchAssignment = new Assignment(spectrum1); + for (int dim = 0; dim < spectrum1.getNDim(); dim++) { + matchAssignment.setAssignments(dim, Matcher.matchSpectra(spectrum1, spectrum2, dim, dim, shiftTols[dim]).getAtomIndices(0)); + } + + return matchAssignment; + } +} From 78d2e94b5b98c62b8c6056543963cf3d775b26f6 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Thu, 11 Jul 2019 14:06:01 +0200 Subject: [PATCH 096/405] - this class now extends the DimensionalNMR class --- src/casekit/NMR/model/Assignment.java | 58 ++++++++---------- src/casekit/NMR/model/Signal.java | 85 +++++++++++---------------- 2 files changed, 60 insertions(+), 83 deletions(-) diff --git a/src/casekit/NMR/model/Assignment.java b/src/casekit/NMR/model/Assignment.java index dcb78f3..9206565 100644 --- a/src/casekit/NMR/model/Assignment.java +++ b/src/casekit/NMR/model/Assignment.java @@ -23,28 +23,30 @@ */ package casekit.NMR.model; +import casekit.NMR.model.dimensional.DimensionalNMR; +import org.apache.commons.lang3.ArrayUtils; + import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; /** * * @author Michael Wenk [https://github.com/michaelwenk] */ -public class 
Assignment implements Cloneable { +public class Assignment extends DimensionalNMR implements Cloneable { - final int nDim; - final String[] nuclei; int[][] assignments; public Assignment(final Spectrum spectrum) { - this.nuclei = spectrum.getNuclei(); - this.nDim = this.nuclei.length; - this.assignments = this.initAssignments(this.nDim, spectrum.getSignalCount()); + super(spectrum.getNuclei()); + this.assignments = this.initAssignments(this.getNDim(), spectrum.getSignalCount()); } - private int[][] initAssignments(final int nDim, final int nSignal){ - final int[][] temp = new int[nDim][nSignal]; + private int[][] initAssignments(final int nDim, final int nSignals){ + final int[][] temp = new int[nDim][nSignals]; for (int i = 0; i < nDim; i++) { - for (int j = 0; j < nSignal; j++) { + for (int j = 0; j < nSignals; j++) { temp[i][j] = -1; } } @@ -61,7 +63,7 @@ private int[][] initAssignments(final int nDim, final int nSignal){ * @return */ public boolean setAssignment(final int dim, final int indexInSpectrum, final int indexInAtomContainer){ - if(!this.checkDimension(dim) || !this.checkSpectrumIndex(dim, indexInSpectrum)){ + if(!this.containsDim(dim) || !this.checkSpectrumIndex(dim, indexInSpectrum)){ return false; } this.assignments[dim][indexInSpectrum] = indexInAtomContainer; @@ -69,8 +71,8 @@ public boolean setAssignment(final int dim, final int indexInSpectrum, final int return true; } - public boolean setAssignments(final int dim, final ArrayList indicesInAtomContainer){ - if(!this.checkDimension(dim) || !this.checkInputListSize(indicesInAtomContainer.size())){ + public boolean setAssignments(final int dim, final List indicesInAtomContainer){ + if(!this.containsDim(dim) || !this.checkInputListSize(indicesInAtomContainer.size())){ return false; } for (int i = 0; i < this.getAssignmentsCount(); i++) { @@ -81,7 +83,7 @@ public boolean setAssignments(final int dim, final ArrayList indicesInA } public Integer getAtomIndex(final int dim, final int indexInSpectrum){ - 
if(!this.checkDimension(dim) || !this.checkSpectrumIndex(dim, indexInSpectrum)){ + if(!this.containsDim(dim) || !this.checkSpectrumIndex(dim, indexInSpectrum)){ return null; } @@ -89,7 +91,7 @@ public Integer getAtomIndex(final int dim, final int indexInSpectrum){ } public Integer getSignalIndex(final int dim, final int atomIndexInStructure){ - if(!this.checkDimension(dim)){ + if(!this.containsDim(dim)){ return null; } for (int signalIndex = 0; signalIndex < this.assignments[dim].length; signalIndex++) { @@ -101,20 +103,16 @@ public Integer getSignalIndex(final int dim, final int atomIndexInStructure){ return -1; } - public int[] getAtomIndices(final int dim){ - if(!this.checkDimension(dim)){ + public List getAtomIndices(final int dim){ + if(!this.containsDim(dim)){ return null; } - return this.assignments[dim]; - } - - public int getDimCount(){ - return this.nDim; + return Arrays.asList(ArrayUtils.toObject(this.assignments[dim])); } public int getAssignmentsCount(){ - if(this.getDimCount() > 0){ + if(this.getNDim() > 0){ return this.assignments[0].length; } return 0; @@ -122,7 +120,7 @@ public int getAssignmentsCount(){ public int getSetAssignmentsCount(final int dim){ int setAssignmentsCounter = 0; - if((this.getDimCount() > 0) && (this.checkDimension(dim))){ + if(this.containsDim(dim)){ for (int j = 0; j < this.assignments[dim].length; j++) { if(this.assignments[dim][j] != -1){ setAssignmentsCounter++; @@ -133,7 +131,7 @@ public int getSetAssignmentsCount(final int dim){ } public Boolean isFullyAssigned(final int dim){ - if(!this.checkDimension(dim)){ + if(!this.containsDim(dim)){ return null; } @@ -148,11 +146,11 @@ public Boolean isFullyAssigned(final int dim){ * @return */ public boolean addAssignment(final int[] atomIndicesInStructure){ - if(atomIndicesInStructure.length != this.nDim){ + if(atomIndicesInStructure.length != this.getNDim()){ return false; } - final int[][] extendedAssignments = new int[this.nDim][this.getAssignmentsCount()+1]; - for (int dim = 
0; dim < this.nDim; dim++) { + final int[][] extendedAssignments = new int[this.getNDim()][this.getAssignmentsCount()+1]; + for (int dim = 0; dim < this.getNDim(); dim++) { for (int i = 0; i < this.getAssignmentsCount(); i++) { extendedAssignments[dim][i] = this.getAtomIndex(dim, i); } @@ -161,12 +159,8 @@ public boolean addAssignment(final int[] atomIndicesInStructure){ this.assignments = extendedAssignments; return true; - } - - public boolean checkDimension(final int dim){ - return (dim >= 0) && (dim < this.nDim); } - + private boolean checkSpectrumIndex(final int dim, final int indexInSpectrum){ return (indexInSpectrum >= 0) && (indexInSpectrum < this.assignments[dim].length); } diff --git a/src/casekit/NMR/model/Signal.java b/src/casekit/NMR/model/Signal.java index 72818e7..b17da9d 100644 --- a/src/casekit/NMR/model/Signal.java +++ b/src/casekit/NMR/model/Signal.java @@ -27,68 +27,56 @@ */ package casekit.NMR.model; +import casekit.NMR.model.dimensional.DimensionalNMR; + /** * * @author Michael Wenk [https://github.com/michaelwenk] */ -public class Signal { - - private final int nDim; +public class Signal extends DimensionalNMR { /** * Am array of doubles to store the chemical shift of */ private Double[] shifts; - private final String[] nuclei; - /* Signal intensity in arbitrary values */ private Double intensity; private String multiplicity; - private Integer phase; - public final static int PHASE_NONE = 0, PHASE_POSITIVE = 1, PHASE_NEGATIVE = 2; - public final static String[] PHASENAMES = {"NONE", "POSITIVE", "NEGATIVE"}; +// private Integer phase; +// public final static int PHASE_NONE = 0, PHASE_POSITIVE = 1, PHASE_NEGATIVE = 2; +// public final static String[] PHASENAMES = {"NONE", "POSITIVE", "NEGATIVE"}; - public Signal(final String[] nuclei) { + public Signal(final String[] nuclei) throws Exception { this(nuclei, null); } - public Signal(final String[] nuclei, final Double[] shifts) { + public Signal(final String[] nuclei, final Double[] shifts) 
throws Exception { this(nuclei, shifts, null, null); } - public Signal(final String[] nuclei, final Double[] shifts, final String multiplicity, final Double intensity) { - this.nuclei = nuclei; - this.nDim = this.nuclei.length; - this.shifts = this.initShifts(shifts, this.nDim); + public Signal(final String[] nuclei, final Double[] shifts, final String multiplicity, final Double intensity) throws Exception { + super(nuclei); + this.shifts = this.initShifts(shifts, this.getNDim()); this.multiplicity = multiplicity; this.intensity = intensity; } - private Double[] initShifts(final Double[] shifts, final int nDim){ + private Double[] initShifts(final Double[] shifts, final int nDim) throws Exception { + if((shifts == null) || (shifts.length != nDim)){ + throw new Exception("Number of given nuclei (" + nDim + ") and shifts (" + shifts.length + ") is not the same!!!"); + } final Double[] tempShifts = new Double[nDim]; for (int d = 0; d < nDim; d++) { - if((shifts != null) && (shifts.length == nDim)){ - tempShifts[d] = shifts[d]; - } else { - tempShifts[d] = null; - } + tempShifts[d] = shifts[d]; } return tempShifts; } - - public int getDimCount(){ - return this.nDim; - } - - public String[] getNuclei(){ - return this.nuclei; - } - + public boolean setShift(final Double shift, final int dim) { - if(!this.checkDimension(dim)){ + if(!this.containsDim(dim)){ return false; } this.shifts[dim] = shift; @@ -97,7 +85,7 @@ public boolean setShift(final Double shift, final int dim) { } public Double getShift(final int dim) { - if(!this.checkDimension(dim)){ + if(!this.containsDim(dim)){ return null; } return this.shifts[dim]; @@ -119,27 +107,22 @@ public String getMultiplicity() { return this.multiplicity; } - public void setPhase(final Integer phase) { - this.phase = phase; - } - - public Integer getPhase() { - return this.phase; - } +// public void setPhase(final Integer phase) { +// this.phase = phase; +// } +// +// public Integer getPhase() { +// return this.phase; +// } - 
public boolean checkDimension(final int dim){ - return (dim >= 0) && (dim < this.nDim); - } - - /** - * - * @return - */ - public Signal getClone(){ - final Signal clone = new Signal(this.nuclei, this.shifts, this.multiplicity, this.intensity); - clone.setPhase(this.phase); - - return clone; + + public Signal getClone() throws Exception { +// final Signal clone = new Signal(this.getDimNames(), this.shifts, this.multiplicity, this.intensity); +// clone.setPhase(this.phase); +// +// return clone; + + return new Signal(this.getNuclei(), this.shifts, this.multiplicity, this.intensity); } } From d2c73c42714f2fa448233ec0506ff99bbe240376 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Thu, 11 Jul 2019 14:08:29 +0200 Subject: [PATCH 097/405] - this class now extends the DimensionalNMR class - class functionality extended by equivalent signal detection --- src/casekit/NMR/model/Spectrum.java | 101 ++++++++++++++-------------- 1 file changed, 51 insertions(+), 50 deletions(-) diff --git a/src/casekit/NMR/model/Spectrum.java b/src/casekit/NMR/model/Spectrum.java index 93a0cf1..c90c03f 100644 --- a/src/casekit/NMR/model/Spectrum.java +++ b/src/casekit/NMR/model/Spectrum.java @@ -28,8 +28,9 @@ */ package casekit.NMR.model; +import casekit.NMR.model.dimensional.DimensionalNMR; + import java.util.ArrayList; -import java.util.Arrays; import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; @@ -38,7 +39,7 @@ * * @author Michael Wenk [https://github.com/michaelwenk] */ -public class Spectrum { +public class Spectrum extends DimensionalNMR { /** * An arbitrary name or description that can be assigned to this spectrum for identification purposes. @@ -50,15 +51,6 @@ public class Spectrum { * numerous experiments yielding basically identical information having different names */ private String specType; - - /** - * Declares how many axes are in involved in this spectrum. - */ - private final int nDim; - /** - * The nuclei of the different axes. 
- */ - private final String nuclei[]; /** * The proton frequency of the spectrometer used to record this spectrum. */ @@ -72,22 +64,13 @@ public class Spectrum { public Spectrum(final String[] nuclei) { - this.nuclei = nuclei; - this.nDim = this.nuclei.length; + super(nuclei); this.signals = new ArrayList<>(); this.signalCount = 0; this.equivalences = new ArrayList<>(); this.equivalentSignals = new ArrayList[]{}; } - - public String[] getNuclei(){ - return this.nuclei; - } - - public int getDimCount(){ - return this.nDim; - } - + public void setSpecType(final String specType){ this.specType = specType; } @@ -105,7 +88,7 @@ public String getSpecDescription(){ } public final boolean setShifts(final ArrayList shiftList, final int dim){ - if(!this.checkDimension(dim) || (!this.checkInputListSize(shiftList.size()))){ + if(!this.containsDim(dim) || (!this.checkInputListSize(shiftList.size()))){ return false; } for (int i = 0; i < shiftList.size(); i++) { @@ -116,7 +99,7 @@ public final boolean setShifts(final ArrayList shiftList, final int dim) } public final boolean setShift(final Double shift, final int dim, final int signalIndex){ - if(!this.checkDimension(dim) || !this.checkSignalIndex(signalIndex)){ + if(!this.containsDim(dim) || !this.checkSignalIndex(signalIndex)){ return false; } this.getSignal(signalIndex).setShift(shift, dim); @@ -136,7 +119,7 @@ public int getSignalCount() { */ public boolean addSignals(final ArrayList signals){ for (final Signal signal : signals) { - if (!this.checkDimCount(signal.getDimCount()) || !this.checkNuclei(signal.getNuclei())) { + if (!this.compareNuclei(signal.getNuclei())) { return false; } } @@ -165,7 +148,7 @@ public boolean addSignal(final Signal signal) { * @return */ public boolean addSignal(final Signal signal, final int equivalentSignalIndex) { - if((signal == null) || !this.checkDimCount(signal.getDimCount()) || !this.checkNuclei(signal.getNuclei())){ + if((signal == null) || !this.compareNuclei(signal.getNuclei())){ 
return false; } // add signal at the end of signal list @@ -203,28 +186,10 @@ private boolean checkSignalIndex(final Integer signalIndex){ return (signalIndex != null) && (signalIndex >= 0) && (signalIndex < this.getSignalCount()); } - /** - * Checks whether the input dimension exists in this spectrum or not. - * - * @param dim - * @return - */ - public boolean checkDimension(final int dim){ - return (dim >= 0) && (dim < this.nDim); - } - private boolean checkInputListSize(final int size){ return (size == this.getSignalCount()); } - private boolean checkDimCount(final int ndim){ - return ndim == this.getDimCount(); - } - - private boolean checkNuclei(final String[] nuclei){ - return Arrays.equals(nuclei, this.getNuclei()); - } - /** * Returns an NMRSignal at position number in the List * @param signalIndex @@ -281,7 +246,7 @@ public boolean setIntensity(final double intensity, final int signalIndex){ public ArrayList getShifts(final int dim){ final ArrayList shifts = new ArrayList<>(); - if(!this.checkDimension(dim)){ + if(!this.containsDim(dim)){ return shifts; } for (final Signal sig : this.signals) { @@ -448,6 +413,42 @@ public boolean setEquivalence(final int signalIndex, final int isEquivalentToSig return true; } + /** + * Detects equivalent signals within this spectrum by a pick precision of 0.0 (no shift deviations are allowed). + * + * @see #detectEquivalences(double) + */ + public void detectEquivalences(){ + this.detectEquivalences(0.0); + } + + /** + * Detects equivalent signals within this spectrum by a given pick precision (shift deviations are allowed). 
+ * + * @param pickPrecision tolerance value used for signal shift matching to find equivalent signals + * + * @see #getEquivalence(int) + * @see #getEquivalences() + * @see #getEquivalentSignals(int) + * @see #getEquivalentSignalClasses() + */ + public void detectEquivalences(final double pickPrecision){ + int equivalentSignalIndex; + for (final Signal signal : this.getSignals()) { + equivalentSignalIndex = -1; + for (final int closestSignalIndex : this.pickSignals(signal.getShift(0), 0, pickPrecision)) { + if (this.getSignalIndex(signal) <= closestSignalIndex) { + continue; + } + if (signal.getMultiplicity().equals(this.getSignal(closestSignalIndex).getMultiplicity())) { + equivalentSignalIndex = closestSignalIndex; + break; + } + } + this.setEquivalence(this.getSignalIndex(signal), equivalentSignalIndex); + } + } + /** * Returns the position of an NMRSignal the List * @param signal @@ -497,7 +498,7 @@ public String getStandard() { */ public int pickClosestSignal(final double shift, final int dim, final double pickPrecision) { int matchIndex = -1; - if(!this.checkDimension(dim)){ + if(!this.containsDim(dim)){ return matchIndex; } double diff = pickPrecision; @@ -522,7 +523,7 @@ public int pickClosestSignal(final double shift, final int dim, final double pic */ public ArrayList pickSignals(final Double shift, final int dim, final double pickPrecision) { final ArrayList pickedSignals = new ArrayList<>(); - if(!this.checkDimension(dim)){ + if(!this.containsDim(dim)){ return pickedSignals; } for (int s = 0; s < this.getSignalCount(); s++) { @@ -537,12 +538,12 @@ public int compare(final Integer pickedSignalIndex1, final Integer pickedSignalI return Double.compare(Math.abs(shift - getShift(pickedSignalIndex1, dim)), Math.abs(shift - getShift(pickedSignalIndex2, dim))); } }); - + return pickedSignals; } - public Spectrum getClone() { - final Spectrum clone = new Spectrum(this.nuclei); + public Spectrum getClone() throws Exception { + final Spectrum clone = new 
Spectrum(this.getNuclei()); for (int i = 0; i < this.getSignalCount(); i++) { clone.addSignal(this.getSignal(i).getClone(), this.getEquivalence(i)); } From 8e18771fee2a57481a4870f2fb9cb3a5a82d2799 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Thu, 11 Jul 2019 14:28:46 +0200 Subject: [PATCH 098/405] - redesigned class for parsing CSV and XML files in Bruker's TopSpin NMR file format --- src/casekit/NMR/parse/Parser.java | 206 ++++++++++++++++++++++++++++++ 1 file changed, 206 insertions(+) create mode 100644 src/casekit/NMR/parse/Parser.java diff --git a/src/casekit/NMR/parse/Parser.java b/src/casekit/NMR/parse/Parser.java new file mode 100644 index 0000000..67430d5 --- /dev/null +++ b/src/casekit/NMR/parse/Parser.java @@ -0,0 +1,206 @@ +/* + * The MIT License + * + * Copyright 2019 Michael Wenk [https://github.com/michaelwenk] + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +package casekit.NMR.parse; + +import casekit.NMR.Utils; +import casekit.NMR.model.Signal; +import casekit.NMR.model.Spectrum; +import casekit.io.FileParser; +import org.w3c.dom.Document; +import org.w3c.dom.NodeList; +import org.xml.sax.SAXException; +import javax.xml.parsers.ParserConfigurationException; +import java.io.BufferedReader; +import java.io.IOException; +import java.util.ArrayList; + + +public class Parser { + + + /** + * Creates a Spectrum class object from given 1D NMR input file in CSV or XML format. + * The extension of given file is used to determine the format. + * + * @param pathToFile path to peak table (Bruker's TopSpin csv or xml + * file format) + * @param nucleus nucleus to use for spectrum creation, e.g. "13C" + * @return + * @throws Exception + */ + public static Spectrum parse1DNMR(final String pathToFile, final String nucleus) throws Exception { + switch (Utils.getFileFormat(pathToFile)){ + case "csv": + return CSVtoSpectrum(pathToFile, new int[]{4}, new String[]{nucleus}, 6); + case "xml": + return XMLtoSpectrum(pathToFile, 1, new int[]{1}, new String[]{nucleus}); + default: + return null; + } + } + + /** + * Creates a Spectrum class object from given 2D NMR input file in CSV or XML format. + * The extension of given file is used to determine the format. + * + * @param pathToFile path to peak table (Bruker's TopSpin csv or xml + * file format) + * @param nuclei nuclei to use for spectrum creation, e.g. ["13C", "13C] + * + * @return + * @throws Exception + */ + public static Spectrum parse2DNMR(final String pathToFile, final String[] nuclei) throws Exception { + switch (Utils.getFileFormat(pathToFile)){ + case "csv": + return CSVtoSpectrum(pathToFile, new int[]{5, 6}, nuclei, 9); + case "xml": + return XMLtoSpectrum(pathToFile, 2, new int[]{2, 1}, nuclei); + default: + return null; + } + } + + /** + * Reads a specific column of a NMR peak table and stores it into an + * ArrayList object. 
+ * + * @param pathToCSV path to NMR peak table in CSV file format + * @param column column index to select in peak table + * @return ArrayList of Double shift values + * @throws IOException + */ + private static ArrayList CSVtoPeakList(final String pathToCSV, final int column) throws IOException { + final ArrayList shifts = new ArrayList<>(); + String line; + String[] tokens; + final BufferedReader fileReader = FileParser.parseText(pathToCSV); + while ((line = fileReader.readLine()) != null) { + tokens = line.split(","); + // get shift value + if (tokens[column].trim().matches("^[+|-]{0,1}\\d+\\.{0,1}\\d*")) { + shifts.add(Double.parseDouble(tokens[column].trim())); + } + } + fileReader.close(); + + return shifts; + } + + /** + * Reads specific columns of one NMR peak table to obtain a Spectrum class + * object and set intensitiy values. + * The number of columns and atom types has to be the same and defines the + * dimension of the returning spectrum. + * + * @param pathToCSV path to NMR peak table in CSV file format + * @param columns column indices to select in peak table + * @param nuclei nuclei for each dimension + * @param intensityColumnIndex column index for intensity values + * @return Spectrum class object containing the peak lists + * @throws Exception + */ + private static Spectrum CSVtoSpectrum(final String pathToCSV, final int[] columns, final String[] nuclei, final int intensityColumnIndex) throws Exception { + // assumes the same number of selected columns (dimensions) and atom types + if(columns.length != nuclei.length){ + return null; + } + final Spectrum spectrum = new Spectrum(nuclei); + ArrayList shiftList; + for (int col = 0; col < columns.length; col++) { + shiftList = CSVtoPeakList(pathToCSV, columns[col]); + if(col == 0){ + for (int i = 0; i < shiftList.size(); i++) { + spectrum.addSignal(new Signal(spectrum.getNuclei())); + } + } + if(!spectrum.setShifts(shiftList, col)){ + return null; + } + } + 
spectrum.setIntensities(CSVtoPeakList(pathToCSV, intensityColumnIndex)); + + return spectrum; + } + + /** + * Reads a NMR peak XML file and returns one attribute of nodes (column) into an + * ArrayList object. + * The XML file must be in Bruker's TopSpin format. + * + * @param pathToXML Path to XML file + * @param dim number of dimensions of given data 1 (1D) or 2 (2D) + * @param attribute which attribute index in XML peak nodes should be used: + * 1 (shift of 1st dimension), 2 (shift of 2nd dimension if 2D data, + * intensity if 1D data) or 3 (intensity if 2D data) + * + * @return ArrayList of Double shift values + * @throws IOException + * @throws javax.xml.parsers.ParserConfigurationException + * @throws org.xml.sax.SAXException + */ + private static ArrayList XMLtoPeakList(final String pathToXML, final int dim, final int attribute) throws IOException, ParserConfigurationException, SAXException { + // assumes a attribute value between 1 and 3 + if(attribute < 1 || attribute > 3){ + return null; + } + + final ArrayList shifts = new ArrayList<>(); + final Document doc = FileParser.parseXML(pathToXML); + + final NodeList peakLists = doc.getElementsByTagName("Peak" + dim + "D"); + for (int i = 0; i < peakLists.getLength(); i++) { + shifts.add(Double.parseDouble(peakLists.item(i).getAttributes().item(attribute - 1).getNodeValue())); + } + + return shifts; + } + + /** + * Reads specific columns of NMR XML files to obtain a Spectrum class + * object. + * The XML file must be in Bruker's TopSpin format. 
+ * + * @param pathToXML path to NMR XML file in Bruker's TopSpin XML file format + * @param ndim number of dimensions: 1 (1D) or 2 (2D) + * @param attributes which attribute indices in XML peak nodes should be used: + * 1 (shift of 1st dimension), 2 (shift of 2nd dimension if 2D data) + * @param nuclei nuclei for each dimension + * @return Spectrum class object containing the selected peak lists + * @throws Exception + */ + private static Spectrum XMLtoSpectrum(final String pathToXML, final int ndim, final int[] attributes, final String[] nuclei) throws Exception { + + // assumes the same number of dims, attributes and atom types and a maximum number of dims of 2 + if((ndim != attributes.length) || (ndim != nuclei.length) || (attributes.length != nuclei.length) + || (ndim < 1 || ndim > 2)){ + return null; + } + final Spectrum spectrum = new Spectrum(nuclei); + ArrayList shiftList; + for (int dim = 0; dim < ndim; dim++) { + shiftList = XMLtoPeakList(pathToXML, ndim, attributes[dim]); + if(dim == 0){ + for (int i = 0; i < (shiftList != null ? 
shiftList.size() : 0); i++) { + spectrum.addSignal(new Signal(spectrum.getNuclei())); + } + } + if(!spectrum.setShifts(shiftList, dim)){ + return null; + } + } + spectrum.setIntensities(XMLtoPeakList(pathToXML, ndim, ndim + 1)); + + return spectrum; + } +} From 86b7962796b286d109cacb2a84b1bbd016dafa4e Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Thu, 11 Jul 2019 15:06:29 +0200 Subject: [PATCH 099/405] - some outcommented source code from Utils class added which could be useful in future to correct spectra signal matches --- src/casekit/NMR/match/Matcher.java | 48 ++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/src/casekit/NMR/match/Matcher.java b/src/casekit/NMR/match/Matcher.java index 90b5b9d..9c2af81 100644 --- a/src/casekit/NMR/match/Matcher.java +++ b/src/casekit/NMR/match/Matcher.java @@ -20,7 +20,9 @@ import org.openscience.cdk.exception.CDKException; import org.openscience.cdk.similarity.Tanimoto; +import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.HashSet; public class Matcher { @@ -277,4 +279,50 @@ public static Assignment matchSpectra(final Spectrum spectrum1, final Spectrum s return matchAssignment; } + + + // might be useful in future to correct matches between spectra + +// /** +// * Corrects a match list regarding a given shift list and an atom container. +// * This is useful when two ore more shift values (e.g. DEPT shifts) match +// * with the same atom in the atom container. So the purpose here is to +// * enable more unambiguous matches. This method first looks for unambiguous +// * matches and calculates the median of the difference values between the +// * shift list values and the shifts of atom container. Then, all shift list +// * values are adjusted (+/-) with this median value. 
+// * +// * @param shiftList1 Shift value list to search in +// * @param shiftList2 Shift value list to match in shiftList1 +// * @param matchesInShiftList1 Matcher list to correct +// * @param tol Tolerance value +// * @return +// */ +// public static ArrayList correctShiftMatches(final ArrayList shiftList1, final ArrayList shiftList2, final ArrayList matchesInShiftList1, final double tol) { +// +// int matchIndex; +// // get differences of unique matches between query shift and ac shifts +// ArrayList diffs = new ArrayList<>(); +// final HashSet uniqueMatchIndicesSet = new HashSet<>(matchesInShiftList1); +// for (final int uniqueMatchIndex : uniqueMatchIndicesSet) { +// if (Collections.frequency(matchesInShiftList1, uniqueMatchIndex) == 1) { +// matchIndex = matchesInShiftList1.indexOf(uniqueMatchIndex); +// if (matchesInShiftList1.get(matchIndex) >= 0) { +// diffs.add(shiftList2.get(matchIndex) - shiftList1.get(matchesInShiftList1.get(matchIndex))); +// } +// } +// } +// // calculate the median of found unique match differences +// if (diffs.size() > 0) { +// final double median = casekit.NMR.Utils.getMedian(diffs); +// // add or subtract the median of the differences to all shift list values (input) and match again then +// for (int i = 0; i < shiftList2.size(); i++) { +// shiftList2.set(i, shiftList2.get(i) - median); +// } +// // rematch +// return casekit.NMR.Utils.findShiftMatches(shiftList1, shiftList2, tol); +// } +// +// return matchesInShiftList1; +// } } From 736ad249b0ec620d9798c6f9e0edc101384f71db Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Thu, 11 Jul 2019 15:13:46 +0200 Subject: [PATCH 100/405] - new class for chemical shift, signal and spectrum prediction - limited to carbon atoms (at moment) --- src/casekit/NMR/predict/Predict.java | 67 +++++++++++++++------------- 1 file changed, 37 insertions(+), 30 deletions(-) diff --git a/src/casekit/NMR/predict/Predict.java b/src/casekit/NMR/predict/Predict.java index 5bcfeb5..7ba7c82 100644 --- 
a/src/casekit/NMR/predict/Predict.java +++ b/src/casekit/NMR/predict/Predict.java @@ -26,9 +26,11 @@ import casekit.NMR.Utils; import casekit.NMR.model.Signal; import casekit.NMR.model.Spectrum; -import hose.HOSECodeBuilder; import java.util.ArrayList; import java.util.HashMap; + +import hose.HOSECodeBuilder; +import hose.model.ConnectionTree; import org.openscience.cdk.exception.CDKException; import org.openscience.cdk.interfaces.IAtom; import org.openscience.cdk.interfaces.IAtomContainer; @@ -42,13 +44,15 @@ public class Predict { /** * Predicts a shift value for a central atom based on its HOSE code and a * given HOSE code lookup table. The prediction is done by using the mean - * of all occurring shifts in lookup table for the given HOSE code. + * of all occurring shifts in lookup table for the given HOSE code.
+ * Specified for carbons (13C) only -> {@link casekit.NMR.Utils#getMultiplicityFromHydrogenCount(int)}. * - * @param HOSECodeLookupTable - * @param HOSECode + * @param HOSECodeLookupTable HashMap containing HOSE codes as keys and a list of chemical shifts + * of occurring central atoms as values + * @param HOSECode specific HOSE code to use for shift prediction * @return null if HOSE code does not exist in lookup table * - * @see casekit.NMR.Utils#getRMS(java.util.ArrayList) + * @see casekit.NMR.Utils#getRMS(ArrayList) * */ public static Double predictShift(final HashMap> HOSECodeLookupTable, final String HOSECode) { @@ -60,62 +64,65 @@ public static Double predictShift(final HashMap> HOSEC } /** - * Specified for carbons (13C) only. Not generic at the moment because of - * usage of {@link casekit.NMR.Utils#getMultiplicityFromHydrogenCount(int)} - * with {@code hCount}. + * Predicts a signal for a central atom based on its HOSE code and a + * given HOSE code lookup table. The prediction is done by using the mean + * of all occurring shifts in lookup table for the given HOSE code.
+ * Specified for carbons (13C) only -> {@link casekit.NMR.Utils#getMultiplicityFromHydrogenCount(int)}. + * + * @param HOSECodeLookupTable HashMap containing HOSE codes as keys and a list of chemical shifts + * of occurring central atoms as values + * @param ac structure to predict from + * @param atomIndex index of central atom in structure for HOSE code generation + * @param maxSphere maximum sphere to use for HOSE code generation + * @param nucleus nucleus (e.g. "13C") for signal creation * - * @param HOSECodeLookupTable - * @param ac - * @param atomIndex - * @param maxSphere - * @param nucleus - * @param hCount * @return null if HOSE code of selected atom does not exist in lookup table * * @throws CDKException * - * @see #predictShift(java.util.HashMap, java.lang.String) + * @see #predictShift(HashMap, String) * */ - public static Signal predictSignal(final HashMap> HOSECodeLookupTable, final IAtomContainer ac, final int atomIndex, final Integer maxSphere, final String nucleus, final Integer hCount) throws CDKException { - if (!Utils.checkIndexInAtomContainer(ac, atomIndex) || (hCount == null)) { + public static Signal predictSignal(final HashMap> HOSECodeLookupTable, final IAtomContainer ac, final int atomIndex, final Integer maxSphere, final String nucleus) throws Exception { + if (!Utils.checkIndexInAtomContainer(ac, atomIndex)) { return null; } - final Double predictedShift = Predict.predictShift(HOSECodeLookupTable, HOSECodeBuilder.buildHOSECode(ac, atomIndex, maxSphere, false)); + final String HOSECode = HOSECodeBuilder.buildHOSECode(ac, atomIndex, maxSphere, false); + final Double predictedShift = Predict.predictShift(HOSECodeLookupTable, HOSECode); if (predictedShift == null) { return null; } return new Signal( new String[]{nucleus}, new Double[]{predictedShift}, - Utils.getMultiplicityFromHydrogenCount(hCount), + Utils.getMultiplicityFromHydrogenCount(ac.getAtom(atomIndex).getImplicitHydrogenCount()), null ); } /** - * Specified for carbons (13C) 
only. Not generic at the moment because of - * {@link casekit.NMR.Utils#getMultiplicityFromHydrogenCount(int)}. + * Predicts a spectrum for a given structure based on HOSE code of atoms with specified nucleus and a + * given HOSE code lookup table.
+ * Specified for carbons (13C) only -> {@link casekit.NMR.Utils#getMultiplicityFromHydrogenCount(int)}. * - * @param HOSECodeLookupTable - * @param ac - * @param maxSphere - * @param nucleus + * @param HOSECodeLookupTable HashMap containing HOSE codes as keys and a list of chemical shifts + * of occurring central atoms as values + * @param ac structure to predict from + * @param maxSphere maximum sphere to use for HOSE code generation + * @param nucleus nucleus (e.g. "13C") for signal creation * @return null if a HOSE code of one atom does not exist in lookup table * * @throws org.openscience.cdk.exception.CDKException * - * @see #predictSignal(java.util.HashMap, - * org.openscience.cdk.interfaces.IAtomContainer, int, java.lang.Integer, - * java.lang.String, java.lang.Integer) + * @see #predictSignal(HashMap, IAtomContainer, int, Integer, String) * */ - public static Spectrum predictSpectrum(final HashMap> HOSECodeLookupTable, final IAtomContainer ac, final Integer maxSphere, final String nucleus) throws CDKException { + public static Spectrum predictSpectrum(final HashMap> HOSECodeLookupTable, final IAtomContainer ac, final Integer maxSphere, final String nucleus) throws Exception { final Spectrum predictedSpectrum = new Spectrum(new String[]{nucleus}); Signal signal; for (final IAtom atom : ac.atoms()) { if (atom.getSymbol().equals(Utils.getAtomTypeFromSpectrum(predictedSpectrum, 0))) { - signal = Predict.predictSignal(HOSECodeLookupTable, ac, atom.getIndex(), maxSphere, nucleus, atom.getImplicitHydrogenCount()); + signal = Predict.predictSignal(HOSECodeLookupTable, ac, atom.getIndex(), maxSphere, nucleus); if(signal == null){ return null; } From 0a0096dcbf7e6c0e725862848b3a0ceea5ff000b Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Thu, 11 Jul 2019 15:32:01 +0200 Subject: [PATCH 101/405] - new classes for operations with NMRShiftDB and MongoDB --- src/casekit/NMR/dbservice/MongoDB.java | 73 ++++ src/casekit/NMR/dbservice/NMRShiftDB.java | 423 
++++++++++++++++++++++ 2 files changed, 496 insertions(+) create mode 100644 src/casekit/NMR/dbservice/MongoDB.java create mode 100644 src/casekit/NMR/dbservice/NMRShiftDB.java diff --git a/src/casekit/NMR/dbservice/MongoDB.java b/src/casekit/NMR/dbservice/MongoDB.java new file mode 100644 index 0000000..0f548a7 --- /dev/null +++ b/src/casekit/NMR/dbservice/MongoDB.java @@ -0,0 +1,73 @@ +/* + * The MIT License + * + * Copyright 2019 Michael Wenk [https://github.com/michaelwenk] + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ +package casekit.NMR.dbservice; + +import com.mongodb.MongoClient; +import com.mongodb.MongoClientOptions; +import com.mongodb.MongoCredential; +import com.mongodb.ServerAddress; +import com.mongodb.client.MongoCollection; +import com.mongodb.client.MongoDatabase; +import org.bson.Document; +import org.openscience.cdk.exception.CDKException; + +/** + * + * @author Michael Wenk [https://github.com/michaelwenk] + */ +public class MongoDB { + + + public static MongoClient login(final String mongoUser, final String mongoPassword, final String mongoAuthDB) throws CDKException { + MongoClient mongo; + try { + // Creating a Mongo client + mongo = new MongoClient( + new ServerAddress("127.0.0.1", 27017), + MongoCredential.createCredential( + mongoUser, + mongoAuthDB, + mongoPassword.toCharArray()), + MongoClientOptions.builder().build()); + System.out.println("Login to MongoDB was successfull"); + // Accessing the database + } catch (Exception e) { + e.printStackTrace(); + System.err.println(Thread.currentThread().getStackTrace()[1].getMethodName() + ": could not connect to MongoDB!"); + + return null; + } + + return mongo; + } + + public static MongoDatabase getDatabase(final MongoClient mongo, final String mongoDBName){ + return mongo.getDatabase(mongoDBName); + } + + public static MongoCollection getCollection(final MongoClient mongo, final String mongoDBName, final String mongoDBCollection) { + final MongoDatabase database = MongoDB.getDatabase(mongo, mongoDBName); + if (database == null) { + return null; + } + System.out.println("Access to database \"" + mongoDBName + "\" was successfull"); + // Retrieving a collection + final MongoCollection collection = database.getCollection(mongoDBCollection); + System.out.println("Retrieval of collection \"" + mongoDBCollection + "\" was successfull -> size: " + collection.countDocuments()); + + return collection; + } + + public static void logout(final MongoClient mongo) { + mongo.close(); + } +} diff --git 
a/src/casekit/NMR/dbservice/NMRShiftDB.java b/src/casekit/NMR/dbservice/NMRShiftDB.java new file mode 100644 index 0000000..7c26dde --- /dev/null +++ b/src/casekit/NMR/dbservice/NMRShiftDB.java @@ -0,0 +1,423 @@ +/* + * The MIT License + * + * Copyright 2019 Michael Wenk [https://github.com/michaelwenk] + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +package casekit.NMR.dbservice; + +import casekit.NMR.Utils; +import casekit.NMR.model.Assignment; +import casekit.NMR.model.Signal; +import casekit.NMR.model.Spectrum; +import org.openscience.cdk.exception.CDKException; +import org.openscience.cdk.interfaces.IAtomContainer; +import org.openscience.cdk.interfaces.IAtomContainerSet; +import org.openscience.cdk.io.iterator.IteratingSDFReader; +import org.openscience.cdk.silent.AtomContainerSet; +import org.openscience.cdk.silent.SilentChemObjectBuilder; + +import java.io.FileNotFoundException; +import java.io.FileReader; +import java.util.*; + +public class NMRShiftDB { + /** + * Returns the molecules of a given MOL/SDF file. + * This function sets the molecule aromaticity (with allowed exocyclic pi + * bonds) by using the + * {@link Utils#setAromaticitiesInAtomContainer(org.openscience.cdk.interfaces.IAtomContainer)} + * function. + * + * @param pathToNMRShiftDB path to NMRShiftDB file + * @param setAromaticity whether to set aromaticities in structures or not + * @return + * @throws FileNotFoundException + * @throws CDKException + * @deprecated + */ + public static IAtomContainerSet getStructuresFromSDFile(final String pathToNMRShiftDB, final boolean setAromaticity) throws FileNotFoundException, CDKException { + final IAtomContainerSet acSet = new AtomContainerSet(); + final IteratingSDFReader iterator = new IteratingSDFReader( + new FileReader(pathToNMRShiftDB), + SilentChemObjectBuilder.getInstance() + ); + IAtomContainer ac; + while (iterator.hasNext()) { + ac = iterator.next(); + if(setAromaticity){ + Utils.setAromaticitiesInAtomContainer(ac); + } + acSet.addAtomContainer(ac); + } + + return acSet; + } + + /** + * Returns all spectra for each molecule and a given nucleus which exist as + * property in a NMRSHiftDB SDF. 
+ * + * @param pathToNMRShiftDB path to NMRShiftDB file + * @param nucleus nucleus of requested spectra + * @return + * @throws FileNotFoundException + * @throws CDKException + * + */ + public static ArrayList> getSpectraFromNMRShiftDB(final String pathToNMRShiftDB, final String nucleus) throws FileNotFoundException, CDKException { + final ArrayList> spectraSet = new ArrayList<>(); + final IteratingSDFReader iterator = new IteratingSDFReader( + new FileReader(pathToNMRShiftDB), + SilentChemObjectBuilder.getInstance() + ); + IAtomContainer ac; + Spectrum spectrum; + ArrayList spectra; + HashMap spectraStrings; + String spectrumIndexInRecord, solvent; + while (iterator.hasNext()) { + ac = iterator.next(); + if(ac == null){ + continue; + } + spectraStrings = getSpectraStrings(ac, nucleus); + if(spectraStrings.isEmpty() || (ac.getProperty("Solvent") == null)){ + continue; + } + spectra = new ArrayList<>(); + for (final String spectrumPropertyString : spectraStrings.keySet()) { + spectrum = NMRShiftDBSpectrumToSpectrum(spectraStrings.get(spectrumPropertyString), nucleus); + if(spectrum == null){ + continue; + } + spectrumIndexInRecord = spectrumPropertyString.split("\\s")[spectrumPropertyString.split("\\s").length - 1]; + solvent = getSolvent(ac.getProperty("Solvent"), spectrumIndexInRecord); + if(solvent == null){ + continue; + } + spectrum.setSolvent(solvent); + + if(Utils.getAtomTypeIndicesByElement(ac, nucleus.replaceAll("\\d", "")).size() != spectrum.getSignalCount()){ + continue; + } + + spectra.add(new Object[]{spectrum, NMRShiftDBSpectrumToAssignment(spectraStrings.get(spectrumPropertyString), nucleus)}); + } + spectraSet.add(spectra); + } + + return spectraSet; + } + + public static String getSolvent(final String solventPropertyString, final String spectrumIndexInRecord){ + final String[] solventPropertyStringSplit = solventPropertyString.split(":"); + String solvent; + for (int i = 0; i < solventPropertyStringSplit.length; i++) { + if 
(solventPropertyStringSplit[i].endsWith(spectrumIndexInRecord)) { + solvent = solventPropertyStringSplit[i + 1]; + if(solvent.substring(solvent.length() - 1).matches("\\d")){ + solvent = solvent.substring(0, solvent.length() - 1); + } + if(solvent.substring(solvent.length() - 1).matches("\\d")){ + solvent = solvent.substring(0, solvent.length() - 1); + } + solvent = solvent.substring(0, solvent.length() - 1); + + return solvent; + } + } + + return null; + } + + /** + * Returns 3-tuples consisting of structure, spectrum and assignments + * for each valid molecule record in the given NMRShiftDB file. Valid means + * here that each molecule record has to contain the given spectrum + * property string as well as the number of signals in that spectrum has to + * be the same as atoms of that atom type in molecule. + * + * @param pathToNMRShiftDB path to NMRShiftDB file + * @param NMRShiftDBSpectrumProperty spectrum property string to use + * @return + * @throws FileNotFoundException + * @throws CDKException + */ + public static HashMap getSSCComponentsFromNMRShiftDB(final String pathToNMRShiftDB, final String NMRShiftDBSpectrumProperty) throws FileNotFoundException, CDKException { + final HashMap structureSetWithSpectra = new HashMap<>(); + final IteratingSDFReader iterator = new IteratingSDFReader( + new FileReader(pathToNMRShiftDB), + SilentChemObjectBuilder.getInstance() + ); + IAtomContainer ac; + Spectrum spectrum; + Assignment assignment; + final String nucleus = getNucleusFromNMRShiftDBSpectrumProperty(NMRShiftDBSpectrumProperty); + final String spectrumIndexInRecord = NMRShiftDBSpectrumProperty.split("\\s")[NMRShiftDBSpectrumProperty.split("\\s").length - 1]; + while (iterator.hasNext()) { + ac = iterator.next(); + // skip molecules which not contain any of requested spectrum information + if(ac.getProperty(NMRShiftDBSpectrumProperty) == null){ + continue; + } + spectrum = NMRShiftDBSpectrumToSpectrum(ac.getProperty(NMRShiftDBSpectrumProperty), nucleus); + // if 
no spectrum could be built or the number of signals in spectrum is different than the atom number in molecule + if((spectrum == null) || Utils.getAtomTypeIndicesByElement(ac, nucleus.replaceAll("\\d", "")).size() != spectrum.getSignalCount()){ + continue; + } + if(ac.getProperty("Solvent") != null){ + spectrum.setSolvent(getSolvent(ac.getProperty("Solvent"), spectrumIndexInRecord)); + } + if(ac.getProperty("Field Strength [MHz]") != null){ + for (final String fieldStrength : ac.getProperty("Field Strength [MHz]").toString().split("\\s")) { + if (fieldStrength.startsWith(spectrumIndexInRecord + ":")) { + try { + spectrum.setSpectrometerFrequency(Double.parseDouble(fieldStrength.split(spectrumIndexInRecord + ":")[1])); + } catch (NumberFormatException e) { +// spectrum.setSpectrometerFrequency(null); + } + break; + } + } + } + + assignment = NMRShiftDBSpectrumToAssignment(ac.getProperty(NMRShiftDBSpectrumProperty), nucleus); +// if ((ac != null) && (spectrum != null)) { + structureSetWithSpectra.put(structureSetWithSpectra.size(), new Object[]{ac, spectrum, assignment}); +// } + + Utils.setAromaticitiesInAtomContainer(ac); + } + + return structureSetWithSpectra; + } + + /** + * Returns a hashmap containing combined keys (by "_") of solvents + * and lists of calculated deviations between all given spectra for a + * nucleus in molecule record as values.
+ * Here, only molecule records in NMRShiftDB file are considered which have + * at least two different spectra for same nucleus.
+ * Example: "Spectrum 13C 0", "Spectrum 13C 1" will be used for given + * nucleus 13C. + * + * + * @param pathToNMRShiftDB + * @param nucleus + * @return + * @throws FileNotFoundException + * @throws CDKException + */ + public static HashMap> getSolventDeviations(final String pathToNMRShiftDB, final String nucleus) throws FileNotFoundException, CDKException{ + int signalCount; + Spectrum spectrum; + Assignment assignment; + final ArrayList> spectraSets = getSpectraFromNMRShiftDB(pathToNMRShiftDB, nucleus); + HashMap> shiftsPerAtom; + HashMap> solventsPerAtom; + ArrayList solvents; + String[] solventsToSort; + + final HashMap> deviations = new HashMap<>(); + String combiKey; + + for (final ArrayList spectraSetInRecord : spectraSets) { + shiftsPerAtom = new HashMap<>(); + solventsPerAtom = new HashMap<>(); + signalCount = -1; + for (final Object[] spectrumAndAssignment : spectraSetInRecord) { + spectrum = (Spectrum) spectrumAndAssignment[0]; + assignment = (Assignment) spectrumAndAssignment[1]; + if (signalCount == -1) { + signalCount = spectrum.getSignalCount(); + } else if (signalCount != spectrum.getSignalCount()) { + continue; + } + for (final int atomIndex : assignment.getAtomIndices(0)) { + if (!shiftsPerAtom.containsKey(atomIndex)) { + shiftsPerAtom.put(atomIndex, new ArrayList<>()); + solventsPerAtom.put(atomIndex, new ArrayList<>()); + } + shiftsPerAtom.get(atomIndex).add(spectrum.getSignal(assignment.getSignalIndex(0, atomIndex)).getShift(0)); + solventsPerAtom.get(atomIndex).add(spectrum.getSolvent()); + } + } + if (shiftsPerAtom.isEmpty() || (shiftsPerAtom.get(Collections.min(shiftsPerAtom.keySet())).size() < 2)) { + continue; + } + solvents = new ArrayList<>(solventsPerAtom.get(Collections.min(solventsPerAtom.keySet()))); +// if(Collections.frequency(solvents, "Unreported") + Collections.frequency(solvents, "Unknown") > solvents.size() - 2){ +// continue; +// } + + for (final int atomIndex : shiftsPerAtom.keySet()) { + for (int s1 = 0; s1 < 
solvents.size(); s1++) { +// if(solvents.get(s1).equals("Unreported") || solvents.get(s1).equals("Unknown")){ +// continue; +// } + for (int s2 = s1 + 1; s2 < solvents.size(); s2++) { +// if (solvents.get(s2).equals("Unreported") || solvents.get(s2).equals("Unknown")) { +// continue; +// } + solventsToSort = new String[2]; + solventsToSort[0] = solvents.get(s1); + solventsToSort[1] = solvents.get(s2); + Arrays.sort(solventsToSort); + combiKey = solventsToSort[0] + "_" + solventsToSort[1]; + if (!deviations.containsKey(combiKey)) { + deviations.put(combiKey, new ArrayList<>()); + } + deviations.get(combiKey).add(Math.abs(shiftsPerAtom.get(atomIndex).get(s1) - shiftsPerAtom.get(atomIndex).get(s2))); + } + } + } + } + + return deviations; + } + + /** + * + * @param pathToDB + * @return + * @throws FileNotFoundException + * @deprecated + */ + public static HashSet getAtomTypesInDB(final String pathToDB) throws FileNotFoundException{ + final HashSet atomTypes = new HashSet<>(); + final IteratingSDFReader iterator = new IteratingSDFReader( + new FileReader(pathToDB), + SilentChemObjectBuilder.getInstance() + ); + while (iterator.hasNext()) { + atomTypes.addAll(Utils.getAtomTypesInAtomContainer(iterator.next())); + } + + return atomTypes; + } + + // currently only for 1D spectra + public static HashMap getSpectraStrings(final IAtomContainer ac, final String nucleus) { + final ArrayList props = (ArrayList) (ArrayList) (new ArrayList<>(ac.getProperties().keySet())); + final HashMap spectra = new HashMap<>(); + for (final String prop : props) { + if (prop.startsWith("Spectrum " + nucleus)) { + spectra.put(prop, ac.getProperty(prop)); + } + } + + return spectra; + } + + /** + * Creates a two dimensional array of a given NMRShiftDB NMR entry + * with all signal shift values, intensities, multiplicities and atom indices. + * + * @param NMRShiftDBSpectrum + * @return two dimensional array: + * 1. dimension: signal index (row); + * 2. 
dimension: signal shift value (column 1), signal intensity (column 2), + * signal multiplicity (column 3), atom index in structure (column 4) + */ + public static String[][] parseNMRShiftDBSpectrum(final String NMRShiftDBSpectrum){ + if(NMRShiftDBSpectrum.trim().isEmpty()){ + return new String[][]{}; + } + String[] signalSplit; + final String[] shiftsSplit = NMRShiftDBSpectrum.split("\\|"); + final String[][] values = new String[shiftsSplit.length][4]; + for (int i = 0; i < shiftsSplit.length; i++) { + signalSplit = shiftsSplit[i].split(";"); + values[i][0] = signalSplit[0]; // shift value + values[i][1] = signalSplit[1].substring(0, signalSplit[1].length() - 1); // intensity + values[i][2] = signalSplit[1].substring(signalSplit[1].length() - 1); // multiplicity + values[i][3] = signalSplit[2]; // atom index + } + + return values; + } + + /** + * Sets shifts, intensities and implicit hydrogen counts in atoms of an atom container + * by means of given spectrum property string. + * + * @param ac IAtomContainer to set + * @param NMRShiftDBSpectrum Property string of spectrum in NMRShiftDB format. 
+ * @return + * + * @see MongoDB#parseNMRShiftDBSpectrum(String) + * @see Utils#getHydrogenCountFromMultiplicity(String) + * @deprecated + */ + public static boolean setNMRShiftDBShiftsToAtomContainer(final IAtomContainer ac, final String NMRShiftDBSpectrum){ + if (ac.getProperty(NMRShiftDBSpectrum) == null) { + return false; + } + final String[][] spectrumStringArray = parseNMRShiftDBSpectrum(ac.getProperty(NMRShiftDBSpectrum)); + + Integer atomIndexSpectrum; +// String multiplicity; + Double shift; + + for (int i = 0; i < spectrumStringArray.length; i++) { + atomIndexSpectrum = Integer.parseInt(spectrumStringArray[i][3]); + shift = Double.parseDouble(spectrumStringArray[i][0]); +// multiplicity = spectrumStringArray[i][3]; + if(Utils.checkIndexInAtomContainer(ac, atomIndexSpectrum)){ + ac.getAtom(atomIndexSpectrum).setProperty(Utils.getNMRShiftConstant(ac.getAtom(atomIndexSpectrum).getSymbol()), shift); +// ac.getAtom(atomIndexSpectrum).setImplicitHydrogenCount(Utils.getHydrogenCountFromMultiplicity(multiplicity)); + } + } + + return true; + } + + public static String getNucleusFromNMRShiftDBSpectrumProperty(final String NMRShiftDBSpectrumProperty){ + return NMRShiftDBSpectrumProperty.split(" ")[1]; + } + + public static Spectrum NMRShiftDBSpectrumToSpectrum(final String NMRShiftDBSpectrum, final String nucleus){ + if ((NMRShiftDBSpectrum == null) || NMRShiftDBSpectrum.trim().isEmpty()) { + return null; + } + final String[][] spectrumStringArray = parseNMRShiftDBSpectrum(NMRShiftDBSpectrum); + final Spectrum spectrum = new Spectrum(new String[]{nucleus}); + String multiplicity; + Double shift, intensity; + try { + for (int i = 0; i < spectrumStringArray.length; i++) { + shift = Double.parseDouble(spectrumStringArray[i][0]); + intensity = Double.parseDouble(spectrumStringArray[i][1]); + multiplicity = spectrumStringArray[i][2]; + spectrum.addSignal(new Signal(new String[]{nucleus}, new Double[]{shift}, multiplicity, intensity)); + } + 
spectrum.detectEquivalences(); + } catch (Exception e) { + + return null; + } + + return spectrum; + } + + public static Assignment NMRShiftDBSpectrumToAssignment(final String NMRShiftDBSpectrum, final String nucleus) { + if ((NMRShiftDBSpectrum == null) || NMRShiftDBSpectrum.trim().isEmpty()) { + return null; + } + final String[][] NMRShiftDBSpectrumStringArray = parseNMRShiftDBSpectrum(NMRShiftDBSpectrum); + final Spectrum spectrum = NMRShiftDBSpectrumToSpectrum(NMRShiftDBSpectrum, nucleus); + final Assignment assignment = new Assignment(spectrum); + for (int i = 0; i < NMRShiftDBSpectrumStringArray.length; i++) { + assignment.setAssignment(0, i, new Integer(NMRShiftDBSpectrumStringArray[i][3])); + } + + return assignment; + } +} From cfc5d649f09ef8504499e64a29a681fda9ae72bc Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Thu, 11 Jul 2019 16:23:36 +0200 Subject: [PATCH 102/405] - new class to create a input file for LSD structure generator - currently only supporting output of: - project name - molecular formula - hybridizations of atom if set in atom container - already known bond information from atom container - path to filters for structure generation - printing th COSY or HMBC information is to repair after redesign --- src/casekit/NMR/Process.java | 287 ---------------------- src/casekit/NMR/convert/LSDConverter.java | 243 ++++++++++++++++++ 2 files changed, 243 insertions(+), 287 deletions(-) delete mode 100644 src/casekit/NMR/Process.java create mode 100644 src/casekit/NMR/convert/LSDConverter.java diff --git a/src/casekit/NMR/Process.java b/src/casekit/NMR/Process.java deleted file mode 100644 index 5f82120..0000000 --- a/src/casekit/NMR/Process.java +++ /dev/null @@ -1,287 +0,0 @@ -/* - * The MIT License - * - * Copyright 2018 Michael Wenk [https://github.com/michaelwenk]. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. 
- */ -package casekit.NMR; - -import casekit.NMR.model.Spectrum; -import java.io.File; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.io.UnsupportedEncodingException; -import java.util.ArrayList; -import java.util.HashMap; -import org.openscience.cdk.CDKConstants; -import org.openscience.cdk.interfaces.IAtomContainer; -import org.openscience.cdk.interfaces.IAtomContainerSet; -import org.openscience.cdk.interfaces.IBond; -import org.openscience.cdk.interfaces.IMolecularFormula; -import org.openscience.cdk.tools.manipulator.MolecularFormulaManipulator; - -/** - * - * @author Michael Wenk [https://github.com/michaelwenk] - */ -public class Process extends ParseRawData { - - private final IAtomContainer mol; - private final IMolecularFormula molFormula; - private final HashMap> atomTypeIndices; - private int[][] neighborhoodCountsMatrix; - private final HashMap> shiftIndicesInACSet = new HashMap<>(); // holding of all indices of each ac set (DB) entry [first value] and it's atom indices [second value] too - - - public Process(){ - super(); - this.molFormula = super.getMolecularFormula(); - this.mol = super.getAtomContainer(); - this.atomTypeIndices = super.getAtomTypeIndices(); - } - - public Process(final IMolecularFormula molFormula){ - super(molFormula); - this.molFormula = super.getMolecularFormula(); - this.mol = super.getAtomContainer(); - this.atomTypeIndices = super.getAtomTypeIndices(); - } - - - /** - * - * @param projectName - * @param pathToOutputFile - * @param pathsToFilters - * @throws FileNotFoundException - * @throws UnsupportedEncodingException - */ - public void createLSDInputFile(final String projectName, final String pathToOutputFile, final String[] pathsToFilters) throws FileNotFoundException, UnsupportedEncodingException, IOException{ - - String wholeContent, hybrid, protons, MULT = "", HSQC = "", BOND = "", HMBC = "", COSY = ""; - wholeContent = "; project name: " + projectName + "\n"; - if(this.molFormula != 
null){ - wholeContent += "; molecular formula: " + MolecularFormulaManipulator.getString(this.molFormula) + "\n\n"; - } else { - wholeContent += "; molecular formula: unknown \n\n"; - } - for (int i = 0; i < this.mol.getAtomCount(); i++) { - // set MULT section in LSD input file - // set hybridization level - if(this.mol.getAtom(i).getHybridization() == null){ - hybrid = "-"; - } else { - switch (this.mol.getAtom(i).getHybridization()) { - case SP1: - case S: - hybrid = "1"; break; - case SP2: - hybrid = "2"; break; - default: - hybrid = "3"; - } - } - // set implicit proton number - if(this.mol.getAtom(i).getImplicitHydrogenCount() == null){ - protons = "-"; - } else { - protons = String.valueOf(this.mol.getAtom(i).getImplicitHydrogenCount()); - } - MULT += "MULT " + (i+1) + " " + this.mol.getAtom(i).getSymbol() + " " + hybrid + " " + protons; - if(this.mol.getAtom(i).getProperty(Utils.getNMRShiftConstant(this.mol.getAtom(i).getSymbol())) != null){ - String hCount; - if(this.mol.getAtom(i).getImplicitHydrogenCount() == null){ - hCount = "x"; - } else { - hCount = String.valueOf(this.mol.getAtom(i).getImplicitHydrogenCount()); - } - MULT += ";\t" + this.mol.getAtom(i).getProperty(Utils.getNMRShiftConstant(this.mol.getAtom(i).getSymbol())) + ",\t" + this.mol.getAtom(i).getSymbol() + "H" + hCount; - } - MULT += "\n"; - // set HSQC section in LSD input file - if((this.mol.getAtom(i).getImplicitHydrogenCount() != null) && (this.mol.getAtom(i).getImplicitHydrogenCount() > 0)){ - HSQC += "HSQC " + (i+1) + " " + (i+1) + ";\t" + this.mol.getAtom(i).getSymbol() + "H" + this.mol.getAtom(i).getImplicitHydrogenCount() + "\n"; - } - } - wholeContent += MULT + "\n"; - wholeContent += HSQC + "\n"; - - // set BOND information in LSD input file by INADEQUATE - for (IBond bond : this.mol.bonds()) { - BOND += "BOND " + (bond.getAtom(0).getIndex()+1) + " " + (bond.getAtom(1).getIndex()+1) + ";\t" + this.mol.getAtom(bond.getAtom(0).getIndex()).getSymbol() + "H" + 
this.mol.getAtom(bond.getAtom(0).getIndex()).getImplicitHydrogenCount() + " - " + this.mol.getAtom(bond.getAtom(1).getIndex()).getSymbol() + "H" + this.mol.getAtom(bond.getAtom(1).getIndex()).getImplicitHydrogenCount() + "\n"; - } - wholeContent += BOND + "\n"; - - // set HMBC information to LSD input file - ArrayList indicesInAtomContainerDim1; - ArrayList indicesInAtomContainerDim2; - final boolean [][] HMBCTable = new boolean[this.mol.getAtomCount()][this.mol.getAtomCount()]; - for (int i = 0; i < this.mol.getAtomCount(); i++) { - for (int j = 0; j < this.mol.getAtomCount(); j++) { - HMBCTable[i][j] = false; - } - } - for (final Spectrum spectrum : this.getSpectra().values()) { - if((spectrum.getDimCount() != 2) || !spectrum.getSpecType().startsWith(CDKConstants.NMRSPECTYPE_2D_HMBC)){ - continue; - } - indicesInAtomContainerDim1 = this.getAssignedAtomIndices(spectrum, 0); - indicesInAtomContainerDim2 = this.getAssignedAtomIndices(spectrum, 1); - HMBC += ";\t " + spectrum.getSpecType() + " " + Utils.getSpectrumNucleiAsString(spectrum) + "\n"; - for (int i = 0; i < spectrum.getSignalCount(); i++) { - if((indicesInAtomContainerDim1.get(i) > -1) && (indicesInAtomContainerDim2.get(i) > -1)){ - // set signal only if it is not already covered by BOND - // here reversed order (see LSD manual page): 1. heavy atom, 2. 
proton - if(this.mol.getBond(this.mol.getAtom(indicesInAtomContainerDim2.get(i)), this.mol.getAtom(indicesInAtomContainerDim1.get(i))) != null){ - HMBC += ";"; - } - HMBC += "HMBC " + (indicesInAtomContainerDim2.get(i) + 1) + " " + (indicesInAtomContainerDim1.get(i) + 1) + ";\t" + this.mol.getAtom(indicesInAtomContainerDim2.get(i)).getSymbol() + "H" + this.mol.getAtom(indicesInAtomContainerDim2.get(i)).getImplicitHydrogenCount() + " - " + this.mol.getAtom(indicesInAtomContainerDim1.get(i)).getSymbol() + "H" + this.mol.getAtom(indicesInAtomContainerDim1.get(i)).getImplicitHydrogenCount() + "\n"; - HMBCTable[indicesInAtomContainerDim2.get(i)][indicesInAtomContainerDim1.get(i)] = true; - } - } - } - wholeContent += HMBC + "\n"; - // set COSY information to LSD input file - for (final Spectrum spectrum : this.getSpectra().values()) { - if((spectrum.getDimCount() != 2) || !spectrum.getSpecType().startsWith(CDKConstants.NMRSPECTYPE_2D_HHCOSY)){ - continue; - } - indicesInAtomContainerDim1 = this.getAssignedAtomIndices(spectrum, 0); - indicesInAtomContainerDim2 = this.getAssignedAtomIndices(spectrum, 1); - COSY += ";\t " + spectrum.getSpecType() + " " + Utils.getSpectrumNucleiAsString(spectrum) + "\n"; - for (int i = 0; i < spectrum.getSignalCount(); i++) { - if((indicesInAtomContainerDim1.get(i) > -1) && (indicesInAtomContainerDim2.get(i) > -1)){ - // set signal only if it is not already covered by BOND or HMBC - if((this.mol.getBond(this.mol.getAtom(indicesInAtomContainerDim1.get(i)), this.mol.getAtom(indicesInAtomContainerDim2.get(i))) != null) - || HMBCTable[indicesInAtomContainerDim1.get(i)][indicesInAtomContainerDim2.get(i)]){ - COSY += ";"; - } - COSY += "COSY " + (indicesInAtomContainerDim1.get(i) + 1) + " " + (indicesInAtomContainerDim2.get(i) + 1) + ";\t" + this.mol.getAtom(indicesInAtomContainerDim1.get(i)).getSymbol() + "H" + this.mol.getAtom(indicesInAtomContainerDim1.get(i)).getImplicitHydrogenCount() + " - " + 
this.mol.getAtom(indicesInAtomContainerDim2.get(i)).getSymbol() + "H" + this.mol.getAtom(indicesInAtomContainerDim2.get(i)).getImplicitHydrogenCount() + "\n"; - } - } - } - wholeContent += COSY + "\n"; - // set filter definitions - String DEFF = ""; - String FEXP = ""; - if(pathsToFilters.length > 0){ - int fragmentCounter = 1; - for (String pathToFilter : pathsToFilters) { - File folder = new File(pathToFilter); - File[] listOfFiles = folder.listFiles(); - for (File file : listOfFiles) { - if (file.isFile() && !file.getName().toLowerCase().contains(".")) { - DEFF += "DEFF F" + fragmentCounter + " \"" + file.getAbsolutePath() + "\"\n"; - fragmentCounter++; - } - } - } - FEXP = "FEXP \"NOT F1"; - for (int i = 2; i < fragmentCounter; i++) { - FEXP += " and NOT F" + i; - } - FEXP += "\""; - } - - wholeContent += DEFF + "\n"; - wholeContent += FEXP + "\n"; - - Utils.writeTextFile(pathToOutputFile, wholeContent); - } - - - public int[][] getNeighborhoodBondsCountMatrix(){ - - return this.neighborhoodCountsMatrix; - } - - - - - -// public void countNeighborhoodBonds(final IAtomContainerSet acSet, final String[] bondsSet, final String elem, final ArrayList neighborElems, final int minShift, final int maxShift, final int stepSize) throws FileNotFoundException, IOException{ -// -// if (stepSize < 1) { -// System.err.println("stepSize < 1 not allowed!!!"); -// return; -// } -// // creation of frequency counting matrix and shift indices holder -// this.neighborhoodCountsMatrix = new int[stepSize * (maxShift - minShift + 1)][3 + 4 + neighborElems.size() * bondsSet.length]; -// this.shiftIndicesInACSet.clear(); -// for (int i = 0; i < stepSize * maxShift; i++) { -// for (int j = 0; j < 3 + 4 + neighborElems.size() * bondsSet.length; j++) { -// neighborhoodCountsMatrix[i][j] = 0; -// } -// this.shiftIndicesInACSet.put(i, new ArrayList<>()); -// } -// int atomIndexDB, shiftDBInt; double shiftDBDouble; IAtomContainer acDB; -// // go through all molecules in DB -// for (int k = 0; 
k < acSet.getAtomContainerCount(); k++) { -// acDB = acSet.getAtomContainer(k); -// // for all DB entries containing a spectrum for the current query atom type -// for (final String shiftsDB : DB.getNMRShiftDBSpectra(acDB, elem)) { -// if (shiftsDB == null) { -// continue; -// } -// String[][] shiftsDBvalues = casekit.NMR.DB.parseNMRShiftDBSpectrum(shiftsDB); -// for (String[] shiftsDBvalue : shiftsDBvalues) { -// atomIndexDB = Integer.parseInt(shiftsDBvalue[2]); -// // sometimes the DB atom index is wrong and out of array range -// if (atomIndexDB > acDB.getAtomCount() - 1) { -// continue; -// } -// shiftDBDouble = Math.round(Double.parseDouble(shiftsDBvalue[0]) * stepSize) / (double) stepSize; -// // if DB shift value out of min-max-range then skip this shift -// if(shiftDBDouble < minShift || shiftDBDouble > maxShift - 1){ -// continue; -// } -// shiftDBInt = (int) (shiftDBDouble * stepSize); -// this.neighborhoodCountsMatrix[shiftDBInt - minShift][0] += 1; // increase number of this shift occurence -// this.neighborhoodCountsMatrix[shiftDBInt - minShift][1] += (acDB.getAtom(atomIndexDB).isInRing()) ? 1 : 0; // increase if atom is a ring member -// this.neighborhoodCountsMatrix[shiftDBInt - minShift][2] += (acDB.getAtom(atomIndexDB).isAromatic()) ? 1 : 0; // increase if atom is aromatic -// this.neighborhoodCountsMatrix[shiftDBInt - minShift][3] += ((acDB.getAtom(atomIndexDB).getImplicitHydrogenCount() != null) && (acDB.getAtom(atomIndexDB).getImplicitHydrogenCount() == 0)) ? 1 : 0; // qC count or equivalents, e.g. qN -// this.neighborhoodCountsMatrix[shiftDBInt - minShift][4] += ((acDB.getAtom(atomIndexDB).getImplicitHydrogenCount() != null) && (acDB.getAtom(atomIndexDB).getImplicitHydrogenCount() == 1)) ? 1 : 0; // CH count or equivalents, e.g. NH -// this.neighborhoodCountsMatrix[shiftDBInt - minShift][5] += ((acDB.getAtom(atomIndexDB).getImplicitHydrogenCount() != null) && (acDB.getAtom(atomIndexDB).getImplicitHydrogenCount() == 2)) ? 
1 : 0; // CH2 count or equivalents, e.g. NH2 -// this.neighborhoodCountsMatrix[shiftDBInt - minShift][6] += ((acDB.getAtom(atomIndexDB).getImplicitHydrogenCount() != null) && (acDB.getAtom(atomIndexDB).getImplicitHydrogenCount() == 3)) ? 1 : 0; // CH3 count or equivalents, e.g. NH3 -// // add counts for a specific atom to matrix m -// int[] counts = casekit.NMR.Utils.getNeighborhoodBondsCount(acDB, atomIndexDB, bondsSet, neighborElems); -// for (int i = 0; i < counts.length; i++) { -// this.neighborhoodCountsMatrix[shiftDBInt - minShift][3 + 4 + i] += counts[i]; -// } -// // add this atom container index and atom index within it to belonging hash map -// this.shiftIndicesInACSet.get(shiftDBInt).add(new Integer[]{k, atomIndexDB}); -// } -// } -// } -// } - - - - - - - - - - - - - -} diff --git a/src/casekit/NMR/convert/LSDConverter.java b/src/casekit/NMR/convert/LSDConverter.java new file mode 100644 index 0000000..005d8d1 --- /dev/null +++ b/src/casekit/NMR/convert/LSDConverter.java @@ -0,0 +1,243 @@ +/* + * The MIT License + * + * Copyright 2019 Michael Wenk [https://github.com/michaelwenk] + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +package casekit.NMR.convert; + +import casekit.NMR.ParseData; +import casekit.NMR.Utils; +import casekit.NMR.dbservice.NMRShiftDB; +import casekit.NMR.model.Spectrum; +import java.io.File; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.UnsupportedEncodingException; +import java.util.ArrayList; +import java.util.HashMap; + +import org.openscience.cdk.CDKConstants; +import org.openscience.cdk.exception.CDKException; +import org.openscience.cdk.formula.MolecularFormula; +import org.openscience.cdk.interfaces.IAtomContainer; +import org.openscience.cdk.interfaces.IAtomContainerSet; +import org.openscience.cdk.interfaces.IBond; +import org.openscience.cdk.interfaces.IMolecularFormula; +import org.openscience.cdk.tools.manipulator.MolecularFormulaManipulator; + +/** + * + * @author Michael Wenk [https://github.com/michaelwenk] + */ +public class LSDConverter { + + /** + * + * @param projectName + * @param pathToOutputFile + * @param pathsToFilters + * @param molecularFormula + * @param ac + * @param spectra + * + * @throws IOException + */ + public static void ConvertToLSD(final String projectName, final String pathToOutputFile, final String[] pathsToFilters, final MolecularFormula molecularFormula, final IAtomContainer ac, final HashMap spectra) throws IOException { + + String wholeContent, hybrid, protons, MULT = "", HSQC = "", BOND = "", HMBC = "", COSY = ""; + wholeContent = "; project name: " + projectName + "\n"; + if(molecularFormula != null){ + wholeContent += "; molecular formula: " + MolecularFormulaManipulator.getString(molecularFormula) + "\n\n"; + } else { + wholeContent += "; molecular formula: unknown \n\n"; + } + for (int i = 0; i < ac.getAtomCount(); i++) { 
+ // set MULT section in LSD input file + // set hybridization level + if(ac.getAtom(i).getHybridization() == null){ + hybrid = "-"; + } else { + switch (ac.getAtom(i).getHybridization()) { + case SP1: + case S: + hybrid = "1"; break; + case SP2: + hybrid = "2"; break; + default: + hybrid = "3"; + } + } + // set implicit proton number + if(ac.getAtom(i).getImplicitHydrogenCount() == null){ + protons = "-"; + } else { + protons = String.valueOf(ac.getAtom(i).getImplicitHydrogenCount()); + } + MULT += "MULT " + (i+1) + " " + ac.getAtom(i).getSymbol() + " " + hybrid + " " + protons; + if(ac.getAtom(i).getProperty(Utils.getNMRShiftConstant(ac.getAtom(i).getSymbol())) != null){ + String hCount; + if(ac.getAtom(i).getImplicitHydrogenCount() == null){ + hCount = "x"; + } else { + hCount = String.valueOf(ac.getAtom(i).getImplicitHydrogenCount()); + } + MULT += ";\t" + ac.getAtom(i).getProperty(Utils.getNMRShiftConstant(ac.getAtom(i).getSymbol())) + ",\t" + ac.getAtom(i).getSymbol() + "H" + hCount; + } + MULT += "\n"; + // set HSQC section in LSD input file + if((ac.getAtom(i).getImplicitHydrogenCount() != null) && (ac.getAtom(i).getImplicitHydrogenCount() > 0)){ + HSQC += "HSQC " + (i+1) + " " + (i+1) + ";\t" + ac.getAtom(i).getSymbol() + "H" + ac.getAtom(i).getImplicitHydrogenCount() + "\n"; + } + } + wholeContent += MULT + "\n"; + wholeContent += HSQC + "\n"; + + // set BOND information in LSD input file by INADEQUATE or general bond knowledge + for (IBond bond : ac.bonds()) { + BOND += "BOND " + (bond.getAtom(0).getIndex()+1) + " " + (bond.getAtom(1).getIndex()+1) + ";\t" + ac.getAtom(bond.getAtom(0).getIndex()).getSymbol() + "H" + ac.getAtom(bond.getAtom(0).getIndex()).getImplicitHydrogenCount() + " - " + ac.getAtom(bond.getAtom(1).getIndex()).getSymbol() + "H" + ac.getAtom(bond.getAtom(1).getIndex()).getImplicitHydrogenCount() + "\n"; + } + wholeContent += BOND + "\n"; + +// // set HMBC information to LSD input file +// ArrayList indicesInAtomContainerDim1; +// 
ArrayList indicesInAtomContainerDim2; +// final boolean [][] HMBCTable = new boolean[ac.getAtomCount()][ac.getAtomCount()]; +// for (int i = 0; i < ac.getAtomCount(); i++) { +// for (int j = 0; j < ac.getAtomCount(); j++) { +// HMBCTable[i][j] = false; +// } +// } +// for (final Spectrum spectrum : spectra.values()) { +// if((spectrum.getNDim() != 2) || !spectrum.getSpecType().startsWith(CDKConstants.NMRSPECTYPE_2D_HMBC)){ +// continue; +// } +// indicesInAtomContainerDim1 = this.getAssignedAtomIndices(spectrum, 0); +// indicesInAtomContainerDim2 = this.getAssignedAtomIndices(spectrum, 1); +// HMBC += ";\t " + spectrum.getSpecType() + " " + Utils.getSpectrumNucleiAsString(spectrum) + "\n"; +// for (int i = 0; i < spectrum.getSignalCount(); i++) { +// if((indicesInAtomContainerDim1.get(i) > -1) && (indicesInAtomContainerDim2.get(i) > -1)){ +// // set signal only if it is not already covered by BOND +// // here reversed order (see LSD manual page): 1. heavy atom, 2. proton +// if(ac.getBond(ac.getAtom(indicesInAtomContainerDim2.get(i)), ac.getAtom(indicesInAtomContainerDim1.get(i))) != null){ +// HMBC += ";"; +// } +// HMBC += "HMBC " + (indicesInAtomContainerDim2.get(i) + 1) + " " + (indicesInAtomContainerDim1.get(i) + 1) + ";\t" + ac.getAtom(indicesInAtomContainerDim2.get(i)).getSymbol() + "H" + ac.getAtom(indicesInAtomContainerDim2.get(i)).getImplicitHydrogenCount() + " - " + ac.getAtom(indicesInAtomContainerDim1.get(i)).getSymbol() + "H" + ac.getAtom(indicesInAtomContainerDim1.get(i)).getImplicitHydrogenCount() + "\n"; +// HMBCTable[indicesInAtomContainerDim2.get(i)][indicesInAtomContainerDim1.get(i)] = true; +// } +// } +// } +// wholeContent += HMBC + "\n"; +// // set COSY information to LSD input file +// for (final Spectrum spectrum : spectra.values()) { +// if((spectrum.getNDim() != 2) || !spectrum.getSpecType().startsWith(CDKConstants.NMRSPECTYPE_2D_HHCOSY)){ +// continue; +// } +// indicesInAtomContainerDim1 = this.getAssignedAtomIndices(spectrum, 0); +// 
indicesInAtomContainerDim2 = this.getAssignedAtomIndices(spectrum, 1); +// COSY += ";\t " + spectrum.getSpecType() + " " + Utils.getSpectrumNucleiAsString(spectrum) + "\n"; +// for (int i = 0; i < spectrum.getSignalCount(); i++) { +// if((indicesInAtomContainerDim1.get(i) > -1) && (indicesInAtomContainerDim2.get(i) > -1)){ +// // set signal only if it is not already covered by BOND or HMBC +// if((ac.getBond(ac.getAtom(indicesInAtomContainerDim1.get(i)), ac.getAtom(indicesInAtomContainerDim2.get(i))) != null) +// || HMBCTable[indicesInAtomContainerDim1.get(i)][indicesInAtomContainerDim2.get(i)]){ +// COSY += ";"; +// } +// COSY += "COSY " + (indicesInAtomContainerDim1.get(i) + 1) + " " + (indicesInAtomContainerDim2.get(i) + 1) + ";\t" + ac.getAtom(indicesInAtomContainerDim1.get(i)).getSymbol() + "H" + ac.getAtom(indicesInAtomContainerDim1.get(i)).getImplicitHydrogenCount() + " - " + ac.getAtom(indicesInAtomContainerDim2.get(i)).getSymbol() + "H" + ac.getAtom(indicesInAtomContainerDim2.get(i)).getImplicitHydrogenCount() + "\n"; +// } +// } +// } +// wholeContent += COSY + "\n"; + // set filter definitions + String DEFF = ""; + String FEXP = ""; + if((pathsToFilters != null) && pathsToFilters.length > 0){ + int fragmentCounter = 1; + for (final String pathToFilter : pathsToFilters) { + File folder = new File(pathToFilter); + File[] listOfFiles = folder.listFiles(); + for (final File file : listOfFiles) { + if (file.isFile() && !file.getName().toLowerCase().contains(".")) { + DEFF += "DEFF F" + fragmentCounter + " \"" + file.getAbsolutePath() + "\"\n"; + fragmentCounter++; + } + } + } + FEXP = "FEXP \"NOT F1"; + for (int i = 2; i < fragmentCounter; i++) { + FEXP += " and NOT F" + i; + } + FEXP += "\""; + } + + wholeContent += DEFF + "\n"; + wholeContent += FEXP + "\n"; + + Utils.writeTextFile(pathToOutputFile, wholeContent); + } + + + +// public static void countNeighborhoodBonds(final String pathToNMRShiftDB, final String[] bondsSet, final String nucleus, final 
ArrayList neighborElems, final int minShift, final int maxShift, final int stepSize) throws FileNotFoundException, IOException, CDKException { +// +// if (stepSize < 1) { +// System.err.println("stepSize < 1 not allowed!!!"); +// return; +// } +// // creation of frequency counting matrix and shift indices holder +// final int[][] neighborhoodCountsMatrix = new int[stepSize * (maxShift - minShift + 1)][3 + 4 + neighborElems.size() * bondsSet.length]; +// final IAtomContainerSet acSet = NMRShiftDB.getStructuresFromSDFile(pathToNMRShiftDB, true); +// final HashMap> shiftIndicesInACSet = new HashMap<>(); +// for (int i = 0; i < stepSize * maxShift; i++) { +// for (int j = 0; j < 3 + 4 + neighborElems.size() * bondsSet.length; j++) { +// neighborhoodCountsMatrix[i][j] = 0; +// } +// shiftIndicesInACSet.put(i, new ArrayList<>()); +// } +// int atomIndexDB, shiftDBInt; double shiftDBDouble; IAtomContainer acDB; +// // go through all molecules in MongoDB +// for (int k = 0; k < acSet.getAtomContainerCount(); k++) { +// acDB = acSet.getAtomContainer(k); +// // for all MongoDB entries containing a spectrum for the current query atom type +// for (final String shiftsDB : NMRShiftDB.getSpectraFromNMRShiftDB(pathToNMRShiftDB, nucleus)) { +// if (shiftsDB == null) { +// continue; +// } +// String[][] shiftsDBvalues = casekit.NMR.dbservice.NMRShiftDB.parseNMRShiftDBSpectrum(shiftsDB); +// for (String[] shiftsDBvalue : shiftsDBvalues) { +// atomIndexDB = Integer.parseInt(shiftsDBvalue[2]); +// // sometimes the MongoDB atom index is wrong and out of array range +// if (atomIndexDB > acDB.getAtomCount() - 1) { +// continue; +// } +// shiftDBDouble = Math.round(Double.parseDouble(shiftsDBvalue[0]) * stepSize) / (double) stepSize; +// // if MongoDB shift value out of min-max-range then skip this shift +// if(shiftDBDouble < minShift || shiftDBDouble > maxShift - 1){ +// continue; +// } +// shiftDBInt = (int) (shiftDBDouble * stepSize); +// neighborhoodCountsMatrix[shiftDBInt - 
minShift][0] += 1; // increase number of this shift occurence +// neighborhoodCountsMatrix[shiftDBInt - minShift][1] += (acDB.getAtom(atomIndexDB).isInRing()) ? 1 : 0; // increase if atom is a ring member +// neighborhoodCountsMatrix[shiftDBInt - minShift][2] += (acDB.getAtom(atomIndexDB).isAromatic()) ? 1 : 0; // increase if atom is aromatic +// neighborhoodCountsMatrix[shiftDBInt - minShift][3] += ((acDB.getAtom(atomIndexDB).getImplicitHydrogenCount() != null) && (acDB.getAtom(atomIndexDB).getImplicitHydrogenCount() == 0)) ? 1 : 0; // qC count or equivalents, e.g. qN +// neighborhoodCountsMatrix[shiftDBInt - minShift][4] += ((acDB.getAtom(atomIndexDB).getImplicitHydrogenCount() != null) && (acDB.getAtom(atomIndexDB).getImplicitHydrogenCount() == 1)) ? 1 : 0; // CH count or equivalents, e.g. NH +// neighborhoodCountsMatrix[shiftDBInt - minShift][5] += ((acDB.getAtom(atomIndexDB).getImplicitHydrogenCount() != null) && (acDB.getAtom(atomIndexDB).getImplicitHydrogenCount() == 2)) ? 1 : 0; // CH2 count or equivalents, e.g. NH2 +// neighborhoodCountsMatrix[shiftDBInt - minShift][6] += ((acDB.getAtom(atomIndexDB).getImplicitHydrogenCount() != null) && (acDB.getAtom(atomIndexDB).getImplicitHydrogenCount() == 3)) ? 1 : 0; // CH3 count or equivalents, e.g. 
NH3 +// // add counts for a specific atom to matrix m +// int[] counts = casekit.NMR.Utils.getNeighborhoodBondsCount(acDB, atomIndexDB, bondsSet, neighborElems); +// for (int i = 0; i < counts.length; i++) { +// neighborhoodCountsMatrix[shiftDBInt - minShift][3 + 4 + i] += counts[i]; +// } +// // add this atom container index and atom index within it to belonging hash map +// shiftIndicesInACSet.get(shiftDBInt).add(new Integer[]{k, atomIndexDB}); +// } +// } +// } +// } + +} From 4122e742ec15f65a72b47e25e98216a653d4dc6d Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Thu, 11 Jul 2019 16:25:53 +0200 Subject: [PATCH 103/405] - added a TODO --- src/casekit/NMR/convert/LSDConverter.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/casekit/NMR/convert/LSDConverter.java b/src/casekit/NMR/convert/LSDConverter.java index 005d8d1..10f9d0c 100644 --- a/src/casekit/NMR/convert/LSDConverter.java +++ b/src/casekit/NMR/convert/LSDConverter.java @@ -103,6 +103,9 @@ public static void ConvertToLSD(final String projectName, final String pathToOut BOND += "BOND " + (bond.getAtom(0).getIndex()+1) + " " + (bond.getAtom(1).getIndex()+1) + ";\t" + ac.getAtom(bond.getAtom(0).getIndex()).getSymbol() + "H" + ac.getAtom(bond.getAtom(0).getIndex()).getImplicitHydrogenCount() + " - " + ac.getAtom(bond.getAtom(1).getIndex()).getSymbol() + "H" + ac.getAtom(bond.getAtom(1).getIndex()).getImplicitHydrogenCount() + "\n"; } wholeContent += BOND + "\n"; + + + // @TODO repair HMBC and COSY information output // // set HMBC information to LSD input file // ArrayList indicesInAtomContainerDim1; From cb3f2fd9691e63ac4ab4e5d826eb39bb0465295b Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Thu, 11 Jul 2019 16:28:38 +0200 Subject: [PATCH 104/405] - small changes in text --- src/casekit/NMR/dbservice/NMRShiftDB.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/casekit/NMR/dbservice/NMRShiftDB.java b/src/casekit/NMR/dbservice/NMRShiftDB.java index 7c26dde..6308ee7 
100644 --- a/src/casekit/NMR/dbservice/NMRShiftDB.java +++ b/src/casekit/NMR/dbservice/NMRShiftDB.java @@ -62,7 +62,7 @@ public static IAtomContainerSet getStructuresFromSDFile(final String pathToNMRSh /** * Returns all spectra for each molecule and a given nucleus which exist as - * property in a NMRSHiftDB SDF. + * property in a NMRShiftDB SDF. * * @param pathToNMRShiftDB path to NMRShiftDB file * @param nucleus nucleus of requested spectra @@ -108,7 +108,7 @@ public static ArrayList> getSpectraFromNMRShiftDB(final Stri continue; } - spectra.add(new Object[]{spectrum, NMRShiftDBSpectrumToAssignment(spectraStrings.get(spectrumPropertyString), nucleus)}); + spectra.add(new Object[]{spectrum, NMRShiftDB.NMRShiftDBSpectrumToAssignment(spectraStrings.get(spectrumPropertyString), nucleus)}); } spectraSet.add(spectra); } From 2feba89dc2085ba6b069638e276a1336c4340827 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Thu, 11 Jul 2019 16:35:54 +0200 Subject: [PATCH 105/405] - redesigned class to try to interpret following information : - 1D NMR - DEPT90/135 NMR - 2D NMR: - HSQC - COSY - HMBC - INADEQUATE - molecular formula - the interpretations have to be updated after redesigning --- .../InterpretData.java} | 357 +++--------------- 1 file changed, 56 insertions(+), 301 deletions(-) rename src/casekit/NMR/{ParseRawData.java => interpretation/InterpretData.java} (63%) diff --git a/src/casekit/NMR/ParseRawData.java b/src/casekit/NMR/interpretation/InterpretData.java similarity index 63% rename from src/casekit/NMR/ParseRawData.java rename to src/casekit/NMR/interpretation/InterpretData.java index 89c8623..98850de 100644 --- a/src/casekit/NMR/ParseRawData.java +++ b/src/casekit/NMR/interpretation/InterpretData.java @@ -1,37 +1,25 @@ /* * The MIT License * - * Copyright 2018 Michael Wenk [https://github.com/michaelwenk]. 
+ * Copyright 2019 Michael Wenk [https://github.com/michaelwenk] * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -package casekit.NMR; +package casekit.NMR.interpretation; +import casekit.NMR.Utils; import casekit.NMR.model.Assignment; import casekit.NMR.model.Spectrum; import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; -import javax.xml.parsers.ParserConfigurationException; + import org.openscience.cdk.Atom; import org.openscience.cdk.CDKConstants; -import org.openscience.cdk.exception.CDKException; import org.openscience.cdk.interfaces.IAtom; import org.openscience.cdk.interfaces.IAtomContainer; import org.openscience.cdk.interfaces.IAtomType; @@ -39,13 +27,12 @@ import org.openscience.cdk.interfaces.IMolecularFormula; import org.openscience.cdk.silent.SilentChemObjectBuilder; import org.openscience.cdk.tools.manipulator.MolecularFormulaManipulator; -import org.xml.sax.SAXException; /** * * @author Michael Wenk [https://github.com/michaelwenk] */ -public class ParseRawData { +public class InterpretData { final private IAtomContainer mol; final private IMolecularFormula molFormula; @@ -56,10 +43,10 @@ public class ParseRawData { /** * Creates an instances of this class with an empty class atom container. 
*/ - public ParseRawData(){ + public InterpretData(){ this.molFormula = null; this.mol = SilentChemObjectBuilder.getInstance().newAtomContainer(); - this.setAtomTypeIndices(); + this.updateAtomTypeIndices(); } /** @@ -68,12 +55,13 @@ public ParseRawData(){ * * @param molFormula IMolecularFormula object for IAtomContainer creation */ - public ParseRawData(final IMolecularFormula molFormula){ + public InterpretData(final IMolecularFormula molFormula){ this.molFormula = molFormula; this.mol = Utils.removeAtoms(MolecularFormulaManipulator.getAtomContainer(this.molFormula), "H"); - this.setAtomTypeIndices(); + this.updateAtomTypeIndices(); } - + + /** * Returns used IMolecularFormula object for this class instance. @@ -114,7 +102,7 @@ public final HashMap> getAtomTypeIndices() { * @see Utils#getAtomTypeIndices(org.openscience.cdk.interfaces.IAtomContainer) * */ - private void setAtomTypeIndices(){ + private void updateAtomTypeIndices(){ this.atomTypeIndices = Utils.getAtomTypeIndices(this.mol); } @@ -157,47 +145,9 @@ public final Assignment getAssignment(final Spectrum spectrum){ } return this.assignments.get(spectrum.getSpecType() + "_" + Utils.getSpectrumNucleiAsString(spectrum)); - } - - - /** - * Creates a Spectrum class object from 1D NMR peak list in CSV file format. - * - * @param pathToCSV Path to peak list (Bruker's TopSpin csv file - * format) - * @param atomType Element name (e.g. "C") which also occurrs in - * {@link Utils#getNMRShiftConstant(java.lang.String)} - * @return Spectrum class object from given input file - * @throws java.io.IOException - */ - public static final Spectrum parse1DNMRviaCSV(final String pathToCSV, final String atomType) throws IOException { - final Spectrum spectrum = Utils.CSVtoSpectrum(pathToCSV, new int[]{4}, new String[]{atomType}, 6); - spectrum.setSpecType(CDKConstants.NMRSPECTYPE_1D); - - return spectrum; } - - /** - * Creates a Spectrum class object from 1D NMR peak list in XML file format. 
- * - * @param pathToXML Path to peak list (Bruker's TopSpin csv file - * format) - * @param atomType Element name (e.g. "C") which also occurrs in - * {@link Utils#getNMRShiftConstant(java.lang.String)} - * @return Spectrum class object from given input file - * @throws java.io.IOException - * @throws javax.xml.parsers.ParserConfigurationException - * @throws org.xml.sax.SAXException - */ - public static final Spectrum parse1DNMRviaXML(final String pathToXML, final String atomType) throws IOException, ParserConfigurationException, SAXException { - final Spectrum spectrum = Utils.XMLtoSpectrum(pathToXML, 1, new int[]{1}, new String[]{atomType}); - spectrum.setSpecType(CDKConstants.NMRSPECTYPE_1D); - - return spectrum; - } - /** * Sets the 1D NMR shift values for given Spectrum object to atoms of the class IAtomContainer. * The shift values will be assigned sequentially. @@ -217,7 +167,7 @@ public static final Spectrum parse1DNMRviaXML(final String pathToXML, final Stri * @throws java.io.IOException * @throws org.openscience.cdk.exception.CDKException */ - public final void assign1DSpectrum(final Spectrum spectrum) throws IOException, CDKException{ + public final void assign1DSpectrum(final Spectrum spectrum) throws Exception { // checks whether number of signals is equal to molecular formula if given // if not equal then edit signal list in spectrum this.check1DSpectrum(spectrum); @@ -240,16 +190,16 @@ public final void assign1DSpectrum(final Spectrum spectrum) throws IOException, * * @param spectrum * @throws IOException - * @see Utils#editSignalsInSpectrum(casekit.NMR.model.Spectrum, org.openscience.cdk.interfaces.IMolecularFormula) + * @see Utils#editSignalsInSpectrum(Spectrum, IMolecularFormula, int) */ - private void check1DSpectrum(final Spectrum spectrum) throws IOException, CDKException{ - if(this.molFormula != null){ + private void check1DSpectrum(final Spectrum spectrum) throws Exception { + if(this.molFormula != null) { final int diff = 
Utils.getDifferenceSpectrumSizeAndMolecularFormulaCount(spectrum, this.molFormula, 0); if (diff != 0) { // adjust Spectrum size by user Utils.editSignalsInSpectrum(spectrum, this.molFormula, 0); } - } + } } @@ -258,7 +208,8 @@ private void check1DSpectrum(final Spectrum spectrum) throws IOException, CDKExc * * @param spectrum Spectrum class object which contains shifts in first * dimension - * @see Utils#getNMRShiftConstant(java.lang.String) + * @see Utils#getNMRShiftConstant(java.lang.String) + * */ private void assignShiftValuesToAtoms(final Spectrum spectrum){ final String atomType = Utils.getAtomTypeFromSpectrum(spectrum, 0); @@ -273,7 +224,7 @@ private void assignShiftValuesToAtoms(final Spectrum spectrum){ atom.setImplicitHydrogenCount(null); this.mol.addAtom(atom); } - this.setAtomTypeIndices(); + this.updateAtomTypeIndices(); } // assign shifts to atoms as property if(this.atomTypeIndices.get(atomType) != null){ @@ -307,64 +258,15 @@ private void removeAtoms(final String atomType) { this.mol.removeAtom(iAtom); } - this.setAtomTypeIndices(); - } - - - /** - * Sets the number of implicit hydrogens from two carbon DEPT90 and DEPT135 - * peak - * tables to carbon atoms. The meanwhile found matches are corrected, - * see - * {@link Utils#correctShiftMatches(IAtomContainer, ArrayList, ArrayList, double,String)}. 
- * - * @param pathToCSV Path to one DEPT peak list (Bruker's TopSpin csv file - * format) - * @param mode used angle: either 90° [0] or 135° [1] - * @return - * @throws java.io.IOException - */ - public static final Spectrum parseDEPTviaCSV(final String pathToCSV, final int mode) throws IOException { - final Spectrum spectrum = Utils.CSVtoSpectrum(pathToCSV, new int[]{4}, new String[]{"C"}, 6); - if(mode == 0){ - spectrum.setSpecType(CDKConstants.NMRSPECTYPE_1D_DEPT90); - } else if(mode == 1){ - spectrum.setSpecType(CDKConstants.NMRSPECTYPE_1D_DEPT135); - } - - return spectrum; + this.updateAtomTypeIndices(); } - /** - * Sets the number of implicit hydrogens from two carbon DEPT90 and DEPT135 - * XML files to carbon atoms. The meanwhile found matches are corrected, see - * {@link Utils#correctShiftMatches(IAtomContainer, ArrayList, ArrayList, double,String)}. - * - * @param pathToXML Path to one DEPT peak list (Bruker's TopSpin XML file - * format) - * @param mode used angle: either 90° [0] or 135° [1] - * @return - * @throws java.io.IOException - * @throws javax.xml.parsers.ParserConfigurationException - * @throws org.xml.sax.SAXException - */ - public static final Spectrum parseDEPTviaXML(final String pathToXML, final int mode) throws IOException, ParserConfigurationException, SAXException { - final Spectrum spectrum = Utils.XMLtoSpectrum(pathToXML, 1, new int[]{1}, new String[]{"C"}); - if(mode == 0){ - spectrum.setSpecType(CDKConstants.NMRSPECTYPE_1D_DEPT90); - } else if(mode == 1){ - spectrum.setSpecType(CDKConstants.NMRSPECTYPE_1D_DEPT135); - } - - return spectrum; - } - /** * Sets the assignments of carbon atoms in class atom container * by usage of DEPT90 and DEPT135 information. The implicit hydrogen count * property is set too. 
* - * @see ParseRawData#setImplicitHydrogenCountsFromDEPT() + * @see InterpretData#setImplicitHydrogenCountsFromDEPT() * * @param spectrum1D_DEPT90 DEPT90 spectrum * @param spectrum1D_DEPT135 DEPT135 spectrum which has to contain intensity @@ -379,7 +281,7 @@ public final boolean assignDEPT(final Spectrum spectrum1D_DEPT90, final Spectrum || (this.getSpectra().get(CDKConstants.NMRSPECTYPE_1D + "_13C") == null)){ return false; } - + final Assignment assignment1D_DEPT90 = new Assignment(spectrum1D_DEPT90); final Assignment assignment1D_DEPT135 = new Assignment(spectrum1D_DEPT135); final ArrayList matchesIn1DSpectrum_DEPT90 = this.findMatchesIn1DSpectra(spectrum1D_DEPT90, 0, tol); @@ -411,7 +313,7 @@ public final boolean assignDEPT(final Spectrum spectrum1D_DEPT90, final Spectrum /** * Sets the implicitHydrogenCount() property in atoms of class atom container * by using the already set DEPT information. - * @see ParseRawData#assignDEPT(casekit.NMR.model.Spectrum, casekit.NMR.model.Spectrum, double) + * @see InterpretData#assignDEPT(casekit.NMR.model.Spectrum, casekit.NMR.model.Spectrum, double) */ private void setImplicitHydrogenCountsFromDEPT() { @@ -455,45 +357,9 @@ private void setImplicitHydrogenCountsFromDEPT() { } } - - - /** - * Creates a Spectrum class object from given HSQC input file in CSV format. - * - * @param pathToCSV path to HSQC peak table (Bruker's TopSpin csv file - * format) - * @param heavyAtomType Element name of H bonded heavy atom (e.g. 
"C") which also occurrs in - * {@link Utils#getNMRShiftConstant(java.lang.String)} - * @return - * @throws IOException - */ - public static final Spectrum parseHSQCviaCSV(final String pathToCSV, final String heavyAtomType) throws IOException { - final Spectrum spectrum = Utils.CSVtoSpectrum(pathToCSV, new int[]{5, 6}, new String[]{"H", heavyAtomType}, 9); - spectrum.setSpecType(CDKConstants.NMRSPECTYPE_2D_HSQC); - - return spectrum; - } + /** - * Creates a Spectrum class object from given HSQC input file in XML format. - * - * @param pathToXML path to HSQC XML file - * @param heavyAtomType Element name of H bonded heavy atom (e.g. "C") which also occurrs in - * {@link Utils#getNMRShiftConstant(java.lang.String)} - * @return - * @throws IOException - * @throws javax.xml.parsers.ParserConfigurationException - * @throws org.xml.sax.SAXException - */ - public static final Spectrum parseHSQCviaXML(final String pathToXML, final String heavyAtomType) throws IOException, ParserConfigurationException, SAXException { - final Spectrum spectrum = Utils.XMLtoSpectrum(pathToXML, 2, new int[]{2, 1}, new String[]{"H", heavyAtomType}); - spectrum.setSpecType(CDKConstants.NMRSPECTYPE_2D_HSQC); - - return spectrum; - } - - - /** * * @param spectrum Spectrum class object consisting of Signal class objects * where the proton shifts values are given in first dimension and the @@ -531,15 +397,15 @@ public final void assignHSQC(final Spectrum spectrum, final double tolProton, fi } } } - - + + private void assign2DSpectrum(final Spectrum spectrum, final double tolDim1, final double tolDim2){ - + final ArrayList matchesQueryIn1DSpectrumDim1 = this.findMatchesIn1DSpectra(spectrum, 0, tolDim1); - final ArrayList matchesQueryIn1DSpectrumDim2 = this.findMatchesIn1DSpectra(spectrum, 1, tolDim2); - final ArrayList matches1DInAtomContainerDim1 = this.getAssignedAtomIndices(this.getSpectra().get(CDKConstants.NMRSPECTYPE_1D + "_" + spectrum.getNuclei()[0]), 0); - final ArrayList 
matches1DInAtomContainerDim2 = this.getAssignedAtomIndices(this.getSpectra().get(CDKConstants.NMRSPECTYPE_1D + "_" + spectrum.getNuclei()[1]), 0); - + final ArrayList matchesQueryIn1DSpectrumDim2 = this.findMatchesIn1DSpectra(spectrum, 1, tolDim2); + final ArrayList matches1DInAtomContainerDim1 = this.getAssignedAtomIndices(this.getSpectra().get(CDKConstants.NMRSPECTYPE_1D + "_" + spectrum.getNuclei()[0]), 0); + final ArrayList matches1DInAtomContainerDim2 = this.getAssignedAtomIndices(this.getSpectra().get(CDKConstants.NMRSPECTYPE_1D + "_" + spectrum.getNuclei()[1]), 0); + final Assignment assignment = new Assignment(spectrum); for (int i = 0; i < matchesQueryIn1DSpectrumDim1.size(); i++) { if((matches1DInAtomContainerDim1 != null) && (matchesQueryIn1DSpectrumDim1.get(i) >= 0)){ @@ -549,29 +415,29 @@ private void assign2DSpectrum(final Spectrum spectrum, final double tolDim1, fin assignment.setAssignment(1, i, matches1DInAtomContainerDim2.get(matchesQueryIn1DSpectrumDim2.get(i))); } } - + this.spectra.put(spectrum.getSpecType() + "_" + Utils.getSpectrumNucleiAsString(spectrum), spectrum); this.assignments.put(spectrum.getSpecType() + "_" + Utils.getSpectrumNucleiAsString(spectrum), assignment); } - - + + private ArrayList findMatchesIn1DSpectra(final Spectrum spectrum, final int dim, final double tol){ - + ArrayList matchesQueryInOrigin1DSpectrum = new ArrayList<>(); - final ArrayList shiftsQuery = spectrum.getShifts(dim); - if(this.getSpectra().get(CDKConstants.NMRSPECTYPE_1D + "_" + spectrum.getNuclei()[dim]) != null){ - final ArrayList shiftsOrigin1DSpectrum = this.getSpectra().get(CDKConstants.NMRSPECTYPE_1D + "_" + spectrum.getNuclei()[dim]).getShifts(0); - matchesQueryInOrigin1DSpectrum = Utils.findShiftMatches(shiftsOrigin1DSpectrum, shiftsQuery, tol); - matchesQueryInOrigin1DSpectrum = Utils.correctShiftMatches(shiftsOrigin1DSpectrum, shiftsQuery, matchesQueryInOrigin1DSpectrum, tol); - } else { - for (int i = 0; i < spectrum.getSignalCount(); i++) { - 
matchesQueryInOrigin1DSpectrum.add(-1); - } - } - - return matchesQueryInOrigin1DSpectrum; +// final ArrayList shiftsQuery = spectrum.getShifts(dim); +// if(this.getSpectra().get(CDKConstants.NMRSPECTYPE_1D + "_" + spectrum.getNuclei()[dim]) != null){ +// final ArrayList shiftsOrigin1DSpectrum = this.getSpectra().get(CDKConstants.NMRSPECTYPE_1D + "_" + spectrum.getNuclei()[dim]).getShifts(0); +// matchesQueryInOrigin1DSpectrum = Utils.findShiftMatches(shiftsOrigin1DSpectrum, shiftsQuery, tol); +// matchesQueryInOrigin1DSpectrum = Utils.correctShiftMatches(shiftsOrigin1DSpectrum, shiftsQuery, matchesQueryInOrigin1DSpectrum, tol); +// } else { +// for (int i = 0; i < spectrum.getSignalCount(); i++) { +// matchesQueryInOrigin1DSpectrum.add(-1); +// } +// } + + return matchesQueryInOrigin1DSpectrum; } - + /** * Returns the indices of atoms within the class atom container which match * to the shifts of given spectrum and dimension. @@ -592,48 +458,12 @@ public final ArrayList getAssignedAtomIndices(final Spectrum spectrum, return atomIndices; } - return Utils.ArrayToArrayList(this.getAssignment(spectrum).getAtomIndices(dim)); - } - - - /** - * Creates a Spectrum class object from given H,H-COSY input file in CSV format. - * - * @param pathToCSV path to H,H-COSY peak table (Bruker's TopSpin csv - * file - * format) - * @return - * @throws IOException - */ - public static final Spectrum parseHHCOSYviaCSV(final String pathToCSV) throws IOException { - final Spectrum spectrum = Utils.CSVtoSpectrum(pathToCSV, new int[]{5, 6}, new String[]{"H", "H"}, 9); - spectrum.setSpecType(CDKConstants.NMRSPECTYPE_2D_HHCOSY); - - return spectrum; + return new ArrayList<>(this.getAssignment(spectrum).getAtomIndices(dim)); } - /** - * Creates a Spectrum class object from given H,H-COSY input file in XML format. 
- * - * @param pathToXML path to H,H-COSY peak XML file (Bruker's TopSpin XML - * file format) - * @return - * @throws IOException - * @throws javax.xml.parsers.ParserConfigurationException - * @throws org.xml.sax.SAXException - */ - public static final Spectrum parseHHCOSYviaXML(final String pathToXML) throws IOException, ParserConfigurationException, SAXException { - final Spectrum spectrum = Utils.XMLtoSpectrum(pathToXML, 2, new int[]{2, 1}, new String[]{"H", "H"}); - spectrum.setSpecType(CDKConstants.NMRSPECTYPE_2D_HHCOSY); - - return spectrum; - } - /** - * Sets links between two heavy atoms of H,H-COSY signals. The property - * is then set to {@link #CONST_PROP_HHCOSY} in - * {@link IAtomContainer#setProperty(java.lang.Object, java.lang.Object)} + * Sets links between two heavy atoms of H,H-COSY signals. * * @param spectrum Spectrum class object containing the 2D spectrum proton shift information * @param tolProton tolerance value [ppm] for matching belonging protons @@ -653,44 +483,9 @@ public final boolean assignHHCOSY(final Spectrum spectrum, final double tolProto return true; } - - /** - * Creates a Spectrum class object from given INADEQUATE input file in CSV format. - * - * @param pathToCSV path to INADEQUATE peak table (Bruker's TopSpin csv - * file format) - * @return - * @throws IOException - */ - public static final Spectrum parseINADEQUATEviaCSV(final String pathToCSV) throws IOException { - final Spectrum spectrum = Utils.CSVtoSpectrum(pathToCSV, new int[]{5, 6}, new String[]{"C", "C"}, 9); - spectrum.setSpecType(CDKConstants.NMRSPECTYPE_2D_INADEQUATE); - - return spectrum; - } - - /** - * Creates a Spectrum class object from given INADEQUATE input file in XML format. 
- * - * @param pathToXML path to INADEQUATE peak XML file (Bruker's TopSpin XML - * file format) - * @return - * @throws IOException - * @throws javax.xml.parsers.ParserConfigurationException - * @throws org.xml.sax.SAXException - */ - public static final Spectrum parseINADEQUATEviaXML(final String pathToXML) throws IOException, ParserConfigurationException, SAXException { - final Spectrum spectrum = Utils.XMLtoSpectrum(pathToXML, 2, new int[]{2, 1}, new String[]{"C", "C"}); - spectrum.setSpecType(CDKConstants.NMRSPECTYPE_2D_INADEQUATE); - - return spectrum; - } - /** * Sets links between two carbon atoms in an INADEQUATE signal relationship. - * The property is then set to {@link #CONST_PROP_INADEQUATE} in - * {@link IAtomContainer#setProperty(java.lang.Object, java.lang.Object)}. * Returns true if all signals are bidirectional, so that atom A has a * signal according to atom B and vice versa. * @@ -727,51 +522,11 @@ private void setBond(final int index1, final int index2) { } this.mol.addBond(index1, index2, IBond.Order.UNSET); } - - - /** - * Creates a Spectrum class object from given HMBC input file in CSV format. - * - * @param pathToCSV path to HMBC peak table (Bruker's TopSpin csv - * file format) - * @param heavyAtomType Element name (e.g. "C") which also occurrs in - * {@link Utils#getNMRShiftConstant(java.lang.String)} - * @return - * @throws IOException - */ - public static final Spectrum parseHMBCviaCSV(final String pathToCSV, final String heavyAtomType) throws IOException { - final Spectrum spectrum = Utils.CSVtoSpectrum(pathToCSV, new int[]{5, 6}, new String[]{"H", heavyAtomType}, 9); - spectrum.setSpecType(CDKConstants.NMRSPECTYPE_2D_HMBC); - - return spectrum; - } - - /** - * Creates a Spectrum class object from given HMBC input file in XML format. - * - * @param pathToXML path to HMBC peak XML file (Bruker's TopSpin XML file - * format) - * @param heavyAtomType Element name (e.g. 
"C") which also occurrs in - * {@link Utils#getNMRShiftConstant(java.lang.String)} - * @return - * @throws IOException - * @throws javax.xml.parsers.ParserConfigurationException - * @throws org.xml.sax.SAXException - */ - public static final Spectrum parseHMBCviaXML(final String pathToXML, final String heavyAtomType) throws IOException, ParserConfigurationException, SAXException { - final Spectrum spectrum = Utils.XMLtoSpectrum(pathToXML, 2, new int[]{2, 1}, new String[]{"H", heavyAtomType}); - spectrum.setSpecType(CDKConstants.NMRSPECTYPE_2D_HMBC); - - return spectrum; - } - - + /** * Sets links between heavy atoms which are in HMBC signal relationship. - * The property is then set to {@link #CONST_PROP_HMBC} in - * {@link IAtomContainer#setProperty(java.lang.Object, java.lang.Object)}. - * + * * @param spectrum Spectrum class object consisting of Signal class objects * where the proton shift values is given first and the heavy atom shifts as the second. * @param tolProton tolerance value [ppm] for hydrogen shift matching From aad471c76a594267f1c7d20c6534d91d2a4e5ea9 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Thu, 11 Jul 2019 16:37:12 +0200 Subject: [PATCH 106/405] - updates --- src/casekit/NMR/Utils.java | 535 +++---------------------------------- 1 file changed, 44 insertions(+), 491 deletions(-) diff --git a/src/casekit/NMR/Utils.java b/src/casekit/NMR/Utils.java index d7076b5..9c2a711 100644 --- a/src/casekit/NMR/Utils.java +++ b/src/casekit/NMR/Utils.java @@ -1,56 +1,19 @@ /* * The MIT License * - * Copyright 2018 Michael Wenk [https://github.com/michaelwenk]. 
+ * Copyright 2019 Michael Wenk [https://github.com/michaelwenk] * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ package casekit.NMR; -import casekit.NMR.model.Signal; import casekit.NMR.model.Spectrum; -import java.io.BufferedReader; -import java.io.BufferedWriter; -import java.io.File; -import java.io.FileNotFoundException; -import java.io.FileOutputStream; -import java.io.FileReader; -import java.io.FileWriter; -import java.io.IOException; -import java.io.InputStreamReader; -import java.io.OutputStreamWriter; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.TimeUnit; -import org.w3c.dom.Document; -import javax.xml.parsers.DocumentBuilder; -import javax.xml.parsers.DocumentBuilderFactory; -import javax.xml.parsers.ParserConfigurationException; -import org.apache.commons.lang3.ArrayUtils; +import casekit.NMR.parse.Parser; import org.apache.commons.lang3.StringUtils; import org.openscience.cdk.CDKConstants; import org.openscience.cdk.aromaticity.Aromaticity; @@ -60,22 +23,23 @@ import org.openscience.cdk.exception.CDKException; import org.openscience.cdk.graph.CycleFinder; import org.openscience.cdk.graph.Cycles; -import org.openscience.cdk.interfaces.IAtom; -import org.openscience.cdk.interfaces.IAtomContainer; -import org.openscience.cdk.interfaces.IAtomType; -import org.openscience.cdk.interfaces.IBond; -import org.openscience.cdk.interfaces.IMolecularFormula; +import org.openscience.cdk.interfaces.*; import org.openscience.cdk.io.SDFWriter; import org.openscience.cdk.io.iterator.IteratingSDFReader; import org.openscience.cdk.silent.SilentChemObjectBuilder; 
-import org.openscience.cdk.similarity.Tanimoto; import org.openscience.cdk.tools.CDKHydrogenAdder; import org.openscience.cdk.tools.manipulator.AtomContainerManipulator; import org.openscience.cdk.tools.manipulator.AtomTypeManipulator; import org.openscience.cdk.tools.manipulator.MolecularFormulaManipulator; -import org.w3c.dom.NodeList; import org.xml.sax.SAXException; +import javax.xml.parsers.ParserConfigurationException; +import java.io.*; +import java.util.*; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; + /** * * @author Michael Wenk [https://github.com/michaelwenk] @@ -197,163 +161,10 @@ public static ArrayList getAtomTypeIndicesByElement(final IAtomContaine return indices; } - - - /** - * Reads a specific column of a NMR peak table and stores it into an - * ArrayList object. - * - * @param pathToCSV path to NMR peak table in CSV file format - * @param column column index to select in peak table - * @return ArrayList of Double shift values - * @throws IOException - */ - public static ArrayList parseCSV(final String pathToCSV, final int column) throws IOException { - - final ArrayList shifts = new ArrayList<>(); - String line; - String[] tokens; - BufferedReader fileReader = new BufferedReader(new FileReader(pathToCSV)); - while ((line = fileReader.readLine()) != null) { - tokens = line.split(","); - // get shift value - if (tokens[column].trim().matches("^[+|-]{0,1}\\d+\\.{0,1}\\d*")) { - shifts.add(Double.parseDouble(tokens[column].trim())); - } - } - fileReader.close(); - - return shifts; - } - - /** - * Reads specific columns of one NMR peak table to obtain a Spectrum class - * object and set intensitiy values. - * The number of columns and atom types has to be the same and defines the - * dimension of the returning spectrum. 
- * - * @param pathToCSV path to NMR peak table in CSV file format - * @param columns column indices to select in peak table - * @param atomTypes atom types (element) for each dimension - * @param intensityColumnIndex column index for intensity values - * @return Spectrum class object containing the peak lists - * @throws IOException - */ - public static Spectrum CSVtoSpectrum(final String pathToCSV, final int[] columns, final String[] atomTypes, final int intensityColumnIndex) throws IOException { - - // assumes the same number of selected columns (dimensions) and atom types - if(columns.length != atomTypes.length){ - return null; - } - final String[] nuclei = new String[columns.length]; - for (int col = 0; col < columns.length; col++) { - nuclei[col] = Utils.getIsotopeIdentifier(atomTypes[col]); - } - final Spectrum spectrum = new Spectrum(nuclei); - ArrayList shiftList; - for (int col = 0; col < columns.length; col++) { - shiftList = Utils.parseCSV(pathToCSV, columns[col]); - if(col == 0){ - for (int i = 0; i < shiftList.size(); i++) { - spectrum.addSignal(new Signal(spectrum.getNuclei())); - } - } - if(!spectrum.setShifts(shiftList, col)){ - return null; - } - } - spectrum.setIntensities(parseCSV(pathToCSV, intensityColumnIndex)); - - return spectrum; - } - - - /** - * Reads a NMR peak XML file and returns one attribute of nodes (column) into an - * ArrayList object. - * The XML file must be in Bruker's TopSpin format. 
- * - * @param pathToXML Path to XML file - * @param dim number of dimensions of given data 1 (1D) or 2 (2D) - * @param attribute which attribute index in XML peak nodes should be used: - * 1 (shift of 1st dimension), 2 (shift of 2nd dimension if 2D data, - * intensity if 1D data) or 3 (intensity if 2D data) - * - * @return ArrayList of Double shift values - * @throws IOException - * @throws javax.xml.parsers.ParserConfigurationException - * @throws org.xml.sax.SAXException - */ - public static ArrayList parseXML(final String pathToXML, final int dim, final int attribute) throws IOException, ParserConfigurationException, SAXException { - // assumes a attribute value between 1 and 3 - if(attribute < 1 || attribute > 3){ - return null; - } - - final ArrayList shifts = new ArrayList<>(); - final DocumentBuilderFactory docBuilderFactory = DocumentBuilderFactory.newInstance(); - final DocumentBuilder docBuilder = docBuilderFactory.newDocumentBuilder(); - final Document doc = docBuilder.parse(new File(pathToXML)); - - // normalize text representation - doc.getDocumentElement().normalize(); - final NodeList peakLists = doc.getElementsByTagName("Peak" + dim + "D"); - for (int i = 0; i < peakLists.getLength(); i++) { - shifts.add(Double.parseDouble(peakLists.item(i).getAttributes().item(attribute - 1).getNodeValue())); - } - - return shifts; - } - - - /** - * Reads specific columns of NMR XML files to obtain a Spectrum class - * object. - * The XML file must be in Bruker's TopSpin format. 
- * - * @param pathToXML path to NMR XML file in Bruker's TopSpin XML file format - * @param ndim number of dimensions: 1 (1D) or 2 (2D) - * @param attributes which attribute indices in XML peak nodes should be used: - * 1 (shift of 1st dimension), 2 (shift of 2nd dimension if 2D data) - * @param atomTypes atom types (element) for each dimension - * @return Spectrum class object containing the selected peak lists - * @throws IOException - * @throws javax.xml.parsers.ParserConfigurationException - * @throws org.xml.sax.SAXException - */ - public static Spectrum XMLtoSpectrum(final String pathToXML, final int ndim, final int[] attributes, final String[] atomTypes) throws IOException, ParserConfigurationException, SAXException { - - // assumes the same number of dims, attributes and atom types and a maximum number of dims of 2 - if((ndim != attributes.length) || (ndim != atomTypes.length) || (attributes.length != atomTypes.length) - || (ndim < 1 || ndim > 2)){ - return null; - } - final String[] nuclei = new String[ndim]; - for (int dim = 0; dim < ndim; dim++) { - nuclei[dim] = Utils.getIsotopeIdentifier(atomTypes[dim]); - } - final Spectrum spectrum = new Spectrum(nuclei); - ArrayList shiftList; - for (int dim = 0; dim < ndim; dim++) { - shiftList = Utils.parseXML(pathToXML, ndim, attributes[dim]); - if(dim == 0){ - for (int i = 0; i < shiftList.size(); i++) { - spectrum.addSignal(new Signal(spectrum.getNuclei())); - } - } - if(!spectrum.setShifts(shiftList, dim)){ - return null; - } - } - spectrum.setIntensities(Utils.parseXML(pathToXML, ndim, ndim + 1)); - - return spectrum; - } - public static String getAtomTypeFromSpectrum(final Spectrum spectrum, final int dim){ - if(spectrum.checkDimension(dim)){ + if(spectrum.containsDim(dim)){ return Utils.getAtomTypeFromNucleus(spectrum.getNuclei()[dim]); } @@ -366,7 +177,7 @@ public static String getAtomTypeFromNucleus(final String nucleus){ } public static int getDifferenceSpectrumSizeAndMolecularFormulaCount(final 
Spectrum spectrum, final IMolecularFormula molFormula, final int dim) throws CDKException{ - if(!spectrum.checkDimension(dim)){ + if(!spectrum.containsDim(dim)){ throw new CDKException(Thread.currentThread().getStackTrace()[2].getClassName() + "." + Thread.currentThread().getStackTrace()[2].getMethodName() + ": invalid dimension in spectrum given"); } final String atomType = Utils.getAtomTypeFromSpectrum(spectrum, dim); @@ -377,7 +188,7 @@ public static int getDifferenceSpectrumSizeAndMolecularFormulaCount(final Spectr return atomsInMolFormula - spectrum.getSignalCount(); } - public static void editSignalsInSpectrum(final Spectrum spectrum, final IMolecularFormula molFormula, final int dim) throws IOException, CDKException { + public static void editSignalsInSpectrum(final Spectrum spectrum, final IMolecularFormula molFormula, final int dim) throws Exception { BufferedReader br = new BufferedReader(new InputStreamReader(System.in)); int n; final ArrayList validIndices = new ArrayList<>(); int diff = Utils.getDifferenceSpectrumSizeAndMolecularFormulaCount(spectrum, molFormula, dim); @@ -391,10 +202,10 @@ public static void editSignalsInSpectrum(final Spectrum spectrum, final IMolecul } for (int s = 0; s < spectrum.getSignalCount(); s++) { System.out.print("index: " + s); - for (int d = 0; d < spectrum.getDimCount(); d++) { + for (int d = 0; d < spectrum.getNDim(); d++) { System.out.print(", shift dim " + (d+1) + ": " + spectrum.getShift(s, d)); } - System.out.println(""); + System.out.println(); validIndices.add(s); } // get selected index by user input @@ -414,253 +225,6 @@ public static void editSignalsInSpectrum(final Spectrum spectrum, final IMolecul } } - /** - * Corrects a match list regarding a given shift list and an atom container. - * This is useful when two ore more shift values (e.g. DEPT shifts) match - * with the same atom in the atom container. So the purpose here is to - * enable more unambiguous matches. 
This method first looks for unambiguous - * matches and calculates the median of the difference values between the - * shift list values and the shifts of atom container. Then, all shift list - * values are adjusted (+/-) with this median value. - * - * @param ac IAtomContainer to search - * @param shifts Shift value list to match - * @param matches Match list to correct - * @param tol Tolerance value - * @param atomType Element name (e.g. "C") which also occurrs in - * {@link Utils#getNMRShiftConstant(java.lang.String)} - * @return - */ - public static ArrayList correctShiftMatches(final IAtomContainer ac, final ArrayList shifts, final ArrayList matches, final double tol, final String atomType) { - - int matchIndex; - // get differences of unique matches between query shift and ac shifts - ArrayList diffs = new ArrayList<>(); - final HashSet uniqueMatchIndicesSet = new HashSet<>(matches); - for (Integer matchIndexAtomContainer : uniqueMatchIndicesSet) { - if (Collections.frequency(matches, matchIndexAtomContainer) == 1) { - matchIndex = matches.indexOf(matchIndexAtomContainer); - if (matches.get(matchIndex) >= 0) { - diffs.add(shifts.get(matchIndex) - Double.parseDouble(ac.getAtom(matches.get(matchIndex)).getProperty(casekit.NMR.Utils.getNMRShiftConstant(atomType)).toString())); - } - } - } - // calculate the median of found unique match differences - if (diffs.size() > 0) { - final double median = casekit.NMR.Utils.getMedian(diffs); - // add or subtract the median of the differences to all shift list values (input) and match again then - for (int i = 0; i < shifts.size(); i++) { - shifts.set(i, shifts.get(i) - median); - } - // rematch - return casekit.NMR.Utils.findShiftMatches(ac, shifts, tol, atomType); - } - - return matches; - } - - - /** - * Corrects a match list regarding a given shift list and an atom container. - * This is useful when two ore more shift values (e.g. DEPT shifts) match - * with the same atom in the atom container. 
So the purpose here is to - * enable more unambiguous matches. This method first looks for unambiguous - * matches and calculates the median of the difference values between the - * shift list values and the shifts of atom container. Then, all shift list - * values are adjusted (+/-) with this median value. - * - * @param shiftList1 Shift value list to search in - * @param shiftList2 Shift value list to match in shiftList1 - * @param matchesInshiftList1 Match list to correct - * @param tol Tolerance value - * @return - */ - public static ArrayList correctShiftMatches(final ArrayList shiftList1, final ArrayList shiftList2, final ArrayList matchesInshiftList1, final double tol) { - - int matchIndex; - // get differences of unique matches between query shift and ac shifts - ArrayList diffs = new ArrayList<>(); - final HashSet uniqueMatchIndicesSet = new HashSet<>(matchesInshiftList1); - for (final int uniqueMatchIndex : uniqueMatchIndicesSet) { - if (Collections.frequency(matchesInshiftList1, uniqueMatchIndex) == 1) { - matchIndex = matchesInshiftList1.indexOf(uniqueMatchIndex); - if (matchesInshiftList1.get(matchIndex) >= 0) { - diffs.add(shiftList2.get(matchIndex) - shiftList1.get(matchesInshiftList1.get(matchIndex))); - } - } - } - // calculate the median of found unique match differences - if (diffs.size() > 0) { - final double median = casekit.NMR.Utils.getMedian(diffs); - // add or subtract the median of the differences to all shift list values (input) and match again then - for (int i = 0; i < shiftList2.size(); i++) { - shiftList2.set(i, shiftList2.get(i) - median); - } - // rematch - return casekit.NMR.Utils.findShiftMatches(shiftList1, shiftList2, tol); - } - - return matchesInshiftList1; - } - - - /** - * Finds the matches with the lowest deviations between a given shift value - * set and the atoms of an atom container. A tolerance value and NMRSHIFT - * constant must be set. 
- * - * @param ac IAtomContainer to search - * @param shiftList shift value list to match - * @param tol Tolerance value [ppm] - * @param atomType Element name (e.g. "C") which also occurrs in - * {@link Utils#getNMRShiftConstant(java.lang.String)} - * @return List of match indices for every query shift within the IAtomContainer - */ - public static ArrayList findShiftMatches(final IAtomContainer ac, final ArrayList shiftList, final double tol, final String atomType) { - - final ArrayList matches = new ArrayList<>(); - for (int i = 0; i < shiftList.size(); i++) { - matches.add(casekit.NMR.Utils.findSingleShiftMatch(ac, shiftList.get(i), tol, atomType)); - } - - return matches; - } - - /** - * Finds the match with the lowest deviation between a given shift value and - * the atoms of an atom container. A tolerance value and NMRSHIFT constant - * must be set. - * - * @param ac IAtomContainer to search - * @param shift Shift value to match [ppm] - * @param tol Tolerance value [ppm] - * @param atomType Element name (e.g. 
"C") which also occurrs in - * {@link Utils#getNMRShiftConstant(java.lang.String)} - * @return Match index of a query shift within the IAtomContainer - */ - public static int findSingleShiftMatch(final IAtomContainer ac, final double shift, final double tol, final String atomType) { - - int matchIndex = -1; - double minDiff = tol, acShift; - for (int k = 0; k < ac.getAtomCount(); k++) { - // skip other atom types than given - if (ac.getAtom(k).getProperty(casekit.NMR.Utils.getNMRShiftConstant(atomType)) == null) { - continue; - } - // figure out the atom with lowest shift deviation - acShift = Double.parseDouble(ac.getAtom(k).getProperty(casekit.NMR.Utils.getNMRShiftConstant(atomType)).toString()); - if ((shift - tol <= acShift) && (acShift <= shift + tol) && (Math.abs(shift - acShift) < minDiff)) { - minDiff = Math.abs(shift - acShift); - matchIndex = k; - } - } - - return matchIndex; - } - - - /** - * Finds the matches with the lowest deviations between two given shift value - * lists. - * - * @param shiftList1 shift value list to search in - * @param shiftList2 shift value list to match in shiftList1 - * @param tol Tolerance value [ppm] - * @return List of match indices within shiftList1 - */ - public static ArrayList findShiftMatches(final ArrayList shiftList1, final ArrayList shiftList2, final double tol) { - - final ArrayList matchesInShiftList1 = new ArrayList<>(); - for (int i = 0; i < shiftList2.size(); i++) { - matchesInShiftList1.add(casekit.NMR.Utils.findSingleShiftMatch(shiftList1, shiftList2.get(i), tol)); - } - - return matchesInShiftList1; - } - - - /** - * Finds the match with the lowest deviation between a given shift value and - * a shift list. 
- * - * @param shiftList Shift list to search in - * @param shift Shift value [ppm] to find in ShiftList - * @param tol Tolerance value [ppm] - * @return Match index of a query shift within shiftList - */ - public static int findSingleShiftMatch(final ArrayList shiftList, final double shift, final double tol) { - - int matchIndex = -1; - double minDiff = tol; - for (int k = 0; k < shiftList.size(); k++) { - // figure out the shift with lowest deviation - if ((shift - tol <= shiftList.get(k)) && (shiftList.get(k) <= shift + tol) && (Math.abs(shift - shiftList.get(k)) < minDiff)) { - minDiff = Math.abs(shift - shiftList.get(k)); - matchIndex = k; - } - } - - return matchIndex; - } - - - /** - * Finds match indices between a given shift list from a peak table and an atom container. - * Wrapper function for {@link #parsePeakTable(String, int)}, - * {@link #findShiftMatches(IAtomContainer, ArrayList, double, String)} - * and - * {@link #correctShiftMatches(IAtomContainer, ArrayList, ArrayList, double, String)}. - * - * @param ac IAtomContainer to search for matches - * @param pathToPeakList Path to peak table - * @param atomType Element name (e.g. 
"C") which also occurrs in - * {@link Utils#getNMRShiftConstant(java.lang.String)} - * @param tol Tolerance value [ppm] - * @param column Column number of shift values in peak table - * @return Indices of matches for each shift within the IAtomContainer - * @throws IOException - * @deprecated - */ - public static ArrayList matchShiftsFromPeakTable(final IAtomContainer ac, final String pathToPeakList, final String atomType, final double tol, final int column) throws IOException { - - final ArrayList shiftsAtomType = casekit.NMR.Utils.parseCSV(pathToPeakList, column); - ArrayList matchesAtomType = casekit.NMR.Utils.findShiftMatches(ac, shiftsAtomType, tol, atomType); - matchesAtomType = casekit.NMR.Utils.correctShiftMatches(ac, shiftsAtomType, matchesAtomType, tol, atomType); - - return matchesAtomType; - } - - - /** - * Finds match indices between a given shift list from a XML file and an - * atom container. Wrapper function for {@link #parseXML(String, int)}, - * {@link #findShiftMatches(IAtomContainer, ArrayList, double, String)} and - * {@link #correctShiftMatches(IAtomContainer, ArrayList, ArrayList, double, String)}. - * - * @param ac IAtomContainer to search for matches - * @param pathToXML - * @param atomType Element name (e.g. 
"C") which also occurrs in - * {@link Utils#getNMRShiftConstant(java.lang.String)} - * @param tol Tolerance value [ppm] - * @param ndim number of dimensions of given data 1 (1D) or 2 (2D) - * @param attribute which attribute index in XML peak nodes should be used: - * 1 (shift of 1st dimension), 2 (shift of 2nd dimension if 2D data, - * intensity if 1D data) or 3 (intensity if 2D data) - * @return Indices of matches for each shift within the IAtomContainer - * @throws IOException - * @throws javax.xml.parsers.ParserConfigurationException - * @throws org.xml.sax.SAXException - * @deprecated - */ - public static ArrayList matchShiftsFromXML(final IAtomContainer ac, final String pathToXML, final String atomType, final double tol, final int ndim, final int attribute) throws IOException, ParserConfigurationException, SAXException { - - final ArrayList shiftsAtomType = casekit.NMR.Utils.parseXML(pathToXML, ndim, attribute); - ArrayList matchesAtomType = casekit.NMR.Utils.findShiftMatches(ac, shiftsAtomType, tol, atomType); - matchesAtomType = casekit.NMR.Utils.correctShiftMatches(ac, shiftsAtomType, matchesAtomType, tol, atomType); - - return matchesAtomType; - } - /** * Specified for carbons only -> not generic!!! 
* @@ -1203,31 +767,32 @@ public static HashMap getRMS(final HashMap= ac.getAtom(atomIndex).getValency(); } public static void addImplicitHydrogens(final IAtomContainer ac) throws CDKException{ final CDKAtomTypeMatcher matcher = CDKAtomTypeMatcher.getInstance(ac.getBuilder()); + IAtomType type; for (IAtom atom : ac.atoms()) { - IAtomType type = matcher.findMatchingAtomType(ac, atom); + type = matcher.findMatchingAtomType(ac, atom); AtomTypeManipulator.configure(atom, type); } - CDKHydrogenAdder adder = CDKHydrogenAdder.getInstance(ac.getBuilder()); + final CDKHydrogenAdder adder = CDKHydrogenAdder.getInstance(ac.getBuilder()); adder.addImplicitHydrogens(ac); } - public static int countElements(final String input){ - int counter = 0; - for (int k = 0; k < input.length(); k++) { - // Check for uppercase letters - if (Character.isLetter(input.charAt(k)) && Character.isUpperCase(input.charAt(k))) { - counter++; - } - } - - return counter; - } +// public static int countElements(final String input){ +// int counter = 0; +// for (int k = 0; k < input.length(); k++) { +// // Check for uppercase letters +// if (Character.isLetter(input.charAt(k)) && Character.isUpperCase(input.charAt(k))) { +// counter++; +// } +// } +// +// return counter; +// } // public static ArrayList getComponents(final String symbols){ // final ArrayList components = new ArrayList<>(); @@ -1264,7 +829,15 @@ public static HashMap getMedian(final HashMap> hoseLookupToExtend, final HashMap> hoseLookup){ for (final String hose : hoseLookup.keySet()) { if(!hoseLookupToExtend.containsKey(hose)){ @@ -1401,7 +974,6 @@ public static IAtomContainer removeAtoms(final IAtomContainer ac, final String a * @param array * @return * - * @deprecated */ public static ArrayList ArrayToArrayList(final int[] array){ @@ -1416,9 +988,9 @@ public static ArrayList ArrayToArrayList(final int[] array){ public static String getSpectrumNucleiAsString(final Spectrum spectrum){ String specID = ""; - for (int i = 0; i < 
spectrum.getDimCount(); i++) { + for (int i = 0; i < spectrum.getNDim(); i++) { specID += spectrum.getNuclei()[i]; - if(i < spectrum.getDimCount()-1){ + if(i < spectrum.getNDim() - 1){ specID += "-"; } } @@ -1426,25 +998,6 @@ public static String getSpectrumNucleiAsString(final Spectrum spectrum){ return specID; } - public static Spectrum setSpectrumEquivalences(final Spectrum spectrum){ - int equivalentSignalIndex; - for (final Signal signal : spectrum.getSignals()) { - equivalentSignalIndex = -1; - for (final int closestSignalIndex : spectrum.pickSignals(signal.getShift(0), 0, 0.0)) { - if (spectrum.getSignalIndex(signal) <= closestSignalIndex) { - continue; - } - if (signal.getMultiplicity().equals(spectrum.getSignal(closestSignalIndex).getMultiplicity())) { - equivalentSignalIndex = closestSignalIndex; - break; - } - } - spectrum.setEquivalence(spectrum.getSignalIndex(signal), equivalentSignalIndex); - } - - return spectrum; - } - public static boolean checkIndexInAtomContainer(final IAtomContainer ac, final int atomIndex){ return ((atomIndex >= 0) && atomIndex < ac.getAtomCount()); } From a41497eff67d17dc19a1d80d1e9a6491c06d4bbc Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Thu, 11 Jul 2019 16:38:28 +0200 Subject: [PATCH 107/405] - small update --- src/casekit/NMR/convert/LSDConverter.java | 9 --------- 1 file changed, 9 deletions(-) diff --git a/src/casekit/NMR/convert/LSDConverter.java b/src/casekit/NMR/convert/LSDConverter.java index 10f9d0c..645095f 100644 --- a/src/casekit/NMR/convert/LSDConverter.java +++ b/src/casekit/NMR/convert/LSDConverter.java @@ -11,24 +11,15 @@ */ package casekit.NMR.convert; -import casekit.NMR.ParseData; import casekit.NMR.Utils; -import casekit.NMR.dbservice.NMRShiftDB; import casekit.NMR.model.Spectrum; import java.io.File; -import java.io.FileNotFoundException; import java.io.IOException; -import java.io.UnsupportedEncodingException; -import java.util.ArrayList; import java.util.HashMap; -import 
org.openscience.cdk.CDKConstants; -import org.openscience.cdk.exception.CDKException; import org.openscience.cdk.formula.MolecularFormula; import org.openscience.cdk.interfaces.IAtomContainer; -import org.openscience.cdk.interfaces.IAtomContainerSet; import org.openscience.cdk.interfaces.IBond; -import org.openscience.cdk.interfaces.IMolecularFormula; import org.openscience.cdk.tools.manipulator.MolecularFormulaManipulator; /** From 13c51d95a47676d8350d0c4665e5d1ad9af65629 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Thu, 11 Jul 2019 16:39:29 +0200 Subject: [PATCH 108/405] - updated --- pom.xml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/pom.xml b/pom.xml index 2d90761..115c6ba 100644 --- a/pom.xml +++ b/pom.xml @@ -98,11 +98,5 @@ mongo-java-driver 3.10.0 - - org.openscience - FragAssembler - 1.0-SNAPSHOT - jar - From 0c485d96015f7200ba47fc8a98ef0fb067a83413 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Thu, 11 Jul 2019 16:52:13 +0200 Subject: [PATCH 109/405] - uses now the HOSECodeBuilder from https://github.com/michaelwenk/HOSECodeBuilder --- src/casekit/NMR/predict/Predict.java | 1 - 1 file changed, 1 deletion(-) diff --git a/src/casekit/NMR/predict/Predict.java b/src/casekit/NMR/predict/Predict.java index 7ba7c82..a976b4a 100644 --- a/src/casekit/NMR/predict/Predict.java +++ b/src/casekit/NMR/predict/Predict.java @@ -30,7 +30,6 @@ import java.util.HashMap; import hose.HOSECodeBuilder; -import hose.model.ConnectionTree; import org.openscience.cdk.exception.CDKException; import org.openscience.cdk.interfaces.IAtom; import org.openscience.cdk.interfaces.IAtomContainer; From 868bb919923e96f6d078cdbe34fa56d1244335f3 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Fri, 12 Jul 2019 12:31:29 +0200 Subject: [PATCH 110/405] - minor changes --- src/casekit/NMR/predict/Predict.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/casekit/NMR/predict/Predict.java b/src/casekit/NMR/predict/Predict.java index a976b4a..2bafc9b 
100644 --- a/src/casekit/NMR/predict/Predict.java +++ b/src/casekit/NMR/predict/Predict.java @@ -23,17 +23,18 @@ */ package casekit.NMR.predict; + import casekit.NMR.Utils; import casekit.NMR.model.Signal; import casekit.NMR.model.Spectrum; -import java.util.ArrayList; -import java.util.HashMap; - import hose.HOSECodeBuilder; import org.openscience.cdk.exception.CDKException; import org.openscience.cdk.interfaces.IAtom; import org.openscience.cdk.interfaces.IAtomContainer; +import java.util.ArrayList; +import java.util.HashMap; + /** * * @author Michael Wenk [https://github.com/michaelwenk] From 13d10c41a638ab995339699a6a23426048c7016b Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Fri, 12 Jul 2019 12:32:40 +0200 Subject: [PATCH 111/405] - function added to convert a NMRShiftDB spectrum string into a basic spectrum string --- src/casekit/NMR/dbservice/NMRShiftDB.java | 29 +++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/src/casekit/NMR/dbservice/NMRShiftDB.java b/src/casekit/NMR/dbservice/NMRShiftDB.java index 6308ee7..857da09 100644 --- a/src/casekit/NMR/dbservice/NMRShiftDB.java +++ b/src/casekit/NMR/dbservice/NMRShiftDB.java @@ -383,6 +383,35 @@ public static String getNucleusFromNMRShiftDBSpectrumProperty(final String NMRSh return NMRShiftDBSpectrumProperty.split(" ")[1]; } + public static String NMRShiftDBSpectrumToBasicTextSpectrum(final String NMRShiftDBSpectrum, final String nucleus, final String description){ + if ((NMRShiftDBSpectrum == null) || NMRShiftDBSpectrum.trim().isEmpty()) { + return null; + } + final StringBuilder basicSpectrum = new StringBuilder(); + // append description + if(!description.trim().startsWith("//")){ + basicSpectrum.append("// "); + } + basicSpectrum.append(description).append("\n"); + final String[][] spectrumStringArray = NMRShiftDB.parseNMRShiftDBSpectrum(NMRShiftDBSpectrum); + try { + for (int i = 0; i < spectrumStringArray.length; i++) { + // append nucleus + basicSpectrum.append(nucleus).append(", 
"); + // append chemical shift + basicSpectrum.append(Double.parseDouble(spectrumStringArray[i][0])).append(", "); + // append multiplicity + basicSpectrum.append(spectrumStringArray[i][2]).append(", "); + // append intensity + basicSpectrum.append(Double.parseDouble(spectrumStringArray[i][1])).append("\n"); + } + } catch (Exception e) { + return null; + } + + return basicSpectrum.toString(); + } + public static Spectrum NMRShiftDBSpectrumToSpectrum(final String NMRShiftDBSpectrum, final String nucleus){ if ((NMRShiftDBSpectrum == null) || NMRShiftDBSpectrum.trim().isEmpty()) { return null; From 96b3cd75f294df1e000b47e8508d56b1fde761ab Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Thu, 18 Jul 2019 09:21:56 +0200 Subject: [PATCH 112/405] - adjustments --- README.md | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index af6cd92..5c8f49f 100644 --- a/README.md +++ b/README.md @@ -11,10 +11,6 @@ This project hosts various Java classes for teaching and research dealing with s This project depends on the Chemistry Development Project (CDK), hosted under http://cdk.github.io/ Please refer to these pages for updated information and the latest version of the CDK. CDK's API documentation is available though our [Github site](http://cdk.github.io/cdk/). -## Releases - -Latest release of casekit is at https://github.com/steinbeck/casekit/releases/latest - ## Download Spectra Source code This assumes that you have git working on your system and you have initialised your local repository. 
Refer to https://help.github.com/articles/set-up-git/ for more @@ -22,7 +18,7 @@ This assumes that you have git working on your system and you have initialised y Then, downloading spectra is just a matter of ```bash -$ git clone https://github.com/steinbeck/casedk.git +$ git clone https://github.com/michaelwenk/casekit ``` ## Compiling @@ -30,7 +26,8 @@ $ git clone https://github.com/steinbeck/casedk.git Compiling the library is performed with Apache Maven and requires Java 1.7 or later: ```bash -spectra/$ mvn package +cd casekit +mvn clean package ``` will create an all-in-one-jar under ./target From 135e7047fa289f8508906fb6ac43e596e6e228ea Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Thu, 18 Jul 2019 09:44:42 +0200 Subject: [PATCH 113/405] - adjustments --- src/casekit/NMRShiftDBSDFParser.java | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/casekit/NMRShiftDBSDFParser.java b/src/casekit/NMRShiftDBSDFParser.java index 796fe4c..c2ff08b 100644 --- a/src/casekit/NMRShiftDBSDFParser.java +++ b/src/casekit/NMRShiftDBSDFParser.java @@ -81,8 +81,8 @@ public NMRShiftDBSDFParser(String[] args) throws Exception while (iterator.hasNext()) { ac = iterator.next(); - carbonNMR = (String)ac.getProperty("Spectrum 13C 0"); - hydrogenNMR = (String)ac.getProperty("Spectrum 1H 0"); + carbonNMR = ac.getProperty("Spectrum 13C 0"); + hydrogenNMR = ac.getProperty("Spectrum 1H 0"); if (carbonNMR != null) { carbonNMRCount++; @@ -220,6 +220,13 @@ private Options setupOptions(String[] args) .desc("filename of generated HOSE code table (required)") .build(); options.addOption(outfile); + Option maxspheres = Option.builder("m") + .required(true) + .hasArg() + .longOpt("maxspheres") + .desc("maximum sphere size up to which to generate HOSE codes (required)") + .build(); + options.addOption(maxspheres); Option verbose = Option.builder("v") .required(false) .longOpt("verbose") @@ -233,13 +240,7 @@ private Options setupOptions(String[] args) 
.desc("store pictures in given directory") .build(); options.addOption(picdir); - Option maxspheres = Option.builder("m") - .required(false) - .hasArg() - .longOpt("maxspheres") - .desc("maximum sphere size up to which to generate HOSE codes") - .build(); - options.addOption(maxspheres); + return options; } From 4e2e8e148b045d7f67d8b61472509f16f05d1413 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Thu, 18 Jul 2019 09:44:57 +0200 Subject: [PATCH 114/405] - adjustments --- README.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 5c8f49f..bcfdf78 100644 --- a/README.md +++ b/README.md @@ -42,17 +42,18 @@ The following classes are to demonstrate the prediction of Carbon-13 NMR spectra Takes the NMRShiftDB SDF with assigned spectra (download from help section of NMRShiftDB.org) and produces a Tab-separated file with HOSE codes and assigned shift values. This file can then be read by HOSECodePredictor and SimilarityRanker. ```bash -usage: java -jar spectra.jar casekit.NMRShiftDBSDFParser -i -o [-v] '[-d ]' [-m ] +usage: java -jar casekit.jar casekit.NMRShiftDBSDFParser -i -o + -m [-v] [-d ] Generates a table of HOSE codes and assigned shifts from an NMRShiftDB SDF file from http://nmrshiftdb.nmr.uni-koeln.de/portal/js_pane/P-Help. 
-i,--infile filename of NMRShiftDB SDF with spectra (required) -o,--outfile filename of generated HOSE code table (required) + -m,--maxspheres maximum sphere size up to which to generate HOSE + codes (required) -v,--verbose generate messages about progress of operation -d,--picdir store pictures in given directory - -m,--maxspheres maximum sphere size up to which to generate HOSE - codes ``` #### HOSECodePredictor From e8e936d9a582157a2df4794491cd197b7921cc63 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Thu, 18 Jul 2019 10:00:38 +0200 Subject: [PATCH 115/405] - adjustments --- pom.xml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 115c6ba..4fb7d98 100644 --- a/pom.xml +++ b/pom.xml @@ -2,7 +2,7 @@ 4.0.0 - casekit + org.openscience casekit 1.0-SNAPSHOT casekit @@ -98,5 +98,10 @@ mongo-java-driver 3.10.0 + + org.openscience + HOSECodeBuilder + 1.0 + From 82ae87ef06f94fb6bc200f840011ebc5f20678ff Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Thu, 18 Jul 2019 10:11:19 +0200 Subject: [PATCH 116/405] - adjustments --- README.md | 42 +++++++----------------------------------- 1 file changed, 7 insertions(+), 35 deletions(-) diff --git a/README.md b/README.md index bcfdf78..c5a8bcd 100644 --- a/README.md +++ b/README.md @@ -63,46 +63,18 @@ It needs the TSV file generated by NMRShiftDBSDFParser as input. ```bash usage: java -jar casekit.jar casekit.HOSECodePredictor -s -i - [-v] [-d ] [-m ] + -d -m [-v] Predict NMR chemical shifts for a given molecule based on table of HOSE codes and assigned shifts. 
-s,--hosecodes filename of TSV file with HOSE codes (required) - -i,--infile filename of with SDF/MOL file of structures to be - predicted (required) - -v,--verbose generate messages about progress of operation - -d,--picdir store pictures of structures with assigned shifts - in given directory + -i,--infile filename of with SDF/MOL file of a structure to + be predicted (required) + -d,--picdir store picture of structure with assigned shifts + in given directory (required) -m,--maxspheres maximum sphere size up to which to generate HOSE - codes. Default is 6 spheres if this option is - ommitted. - -Please report issues at https://github.com/steinbeck/spectra -``` - -#### SimilarityRanker - -Rank structures based on given experimental spectrum and similarity to -predicted spectrum. - -```bash -usage: java -jar casekit.jar casekit.SimilarityRanker -i -p -o - -s [-n ] [-v] -Rank structures based on given experimental spectrum and similarity to -predicted spectrum. - - -i,--infile filename of with SDF/MOL file of structures to be - ranked (required) - -p,--spectrum filename of CSV file with spectrum. Format of each - line: ; (required) - -o,--outpath path to store pictures of ranked output structures - (required) - -s,--hosecodes filename of TSV file with HOSE codes (required) - -n,--number number of structures in output file. 
Default is - 10, if this option is ommitted - -v,--verbose generate messages about progress of operation - -Please report issues at https://github.com/steinbeck/spectra + codes (required) + -v,--verbose generate messages about progress of operation ``` From 26de3787f78f011193a461d9c9807ddfc19f568e Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Thu, 18 Jul 2019 10:11:49 +0200 Subject: [PATCH 117/405] - adjustments for CMD input --- src/casekit/HOSECodePredictor.java | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/casekit/HOSECodePredictor.java b/src/casekit/HOSECodePredictor.java index d7661c4..cc053a1 100644 --- a/src/casekit/HOSECodePredictor.java +++ b/src/casekit/HOSECodePredictor.java @@ -246,7 +246,7 @@ public Double getShift(String hose) ArrayList list = hoseLookup.get(hose); for (int f = 0; f < list.size(); f++) { - shiftvalue = shiftvalue + ((Double) list.get(f)).doubleValue(); + shiftvalue = shiftvalue + list.get(f).doubleValue(); } shiftvalue = shiftvalue / list.size(); if (verbose) System.out.println("Predicted HOSE code from " + list.size() + " values"); @@ -322,29 +322,29 @@ private Options setupOptions(String[] args) .required(true) .hasArg() .longOpt("infile") - .desc("filename of with SDF/MOL file of structures to be predicted (required)") + .desc("filename of with SDF/MOL file of a structure to be predicted (required)") .build(); options.addOption(infile); - Option verbose = Option.builder("v") - .required(false) - .longOpt("verbose") - .desc("generate messages about progress of operation") - .build(); - options.addOption(verbose); Option picdir = Option.builder("d") - .required(false) + .required(true) .hasArg() .longOpt("picdir") - .desc("store pictures of structures with assigned shifts in given directory") + .desc("store picture of structure with assigned shifts in given directory (required)") .build(); options.addOption(picdir); Option maxspheres = Option.builder("m") - .required(false) + 
.required(true) .hasArg() .longOpt("maxspheres") - .desc("maximum sphere size up to which to generate HOSE codes. Default is 6 spheres if this option is ommitted.") + .desc("maximum sphere size up to which to generate HOSE codes (required)") .build(); options.addOption(maxspheres); + Option verbose = Option.builder("v") + .required(false) + .longOpt("verbose") + .desc("generate messages about progress of operation") + .build(); + options.addOption(verbose); return options; } From 92cf183b51232819db86dd55a50dcc899bae0d92 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Thu, 18 Jul 2019 10:24:23 +0200 Subject: [PATCH 118/405] - added the HOSECodeBuilder dependency description --- README.md | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index c5a8bcd..81e8134 100644 --- a/README.md +++ b/README.md @@ -11,11 +11,13 @@ This project hosts various Java classes for teaching and research dealing with s This project depends on the Chemistry Development Project (CDK), hosted under http://cdk.github.io/ Please refer to these pages for updated information and the latest version of the CDK. CDK's API documentation is available though our [Github site](http://cdk.github.io/cdk/). -## Download Spectra Source code +## Download Source code -This assumes that you have git working on your system and you have initialised your local repository. Refer to https://help.github.com/articles/set-up-git/ for more +This assumes that you have git working on your system and you have initialised your local repository. +The packages HOSECodeBuilder (https://github.com/michaelwenk/HOSECodeBuilder) has to be installed on the local machine. +It is a dependency in casekit's pom.xml and has to ready to use, e.g. in Maven's .m2 folder. 
-Then, downloading spectra is just a matter of +Then, downloading casekit is just a matter of ```bash $ git clone https://github.com/michaelwenk/casekit @@ -39,7 +41,7 @@ The following classes are to demonstrate the prediction of Carbon-13 NMR spectra #### NMRShiftDBSDFParser -Takes the NMRShiftDB SDF with assigned spectra (download from help section of NMRShiftDB.org) and produces a Tab-separated file with HOSE codes and assigned shift values. This file can then be read by HOSECodePredictor and SimilarityRanker. +Takes the NMRShiftDB SDF with assigned spectra (download from help section of NMRShiftDB.org) and produces a Tab-separated file with HOSE codes and assigned shift values. This file can then be read by HOSECodePredictor. ```bash usage: java -jar casekit.jar casekit.NMRShiftDBSDFParser -i -o From b370952440e4fba8e10ba9e2c3c08e48254abe1b Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Thu, 18 Jul 2019 11:24:43 +0200 Subject: [PATCH 119/405] - added more information to the installation guide --- README.md | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 81e8134..b37628e 100644 --- a/README.md +++ b/README.md @@ -13,9 +13,7 @@ Please refer to these pages for updated information and the latest version of th ## Download Source code -This assumes that you have git working on your system and you have initialised your local repository. -The packages HOSECodeBuilder (https://github.com/michaelwenk/HOSECodeBuilder) has to be installed on the local machine. -It is a dependency in casekit's pom.xml and has to ready to use, e.g. in Maven's .m2 folder. +This assumes that you have git working on your system and you have initialised your local repository. Then, downloading casekit is just a matter of @@ -25,6 +23,12 @@ $ git clone https://github.com/michaelwenk/casekit ## Compiling +The package HOSECodeBuilder (https://github.com/michaelwenk/HOSECodeBuilder) has to be installed on the local machine. 
+It is a dependency in casekit's pom.xml and has to ready to use, e.g. installed in Maven's .m2 folder.
+This can be done something like: + + mvn install:install-file -Dfile=PATH/TO/HOSECodeBuilder-1.0-SNAPSHOT-jar-with-dependencies.jar -DgroupId=org.openscience -DartifactId=HOSECodeBuilder -Dversion=1.0 -Dpackaging=jar + Compiling the library is performed with Apache Maven and requires Java 1.7 or later: ```bash From 2096d059b3d2669d7efd1fe9b83edc9c37381c79 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Thu, 18 Jul 2019 14:29:51 +0200 Subject: [PATCH 120/405] - added more information to the installation section --- .classpath | 20 - .idea/artifacts/casekit_jar.xml | 63 ++ .idea/casekit.iml | 69 ++ .idea/compiler.xml | 16 + .idea/copyright/MIT_License.xml | 6 + .idea/copyright/profiles_settings.xml | 3 + ...hub_gilleain_signatures_signatures_1_1.xml | 13 + .../Maven__com_google_guava_guava_17_0.xml | 13 + .../Maven__commons_cli_commons_cli_1_3_1.xml | 13 + .../Maven__gov_nist_math_jama_1_0_3.xml | 13 + .../Maven__javax_vecmath_vecmath_1_5_2.xml | 13 + .../Maven__jgrapht_jgrapht_0_6_0.xml | 13 + .idea/libraries/Maven__junit_junit_4_10.xml | 13 + ...__org_apache_commons_commons_lang3_3_5.xml | 13 + ...org_apache_commons_commons_math3_3_1_1.xml | 13 + ...en__org_freehep_freehep_graphics2d_2_4.xml | 13 + ...__org_freehep_freehep_graphicsbase_2_4.xml | 13 + ...en__org_freehep_freehep_graphicsio_2_4.xml | 13 + ...org_freehep_freehep_graphicsio_pdf_2_4.xml | 13 + ..._org_freehep_freehep_graphicsio_ps_2_4.xml | 13 + ...org_freehep_freehep_graphicsio_svg_2_4.xml | 13 + ...g_freehep_freehep_graphicsio_tests_2_4.xml | 13 + .../Maven__org_freehep_freehep_io_2_2_2.xml | 13 + .../Maven__org_hamcrest_hamcrest_core_1_1.xml | 13 + ...__org_mongodb_mongo_java_driver_3_10_0.xml | 13 + ...n__org_openscience_HOSECodeBuilder_1_0.xml | 13 + ...nscience_cdk_cdk_atomtype_2_2_SNAPSHOT.xml | 13 + ...enscience_cdk_cdk_charges_2_2_SNAPSHOT.xml | 13 + ..._openscience_cdk_cdk_core_2_2_SNAPSHOT.xml | 13 + ..._openscience_cdk_cdk_ctab_2_2_SNAPSHOT.xml | 13 + 
..._openscience_cdk_cdk_data_2_2_SNAPSHOT.xml | 13 + ...penscience_cdk_cdk_depict_2_2_SNAPSHOT.xml | 13 + ..._openscience_cdk_cdk_dict_2_2_SNAPSHOT.xml | 13 + ...openscience_cdk_cdk_extra_2_2_SNAPSHOT.xml | 13 + ...ience_cdk_cdk_fingerprint_2_2_SNAPSHOT.xml | 13 + ...cience_cdk_cdk_forcefield_2_2_SNAPSHOT.xml | 13 + ...enscience_cdk_cdk_formula_2_2_SNAPSHOT.xml | 13 + ...cience_cdk_cdk_interfaces_2_2_SNAPSHOT.xml | 13 + ...rg_openscience_cdk_cdk_io_2_2_SNAPSHOT.xml | 13 + ...science_cdk_cdk_ioformats_2_2_SNAPSHOT.xml | 13 + ...ience_cdk_cdk_isomorphism_2_2_SNAPSHOT.xml | 13 + ...penscience_cdk_cdk_legacy_2_2_SNAPSHOT.xml | 13 + ..._openscience_cdk_cdk_qsar_2_2_SNAPSHOT.xml | 13 + ...cience_cdk_cdk_qsaratomic_2_2_SNAPSHOT.xml | 13 + ...nscience_cdk_cdk_reaction_2_2_SNAPSHOT.xml | 13 + ...penscience_cdk_cdk_render_2_2_SNAPSHOT.xml | 13 + ...science_cdk_cdk_renderawt_2_2_SNAPSHOT.xml | 13 + ...ience_cdk_cdk_renderbasic_2_2_SNAPSHOT.xml | 13 + ...ience_cdk_cdk_renderextra_2_2_SNAPSHOT.xml | 13 + ...g_openscience_cdk_cdk_sdg_2_2_SNAPSHOT.xml | 13 + ...science_cdk_cdk_signature_2_2_SNAPSHOT.xml | 13 + ...penscience_cdk_cdk_silent_2_2_SNAPSHOT.xml | 13 + ...penscience_cdk_cdk_smarts_2_2_SNAPSHOT.xml | 13 + ...penscience_cdk_cdk_smiles_2_2_SNAPSHOT.xml | 13 + ...nscience_cdk_cdk_standard_2_2_SNAPSHOT.xml | 13 + ...ence_cdk_cdk_valencycheck_2_2_SNAPSHOT.xml | 13 + .../Maven__uk_ac_ebi_beam_beam_core_1_2.xml | 13 + .../Maven__uk_ac_ebi_beam_beam_func_1_2.xml | 13 + .idea/libraries/Maven__xalan_xalan_2_7_0.xml | 13 + .../Maven__xerces_xercesImpl_2_8_0.xml | 13 + .../Maven__xml_apis_xml_apis_1_3_03.xml | 13 + .idea/libraries/Maven__xom_xom_1_2_5.xml | 13 + .idea/misc.xml | 14 + .idea/modules.xml | 8 + .idea/vcs.xml | 6 + .project | 23 - src/casekit/NMR/DB.java | 770 ------------------ 67 files changed, 913 insertions(+), 813 deletions(-) delete mode 100644 .classpath create mode 100644 .idea/artifacts/casekit_jar.xml create mode 100644 .idea/casekit.iml create mode 100644 
.idea/compiler.xml create mode 100644 .idea/copyright/MIT_License.xml create mode 100644 .idea/copyright/profiles_settings.xml create mode 100644 .idea/libraries/Maven__com_github_gilleain_signatures_signatures_1_1.xml create mode 100644 .idea/libraries/Maven__com_google_guava_guava_17_0.xml create mode 100644 .idea/libraries/Maven__commons_cli_commons_cli_1_3_1.xml create mode 100644 .idea/libraries/Maven__gov_nist_math_jama_1_0_3.xml create mode 100644 .idea/libraries/Maven__javax_vecmath_vecmath_1_5_2.xml create mode 100644 .idea/libraries/Maven__jgrapht_jgrapht_0_6_0.xml create mode 100644 .idea/libraries/Maven__junit_junit_4_10.xml create mode 100644 .idea/libraries/Maven__org_apache_commons_commons_lang3_3_5.xml create mode 100644 .idea/libraries/Maven__org_apache_commons_commons_math3_3_1_1.xml create mode 100644 .idea/libraries/Maven__org_freehep_freehep_graphics2d_2_4.xml create mode 100644 .idea/libraries/Maven__org_freehep_freehep_graphicsbase_2_4.xml create mode 100644 .idea/libraries/Maven__org_freehep_freehep_graphicsio_2_4.xml create mode 100644 .idea/libraries/Maven__org_freehep_freehep_graphicsio_pdf_2_4.xml create mode 100644 .idea/libraries/Maven__org_freehep_freehep_graphicsio_ps_2_4.xml create mode 100644 .idea/libraries/Maven__org_freehep_freehep_graphicsio_svg_2_4.xml create mode 100644 .idea/libraries/Maven__org_freehep_freehep_graphicsio_tests_2_4.xml create mode 100644 .idea/libraries/Maven__org_freehep_freehep_io_2_2_2.xml create mode 100644 .idea/libraries/Maven__org_hamcrest_hamcrest_core_1_1.xml create mode 100644 .idea/libraries/Maven__org_mongodb_mongo_java_driver_3_10_0.xml create mode 100644 .idea/libraries/Maven__org_openscience_HOSECodeBuilder_1_0.xml create mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_atomtype_2_2_SNAPSHOT.xml create mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_charges_2_2_SNAPSHOT.xml create mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_core_2_2_SNAPSHOT.xml create mode 
100644 .idea/libraries/Maven__org_openscience_cdk_cdk_ctab_2_2_SNAPSHOT.xml create mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_data_2_2_SNAPSHOT.xml create mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_depict_2_2_SNAPSHOT.xml create mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_dict_2_2_SNAPSHOT.xml create mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_extra_2_2_SNAPSHOT.xml create mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_fingerprint_2_2_SNAPSHOT.xml create mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_forcefield_2_2_SNAPSHOT.xml create mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_formula_2_2_SNAPSHOT.xml create mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_interfaces_2_2_SNAPSHOT.xml create mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_io_2_2_SNAPSHOT.xml create mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_ioformats_2_2_SNAPSHOT.xml create mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_isomorphism_2_2_SNAPSHOT.xml create mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_legacy_2_2_SNAPSHOT.xml create mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_qsar_2_2_SNAPSHOT.xml create mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_qsaratomic_2_2_SNAPSHOT.xml create mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_reaction_2_2_SNAPSHOT.xml create mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_render_2_2_SNAPSHOT.xml create mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_renderawt_2_2_SNAPSHOT.xml create mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_renderbasic_2_2_SNAPSHOT.xml create mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_renderextra_2_2_SNAPSHOT.xml create mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_sdg_2_2_SNAPSHOT.xml create mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_signature_2_2_SNAPSHOT.xml 
create mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_silent_2_2_SNAPSHOT.xml create mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_smarts_2_2_SNAPSHOT.xml create mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_smiles_2_2_SNAPSHOT.xml create mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_standard_2_2_SNAPSHOT.xml create mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_valencycheck_2_2_SNAPSHOT.xml create mode 100644 .idea/libraries/Maven__uk_ac_ebi_beam_beam_core_1_2.xml create mode 100644 .idea/libraries/Maven__uk_ac_ebi_beam_beam_func_1_2.xml create mode 100644 .idea/libraries/Maven__xalan_xalan_2_7_0.xml create mode 100644 .idea/libraries/Maven__xerces_xercesImpl_2_8_0.xml create mode 100644 .idea/libraries/Maven__xml_apis_xml_apis_1_3_03.xml create mode 100644 .idea/libraries/Maven__xom_xom_1_2_5.xml create mode 100644 .idea/misc.xml create mode 100644 .idea/modules.xml create mode 100644 .idea/vcs.xml delete mode 100644 .project delete mode 100644 src/casekit/NMR/DB.java diff --git a/.classpath b/.classpath deleted file mode 100644 index 149cb3c..0000000 --- a/.classpath +++ /dev/null @@ -1,20 +0,0 @@ - - - - - - - - - - - - - - - - - - - - diff --git a/.idea/artifacts/casekit_jar.xml b/.idea/artifacts/casekit_jar.xml new file mode 100644 index 0000000..e16f507 --- /dev/null +++ b/.idea/artifacts/casekit_jar.xml @@ -0,0 +1,63 @@ + + + $PROJECT_DIR$/out/artifacts/casekit_jar + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/casekit.iml b/.idea/casekit.iml new file mode 100644 index 0000000..fba646b --- /dev/null +++ b/.idea/casekit.iml @@ -0,0 +1,69 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/compiler.xml b/.idea/compiler.xml new file mode 100644 
index 0000000..f2aa0ef --- /dev/null +++ b/.idea/compiler.xml @@ -0,0 +1,16 @@ + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/copyright/MIT_License.xml b/.idea/copyright/MIT_License.xml new file mode 100644 index 0000000..23000f0 --- /dev/null +++ b/.idea/copyright/MIT_License.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/.idea/copyright/profiles_settings.xml b/.idea/copyright/profiles_settings.xml new file mode 100644 index 0000000..ea2b04b --- /dev/null +++ b/.idea/copyright/profiles_settings.xml @@ -0,0 +1,3 @@ + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__com_github_gilleain_signatures_signatures_1_1.xml b/.idea/libraries/Maven__com_github_gilleain_signatures_signatures_1_1.xml new file mode 100644 index 0000000..1b9226b --- /dev/null +++ b/.idea/libraries/Maven__com_github_gilleain_signatures_signatures_1_1.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__com_google_guava_guava_17_0.xml b/.idea/libraries/Maven__com_google_guava_guava_17_0.xml new file mode 100644 index 0000000..2a9069c --- /dev/null +++ b/.idea/libraries/Maven__com_google_guava_guava_17_0.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__commons_cli_commons_cli_1_3_1.xml b/.idea/libraries/Maven__commons_cli_commons_cli_1_3_1.xml new file mode 100644 index 0000000..a1510b9 --- /dev/null +++ b/.idea/libraries/Maven__commons_cli_commons_cli_1_3_1.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__gov_nist_math_jama_1_0_3.xml b/.idea/libraries/Maven__gov_nist_math_jama_1_0_3.xml new file mode 100644 index 0000000..84df334 --- /dev/null +++ b/.idea/libraries/Maven__gov_nist_math_jama_1_0_3.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__javax_vecmath_vecmath_1_5_2.xml 
b/.idea/libraries/Maven__javax_vecmath_vecmath_1_5_2.xml new file mode 100644 index 0000000..f04d302 --- /dev/null +++ b/.idea/libraries/Maven__javax_vecmath_vecmath_1_5_2.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__jgrapht_jgrapht_0_6_0.xml b/.idea/libraries/Maven__jgrapht_jgrapht_0_6_0.xml new file mode 100644 index 0000000..55a6d97 --- /dev/null +++ b/.idea/libraries/Maven__jgrapht_jgrapht_0_6_0.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__junit_junit_4_10.xml b/.idea/libraries/Maven__junit_junit_4_10.xml new file mode 100644 index 0000000..ed8bf5f --- /dev/null +++ b/.idea/libraries/Maven__junit_junit_4_10.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_apache_commons_commons_lang3_3_5.xml b/.idea/libraries/Maven__org_apache_commons_commons_lang3_3_5.xml new file mode 100644 index 0000000..666266c --- /dev/null +++ b/.idea/libraries/Maven__org_apache_commons_commons_lang3_3_5.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_apache_commons_commons_math3_3_1_1.xml b/.idea/libraries/Maven__org_apache_commons_commons_math3_3_1_1.xml new file mode 100644 index 0000000..5627f0c --- /dev/null +++ b/.idea/libraries/Maven__org_apache_commons_commons_math3_3_1_1.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_freehep_freehep_graphics2d_2_4.xml b/.idea/libraries/Maven__org_freehep_freehep_graphics2d_2_4.xml new file mode 100644 index 0000000..9b824aa --- /dev/null +++ b/.idea/libraries/Maven__org_freehep_freehep_graphics2d_2_4.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_freehep_freehep_graphicsbase_2_4.xml b/.idea/libraries/Maven__org_freehep_freehep_graphicsbase_2_4.xml new 
file mode 100644 index 0000000..f2fec4f --- /dev/null +++ b/.idea/libraries/Maven__org_freehep_freehep_graphicsbase_2_4.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_freehep_freehep_graphicsio_2_4.xml b/.idea/libraries/Maven__org_freehep_freehep_graphicsio_2_4.xml new file mode 100644 index 0000000..062846f --- /dev/null +++ b/.idea/libraries/Maven__org_freehep_freehep_graphicsio_2_4.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_freehep_freehep_graphicsio_pdf_2_4.xml b/.idea/libraries/Maven__org_freehep_freehep_graphicsio_pdf_2_4.xml new file mode 100644 index 0000000..0ae4e5b --- /dev/null +++ b/.idea/libraries/Maven__org_freehep_freehep_graphicsio_pdf_2_4.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_freehep_freehep_graphicsio_ps_2_4.xml b/.idea/libraries/Maven__org_freehep_freehep_graphicsio_ps_2_4.xml new file mode 100644 index 0000000..9f51a68 --- /dev/null +++ b/.idea/libraries/Maven__org_freehep_freehep_graphicsio_ps_2_4.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_freehep_freehep_graphicsio_svg_2_4.xml b/.idea/libraries/Maven__org_freehep_freehep_graphicsio_svg_2_4.xml new file mode 100644 index 0000000..f86533c --- /dev/null +++ b/.idea/libraries/Maven__org_freehep_freehep_graphicsio_svg_2_4.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_freehep_freehep_graphicsio_tests_2_4.xml b/.idea/libraries/Maven__org_freehep_freehep_graphicsio_tests_2_4.xml new file mode 100644 index 0000000..39b60ac --- /dev/null +++ b/.idea/libraries/Maven__org_freehep_freehep_graphicsio_tests_2_4.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_freehep_freehep_io_2_2_2.xml 
b/.idea/libraries/Maven__org_freehep_freehep_io_2_2_2.xml new file mode 100644 index 0000000..7eb8b43 --- /dev/null +++ b/.idea/libraries/Maven__org_freehep_freehep_io_2_2_2.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_hamcrest_hamcrest_core_1_1.xml b/.idea/libraries/Maven__org_hamcrest_hamcrest_core_1_1.xml new file mode 100644 index 0000000..acdf443 --- /dev/null +++ b/.idea/libraries/Maven__org_hamcrest_hamcrest_core_1_1.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_mongodb_mongo_java_driver_3_10_0.xml b/.idea/libraries/Maven__org_mongodb_mongo_java_driver_3_10_0.xml new file mode 100644 index 0000000..8f2291e --- /dev/null +++ b/.idea/libraries/Maven__org_mongodb_mongo_java_driver_3_10_0.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_HOSECodeBuilder_1_0.xml b/.idea/libraries/Maven__org_openscience_HOSECodeBuilder_1_0.xml new file mode 100644 index 0000000..09a9a46 --- /dev/null +++ b/.idea/libraries/Maven__org_openscience_HOSECodeBuilder_1_0.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_atomtype_2_2_SNAPSHOT.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_atomtype_2_2_SNAPSHOT.xml new file mode 100644 index 0000000..67e398f --- /dev/null +++ b/.idea/libraries/Maven__org_openscience_cdk_cdk_atomtype_2_2_SNAPSHOT.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_charges_2_2_SNAPSHOT.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_charges_2_2_SNAPSHOT.xml new file mode 100644 index 0000000..c1ec4b4 --- /dev/null +++ b/.idea/libraries/Maven__org_openscience_cdk_cdk_charges_2_2_SNAPSHOT.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git 
a/.idea/libraries/Maven__org_openscience_cdk_cdk_core_2_2_SNAPSHOT.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_core_2_2_SNAPSHOT.xml new file mode 100644 index 0000000..b5ae76c --- /dev/null +++ b/.idea/libraries/Maven__org_openscience_cdk_cdk_core_2_2_SNAPSHOT.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_ctab_2_2_SNAPSHOT.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_ctab_2_2_SNAPSHOT.xml new file mode 100644 index 0000000..a9c65a2 --- /dev/null +++ b/.idea/libraries/Maven__org_openscience_cdk_cdk_ctab_2_2_SNAPSHOT.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_data_2_2_SNAPSHOT.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_data_2_2_SNAPSHOT.xml new file mode 100644 index 0000000..63e5eed --- /dev/null +++ b/.idea/libraries/Maven__org_openscience_cdk_cdk_data_2_2_SNAPSHOT.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_depict_2_2_SNAPSHOT.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_depict_2_2_SNAPSHOT.xml new file mode 100644 index 0000000..c6831ba --- /dev/null +++ b/.idea/libraries/Maven__org_openscience_cdk_cdk_depict_2_2_SNAPSHOT.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_dict_2_2_SNAPSHOT.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_dict_2_2_SNAPSHOT.xml new file mode 100644 index 0000000..4aac108 --- /dev/null +++ b/.idea/libraries/Maven__org_openscience_cdk_cdk_dict_2_2_SNAPSHOT.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_extra_2_2_SNAPSHOT.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_extra_2_2_SNAPSHOT.xml new file mode 100644 index 0000000..3086186 --- /dev/null +++ 
b/.idea/libraries/Maven__org_openscience_cdk_cdk_extra_2_2_SNAPSHOT.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_fingerprint_2_2_SNAPSHOT.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_fingerprint_2_2_SNAPSHOT.xml new file mode 100644 index 0000000..0194ace --- /dev/null +++ b/.idea/libraries/Maven__org_openscience_cdk_cdk_fingerprint_2_2_SNAPSHOT.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_forcefield_2_2_SNAPSHOT.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_forcefield_2_2_SNAPSHOT.xml new file mode 100644 index 0000000..25068aa --- /dev/null +++ b/.idea/libraries/Maven__org_openscience_cdk_cdk_forcefield_2_2_SNAPSHOT.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_formula_2_2_SNAPSHOT.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_formula_2_2_SNAPSHOT.xml new file mode 100644 index 0000000..4a5e857 --- /dev/null +++ b/.idea/libraries/Maven__org_openscience_cdk_cdk_formula_2_2_SNAPSHOT.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_interfaces_2_2_SNAPSHOT.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_interfaces_2_2_SNAPSHOT.xml new file mode 100644 index 0000000..ee1b6a3 --- /dev/null +++ b/.idea/libraries/Maven__org_openscience_cdk_cdk_interfaces_2_2_SNAPSHOT.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_io_2_2_SNAPSHOT.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_io_2_2_SNAPSHOT.xml new file mode 100644 index 0000000..6db98ec --- /dev/null +++ b/.idea/libraries/Maven__org_openscience_cdk_cdk_io_2_2_SNAPSHOT.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git 
a/.idea/libraries/Maven__org_openscience_cdk_cdk_ioformats_2_2_SNAPSHOT.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_ioformats_2_2_SNAPSHOT.xml new file mode 100644 index 0000000..677f9f4 --- /dev/null +++ b/.idea/libraries/Maven__org_openscience_cdk_cdk_ioformats_2_2_SNAPSHOT.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_isomorphism_2_2_SNAPSHOT.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_isomorphism_2_2_SNAPSHOT.xml new file mode 100644 index 0000000..e9a6c18 --- /dev/null +++ b/.idea/libraries/Maven__org_openscience_cdk_cdk_isomorphism_2_2_SNAPSHOT.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_legacy_2_2_SNAPSHOT.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_legacy_2_2_SNAPSHOT.xml new file mode 100644 index 0000000..673c1bc --- /dev/null +++ b/.idea/libraries/Maven__org_openscience_cdk_cdk_legacy_2_2_SNAPSHOT.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_qsar_2_2_SNAPSHOT.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_qsar_2_2_SNAPSHOT.xml new file mode 100644 index 0000000..63ab4f9 --- /dev/null +++ b/.idea/libraries/Maven__org_openscience_cdk_cdk_qsar_2_2_SNAPSHOT.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_qsaratomic_2_2_SNAPSHOT.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_qsaratomic_2_2_SNAPSHOT.xml new file mode 100644 index 0000000..e255df3 --- /dev/null +++ b/.idea/libraries/Maven__org_openscience_cdk_cdk_qsaratomic_2_2_SNAPSHOT.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_reaction_2_2_SNAPSHOT.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_reaction_2_2_SNAPSHOT.xml new 
file mode 100644 index 0000000..a219dbc --- /dev/null +++ b/.idea/libraries/Maven__org_openscience_cdk_cdk_reaction_2_2_SNAPSHOT.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_render_2_2_SNAPSHOT.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_render_2_2_SNAPSHOT.xml new file mode 100644 index 0000000..479050f --- /dev/null +++ b/.idea/libraries/Maven__org_openscience_cdk_cdk_render_2_2_SNAPSHOT.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_renderawt_2_2_SNAPSHOT.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_renderawt_2_2_SNAPSHOT.xml new file mode 100644 index 0000000..fd79be6 --- /dev/null +++ b/.idea/libraries/Maven__org_openscience_cdk_cdk_renderawt_2_2_SNAPSHOT.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_renderbasic_2_2_SNAPSHOT.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_renderbasic_2_2_SNAPSHOT.xml new file mode 100644 index 0000000..14e359e --- /dev/null +++ b/.idea/libraries/Maven__org_openscience_cdk_cdk_renderbasic_2_2_SNAPSHOT.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_renderextra_2_2_SNAPSHOT.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_renderextra_2_2_SNAPSHOT.xml new file mode 100644 index 0000000..23a1cae --- /dev/null +++ b/.idea/libraries/Maven__org_openscience_cdk_cdk_renderextra_2_2_SNAPSHOT.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_sdg_2_2_SNAPSHOT.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_sdg_2_2_SNAPSHOT.xml new file mode 100644 index 0000000..bd1da56 --- /dev/null +++ b/.idea/libraries/Maven__org_openscience_cdk_cdk_sdg_2_2_SNAPSHOT.xml @@ -0,0 +1,13 @@ + + 
+ + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_signature_2_2_SNAPSHOT.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_signature_2_2_SNAPSHOT.xml new file mode 100644 index 0000000..cadfcb4 --- /dev/null +++ b/.idea/libraries/Maven__org_openscience_cdk_cdk_signature_2_2_SNAPSHOT.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_silent_2_2_SNAPSHOT.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_silent_2_2_SNAPSHOT.xml new file mode 100644 index 0000000..30c1ced --- /dev/null +++ b/.idea/libraries/Maven__org_openscience_cdk_cdk_silent_2_2_SNAPSHOT.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_smarts_2_2_SNAPSHOT.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_smarts_2_2_SNAPSHOT.xml new file mode 100644 index 0000000..1ae1451 --- /dev/null +++ b/.idea/libraries/Maven__org_openscience_cdk_cdk_smarts_2_2_SNAPSHOT.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_smiles_2_2_SNAPSHOT.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_smiles_2_2_SNAPSHOT.xml new file mode 100644 index 0000000..35fa6d9 --- /dev/null +++ b/.idea/libraries/Maven__org_openscience_cdk_cdk_smiles_2_2_SNAPSHOT.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_standard_2_2_SNAPSHOT.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_standard_2_2_SNAPSHOT.xml new file mode 100644 index 0000000..f7ab0b6 --- /dev/null +++ b/.idea/libraries/Maven__org_openscience_cdk_cdk_standard_2_2_SNAPSHOT.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_valencycheck_2_2_SNAPSHOT.xml 
b/.idea/libraries/Maven__org_openscience_cdk_cdk_valencycheck_2_2_SNAPSHOT.xml new file mode 100644 index 0000000..b6dfd79 --- /dev/null +++ b/.idea/libraries/Maven__org_openscience_cdk_cdk_valencycheck_2_2_SNAPSHOT.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__uk_ac_ebi_beam_beam_core_1_2.xml b/.idea/libraries/Maven__uk_ac_ebi_beam_beam_core_1_2.xml new file mode 100644 index 0000000..b8648d6 --- /dev/null +++ b/.idea/libraries/Maven__uk_ac_ebi_beam_beam_core_1_2.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__uk_ac_ebi_beam_beam_func_1_2.xml b/.idea/libraries/Maven__uk_ac_ebi_beam_beam_func_1_2.xml new file mode 100644 index 0000000..7c39ee3 --- /dev/null +++ b/.idea/libraries/Maven__uk_ac_ebi_beam_beam_func_1_2.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__xalan_xalan_2_7_0.xml b/.idea/libraries/Maven__xalan_xalan_2_7_0.xml new file mode 100644 index 0000000..dd647b4 --- /dev/null +++ b/.idea/libraries/Maven__xalan_xalan_2_7_0.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__xerces_xercesImpl_2_8_0.xml b/.idea/libraries/Maven__xerces_xercesImpl_2_8_0.xml new file mode 100644 index 0000000..74ccea0 --- /dev/null +++ b/.idea/libraries/Maven__xerces_xercesImpl_2_8_0.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__xml_apis_xml_apis_1_3_03.xml b/.idea/libraries/Maven__xml_apis_xml_apis_1_3_03.xml new file mode 100644 index 0000000..cc2538f --- /dev/null +++ b/.idea/libraries/Maven__xml_apis_xml_apis_1_3_03.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__xom_xom_1_2_5.xml b/.idea/libraries/Maven__xom_xom_1_2_5.xml new file mode 100644 index 0000000..773ab34 --- /dev/null +++ 
b/.idea/libraries/Maven__xom_xom_1_2_5.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..1c3f3fd --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,14 @@ + + + + + + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..4b6e86b --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..35eb1dd --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/.project b/.project deleted file mode 100644 index e3d2998..0000000 --- a/.project +++ /dev/null @@ -1,23 +0,0 @@ - - - casekit - - - - - - org.eclipse.jdt.core.javabuilder - - - - - org.eclipse.m2e.core.maven2Builder - - - - - - org.eclipse.jdt.core.javanature - org.eclipse.m2e.core.maven2Nature - - diff --git a/src/casekit/NMR/DB.java b/src/casekit/NMR/DB.java deleted file mode 100644 index 73e41df..0000000 --- a/src/casekit/NMR/DB.java +++ /dev/null @@ -1,770 +0,0 @@ -/* - * The MIT License - * - * Copyright 2018 Michael Wenk [https://github.com/michaelwenk]. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ -package casekit.NMR; - -import casekit.NMR.model.Assignment; -import casekit.NMR.model.Signal; -import casekit.NMR.model.Spectrum; -import com.mongodb.MongoClient; -import com.mongodb.MongoClientOptions; -import com.mongodb.MongoCredential; -import com.mongodb.ServerAddress; -import com.mongodb.client.MongoCollection; -import com.mongodb.client.MongoDatabase; -import java.io.FileNotFoundException; -import java.io.FileReader; -import java.io.IOException; -import java.sql.Connection; -import java.sql.DriverManager; -import java.sql.ResultSet; -import java.sql.SQLException; -import java.sql.Statement; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import org.bson.Document; -import org.openscience.cdk.exception.CDKException; -import org.openscience.cdk.interfaces.IAtomContainer; -import org.openscience.cdk.interfaces.IAtomContainerSet; -import org.openscience.cdk.io.iterator.IteratingSDFReader; -import org.openscience.cdk.silent.AtomContainerSet; -import org.openscience.cdk.silent.SilentChemObjectBuilder; - -/** - * - * @author Michael Wenk [https://github.com/michaelwenk] - */ -public class DB { - - /** - * Returns the molecules of a given MOL/SDF file. - * This function sets the molecule aromaticity (with allowed exocyclic pi - * bonds) by using the - * {@link Utils#setAromaticitiesInAtomContainer(org.openscience.cdk.interfaces.IAtomContainer)} - * function. 
- * - * @param pathToNMRShiftDB path to NMRShiftDB file - * @param setAromaticity whether to set aromaticities in structures or not - * @return - * @throws FileNotFoundException - * @throws CDKException - * @deprecated - */ - public static IAtomContainerSet getStructuresFromSDFile(final String pathToNMRShiftDB, final boolean setAromaticity) throws FileNotFoundException, CDKException { - final IAtomContainerSet acSet = new AtomContainerSet(); - final IteratingSDFReader iterator = new IteratingSDFReader( - new FileReader(pathToNMRShiftDB), - SilentChemObjectBuilder.getInstance() - ); - IAtomContainer ac; - while (iterator.hasNext()) { - ac = iterator.next(); - if(setAromaticity){ - Utils.setAromaticitiesInAtomContainer(ac); - } - acSet.addAtomContainer(ac); - } - - return acSet; - } - - - /** - * Returns all spectra for each molecule and a given nucleus which exist as - * property in a NMRSHiftDB SDF. - * - * @param pathToNMRShiftDB path to NMRShiftDB file - * @param nucleus nucleus of requested spectra - * @return - * @throws FileNotFoundException - * @throws CDKException - * - */ - public static ArrayList> getSpectraFromNMRShiftDB(final String pathToNMRShiftDB, final String nucleus) throws FileNotFoundException, CDKException { - final ArrayList> spectraSet = new ArrayList<>(); - final IteratingSDFReader iterator = new IteratingSDFReader( - new FileReader(pathToNMRShiftDB), - SilentChemObjectBuilder.getInstance() - ); - IAtomContainer ac; - Spectrum spectrum; - ArrayList spectra; - HashMap spectraStrings; - String spectrumIndexInRecord, solvent; - while (iterator.hasNext()) { - ac = iterator.next(); - if(ac == null){ - continue; - } - spectraStrings = DB.getSpectraStrings(ac, nucleus); - if(spectraStrings.isEmpty() || (ac.getProperty("Solvent") == null)){ - continue; - } - spectra = new ArrayList<>(); - for (final String spectrumPropertyString : spectraStrings.keySet()) { - spectrum = DB.NMRShiftDBSpectrumToSpectrum(spectraStrings.get(spectrumPropertyString), 
nucleus); - if(spectrum == null){ - continue; - } - spectrumIndexInRecord = spectrumPropertyString.split("\\s")[spectrumPropertyString.split("\\s").length - 1]; - solvent = DB.getSolvent(ac.getProperty("Solvent"), spectrumIndexInRecord); - if(solvent == null){ - continue; - } - spectrum.setSolvent(solvent); - - if(Utils.getAtomTypeIndicesByElement(ac, nucleus.replaceAll("\\d", "")).size() != spectrum.getSignalCount()){ - continue; - } - - spectra.add(new Object[]{spectrum, DB.NMRShiftDBSpectrumToAssignment(spectraStrings.get(spectrumPropertyString), nucleus)}); - } - spectraSet.add(spectra); - } - - return spectraSet; - } - - public static String getSolvent(final String solventPropertyString, final String spectrumIndexInRecord){ - final String[] solventPropertyStringSplit = solventPropertyString.split(":"); - String solvent; - for (int i = 0; i < solventPropertyStringSplit.length; i++) { - if (solventPropertyStringSplit[i].endsWith(spectrumIndexInRecord)) { - solvent = solventPropertyStringSplit[i + 1]; - if(solvent.substring(solvent.length() - 1).matches("\\d")){ - solvent = solvent.substring(0, solvent.length() - 1); - } - if(solvent.substring(solvent.length() - 1).matches("\\d")){ - solvent = solvent.substring(0, solvent.length() - 1); - } - solvent = solvent.substring(0, solvent.length() - 1); - - return solvent; - } - } - - return null; - } - - /** - * Returns 3-tuples consisting of structure, spectrum and assignments - * for each valid molecule record in the given NMRShiftDB file. Valid means - * here that each molecule record has to contain the given spectrum - * property string as well as the number of signals in that spectrum has to - * be the same as atoms of that atom type in molecule. 
- * - * @param pathToNMRShiftDB path to NMRShiftDB file - * @param NMRShiftDBSpectrumProperty spectrum property string to use - * @return - * @throws FileNotFoundException - * @throws CDKException - */ - public static HashMap getSSCComponentsFromNMRShiftDB(final String pathToNMRShiftDB, final String NMRShiftDBSpectrumProperty) throws FileNotFoundException, CDKException { - final HashMap structureSetWithSpectra = new HashMap<>(); - final IteratingSDFReader iterator = new IteratingSDFReader( - new FileReader(pathToNMRShiftDB), - SilentChemObjectBuilder.getInstance() - ); - IAtomContainer ac; - Spectrum spectrum; - Assignment assignment; - final String nucleus = DB.getNucleusFromNMRShiftDBSpectrumProperty(NMRShiftDBSpectrumProperty); - final String spectrumIndexInRecord = NMRShiftDBSpectrumProperty.split("\\s")[NMRShiftDBSpectrumProperty.split("\\s").length - 1]; - while (iterator.hasNext()) { - ac = iterator.next(); - // skip molecules which not contain any of requested spectrum information - if(ac.getProperty(NMRShiftDBSpectrumProperty) == null){ - continue; - } - spectrum = DB.NMRShiftDBSpectrumToSpectrum(ac.getProperty(NMRShiftDBSpectrumProperty), nucleus); - // if no spectrum could be built or the number of signals in spectrum is different than the atom number in molecule - if((spectrum == null) || Utils.getAtomTypeIndicesByElement(ac, nucleus.replaceAll("\\d", "")).size() != spectrum.getSignalCount()){ - continue; - } - if(ac.getProperty("Solvent") != null){ - spectrum.setSolvent(DB.getSolvent(ac.getProperty("Solvent"), spectrumIndexInRecord)); - } - if(ac.getProperty("Field Strength [MHz]") != null){ - for (final String fieldStrength : ac.getProperty("Field Strength [MHz]").toString().split("\\s")) { - if (fieldStrength.startsWith(spectrumIndexInRecord + ":")) { - try { - spectrum.setSpectrometerFrequency(Double.parseDouble(fieldStrength.split(spectrumIndexInRecord + ":")[1])); - } catch (NumberFormatException e) { -// spectrum.setSpectrometerFrequency(null); - 
} - break; - } - } - } - - assignment = DB.NMRShiftDBSpectrumToAssignment(ac.getProperty(NMRShiftDBSpectrumProperty), nucleus); -// if ((ac != null) && (spectrum != null)) { - structureSetWithSpectra.put(structureSetWithSpectra.size(), new Object[]{ac, spectrum, assignment}); -// } - - Utils.setAromaticitiesInAtomContainer(ac); - } - - return structureSetWithSpectra; - } - - /** - * Returns a hashmap containing combined keys (by "_") of solvents - * and lists of calculated deviations between all given spectra for a - * nucleus in molecule record as values.
- * Here, only molecule records in NMRShiftDB file are considered which have - * at least two different spectra for same nucleus.
- * Example: "Spectrum 13C 0", "Spectrum 13C 1" will be used for given - * nucleus 13C. - * - * - * @param pathToNMRShiftDB - * @param nucleus - * @return - * @throws FileNotFoundException - * @throws CDKException - */ - public static HashMap> getSolventDeviations(final String pathToNMRShiftDB, final String nucleus) throws FileNotFoundException, CDKException{ - int signalCount; - Spectrum spectrum; - Assignment assignment; - final ArrayList> spectraSets = DB.getSpectraFromNMRShiftDB(pathToNMRShiftDB, nucleus); - HashMap> shiftsPerAtom; - HashMap> solventsPerAtom; - ArrayList solvents; - String[] solventsToSort; - - final HashMap> deviations = new HashMap<>(); - String combiKey; - - for (final ArrayList spectraSetInRecord : spectraSets) { - shiftsPerAtom = new HashMap<>(); - solventsPerAtom = new HashMap<>(); - signalCount = -1; - for (final Object[] spectrumAndAssignment : spectraSetInRecord) { - spectrum = (Spectrum) spectrumAndAssignment[0]; - assignment = (Assignment) spectrumAndAssignment[1]; - if (signalCount == -1) { - signalCount = spectrum.getSignalCount(); - } else if (signalCount != spectrum.getSignalCount()) { - continue; - } - for (final int atomIndex : assignment.getAtomIndices(0)) { - if (!shiftsPerAtom.containsKey(atomIndex)) { - shiftsPerAtom.put(atomIndex, new ArrayList<>()); - solventsPerAtom.put(atomIndex, new ArrayList<>()); - } - shiftsPerAtom.get(atomIndex).add(spectrum.getSignal(assignment.getSignalIndex(0, atomIndex)).getShift(0)); - solventsPerAtom.get(atomIndex).add(spectrum.getSolvent()); - } - } - if (shiftsPerAtom.isEmpty() || (shiftsPerAtom.get(Collections.min(shiftsPerAtom.keySet())).size() < 2)) { - continue; - } - solvents = new ArrayList<>(solventsPerAtom.get(Collections.min(solventsPerAtom.keySet()))); -// if(Collections.frequency(solvents, "Unreported") + Collections.frequency(solvents, "Unknown") > solvents.size() - 2){ -// continue; -// } - - for (final int atomIndex : shiftsPerAtom.keySet()) { - for (int s1 = 0; s1 < 
solvents.size(); s1++) { -// if(solvents.get(s1).equals("Unreported") || solvents.get(s1).equals("Unknown")){ -// continue; -// } - for (int s2 = s1 + 1; s2 < solvents.size(); s2++) { -// if (solvents.get(s2).equals("Unreported") || solvents.get(s2).equals("Unknown")) { -// continue; -// } - solventsToSort = new String[2]; - solventsToSort[0] = solvents.get(s1); - solventsToSort[1] = solvents.get(s2); - Arrays.sort(solventsToSort); - combiKey = solventsToSort[0] + "_" + solventsToSort[1]; - if (!deviations.containsKey(combiKey)) { - deviations.put(combiKey, new ArrayList<>()); - } - deviations.get(combiKey).add(Math.abs(shiftsPerAtom.get(atomIndex).get(s1) - shiftsPerAtom.get(atomIndex).get(s2))); - } - } - } - } - - return deviations; - } - - /** - * - * @param pathToDB - * @return - * @throws FileNotFoundException - * @deprecated - */ - public static HashSet getAtomTypesInDB(final String pathToDB) throws FileNotFoundException{ - final HashSet atomTypes = new HashSet<>(); - final IteratingSDFReader iterator = new IteratingSDFReader( - new FileReader(pathToDB), - SilentChemObjectBuilder.getInstance() - ); - while (iterator.hasNext()) { - atomTypes.addAll(Utils.getAtomTypesInAtomContainer(iterator.next())); - } - - return atomTypes; - } - - /** - * - * @param server - * @param options - * @param user - * @param pwd - * @return - * @throws SQLException - * @deprecated - */ - public static Connection getDBConnection(final String server, final String options, final String user, final String pwd) throws SQLException { - - return DriverManager.getConnection(server + "?" 
+ options, user, pwd); - } - - /** - * - * @param DBConnection - * @param bondsSet - * @param elem - * @param neighborElems - * @param minShift - * @param maxShift - * @param stepSize - * @return - * @throws FileNotFoundException - * @throws IOException - * @throws SQLException - * @deprecated - */ - public static int[][] countNeighborhoodBonds(final Connection DBConnection, final String[] bondsSet, final String elem, String[] neighborElems, final int minShift, final int maxShift, final int stepSize) throws FileNotFoundException, IOException, SQLException { - - if (DBConnection == null || stepSize < 1) { - return null; - } - // creation of frequency counting matrix and shift indices holder - final int[][] neighborhoodCountsMatrix = new int[stepSize * (maxShift - minShift + 1)][3 + 4 + neighborElems.length * bondsSet.length]; - HashMap> signalAtomIndicesInNMRShiftDB = new HashMap<>(); // holding of all indices of each ac set (DB) entry [first value] and it's atom indices [second value] too - for (int i = 0; i < stepSize * maxShift; i++) { - for (int k = 0; k < 3 + 4 + neighborElems.length * bondsSet.length; k++) { - neighborhoodCountsMatrix[i][k] = 0; - } - signalAtomIndicesInNMRShiftDB.put(i, new ArrayList<>()); - } - - final Statement statement = DBConnection.createStatement(); - String multQuery = "SELECT (FLOOR(sh.VALUE*" + stepSize + ")/" + stepSize + ") AS shift, nmrsig.MULTIPLICITY AS mult, COUNT(FLOOR(sh.VALUE*" + stepSize + ")/" + stepSize + ") AS shiftCount \n" - + "FROM SHIFT AS sh, SIGNAL_ATOM AS sigatom, NMR_SIGNAL AS nmrsig, SPECTRUM AS spec, SPECTRUM_TYPE AS spectype \n" - + "WHERE sh.SIGNAL_ID = sigatom.SIGNAL_ID AND \n" - + " sigatom.SIGNAL_ID = nmrsig.SIGNAL_ID AND \n" - + " nmrsig.SPECTRUM_ID = spec.SPECTRUM_ID AND \n" - + " spec.REVIEW_FLAG = \"true\" AND \n" - + " spectype.SPECTRUM_TYPE_ID = spec.SPECTRUM_TYPE_ID AND \n" - + " spectype.NAME = \"" + casekit.NMR.Utils.getIsotopeIdentifier(elem) + "\" AND \n" - + " nmrsig.MULTIPLICITY IS NOT NULL 
AND \n" - + " nmrsig.MULTIPLICITY != \"\"" - + "GROUP BY shift, mult \n" - + "HAVING shift >= " + minShift + " AND shift <= " + maxShift + ";"; - - double shiftDouble; - int shiftInt; - System.out.println("\n\nneighborhoods:\nQUERY: " + multQuery); - final ResultSet resultSet = statement.executeQuery(multQuery); - while (resultSet.next()) { - shiftDouble = Math.floor(resultSet.getDouble("shift") * stepSize) / (double) stepSize; - if (shiftDouble < minShift || shiftDouble > maxShift - 1) { - continue; - } - shiftInt = (int) (shiftDouble * stepSize); - neighborhoodCountsMatrix[shiftInt - minShift][0] += resultSet.getInt("shiftCount"); - switch (resultSet.getString("mult")) { - case "S": // for qC - neighborhoodCountsMatrix[shiftInt - minShift][3] += resultSet.getInt("shiftCount"); - break; - case "D": // for CH - neighborhoodCountsMatrix[shiftInt - minShift][4] += resultSet.getInt("shiftCount"); - break; - case "T": // for CH2 - neighborhoodCountsMatrix[shiftInt - minShift][5] += resultSet.getInt("shiftCount"); - break; - case "Q": // for CH3 - neighborhoodCountsMatrix[shiftInt - minShift][6] += resultSet.getInt("shiftCount"); - break; - } - - } - -// this.neighborhoodCountsMatrix[shiftDB - min][0] += 1; // increase number of this shift occurence -// this.neighborhoodCountsMatrix[shiftDB - min][1] += (acDB.getAtom(atomIndexDB).isInRing()) ? 1 : 0; // increase if atom is a ring member -// this.neighborhoodCountsMatrix[shiftDB - min][2] += ((acDB.getAtom(atomIndexDB).getImplicitHydrogenCount() != null) && (acDB.getAtom(atomIndexDB).getImplicitHydrogenCount() == 0)) ? 1 : 0; // qC count or equivalents, e.g. qN -// this.neighborhoodCountsMatrix[shiftDB - min][3] += ((acDB.getAtom(atomIndexDB).getImplicitHydrogenCount() != null) && (acDB.getAtom(atomIndexDB).getImplicitHydrogenCount() == 1)) ? 1 : 0; // CH count or equivalents, e.g. 
NH -// this.neighborhoodCountsMatrix[shiftDB - min][4] += ((acDB.getAtom(atomIndexDB).getImplicitHydrogenCount() != null) && (acDB.getAtom(atomIndexDB).getImplicitHydrogenCount() == 2)) ? 1 : 0; // CH2 count or equivalents, e.g. NH2 -// this.neighborhoodCountsMatrix[shiftDB - min][5] += ((acDB.getAtom(atomIndexDB).getImplicitHydrogenCount() != null) && (acDB.getAtom(atomIndexDB).getImplicitHydrogenCount() == 3)) ? 1 : 0; // CH3 count or equivalents, e.g. NH3 -// // add counts for a specific atom to matrix m -// int[] counts = NMR.Utils.getNeighborhoodBondsCount(acDB, atomIndexDB, bondsSet, neighborElems); -// for (int i = 0; i < counts.length; i++) { -// this.neighborhoodCountsMatrix[shiftDB - min][2 + 4 + i] += counts[i]; -// } -// // add this atom container index and atom index within it to belonging hash map -// this.shiftIndicesInACSet.get(shiftDB).add(new Integer[]{k, atomIndexDB}); - return neighborhoodCountsMatrix; - } - - /** - * - * @param DBConnection - * @param query - * @return - * @throws SQLException - * @deprecated - */ - public static ResultSet getResultSet(final Connection DBConnection, final String query) throws SQLException{ - - if (DBConnection == null) { - return null; - } - - return DBConnection.createStatement().executeQuery(query); - } - - - /** - * - * @param DBConnection - * @param minShift - * @param maxShift - * @param mult - * @param minIntens - * @param maxIntens - * @param elem - * @return - * @throws SQLException - * @deprecated - */ - public static ArrayList getSignalIDsFromNMRShiftDB(final Connection DBConnection, final double minShift, final double maxShift, final String mult, final Double minIntens, final Double maxIntens, final String elem) throws SQLException { - - final ArrayList spectraIDs = new ArrayList<>(); - String query = "SELECT nmrsig.SIGNAL_ID AS sigID" - + " FROM SHIFT AS sh, NMR_SIGNAL AS nmrsig, SPECTRUM AS spec, SPECTRUM_TYPE AS spectype \n" - + " WHERE sh.VALUE >= " + minShift + " AND sh.VALUE <= " + maxShift + " 
AND \n" // for filtering by means of shift values - + " sh.SIGNAL_ID = nmrsig.SIGNAL_ID AND \n" - + " nmrsig.SPECTRUM_ID = spec.SPECTRUM_ID AND \n" - + " spec.REVIEW_FLAG = \"true\" AND \n" // checks whether review flag is set to true - + " spectype.SPECTRUM_TYPE_ID = spec.SPECTRUM_TYPE_ID AND \n" - + " spectype.NAME = \"" + casekit.NMR.Utils.getIsotopeIdentifier(elem) + "\" \n"; - if(mult != null && !mult.trim().isEmpty()){ - query += " AND nmrsig.MULTIPLICITY = \"" + mult + "\" \n"; - } else { - query += " AND nmrsig.MULTIPLICITY IS NOT NULL AND nmrsig.MULTIPLICITY != \"\" \n"; - } - if((minIntens != null && minIntens > 0.0) && (maxIntens != null && maxIntens > 0.0)){ - query += " AND nmrsig.INTENSITY >= " + minIntens + " AND nmrsig.INTENSITY <= " + maxIntens + " \n"; - } - query += " ;"; - System.out.println("\n\ngetSpectraIDs:\nQUERY: " + query); - final ResultSet resultSet = casekit.NMR.DB.getResultSet(DBConnection, query); - while (resultSet.next()) { - spectraIDs.add(resultSet.getInt("sigID")); - } - - return spectraIDs; - } - - // currently only for 1D spectra - - /** - * - * @param DBConnection - * @param spectrum - * @param shiftDev - * @param intensDev - * @param stepSize - * @param dim - * @return - * @throws SQLException - * @deprecated - */ - public static HashMap> matchSpectrumAgainstDB(final Connection DBConnection, final Spectrum spectrum, final double shiftDev, final Double intensDev, final int stepSize, final int dim) throws SQLException{ - - final HashMap> hits = new HashMap<>(); double shift; - for (int i = 0; i < spectrum.getSignalCount(); i++) { - hits.put(i, new ArrayList<>()); - shift = Math.floor(spectrum.getSignal(i).getShift(dim) * stepSize) / (double) stepSize; - if(spectrum.getSignal(i).getIntensity() != null){ - hits.get(i).addAll(casekit.NMR.DB.getSignalIDsFromNMRShiftDB(DBConnection, shift - shiftDev, shift + shiftDev, spectrum.getSignal(i).getMultiplicity(), spectrum.getSignal(i).getIntensity() - intensDev, 
spectrum.getSignal(i).getIntensity() + intensDev, spectrum.getSignal(i).getNuclei()[dim])); - } else { - hits.get(i).addAll(casekit.NMR.DB.getSignalIDsFromNMRShiftDB(DBConnection, shift - shiftDev, shift + shiftDev, spectrum.getSignal(i).getMultiplicity(), spectrum.getSignal(i).getIntensity(), spectrum.getSignal(i).getIntensity(), spectrum.getSignal(i).getNuclei()[dim])); - } - } - - return hits; - } - - - /** - * - * @param DBConnection - * @param elem - * @return - * @throws SQLException - * @deprecated - */ - public static HashMap> getLookupTableFromNMRShiftDB(final Connection DBConnection, final String elem) throws SQLException { - - if (DBConnection == null) { - return null; - } - final HashMap> lookup = new HashMap<>(); - final Statement statement = DBConnection.createStatement(); - final String query = "SELECT a.HOSE_CODE AS hose, sh.VALUE AS shift \n" - + " FROM ATOM AS a, SHIFT AS sh, SIGNAL_ATOM AS sigatom, NMR_SIGNAL AS nmrsig, SPECTRUM AS spec, SPECTRUM_TYPE AS spectype \n" - + " WHERE a.ATOM_ID = sigatom.ATOM_ID AND \n" // to get signals of each atom - + " sigatom.SIGNAL_ID = sh.SIGNAL_ID AND \n" // to get shift values - + " sigatom.SIGNAL_ID = nmrsig.SIGNAL_ID AND \n" - + " nmrsig.SPECTRUM_ID = spec.SPECTRUM_ID AND \n" - + " spec.REVIEW_FLAG = \"true\" AND \n" // checks whether review flag is set to true - + " spectype.SPECTRUM_TYPE_ID = spec.SPECTRUM_TYPE_ID AND \n" - + " spectype.NAME = \"" + casekit.NMR.Utils.getIsotopeIdentifier(elem) + "\";"; - System.out.println("\n\ngetLookupTable:\nQUERY: " + query); - final ResultSet resultSet = statement.executeQuery(query); - while (resultSet.next()) { - if (!lookup.containsKey(resultSet.getString("hose"))) { - lookup.put(resultSet.getString("hose"), new ArrayList<>()); - } - lookup.get(resultSet.getString("hose")).add(resultSet.getDouble("shift")); - } - - return lookup; - } - - - /** - * - * @param DBConnection - * @param minShift - * @param maxShift - * @param elem - * @return - * @throws SQLException - 
* @deprecated - */ - public static HashMap getRMS(final Connection DBConnection, final double minShift, final double maxShift, final String elem) throws SQLException { - - if (DBConnection == null) { - return null; - } - final HashMap rms = new HashMap<>(); - final Statement statement = DBConnection.createStatement(); - final String query = "SELECT a.HOSE_CODE AS hose, COUNT(sh.VALUE) AS shiftCount, AVG(sh.VALUE) AS mean, SQRT(SUM(POW(sh.VALUE, 2))/COUNT(sh.VALUE)) AS rms \n" - + " FROM ATOM AS a, SHIFT AS sh, SIGNAL_ATOM AS sigatom, NMR_SIGNAL AS nmrsig, SPECTRUM AS spec, SPECTRUM_TYPE AS spectype \n" - + " WHERE sh.VALUE >= " + minShift + " AND sh.VALUE <= " + maxShift + " AND \n" // for filtering by means of shift values - + " sh.SIGNAL_ID = sigatom.SIGNAL_ID AND \n" // to get shift values - + " a.ATOM_ID = sigatom.ATOM_ID AND \n" - + " sigatom.SIGNAL_ID = nmrsig.SIGNAL_ID AND \n" - + " nmrsig.SPECTRUM_ID = spec.SPECTRUM_ID AND \n" - + " spec.REVIEW_FLAG = \"true\" AND \n" // checks whether review flag is set to true - + " spectype.SPECTRUM_TYPE_ID = spec.SPECTRUM_TYPE_ID AND \n" - + " spectype.NAME = \"" + casekit.NMR.Utils.getIsotopeIdentifier(elem) + "\" AND \n" - + " nmrsig.MULTIPLICITY IS NOT NULL AND \n" - + " nmrsig.MULTIPLICITY != \"\" \n" - + " GROUP BY hose;"; - System.out.println("\n\nRMS SQL:\nQUERY: " + query); - final ResultSet resultSet = statement.executeQuery(query); - while (resultSet.next()) { - rms.put(resultSet.getString("hose"), resultSet.getDouble("rms")); - } - - return rms; - } - - - public static HashMap getSpectraStrings(final IAtomContainer ac, final String nucleus) { - final ArrayList props = (ArrayList) (ArrayList) (new ArrayList<>(ac.getProperties().keySet())); - final HashMap spectra = new HashMap<>(); - for (final String prop : props) { - if (prop.startsWith("Spectrum " + nucleus)) { - spectra.put(prop, ac.getProperty(prop)); - } - } - - return spectra; - } - - - /** - * Creates a two dimensional array of a given NMRShiftDB NMR 
entry - * with all signal shift values, intensities, multiplicities and atom indices. - * - * @param NMRShiftDBSpectrum - * @return two dimensional array: - * 1. dimension: signal index (row); - * 2. dimension: signal shift value (column 1), signal intensity (column 2), - * signal multiplicity (column 3), atom index in structure (column 4) - */ - public static String[][] parseNMRShiftDBSpectrum(final String NMRShiftDBSpectrum){ - if(NMRShiftDBSpectrum.trim().isEmpty()){ - return new String[][]{}; - } - String[] signalSplit; - final String[] shiftsSplit = NMRShiftDBSpectrum.split("\\|"); - final String[][] values = new String[shiftsSplit.length][4]; - for (int i = 0; i < shiftsSplit.length; i++) { - signalSplit = shiftsSplit[i].split(";"); - values[i][0] = signalSplit[0]; // shift value - values[i][1] = signalSplit[1].substring(0, signalSplit[1].length() - 1); // intensity - values[i][2] = signalSplit[1].substring(signalSplit[1].length() - 1); // multiplicity - values[i][3] = signalSplit[2]; // atom index - } - - return values; - } - - /** - * Sets shifts, intensities and implicit hydrogen counts in atoms of an atom container - * by means of given spectrum property string. - * - * @param ac IAtomContainer to set - * @param NMRShiftDBSpectrum Property string of spectrum in NMRShiftDB format. 
- * @return - * - * @see DB#parseNMRShiftDBSpectrum(java.lang.String) - * @see Utils#getHydrogenCountFromMultiplicity(java.lang.String) - * @deprecated - */ - public static boolean setNMRShiftDBShiftsToAtomContainer(final IAtomContainer ac, final String NMRShiftDBSpectrum){ - if (ac.getProperty(NMRShiftDBSpectrum) == null) { - return false; - } - final String[][] spectrumStringArray = DB.parseNMRShiftDBSpectrum(ac.getProperty(NMRShiftDBSpectrum)); - - Integer atomIndexSpectrum; -// String multiplicity; - Double shift; - - for (int i = 0; i < spectrumStringArray.length; i++) { - atomIndexSpectrum = Integer.parseInt(spectrumStringArray[i][3]); - shift = Double.parseDouble(spectrumStringArray[i][0]); -// multiplicity = spectrumStringArray[i][3]; - if(Utils.checkIndexInAtomContainer(ac, atomIndexSpectrum)){ - ac.getAtom(atomIndexSpectrum).setProperty(Utils.getNMRShiftConstant(ac.getAtom(atomIndexSpectrum).getSymbol()), shift); -// ac.getAtom(atomIndexSpectrum).setImplicitHydrogenCount(Utils.getHydrogenCountFromMultiplicity(multiplicity)); - } - } - - return true; - } - - public static String getNucleusFromNMRShiftDBSpectrumProperty(final String NMRShiftDBSpectrumProperty){ - return NMRShiftDBSpectrumProperty.split(" ")[1]; - } - - public static Spectrum NMRShiftDBSpectrumToSpectrum(final String NMRShiftDBSpectrum, final String nucleus){ - if ((NMRShiftDBSpectrum == null) || NMRShiftDBSpectrum.trim().isEmpty()) { - return null; - } - final String[][] spectrumStringArray = DB.parseNMRShiftDBSpectrum(NMRShiftDBSpectrum); - final Spectrum spectrum = new Spectrum(new String[]{nucleus}); - String multiplicity; - Double shift, intensity; - try { - for (int i = 0; i < spectrumStringArray.length; i++) { - shift = Double.parseDouble(spectrumStringArray[i][0]); - intensity = Double.parseDouble(spectrumStringArray[i][1]); - multiplicity = spectrumStringArray[i][2]; - spectrum.addSignal(new Signal(new String[]{nucleus}, new Double[]{shift}, multiplicity, intensity)); - } - 
Utils.setSpectrumEquivalences(spectrum); - } catch (Exception e) { - - return null; - } - - return spectrum; - } - - public static Assignment NMRShiftDBSpectrumToAssignment(final String NMRShiftDBSpectrum, final String nucleus) { - if ((NMRShiftDBSpectrum == null) || NMRShiftDBSpectrum.trim().isEmpty()) { - return null; - } - final String[][] NMRShiftDBSpectrumStringArray = DB.parseNMRShiftDBSpectrum(NMRShiftDBSpectrum); - final Spectrum spectrum = DB.NMRShiftDBSpectrumToSpectrum(NMRShiftDBSpectrum, nucleus); - final Assignment assignment = new Assignment(spectrum); - for (int i = 0; i < NMRShiftDBSpectrumStringArray.length; i++) { - assignment.setAssignment(0, i, new Integer(NMRShiftDBSpectrumStringArray[i][3])); - } - - return assignment; - } - - public static MongoClient login(final String mongoUser, final String mongoPassword, final String mongoAuthDB) throws CDKException { - MongoClient mongo; - try { - // Creating a Mongo client - mongo = new MongoClient( - new ServerAddress("127.0.0.1", 27017), - MongoCredential.createCredential( - mongoUser, - mongoAuthDB, - mongoPassword.toCharArray()), - MongoClientOptions.builder().build()); - System.out.println("Login to MongoDB was successfull"); - // Accessing the database - } catch (Exception e) { - e.printStackTrace(); - System.err.println(Thread.currentThread().getStackTrace()[1].getMethodName() + ": could not connect to MongoDB!"); - - return null; - } - - return mongo; - } - - public static MongoDatabase getDatabase(final MongoClient mongo, final String mongoDBName){ - return mongo.getDatabase(mongoDBName); - } - - public static MongoCollection getCollection(final MongoClient mongo, final String mongoDBName, final String mongoDBCollection) { - final MongoDatabase database = DB.getDatabase(mongo, mongoDBName); - if (database == null) { - return null; - } - System.out.println("Access to database \"" + mongoDBName + "\" was successfull"); - // Retrieving a collection - final MongoCollection collection = 
database.getCollection(mongoDBCollection); - System.out.println("Retrieval of collection \"" + mongoDBCollection + "\" was successfull -> size: " + collection.countDocuments()); - - return collection; - } - - public static void logout(final MongoClient mongo) { - mongo.close(); - } -} From 60dff7f67e17bf45b04beaddb0ed387e1468e229 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Thu, 18 Jul 2019 14:32:11 +0200 Subject: [PATCH 121/405] - removed SNAPSHOT --- ...Maven__org_openscience_cdk_cdk_atomtype_2_2.xml | 13 +++++++++++++ .../Maven__org_openscience_cdk_cdk_charges_2_2.xml | 13 +++++++++++++ .../Maven__org_openscience_cdk_cdk_core_2_2.xml | 13 +++++++++++++ .../Maven__org_openscience_cdk_cdk_ctab_2_2.xml | 13 +++++++++++++ .../Maven__org_openscience_cdk_cdk_data_2_2.xml | 13 +++++++++++++ .../Maven__org_openscience_cdk_cdk_depict_2_2.xml | 13 +++++++++++++ .../Maven__org_openscience_cdk_cdk_dict_2_2.xml | 13 +++++++++++++ .../Maven__org_openscience_cdk_cdk_extra_2_2.xml | 13 +++++++++++++ ...en__org_openscience_cdk_cdk_fingerprint_2_2.xml | 13 +++++++++++++ ...ven__org_openscience_cdk_cdk_forcefield_2_2.xml | 13 +++++++++++++ .../Maven__org_openscience_cdk_cdk_formula_2_2.xml | 13 +++++++++++++ ...ven__org_openscience_cdk_cdk_interfaces_2_2.xml | 13 +++++++++++++ .../Maven__org_openscience_cdk_cdk_io_2_2.xml | 13 +++++++++++++ ...aven__org_openscience_cdk_cdk_ioformats_2_2.xml | 13 +++++++++++++ ...en__org_openscience_cdk_cdk_isomorphism_2_2.xml | 13 +++++++++++++ .../Maven__org_openscience_cdk_cdk_legacy_2_2.xml | 13 +++++++++++++ .../Maven__org_openscience_cdk_cdk_qsar_2_2.xml | 13 +++++++++++++ ...ven__org_openscience_cdk_cdk_qsaratomic_2_2.xml | 13 +++++++++++++ ...Maven__org_openscience_cdk_cdk_reaction_2_2.xml | 13 +++++++++++++ .../Maven__org_openscience_cdk_cdk_render_2_2.xml | 13 +++++++++++++ ...aven__org_openscience_cdk_cdk_renderawt_2_2.xml | 13 +++++++++++++ ...en__org_openscience_cdk_cdk_renderbasic_2_2.xml | 13 +++++++++++++ 
...en__org_openscience_cdk_cdk_renderextra_2_2.xml | 13 +++++++++++++ .../Maven__org_openscience_cdk_cdk_sdg_2_2.xml | 13 +++++++++++++ ...aven__org_openscience_cdk_cdk_signature_2_2.xml | 13 +++++++++++++ .../Maven__org_openscience_cdk_cdk_silent_2_2.xml | 13 +++++++++++++ .../Maven__org_openscience_cdk_cdk_smarts_2_2.xml | 13 +++++++++++++ .../Maven__org_openscience_cdk_cdk_smiles_2_2.xml | 13 +++++++++++++ ...Maven__org_openscience_cdk_cdk_standard_2_2.xml | 13 +++++++++++++ ...n__org_openscience_cdk_cdk_valencycheck_2_2.xml | 13 +++++++++++++ pom.xml | 14 +++++++------- 31 files changed, 397 insertions(+), 7 deletions(-) create mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_atomtype_2_2.xml create mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_charges_2_2.xml create mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_core_2_2.xml create mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_ctab_2_2.xml create mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_data_2_2.xml create mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_depict_2_2.xml create mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_dict_2_2.xml create mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_extra_2_2.xml create mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_fingerprint_2_2.xml create mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_forcefield_2_2.xml create mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_formula_2_2.xml create mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_interfaces_2_2.xml create mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_io_2_2.xml create mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_ioformats_2_2.xml create mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_isomorphism_2_2.xml create mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_legacy_2_2.xml create mode 100644 
.idea/libraries/Maven__org_openscience_cdk_cdk_qsar_2_2.xml create mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_qsaratomic_2_2.xml create mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_reaction_2_2.xml create mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_render_2_2.xml create mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_renderawt_2_2.xml create mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_renderbasic_2_2.xml create mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_renderextra_2_2.xml create mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_sdg_2_2.xml create mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_signature_2_2.xml create mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_silent_2_2.xml create mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_smarts_2_2.xml create mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_smiles_2_2.xml create mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_standard_2_2.xml create mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_valencycheck_2_2.xml diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_atomtype_2_2.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_atomtype_2_2.xml new file mode 100644 index 0000000..78820cb --- /dev/null +++ b/.idea/libraries/Maven__org_openscience_cdk_cdk_atomtype_2_2.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_charges_2_2.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_charges_2_2.xml new file mode 100644 index 0000000..dcb1a3c --- /dev/null +++ b/.idea/libraries/Maven__org_openscience_cdk_cdk_charges_2_2.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_core_2_2.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_core_2_2.xml new file mode 100644 index 0000000..49df521 
--- /dev/null +++ b/.idea/libraries/Maven__org_openscience_cdk_cdk_core_2_2.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_ctab_2_2.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_ctab_2_2.xml new file mode 100644 index 0000000..b006654 --- /dev/null +++ b/.idea/libraries/Maven__org_openscience_cdk_cdk_ctab_2_2.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_data_2_2.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_data_2_2.xml new file mode 100644 index 0000000..f25fd58 --- /dev/null +++ b/.idea/libraries/Maven__org_openscience_cdk_cdk_data_2_2.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_depict_2_2.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_depict_2_2.xml new file mode 100644 index 0000000..53e4383 --- /dev/null +++ b/.idea/libraries/Maven__org_openscience_cdk_cdk_depict_2_2.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_dict_2_2.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_dict_2_2.xml new file mode 100644 index 0000000..de7548d --- /dev/null +++ b/.idea/libraries/Maven__org_openscience_cdk_cdk_dict_2_2.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_extra_2_2.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_extra_2_2.xml new file mode 100644 index 0000000..a305a2d --- /dev/null +++ b/.idea/libraries/Maven__org_openscience_cdk_cdk_extra_2_2.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_fingerprint_2_2.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_fingerprint_2_2.xml new file mode 100644 index 
0000000..9fcd14f --- /dev/null +++ b/.idea/libraries/Maven__org_openscience_cdk_cdk_fingerprint_2_2.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_forcefield_2_2.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_forcefield_2_2.xml new file mode 100644 index 0000000..20e139e --- /dev/null +++ b/.idea/libraries/Maven__org_openscience_cdk_cdk_forcefield_2_2.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_formula_2_2.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_formula_2_2.xml new file mode 100644 index 0000000..e430695 --- /dev/null +++ b/.idea/libraries/Maven__org_openscience_cdk_cdk_formula_2_2.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_interfaces_2_2.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_interfaces_2_2.xml new file mode 100644 index 0000000..5bb8c1a --- /dev/null +++ b/.idea/libraries/Maven__org_openscience_cdk_cdk_interfaces_2_2.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_io_2_2.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_io_2_2.xml new file mode 100644 index 0000000..469ac06 --- /dev/null +++ b/.idea/libraries/Maven__org_openscience_cdk_cdk_io_2_2.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_ioformats_2_2.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_ioformats_2_2.xml new file mode 100644 index 0000000..42f4717 --- /dev/null +++ b/.idea/libraries/Maven__org_openscience_cdk_cdk_ioformats_2_2.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_isomorphism_2_2.xml 
b/.idea/libraries/Maven__org_openscience_cdk_cdk_isomorphism_2_2.xml new file mode 100644 index 0000000..bbb359d --- /dev/null +++ b/.idea/libraries/Maven__org_openscience_cdk_cdk_isomorphism_2_2.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_legacy_2_2.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_legacy_2_2.xml new file mode 100644 index 0000000..6019e88 --- /dev/null +++ b/.idea/libraries/Maven__org_openscience_cdk_cdk_legacy_2_2.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_qsar_2_2.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_qsar_2_2.xml new file mode 100644 index 0000000..2b69145 --- /dev/null +++ b/.idea/libraries/Maven__org_openscience_cdk_cdk_qsar_2_2.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_qsaratomic_2_2.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_qsaratomic_2_2.xml new file mode 100644 index 0000000..7c60ca1 --- /dev/null +++ b/.idea/libraries/Maven__org_openscience_cdk_cdk_qsaratomic_2_2.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_reaction_2_2.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_reaction_2_2.xml new file mode 100644 index 0000000..7863327 --- /dev/null +++ b/.idea/libraries/Maven__org_openscience_cdk_cdk_reaction_2_2.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_render_2_2.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_render_2_2.xml new file mode 100644 index 0000000..daf4dfb --- /dev/null +++ b/.idea/libraries/Maven__org_openscience_cdk_cdk_render_2_2.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git 
a/.idea/libraries/Maven__org_openscience_cdk_cdk_renderawt_2_2.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_renderawt_2_2.xml new file mode 100644 index 0000000..8b0cf68 --- /dev/null +++ b/.idea/libraries/Maven__org_openscience_cdk_cdk_renderawt_2_2.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_renderbasic_2_2.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_renderbasic_2_2.xml new file mode 100644 index 0000000..89081f2 --- /dev/null +++ b/.idea/libraries/Maven__org_openscience_cdk_cdk_renderbasic_2_2.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_renderextra_2_2.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_renderextra_2_2.xml new file mode 100644 index 0000000..18a4c16 --- /dev/null +++ b/.idea/libraries/Maven__org_openscience_cdk_cdk_renderextra_2_2.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_sdg_2_2.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_sdg_2_2.xml new file mode 100644 index 0000000..5807025 --- /dev/null +++ b/.idea/libraries/Maven__org_openscience_cdk_cdk_sdg_2_2.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_signature_2_2.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_signature_2_2.xml new file mode 100644 index 0000000..5d53203 --- /dev/null +++ b/.idea/libraries/Maven__org_openscience_cdk_cdk_signature_2_2.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_silent_2_2.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_silent_2_2.xml new file mode 100644 index 0000000..e6403f3 --- /dev/null +++ b/.idea/libraries/Maven__org_openscience_cdk_cdk_silent_2_2.xml @@ -0,0 +1,13 @@ + + + + + 
+ + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_smarts_2_2.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_smarts_2_2.xml new file mode 100644 index 0000000..91b8ffa --- /dev/null +++ b/.idea/libraries/Maven__org_openscience_cdk_cdk_smarts_2_2.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_smiles_2_2.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_smiles_2_2.xml new file mode 100644 index 0000000..7c1e402 --- /dev/null +++ b/.idea/libraries/Maven__org_openscience_cdk_cdk_smiles_2_2.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_standard_2_2.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_standard_2_2.xml new file mode 100644 index 0000000..90dd31a --- /dev/null +++ b/.idea/libraries/Maven__org_openscience_cdk_cdk_standard_2_2.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_valencycheck_2_2.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_valencycheck_2_2.xml new file mode 100644 index 0000000..447e642 --- /dev/null +++ b/.idea/libraries/Maven__org_openscience_cdk_cdk_valencycheck_2_2.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/pom.xml b/pom.xml index 4fb7d98..c8e91d3 100644 --- a/pom.xml +++ b/pom.xml @@ -49,25 +49,25 @@ org.openscience.cdk cdk-core - 2.2-SNAPSHOT + 2.2 jar org.openscience.cdk cdk-legacy - 2.2-SNAPSHOT + 2.2 jar org.openscience.cdk cdk-depict - 2.2-SNAPSHOT + 2.2 jar org.openscience.cdk cdk-qsaratomic - 2.2-SNAPSHOT + 2.2 jar @@ -79,18 +79,18 @@ org.openscience.cdk cdk-fingerprint - 2.2-SNAPSHOT + 2.2 jar org.openscience.cdk cdk-silent - 2.2-SNAPSHOT + 2.2 org.openscience.cdk cdk-standard - 2.2-SNAPSHOT + 2.2 jar From 5c56ad03e4ba64512a31b58af49f719669120156 Mon Sep 17 
00:00:00 2001 From: michaelwenk Date: Fri, 26 Jul 2019 17:32:53 +0200 Subject: [PATCH 122/405] - adjustments regarding changes in Assignment class --- src/casekit/NMR/dbservice/NMRShiftDB.java | 6 +-- .../NMR/interpretation/InterpretData.java | 24 ++++----- src/casekit/NMR/match/Matcher.java | 10 ++-- src/casekit/NMR/model/Assignment.java | 53 +++++++++---------- 4 files changed, 45 insertions(+), 48 deletions(-) diff --git a/src/casekit/NMR/dbservice/NMRShiftDB.java b/src/casekit/NMR/dbservice/NMRShiftDB.java index 857da09..25d25f7 100644 --- a/src/casekit/NMR/dbservice/NMRShiftDB.java +++ b/src/casekit/NMR/dbservice/NMRShiftDB.java @@ -240,12 +240,12 @@ public static HashMap> getSolventDeviations(final Stri } else if (signalCount != spectrum.getSignalCount()) { continue; } - for (final int atomIndex : assignment.getAtomIndices(0)) { + for (final int atomIndex : assignment.getAssignments(0)) { if (!shiftsPerAtom.containsKey(atomIndex)) { shiftsPerAtom.put(atomIndex, new ArrayList<>()); solventsPerAtom.put(atomIndex, new ArrayList<>()); } - shiftsPerAtom.get(atomIndex).add(spectrum.getSignal(assignment.getSignalIndex(0, atomIndex)).getShift(0)); + shiftsPerAtom.get(atomIndex).add(spectrum.getSignal(assignment.getIndex(0, atomIndex)).getShift(0)); solventsPerAtom.get(atomIndex).add(spectrum.getSolvent()); } } @@ -352,7 +352,7 @@ public static String[][] parseNMRShiftDBSpectrum(final String NMRShiftDBSpectrum * @param NMRShiftDBSpectrum Property string of spectrum in NMRShiftDB format. 
* @return * - * @see MongoDB#parseNMRShiftDBSpectrum(String) + * @see #parseNMRShiftDBSpectrum(String) * @see Utils#getHydrogenCountFromMultiplicity(String) * @deprecated */ diff --git a/src/casekit/NMR/interpretation/InterpretData.java b/src/casekit/NMR/interpretation/InterpretData.java index 98850de..ee7f972 100644 --- a/src/casekit/NMR/interpretation/InterpretData.java +++ b/src/casekit/NMR/interpretation/InterpretData.java @@ -289,13 +289,13 @@ public final boolean assignDEPT(final Spectrum spectrum1D_DEPT90, final Spectrum final Assignment assignment1D_13C = this.getAssignment(this.getSpectra().get(CDKConstants.NMRSPECTYPE_1D + "_13C")); for (int i = 0; i < assignment1D_DEPT90.getAssignmentsCount(); i++) { - if (assignment1D_13C.getAtomIndex(0, matchesIn1DSpectrum_DEPT90.get(i)) >= 0) { - assignment1D_DEPT90.setAssignment(0, i, assignment1D_13C.getAtomIndex(0, matchesIn1DSpectrum_DEPT90.get(i))); + if (assignment1D_13C.getAssignment(0, matchesIn1DSpectrum_DEPT90.get(i)) >= 0) { + assignment1D_DEPT90.setAssignment(0, i, assignment1D_13C.getAssignment(0, matchesIn1DSpectrum_DEPT90.get(i))); } } for (int i = 0; i < assignment1D_DEPT135.getAssignmentsCount(); i++) { - if (assignment1D_13C.getAtomIndex(0, matchesIn1DSpectrum_DEPT135.get(i)) >= 0) { - assignment1D_DEPT135.setAssignment(0, i, assignment1D_13C.getAtomIndex(0, matchesIn1DSpectrum_DEPT135.get(i))); + if (assignment1D_13C.getAssignment(0, matchesIn1DSpectrum_DEPT135.get(i)) >= 0) { + assignment1D_DEPT135.setAssignment(0, i, assignment1D_13C.getAssignment(0, matchesIn1DSpectrum_DEPT135.get(i))); } } @@ -378,9 +378,9 @@ public final void assignHSQC(final Spectrum spectrum, final double tolProton, fi for (int i = 0; i < assignment2D_HSQC.getAssignmentsCount(); i++) { // if heavy atom i has an assignment in class atom container then assign that index i to belonging protons as index - if (assignment2D_HSQC.getAtomIndex(1, i) >= 0) { - assignment1D_1H.setAssignment(0, matchesIn1DSpectrum_1H.get(i), 
assignment2D_HSQC.getAtomIndex(1, i)); - assignment2D_HSQC.setAssignment(0, i, assignment1D_1H.getAtomIndex(0, matchesIn1DSpectrum_1H.get(i))); + if (assignment2D_HSQC.getAssignment(1, i) >= 0) { + assignment1D_1H.setAssignment(0, matchesIn1DSpectrum_1H.get(i), assignment2D_HSQC.getAssignment(1, i)); + assignment2D_HSQC.setAssignment(0, i, assignment1D_1H.getAssignment(0, matchesIn1DSpectrum_1H.get(i))); } } } @@ -388,11 +388,11 @@ public final void assignHSQC(final Spectrum spectrum, final double tolProton, fi if(!spectrum.getNuclei()[1].equals("13C")){ final Assignment assignment2D_HSQC = this.getAssignments().get(spectrum.getSpecType() + "_" + Utils.getSpectrumNucleiAsString(spectrum)); for (int i = 0; i < assignment2D_HSQC.getAssignmentsCount(); i++) { - if((assignment2D_HSQC.getAtomIndex(1, i) > -1)){ - if(this.mol.getAtom(assignment2D_HSQC.getAtomIndex(1, i)).getImplicitHydrogenCount() == null){ - this.mol.getAtom(assignment2D_HSQC.getAtomIndex(1, i)).setImplicitHydrogenCount(0); + if((assignment2D_HSQC.getAssignment(1, i) > -1)){ + if(this.mol.getAtom(assignment2D_HSQC.getAssignment(1, i)).getImplicitHydrogenCount() == null){ + this.mol.getAtom(assignment2D_HSQC.getAssignment(1, i)).setImplicitHydrogenCount(0); } - this.mol.getAtom(assignment2D_HSQC.getAtomIndex(1, i)).setImplicitHydrogenCount(this.mol.getAtom(assignment2D_HSQC.getAtomIndex(1, i)).getImplicitHydrogenCount() + 1); + this.mol.getAtom(assignment2D_HSQC.getAssignment(1, i)).setImplicitHydrogenCount(this.mol.getAtom(assignment2D_HSQC.getAssignment(1, i)).getImplicitHydrogenCount() + 1); } } } @@ -458,7 +458,7 @@ public final ArrayList getAssignedAtomIndices(final Spectrum spectrum, return atomIndices; } - return new ArrayList<>(this.getAssignment(spectrum).getAtomIndices(dim)); + return new ArrayList<>(this.getAssignment(spectrum).getAssignments(dim)); } diff --git a/src/casekit/NMR/match/Matcher.java b/src/casekit/NMR/match/Matcher.java index 9c2af81..ef0ff58 100644 --- 
a/src/casekit/NMR/match/Matcher.java +++ b/src/casekit/NMR/match/Matcher.java @@ -20,9 +20,7 @@ import org.openscience.cdk.exception.CDKException; import org.openscience.cdk.similarity.Tanimoto; -import java.util.ArrayList; import java.util.Arrays; -import java.util.Collections; import java.util.HashSet; public class Matcher { @@ -37,7 +35,7 @@ public class Matcher { * @param dim2 dimension to select in second spectrum * @return true if both spectra contain the selected dimension */ - public static boolean checkDimensions(final Spectrum spectrum1, final Spectrum spectrum2, final int dim1, final int dim2){ + private static boolean checkDimensions(final Spectrum spectrum1, final Spectrum spectrum2, final int dim1, final int dim2){ return spectrum1.containsDim(dim1) && spectrum2.containsDim(dim2); } @@ -117,10 +115,10 @@ public static Double[] getDeviations(final Spectrum spectrum1, final Spectrum sp final Assignment matchAssignments = Matcher.matchSpectra(spectrum1, spectrum2, dim1, dim2, shiftTol); Signal matchedSignalInSpectrum2; for (int i = 0; i < spectrum1.getSignalCount(); i++) { - if (matchAssignments.getAtomIndex(0, i) == -1) { + if (matchAssignments.getAssignment(0, i) == -1) { deviations[i] = null; } else { - matchedSignalInSpectrum2 = spectrum2.getSignal(matchAssignments.getAtomIndex(0, i)); + matchedSignalInSpectrum2 = spectrum2.getSignal(matchAssignments.getAssignment(0, i)); deviations[i] = Math.abs(spectrum1.getSignal(i).getShift(dim1) - matchedSignalInSpectrum2.getShift(dim2)); } } @@ -274,7 +272,7 @@ public static Assignment matchSpectra(final Spectrum spectrum1, final Spectrum s } final Assignment matchAssignment = new Assignment(spectrum1); for (int dim = 0; dim < spectrum1.getNDim(); dim++) { - matchAssignment.setAssignments(dim, Matcher.matchSpectra(spectrum1, spectrum2, dim, dim, shiftTols[dim]).getAtomIndices(0)); + matchAssignment.setAssignments(dim, Matcher.matchSpectra(spectrum1, spectrum2, dim, dim, shiftTols[dim]).getAssignments(0)); } 
return matchAssignment; diff --git a/src/casekit/NMR/model/Assignment.java b/src/casekit/NMR/model/Assignment.java index 9206565..2c6fec7 100644 --- a/src/casekit/NMR/model/Assignment.java +++ b/src/casekit/NMR/model/Assignment.java @@ -26,7 +26,6 @@ import casekit.NMR.model.dimensional.DimensionalNMR; import org.apache.commons.lang3.ArrayUtils; -import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -55,55 +54,55 @@ private int[][] initAssignments(final int nDim, final int nSignals){ } /** - * Sets an assignment as atom index for a signal position. + * Sets an assignment with value for an index position. * * @param dim - * @param indexInSpectrum - * @param indexInAtomContainer + * @param index + * @param assignment * @return */ - public boolean setAssignment(final int dim, final int indexInSpectrum, final int indexInAtomContainer){ - if(!this.containsDim(dim) || !this.checkSpectrumIndex(dim, indexInSpectrum)){ + public boolean setAssignment(final int dim, final int index, final int assignment){ + if(!this.containsDim(dim) || !this.checkSpectrumIndex(dim, index)){ return false; } - this.assignments[dim][indexInSpectrum] = indexInAtomContainer; + this.assignments[dim][index] = assignment; return true; } - public boolean setAssignments(final int dim, final List indicesInAtomContainer){ - if(!this.containsDim(dim) || !this.checkInputListSize(indicesInAtomContainer.size())){ + public boolean setAssignments(final int dim, final List assignments){ + if(!this.containsDim(dim) || !this.checkInputListSize(assignments.size())){ return false; } for (int i = 0; i < this.getAssignmentsCount(); i++) { - this.setAssignment(dim, i, indicesInAtomContainer.get(i)); + this.setAssignment(dim, i, assignments.get(i)); } return true; } - public Integer getAtomIndex(final int dim, final int indexInSpectrum){ - if(!this.containsDim(dim) || !this.checkSpectrumIndex(dim, indexInSpectrum)){ + public Integer getAssignment(final int dim, final int index){ + 
if(!this.containsDim(dim) || !this.checkSpectrumIndex(dim, index)){ return null; } - return this.assignments[dim][indexInSpectrum]; + return this.assignments[dim][index]; } - public Integer getSignalIndex(final int dim, final int atomIndexInStructure){ + public Integer getIndex(final int dim, final int assignment){ if(!this.containsDim(dim)){ return null; } - for (int signalIndex = 0; signalIndex < this.assignments[dim].length; signalIndex++) { - if(this.getAtomIndex(dim, signalIndex) == atomIndexInStructure){ - return signalIndex; + for (int index = 0; index < this.assignments[dim].length; index++) { + if(this.getAssignment(dim, index) == assignment){ + return index; } } return -1; } - public List getAtomIndices(final int dim){ + public List getAssignments(final int dim){ if(!this.containsDim(dim)){ return null; } @@ -139,30 +138,30 @@ public Boolean isFullyAssigned(final int dim){ } /** - * Adds a new assignment entry for a further signal. The given atom indices - * will be stored as atom index for each dimension of the signal/spectrum. + * Adds a new assignment entry for a further signal. The given query spectrum signal indices + * will be stored for each dimension of the signal/spectrum. 
* - * @param atomIndicesInStructure + * @param indicesInQuerySpectra * @return */ - public boolean addAssignment(final int[] atomIndicesInStructure){ - if(atomIndicesInStructure.length != this.getNDim()){ + public boolean addAssignment(final int[] indicesInQuerySpectra){ + if(!this.compareNDim(indicesInQuerySpectra.length)){ return false; } final int[][] extendedAssignments = new int[this.getNDim()][this.getAssignmentsCount()+1]; for (int dim = 0; dim < this.getNDim(); dim++) { for (int i = 0; i < this.getAssignmentsCount(); i++) { - extendedAssignments[dim][i] = this.getAtomIndex(dim, i); + extendedAssignments[dim][i] = this.getAssignment(dim, i); } - extendedAssignments[dim][this.getAssignmentsCount()] = atomIndicesInStructure[dim]; + extendedAssignments[dim][this.getAssignmentsCount()] = indicesInQuerySpectra[dim]; } this.assignments = extendedAssignments; return true; } - private boolean checkSpectrumIndex(final int dim, final int indexInSpectrum){ - return (indexInSpectrum >= 0) && (indexInSpectrum < this.assignments[dim].length); + private boolean checkSpectrumIndex(final int dim, final int indexInTargetSpectrum){ + return (indexInTargetSpectrum >= 0) && (indexInTargetSpectrum < this.assignments[dim].length); } private boolean checkInputListSize(final int size){ From 4eb0c9b1dc807ed68142f5fe95a789cf996b5d6a Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Fri, 26 Jul 2019 20:09:41 +0200 Subject: [PATCH 123/405] - removed the IDE project files --- .gitignore | 5 -- .idea/.gitignore | 2 + .idea/casekit.iml | 60 +++++++++--------- ...nscience_cdk_cdk_atomtype_2_2_SNAPSHOT.xml | 13 ---- ...enscience_cdk_cdk_charges_2_2_SNAPSHOT.xml | 13 ---- ..._openscience_cdk_cdk_core_2_2_SNAPSHOT.xml | 13 ---- ..._openscience_cdk_cdk_ctab_2_2_SNAPSHOT.xml | 13 ---- ..._openscience_cdk_cdk_data_2_2_SNAPSHOT.xml | 13 ---- ...penscience_cdk_cdk_depict_2_2_SNAPSHOT.xml | 13 ---- ..._openscience_cdk_cdk_dict_2_2_SNAPSHOT.xml | 13 ---- ...openscience_cdk_cdk_extra_2_2_SNAPSHOT.xml | 
13 ---- ...ience_cdk_cdk_fingerprint_2_2_SNAPSHOT.xml | 13 ---- ...cience_cdk_cdk_forcefield_2_2_SNAPSHOT.xml | 13 ---- ...enscience_cdk_cdk_formula_2_2_SNAPSHOT.xml | 13 ---- ...cience_cdk_cdk_interfaces_2_2_SNAPSHOT.xml | 13 ---- ...rg_openscience_cdk_cdk_io_2_2_SNAPSHOT.xml | 13 ---- ...science_cdk_cdk_ioformats_2_2_SNAPSHOT.xml | 13 ---- ...ience_cdk_cdk_isomorphism_2_2_SNAPSHOT.xml | 13 ---- ...penscience_cdk_cdk_legacy_2_2_SNAPSHOT.xml | 13 ---- ..._openscience_cdk_cdk_qsar_2_2_SNAPSHOT.xml | 13 ---- ...cience_cdk_cdk_qsaratomic_2_2_SNAPSHOT.xml | 13 ---- ...nscience_cdk_cdk_reaction_2_2_SNAPSHOT.xml | 13 ---- ...penscience_cdk_cdk_render_2_2_SNAPSHOT.xml | 13 ---- ...science_cdk_cdk_renderawt_2_2_SNAPSHOT.xml | 13 ---- ...ience_cdk_cdk_renderbasic_2_2_SNAPSHOT.xml | 13 ---- ...ience_cdk_cdk_renderextra_2_2_SNAPSHOT.xml | 13 ---- ...g_openscience_cdk_cdk_sdg_2_2_SNAPSHOT.xml | 13 ---- ...science_cdk_cdk_signature_2_2_SNAPSHOT.xml | 13 ---- ...penscience_cdk_cdk_silent_2_2_SNAPSHOT.xml | 13 ---- ...penscience_cdk_cdk_smarts_2_2_SNAPSHOT.xml | 13 ---- ...penscience_cdk_cdk_smiles_2_2_SNAPSHOT.xml | 13 ---- ...nscience_cdk_cdk_standard_2_2_SNAPSHOT.xml | 13 ---- ...ence_cdk_cdk_valencycheck_2_2_SNAPSHOT.xml | 13 ---- .settings/org.eclipse.jdt.core.prefs | 5 -- .settings/org.eclipse.m2e.core.prefs | 4 -- lib/commons-cli-1.4.jar | Bin 53820 -> 0 bytes 36 files changed, 32 insertions(+), 434 deletions(-) delete mode 100644 .gitignore create mode 100644 .idea/.gitignore delete mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_atomtype_2_2_SNAPSHOT.xml delete mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_charges_2_2_SNAPSHOT.xml delete mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_core_2_2_SNAPSHOT.xml delete mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_ctab_2_2_SNAPSHOT.xml delete mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_data_2_2_SNAPSHOT.xml delete mode 100644 
.idea/libraries/Maven__org_openscience_cdk_cdk_depict_2_2_SNAPSHOT.xml delete mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_dict_2_2_SNAPSHOT.xml delete mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_extra_2_2_SNAPSHOT.xml delete mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_fingerprint_2_2_SNAPSHOT.xml delete mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_forcefield_2_2_SNAPSHOT.xml delete mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_formula_2_2_SNAPSHOT.xml delete mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_interfaces_2_2_SNAPSHOT.xml delete mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_io_2_2_SNAPSHOT.xml delete mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_ioformats_2_2_SNAPSHOT.xml delete mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_isomorphism_2_2_SNAPSHOT.xml delete mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_legacy_2_2_SNAPSHOT.xml delete mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_qsar_2_2_SNAPSHOT.xml delete mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_qsaratomic_2_2_SNAPSHOT.xml delete mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_reaction_2_2_SNAPSHOT.xml delete mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_render_2_2_SNAPSHOT.xml delete mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_renderawt_2_2_SNAPSHOT.xml delete mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_renderbasic_2_2_SNAPSHOT.xml delete mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_renderextra_2_2_SNAPSHOT.xml delete mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_sdg_2_2_SNAPSHOT.xml delete mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_signature_2_2_SNAPSHOT.xml delete mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_silent_2_2_SNAPSHOT.xml delete mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_smarts_2_2_SNAPSHOT.xml delete 
mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_smiles_2_2_SNAPSHOT.xml delete mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_standard_2_2_SNAPSHOT.xml delete mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_valencycheck_2_2_SNAPSHOT.xml delete mode 100644 .settings/org.eclipse.jdt.core.prefs delete mode 100644 .settings/org.eclipse.m2e.core.prefs delete mode 100644 lib/commons-cli-1.4.jar diff --git a/.gitignore b/.gitignore deleted file mode 100644 index 4c01fbb..0000000 --- a/.gitignore +++ /dev/null @@ -1,5 +0,0 @@ -/target/ -/src/casekit/test.java -/src/NMR/remarks -/src/NMR/test.java -/nbproject/ \ No newline at end of file diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..5c98b42 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,2 @@ +# Default ignored files +/workspace.xml \ No newline at end of file diff --git a/.idea/casekit.iml b/.idea/casekit.iml index fba646b..9ad8882 100644 --- a/.idea/casekit.iml +++ b/.idea/casekit.iml @@ -10,29 +10,29 @@ - + - - - + + + - - + + - - - - - - - - - - - - - - + + + + + + + + + + + + + + @@ -43,7 +43,7 @@ - + @@ -52,17 +52,17 @@ - - - - - + + + + + - + - - - + + + diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_atomtype_2_2_SNAPSHOT.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_atomtype_2_2_SNAPSHOT.xml deleted file mode 100644 index 67e398f..0000000 --- a/.idea/libraries/Maven__org_openscience_cdk_cdk_atomtype_2_2_SNAPSHOT.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_charges_2_2_SNAPSHOT.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_charges_2_2_SNAPSHOT.xml deleted file mode 100644 index c1ec4b4..0000000 --- a/.idea/libraries/Maven__org_openscience_cdk_cdk_charges_2_2_SNAPSHOT.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git 
a/.idea/libraries/Maven__org_openscience_cdk_cdk_core_2_2_SNAPSHOT.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_core_2_2_SNAPSHOT.xml deleted file mode 100644 index b5ae76c..0000000 --- a/.idea/libraries/Maven__org_openscience_cdk_cdk_core_2_2_SNAPSHOT.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_ctab_2_2_SNAPSHOT.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_ctab_2_2_SNAPSHOT.xml deleted file mode 100644 index a9c65a2..0000000 --- a/.idea/libraries/Maven__org_openscience_cdk_cdk_ctab_2_2_SNAPSHOT.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_data_2_2_SNAPSHOT.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_data_2_2_SNAPSHOT.xml deleted file mode 100644 index 63e5eed..0000000 --- a/.idea/libraries/Maven__org_openscience_cdk_cdk_data_2_2_SNAPSHOT.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_depict_2_2_SNAPSHOT.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_depict_2_2_SNAPSHOT.xml deleted file mode 100644 index c6831ba..0000000 --- a/.idea/libraries/Maven__org_openscience_cdk_cdk_depict_2_2_SNAPSHOT.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_dict_2_2_SNAPSHOT.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_dict_2_2_SNAPSHOT.xml deleted file mode 100644 index 4aac108..0000000 --- a/.idea/libraries/Maven__org_openscience_cdk_cdk_dict_2_2_SNAPSHOT.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_extra_2_2_SNAPSHOT.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_extra_2_2_SNAPSHOT.xml deleted file mode 100644 index 
3086186..0000000 --- a/.idea/libraries/Maven__org_openscience_cdk_cdk_extra_2_2_SNAPSHOT.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_fingerprint_2_2_SNAPSHOT.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_fingerprint_2_2_SNAPSHOT.xml deleted file mode 100644 index 0194ace..0000000 --- a/.idea/libraries/Maven__org_openscience_cdk_cdk_fingerprint_2_2_SNAPSHOT.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_forcefield_2_2_SNAPSHOT.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_forcefield_2_2_SNAPSHOT.xml deleted file mode 100644 index 25068aa..0000000 --- a/.idea/libraries/Maven__org_openscience_cdk_cdk_forcefield_2_2_SNAPSHOT.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_formula_2_2_SNAPSHOT.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_formula_2_2_SNAPSHOT.xml deleted file mode 100644 index 4a5e857..0000000 --- a/.idea/libraries/Maven__org_openscience_cdk_cdk_formula_2_2_SNAPSHOT.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_interfaces_2_2_SNAPSHOT.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_interfaces_2_2_SNAPSHOT.xml deleted file mode 100644 index ee1b6a3..0000000 --- a/.idea/libraries/Maven__org_openscience_cdk_cdk_interfaces_2_2_SNAPSHOT.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_io_2_2_SNAPSHOT.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_io_2_2_SNAPSHOT.xml deleted file mode 100644 index 6db98ec..0000000 --- a/.idea/libraries/Maven__org_openscience_cdk_cdk_io_2_2_SNAPSHOT.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - 
- - - - - - - - \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_ioformats_2_2_SNAPSHOT.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_ioformats_2_2_SNAPSHOT.xml deleted file mode 100644 index 677f9f4..0000000 --- a/.idea/libraries/Maven__org_openscience_cdk_cdk_ioformats_2_2_SNAPSHOT.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_isomorphism_2_2_SNAPSHOT.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_isomorphism_2_2_SNAPSHOT.xml deleted file mode 100644 index e9a6c18..0000000 --- a/.idea/libraries/Maven__org_openscience_cdk_cdk_isomorphism_2_2_SNAPSHOT.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_legacy_2_2_SNAPSHOT.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_legacy_2_2_SNAPSHOT.xml deleted file mode 100644 index 673c1bc..0000000 --- a/.idea/libraries/Maven__org_openscience_cdk_cdk_legacy_2_2_SNAPSHOT.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_qsar_2_2_SNAPSHOT.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_qsar_2_2_SNAPSHOT.xml deleted file mode 100644 index 63ab4f9..0000000 --- a/.idea/libraries/Maven__org_openscience_cdk_cdk_qsar_2_2_SNAPSHOT.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_qsaratomic_2_2_SNAPSHOT.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_qsaratomic_2_2_SNAPSHOT.xml deleted file mode 100644 index e255df3..0000000 --- a/.idea/libraries/Maven__org_openscience_cdk_cdk_qsaratomic_2_2_SNAPSHOT.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_reaction_2_2_SNAPSHOT.xml 
b/.idea/libraries/Maven__org_openscience_cdk_cdk_reaction_2_2_SNAPSHOT.xml deleted file mode 100644 index a219dbc..0000000 --- a/.idea/libraries/Maven__org_openscience_cdk_cdk_reaction_2_2_SNAPSHOT.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_render_2_2_SNAPSHOT.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_render_2_2_SNAPSHOT.xml deleted file mode 100644 index 479050f..0000000 --- a/.idea/libraries/Maven__org_openscience_cdk_cdk_render_2_2_SNAPSHOT.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_renderawt_2_2_SNAPSHOT.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_renderawt_2_2_SNAPSHOT.xml deleted file mode 100644 index fd79be6..0000000 --- a/.idea/libraries/Maven__org_openscience_cdk_cdk_renderawt_2_2_SNAPSHOT.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_renderbasic_2_2_SNAPSHOT.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_renderbasic_2_2_SNAPSHOT.xml deleted file mode 100644 index 14e359e..0000000 --- a/.idea/libraries/Maven__org_openscience_cdk_cdk_renderbasic_2_2_SNAPSHOT.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_renderextra_2_2_SNAPSHOT.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_renderextra_2_2_SNAPSHOT.xml deleted file mode 100644 index 23a1cae..0000000 --- a/.idea/libraries/Maven__org_openscience_cdk_cdk_renderextra_2_2_SNAPSHOT.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_sdg_2_2_SNAPSHOT.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_sdg_2_2_SNAPSHOT.xml deleted file mode 100644 index bd1da56..0000000 --- 
a/.idea/libraries/Maven__org_openscience_cdk_cdk_sdg_2_2_SNAPSHOT.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_signature_2_2_SNAPSHOT.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_signature_2_2_SNAPSHOT.xml deleted file mode 100644 index cadfcb4..0000000 --- a/.idea/libraries/Maven__org_openscience_cdk_cdk_signature_2_2_SNAPSHOT.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_silent_2_2_SNAPSHOT.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_silent_2_2_SNAPSHOT.xml deleted file mode 100644 index 30c1ced..0000000 --- a/.idea/libraries/Maven__org_openscience_cdk_cdk_silent_2_2_SNAPSHOT.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_smarts_2_2_SNAPSHOT.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_smarts_2_2_SNAPSHOT.xml deleted file mode 100644 index 1ae1451..0000000 --- a/.idea/libraries/Maven__org_openscience_cdk_cdk_smarts_2_2_SNAPSHOT.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_smiles_2_2_SNAPSHOT.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_smiles_2_2_SNAPSHOT.xml deleted file mode 100644 index 35fa6d9..0000000 --- a/.idea/libraries/Maven__org_openscience_cdk_cdk_smiles_2_2_SNAPSHOT.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_standard_2_2_SNAPSHOT.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_standard_2_2_SNAPSHOT.xml deleted file mode 100644 index f7ab0b6..0000000 --- a/.idea/libraries/Maven__org_openscience_cdk_cdk_standard_2_2_SNAPSHOT.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of 
file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_valencycheck_2_2_SNAPSHOT.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_valencycheck_2_2_SNAPSHOT.xml deleted file mode 100644 index b6dfd79..0000000 --- a/.idea/libraries/Maven__org_openscience_cdk_cdk_valencycheck_2_2_SNAPSHOT.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/.settings/org.eclipse.jdt.core.prefs b/.settings/org.eclipse.jdt.core.prefs deleted file mode 100644 index 714351a..0000000 --- a/.settings/org.eclipse.jdt.core.prefs +++ /dev/null @@ -1,5 +0,0 @@ -eclipse.preferences.version=1 -org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.8 -org.eclipse.jdt.core.compiler.compliance=1.8 -org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning -org.eclipse.jdt.core.compiler.source=1.8 diff --git a/.settings/org.eclipse.m2e.core.prefs b/.settings/org.eclipse.m2e.core.prefs deleted file mode 100644 index f897a7f..0000000 --- a/.settings/org.eclipse.m2e.core.prefs +++ /dev/null @@ -1,4 +0,0 @@ -activeProfiles= -eclipse.preferences.version=1 -resolveWorkspaceProjects=true -version=1 diff --git a/lib/commons-cli-1.4.jar b/lib/commons-cli-1.4.jar deleted file mode 100644 index 22deb3089e2f79a983406bd13a75a3e6238afdcf..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 53820 zcmagF1C%A(vNl?_ZQHhO+qUg4+qP>}RhRARvTfT&cc}}n_dWMNcklPVb6<{?W8@ku z=E|87-wb>+Qb`sR3WiG~jy!JGQP$dJoZNE3J|1&7F)gGOTTNYh)IwEqA>PQbkmt&n?CFmH!n@QM zNT}Gj9BE_E>2ou-e8$mc^w6Mh59Yzgk0(2YtGSL$u;F+mVe59W9zjxSJsiyk!quPNzn6L|&Ll`Al0BDX)^_Pxs&Z#{jsee4^hQP%>W zwoFyzYRekjNUDS`TFhZU4#oGIRRn}f2e+goz-3sn6(-Va<6e3*F;a<0MqqRmaTdnkPoxYawhCPYXz@>WHD2_PWFeL}K>{4pzMi z;%8YTfoJx!oao2nDP>#=45e3;pE9Z7#jMG!r@_G->Ex)RW+wTm)2T^l!Myr9*ld5C zVvNQ0Te!3})~yR$m``X}zgYp&n5D%U%N$F_1`5#|9o8&~ZQEvc*MA`nR(WKPe^H7r zsD`DZw=sM9_|k0j9(Os%xj}%iju%@gio>so0$%I=Surx4Tm^1I$yBKdeRcK^%$YPF|YW5KYl~| z?XtIQUa|TKq$CRo^><(@{TF0`pn(1hvVTze?*sbJ)xRP8KVqzZko|9&KNnzsPDd9@ 
z#{Vn<``;4APR6EI=KqTz>OTcd9qsKM9bEsH0hs^a15E9#|2F8arT9ng-v{EKtA9V2 zy|IV6!~Y_T`+p1n_q6E$_q$O3?*squKl#gRu>6@SQ>pKr-Jdxc(EY!>hMcshxPq!U zgPWJzwvMjr<_LPgY~7j*K-C%okf*!Jr5H<2X@@UEI^oWm#YYze-oYLy?g6ZPRd?HQ z1y4rawb*9EQX_3Lo6nzd>vu{Q`ZBazoN=rW<=Ls0r)tRecqEwWzTtLI^;#wJF7Hv& z40*p?rMb)3VZ{meahbvR{QE<1fy33bjS+WeqURk1bH{M^qZ=b;InB$psZ(c%(NV+p z{^>-a+t3j6;O6b==H%i7_r0^Qyf*#vch@oE?L98a-LaltrH-c$(DBwgY~A!!ffdD; z0!N%x+SL#cC4Q=XUZOs^%%Xa?%+g5oe5HuivaV_zd4Rr`@{3*Bw$U$(q1!OG;sJVf zat_D_4RY95lp|eM2hRguW1E-4eQwqGnu~DLo>+e6YPOI5i)#lYi(P73A#$yXh>*m? zdRC3EoTo>ZnHnq)7o__06&bBH`*UK)2sz-Ew77PPc}kgBe1gPgO_6jmaw}kqK+7I1 zP(&~A7ME^zEVp_9b!ane)(e_VFjU>#)3vR}3$^yXzZ3@et3oD09;QRRiQF%Z4x>PVyL z(aDfAowrVl_=#716!LblH`(koecd*eBGIKt7}X_V^PGTuxf8$TX*R- zTxJA3Ay`}pZr{Zjs8fnMgA`a2?s5>a418^_)a*|=hOgmRR%c+_dqWS8EMNRj(Hy=r z8!>H-qx*gE3duPQUNx|v4lS-|cW9QmU`OdGO1gs%Q41H6BcBuLMYKQaK-aABisE6GfRU6gOl^&d#Nzhgf0nS-pubp$PYhO zHs=>^_tGWI@0^=nF+&F`FB*C8Jon-oiry<~toKJM!vXHSNzk;`*6yIiyf{zSkHU=8R1rwRsu_)07nM}{SbCQ6uUsx3 zRrwIO)D;(gAdQ|pt_)f|^^)HA+Gs7y;6JMVT2Lu1)IYEuS zZ7|J}L^#=abB^ZagZemzjv7MDA5@nUxOKZ7`08#C*$Yw}(^}!;Rjn`%YE;om{Ej89 z%d$#bK@BO*+Bfz&4#afDD7t)n@>JcIImR;ETU+mzlx~n@SqMZ2)JU%olcTYSYeQFW z75Np5*U>*nI}ul(T(l?;A|pBM>NeU^R(Yt_5AlWZ94xW|S8ofb49r@4JX&oGf2&X@ zLzHW2OW7Ejx>Dkea_vLwHM2&nHVwCD%*YG{Jga5iq0f18UOBBCUw1%OkyH@rplqR( zAOsm;34aeI>D4)4i>&z z1x$xJfLK^p`GnIUVcCg!|E{zcnz*|Q^=LOf{y&UwysdY+&mS&jbZ)^<_*e*(IT}WGVo_$l9-0#WISdlsB^?+ z6@A4lndA4|U6X`bs1^r?boMvFClQ217*ie|^Eq7&VF&V0IigeC=NPqmlD5{74oov9 zJ_&1yBgJ=YK1OlUXNM(Ff)-_61`wl$Lcaqr$%+@eK4p2M;XKEdlw20>Q>1wG_jwdm zaAvt-KoS;}s+EL4AXI|%IFr#kNZr}J4^8|wOe={2sTHOdt{<}ocA;Le8Vja3D~2~s zG5n<2ofZ*#Jzg2gNeR+{wZFtti=YF{n$(U1WK0>4xFqO{vw=i$(}H1Nkw`m9hnPhQ zrbdJM$;BS$EULX1^2!JQ;wr5BC?y`rT6YO^`7i)<>-&Brln(c=xNYLkJUlfOf36#{ z-f|Y4r!6zzhdl~q>#LIbZ>L4^!$`qT!k$=^Tl}4NzM<_{3sk7O&-B=$4Y}tt=B-WY zpk!LeUal(xs-8eN_=F6bTn?Ea#eHzH%MEw)`b2aZFy5+3NzgF!#AP{xxCulvXRdIS zFBzG*C6faSGJKLiU7Iex3^Yw8Z8^g7HC6o)_lQmXK%*`-;Lv|5Ly`-=eNqjci2 
zH$T1*v_55Se{zRB`kh*D!*FGq`JT^u>f|$oWwjciISew?8PK{m(VPb3U1G#p4#KJH z9204S5=!>mZ+uVVNW*fAnRof4Al-AWqYwfT_PQ}8z{SfJ$queL zkyE8$%RONNl&CPRy`Yjumc;=uqqyp2oi=u$_IYA*Ko~b)!Mq8~ad3FySVrMZ<6?_6 zXIYt$Yxi8AYfOsUy1DGi7c`Nr3-$TXee-*?uHZ2Dr(^0PNx2d$JjNZCSbjLyc^xb! z%7bO&@6zb;)wq`9Vr9(n6sB2gm3ZzT zs1bg$)A2u`_YBw@gg7y8c`J(Ja{&a#B`T)BWl@@7leu=e@S4W#GSlK3^6~V^0SUYW zh%T8I_7CTe-xjmIv&C&JfCou3J2m#7l2+@c0%H!Y;g>Vw_5pB<9BuV6aY_ie z-_Z=gjB(5u?s1vB_JFCim|Mkk&(5jt5CudlNL3TQPZ8w@a^ehquW9zHbKCNL=?WAC zMS}o2K*C@p(8m^;38MD&APo(F#h1)d9%g3~OrxZ>)NO1BT8kWGUxEY1KI>9D)Iy;EJ+vGHJU9Ks1(_zf|~rqjJJ2m1!5d>HE14@^1L{m0eK9txx@rGo)&wy`Z^VTjQiHE%vhJhqtlf8fPOJx z%eS~4q$1MeR<4A|XAI*>;9VgnH{JjNK`egCW6%hee)Bj$+=p189<6Ri-prk|yB9f8 z58j?nnCIpCH7gk!l5Vez5Xc~_es|)d2&l;66#UN-=ayq$*Xn}?jl-II=Mvc!JkpWa z_qdyGR#v@fN(ju^ohZnfyS2Fez-=7WYxk5{1VIsH4u3{xnaErtv0>ZFoOu`aWO;s% zs4XRAzDEkE9T20tlc&A7w2;1jF0tSMzQ{+G115b9#B2s>+pjkWvp`M}ua5qhQ@9rg z1`P5Qux1e0yW<@0&GKb7R+?VmOota!XL=jyiByS~R>P??B`xQzQ;PAxDMhrS7HJU` z!{=LGu6Jgn_-BC_QXFw+0R`fw61XJ{czvr?wAEFo1|cQ8m~Cx$V-E#&^I>fCKw|ta ztC$a(&v%?E=7hJ&d$*sgm}sBu(dTT$b2gybrbY0C18OBwd44tOs@%ZfAjx^c-$L3_ zVMtt`E*+bo{MfSYmT!=Pqm-6C{k19iF)=j(zg7JVi6-ZX-Vrl`JKb$m@#kP8!3tlL z5qI&;%z3+VhV`5w`yNNVkY(igX@R(aVR0ic)!v&6QXtUEEx8?tCK4N5HpSVR5Qn++ zY8=#j%)=a{LMRY0XN0(M^hhCQ>ow#Y1wJMjD=5J7k&#z}5>{x2FoB3)v6qD6Dg;x)=se{7{oYJ=@9yct z09Ez%UxdluYw~~b9Dh~oe=#9KlRdzHQ6S2YKtL3KO8&o?kpJd6{ttce7f&GR;I3rs z;%e@~U}|UV>Kdlb?}RIk_Nl>hPIhbpjh%%Tb@$U|(#UH*PMp(GWZYE;Gm&h+nCdxx%t7^Hb%rz$A#9YQYfJ~D+O4Ta333lHURQJ?3 z-tZ-II`}bOeq=o9F-`rTtvceySN_@!ws^x<$J1>E)jM9uxJ+lP@$&sVJC(#%yOk+h z&Fz}opOd0w-<+)Caa*M^-t=nB2g^@X6vu+O+{8c>!F7q7duPBM-5cJ~RCu^J9Jjr8 zAD_-zYb2VDlfBv`E|YsLA>6Y;q3wYLn%RvI#QGZYbvs)b(26q7?@y1>>aA?1;>M5v zyCV%{oh+QBmI-gaHmlf;sCB$OmOnu1A;%3z`52`!`*eh2|J3t4xgxm#^^8q9Amyu0 z3dL;PNmoQw-Ztf1vsXmEAxj2{b;Pz#|F8IBT=^3hdsVXJ>yK#LfT^BUM#8B2o*WA) z3dffH`Z&aLH`MIY-2 zQ7%`P)0>bf^5hF1gz+VHBjn+cxaOkyFNYzX#KE5mO}879wpHS97*LkvXDNkGm;*y= zr4Vp*kNjV*6s-$Bg@MIHAqNp71tg{-3howG#cFp+zX&?V4ajU5kzY;4+{38~#0VKk 
z!)Sl5I;CB)^&Dtd8xyph>pK!73g5o6`V1MGfD112exhH#;N!l;AbmUZe-8o44{BnP zsRrG=!kVbg7zv1NfNj6jfAUTJ#UW4*qd*dh8hTlj6-~t(r&?NE*nI;)JGz3Kk@23Z z#ypkcb6JU4tw>?Y5lX&LFKl4`nZg(t1jxnl4QW0dO{DC6EERk0m_yrTy8FX7LPGuB zbNth%Y-8l)m;do4=YP&WB;bGYDN^QkP7;nT_Qq~*<}PH+|MDzxvBEF|Oi1EiRxK?s z7>au$dN@814QC>HUcp!q8_5*pl>{?kkatQZLutI0nP1pm-KUqIK#8V!r18uorINnx z(J?Zt#;xYt7%S|JmuodP-YKEyBUJYO<`ev$n7;8zDCsL{BCRU=T2z~CD_%uzl-;A#=###3@eJMrWXAt)!wA|7f=x(7H8ewa4qGqWuDXDI|ykT~it1%BGsYtdWMs> zPdE(vF-TT)@b_SxK{G0lh%R~L(D+;aUf+nIZBwkxyB?+kgQX(}O|#wJ&->-K&k>8U z*SyF;xJP)wdikCGm0l+LAKL(sMHhTmN{DLYpMXQ|d7i=Md9N8mr^VxZ`x}X%!*$y< z3Q>zeN9gdV@WZHU2VX_7gqM8TER(@DDO5^xG|F+g=G#!XU&{W3T*gvUZ6IBn7zWL+ z3n)L*sO*u$x8k!RQkm$mMyE)IG3=19$(l0aCK?v2x&k=shtf$~G9;pOlU&$x2hawu z3KkqZ@!QBjUVY9JMNgrlKBo|W_Md7kv(uT12UYIFT;SBoxT1(dJPNK3*K-vg5F?5> zgJp0W8Mt&WA{)dQzd9OvQ>9Ib!P0!VhKR|*{$Mm*A z9rVJJpA+RJ5|roOU|zS5UZE~1X4S0u)xR^khTUkBt1pu7!7aNp1r)Z(1v<2YH-i#! z#FOE~Pwo)4P*srRvmn`-c}}c*Nh~ReC`0&ab4>~pp+8ay(ypM#$&|26n)fqA!KYf+ zgJ$1vVLaB9NcSUfpbNB9FLx`}x*3?LsOTG;#QBf^itvAj+&{r6*+}z)>dy`y!yitb z_CJGB@xP!)CgN^wXZC->e4e_j>!JqQrx|zpbtD;>lqf-*v^SSbKSH7qG<3q9713~6 z5%5Ae-7Z&gqZwIF(PC|vqo@9yeb-s>6w(4U>$O~$5rFjr__}l3lSiMG|5=BNBt7a( zO9o)3L~T!I)i+}ixa>zJUWGq!7U7-*#v)1mhwTRPoG=w+3zeagnR24Ld& zbeI^44Su)F)L#QTx~AWFAJMf~rQ7#tc%FaFOn)ibG&PfXOSWB1nW~fIq$F}#Z&=uC z4Ak0Hy)}?OaGa(_*7)`}ucH?oXB#wQC9E*J*-CrL3p`OCwy39=(O6yr)J+)93)<{Y zV7_7l@U)z^TomdqMrbCn70M3Q3}))5^n}5BR!5U&e!g}=pr(N3AfRGq(afk42~W;Q zj#{lBpT#T!H1AR^M;MHtAsYCH3hNQ-`8n{EAYMgK<-oB5o-D3f4r>4e`Odgy+`Z<#j;SeAp zC0~SMuB-zIh1wTWNd zfRTGmmXSHmS9U>k89fAF+r4uQ`O9DhFzAS5n9@8Q21YA)7n@))R$=V}2)oqH>LrJX zZsDC+hvYHhL-VJRsVly>PmhN)SDN5Zs!y4y44jQjcu$&`JEO1CPSvvCl68*H;i3-faN~G(;U4&QqV})kY0NHU= zdh+!A)t=%X2?7%wzdThFk`*;dk`(M?XKgN(TGnuqb#7bPhem%$W^nXE9QOt$q~HsI zBw8Y6Ad--u&ePADMEJpcJXm+jeEr4|euJStwN(+xmHPAjPHK8Dwc5_g2TfKJht%m!8Gow$>;9mP-M zQ|_)J4qX6IGD7CuD(Qh+`n5PN)!1-?D*m^)7km4mvoHnZ*bVZ!ru`TW`=^d7ZNd6# z^&9F^j)Mf>zGSr(KP%#V6ZerL2r=z+;<*ScthNL6tEI}TEN&XB+k@9w(|*?v|EAPk 
z5zlX}9ltUCskb-wGL#MsNeN)9YIhstKS_xyyQnL3#&EQ`mQ&e|Wo2ym6$jQn#Y8@7 ziRjkRJa8=i;%yH~K}$A3sS+AevX@N%5Q;lFJ-jOokL8ky4mJxs9`^N~%8kTdaSX47 zowA?sURd^CXVM3BCw_?BML24%h;tp@yKkU-2)U2_r=tDmR(r5TdunWV;@t!w3IVv# ze1sR;Q75k`3=x5-^L&hSNmyz3u z;+8nH*^(r}72BUtx}_rV-~_c;I~aJfWUwY%l8+HqAL5p%$o~HvuAQ#4KV&l>@joNehbP!Uy}H2|U=fLZhgMQB&7;o;a0&l1{;|8hh0m$1)prD`KQMof`E`ohtifotEAf!QYTtvmLIi zzsJJGvI%c?d{1Vc^F}>qr;aMW41Cb~Xu45`d~%oWc8K`6w4>a;2ocNKwy?#*OgmyC zo+0Oh5JT^T{UR+15%x*jc|#KpO!@ru1}TFp`YA)2f2s2Qc7rPDw`AWM{P7IY0QG#| z0^RE&h*)sA|Hvn`1nKnxf@pZ;ZwPZR`2y4BL^#`vGqif=9{vV_Snln`6zX^Y-1|nK zzg@GxDJ*uE7=CN@;((OLh28i3@(c&*%@G*keJ=%c`tn>I{)j`tu7^}Ho{c>zb;Zo8 zO`EpWbBFP@Y=`&QwoFji?4b+;Kw!ZMW-gznV#EusEnss5@ z&3z6>7RQ6Z6vmFBx3gcHBKAnjZEi+uyy0d?&4SLRG;Y2f8(%&y0oB}%%~%{d&+mLk zDlkZlhWn)c9Q*MpH;vhJrV#+tyE?Ja+)*~9CdJLp_f!H93~0bu;&2WKiTW9@R<`}!St~cK^(d}Stem34lGEnNtV%x zD;?@B!kq>B+>eZ%-$M=0JE!ZvJj2oL3XBs*=(8*#Tg&K{VugO*zsI?0iaI}Y<>%iJ zY1({)$s^ShL|oB!s3GH5I5k%0#!9wEzdo1$!v<_euY_D=?PEpj2KBA1Qq`Rkpw_F# z6N12&+Zy^Q^pJLlg2b^h*w1K3Wo8xO_xGzYu-Z5@aTRrbSEIaLZ2-cWQ%u~lsf>wr-)d%K=Gjy1|%!sF1dfzRK4P6A-lTX4qSU3G>M#R(`pRDM?) zH022hrN$Q=IRuf{jMW)cybFyt9&&p}qhx!R8=bu;%iH7Q{mo&8@~nzw@>Ov1!IErv z<`o_l5QWN zeRTKE(+4f%M0Hd!5!EWc%We@mPa96=u(zH@NYl$utJgEs{T=md!5LCPsb&CjY$i`O z`-^@Lnh;6L=h#5)sP<7q$R~+{79Bs;QFH}=CJIJp{oH_v@Ph32;vhuC!X}fvT)4;f z?kVgT2TyvraXQwxoN6Mxn4?8>y~BK}S)~N9xa3GcYGA1HiAJl>tom*0WE1P!etVLU z4BhR)!v(jN#^h! zt?J~@TLUhrY_%nwPI|WxD9eP>T0Q{bX z5tPttk7-2D?MIrP(&cZqjPLZ*G*Ix+iFT(rl90@V@Z(960y$U6eUlrEL>0=LNesgu zMjgceTD1ZrQwUcc1Bx#^`xD`o%x05^LYY&nO>Lnx-$D0mc)XRXE_!qZnJPa z(UU}G%QmV9HU#aHVWUaX2a7_m_~_Bz?72~JLNot*z~TIh01lbPce`dm-4{D1JnI__ zJP2kzm!cjqXLk?LyAJrd+Fp$r?ubEJxPT17V)zEhz@|WyZ8(g{-Zvzmwh|QVRn3hH zAFKg^s|++^yuvI=5kj>&QLt;(xmEYnFahy7Pnk0gOIz);Hf1(~j3HWSIB^r@;EwlS zWO3R1QogDsw;(v??lR*yMPggD<10LE?Pk9iwD~2>@c2x1>T;QD5^57Vn?04YY0ug! 
z6I&*C&z>82a6ZTpmd4KrB-jc1MREsGax>!X+=TQbv;X}`C0!u*AvcP(wegfVN{jdKwc=aw?~ElmPQo+j2jsInf$ zhH924@k3tCp!t0aVh$Od$}d6MGdRshgYKv1cTvHX0n*EIsmv|cN(x3LgI98;Hw0hC z^+kP{DFkwO@kw@l66_(tus{jxD`u@f!(O2tgHGVUY-u*XrQQ$;RwnPIO!*EK6KH1V zq_q9hg9aJeM6S}TTk1#kYe=##`;q`f_P`}rWK9}A^xScprQRdj2G|>~8ftfuD0{zD zM%}!micLY_&d!;uo6FmB#>Tp)qXB^l@#}_ATr=|I5K41o*%`9&7ELxYold=BwNpX> zqBtkXu)IKL5}sO;n)0sPqUf+_&)9gq>3HIBrCPcjhYQUjq#7V2!|W|!sbXuQ*|vA_ z&YO_N0o4SZy9YqjFsbjE-WA1vp<=J8#=fWq(}VVlLS$PlQ0K6CrM==d6MtEgAg!|v zD--k=Ry^%e4NTgx#<;$Sb#;l@xQQdMSq-dqRea4}9TK>=jBssNtU|I{GpEY)n{+i! zbp33K7X7KaEkJ$=b9Lbj-i1Gk^CT)mg)o8%8OfBakt-bvcKU#cA)Ub*~v55D@c!PP>%;i+*zWSNZ#o>Nia@ z&QVnz?Q6T{T6Z#p9|nA*e+?_7_BWMbv7PBcTF`LgerFsrlHcXd2IRJ_wIMRn52WEb zF;rk9>v7t#|F>9Yow2Brq?3~UZ$g)^}#6rAh&8cJm3S{tnA^HLEY^)+q8 zxPLQsp;_)Xg0DoDUs;q6D{_AFwj7s`Qv9w{S`uS7)O!QU{~1)g={@}>p-5xQ@3_M2 zRo_+qP1&O7JW83|I&=N$4Zg$n={pD%fbomH8Ky_2e!nt{Ch-ZyZk17;! zW}w3QlRob|;6fiR?FK*$ks>G{Fg4ipwb+X^( z$YGeEqDD*(-y7SD`S(Up(HQ#f>kgd=Feu_Var=>D2|Q_fqc~|sZXjb zV22mS>jx%h_x#Xy?D@ozdGAXj<*kq!f>=2%>2tlgY!B^VEjPI~gCN2;UQyBRNoJzC zA|6Tn&M@v#7@)kghiJX@`^1qQ_mU%tEPK3EhjiQN4(z*0O*pCUFSKjMiKa(fMl^b< zE$oherx>qK6ARJaL99i6j30Mh$#v^FEkNaOf3y65~1?o`^2()|HgugStg|!GeK>b zOmpO%K)2)D)peEA=w;gE?CG<#1-o`9sYHNFYjWbsSlGKqDoi-*^QHx=@wTw=Gtih+ z8r_P_TQ#aX%gi%sORTY6%z>h#Ow572qekp0xw}kkN9Lgw#hZ7WSLTS)2(ZhxK2%VQ z&sYIt7*;PTtf}9maCkCR7o|HDmF$@4Wu1kD711kAB zMCC<@><5Le=>((695jnG>Yh8IDYXx=&>q%AZnZVCI6`0Zj!!FuOQw zGi%f(Yco~2OwMjuzfR6>rgWRk!&>1it&h#6mrNOAlW{cNcaeNiC(0y0pyq9>fzOa8 zc9S1}HM%2$-&m?I?yFr{+vLJ|`2j5(zZJ6P@O{hLBa@$-?gQlesPT;PxH*U3+ZYVR#z&Cl2FXgA3eI>)rWB{~Y`~~M z^M9k|lk@x7RZs_~DuZk97T;=u=1HTKAx0PtxQ2n8%<7z1M;D`)J8e8RT5Be^gAU^6*`ND4|2Fsc;plALE|8sRS0HVBO^Hq)&CQsT zX1Fhp4bRQwLlB%T&IQg2woBIl=DljTT#5HU;SD5%rPdWp!4Et{Up~H8UjpJXhrP9( zD14aLf?yjyBxKPR+))2Xg7uj|T?HmQhyXM0{Cb1YflXtr=mVwL&Ml9y-EW`9Y%>%c$Y+qqx*z;vU+^Wm#GrqmPcpC#H56fJU+}1~YH_wN_pqET& z@xjh#cc8>Ir)6!ecGi7;AR!Aj)>c?n18h#HxDXukB*l2_p!U$ZYbF)%mQ8TOf((eh zMqzfIlfNi$@uJ@}u{B4Dj 
z#VaG&2k|jR(U4Y{@G3R!RP5vGSMV!VbQlck6BH-#VKe7OpjD;fYE$S{)rZLJ@s8G4 zd~FZo^i*h;Q!S>Z6R-;~?O+$`QiYNLF``~>ByGOB)qp8ON#YV$?0RuDI$WB%WAaWWgCY#z`T!1V|V*vunEa8!1zk|Z<5{w z2o8zn84RBkTbjJY7){8z>vf^q9$^1UNdAtc|3TSbjqw3v!Z-eZ&{h5C{Oe7DfA{b! zD60KwjsL4J-hZke280+1Xs*kUd6cL0#vs@s1_^9?gAgfC!*~@LKjpKRs?+0JH?=M% z5}ZA58O(_fa`;2j5IIw<4q}8dqGaZ-#oGezcE@J06Cl0&SV}sQdkCf@(J`KwqKzW~ z{UX7~Z%+oBUkl@laSi;$Pv!eF!*zr-P0WVuJ;-0P|N9*OXZC+J3ZUcihuQ%H0hRt) zCshA=jffb#TATi(RX}-M22>cWI%9dr>Yz6yI)dsT(N)YzSSdJK2EoT>l%sXZEh0aD zw~Vqsgm6zX%FzNOnrtniO|9qqj=+x{K_IqzH3^JIaXPdHDXJ@)nk@oi-Ca9WmxvPs zl<)Lq$J&hDDNj0E;IVS8v5dj3^)xrXjb`}2N7m1?v>ry)-Vv6a<}k7GJiH5eLz1X` zhSeuE%ZuC-Bh5(5b5$a$=6dEGbb4?4pv(@yhTeE-&d_XI@7N&#tuL3N3|xEzgeKSL z8Bnzx-d6^BERSW~^zyRus3V_+EA_JQUrw5$?{+?2>hLyzKT0;*mNDjbQ_|@@HLDic zRY~2mUfbH|o)tJht>z16TYz&PL0^9TAT>50Q<^Y^$Nt>)G8bx~9|iFwusq1!^#Sdn za7!{J2F_p}Xh=pV3}rdRlRzcID4@`&UJrOJu_K$PNxh~$w1GQ>E)Yg%k9TGdk`~ax zP?hG4%QEw5y=8~=1O3Z_f49>=Etu_6ygT&Aa^-)l$nc*nDEjBE6=Mf8Ico>=zuPfS zQ5FGM7|H*b6Ap!zGqHZYA-}9XJ9G%9YK5o79ALQ+&Xp4e%9VOX(Jv7xL-WhO?n1w| zoHYQGJ`b#d3scm>`=`p#cn`>A; zI|q)lB-Rg?a}&RPFo>5u@;+U-J3R-H=T4OG)!0Fxc;DTt2h?$IMv}c7+0kWh8lf9p z*ioLWxDwz+mKP5|_PZv{(qfrU#UqF+CCM^_>2K=pD0AMvsFmNYne5UaLE^h(BS$@# zBTLdIj7pFIe_cIp=InBEca>cQ#E%^F#d9A{=J4h;9)b`j;@@6bU)prLsE%%&q_5B- zzgI<&Y?8R&OPLhbvZh3nL!{3cc{uw@X0$xW$(s0XMoY7@mQT5Hc(T>nU)dPufL1tP zN6XvCX|ko@ycK^W&3gfOS2D20=F%l4Rpxi3C^jOH^mwz!hQdoOP0!cTy_%Vmq%26P z$rm)W5muu3LsxBC^zg4|-NfD~Akrlldt3^a@Xoff#>70_N{c@l@KGl>iZ?Y!QItV! 
z5fY6XZ7s~aDN#YW2iI%jDuY_bm>lI5m;q6qY^X7-^0BA}nwEA%$m6j9tKDI6OnUu1 zIrt@U36h?W`OclAJ0|MFT26v0Ry~VxmV<*B+(e#|bg|zHS|sV(;Y8nUS4b=5#|2Q7 z2O?~7@pn+<-NmIU?S7d#lJyECM?3H0sj-GIFFbe;i`B)BNa`e4?n_ai9;CEAlT!;Kl?@ z7^twvc3Da~0g0EkLz^y;k|dq{4Ak~SmRjZP+_i;>au;te)hT(yoJ{5wZYJvSA2N_I zaV!**vrD0VOhmTTOG1gK0hZ>9T+6_im|5BJn`r=8Xjl!*^kI!TA4>gb>Do3|vdaMY zFz#Felo+SD#1EFMBB&S`u0dZ`X<35&oqYk0`h7|>|3itzvqVJU9h?qjXarl2h#+GVOI)PSe+EQnsU{jq z94=gtlUABJrqZ+tjI-wz>>l1*)kVdVj$UYGo4vWD-$BVU&;Xh8ID1+PTbHj}Qw6p$ zPKmPMCs`)i6i4;qX+UKt#9;xV0U@E&)u%4vS`V5mY+LhL(GK`=W$kWoiksMFP2wt` z_|lEV4EUY=xR)7dhb|kiq=yndvgxLt(X;EL?&@xk9RuAYu`zqJ(sK+7lBoo|tkMhJ zl;S!u0J_r4Xc`9|qZr;!(&QT27m)=f^JkDM(|dsqy|@G?CY9euqHwMkOR=4k?VwVi zMbtZ*V%=`MhlK!;uF@x+Xu{S+keC96*!uZNKx8{z{#6fz@v9+yxv~%YDhtaVS+$V} z1gDnR@0E#n+B6DUY0YnRX){>rWnF^lXQo06w6k=Hnd3HUvZjn5+3oK$Vk+1jYl}J2 z3ss5gM3CQM*D_`a5>V*ONv~VWtei5xe~6^H7WGKEG|wDaedpXjNx4Nz*?~NH#K6A^ z`AtowcnU5Op=(r*(B|9(>yf+}=b~U;v9#~e9`rq6W$xXr6~<$)h9>~-gU2t}vOyz) zH&e+bYGXQ;sBL*)fdW$=vXA>!oUUjz;nE3nOa2{tq{k@)0kIz#-yqo0>5-i#09MZ! 
z-*HgC`_%)V;P|+J{iz#pNVyk)e;Wu2N1OTzgbPj~+`qx%+o!RmgSSpVjot;^YfLDk@?75*Pi&b(ogb}dsz z+~a}YZEu?=0Oi*B<@v%1EuQ;M0RMIr60coywC9ByQlR3n+_@nB_dRiF^m8m74?$l9 z{!3(h!hxmRJ01I-e8vgYLr*Q{Xw zbV|1+p4YI-+q~{|mUuA|;THzI48OvFs@8*zO$_!Qn{G@_p*1T$;GYfRH&{lT5+#Jr`l0 zOU{bNy9T8TUh_uTS)%YQS^g`RR3xxl_KD zJg8-pw}9u7cr_QY?1`(?S682IvjCLw`KeTtEFnqKL0Jj|Nb6#MHddDGyd*>Dx5JWi z;Ryz`IEbTFP*Sw@xK1MyRRz}Kf{&f?886LNo>#|VZgbro-fH&2LeMaa$h>R?fD|J8 zvNyDY^!=*w?NJ)qxTyuU6axpezCzkX0gnu^D^s#V%DJ-Is@HosDHDb%z5JViDTqm5Fbl>yWx9c_y7P?OLOmmX0*Z;7 zr0u zx+FB&qv{L5^LJ6G?NmAbagFzBT69*`5L*6Da{G(2>Q1GPkpc?}hdgcTOmdgf*$QU6 zWeIrWTm0Bk9M*7?oW=%Y683u2=g|Xa9qpcmO6eUPk{4|r;Ff3Tm4yRKl}#NWn|@515-vH4RmJ|qj-jtZQ?Koq5|eJgqb7s!NY z?{{HvtUE$~3Wm%Gq?BqVzSFEBvfp7X^{duH#S4@2LHb%pIP3)}_J=(96;Q-Nte6jJ z@1urWAWuO{XXEh$b8t~}r(##hA;?{4f;=qzg2#wg(Q8A|}|MFO9`SiHsN!C}WTd z;zBFvz%F&rM_Iu_Ag@9k=wL3TOh|?TiCDncG$3YEU@Qhmk&J{=okU1JLB;B?IkJzO zhMGoA$dd(S4Tos-@D%tR<uJz$JEr&mCb zcR!+fisrCV!<-4CnYmaEJrM&TpQWtRMr>rWF%6PBO)q5c%yp)>6f?Itkv!c|o})pw z5K*2-$(|D%-uT;D-BAq4(dbTT0l83l^Ad`lRD5+D6${8S6PPRzIOJ>?E=dhbCf z?fbW83+BokK_^hJB<1DyfPCh=bc>37N}@jKgRP6tE-1C@ z+0Ysxas?|atgn0XsM_smUZMOyguP>w9D+H_afT#KNyWk!_zMjO3EX4q=;8P~`jdf3R zNtR*HnK;@j@%uabH7?5b1})D|CYEIp$*_o|n}^d*!D?k= zHLLR-0hQJ_U=L(%LwLwaO}7@CfT$N@9u-sRpy1d z1V0zL!)}iZO|^q}YX)5_&Y~|47wr*l`4a1OnT&DL{^slmVTY>z*sX#nqZ{5uHaNnh zRpF5FhNaJgJj__g`%wlqo~oW%S4;f+XLw91yYe6GQ^k&5>N1?-ad1rK`f$5(^EUmn z5It#Ne8rXlfHXFLMeG2|xcqCAa~*IK`iXi-tT=@M?!z#l`5LDho2F4M(BP9XI&!!mNFZaxnO(3>$y#~>C2z9EL ze(ocn)ly!_KjZwE>iA90!@?bmwG!?pp=BeXiwM%uPj#~~iW!RuiSK)lwwJys?$|C4 zbGZWrqEnbp6z5DUdT1!rMJTKVp;$(}Io_K|6d^1F{s~O8#`W5b5cCxz<0-p)5PHvi z4V;ra`emL%W{X0MdCZp%Y%TNJaRd-S3?oOrY3g^u(PPvTamWP_YF@#_Lq^d_3LPK9!f%m++6 zt>yY4-*LEPm$J7?C%g=7_0(*MXe4bu{Q2>YTKZM zOZyXYp6!yDZWY>Eh|n`Euu##czTs$*R1H0n8b&k?tQh($kCLcrXN-0~ERdI{;0fV2 z9!R#d@}^|=eoHY>v5sV+L1~$dQ?#2U2V1u^AAwGgX+2+uXA|46TVGPTem z?h5Mso_z|=dv*5Vb88VXhUTnag>HCB*H(T(Hi3(Tfer=dxo@&?L?Xl9yD9Yq^9Z@H%*4^H?~Rhq zG+Glh_h~w$!nSxjZz&XiQBH1iI<*Q4VW+c@X18Z7Zsc@e!4NLX8<(AFn`sr-ksmgw 
zkOS3m+@9d9;M&|{c0$Zh$DfzP=M1K4Q*oV&pDMF@gcopI0}gNQX$G&-h50UjHcTu9QdlCxXMD`raRJ{9Q6% z=Rs_!_gvR!ZAkTLsSvIG8^24};r1RmrL0G6wixS*ALE zQEiFdlM^v5e-CMYqlclUee0N)KdO-?nXDHnD$-Rnb^P2$Z%|r)3=_d6FJ^wgJ&xav zS#y9jyV6ne|6zOHq!`xKVeWfkvuNI>0yp}e2Jj8ZrVvD62{5MzD7}^J!mUy-_C)mQ z>pRep-|ki^=`~7>-d*pvx@15`-Z>H)R%&;XVk1iWXgMb92M|{D8Cy4E1;Cl4}{`l%w$~-m}#q zVw5*qA@#>tt@RtpE^Oub@d$ulBPlETe#*!h#F-+@N?*z8hxI@%TJj}x&-fq46#R96 z%Cd-v&qKmCdu4^BaLcO_Lxjq;yvZ}7YGf|SW8ZGOAQnlZ9gvJyeyU3dMwL&<>cNB} z`Oi?)XBACyfA|{=2LSaQ&M9hdn9!Y%b8hRH({|X^e!?MVK={QV6%B;@fL3LZ&N4+O z-OyEfwUCqJL0h%q@!i;8m$($m)P>@__2Kh#mEmN{s6QQB zcw2-yOu=0MKWtOL!huV=W=}{LC>!vazk&YcBmdJa{!bq{4c`oMf zGrhgvK!z~?VJ;qwOeJvJ^JSkQ7; zvm6Nv^;MUZSppn9WF!di)E{;B8%!;W#ZgpKlr8}x#AauB4t!>3;230-VKEYY$4S}T zV0>$dN!MjZ=oZvfy_&11bS#pb_yo?DEV&m_S{^(OXK`oU;gsb=Qn}$2EEfM&wz&m~ zK0$qR)A=N-AW6O-Pc2=FjCN%h6`-Hp=F6_{px7Hc;$Xgi)TOBfrfPAiW#gqjDw)!g zpeLl6EklZ2^ibb(omdze?)vSoe$@rzR3}M$AqVkY!GzA=IcECTK(^Tf@q*{4PbV@_ zo!v%1$5U5-)WsT4ZY*&nSjU@Ewupc}(IUE!bXj1?>u@sUB{?6n(@G+^psl-5QOUFb z&dwiXWbzk}^|7$%D|S}=$HSN&(6DL(#gH6nZ(up%(5CZHG_@dFI}Od1=gF&QTubG0 z)EW_#jXBfjTb9tLRqIuGw_Gj*Fw2EW)cfVfyUT&C>Zl{8pcO_?R*4(Bf~S=E8B{*v zJ~@4)C3v`9bq52(Gu(6sQsXB)#YSpQm*%4A;WS*k!dJldAKIE|Ok_yd)QCHCLCxN( zkG$Vs79XrHECz_#k zz#hqCk@SF58oxDsCsqGGJBl@n-3^1(9TA@5G3N=Um29ev*vxO3E*>rEo_}s!xJH^76QJ04>>v z4{nXZ>be=mS8Y@vaYdn1Vk>(_&%Pn@WybD=`7;CEYE$IIE7Y(sIf0dpOR_+fjaxy2 zqK4|0L3K{A+??Y=&};Pd&ZIag)P-rBa&O}9icwu`Lrvj`@SGk~)X#RDP~Q`Byhg@2 z#F9~?d!M0q-&s}$djt65!nOLcN+(%4aW!o~4U2ScO=!Y-nH>n$O^uWlHFx!hI5lMdGl)?BBogk&(y9Sqoa)yQ%@JISq$A-dP#dtO7?VP zGp9t?I>`ULsnIm8>omi@K+TG?hlN-*>63umHJRq`m-Zc(|BUr}=6W(<3vqHmdJ|zEk4g10Vl9i2w75rn}7kCG~eMu1yIKJ+`^{TQ;{C z{ltWsR8h&1U(ogZ82@~cFSn!0MgGOr-sEJ{2W6h!3aj9cQ^3m$=grE^Xa465NW>4> z3U7w=19OD*JsxQLyM6V`dt!ptvFR&){OiQ-eBjIBJg`^#>*PIquousweSNMEeV}VV z9`v)PQtS=PxIWm6kCJuLRs|td&W9Q2t{4M8qA>)_Z6Tr$qDSXlB8C~~;jmG^w<>xO;fU}Bl8~3Y=$|N9 zfwVmfX#eZshk+w74t>`H4B)q9M^CMAJ>qQ|X#X=Y5`Qw}ul1OnUA6u{b-^oLnGq{! 
zU3+3I_U;EB_ut*H3Q&VN!uxbt-E;tg&{ZDdnGNW#%3VFM0OCUk?hPmu3<+brCv8<2 z+t~|{r0Ho%PJIGMu#1yBU6@t|`0%GSP%>n;U|&(nnXrb%V8h+wOn)b0%!Q4N^2BHi zhedXqCnyrkY6IL@9N5Eb6j+Ej@$`}=YqowShWUDy>RTHr*ZKvMnr1hmZ7MBXD9fTH zZCXkjp9LdZNai)Cc2lkLp?&YzcF%*3q;zB9BgLi|((QQI)adc*q9;0|ShPQC^hT3F z(li$$*+!>E0|P{6?X$uNR(tX9Y2vh)*O^=DX=W*8_C2hHI-1biB%CG1{6$Zyo`;LA z4uYP|ni9n-2spLYR_ay66$5+#(+T6GN_>`J?@v>+d+`Oz(5A~$NV^L2w-l5jN_){_ zIdK_MTO<-i9HRGcmZ34|2splSEF^LKB?db5b&6j@7JkltGOTGgC?9*1Sd@Y!2P?;f zc_6-=BQk;sBN=9)alOnEkj`d~_RGRh>BDG^ei!i7Mez4#I~S3Pp*50Ot|~R z&+Frc5}GW?OzM?Ks9EN}TOpYzm#NZl-O@sa{|Jh;oi{|{G_!`4r&J`WP#X}h*JHi^ zKv0D(>+#a7jU_b(^7B{RZ|>{xEQcoH=hjFVehf4RkKAE@J2&JVW!gf8mw=rzS4}5c zgUVf_)}AZh2WHHfN8x50GG0ellI{PXUN3^#EKsZM*G9uJs(giU1q)ES)B2R_-%g*DKHz=Z?1}&-P6au@`$DyVh#ZcLXZ3hjfQk58uO090i zc_B;os#vCsU6hCN~qh;V9g^r-WG`{il zRIA9dkuP4&8vRKfhb~?*cSaqDbZlakD}8i$^f;5}iMycl*d~3TWoK_=tZZP}A&q9d z>~JTDatlNu4L_Cvdf?PzAW$S$8AUBO5CJWxZWSG=N@aQuL9t-EbCwl4I}}#e+j~qN zT;yJb-XpuPhHg!%Q59XSR&8`~P}3$}rrju!muXSW$*h5@y|zHXpI*N)ik7a(tX=uP?90(&4Ps(PwwW>_tZD7G3M=s}BM=}>sda2iU3-8^bZgwc-iuQFl> zY%*{T92<*9ONNtWiV6{y+B7aXV=!G5yLMWWlitLh19(L2%QNi?sGKvpnElewUTD~4 z=gWrZy2;h_ITcH3Ii&5xf})8hGlmDWQJK_N2=-Wma+#mNZ0A3 zoput5!MbA;G7*68XXd#C5++mXvjXKXd=7OIZIq zV^a_lNha}YTHZq~zu5uaxf8Iz5GZ_Vh^K^ zusPv!WqLa7CBW61PI1t4155+!ZIB}Aq_tG*-;#r5<4^`@qNEu!l@WYnQfYM_x|+RrhMP2 z7&h_B|8Q$Xp(VjIEpDTfcC`L$6b>W_5B^NX(;`GF67kT1gq_O~gzFonq@89~b!gT+03j;|j3y-Hd#`-hXAp1qrAlGeKdMMfz$ zj`%cBJu+ftjD0c6q5CxV$R*IWD2;YuN1c}ZTMeX)g%QL{Tfs(#9QiFU&KHp7#uG;Y z$X=Sk_Lb!65|MZTJtwc5=N+Wv%)L@g4E7ToTP-Lmo?Z9{P9)?m3|e7$oCF+BWUFIn zyXf$fdbvuiF0Q}4+fqsn>~-UP?#wRx-Bu?Qr7nkow!IQZL_8yhJW&(&v~U42p3n|^ zR$U<$w?O$M5c;J?o>4g08lw>!R;>CZQJx{?dvex&8>|R7v%Xz2Fn9RBcm1^c9Z;bv z>T+!kxbBfxCym_s12T4%+c1Bk>bx!jlrN8GH=tWXWvMSc%_Hz;E*lQA&~}(pex!E! 
zAr$Oor9629Wi$@cFJEifi|4>U%Y z*xe_u>&qf3;O$D0H4gn&u?Jn#QtpFbaLK#0#>0M|3a&ZW0-}e?h3hUE|Br zW>HQ2>R{?PEP1YNGfn4ndBQa?35O%$Q2l5}ow?@P?ut;`47X{>t0_lgz1WLbT@vzg zOhzdyHH7ujlwz=!8q!jYZU1c3mMha#Fgq<(l5FT~ozv6>IHX;v#$UV!@W~C+X9xSd z?loq7e3LF6Fm^sUaf2w#!Y;hgtXAb+rZP17WWn%worpXyDyTHcxC3ACss}%y_X>U=WbFmYfn| z>zH*DSP)!$$S9Ghazm6@+7H9+wXRiYvfhu};Du*cj-B+TC;Vl(-6-s2R36m zhqD%K0Qg<2O|`IP{2sm}wZUuX9W}Y;E??5s{8X1m^RD@QQ7-iJ08L<>4rR|wKPqZD znoqMTdYEYolLE^HaY`3(l0hrE&&$ur5EaH|4y_gV$zyO08G$zL9_{I+mNQuFT6iOc z0T@ee8w3lZ2QYTvs=1PF7~xEp*E9syi^J%}ur>45b#87@@vS%`ZCURerRU?*4#6Gc zaljm0YVsBR9jx!z86^-nyMo;84`{SNsrl6g10cKg>?p;c+e}*XmgftGsyahfx3j{4 z25VRju0Hy+M?w61pf;JOa8~Yiu8y)|LvxOYG0xqDeQU_cRiOi`YCw|Z_uuWf`D*qx zdOjj=ev0tfogjxCM2lhim?S*~6d}~`odtKD4pL5ooWYRy;3M~Q5au8{9y$8F=ymCj)IK`h9+~lPWWcJC|sUiRvdmXG%5#&h(mF) zMt9MnH>mb87om?SMinvuB(b(Owuoo(jjR_=DbcyFV+<+t1PtnTauODa52wr~g$Hb3 zgnbe137c>5M~deCBWi9^)$hQKdb~O}&rzInj%AUn^~#RE9380vC{Ea;pccF>M60kn z3#Rn^8c2I?E1^9!^kE+KdNZ)#ms3x`N6)U5d%?rhe|*8g1XVX?hbG*xJL9nf8N`LJYexZTv-SR?bNmm`;3{dWIf@s;4Vo} zPwo>LmT;9PnN3yHk}R9P(NfgP`uu@t;^?!|@dos@!ro|}o=ftFr>j^ZlbN&NuG~=$ zbv|?TSp`TuUts=qMD1X3*?6cDZ&$0)zy8Zm{O2$KpPG1G;=TXWaK`)T3k(?03ypt(vXri2< z?70&d$}CEzDkM{0pUIhTaUtFrIx>s$40y}fY|Hg( z{)}BfhK?Fi9)G-F-o@p%MI^djg|-pq7?-`$37UOss6FQq^PU@k`N;#V0J%H45-dCu zg%=C_=djFau~p6&vxoxqOH#mo$Iw-A&rxT&p04B#v*>bOu;J-wR*R<&-AP#I)C0O# zhjY9TDkoBr2LS6%rhJ6aO>0YS0DMpyuOs1a*g&zh!=JPB5KJq&SI(~+v4h;61bGf0L~L87VJrkH@pj@Q18+5$DC zhCkm(p?yZE`c7TCDpNujC%PJK&~t0nBEO5m1N8voi>%V@y_&(nGnDU)+jzEaJbG@j zIx+1}kJ6*C2DYDbD_!ZaCSpS_S*c2CYP2<{LO-MVH;rb$U4FqP&_{XB=qcp(hp9O8 zG3i%9cJ?v)&k3=&V}lf1A?UWlOUrKfkicVhs7zT=`oF&87+RK*QlMfX_oyI@ApYs# zIh`Sj^>^JzKaD&?8?bwyy#&q~mf#lNnX8xDE?HrI6Dc*xx&^ZEQ-mM^=^`e#TBIqqfY;*Ib$u52!?iyN%UDHmbGI(gg(gf z7-G?(Ld-bGg+;P*87MB)l@YU>uzo>_^)Be=St}Q~m!L7sKM)$J>9;O7_2) zEyn-;to+A7`+wq%(o%L578FrOdMhr}*{cbGqlom@a++V=f@9L-@GL~Vwcp?37?gz zGH;Qi=6c5%vhN4RoVY%26>h+%ycFX#S`=Y@5L-uyTJv~hpA}uA)aC`n1~U?^I;26T zh?BnmNd%dxI4w%YaA7b*0`=)u9gc3eh^@ghw_W-5g8j)vtR>Qn30DhxVuH`uU|pC 
zCBLI16>1f<>*{`5*0G5}mad{VwxHFhUDPbU)GP<|?F_nHPfpvjciJb6jd8yLgBoGF{-<`$~ekH~~JTibT0znA7W$c;U`gwa$1N-sY z+ZQk8^EEu!;l5g8b+)zbiBh0vss{gH#N(BzY}d)ZKphcbuagrXX@Mpifj9(}&oxr_ z@H=yF8s(`ikjj5H+yuI(Vpj@Gfyz@`u${_+6s3c#5I(cqekfU+y~YA8yQ#y>atvu} z9NWs}q>miPsYXzQ*3R5+s#&7hR32gA4Fwoz3j4?|1md?-9fJkVSdS2i%}lJ3rubJBz3Kh!D)eQNp z?=9na&K^_X0M^Sm;IG?iBx7_Lic!~tLZfp)`=bBWdeN1_yySx+w>-Y}A;r^He;}W^ zX_h4z50kZxd2s(~D-+#?Cb}imjGOsV-e7R9k%N(d6hstko6#Fos*Q(rZ zYN_Y-8OM*;#y(PlYtNITQoJ3JrGZ+$B0K{l#L`5mNcb1KPom=9LqA zO>+!1vNCO5phOm67*O1HdulE)bK_hgXyXcT>5_=5VI*>7LqG3q@#rQ-T)f((Z#{MD zj4sm6E9l})nKFPpa%~L^+>QCo7!JdTCVVB!tj|;>sz=Ou(KLn}Fuz+b-zPTT@rKp9f_G|O zK}0^00*Je1@|5o^KE?VC)IQ_dF9Ahb`xw&|(Y*%*)L&5=Z(q&M9nk@M$-yg>2sH*J zzi3Yw1V-YMt~clfL}E+ivEo!CboPdf`!50VCkequHsLNClifD2)ar+MjZb~HT;xqH z`NbwbbwD?mGf!ILRGXw|^kl}Wlg3WT@yhbO^c3mkUUuQPd96{r)PN;E^E9Ck!#DYW{2xsG;Q-DuUD3|>GJS~mGQQ;{To9B>yB zwr_FbO%SqcRZh~FY>%DJ+RPW0ESc;QN0uONxjbu86}1avq<%x86B-OyN+g|JX^mB` zU`@kHdfK>!Mm$&Q&5oWPSQ_K8b7%F1oAOOEL1W2`Swea;iM%@G%qH9x*#(j=J#eXN zBL!n?WcM8|52-5(*puApU@uN+e`wp_RTK9{lxs)8t#>2~F8FOlSukYJcSt{&&K;=F z9fpZP+cxJ%(Xrd(5J9^)NKOceF?Kr-a6c`O3e6L<*({29={ewMe`U1<$AZtw^aiM> zoZ&dq#8twBZifqfP4zvkge4$#C2Yqfu_zL6ohb5za1-D^5!xIsXNFq2rFSrHu1DLk z&IGo$2Xf5+U2np$5=fY3J(np8k10Iot>g~w&=XdwBa?@9L+JkVcpc}eAd-j89S4Rn zD=E*x)@zSF#k#uLm54uQMO(bWs$D)uU0#|-sAZE`HqK*d%7AM5_+i_7I4gAu{bzk6 z3Tdo-#bn`vyG7=C``Woz#9Cqnz0TY};%sUZZ_?o|QPrXCM2m9n1QAtbF8^=qEun=< zAyEBxrlk{dvN&-S^~?!7C()D`K2@R!P^K8dep#NL;7&V;?+i%rnD-4l#xB+Mp`q+; zHe6Nu0#Qq?@R6nm&F{Ye3->W^kljUHRzCQPnwe{)pWQ;w6hC3D z?;wb4>dsxSn8iGL#<6G^h|o*X$WNHwwcZ|&ZxyX-Ro<0yog%y>$yJkGVK}2F8|Wjc zEED;KoG=GeKzv?tCYgH&+=)zMddfLSHItQEqe#c~t3Wysq2yZ2P25v3*m zp8K74JrLs-{hfFm!h@^*{E$HDHuvv|}uRW;RCS?#w<>Io5oz+uyI zdI&?J_^=RK++g^_!7G?36_?LAsYSK7^ER=Ii(UgAXp0V9s${Odv!F{`~vzeMf71S9i%B5Jooi)NhMC6?==DmNWJ?LHBdp|#4q562!of{a_M0&NFD326}o5P}m zENCaS9cd@EGA$@Wi4!+BkcBAIzH7fCgX&lgZN?GUUY^ijI*4@wKAtuZE;?ssTWS5O zG%)AO?$Jfc%k?3{UEt1QDz=_mAlctqBt{KI+DRv*a`RCbOFT>^nK-z!gO^l00U-5i 
zeTFN32B2&S5k-W<#v)*J5gu)wB)A&CH&Wvcyyv}WyQZ_D*L7`>WFOB}v1mEGiw$ea z4n&Px&!K)!xJhfev9eixf?CUV0T|V+FYhxp@7i{p*;_oqYHA{qoVS3JXhz6A8^+(N z&s@)!Z~;|(nBdt>M=o1zb)NpZ>Uwm1Cl24~3Y9KoPATJ7;wB_JnIUDC?d9j-^_nSj zYXqa_%WbQayIpaeZ5BrT!+x~Pv42=H<0deF-v%1(^_m9_*S1=Q@0C=4!M&&sF?w~F zFG$QGzejn>376gXsB^dx{!xDtBxf3COr1O{uWt24i^N8t+Lt_Q=*$8{Etm!q`)&V` zH2T5C7`qQ^+vN35R_ch3rG177iA6d{G%;#kAwc-VUkp39ZqYZ18*b$=0vfxEtYtn; z;-pXEzRpPPBRMnM9WmL79~Y4%&U8$MZSDc4L86Ca(kz3fF7={a+$m`bm{HK+C*cu! z!GHG-d!fik2!lDMCMT9LRgp5gJRPSW3>DBabHd{|^~2^M`2w{Ub4SJ)gP)%su)7~O z+MI>?^KZ28KUMsn`sNpQDJuG_>&3q>vj1M+|5o(>N7(lJKZI?z-)NU$q8RmXd?A!= ziS)ceu|;xHDf6lbCj%g!luTtl7MTLRp5FmIKRE(%sX$CL5tD0tT$S^hl|q{>eXcHh z&=rI<@#Aak3(k`JcdhyoKL!pMV!o?hs%=aWRA>sgJK}A5L`vm`r>3=@ie?GGq1`asEUrvYG z-xe{~N?{Du09cuLlOdrLT$JFRfMI*Myou$24`TS<**6N5A8)(*Ys225gk-9#X*MFLn>mk~B=dPe_?0wG!WLKUK{zqP**>{LIM|wj;qIo@C~0O`GPwyiU;4c4 zF+3Ia1?@$GR^Pp>19RqRGLTl)+uQHD@FJgtxBGVzqql%I?5SCo#xRxpiGG){1tqmS z81Q_Bg(l)IZ8Z5LqelmutrKhb2z4$&n2!;+Ne>Uq$I@pIsJXpPYk*7sNC7qX8HDJ` zEIwl^3;sBvjm+T;hLziW5$ZPH^&6)Ifh5ABLWw{((ClBbYO;ALAnPw=*0gz#a!hjf zQiMUO1gKC&;0zO-QIZnvGcP$zs&~>i?3rFgBBv!PCQA{oXzc4{R^RB9WWa_1Nozx# z6~CsWyBD5sTSy#ln4Md|!6<7V9#x!QF032^V%yN&^maZoeRjL#0!=&s;ijB&pk)Lh zS<;d&DnDX%d4|CUPnU4Ju!XH@08@mbejakTDn=@KA+uPcJs7NZ4?I8V4{%ta4hqo# znG@_t)eTy-dOG&HOb3t9yD6inxR%?~87o9qNr$z3w;#|$eb05)isj8k{+6ifMlibf z2J`09PWVR5f+}dLqbJASwK=6x7hC3cW0p80$ue}>JJ`Ph@1NoIpMkeCi5{yC`QwKI z;{WKFC_1=UdH!>7Do*{v1JwfSyL)XdZ_3=+%AQk_P*9npQp=uQpTsxN%-yNdOydsU2twTq z3rpu3Db6jR=cDW9WAf&WZzT&spyv%J@Z+` z^7#OYUq;Oc)=FO%D~l0cR1uB_HirRk+tiC|bfO4y*eR1Co^ifalbiZL6O=mtaH>^n zj}ymJdN>L)ow$F(gP1ewrpYzhNdDoTwI<&v^XBdzmSM*t*22uz_|LD}CtQY^2~LUw z2wmh#kgC^=;`7-lzg>e`xK=K@{iJ0QT(2EzN%-a$ zWD-POr`EWt9aiil1TT>)<<0426nJ!4;*H9|(&4g~B`ebBl1scT=5cXY7ThhE&0M{q zL5XteMN_qJYHd=SExVK0bM=D+n@uDI&p9HZY_AHFmRn6nkUM^>$dsOz`p?Q*R}oDy zd)^R8PgffKvVqB5GhZKmWuIzti;oj<%#w)$)HBk z8}BETCx`>}E7P_zW7Gt*fAMaGr%pc)EK8^!w0}4aa30K}$ByOz=2*6tKkQ8` zh$ii5NOrM_+2Rq9bw)LpL3h@PMh~p%Q(U#)3QaZkdBFSYvY#mn_pH!D&HOaEZSQ4v 
z2?z+@(eT_nPlDXNv#1CA%p~_sLmh6!n^G#A&w=JJmu#x-LFDzYsiApGD!<;nV{evp z()TC$z7i54@a2=>8l?KXol#;v-Nd>5ODolI*1u&$M|3 z%WKx-d8_E>45FZ;$ESIz9RuWZFH~VexGU<0bbnSfy{zMy4+WN_FSGd~FX;>K^(SIM zWJQde;K(iEp=&D8dXbJ}{<|M_pIi`4Bt>z}Ktl^78MlQs3mSvxC3rha!;nOnJ{a?? z!oVzwY^JcH6q2FVk<+lcbqExDM2{)#{c66VozjB3WY*5K{&;&c+k!`|#DfOPP@U3* z6!A@^klAiaTmA3YY?%^Qpw$-_x>OO~$3@^3P6}Y|a?x)NGiR&_{EtbR%k93FX%f*!kitSi)|ya4oV%bEL5RAE-yCa*Nx+9Xe@C zx2Whl@k2|FhS&x5hRj~H?eXaB{xC9Bl@qJBG*>$Jn)1r{G`#Yre~`2J(&dP(9PnxU zn%X2yZFHL|R&!c*$ta!D*JE6-^9F%lD#yyWg08mfc}m00Q(&u!$gwm;%5Xc6G=m7V ztj#L?-0?k0a>fo=QX*f!MpUh!=f)BeSj-s4OppCk6-q*sOf;wxfH-z68k3hnkC>#g zHY3C_AF1+%Vaboaez23o=y8U2KZ@lKA>jg)a)(nmBMu(x`GY1n#eTL|++pPQ@7%$D zi9~)6JqQ;&+?(fO)U^?+AAmK+pBEx9L{E8Y@rG**B6$xH0C4tM>0^F*>g0~_>=W0w zJ*5kwt#wAitwthh@dZWPL9z`oHGP{|7cv&vIhG+B(G_jbcf>{g!6fV^Tq!ZAp>>jh z(s+kvcPdF_m9)bUEb)x`+z{?@AX`%BzN4+B)7F%E=Bu+SeNt6gOpRpNgTR}l~4DsA4757VMa@=_8UIS3yQbb`vXB4eO z*wN<KaMv89I;yX}sFV0lr?%gVO7b zF3-Cmr+5 zHrk?8$Zkw6Ey~U(i^a$R6iQyqgyaHPIZj;}y5W(tylR#3HnY^{vozQJ(|- ziwYy#A{zqDHm%Cr!i=_|h&5Ts#+|y-B&5TxzktsacVgK>CLfC37PL zP_$XZ%8bNlQl-YDWT^v`{L`>~#v@6yRmQYN1C|AeNG9ojhK{CgUsGj0CnX1eZ-7?J zv}X~KupDGhmUWPz7>bod9?Y?l>;52g3Z`T)gMJEfSqR%-$Rcv47d*mJ@H2G@$&Tem zZ?YU&$%pp#Pt&Rw7dzNQ>^$_a;C9}W26Ibx`3wPSLQWx-Ur^A z>Yrb%8br91dSpLBl!Uw`NSJY-M*n^}j?FQ$J+Kb`-JiZ9J-B)X1bUmW3KEC+*|-M! 
zxv&o585j-klJWFC$vfc~6rG_#jk~zdiPVcf1`jPI(Y1b}3#mp15(V4jH6GhV@}a0< z=<4M|IDGO=GReGM3q2%bZ?Of~oxwjpKZkth7Q`8v2=mc5_wNSkW%n+idyy-ibM`>_(F?1rfzcBw5khdw`%MgVq`lFqy{+`KL?E@eQh<_xWA7KBNg12Dbjy5zz8PIytCdy z4kXNshHyVSYNrAR&k3GPRH;dK3eh%1Rp&N5T@`pKl|EODvEE?nIDz)}Hn{;KYKyhH z-zACsm^54~v<<_*M+ztFOc1L9!8DsB!1M5Iz`Hzt4BJN$eFaM>s4`{nmHDXespmo3 z;%58ssWpx+mn7~p#-u#g{1O5Wh4ejiENqxNF6_MsD^7HA=2yQ|(hNAzoG*xb1o{QC zV`GCCDk+V5Dt6~~)iWsZ(;z8@dG+_V8N#k0;58J1BcZ=C>NG->#`m84!i_Ru#ZF&B zu-BomSxNQC-XOThPxdmfG!W12`bV5MiI@d2d zntlh#z&mHH+koCP2Y*P6g&smopq>|RDLTSXU$!iNr{qH22Hw3Fs(?N>*Z`GIuS`di-t@_zMHRvk@t`E; zPfMhno#r3HRP=gN4J0dU!|MuNuFyPUK~F*0&4lk>TkY48<{50M@-8i;&|`nuNv+~w z$r41>WyGJ_y|m83(%4`L;PDMhuP?6?u(5j1Er77w$VN0L^ zCyUJ8eMDkw_E8VV?H_{IyFU|w#Q5Y?`JM%><^!_X?6+%UkIpB2f^cQx$|D|`Vy0kq zrhDq?MR*W~Rc2uhF#@tVoPcyqjzT1T2c=rBwaOztqy3F1fH^Ffdy2sV8?r7qO}eQz z>&QVmc9X<8Qa@H9pJhQSKX)+zmpVPS`h#4yJJ&`}Fdv{u3+ODT%S)w7{!BU~y|FkO zrrFbAPsn20Mhh>B&Q}Jz$oHnAOE7-^5mmzM43CmDnvc)IKn^9@e{2dmB{_+{F^VKp z;uKkM3vt@pFcZ<`P381ex0>2k{c)Mi9=unumGmv_8j(A^aheKcA8EAC%Y@>At*F=B zpr}JkE@A?eN|LtWAi*KHyF0<%-QC^Y-GjTky9Rfc zAi>==I0ScxpX}`Ho6JsTzrRmzE}TO@U0q$RxB97D?@FDs1tZOnB|xI<_(!*xaKn)v z$lgY6;-^%%V=Tu=7LSMQ7A%gmE|@9Au1ClpbPYsUp&1G%X zi~1aS%!T8+0lX!Y?aS*F=`*SltNyCG)9Z7uK0fg~DNj%+Z{@{1m;Nv8DJcjQIvv0- zKrDf-$=Wx9MhK1>#^=}^10%hUuFb9f%LA4DtbqM!QIPKW6WX@2$YCCuC0Ggm4wZ5W z=85530ed2?a&w9m;8{kcPS4wlU)r7GSG7TnvK3JgLEr0BKLrtLam7=9-U!U7QfaJ( zs05!565x!}dw{FC!9?%uMm((~wkffkD4|yA&>Ph&Fi*7Zn5om8+CZGTe*z6^B)6Cj zh@1hMsy|WR<2jPW{=yQb(&H`e?+DoMn4SDNp6$#yFz}88L{maa(HPyqR#SJe&@xMh zNIAwH!@4dB(p5x=X0)GnzSf!+BRo~En!?QJK`4`ljIqqHn@CdBPW+Ut&gWs4BDrU2 zKM;u-v!OxG%z5ST#N^{zY5dcAINGLX-YCA&;j!vl zWZIV(>GDL2@^-AkJ0ZHems8qg(#`L1=Jm6jDi9lH&idjB7zlSnM{IsCj`2j`0g!fl z`p{NKT?)*b**@BZXUmg`UtDH6rD~+8(==3#hJsVv$|<@dSnSW7>K)Iz<$ArM{m>b6 z{CEM%GL!(m?+Ide#EzI0J`1c3LE|xR;G-MMA2%;7lYi5CbY}_XB-$K-(qM>_DWE>< zp>MJTFX@{y@GWH5MM&+liQ7Fy4{6QG2KaPMZssX#X$~vlo5}#8gmXY%HH0&zu)OIVQ6E$hFMGWlNh#2vuvF5n!k4^ z7=YhEkrooVN1c5mEnY29)cqK>1L%0!O)3;f=jD&VXh;`8k$q8M$(7~IV+oo&d}=M; 
zk#K%btHQaeB)b9sy^f|`4JDLXMs97uNZC zd}l}o0M+1Ho$Mxzv@*al>PF%6Yuml7{8m{NA)eG)a$1RJ*MXjD^Bh#+Z@Tyn<0|e6 zg&Aqb2y9aH9~>U?%|g(6Kft?Fi>$OrGAB%g)orjEy?gL=J*&K#)9G7tycCquEw*wh zWqb~y2W$`y;w-jqw^BRIFL`eNqFeb9EdIlQ5b5|@8OuFF3FAJ& zyFY7&o5&lp_r0ny!m!yU?n?J`7CnSQuHB}|0X2<(0m2Gd$8yG~_ObON1)sZNp61R% z%ng!29<$Z83a@=KbZL~yteQ6US@kIO*{!i(#vga*e_o#d-@^R|%W%||sBGxl>3;cJ zIO2bDb+yzpHFmUiwEtV*=YoHSEwnmXh|J@FHFfbvGZ=HzgF6^!}hShK3#1ee5h?iC0<%# zR!^K>Ik~IL8UCo*+|#)s^i91%*2z*md8%sUV~sf@)(L2)4ry-!JKkdA9a8zO?E#a`0AD^TGrLcmZlco!^>ngE8c@uHMK#9zhyUvugYWy=_ z-*qa;sZA{r)U;(|32{#y)s|VS6(bPyzED8LDx+YLvdR!N(Loap=VBE9WXEw^xXX^? z=Mc-VG8sz<@59*_obesdj89x$4B4mpg(OWh4FZkBY4J7(*ULEb!2+C3qc*wY4pdb~ zfoH+A2KP|%pc+) zpQWEvar~LKIHg6%zs)yoLI2~+o(erB%tG~^%NK{V$m#phn`y!e~=eYvrM>3^5@ZFXnEz=`txpSQ2`z zUO|8{Vd1A87EiiDH1_E^L8=AwZeS#~i1OJm{!0uw48-|aP}gXLB3uEgvFpV7f^NQZ zgM6~D&}Db;_}@P(6M7L-`b=M8eK-cGX$l5^z%a=-ox~1xLk25wwuyWa6BWYv(n?72 zI3SUn6z-i_#k%b_2|Yz<+;j#Bf6M0!l>*T{tXn)VXhWm0O(;D$LAB3%pA;)PrS^(7 zSUIx+Dq?vM@F?P)^BJuf5l7EIhwT+sHSn~$_;X%P!n=SC>2Sdn*rS-t*YeL>aCeS>LvsDdbuf=Xk5dF$$G!!l>faZCA) z%=~eS)4b)OSZC+0=iVrx#HVB9*W^9Vigiz}%kM$s?O-|`E9hNXZLnR~ZN)t$9xz3Y zcl20VQyb%w9u%LFXi<5vwI(wl2=nEc684(X$g9#P=d)8{FC-r57k-*{^G$A`j!I~El=wl9>weQn80koFhV?by~d&1D9voiz%gwcgY(urL4a74 zHPreJO2s`Yn#c#wwuOkGRWLN$gDzf%zSLg%&SFzwMrjhOln zj_Ws0Y|Mr88yMGSk)nvq)!FmvDOf*zT}sWZ9+2}vJCzc2B}`^Q?_%2np^1_#++4jb zNpu-mL>PeanO%w#5zs3cJ#UAzLSjA!KZTrk3e-QYPm>EVDa(k~Utu?GD%m3z3ijPz zGr4`R!50P2hg)37GL;lua*@&s!$jvSF)_B3XGqz6UNM|hVW{`rR32L;KPwM!6db4y zU7VV(r-)s^T$@Fcz1b|^7%SfpP?$d{q?v^yT3XDrbu=_^ZYrWLI1+#-+&!I3(Q)6o zk8ilNupDXsIJ_N%aLlPWgNBBY^&UE-QPg19FeX$e7G+33 zdl?8W-bk2e;XLT9ehW}_|K%0oflu^d8R0zh#NI!d-?e^i^SZ4L@1PN@cM454E2q9@ zNS2a^*VKW733oU~gJzb=o|X?m6vjPKf@->Ep9t%ok4ep%yx`pM2(jR# zfp~t#(UJX@l8tS@q_MRk$0U-J>q(_pB=K~!p#(#72|?zWourg=dYM>W+>zQBO@&WL zl1zR2$6DwF{C$y>>U!?_=NS4GA{E_qr-@N3Eo`aZgKzWEt@gp4;px%LkAs(jM6Y8u zQp^??jr}`zPU^cx(rM@-AQY_ka!~X z1opg^rP$AgX++C|OxXRp@=hu>^6#4i@i%PSiDr=_i`P0Udh%LcgV);TVh^|TEV#zv 
zq$sb*Mv@eT=f#u3Osh4g%tX@!92P7^Td|!)W@0&O=3}J}YR(uU=`fh?=B`klo0r03Ch$QKo6uh)Z7ZXOR&388>(x&vr?-*pK`8h%*TwRltCUr;CWwIpgvw=3d=i#Ke1&2aS)thV?>dU2H2yo|<(hFHO9 z4&*Dj(;k+n3(>VrJt=j5Ak4EV8I{V5uFo zbTU3xNu!x@@(h!i)m$U5XjA!UN6(57VtGvdWi)xHiF}NOMU?D0MbASp8Rp4=Ly}2N z&S!Pe$V$wEozf6re*_p9=H~ou3CtM+HTL=PnPb2WC1`fgquj|-Oi@}QA~F$Y1bwAL zzA{UDe9Yr*2YzXe0zJ^!5*z%48ER7%vu3(Aedeb3H$YVFLrdmbqLJ(b)lPj~RTDQK z(apn-v>V+K->nTu-yDgxP^-! zm60-_Fn+jBlA=y!b(>~B^1(bgiR|6iokf`;Y%w%1>A^C*B~on!&XENoR$m&%>oG1F z8fMs{FK^rBaTXTXLF4B%WNIe1Wf_O|HaF%#o-DFeSAU3)3UzQQ3~y^;%B_wKcIdkm zw4uGxp#jeLxG^0G^)2Z*@7WO4)O%59LHj1==2pl#>lwc@4s84-Zi`A|_JZQ+!4D`p zFCDchLBFD%6$xj{gcaq?cBQ|v%J3Q3n(T}me{_SH2DLQS3spMmleSThwEaob8Jm1y zU#2H~Bf_RUle;gw#gTK%0&{d>)VhaW<=BDnn8=kR(?hoXAb-|r&bagB;Su$UBvT;5 z!xp7~K0ag)^eZTznSYG`i7l8Y*6oF>o%crvs1BWwQ`GBBd#Gk!RuFm@c-LkA=K&m^ zZ>SI~wNg6a*<;$f+&+A|jhv#!`tOTm6_V5w#WSJ~*)Kr=!Wuv5Jkd%NqYD@&0Jb%e zG8@OJ>LqS-g-+3Vhq#JrYE@BMzawf)n{wWjOl@UQ;rtYmE&;(62F_1c`GqC2wItpN zF&juQIM+XtISyu?CuLslJF-}FhLsbQrUeexA#!9E?iJR|C}+B7;Dby?c}^aaOjuTb zmd(o7^>+Uuc`O&Vw)N?)=S=KVXXrxu*wT&ORr$D+mwalA4;`QiSBP z7Pc>gg1%nyjWS%?j@;zmJq$aPWnAFp8&ISR&ZeafL{lnpP(F`=&^C;$M;-Wm^z=O2 zp&U}|gSF|$(@>6e2V3UX7%8Ji(o>=-+;_ggrF?wc#G8q~9~$u*Ip`VL6w4Up^dIcw zbyiZqv@>q@G_C+oya$mn#OOGc2outZEPuu#-EpDWd7z{^a}EE>tMH9?q#H%1uV`M6 z3!$P**P<`UB=4(9TqWH^r%*eGR4|F_1ER7pix-0M{giHA*q$wB(E{;jK_)qEO_iYW zNf=^UoPN*r9oSc1xn?GnWjU68; zbD=#6TYMT49cIQ1XUx07&!{}X^rdr+S}Q(^lvbjsyIIC?+N)G_3%17C9AaTG=}zfpZ*k>^PlZntbHXJACB`?PXGIQZ zXoYAUnyaC937=Esc1{T@2$ODGXs>(EX2^{|0_39jO2oS-k}AS((JN+@0LVXkrJaTt zZNg(ji6%MPAe)CKo}Uc^C(DI$q--Z-bF8rrWXp;HM9FpKU70w6E483^+A)9|r3?Gg zxVvN#ps4XVGh)}GM)Vo@G9#dD(^ z_xyR_MGd$B&9Mf1*nl9_5OQv?0g9H%MbF}_SLG-a&cNw0m7;%s8gB$@zcykn89A>X zd5d<0xbcNQxzur!aIU(pf@WAra*mT-zb23&_3JCD=qLg6XNi3 zGO4q{MB4UbT71A7Gl!e-N`qCsS_B`Hxn%iVsv7F{9AIGvmxAr7S;J(M?ue+xGIl9? 
z*Nwc}(dQvnA1sit9;rl)xu%?@U6!5afh0eJQ(grTn%x2ZWSXFAv5xpjs)`#G&EuNX zafKUT43(bGOk-nl1f`?$TA>}`?)gEjSoKsN5*{mzb?RBDRrkc_UyO>n(aF9UtvV25 zeul4Uc(g=A8p$HdYtXZjq8Xme9W#(H&e}qBMj!5D%?D$r7>3&|&a>Fd%*lw*ZVFv@jBCLVHU5|ueG_%Tv7mT=92>1n4;Sl@C z@u^n}4Ps{0;WlLYKIhV7AWOp?i+9%e^y8@3A*Lve^Q#SgNoi3WN&l->AW8=Q!m7Rw8AqL1N550XtvuzLxKV-BCAzD!w921TVSZXfT? zE`U!rfe7|HqRoEXu136w?UqPu>%LtPY~Za!$KE>Z5`Dn250i-KfI@rUDn+o8KQ{k8 zv85sUd7o?|aNcQTBVyOVGYHM{46Ric4GY!yh&N^O?W1zIm_+uHh+%v#dQ>NRTGB>= zN!QkQyvBpiBFo%<_uYHx{2!roHR%m;4D z)&&h^NKvhV2)fg9Cws-Wpy|Lmx}`CG|G2wo4OpbHIPR;;TQp8eUZjsg%61dPMN+@L zZgs}{`Yrksqep#@3p~IUSU-j^ZJ!GW?-ptRd!*17aB4UT9IaH2pR-=Sc;2?Ie3~ zq+YJ~R%(kX(Wa0N9;e7g6@i9s!Es+dac??NMH{lyJnQ)9=qs?%q3SPWm4gnikS#*` z2;T~UB%pwQ%)m*1sg2(2Mwgd<<)GG*$m=q1|*!(2-yc8 zCemw>Jzq*zj$&7bTM2smgsDUiv+i(q%!suhEOAV)IP32&bH%SfXIp#A$op@>irNs zi?8+MP(>Xv+~b_u4BG|Hw;@i`Nzx@qlSf?)$DEZ)0ku*)%g4{o#F_CM7A9S-O69s2 zMxT?IJp3MOH*K@g%HXktG1?dV&FsxT2*_FvMw@#0uIld%II$GP zQqu6LN-98w9#4tQxSihsax(oIxwH*S9tyZC_$dQ z-Wt~Z`h5jIdl5sptTdReoY^p$xaNz1pyJ{zH|*=|(E=a7^-e=f{$Lx=)R9L>Q_7OS ze5fJ%_iPnreq9?3)4)9ztC)r$T?BV!zV$tCO8YD^&-;68bA&=Y#MyAEytr+S*-r7Y zlSv?CT7M#qeu#cs9Oo*rIqOv0!ZC78u~4fWok@zo;sd{#D=<^`V_8+EjOzu~i;Q{A zCZ2ng`kU#0IamGMOMmUZPq#DMNFUz4!y^1g^2|Rv?jN`2GgKfw)Ms5^vnW(!cPKhz zS#04+#K5{^FxhEhzxgA6h3O8l2x5sMHbt%|FjvPF7g7cU%p<~57jJb6uQ$(t9@<|j zqbLB^NrS~bSSrBKG@UQUFduHPI?gsOlHZvxC8l+ZW3+PsdCG zb1(qAGx{8e3-;dQgFd=Hx_`KGB65p5`3?u$&M==bjQ*ZVoh9EewB?lQ0il?CA#4Bm zkhALZUT;oCZ?~o&$1CwDh8tJ8AHfVNc|?nH8vVk`gQfXQM^ zWH%pI2G*jGoPph941gAl*?o+cr36Z`=z$Pj7B&qmt&faWXM6J4AsN0~JhHu=*095l zut<@Gt%b<=2dL>_hYJ*`NCY1cP?8cF+EQ$ZD1&-FTIs{f)x%|db`$Y$Fmnk5Giir? 
zd~DM(A{g)0)OS*sxYW^z;pjy7w+JnmWmhuC3L)8n^~ysuyz==m=HL9}h3S}?gwC7@ z=?@|SO2){CQJ3d1@u^zu$zp4$W5&Ke%XI=R$p{$(+l%_D5>%O5NIpR3nZ~-_FVpK3 z2Z@1n8byAY7@J#k8_U$q>1jO?`+m^`hiL1g6cs1j9t9%A$gfswFSSB0Q-VGT*}hHK zF~_*w34ne)D;-;|KJRjmRhecboB=dtZghf)+M+03v*RaZ+T%O7csSBLd8pDPj5B^H zTHY18+}4>)mH+jM-;_X7=y1r${=K9g(Zn{3CRc65PRdUINV#btLbO350cCo9)~ec4 zpo2vGCZz=n2qXvW9zuPfe-Wn1%UqsarUavY=0ssO)O4L>ZDP|k3qf+oXfjw)#Iuvk z{Dhfn@04Q#JixFfcCwZpjPGvO$5H+F-sUu6`+4y^WSI>n!I z2%q8)AtP8n+(_n~B(7Q-Ce@NEE4>9iIOT#tYP(th6xp8|}D8&iMI}rOm54ld-(ZRxg z4HIKc1w@(c@i0ItYjNSeau&YZ&lmtmjd%nfD~}vB@7tVWIi&?*wn|kfmK66zN^||( zdsfuHwjKQQk^wRE%Ci-2@={##p#gnguEuDhkjM@pP-$} zYj1yU3YF)85pdmP=~JznUdg~M3YogFAL)oHI6wg+r>?fq#vipo`w;!2hm#;_A0i5o zU174bv!9U)M-{$n?<{6&ZB+5NIA1}Az@5w}wg=LG-PWd#bp;73bZtV`o2T!l%a9aW z7(vRPF^23gG+DB5LybeEJX;%JGC8rF=hv5$uSzK#F{wctw`mHWFJan`q!U-rqB<{k z?7#PQ@$B~7;TsbKnHgGLH`<2oZjYQnye)(E)4d&2#vy9_wey3Oz1uyKo~n*viiS0D zUev&F8;ejWJ5TFSbqGzhkDjiMi*ZG=BwC~<9Tj?ispH|?;5O4&y7^&E@%%aZk|`ZO z`>GgF`^p0MVG?H6L5;_-7*;i|=0#B>$Pa9<+~i5vL_}ih-n2f&BC5{~>VxdlWVLoB z?L9|_K#ARegR)fXiXRV&yz&gLjzo0h%`jCJwB4v~LR-aPlY*Cp+_yRO>Y zqRnQCav{H(6j;5Ta$)6raeRwCqJ*fbQT)&e7hNS&?yy-vDU8pkmUTLX_M8d;&JBMz zE)1@(Q5%9Vu1!4l0z~BGoo3{@EoDE=hCgS=`v?!(A+I4b$In;|YAu9@X>b1*8;YaQ zPol*=Q$9~yT0~hFdACU3Ct-b;h2x=cp4g`#{LE)b#{^AODDefyyew4!*9oATU>Zy^ zmkrq)WJ7eu=Cc(37t3S9?@i@7dxiQPGamnMyzM>4atgttRlJd-=G|Q;i7q zSDb*{R(y;%xiHVbJ+ftlE_%S@Aif20I*X!RM|dJP{CTr@;3Jw2HG_y~B({xDtN5ZR zk4d6v-Ge)E+>lj)2I`8r--lOgIpwAC`gnYBzLK~MrOvDz-EcFriT^z z5kCp;D$8emkGN)lmrxIZ)exb%J2ZwEf72V4ocY?COU-9V478p&ZRyknootgX*5q<# z&$&GU(H5vR;M1xaz=H0u$Om`8D*pw z7{%-YburHGAyyF>s|cB5&zTV-NscafI33|2R)w&h`Mw3yFm{lZc2TSYWK67`uW`zD zm_2bTB(T}SiSH(ZJw*pFJ#WF^A71{QODb=RUJ%-EyA(=b_%u-2&t?gh(Gq)zV z4Io^MyMdV46J_JvKuvr-79Sz=S@*mv3j*%ivp97xQgoo&>>0`n*60c)h@dF4o$XWc zK>s}SjlpgeH&alABS`2R5IHmqCK`FM4rK%UAPM3%T-Vk$bIfjbQuy|07Gb~8==%D+ zK)%UHIP3!4f^c_sZ&Iuh?IvL+4=laAMrCE5E`V`5xKI75Q35aXqjb9KArUgwE;&K( zJS^h+C-M%_&31`RPYm;33KN9&KBIHaBiKs4#ydFpO~+O!xoSx@1-zY#KGh2ZYP1In 
z#kYTBi=~*2@67SAmZa2O$`f2-;Lk5jlx-FwmZU}Nm?wDl63M^#sD+J|_URGb?V*J! zHX`wkDi}LTz=qHl8xwb2suX>LFV%pnD2CBzUe3HA%&a-t;2J(%1x`}HU)YSUUrel5 ztBs{8a99P*B@1Gm$#DxdR z=3~mCA9LEsqpCQk3x256I&oRd76n~4Ce+B50pp>xc$faQ7MW{Iq|^!LkK?nyk)YADQh(YuWeS{E0za?#CJ@U>fu=fYtb#24#+04xtWKx|7kYsnT z^-T@Yp7haP0C|P)(2>2vKQK1h3ZbzsHjmp+WS$v>jzmNw;LTp)&vc>j9dZXtmsiPOk$%_yVL1V` zivJ!rk^%=`2HU}<)X2RwkUMtP8q!VVskOk^YjAAXbQo?fl}=dV!~O&ZtgQT0 z@BxtBL*lXc(3e>{`W^k`B9HVk#*M08HStfbS4t21hjy(N-0!(0Xvx-j5OG_o&s=B3IfvuFO%yg@IxNW6}e2tS{_Q!EE0Tc@kj_OR-$%w8&W*F*P2(d@X11uCzi5?&apuEn8v5+ZlKXe|S-_3=Bc3-3OZitCl%&&n1Lp)MBQ zwy*Ti<=pkOrC7JyukKFVBubm64V#9jI3}UoOCXZV#}xD!jR#LFyyJ2`{0uzvVWRjc zx*~oaD_lt>ywvtXhY_t=3`KlHFFh&?#U!QpJwo&Kp%dn#dQ5V1|7ZorKIB+c zDroHkm5_5(wdiO) zJ_zw1w30YE3A3sKE{eWKx%~X(spz>9dbZN&1na$g8;0#%8^_+5dAL4TGHMZ9*-&vg z58OUOvI-Gf#h#!kM~xmv0ICF05_(l-+%@kx4r{0U5r-Q61;--?yDXkj{{(Rsr@4B1 zQ}i?`yRP&a;M_<@YHRj6|4%GrI{BCKTZ!~8g~Ot|E;AIcwls#O3o^b-8FconzWss6 z21^c?Fy7j7FYd;V0E20=@kXJa4A`v;FOlZbAwSeH@3 zH-I-Xn{lwXKgJST5lu5yWDWO8C~ekhje9Oz&MsSCn+pgq|I9$iX{ouNb;xs_pCW(X z(4Yi0G3QpJr;g-2uK`J5THwIW%&I^wyGOD=xlF`uSsVu>U!aB774D#c70BPBRF2$h= z_;kV`70ytRruz_jy*-n00Y%1GQ=z{;ha}OOAP)YKsiqADIZVYM_uIY^V`}Vu_650O zxH?IgbJJDBd<|5G-*!_6@jm~AYlxZ$S9{4utk+kz!NZJmJ&k*f zX%iA|$W|VlvF!|h$6h7ob>*0HIxs&-_f{W96-%9R#Lrb~*&Q5QK;1b?Sc#vLanaj=W??ji(Li~GqyZk1r=V)1 zK`8s72p=DG(*s7>@x6sKWdLzZdUVn6a;+*DdyCEEZhj}L!2ox{r#5pj(MZpLjmRY6 z4{1_oBh&Ov9MB3M8r?*u1B5_gaO>8Ilw#>i=PnOa3np~Oft^ac)}g6Xz?4PX6ebc_ z-6f~L zt^+F*c9-ZiWR=heZZF9Q34CQLucHfynh6N4S%!3k01AXB&@^BqoM-SWBNT!sFf>b~ zP`DoJUd?-J+CEyf<9_TY<02H3x{SN&k0(iesko_%@aBl_iYa;vK~f*|Gf>PKzEp8Q z$@<3N^|S6&uV1o!tDa-_hNQCl5F;~L8~;qMTA5HakD%YK#|@jz%kpjuX7Tiaj%DBQ z%uEMFjR!;Ry88x|a4;14_gM|}^LmC!` z|LDRC34WE6r37#O_>}z|EEpxD-dmEmm%FO`DGxOpTfwLEU=i~YHfX<*pUQW0U&%ct zAj}Y{6?HL_ZieZ*?)o6{_b*Q9cn{}YVQ^{-47SPp!XEp1Yke%;f^ZjmD^FmKQ8L<@ zm%-!S5)pTzTyUAaNK?z$az$nzt1QrCii*Ac1HT4G^`UV0#*%#Dr@o?>vJRduMkaoI z`vCccyoX~)*Xt{Nh?~L{w~#}bCwfkh$*yBy~hLFPr zDywhVO#%FyOm4-Gy;zE=#E#sNH4LATv~bqCTMbBw5&I7A%RA*&Yk}=>xP*om_}f^3 
z{Cqgil7hTx@F`8H8byJes(Zrjq{D-jcB*+?VCOvIzncY8b}uLMXEjnwOr|Q!h&jWVz0mDK_M6&oan)e(~3mp$cpJnM)y#w3ou1U z0O%P*CQmvHp59ydB1Va`PJl;NhF}aY(xs6 zAq^!H2OU;QsKr^t^H)UaOFR^GU7*{%XhWZ8}!h5a{in`8Y`E9*Fh zH>(*?^e}0s&*DCX0neX53nEX4)c8Yl)8>#A;wOvHh-@Y=x8usKHGh0_wta7xdU5Fn zOp-Vk`f>bmTG}Q^)CkZu-)LiW{p2{M!}slQ_S*Z}wpaQqa~LDy9VEaP@=(%q>Py8> zJ;6rz94N^L^vd5f*H+vBId@Ybq?wJ`kB)MOil0lq;G6M8epeZ)MQ<*;{}&1P&zk765@;E#YXJx0Xz6E31-z_M zjxn=TqsqWT^r^3&pKijmM?!*FQ}cVr>7&|hJMtrs`!?qFB52fC?k?sm&zruj+cY#j zupEZH?X}aj$7iqG##Q&d##gTgv@YcIsf8m&U6UsCEnUKx*#7tp|7=m-M_^Uo8%Q=> zQYsU%XsUr2VuBt}V8i!23<_O}G0L_weR$BHtC{_Krf_0*ivx)<5vKgevEjwB6T1oU zFz3%mv8T^iGAGUmGv&_f0-@hXK5OwfigXDgOI)=06&_IdTx9#i(V2$#4UsBIix-U+ z6I(TT4aE9{ z66h1&rc~$CGwv)jtI%2Y1x6W+Xa;78r7gm|uOBQkZQ3o48I!;1PrUQm%7zoixG|vI zFrsp=$ulvs@#su(U{^zto}5WfLkdG4v>!xU@3bkwEx5*%N=2K`wS}-^xjoS|B}k&^ zbakY&#N;gf_C?U)y(FZcz^NZkIXRJam=y_kdKIx{AeDffpkfzmSB792J2vTs9l zEki?!gwrE>Q<>QS%JC;m-U(-9T{j+ zj~r{{1J&E4WrQ;$Aebog+^CtU_IfU-bMG-6F2^^u)TmF@4w9=l_Bc~wa##qO>w+y? zXzfU0Q?mA%rx32DBy0;7Cc7fF#Krb8#i&S#)9)t+f^$@rrM6Gx(5N=HYX^0bT5<_m zRfE|oy6f9u#Q+Poh>v8>OC#qeO12zLt@^03tijNGt+1>i(7THx8-bzma<(jaoj0to zPQl!iQ{8PUBX5CMLpa&ignheJD-cs|iB8g2g_w+y&y|t_v+1o1U{4d~N7Y(+E0T{LTfy{7vXacTM-$AmnWHh{Aw+ z>4t)QEZ43@p!4;cw80hGpR6h4un|_d+WI2?gr}hBYMtl6XS&$zF*Z;#ONfz| z)D4-j*X$F2@5Vdx-g zh=qV*8w`F_VMw&Jsaz>_a&<_SljrdOqR$|}p;VTIMIW!UU~Vc%&oyNP69jBO6k zG;YL~p{TQ{i8UDg>gj`{W!KxHzo#fiz+!1igQ`~(gY}~a0fpXQOXU!#eh$){x zTWrTMCglkk5sXoa38ng3AYt@0vXIKg*>}*6_;)_So1Voh+HUoC1tV!ebZ(7^X2!?| zjGCi1vk>fAbIH^+Ys2&~_-28053Y(yewj0>wk_+}fl~7(WA*Gr#?q#;A*Ec0x97)2 zzyBW$9X58>HimW%riS+NPSj$Ol%kTdBBcs8lp+#iB8q2{l;WfEALr=jYPaQJ21(yT zAd;b~kfWxmprNXzBvY`o29lzpp{Cj=6d)rR@9r(4e^0R~K-oz`b}3vS-rL*V-9tJg z-ZQWxLQXI!BqC1A1_yfgqbffOEF%s8hzNxE4h#(J-H*zY`t~l`zVVE*zJK=)@vSm{ zrhK0tKmDg_lDtx)LW1%N)RIDfAV9o(_eQAp1L4mcKi+Sw^#4w2w z*Cqe_!yoUrAPE0XKe8>oW&7b+^G`Y62vUB^@$~N;|8~Xsr;G@H=6m;}D55uCp1<+o z`6mFBUjUT;9^i))(9a-$)zg~`!oSlGDZ<`B{-dWqXkPv)^IPE0pE5W7cjljYVg3n- 
zEisKf&Lrx44qz%$$q@WTvb-!9J*|3|EM@A7@N-&*W%n&^jyg{>TazFGHI%vI#H%%V4}gSY1g z-R7Su--iYDx0t^Y>eBw<1M*koE=e6u@;95qd6W3Z2=Hgh_hCi*E%Lve!g%eB9W4#5 z9R9M|_-hfuHRPS7Z+(*bO|3unEq|tbpZ>S@|BXWaI8pmGQpjI{B_zJm620})t+(hA zgny#Gz4Lv9@%{&}{oiXND6t4<c#E9!8+b(rQ;YwW_J^%{4e*|LzJZjzJ^wct{HVY5n@8V& zY*k8H;munA&Qy^wIQ=c&c9Z#UGX2=i{F(B7pc#HE51+2Rss2A1YpPxDdhZ)l(Hk=L zpOA0we4p_*3;Rti`QON(-Zo4UrdEdkg#B~8_qP`LPs;Z>W%@1lFCF2(Rr#aS{v4$E zS4}WW{ucL-7XK-B{9oJq=Kx~_|B>~FCYI!Wi~3L1|2aCy$||1;(LbldzE^%vXw4XgQImGd+0GSQ!PNbYCs zAASAjtMI>?w|)3;CFcD{u8aRF;wQK3Uxz83=-(FcH#hA6iu8>0xPQ0N`m^QoeKg*t%KuE&|J$DbZ`&m!{{HPC^2fwY P`tJ2@y+YFW Date: Fri, 26 Jul 2019 20:11:17 +0200 Subject: [PATCH 124/405] removed IDE project files (2) --- .DS_Store | Bin 0 -> 6148 bytes .idea/.gitignore | 2 - .idea/artifacts/casekit_jar.xml | 63 ---------------- .idea/casekit.iml | 69 ------------------ .idea/compiler.xml | 16 ---- .idea/copyright/MIT_License.xml | 6 -- .idea/copyright/profiles_settings.xml | 3 - ...hub_gilleain_signatures_signatures_1_1.xml | 13 ---- .../Maven__com_google_guava_guava_17_0.xml | 13 ---- .../Maven__commons_cli_commons_cli_1_3_1.xml | 13 ---- .../Maven__gov_nist_math_jama_1_0_3.xml | 13 ---- .../Maven__javax_vecmath_vecmath_1_5_2.xml | 13 ---- .../Maven__jgrapht_jgrapht_0_6_0.xml | 13 ---- .idea/libraries/Maven__junit_junit_4_10.xml | 13 ---- ...__org_apache_commons_commons_lang3_3_5.xml | 13 ---- ...org_apache_commons_commons_math3_3_1_1.xml | 13 ---- ...en__org_freehep_freehep_graphics2d_2_4.xml | 13 ---- ...__org_freehep_freehep_graphicsbase_2_4.xml | 13 ---- ...en__org_freehep_freehep_graphicsio_2_4.xml | 13 ---- ...org_freehep_freehep_graphicsio_pdf_2_4.xml | 13 ---- ..._org_freehep_freehep_graphicsio_ps_2_4.xml | 13 ---- ...org_freehep_freehep_graphicsio_svg_2_4.xml | 13 ---- ...g_freehep_freehep_graphicsio_tests_2_4.xml | 13 ---- .../Maven__org_freehep_freehep_io_2_2_2.xml | 13 ---- .../Maven__org_hamcrest_hamcrest_core_1_1.xml | 13 ---- ...__org_mongodb_mongo_java_driver_3_10_0.xml | 13 ---- 
...n__org_openscience_HOSECodeBuilder_1_0.xml | 13 ---- ...__org_openscience_cdk_cdk_atomtype_2_2.xml | 13 ---- ...n__org_openscience_cdk_cdk_charges_2_2.xml | 13 ---- ...aven__org_openscience_cdk_cdk_core_2_2.xml | 13 ---- ...aven__org_openscience_cdk_cdk_ctab_2_2.xml | 13 ---- ...aven__org_openscience_cdk_cdk_data_2_2.xml | 13 ---- ...en__org_openscience_cdk_cdk_depict_2_2.xml | 13 ---- ...aven__org_openscience_cdk_cdk_dict_2_2.xml | 13 ---- ...ven__org_openscience_cdk_cdk_extra_2_2.xml | 13 ---- ...rg_openscience_cdk_cdk_fingerprint_2_2.xml | 13 ---- ...org_openscience_cdk_cdk_forcefield_2_2.xml | 13 ---- ...n__org_openscience_cdk_cdk_formula_2_2.xml | 13 ---- ...org_openscience_cdk_cdk_interfaces_2_2.xml | 13 ---- .../Maven__org_openscience_cdk_cdk_io_2_2.xml | 13 ---- ..._org_openscience_cdk_cdk_ioformats_2_2.xml | 13 ---- ...rg_openscience_cdk_cdk_isomorphism_2_2.xml | 13 ---- ...en__org_openscience_cdk_cdk_legacy_2_2.xml | 13 ---- ...aven__org_openscience_cdk_cdk_qsar_2_2.xml | 13 ---- ...org_openscience_cdk_cdk_qsaratomic_2_2.xml | 13 ---- ...__org_openscience_cdk_cdk_reaction_2_2.xml | 13 ---- ...en__org_openscience_cdk_cdk_render_2_2.xml | 13 ---- ..._org_openscience_cdk_cdk_renderawt_2_2.xml | 13 ---- ...rg_openscience_cdk_cdk_renderbasic_2_2.xml | 13 ---- ...rg_openscience_cdk_cdk_renderextra_2_2.xml | 13 ---- ...Maven__org_openscience_cdk_cdk_sdg_2_2.xml | 13 ---- ..._org_openscience_cdk_cdk_signature_2_2.xml | 13 ---- ...en__org_openscience_cdk_cdk_silent_2_2.xml | 13 ---- ...en__org_openscience_cdk_cdk_smarts_2_2.xml | 13 ---- ...en__org_openscience_cdk_cdk_smiles_2_2.xml | 13 ---- ...__org_openscience_cdk_cdk_standard_2_2.xml | 13 ---- ...g_openscience_cdk_cdk_valencycheck_2_2.xml | 13 ---- .../Maven__uk_ac_ebi_beam_beam_core_1_2.xml | 13 ---- .../Maven__uk_ac_ebi_beam_beam_func_1_2.xml | 13 ---- .idea/libraries/Maven__xalan_xalan_2_7_0.xml | 13 ---- .../Maven__xerces_xercesImpl_2_8_0.xml | 13 ---- .../Maven__xml_apis_xml_apis_1_3_03.xml | 13 ---- 
.idea/libraries/Maven__xom_xom_1_2_5.xml | 13 ---- .idea/misc.xml | 14 ---- .idea/modules.xml | 8 -- .idea/vcs.xml | 6 -- 66 files changed, 915 deletions(-) create mode 100644 .DS_Store delete mode 100644 .idea/.gitignore delete mode 100644 .idea/artifacts/casekit_jar.xml delete mode 100644 .idea/casekit.iml delete mode 100644 .idea/compiler.xml delete mode 100644 .idea/copyright/MIT_License.xml delete mode 100644 .idea/copyright/profiles_settings.xml delete mode 100644 .idea/libraries/Maven__com_github_gilleain_signatures_signatures_1_1.xml delete mode 100644 .idea/libraries/Maven__com_google_guava_guava_17_0.xml delete mode 100644 .idea/libraries/Maven__commons_cli_commons_cli_1_3_1.xml delete mode 100644 .idea/libraries/Maven__gov_nist_math_jama_1_0_3.xml delete mode 100644 .idea/libraries/Maven__javax_vecmath_vecmath_1_5_2.xml delete mode 100644 .idea/libraries/Maven__jgrapht_jgrapht_0_6_0.xml delete mode 100644 .idea/libraries/Maven__junit_junit_4_10.xml delete mode 100644 .idea/libraries/Maven__org_apache_commons_commons_lang3_3_5.xml delete mode 100644 .idea/libraries/Maven__org_apache_commons_commons_math3_3_1_1.xml delete mode 100644 .idea/libraries/Maven__org_freehep_freehep_graphics2d_2_4.xml delete mode 100644 .idea/libraries/Maven__org_freehep_freehep_graphicsbase_2_4.xml delete mode 100644 .idea/libraries/Maven__org_freehep_freehep_graphicsio_2_4.xml delete mode 100644 .idea/libraries/Maven__org_freehep_freehep_graphicsio_pdf_2_4.xml delete mode 100644 .idea/libraries/Maven__org_freehep_freehep_graphicsio_ps_2_4.xml delete mode 100644 .idea/libraries/Maven__org_freehep_freehep_graphicsio_svg_2_4.xml delete mode 100644 .idea/libraries/Maven__org_freehep_freehep_graphicsio_tests_2_4.xml delete mode 100644 .idea/libraries/Maven__org_freehep_freehep_io_2_2_2.xml delete mode 100644 .idea/libraries/Maven__org_hamcrest_hamcrest_core_1_1.xml delete mode 100644 .idea/libraries/Maven__org_mongodb_mongo_java_driver_3_10_0.xml delete mode 100644 
.idea/libraries/Maven__org_openscience_HOSECodeBuilder_1_0.xml delete mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_atomtype_2_2.xml delete mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_charges_2_2.xml delete mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_core_2_2.xml delete mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_ctab_2_2.xml delete mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_data_2_2.xml delete mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_depict_2_2.xml delete mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_dict_2_2.xml delete mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_extra_2_2.xml delete mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_fingerprint_2_2.xml delete mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_forcefield_2_2.xml delete mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_formula_2_2.xml delete mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_interfaces_2_2.xml delete mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_io_2_2.xml delete mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_ioformats_2_2.xml delete mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_isomorphism_2_2.xml delete mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_legacy_2_2.xml delete mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_qsar_2_2.xml delete mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_qsaratomic_2_2.xml delete mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_reaction_2_2.xml delete mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_render_2_2.xml delete mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_renderawt_2_2.xml delete mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_renderbasic_2_2.xml delete mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_renderextra_2_2.xml delete mode 100644 
.idea/libraries/Maven__org_openscience_cdk_cdk_sdg_2_2.xml delete mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_signature_2_2.xml delete mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_silent_2_2.xml delete mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_smarts_2_2.xml delete mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_smiles_2_2.xml delete mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_standard_2_2.xml delete mode 100644 .idea/libraries/Maven__org_openscience_cdk_cdk_valencycheck_2_2.xml delete mode 100644 .idea/libraries/Maven__uk_ac_ebi_beam_beam_core_1_2.xml delete mode 100644 .idea/libraries/Maven__uk_ac_ebi_beam_beam_func_1_2.xml delete mode 100644 .idea/libraries/Maven__xalan_xalan_2_7_0.xml delete mode 100644 .idea/libraries/Maven__xerces_xercesImpl_2_8_0.xml delete mode 100644 .idea/libraries/Maven__xml_apis_xml_apis_1_3_03.xml delete mode 100644 .idea/libraries/Maven__xom_xom_1_2_5.xml delete mode 100644 .idea/misc.xml delete mode 100644 .idea/modules.xml delete mode 100644 .idea/vcs.xml diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..2ab7064c13012679653c32e2bb5b227b19e18d0f GIT binary patch literal 6148 zcmeHKO-lnY5Pi`K1us2%%w16Y2TQ33MZAc2t8JmwFS=WL-JkAznNhmBXDMZ7N#5jT zCdoe7WCI|LH?u2X1YpP_OmccmbZ^&@2MbdfWBS=V-)-t<-80O7Qd;{7F0jN5bL_7F z9FM5Utyy2<4O{x}Skd3&gRH!o-U1DoDVaH$8^$#BUimC>OXh`r8fV3J!}jm_?p?2) zRqUr!v8qrY6bJ=Efdf&1XSQs3Y8Z7W5DJ6>KMH7n$SlIju{A7P2di=gAPz_uqp!P! 
zaBRUED1t;43jBitUmqSS&j0`b literal 0 HcmV?d00001 diff --git a/.idea/.gitignore b/.idea/.gitignore deleted file mode 100644 index 5c98b42..0000000 --- a/.idea/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -# Default ignored files -/workspace.xml \ No newline at end of file diff --git a/.idea/artifacts/casekit_jar.xml b/.idea/artifacts/casekit_jar.xml deleted file mode 100644 index e16f507..0000000 --- a/.idea/artifacts/casekit_jar.xml +++ /dev/null @@ -1,63 +0,0 @@ - - - $PROJECT_DIR$/out/artifacts/casekit_jar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/casekit.iml b/.idea/casekit.iml deleted file mode 100644 index 9ad8882..0000000 --- a/.idea/casekit.iml +++ /dev/null @@ -1,69 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/compiler.xml b/.idea/compiler.xml deleted file mode 100644 index f2aa0ef..0000000 --- a/.idea/compiler.xml +++ /dev/null @@ -1,16 +0,0 @@ - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/copyright/MIT_License.xml b/.idea/copyright/MIT_License.xml deleted file mode 100644 index 23000f0..0000000 --- a/.idea/copyright/MIT_License.xml +++ /dev/null @@ -1,6 +0,0 @@ - - - - \ No newline at end of file diff --git a/.idea/copyright/profiles_settings.xml b/.idea/copyright/profiles_settings.xml deleted file mode 100644 index ea2b04b..0000000 --- a/.idea/copyright/profiles_settings.xml +++ /dev/null @@ -1,3 +0,0 @@ - - - \ No newline at end of file diff --git a/.idea/libraries/Maven__com_github_gilleain_signatures_signatures_1_1.xml b/.idea/libraries/Maven__com_github_gilleain_signatures_signatures_1_1.xml deleted file mode 100644 index 1b9226b..0000000 --- a/.idea/libraries/Maven__com_github_gilleain_signatures_signatures_1_1.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - 
- - - - - - - - - \ No newline at end of file diff --git a/.idea/libraries/Maven__com_google_guava_guava_17_0.xml b/.idea/libraries/Maven__com_google_guava_guava_17_0.xml deleted file mode 100644 index 2a9069c..0000000 --- a/.idea/libraries/Maven__com_google_guava_guava_17_0.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/libraries/Maven__commons_cli_commons_cli_1_3_1.xml b/.idea/libraries/Maven__commons_cli_commons_cli_1_3_1.xml deleted file mode 100644 index a1510b9..0000000 --- a/.idea/libraries/Maven__commons_cli_commons_cli_1_3_1.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/libraries/Maven__gov_nist_math_jama_1_0_3.xml b/.idea/libraries/Maven__gov_nist_math_jama_1_0_3.xml deleted file mode 100644 index 84df334..0000000 --- a/.idea/libraries/Maven__gov_nist_math_jama_1_0_3.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/libraries/Maven__javax_vecmath_vecmath_1_5_2.xml b/.idea/libraries/Maven__javax_vecmath_vecmath_1_5_2.xml deleted file mode 100644 index f04d302..0000000 --- a/.idea/libraries/Maven__javax_vecmath_vecmath_1_5_2.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/libraries/Maven__jgrapht_jgrapht_0_6_0.xml b/.idea/libraries/Maven__jgrapht_jgrapht_0_6_0.xml deleted file mode 100644 index 55a6d97..0000000 --- a/.idea/libraries/Maven__jgrapht_jgrapht_0_6_0.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/libraries/Maven__junit_junit_4_10.xml b/.idea/libraries/Maven__junit_junit_4_10.xml deleted file mode 100644 index ed8bf5f..0000000 --- a/.idea/libraries/Maven__junit_junit_4_10.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/libraries/Maven__org_apache_commons_commons_lang3_3_5.xml 
b/.idea/libraries/Maven__org_apache_commons_commons_lang3_3_5.xml deleted file mode 100644 index 666266c..0000000 --- a/.idea/libraries/Maven__org_apache_commons_commons_lang3_3_5.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/libraries/Maven__org_apache_commons_commons_math3_3_1_1.xml b/.idea/libraries/Maven__org_apache_commons_commons_math3_3_1_1.xml deleted file mode 100644 index 5627f0c..0000000 --- a/.idea/libraries/Maven__org_apache_commons_commons_math3_3_1_1.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/libraries/Maven__org_freehep_freehep_graphics2d_2_4.xml b/.idea/libraries/Maven__org_freehep_freehep_graphics2d_2_4.xml deleted file mode 100644 index 9b824aa..0000000 --- a/.idea/libraries/Maven__org_freehep_freehep_graphics2d_2_4.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/libraries/Maven__org_freehep_freehep_graphicsbase_2_4.xml b/.idea/libraries/Maven__org_freehep_freehep_graphicsbase_2_4.xml deleted file mode 100644 index f2fec4f..0000000 --- a/.idea/libraries/Maven__org_freehep_freehep_graphicsbase_2_4.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/libraries/Maven__org_freehep_freehep_graphicsio_2_4.xml b/.idea/libraries/Maven__org_freehep_freehep_graphicsio_2_4.xml deleted file mode 100644 index 062846f..0000000 --- a/.idea/libraries/Maven__org_freehep_freehep_graphicsio_2_4.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/libraries/Maven__org_freehep_freehep_graphicsio_pdf_2_4.xml b/.idea/libraries/Maven__org_freehep_freehep_graphicsio_pdf_2_4.xml deleted file mode 100644 index 0ae4e5b..0000000 --- a/.idea/libraries/Maven__org_freehep_freehep_graphicsio_pdf_2_4.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file 
diff --git a/.idea/libraries/Maven__org_freehep_freehep_graphicsio_ps_2_4.xml b/.idea/libraries/Maven__org_freehep_freehep_graphicsio_ps_2_4.xml deleted file mode 100644 index 9f51a68..0000000 --- a/.idea/libraries/Maven__org_freehep_freehep_graphicsio_ps_2_4.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/libraries/Maven__org_freehep_freehep_graphicsio_svg_2_4.xml b/.idea/libraries/Maven__org_freehep_freehep_graphicsio_svg_2_4.xml deleted file mode 100644 index f86533c..0000000 --- a/.idea/libraries/Maven__org_freehep_freehep_graphicsio_svg_2_4.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/libraries/Maven__org_freehep_freehep_graphicsio_tests_2_4.xml b/.idea/libraries/Maven__org_freehep_freehep_graphicsio_tests_2_4.xml deleted file mode 100644 index 39b60ac..0000000 --- a/.idea/libraries/Maven__org_freehep_freehep_graphicsio_tests_2_4.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/libraries/Maven__org_freehep_freehep_io_2_2_2.xml b/.idea/libraries/Maven__org_freehep_freehep_io_2_2_2.xml deleted file mode 100644 index 7eb8b43..0000000 --- a/.idea/libraries/Maven__org_freehep_freehep_io_2_2_2.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/libraries/Maven__org_hamcrest_hamcrest_core_1_1.xml b/.idea/libraries/Maven__org_hamcrest_hamcrest_core_1_1.xml deleted file mode 100644 index acdf443..0000000 --- a/.idea/libraries/Maven__org_hamcrest_hamcrest_core_1_1.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/libraries/Maven__org_mongodb_mongo_java_driver_3_10_0.xml b/.idea/libraries/Maven__org_mongodb_mongo_java_driver_3_10_0.xml deleted file mode 100644 index 8f2291e..0000000 --- a/.idea/libraries/Maven__org_mongodb_mongo_java_driver_3_10_0.xml +++ /dev/null @@ -1,13 +0,0 
@@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_HOSECodeBuilder_1_0.xml b/.idea/libraries/Maven__org_openscience_HOSECodeBuilder_1_0.xml deleted file mode 100644 index 09a9a46..0000000 --- a/.idea/libraries/Maven__org_openscience_HOSECodeBuilder_1_0.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_atomtype_2_2.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_atomtype_2_2.xml deleted file mode 100644 index 78820cb..0000000 --- a/.idea/libraries/Maven__org_openscience_cdk_cdk_atomtype_2_2.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_charges_2_2.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_charges_2_2.xml deleted file mode 100644 index dcb1a3c..0000000 --- a/.idea/libraries/Maven__org_openscience_cdk_cdk_charges_2_2.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_core_2_2.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_core_2_2.xml deleted file mode 100644 index 49df521..0000000 --- a/.idea/libraries/Maven__org_openscience_cdk_cdk_core_2_2.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_ctab_2_2.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_ctab_2_2.xml deleted file mode 100644 index b006654..0000000 --- a/.idea/libraries/Maven__org_openscience_cdk_cdk_ctab_2_2.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_data_2_2.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_data_2_2.xml deleted file mode 100644 index f25fd58..0000000 --- 
a/.idea/libraries/Maven__org_openscience_cdk_cdk_data_2_2.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_depict_2_2.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_depict_2_2.xml deleted file mode 100644 index 53e4383..0000000 --- a/.idea/libraries/Maven__org_openscience_cdk_cdk_depict_2_2.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_dict_2_2.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_dict_2_2.xml deleted file mode 100644 index de7548d..0000000 --- a/.idea/libraries/Maven__org_openscience_cdk_cdk_dict_2_2.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_extra_2_2.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_extra_2_2.xml deleted file mode 100644 index a305a2d..0000000 --- a/.idea/libraries/Maven__org_openscience_cdk_cdk_extra_2_2.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_fingerprint_2_2.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_fingerprint_2_2.xml deleted file mode 100644 index 9fcd14f..0000000 --- a/.idea/libraries/Maven__org_openscience_cdk_cdk_fingerprint_2_2.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_forcefield_2_2.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_forcefield_2_2.xml deleted file mode 100644 index 20e139e..0000000 --- a/.idea/libraries/Maven__org_openscience_cdk_cdk_forcefield_2_2.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_formula_2_2.xml 
b/.idea/libraries/Maven__org_openscience_cdk_cdk_formula_2_2.xml deleted file mode 100644 index e430695..0000000 --- a/.idea/libraries/Maven__org_openscience_cdk_cdk_formula_2_2.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_interfaces_2_2.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_interfaces_2_2.xml deleted file mode 100644 index 5bb8c1a..0000000 --- a/.idea/libraries/Maven__org_openscience_cdk_cdk_interfaces_2_2.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_io_2_2.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_io_2_2.xml deleted file mode 100644 index 469ac06..0000000 --- a/.idea/libraries/Maven__org_openscience_cdk_cdk_io_2_2.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_ioformats_2_2.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_ioformats_2_2.xml deleted file mode 100644 index 42f4717..0000000 --- a/.idea/libraries/Maven__org_openscience_cdk_cdk_ioformats_2_2.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_isomorphism_2_2.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_isomorphism_2_2.xml deleted file mode 100644 index bbb359d..0000000 --- a/.idea/libraries/Maven__org_openscience_cdk_cdk_isomorphism_2_2.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_legacy_2_2.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_legacy_2_2.xml deleted file mode 100644 index 6019e88..0000000 --- a/.idea/libraries/Maven__org_openscience_cdk_cdk_legacy_2_2.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff 
--git a/.idea/libraries/Maven__org_openscience_cdk_cdk_qsar_2_2.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_qsar_2_2.xml deleted file mode 100644 index 2b69145..0000000 --- a/.idea/libraries/Maven__org_openscience_cdk_cdk_qsar_2_2.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_qsaratomic_2_2.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_qsaratomic_2_2.xml deleted file mode 100644 index 7c60ca1..0000000 --- a/.idea/libraries/Maven__org_openscience_cdk_cdk_qsaratomic_2_2.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_reaction_2_2.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_reaction_2_2.xml deleted file mode 100644 index 7863327..0000000 --- a/.idea/libraries/Maven__org_openscience_cdk_cdk_reaction_2_2.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_render_2_2.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_render_2_2.xml deleted file mode 100644 index daf4dfb..0000000 --- a/.idea/libraries/Maven__org_openscience_cdk_cdk_render_2_2.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_renderawt_2_2.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_renderawt_2_2.xml deleted file mode 100644 index 8b0cf68..0000000 --- a/.idea/libraries/Maven__org_openscience_cdk_cdk_renderawt_2_2.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_renderbasic_2_2.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_renderbasic_2_2.xml deleted file mode 100644 index 89081f2..0000000 --- a/.idea/libraries/Maven__org_openscience_cdk_cdk_renderbasic_2_2.xml +++ /dev/null 
@@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_renderextra_2_2.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_renderextra_2_2.xml deleted file mode 100644 index 18a4c16..0000000 --- a/.idea/libraries/Maven__org_openscience_cdk_cdk_renderextra_2_2.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_sdg_2_2.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_sdg_2_2.xml deleted file mode 100644 index 5807025..0000000 --- a/.idea/libraries/Maven__org_openscience_cdk_cdk_sdg_2_2.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_signature_2_2.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_signature_2_2.xml deleted file mode 100644 index 5d53203..0000000 --- a/.idea/libraries/Maven__org_openscience_cdk_cdk_signature_2_2.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_silent_2_2.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_silent_2_2.xml deleted file mode 100644 index e6403f3..0000000 --- a/.idea/libraries/Maven__org_openscience_cdk_cdk_silent_2_2.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_smarts_2_2.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_smarts_2_2.xml deleted file mode 100644 index 91b8ffa..0000000 --- a/.idea/libraries/Maven__org_openscience_cdk_cdk_smarts_2_2.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_smiles_2_2.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_smiles_2_2.xml deleted file mode 100644 index 7c1e402..0000000 --- 
a/.idea/libraries/Maven__org_openscience_cdk_cdk_smiles_2_2.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_standard_2_2.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_standard_2_2.xml deleted file mode 100644 index 90dd31a..0000000 --- a/.idea/libraries/Maven__org_openscience_cdk_cdk_standard_2_2.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/libraries/Maven__org_openscience_cdk_cdk_valencycheck_2_2.xml b/.idea/libraries/Maven__org_openscience_cdk_cdk_valencycheck_2_2.xml deleted file mode 100644 index 447e642..0000000 --- a/.idea/libraries/Maven__org_openscience_cdk_cdk_valencycheck_2_2.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/libraries/Maven__uk_ac_ebi_beam_beam_core_1_2.xml b/.idea/libraries/Maven__uk_ac_ebi_beam_beam_core_1_2.xml deleted file mode 100644 index b8648d6..0000000 --- a/.idea/libraries/Maven__uk_ac_ebi_beam_beam_core_1_2.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/libraries/Maven__uk_ac_ebi_beam_beam_func_1_2.xml b/.idea/libraries/Maven__uk_ac_ebi_beam_beam_func_1_2.xml deleted file mode 100644 index 7c39ee3..0000000 --- a/.idea/libraries/Maven__uk_ac_ebi_beam_beam_func_1_2.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/libraries/Maven__xalan_xalan_2_7_0.xml b/.idea/libraries/Maven__xalan_xalan_2_7_0.xml deleted file mode 100644 index dd647b4..0000000 --- a/.idea/libraries/Maven__xalan_xalan_2_7_0.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/libraries/Maven__xerces_xercesImpl_2_8_0.xml b/.idea/libraries/Maven__xerces_xercesImpl_2_8_0.xml deleted file mode 100644 index 74ccea0..0000000 --- 
a/.idea/libraries/Maven__xerces_xercesImpl_2_8_0.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/libraries/Maven__xml_apis_xml_apis_1_3_03.xml b/.idea/libraries/Maven__xml_apis_xml_apis_1_3_03.xml deleted file mode 100644 index cc2538f..0000000 --- a/.idea/libraries/Maven__xml_apis_xml_apis_1_3_03.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/libraries/Maven__xom_xom_1_2_5.xml b/.idea/libraries/Maven__xom_xom_1_2_5.xml deleted file mode 100644 index 773ab34..0000000 --- a/.idea/libraries/Maven__xom_xom_1_2_5.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml deleted file mode 100644 index 1c3f3fd..0000000 --- a/.idea/misc.xml +++ /dev/null @@ -1,14 +0,0 @@ - - - - - - - - - \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml deleted file mode 100644 index 4b6e86b..0000000 --- a/.idea/modules.xml +++ /dev/null @@ -1,8 +0,0 @@ - - - - - - - - \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml deleted file mode 100644 index 35eb1dd..0000000 --- a/.idea/vcs.xml +++ /dev/null @@ -1,6 +0,0 @@ - - - - - - \ No newline at end of file From 61982077d31da28fb190c9d4ad0274cefd41ecbd Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Fri, 26 Jul 2019 20:12:19 +0200 Subject: [PATCH 125/405] removed IDE project files (3) --- .DS_Store | Bin 6148 -> 0 bytes 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 .DS_Store diff --git a/.DS_Store b/.DS_Store deleted file mode 100644 index 2ab7064c13012679653c32e2bb5b227b19e18d0f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 zcmeHKO-lnY5Pi`K1us2%%w16Y2TQ33MZAc2t8JmwFS=WL-JkAznNhmBXDMZ7N#5jT zCdoe7WCI|LH?u2X1YpP_OmccmbZ^&@2MbdfWBS=V-)-t<-80O7Qd;{7F0jN5bL_7F z9FM5Utyy2<4O{x}Skd3&gRH!o-U1DoDVaH$8^$#BUimC>OXh`r8fV3J!}jm_?p?2) 
zRqUr!v8qrY6bJ=Efdf&1XSQs3Y8Z7W5DJ6>KMH7n$SlIju{A7P2di=gAPz_uqp!P! zaBRUED1t;43jBitUmqSS&j0`b From a7bc97ffe4c6e04346e5f73f9288e06e00fed8fc Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Fri, 26 Jul 2019 20:14:41 +0200 Subject: [PATCH 126/405] removed IDE project files (3) --- .gitignore | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..0ab72c4 --- /dev/null +++ b/.gitignore @@ -0,0 +1,18 @@ +target/ +!.mvn/wrapper/maven-wrapper.jar + +### STS ### +.apt_generated +.classpath +.factorypath +.project +.settings +.springBeans + +### IntelliJ IDEA ### +.idea/ +*.iws +*.iml +*.ipr + +META-INF/ \ No newline at end of file From 7f45e28da8b9cd5a9dcc36bf6d00bf535194dce8 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Fri, 26 Jul 2019 20:16:14 +0200 Subject: [PATCH 127/405] removed IDE project files (3) --- .gitignore | 18 ------------------ .idea/.gitignore | 3 +++ 2 files changed, 3 insertions(+), 18 deletions(-) delete mode 100644 .gitignore create mode 100644 .idea/.gitignore diff --git a/.gitignore b/.gitignore deleted file mode 100644 index 0ab72c4..0000000 --- a/.gitignore +++ /dev/null @@ -1,18 +0,0 @@ -target/ -!.mvn/wrapper/maven-wrapper.jar - -### STS ### -.apt_generated -.classpath -.factorypath -.project -.settings -.springBeans - -### IntelliJ IDEA ### -.idea/ -*.iws -*.iml -*.ipr - -META-INF/ \ No newline at end of file diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..0e40fe8 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,3 @@ + +# Default ignored files +/workspace.xml \ No newline at end of file From 6be16e581a670f226378d88a8802d89285512cac Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Fri, 26 Jul 2019 20:17:23 +0200 Subject: [PATCH 128/405] removed IDE project files (4) --- .idea/.gitignore | 3 --- 1 file changed, 3 deletions(-) delete mode 100644 .idea/.gitignore diff --git a/.idea/.gitignore 
b/.idea/.gitignore deleted file mode 100644 index 0e40fe8..0000000 --- a/.idea/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ - -# Default ignored files -/workspace.xml \ No newline at end of file From b80abed32df8f916a0b5c756ebc54f35c5c1883c Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Sun, 29 Sep 2019 20:42:40 +0200 Subject: [PATCH 129/405] - in getStructuresFromSDFile(): now sets aromaticity in each structure plus kekulizing it --- src/casekit/NMR/dbservice/NMRShiftDB.java | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/casekit/NMR/dbservice/NMRShiftDB.java b/src/casekit/NMR/dbservice/NMRShiftDB.java index 25d25f7..1fb4212 100644 --- a/src/casekit/NMR/dbservice/NMRShiftDB.java +++ b/src/casekit/NMR/dbservice/NMRShiftDB.java @@ -32,7 +32,7 @@ public class NMRShiftDB { * Returns the molecules of a given MOL/SDF file. * This function sets the molecule aromaticity (with allowed exocyclic pi * bonds) by using the - * {@link Utils#setAromaticitiesInAtomContainer(org.openscience.cdk.interfaces.IAtomContainer)} + * {@link Utils#setAromaticityAndKekulize(IAtomContainer)} * function. * * @param pathToNMRShiftDB path to NMRShiftDB file @@ -52,7 +52,7 @@ public static IAtomContainerSet getStructuresFromSDFile(final String pathToNMRSh while (iterator.hasNext()) { ac = iterator.next(); if(setAromaticity){ - Utils.setAromaticitiesInAtomContainer(ac); + Utils.setAromaticityAndKekulize(ac); } acSet.addAtomContainer(ac); } @@ -193,7 +193,9 @@ public static HashMap getSSCComponentsFromNMRShiftDB(final St structureSetWithSpectra.put(structureSetWithSpectra.size(), new Object[]{ac, spectrum, assignment}); // } - Utils.setAromaticitiesInAtomContainer(ac); + // set aromaticities +// Utils.setAromaticitiesInAtomContainer(ac); + // add Kekulization? 
} return structureSetWithSpectra; From 3af6967475c4547b2b68442d0bc361ee0a35ee13 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Sun, 29 Sep 2019 20:43:30 +0200 Subject: [PATCH 130/405] - renaming of some variables --- src/casekit/NMR/model/Assignment.java | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/casekit/NMR/model/Assignment.java b/src/casekit/NMR/model/Assignment.java index 2c6fec7..387e451 100644 --- a/src/casekit/NMR/model/Assignment.java +++ b/src/casekit/NMR/model/Assignment.java @@ -138,22 +138,23 @@ public Boolean isFullyAssigned(final int dim){ } /** - * Adds a new assignment entry for a further signal. The given query spectrum signal indices - * will be stored for each dimension of the signal/spectrum. + * Adds a new assignment entry (index), e.g. for a new signal. The given assignment indices + * will be stored for each dimension of the new assignment entry (index). + * + * @param assignments assignment indices to store in each dimension of new assignment entry * - * @param indicesInQuerySpectra * @return */ - public boolean addAssignment(final int[] indicesInQuerySpectra){ - if(!this.compareNDim(indicesInQuerySpectra.length)){ + public boolean addAssignment(final int[] assignments){ + if(!this.compareNDim(assignments.length)){ return false; } - final int[][] extendedAssignments = new int[this.getNDim()][this.getAssignmentsCount()+1]; + final int[][] extendedAssignments = new int[this.getNDim()][this.getAssignmentsCount() + 1]; for (int dim = 0; dim < this.getNDim(); dim++) { for (int i = 0; i < this.getAssignmentsCount(); i++) { extendedAssignments[dim][i] = this.getAssignment(dim, i); } - extendedAssignments[dim][this.getAssignmentsCount()] = indicesInQuerySpectra[dim]; + extendedAssignments[dim][this.getAssignmentsCount()] = assignments[dim]; } this.assignments = extendedAssignments; From aff4720ad758db97f296e691c792c178c1b1174d Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Sun, 29 Sep 2019 20:44:28 +0200 
Subject: [PATCH 131/405] - added an import --- src/casekit/NMR/interpretation/InterpretData.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/casekit/NMR/interpretation/InterpretData.java b/src/casekit/NMR/interpretation/InterpretData.java index ee7f972..190f9e4 100644 --- a/src/casekit/NMR/interpretation/InterpretData.java +++ b/src/casekit/NMR/interpretation/InterpretData.java @@ -18,7 +18,7 @@ import java.util.ArrayList; import java.util.HashMap; -import org.openscience.cdk.Atom; +import org.openscience.cdk.silent.Atom; import org.openscience.cdk.CDKConstants; import org.openscience.cdk.interfaces.IAtom; import org.openscience.cdk.interfaces.IAtomContainer; From 5e4b37fa50a518b5172426671e88ae2eb80549dc Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Sun, 29 Sep 2019 20:46:36 +0200 Subject: [PATCH 132/405] - in predictShift(): now return the median of shifts related to the given HOSE code instead of mean --- src/casekit/NMR/predict/Predict.java | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/casekit/NMR/predict/Predict.java b/src/casekit/NMR/predict/Predict.java index 2bafc9b..597d7c3 100644 --- a/src/casekit/NMR/predict/Predict.java +++ b/src/casekit/NMR/predict/Predict.java @@ -43,7 +43,7 @@ public class Predict { /** * Predicts a shift value for a central atom based on its HOSE code and a - * given HOSE code lookup table. The prediction is done by using the mean + * given HOSE code lookup table. The prediction is done by using the median * of all occurring shifts in lookup table for the given HOSE code.
* Specified for carbons (13C) only -> {@link casekit.NMR.Utils#getMultiplicityFromHydrogenCount(int)}. * @@ -52,12 +52,13 @@ public class Predict { * @param HOSECode specific HOSE code to use for shift prediction * @return null if HOSE code does not exist in lookup table * - * @see casekit.NMR.Utils#getRMS(ArrayList) + * @see casekit.NMR.Utils#getMedian(ArrayList) * */ public static Double predictShift(final HashMap> HOSECodeLookupTable, final String HOSECode) { if (HOSECodeLookupTable.containsKey(HOSECode)) { - return Utils.getMean(HOSECodeLookupTable.get(HOSECode)); + return Utils.getMedian(HOSECodeLookupTable.get(HOSECode)); +// return Utils.getMean(HOSECodeLookupTable.get(HOSECode)); } return null; @@ -73,7 +74,7 @@ public static Double predictShift(final HashMap> HOSEC * of occurring central atoms as values * @param ac structure to predict from * @param atomIndex index of central atom in structure for HOSE code generation - * @param maxSphere maximum sphere to use for HOSE code generation + * @param maxSphere maximum sphere to use for HOSE code generation or null for unlimited * @param nucleus nucleus (e.g. "13C") for signal creation * * @return null if HOSE code of selected atom does not exist in lookup table @@ -108,7 +109,7 @@ public static Signal predictSignal(final HashMap> HOSE * @param HOSECodeLookupTable HashMap containing HOSE codes as keys and a list of chemical shifts * of occurring central atoms as values * @param ac structure to predict from - * @param maxSphere maximum sphere to use for HOSE code generation + * @param maxSphere maximum sphere to use for HOSE code generation or null for unlimited * @param nucleus nucleus (e.g. 
"13C") for signal creation * @return null if a HOSE code of one atom does not exist in lookup table * @@ -124,7 +125,8 @@ public static Spectrum predictSpectrum(final HashMap> if (atom.getSymbol().equals(Utils.getAtomTypeFromSpectrum(predictedSpectrum, 0))) { signal = Predict.predictSignal(HOSECodeLookupTable, ac, atom.getIndex(), maxSphere, nucleus); if(signal == null){ - return null; + continue; +// return null; } predictedSpectrum.addSignal(signal); } From 673621d50ec42ec9730267f7d2270b2dd27b4b41 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Sun, 29 Sep 2019 20:47:46 +0200 Subject: [PATCH 133/405] - some cdk imports have changed --- pom.xml | 59 +++++++++++++++++---------------------------------------- 1 file changed, 17 insertions(+), 42 deletions(-) diff --git a/pom.xml b/pom.xml index c8e91d3..48a4cdf 100644 --- a/pom.xml +++ b/pom.xml @@ -46,62 +46,37 @@ 1.3.1 jar
- - org.openscience.cdk - cdk-core - 2.2 - jar - - - org.openscience.cdk - cdk-legacy - 2.2 - jar - - - org.openscience.cdk - cdk-depict - 2.2 - jar - - - org.openscience.cdk - cdk-qsaratomic - 2.2 - jar - org.apache.commons commons-lang3 3.5 jar - - org.openscience.cdk - cdk-fingerprint - 2.2 - jar - - - org.openscience.cdk - cdk-silent - 2.2 - - - org.openscience.cdk - cdk-standard - 2.2 - jar - org.mongodb mongo-java-driver 3.10.0 + + org.openscience.cdk + cdk-legacy + 2.3 + jar + + + org.openscience.cdk + cdk-fingerprint + 2.3 + + + org.openscience.cdk + cdk-depict + 2.3 + org.openscience HOSECodeBuilder 1.0 - + From 7744170275c2aa9e3ed348980cc9f79998b62d3f Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Sun, 29 Sep 2019 20:52:21 +0200 Subject: [PATCH 134/405] - certain changes which led to a TODO to repair HMBC and COSY information output --- src/casekit/NMR/convert/LSDConverter.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/casekit/NMR/convert/LSDConverter.java b/src/casekit/NMR/convert/LSDConverter.java index 645095f..f7e8788 100644 --- a/src/casekit/NMR/convert/LSDConverter.java +++ b/src/casekit/NMR/convert/LSDConverter.java @@ -17,9 +17,9 @@ import java.io.IOException; import java.util.HashMap; -import org.openscience.cdk.formula.MolecularFormula; import org.openscience.cdk.interfaces.IAtomContainer; import org.openscience.cdk.interfaces.IBond; +import org.openscience.cdk.silent.MolecularFormula; import org.openscience.cdk.tools.manipulator.MolecularFormulaManipulator; /** From 7056a9f6490a931ce384d013abdddcf36fa77294 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Sun, 29 Sep 2019 20:53:46 +0200 Subject: [PATCH 135/405] - added a method to set the aromaticity in a structure an kekulize it --- src/casekit/NMR/Utils.java | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/casekit/NMR/Utils.java b/src/casekit/NMR/Utils.java index 9c2a711..3d5cd25 100644 --- a/src/casekit/NMR/Utils.java +++ 
b/src/casekit/NMR/Utils.java @@ -13,11 +13,11 @@ import casekit.NMR.model.Spectrum; -import casekit.NMR.parse.Parser; import org.apache.commons.lang3.StringUtils; import org.openscience.cdk.CDKConstants; import org.openscience.cdk.aromaticity.Aromaticity; import org.openscience.cdk.aromaticity.ElectronDonation; +import org.openscience.cdk.aromaticity.Kekulization; import org.openscience.cdk.atomtype.CDKAtomTypeMatcher; import org.openscience.cdk.depict.DepictionGenerator; import org.openscience.cdk.exception.CDKException; @@ -31,9 +31,7 @@ import org.openscience.cdk.tools.manipulator.AtomContainerManipulator; import org.openscience.cdk.tools.manipulator.AtomTypeManipulator; import org.openscience.cdk.tools.manipulator.MolecularFormulaManipulator; -import org.xml.sax.SAXException; -import javax.xml.parsers.ParserConfigurationException; import java.io.*; import java.util.*; import java.util.concurrent.ExecutorService; @@ -938,13 +936,18 @@ public static int getExplicitHydrogenCount(final IAtomContainer ac){ } - public static void setAromaticitiesInAtomContainer(final IAtomContainer ac) throws CDKException { + public static void setAromaticity(final IAtomContainer ac) throws CDKException { AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(ac); final ElectronDonation model = ElectronDonation.cdkAllowingExocyclic(); final CycleFinder cycles = Cycles.all(ac.getAtomCount()); final Aromaticity aromaticity = new Aromaticity(model, cycles); aromaticity.apply(ac); } + + public static void setAromaticityAndKekulize(final IAtomContainer ac) throws CDKException { + Utils.setAromaticity(ac); + Kekulization.kekulize(ac); + } /** @@ -958,7 +961,7 @@ public static IAtomContainer removeAtoms(final IAtomContainer ac, final String a final ArrayList toRemoveList = new ArrayList<>(); for (IAtom atomA : ac.atoms()) { - if (atomA.getSymbol().equals(atomType)) {// detect wether the current atom A is a from the given atom type + if (atomA.getSymbol().equals(atomType)) {// detect 
whether the current atom A is a from the given atom type toRemoveList.add(atomA); } } From 21ca92a8c8738d3b977d59c8de8a150b2aeacf72 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Sun, 29 Sep 2019 20:55:47 +0200 Subject: [PATCH 136/405] - updates some imports --- src/casekit/HOSECodePredictor.java | 31 ++++++++++++------------------ 1 file changed, 12 insertions(+), 19 deletions(-) diff --git a/src/casekit/HOSECodePredictor.java b/src/casekit/HOSECodePredictor.java index cc053a1..ae632bd 100644 --- a/src/casekit/HOSECodePredictor.java +++ b/src/casekit/HOSECodePredictor.java @@ -8,22 +8,7 @@ package casekit; import casekit.NMR.Utils; -import java.io.BufferedReader; -import java.io.File; -import java.io.FileReader; -import java.io.IOException; -import java.text.DecimalFormat; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.StringTokenizer; - -import org.apache.commons.cli.CommandLine; -import org.apache.commons.cli.CommandLineParser; -import org.apache.commons.cli.DefaultParser; -import org.apache.commons.cli.HelpFormatter; -import org.apache.commons.cli.Option; -import org.apache.commons.cli.Options; -import org.apache.commons.cli.ParseException; +import org.apache.commons.cli.*; import org.openscience.cdk.CDKConstants; import org.openscience.cdk.depict.DepictionGenerator; import org.openscience.cdk.exception.CDKException; @@ -33,6 +18,15 @@ import org.openscience.cdk.silent.SilentChemObjectBuilder; import org.openscience.cdk.tools.HOSECodeGenerator; +import java.io.BufferedReader; +import java.io.File; +import java.io.FileReader; +import java.io.IOException; +import java.text.DecimalFormat; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.StringTokenizer; + /** * Predicts NMRS spectra by lookup of HOSE codes @@ -272,9 +266,8 @@ public void generatePicture(IAtomContainer ac, String path) throws IOException, /** * This predictor cannot handle explicit hydrogens. 
Where therefore convert them to implicit first */ - void fixExplicitHydrogens(IAtomContainer ac) - { - Utils.convertExplicitToImplicitHydrogens(ac); + private void fixExplicitHydrogens(IAtomContainer ac) { + Utils.convertExplicitToImplicitHydrogens(ac); } private void parseArgs(String[] args) throws ParseException From 43f1e9c8c418eec69ce8ab598d84704de949bc33 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Fri, 4 Oct 2019 20:14:44 +0200 Subject: [PATCH 137/405] - minor changes in getOutliers() - new functions: - removeOutliers - getMean() with HashMap as input - isValidBondAddition() --- src/casekit/NMR/Utils.java | 89 +++++++++++++++++++++++--------------- 1 file changed, 55 insertions(+), 34 deletions(-) diff --git a/src/casekit/NMR/Utils.java b/src/casekit/NMR/Utils.java index 3d5cd25..cde34cd 100644 --- a/src/casekit/NMR/Utils.java +++ b/src/casekit/NMR/Utils.java @@ -502,33 +502,50 @@ public static void generatePicture(final IAtomContainer ac, final String path) t final DepictionGenerator dg = new DepictionGenerator().withSize(1200, 1200).withAtomColors().withFillToFit().withAtomNumbers(); dg.depict(ac).writeTo(path); } - - + + + /** + * Detects outliers in given array list of input values and removes them.
+ * Here, outliers are those which are outside of a calculated lower and upper bound (whisker). + * The interquartile range (IQR) of the input values is therefore multiplied with a given value + * for whisker creation. + * + * @param input list of values to process + * @param multiplierIQR multiplier for IQR to use for lower and upper bound creation + * @return new array list without values outside the generated boundaries + */ + public static ArrayList removeOutliers(final ArrayList input, final double multiplierIQR){ + final ArrayList inputWithoutOutliers = new ArrayList<>(input); + inputWithoutOutliers.removeAll(Utils.getOutliers(inputWithoutOutliers, multiplierIQR)); + + return inputWithoutOutliers; + } + /** * * @param input * @return */ - public static ArrayList getOutliers(ArrayList input) { - final ArrayList outliers = new ArrayList<>(); + public static ArrayList getOutliers(final ArrayList input, final double multiplierIQR) { + final ArrayList outliers = new ArrayList<>(); if(input.size() <= 1){ return outliers; } Collections.sort(input); - final List data1 = input.subList(0, input.size() / 2); - final List data2; + final ArrayList data1 = new ArrayList<>(input.subList(0, input.size() / 2)); + final ArrayList data2; if (input.size() % 2 == 0) { - data2 = input.subList(input.size() / 2, input.size()); + data2 = new ArrayList<>(input.subList(input.size() / 2, input.size())); } else { - data2 = input.subList(input.size() / 2 + 1, input.size()); + data2 = new ArrayList<>(input.subList(input.size() / 2 + 1, input.size())); } - final double q1 = getMedian(data1); - final double q3 = getMedian(data2); + final double q1 = getMedian(new ArrayList<>(data1)); + final double q3 = getMedian(new ArrayList<>(data2)); final double iqr = q3 - q1; - final double lowerFence = q1 - 1.5 * iqr; - final double upperFence = q3 + 1.5 * iqr; + final double lowerBound = q1 - multiplierIQR * iqr; + final double upperBound = q3 + multiplierIQR * iqr; for (int i = 0; i < input.size(); 
i++) { - if ((input.get(i) < lowerFence) || (input.get(i) > upperFence)) { + if ((input.get(i) < lowerBound) || (input.get(i) > upperBound)) { outliers.add(input.get(i)); } } @@ -536,26 +553,6 @@ public static ArrayList getOutliers(ArrayList input) { // System.out.println("output size: " + outliers.size()); return outliers; } - - /** - * - * @param data - * @return - */ - public static Double getMedian(final List data) { - if((data == null) || data.isEmpty()){ - return null; - } - if(data.size() == 1){ - return data.get(0).doubleValue(); - } - Collections.sort(data); - if (data.size() % 2 == 1) { - return data.get(data.size() / 2).doubleValue(); - } else { - return (data.get(data.size() / 2 - 1) + data.get(data.size() / 2)) / 2.0; - } - } /** @@ -654,6 +651,31 @@ public static Double getMean(final Double[] data) { } return ((data.length - nullCounter) != 0) ? (sum / (data.length - nullCounter)) : null; } + + public static HashMap getMean(final HashMap> lookup) { + + final HashMap means = new HashMap<>(); + Double meanInList; + for (final String key : lookup.keySet()) { + meanInList = Utils.getMean(lookup.get(key)); + if (meanInList != null) { + means.put(key, meanInList); + } + } + + return means; + } + + public static boolean isValidBondAddition(final IAtomContainer ac, final int atomIndex, final IBond bondToAdd){ + + if(ac.getAtom(atomIndex).isAromatic()){ + System.out.println(atomIndex + " --> (" + Utils.getBondOrderSum(ac, atomIndex, true) + " + " + Utils.getBondOrderAsNumeric(bondToAdd) + " - 1) = " + (Utils.getBondOrderSum(ac, atomIndex, true) + Utils.getBondOrderAsNumeric(bondToAdd) - 1) + " <= " + ac.getAtom(atomIndex).getValency() + " ? 
-> " + ((Utils.getBondOrderSum(ac, atomIndex, true) + Utils.getBondOrderAsNumeric(bondToAdd) - 1) <= ac.getAtom(atomIndex).getValency())); + return ((Utils.getBondOrderSum(ac, atomIndex, true) - 1) + Utils.getBondOrderAsNumeric(bondToAdd)) <= ac.getAtom(atomIndex).getValency(); + } + + System.out.println(atomIndex + " --> " + Utils.getBondOrderSum(ac, atomIndex, true) + " + " + Utils.getBondOrderAsNumeric(bondToAdd) + " = " + (Utils.getBondOrderSum(ac, atomIndex, true) + Utils.getBondOrderAsNumeric(bondToAdd)) + " <= " + ac.getAtom(atomIndex).getValency() + " ? -> " + ((Utils.getBondOrderSum(ac, atomIndex, true) + Utils.getBondOrderAsNumeric(bondToAdd)) <= ac.getAtom(atomIndex).getValency())); + return (Utils.getBondOrderSum(ac, atomIndex, true) + Utils.getBondOrderAsNumeric(bondToAdd)) <= ac.getAtom(atomIndex).getValency(); + } /** @@ -812,7 +834,6 @@ public static void addImplicitHydrogens(final IAtomContainer ac) throws CDKExcep * @param lookup * @return * - * @deprecated */ public static HashMap getMedian(final HashMap> lookup) { From 5de9598a4634ae067da83671ab9678a9446367b4 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Sat, 12 Oct 2019 14:19:47 +0200 Subject: [PATCH 138/405] - changed pickClosestSignal() to pickClosestSignals() which now returns a list of closest signals --- src/casekit/NMR/model/Spectrum.java | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/src/casekit/NMR/model/Spectrum.java b/src/casekit/NMR/model/Spectrum.java index c90c03f..27dd74b 100644 --- a/src/casekit/NMR/model/Spectrum.java +++ b/src/casekit/NMR/model/Spectrum.java @@ -489,27 +489,32 @@ public String getStandard() { /** - * Returns the signal index closest to the given shift. If no Signal is found within the interval - * defined by pickprecision, -1 is returned. + * Returns the signal index (or indices) closest to the given shift. If no signal is found within the interval + * defined by pickprecision, an empty list is returned. 
* @param shift query shift * @param dim dimension in spectrum to look in * @param pickPrecision tolerance value for search window * @return */ - public int pickClosestSignal(final double shift, final int dim, final double pickPrecision) { - int matchIndex = -1; + public ArrayList pickClosestSignals(final double shift, final int dim, final double pickPrecision) { + final ArrayList matchIndices = new ArrayList<>(); if(!this.containsDim(dim)){ - return matchIndex; + return matchIndices; } - double diff = pickPrecision; + double minDiff = pickPrecision; + // detect the minimal difference between a signal shift to the given query shift for (int s = 0; s < this.getSignalCount(); s++) { - if (Math.abs(this.getShift(s, dim) - shift) < diff) { - diff = Math.abs(this.getShift(s, dim) - shift); - matchIndex = s; + if (Math.abs(this.getShift(s, dim) - shift) < minDiff) { + minDiff = Math.abs(this.getShift(s, dim) - shift); } - } + } + for (int s = 0; s < this.getSignalCount(); s++) { + if (Math.abs(this.getShift(s, dim) - shift) == minDiff) { + matchIndices.add(s); + } + } - return matchIndex; + return matchIndices; } /** From 39f5e088801476ca322eaca665fa62f954f2b46e Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Sat, 12 Oct 2019 14:21:45 +0200 Subject: [PATCH 139/405] - in matchSpectra(): handles now the situation if there are more than just exactly one closest signal for a query signal --- src/casekit/NMR/match/Matcher.java | 71 ++++++++++++++++++++++-------- 1 file changed, 53 insertions(+), 18 deletions(-) diff --git a/src/casekit/NMR/match/Matcher.java b/src/casekit/NMR/match/Matcher.java index ef0ff58..276294b 100644 --- a/src/casekit/NMR/match/Matcher.java +++ b/src/casekit/NMR/match/Matcher.java @@ -20,6 +20,7 @@ import org.openscience.cdk.exception.CDKException; import org.openscience.cdk.similarity.Tanimoto; +import java.util.ArrayList; import java.util.Arrays; import java.util.HashSet; @@ -184,35 +185,69 @@ public static Assignment matchSpectra(final Spectrum 
spectrum1, final Spectrum s } final Assignment matchAssignments = new Assignment(spectrum1); final HashSet pickedSignalIndices = new HashSet<>(); + ArrayList pickedSignalIndicesSpectrum2; int pickedSignalIndexSpectrum2; int pickedSignalIndexSpectrum2Prev; for (int i = 0; i < spectrum1.getSignalCount(); i++) { if (spectrum1.getShift(i, dim1) == null) { pickedSignalIndexSpectrum2 = -1; } else { - pickedSignalIndexSpectrum2 = spectrum2.pickClosestSignal(spectrum1.getShift(i, dim1), dim2, shiftTol); - // if matched signal is already assigned, then consider symmetries (equiv. signals) - if (pickedSignalIndices.contains(pickedSignalIndexSpectrum2)) { - // symmetry exists - if (spectrum2.hasEquivalences(pickedSignalIndexSpectrum2)) { - pickedSignalIndexSpectrum2Prev = pickedSignalIndexSpectrum2; - // assign the next signal in equivalence list - for (final int equivalentSignalIndexSpectrum2 : spectrum2.getEquivalentSignals(pickedSignalIndexSpectrum2)) { - if (!pickedSignalIndices.contains(equivalentSignalIndexSpectrum2)) { - pickedSignalIndexSpectrum2 = equivalentSignalIndexSpectrum2; - break; +// pickedSignalIndexSpectrum2 = spectrum2.pickClosestSignal(spectrum1.getShift(i, dim1), dim2, shiftTol); +// // if matched signal is already assigned, then consider symmetries (equiv. 
signals) +// if (pickedSignalIndices.contains(pickedSignalIndexSpectrum2)) { +// // symmetry exists +// if (spectrum2.hasEquivalences(pickedSignalIndexSpectrum2)) { +// pickedSignalIndexSpectrum2Prev = pickedSignalIndexSpectrum2; +// // assign the next signal in equivalence list +// for (final int equivalentSignalIndexSpectrum2 : spectrum2.getEquivalentSignals(pickedSignalIndexSpectrum2)) { +// if (!pickedSignalIndices.contains(equivalentSignalIndexSpectrum2)) { +// pickedSignalIndexSpectrum2 = equivalentSignalIndexSpectrum2; +// break; +// } +// } +// // if no further equivalent signal exists then that match is not valid +// if (pickedSignalIndexSpectrum2 == pickedSignalIndexSpectrum2Prev) { +// pickedSignalIndexSpectrum2 = -1; +// } +// } else { +// // not symmetric signals but the same (predicted) or very similar shifts and multiple assignments to catch +// // -> still open +// pickedSignalIndexSpectrum2 = -1; +// } +// } + + + pickedSignalIndexSpectrum2 = -1; + pickedSignalIndicesSpectrum2 = spectrum2.pickClosestSignals(spectrum1.getShift(i, dim1), dim2, shiftTol); + for (int j = 0; j < pickedSignalIndicesSpectrum2.size(); j++) { + pickedSignalIndexSpectrum2 = pickedSignalIndicesSpectrum2.get(j); + // if matched signal is already assigned, then consider symmetries (equiv. 
signals) + if (pickedSignalIndices.contains(pickedSignalIndexSpectrum2)) { + // symmetry exists + if (spectrum2.hasEquivalences(pickedSignalIndexSpectrum2)) { + pickedSignalIndexSpectrum2Prev = pickedSignalIndexSpectrum2; + // assign the next signal in equivalence list + for (final int equivalentSignalIndexSpectrum2 : spectrum2.getEquivalentSignals(pickedSignalIndexSpectrum2)) { + if (!pickedSignalIndices.contains(equivalentSignalIndexSpectrum2)) { + pickedSignalIndexSpectrum2 = equivalentSignalIndexSpectrum2; + break; + } } - } - // if no further equivalent signal exists then that match is not valid - if (pickedSignalIndexSpectrum2 == pickedSignalIndexSpectrum2Prev) { + // if no further equivalent signal exists then that match is not valid + if (pickedSignalIndexSpectrum2 == pickedSignalIndexSpectrum2Prev) { + pickedSignalIndexSpectrum2 = -1; + } + } else { + // not symmetric signals but the same (predicted) or very similar shifts and multiple assignments to catch + // -> still open pickedSignalIndexSpectrum2 = -1; } - } else { - // not symmetric signals but the same (predicted) or very similar shifts and multiple assignments to catch - // -> still open - pickedSignalIndexSpectrum2 = -1; + } + if(pickedSignalIndexSpectrum2 != -1){ + break; } } + // check multiplicity if ((spectrum1.getMultiplicity(i) == null) || (spectrum2.getMultiplicity(pickedSignalIndexSpectrum2) == null) || !spectrum1.getMultiplicity(i).equals(spectrum2.getMultiplicity(pickedSignalIndexSpectrum2))) { pickedSignalIndexSpectrum2 = -1; From c7b10849778fe77b093aa690624540661346bf0d Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Sat, 12 Oct 2019 14:23:13 +0200 Subject: [PATCH 140/405] - added a TODO --- src/casekit/NMR/match/Matcher.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/casekit/NMR/match/Matcher.java b/src/casekit/NMR/match/Matcher.java index 276294b..5d9b016 100644 --- a/src/casekit/NMR/match/Matcher.java +++ b/src/casekit/NMR/match/Matcher.java @@ -238,8 
+238,7 @@ public static Assignment matchSpectra(final Spectrum spectrum1, final Spectrum s pickedSignalIndexSpectrum2 = -1; } } else { - // not symmetric signals but the same (predicted) or very similar shifts and multiple assignments to catch - // -> still open + // @TODO not symmetric signals but the same (predicted) or very similar shifts and multiple assignments to catch pickedSignalIndexSpectrum2 = -1; } } From 9f7b15b12d7cec1333dec46fafe445ef1d2a93fd Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Thu, 17 Oct 2019 02:03:55 +0200 Subject: [PATCH 141/405] - added Utils.checkIndexInAtomContainer() usage in isSaturated() method --- src/casekit/NMR/Utils.java | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/casekit/NMR/Utils.java b/src/casekit/NMR/Utils.java index cde34cd..37b9219 100644 --- a/src/casekit/NMR/Utils.java +++ b/src/casekit/NMR/Utils.java @@ -669,11 +669,11 @@ public static HashMap getMean(final HashMap (" + Utils.getBondOrderSum(ac, atomIndex, true) + " + " + Utils.getBondOrderAsNumeric(bondToAdd) + " - 1) = " + (Utils.getBondOrderSum(ac, atomIndex, true) + Utils.getBondOrderAsNumeric(bondToAdd) - 1) + " <= " + ac.getAtom(atomIndex).getValency() + " ? -> " + ((Utils.getBondOrderSum(ac, atomIndex, true) + Utils.getBondOrderAsNumeric(bondToAdd) - 1) <= ac.getAtom(atomIndex).getValency())); +// System.out.println(atomIndex + " --> (" + Utils.getBondOrderSum(ac, atomIndex, true) + " + " + Utils.getBondOrderAsNumeric(bondToAdd) + " - 1) = " + (Utils.getBondOrderSum(ac, atomIndex, true) + Utils.getBondOrderAsNumeric(bondToAdd) - 1) + " <= " + ac.getAtom(atomIndex).getValency() + " ? 
-> " + ((Utils.getBondOrderSum(ac, atomIndex, true) + Utils.getBondOrderAsNumeric(bondToAdd) - 1) <= ac.getAtom(atomIndex).getValency())); return ((Utils.getBondOrderSum(ac, atomIndex, true) - 1) + Utils.getBondOrderAsNumeric(bondToAdd)) <= ac.getAtom(atomIndex).getValency(); } - System.out.println(atomIndex + " --> " + Utils.getBondOrderSum(ac, atomIndex, true) + " + " + Utils.getBondOrderAsNumeric(bondToAdd) + " = " + (Utils.getBondOrderSum(ac, atomIndex, true) + Utils.getBondOrderAsNumeric(bondToAdd)) + " <= " + ac.getAtom(atomIndex).getValency() + " ? -> " + ((Utils.getBondOrderSum(ac, atomIndex, true) + Utils.getBondOrderAsNumeric(bondToAdd)) <= ac.getAtom(atomIndex).getValency())); +// System.out.println(atomIndex + " --> " + Utils.getBondOrderSum(ac, atomIndex, true) + " + " + Utils.getBondOrderAsNumeric(bondToAdd) + " = " + (Utils.getBondOrderSum(ac, atomIndex, true) + Utils.getBondOrderAsNumeric(bondToAdd)) + " <= " + ac.getAtom(atomIndex).getValency() + " ? -> " + ((Utils.getBondOrderSum(ac, atomIndex, true) + Utils.getBondOrderAsNumeric(bondToAdd)) <= ac.getAtom(atomIndex).getValency())); return (Utils.getBondOrderSum(ac, atomIndex, true) + Utils.getBondOrderAsNumeric(bondToAdd)) <= ac.getAtom(atomIndex).getValency(); } @@ -787,7 +787,10 @@ public static HashMap getRMS(final HashMap= ac.getAtom(atomIndex).getValency(); } @@ -1073,7 +1076,7 @@ public static Float getBondOrderAsNumeric(final IBond bond) { return bondOrderAsNumeric; } - public static Float getBondOrderSum(final IAtomContainer ac, final int atomIndex, final boolean includeImplicitHydrogenCount) { + public static Float getBondOrderSum(final IAtomContainer ac, final int atomIndex, final boolean includeImplicitHydrogenCount) { if(!Utils.checkIndexInAtomContainer(ac, atomIndex)){ return null; } From ab9b26f5f0fdc418473fa02dde11d06420cab9e2 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Sat, 19 Oct 2019 16:06:30 +0200 Subject: [PATCH 142/405] - added removeAssignment() method --- 
src/casekit/NMR/model/Assignment.java | 34 +++++++++++++++++++++------ 1 file changed, 27 insertions(+), 7 deletions(-) diff --git a/src/casekit/NMR/model/Assignment.java b/src/casekit/NMR/model/Assignment.java index 387e451..5ea4a7e 100644 --- a/src/casekit/NMR/model/Assignment.java +++ b/src/casekit/NMR/model/Assignment.java @@ -62,7 +62,7 @@ private int[][] initAssignments(final int nDim, final int nSignals){ * @return */ public boolean setAssignment(final int dim, final int index, final int assignment){ - if(!this.containsDim(dim) || !this.checkSpectrumIndex(dim, index)){ + if(!this.containsDim(dim) || !this.checkIndex(dim, index)){ return false; } this.assignments[dim][index] = assignment; @@ -82,7 +82,7 @@ public boolean setAssignments(final int dim, final List assignments){ } public Integer getAssignment(final int dim, final int index){ - if(!this.containsDim(dim) || !this.checkSpectrumIndex(dim, index)){ + if(!this.containsDim(dim) || !this.checkIndex(dim, index)){ return null; } @@ -141,11 +141,11 @@ public Boolean isFullyAssigned(final int dim){ * Adds a new assignment entry (index), e.g. for a new signal. The given assignment indices * will be stored for each dimension of the new assignment entry (index). 
* - * @param assignments assignment indices to store in each dimension of new assignment entry + * @param assignment assignment indices to store in each dimension of new assignment entry * * @return */ - public boolean addAssignment(final int[] assignments){ + public boolean addAssignment(final int[] assignment){ if(!this.compareNDim(assignments.length)){ return false; } @@ -154,15 +154,35 @@ public boolean addAssignment(final int[] assignments){ for (int i = 0; i < this.getAssignmentsCount(); i++) { extendedAssignments[dim][i] = this.getAssignment(dim, i); } - extendedAssignments[dim][this.getAssignmentsCount()] = assignments[dim]; + extendedAssignments[dim][this.getAssignmentsCount()] = assignment[dim]; } this.assignments = extendedAssignments; return true; } - private boolean checkSpectrumIndex(final int dim, final int indexInTargetSpectrum){ - return (indexInTargetSpectrum >= 0) && (indexInTargetSpectrum < this.assignments[dim].length); + public boolean removeAssignment(final int index){ + if(!this.checkIndex(0, index)){ + return false; + } + final int[][] reducedAssignments = new int[this.getNDim()][this.getAssignmentsCount() - 1]; + int nextIndexToInsertCounter = 0; + for (int i = 0; i < this.getAssignmentsCount(); i++) { + if(i == index){ + continue; + } + for (int dim = 0; dim < this.getNDim(); dim++) { + reducedAssignments[dim][nextIndexToInsertCounter] = this.getAssignment(dim, i); + } + nextIndexToInsertCounter++; + } + this.assignments = reducedAssignments; + + return true; + } + + private boolean checkIndex(final int dim, final int index){ + return (index >= 0) && (index < this.assignments[dim].length); } private boolean checkInputListSize(final int size){ From c956380b9081de9518570abe107f44392ff26cb4 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Sat, 26 Oct 2019 01:37:58 +0200 Subject: [PATCH 143/405] - minor changes: rename "ac" to "structure" --- src/casekit/NMR/dbservice/NMRShiftDB.java | 28 +++++++++-------------- 1 file changed, 11 
insertions(+), 17 deletions(-) diff --git a/src/casekit/NMR/dbservice/NMRShiftDB.java b/src/casekit/NMR/dbservice/NMRShiftDB.java index 1fb4212..87b1cd2 100644 --- a/src/casekit/NMR/dbservice/NMRShiftDB.java +++ b/src/casekit/NMR/dbservice/NMRShiftDB.java @@ -156,27 +156,27 @@ public static HashMap getSSCComponentsFromNMRShiftDB(final St new FileReader(pathToNMRShiftDB), SilentChemObjectBuilder.getInstance() ); - IAtomContainer ac; + IAtomContainer structure; Spectrum spectrum; Assignment assignment; final String nucleus = getNucleusFromNMRShiftDBSpectrumProperty(NMRShiftDBSpectrumProperty); final String spectrumIndexInRecord = NMRShiftDBSpectrumProperty.split("\\s")[NMRShiftDBSpectrumProperty.split("\\s").length - 1]; while (iterator.hasNext()) { - ac = iterator.next(); + structure = iterator.next(); // skip molecules which not contain any of requested spectrum information - if(ac.getProperty(NMRShiftDBSpectrumProperty) == null){ + if(structure.getProperty(NMRShiftDBSpectrumProperty) == null){ continue; } - spectrum = NMRShiftDBSpectrumToSpectrum(ac.getProperty(NMRShiftDBSpectrumProperty), nucleus); + spectrum = NMRShiftDBSpectrumToSpectrum(structure.getProperty(NMRShiftDBSpectrumProperty), nucleus); // if no spectrum could be built or the number of signals in spectrum is different than the atom number in molecule - if((spectrum == null) || Utils.getAtomTypeIndicesByElement(ac, nucleus.replaceAll("\\d", "")).size() != spectrum.getSignalCount()){ + if((spectrum == null) || Utils.getAtomTypeIndicesByElement(structure, nucleus.replaceAll("\\d", "")).size() != spectrum.getSignalCount()){ continue; } - if(ac.getProperty("Solvent") != null){ - spectrum.setSolvent(getSolvent(ac.getProperty("Solvent"), spectrumIndexInRecord)); + if(structure.getProperty("Solvent") != null){ + spectrum.setSolvent(getSolvent(structure.getProperty("Solvent"), spectrumIndexInRecord)); } - if(ac.getProperty("Field Strength [MHz]") != null){ - for (final String fieldStrength : 
ac.getProperty("Field Strength [MHz]").toString().split("\\s")) { + if(structure.getProperty("Field Strength [MHz]") != null){ + for (final String fieldStrength : structure.getProperty("Field Strength [MHz]").toString().split("\\s")) { if (fieldStrength.startsWith(spectrumIndexInRecord + ":")) { try { spectrum.setSpectrometerFrequency(Double.parseDouble(fieldStrength.split(spectrumIndexInRecord + ":")[1])); @@ -188,14 +188,8 @@ public static HashMap getSSCComponentsFromNMRShiftDB(final St } } - assignment = NMRShiftDBSpectrumToAssignment(ac.getProperty(NMRShiftDBSpectrumProperty), nucleus); -// if ((ac != null) && (spectrum != null)) { - structureSetWithSpectra.put(structureSetWithSpectra.size(), new Object[]{ac, spectrum, assignment}); -// } - - // set aromaticities -// Utils.setAromaticitiesInAtomContainer(ac); - // add Kekulization? + assignment = NMRShiftDBSpectrumToAssignment(structure.getProperty(NMRShiftDBSpectrumProperty), nucleus); + structureSetWithSpectra.put(structureSetWithSpectra.size(), new Object[]{structure, spectrum, assignment}); } return structureSetWithSpectra; From 87a9c226ce8f109ae4232e0d9cfe6bfce753de14 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Sat, 26 Oct 2019 01:38:57 +0200 Subject: [PATCH 144/405] - minor changes: small optimizations --- src/casekit/NMR/model/Spectrum.java | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/src/casekit/NMR/model/Spectrum.java b/src/casekit/NMR/model/Spectrum.java index 27dd74b..75e52f7 100644 --- a/src/casekit/NMR/model/Spectrum.java +++ b/src/casekit/NMR/model/Spectrum.java @@ -31,7 +31,6 @@ import casekit.NMR.model.dimensional.DimensionalNMR; import java.util.ArrayList; -import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; @@ -490,7 +489,7 @@ public String getStandard() { /** * Returns the signal index (or indices) closest to the given shift. If no signal is found within the interval - * defined by pickprecision, an empty list is returned. 
+ * defined by {@code pickPrecision}, an empty list is returned. * @param shift query shift * @param dim dimension in spectrum to look in * @param pickPrecision tolerance value for search window @@ -537,12 +536,7 @@ public ArrayList pickSignals(final Double shift, final int dim, final d } } // sort signal indices by distance to query shift - pickedSignals.sort(new Comparator() { - @Override - public int compare(final Integer pickedSignalIndex1, final Integer pickedSignalIndex2) { - return Double.compare(Math.abs(shift - getShift(pickedSignalIndex1, dim)), Math.abs(shift - getShift(pickedSignalIndex2, dim))); - } - }); + pickedSignals.sort((pickedSignalIndex1, pickedSignalIndex2) -> Double.compare(Math.abs(shift - getShift(pickedSignalIndex1, dim)), Math.abs(shift - getShift(pickedSignalIndex2, dim)))); return pickedSignals; } From 6232a5d1a84f0c70681bed957b83f794c96c11ed Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Sat, 26 Oct 2019 01:40:11 +0200 Subject: [PATCH 145/405] - minor changes: - getAssignments() returns an ArrayList now --- src/casekit/NMR/model/Assignment.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/casekit/NMR/model/Assignment.java b/src/casekit/NMR/model/Assignment.java index 5ea4a7e..1f0b067 100644 --- a/src/casekit/NMR/model/Assignment.java +++ b/src/casekit/NMR/model/Assignment.java @@ -26,6 +26,7 @@ import casekit.NMR.model.dimensional.DimensionalNMR; import org.apache.commons.lang3.ArrayUtils; +import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -102,12 +103,12 @@ public Integer getIndex(final int dim, final int assignment){ return -1; } - public List getAssignments(final int dim){ + public ArrayList getAssignments(final int dim){ if(!this.containsDim(dim)){ return null; } - return Arrays.asList(ArrayUtils.toObject(this.assignments[dim])); + return new ArrayList<>(Arrays.asList(ArrayUtils.toObject(this.assignments[dim]))); } public int getAssignmentsCount(){ From 
6cc4840cf04831a171b7972728d2fca50f2786ad Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Sat, 26 Oct 2019 01:44:13 +0200 Subject: [PATCH 146/405] - in isValidBondAddition(): - re-structuring - subtracts now 1 of bond order sum for cases with heterocyclic aromatics, like the N in the small aromatic ring in coffein if we want to add the bond to the CH3 group --- src/casekit/NMR/Utils.java | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/src/casekit/NMR/Utils.java b/src/casekit/NMR/Utils.java index 37b9219..81d9fd9 100644 --- a/src/casekit/NMR/Utils.java +++ b/src/casekit/NMR/Utils.java @@ -667,14 +667,20 @@ public static HashMap getMean(final HashMap (" + Utils.getBondOrderSum(ac, atomIndex, true) + " + " + Utils.getBondOrderAsNumeric(bondToAdd) + " - 1) = " + (Utils.getBondOrderSum(ac, atomIndex, true) + Utils.getBondOrderAsNumeric(bondToAdd) - 1) + " <= " + ac.getAtom(atomIndex).getValency() + " ? -> " + ((Utils.getBondOrderSum(ac, atomIndex, true) + Utils.getBondOrderAsNumeric(bondToAdd) - 1) <= ac.getAtom(atomIndex).getValency())); - return ((Utils.getBondOrderSum(ac, atomIndex, true) - 1) + Utils.getBondOrderAsNumeric(bondToAdd)) <= ac.getAtom(atomIndex).getValency(); +// System.out.print(atomIndex + " --> " + Utils.getBondOrderSum(ac, atomIndex, true) + " + " + Utils.getBondOrderAsNumeric(bondToAdd)); + final IAtom atom = ac.getAtom(atomIndex); + // -1 for cases with heterocyclic aromatics, like the N in the small aromatic ring in coffein if we want to add the bond to the CH3 group + if(atom.isAromatic() && (!atom.getSymbol().equals("C"))){ + System.out.print("[ -1 ]"); + bondOrderSum -= 1; } +// System.out.print(" = " + bondOrderSum + " <= " + atom.getValency() + " ? 
-> " + (bondOrderSum <= atom.getValency()) + "\n"); -// System.out.println(atomIndex + " --> " + Utils.getBondOrderSum(ac, atomIndex, true) + " + " + Utils.getBondOrderAsNumeric(bondToAdd) + " = " + (Utils.getBondOrderSum(ac, atomIndex, true) + Utils.getBondOrderAsNumeric(bondToAdd)) + " <= " + ac.getAtom(atomIndex).getValency() + " ? -> " + ((Utils.getBondOrderSum(ac, atomIndex, true) + Utils.getBondOrderAsNumeric(bondToAdd)) <= ac.getAtom(atomIndex).getValency())); - return (Utils.getBondOrderSum(ac, atomIndex, true) + Utils.getBondOrderAsNumeric(bondToAdd)) <= ac.getAtom(atomIndex).getValency(); + // @TODO including charges + return bondOrderSum <= atom.getValency(); } From dff842c5371a714dc8d21746db3de797c5c47500 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Fri, 1 Nov 2019 16:58:36 +0100 Subject: [PATCH 147/405] - minor changes --- src/casekit/NMR/Utils.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/casekit/NMR/Utils.java b/src/casekit/NMR/Utils.java index 81d9fd9..1f336c5 100644 --- a/src/casekit/NMR/Utils.java +++ b/src/casekit/NMR/Utils.java @@ -674,7 +674,7 @@ public static boolean isValidBondAddition(final IAtomContainer ac, final int ato final IAtom atom = ac.getAtom(atomIndex); // -1 for cases with heterocyclic aromatics, like the N in the small aromatic ring in coffein if we want to add the bond to the CH3 group if(atom.isAromatic() && (!atom.getSymbol().equals("C"))){ - System.out.print("[ -1 ]"); +// System.out.print("[ -1 ]"); bondOrderSum -= 1; } // System.out.print(" = " + bondOrderSum + " <= " + atom.getValency() + " ? -> " + (bondOrderSum <= atom.getValency()) + "\n"); From 712de9a2e91e628644f50379792135989e9c8685 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Wed, 9 Dec 2020 03:15:59 +0100 Subject: [PATCH 148/405] first preparations to use in webCASE, i.e. 
in Utils class, NMRShiftDB and Model classes --- README.md | 6 +- pom.xml | 47 +- src/casekit/HOSECodePredictor.java | 8 +- src/casekit/NMR/dbservice/NMRShiftDB.java | 448 ------------ .../NMR/interpretation/InterpretData.java | 539 -------------- src/casekit/NMR/match/Matcher.java | 360 ---------- src/casekit/NMR/model/Signal.java | 128 ---- src/casekit/NMR/model/Spectrum.java | 558 --------------- .../NMR/model/dimensional/DimensionalNMR.java | 31 - src/casekit/NMRShiftDBSDFParser.java | 20 +- src/casekit/Result.java | 1 + src/casekit/SimilarityRanker.java | 504 ++++++------- src/casekit/{NMR => nmr}/Utils.java | 673 ++++++++++-------- .../analysis/MultiplicitySectionsBuilder.java | 116 +++ .../{NMR => nmr}/convert/LSDConverter.java | 18 +- src/casekit/nmr/core/Dereplication.java | 32 + src/casekit/nmr/core/Elucidation.java | 31 + .../{NMR => nmr}/dbservice/MongoDB.java | 18 +- src/casekit/nmr/dbservice/NMRShiftDB.java | 422 +++++++++++ .../nmr/interpretation/InterpretData.java | 526 ++++++++++++++ src/casekit/nmr/match/Matcher.java | 287 ++++++++ .../{NMR => nmr}/model/Assignment.java | 41 +- src/casekit/nmr/model/DataSet.java | 81 +++ .../nmr/model/ExtendedConnectionMatrix.java | 431 +++++++++++ src/casekit/nmr/model/Signal.java | 148 ++++ src/casekit/nmr/model/Spectrum.java | 359 ++++++++++ .../model/dimensional}/Dimensional.java | 51 +- src/casekit/{NMR => nmr}/parse/Parser.java | 101 +-- src/casekit/{NMR => nmr}/predict/Predict.java | 63 +- src/casekit/{NMR => nmr}/remarks | 0 30 files changed, 3212 insertions(+), 2836 deletions(-) delete mode 100644 src/casekit/NMR/dbservice/NMRShiftDB.java delete mode 100644 src/casekit/NMR/interpretation/InterpretData.java delete mode 100644 src/casekit/NMR/match/Matcher.java delete mode 100644 src/casekit/NMR/model/Signal.java delete mode 100644 src/casekit/NMR/model/Spectrum.java delete mode 100644 src/casekit/NMR/model/dimensional/DimensionalNMR.java rename src/casekit/{NMR => nmr}/Utils.java (77%) create mode 100644 
src/casekit/nmr/analysis/MultiplicitySectionsBuilder.java rename src/casekit/{NMR => nmr}/convert/LSDConverter.java (97%) create mode 100644 src/casekit/nmr/core/Dereplication.java create mode 100644 src/casekit/nmr/core/Elucidation.java rename src/casekit/{NMR => nmr}/dbservice/MongoDB.java (90%) create mode 100644 src/casekit/nmr/dbservice/NMRShiftDB.java create mode 100644 src/casekit/nmr/interpretation/InterpretData.java create mode 100644 src/casekit/nmr/match/Matcher.java rename src/casekit/{NMR => nmr}/model/Assignment.java (86%) create mode 100644 src/casekit/nmr/model/DataSet.java create mode 100644 src/casekit/nmr/model/ExtendedConnectionMatrix.java create mode 100644 src/casekit/nmr/model/Signal.java create mode 100644 src/casekit/nmr/model/Spectrum.java rename src/casekit/{model => nmr/model/dimensional}/Dimensional.java (63%) rename src/casekit/{NMR => nmr}/parse/Parser.java (71%) rename src/casekit/{NMR => nmr}/predict/Predict.java (76%) rename src/casekit/{NMR => nmr}/remarks (100%) diff --git a/README.md b/README.md index b37628e..4594f8f 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ License: MIT, see doc/mit.license ## Introduction This project hosts various Java classes for teaching and research dealing with spectral data in chemistry and metabolomics. -This project depends on the Chemistry Development Project (CDK), hosted under http://cdk.github.io/ +This project depends on the Chemistry Development Project (CDK), hosted under http://cdk.github.casekit.io/ Please refer to these pages for updated information and the latest version of the CDK. CDK's API documentation is available though our [Github site](http://cdk.github.io/cdk/). ## Download Source code @@ -41,7 +41,7 @@ will create an all-in-one-jar under ./target ### Shift Prediction with HOSE codes -The following classes are to demonstrate the prediction of Carbon-13 NMR spectra with HOSE codes. 
They only demonstrate the basic working principle and ignore, for example, stereochemistry, which can lead to large errors in, for example, the prediction of shifts for E/Z configurations of double bonds. If you want to know more about the details and a sophisticated system implementing them, please refer to Schutz V, Purtuc V, Felsinger S, Robien W (1997) CSEARCH-STEREO: A new generation of NMR database systems allowing three-dimensional spectrum prediction. Fresenius Journal of Analytical Chemistry 359:33–41. doi: 10.1007/s002160050531. +The following classes are to demonstrate the prediction of Carbon-13 casekit.nmr spectra with HOSE codes. They only demonstrate the basic working principle and ignore, for example, stereochemistry, which can lead to large errors in, for example, the prediction of shifts for E/Z configurations of double bonds. If you want to know more about the details and a sophisticated system implementing them, please refer to Schutz V, Purtuc V, Felsinger S, Robien W (1997) CSEARCH-STEREO: A new generation of casekit.nmr database systems allowing three-dimensional spectrum prediction. Fresenius Journal of Analytical Chemistry 359:33–41. doi: 10.1007/s002160050531. #### NMRShiftDBSDFParser @@ -70,7 +70,7 @@ It needs the TSV file generated by NMRShiftDBSDFParser as input. ```bash usage: java -jar casekit.jar casekit.HOSECodePredictor -s -i -d -m [-v] -Predict NMR chemical shifts for a given molecule based on table of HOSE +Predict casekit.nmr chemical shifts for a given molecule based on table of HOSE codes and assigned shifts. -s,--hosecodes filename of TSV file with HOSE codes (required) diff --git a/pom.xml b/pom.xml index 48a4cdf..8b698e3 100644 --- a/pom.xml +++ b/pom.xml @@ -41,42 +41,29 @@
- commons-cli - commons-cli - 1.3.1 - jar - - - org.apache.commons - commons-lang3 - 3.5 - jar - - - org.mongodb - mongo-java-driver - 3.10.0 + org.openscience.cdk + cdk-bundle + 2.3 - org.openscience.cdk - cdk-legacy - 2.3 - jar + commons-cli + commons-cli + 1.3.1 - org.openscience.cdk - cdk-fingerprint - 2.3 + org.apache.commons + commons-lang3 + 3.5 - org.openscience.cdk - cdk-depict - 2.3 + org.openscience + HOSECodeBuilder + 1.0 + + + org.mongodb + mongo-java-driver + 3.10.0 - - org.openscience - HOSECodeBuilder - 1.0 - diff --git a/src/casekit/HOSECodePredictor.java b/src/casekit/HOSECodePredictor.java index ae632bd..b724840 100644 --- a/src/casekit/HOSECodePredictor.java +++ b/src/casekit/HOSECodePredictor.java @@ -7,7 +7,7 @@ package casekit; -import casekit.NMR.Utils; +import casekit.nmr.Utils; import org.apache.commons.cli.*; import org.openscience.cdk.CDKConstants; import org.openscience.cdk.depict.DepictionGenerator; @@ -44,6 +44,8 @@ * @author Christoph Steinbeck * */ + +@Deprecated public class HOSECodePredictor { HashMap> hoseLookup; @@ -165,7 +167,7 @@ public HashMap> getHOSELookup(){ } /** - * Predicts NMR chemical shifts based on a given HOSE code table read by the + * Predicts casekit.nmr chemical shifts based on a given HOSE code table read by the * Constructor of this class. * The predicted chemical shifts are assigned to each atom by * as a property of type CDKConstants.NMRSHIFT_CARBON. 
@@ -294,7 +296,7 @@ private void parseArgs(String[] args) throws ParseException // TODO Auto-generated catch block HelpFormatter formatter = new HelpFormatter(); formatter.setOptionComparator(null); - String header = "Predict NMR chemical shifts for a given molecule based on table of HOSE codes and assigned shifts.\n\n"; + String header = "Predict casekit.nmr chemical shifts for a given molecule based on table of HOSE codes and assigned shifts.\n\n"; String footer = "\nPlease report issues at https://github.com/steinbeck/spectra"; formatter.printHelp( "java -jar casekit.jar casekit.HOSECodePredictor", header, options, footer, true ); throw new ParseException("Problem parsing command line"); diff --git a/src/casekit/NMR/dbservice/NMRShiftDB.java b/src/casekit/NMR/dbservice/NMRShiftDB.java deleted file mode 100644 index 87b1cd2..0000000 --- a/src/casekit/NMR/dbservice/NMRShiftDB.java +++ /dev/null @@ -1,448 +0,0 @@ -/* - * The MIT License - * - * Copyright 2019 Michael Wenk [https://github.com/michaelwenk] - * - * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -package casekit.NMR.dbservice; - -import casekit.NMR.Utils; -import casekit.NMR.model.Assignment; -import casekit.NMR.model.Signal; -import casekit.NMR.model.Spectrum; -import org.openscience.cdk.exception.CDKException; -import org.openscience.cdk.interfaces.IAtomContainer; -import org.openscience.cdk.interfaces.IAtomContainerSet; -import org.openscience.cdk.io.iterator.IteratingSDFReader; -import org.openscience.cdk.silent.AtomContainerSet; -import org.openscience.cdk.silent.SilentChemObjectBuilder; - -import java.io.FileNotFoundException; -import java.io.FileReader; -import java.util.*; - -public class NMRShiftDB { - /** - * Returns the molecules of a given MOL/SDF file. - * This function sets the molecule aromaticity (with allowed exocyclic pi - * bonds) by using the - * {@link Utils#setAromaticityAndKekulize(IAtomContainer)} - * function. 
- * - * @param pathToNMRShiftDB path to NMRShiftDB file - * @param setAromaticity whether to set aromaticities in structures or not - * @return - * @throws FileNotFoundException - * @throws CDKException - * @deprecated - */ - public static IAtomContainerSet getStructuresFromSDFile(final String pathToNMRShiftDB, final boolean setAromaticity) throws FileNotFoundException, CDKException { - final IAtomContainerSet acSet = new AtomContainerSet(); - final IteratingSDFReader iterator = new IteratingSDFReader( - new FileReader(pathToNMRShiftDB), - SilentChemObjectBuilder.getInstance() - ); - IAtomContainer ac; - while (iterator.hasNext()) { - ac = iterator.next(); - if(setAromaticity){ - Utils.setAromaticityAndKekulize(ac); - } - acSet.addAtomContainer(ac); - } - - return acSet; - } - - /** - * Returns all spectra for each molecule and a given nucleus which exist as - * property in a NMRShiftDB SDF. - * - * @param pathToNMRShiftDB path to NMRShiftDB file - * @param nucleus nucleus of requested spectra - * @return - * @throws FileNotFoundException - * @throws CDKException - * - */ - public static ArrayList> getSpectraFromNMRShiftDB(final String pathToNMRShiftDB, final String nucleus) throws FileNotFoundException, CDKException { - final ArrayList> spectraSet = new ArrayList<>(); - final IteratingSDFReader iterator = new IteratingSDFReader( - new FileReader(pathToNMRShiftDB), - SilentChemObjectBuilder.getInstance() - ); - IAtomContainer ac; - Spectrum spectrum; - ArrayList spectra; - HashMap spectraStrings; - String spectrumIndexInRecord, solvent; - while (iterator.hasNext()) { - ac = iterator.next(); - if(ac == null){ - continue; - } - spectraStrings = getSpectraStrings(ac, nucleus); - if(spectraStrings.isEmpty() || (ac.getProperty("Solvent") == null)){ - continue; - } - spectra = new ArrayList<>(); - for (final String spectrumPropertyString : spectraStrings.keySet()) { - spectrum = NMRShiftDBSpectrumToSpectrum(spectraStrings.get(spectrumPropertyString), nucleus); - 
if(spectrum == null){ - continue; - } - spectrumIndexInRecord = spectrumPropertyString.split("\\s")[spectrumPropertyString.split("\\s").length - 1]; - solvent = getSolvent(ac.getProperty("Solvent"), spectrumIndexInRecord); - if(solvent == null){ - continue; - } - spectrum.setSolvent(solvent); - - if(Utils.getAtomTypeIndicesByElement(ac, nucleus.replaceAll("\\d", "")).size() != spectrum.getSignalCount()){ - continue; - } - - spectra.add(new Object[]{spectrum, NMRShiftDB.NMRShiftDBSpectrumToAssignment(spectraStrings.get(spectrumPropertyString), nucleus)}); - } - spectraSet.add(spectra); - } - - return spectraSet; - } - - public static String getSolvent(final String solventPropertyString, final String spectrumIndexInRecord){ - final String[] solventPropertyStringSplit = solventPropertyString.split(":"); - String solvent; - for (int i = 0; i < solventPropertyStringSplit.length; i++) { - if (solventPropertyStringSplit[i].endsWith(spectrumIndexInRecord)) { - solvent = solventPropertyStringSplit[i + 1]; - if(solvent.substring(solvent.length() - 1).matches("\\d")){ - solvent = solvent.substring(0, solvent.length() - 1); - } - if(solvent.substring(solvent.length() - 1).matches("\\d")){ - solvent = solvent.substring(0, solvent.length() - 1); - } - solvent = solvent.substring(0, solvent.length() - 1); - - return solvent; - } - } - - return null; - } - - /** - * Returns 3-tuples consisting of structure, spectrum and assignments - * for each valid molecule record in the given NMRShiftDB file. Valid means - * here that each molecule record has to contain the given spectrum - * property string as well as the number of signals in that spectrum has to - * be the same as atoms of that atom type in molecule. 
- * - * @param pathToNMRShiftDB path to NMRShiftDB file - * @param NMRShiftDBSpectrumProperty spectrum property string to use - * @return - * @throws FileNotFoundException - * @throws CDKException - */ - public static HashMap getSSCComponentsFromNMRShiftDB(final String pathToNMRShiftDB, final String NMRShiftDBSpectrumProperty) throws FileNotFoundException, CDKException { - final HashMap structureSetWithSpectra = new HashMap<>(); - final IteratingSDFReader iterator = new IteratingSDFReader( - new FileReader(pathToNMRShiftDB), - SilentChemObjectBuilder.getInstance() - ); - IAtomContainer structure; - Spectrum spectrum; - Assignment assignment; - final String nucleus = getNucleusFromNMRShiftDBSpectrumProperty(NMRShiftDBSpectrumProperty); - final String spectrumIndexInRecord = NMRShiftDBSpectrumProperty.split("\\s")[NMRShiftDBSpectrumProperty.split("\\s").length - 1]; - while (iterator.hasNext()) { - structure = iterator.next(); - // skip molecules which not contain any of requested spectrum information - if(structure.getProperty(NMRShiftDBSpectrumProperty) == null){ - continue; - } - spectrum = NMRShiftDBSpectrumToSpectrum(structure.getProperty(NMRShiftDBSpectrumProperty), nucleus); - // if no spectrum could be built or the number of signals in spectrum is different than the atom number in molecule - if((spectrum == null) || Utils.getAtomTypeIndicesByElement(structure, nucleus.replaceAll("\\d", "")).size() != spectrum.getSignalCount()){ - continue; - } - if(structure.getProperty("Solvent") != null){ - spectrum.setSolvent(getSolvent(structure.getProperty("Solvent"), spectrumIndexInRecord)); - } - if(structure.getProperty("Field Strength [MHz]") != null){ - for (final String fieldStrength : structure.getProperty("Field Strength [MHz]").toString().split("\\s")) { - if (fieldStrength.startsWith(spectrumIndexInRecord + ":")) { - try { - spectrum.setSpectrometerFrequency(Double.parseDouble(fieldStrength.split(spectrumIndexInRecord + ":")[1])); - } catch 
(NumberFormatException e) { -// spectrum.setSpectrometerFrequency(null); - } - break; - } - } - } - - assignment = NMRShiftDBSpectrumToAssignment(structure.getProperty(NMRShiftDBSpectrumProperty), nucleus); - structureSetWithSpectra.put(structureSetWithSpectra.size(), new Object[]{structure, spectrum, assignment}); - } - - return structureSetWithSpectra; - } - - /** - * Returns a hashmap containing combined keys (by "_") of solvents - * and lists of calculated deviations between all given spectra for a - * nucleus in molecule record as values.
- * Here, only molecule records in NMRShiftDB file are considered which have - * at least two different spectra for same nucleus.
- * Example: "Spectrum 13C 0", "Spectrum 13C 1" will be used for given - * nucleus 13C. - * - * - * @param pathToNMRShiftDB - * @param nucleus - * @return - * @throws FileNotFoundException - * @throws CDKException - */ - public static HashMap> getSolventDeviations(final String pathToNMRShiftDB, final String nucleus) throws FileNotFoundException, CDKException{ - int signalCount; - Spectrum spectrum; - Assignment assignment; - final ArrayList> spectraSets = getSpectraFromNMRShiftDB(pathToNMRShiftDB, nucleus); - HashMap> shiftsPerAtom; - HashMap> solventsPerAtom; - ArrayList solvents; - String[] solventsToSort; - - final HashMap> deviations = new HashMap<>(); - String combiKey; - - for (final ArrayList spectraSetInRecord : spectraSets) { - shiftsPerAtom = new HashMap<>(); - solventsPerAtom = new HashMap<>(); - signalCount = -1; - for (final Object[] spectrumAndAssignment : spectraSetInRecord) { - spectrum = (Spectrum) spectrumAndAssignment[0]; - assignment = (Assignment) spectrumAndAssignment[1]; - if (signalCount == -1) { - signalCount = spectrum.getSignalCount(); - } else if (signalCount != spectrum.getSignalCount()) { - continue; - } - for (final int atomIndex : assignment.getAssignments(0)) { - if (!shiftsPerAtom.containsKey(atomIndex)) { - shiftsPerAtom.put(atomIndex, new ArrayList<>()); - solventsPerAtom.put(atomIndex, new ArrayList<>()); - } - shiftsPerAtom.get(atomIndex).add(spectrum.getSignal(assignment.getIndex(0, atomIndex)).getShift(0)); - solventsPerAtom.get(atomIndex).add(spectrum.getSolvent()); - } - } - if (shiftsPerAtom.isEmpty() || (shiftsPerAtom.get(Collections.min(shiftsPerAtom.keySet())).size() < 2)) { - continue; - } - solvents = new ArrayList<>(solventsPerAtom.get(Collections.min(solventsPerAtom.keySet()))); -// if(Collections.frequency(solvents, "Unreported") + Collections.frequency(solvents, "Unknown") > solvents.size() - 2){ -// continue; -// } - - for (final int atomIndex : shiftsPerAtom.keySet()) { - for (int s1 = 0; s1 < solvents.size(); 
s1++) { -// if(solvents.get(s1).equals("Unreported") || solvents.get(s1).equals("Unknown")){ -// continue; -// } - for (int s2 = s1 + 1; s2 < solvents.size(); s2++) { -// if (solvents.get(s2).equals("Unreported") || solvents.get(s2).equals("Unknown")) { -// continue; -// } - solventsToSort = new String[2]; - solventsToSort[0] = solvents.get(s1); - solventsToSort[1] = solvents.get(s2); - Arrays.sort(solventsToSort); - combiKey = solventsToSort[0] + "_" + solventsToSort[1]; - if (!deviations.containsKey(combiKey)) { - deviations.put(combiKey, new ArrayList<>()); - } - deviations.get(combiKey).add(Math.abs(shiftsPerAtom.get(atomIndex).get(s1) - shiftsPerAtom.get(atomIndex).get(s2))); - } - } - } - } - - return deviations; - } - - /** - * - * @param pathToDB - * @return - * @throws FileNotFoundException - * @deprecated - */ - public static HashSet getAtomTypesInDB(final String pathToDB) throws FileNotFoundException{ - final HashSet atomTypes = new HashSet<>(); - final IteratingSDFReader iterator = new IteratingSDFReader( - new FileReader(pathToDB), - SilentChemObjectBuilder.getInstance() - ); - while (iterator.hasNext()) { - atomTypes.addAll(Utils.getAtomTypesInAtomContainer(iterator.next())); - } - - return atomTypes; - } - - // currently only for 1D spectra - public static HashMap getSpectraStrings(final IAtomContainer ac, final String nucleus) { - final ArrayList props = (ArrayList) (ArrayList) (new ArrayList<>(ac.getProperties().keySet())); - final HashMap spectra = new HashMap<>(); - for (final String prop : props) { - if (prop.startsWith("Spectrum " + nucleus)) { - spectra.put(prop, ac.getProperty(prop)); - } - } - - return spectra; - } - - /** - * Creates a two dimensional array of a given NMRShiftDB NMR entry - * with all signal shift values, intensities, multiplicities and atom indices. - * - * @param NMRShiftDBSpectrum - * @return two dimensional array: - * 1. dimension: signal index (row); - * 2. 
dimension: signal shift value (column 1), signal intensity (column 2), - * signal multiplicity (column 3), atom index in structure (column 4) - */ - public static String[][] parseNMRShiftDBSpectrum(final String NMRShiftDBSpectrum){ - if(NMRShiftDBSpectrum.trim().isEmpty()){ - return new String[][]{}; - } - String[] signalSplit; - final String[] shiftsSplit = NMRShiftDBSpectrum.split("\\|"); - final String[][] values = new String[shiftsSplit.length][4]; - for (int i = 0; i < shiftsSplit.length; i++) { - signalSplit = shiftsSplit[i].split(";"); - values[i][0] = signalSplit[0]; // shift value - values[i][1] = signalSplit[1].substring(0, signalSplit[1].length() - 1); // intensity - values[i][2] = signalSplit[1].substring(signalSplit[1].length() - 1); // multiplicity - values[i][3] = signalSplit[2]; // atom index - } - - return values; - } - - /** - * Sets shifts, intensities and implicit hydrogen counts in atoms of an atom container - * by means of given spectrum property string. - * - * @param ac IAtomContainer to set - * @param NMRShiftDBSpectrum Property string of spectrum in NMRShiftDB format. 
- * @return - * - * @see #parseNMRShiftDBSpectrum(String) - * @see Utils#getHydrogenCountFromMultiplicity(String) - * @deprecated - */ - public static boolean setNMRShiftDBShiftsToAtomContainer(final IAtomContainer ac, final String NMRShiftDBSpectrum){ - if (ac.getProperty(NMRShiftDBSpectrum) == null) { - return false; - } - final String[][] spectrumStringArray = parseNMRShiftDBSpectrum(ac.getProperty(NMRShiftDBSpectrum)); - - Integer atomIndexSpectrum; -// String multiplicity; - Double shift; - - for (int i = 0; i < spectrumStringArray.length; i++) { - atomIndexSpectrum = Integer.parseInt(spectrumStringArray[i][3]); - shift = Double.parseDouble(spectrumStringArray[i][0]); -// multiplicity = spectrumStringArray[i][3]; - if(Utils.checkIndexInAtomContainer(ac, atomIndexSpectrum)){ - ac.getAtom(atomIndexSpectrum).setProperty(Utils.getNMRShiftConstant(ac.getAtom(atomIndexSpectrum).getSymbol()), shift); -// ac.getAtom(atomIndexSpectrum).setImplicitHydrogenCount(Utils.getHydrogenCountFromMultiplicity(multiplicity)); - } - } - - return true; - } - - public static String getNucleusFromNMRShiftDBSpectrumProperty(final String NMRShiftDBSpectrumProperty){ - return NMRShiftDBSpectrumProperty.split(" ")[1]; - } - - public static String NMRShiftDBSpectrumToBasicTextSpectrum(final String NMRShiftDBSpectrum, final String nucleus, final String description){ - if ((NMRShiftDBSpectrum == null) || NMRShiftDBSpectrum.trim().isEmpty()) { - return null; - } - final StringBuilder basicSpectrum = new StringBuilder(); - // append description - if(!description.trim().startsWith("//")){ - basicSpectrum.append("// "); - } - basicSpectrum.append(description).append("\n"); - final String[][] spectrumStringArray = NMRShiftDB.parseNMRShiftDBSpectrum(NMRShiftDBSpectrum); - try { - for (int i = 0; i < spectrumStringArray.length; i++) { - // append nucleus - basicSpectrum.append(nucleus).append(", "); - // append chemical shift - 
basicSpectrum.append(Double.parseDouble(spectrumStringArray[i][0])).append(", "); - // append multiplicity - basicSpectrum.append(spectrumStringArray[i][2]).append(", "); - // append intensity - basicSpectrum.append(Double.parseDouble(spectrumStringArray[i][1])).append("\n"); - } - } catch (Exception e) { - return null; - } - - return basicSpectrum.toString(); - } - - public static Spectrum NMRShiftDBSpectrumToSpectrum(final String NMRShiftDBSpectrum, final String nucleus){ - if ((NMRShiftDBSpectrum == null) || NMRShiftDBSpectrum.trim().isEmpty()) { - return null; - } - final String[][] spectrumStringArray = parseNMRShiftDBSpectrum(NMRShiftDBSpectrum); - final Spectrum spectrum = new Spectrum(new String[]{nucleus}); - String multiplicity; - Double shift, intensity; - try { - for (int i = 0; i < spectrumStringArray.length; i++) { - shift = Double.parseDouble(spectrumStringArray[i][0]); - intensity = Double.parseDouble(spectrumStringArray[i][1]); - multiplicity = spectrumStringArray[i][2]; - spectrum.addSignal(new Signal(new String[]{nucleus}, new Double[]{shift}, multiplicity, intensity)); - } - spectrum.detectEquivalences(); - } catch (Exception e) { - - return null; - } - - return spectrum; - } - - public static Assignment NMRShiftDBSpectrumToAssignment(final String NMRShiftDBSpectrum, final String nucleus) { - if ((NMRShiftDBSpectrum == null) || NMRShiftDBSpectrum.trim().isEmpty()) { - return null; - } - final String[][] NMRShiftDBSpectrumStringArray = parseNMRShiftDBSpectrum(NMRShiftDBSpectrum); - final Spectrum spectrum = NMRShiftDBSpectrumToSpectrum(NMRShiftDBSpectrum, nucleus); - final Assignment assignment = new Assignment(spectrum); - for (int i = 0; i < NMRShiftDBSpectrumStringArray.length; i++) { - assignment.setAssignment(0, i, new Integer(NMRShiftDBSpectrumStringArray[i][3])); - } - - return assignment; - } -} diff --git a/src/casekit/NMR/interpretation/InterpretData.java b/src/casekit/NMR/interpretation/InterpretData.java deleted file mode 100644 index 
190f9e4..0000000 --- a/src/casekit/NMR/interpretation/InterpretData.java +++ /dev/null @@ -1,539 +0,0 @@ -/* - * The MIT License - * - * Copyright 2019 Michael Wenk [https://github.com/michaelwenk] - * - * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ -package casekit.NMR.interpretation; - -import casekit.NMR.Utils; -import casekit.NMR.model.Assignment; -import casekit.NMR.model.Spectrum; -import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; - -import org.openscience.cdk.silent.Atom; -import org.openscience.cdk.CDKConstants; -import org.openscience.cdk.interfaces.IAtom; -import org.openscience.cdk.interfaces.IAtomContainer; -import org.openscience.cdk.interfaces.IAtomType; -import org.openscience.cdk.interfaces.IBond; -import org.openscience.cdk.interfaces.IMolecularFormula; -import org.openscience.cdk.silent.SilentChemObjectBuilder; -import org.openscience.cdk.tools.manipulator.MolecularFormulaManipulator; - -/** - * - * @author Michael Wenk [https://github.com/michaelwenk] - */ -public class InterpretData { - - final private IAtomContainer mol; - final private IMolecularFormula molFormula; - private HashMap> atomTypeIndices; - final private HashMap spectra = new HashMap<>(); - final private HashMap assignments = new HashMap<>(); - - /** - * Creates an instances of this class with an empty class atom container. - */ - public InterpretData(){ - this.molFormula = null; - this.mol = SilentChemObjectBuilder.getInstance().newAtomContainer(); - this.updateAtomTypeIndices(); - } - - /** - * Creates an instances of this class with a class atom container consisting - * of all heavy atoms in given molecular formula. - * - * @param molFormula IMolecularFormula object for IAtomContainer creation - */ - public InterpretData(final IMolecularFormula molFormula){ - this.molFormula = molFormula; - this.mol = Utils.removeAtoms(MolecularFormulaManipulator.getAtomContainer(this.molFormula), "H"); - this.updateAtomTypeIndices(); - } - - - - /** - * Returns used IMolecularFormula object for this class instance. - * - * @return - */ - public final IMolecularFormula getMolecularFormula() { - - return this.molFormula; - } - - - /** - * Returns used IAtomContainer object for this class instance. 
- * - * @return - */ - public final IAtomContainer getAtomContainer() { - - return this.mol; - } - - - /** - * Returns a HashMap object with the indices of all atoms for all atom types - * (elements) within the atom container of this class. - * - * @return - */ - public final HashMap> getAtomTypeIndices() { - - return this.atomTypeIndices; - } - - - /** - * Sets the indices of all atoms in this class atom container. - * @see Utils#getAtomTypeIndices(org.openscience.cdk.interfaces.IAtomContainer) - * - */ - private void updateAtomTypeIndices(){ - - this.atomTypeIndices = Utils.getAtomTypeIndices(this.mol); - } - - /** - * Returns all given and used spectra. - * - * @return - */ - public final HashMap getSpectra(){ - - return this.spectra; - } - - - /** - * Returns all created and used Assignment objects. The assigned indices - * refer to atom indices in class atom container. - * - * @return - */ - public final HashMap getAssignments(){ - - return this.assignments; - } - - - /** - * Returns one specific created and used Assignment object. - * The assigned indices refer to atom indices in class atom container. - * - * @param spectrum - * @return - */ - public final Assignment getAssignment(final Spectrum spectrum){ - - if (spectrum.getSpecType().equals(CDKConstants.NMRSPECTYPE_1D_DEPT90) || spectrum.getSpecType().equals(CDKConstants.NMRSPECTYPE_1D_DEPT135)) { - - return this.getAssignments().get(spectrum.getSpecType()); - } - - return this.assignments.get(spectrum.getSpecType() + "_" + Utils.getSpectrumNucleiAsString(spectrum)); - } - - - /** - * Sets the 1D NMR shift values for given Spectrum object to atoms of the class IAtomContainer. - * The shift values will be assigned sequentially. - * In case of a molecular formula is given in this class, the number of - * shifts must be equal to the number of atoms in this molecular formula. - * For less shifts in shift list you will be asked for entering equivalences. - * Otherwise this function will return a false value. 
- * In case of no molecular was given to this class, a new atom in the atom container - * will be created regarding to the input shift list. - * Each shift value is set to {@link IAtomContainer#setProperty(java.lang.Object, java.lang.Object) - * as result of Utils#getNMRShiftConstant(java.lang.String)}, depending on - * the specified atom type (element). - * After usage of this function, the input Spectrum class object might be extended during - * equivalent signal selection by user. - * - * @param spectrum Spectrum class object containing the 1D shift information - * @throws java.io.IOException - * @throws org.openscience.cdk.exception.CDKException - */ - public final void assign1DSpectrum(final Spectrum spectrum) throws Exception { - // checks whether number of signals is equal to molecular formula if given - // if not equal then edit signal list in spectrum - this.check1DSpectrum(spectrum); - // assign shift values to atoms sequentially - this.assignShiftValuesToAtoms(spectrum); - - final Assignment assignment = new Assignment(spectrum); - if(this.atomTypeIndices.get(Utils.getAtomTypeFromSpectrum(spectrum, 0)) != null){ - assignment.setAssignments(0, this.atomTypeIndices.get(Utils.getAtomTypeFromSpectrum(spectrum, 0))); - } - - this.spectra.put(CDKConstants.NMRSPECTYPE_1D + "_" + Utils.getSpectrumNucleiAsString(spectrum), spectrum); - this.assignments.put(CDKConstants.NMRSPECTYPE_1D + "_" + Utils.getSpectrumNucleiAsString(spectrum), assignment); - } - - /** - * Checks the number of signals in a spectrum against the number of atoms - * in molecular formula of class, if given. In case of different numbers, - * a user input for spectrum editing will be requested. 
- * - * @param spectrum - * @throws IOException - * @see Utils#editSignalsInSpectrum(Spectrum, IMolecularFormula, int) - */ - private void check1DSpectrum(final Spectrum spectrum) throws Exception { - if(this.molFormula != null) { - final int diff = Utils.getDifferenceSpectrumSizeAndMolecularFormulaCount(spectrum, this.molFormula, 0); - if (diff != 0) { - // adjust Spectrum size by user - Utils.editSignalsInSpectrum(spectrum, this.molFormula, 0); - } - } - } - - - /** - * Sets shift values in atoms of class atom container as property (see below), sequentially. - * - * @param spectrum Spectrum class object which contains shifts in first - * dimension - * @see Utils#getNMRShiftConstant(java.lang.String) - * - */ - private void assignShiftValuesToAtoms(final Spectrum spectrum){ - final String atomType = Utils.getAtomTypeFromSpectrum(spectrum, 0); - final ArrayList shifts = spectrum.getShifts(0); - if((this.molFormula == null) && !atomType.equals("H")){ - // (re-)filling up of peaks for that atom type from given peak list in spectrum - this.removeAtoms(atomType); - IAtom atom; - for (final double shift : shifts) { - atom = new Atom(atomType); - atom.setProperty(Utils.getNMRShiftConstant(atomType), shift); - atom.setImplicitHydrogenCount(null); - this.mol.addAtom(atom); - } - this.updateAtomTypeIndices(); - } - // assign shifts to atoms as property - if(this.atomTypeIndices.get(atomType) != null){ - int assignedShiftCount = 0; - for (final int i : this.atomTypeIndices.get(atomType)) { - if (assignedShiftCount < shifts.size()) { - // shift assignment in atom - this.mol.getAtom(i).setProperty(Utils.getNMRShiftConstant(atomType), shifts.get(assignedShiftCount)); - } - assignedShiftCount++; - } - } - } - - - /** - * Removes atoms from a given atom type from the class' atom container. - * - * @param atomType Atom type (element's name, e.g. 
C or Br) - * @return IAtomContainer where the atoms were removed - */ - private void removeAtoms(final String atomType) { - if(this.getAtomTypeIndices().get(atomType) == null){ - return; - } - final ArrayList toRemoveList = new ArrayList<>(); - for (final int i: this.getAtomTypeIndices().get(atomType)) { - toRemoveList.add(this.mol.getAtom(i)); - } - for (IAtom iAtom : toRemoveList) { - this.mol.removeAtom(iAtom); - } - - this.updateAtomTypeIndices(); - } - - /** - * Sets the assignments of carbon atoms in class atom container - * by usage of DEPT90 and DEPT135 information. The implicit hydrogen count - * property is set too. - * - * @see InterpretData#setImplicitHydrogenCountsFromDEPT() - * - * @param spectrum1D_DEPT90 DEPT90 spectrum - * @param spectrum1D_DEPT135 DEPT135 spectrum which has to contain intensity - * information - * @param tol tolerance value [ppm] for carbon shift matching - * @return false if 1-dimensional 13C spectrum is missing (not set beforehand) - * or something is missing in one of the two input spectra - * - */ - public final boolean assignDEPT(final Spectrum spectrum1D_DEPT90, final Spectrum spectrum1D_DEPT135, final double tol){ - if((spectrum1D_DEPT90 == null) || (spectrum1D_DEPT135 == null) || (spectrum1D_DEPT135.getIntensities() == null) - || (this.getSpectra().get(CDKConstants.NMRSPECTYPE_1D + "_13C") == null)){ - return false; - } - - final Assignment assignment1D_DEPT90 = new Assignment(spectrum1D_DEPT90); - final Assignment assignment1D_DEPT135 = new Assignment(spectrum1D_DEPT135); - final ArrayList matchesIn1DSpectrum_DEPT90 = this.findMatchesIn1DSpectra(spectrum1D_DEPT90, 0, tol); - final ArrayList matchesIn1DSpectrum_DEPT135 = this.findMatchesIn1DSpectra(spectrum1D_DEPT135, 0, tol); - final Assignment assignment1D_13C = this.getAssignment(this.getSpectra().get(CDKConstants.NMRSPECTYPE_1D + "_13C")); - - for (int i = 0; i < assignment1D_DEPT90.getAssignmentsCount(); i++) { - if (assignment1D_13C.getAssignment(0, 
matchesIn1DSpectrum_DEPT90.get(i)) >= 0) { - assignment1D_DEPT90.setAssignment(0, i, assignment1D_13C.getAssignment(0, matchesIn1DSpectrum_DEPT90.get(i))); - } - } - for (int i = 0; i < assignment1D_DEPT135.getAssignmentsCount(); i++) { - if (assignment1D_13C.getAssignment(0, matchesIn1DSpectrum_DEPT135.get(i)) >= 0) { - assignment1D_DEPT135.setAssignment(0, i, assignment1D_13C.getAssignment(0, matchesIn1DSpectrum_DEPT135.get(i))); - } - } - - this.spectra.put(CDKConstants.NMRSPECTYPE_1D_DEPT90, spectrum1D_DEPT90); - this.assignments.put(CDKConstants.NMRSPECTYPE_1D_DEPT90, assignment1D_DEPT90); - this.spectra.put(CDKConstants.NMRSPECTYPE_1D_DEPT135, spectrum1D_DEPT135); - this.assignments.put(CDKConstants.NMRSPECTYPE_1D_DEPT135, assignment1D_DEPT135); - - this.setImplicitHydrogenCountsFromDEPT(); - - return true; - } - - - /** - * Sets the implicitHydrogenCount() property in atoms of class atom container - * by using the already set DEPT information. - * @see InterpretData#assignDEPT(casekit.NMR.model.Spectrum, casekit.NMR.model.Spectrum, double) - */ - private void setImplicitHydrogenCountsFromDEPT() { - - final ArrayList intensitiesDEPT135 = this.getSpectra().get(CDKConstants.NMRSPECTYPE_1D_DEPT135).getIntensities(); - final ArrayList matchesDEPT90InAtomContainer = this.getAssignedAtomIndices(this.getSpectra().get(CDKConstants.NMRSPECTYPE_1D_DEPT90), 0); - final ArrayList matchesDEPT135InAtomContainer = this.getAssignedAtomIndices(this.getSpectra().get(CDKConstants.NMRSPECTYPE_1D_DEPT135), 0); - - int matchDEPT90, matchDEPT135, hCount, hCountAll = 0; - for (int i : this.atomTypeIndices.get("C")) { - if ((this.mol.getAtom(i).getProperty(CDKConstants.NMRSHIFT_CARBON) != null) && (this.mol.getAtom(i).getImplicitHydrogenCount() == null)) { - matchDEPT90 = matchesDEPT90InAtomContainer.indexOf(i); - matchDEPT135 = matchesDEPT135InAtomContainer.indexOf(i); - if (matchDEPT90 >= 0) { - // CH - hCount = 1; - } else if (matchDEPT90 == -1 && matchDEPT135 >= 0) { - // CH2 or 
CH3 - if (intensitiesDEPT135.get(matchDEPT135) < 0) { - hCount = 2; - } else if (intensitiesDEPT135.get(matchDEPT135) > 0) { - hCount = 3; - } else { - // qC - hCount = 0; - } - } else { - // qC - hCount = 0; - } - this.mol.getAtom(i).setImplicitHydrogenCount(hCount); - if( this.mol.getAtom(i).getImplicitHydrogenCount() >= 3){ - this.mol.getAtom(i).setHybridization(IAtomType.Hybridization.SP3); - } - hCountAll += hCount; - } - } - if(this.molFormula != null){ - System.out.println("assigned protons to carbons: " + hCountAll + " (" + MolecularFormulaManipulator.getElementCount(this.molFormula, "H") + ") -> " + (MolecularFormulaManipulator.getElementCount(this.molFormula, "H") - hCountAll) + " protons to be attached on hetero atoms!!!"); - } else { - System.out.println("assigned protons to carbons: " + hCountAll+ "!!!"); - } - - } - - - /** - * - * @param spectrum Spectrum class object consisting of Signal class objects - * where the proton shifts values are given in first dimension and the - * heavy atom shifts in the second. 
- * @param tolProton tolerance value [ppm] for proton shift matching - * @param tolHeavyAtom tolerance value [ppm] for heavy atom shift matching - */ - public final void assignHSQC(final Spectrum spectrum, final double tolProton, final double tolHeavyAtom) { - // assign index of matching atoms to both dimensions and save the Spectrum and Assignment objects in class - this.assign2DSpectrum(spectrum, tolProton, tolHeavyAtom); - // in case the 1H spectrum is given, then assign protons to same indices from belonging carbon atoms - if(this.getAssignments().get(CDKConstants.NMRSPECTYPE_1D + "_1H") != null){ - final Assignment assignment1D_1H = this.getAssignments().get(CDKConstants.NMRSPECTYPE_1D + "_1H"); - final Assignment assignment2D_HSQC = this.getAssignments().get(spectrum.getSpecType() + "_" + Utils.getSpectrumNucleiAsString(spectrum)); - final ArrayList matchesIn1DSpectrum_1H = this.findMatchesIn1DSpectra(spectrum, 0, tolProton); - - for (int i = 0; i < assignment2D_HSQC.getAssignmentsCount(); i++) { - // if heavy atom i has an assignment in class atom container then assign that index i to belonging protons as index - if (assignment2D_HSQC.getAssignment(1, i) >= 0) { - assignment1D_1H.setAssignment(0, matchesIn1DSpectrum_1H.get(i), assignment2D_HSQC.getAssignment(1, i)); - assignment2D_HSQC.setAssignment(0, i, assignment1D_1H.getAssignment(0, matchesIn1DSpectrum_1H.get(i))); - } - } - } - // attach protons on other heavy atoms than carbons via HSQC assignment counting - if(!spectrum.getNuclei()[1].equals("13C")){ - final Assignment assignment2D_HSQC = this.getAssignments().get(spectrum.getSpecType() + "_" + Utils.getSpectrumNucleiAsString(spectrum)); - for (int i = 0; i < assignment2D_HSQC.getAssignmentsCount(); i++) { - if((assignment2D_HSQC.getAssignment(1, i) > -1)){ - if(this.mol.getAtom(assignment2D_HSQC.getAssignment(1, i)).getImplicitHydrogenCount() == null){ - this.mol.getAtom(assignment2D_HSQC.getAssignment(1, i)).setImplicitHydrogenCount(0); - } - 
this.mol.getAtom(assignment2D_HSQC.getAssignment(1, i)).setImplicitHydrogenCount(this.mol.getAtom(assignment2D_HSQC.getAssignment(1, i)).getImplicitHydrogenCount() + 1); - } - } - } - } - - - private void assign2DSpectrum(final Spectrum spectrum, final double tolDim1, final double tolDim2){ - - final ArrayList matchesQueryIn1DSpectrumDim1 = this.findMatchesIn1DSpectra(spectrum, 0, tolDim1); - final ArrayList matchesQueryIn1DSpectrumDim2 = this.findMatchesIn1DSpectra(spectrum, 1, tolDim2); - final ArrayList matches1DInAtomContainerDim1 = this.getAssignedAtomIndices(this.getSpectra().get(CDKConstants.NMRSPECTYPE_1D + "_" + spectrum.getNuclei()[0]), 0); - final ArrayList matches1DInAtomContainerDim2 = this.getAssignedAtomIndices(this.getSpectra().get(CDKConstants.NMRSPECTYPE_1D + "_" + spectrum.getNuclei()[1]), 0); - - final Assignment assignment = new Assignment(spectrum); - for (int i = 0; i < matchesQueryIn1DSpectrumDim1.size(); i++) { - if((matches1DInAtomContainerDim1 != null) && (matchesQueryIn1DSpectrumDim1.get(i) >= 0)){ - assignment.setAssignment(0, i, matches1DInAtomContainerDim1.get(matchesQueryIn1DSpectrumDim1.get(i))); - } - if((matches1DInAtomContainerDim2 != null) && (matchesQueryIn1DSpectrumDim2.get(i) >= 0)){ - assignment.setAssignment(1, i, matches1DInAtomContainerDim2.get(matchesQueryIn1DSpectrumDim2.get(i))); - } - } - - this.spectra.put(spectrum.getSpecType() + "_" + Utils.getSpectrumNucleiAsString(spectrum), spectrum); - this.assignments.put(spectrum.getSpecType() + "_" + Utils.getSpectrumNucleiAsString(spectrum), assignment); - } - - - private ArrayList findMatchesIn1DSpectra(final Spectrum spectrum, final int dim, final double tol){ - - ArrayList matchesQueryInOrigin1DSpectrum = new ArrayList<>(); -// final ArrayList shiftsQuery = spectrum.getShifts(dim); -// if(this.getSpectra().get(CDKConstants.NMRSPECTYPE_1D + "_" + spectrum.getNuclei()[dim]) != null){ -// final ArrayList shiftsOrigin1DSpectrum = 
this.getSpectra().get(CDKConstants.NMRSPECTYPE_1D + "_" + spectrum.getNuclei()[dim]).getShifts(0); -// matchesQueryInOrigin1DSpectrum = Utils.findShiftMatches(shiftsOrigin1DSpectrum, shiftsQuery, tol); -// matchesQueryInOrigin1DSpectrum = Utils.correctShiftMatches(shiftsOrigin1DSpectrum, shiftsQuery, matchesQueryInOrigin1DSpectrum, tol); -// } else { -// for (int i = 0; i < spectrum.getSignalCount(); i++) { -// matchesQueryInOrigin1DSpectrum.add(-1); -// } -// } - - return matchesQueryInOrigin1DSpectrum; - } - - /** - * Returns the indices of atoms within the class atom container which match - * to the shifts of given spectrum and dimension. - * - * @param spectrum - * @param dim - * @return - */ - public final ArrayList getAssignedAtomIndices(final Spectrum spectrum, final int dim){ - - if(spectrum == null){ - return null; - } else if(this.getAssignment(spectrum) == null){ - final ArrayList atomIndices = new ArrayList<>(); - for (int i = 0; i < spectrum.getSignalCount(); i++) { - atomIndices.add(-1); - } - return atomIndices; - } - - return new ArrayList<>(this.getAssignment(spectrum).getAssignments(dim)); - } - - - /** - * Sets links between two heavy atoms of H,H-COSY signals. - * - * @param spectrum Spectrum class object containing the 2D spectrum proton shift information - * @param tolProton tolerance value [ppm] for matching belonging protons - * of heavy atom - * @return - */ - public final boolean assignHHCOSY(final Spectrum spectrum, final double tolProton) { - - final ArrayList protonShiftMatches1 = this.findMatchesIn1DSpectra(spectrum, 0, tolProton); - final ArrayList protonShiftMatches2 = this.findMatchesIn1DSpectra(spectrum, 1, tolProton); - // are all signals bidirectional? - if (!Utils.isBidirectional(protonShiftMatches1, protonShiftMatches2)) { - return false; - } - this.assign2DSpectrum(spectrum, tolProton, tolProton); - - return true; - } - - - /** - * Sets links between two carbon atoms in an INADEQUATE signal relationship. 
- * Returns true if all signals are bidirectional, so that atom A has a - * signal according to atom B and vice versa. - * - * @param spectrum Spectrum class object consisting of Signal class objects - * @param tolCarbon tolerance value [ppm] for carbon atom shift matching - * @return - */ - public final boolean assignINADEQUATE(final Spectrum spectrum, final double tolCarbon) { - - final ArrayList carbonShiftMatches1 = this.findMatchesIn1DSpectra(spectrum, 0, tolCarbon); - final ArrayList carbonShiftMatches2 = this.findMatchesIn1DSpectra(spectrum, 1, tolCarbon); - // are all signals bidirectional? - if (!casekit.NMR.Utils.isBidirectional(carbonShiftMatches1, carbonShiftMatches2)) { - return false; - } - this.assign2DSpectrum(spectrum, tolCarbon, tolCarbon); - - final ArrayList indicesInAtomContainerDim1 = this.getAssignedAtomIndices(spectrum, 0); - final ArrayList indicesInAtomContainerDim2 = this.getAssignedAtomIndices(spectrum, 1); - for (int i = 0; i < spectrum.getSignalCount(); i++) { - if((indicesInAtomContainerDim1.get(i) > -1) && (indicesInAtomContainerDim2.get(i) > -1)){ - this.setBond(indicesInAtomContainerDim1.get(i), indicesInAtomContainerDim2.get(i)); - } - } - - return true; - } - - - private void setBond(final int index1, final int index2) { - - if (this.mol.getBond(this.mol.getAtom(index1), this.mol.getAtom(index2)) != null) { - this.mol.removeBond(this.mol.getAtom(index1), this.mol.getAtom(index2)); - } - this.mol.addBond(index1, index2, IBond.Order.UNSET); - } - - - /** - * Sets links between heavy atoms which are in HMBC signal relationship. - * - * @param spectrum Spectrum class object consisting of Signal class objects - * where the proton shift values is given first and the heavy atom shifts as the second. 
- * @param tolProton tolerance value [ppm] for hydrogen shift matching - * @param tolHeavy tolerance value [ppm] for heavy atom shift matching - */ - public final void assignHMBC(final Spectrum spectrum, final double tolProton, final double tolHeavy) { - - this.assign2DSpectrum(spectrum, tolProton, tolHeavy); - } -} diff --git a/src/casekit/NMR/match/Matcher.java b/src/casekit/NMR/match/Matcher.java deleted file mode 100644 index 5d9b016..0000000 --- a/src/casekit/NMR/match/Matcher.java +++ /dev/null @@ -1,360 +0,0 @@ -/* - * The MIT License - * - * Copyright 2019 Michael Wenk [https://github.com/michaelwenk] - * - * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - -package casekit.NMR.match; - -import casekit.NMR.Utils; -import casekit.NMR.model.Assignment; -import casekit.NMR.model.Signal; -import casekit.NMR.model.Spectrum; -import org.apache.commons.lang3.ArrayUtils; -import org.openscience.cdk.exception.CDKException; -import org.openscience.cdk.similarity.Tanimoto; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashSet; - -public class Matcher { - - - /** - * Checks whether two spectra contain given dimensions. - * - * @param spectrum1 first spectrum - * @param spectrum2 second spectrum - * @param dim1 dimension to select in first spectrum - * @param dim2 dimension to select in second spectrum - * @return true if both spectra contain the selected dimension - */ - private static boolean checkDimensions(final Spectrum spectrum1, final Spectrum spectrum2, final int dim1, final int dim2){ - return spectrum1.containsDim(dim1) && spectrum2.containsDim(dim2); - } - - /** - * Combines selected dimensions of two spectra while considering possible equivalent signals - * via the {@code pickPrecision} parameter and multiplicity comparison. - * In {@code spectrum1}, the equivalent signals have to be set. - * - * - * @param spectrum1 first spectrum, incl. 
equivalent signals - * @param spectrum2 second spectrum - * @param pickPrecision tolerance value used for signal shift matching to - * find equivalent signals - * @param dim1 dimension of first spectrum to combine - * @param dim2 dimension of second spectrum to combine - * @return null if one spectrum does not contain the selected dimension - * - */ - public static Spectrum combineSpectra(final Spectrum spectrum1, final Spectrum spectrum2, final int dim1, final int dim2, final double pickPrecision) throws Exception { - if (!Matcher.checkDimensions(spectrum1, spectrum2, dim1, dim2)) { - return null; - } - int equivalentSignalIndex; - // create new spectra which is to fill with signals of both spectra - final Spectrum combinedSpectrum = spectrum1.getClone(); - // fill in signals from spectrum2 - // consider the possibility of potential equivalent signals here - for (final Signal signalSpectrum2 : spectrum2.getSignals()) { - equivalentSignalIndex = -1; - for (final int closestSignalIndex : combinedSpectrum.pickSignals(signalSpectrum2.getShift(dim2), dim1, pickPrecision)) { - if (signalSpectrum2.getMultiplicity().equals(combinedSpectrum.getSignal(closestSignalIndex).getMultiplicity())) { - equivalentSignalIndex = closestSignalIndex; - } - } - combinedSpectrum.addSignal(signalSpectrum2.getClone(), equivalentSignalIndex); - } - return combinedSpectrum; - } - - /** - * Calculates the Tanimoto coefficient between two spectra in given dimensions. 
- * - * @param spectrum1 first spectrum - * @param spectrum2 second spectrum - * @param dim1 dimension in first spectrum to take the shifts from - * @param dim2 dimension in second spectrum to take the shifts from - * @return - * @throws CDKException - */ - public static Float calculateTanimotoCoefficient(final Spectrum spectrum1, final Spectrum spectrum2, final int dim1, final int dim2) throws CDKException { - if (!Matcher.checkDimensions(spectrum1, spectrum2, dim1, dim2)) { - return null; - } - final double[] shiftsSpectrum1 = ArrayUtils.toPrimitive(spectrum1.getShifts(dim1).toArray(new Double[spectrum1.getSignalCount()])); - Arrays.parallelSort(shiftsSpectrum1); - final double[] shiftsSpectrum2 = ArrayUtils.toPrimitive(spectrum2.getShifts(dim2).toArray(new Double[spectrum2.getSignalCount()])); - Arrays.parallelSort(shiftsSpectrum2); - - return Tanimoto.calculate(shiftsSpectrum1, shiftsSpectrum2); - } - - /** - * Returns deviatons between matched shifts in SSC and query query spectrum. - * The matching procedure is already included here. 
- * - * @param spectrum1 first spectrum - * @param spectrum2 second spectrum - * @param dim1 dimension in first spectrum to take the shifts from - * @param dim2 dimension in second spectrum to take the shifts from - * @param shiftTol - * @return - * - * @see #matchSpectra(Spectrum, Spectrum, int, int, double) - */ - public static Double[] getDeviations(final Spectrum spectrum1, final Spectrum spectrum2, final int dim1, final int dim2, final double shiftTol) { - final Double[] deviations = new Double[spectrum1.getSignalCount()]; - final Assignment matchAssignments = Matcher.matchSpectra(spectrum1, spectrum2, dim1, dim2, shiftTol); - Signal matchedSignalInSpectrum2; - for (int i = 0; i < spectrum1.getSignalCount(); i++) { - if (matchAssignments.getAssignment(0, i) == -1) { - deviations[i] = null; - } else { - matchedSignalInSpectrum2 = spectrum2.getSignal(matchAssignments.getAssignment(0, i)); - deviations[i] = Math.abs(spectrum1.getSignal(i).getShift(dim1) - matchedSignalInSpectrum2.getShift(dim2)); - } - } - return deviations; - } - - /** - * Returns the average of all deviations within a given input array. - * - * @param deviations array of deviations - * @return - * - * - */ - public static Double calculateAverageDeviation(final Double[] deviations) { - // every signal has to have a match - for (final Double deviation : deviations) { - if (deviation == null) { - return null; - } - } - - return Utils.getMean(deviations); - } - - /** - * Returns the average of all deviations of matched shifts between two - * spectra. 
- * - * @param spectrum1 first spectrum - * @param spectrum2 second spectrum - * @param dim1 dimension in first spectrum to take the shifts from - * @param dim2 dimension in second spectrum to take the shifts from - * @param shiftTol Tolerance value [ppm] used during peak picking in - * shift comparison - * @return - * - * @see #getDeviations(Spectrum, Spectrum, int, int, double) - * @see #calculateAverageDeviation(Double[]) - */ - public static Double calculateAverageDeviation(final Spectrum spectrum1, final Spectrum spectrum2, final int dim1, final int dim2, final double shiftTol) { - return Matcher.calculateAverageDeviation(Matcher.getDeviations(spectrum1, spectrum2, dim1, dim2, shiftTol)); - } - - /** - * Returns the closest shift matches between two spectra in selected dimensions - * as an Assignment object with one set dimension only.
- * Despite intensities are expected, they are still not considered here. - * - * @param spectrum1 first spectrum - * @param spectrum2 second spectrum - * @param dim1 dimension in first spectrum to take the shifts from - * @param dim2 dimension in second spectrum to take the shifts from - * @param shiftTol Tolerance value [ppm] used during spectra shift - * comparison - * @return Assignments with signal indices of spectrum1 and matched indices - * in spectrum2; null if one of the spectra does not - * contain the selected dimension - */ - public static Assignment matchSpectra(final Spectrum spectrum1, final Spectrum spectrum2, final int dim1, final int dim2, final double shiftTol) { - if (!Matcher.checkDimensions(spectrum1, spectrum2, dim1, dim2)) { - return null; - } - final Assignment matchAssignments = new Assignment(spectrum1); - final HashSet pickedSignalIndices = new HashSet<>(); - ArrayList pickedSignalIndicesSpectrum2; - int pickedSignalIndexSpectrum2; - int pickedSignalIndexSpectrum2Prev; - for (int i = 0; i < spectrum1.getSignalCount(); i++) { - if (spectrum1.getShift(i, dim1) == null) { - pickedSignalIndexSpectrum2 = -1; - } else { -// pickedSignalIndexSpectrum2 = spectrum2.pickClosestSignal(spectrum1.getShift(i, dim1), dim2, shiftTol); -// // if matched signal is already assigned, then consider symmetries (equiv. 
signals) -// if (pickedSignalIndices.contains(pickedSignalIndexSpectrum2)) { -// // symmetry exists -// if (spectrum2.hasEquivalences(pickedSignalIndexSpectrum2)) { -// pickedSignalIndexSpectrum2Prev = pickedSignalIndexSpectrum2; -// // assign the next signal in equivalence list -// for (final int equivalentSignalIndexSpectrum2 : spectrum2.getEquivalentSignals(pickedSignalIndexSpectrum2)) { -// if (!pickedSignalIndices.contains(equivalentSignalIndexSpectrum2)) { -// pickedSignalIndexSpectrum2 = equivalentSignalIndexSpectrum2; -// break; -// } -// } -// // if no further equivalent signal exists then that match is not valid -// if (pickedSignalIndexSpectrum2 == pickedSignalIndexSpectrum2Prev) { -// pickedSignalIndexSpectrum2 = -1; -// } -// } else { -// // not symmetric signals but the same (predicted) or very similar shifts and multiple assignments to catch -// // -> still open -// pickedSignalIndexSpectrum2 = -1; -// } -// } - - - pickedSignalIndexSpectrum2 = -1; - pickedSignalIndicesSpectrum2 = spectrum2.pickClosestSignals(spectrum1.getShift(i, dim1), dim2, shiftTol); - for (int j = 0; j < pickedSignalIndicesSpectrum2.size(); j++) { - pickedSignalIndexSpectrum2 = pickedSignalIndicesSpectrum2.get(j); - // if matched signal is already assigned, then consider symmetries (equiv. 
signals) - if (pickedSignalIndices.contains(pickedSignalIndexSpectrum2)) { - // symmetry exists - if (spectrum2.hasEquivalences(pickedSignalIndexSpectrum2)) { - pickedSignalIndexSpectrum2Prev = pickedSignalIndexSpectrum2; - // assign the next signal in equivalence list - for (final int equivalentSignalIndexSpectrum2 : spectrum2.getEquivalentSignals(pickedSignalIndexSpectrum2)) { - if (!pickedSignalIndices.contains(equivalentSignalIndexSpectrum2)) { - pickedSignalIndexSpectrum2 = equivalentSignalIndexSpectrum2; - break; - } - } - // if no further equivalent signal exists then that match is not valid - if (pickedSignalIndexSpectrum2 == pickedSignalIndexSpectrum2Prev) { - pickedSignalIndexSpectrum2 = -1; - } - } else { - // @TODO not symmetric signals but the same (predicted) or very similar shifts and multiple assignments to catch - pickedSignalIndexSpectrum2 = -1; - } - } - if(pickedSignalIndexSpectrum2 != -1){ - break; - } - } - - // check multiplicity - if ((spectrum1.getMultiplicity(i) == null) || (spectrum2.getMultiplicity(pickedSignalIndexSpectrum2) == null) || !spectrum1.getMultiplicity(i).equals(spectrum2.getMultiplicity(pickedSignalIndexSpectrum2))) { - pickedSignalIndexSpectrum2 = -1; - } - } - // add only truly assigned signal to list of already assigned signals - if (pickedSignalIndexSpectrum2 != -1) { - pickedSignalIndices.add(pickedSignalIndexSpectrum2); - } - // set picked signal index in assignment object - matchAssignments.setAssignment(0, i, pickedSignalIndexSpectrum2); - } -// // try to assign the still unassigned shifts in spectrum1 to shifts in spectrum2 -// System.out.println("--> assignments before:\t" + Utils.ArrayToArrayList(matchAssignments.getAtomIndices(0))); -// ArrayList pickedSignalIndicesInSpectrum2; -// for (int i = 0; i < matchAssignments.getAssignmentsCount(); i++) { -// final Double queryShiftSpectrum1 = spectrum1.getShift(i, 0); -// if ((matchAssignments.getAtomIndex(0, i) == -1) && (queryShiftSpectrum1 != null)) { -// 
pickedSignalIndicesInSpectrum2 = spectrum2.pickSignals(queryShiftSpectrum1, 0, shiftTol); -// for (final int pickedSignalIndexInSpectrum2 : pickedSignalIndicesInSpectrum2) { -// if (!pickedSignalIndices.contains(pickedSignalIndexInSpectrum2) -// && (spectrum1.getMultiplicity(i) != null) -// && (spectrum2.getMultiplicity(pickedSignalIndexInSpectrum2) != null) -// && spectrum1.getMultiplicity(i).equals(spectrum2.getMultiplicity(pickedSignalIndexInSpectrum2))) { -// matchAssignments.setAssignment(0, i, pickedSignalIndexInSpectrum2); -// pickedSignalIndices.add(pickedSignalIndexInSpectrum2); -// break; -// } -// } -// } -// } -// System.out.println("--> assignments after:\t" + Utils.ArrayToArrayList(matchAssignments.getAtomIndices(0))); - return matchAssignments; - } - - /** - * Returns the closest shift matches between two spectra in all dimensions - * as one Assignment object with N set dimensions. - * N here means the number of dimensions in both spectra.
- * Despite intensities are expected, they are still not considered here. - * - * @param spectrum1 first spectrum - * @param spectrum2 second spectrum - * @param shiftTols tolerance values [ppm] per each dimension used during spectra shift - * comparisons - * @return Assignments with signal indices of spectrum1 and matched indices - * in spectrum2 for each dimension; null if the number of - * dimensions in both spectra is not the same or is different than the number of given - * shift tolerances - * - * @see #matchSpectra(Spectrum, Spectrum, int, int, double) - * - */ - public static Assignment matchSpectra(final Spectrum spectrum1, final Spectrum spectrum2, final double[] shiftTols) { - if((spectrum1.getNDim() != spectrum2.getNDim()) || (spectrum1.getNDim() != shiftTols.length)){ - return null; - } - final Assignment matchAssignment = new Assignment(spectrum1); - for (int dim = 0; dim < spectrum1.getNDim(); dim++) { - matchAssignment.setAssignments(dim, Matcher.matchSpectra(spectrum1, spectrum2, dim, dim, shiftTols[dim]).getAssignments(0)); - } - - return matchAssignment; - } - - - // might be useful in future to correct matches between spectra - -// /** -// * Corrects a match list regarding a given shift list and an atom container. -// * This is useful when two ore more shift values (e.g. DEPT shifts) match -// * with the same atom in the atom container. So the purpose here is to -// * enable more unambiguous matches. This method first looks for unambiguous -// * matches and calculates the median of the difference values between the -// * shift list values and the shifts of atom container. Then, all shift list -// * values are adjusted (+/-) with this median value. 
-// * -// * @param shiftList1 Shift value list to search in -// * @param shiftList2 Shift value list to match in shiftList1 -// * @param matchesInShiftList1 Matcher list to correct -// * @param tol Tolerance value -// * @return -// */ -// public static ArrayList correctShiftMatches(final ArrayList shiftList1, final ArrayList shiftList2, final ArrayList matchesInShiftList1, final double tol) { -// -// int matchIndex; -// // get differences of unique matches between query shift and ac shifts -// ArrayList diffs = new ArrayList<>(); -// final HashSet uniqueMatchIndicesSet = new HashSet<>(matchesInShiftList1); -// for (final int uniqueMatchIndex : uniqueMatchIndicesSet) { -// if (Collections.frequency(matchesInShiftList1, uniqueMatchIndex) == 1) { -// matchIndex = matchesInShiftList1.indexOf(uniqueMatchIndex); -// if (matchesInShiftList1.get(matchIndex) >= 0) { -// diffs.add(shiftList2.get(matchIndex) - shiftList1.get(matchesInShiftList1.get(matchIndex))); -// } -// } -// } -// // calculate the median of found unique match differences -// if (diffs.size() > 0) { -// final double median = casekit.NMR.Utils.getMedian(diffs); -// // add or subtract the median of the differences to all shift list values (input) and match again then -// for (int i = 0; i < shiftList2.size(); i++) { -// shiftList2.set(i, shiftList2.get(i) - median); -// } -// // rematch -// return casekit.NMR.Utils.findShiftMatches(shiftList1, shiftList2, tol); -// } -// -// return matchesInShiftList1; -// } -} diff --git a/src/casekit/NMR/model/Signal.java b/src/casekit/NMR/model/Signal.java deleted file mode 100644 index b17da9d..0000000 --- a/src/casekit/NMR/model/Signal.java +++ /dev/null @@ -1,128 +0,0 @@ -/* -* This class was adopted and modified from an earlier version by Christoph Steinbeck -*/ - -/* - * The MIT License - * - * Copyright 2018 Michael Wenk [https://github.com/michaelwenk]. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. 
- */ -package casekit.NMR.model; - -import casekit.NMR.model.dimensional.DimensionalNMR; - -/** - * - * @author Michael Wenk [https://github.com/michaelwenk] - */ -public class Signal extends DimensionalNMR { - - /** - * Am array of doubles to store the chemical shift of - */ - private Double[] shifts; - - private Double intensity; - private String multiplicity; - -// private Integer phase; -// public final static int PHASE_NONE = 0, PHASE_POSITIVE = 1, PHASE_NEGATIVE = 2; -// public final static String[] PHASENAMES = {"NONE", "POSITIVE", "NEGATIVE"}; - - - public Signal(final String[] nuclei) throws Exception { - this(nuclei, null); - } - - public Signal(final String[] nuclei, final Double[] shifts) throws Exception { - this(nuclei, shifts, null, null); - } - - public Signal(final String[] nuclei, final Double[] shifts, final String multiplicity, final Double intensity) throws Exception { - super(nuclei); - this.shifts = this.initShifts(shifts, this.getNDim()); - this.multiplicity = multiplicity; - this.intensity = intensity; - } - - private Double[] initShifts(final Double[] shifts, final int nDim) throws Exception { - if((shifts == null) || (shifts.length != nDim)){ - throw new Exception("Number of given nuclei (" + nDim + ") and shifts (" + shifts.length + ") is not the same!!!"); - } - final Double[] tempShifts = new Double[nDim]; - for (int d = 0; d < nDim; d++) { - tempShifts[d] = shifts[d]; - } - - return tempShifts; - } - - public boolean setShift(final Double shift, final int dim) { - if(!this.containsDim(dim)){ - return false; - } - this.shifts[dim] = shift; - - return true; - } - - public Double getShift(final int dim) { - if(!this.containsDim(dim)){ - return null; - } - return this.shifts[dim]; - } - - public void setIntensity(final Double intensity) { - this.intensity = intensity; - } - - public Double getIntensity() { - return this.intensity; - } - - public void setMultiplicity(final String multiplicity) { - this.multiplicity = multiplicity; - } - - 
public String getMultiplicity() { - return this.multiplicity; - } - -// public void setPhase(final Integer phase) { -// this.phase = phase; -// } -// -// public Integer getPhase() { -// return this.phase; -// } - - - public Signal getClone() throws Exception { -// final Signal clone = new Signal(this.getDimNames(), this.shifts, this.multiplicity, this.intensity); -// clone.setPhase(this.phase); -// -// return clone; - - return new Signal(this.getNuclei(), this.shifts, this.multiplicity, this.intensity); - } - -} diff --git a/src/casekit/NMR/model/Spectrum.java b/src/casekit/NMR/model/Spectrum.java deleted file mode 100644 index 75e52f7..0000000 --- a/src/casekit/NMR/model/Spectrum.java +++ /dev/null @@ -1,558 +0,0 @@ -/* -* This class was adopted and modified from an earlier version by Christoph Steinbeck -*/ - - -/* - * The MIT License - * - * Copyright 2018 Michael Wenk [https://github.com/michaelwenk]. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ -package casekit.NMR.model; - -import casekit.NMR.model.dimensional.DimensionalNMR; - -import java.util.ArrayList; -import java.util.HashMap; -import java.util.HashSet; - -/** - * - * @author Michael Wenk [https://github.com/michaelwenk] - */ -public class Spectrum extends DimensionalNMR { - - /** - * An arbitrary name or description that can be assigned to this spectrum for identification purposes. - */ - private String description; - /** - * An arbitrary name to identify the type of this spectrum, like COSY, NOESY, HSQC, etc. I - * decided not to provide static Strings with given experiment type since the there are - * numerous experiments yielding basically identical information having different names - */ - private String specType; - /** - * The proton frequency of the spectrometer used to record this spectrum. 
- */ - private Double spectrometerFrequency; - private String solvent; - private String standard; - private final ArrayList signals; - private int signalCount; - private final ArrayList equivalences; - private ArrayList[] equivalentSignals; - - - public Spectrum(final String[] nuclei) { - super(nuclei); - this.signals = new ArrayList<>(); - this.signalCount = 0; - this.equivalences = new ArrayList<>(); - this.equivalentSignals = new ArrayList[]{}; - } - - public void setSpecType(final String specType){ - this.specType = specType; - } - - public String getSpecType(){ - return this.specType; - } - - public void setSpecDescription(final String description){ - this.description = description; - } - - public String getSpecDescription(){ - return this.description; - } - - public final boolean setShifts(final ArrayList shiftList, final int dim){ - if(!this.containsDim(dim) || (!this.checkInputListSize(shiftList.size()))){ - return false; - } - for (int i = 0; i < shiftList.size(); i++) { - this.setShift(shiftList.get(i), dim, i); - } - - return true; - } - - public final boolean setShift(final Double shift, final int dim, final int signalIndex){ - if(!this.containsDim(dim) || !this.checkSignalIndex(signalIndex)){ - return false; - } - this.getSignal(signalIndex).setShift(shift, dim); - - return true; - } - - public int getSignalCount() { - return this.signalCount; - } - - /** - * Adds a list of signals to this spectrum. - * - * @param signals list of signals to add - * @return - */ - public boolean addSignals(final ArrayList signals){ - for (final Signal signal : signals) { - if (!this.compareNuclei(signal.getNuclei())) { - return false; - } - } - for (final Signal signal : signals) { - this.addSignal(signal); - } - - return true; - } - - /** - * Adds a signal to this spectrum. 
- * - * @param signal signal to add - * @return - */ - public boolean addSignal(final Signal signal) { - return this.addSignal(signal, -1); - } - - /** - * Adds a signal to this spectrum and stores an equivalent signal index. - * - * @param signal signal to add - * @param equivalentSignalIndex index of equivalent signal in this spectrum - * @return - */ - public boolean addSignal(final Signal signal, final int equivalentSignalIndex) { - if((signal == null) || !this.compareNuclei(signal.getNuclei())){ - return false; - } - // add signal at the end of signal list - if(this.signals.add(signal)){ - this.signalCount++; - this.equivalences.add(equivalentSignalIndex); - this.updateEquivalentSignalClasses(); - - return true; - } - - return false; - } - - public boolean removeSignal(final Signal signal){ - return this.removeSignal(this.getSignalIndex(signal)); - } - - public boolean removeSignal(final int signalIndex){ - if(!this.checkSignalIndex(signalIndex)){ - return false; - } - if(this.signals.remove(signalIndex) != null){ - this.signalCount--; - this.equivalences.remove(signalIndex); - this.updateEquivalentSignalClasses(); - - return true; - } - - return false; - } - - private boolean checkSignalIndex(final Integer signalIndex){ - return (signalIndex != null) && (signalIndex >= 0) && (signalIndex < this.getSignalCount()); - } - - private boolean checkInputListSize(final int size){ - return (size == this.getSignalCount()); - } - - /** - * Returns an NMRSignal at position number in the List - * @param signalIndex - * @return - */ - public Signal getSignal(final int signalIndex) { - if(!this.checkSignalIndex(signalIndex)){ - return null; - } - - try { - return this.signals.get(signalIndex); - } catch (Exception e) { - return null; - } - } - - public ArrayList getIntensities(){ - final ArrayList intensities = new ArrayList<>(); - for (Signal sig : this.signals) { - intensities.add(sig.getIntensity()); - } - - return intensities; - } - - public Double getIntensity(final 
int signalIndex){ - if(!this.checkSignalIndex(signalIndex)){ - return null; - } - - return this.getSignal(signalIndex).getIntensity(); - } - - public boolean setIntensities(final ArrayList intensities){ - if(!this.checkInputListSize(intensities.size())){ - return false; - } - for (int s = 0; s < this.getSignalCount(); s++) { - this.setIntensity(intensities.get(s), s); - } - - return true; - } - - public boolean setIntensity(final double intensity, final int signalIndex){ - if(!this.checkSignalIndex(signalIndex)){ - return false; - } - this.getSignal(signalIndex).setIntensity(intensity); - - return true; - } - - public ArrayList getShifts(final int dim){ - final ArrayList shifts = new ArrayList<>(); - if(!this.containsDim(dim)){ - return shifts; - } - for (final Signal sig : this.signals) { - shifts.add(sig.getShift(dim)); - } - - return shifts; - } - - public Double getShift(final int SignalIndex, final int dim){ - if(!this.checkSignalIndex(SignalIndex)){ - return null; - } - - return this.getSignal(SignalIndex).getShift(dim); - } - - public boolean setMultiplicities(final ArrayList multiplicities){ - if(!this.checkInputListSize(multiplicities.size())){ - return false; - } - for (int s = 0; s < this.getSignalCount(); s++) { - this.setMultiplicity(multiplicities.get(s), s); - } - - return true; - } - - public boolean setMultiplicity(final String multiplicity, final int signalIndex){ - if(!this.checkSignalIndex(signalIndex)){ - return false; - } - this.getSignal(signalIndex).setMultiplicity(multiplicity); - - return true; - } - - public ArrayList getMultiplicities() { - final ArrayList multiplicities = new ArrayList<>(); - for (final Signal sig : this.signals) { - multiplicities.add(sig.getMultiplicity()); - } - - return multiplicities; - } - - public String getMultiplicity(final int SignalIndex) { - if (!this.checkSignalIndex(SignalIndex)) { - return null; - } - - return this.getSignal(SignalIndex).getMultiplicity(); - } - - public ArrayList getSignals(){ - return 
this.signals; - } - - public Boolean hasEquivalences(final int signalIndex){ - if(!this.checkSignalIndex(signalIndex)){ - return null; - } - - return (this.getEquivalence(signalIndex) != -1) || (this.getEquivalences().contains(signalIndex)); - } - - private ArrayList searchEquivalentSignals(final int signalIndex){ - if(!this.checkSignalIndex(signalIndex)){ - return null; - } - final ArrayList equivalentSignalIndices = new ArrayList<>(); - // case 1: signal was first input signal (root) of an equivalence class and is actually not knowing any of its equivalences; collect all equivalent signals - if(this.getEquivalence(signalIndex) == -1){ - for (int i = 0; i < this.getEquivalences().size(); i++) { - if((this.getEquivalences().get(i) != -1) && (this.getEquivalences().get(i) == signalIndex)) { - equivalentSignalIndices.add(i); - } - } - } else { - // case 2: signal was not the first input signal of that equivalent class; store the class root signal - equivalentSignalIndices.add(this.getEquivalences().get(signalIndex)); - } - // check all stored signals for further equivalent signals (i.e. for the added root signal in case 2) - for (int i = 0; i < equivalentSignalIndices.size(); i++) { - for (int j = 0; j < this.getEquivalences().size(); j++) { - // do not store the own signal index in own equ. signal class - if(j == signalIndex){ - continue; - } - if ((this.getEquivalences().get(j) != -1) - && (Integer.compare(this.getEquivalences().get(j), equivalentSignalIndices.get(i)) == 0) - && !equivalentSignalIndices.contains(j)) { - equivalentSignalIndices.add(j); - } - } - } - - return equivalentSignalIndices; - } - - private void updateEquivalentSignalClasses(){ - this.equivalentSignals = new ArrayList[this.getSignalCount()]; - for(int i = 0; i < this.getSignalCount(); i++) { - this.equivalentSignals[i] = this.searchEquivalentSignals(i); - } - } - - /** - * Returns equivalent signals for requested signal. 
- * - * @param signalIndex - * @return - */ - public ArrayList getEquivalentSignals(final int signalIndex){ - if(!this.checkSignalIndex(signalIndex)){ - return null; - } - - return this.equivalentSignals[signalIndex]; - } - - /** - * Returns a hashmap of equivalent signal classes. - * The key set of that hashmap is just a numerical class index and is not - * belonging to any signal. - * - * @return - */ - public HashMap> getEquivalentSignalClasses(){ - this.updateEquivalentSignalClasses(); - // create a new HashMap object to return, containing the key signal index to have a full equivalent signal class - final HashMap> equivalentSignalClasses = new HashMap<>(); - final HashSet storedSignalIndices = new HashSet<>(); - for (int i = 0; i < this.getSignalCount(); i++) { - if (!storedSignalIndices.contains(i)) { - equivalentSignalClasses.put(equivalentSignalClasses.size(), new ArrayList<>(this.equivalentSignals[i])); - equivalentSignalClasses.get(equivalentSignalClasses.size() - 1).add(i); - storedSignalIndices.addAll(equivalentSignalClasses.get(equivalentSignalClasses.size() - 1)); - } - } - - return equivalentSignalClasses; - } - - public ArrayList getEquivalences(){ - return this.equivalences; - } - - public Integer getEquivalence(final int signalIndex){ - if(!this.checkSignalIndex(signalIndex)){ - return null; - } - - return this.equivalences.get(signalIndex); - } - - public boolean setEquivalence(final int signalIndex, final int isEquivalentToSignalIndex){ - if(!this.checkSignalIndex(signalIndex) || !this.checkSignalIndex(isEquivalentToSignalIndex)){ - return false; - } - this.equivalences.set(signalIndex, isEquivalentToSignalIndex); - this.updateEquivalentSignalClasses(); - - return true; - } - - /** - * Detects equivalent signals within this spectrum by a pick precision of 0.0 (no shift deviations are allowed). 
- * - * @see #detectEquivalences(double) - */ - public void detectEquivalences(){ - this.detectEquivalences(0.0); - } - - /** - * Detects equivalent signals within this spectrum by a given pick precision (shift deviations are allowed). - * - * @param pickPrecision tolerance value used for signal shift matching to find equivalent signals - * - * @see #getEquivalence(int) - * @see #getEquivalences() - * @see #getEquivalentSignals(int) - * @see #getEquivalentSignalClasses() - */ - public void detectEquivalences(final double pickPrecision){ - int equivalentSignalIndex; - for (final Signal signal : this.getSignals()) { - equivalentSignalIndex = -1; - for (final int closestSignalIndex : this.pickSignals(signal.getShift(0), 0, pickPrecision)) { - if (this.getSignalIndex(signal) <= closestSignalIndex) { - continue; - } - if (signal.getMultiplicity().equals(this.getSignal(closestSignalIndex).getMultiplicity())) { - equivalentSignalIndex = closestSignalIndex; - break; - } - } - this.setEquivalence(this.getSignalIndex(signal), equivalentSignalIndex); - } - } - - /** - * Returns the position of an NMRSignal the List - * @param signal - * @return - */ - public int getSignalIndex(final Signal signal) { - for (int s = 0; s < this.signals.size(); s++) { - if (this.signals.get(s) == signal) { - return s; - } - } - return -1; - } - - public void setSpectrometerFrequency(final Double sf) { - this.spectrometerFrequency = sf; - } - - public Double getSpectrometerFrequency() { - return spectrometerFrequency; - } - - public void setSolvent(final String solvent) { - this.solvent = solvent; - } - - public String getSolvent() { - return solvent; - } - - public void setStandard(final String standard) { - this.standard = standard; - } - - public String getStandard() { - return standard; - } - - - /** - * Returns the signal index (or indices) closest to the given shift. If no signal is found within the interval - * defined by {@code pickPrecision}, an empty list is returned. 
- * @param shift query shift - * @param dim dimension in spectrum to look in - * @param pickPrecision tolerance value for search window - * @return - */ - public ArrayList pickClosestSignals(final double shift, final int dim, final double pickPrecision) { - final ArrayList matchIndices = new ArrayList<>(); - if(!this.containsDim(dim)){ - return matchIndices; - } - double minDiff = pickPrecision; - // detect the minimal difference between a signal shift to the given query shift - for (int s = 0; s < this.getSignalCount(); s++) { - if (Math.abs(this.getShift(s, dim) - shift) < minDiff) { - minDiff = Math.abs(this.getShift(s, dim) - shift); - } - } - for (int s = 0; s < this.getSignalCount(); s++) { - if (Math.abs(this.getShift(s, dim) - shift) == minDiff) { - matchIndices.add(s); - } - } - - return matchIndices; - } - - /** - * Returns a list of signal indices within the interval defined by - * pickPrecision. That list is sorted by the distances to the query shift. - * If none is found an empty ArrayList is returned. 
- * @param shift query shift - * @param dim dimension in spectrum to look in - * @param pickPrecision tolerance value for search window - * @return - */ - public ArrayList pickSignals(final Double shift, final int dim, final double pickPrecision) { - final ArrayList pickedSignals = new ArrayList<>(); - if(!this.containsDim(dim)){ - return pickedSignals; - } - for (int s = 0; s < this.getSignalCount(); s++) { - if (Math.abs(this.getShift(s, dim) - shift) <= pickPrecision) { - pickedSignals.add(s); - } - } - // sort signal indices by distance to query shift - pickedSignals.sort((pickedSignalIndex1, pickedSignalIndex2) -> Double.compare(Math.abs(shift - getShift(pickedSignalIndex1, dim)), Math.abs(shift - getShift(pickedSignalIndex2, dim)))); - - return pickedSignals; - } - - public Spectrum getClone() throws Exception { - final Spectrum clone = new Spectrum(this.getNuclei()); - for (int i = 0; i < this.getSignalCount(); i++) { - clone.addSignal(this.getSignal(i).getClone(), this.getEquivalence(i)); - } - clone.setSpecDescription(this.description); - clone.setSolvent(this.solvent); - clone.setSpecType(this.specType); - clone.setSpectrometerFrequency(this.spectrometerFrequency); - clone.setStandard(this.standard); - - return clone; - } - -} diff --git a/src/casekit/NMR/model/dimensional/DimensionalNMR.java b/src/casekit/NMR/model/dimensional/DimensionalNMR.java deleted file mode 100644 index e7e117e..0000000 --- a/src/casekit/NMR/model/dimensional/DimensionalNMR.java +++ /dev/null @@ -1,31 +0,0 @@ -/* - * The MIT License - * - * Copyright 2019 Michael Wenk [https://github.com/michaelwenk] - * - * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is 
furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -package casekit.NMR.model.dimensional; - -import casekit.model.Dimensional; - -public class DimensionalNMR extends Dimensional { - - protected DimensionalNMR(String[] dimNames) throws IndexOutOfBoundsException { - super(dimNames); - } - - public final String[] getNuclei(){ - return this.getDimNames(); - } - - public boolean compareNuclei(final String[] nuclei){ - return this.compareDimNames(nuclei); - } - -} diff --git a/src/casekit/NMRShiftDBSDFParser.java b/src/casekit/NMRShiftDBSDFParser.java index c2ff08b..fedf5ff 100644 --- a/src/casekit/NMRShiftDBSDFParser.java +++ b/src/casekit/NMRShiftDBSDFParser.java @@ -6,21 +6,7 @@ */ package casekit; -import java.io.BufferedWriter; -import java.io.File; -import java.io.FileOutputStream; -import java.io.FileReader; -import java.io.IOException; -import java.io.OutputStreamWriter; -import java.util.StringTokenizer; - -import org.apache.commons.cli.CommandLine; -import org.apache.commons.cli.CommandLineParser; -import org.apache.commons.cli.DefaultParser; -import org.apache.commons.cli.HelpFormatter; -import org.apache.commons.cli.Option; -import org.apache.commons.cli.Options; -import org.apache.commons.cli.ParseException; +import org.apache.commons.cli.*; import org.openscience.cdk.CDKConstants; import org.openscience.cdk.aromaticity.Aromaticity; import 
org.openscience.cdk.depict.DepictionGenerator; @@ -32,6 +18,9 @@ import org.openscience.cdk.tools.HOSECodeGenerator; import org.openscience.cdk.tools.manipulator.AtomContainerManipulator; +import java.io.*; +import java.util.StringTokenizer; + /** * Helper class to parse an NMRShiftDB SDF file with spectra assignments * and convert it to a tab-separated values file with HOSE codes @@ -42,6 +31,7 @@ * @author Christoph Steinbeck */ +@Deprecated public class NMRShiftDBSDFParser { BufferedWriter bw; IMolecularFormula formula = null; diff --git a/src/casekit/Result.java b/src/casekit/Result.java index e451948..43783ee 100644 --- a/src/casekit/Result.java +++ b/src/casekit/Result.java @@ -9,6 +9,7 @@ import org.openscience.cdk.interfaces.IAtomContainer; +@Deprecated public class Result { public IAtomContainer ac; public double score; diff --git a/src/casekit/SimilarityRanker.java b/src/casekit/SimilarityRanker.java index 024a58e..6a67ec1 100644 --- a/src/casekit/SimilarityRanker.java +++ b/src/casekit/SimilarityRanker.java @@ -1,14 +1,22 @@ -/* -* This Open Source Software is provided to you under the MIT License +/* + * This Open Source Software is provided to you under the MIT License * Refer to doc/mit.license or https://opensource.org/licenses/MIT for more information - * - * Copyright (c) 2017, Christoph Steinbeck + * + * Copyright (c) 2017, Christoph Steinbeck */ package casekit; -import casekit.NMR.model.Signal; -import casekit.NMR.model.Spectrum; +import casekit.nmr.model.Signal; +import casekit.nmr.model.Spectrum; +import org.apache.commons.cli.*; +import org.openscience.cdk.CDKConstants; +import org.openscience.cdk.depict.DepictionGenerator; +import org.openscience.cdk.interfaces.IAtom; +import org.openscience.cdk.interfaces.IAtomContainer; +import org.openscience.cdk.io.iterator.IteratingSDFReader; +import org.openscience.cdk.silent.SilentChemObjectBuilder; + import java.io.BufferedReader; import java.io.File; import java.io.FileReader; @@ -18,290 +26,240 @@ 
import java.util.Comparator; import java.util.StringTokenizer; -import org.apache.commons.cli.CommandLine; -import org.apache.commons.cli.CommandLineParser; -import org.apache.commons.cli.DefaultParser; -import org.apache.commons.cli.HelpFormatter; -import org.apache.commons.cli.Option; -import org.apache.commons.cli.Options; -import org.apache.commons.cli.ParseException; -import org.openscience.cdk.CDKConstants; -import org.openscience.cdk.depict.DepictionGenerator; -import org.openscience.cdk.interfaces.IAtom; -import org.openscience.cdk.interfaces.IAtomContainer; -import org.openscience.cdk.io.iterator.IteratingSDFReader; -import org.openscience.cdk.silent.SilentChemObjectBuilder; - -/** - * SimilarityRanker uses a SpectrumPredictor and parses an SDF file, returning a configurable number of compounds and +/** + * SimilarityRanker uses a SpectrumPredictor and parses an SDF file, returning a configurable number of compounds and * their ranked spectrum similarity. - * + *

* This Open Source Software is provided to you under the MIT License * Refer to doc/mit.license or https://opensource.org/licenses/MIT for more information - * - * Copyright (c) 2017, Christoph Steinbeck - * - * @author steinbeck + *

+ * Copyright (c) 2017, Christoph Steinbeck * + * @author steinbeck */ + +@Deprecated public class SimilarityRanker { - - public boolean verbose = true; - DecimalFormat df; - public int resultListSize = 100; - public String inFile = null; - public String outPath = null; - public String spectrumFile = null; - public String hoseTSVFile = null; - Spectrum spectrum = null; - ArrayList results = null; - - public boolean isVerbose() { - return verbose; - } - public void setVerbose(boolean verbose) { - this.verbose = verbose; - } + public boolean verbose = true; + DecimalFormat df; + public int resultListSize = 100; + public String inFile = null; + public String outPath = null; + public String spectrumFile = null; + public String hoseTSVFile = null; + Spectrum spectrum = null; + ArrayList results = null; + + public boolean isVerbose() { + return verbose; + } + + public void setVerbose(boolean verbose) { + this.verbose = verbose; + } + + public int getResultListSize() { + return resultListSize; + } + + public void setResultListSize(int resultListSize) { + this.resultListSize = resultListSize; + } + + public SimilarityRanker() { + // TODO Auto-generated constructor stub + df = new DecimalFormat(); + df.setMaximumFractionDigits(2); + } + + public void readSpectrum() throws NumberFormatException, IOException { + String line; + StringTokenizer strtok; + int linecounter = 0; + Double shift = null; + Integer mult = null; + Signal signal; + String tempString; + Spectrum spectrum = new Spectrum(null); + BufferedReader br = new BufferedReader(new FileReader(spectrumFile)); + if (verbose) + System.out.println("Start reading spectrum from " + spectrumFile); + // while((line = br.readLine()) != null) + // { + // if (!line.startsWith("#") && line.trim().length() > 0) + // { + // strtok = new StringTokenizer(line, ";"); + // if (verbose) System.out.println(line); + // linecounter++; + // + // shift = Double.parseDouble(strtok.nextToken().trim()); + // mult = 
Integer.parseInt(strtok.nextToken().trim()); + // signal = new Signal(); + // spectrum.addSignal(signal); + // } + // } + br.close(); + if (verbose) + System.out.println("Read " + linecounter + " signals from spectrum in file " + spectrumFile); + + this.spectrum = spectrum; + } + + + public ArrayList rank() throws Exception { + /* + * Iterate of SDF file given by input file, predict a spectrum and calculate a similarity with the + * spectrum given in @spectrum. + * Store the 10 most similar spectra in a list and write them to outFile in the end + */ + + HOSECodePredictor predictor = new HOSECodePredictor(hoseTSVFile); + IAtomContainer ac = null; + double similarity = 0.0; + double bestSimilarity = 1000000000.0; + results = new ArrayList(); + ResultComparator comp = new ResultComparator(); + IteratingSDFReader iterator = new IteratingSDFReader(new FileReader(inFile), SilentChemObjectBuilder.getInstance()); + + while (iterator.hasNext()) { + ac = iterator.next(); + predictor.predict(ac); + similarity = calculateSimilarity(ac, spectrum); + if (results.size() > 0) { + if (similarity < results.get(results.size() - 1).getScore()) { + bestSimilarity = similarity; + ac.setProperty(CDKConstants.TITLE, "Distance " + df.format(similarity)); + results.add(new Result(ac, similarity)); + results.sort(comp); + //After sorting, we remove the worst entry and thereby trim the results list to resultListSize + if (results.size() == resultListSize) + results.remove(resultListSize - 1); + } + } else + results.add(new Result(ac, similarity)); + } + iterator.close(); + if (verbose) + System.out.println("Calculation finished. 
Best similarity = " + bestSimilarity); + return results; + } + + public double calculateSimilarity(IAtomContainer ac, Spectrum spectrum) { + double similarity = 0.0; + double lastDiff = 0.0; + int counter = 0; + String shift = null; + boolean matchFound = false; + double diff = 0.0; + double[] shifts = new double[spectrum.getSignalCount()]; + for (IAtom atom : ac.atoms()) { + if (atom.getAtomicNumber() == 6) { + shift = atom.getProperty(CDKConstants.NMRSHIFT_CARBON); + if (shift != null) + shifts[counter] = Double.parseDouble(shift); + else + shifts[counter] = -1.0; + counter++; + } + } + for (int f = 0; f < spectrum.getSignalCount(); f++) { + lastDiff = 10000000000.0; + matchFound = false; + for (int g = 0; g < spectrum.getSignalCount(); g++) { + // if (shifts[f] > spectrum.get(g).getShift().doubleValue()) diff = shifts[f] - spectrum.get(g).getShift().doubleValue(); + // else diff = spectrum.get(g).getShift().doubleValue() - shifts[f]; + df.format(diff); + if (diff < lastDiff) { + lastDiff = diff; + matchFound = true; + } + } + if (matchFound) + similarity += lastDiff; + } + return similarity / spectrum.getSignalCount(); + } + + public void reportResults() throws Exception { + String filename = null; + DepictionGenerator dg = null; + if (!outPath.endsWith(File.separator)) + outPath += File.separator; + for (int f = 0; f < results.size(); f++) { + filename = outPath + String.format("%03d", f) + "-mol.png"; + dg = new DepictionGenerator().withSize(800, 800).withAtomColors().withAtomValues().withMolTitle().withFillToFit(); + dg.depict(results.get(f).getAc()).writeTo(filename); + } + } - public int getResultListSize() { - return resultListSize; - } - public void setResultListSize(int resultListSize) { - this.resultListSize = resultListSize; - } + class ResultComparator implements Comparator { + public int compare(Result o1, Result o2) { - public SimilarityRanker() { - // TODO Auto-generated constructor stub - df = new DecimalFormat(); - df.setMaximumFractionDigits(2); 
- } + if (o1.getScore() < o2.getScore()) + return -1; + return 1; + } + } - public void readSpectrum() throws NumberFormatException, IOException - { - String line; - StringTokenizer strtok; - int linecounter = 0; - Double shift = null; - Integer mult = null; - Signal signal; - String tempString; - Spectrum spectrum = new Spectrum(null); - BufferedReader br = new BufferedReader(new FileReader(spectrumFile)); - if (verbose) System.out.println("Start reading spectrum from " + spectrumFile); -// while((line = br.readLine()) != null) -// { -// if (!line.startsWith("#") && line.trim().length() > 0) -// { -// strtok = new StringTokenizer(line, ";"); -// if (verbose) System.out.println(line); -// linecounter++; -// -// shift = Double.parseDouble(strtok.nextToken().trim()); -// mult = Integer.parseInt(strtok.nextToken().trim()); -// signal = new Signal(); -// spectrum.addSignal(signal); -// } -// } - br.close(); - if (verbose) System.out.println("Read " + linecounter + " signals from spectrum in file " + spectrumFile); - - this.spectrum = spectrum; - } - + private void parseArgs(String[] args) throws ParseException { + Options options = setupOptions(args); + CommandLineParser parser = new DefaultParser(); + try { + CommandLine cmd = parser.parse(options, args); + this.inFile = cmd.getOptionValue("infile"); + this.hoseTSVFile = cmd.getOptionValue("hosecodes"); + this.outPath = cmd.getOptionValue("outpath"); + this.spectrumFile = cmd.getOptionValue("spectrum"); + if (cmd.hasOption("numbers")) + this.resultListSize = Integer.parseInt(cmd.getOptionValue("numbers")); + if (cmd.hasOption("verbose")) + this.verbose = true; + } catch (ParseException e) { + // TODO Auto-generated catch block + HelpFormatter formatter = new HelpFormatter(); + formatter.setOptionComparator(null); + String header = "Ranke structures based on given experimental spectrum and similarity to predicted spectrum.\n\n"; + String footer = "\nPlease report issues at https://github.com/steinbeck/spectra"; + 
formatter.printHelp("java -jar casekit.jar casekit.SimilarityRanker", header, options, footer, true); + throw e; + } + } - public ArrayList rank() throws Exception - { - /* - * Iterate of SDF file given by input file, predict a spectrum and calculate a similarity with the - * spectrum given in @spectrum. - * Store the 10 most similar spectra in a list and write them to outFile in the end - */ - - HOSECodePredictor predictor = new HOSECodePredictor(hoseTSVFile); - IAtomContainer ac = null; - double similarity = 0.0; - double bestSimilarity = 1000000000.0; - results = new ArrayList(); - ResultComparator comp = new ResultComparator(); - IteratingSDFReader iterator = new IteratingSDFReader( - new FileReader(inFile), - SilentChemObjectBuilder.getInstance() - ); - - while (iterator.hasNext()) - { - ac = iterator.next(); - predictor.predict(ac); - similarity = calculateSimilarity(ac, spectrum); - if (results.size() > 0) - { - if (similarity < results.get(results.size()-1).getScore()) - { - bestSimilarity = similarity; - ac.setProperty(CDKConstants.TITLE, "Distance " + df.format(similarity)); - results.add(new Result(ac, similarity)); - results.sort(comp); - //After sorting, we remove the worst entry and thereby trim the results list to resultListSize - if (results.size() == resultListSize) results.remove(resultListSize - 1); - } - } - else results.add(new Result(ac, similarity)); - } - iterator.close(); - if (verbose) System.out.println("Calculation finished. 
Best similarity = " + bestSimilarity); - return results; - } + private Options setupOptions(String[] args) { + Options options = new Options(); - public double calculateSimilarity(IAtomContainer ac, Spectrum spectrum) - { - double similarity = 0.0; - double lastDiff = 0.0; - int counter = 0; - String shift = null; - boolean matchFound = false; - double diff = 0.0; - double shifts[] = new double[spectrum.getSignalCount()]; - for (IAtom atom : ac.atoms()) - { - if (atom.getAtomicNumber() == 6) - { - shift = atom.getProperty(CDKConstants.NMRSHIFT_CARBON); - if (shift != null) shifts[counter] = Double.parseDouble(shift); - else shifts[counter] = -1.0; - counter ++; - } - } - for (int f = 0; f < spectrum.getSignalCount(); f++) - { - lastDiff = 10000000000.0; - matchFound = false; - for (int g = 0; g < spectrum.getSignalCount(); g++) - { -// if (shifts[f] > spectrum.get(g).getShift().doubleValue()) diff = shifts[f] - spectrum.get(g).getShift().doubleValue(); -// else diff = spectrum.get(g).getShift().doubleValue() - shifts[f]; - df.format(diff); - if (diff < lastDiff) - { - lastDiff = diff; - matchFound = true; - } - } - if (matchFound) similarity += lastDiff; - } - return similarity/spectrum.getSignalCount(); - } - - public void reportResults() throws Exception - { - String filename = null; - DepictionGenerator dg = null; - if (!outPath.endsWith(File.separator)) - outPath += File.separator; - for (int f = 0; f < results.size(); f++) - { - filename = outPath + String.format("%03d", f) + "-mol.png"; - dg = new DepictionGenerator().withSize(800, 800).withAtomColors().withAtomValues().withMolTitle().withFillToFit(); - dg.depict(results.get(f).getAc()).writeTo(filename); - } - } + Option infile = Option.builder("i").required(true).hasArg().longOpt("infile").desc("filename of with SDF/MOL file of structures to be ranked (required)").build(); + options.addOption(infile); + Option spectrumfile = Option.builder("p").required(true).hasArg().longOpt("spectrum").desc("filename of 
CSV file with spectrum. Format of each line: ; (required)").build(); + options.addOption(spectrumfile); + Option outpath = Option.builder("o").required(true).hasArg().longOpt("outpath").desc("path to store pictures of ranked output structures (required)").build(); + options.addOption(outpath); + Option hosefile = Option.builder("s").required(true).hasArg().longOpt("hosecodes").desc("filename of TSV file with HOSE codes (required)").build(); + options.addOption(hosefile); + Option outputnumber = Option.builder("n").hasArg().longOpt("number").desc("number of structures in output file. Default is 10, if this option is ommitted").build(); + options.addOption(outputnumber); - - class ResultComparator implements Comparator - { - public int compare(Result o1, Result o2) { + Option verbose = Option.builder("v").required(false).longOpt("verbose").desc("generate messages about progress of operation").build(); + options.addOption(verbose); - if (o1.getScore() < o2.getScore()) return -1; - return 1; - } - } - - private void parseArgs(String[] args) throws ParseException - { - Options options = setupOptions(args); - CommandLineParser parser = new DefaultParser(); - try { - CommandLine cmd = parser.parse( options, args); - this.inFile = cmd.getOptionValue("infile"); - this.hoseTSVFile = cmd.getOptionValue("hosecodes"); - this.outPath = cmd.getOptionValue("outpath"); - this.spectrumFile = cmd.getOptionValue("spectrum"); - if (cmd.hasOption("numbers")) this.resultListSize = Integer.parseInt(cmd.getOptionValue("numbers")); - if (cmd.hasOption("verbose")) this.verbose = true; - } catch (ParseException e) { - // TODO Auto-generated catch block - HelpFormatter formatter = new HelpFormatter(); - formatter.setOptionComparator(null); - String header = "Ranke structures based on given experimental spectrum and similarity to predicted spectrum.\n\n"; - String footer = "\nPlease report issues at https://github.com/steinbeck/spectra"; - formatter.printHelp( "java -jar casekit.jar 
casekit.SimilarityRanker", header, options, footer, true ); - throw e; - } - } - - private Options setupOptions(String[] args) - { - Options options = new Options(); + return options; + } - Option infile = Option.builder("i") - .required(true) - .hasArg() - .longOpt("infile") - .desc("filename of with SDF/MOL file of structures to be ranked (required)") - .build(); - options.addOption(infile); - Option spectrumfile = Option.builder("p") - .required(true) - .hasArg() - .longOpt("spectrum") - .desc("filename of CSV file with spectrum. Format of each line: ; (required)") - .build(); - options.addOption(spectrumfile); - Option outpath = Option.builder("o") - .required(true) - .hasArg() - .longOpt("outpath") - .desc("path to store pictures of ranked output structures (required)") - .build(); - options.addOption(outpath); - Option hosefile = Option.builder("s") - .required(true) - .hasArg() - .longOpt("hosecodes") - .desc("filename of TSV file with HOSE codes (required)") - .build(); - options.addOption(hosefile); - Option outputnumber = Option.builder("n") - .hasArg() - .longOpt("number") - .desc("number of structures in output file. 
Default is 10, if this option is ommitted") - .build(); - options.addOption(outputnumber); - Option verbose = Option.builder("v") - .required(false) - .longOpt("verbose") - .desc("generate messages about progress of operation") - .build(); - options.addOption(verbose); + public static void main(String[] args) { + SimilarityRanker sr = new SimilarityRanker(); + try { + sr.parseArgs(args); + sr.readSpectrum(); + sr.rank(); + sr.reportResults(); + } catch (Exception e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } - return options; - } - - - public static void main(String[] args) { - SimilarityRanker sr = new SimilarityRanker(); - try { - sr.parseArgs(args); - sr.readSpectrum(); - sr.rank(); - sr.reportResults(); - } catch (Exception e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } - - } + } } diff --git a/src/casekit/NMR/Utils.java b/src/casekit/nmr/Utils.java similarity index 77% rename from src/casekit/NMR/Utils.java rename to src/casekit/nmr/Utils.java index 1f336c5..9765245 100644 --- a/src/casekit/NMR/Utils.java +++ b/src/casekit/nmr/Utils.java @@ -9,10 +9,10 @@ * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ -package casekit.NMR; +package casekit.nmr; -import casekit.NMR.model.Spectrum; +import casekit.nmr.model.Spectrum; import org.apache.commons.lang3.StringUtils; import org.openscience.cdk.CDKConstants; import org.openscience.cdk.aromaticity.Aromaticity; @@ -37,96 +37,97 @@ import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.TimeUnit; +import java.util.regex.Matcher; +import java.util.regex.Pattern; /** - * * @author Michael Wenk [https://github.com/michaelwenk] */ public class Utils { - + /** - * Splits an SDF into single molecular files and converts each of them into the LSD substructure format. + * Splits an SDF into single molecular files and converts each of them into the LSD substructure format. * Therefore, the mol2ab executable provided by LSD is required. - * - * @param pathSDF path to SDF to split - * @param pathOut path to directory which should contain the splitted and converted structure files + * + * @param pathSDF path to SDF to split + * @param pathOut path to directory which should contain the splitted and converted structure files * @param pathMol2ab path to mol2ab executable provided by LSD + * * @throws FileNotFoundException * @throws CDKException * @throws IOException */ - public static void SDFtoLSD(final String pathSDF, final String pathOut, final String pathMol2ab) throws FileNotFoundException, CDKException, IOException{ - - + public static void SDFtoLSD(final String pathSDF, final String pathOut, final String pathMol2ab) throws CDKException, IOException { + + System.out.println("Conversion from SDF format to LSD format... 
"); - + IAtomContainer ac; - - IteratingSDFReader iterator = new IteratingSDFReader( - new FileReader(pathSDF), - SilentChemObjectBuilder.getInstance() - ); - + IteratingSDFReader iterator = new IteratingSDFReader(new FileReader(pathSDF), SilentChemObjectBuilder.getInstance()); + + File fout; FileOutputStream fos; BufferedWriter bw; File foutPilot = new File(pathOut + "/pilot"); FileOutputStream fosPilot = new FileOutputStream(foutPilot); BufferedWriter bwPilot = new BufferedWriter(new OutputStreamWriter(fosPilot)); - - + + int i = 0; while (iterator.hasNext()) { i++; ac = iterator.next(); String molID = String.valueOf(i);//(String) ac.getProperties().get("cdk:Remark"); -// molID = molID.replace(" ", "_"); + // molID = molID.replace(" ", "_"); fout = new File(pathOut + "/" + molID + ".sdf"); fos = new FileOutputStream(fout); bw = new BufferedWriter(new OutputStreamWriter(fos)); - + SDFWriter wtr = new SDFWriter(bw); -// Properties sdfWriterProps = new Properties(); -// sdfWriterProps.put("WriteAromaticBondTypes", "true"); -// wtr.addChemObjectIOListener(new PropertiesListener(sdfWriterProps)); -// wtr.customizeJob(); - + // Properties sdfWriterProps = new Properties(); + // sdfWriterProps.put("WriteAromaticBondTypes", "true"); + // wtr.addChemObjectIOListener(new PropertiesListener(sdfWriterProps)); + // wtr.customizeJob(); + wtr.write(ac); wtr.close(); bw.close(); - + bwPilot.write(molID + " " + fout.getPath()); bwPilot.newLine(); - + } - + iterator.close(); bwPilot.close(); System.out.println("Input file contained " + i + " molecules!\nSingle files created!"); - - + + // should be replaced by e.g. the process command because: // 1. for very long files the program ends long before the conversion process (command) ends // 2. no control or output here Runtime.getRuntime().exec(pathMol2ab + "/mol2ab " + pathOut + " " + foutPilot.getPath()); - - + + System.out.println("Conversion from SDF format to LSD format... 
DONE!"); - + } - + /** * Returns a hashmap constisting of lists of atom indices in an atom container. * This is done for all atom types (e.g. C or Br) in given atom container. * - * @param ac IAtomContainer to look in + * @param ac IAtomContainer to look in + * * @return - * @see #getAtomTypeIndicesByElement(org.openscience.cdk.interfaces.IAtomContainer, java.lang.String) + * + * @see #getAtomTypeIndicesByElement(org.openscience.cdk.interfaces.IAtomContainer, java.lang.String) */ public static HashMap> getAtomTypeIndices(final IAtomContainer ac) { - + final HashMap> atomTypeIndices = new HashMap<>(); final HashSet atomTypes = new HashSet<>(); for (final IAtom heavyAtom : AtomContainerManipulator.getHeavyAtoms(ac)) { @@ -135,65 +136,95 @@ public static HashMap> getAtomTypeIndices(final IAtom for (final String atomType : atomTypes) { atomTypeIndices.put(atomType, Utils.getAtomTypeIndicesByElement(ac, atomType)); } - + return atomTypeIndices; } - - + + /** - * Returns a list of atom indices in an atom container for a given atom + * Returns a list of atom indices in an atom container for a given atom * type (e.g. 
C or Br) * - * @param ac IAtomContainer to use for search - * @param atomType Atom type to find in atom container + * @param ac IAtomContainer to use for search + * @param atomType Atom type to find in atom container + * * @return */ - public static ArrayList getAtomTypeIndicesByElement(final IAtomContainer ac, final String atomType){ - + public static ArrayList getAtomTypeIndicesByElement(final IAtomContainer ac, final String atomType) { + final ArrayList indices = new ArrayList<>(); for (int i = 0; i < ac.getAtomCount(); i++) { - if(ac.getAtom(i).getSymbol().equals(atomType)){ + if (ac.getAtom(i).getSymbol().equals(atomType)) { indices.add(i); } } - + return indices; } - public static String getAtomTypeFromSpectrum(final Spectrum spectrum, final int dim){ - if(spectrum.containsDim(dim)){ + public static String getAtomTypeFromSpectrum(final Spectrum spectrum, final int dim) { + if (spectrum.containsDim(dim)) { return Utils.getAtomTypeFromNucleus(spectrum.getNuclei()[dim]); } - + return null; } - - public static String getAtomTypeFromNucleus(final String nucleus){ + + public static String getAtomTypeFromNucleus(final String nucleus) { final String[] nucleusSplit = nucleus.split("\\d"); return nucleusSplit[nucleusSplit.length - 1]; } - - public static int getDifferenceSpectrumSizeAndMolecularFormulaCount(final Spectrum spectrum, final IMolecularFormula molFormula, final int dim) throws CDKException{ - if(!spectrum.containsDim(dim)){ + + public static IMolecularFormula getMolecularFormulaFromAtomContainer(final IAtomContainer ac) { + return MolecularFormulaManipulator.getMolecularFormula(ac); + } + + public static IMolecularFormula getMolecularFormulaFromString(final String mf) { + return MolecularFormulaManipulator.getMolecularFormula(mf, SilentChemObjectBuilder.getInstance()); + } + + public static String molecularFormularToString(final IMolecularFormula molecularFormula) { + return MolecularFormulaManipulator.getString(molecularFormula); + } + + public static Map 
getMolecularFormulaElementCounts(final String mf) { + final LinkedHashMap counts = new LinkedHashMap<>(); + final IMolecularFormula iMolecularFormula = Utils.getMolecularFormulaFromString(mf); + final List elements = new ArrayList<>(); + final Matcher matcher = Pattern.compile("([A-Z][a-z]*)").matcher(mf); + + while (matcher.find()) { + elements.add(matcher.group(1)); + } + for (final String element : elements) { + counts.put(element, MolecularFormulaManipulator.getElementCount(iMolecularFormula, element)); + } + + return counts; + } + + public static int getDifferenceSpectrumSizeAndMolecularFormulaCount(final Spectrum spectrum, final IMolecularFormula molFormula, final int dim) throws CDKException { + if (!spectrum.containsDim(dim)) { throw new CDKException(Thread.currentThread().getStackTrace()[2].getClassName() + "." + Thread.currentThread().getStackTrace()[2].getMethodName() + ": invalid dimension in spectrum given"); } final String atomType = Utils.getAtomTypeFromSpectrum(spectrum, dim); int atomsInMolFormula = 0; - if(molFormula != null){ + if (molFormula != null) { atomsInMolFormula = MolecularFormulaManipulator.getElementCount(molFormula, atomType); } - return atomsInMolFormula - spectrum.getSignalCount(); + return atomsInMolFormula - spectrum.getSignalCountWithEquivalences(); } - + public static void editSignalsInSpectrum(final Spectrum spectrum, final IMolecularFormula molFormula, final int dim) throws Exception { - BufferedReader br = new BufferedReader(new InputStreamReader(System.in)); int n; + BufferedReader br = new BufferedReader(new InputStreamReader(System.in)); + int n; final ArrayList validIndices = new ArrayList<>(); int diff = Utils.getDifferenceSpectrumSizeAndMolecularFormulaCount(spectrum, molFormula, dim); // walk through all signals in spectrum add missing or to remove signals while (diff != 0) { // display all selectable signal indices in spectrum - if(diff > 0){ + if (diff > 0) { System.out.println("\n" + diff + " " + 
spectrum.getNuclei()[0] + " signals are missing!\nWhich signal is not unique?"); } else { System.out.println("\n" + (-1 * diff) + " " + spectrum.getNuclei()[0] + " signals are to be removed!\nWhich signal is to remove?"); @@ -201,59 +232,60 @@ public static void editSignalsInSpectrum(final Spectrum spectrum, final IMolecul for (int s = 0; s < spectrum.getSignalCount(); s++) { System.out.print("index: " + s); for (int d = 0; d < spectrum.getNDim(); d++) { - System.out.print(", shift dim " + (d+1) + ": " + spectrum.getShift(s, d)); + System.out.print(", shift dim " + (d + 1) + ": " + spectrum.getShift(s, d)); } System.out.println(); validIndices.add(s); } // get selected index by user input n = -1; - while(!validIndices.contains(n)){ + while (!validIndices.contains(n)) { System.out.println("Enter the index: "); n = Integer.parseInt(br.readLine()); } // add/remove signals in spectrum - if(diff > 0){ - spectrum.addSignal(spectrum.getSignal(validIndices.indexOf(n)).getClone()); - spectrum.setEquivalence(spectrum.getSignalCount() - 1, validIndices.indexOf(n)); + if (diff > 0) { + spectrum.addSignal(spectrum.getSignal(validIndices.indexOf(n)).buildClone()); } else { spectrum.removeSignal(validIndices.indexOf(n)); } diff = Utils.getDifferenceSpectrumSizeAndMolecularFormulaCount(spectrum, molFormula, dim); } } - + /** * Specified for carbons only -> not generic!!! * * @param mult + * * @return */ - public static Integer getHydrogenCountFromMultiplicity(final String mult){ - - if(mult == null){ + public static Integer getHydrogenCountFromMultiplicity(final String mult) { + + if (mult == null) { System.out.println("null!!!"); return null; } - switch(mult){ - case "Q": + switch (mult) { + case "Q": return 3; - case "T": + case "T": return 2; - case "D": + case "D": return 1; - case "S": + case "S": return 0; - default: + default: System.out.println("unknown symbol!!"); return null; } } - + /** * Specified for carbons only -> not generic!!! 
* * @param hCount + * * @return */ public static String getMultiplicityFromHydrogenCount(final int hCount) { @@ -270,81 +302,99 @@ public static String getMultiplicityFromHydrogenCount(final int hCount) { return null; } } - - + + /** - * Returns the NMR shift constant value for a given element. As far as + * Returns the casekit.nmr shift constant value for a given element. As far as * it is defined, the value from CDKConstants.NMRSHIFT_* (e.g. * {@link org.openscience.cdk.CDKConstants#NMRSHIFT_CARBON}) will be used. * Otherwise the same format is used for other atom types. * Elements defined so far: C, H, N, P, F, D, O, S, Si, B, Pt. + * * @param element element's symbol (e.g. "C") + * * @return */ - public static String getNMRShiftConstant(final String element){ - switch(element){ - case "C": return CDKConstants.NMRSHIFT_CARBON; - case "H": return CDKConstants.NMRSHIFT_HYDROGEN; - case "N": return CDKConstants.NMRSHIFT_NITROGEN; - case "P": return CDKConstants.NMRSHIFT_PHOSPORUS; - case "F": return CDKConstants.NMRSHIFT_FLUORINE; -// case "S": return CDKConstants.NMRSHIFT_SULFUR; + public static String getNMRShiftConstant(final String element) { + switch (element) { + case "C": + return CDKConstants.NMRSHIFT_CARBON; + case "H": + return CDKConstants.NMRSHIFT_HYDROGEN; + case "N": + return CDKConstants.NMRSHIFT_NITROGEN; + case "P": + return CDKConstants.NMRSHIFT_PHOSPORUS; + case "F": + return CDKConstants.NMRSHIFT_FLUORINE; + // case "S": return CDKConstants.NMRSHIFT_SULFUR; default: return null; } } - + /** - * Returns the NMR isotope identifier for a given element, e.g. C -> 13C. + * Returns the casekit.nmr isotope identifier for a given element, e.g. C -> 13C. * Elements defined so far: C, H, N, P, F, D, O, S, Si, B, Pt. * * @param element element's symbol (e.g. 
"C") + * * @return */ public static String getIsotopeIdentifier(final String element) { - switch(element){ - case "C": return "13C"; - case "H": return "1H"; - case "N": return "15N"; - case "P": return "31P"; - case "F": return "19F"; - case "O": return "17O"; - case "S": return "33S"; - case "Si": return "29Si"; - case "B": return "11B"; - case "Pt": return "195Pt"; + switch (element) { + case "C": + return "13C"; + case "H": + return "1H"; + case "N": + return "15N"; + case "P": + return "31P"; + case "F": + return "19F"; + case "O": + return "17O"; + case "S": + return "33S"; + case "Si": + return "29Si"; + case "B": + return "11B"; + case "Pt": + return "195Pt"; default: return element; } - } - - - public static HashSet getAtomTypesInAtomContainer(final IAtomContainer ac) { + } + + + public static Set getAtomTypesInAtomContainer(final IAtomContainer ac) { final HashSet atomTypes = new HashSet<>(); for (IAtom atom : ac.atoms()) { atomTypes.add(atom.getSymbol()); } - + return atomTypes; } - - - public static boolean checkMinMaxValue(final double min, final double max, final double value){ - + + + public static boolean checkMinMaxValue(final double min, final double max, final double value) { + return (value >= min && value <= max); } - + /** - * * @param ac * @param indexAC * @param bondsSet * @param neighborElems + * * @return - * - * @deprecated + * + * @deprecated */ - public static int[] getNeighborhoodBondsCount(final IAtomContainer ac, final int indexAC, final String[] bondsSet, final ArrayList neighborElems){ + public static int[] getNeighborhoodBondsCount(final IAtomContainer ac, final int indexAC, final String[] bondsSet, final List neighborElems) { final int[] counts = new int[neighborElems.size() * bondsSet.length]; String foundBonds; // for all given neighbor element types @@ -353,25 +403,24 @@ public static int[] getNeighborhoodBondsCount(final IAtomContainer ac, final int // for all next neighbors of a specific element for (IAtom neighborAtom : 
ac.getConnectedAtomsList(ac.getAtom(indexAC))) { // skip if not the right neighborhood element or bond type is unknown/unset - if ((!neighborAtom.getSymbol().equals(neighborElems.get(n))) || (casekit.NMR.Utils.getStringFromBondOrder(ac.getBond(ac.getAtom(indexAC), neighborAtom).getOrder()) == null)) { + if ((!neighborAtom.getSymbol().equals(neighborElems.get(n))) || (casekit.nmr.Utils.getStringFromBondOrder(ac.getBond(ac.getAtom(indexAC), neighborAtom).getOrder()) == null)) { continue; } - foundBonds += casekit.NMR.Utils.getStringFromBondOrder(ac.getBond(ac.getAtom(indexAC), neighborAtom).getOrder()); + foundBonds += casekit.nmr.Utils.getStringFromBondOrder(ac.getBond(ac.getAtom(indexAC), neighborAtom).getOrder()); } for (int k = 0; k < bondsSet.length; k++) { counts[n * bondsSet.length + k] = 0; - if (casekit.NMR.Utils.sortString(foundBonds).equals(casekit.NMR.Utils.sortString(bondsSet[k]))) { + if (casekit.nmr.Utils.sortString(foundBonds).equals(casekit.nmr.Utils.sortString(bondsSet[k]))) { counts[n * bondsSet.length + k] = 1; break; } } } - + return counts; } - + /** - * * @param pathToOutput * @param m * @param bondsSet @@ -380,14 +429,13 @@ public static int[] getNeighborhoodBondsCount(final IAtomContainer ac, final int * @param min * @param max * @param stepSize - * + * * @throws IOException - * - * @deprecated + * @deprecated */ - public static void writeNeighborhoodBondsCountMatrix(final String pathToOutput, final int[][] m, final String[] bondsSet, final String elem, final ArrayList neighborElems, final int min, final int max, final int stepSize) throws IOException{ - - if(stepSize < 1){ + public static void writeNeighborhoodBondsCountMatrix(final String pathToOutput, final int[][] m, final String[] bondsSet, final String elem, final ArrayList neighborElems, final int min, final int max, final int stepSize) throws IOException { + + if (stepSize < 1) { System.err.println("stepSize < 1 not allowed!!!"); return; } @@ -415,35 +463,35 @@ public static void 
writeNeighborhoodBondsCountMatrix(final String pathToOutput, } sb.append("\n"); } - + final FileWriter writer = new FileWriter(pathToOutput); writer.append(sb.toString()); writer.flush(); writer.close(); } - + /** - * * @param s + * * @return - * - * @deprecated + * + * @deprecated */ public static String sortString(final String s) { final char[] c = s.toCharArray(); Arrays.sort(c); return new String(c); } - + /** - * * @param valences + * * @return - * - * @deprecated + * + * @deprecated */ public static ArrayList> getBondOrderSets(final String[] valences) { - + final ArrayList> bondOrderSets = new ArrayList<>(); for (int i = 0; i < valences.length; i++) { bondOrderSets.add(new ArrayList<>()); @@ -457,16 +505,16 @@ public static ArrayList> getBondOrderSets(final String[] bondOrderSets.get(i).add(IBond.Order.TRIPLE); } } - + return bondOrderSets; } - + /** - * * @param order + * * @return - * - * @deprecated + * + * @deprecated */ public static String getStringFromBondOrder(final IBond.Order order) { switch (order) { @@ -479,26 +527,27 @@ public static String getStringFromBondOrder(final IBond.Order order) { default: return null; } - } - - + } + + public static void writeTextFile(final String pathToOutputFile, final String content) throws IOException { FileWriter fr = new FileWriter(new File(pathToOutputFile)); BufferedWriter br = new BufferedWriter(fr); br.write(content); br.close(); } - + /** - * Simple function without any settings to generate a picture from a structure + * Simple function without any settings to generate a picture from a structure * given as IAtomcontainer. 
* - * @param ac Atom container + * @param ac Atom container * @param path Path to file for storing + * * @throws IOException * @throws CDKException */ - public static void generatePicture(final IAtomContainer ac, final String path) throws IOException, CDKException { + public static void generatePicture(final IAtomContainer ac, final String path) throws IOException, CDKException { final DepictionGenerator dg = new DepictionGenerator().withSize(1200, 1200).withAtomColors().withFillToFit().withAtomNumbers(); dg.depict(ac).writeTo(path); } @@ -510,11 +559,12 @@ public static void generatePicture(final IAtomContainer ac, final String path) t * The interquartile range (IQR) of the input values is therefore multiplied with a given value * for whisker creation. * - * @param input list of values to process + * @param input list of values to process * @param multiplierIQR multiplier for IQR to use for lower and upper bound creation + * * @return new array list without values outside the generated boundaries */ - public static ArrayList removeOutliers(final ArrayList input, final double multiplierIQR){ + public static ArrayList removeOutliers(final ArrayList input, final double multiplierIQR) { final ArrayList inputWithoutOutliers = new ArrayList<>(input); inputWithoutOutliers.removeAll(Utils.getOutliers(inputWithoutOutliers, multiplierIQR)); @@ -522,13 +572,13 @@ public static ArrayList removeOutliers(final ArrayList input, fi } /** - * * @param input + * * @return */ public static ArrayList getOutliers(final ArrayList input, final double multiplierIQR) { final ArrayList outliers = new ArrayList<>(); - if(input.size() <= 1){ + if (input.size() <= 1) { return outliers; } Collections.sort(input); @@ -549,22 +599,22 @@ public static ArrayList getOutliers(final ArrayList input, final outliers.add(input.get(i)); } } -// System.out.println("input size: " + input.size()); -// System.out.println("output size: " + outliers.size()); + // System.out.println("input size: " + 
input.size()); + // System.out.println("output size: " + outliers.size()); return outliers; } - - + + /** - * * @param data + * * @return */ public static Double getMedian(final ArrayList data) { - if((data == null) || data.isEmpty()) { + if ((data == null) || data.isEmpty()) { return null; } - if(data.size() == 1){ + if (data.size() == 1) { return data.get(0); } Collections.sort(data); @@ -573,77 +623,77 @@ public static Double getMedian(final ArrayList data) { } else { return (data.get(data.size() / 2 - 1) + data.get(data.size() / 2)) / 2.0; } - } - - + } + + /** - * * @param data + * * @return */ public static Double getMean(final Collection data) { - if((data == null) || data.isEmpty()){ + if ((data == null) || data.isEmpty()) { return null; } double sum = 0; int nullCounter = 0; for (final Double d : data) { - if(d != null){ + if (d != null) { sum += d; } else { nullCounter++; } } - return ((data.size() - nullCounter) != 0) ? (sum/(data.size() - nullCounter)) : null; + return ((data.size() - nullCounter) != 0) ? (sum / (data.size() - nullCounter)) : null; } - + /** - * * @param data + * * @return */ public static Double getStandardDeviation(final ArrayList data) { if ((data == null) || data.isEmpty()) { return null; - } + } final Double variance = Utils.getVariance(data); - + return (variance != null) ? Math.sqrt(variance) : null; } - + public static Double getVariance(final Collection data) { if ((data == null) || data.isEmpty()) { return null; } final int nullCounter = Collections.frequency(data, null); - double quadrSum = 0.0; + double quadrSum = 0.0; final Double mean = Utils.getMean(data); - if(mean == null){ + if (mean == null) { return null; - } + } for (final Double d : data) { if (d != null) { quadrSum += Math.pow(d - mean, 2); } } - - return ((data.size() - nullCounter) != 0) ? (quadrSum / (data.size() - nullCounter)) : null; + + return ((data.size() - nullCounter) != 0) ? 
(quadrSum / (data.size() - nullCounter)) : null; } - - + + /** - * * @param data + * * @return */ public static Double getMean(final Double[] data) { - if((data == null) || (data.length == 0)){ + if ((data == null) || (data.length == 0)) { return null; } double sum = 0; int nullCounter = 0; for (final Double d : data) { - if(d != null){ + if (d != null) { sum += d; } else { nullCounter++; @@ -666,30 +716,31 @@ public static HashMap getMean(final HashMap " + Utils.getBondOrderSum(ac, atomIndex, true) + " + " + Utils.getBondOrderAsNumeric(bondToAdd)); + // System.out.print(atomIndex + " --> " + Utils.getBondOrderSum(ac, atomIndex, true) + " + " + Utils.getBondOrderAsNumeric(bondToAdd)); final IAtom atom = ac.getAtom(atomIndex); // -1 for cases with heterocyclic aromatics, like the N in the small aromatic ring in coffein if we want to add the bond to the CH3 group - if(atom.isAromatic() && (!atom.getSymbol().equals("C"))){ -// System.out.print("[ -1 ]"); + if (atom.isAromatic() && (!atom.getSymbol().equals("C"))) { + // System.out.print("[ -1 ]"); bondOrderSum -= 1; } -// System.out.print(" = " + bondOrderSum + " <= " + atom.getValency() + " ? -> " + (bondOrderSum <= atom.getValency()) + "\n"); + // System.out.print(" = " + bondOrderSum + " <= " + atom.getValency() + " ? -> " + (bondOrderSum <= atom.getValency()) + "\n"); // @TODO including charges return bondOrderSum <= atom.getValency(); } - - + + /** * Tests whether two array lists of integers are equal which also means * bidirectional values to each other. 
* * @param shiftMatches1 * @param shiftMatches2 + * * @return */ public static boolean isBidirectional(final ArrayList shiftMatches1, final ArrayList shiftMatches2) { @@ -702,12 +753,12 @@ public static boolean isBidirectional(final ArrayList shiftMatches1, fi } /** - * * @param ac * @param shiftMatches1 * @param shiftMatches2 * @param prop - * @deprecated + * + * @deprecated */ public static void setBidirectionalLinks(final IAtomContainer ac, final ArrayList shiftMatches1, final ArrayList shiftMatches2, final String prop) { @@ -731,31 +782,31 @@ public static void setBidirectionalLinks(final IAtomContainer ac, final ArrayLis } } } - - + + /** - * * @param pathToFile + * * @return */ public static String getFileFormat(final String pathToFile) { - if(pathToFile == null || pathToFile.trim().isEmpty()){ + if (pathToFile == null || pathToFile.trim().isEmpty()) { return ""; - } + } final String[] split = pathToFile.split("\\."); return split[split.length - 1]; } - + /** - * * @param data + * * @return */ public static Double getRMS(final ArrayList data) { - if((data == null) || data.isEmpty()){ + if ((data == null) || data.isEmpty()) { return null; } if (data.size() == 1) { @@ -764,7 +815,7 @@ public static Double getRMS(final ArrayList data) { int nullCounter = 0; double qSum = 0; for (final Double d : data) { - if(d != null){ + if (d != null) { qSum += d * d; } else { nullCounter++; @@ -773,34 +824,34 @@ public static Double getRMS(final ArrayList data) { return ((data.size() - nullCounter) != 0) ? 
Math.sqrt(qSum / (data.size() - nullCounter)) : null; } - - + + /** - * * @param lookup + * * @return */ - public static HashMap getRMS(final HashMap> lookup){ + public static HashMap getRMS(final HashMap> lookup) { final HashMap rms = new HashMap<>(); Double rmsInList; for (final String key : lookup.keySet()) { rmsInList = Utils.getRMS(lookup.get(key)); - if(rmsInList != null) { + if (rmsInList != null) { rms.put(key, rmsInList); } } - + return rms; } - - public static Boolean isSaturated(final IAtomContainer ac, final int atomIndex) { - if(!Utils.checkIndexInAtomContainer(ac, atomIndex)){ + + public static Boolean isSaturated(final IAtomContainer ac, final int atomIndex) { + if (!Utils.checkIndexInAtomContainer(ac, atomIndex)) { return null; } return Utils.getBondOrderSum(ac, atomIndex, true).intValue() >= ac.getAtom(atomIndex).getValency(); } - - public static void addImplicitHydrogens(final IAtomContainer ac) throws CDKException{ + + public static void addImplicitHydrogens(final IAtomContainer ac) throws CDKException { final CDKAtomTypeMatcher matcher = CDKAtomTypeMatcher.getInstance(ac.getBuilder()); IAtomType type; for (IAtom atom : ac.atoms()) { @@ -810,39 +861,38 @@ public static void addImplicitHydrogens(final IAtomContainer ac) throws CDKExcep final CDKHydrogenAdder adder = CDKHydrogenAdder.getInstance(ac.getBuilder()); adder.addImplicitHydrogens(ac); } - -// public static int countElements(final String input){ -// int counter = 0; -// for (int k = 0; k < input.length(); k++) { -// // Check for uppercase letters -// if (Character.isLetter(input.charAt(k)) && Character.isUpperCase(input.charAt(k))) { -// counter++; -// } -// } -// -// return counter; -// } - -// public static ArrayList getComponents(final String symbols){ -// final ArrayList components = new ArrayList<>(); -// for (int i = 0; i < symbols.length(); i++) { -// if ((i + 1 < symbols.length()) -// && Character.isLowerCase(symbols.charAt(i + 1))) { -// components.add(symbols.substring(i, i + 
2)); -// i++; -// } else { -// components.add(symbols.substring(i, i + 1)); -// } -// } -// -// return components; -// } - + + // public static int countElements(final String input){ + // int counter = 0; + // for (int k = 0; k < input.length(); k++) { + // // Check for uppercase letters + // if (Character.isLetter(input.charAt(k)) && Character.isUpperCase(input.charAt(k))) { + // counter++; + // } + // } + // + // return counter; + // } + + // public static ArrayList getComponents(final String symbols){ + // final ArrayList components = new ArrayList<>(); + // for (int i = 0; i < symbols.length(); i++) { + // if ((i + 1 < symbols.length()) + // && Character.isLowerCase(symbols.charAt(i + 1))) { + // components.add(symbols.substring(i, i + 2)); + // i++; + // } else { + // components.add(symbols.substring(i, i + 1)); + // } + // } + // + // return components; + // } + /** - * * @param lookup + * * @return - * */ public static HashMap getMedian(final HashMap> lookup) { @@ -859,62 +909,62 @@ public static HashMap getMedian(final HashMap> hoseLookupToExtend, final HashMap> hoseLookup){ + public static void combineHashMaps(final HashMap> hoseLookupToExtend, final HashMap> hoseLookup) { for (final String hose : hoseLookup.keySet()) { - if(!hoseLookupToExtend.containsKey(hose)){ + if (!hoseLookupToExtend.containsKey(hose)) { hoseLookupToExtend.put(hose, new ArrayList<>()); } hoseLookupToExtend.get(hose).addAll(hoseLookup.get(hose)); - } - } - - public static Double roundDouble(final Double value, final int decimalPlaces){ - if(value == null){ + } + } + + public static Double roundDouble(final Double value, final int decimalPlaces) { + if (value == null) { return null; } final int decimalFactor = (int) (Math.pow(10, decimalPlaces)); return (Math.round(value * decimalFactor) / (double) decimalFactor); } - + /** * Checks whether a structure contains explicit hydrogen atoms or not. 
* * @param ac structure to check + * * @return */ - public static boolean containsExplicitHydrogens(final IAtomContainer ac){ + public static boolean containsExplicitHydrogens(final IAtomContainer ac) { for (final IAtom atomA : ac.atoms()) { // check each atom whether it is an hydrogen if (atomA.getSymbol().equals("H")) { return true; } } - + return false; } - + /** - * Stores all explicit hydrogens as implicit counter for the bonded heavy - * atoms and removes those from the atom container. Also, a HashMap - * containing non-hydrogen atoms and its indices - * before the removals will be returned which one can use for atom index - * comparison (before and after the removals) later. + * Stores all explicit hydrogens as implicit counter for the bonded heavy + * atoms and removes those from the atom container.
+ * Also, a HashMap containing non-hydrogen atoms and its indices + * before the removals will be returned which one can use for atom index + * comparison (before and after the removals). * * @param ac the structure to convert - * @return - * - * @see #containsExplicitHydrogens(org.openscience.cdk.interfaces.IAtomContainer) + * + * @return + * + * @see #containsExplicitHydrogens(org.openscience.cdk.interfaces.IAtomContainer) */ - public static HashMap convertExplicitToImplicitHydrogens(final IAtomContainer ac){ - // create a list of atom indices which one can use for index comparison (before vs. after) after removing the explict hydrogens + public static HashMap convertExplicitToImplicitHydrogens(final IAtomContainer ac) { + // create a list of atom indices which one can use for index comparison (before vs. after) after removing the explicit hydrogens final HashMap atomIndices = new HashMap<>(); final List toRemoveList = new ArrayList<>(); IAtom atomB; @@ -924,48 +974,48 @@ public static HashMap convertExplicitToImplicitHydrogens(final I // for its bonded heavy atom if (atomA.getSymbol().equals("H")) { atomB = ac.getConnectedAtomsList(atomA).get(0); - if(atomB.getImplicitHydrogenCount() == null){ + if (atomB.getImplicitHydrogenCount() == null) { atomB.setImplicitHydrogenCount(0); } atomB.setImplicitHydrogenCount(atomB.getImplicitHydrogenCount() + 1); - toRemoveList.add(atomA); + toRemoveList.add(atomA); } else { // store all non-hydrogen atoms and their indices atomIndices.put(atomA, atomA.getIndex()); } - + } // remove all explicit hydrogen atoms for (final IAtom iAtom : toRemoveList) { ac.removeAtom(iAtom); } - + return atomIndices; } - + /** - * * @param ac - * @return + * + * @return */ - public static int getExplicitHydrogenCount(final IAtomContainer ac){ + public static int getExplicitHydrogenCount(final IAtomContainer ac) { final List toRemoveList = new ArrayList<>(); IAtom atomB; for (final IAtom atomA : ac.atoms()) { if (atomA.getAtomicNumber() == 1) { 
atomB = ac.getConnectedAtomsList(atomA).get(0); - if(atomB.getImplicitHydrogenCount() == null){ + if (atomB.getImplicitHydrogenCount() == null) { atomB.setImplicitHydrogenCount(0); } atomB.setImplicitHydrogenCount(atomB.getImplicitHydrogenCount() + 1); toRemoveList.add(atomA); } } - + return toRemoveList.size(); } - - + + public static void setAromaticity(final IAtomContainer ac) throws CDKException { AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(ac); final ElectronDonation model = ElectronDonation.cdkAllowingExocyclic(); @@ -978,13 +1028,14 @@ public static void setAromaticityAndKekulize(final IAtomContainer ac) throws CDK Utils.setAromaticity(ac); Kekulization.kekulize(ac); } - - + + /** * Removes atoms from a given atom type from an atom container. * - * @param ac IAtomContainer object where to remove the atoms + * @param ac IAtomContainer object where to remove the atoms * @param atomType Atom type (element's name, e.g. C or Br) + * * @return IAtomContainer where the atoms were removed */ public static IAtomContainer removeAtoms(final IAtomContainer ac, final String atomType) { @@ -1001,45 +1052,44 @@ public static IAtomContainer removeAtoms(final IAtomContainer ac, final String a return ac; } - + /** - * * @param array + * * @return - * */ - public static ArrayList ArrayToArrayList(final int[] array){ - + public static ArrayList ArrayToArrayList(final int[] array) { + final ArrayList list = new ArrayList<>(); for (int i = 0; i < array.length; i++) { list.add(array[i]); } - + return list; } - - - public static String getSpectrumNucleiAsString(final Spectrum spectrum){ + + + public static String getSpectrumNucleiAsString(final Spectrum spectrum) { String specID = ""; for (int i = 0; i < spectrum.getNDim(); i++) { specID += spectrum.getNuclei()[i]; - if(i < spectrum.getNDim() - 1){ + if (i < spectrum.getNDim() - 1) { specID += "-"; } } - + return specID; } - - public static boolean checkIndexInAtomContainer(final IAtomContainer ac, final int 
atomIndex){ + + public static boolean checkIndexInAtomContainer(final IAtomContainer ac, final int atomIndex) { return ((atomIndex >= 0) && atomIndex < ac.getAtomCount()); - } - + } + public static ExecutorService initExecuter(final int nThreads) { return Executors.newFixedThreadPool(nThreads); } - public static void stopExecuter(final ExecutorService executor, final long seconds) { + public static void stopExecuter(final ExecutorService executor, final long seconds) { executor.shutdown(); try { if (!executor.awaitTermination(seconds, TimeUnit.SECONDS)) { @@ -1050,26 +1100,27 @@ public static void stopExecuter(final ExecutorService executor, final long secon System.err.println("killing non-finished tasks!"); executor.shutdownNow(); } - } - - /** + } + + /** * Returns the bond order for a numeric order value. * - * @param orderAsNumeric + * @param orderAsNumeric + * * @return */ public static IBond.Order getBondOrder(final int orderAsNumeric) { - for (IBond.Order order : IBond.Order.values()){ - if(order.numeric() == orderAsNumeric){ + for (IBond.Order order : IBond.Order.values()) { + if (order.numeric() == orderAsNumeric) { return order; } - } - + } + return null; } public static Float getBondOrderAsNumeric(final IBond bond) { - if(bond == null){ + if (bond == null) { return null; } float bondOrderAsNumeric; @@ -1078,12 +1129,12 @@ public static Float getBondOrderAsNumeric(final IBond bond) { } else { bondOrderAsNumeric = bond.getOrder().numeric(); } - + return bondOrderAsNumeric; } - + public static Float getBondOrderSum(final IAtomContainer ac, final int atomIndex, final boolean includeImplicitHydrogenCount) { - if(!Utils.checkIndexInAtomContainer(ac, atomIndex)){ + if (!Utils.checkIndexInAtomContainer(ac, atomIndex)) { return null; } float bondsOrderSum = 0; @@ -1091,11 +1142,11 @@ public static Float getBondOrderSum(final IAtomContainer ac, final int atomIndex for (final IBond bond : ac.getConnectedBondsList(atom)) { bondsOrderSum += 
Utils.getBondOrderAsNumeric(bond); } - if(includeImplicitHydrogenCount && (atom.getImplicitHydrogenCount() != null)){ + if (includeImplicitHydrogenCount && (atom.getImplicitHydrogenCount() != null)) { bondsOrderSum += atom.getImplicitHydrogenCount(); } - + return bondsOrderSum; } - + } diff --git a/src/casekit/nmr/analysis/MultiplicitySectionsBuilder.java b/src/casekit/nmr/analysis/MultiplicitySectionsBuilder.java new file mode 100644 index 0000000..3d773ff --- /dev/null +++ b/src/casekit/nmr/analysis/MultiplicitySectionsBuilder.java @@ -0,0 +1,116 @@ +/* + * The MIT License + * + * Copyright (c) 2019 Michael Wenk [https://github.com/michaelwenk] + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ +package casekit.nmr.analysis; + +import casekit.nmr.model.Signal; +import casekit.nmr.model.Spectrum; +import org.openscience.cdk.exception.CDKException; + +import java.util.*; + +/** + * @author Michael Wenk [https://github.com/michaelwenk] + */ +public class MultiplicitySectionsBuilder { + + private final HashSet multiplicities; + private int minLimit, maxLimit, stepSize, steps; + + public MultiplicitySectionsBuilder() { + this.multiplicities = new HashSet<>(); + this.init(); + } + + private void init() { + this.multiplicities.clear(); + this.multiplicities.add("S"); + this.multiplicities.add("D"); + this.multiplicities.add("T"); + this.multiplicities.add("Q"); + this.minLimit = -20; + this.maxLimit = 260; + this.stepSize = 5; + this.steps = (this.maxLimit - this.minLimit) / this.stepSize; // ppm range from -20 to 260 in 5 ppm steps + } + + /** + * Resets to following default values:

+ * multiplicities: S, D, T, Q
+ * min. ppm limit: -20
+ * max. ppm limit: 260
+ * step size: 5 + */ + public void reset() { + this.init(); + } + + public Map> buildMultiplicitySections(final Spectrum spectrum) throws CDKException { + final HashMap> multSections = new HashMap<>(); + // init + for (final String mult : this.multiplicities) { + multSections.put(mult, new ArrayList<>()); + } + // set the mult. sections + Signal signal; + int shiftSection; + for (int i = 0; i < spectrum.getSignalCount(); i++) { + signal = spectrum.getSignal(i); + if ((signal == null) || (signal.getShift(0) == null) || (signal.getMultiplicity() == null) || (signal.getIntensity() == null) || (!this.multiplicities.contains(signal.getMultiplicity()))) { + throw new CDKException(Thread.currentThread().getStackTrace()[1].getMethodName() + ": signal, shift or multiplicity is missing"); + } + shiftSection = (int) ((signal.getShift(0) - this.minLimit) / this.stepSize); + multSections.get(signal.getMultiplicity()).add(shiftSection); + } + + return multSections; + } + + public Set getMultiplicities() { + return this.multiplicities; + } + + public boolean addMultiplicity(final String mult) { + return this.multiplicities.add(mult); + } + + public boolean removeMultiplicity(final String mult) { + return this.multiplicities.remove(mult); + } + + public boolean containsMultiplicity(final String mult) { + return this.multiplicities.contains(mult); + } + + public int getMinLimit() { + return this.minLimit; + } + + public void setMinLimit(final int minLimit) { + this.minLimit = minLimit; + } + + public int getMaxLimit() { + return this.maxLimit; + } + + public void setMaxLimit(final int maxLimit) { + this.maxLimit = maxLimit; + } + + public int getStepSize() { + return this.stepSize; + } + + public void setStepSize(final int stepSize) { + this.stepSize = stepSize; + } +} diff --git a/src/casekit/NMR/convert/LSDConverter.java b/src/casekit/nmr/convert/LSDConverter.java similarity index 97% rename from src/casekit/NMR/convert/LSDConverter.java rename to 
src/casekit/nmr/convert/LSDConverter.java index f7e8788..ec6d0c2 100644 --- a/src/casekit/NMR/convert/LSDConverter.java +++ b/src/casekit/nmr/convert/LSDConverter.java @@ -9,19 +9,19 @@ * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -package casekit.NMR.convert; - -import casekit.NMR.Utils; -import casekit.NMR.model.Spectrum; -import java.io.File; -import java.io.IOException; -import java.util.HashMap; +package casekit.nmr.convert; +import casekit.nmr.Utils; +import casekit.nmr.model.Spectrum; import org.openscience.cdk.interfaces.IAtomContainer; import org.openscience.cdk.interfaces.IBond; import org.openscience.cdk.silent.MolecularFormula; import org.openscience.cdk.tools.manipulator.MolecularFormulaManipulator; +import java.io.File; +import java.io.IOException; +import java.util.HashMap; + /** * * @author Michael Wenk [https://github.com/michaelwenk] @@ -202,7 +202,7 @@ public static void ConvertToLSD(final String projectName, final String pathToOut // if (shiftsDB == null) { // continue; // } -// String[][] shiftsDBvalues = casekit.NMR.dbservice.NMRShiftDB.parseNMRShiftDBSpectrum(shiftsDB); +// String[][] shiftsDBvalues = casekit.casekit.nmr.dbservice.NMRShiftDB.parseNMRShiftDBSpectrum(shiftsDB); // for (String[] shiftsDBvalue : shiftsDBvalues) { // atomIndexDB = Integer.parseInt(shiftsDBvalue[2]); // // sometimes the MongoDB atom index is wrong and out of array range @@ -223,7 +223,7 @@ public static void ConvertToLSD(final String projectName, final String pathToOut // neighborhoodCountsMatrix[shiftDBInt - minShift][5] += 
((acDB.getAtom(atomIndexDB).getImplicitHydrogenCount() != null) && (acDB.getAtom(atomIndexDB).getImplicitHydrogenCount() == 2)) ? 1 : 0; // CH2 count or equivalents, e.g. NH2 // neighborhoodCountsMatrix[shiftDBInt - minShift][6] += ((acDB.getAtom(atomIndexDB).getImplicitHydrogenCount() != null) && (acDB.getAtom(atomIndexDB).getImplicitHydrogenCount() == 3)) ? 1 : 0; // CH3 count or equivalents, e.g. NH3 // // add counts for a specific atom to matrix m -// int[] counts = casekit.NMR.Utils.getNeighborhoodBondsCount(acDB, atomIndexDB, bondsSet, neighborElems); +// int[] counts = casekit.casekit.nmr.Utils.getNeighborhoodBondsCount(acDB, atomIndexDB, bondsSet, neighborElems); // for (int i = 0; i < counts.length; i++) { // neighborhoodCountsMatrix[shiftDBInt - minShift][3 + 4 + i] += counts[i]; // } diff --git a/src/casekit/nmr/core/Dereplication.java b/src/casekit/nmr/core/Dereplication.java new file mode 100644 index 0000000..5bc14a9 --- /dev/null +++ b/src/casekit/nmr/core/Dereplication.java @@ -0,0 +1,32 @@ +package casekit.nmr.core; + +import casekit.nmr.match.Matcher; +import casekit.nmr.model.Assignment; +import casekit.nmr.model.DataSet; +import casekit.nmr.model.Spectrum; +import org.openscience.cdk.exception.CDKException; + +import java.util.ArrayList; +import java.util.List; + +public class Dereplication { + + public static List dereplicate1D(final Spectrum querySpectrum, final List compoundDataSets, final double shiftTol) { + final List solutions = new ArrayList<>(); + + for (final DataSet dataSet : compoundDataSets) { + final Assignment matchAssignment = Matcher.matchSpectra(dataSet.getSpectrum(), querySpectrum, 0, 0, 1); + if (matchAssignment.isFullyAssigned(0)) { + try { + dataSet.addMetaInfo("tanimoto", String.valueOf(Matcher.calculateTanimotoCoefficient(dataSet.getSpectrum(), querySpectrum, 0, 0))); + } catch (CDKException e) { + e.printStackTrace(); + } + dataSet.addMetaInfo("avgDev", 
String.valueOf(Matcher.calculateAverageDeviation(dataSet.getSpectrum(), querySpectrum, 0, 0, shiftTol))); + solutions.add(dataSet); + } + } + + return solutions; + } +} diff --git a/src/casekit/nmr/core/Elucidation.java b/src/casekit/nmr/core/Elucidation.java new file mode 100644 index 0000000..19f4d51 --- /dev/null +++ b/src/casekit/nmr/core/Elucidation.java @@ -0,0 +1,31 @@ +package casekit.nmr.core; + +import casekit.nmr.match.Matcher; +import casekit.nmr.model.Assignment; +import casekit.nmr.model.DataSet; +import casekit.nmr.model.Spectrum; + +import java.util.ArrayList; +import java.util.List; + +public class Elucidation { + + public static List findFragments(final Spectrum querySpectrum, final List compoundDataSets, final double shiftTol){ + final List fragments = new ArrayList<>(); + + Assignment matchAssignment; + for (final DataSet dataSet : compoundDataSets){ + matchAssignment = Matcher.matchSpectra(dataSet.getSpectrum(), querySpectrum, 0, 0, shiftTol); + + } + + return fragments; + } + + public static List elucidate(){ + final List solutions = new ArrayList<>(); + + return solutions; + } + +} diff --git a/src/casekit/NMR/dbservice/MongoDB.java b/src/casekit/nmr/dbservice/MongoDB.java similarity index 90% rename from src/casekit/NMR/dbservice/MongoDB.java rename to src/casekit/nmr/dbservice/MongoDB.java index 0f548a7..3ced8eb 100644 --- a/src/casekit/NMR/dbservice/MongoDB.java +++ b/src/casekit/nmr/dbservice/MongoDB.java @@ -9,7 +9,8 @@ * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ -package casekit.NMR.dbservice; +package casekit.nmr.dbservice; + import com.mongodb.MongoClient; import com.mongodb.MongoClientOptions; @@ -18,7 +19,6 @@ import com.mongodb.client.MongoCollection; import com.mongodb.client.MongoDatabase; import org.bson.Document; -import org.openscience.cdk.exception.CDKException; /** * @@ -27,10 +27,10 @@ public class MongoDB { - public static MongoClient login(final String mongoUser, final String mongoPassword, final String mongoAuthDB) throws CDKException { + public static MongoClient login(final String mongoUser, final String mongoPassword, final String mongoAuthDB) { MongoClient mongo; try { - // Creating a Mongo client + // Creating a Mongo client mongo = new MongoClient( new ServerAddress("127.0.0.1", 27017), MongoCredential.createCredential( @@ -39,7 +39,7 @@ public static MongoClient login(final String mongoUser, final String mongoPasswo mongoPassword.toCharArray()), MongoClientOptions.builder().build()); System.out.println("Login to MongoDB was successfull"); - // Accessing the database + // Accessing the database } catch (Exception e) { e.printStackTrace(); System.err.println(Thread.currentThread().getStackTrace()[1].getMethodName() + ": could not connect to MongoDB!"); @@ -53,12 +53,12 @@ public static MongoClient login(final String mongoUser, final String mongoPasswo public static MongoDatabase getDatabase(final MongoClient mongo, final String mongoDBName){ return mongo.getDatabase(mongoDBName); } - + public static MongoCollection getCollection(final MongoClient mongo, final String mongoDBName, final String mongoDBCollection) { final MongoDatabase database = MongoDB.getDatabase(mongo, mongoDBName); - if (database == null) { - return null; - } +// if (database == null) { +// return null; +// } System.out.println("Access to database \"" + mongoDBName + "\" was successfull"); // Retrieving a collection final MongoCollection collection = database.getCollection(mongoDBCollection); diff --git 
a/src/casekit/nmr/dbservice/NMRShiftDB.java b/src/casekit/nmr/dbservice/NMRShiftDB.java new file mode 100644 index 0000000..46976f1 --- /dev/null +++ b/src/casekit/nmr/dbservice/NMRShiftDB.java @@ -0,0 +1,422 @@ +/* + * The MIT License + * + * Copyright 2019 Michael Wenk [https://github.com/michaelwenk] + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */
+
+package casekit.nmr.dbservice;
+
+import casekit.nmr.Utils;
+import casekit.nmr.model.Assignment;
+import casekit.nmr.model.DataSet;
+import casekit.nmr.model.Signal;
+import casekit.nmr.model.Spectrum;
+import org.openscience.cdk.exception.CDKException;
+import org.openscience.cdk.interfaces.IAtomContainer;
+import org.openscience.cdk.interfaces.IMolecularFormula;
+import org.openscience.cdk.io.iterator.IteratingSDFReader;
+import org.openscience.cdk.silent.SilentChemObjectBuilder;
+import org.openscience.cdk.tools.CDKHydrogenAdder;
+import org.openscience.cdk.tools.manipulator.AtomContainerManipulator;
+
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.List;
+
+public class NMRShiftDB {
+
+    /**
+     * Extracts the solvent belonging to one spectrum from a colon-separated
+     * solvent property string.
+     * <p>
+     * The string is split at {@code ":"}; the segment following the first
+     * segment that ends with {@code spectrumIndexInRecord} is taken as the
+     * solvent. Up to two trailing ASCII digits are removed from it and the
+     * result is trimmed.
+     *
+     * @param solventPropertyString solvent property value of a record
+     * @param spectrumIndexInRecord index of the spectrum within the record
+     *
+     * @return the solvent name, or {@code null} if an argument is
+     * {@code null} or no segment followed by a solvent ends with the index
+     */
+    public static String getSolvent(final String solventPropertyString, final String spectrumIndexInRecord) {
+        if ((solventPropertyString == null) || (spectrumIndexInRecord == null)) {
+            return null;
+        }
+        final String[] segments = solventPropertyString.split(":");
+        // the solvent is read from segment i + 1, so the last segment can never be a valid index position
+        for (int i = 0; i < segments.length - 1; i++) {
+            if (!segments[i].endsWith(spectrumIndexInRecord)) {
+                continue;
+            }
+            String solvent = segments[i + 1];
+            // presumably the trailing digits are the index of the next spectrum in the record - TODO confirm
+            // NOTE(review): the last entry has no following index, a solvent name ending with a digit would be truncated here
+            for (int removed = 0; (removed < 2) && !solvent.isEmpty(); removed++) {
+                final char last = solvent.charAt(solvent.length() - 1);
+                if ((last < '0') || (last > '9')) {
+                    break;
+                }
+                solvent = solvent.substring(0, solvent.length() - 1);
+            }
+
+            return solvent.trim();
+        }
+
+        return null;
+    }
+
+    /**
+     * Collects the keys of all properties of the given atom container which
+     * are strings starting with {@code "Spectrum " + nucleus}.
+     *
+     * @param ac      atom container whose property keys are inspected
+     * @param nucleus nucleus name used to build the key prefix, e.g. "13C"
+     *
+     * @return list of matching property keys; empty if there is none
+     */
+    public static List<String> getSpectraProperties1D(final IAtomContainer ac, final String nucleus) {
+        final List<String> spectraProperties1D = new ArrayList<>();
+        final String prefix = "Spectrum " + nucleus;
+        for (final Object key : ac.getProperties().keySet()) {
+            if ((key instanceof String) && ((String) key).startsWith(prefix)) {
+                spectraProperties1D.add((String) key);
+            }
+        }
+
+        return spectraProperties1D;
+    }
+
+    /**
+     * Returns a {@link DataSet} class object for each usable spectrum of each
+     * molecule record in the given NMRShiftDB file. A spectrum is skipped if
+     * it could not be built from its property value or if the number of
+     * signals in that spectrum differs from the number of atoms of that atom
+     * type in the molecule.
+     * <p>
+     * Explicit hydrogens are removed from every structure and implicit ones
+     * are added before the data sets are created. Solvent and spectrometer
+     * frequency are set on the spectrum if the record provides them; an
+     * unparsable frequency is ignored.
+     *
+     * @param pathToNMRShiftDB path to NMRShiftDB file
+     * @param nuclei           nuclei to get the spectra for
+     *
+     * @return collection of created data sets; empty if nothing was usable
+     *
+     * @throws FileNotFoundException if the file could not be opened
+     * @throws CDKException          if a structure could not be processed or
+     *                               the file could not be closed
+     * @see DataSet
+     */
+    public static Collection<DataSet> getDataSetsFromNMRShiftDB(final String pathToNMRShiftDB, final String[] nuclei) throws FileNotFoundException, CDKException {
+        final Collection<DataSet> dataSets = new ArrayList<>();
+        final CDKHydrogenAdder hydrogenAdder = CDKHydrogenAdder.getInstance(SilentChemObjectBuilder.getInstance());
+
+        // try-with-resources: the file handle is released even if processing of a record fails
+        try (final IteratingSDFReader iterator = new IteratingSDFReader(new FileReader(pathToNMRShiftDB), SilentChemObjectBuilder.getInstance())) {
+            IAtomContainer structure;
+            Spectrum spectrum;
+            Assignment assignment;
+            HashMap<String, String> meta;
+            List<String> spectraProperties1D;
+            String[] split;
+            String spectrumIndexInRecord;
+            String fieldStrengthPrefix;
+            IMolecularFormula mf;
+
+            while (iterator.hasNext()) {
+                structure = iterator.next();
+                AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(structure);
+                // remove explicit hydrogens
+                Utils.removeAtoms(structure, "H");
+                hydrogenAdder.addImplicitHydrogens(structure);
+                Utils.setAromaticityAndKekulize(structure);
+
+                meta = new HashMap<>();
+                meta.put("title", structure.getTitle());
+                meta.put("id", structure.getProperty("nmrshiftdb2 ID"));
+                mf = Utils.getMolecularFormulaFromAtomContainer(structure);
+                meta.put("mf", Utils.molecularFormularToString(mf));
+
+                for (final String nucleus : nuclei) {
+                    spectraProperties1D = getSpectraProperties1D(structure, nucleus);
+                    for (final String spectrumProperty1D : spectraProperties1D) {
+
+                        // the last whitespace-separated token of the property key is the spectrum index
+                        split = spectrumProperty1D.split("\\s");
+                        spectrumIndexInRecord = split[split.length - 1];
+
+                        // skip molecules which do not contain any of requested spectrum information
+                        spectrum = NMRShiftDBSpectrumToSpectrum(structure.getProperty(spectrumProperty1D), nucleus);
+                        // if no spectrum could be built or the number of signals in spectrum is different than the atom number in molecule
+                        if ((spectrum == null) || Utils.getDifferenceSpectrumSizeAndMolecularFormulaCount(spectrum, mf, 0) != 0) {
+                            continue;
+                        }
+                        if (structure.getProperty("Solvent") != null) {
+                            spectrum.setSolvent(getSolvent(structure.getProperty("Solvent"), spectrumIndexInRecord));
+                        }
+                        if (structure.getProperty("Field Strength [MHz]") != null) {
+                            fieldStrengthPrefix = spectrumIndexInRecord + ":";
+                            for (final String fieldStrength : structure.getProperty("Field Strength [MHz]").toString().split("\\s")) {
+                                if (fieldStrength.startsWith(fieldStrengthPrefix)) {
+                                    try {
+                                        // cut the prefix off literally instead of splitting by it as a regular expression
+                                        spectrum.setSpectrometerFrequency(Double.parseDouble(fieldStrength.substring(fieldStrengthPrefix.length())));
+                                    } catch (final NumberFormatException ignored) {
+                                        // best effort: the frequency stays unset if the value is not a number
+                                    }
+                                    break;
+                                }
+                            }
+                        }
+
+                        assignment = NMRShiftDBSpectrumToAssignment(structure.getProperty(spectrumProperty1D), nucleus);
+                        dataSets.add(new DataSet(structure, spectrum, assignment, meta));
+                    }
+                }
+            }
+        } catch (final FileNotFoundException e) {
+            // keep the documented exception type for a missing file
+            throw e;
+        } catch (final java.io.IOException e) {
+            // can only originate from closing the reader
+            throw new CDKException("Could not close NMRShiftDB file: " + pathToNMRShiftDB, e);
+        }
+
+        return dataSets;
+    }
+
+    //    /**
+    //     * Returns a hashmap containing combined keys (by "_") of solvents
+    //     * and lists of calculated deviations between all given spectra for a
+    //     * nucleus in molecule record as values.
+ // * Here, only molecule records in NMRShiftDB file are considered which have + // * at least two different spectra for same nucleus.
+ // * Example: "Spectrum 13C 0", "Spectrum 13C 1" will be used for given + // * nucleus 13C. + // * + // * @param pathToNMRShiftDB + // * @param nucleus + // * + // * @return + // * + // * @throws FileNotFoundException + // * @throws CDKException + // */ + // public static HashMap> getSolventDeviations(final String pathToNMRShiftDB, final String nucleus) throws FileNotFoundException, CDKException { + // int signalCount; + // Spectrum spectrum; + // Assignment assignment; + // final ArrayList> spectraSets = getSpectraFromNMRShiftDB(pathToNMRShiftDB, nucleus); + // HashMap> shiftsPerAtom; + // HashMap> solventsPerAtom; + // ArrayList solvents; + // String[] solventsToSort; + // + // final HashMap> deviations = new HashMap<>(); + // String combiKey; + // + // for (final ArrayList spectraSetInRecord : spectraSets) { + // shiftsPerAtom = new HashMap<>(); + // solventsPerAtom = new HashMap<>(); + // signalCount = -1; + // for (final Object[] spectrumAndAssignment : spectraSetInRecord) { + // spectrum = (Spectrum) spectrumAndAssignment[0]; + // assignment = (Assignment) spectrumAndAssignment[1]; + // if (signalCount == -1) { + // signalCount = spectrum.getSignalCount(); + // } else if (signalCount != spectrum.getSignalCount()) { + // continue; + // } + // for (final int atomIndex : assignment.getAssignments(0)) { + // if (!shiftsPerAtom.containsKey(atomIndex)) { + // shiftsPerAtom.put(atomIndex, new ArrayList<>()); + // solventsPerAtom.put(atomIndex, new ArrayList<>()); + // } + // shiftsPerAtom.get(atomIndex).add(spectrum.getSignal(assignment.getIndex(0, atomIndex)).getShift(0)); + // solventsPerAtom.get(atomIndex).add(spectrum.getSolvent()); + // } + // } + // if (shiftsPerAtom.isEmpty() || (shiftsPerAtom.get(Collections.min(shiftsPerAtom.keySet())).size() < 2)) { + // continue; + // } + // solvents = new ArrayList<>(solventsPerAtom.get(Collections.min(solventsPerAtom.keySet()))); + // // if(Collections.frequency(solvents, "Unreported") + 
Collections.frequency(solvents, "Unknown") > solvents.size() - 2){ + // // continue; + // // } + // + // for (final int atomIndex : shiftsPerAtom.keySet()) { + // for (int s1 = 0; s1 < solvents.size(); s1++) { + // // if(solvents.get(s1).equals("Unreported") || solvents.get(s1).equals("Unknown")){ + // // continue; + // // } + // for (int s2 = s1 + 1; s2 < solvents.size(); s2++) { + // // if (solvents.get(s2).equals("Unreported") || solvents.get(s2).equals("Unknown")) { + // // continue; + // // } + // solventsToSort = new String[2]; + // solventsToSort[0] = solvents.get(s1); + // solventsToSort[1] = solvents.get(s2); + // Arrays.sort(solventsToSort); + // combiKey = solventsToSort[0] + "_" + solventsToSort[1]; + // if (!deviations.containsKey(combiKey)) { + // deviations.put(combiKey, new ArrayList<>()); + // } + // deviations.get(combiKey).add(Math.abs(shiftsPerAtom.get(atomIndex).get(s1) - shiftsPerAtom.get(atomIndex).get(s2))); + // } + // } + // } + // } + // + // return deviations; + // } + // + // /** + // * @param pathToDB + // * + // * @return + // * + // * @throws FileNotFoundException + // * @deprecated + // */ + // public static Set getAtomTypesInDB(final String pathToDB) throws FileNotFoundException { + // final HashSet atomTypes = new HashSet<>(); + // final IteratingSDFReader iterator = new IteratingSDFReader(new FileReader(pathToDB), SilentChemObjectBuilder.getInstance()); + // while (iterator.hasNext()) { + // atomTypes.addAll(Utils.getAtomTypesInAtomContainer(iterator.next())); + // } + // + // return atomTypes; + // } + + /** + * Creates a two dimensional array of a given NMRShiftDB casekit.nmr entry + * with all signal shift values, intensities, multiplicities and atom indices. + * + * @param NMRShiftDBSpectrum + * + * @return two dimensional array: + * 1. dimension: signal index (row); + * 2. 
dimension: signal shift value (column 1), signal intensity (column 2),
+     * signal multiplicity (column 3), atom index in structure (column 4);
+     * intensity or multiplicity is an empty string if it could not be
+     * extracted from the second field of a signal entry
+     */
+    public static String[][] parseNMRShiftDBSpectrum(final String NMRShiftDBSpectrum) {
+        // a missing or blank property value contains no signals
+        if ((NMRShiftDBSpectrum == null) || NMRShiftDBSpectrum.trim().isEmpty()) {
+            return new String[][]{};
+        }
+        final String[] shiftsSplit = NMRShiftDBSpectrum.split("\\|");
+        final String[][] values = new String[shiftsSplit.length][4];
+        String[] signalSplit, intensityParts, multiplicityParts;
+        for (int i = 0; i < shiftsSplit.length; i++) {
+            signalSplit = shiftsSplit[i].split(";");
+            // second field holds intensity directly followed by multiplicity
+            intensityParts = signalSplit[1].toLowerCase().split("[a-z]");
+            multiplicityParts = signalSplit[1].split("\\d+\\.\\d+");
+
+            values[i][0] = signalSplit[0]; // shift value
+            // String.split drops trailing empty strings: a field of letters only yields an empty array
+            values[i][1] = intensityParts.length > 0 ? intensityParts[0] : ""; // intensity
+            // the multiplicity is the part after the decimal number, i.e. index 1; it only exists if something follows the number
+            values[i][2] = multiplicityParts.length > 1 ? multiplicityParts[1].toLowerCase() : ""; // multiplicity
+            values[i][3] = signalSplit[2]; // atom index
+        }
+
+        return values;
+    }
+
+    /**
+     * Converts an NMRShiftDB spectrum property value into a text
+     * representation with one description line followed by one line per
+     * signal: nucleus, chemical shift, multiplicity and intensity, separated
+     * by ", ".
+     *
+     * @param NMRShiftDBSpectrum spectrum property value of a record
+     * @param nucleus            nucleus name written in front of each signal
+     * @param description        first line of the output; "// " is put in
+     *                           front of it if it does not start with "//"
+     *
+     * @return the text spectrum, or {@code null} if the input is {@code null},
+     * blank or could not be parsed
+     */
+    public static String NMRShiftDBSpectrumToBasicTextSpectrum(final String NMRShiftDBSpectrum, final String nucleus, final String description) {
+        if ((NMRShiftDBSpectrum == null) || NMRShiftDBSpectrum.trim().isEmpty()) {
+            return null;
+        }
+        final StringBuilder basicSpectrum = new StringBuilder();
+        // append description
+        if (!description.trim().startsWith("//")) {
+            basicSpectrum.append("// ");
+        }
+        basicSpectrum.append(description).append("\n");
+        try {
+            // parsing happens inside the try block so that a malformed entry leads to null instead of an exception
+            for (final String[] signalValues : NMRShiftDB.parseNMRShiftDBSpectrum(NMRShiftDBSpectrum)) {
+                // append nucleus
+                basicSpectrum.append(nucleus).append(", ");
+                // append chemical shift
+                basicSpectrum.append(Double.parseDouble(signalValues[0])).append(", ");
+                // append multiplicity
+                basicSpectrum.append(signalValues[2]).append(", ");
+                // append intensity
+                basicSpectrum.append(Double.parseDouble(signalValues[1])).append("\n");
+            }
+        } catch (final RuntimeException e) {
+            return null;
+        }
+
+        return basicSpectrum.toString();
+    }
+
+    /**
+     * Builds a one-dimensional {@link Spectrum} from an NMRShiftDB spectrum
+     * property value. One {@link Signal} is added per parsed entry.
+     *
+     * @param NMRShiftDBSpectrum spectrum property value of a record
+     * @param nucleus            nucleus of the spectrum
+     *
+     * @return the spectrum, or {@code null} if the input is {@code null},
+     * blank or a signal could not be created from an entry
+     */
+    public static Spectrum NMRShiftDBSpectrumToSpectrum(final String NMRShiftDBSpectrum, final String nucleus) {
+        if ((NMRShiftDBSpectrum == null) || NMRShiftDBSpectrum.trim().isEmpty()) {
+            return null;
+        }
+        final Spectrum spectrum = new Spectrum(new String[]{nucleus});
+        Double shift, intensity;
+        try {
+            // parsing happens inside the try block so that a malformed entry leads to null instead of an exception
+            for (final String[] signalValues : parseNMRShiftDBSpectrum(NMRShiftDBSpectrum)) {
+                shift = Double.parseDouble(signalValues[0]);
+                intensity = Double.parseDouble(signalValues[1]);
+                spectrum.addSignal(new Signal(new String[]{nucleus}, new Double[]{shift}, signalValues[2], "signal", intensity, 0));
+            }
+        } catch (Exception e) {
+            // kept broad on purpose: every failure while building a signal means "no spectrum"
+            return null;
+        }
+
+        return spectrum;
+    }
+
+    /**
+     * Builds an {@link Assignment} for the spectrum created from an
+     * NMRShiftDB spectrum property value. The atom index of the i-th parsed
+     * entry is set as assignment of the i-th signal in the first dimension.
+     *
+     * @param NMRShiftDBSpectrum spectrum property value of a record
+     * @param nucleus            nucleus of the spectrum
+     *
+     * @return the assignment, or {@code null} if the input is {@code null},
+     * blank or no spectrum could be built from it
+     *
+     * @throws NumberFormatException if an atom index is not an integer
+     */
+    public static Assignment NMRShiftDBSpectrumToAssignment(final String NMRShiftDBSpectrum, final String nucleus) {
+        if ((NMRShiftDBSpectrum == null) || NMRShiftDBSpectrum.trim().isEmpty()) {
+            return null;
+        }
+        final Spectrum spectrum = NMRShiftDBSpectrumToSpectrum(NMRShiftDBSpectrum, nucleus);
+        // without a spectrum there is nothing to assign to
+        if (spectrum == null) {
+            return null;
+        }
+        final String[][] NMRShiftDBSpectrumStringArray = parseNMRShiftDBSpectrum(NMRShiftDBSpectrum);
+        final Assignment assignment = new Assignment(spectrum);
+        for (int i = 0; i < NMRShiftDBSpectrumStringArray.length; i++) {
+            assignment.setAssignment(0, i, Integer.valueOf(NMRShiftDBSpectrumStringArray[i][3]));
+        }
+
+        return assignment;
+    }
+
+    //    public static Map>>> buildHybridizationDistributions(final String pathToDB) {
+    //        // for atom type -> hybridization -> multiplicity -> shift list
+    //        final Map>>> hybridizationDistributions = new HashMap<>();
+    //
+    //        try (final IteratingSDFReader iterator = new IteratingSDFReader(new FileReader(pathToDB), SilentChemObjectBuilder.getInstance())) {
+    //            IAtom atom;
+    //            String nucleus;
+    //            IAtomContainer structure;
+    //            List spectraProperties13C, spectraProperties1H, spectraProperties15N, spectraProperties1D;
+    //            Spectrum spectrum;
+    //            
Assignment assignment; + // Signal signal; + // while (iterator.hasNext()) { + // structure = iterator.next(); + // AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(structure); + // Utils.setAromaticity(structure); + // + // spectraProperties13C = getSpectraProperties1D(structure, "13C"); + // spectraProperties1H = getSpectraProperties1D(structure, "1H"); + // spectraProperties15N = getSpectraProperties1D(structure, "15N"); + // + // for (int i = 0; i < structure.getAtomCount(); i++) { + // atom = structure.getAtom(i); + // if (!hybridizationDistributions.containsKey(atom.getSymbol())) { + // hybridizationDistributions.put(atom.getSymbol(), new HashMap<>()); + // } + // if (!hybridizationDistributions.get(atom.getSymbol()).containsKey(atom.getHybridization().name())) { + // hybridizationDistributions.get(atom.getSymbol()).put(atom.getHybridization().name(), new HashMap<>()); + // } + // + // switch (atom.getSymbol()) { + // case "C": + // spectraProperties1D = spectraProperties13C; + // nucleus = "13C"; + // break; + // case "H": + // spectraProperties1D = spectraProperties1H; + // nucleus = "1H"; + // break; + // case "N": + // spectraProperties1D = spectraProperties15N; + // nucleus = "15N"; + // break; + // default: + // spectraProperties1D = new ArrayList<>(); + // nucleus = ""; + // break; + // } + // + // for (final String spectrumProperty1D : spectraProperties1D) { + // spectrum = NMRShiftDBSpectrumToSpectrum(structure.getProperty(spectrumProperty1D), nucleus); + // assignment = NMRShiftDBSpectrumToAssignment(structure.getProperty(spectrumProperty1D), nucleus); + // signal = spectrum.getSignal(assignment.getIndex(0, i)); + // + // if (signal != null && signal.getMultiplicity() != null) { + // if (!hybridizationDistributions.get(atom.getSymbol()).get(atom.getHybridization().name()).containsKey(signal.getMultiplicity())) { + // hybridizationDistributions.get(atom.getSymbol()).get(atom.getHybridization().name()).put(signal.getMultiplicity(), new 
ArrayList<>()); + // } + // hybridizationDistributions.get(atom.getSymbol()).get(atom.getHybridization().name()).get(signal.getMultiplicity()).add(signal.getShift(0)); + // } + // } + // } + // } + // } catch (IOException | CDKException e) { + // e.printStackTrace(); + // } + // + // System.out.println(hybridizationDistributions); + // + // return hybridizationDistributions; + // } +} diff --git a/src/casekit/nmr/interpretation/InterpretData.java b/src/casekit/nmr/interpretation/InterpretData.java new file mode 100644 index 0000000..7a79b69 --- /dev/null +++ b/src/casekit/nmr/interpretation/InterpretData.java @@ -0,0 +1,526 @@ +/* + * The MIT License + * + * Copyright 2019 Michael Wenk [https://github.com/michaelwenk] + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ +package casekit.nmr.interpretation; + +/** + * @author Michael Wenk [https://github.com/michaelwenk] + */ +public class InterpretData { + + // final private IAtomContainer mol; + // final private IMolecularFormula molFormula; + // private HashMap> atomTypeIndices; + // final private HashMap spectra = new HashMap<>(); + // final private HashMap assignments = new HashMap<>(); + // + // /** + // * Creates an instances of this class with an empty class atom container. + // */ + // public InterpretData() { + // this.molFormula = null; + // this.mol = SilentChemObjectBuilder.getInstance().newAtomContainer(); + // this.updateAtomTypeIndices(); + // } + // + // /** + // * Creates an instances of this class with a class atom container consisting + // * of all heavy atoms in given molecular formula. + // * + // * @param molFormula IMolecularFormula object for IAtomContainer creation + // */ + // public InterpretData(final IMolecularFormula molFormula) { + // this.molFormula = molFormula; + // this.mol = Utils.removeAtoms(MolecularFormulaManipulator.getAtomContainer(this.molFormula), "H"); + // this.updateAtomTypeIndices(); + // } + // + // + // /** + // * Returns used IMolecularFormula object for this class instance. + // * + // * @return + // */ + // public final IMolecularFormula getMolecularFormula() { + // + // return this.molFormula; + // } + // + // + // /** + // * Returns used IAtomContainer object for this class instance. + // * + // * @return + // */ + // public final IAtomContainer getAtomContainer() { + // + // return this.mol; + // } + // + // + // /** + // * Returns a HashMap object with the indices of all atoms for all atom types + // * (elements) within the atom container of this class. + // * + // * @return + // */ + // public final Map> getAtomTypeIndices() { + // + // return this.atomTypeIndices; + // } + // + // + // /** + // * Sets the indices of all atoms in this class atom container. 
+ // * + // * @see Utils#getAtomTypeIndices(org.openscience.cdk.interfaces.IAtomContainer) + // */ + // private void updateAtomTypeIndices() { + // + // this.atomTypeIndices = Utils.getAtomTypeIndices(this.mol); + // } + // + // /** + // * Returns all given and used spectra. + // * + // * @return + // */ + // public final Map getSpectra() { + // + // return this.spectra; + // } + // + // + // /** + // * Returns all created and used Assignment objects. The assigned indices + // * refer to atom indices in class atom container. + // * + // * @return + // */ + // public final Map getAssignments() { + // + // return this.assignments; + // } + // + // + // /** + // * Returns one specific created and used Assignment object. + // * The assigned indices refer to atom indices in class atom container. + // * + // * @param spectrum + // * + // * @return + // */ + // public final Assignment getAssignment(final Spectrum spectrum) { + // + // if (spectrum.getSpecType().equals(CDKConstants.NMRSPECTYPE_1D_DEPT90) || spectrum.getSpecType().equals(CDKConstants.NMRSPECTYPE_1D_DEPT135)) { + // + // return this.getAssignments().get(spectrum.getSpecType()); + // } + // + // return this.assignments.get(spectrum.getSpecType() + "_" + Utils.getSpectrumNucleiAsString(spectrum)); + // } + // + // + // /** + // * Sets the 1D casekit.nmr shift values for given Spectrum object to atoms of the class IAtomContainer. + // * The shift values will be assigned sequentially. + // * In case of a molecular formula is given in this class, the number of + // * shifts must be equal to the number of atoms in this molecular formula. + // * For less shifts in shift list you will be asked for entering equivalences. + // * Otherwise this function will return a false value. + // * In case of no molecular was given to this class, a new atom in the atom container + // * will be created regarding to the input shift list. 
+ // * Each shift value is set to {@link IAtomContainer#setProperty(java.lang.Object, java.lang.Object) + // * as result of Utils#getNMRShiftConstant(java.lang.String)}, depending on + // * the specified atom type (element). + // * After usage of this function, the input Spectrum class object might be extended during + // * equivalent signal selection by user. + // * + // * @param spectrum Spectrum class object containing the 1D shift information + // * + // * @throws java.io.IOException + // * @throws org.openscience.cdk.exception.CDKException + // */ + // public final void assign1DSpectrum(final Spectrum spectrum) throws Exception { + // // checks whether number of signals is equal to molecular formula if given + // // if not equal then edit signal list in spectrum + // this.check1DSpectrum(spectrum); + // // assign shift values to atoms sequentially + // this.assignShiftValuesToAtoms(spectrum); + // + // final Assignment assignment = new Assignment(spectrum); + // if (this.atomTypeIndices.get(Utils.getAtomTypeFromSpectrum(spectrum, 0)) != null) { + // assignment.setAssignments(0, this.atomTypeIndices.get(Utils.getAtomTypeFromSpectrum(spectrum, 0))); + // } + // + // this.spectra.put(CDKConstants.NMRSPECTYPE_1D + "_" + Utils.getSpectrumNucleiAsString(spectrum), spectrum); + // this.assignments.put(CDKConstants.NMRSPECTYPE_1D + "_" + Utils.getSpectrumNucleiAsString(spectrum), assignment); + // } + // + // /** + // * Checks the number of signals in a spectrum against the number of atoms + // * in molecular formula of class, if given. In case of different numbers, + // * a user input for spectrum editing will be requested. 
+ // * + // * @param spectrum + // * + // * @throws IOException + // * @see Utils#editSignalsInSpectrum(Spectrum, IMolecularFormula, int) + // */ + // private void check1DSpectrum(final Spectrum spectrum) throws Exception { + // if (this.molFormula != null) { + // final int diff = Utils.getDifferenceSpectrumSizeAndMolecularFormulaCount(spectrum, this.molFormula, 0); + // if (diff != 0) { + // // adjust Spectrum size by user + // Utils.editSignalsInSpectrum(spectrum, this.molFormula, 0); + // } + // } + // } + // + // + // /** + // * Sets shift values in atoms of class atom container as property (see below), sequentially. + // * + // * @param spectrum Spectrum class object which contains shifts in first + // * dimension + // * + // * @see Utils#getNMRShiftConstant(java.lang.String) + // */ + // private void assignShiftValuesToAtoms(final Spectrum spectrum) { + // final String atomType = Utils.getAtomTypeFromSpectrum(spectrum, 0); + // final List shifts = spectrum.getShifts(0); + // if ((this.molFormula == null) && !atomType.equals("H")) { + // // (re-)filling up of peaks for that atom type from given peak list in spectrum + // this.removeAtoms(atomType); + // IAtom atom; + // for (final double shift : shifts) { + // atom = new Atom(atomType); + // atom.setProperty(Utils.getNMRShiftConstant(atomType), shift); + // atom.setImplicitHydrogenCount(null); + // this.mol.addAtom(atom); + // } + // this.updateAtomTypeIndices(); + // } + // // assign shifts to atoms as property + // if (this.atomTypeIndices.get(atomType) != null) { + // int assignedShiftCount = 0; + // for (final int i : this.atomTypeIndices.get(atomType)) { + // if (assignedShiftCount < shifts.size()) { + // // shift assignment in atom + // this.mol.getAtom(i).setProperty(Utils.getNMRShiftConstant(atomType), shifts.get(assignedShiftCount)); + // } + // assignedShiftCount++; + // } + // } + // } + // + // + // /** + // * Removes atoms from a given atom type from the class' atom container. 
+ // * + // * @param atomType Atom type (element's name, e.g. C or Br) + // * + // * @return IAtomContainer where the atoms were removed + // */ + // private void removeAtoms(final String atomType) { + // if (this.getAtomTypeIndices().get(atomType) == null) { + // return; + // } + // final ArrayList toRemoveList = new ArrayList<>(); + // for (final int i : this.getAtomTypeIndices().get(atomType)) { + // toRemoveList.add(this.mol.getAtom(i)); + // } + // for (IAtom iAtom : toRemoveList) { + // this.mol.removeAtom(iAtom); + // } + // + // this.updateAtomTypeIndices(); + // } + // + // /** + // * Sets the assignments of carbon atoms in class atom container + // * by usage of DEPT90 and DEPT135 information. The implicit hydrogen count + // * property is set too. + // * + // * @param spectrum1D_DEPT90 DEPT90 spectrum + // * @param spectrum1D_DEPT135 DEPT135 spectrum which has to contain intensity + // * information + // * @param tol tolerance value [ppm] for carbon shift matching + // * + // * @return false if 1-dimensional 13C spectrum is missing (not set beforehand) + // * or something is missing in one of the two input spectra + // * + // * @see InterpretData#setImplicitHydrogenCountsFromDEPT() + // */ + // public final boolean assignDEPT(final Spectrum spectrum1D_DEPT90, final Spectrum spectrum1D_DEPT135, final double tol) { + // if ((spectrum1D_DEPT90 == null) || (spectrum1D_DEPT135 == null) || (spectrum1D_DEPT135.getIntensities() == null) || (this.getSpectra().get(CDKConstants.NMRSPECTYPE_1D + "_13C") == null)) { + // return false; + // } + // + // final Assignment assignment1D_DEPT90 = new Assignment(spectrum1D_DEPT90); + // final Assignment assignment1D_DEPT135 = new Assignment(spectrum1D_DEPT135); + // final ArrayList matchesIn1DSpectrum_DEPT90 = this.findMatchesIn1DSpectra(spectrum1D_DEPT90, 0, tol); + // final ArrayList matchesIn1DSpectrum_DEPT135 = this.findMatchesIn1DSpectra(spectrum1D_DEPT135, 0, tol); + // final Assignment assignment1D_13C = 
this.getAssignment(this.getSpectra().get(CDKConstants.NMRSPECTYPE_1D + "_13C")); + // + // for (int i = 0; i < assignment1D_DEPT90.getAssignmentsCount(); i++) { + // if (assignment1D_13C.getAssignment(0, matchesIn1DSpectrum_DEPT90.get(i)) >= 0) { + // assignment1D_DEPT90.setAssignment(0, i, assignment1D_13C.getAssignment(0, matchesIn1DSpectrum_DEPT90.get(i))); + // } + // } + // for (int i = 0; i < assignment1D_DEPT135.getAssignmentsCount(); i++) { + // if (assignment1D_13C.getAssignment(0, matchesIn1DSpectrum_DEPT135.get(i)) >= 0) { + // assignment1D_DEPT135.setAssignment(0, i, assignment1D_13C.getAssignment(0, matchesIn1DSpectrum_DEPT135.get(i))); + // } + // } + // + // this.spectra.put(CDKConstants.NMRSPECTYPE_1D_DEPT90, spectrum1D_DEPT90); + // this.assignments.put(CDKConstants.NMRSPECTYPE_1D_DEPT90, assignment1D_DEPT90); + // this.spectra.put(CDKConstants.NMRSPECTYPE_1D_DEPT135, spectrum1D_DEPT135); + // this.assignments.put(CDKConstants.NMRSPECTYPE_1D_DEPT135, assignment1D_DEPT135); + // + // this.setImplicitHydrogenCountsFromDEPT(); + // + // return true; + // } + // + // + // /** + // * Sets the implicitHydrogenCount() property in atoms of class atom container + // * by using the already set DEPT information. 
+ // * + // * @see InterpretData#assignDEPT(casekit.nmr.model.Spectrum, casekit.nmr.model.Spectrum, double) + // */ + // private void setImplicitHydrogenCountsFromDEPT() { + // + // final List intensitiesDEPT135 = this.getSpectra().get(CDKConstants.NMRSPECTYPE_1D_DEPT135).getIntensities(); + // final List matchesDEPT90InAtomContainer = this.getAssignedAtomIndices(this.getSpectra().get(CDKConstants.NMRSPECTYPE_1D_DEPT90), 0); + // final List matchesDEPT135InAtomContainer = this.getAssignedAtomIndices(this.getSpectra().get(CDKConstants.NMRSPECTYPE_1D_DEPT135), 0); + // + // int matchDEPT90, matchDEPT135, hCount, hCountAll = 0; + // for (int i : this.atomTypeIndices.get("C")) { + // if ((this.mol.getAtom(i).getProperty(CDKConstants.NMRSHIFT_CARBON) != null) && (this.mol.getAtom(i).getImplicitHydrogenCount() == null)) { + // matchDEPT90 = matchesDEPT90InAtomContainer.indexOf(i); + // matchDEPT135 = matchesDEPT135InAtomContainer.indexOf(i); + // if (matchDEPT90 >= 0) { + // // CH + // hCount = 1; + // } else if (matchDEPT90 == -1 && matchDEPT135 >= 0) { + // // CH2 or CH3 + // if (intensitiesDEPT135.get(matchDEPT135) < 0) { + // hCount = 2; + // } else if (intensitiesDEPT135.get(matchDEPT135) > 0) { + // hCount = 3; + // } else { + // // qC + // hCount = 0; + // } + // } else { + // // qC + // hCount = 0; + // } + // this.mol.getAtom(i).setImplicitHydrogenCount(hCount); + // if (this.mol.getAtom(i).getImplicitHydrogenCount() >= 3) { + // this.mol.getAtom(i).setHybridization(IAtomType.Hybridization.SP3); + // } + // hCountAll += hCount; + // } + // } + // if (this.molFormula != null) { + // System.out.println("assigned protons to carbons: " + hCountAll + " (" + MolecularFormulaManipulator.getElementCount(this.molFormula, "H") + ") -> " + (MolecularFormulaManipulator.getElementCount(this.molFormula, "H") - hCountAll) + " protons to be attached on hetero atoms!!!"); + // } else { + // System.out.println("assigned protons to carbons: " + hCountAll + "!!!"); + // } + // + // 
} + // + // + // /** + // * @param spectrum Spectrum class object consisting of Signal class objects + // * where the proton shifts values are given in first dimension and the + // * heavy atom shifts in the second. + // * @param tolProton tolerance value [ppm] for proton shift matching + // * @param tolHeavyAtom tolerance value [ppm] for heavy atom shift matching + // */ + // public final void assignHSQC(final Spectrum spectrum, final double tolProton, final double tolHeavyAtom) { + // // assign index of matching atoms to both dimensions and save the Spectrum and Assignment objects in class + // this.assign2DSpectrum(spectrum, tolProton, tolHeavyAtom); + // // in case the 1H spectrum is given, then assign protons to same indices from belonging carbon atoms + // if (this.getAssignments().get(CDKConstants.NMRSPECTYPE_1D + "_1H") != null) { + // final Assignment assignment1D_1H = this.getAssignments().get(CDKConstants.NMRSPECTYPE_1D + "_1H"); + // final Assignment assignment2D_HSQC = this.getAssignments().get(spectrum.getSpecType() + "_" + Utils.getSpectrumNucleiAsString(spectrum)); + // final ArrayList matchesIn1DSpectrum_1H = this.findMatchesIn1DSpectra(spectrum, 0, tolProton); + // + // for (int i = 0; i < assignment2D_HSQC.getAssignmentsCount(); i++) { + // // if heavy atom i has an assignment in class atom container then assign that index i to belonging protons as index + // if (assignment2D_HSQC.getAssignment(1, i) >= 0) { + // assignment1D_1H.setAssignment(0, matchesIn1DSpectrum_1H.get(i), assignment2D_HSQC.getAssignment(1, i)); + // assignment2D_HSQC.setAssignment(0, i, assignment1D_1H.getAssignment(0, matchesIn1DSpectrum_1H.get(i))); + // } + // } + // } + // // attach protons on other heavy atoms than carbons via HSQC assignment counting + // if (!spectrum.getNuclei()[1].equals("13C")) { + // final Assignment assignment2D_HSQC = this.getAssignments().get(spectrum.getSpecType() + "_" + Utils.getSpectrumNucleiAsString(spectrum)); + // for (int i = 0; i < 
assignment2D_HSQC.getAssignmentsCount(); i++) { + // if ((assignment2D_HSQC.getAssignment(1, i) > -1)) { + // if (this.mol.getAtom(assignment2D_HSQC.getAssignment(1, i)).getImplicitHydrogenCount() == null) { + // this.mol.getAtom(assignment2D_HSQC.getAssignment(1, i)).setImplicitHydrogenCount(0); + // } + // this.mol.getAtom(assignment2D_HSQC.getAssignment(1, i)).setImplicitHydrogenCount(this.mol.getAtom(assignment2D_HSQC.getAssignment(1, i)).getImplicitHydrogenCount() + 1); + // } + // } + // } + // } + // + // + // private void assign2DSpectrum(final Spectrum spectrum, final double tolDim1, final double tolDim2) { + // + // final ArrayList matchesQueryIn1DSpectrumDim1 = this.findMatchesIn1DSpectra(spectrum, 0, tolDim1); + // final ArrayList matchesQueryIn1DSpectrumDim2 = this.findMatchesIn1DSpectra(spectrum, 1, tolDim2); + // final ArrayList matches1DInAtomContainerDim1 = this.getAssignedAtomIndices(this.getSpectra().get(CDKConstants.NMRSPECTYPE_1D + "_" + spectrum.getNuclei()[0]), 0); + // final ArrayList matches1DInAtomContainerDim2 = this.getAssignedAtomIndices(this.getSpectra().get(CDKConstants.NMRSPECTYPE_1D + "_" + spectrum.getNuclei()[1]), 0); + // + // final Assignment assignment = new Assignment(spectrum); + // for (int i = 0; i < matchesQueryIn1DSpectrumDim1.size(); i++) { + // if ((matches1DInAtomContainerDim1 != null) && (matchesQueryIn1DSpectrumDim1.get(i) >= 0)) { + // assignment.setAssignment(0, i, matches1DInAtomContainerDim1.get(matchesQueryIn1DSpectrumDim1.get(i))); + // } + // if ((matches1DInAtomContainerDim2 != null) && (matchesQueryIn1DSpectrumDim2.get(i) >= 0)) { + // assignment.setAssignment(1, i, matches1DInAtomContainerDim2.get(matchesQueryIn1DSpectrumDim2.get(i))); + // } + // } + // + // this.spectra.put(spectrum.getSpecType() + "_" + Utils.getSpectrumNucleiAsString(spectrum), spectrum); + // this.assignments.put(spectrum.getSpecType() + "_" + Utils.getSpectrumNucleiAsString(spectrum), assignment); + // } + // + // + // private 
ArrayList findMatchesIn1DSpectra(final Spectrum spectrum, final int dim, final double tol) { + // + // ArrayList matchesQueryInOrigin1DSpectrum = new ArrayList<>(); + // // final ArrayList shiftsQuery = spectrum.getShifts(dim); + // // if(this.getSpectra().get(CDKConstants.NMRSPECTYPE_1D + "_" + spectrum.getNuclei()[dim]) != null){ + // // final ArrayList shiftsOrigin1DSpectrum = this.getSpectra().get(CDKConstants.NMRSPECTYPE_1D + "_" + spectrum.getNuclei()[dim]).getShifts(0); + // // matchesQueryInOrigin1DSpectrum = Utils.findShiftMatches(shiftsOrigin1DSpectrum, shiftsQuery, tol); + // // matchesQueryInOrigin1DSpectrum = Utils.correctShiftMatches(shiftsOrigin1DSpectrum, shiftsQuery, matchesQueryInOrigin1DSpectrum, tol); + // // } else { + // // for (int i = 0; i < spectrum.getSignalCount(); i++) { + // // matchesQueryInOrigin1DSpectrum.add(-1); + // // } + // // } + // + // return matchesQueryInOrigin1DSpectrum; + // } + // + // /** + // * Returns the indices of atoms within the class atom container which match + // * to the shifts of given spectrum and dimension. + // * + // * @param spectrum + // * @param dim + // * + // * @return + // */ + // public final ArrayList getAssignedAtomIndices(final Spectrum spectrum, final int dim) { + // + // if (spectrum == null) { + // return null; + // } else if (this.getAssignment(spectrum) == null) { + // final ArrayList atomIndices = new ArrayList<>(); + // for (int i = 0; i < spectrum.getSignalCount(); i++) { + // atomIndices.add(-1); + // } + // return atomIndices; + // } + // + // return new ArrayList<>(this.getAssignment(spectrum).getAssignments(dim)); + // } + // + // + // /** + // * Sets links between two heavy atoms of H,H-COSY signals. 
+ // * + // * @param spectrum Spectrum class object containing the 2D spectrum proton shift information + // * @param tolProton tolerance value [ppm] for matching belonging protons + // * of heavy atom + // * + // * @return + // */ + // public final boolean assignHHCOSY(final Spectrum spectrum, final double tolProton) { + // + // final ArrayList protonShiftMatches1 = this.findMatchesIn1DSpectra(spectrum, 0, tolProton); + // final ArrayList protonShiftMatches2 = this.findMatchesIn1DSpectra(spectrum, 1, tolProton); + // // are all signals bidirectional? + // if (!Utils.isBidirectional(protonShiftMatches1, protonShiftMatches2)) { + // return false; + // } + // this.assign2DSpectrum(spectrum, tolProton, tolProton); + // + // return true; + // } + // + // + // /** + // * Sets links between two carbon atoms in an INADEQUATE signal relationship. + // * Returns true if all signals are bidirectional, so that atom A has a + // * signal according to atom B and vice versa. + // * + // * @param spectrum Spectrum class object consisting of Signal class objects + // * @param tolCarbon tolerance value [ppm] for carbon atom shift matching + // * + // * @return + // */ + // public final boolean assignINADEQUATE(final Spectrum spectrum, final double tolCarbon) { + // + // final ArrayList carbonShiftMatches1 = this.findMatchesIn1DSpectra(spectrum, 0, tolCarbon); + // final ArrayList carbonShiftMatches2 = this.findMatchesIn1DSpectra(spectrum, 1, tolCarbon); + // // are all signals bidirectional? 
+ // if (!casekit.nmr.Utils.isBidirectional(carbonShiftMatches1, carbonShiftMatches2)) { + // return false; + // } + // this.assign2DSpectrum(spectrum, tolCarbon, tolCarbon); + // + // final ArrayList indicesInAtomContainerDim1 = this.getAssignedAtomIndices(spectrum, 0); + // final ArrayList indicesInAtomContainerDim2 = this.getAssignedAtomIndices(spectrum, 1); + // for (int i = 0; i < spectrum.getSignalCount(); i++) { + // if ((indicesInAtomContainerDim1.get(i) > -1) && (indicesInAtomContainerDim2.get(i) > -1)) { + // this.setBond(indicesInAtomContainerDim1.get(i), indicesInAtomContainerDim2.get(i)); + // } + // } + // + // return true; + // } + // + // + // private void setBond(final int index1, final int index2) { + // + // if (this.mol.getBond(this.mol.getAtom(index1), this.mol.getAtom(index2)) != null) { + // this.mol.removeBond(this.mol.getAtom(index1), this.mol.getAtom(index2)); + // } + // this.mol.addBond(index1, index2, IBond.Order.UNSET); + // } + // + // + // /** + // * Sets links between heavy atoms which are in HMBC signal relationship. + // * + // * @param spectrum Spectrum class object consisting of Signal class objects + // * where the proton shift values is given first and the heavy atom shifts as the second. 
+ // * @param tolProton tolerance value [ppm] for hydrogen shift matching + // * @param tolHeavy tolerance value [ppm] for heavy atom shift matching + // */ + // public final void assignHMBC(final Spectrum spectrum, final double tolProton, final double tolHeavy) { + // + // this.assign2DSpectrum(spectrum, tolProton, tolHeavy); + // } +} diff --git a/src/casekit/nmr/match/Matcher.java b/src/casekit/nmr/match/Matcher.java new file mode 100644 index 0000000..11c23a8 --- /dev/null +++ b/src/casekit/nmr/match/Matcher.java @@ -0,0 +1,287 @@ +/* + * The MIT License + * + * Copyright 2019 Michael Wenk [https://github.com/michaelwenk] + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +package casekit.nmr.match; + +import casekit.nmr.Utils; +import casekit.nmr.model.Assignment; +import casekit.nmr.model.Signal; +import casekit.nmr.model.Spectrum; +import org.apache.commons.lang3.ArrayUtils; +import org.openscience.cdk.exception.CDKException; +import org.openscience.cdk.similarity.Tanimoto; + +import java.util.*; + +public class Matcher { + + + /** + * Checks whether two spectra contain given dimensions. + * + * @param spectrum1 first spectrum + * @param spectrum2 second spectrum + * @param dim1 dimension to select in first spectrum + * @param dim2 dimension to select in second spectrum + * + * @return true if both spectra contain the selected dimension + */ + private static boolean checkDimensions(final Spectrum spectrum1, final Spectrum spectrum2, final int dim1, final int dim2) { + return spectrum1.containsDim(dim1) && spectrum2.containsDim(dim2); + } + + /** + * Combines selected dimensions of two spectra while considering possible equivalent signals + * via the {@code pickPrecision} parameter and multiplicity comparison. + * In {@code spectrum1}, the equivalent signals have to be set. + * + * @param spectrum1 first spectrum, incl. 
equivalent signals + * @param spectrum2 second spectrum + * @param pickPrecision tolerance value used for signal shift matching to + * find equivalent signals + * @param dim1 dimension of first spectrum to combine + * @param dim2 dimension of second spectrum to combine + * + * @return null if one spectrum does not contain the selected dimension + */ + public static Spectrum combineSpectra(final Spectrum spectrum1, final Spectrum spectrum2, final int dim1, final int dim2, final double pickPrecision) throws Exception { + if (!Matcher.checkDimensions(spectrum1, spectrum2, dim1, dim2)) { + return null; + } + // create new spectra which is to fill with signals of both spectra + final Spectrum combinedSpectrum = spectrum1.buildClone(); + // fill in signals from spectrum2 + // consider the possibility of potential equivalent signals here + for (final Signal signalSpectrum2 : spectrum2.getSignals()) { + combinedSpectrum.addSignal(signalSpectrum2.buildClone(), pickPrecision); + } + return combinedSpectrum; + } + + /** + * Calculates the Tanimoto coefficient between two spectra in given dimensions. 
+ * + * @param spectrum1 first spectrum + * @param spectrum2 second spectrum + * @param dim1 dimension in first spectrum to take the shifts from + * @param dim2 dimension in second spectrum to take the shifts from + * + * @return + * + * @throws CDKException + */ + public static Float calculateTanimotoCoefficient(final Spectrum spectrum1, final Spectrum spectrum2, final int dim1, final int dim2) throws CDKException { + if (!Matcher.checkDimensions(spectrum1, spectrum2, dim1, dim2)) { + return null; + } + final double[] shiftsSpectrum1 = ArrayUtils.toPrimitive(spectrum1.getShifts(dim1).toArray(new Double[spectrum1.getSignalCount()])); + Arrays.parallelSort(shiftsSpectrum1); + final double[] shiftsSpectrum2 = ArrayUtils.toPrimitive(spectrum2.getShifts(dim2).toArray(new Double[spectrum2.getSignalCount()])); + Arrays.parallelSort(shiftsSpectrum2); + + return Tanimoto.calculate(shiftsSpectrum1, shiftsSpectrum2); + } + + /** + * Returns deviations between matched shifts of two spectra. + * The matching procedure is already included here. 
+ * + * @param spectrum1 first spectrum + * @param spectrum2 second spectrum + * @param dim1 dimension in first spectrum to take the shifts from + * @param dim2 dimension in second spectrum to take the shifts from + * @param shiftTol + * + * @return + * + * @see #matchSpectra(Spectrum, Spectrum, int, int, double) + */ + public static Double[] getDeviations(final Spectrum spectrum1, final Spectrum spectrum2, final int dim1, final int dim2, final double shiftTol) { + final Double[] deviations = new Double[spectrum1.getSignalCount()]; + final Assignment matchAssignments = Matcher.matchSpectra(spectrum1, spectrum2, dim1, dim2, shiftTol); + Signal matchedSignalInSpectrum2; + for (int i = 0; i < spectrum1.getSignalCount(); i++) { + if (matchAssignments.getAssignment(0, i) == -1) { + deviations[i] = null; + } else { + matchedSignalInSpectrum2 = spectrum2.getSignal(matchAssignments.getAssignment(0, i)); + deviations[i] = Math.abs(spectrum1.getSignal(i).getShift(dim1) - matchedSignalInSpectrum2.getShift(dim2)); + } + } + return deviations; + } + + /** + * Returns the average of all deviations within a given input array. + * + * @param deviations array of deviations + * + * @return + */ + public static Double calculateAverageDeviation(final Double[] deviations) { + // every signal has to have a match + for (final Double deviation : deviations) { + if (deviation == null) { + return null; + } + } + + return Utils.getMean(deviations); + } + + /** + * Returns the average of all deviations of matched shifts between two + * spectra. 
+ * + * @param spectrum1 first spectrum + * @param spectrum2 second spectrum + * @param dim1 dimension in first spectrum to take the shifts from + * @param dim2 dimension in second spectrum to take the shifts from + * @param shiftTol Tolerance value [ppm] used during peak picking in + * shift comparison + * + * @return + * + * @see #getDeviations(Spectrum, Spectrum, int, int, double) + * @see #calculateAverageDeviation(Double[]) + */ + public static Double calculateAverageDeviation(final Spectrum spectrum1, final Spectrum spectrum2, final int dim1, final int dim2, final double shiftTol) { + return Matcher.calculateAverageDeviation(Matcher.getDeviations(spectrum1, spectrum2, dim1, dim2, shiftTol)); + } + + /** + * Returns the closest shift matches between two spectra in selected dimensions + * as an Assignment object with one set dimension only.
+ * Despite intensities are expected, they are still not considered here. + * + * @param spectrum first spectrum + * @param querySpectrum query spectrum (Subspectrum) + * @param dim1 dimension in first spectrum to take the shifts from + * @param dim2 dimension in second spectrum to take the shifts from + * @param shiftTol Tolerance value [ppm] used during spectra shift + * comparison + * + * @return Assignments with signal indices of spectrum and matched indices + * in query spectrum; null if one of the spectra does not + * contain the selected dimension + */ + public static Assignment matchSpectra(final Spectrum spectrum, final Spectrum querySpectrum, final int dim1, final int dim2, final double shiftTol) { + if (!Matcher.checkDimensions(spectrum, querySpectrum, dim1, dim2)) { + return null; + } + final Assignment matchAssignments = new Assignment(spectrum); + final Set assigned = new HashSet<>(); + List pickedSignalIndicesSpectrum2; + + for (int i = 0; i < spectrum.getSignalCount(); i++) { + if (spectrum.getShift(i, dim1) == null) + continue; + + // @TODO add solvent deviation value for picking closest signal(s) + pickedSignalIndicesSpectrum2 = new ArrayList<>(); + for (final int pickedSignalIndexSpectrum2 : querySpectrum.pickClosestSignal(spectrum.getShift(i, dim1), dim2, shiftTol)) { + // @TODO maybe consider further parameters to check ? e.g. 
intensity + if (querySpectrum.getMultiplicity(pickedSignalIndexSpectrum2).equals(spectrum.getMultiplicity(i)) && querySpectrum.getEquivalencesCount(pickedSignalIndexSpectrum2) <= spectrum.getEquivalencesCount(i)) { + pickedSignalIndicesSpectrum2.add(pickedSignalIndexSpectrum2); + } + } + for (final int pickedSignalIndexSpectrum2 : pickedSignalIndicesSpectrum2) { + if (!assigned.contains(pickedSignalIndexSpectrum2)) { + // add signal to list of already assigned signals + assigned.add(pickedSignalIndexSpectrum2); + // set picked signal index in assignment object + matchAssignments.setAssignment(0, i, pickedSignalIndexSpectrum2); + + break; + } + } + } + + return matchAssignments; + } + + /** + * Returns the closest shift matches between two spectra in all dimensions + * as one Assignment object with N set dimensions. + * N here means the number of dimensions in both spectra.
+ * Despite intensities are expected, they are still not considered here. + * + * @param spectrum1 first spectrum + * @param spectrum2 second spectrum (query) + * @param shiftTols tolerance values [ppm] per each dimension used during spectra shift + * comparisons + * + * @return Assignments with signal indices of spectrum1 and matched indices + * in spectrum2 for each dimension; null if the number of + * dimensions in both spectra is not the same or is different than the number of given + * shift tolerances + * + * @see #matchSpectra(Spectrum, Spectrum, int, int, double) + */ + public static Assignment matchSpectra(final Spectrum spectrum1, final Spectrum spectrum2, final double[] shiftTols) { + if ((spectrum1.getNDim() != spectrum2.getNDim()) || (spectrum1.getNDim() != shiftTols.length)) { + return null; + } + final Assignment matchAssignment = new Assignment(spectrum1); + for (int dim = 0; dim < spectrum1.getNDim(); dim++) { + matchAssignment.setAssignments(dim, Matcher.matchSpectra(spectrum1, spectrum2, dim, dim, shiftTols[dim]).getAssignments(0)); + } + + return matchAssignment; + } + + + // might be useful in future to correct matches between spectra + + // /** + // * Corrects a match list regarding a given shift list and an atom container. + // * This is useful when two ore more shift values (e.g. DEPT shifts) match + // * with the same atom in the atom container. So the purpose here is to + // * enable more unambiguous matches. This method first looks for unambiguous + // * matches and calculates the median of the difference values between the + // * shift list values and the shifts of atom container. Then, all shift list + // * values are adjusted (+/-) with this median value. 
+ // * + // * @param shiftList1 Shift value list to search in + // * @param shiftList2 Shift value list to match in shiftList1 + // * @param matchesInShiftList1 Matcher list to correct + // * @param tol Tolerance value + // * @return + // */ + // public static ArrayList correctShiftMatches(final ArrayList shiftList1, final ArrayList shiftList2, final ArrayList matchesInShiftList1, final double tol) { + // + // int matchIndex; + // // get differences of unique matches between query shift and ac shifts + // ArrayList diffs = new ArrayList<>(); + // final HashSet uniqueMatchIndicesSet = new HashSet<>(matchesInShiftList1); + // for (final int uniqueMatchIndex : uniqueMatchIndicesSet) { + // if (Collections.frequency(matchesInShiftList1, uniqueMatchIndex) == 1) { + // matchIndex = matchesInShiftList1.indexOf(uniqueMatchIndex); + // if (matchesInShiftList1.get(matchIndex) >= 0) { + // diffs.add(shiftList2.get(matchIndex) - shiftList1.get(matchesInShiftList1.get(matchIndex))); + // } + // } + // } + // // calculate the median of found unique match differences + // if (diffs.size() > 0) { + // final double median = casekit.casekit.nmr.Utils.getMedian(diffs); + // // add or subtract the median of the differences to all shift list values (input) and match again then + // for (int i = 0; i < shiftList2.size(); i++) { + // shiftList2.set(i, shiftList2.get(i) - median); + // } + // // rematch + // return casekit.casekit.nmr.Utils.findShiftMatches(shiftList1, shiftList2, tol); + // } + // + // return matchesInShiftList1; + // } +} diff --git a/src/casekit/NMR/model/Assignment.java b/src/casekit/nmr/model/Assignment.java similarity index 86% rename from src/casekit/NMR/model/Assignment.java rename to src/casekit/nmr/model/Assignment.java index 1f0b067..53378ff 100644 --- a/src/casekit/NMR/model/Assignment.java +++ b/src/casekit/nmr/model/Assignment.java @@ -21,9 +21,9 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. 
*/ -package casekit.NMR.model; +package casekit.nmr.model; -import casekit.NMR.model.dimensional.DimensionalNMR; +import casekit.nmr.model.dimensional.Dimensional; import org.apache.commons.lang3.ArrayUtils; import java.util.ArrayList; @@ -34,9 +34,18 @@ * * @author Michael Wenk [https://github.com/michaelwenk] */ -public class Assignment extends DimensionalNMR implements Cloneable { +public class Assignment extends Dimensional implements Cloneable { - int[][] assignments; + private int[][] assignments; + + + public Assignment() { + } + + public Assignment(final String[] nuclei, final int[][] assignments) { + super(nuclei); + this.assignments = assignments; + } public Assignment(final Spectrum spectrum) { super(spectrum.getNuclei()); @@ -137,6 +146,14 @@ public Boolean isFullyAssigned(final int dim){ return this.getSetAssignmentsCount(dim) == this.getAssignmentsCount(); } + + public Boolean isAssigned(final int dim, final int index){ + if(!this.containsDim(dim)){ + return null; + } + + return this.getAssignment(dim, index) != -1; + } /** * Adds a new assignment entry (index), e.g. for a new signal. 
The given assignment indices @@ -194,4 +211,20 @@ private boolean checkInputListSize(final int size){ public Assignment clone() throws CloneNotSupportedException{ return (Assignment) super.clone(); } + + @Override + public String toString() { + final StringBuilder stringBuilder = new StringBuilder(); + stringBuilder.append("Assignments:\n"); + + for (int i = 0; i < this.getNDim(); i++) { + stringBuilder.append(Arrays.toString(this.assignments[i])).append("\n"); + } + + return stringBuilder.toString(); + } + + public int[][] getAssignments() { + return assignments; + } } diff --git a/src/casekit/nmr/model/DataSet.java b/src/casekit/nmr/model/DataSet.java new file mode 100644 index 0000000..60c37c5 --- /dev/null +++ b/src/casekit/nmr/model/DataSet.java @@ -0,0 +1,81 @@ +package casekit.nmr.model; + +import org.openscience.cdk.interfaces.IAtomContainer; + +import java.util.HashMap; +import java.util.Map; + +public class DataSet { + + private ExtendedConnectionMatrix structure; + private Spectrum spectrum; + private Assignment assignment; + private HashMap meta; + + public DataSet() { + } + + public DataSet(final ExtendedConnectionMatrix structure, final Spectrum spectrum, final Assignment assignment, Map meta) { + this.structure = structure; + this.spectrum = spectrum; + this.assignment = assignment; + this.meta = new HashMap<>(meta); + } + + public DataSet(final IAtomContainer structure, final Spectrum spectrum, final Assignment assignment, Map meta) { + this.structure = new ExtendedConnectionMatrix(structure); + this.spectrum = spectrum; + this.assignment = assignment; + this.meta = new HashMap<>(meta); + } + + public void addMetaInfo(final String key, final String value){ + this.meta.put(key, value); + } + + public void removeMetaInfo(final String key){ + this.meta.remove(key); + } + + public ExtendedConnectionMatrix getStructure() { + return structure; + } + + public void setStructure(final ExtendedConnectionMatrix structure) { + this.structure = structure; + } + 
+ public Spectrum getSpectrum() { + return spectrum; + } + + public void setSpectrum(final Spectrum spectrum) { + this.spectrum = spectrum; + } + + public Assignment getAssignment() { + return assignment; + } + + public void setAssignment(final Assignment assignment) { + this.assignment = assignment; + } + + public Map getMeta() { + return meta; + } + + public void setMeta(Map meta) { + this.meta = new HashMap<>(meta); + } + + @Override + public String toString() { + return "DataSet{" + + "structure=" + structure + + ", spectrum=" + spectrum + + ", assignment=" + assignment + + ", meta=" + meta + + '}'; + } +} diff --git a/src/casekit/nmr/model/ExtendedConnectionMatrix.java b/src/casekit/nmr/model/ExtendedConnectionMatrix.java new file mode 100644 index 0000000..dfd43dd --- /dev/null +++ b/src/casekit/nmr/model/ExtendedConnectionMatrix.java @@ -0,0 +1,431 @@ +/* + * The MIT License + * + * Copyright (c) 2019 Michael Wenk [https://github.com/michaelwenk] + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +package casekit.nmr.model; + +import casekit.nmr.Utils; +import org.openscience.cdk.graph.matrix.ConnectionMatrix; +import org.openscience.cdk.interfaces.IAtom; +import org.openscience.cdk.interfaces.IAtomContainer; +import org.openscience.cdk.interfaces.IAtomType.Hybridization; +import org.openscience.cdk.interfaces.IBond; +import org.openscience.cdk.silent.Atom; +import org.openscience.cdk.silent.Bond; +import org.openscience.cdk.silent.SilentChemObjectBuilder; + +import java.util.Arrays; + +/** + * + * @author Michael Wenk [https://github.com/michaelwenk] + */ +public class ExtendedConnectionMatrix { + + private double[][] connectionMatrix; + private String[] atomTypes; + private Integer[][] atomPropertiesNumeric;// hydrogenCounts, valencies, formalCharges; + private Hybridization[] hybridizations; + private Boolean[][] atomPropertiesBoolean;// isInRingAtoms, isAromaticAtoms; + private Boolean[][][] bondProperties; + private int bondCount; + + + public ExtendedConnectionMatrix() { + } + + public ExtendedConnectionMatrix(double[][] connectionMatrix, String[] atomTypes, Integer[][] atomPropertiesNumeric, Hybridization[] hybridizations, Boolean[][] atomPropertiesBoolean, Boolean[][][] bondProperties, int bondCount) { + this.connectionMatrix = connectionMatrix; + this.atomTypes = atomTypes; + this.atomPropertiesNumeric = atomPropertiesNumeric; + this.hybridizations = hybridizations; + this.atomPropertiesBoolean = atomPropertiesBoolean; + this.bondProperties = bondProperties; + this.bondCount = bondCount; + } + + public ExtendedConnectionMatrix(final IAtomContainer ac){ + this.connectionMatrix = ConnectionMatrix.getMatrix(ac); + this.atomTypes = new String[this.connectionMatrix.length]; + 
this.hybridizations = new Hybridization[this.connectionMatrix.length]; + this.atomPropertiesNumeric = new Integer[this.connectionMatrix.length][]; + this.atomPropertiesBoolean = new Boolean[this.connectionMatrix.length][]; + this.bondProperties = new Boolean[this.connectionMatrix.length][][]; + + this.init(ac); + } + + private void init(final IAtomContainer ac){ + IAtom atom1, atom2; + IBond bond; + for (int i = 0; i < this.connectionMatrix.length; i++) { + atom1 = ac.getAtom(i); + this.setAtomProperties(i, atom1.getSymbol(), atom1.getImplicitHydrogenCount(), atom1.getValency(), atom1.getFormalCharge(), atom1.isInRing(), atom1.isAromatic(), atom1.getHybridization()); + + this.bondProperties[i] = new Boolean[this.connectionMatrix.length][2]; + for (int k = 0; k < this.connectionMatrix.length; k++) { + atom2 = ac.getAtom(k); + bond = ac.getBond(atom1, atom2); + if(bond != null){ + this.setBondProperty(i, k, bond.isInRing(), bond.isAromatic()); + } + } + } + this.updateBondCount(); + } + + private void init(final ExtendedConnectionMatrix extendedConnectionMatrix){ + for (int i = 0; i < this.getAtomCount(); i++) { + if(i < extendedConnectionMatrix.getAtomCount()){ + this.setAtomProperties(i, extendedConnectionMatrix.getAtomType(i), + extendedConnectionMatrix.getHydrogenCount(i), + extendedConnectionMatrix.getValency(i), + extendedConnectionMatrix.getFormalCharge(i), + extendedConnectionMatrix.isInRing(i), + extendedConnectionMatrix.isAromatic(i), + extendedConnectionMatrix.getHybridization(i)); + + + } + this.bondProperties[i] = new Boolean[this.getAtomCount()][2]; + if(i < extendedConnectionMatrix.getAtomCount()){ + for (int k = 0; k < extendedConnectionMatrix.getAtomCount(); k++) { + this.connectionMatrix[i][k] = extendedConnectionMatrix.getBondOrder(i, k); + this.setBondProperty(i, k, extendedConnectionMatrix.isInRing(i, k), extendedConnectionMatrix.isAromatic(i, k)); + + } + } else { + for (int k = 0; k < this.getAtomCount(); k++) { + this.connectionMatrix[i][k] = 
0.0; +// this.setBondProperty(i, k, null, null); + } + } + } + this.updateBondCount(); + } + + private void extendConnectionMatrix(){ + this.extendConnectionMatrix(1); + } + + private void extendConnectionMatrix(final int extensionSize){ + this.connectionMatrix = new double[this.getAtomCount() + extensionSize][this.getAtomCount() + extensionSize]; + this.atomTypes = new String[this.connectionMatrix.length]; + this.hybridizations = new Hybridization[this.connectionMatrix.length]; + this.atomPropertiesNumeric = new Integer[this.connectionMatrix.length][]; + this.atomPropertiesBoolean = new Boolean[this.connectionMatrix.length][]; + this.bondProperties = new Boolean[this.connectionMatrix.length][][]; + } + + public void addAtom(final String atomType, final Integer implicitHydrogenCount, final Integer valency, final Integer formalCharge, final Boolean isInRing, final Boolean isAromatic, final Hybridization hybridization){ + // create backup object + final ExtendedConnectionMatrix extendedConnectionMatrixBackup = this.buildClone(); + // extend the sizes of all matrices by one + this.extendConnectionMatrix(); + // fill all information in again from backup object + this.init(extendedConnectionMatrixBackup); + // set information for new atom + this.setAtomProperties(this.getAtomCount() - 1, atomType, implicitHydrogenCount, valency, formalCharge, isInRing, isAromatic, hybridization); + } + + public boolean addBond(final int atomIndex1, final int atomIndex2, final double order, final Boolean isInRing, final Boolean isAromatic){ + if(!this.hasAtom(atomIndex1) || !this.hasAtom(atomIndex2)){ + return false; + } + if(!this.isValidBondAddition(atomIndex1, atomIndex2, order, isAromatic)){ + return false; + } + this.connectionMatrix[atomIndex1][atomIndex2] = order; + this.connectionMatrix[atomIndex2][atomIndex1] = order; + this.setBondProperty(atomIndex1, atomIndex2, isInRing, isAromatic); + this.setBondProperty(atomIndex2, atomIndex1, isInRing, isAromatic); + + 
this.updateBondCount(); + + return true; + } + + public boolean isValidBondAddition(final int atomIndex1, final int atomIndex2, final double order, final boolean isAromatic){ + if(!this.hasAtom(atomIndex1) || !this.hasAtom(atomIndex2)){ + return false; + } + + return this.isValidBondAddition(atomIndex1, order, isAromatic) && this.isValidBondAddition(atomIndex2, order, isAromatic); + } + + public boolean isValidBondAddition(final int atomIndex, final double order, final boolean isAromatic){ + float bondOrderSum = this.getBondOrderSum(atomIndex, true); + if(isAromatic){ + bondOrderSum += 1.5; + } else { + bondOrderSum += order; + } + // -1 for cases with heterocyclic aromatics, like the N in the small aromatic ring in coffein if we want to add the bond to the CH3 group + if(this.isAromatic(atomIndex) && (!this.getAtomType(atomIndex).equals("C"))){ + bondOrderSum -= 1; + } + + return bondOrderSum <= this.getValency(atomIndex); + } + + private void setAtomProperties(final int atomIndex, final String atomType, final Integer implicitHydrogenCount, final Integer valency, final Integer formalCharge, final Boolean isInRing, final Boolean isAromatic, final Hybridization hybridization){ + this.atomTypes[atomIndex] = atomType; + this.atomPropertiesNumeric[atomIndex] = new Integer[3]; + this.atomPropertiesNumeric[atomIndex][0] = implicitHydrogenCount; + this.atomPropertiesNumeric[atomIndex][1] = valency; + this.atomPropertiesNumeric[atomIndex][2] = formalCharge; + this.atomPropertiesBoolean[atomIndex] = new Boolean[2]; + this.atomPropertiesBoolean[atomIndex][0] = isInRing; + this.atomPropertiesBoolean[atomIndex][1] = isAromatic; + this.hybridizations[atomIndex] = hybridization; + } + + private void setBondProperty(final int atomIndex1, final int atomIndex2, final Boolean isInRing, final Boolean isAromatic){ + this.bondProperties[atomIndex1][atomIndex2][0] = isInRing; + this.bondProperties[atomIndex1][atomIndex2][1] = isAromatic; + } + + private void updateBondCount(){ + int 
bondCounter = 0; + for (int i = 0; i < this.getAtomCount(); i++) { + for (int j = i + 1; j < this.getAtomCount(); j++) { + if(this.connectionMatrix[i][j] > 0.0){ + bondCounter++; + } + } + } + this.bondCount = bondCounter; + } + + public Boolean hasBond(final int atomIndex1, final int atomIndex2){ + if(!this.hasAtom(atomIndex1) || !this.hasAtom(atomIndex2)){ + return null; + } + + return this.getBondOrder(atomIndex1, atomIndex2) > 0.0; + } + + public Double getBondOrder(final int atomIndex1, final int atomIndex2){ + if(!this.hasAtom(atomIndex1) || !this.hasAtom(atomIndex2)){ + return null; + } + + return this.connectionMatrix[atomIndex1][atomIndex2]; + } + + public Float getBondOrderSum(final int atomIndex, final boolean includeHydrogens){ + if(!this.hasAtom(atomIndex)){ + return null; + } + float bondOrderSum = (float) 0.0; + for (int j = 0; j < this.connectionMatrix[atomIndex].length; j++) { + if((this.isAromatic(atomIndex, j) != null) && this.isAromatic(atomIndex, j)){ + bondOrderSum += 1.5; + } else { + bondOrderSum += this.getBondOrder(atomIndex, j); + } + } + if(includeHydrogens){ + bondOrderSum += this.getHydrogenCount(atomIndex); + } + + return bondOrderSum; + } + + public String getAtomType(final int atomIndex){ + if(!this.hasAtom(atomIndex)){ + return null; + } + + return this.atomTypes[atomIndex]; + } + + public Integer getHydrogenCount(final int atomIndex){ + if(!this.hasAtom(atomIndex)){ + return null; + } + + return this.atomPropertiesNumeric[atomIndex][0]; + } + + public Integer getValency(final int atomIndex){ + if(!this.hasAtom(atomIndex)){ + return null; + } + + return this.atomPropertiesNumeric[atomIndex][1]; + } + + public Integer getFormalCharge(final int atomIndex){ + if(!this.hasAtom(atomIndex)){ + return null; + } + + return this.atomPropertiesNumeric[atomIndex][2]; + } + + public Boolean isInRing(final int atomIndex){ + if(!this.hasAtom(atomIndex)){ + return null; + } + + return this.atomPropertiesBoolean[atomIndex][0]; + } + + public 
Boolean isAromatic(final int atomIndex){ + if(!this.hasAtom(atomIndex)){ + return null; + } + + return this.atomPropertiesBoolean[atomIndex][1]; + } + + public Hybridization getHybridization(final int atomIndex){ + if(!this.hasAtom(atomIndex)){ + return null; + } + + return this.hybridizations[atomIndex]; + } + + public Boolean isInRing(final int atomIndex1, final int atomIndex2){ + if(!this.hasAtom(atomIndex1) || !this.hasAtom(atomIndex2)){ + return null; + } + + return this.bondProperties[atomIndex1][atomIndex2][0]; + } + + public Boolean isAromatic(final int atomIndex1, final int atomIndex2){ + if(!this.hasAtom(atomIndex1) || !this.hasAtom(atomIndex2)){ + return null; + } + + return this.bondProperties[atomIndex1][atomIndex2][1]; + } + + public int getAtomCount(){ + return this.connectionMatrix.length; + } + + public int getBondCount(){ + return this.bondCount; + } + + public Boolean isUnsaturated(final int atomIndex){ + if(!this.hasAtom(atomIndex)){ + return null; + } + + return this.getBondOrderSum(atomIndex, true) < this.getValency(atomIndex); + } + + public boolean hasAtom(final int atomIndex){ + return (atomIndex >= 0) && (atomIndex < this.getAtomCount()); + } + + public IAtomContainer toAtomContainer(){ + final IAtomContainer ac = SilentChemObjectBuilder.getInstance().newAtomContainer(); + IAtom atom; + for (int i = 0; i < this.connectionMatrix.length; i++) { + atom = new Atom(this.atomTypes[i]); + atom.setImplicitHydrogenCount(this.atomPropertiesNumeric[i][0]); + atom.setValency(this.atomPropertiesNumeric[i][1]); + atom.setFormalCharge(this.atomPropertiesNumeric[i][2]); + atom.setHybridization(this.hybridizations[i]); + atom.setIsInRing(this.atomPropertiesBoolean[i][0]); + atom.setIsAromatic(this.atomPropertiesBoolean[i][1]); + + ac.addAtom(atom); + } + IBond bond; + for (int i = 0; i < this.bondProperties.length; i++) { + for (int k = i + 1; k < this.bondProperties.length; k++) { + if(this.connectionMatrix[i][k] > 0.0){ + bond = new Bond(ac.getAtom(i), 
ac.getAtom(k), Utils.getBondOrder((int) this.connectionMatrix[i][k])); + bond.setIsInRing(this.bondProperties[i][k][0]); + bond.setIsAromatic(this.bondProperties[i][k][1]); + ac.addBond(bond); + } + } + } + + return ac; + } + + public ExtendedConnectionMatrix buildClone() { + return new ExtendedConnectionMatrix(this.toAtomContainer()); + } + + @Override + public String toString() { + return "ExtendedConnectionMatrix{" + + "connectionMatrix=" + Arrays.toString(connectionMatrix) + + ", atomTypes=" + Arrays.toString(atomTypes) + + ", atomPropertiesNumeric=" + Arrays.toString(atomPropertiesNumeric) + + ", hybridizations=" + Arrays.toString(hybridizations) + + ", atomPropertiesBoolean=" + Arrays.toString(atomPropertiesBoolean) + + ", bondProperties=" + Arrays.toString(bondProperties) + + ", bondCount=" + bondCount + + '}'; + } + + public double[][] getConnectionMatrix() { + return connectionMatrix; + } + + public String[] getAtomTypes() { + return atomTypes; + } + + public Integer[][] getAtomPropertiesNumeric() { + return atomPropertiesNumeric; + } + + public Hybridization[] getHybridizations() { + return hybridizations; + } + + public Boolean[][] getAtomPropertiesBoolean() { + return atomPropertiesBoolean; + } + + public Boolean[][][] getBondProperties() { + return bondProperties; + } + + public void setConnectionMatrix(double[][] connectionMatrix) { + this.connectionMatrix = connectionMatrix; + } + + public void setAtomTypes(String[] atomTypes) { + this.atomTypes = atomTypes; + } + + public void setAtomPropertiesNumeric(Integer[][] atomPropertiesNumeric) { + this.atomPropertiesNumeric = atomPropertiesNumeric; + } + + public void setHybridizations(Hybridization[] hybridizations) { + this.hybridizations = hybridizations; + } + + public void setAtomPropertiesBoolean(Boolean[][] atomPropertiesBoolean) { + this.atomPropertiesBoolean = atomPropertiesBoolean; + } + + public void setBondProperties(Boolean[][][] bondProperties) { + this.bondProperties = bondProperties; + } + + 
public void setBondCount(int bondCount) { + this.bondCount = bondCount; + } +} diff --git a/src/casekit/nmr/model/Signal.java b/src/casekit/nmr/model/Signal.java new file mode 100644 index 0000000..14a709b --- /dev/null +++ b/src/casekit/nmr/model/Signal.java @@ -0,0 +1,148 @@ +/* + * This class was adopted and modified from an earlier version by Christoph Steinbeck + */ + +/* + * The MIT License + * + * Copyright 2018 Michael Wenk [https://github.com/michaelwenk]. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +package casekit.nmr.model; + +import casekit.nmr.model.dimensional.Dimensional; + +import java.util.Arrays; + +/** + * @author Michael Wenk [https://github.com/michaelwenk] + */ +public class Signal extends Dimensional { + + private Double[] shifts; + private String multiplicity; + private Double intensity; + private String kind; + private int equivalencesCount; + + //// private Integer phase; + //// public final static int PHASE_NONE = 0, PHASE_POSITIVE = 1, PHASE_NEGATIVE = 2; + //// public final static String[] PHASENAMES = {"NONE", "POSITIVE", "NEGATIVE"}; + // + + + public Signal() { + } + + public Signal(final String[] nuclei) { + this(nuclei, null, null, null, null, 0); + } + + public Signal(final String[] nuclei, final Double[] shifts, final String multiplicity, final String kind, final Double intensity, final int equivalencesCount) { + super(nuclei); + this.shifts = shifts; // this.initShifts(shifts, this.getNDim()); + this.multiplicity = multiplicity; + this.kind = kind; + this.intensity = intensity; + this.equivalencesCount = equivalencesCount; + } + + // private Double[] initShifts(final Double[] shifts, final int nDim) { + //// if((shifts == null) || (shifts.length != nDim)){ + //// throw new Exception("Number of given nuclei (" + nDim + ") and shifts (" + shifts.length + ") is not the same!!!"); + //// } + // final Double[] tempShifts = new Double[nDim]; + // for (int d = 0; d < nDim; d++) { + // tempShifts[d] = shifts[d]; + // } + // + // return tempShifts; + // } + + public boolean setShift(final Double shift, final int dim) { + if (!this.containsDim(dim)) { + return false; + } + this.shifts[dim] = shift; + + return true; + } + + public Double getShift(final int dim) { + if (!this.containsDim(dim)) { + return null; + } + return this.shifts[dim]; + } + + public void setIntensity(final Double intensity) { + this.intensity = intensity; + } + + public Double getIntensity() { + return this.intensity; + } + + public void setMultiplicity(final String 
multiplicity) { + this.multiplicity = multiplicity; + } + + public String getMultiplicity() { + return this.multiplicity; + } + + public String getKind() { + return kind; + } + + public void setKind(final String kind) { + this.kind = kind; + } + + // public void setPhase(final Integer phase) { + // this.phase = phase; + // } + // + // public Integer getPhase() { + // return this.phase; + // } + + + public Signal buildClone() { + return new Signal(this.getNuclei(), this.shifts, this.multiplicity, this.kind, this.intensity, equivalencesCount); + } + + @Override + public String toString() { + return "Signal{" + "shifts=" + Arrays.toString(shifts) + ", multiplicity='" + multiplicity + '\'' + ", intensity=" + intensity + ", kind='" + kind + '\'' + ", equivalencesCount=" + equivalencesCount + '}'; + } + + public Double[] getShifts() { + return shifts; + } + + public int getEquivalencesCount() { + return equivalencesCount; + } + + public void setEquivalencesCount(final int equivalencesCount) { + this.equivalencesCount = equivalencesCount; + } +} diff --git a/src/casekit/nmr/model/Spectrum.java b/src/casekit/nmr/model/Spectrum.java new file mode 100644 index 0000000..d6f5710 --- /dev/null +++ b/src/casekit/nmr/model/Spectrum.java @@ -0,0 +1,359 @@ +/* + * This class was adopted and modified from an earlier version by Christoph Steinbeck + */ + + +/* + * The MIT License + * + * Copyright 2018 Michael Wenk [https://github.com/michaelwenk]. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +package casekit.nmr.model; + +import casekit.nmr.model.dimensional.Dimensional; + +import java.util.ArrayList; +import java.util.Comparator; +import java.util.List; +import java.util.stream.Collectors; + +/** + * @author Michael Wenk [https://github.com/michaelwenk] + */ +public class Spectrum extends Dimensional { + + /** + * An arbitrary name or description that can be assigned to this spectrum for identification purposes. + */ + private String description; + /** + * An arbitrary name to identify the type of this spectrum, like COSY, NOESY, HSQC, etc. I + * decided not to provide static Strings with given experiment type since the there are + * numerous experiments yielding basically identical information having different names + */ + private String specType; + /** + * The proton frequency of the spectrometer used to record this spectrum. 
+ */ + private Double spectrometerFrequency; + private String solvent; + private String standard; + private List signals; + private int signalCount; + + public Spectrum() { + } + + public Spectrum(final String[] nuclei) { + super(nuclei); + this.signals = new ArrayList<>(); + this.signalCount = 0; + } + + public Spectrum(String[] nuclei, String description, String specType, Double spectrometerFrequency, String solvent, String standard, List signals, int signalCount, List equivalences) { + super(nuclei); + this.description = description; + this.specType = specType; + this.spectrometerFrequency = spectrometerFrequency; + this.solvent = solvent; + this.standard = standard; + this.signals = new ArrayList<>(signals); + this.signalCount = signalCount; + } + + public void setSpecType(final String specType) { + this.specType = specType; + } + + public String getSpecType() { + return this.specType; + } + + public void setSpecDescription(final String description) { + this.description = description; + } + + public String getSpecDescription() { + return this.description; + } + + public int getSignalCount() { + return this.signalCount; + } + + public int getSignalCountWithEquivalences() { + int sum = 0; + for (final Signal signal : this.getSignals()) { + sum += 1 + signal.getEquivalencesCount(); + } + return sum; + } + + /** + * Adds a signal to this spectrum. + * + * @param signal signal to add + * + * @return + */ + public boolean addSignal(final Signal signal) { + return this.addSignal(signal, 0.0); + } + + /** + * Adds a signal to this spectrum and stores an equivalent signal index. 
+ * + * @param signal signal to add + * @param pickPrecision precision to find equivalent signals to store in + * + * @return + */ + public boolean addSignal(final Signal signal, final double pickPrecision) { + if ((signal == null) || !this.compareNuclei(signal.getNuclei())) { + return false; + } + + // check for equivalent signals in all dimensions + final List closestSignalList = this.pickClosestSignal(signal.getShift(0), 0, pickPrecision); + for (int dim = 1; dim < this.getNDim(); dim++) { + closestSignalList.retainAll(this.pickClosestSignal(signal.getShift(dim), dim, pickPrecision)); + } + + if (closestSignalList.isEmpty()) { + // add signal at the end of signal list + this.signals.add(signal); + this.signalCount++; + } else { + Signal closestSignal; + for (final Integer closestSignalIndex : closestSignalList) { + closestSignal = this.getSignal(closestSignalIndex); + if (closestSignal.getMultiplicity().equals(signal.getMultiplicity())) { + closestSignal.setEquivalencesCount(closestSignal.getEquivalencesCount() + 1); + } + } + } + + return true; + + } + + public boolean removeSignal(final Signal signal) { + return this.removeSignal(this.getSignalIndex(signal)); + } + + public boolean removeSignal(final int signalIndex) { + if (!this.checkSignalIndex(signalIndex)) { + return false; + } + if (this.signals.remove(signalIndex) != null) { + this.signalCount--; + + return true; + } + + return false; + } + + private boolean checkSignalIndex(final Integer signalIndex) { + return (signalIndex != null) && (signalIndex >= 0) && (signalIndex < this.getSignalCount()); + } + + /** + * Returns a NMR Signal at position number in the List + * + * @param signalIndex + * + * @return + */ + public Signal getSignal(final int signalIndex) { + if (!this.checkSignalIndex(signalIndex)) { + return null; + } + + return this.signals.get(signalIndex); + } + + public List getSignals() { + return this.signals; + } + + public Double getShift(final int signalIndex, final int dim) { + if 
(!this.checkSignalIndex(signalIndex)) { + return null; + } + + return this.getSignal(signalIndex).getShift(dim); + } + + public List getShifts(final int dim) { + return this.getSignals().stream().map(signal -> signal.getShift(dim)).collect(Collectors.toList()); + } + + public String getMultiplicity(final int signalIndex) { + if (!this.checkSignalIndex(signalIndex)) { + return null; + } + + return this.getSignal(signalIndex).getMultiplicity(); + } + + public Boolean hasEquivalences(final int signalIndex) { + if (!this.checkSignalIndex(signalIndex)) { + return null; + } + + return this.getEquivalencesCount(signalIndex) > 0; + } + + public Integer getEquivalencesCount(final int signalIndex) { + if (!this.checkSignalIndex(signalIndex)) { + return null; + } + + return this.getSignal(signalIndex).getEquivalencesCount(); + } + + public List getEquivalencesCounts() { + return this.getSignals().stream().map(Signal::getEquivalencesCount).collect(Collectors.toList()); + } + + /** + * Returns the position of an NMRSignal the List + * + * @param signal + * + * @return + */ + public int getSignalIndex(final Signal signal) { + for (int s = 0; s < this.signals.size(); s++) { + if (this.signals.get(s) == signal) { + return s; + } + } + return -1; + } + + public void setSpectrometerFrequency(final Double sf) { + this.spectrometerFrequency = sf; + } + + public Double getSpectrometerFrequency() { + return spectrometerFrequency; + } + + public void setSolvent(final String solvent) { + this.solvent = solvent; + } + + public String getSolvent() { + return solvent; + } + + public void setStandard(final String standard) { + this.standard = standard; + } + + public String getStandard() { + return standard; + } + + + /** + * Returns the signal index (or indices) closest to the given shift. If no signal is found within the interval + * defined by {@code pickPrecision}, an empty list is returned. 
+ * + * @param shift query shift + * @param dim dimension in spectrum to look in + * @param pickPrecision tolerance value for search window + * + * @return + */ + public List pickClosestSignal(final double shift, final int dim, final double pickPrecision) { + final List matchIndices = new ArrayList<>(); + if (!this.containsDim(dim)) { + return matchIndices; + } + double minDiff = pickPrecision; + // detect the minimal difference between a signal shift to the given query shift + for (int s = 0; s < this.getSignalCount(); s++) { + if (Math.abs(this.getShift(s, dim) - shift) < minDiff) { + minDiff = Math.abs(this.getShift(s, dim) - shift); + } + } + for (int s = 0; s < this.getSignalCount(); s++) { + if (Math.abs(this.getShift(s, dim) - shift) == minDiff) { + matchIndices.add(s); + } + } + + return matchIndices; + } + + /** + * Returns a list of signal indices within the interval defined by + * pickPrecision. That list is sorted by the distances to the query shift. + * If none is found an empty ArrayList is returned. 
+ * + * @param shift query shift + * @param dim dimension in spectrum to look in + * @param pickPrecision tolerance value for search window + * + * @return + */ + public List pickSignals(final Double shift, final int dim, final double pickPrecision) { + final List pickedSignals = new ArrayList<>(); + if (!this.containsDim(dim)) { + return pickedSignals; + } + for (int s = 0; s < this.getSignalCount(); s++) { + if (Math.abs(this.getShift(s, dim) - shift) <= pickPrecision) { + pickedSignals.add(s); + } + } + // sort signal indices by distance to query shift + pickedSignals.sort(Comparator.comparingDouble(pickedSignalIndex -> Math.abs(shift - this.getShift(pickedSignalIndex, dim)))); + + return pickedSignals; + } + + public Spectrum buildClone() { + final Spectrum clone = new Spectrum(this.getNuclei()); + for (int i = 0; i < this.getSignalCount(); i++) { + clone.addSignal(this.getSignal(i).buildClone()); + } + clone.setSpecDescription(this.description); + clone.setSolvent(this.solvent); + clone.setSpecType(this.specType); + clone.setSpectrometerFrequency(this.spectrometerFrequency); + clone.setStandard(this.standard); + + return clone; + } + + @Override + public String toString() { + return "Spectrum{" + "description='" + description + '\'' + ", specType='" + specType + '\'' + ", spectrometerFrequency=" + spectrometerFrequency + ", solvent='" + solvent + '\'' + ", standard='" + standard + '\'' + ", signals=" + signals + ", signalCount=" + signalCount + '}'; + } + + public String getDescription() { + return description; + } +} diff --git a/src/casekit/model/Dimensional.java b/src/casekit/nmr/model/dimensional/Dimensional.java similarity index 63% rename from src/casekit/model/Dimensional.java rename to src/casekit/nmr/model/dimensional/Dimensional.java index bc3da90..e37a00d 100644 --- a/src/casekit/model/Dimensional.java +++ b/src/casekit/nmr/model/dimensional/Dimensional.java @@ -10,48 +10,33 @@ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -package casekit.model; +package casekit.nmr.model.dimensional; import java.util.Arrays; public class Dimensional { - private final int nDim; - private final String[] dimNames; + private String[] nuclei; + private int nDim; - /** - * Creates a new object of that class by given dimension names. - * - * @param dimNames names for all dimensions to store. - * @throws IndexOutOfBoundsException - */ - protected Dimensional(final String[] dimNames) throws IndexOutOfBoundsException { - if(dimNames.length == 0){ - throw new IndexOutOfBoundsException("Number of given dimensions (" + dimNames.length + ") is not valid: must be >= 1"); - } + protected Dimensional() { + } - this.dimNames = dimNames; - this.nDim = dimNames.length; + protected Dimensional(final String[] nuclei) { + this.nuclei = nuclei; + this.nDim = nuclei.length; } - /** - * Returns the dimension names. - * - * @return - */ - protected final String[] getDimNames() { - return dimNames; + public final String[] getNuclei(){ + return this.nuclei; } - /** - * Checks whether the input dimension names are equal to the dimension names of - * this object and in same order. 
- * - * @param dimNames names of dimensions to check - * @return - */ - protected final boolean compareDimNames(final String[] dimNames){ - return Arrays.equals(this.getDimNames(), dimNames); + public void setNuclei(String[] nuclei) { + this.nuclei = nuclei; + } + + public boolean compareNuclei(final String[] nuclei){ + return Arrays.equals(this.getNuclei(), nuclei); } /** @@ -63,6 +48,10 @@ public final int getNDim() { return this.nDim; } + public void setNDim(int nDim) { + this.nDim = nDim; + } + /** * Checks whether the input dimension exists by dimension number. The dimension * indexing starts at 0. diff --git a/src/casekit/NMR/parse/Parser.java b/src/casekit/nmr/parse/Parser.java similarity index 71% rename from src/casekit/NMR/parse/Parser.java rename to src/casekit/nmr/parse/Parser.java index 67430d5..ee44d86 100644 --- a/src/casekit/NMR/parse/Parser.java +++ b/src/casekit/nmr/parse/Parser.java @@ -10,36 +10,40 @@ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ -package casekit.NMR.parse; +package casekit.nmr.parse; -import casekit.NMR.Utils; -import casekit.NMR.model.Signal; -import casekit.NMR.model.Spectrum; import casekit.io.FileParser; +import casekit.nmr.Utils; +import casekit.nmr.model.Signal; +import casekit.nmr.model.Spectrum; import org.w3c.dom.Document; import org.w3c.dom.NodeList; import org.xml.sax.SAXException; + import javax.xml.parsers.ParserConfigurationException; import java.io.BufferedReader; import java.io.IOException; import java.util.ArrayList; +import java.util.List; - +@Deprecated public class Parser { /** - * Creates a Spectrum class object from given 1D NMR input file in CSV or XML format. + * Creates a Spectrum class object from given 1D casekit.nmr input file in CSV or XML format. * The extension of given file is used to determine the format. * * @param pathToFile path to peak table (Bruker's TopSpin csv or xml - * file format) - * @param nucleus nucleus to use for spectrum creation, e.g. "13C" + * file format) + * @param nucleus nucleus to use for spectrum creation, e.g. "13C" + * * @return + * * @throws Exception */ public static Spectrum parse1DNMR(final String pathToFile, final String nucleus) throws Exception { - switch (Utils.getFileFormat(pathToFile)){ + switch (Utils.getFileFormat(pathToFile)) { case "csv": return CSVtoSpectrum(pathToFile, new int[]{4}, new String[]{nucleus}, 6); case "xml": @@ -50,18 +54,19 @@ public static Spectrum parse1DNMR(final String pathToFile, final String nucleus) } /** - * Creates a Spectrum class object from given 2D NMR input file in CSV or XML format. + * Creates a Spectrum class object from given 2D casekit.nmr input file in CSV or XML format. * The extension of given file is used to determine the format. * * @param pathToFile path to peak table (Bruker's TopSpin csv or xml - * file format) - * @param nuclei nuclei to use for spectrum creation, e.g. ["13C", "13C] + * file format) + * @param nuclei nuclei to use for spectrum creation, e.g. 
["13C", "13C] * * @return + * * @throws Exception */ public static Spectrum parse2DNMR(final String pathToFile, final String[] nuclei) throws Exception { - switch (Utils.getFileFormat(pathToFile)){ + switch (Utils.getFileFormat(pathToFile)) { case "csv": return CSVtoSpectrum(pathToFile, new int[]{5, 6}, nuclei, 9); case "xml": @@ -72,12 +77,14 @@ public static Spectrum parse2DNMR(final String pathToFile, final String[] nuclei } /** - * Reads a specific column of a NMR peak table and stores it into an + * Reads a specific column of a casekit.nmr peak table and stores it into an * ArrayList object. * - * @param pathToCSV path to NMR peak table in CSV file format - * @param column column index to select in peak table + * @param pathToCSV path to casekit.nmr peak table in CSV file format + * @param column column index to select in peak table + * * @return ArrayList of Double shift values + * * @throws IOException */ private static ArrayList CSVtoPeakList(final String pathToCSV, final int column) throws IOException { @@ -98,60 +105,60 @@ private static ArrayList CSVtoPeakList(final String pathToCSV, final int } /** - * Reads specific columns of one NMR peak table to obtain a Spectrum class + * Reads specific columns of one casekit.nmr peak table to obtain a Spectrum class * object and set intensitiy values. * The number of columns and atom types has to be the same and defines the * dimension of the returning spectrum. 
* - * @param pathToCSV path to NMR peak table in CSV file format - * @param columns column indices to select in peak table - * @param nuclei nuclei for each dimension + * @param pathToCSV path to casekit.nmr peak table in CSV file format + * @param columns column indices to select in peak table + * @param nuclei nuclei for each dimension * @param intensityColumnIndex column index for intensity values + * * @return Spectrum class object containing the peak lists + * * @throws Exception */ private static Spectrum CSVtoSpectrum(final String pathToCSV, final int[] columns, final String[] nuclei, final int intensityColumnIndex) throws Exception { // assumes the same number of selected columns (dimensions) and atom types - if(columns.length != nuclei.length){ + if (columns.length != nuclei.length) { return null; } final Spectrum spectrum = new Spectrum(nuclei); - ArrayList shiftList; + List shiftList; for (int col = 0; col < columns.length; col++) { shiftList = CSVtoPeakList(pathToCSV, columns[col]); - if(col == 0){ + if (col == 0) { for (int i = 0; i < shiftList.size(); i++) { - spectrum.addSignal(new Signal(spectrum.getNuclei())); + spectrum.addSignal(new Signal(spectrum.getNuclei(), new Double[]{shiftList.get(i)}, null, null, null, 0)); } } - if(!spectrum.setShifts(shiftList, col)){ - return null; - } } - spectrum.setIntensities(CSVtoPeakList(pathToCSV, intensityColumnIndex)); + // spectrum.setIntensities(CSVtoPeakList(pathToCSV, intensityColumnIndex)); return spectrum; - } + } /** - * Reads a NMR peak XML file and returns one attribute of nodes (column) into an + * Reads a casekit.nmr peak XML file and returns one attribute of nodes (column) into an * ArrayList object. * The XML file must be in Bruker's TopSpin format. 
* * @param pathToXML Path to XML file - * @param dim number of dimensions of given data 1 (1D) or 2 (2D) + * @param dim number of dimensions of given data 1 (1D) or 2 (2D) * @param attribute which attribute index in XML peak nodes should be used: - * 1 (shift of 1st dimension), 2 (shift of 2nd dimension if 2D data, - * intensity if 1D data) or 3 (intensity if 2D data) + * 1 (shift of 1st dimension), 2 (shift of 2nd dimension if 2D data, + * intensity if 1D data) or 3 (intensity if 2D data) * * @return ArrayList of Double shift values + * * @throws IOException * @throws javax.xml.parsers.ParserConfigurationException * @throws org.xml.sax.SAXException */ private static ArrayList XMLtoPeakList(final String pathToXML, final int dim, final int attribute) throws IOException, ParserConfigurationException, SAXException { // assumes a attribute value between 1 and 3 - if(attribute < 1 || attribute > 3){ + if (attribute < 1 || attribute > 3) { return null; } @@ -167,40 +174,38 @@ private static ArrayList XMLtoPeakList(final String pathToXML, final int } /** - * Reads specific columns of NMR XML files to obtain a Spectrum class + * Reads specific columns of casekit.nmr XML files to obtain a Spectrum class * object. * The XML file must be in Bruker's TopSpin format. 
* - * @param pathToXML path to NMR XML file in Bruker's TopSpin XML file format - * @param ndim number of dimensions: 1 (1D) or 2 (2D) + * @param pathToXML path to casekit.nmr XML file in Bruker's TopSpin XML file format + * @param ndim number of dimensions: 1 (1D) or 2 (2D) * @param attributes which attribute indices in XML peak nodes should be used: - * 1 (shift of 1st dimension), 2 (shift of 2nd dimension if 2D data) - * @param nuclei nuclei for each dimension + * 1 (shift of 1st dimension), 2 (shift of 2nd dimension if 2D data) + * @param nuclei nuclei for each dimension + * * @return Spectrum class object containing the selected peak lists + * * @throws Exception */ private static Spectrum XMLtoSpectrum(final String pathToXML, final int ndim, final int[] attributes, final String[] nuclei) throws Exception { // assumes the same number of dims, attributes and atom types and a maximum number of dims of 2 - if((ndim != attributes.length) || (ndim != nuclei.length) || (attributes.length != nuclei.length) - || (ndim < 1 || ndim > 2)){ + if ((ndim != attributes.length) || (ndim != nuclei.length) || (attributes.length != nuclei.length) || (ndim < 1 || ndim > 2)) { return null; } final Spectrum spectrum = new Spectrum(nuclei); ArrayList shiftList; for (int dim = 0; dim < ndim; dim++) { shiftList = XMLtoPeakList(pathToXML, ndim, attributes[dim]); - if(dim == 0){ + if (dim == 0) { for (int i = 0; i < (shiftList != null ? 
shiftList.size() : 0); i++) { - spectrum.addSignal(new Signal(spectrum.getNuclei())); + spectrum.addSignal(new Signal(spectrum.getNuclei(), new Double[]{shiftList.get(i)}, null, null, null, 0)); } } - if(!spectrum.setShifts(shiftList, dim)){ - return null; - } } - spectrum.setIntensities(XMLtoPeakList(pathToXML, ndim, ndim + 1)); + // spectrum.setIntensities(XMLtoPeakList(pathToXML, ndim, ndim + 1)); return spectrum; - } + } } diff --git a/src/casekit/NMR/predict/Predict.java b/src/casekit/nmr/predict/Predict.java similarity index 76% rename from src/casekit/NMR/predict/Predict.java rename to src/casekit/nmr/predict/Predict.java index 597d7c3..42a1347 100644 --- a/src/casekit/NMR/predict/Predict.java +++ b/src/casekit/nmr/predict/Predict.java @@ -21,12 +21,12 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ -package casekit.NMR.predict; +package casekit.nmr.predict; -import casekit.NMR.Utils; -import casekit.NMR.model.Signal; -import casekit.NMR.model.Spectrum; +import casekit.nmr.Utils; +import casekit.nmr.model.Signal; +import casekit.nmr.model.Spectrum; import hose.HOSECodeBuilder; import org.openscience.cdk.exception.CDKException; import org.openscience.cdk.interfaces.IAtom; @@ -36,29 +36,28 @@ import java.util.HashMap; /** - * * @author Michael Wenk [https://github.com/michaelwenk] */ public class Predict { - + /** - * Predicts a shift value for a central atom based on its HOSE code and a + * Predicts a shift value for a central atom based on its HOSE code and a * given HOSE code lookup table. The prediction is done by using the median * of all occurring shifts in lookup table for the given HOSE code.
- * Specified for carbons (13C) only -> {@link casekit.NMR.Utils#getMultiplicityFromHydrogenCount(int)}. + * Specified for carbons (13C) only -> {@link casekit.nmr.Utils#getMultiplicityFromHydrogenCount(int)}. * * @param HOSECodeLookupTable HashMap containing HOSE codes as keys and a list of chemical shifts * of occurring central atoms as values - * @param HOSECode specific HOSE code to use for shift prediction - * @return null if HOSE code does not exist in lookup table + * @param HOSECode specific HOSE code to use for shift prediction * - * @see casekit.NMR.Utils#getMedian(ArrayList) + * @return null if HOSE code does not exist in lookup table * + * @see casekit.nmr.Utils#getMedian(ArrayList) */ public static Double predictShift(final HashMap> HOSECodeLookupTable, final String HOSECode) { if (HOSECodeLookupTable.containsKey(HOSECode)) { return Utils.getMedian(HOSECodeLookupTable.get(HOSECode)); -// return Utils.getMean(HOSECodeLookupTable.get(HOSECode)); + // return Utils.getMean(HOSECodeLookupTable.get(HOSECode)); } return null; @@ -68,21 +67,19 @@ public static Double predictShift(final HashMap> HOSEC * Predicts a signal for a central atom based on its HOSE code and a * given HOSE code lookup table. The prediction is done by using the mean * of all occurring shifts in lookup table for the given HOSE code.
- * Specified for carbons (13C) only -> {@link casekit.NMR.Utils#getMultiplicityFromHydrogenCount(int)}. + * Specified for carbons (13C) only -> {@link casekit.nmr.Utils#getMultiplicityFromHydrogenCount(int)}. * * @param HOSECodeLookupTable HashMap containing HOSE codes as keys and a list of chemical shifts * of occurring central atoms as values - * @param ac structure to predict from - * @param atomIndex index of central atom in structure for HOSE code generation - * @param maxSphere maximum sphere to use for HOSE code generation or null for unlimited - * @param nucleus nucleus (e.g. "13C") for signal creation + * @param ac structure to predict from + * @param atomIndex index of central atom in structure for HOSE code generation + * @param maxSphere maximum sphere to use for HOSE code generation or null for unlimited + * @param nucleus nucleus (e.g. "13C") for signal creation * * @return null if HOSE code of selected atom does not exist in lookup table - * - * @throws CDKException * + * @throws CDKException * @see #predictShift(HashMap, String) - * */ public static Signal predictSignal(final HashMap> HOSECodeLookupTable, final IAtomContainer ac, final int atomIndex, final Integer maxSphere, final String nucleus) throws Exception { if (!Utils.checkIndexInAtomContainer(ac, atomIndex)) { @@ -93,40 +90,34 @@ public static Signal predictSignal(final HashMap> HOSE if (predictedShift == null) { return null; } - return new Signal( - new String[]{nucleus}, - new Double[]{predictedShift}, - Utils.getMultiplicityFromHydrogenCount(ac.getAtom(atomIndex).getImplicitHydrogenCount()), - null - ); + return new Signal(new String[]{nucleus}, new Double[]{predictedShift}, Utils.getMultiplicityFromHydrogenCount(ac.getAtom(atomIndex).getImplicitHydrogenCount()), "signal", null, 1); } /** * Predicts a spectrum for a given structure based on HOSE code of atoms with specified nucleus and a * given HOSE code lookup table.
- * Specified for carbons (13C) only -> {@link casekit.NMR.Utils#getMultiplicityFromHydrogenCount(int)}. + * Specified for carbons (13C) only -> {@link casekit.nmr.Utils#getMultiplicityFromHydrogenCount(int)}. * * @param HOSECodeLookupTable HashMap containing HOSE codes as keys and a list of chemical shifts * of occurring central atoms as values - * @param ac structure to predict from - * @param maxSphere maximum sphere to use for HOSE code generation or null for unlimited - * @param nucleus nucleus (e.g. "13C") for signal creation + * @param ac structure to predict from + * @param maxSphere maximum sphere to use for HOSE code generation or null for unlimited + * @param nucleus nucleus (e.g. "13C") for signal creation + * * @return null if a HOSE code of one atom does not exist in lookup table - * - * @throws org.openscience.cdk.exception.CDKException * + * @throws org.openscience.cdk.exception.CDKException * @see #predictSignal(HashMap, IAtomContainer, int, Integer, String) - * */ public static Spectrum predictSpectrum(final HashMap> HOSECodeLookupTable, final IAtomContainer ac, final Integer maxSphere, final String nucleus) throws Exception { final Spectrum predictedSpectrum = new Spectrum(new String[]{nucleus}); Signal signal; - for (final IAtom atom : ac.atoms()) { + for (final IAtom atom : ac.atoms()) { if (atom.getSymbol().equals(Utils.getAtomTypeFromSpectrum(predictedSpectrum, 0))) { signal = Predict.predictSignal(HOSECodeLookupTable, ac, atom.getIndex(), maxSphere, nucleus); - if(signal == null){ + if (signal == null) { continue; -// return null; + // return null; } predictedSpectrum.addSignal(signal); } diff --git a/src/casekit/NMR/remarks b/src/casekit/nmr/remarks similarity index 100% rename from src/casekit/NMR/remarks rename to src/casekit/nmr/remarks From f8fbb800c6afbdf83f7f0dea34d24066096daa2e Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Thu, 10 Dec 2020 22:44:34 +0100 Subject: [PATCH 149/405] in spectrum class: explicit equivalent signals 
replaced by implicit ones; adaption in Utils and NMRShiftDB.getDataSetsFromNMRShiftDB to this --- src/casekit/nmr/Utils.java | 36 ++++++++++------------- src/casekit/nmr/dbservice/NMRShiftDB.java | 28 ++++++++++++++---- 2 files changed, 38 insertions(+), 26 deletions(-) diff --git a/src/casekit/nmr/Utils.java b/src/casekit/nmr/Utils.java index 9765245..5376aa3 100644 --- a/src/casekit/nmr/Utils.java +++ b/src/casekit/nmr/Utils.java @@ -940,14 +940,7 @@ public static Double roundDouble(final Double value, final int decimalPlaces) { * @return */ public static boolean containsExplicitHydrogens(final IAtomContainer ac) { - for (final IAtom atomA : ac.atoms()) { - // check each atom whether it is an hydrogen - if (atomA.getSymbol().equals("H")) { - return true; - } - } - - return false; + return getExplicitHydrogenCount(ac) > 0; } /** @@ -998,21 +991,24 @@ public static HashMap convertExplicitToImplicitHydrogens(final I * * @return */ - public static int getExplicitHydrogenCount(final IAtomContainer ac) { - final List toRemoveList = new ArrayList<>(); - IAtom atomB; - for (final IAtom atomA : ac.atoms()) { - if (atomA.getAtomicNumber() == 1) { - atomB = ac.getConnectedAtomsList(atomA).get(0); - if (atomB.getImplicitHydrogenCount() == null) { - atomB.setImplicitHydrogenCount(0); - } - atomB.setImplicitHydrogenCount(atomB.getImplicitHydrogenCount() + 1); - toRemoveList.add(atomA); + public static List getExplicitHydrogenIndices(final IAtomContainer ac) { + final List explicitHydrogenIndicesList = new ArrayList<>(); + for (int i = 0; i < ac.getAtomCount(); i++) { + if (ac.getAtom(i).getSymbol().equals("H")) { + explicitHydrogenIndicesList.add(i); } } - return toRemoveList.size(); + return explicitHydrogenIndicesList; + } + + /** + * @param ac + * + * @return + */ + public static int getExplicitHydrogenCount(final IAtomContainer ac) { + return getExplicitHydrogenIndices(ac).size(); } diff --git a/src/casekit/nmr/dbservice/NMRShiftDB.java 
b/src/casekit/nmr/dbservice/NMRShiftDB.java index 46976f1..c8f7578 100644 --- a/src/casekit/nmr/dbservice/NMRShiftDB.java +++ b/src/casekit/nmr/dbservice/NMRShiftDB.java @@ -27,10 +27,7 @@ import java.io.FileNotFoundException; import java.io.FileReader; -import java.util.ArrayList; -import java.util.Collection; -import java.util.HashMap; -import java.util.List; +import java.util.*; public class NMRShiftDB { @@ -96,12 +93,17 @@ public static Collection getDataSetsFromNMRShiftDB(final String pathToN String[] split; String spectrumIndexInRecord; IMolecularFormula mf; + List explicitHydrogenIndices; while (iterator.hasNext()) { structure = iterator.next(); AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(structure); - // remove explicit hydrogens - Utils.removeAtoms(structure, "H"); + explicitHydrogenIndices = Utils.getExplicitHydrogenIndices(structure); + Collections.sort(explicitHydrogenIndices); + if (!explicitHydrogenIndices.isEmpty()) { + // remove explicit hydrogens + Utils.removeAtoms(structure, "H"); + } hydrogenAdder.addImplicitHydrogens(structure); Utils.setAromaticityAndKekulize(structure); @@ -141,6 +143,20 @@ public static Collection getDataSetsFromNMRShiftDB(final String pathToN } assignment = NMRShiftDBSpectrumToAssignment(structure.getProperty(spectrumProperty1D), nucleus); + if (assignment != null && !explicitHydrogenIndices.isEmpty()) { + int hCount; + for (int i = 0; i < assignment.getAssignmentsCount(); i++) { + hCount = 0; + for (int j = 0; j < explicitHydrogenIndices.size(); j++) { + if (explicitHydrogenIndices.get(j) >= assignment.getAssignment(0, i)) { + break; + } + hCount++; + } + assignment.setAssignment(0, i, assignment.getAssignment(0, i) - hCount); + } + } + dataSets.add(new DataSet(structure, spectrum, assignment, meta)); } } From 71799605769234656325444663ce91398411c7a0 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Thu, 21 Jan 2021 09:41:10 +0100 Subject: [PATCH 150/405] migration of PyLSD methods from webCASE to here --- 
pom.xml | 131 +++-- src/casekit/HOSECodePredictor.java | 360 ------------ src/casekit/NMRShiftDBSDFParser.java | 247 -------- src/casekit/Result.java | 38 -- src/casekit/SimilarityRanker.java | 265 --------- .../{FileParser.java => FileOperations.java} | 37 +- src/casekit/nmr/Utils.java | 541 ++---------------- src/casekit/nmr/convert/LSDConverter.java | 237 -------- src/casekit/nmr/dbservice/NMRShiftDB.java | 75 +-- .../nmr/interpretation/InterpretData.java | 526 ----------------- src/casekit/nmr/lsd/Constants.java | 78 +++ .../nmr/lsd/PyLSDInputFileBuilder.java | 338 +++++++++++ src/casekit/nmr/model/Signal.java | 23 +- .../nmr/model/nmrdisplayer/Correlation.java | 54 ++ .../nmr/model/nmrdisplayer/Correlations.java | 40 ++ src/casekit/nmr/model/nmrdisplayer/Data.java | 45 ++ .../nmr/model/nmrdisplayer/Default.java | 42 ++ src/casekit/nmr/model/nmrdisplayer/Link.java | 48 ++ src/casekit/nmr/model/nmrdisplayer/Range.java | 44 ++ .../nmr/model/nmrdisplayer/Signal1D.java | 44 ++ .../nmr/model/nmrdisplayer/Signal2D.java | 47 ++ .../nmr/model/nmrdisplayer/Spectrum.java | 87 +++ src/casekit/nmr/model/nmrdisplayer/Zone.java | 44 ++ src/casekit/nmr/parse/Parser.java | 211 ------- src/casekit/nmr/predict/Predict.java | 15 +- src/casekit/nmr/utils/Utils.java | 70 +++ 26 files changed, 1131 insertions(+), 2556 deletions(-) delete mode 100644 src/casekit/HOSECodePredictor.java delete mode 100644 src/casekit/NMRShiftDBSDFParser.java delete mode 100644 src/casekit/Result.java delete mode 100644 src/casekit/SimilarityRanker.java rename src/casekit/io/{FileParser.java => FileOperations.java} (57%) delete mode 100644 src/casekit/nmr/convert/LSDConverter.java delete mode 100644 src/casekit/nmr/interpretation/InterpretData.java create mode 100644 src/casekit/nmr/lsd/Constants.java create mode 100644 src/casekit/nmr/lsd/PyLSDInputFileBuilder.java create mode 100644 src/casekit/nmr/model/nmrdisplayer/Correlation.java create mode 100644 
src/casekit/nmr/model/nmrdisplayer/Correlations.java create mode 100644 src/casekit/nmr/model/nmrdisplayer/Data.java create mode 100644 src/casekit/nmr/model/nmrdisplayer/Default.java create mode 100644 src/casekit/nmr/model/nmrdisplayer/Link.java create mode 100644 src/casekit/nmr/model/nmrdisplayer/Range.java create mode 100644 src/casekit/nmr/model/nmrdisplayer/Signal1D.java create mode 100644 src/casekit/nmr/model/nmrdisplayer/Signal2D.java create mode 100644 src/casekit/nmr/model/nmrdisplayer/Spectrum.java create mode 100644 src/casekit/nmr/model/nmrdisplayer/Zone.java delete mode 100644 src/casekit/nmr/parse/Parser.java create mode 100644 src/casekit/nmr/utils/Utils.java diff --git a/pom.xml b/pom.xml index 8b698e3..5b8a92b 100644 --- a/pom.xml +++ b/pom.xml @@ -1,69 +1,80 @@ - 4.0.0 - org.openscience - casekit - 1.0-SNAPSHOT - casekit - - - src - - - maven-compiler-plugin - 3.3 - - 1.8 - 1.8 - true - - - - maven-assembly-plugin - 3.0.0 - - - jar-with-dependencies - - - - - make-assembly - package - - single - - - - - - + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + 4.0.0 + org.openscience + casekit + 1.0-SNAPSHOT + casekit + + + src + + + maven-compiler-plugin + 3.3 + + 1.8 + 1.8 + true + + + + maven-assembly-plugin + 3.0.0 + + + jar-with-dependencies + + + + + make-assembly + package + + single + + + + + + org.openscience.cdk cdk-bundle 2.3 - - commons-cli - commons-cli - 1.3.1 - - - org.apache.commons - commons-lang3 - 3.5 - - - org.openscience - HOSECodeBuilder - 1.0 - - - org.mongodb - mongo-java-driver - 3.10.0 - - + + commons-cli + commons-cli + 1.3.1 + + + org.apache.commons + commons-lang3 + 3.5 + + + org.openscience + HOSECodeBuilder + 1.0 + + + org.mongodb + mongo-java-driver + 3.10.0 + + + org.projectlombok + lombok + 1.18.16 + compile + + + com.fasterxml.jackson.core + jackson-databind + 2.11.3 + + diff --git a/src/casekit/HOSECodePredictor.java b/src/casekit/HOSECodePredictor.java deleted 
file mode 100644 index b724840..0000000 --- a/src/casekit/HOSECodePredictor.java +++ /dev/null @@ -1,360 +0,0 @@ -/* -* This Open Source Software is provided to you under the MIT License - * Refer to doc/mit.license or https://opensource.org/licenses/MIT for more information - * - * Copyright (c) 2017, Christoph Steinbeck - */ - -package casekit; - -import casekit.nmr.Utils; -import org.apache.commons.cli.*; -import org.openscience.cdk.CDKConstants; -import org.openscience.cdk.depict.DepictionGenerator; -import org.openscience.cdk.exception.CDKException; -import org.openscience.cdk.interfaces.IAtom; -import org.openscience.cdk.interfaces.IAtomContainer; -import org.openscience.cdk.io.iterator.IteratingSDFReader; -import org.openscience.cdk.silent.SilentChemObjectBuilder; -import org.openscience.cdk.tools.HOSECodeGenerator; - -import java.io.BufferedReader; -import java.io.File; -import java.io.FileReader; -import java.io.IOException; -import java.text.DecimalFormat; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.StringTokenizer; - - -/** - * Predicts NMRS spectra by lookup of HOSE codes - * (Bremser, W., HOSE - A Novel Substructure Code, Analytica Chimica Acta, 1978, 103:355-365) - * parsed from a tab-separated value file, produced, for example, by the NMRShiftDBSDFParser - * tool provided in this package. - * The current version is hard coded to predict carbon spectra only. - * - *

- * This Open Source Software is provided to you under the MIT License - * Refer to doc/mit.license or https://opensource.org/licenses/MIT for more information - * - * Copyright (c) 2017, Christoph Steinbeck - * - * @author Christoph Steinbeck - * - */ - -@Deprecated -public class HOSECodePredictor { - - HashMap> hoseLookup; - public boolean verbose = false; - int maxSpheres = 6; //How many maximum spheres to use for the prediction - boolean generatePictures = false; - String picdir = null; - String inFile = null; - String hoseCodeFile = null; - String hoseTSVfile = null; - - /** - * Initializes a HOSECodePredictor by reading HOSECodes and assigned shift values from - * from a tab-separated values (TSV) file, produced, for example, by the NMRShiftDBSDFParser - * tool provided in this package. - * Each line in the TSV file has the format - *

- * String hosecode <\t>double shiftvalue - *

- * @param hoseTSVfile tab-separated values (TSV) file with HOSECodes and assigned shift values - * @throws Exception Exception if TSV file cannot be read - */ - public HOSECodePredictor(String hoseTSVfile) throws Exception - { - readHOSECodeTable(hoseTSVfile); - } - - /** - * Initializes an empty HOSECodePredictor. Use readHOSECodeTable(String hoseTSVfile) to initialise. - * - * Each line in the TSV file has the format - *

- * String hosecode <\t>double shiftvalue - *

- * @param hoseTSVfile tab-separated values (TSV) file with HOSECodes and assigned shift values - * @throws IOException Exception if TSV file cannot be read - */ - public HOSECodePredictor() throws Exception - { - - } - - public void predictFile(String outFile) throws Exception - { - - IAtomContainer ac = null; - File hoseTSVfile = new File(inFile); - IteratingSDFReader iterator = new IteratingSDFReader( - new FileReader(outFile), - SilentChemObjectBuilder.getInstance() - ); - - while (iterator.hasNext()) - { - ac = iterator.next(); - predict(ac); - generatePicture(ac, picdir); - } - iterator.close(); - } - - public void readHOSECodeTable() throws Exception - { - readHOSECodeTable(this.hoseTSVfile); - } - - /** - * Reads HOSE code table from TSV file. Without this, no prediction can be performed. - * Each line in the TSV file has the format - *

- * String hosecode <\t>double shiftvalue - *

- * - * @param hoseTSVfile - * @throws Exception - */ - public void readHOSECodeTable(String hoseTSVfile) throws Exception - { - String line = null; - StringTokenizer strtok; - String hose; - Double shift; - ArrayList shifts; - int linecounter = 0; - - if (verbose) System.out.println("Start reading HOSE codes from " + hoseTSVfile); - - BufferedReader br = new BufferedReader(new FileReader(hoseTSVfile)); - hoseLookup = new HashMap>(); - while((line = br.readLine()) != null) - { - strtok = new StringTokenizer(line, "\t"); - linecounter++; - hose = strtok.nextToken(); - shift = Double.parseDouble(strtok.nextToken()); - //System.out.println(hose + " ---- " + shift); - if (hoseLookup.containsKey(hose)) - { - shifts = hoseLookup.get(hose); - shifts.add(new Double(shift)); - //System.out.println("HOSE code already in hashtable. Adding to existing list."); - } - else - { - //System.out.println("HOSE code not in hashtable. Adding to new list."); - shifts = new ArrayList(); - shifts.add(new Double(shift)); - hoseLookup.put(hose, shifts); - } - } - br.close(); - if (verbose) System.out.println("Finished reading " + linecounter + " lines of HOSE codes."); - - } - - public HashMap> getHOSELookup(){ - - return this.hoseLookup; - } - - /** - * Predicts casekit.nmr chemical shifts based on a given HOSE code table read by the - * Constructor of this class. - * The predicted chemical shifts are assigned to each atom by - * as a property of type CDKConstants.NMRSHIFT_CARBON. - * They are also written as CDKConstants.COMMENT to aid the - * DepictionGenerator class to generate a picture with shift annotations - * for all carbon atoms. - * - * @param ac The IAtomContainer for which to predict the shift values - * @throws Exception Thrown if something goes wrong. 
- */ - public void predict(IAtomContainer ac) throws Exception - { - IAtom atom; - String hose = null; - Double shift = null; - HOSECodeGenerator hcg = new HOSECodeGenerator(); - DecimalFormat df = new DecimalFormat(); - df.setMaximumFractionDigits(2); - /** - * A visual appendix to the CDKConstants.NMRSHIFT_COMMENT annotation to show - * how many HOSE code spheres where used to predict this shift value. - */ - String[] sphereCount = - { - "'", - "''", - "'''", - "''''", - "'''''", - "''''''", - "'''''''", - "''''''''" - }; - fixExplicitHydrogens(ac); - if (verbose) System.out.println("Entering prediction module"); - for (int f = 0; f < ac.getAtomCount(); f++) - { - atom = ac.getAtom(f); - if (verbose) System.out.println("Atom no. " + f); - if (atom.getAtomicNumber() == 6) - { - // We descend from N-sphere HOSE codes defined by maxSpheres to those with lower spheres - for (int g = maxSpheres; g > 0; g--) - { - hose = hcg.getHOSECode(ac, atom, g); - //System.out.println("Look-up for HOSE code " + hose); - try{ - shift = getShift(hose); - if (shift != null) - { - if (verbose) System.out.println("Shift " + df.format(shift) + " found with " + g + "-sphere HOSE code."); - ac.getAtom(f).setProperty(CDKConstants.NMRSHIFT_CARBON, df.format(shift)); - ac.getAtom(f).setProperty(CDKConstants.COMMENT, df.format(shift) + sphereCount[g - 1]); - //If we found a HOSE code of a higher sphere, we take that one and skip the lower ones - break; - } - } - catch(Exception e) - { - e.printStackTrace(); - - } - } - } - } - } - - public Double getShift(String hose) - { - double shiftvalue = 0.0; - if (!hoseLookup.containsKey(hose)) return null; - ArrayList list = hoseLookup.get(hose); - for (int f = 0; f < list.size(); f++) - { - shiftvalue = shiftvalue + list.get(f).doubleValue(); - } - shiftvalue = shiftvalue / list.size(); - if (verbose) System.out.println("Predicted HOSE code from " + list.size() + " values"); - - return new Double(shiftvalue); - } - - public void 
generatePicture(IAtomContainer ac, String path) throws IOException, CDKException - { - String moleculeTitle = ""; - /* Path separators differ in operating systems. Unix uses slash, windows backslash. - That's why Java offers this constant File.pathSeparator since it knows what OS it is running on */ - if (path.endsWith(File.separator)) - moleculeTitle = path + "mol.png"; - else - moleculeTitle = path + File.separator + "mol.png"; - DepictionGenerator dg = new DepictionGenerator().withSize(800, 800).withAtomColors().withAtomValues().withMolTitle().withFillToFit(); - dg.depict(ac).writeTo(moleculeTitle); - } - - - - /** - * This predictor cannot handle explicit hydrogens. Where therefore convert them to implicit first - */ - private void fixExplicitHydrogens(IAtomContainer ac) { - Utils.convertExplicitToImplicitHydrogens(ac); - } - - private void parseArgs(String[] args) throws ParseException - { - Options options = setupOptions(args); - CommandLineParser parser = new DefaultParser(); - try { - CommandLine cmd = parser.parse( options, args); - this.inFile = cmd.getOptionValue("infile"); - this.hoseTSVfile = cmd.getOptionValue("hosecodes"); - if (cmd.hasOption("maxspheres")) - { - this.maxSpheres = Integer.parseInt(cmd.getOptionValue("maxspheres")); - } - if (cmd.hasOption("verbose")) this.verbose = true; - - if (cmd.hasOption("picdir")) - { - this.generatePictures = true; - this.picdir = cmd.getOptionValue("picdir"); - - } - } catch (ParseException e) { - // TODO Auto-generated catch block - HelpFormatter formatter = new HelpFormatter(); - formatter.setOptionComparator(null); - String header = "Predict casekit.nmr chemical shifts for a given molecule based on table of HOSE codes and assigned shifts.\n\n"; - String footer = "\nPlease report issues at https://github.com/steinbeck/spectra"; - formatter.printHelp( "java -jar casekit.jar casekit.HOSECodePredictor", header, options, footer, true ); - throw new ParseException("Problem parsing command line"); - } - } - - 
private Options setupOptions(String[] args) - { - Options options = new Options(); - Option hosefile = Option.builder("s") - .required(true) - .hasArg() - .longOpt("hosecodes") - .desc("filename of TSV file with HOSE codes (required)") - .build(); - options.addOption(hosefile); - Option infile = Option.builder("i") - .required(true) - .hasArg() - .longOpt("infile") - .desc("filename of with SDF/MOL file of a structure to be predicted (required)") - .build(); - options.addOption(infile); - Option picdir = Option.builder("d") - .required(true) - .hasArg() - .longOpt("picdir") - .desc("store picture of structure with assigned shifts in given directory (required)") - .build(); - options.addOption(picdir); - Option maxspheres = Option.builder("m") - .required(true) - .hasArg() - .longOpt("maxspheres") - .desc("maximum sphere size up to which to generate HOSE codes (required)") - .build(); - options.addOption(maxspheres); - Option verbose = Option.builder("v") - .required(false) - .longOpt("verbose") - .desc("generate messages about progress of operation") - .build(); - options.addOption(verbose); - return options; - } - - public static void main(String[] args) { - // TODO Auto-generated method stub - HOSECodePredictor hcp = null; - try { - hcp = new HOSECodePredictor(); - hcp.parseArgs(args); - hcp.readHOSECodeTable(); - hcp.predictFile(hcp.inFile); - } catch (Exception e) { - // We don't do anything here. Apache CLI will print a usage text. 
- if (hcp.verbose) e.printStackTrace(); - } - - } -} diff --git a/src/casekit/NMRShiftDBSDFParser.java b/src/casekit/NMRShiftDBSDFParser.java deleted file mode 100644 index fedf5ff..0000000 --- a/src/casekit/NMRShiftDBSDFParser.java +++ /dev/null @@ -1,247 +0,0 @@ -/* -* This Open Source Software is provided to you under the MIT License - * Refer to doc/mit.license or https://opensource.org/licenses/MIT for more information - * - * Copyright (c) 2017, Christoph Steinbeck - */ -package casekit; - -import org.apache.commons.cli.*; -import org.openscience.cdk.CDKConstants; -import org.openscience.cdk.aromaticity.Aromaticity; -import org.openscience.cdk.depict.DepictionGenerator; -import org.openscience.cdk.exception.CDKException; -import org.openscience.cdk.interfaces.IAtomContainer; -import org.openscience.cdk.interfaces.IMolecularFormula; -import org.openscience.cdk.io.iterator.IteratingSDFReader; -import org.openscience.cdk.silent.SilentChemObjectBuilder; -import org.openscience.cdk.tools.HOSECodeGenerator; -import org.openscience.cdk.tools.manipulator.AtomContainerManipulator; - -import java.io.*; -import java.util.StringTokenizer; - -/** - * Helper class to parse an NMRShiftDB SDF file with spectra assignments - * and convert it to a tab-separated values file with HOSE codes - * (Bremser, W., HOSE - A Novel Substructure Code, Analytica Chimica Acta, 1978, 103:355-365) - * and associated shift values. 
- * The TSV file can then be used by HOSECodePredictor to predict spectra - * - * @author Christoph Steinbeck - */ - -@Deprecated -public class NMRShiftDBSDFParser { - BufferedWriter bw; - IMolecularFormula formula = null; - String comment = null; - String carbonNMR = null; - String hydrogenNMR = null; - String moleculeTitle = null; - int carbonNMRCount = 0; - int hydrogenNMRCount = 0; - int moleculeCount = 0; - String report = ""; - String temp = ""; - boolean generatePictures = false; - String picdir = null; - int hoseCodeCounter = 0; - int carbonCounter = 0; - String inFile = null; - String outFile = null; - boolean verbose = false; - int maxSpheres; - - public NMRShiftDBSDFParser(String[] args) throws Exception - { - parseArgs(args); - if (verbose) System.out.println("Starting HOSE code generation with " + maxSpheres + " sphere from " + inFile); - File fout = new File(outFile); - FileOutputStream fos = new FileOutputStream(fout); - bw = new BufferedWriter(new OutputStreamWriter(fos)); - - IAtomContainer ac = SilentChemObjectBuilder.getInstance().newAtomContainer(); - - IteratingSDFReader iterator = new IteratingSDFReader( - new FileReader(inFile), - SilentChemObjectBuilder.getInstance() - ); - - while (iterator.hasNext()) - { - ac = iterator.next(); - carbonNMR = ac.getProperty("Spectrum 13C 0"); - hydrogenNMR = ac.getProperty("Spectrum 1H 0"); - if (carbonNMR != null) - { - carbonNMRCount++; - ac = assignCarbonNMR(ac, carbonNMR); - generateHOSECodes(ac, maxSpheres); - } - if (hydrogenNMR != null) hydrogenNMRCount++; - moleculeCount ++; - if (generatePictures) generatePicture(ac, picdir); - } - iterator.close(); - report = "File contains " + moleculeCount + " molecules with " + carbonNMRCount + " carbon spectra and " + hydrogenNMRCount + " hydrogen spectra.\n"; - report += hoseCodeCounter + " HOSE codes generated for " + carbonCounter + "carbon atoms, and written to file."; - if (verbose) System.out.println(report); - bw.close(); - } - - IAtomContainer 
assignCarbonNMR(IAtomContainer ac, String nmrString) throws IOException, CDKException - { - String sigString = null, shiftString = null, multString = null, atomNumString = null; - StringTokenizer strTok1 = new StringTokenizer(nmrString, "|"); - StringTokenizer strTok2 = null; - while (strTok1.hasMoreTokens()) - { - sigString = strTok1.nextToken(); //System.out.println(sigString); - strTok2 = new StringTokenizer(sigString, ";"); - while (strTok2.hasMoreTokens()) - { - shiftString = strTok2.nextToken(); //System.out.println(shiftString); - multString = strTok2.nextToken(); //System.out.println(multString); - atomNumString = strTok2.nextToken(); //System.out.println(atomNumString); - try - { - ac.getAtom(Integer.parseInt(atomNumString)).setProperty(CDKConstants.NMRSHIFT_CARBON, Double.parseDouble(shiftString)); - ac.getAtom(Integer.parseInt(atomNumString)).setProperty(CDKConstants.COMMENT, Double.parseDouble(shiftString)); - }catch(Exception exc) - { - System.out.println("Failed to assign shift to atom no. " + Integer.parseInt(atomNumString) + " in molecule no " + moleculeCount + ", title: " + ac.getProperty(CDKConstants.TITLE) + " with " + ac.getAtomCount() + " atoms. 
"); - } - } - } - return ac; - } - - public void generateHOSECodes(IAtomContainer ac, int maxSpheres) throws Exception - { - String hose = null; - HOSECodeGenerator hcg = new HOSECodeGenerator(); - AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(ac); - Aromaticity.cdkLegacy().apply(ac); - for (int f = 0; f < ac.getAtomCount(); f++) - { - if (ac.getAtom(f).getAtomicNumber() == 6) - { - carbonCounter ++; - for (int g = 0; g < maxSpheres; g++) - { - hose = hcg.getHOSECode(ac, ac.getAtom(f), g + 1); - if (hose != null && ac.getAtom(f).getProperty(CDKConstants.NMRSHIFT_CARBON) != null) - { - bw.write(hose + "\t" + ac.getAtom(f).getProperty(CDKConstants.NMRSHIFT_CARBON)); - bw.newLine(); - hoseCodeCounter++; - } - } - } - } - } - - public void generatePicture(IAtomContainer ac, String picdir) throws IOException, CDKException - { - try - { - temp = ac.getProperty(CDKConstants.TITLE); - if (temp != null && temp.length() > 15) temp = temp.substring(0, 14) + "..."; - ac.setProperty(CDKConstants.TITLE, temp); - } - catch(Exception e) - { - System.out.println("Problem with title " + temp); - } - if (!picdir.endsWith(File.separator)) picdir += File.separator; - moleculeTitle = picdir + String.format("%03d", moleculeCount) + "-mol.png"; - if (verbose) System.out.println(moleculeTitle); - DepictionGenerator dg = new DepictionGenerator().withSize(800, 800).withAtomColors().withAtomValues().withMolTitle().withFillToFit(); - dg.depict(ac).writeTo(moleculeTitle); - } - - private void parseArgs(String[] args) throws ParseException - { - Options options = setupOptions(args); - CommandLineParser parser = new DefaultParser(); - try { - CommandLine cmd = parser.parse( options, args); - this.inFile = cmd.getOptionValue("infile"); - this.outFile = cmd.getOptionValue("outfile"); - if (cmd.hasOption("maxspheres")) - { - this.maxSpheres = Integer.parseInt(cmd.getOptionValue("maxspheres")); - } - if (cmd.hasOption("verbose")) this.verbose = true; - - if (cmd.hasOption("picdir")) - { 
- this.generatePictures = true; - this.picdir = cmd.getOptionValue("picdir"); - - } - } catch (ParseException e) { - // TODO Auto-generated catch block - HelpFormatter formatter = new HelpFormatter(); - - formatter.setOptionComparator(null); - String header = "Generates a table of HOSE codes and assigned shifts from an NMRShiftDB SDF file from http://nmrshiftdb.nmr.uni-koeln.de/portal/js_pane/P-Help.\n\n"; - String footer = "\nPlease report issues at https://github.com/steinbeck/spectra"; - formatter.printHelp( "java -jar casekit.jar casekit.NMRShiftDBSDFParser", header, options, footer, true ); - throw new ParseException("Problem parsing command line"); - } - } - - private Options setupOptions(String[] args) - { - Options options = new Options(); - Option infile = Option.builder("i") - .required(true) - .hasArg() - .longOpt("infile") - .desc("filename of NMRShiftDB SDF with spectra (required)") - .build(); - options.addOption(infile); - Option outfile = Option.builder("o") - .required(true) - .hasArg() - .longOpt("outfile") - .desc("filename of generated HOSE code table (required)") - .build(); - options.addOption(outfile); - Option maxspheres = Option.builder("m") - .required(true) - .hasArg() - .longOpt("maxspheres") - .desc("maximum sphere size up to which to generate HOSE codes (required)") - .build(); - options.addOption(maxspheres); - Option verbose = Option.builder("v") - .required(false) - .longOpt("verbose") - .desc("generate messages about progress of operation") - .build(); - options.addOption(verbose); - Option picdir = Option.builder("d") - .required(false) - .hasArg() - .longOpt("picdir") - .desc("store pictures in given directory") - .build(); - options.addOption(picdir); - - return options; - } - - public static void main(String[] args) - { - try { - new NMRShiftDBSDFParser(args); - } catch (Exception e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } - } - -} diff --git a/src/casekit/Result.java b/src/casekit/Result.java deleted 
file mode 100644 index 43783ee..0000000 --- a/src/casekit/Result.java +++ /dev/null @@ -1,38 +0,0 @@ -/* -* This Open Source Software is provided to you under the MIT License - * Refer to doc/mit.license or https://opensource.org/licenses/MIT for more information - * - * Copyright (c) 2017, Christoph Steinbeck - */ - -package casekit; - -import org.openscience.cdk.interfaces.IAtomContainer; - -@Deprecated -public class Result { - public IAtomContainer ac; - public double score; - - public Result(IAtomContainer ac, double score) { - super(); - this.ac = ac; - this.score = score; - } - - public IAtomContainer getAc() { - return ac; - } - public void setAc(IAtomContainer ac) { - this.ac = ac; - } - public double getScore() { - return score; - } - - public void setScore(double score) { - this.score = score; - } - - -} \ No newline at end of file diff --git a/src/casekit/SimilarityRanker.java b/src/casekit/SimilarityRanker.java deleted file mode 100644 index 6a67ec1..0000000 --- a/src/casekit/SimilarityRanker.java +++ /dev/null @@ -1,265 +0,0 @@ - -/* - * This Open Source Software is provided to you under the MIT License - * Refer to doc/mit.license or https://opensource.org/licenses/MIT for more information - * - * Copyright (c) 2017, Christoph Steinbeck - */ -package casekit; - -import casekit.nmr.model.Signal; -import casekit.nmr.model.Spectrum; -import org.apache.commons.cli.*; -import org.openscience.cdk.CDKConstants; -import org.openscience.cdk.depict.DepictionGenerator; -import org.openscience.cdk.interfaces.IAtom; -import org.openscience.cdk.interfaces.IAtomContainer; -import org.openscience.cdk.io.iterator.IteratingSDFReader; -import org.openscience.cdk.silent.SilentChemObjectBuilder; - -import java.io.BufferedReader; -import java.io.File; -import java.io.FileReader; -import java.io.IOException; -import java.text.DecimalFormat; -import java.util.ArrayList; -import java.util.Comparator; -import java.util.StringTokenizer; - -/** - * SimilarityRanker uses a 
SpectrumPredictor and parses an SDF file, returning a configurable number of compounds and - * their ranked spectrum similarity. - *

- * This Open Source Software is provided to you under the MIT License - * Refer to doc/mit.license or https://opensource.org/licenses/MIT for more information - *

- * Copyright (c) 2017, Christoph Steinbeck - * - * @author steinbeck - */ - -@Deprecated -public class SimilarityRanker { - - public boolean verbose = true; - DecimalFormat df; - public int resultListSize = 100; - public String inFile = null; - public String outPath = null; - public String spectrumFile = null; - public String hoseTSVFile = null; - Spectrum spectrum = null; - ArrayList results = null; - - public boolean isVerbose() { - return verbose; - } - - public void setVerbose(boolean verbose) { - this.verbose = verbose; - } - - public int getResultListSize() { - return resultListSize; - } - - public void setResultListSize(int resultListSize) { - this.resultListSize = resultListSize; - } - - public SimilarityRanker() { - // TODO Auto-generated constructor stub - df = new DecimalFormat(); - df.setMaximumFractionDigits(2); - } - - public void readSpectrum() throws NumberFormatException, IOException { - String line; - StringTokenizer strtok; - int linecounter = 0; - Double shift = null; - Integer mult = null; - Signal signal; - String tempString; - Spectrum spectrum = new Spectrum(null); - BufferedReader br = new BufferedReader(new FileReader(spectrumFile)); - if (verbose) - System.out.println("Start reading spectrum from " + spectrumFile); - // while((line = br.readLine()) != null) - // { - // if (!line.startsWith("#") && line.trim().length() > 0) - // { - // strtok = new StringTokenizer(line, ";"); - // if (verbose) System.out.println(line); - // linecounter++; - // - // shift = Double.parseDouble(strtok.nextToken().trim()); - // mult = Integer.parseInt(strtok.nextToken().trim()); - // signal = new Signal(); - // spectrum.addSignal(signal); - // } - // } - br.close(); - if (verbose) - System.out.println("Read " + linecounter + " signals from spectrum in file " + spectrumFile); - - this.spectrum = spectrum; - } - - - public ArrayList rank() throws Exception { - /* - * Iterate of SDF file given by input file, predict a spectrum and calculate a similarity with the 
- * spectrum given in @spectrum. - * Store the 10 most similar spectra in a list and write them to outFile in the end - */ - - HOSECodePredictor predictor = new HOSECodePredictor(hoseTSVFile); - IAtomContainer ac = null; - double similarity = 0.0; - double bestSimilarity = 1000000000.0; - results = new ArrayList(); - ResultComparator comp = new ResultComparator(); - IteratingSDFReader iterator = new IteratingSDFReader(new FileReader(inFile), SilentChemObjectBuilder.getInstance()); - - while (iterator.hasNext()) { - ac = iterator.next(); - predictor.predict(ac); - similarity = calculateSimilarity(ac, spectrum); - if (results.size() > 0) { - if (similarity < results.get(results.size() - 1).getScore()) { - bestSimilarity = similarity; - ac.setProperty(CDKConstants.TITLE, "Distance " + df.format(similarity)); - results.add(new Result(ac, similarity)); - results.sort(comp); - //After sorting, we remove the worst entry and thereby trim the results list to resultListSize - if (results.size() == resultListSize) - results.remove(resultListSize - 1); - } - } else - results.add(new Result(ac, similarity)); - } - iterator.close(); - if (verbose) - System.out.println("Calculation finished. 
Best similarity = " + bestSimilarity); - return results; - } - - public double calculateSimilarity(IAtomContainer ac, Spectrum spectrum) { - double similarity = 0.0; - double lastDiff = 0.0; - int counter = 0; - String shift = null; - boolean matchFound = false; - double diff = 0.0; - double[] shifts = new double[spectrum.getSignalCount()]; - for (IAtom atom : ac.atoms()) { - if (atom.getAtomicNumber() == 6) { - shift = atom.getProperty(CDKConstants.NMRSHIFT_CARBON); - if (shift != null) - shifts[counter] = Double.parseDouble(shift); - else - shifts[counter] = -1.0; - counter++; - } - } - for (int f = 0; f < spectrum.getSignalCount(); f++) { - lastDiff = 10000000000.0; - matchFound = false; - for (int g = 0; g < spectrum.getSignalCount(); g++) { - // if (shifts[f] > spectrum.get(g).getShift().doubleValue()) diff = shifts[f] - spectrum.get(g).getShift().doubleValue(); - // else diff = spectrum.get(g).getShift().doubleValue() - shifts[f]; - df.format(diff); - if (diff < lastDiff) { - lastDiff = diff; - matchFound = true; - } - } - if (matchFound) - similarity += lastDiff; - } - return similarity / spectrum.getSignalCount(); - } - - public void reportResults() throws Exception { - String filename = null; - DepictionGenerator dg = null; - if (!outPath.endsWith(File.separator)) - outPath += File.separator; - for (int f = 0; f < results.size(); f++) { - filename = outPath + String.format("%03d", f) + "-mol.png"; - dg = new DepictionGenerator().withSize(800, 800).withAtomColors().withAtomValues().withMolTitle().withFillToFit(); - dg.depict(results.get(f).getAc()).writeTo(filename); - } - } - - - class ResultComparator implements Comparator { - public int compare(Result o1, Result o2) { - - if (o1.getScore() < o2.getScore()) - return -1; - return 1; - } - } - - private void parseArgs(String[] args) throws ParseException { - Options options = setupOptions(args); - CommandLineParser parser = new DefaultParser(); - try { - CommandLine cmd = parser.parse(options, args); - 
this.inFile = cmd.getOptionValue("infile"); - this.hoseTSVFile = cmd.getOptionValue("hosecodes"); - this.outPath = cmd.getOptionValue("outpath"); - this.spectrumFile = cmd.getOptionValue("spectrum"); - if (cmd.hasOption("numbers")) - this.resultListSize = Integer.parseInt(cmd.getOptionValue("numbers")); - if (cmd.hasOption("verbose")) - this.verbose = true; - } catch (ParseException e) { - // TODO Auto-generated catch block - HelpFormatter formatter = new HelpFormatter(); - formatter.setOptionComparator(null); - String header = "Ranke structures based on given experimental spectrum and similarity to predicted spectrum.\n\n"; - String footer = "\nPlease report issues at https://github.com/steinbeck/spectra"; - formatter.printHelp("java -jar casekit.jar casekit.SimilarityRanker", header, options, footer, true); - throw e; - } - } - - private Options setupOptions(String[] args) { - Options options = new Options(); - - Option infile = Option.builder("i").required(true).hasArg().longOpt("infile").desc("filename of with SDF/MOL file of structures to be ranked (required)").build(); - options.addOption(infile); - Option spectrumfile = Option.builder("p").required(true).hasArg().longOpt("spectrum").desc("filename of CSV file with spectrum. Format of each line: ; (required)").build(); - options.addOption(spectrumfile); - Option outpath = Option.builder("o").required(true).hasArg().longOpt("outpath").desc("path to store pictures of ranked output structures (required)").build(); - options.addOption(outpath); - Option hosefile = Option.builder("s").required(true).hasArg().longOpt("hosecodes").desc("filename of TSV file with HOSE codes (required)").build(); - options.addOption(hosefile); - Option outputnumber = Option.builder("n").hasArg().longOpt("number").desc("number of structures in output file. 
Default is 10, if this option is ommitted").build(); - options.addOption(outputnumber); - - Option verbose = Option.builder("v").required(false).longOpt("verbose").desc("generate messages about progress of operation").build(); - options.addOption(verbose); - - return options; - } - - - public static void main(String[] args) { - SimilarityRanker sr = new SimilarityRanker(); - try { - sr.parseArgs(args); - sr.readSpectrum(); - sr.rank(); - sr.reportResults(); - } catch (Exception e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } - - } - -} diff --git a/src/casekit/io/FileParser.java b/src/casekit/io/FileOperations.java similarity index 57% rename from src/casekit/io/FileParser.java rename to src/casekit/io/FileOperations.java index 2cf669e..d41eaf7 100644 --- a/src/casekit/io/FileParser.java +++ b/src/casekit/io/FileOperations.java @@ -12,27 +12,32 @@ package casekit.io; -import org.w3c.dom.Document; -import org.xml.sax.SAXException; - -import javax.xml.parsers.DocumentBuilder; -import javax.xml.parsers.DocumentBuilderFactory; -import javax.xml.parsers.ParserConfigurationException; import java.io.*; -public class FileParser { +public class FileOperations { + + public static BufferedReader readFile(final String pathToFile) { + try { + return new BufferedReader(new FileReader(pathToFile)); + } catch (IOException e) { + e.printStackTrace(); + } - public static BufferedReader parseText(final String pathToTextFile) throws FileNotFoundException { - return new BufferedReader(new FileReader(pathToTextFile)); + return null; } - public static Document parseXML(final String pathToXML) throws IOException, SAXException, ParserConfigurationException { - final DocumentBuilderFactory docBuilderFactory = DocumentBuilderFactory.newInstance(); - final DocumentBuilder docBuilder = docBuilderFactory.newDocumentBuilder(); - final Document doc = docBuilder.parse(new File(pathToXML)); - // normalize text representation - doc.getDocumentElement().normalize(); + public 
static boolean writeFile(final String pathToFile, final String content) { + try { + final FileWriter fileWriter = new FileWriter(pathToFile); + final BufferedWriter bufferedWriter = new BufferedWriter(fileWriter); + bufferedWriter.write(content); + bufferedWriter.close(); + + return true; + } catch (IOException e) { + e.printStackTrace(); + } - return doc; + return false; } } diff --git a/src/casekit/nmr/Utils.java b/src/casekit/nmr/Utils.java index 5376aa3..1c10404 100644 --- a/src/casekit/nmr/Utils.java +++ b/src/casekit/nmr/Utils.java @@ -13,111 +13,32 @@ import casekit.nmr.model.Spectrum; -import org.apache.commons.lang3.StringUtils; -import org.openscience.cdk.CDKConstants; import org.openscience.cdk.aromaticity.Aromaticity; import org.openscience.cdk.aromaticity.ElectronDonation; import org.openscience.cdk.aromaticity.Kekulization; import org.openscience.cdk.atomtype.CDKAtomTypeMatcher; -import org.openscience.cdk.depict.DepictionGenerator; import org.openscience.cdk.exception.CDKException; import org.openscience.cdk.graph.CycleFinder; import org.openscience.cdk.graph.Cycles; import org.openscience.cdk.interfaces.*; -import org.openscience.cdk.io.SDFWriter; -import org.openscience.cdk.io.iterator.IteratingSDFReader; -import org.openscience.cdk.silent.SilentChemObjectBuilder; import org.openscience.cdk.tools.CDKHydrogenAdder; import org.openscience.cdk.tools.manipulator.AtomContainerManipulator; import org.openscience.cdk.tools.manipulator.AtomTypeManipulator; import org.openscience.cdk.tools.manipulator.MolecularFormulaManipulator; -import java.io.*; import java.util.*; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.TimeUnit; -import java.util.regex.Matcher; -import java.util.regex.Pattern; /** * @author Michael Wenk [https://github.com/michaelwenk] + * @deprecated */ public class Utils { /** - * Splits an SDF into single molecular files and converts each of them into the LSD substructure 
format. - * Therefore, the mol2ab executable provided by LSD is required. - * - * @param pathSDF path to SDF to split - * @param pathOut path to directory which should contain the splitted and converted structure files - * @param pathMol2ab path to mol2ab executable provided by LSD - * - * @throws FileNotFoundException - * @throws CDKException - * @throws IOException - */ - public static void SDFtoLSD(final String pathSDF, final String pathOut, final String pathMol2ab) throws CDKException, IOException { - - - System.out.println("Conversion from SDF format to LSD format... "); - - - IAtomContainer ac; - - IteratingSDFReader iterator = new IteratingSDFReader(new FileReader(pathSDF), SilentChemObjectBuilder.getInstance()); - - - File fout; - FileOutputStream fos; - BufferedWriter bw; - File foutPilot = new File(pathOut + "/pilot"); - FileOutputStream fosPilot = new FileOutputStream(foutPilot); - BufferedWriter bwPilot = new BufferedWriter(new OutputStreamWriter(fosPilot)); - - - int i = 0; - while (iterator.hasNext()) { - i++; - ac = iterator.next(); - String molID = String.valueOf(i);//(String) ac.getProperties().get("cdk:Remark"); - // molID = molID.replace(" ", "_"); - fout = new File(pathOut + "/" + molID + ".sdf"); - fos = new FileOutputStream(fout); - bw = new BufferedWriter(new OutputStreamWriter(fos)); - - SDFWriter wtr = new SDFWriter(bw); - // Properties sdfWriterProps = new Properties(); - // sdfWriterProps.put("WriteAromaticBondTypes", "true"); - // wtr.addChemObjectIOListener(new PropertiesListener(sdfWriterProps)); - // wtr.customizeJob(); - - wtr.write(ac); - wtr.close(); - bw.close(); - - bwPilot.write(molID + " " + fout.getPath()); - bwPilot.newLine(); - - } - - iterator.close(); - bwPilot.close(); - System.out.println("Input file contained " + i + " molecules!\nSingle files created!"); - - - // should be replaced by e.g. the process command because: - // 1. for very long files the program ends long before the conversion process (command) ends - // 2. 
no control or output here - Runtime.getRuntime().exec(pathMol2ab + "/mol2ab " + pathOut + " " + foutPilot.getPath()); - - - System.out.println("Conversion from SDF format to LSD format... DONE!"); - - } - - /** - * Returns a hashmap constisting of lists of atom indices in an atom container. + * Returns a hashmap consisting of lists of atom indices in an atom container. * This is done for all atom types (e.g. C or Br) in given atom container. * * @param ac IAtomContainer to look in @@ -126,10 +47,10 @@ public static void SDFtoLSD(final String pathSDF, final String pathOut, final St * * @see #getAtomTypeIndicesByElement(org.openscience.cdk.interfaces.IAtomContainer, java.lang.String) */ - public static HashMap> getAtomTypeIndices(final IAtomContainer ac) { + public static Map> getAtomTypeIndices(final IAtomContainer ac) { - final HashMap> atomTypeIndices = new HashMap<>(); - final HashSet atomTypes = new HashSet<>(); + final Map> atomTypeIndices = new HashMap<>(); + final Set atomTypes = new HashSet<>(); for (final IAtom heavyAtom : AtomContainerManipulator.getHeavyAtoms(ac)) { atomTypes.add(heavyAtom.getSymbol()); } @@ -150,7 +71,7 @@ public static HashMap> getAtomTypeIndices(final IAtom * * @return */ - public static ArrayList getAtomTypeIndicesByElement(final IAtomContainer ac, final String atomType) { + public static List getAtomTypeIndicesByElement(final IAtomContainer ac, final String atomType) { final ArrayList indices = new ArrayList<>(); for (int i = 0; i < ac.getAtomCount(); i++) { @@ -163,52 +84,20 @@ public static ArrayList getAtomTypeIndicesByElement(final IAtomContaine } - public static String getAtomTypeFromSpectrum(final Spectrum spectrum, final int dim) { - if (spectrum.containsDim(dim)) { - return Utils.getAtomTypeFromNucleus(spectrum.getNuclei()[dim]); - } - - return null; - } - - public static String getAtomTypeFromNucleus(final String nucleus) { - final String[] nucleusSplit = nucleus.split("\\d"); - return nucleusSplit[nucleusSplit.length - 1]; 
- } - public static IMolecularFormula getMolecularFormulaFromAtomContainer(final IAtomContainer ac) { return MolecularFormulaManipulator.getMolecularFormula(ac); } - public static IMolecularFormula getMolecularFormulaFromString(final String mf) { - return MolecularFormulaManipulator.getMolecularFormula(mf, SilentChemObjectBuilder.getInstance()); - } - + public static String molecularFormularToString(final IMolecularFormula molecularFormula) { return MolecularFormulaManipulator.getString(molecularFormula); } - public static Map getMolecularFormulaElementCounts(final String mf) { - final LinkedHashMap counts = new LinkedHashMap<>(); - final IMolecularFormula iMolecularFormula = Utils.getMolecularFormulaFromString(mf); - final List elements = new ArrayList<>(); - final Matcher matcher = Pattern.compile("([A-Z][a-z]*)").matcher(mf); - - while (matcher.find()) { - elements.add(matcher.group(1)); - } - for (final String element : elements) { - counts.put(element, MolecularFormulaManipulator.getElementCount(iMolecularFormula, element)); - } - - return counts; - } - public static int getDifferenceSpectrumSizeAndMolecularFormulaCount(final Spectrum spectrum, final IMolecularFormula molFormula, final int dim) throws CDKException { if (!spectrum.containsDim(dim)) { throw new CDKException(Thread.currentThread().getStackTrace()[2].getClassName() + "." 
+ Thread.currentThread().getStackTrace()[2].getMethodName() + ": invalid dimension in spectrum given"); } - final String atomType = Utils.getAtomTypeFromSpectrum(spectrum, dim); + final String atomType = casekit.nmr.utils.Utils.getAtomTypeFromSpectrum(spectrum, dim); int atomsInMolFormula = 0; if (molFormula != null) { atomsInMolFormula = MolecularFormulaManipulator.getElementCount(molFormula, atomType); @@ -216,80 +105,15 @@ public static int getDifferenceSpectrumSizeAndMolecularFormulaCount(final Spectr return atomsInMolFormula - spectrum.getSignalCountWithEquivalences(); } - public static void editSignalsInSpectrum(final Spectrum spectrum, final IMolecularFormula molFormula, final int dim) throws Exception { - BufferedReader br = new BufferedReader(new InputStreamReader(System.in)); - int n; - final ArrayList validIndices = new ArrayList<>(); - int diff = Utils.getDifferenceSpectrumSizeAndMolecularFormulaCount(spectrum, molFormula, dim); - // walk through all signals in spectrum add missing or to remove signals - while (diff != 0) { - // display all selectable signal indices in spectrum - if (diff > 0) { - System.out.println("\n" + diff + " " + spectrum.getNuclei()[0] + " signals are missing!\nWhich signal is not unique?"); - } else { - System.out.println("\n" + (-1 * diff) + " " + spectrum.getNuclei()[0] + " signals are to be removed!\nWhich signal is to remove?"); - } - for (int s = 0; s < spectrum.getSignalCount(); s++) { - System.out.print("index: " + s); - for (int d = 0; d < spectrum.getNDim(); d++) { - System.out.print(", shift dim " + (d + 1) + ": " + spectrum.getShift(s, d)); - } - System.out.println(); - validIndices.add(s); - } - // get selected index by user input - n = -1; - while (!validIndices.contains(n)) { - System.out.println("Enter the index: "); - n = Integer.parseInt(br.readLine()); - } - // add/remove signals in spectrum - if (diff > 0) { - spectrum.addSignal(spectrum.getSignal(validIndices.indexOf(n)).buildClone()); - } else { - 
spectrum.removeSignal(validIndices.indexOf(n)); - } - diff = Utils.getDifferenceSpectrumSizeAndMolecularFormulaCount(spectrum, molFormula, dim); - } - } - /** * Specified for carbons only -> not generic!!! * - * @param mult + * @param protonsCount * * @return */ - public static Integer getHydrogenCountFromMultiplicity(final String mult) { - - if (mult == null) { - System.out.println("null!!!"); - return null; - } - switch (mult) { - case "Q": - return 3; - case "T": - return 2; - case "D": - return 1; - case "S": - return 0; - default: - System.out.println("unknown symbol!!"); - return null; - } - } - - /** - * Specified for carbons only -> not generic!!! - * - * @param hCount - * - * @return - */ - public static String getMultiplicityFromHydrogenCount(final int hCount) { - switch (hCount) { + public static String getMultiplicityFromProtonsCount(final int protonsCount) { + switch (protonsCount) { case 0: return "S"; case 1: @@ -303,36 +127,6 @@ public static String getMultiplicityFromHydrogenCount(final int hCount) { } } - - /** - * Returns the casekit.nmr shift constant value for a given element. As far as - * it is defined, the value from CDKConstants.NMRSHIFT_* (e.g. - * {@link org.openscience.cdk.CDKConstants#NMRSHIFT_CARBON}) will be used. - * Otherwise the same format is used for other atom types. - * Elements defined so far: C, H, N, P, F, D, O, S, Si, B, Pt. - * - * @param element element's symbol (e.g. "C") - * - * @return - */ - public static String getNMRShiftConstant(final String element) { - switch (element) { - case "C": - return CDKConstants.NMRSHIFT_CARBON; - case "H": - return CDKConstants.NMRSHIFT_HYDROGEN; - case "N": - return CDKConstants.NMRSHIFT_NITROGEN; - case "P": - return CDKConstants.NMRSHIFT_PHOSPORUS; - case "F": - return CDKConstants.NMRSHIFT_FLUORINE; - // case "S": return CDKConstants.NMRSHIFT_SULFUR; - default: - return null; - } - } - /** * Returns the casekit.nmr isotope identifier for a given element, e.g. C -> 13C. 
* Elements defined so far: C, H, N, P, F, D, O, S, Si, B, Pt. @@ -379,180 +173,6 @@ public static Set getAtomTypesInAtomContainer(final IAtomContainer ac) { } - public static boolean checkMinMaxValue(final double min, final double max, final double value) { - - return (value >= min && value <= max); - } - - /** - * @param ac - * @param indexAC - * @param bondsSet - * @param neighborElems - * - * @return - * - * @deprecated - */ - public static int[] getNeighborhoodBondsCount(final IAtomContainer ac, final int indexAC, final String[] bondsSet, final List neighborElems) { - final int[] counts = new int[neighborElems.size() * bondsSet.length]; - String foundBonds; - // for all given neighbor element types - for (int n = 0; n < neighborElems.size(); n++) { - foundBonds = ""; - // for all next neighbors of a specific element - for (IAtom neighborAtom : ac.getConnectedAtomsList(ac.getAtom(indexAC))) { - // skip if not the right neighborhood element or bond type is unknown/unset - if ((!neighborAtom.getSymbol().equals(neighborElems.get(n))) || (casekit.nmr.Utils.getStringFromBondOrder(ac.getBond(ac.getAtom(indexAC), neighborAtom).getOrder()) == null)) { - continue; - } - foundBonds += casekit.nmr.Utils.getStringFromBondOrder(ac.getBond(ac.getAtom(indexAC), neighborAtom).getOrder()); - } - for (int k = 0; k < bondsSet.length; k++) { - counts[n * bondsSet.length + k] = 0; - if (casekit.nmr.Utils.sortString(foundBonds).equals(casekit.nmr.Utils.sortString(bondsSet[k]))) { - counts[n * bondsSet.length + k] = 1; - break; - } - } - } - - return counts; - } - - /** - * @param pathToOutput - * @param m - * @param bondsSet - * @param elem - * @param neighborElems - * @param min - * @param max - * @param stepSize - * - * @throws IOException - * @deprecated - */ - public static void writeNeighborhoodBondsCountMatrix(final String pathToOutput, final int[][] m, final String[] bondsSet, final String elem, final ArrayList neighborElems, final int min, final int max, final int stepSize) 
throws IOException { - - if (stepSize < 1) { - System.err.println("stepSize < 1 not allowed!!!"); - return; - } - final StringBuilder sb = new StringBuilder(); - sb.append("shift [" + elem + "] (" + stepSize + "),nTotal,inRing,isArom,q" + elem + "," + elem + "H," + elem + "H2," + elem + "H3,"); - for (int i = 0; i < neighborElems.size(); i++) { - for (int j = 0; j < bondsSet.length; j++) { - sb.append(bondsSet[j] + "[" + neighborElems.get(i) + "]"); - if (j < bondsSet.length - 1) { - sb.append(","); - } - } - if (i < neighborElems.size() - 1) { - sb.append(","); - } - } - sb.append("\n"); - for (int i = 0; i < stepSize * (max - min) + 1; i++) { - sb.append((i + min) + ","); - for (int j = 0; j < 3 + 4 + neighborElems.size() * bondsSet.length; j++) { - sb.append(m[i][j]); - if (j < 3 + 4 + neighborElems.size() * bondsSet.length - 1) { - sb.append(","); - } - } - sb.append("\n"); - } - - final FileWriter writer = new FileWriter(pathToOutput); - writer.append(sb.toString()); - writer.flush(); - writer.close(); - } - - /** - * @param s - * - * @return - * - * @deprecated - */ - public static String sortString(final String s) { - final char[] c = s.toCharArray(); - Arrays.sort(c); - return new String(c); - } - - /** - * @param valences - * - * @return - * - * @deprecated - */ - public static ArrayList> getBondOrderSets(final String[] valences) { - - final ArrayList> bondOrderSets = new ArrayList<>(); - for (int i = 0; i < valences.length; i++) { - bondOrderSets.add(new ArrayList<>()); - for (int k = 0; k < StringUtils.countMatches(valences[i], "-"); k++) { - bondOrderSets.get(i).add(IBond.Order.SINGLE); - } - for (int k = 0; k < StringUtils.countMatches(valences[i], "="); k++) { - bondOrderSets.get(i).add(IBond.Order.DOUBLE); - } - for (int k = 0; k < StringUtils.countMatches(valences[i], "%"); k++) { - bondOrderSets.get(i).add(IBond.Order.TRIPLE); - } - } - - return bondOrderSets; - } - - /** - * @param order - * - * @return - * - * @deprecated - */ - public static 
String getStringFromBondOrder(final IBond.Order order) { - switch (order) { - case SINGLE: - return "-"; - case DOUBLE: - return "="; - case TRIPLE: - return "%"; - default: - return null; - } - } - - - public static void writeTextFile(final String pathToOutputFile, final String content) throws IOException { - FileWriter fr = new FileWriter(new File(pathToOutputFile)); - BufferedWriter br = new BufferedWriter(fr); - br.write(content); - br.close(); - } - - /** - * Simple function without any settings to generate a picture from a structure - * given as IAtomcontainer. - * - * @param ac Atom container - * @param path Path to file for storing - * - * @throws IOException - * @throws CDKException - */ - public static void generatePicture(final IAtomContainer ac, final String path) throws IOException, CDKException { - final DepictionGenerator dg = new DepictionGenerator().withSize(1200, 1200).withAtomColors().withFillToFit().withAtomNumbers(); - dg.depict(ac).writeTo(path); - } - - /** * Detects outliers in given array list of input values and removes them.
* Here, outliers are those which are outside of a calculated lower and upper bound (whisker). @@ -564,7 +184,7 @@ public static void generatePicture(final IAtomContainer ac, final String path) t * * @return new array list without values outside the generated boundaries */ - public static ArrayList removeOutliers(final ArrayList input, final double multiplierIQR) { + public static List removeOutliers(final List input, final double multiplierIQR) { final ArrayList inputWithoutOutliers = new ArrayList<>(input); inputWithoutOutliers.removeAll(Utils.getOutliers(inputWithoutOutliers, multiplierIQR)); @@ -576,7 +196,7 @@ public static ArrayList removeOutliers(final ArrayList input, fi * * @return */ - public static ArrayList getOutliers(final ArrayList input, final double multiplierIQR) { + public static List getOutliers(final List input, final double multiplierIQR) { final ArrayList outliers = new ArrayList<>(); if (input.size() <= 1) { return outliers; @@ -610,7 +230,7 @@ public static ArrayList getOutliers(final ArrayList input, final * * @return */ - public static Double getMedian(final ArrayList data) { + public static Double getMedian(final List data) { if ((data == null) || data.isEmpty()) { return null; } @@ -652,7 +272,7 @@ public static Double getMean(final Collection data) { * * @return */ - public static Double getStandardDeviation(final ArrayList data) { + public static Double getStandardDeviation(final List data) { if ((data == null) || data.isEmpty()) { return null; } @@ -702,7 +322,7 @@ public static Double getMean(final Double[] data) { return ((data.length - nullCounter) != 0) ? 
(sum / (data.length - nullCounter)) : null; } - public static HashMap getMean(final HashMap> lookup) { + public static Map getMean(final Map> lookup) { final HashMap means = new HashMap<>(); Double meanInList; @@ -734,56 +354,6 @@ public static boolean isValidBondAddition(final IAtomContainer ac, final int ato } - /** - * Tests whether two array lists of integers are equal which also means - * bidirectional values to each other. - * - * @param shiftMatches1 - * @param shiftMatches2 - * - * @return - */ - public static boolean isBidirectional(final ArrayList shiftMatches1, final ArrayList shiftMatches2) { - final ArrayList temp1 = new ArrayList<>(shiftMatches1); - final ArrayList temp2 = new ArrayList<>(shiftMatches2); - Collections.sort(temp1); - Collections.sort(temp2); - - return temp1.equals(temp2); - } - - /** - * @param ac - * @param shiftMatches1 - * @param shiftMatches2 - * @param prop - * - * @deprecated - */ - public static void setBidirectionalLinks(final IAtomContainer ac, final ArrayList shiftMatches1, final ArrayList shiftMatches2, final String prop) { - - ArrayList propList1, propList2; - for (int i = 0; i < shiftMatches1.size(); i++) { - if (shiftMatches1.get(i) >= 0 && shiftMatches2.get(i) >= 0) { - if (ac.getAtom(shiftMatches1.get(i)).getProperty(prop) == null) { - ac.getAtom(shiftMatches1.get(i)).setProperty(prop, new ArrayList<>()); - } - if (ac.getAtom(shiftMatches2.get(i)).getProperty(prop) == null) { - ac.getAtom(shiftMatches2.get(i)).setProperty(prop, new ArrayList<>()); - } - propList1 = ac.getAtom(shiftMatches1.get(i)).getProperty(prop); - propList2 = ac.getAtom(shiftMatches2.get(i)).getProperty(prop); - if (!propList1.contains(shiftMatches2.get(i))) { - propList1.add(shiftMatches2.get(i)); - } - if (!propList2.contains(shiftMatches1.get(i))) { - propList2.add(shiftMatches1.get(i)); - } - } - } - } - - /** * @param pathToFile * @@ -831,7 +401,7 @@ public static Double getRMS(final ArrayList data) { * * @return */ - public static HashMap 
getRMS(final HashMap> lookup) { + public static Map getRMS(final Map> lookup) { final HashMap rms = new HashMap<>(); Double rmsInList; for (final String key : lookup.keySet()) { @@ -862,41 +432,14 @@ public static void addImplicitHydrogens(final IAtomContainer ac) throws CDKExcep adder.addImplicitHydrogens(ac); } - // public static int countElements(final String input){ - // int counter = 0; - // for (int k = 0; k < input.length(); k++) { - // // Check for uppercase letters - // if (Character.isLetter(input.charAt(k)) && Character.isUpperCase(input.charAt(k))) { - // counter++; - // } - // } - // - // return counter; - // } - - // public static ArrayList getComponents(final String symbols){ - // final ArrayList components = new ArrayList<>(); - // for (int i = 0; i < symbols.length(); i++) { - // if ((i + 1 < symbols.length()) - // && Character.isLowerCase(symbols.charAt(i + 1))) { - // components.add(symbols.substring(i, i + 2)); - // i++; - // } else { - // components.add(symbols.substring(i, i + 1)); - // } - // } - // - // return components; - // } - /** * @param lookup * * @return */ - public static HashMap getMedian(final HashMap> lookup) { + public static Map getMedian(final Map> lookup) { - final HashMap medians = new HashMap<>(); + final Map medians = new HashMap<>(); Double medianInList; for (final String key : lookup.keySet()) { medianInList = Utils.getMedian(lookup.get(key)); @@ -914,7 +457,7 @@ public static HashMap getMedian(final HashMap> hoseLookupToExtend, final HashMap> hoseLookup) { + public static void combineHashMaps(final Map> hoseLookupToExtend, final Map> hoseLookup) { for (final String hose : hoseLookup.keySet()) { if (!hoseLookupToExtend.containsKey(hose)) { hoseLookupToExtend.put(hose, new ArrayList<>()); @@ -950,15 +493,15 @@ public static boolean containsExplicitHydrogens(final IAtomContainer ac) { * before the removals will be returned which one can use for atom index * comparison (before and after the removals). 
* - * @param ac the structure to convert + * @param ac the structure to lsd * * @return * * @see #containsExplicitHydrogens(org.openscience.cdk.interfaces.IAtomContainer) */ - public static HashMap convertExplicitToImplicitHydrogens(final IAtomContainer ac) { + public static Map convertExplicitToImplicitHydrogens(final IAtomContainer ac) { // create a list of atom indices which one can use for index comparison (before vs. after) after removing the explicit hydrogens - final HashMap atomIndices = new HashMap<>(); + final Map atomIndices = new HashMap<>(); final List toRemoveList = new ArrayList<>(); IAtom atomB; for (final IAtom atomA : ac.atoms()) { @@ -1049,33 +592,17 @@ public static IAtomContainer removeAtoms(final IAtomContainer ac, final String a return ac; } - /** - * @param array - * - * @return - */ - public static ArrayList ArrayToArrayList(final int[] array) { - - final ArrayList list = new ArrayList<>(); - for (int i = 0; i < array.length; i++) { - list.add(array[i]); - } - - return list; - } - - - public static String getSpectrumNucleiAsString(final Spectrum spectrum) { - String specID = ""; - for (int i = 0; i < spectrum.getNDim(); i++) { - specID += spectrum.getNuclei()[i]; - if (i < spectrum.getNDim() - 1) { - specID += "-"; - } - } - - return specID; - } + // public static String getSpectrumNucleiAsString(final Spectrum spectrum) { + // String specID = ""; + // for (int i = 0; i < spectrum.getNDim(); i++) { + // specID += spectrum.getNuclei()[i]; + // if (i < spectrum.getNDim() - 1) { + // specID += "-"; + // } + // } + // + // return specID; + // } public static boolean checkIndexInAtomContainer(final IAtomContainer ac, final int atomIndex) { return ((atomIndex >= 0) && atomIndex < ac.getAtomCount()); diff --git a/src/casekit/nmr/convert/LSDConverter.java b/src/casekit/nmr/convert/LSDConverter.java deleted file mode 100644 index ec6d0c2..0000000 --- a/src/casekit/nmr/convert/LSDConverter.java +++ /dev/null @@ -1,237 +0,0 @@ -/* - * The MIT License 
- * - * Copyright 2019 Michael Wenk [https://github.com/michaelwenk] - * - * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ -package casekit.nmr.convert; - -import casekit.nmr.Utils; -import casekit.nmr.model.Spectrum; -import org.openscience.cdk.interfaces.IAtomContainer; -import org.openscience.cdk.interfaces.IBond; -import org.openscience.cdk.silent.MolecularFormula; -import org.openscience.cdk.tools.manipulator.MolecularFormulaManipulator; - -import java.io.File; -import java.io.IOException; -import java.util.HashMap; - -/** - * - * @author Michael Wenk [https://github.com/michaelwenk] - */ -public class LSDConverter { - - /** - * - * @param projectName - * @param pathToOutputFile - * @param pathsToFilters - * @param molecularFormula - * @param ac - * @param spectra - * - * @throws IOException - */ - public static void ConvertToLSD(final String projectName, final String pathToOutputFile, final String[] pathsToFilters, final MolecularFormula molecularFormula, final IAtomContainer ac, final HashMap spectra) throws IOException { - - String wholeContent, hybrid, protons, MULT = "", HSQC = "", BOND = "", HMBC = "", COSY = ""; - wholeContent = "; project name: " + projectName + "\n"; - if(molecularFormula != null){ - wholeContent += "; molecular formula: " + MolecularFormulaManipulator.getString(molecularFormula) + "\n\n"; - } else { - wholeContent += "; molecular formula: unknown \n\n"; - } - for (int i = 0; i < ac.getAtomCount(); i++) { - // set MULT section in LSD input file - // set hybridization level - if(ac.getAtom(i).getHybridization() == null){ - hybrid = "-"; - } else { - switch (ac.getAtom(i).getHybridization()) { - case SP1: - case S: - hybrid = "1"; break; - case SP2: - hybrid = "2"; break; - default: - hybrid = "3"; - } - } - // set implicit proton number - if(ac.getAtom(i).getImplicitHydrogenCount() == null){ - protons = "-"; - } else { - protons = String.valueOf(ac.getAtom(i).getImplicitHydrogenCount()); - } - MULT += "MULT " + (i+1) + " " + ac.getAtom(i).getSymbol() + " " + hybrid + " " + protons; - 
if(ac.getAtom(i).getProperty(Utils.getNMRShiftConstant(ac.getAtom(i).getSymbol())) != null){ - String hCount; - if(ac.getAtom(i).getImplicitHydrogenCount() == null){ - hCount = "x"; - } else { - hCount = String.valueOf(ac.getAtom(i).getImplicitHydrogenCount()); - } - MULT += ";\t" + ac.getAtom(i).getProperty(Utils.getNMRShiftConstant(ac.getAtom(i).getSymbol())) + ",\t" + ac.getAtom(i).getSymbol() + "H" + hCount; - } - MULT += "\n"; - // set HSQC section in LSD input file - if((ac.getAtom(i).getImplicitHydrogenCount() != null) && (ac.getAtom(i).getImplicitHydrogenCount() > 0)){ - HSQC += "HSQC " + (i+1) + " " + (i+1) + ";\t" + ac.getAtom(i).getSymbol() + "H" + ac.getAtom(i).getImplicitHydrogenCount() + "\n"; - } - } - wholeContent += MULT + "\n"; - wholeContent += HSQC + "\n"; - - // set BOND information in LSD input file by INADEQUATE or general bond knowledge - for (IBond bond : ac.bonds()) { - BOND += "BOND " + (bond.getAtom(0).getIndex()+1) + " " + (bond.getAtom(1).getIndex()+1) + ";\t" + ac.getAtom(bond.getAtom(0).getIndex()).getSymbol() + "H" + ac.getAtom(bond.getAtom(0).getIndex()).getImplicitHydrogenCount() + " - " + ac.getAtom(bond.getAtom(1).getIndex()).getSymbol() + "H" + ac.getAtom(bond.getAtom(1).getIndex()).getImplicitHydrogenCount() + "\n"; - } - wholeContent += BOND + "\n"; - - - // @TODO repair HMBC and COSY information output - -// // set HMBC information to LSD input file -// ArrayList indicesInAtomContainerDim1; -// ArrayList indicesInAtomContainerDim2; -// final boolean [][] HMBCTable = new boolean[ac.getAtomCount()][ac.getAtomCount()]; -// for (int i = 0; i < ac.getAtomCount(); i++) { -// for (int j = 0; j < ac.getAtomCount(); j++) { -// HMBCTable[i][j] = false; -// } -// } -// for (final Spectrum spectrum : spectra.values()) { -// if((spectrum.getNDim() != 2) || !spectrum.getSpecType().startsWith(CDKConstants.NMRSPECTYPE_2D_HMBC)){ -// continue; -// } -// indicesInAtomContainerDim1 = this.getAssignedAtomIndices(spectrum, 0); -// 
indicesInAtomContainerDim2 = this.getAssignedAtomIndices(spectrum, 1); -// HMBC += ";\t " + spectrum.getSpecType() + " " + Utils.getSpectrumNucleiAsString(spectrum) + "\n"; -// for (int i = 0; i < spectrum.getSignalCount(); i++) { -// if((indicesInAtomContainerDim1.get(i) > -1) && (indicesInAtomContainerDim2.get(i) > -1)){ -// // set signal only if it is not already covered by BOND -// // here reversed order (see LSD manual page): 1. heavy atom, 2. proton -// if(ac.getBond(ac.getAtom(indicesInAtomContainerDim2.get(i)), ac.getAtom(indicesInAtomContainerDim1.get(i))) != null){ -// HMBC += ";"; -// } -// HMBC += "HMBC " + (indicesInAtomContainerDim2.get(i) + 1) + " " + (indicesInAtomContainerDim1.get(i) + 1) + ";\t" + ac.getAtom(indicesInAtomContainerDim2.get(i)).getSymbol() + "H" + ac.getAtom(indicesInAtomContainerDim2.get(i)).getImplicitHydrogenCount() + " - " + ac.getAtom(indicesInAtomContainerDim1.get(i)).getSymbol() + "H" + ac.getAtom(indicesInAtomContainerDim1.get(i)).getImplicitHydrogenCount() + "\n"; -// HMBCTable[indicesInAtomContainerDim2.get(i)][indicesInAtomContainerDim1.get(i)] = true; -// } -// } -// } -// wholeContent += HMBC + "\n"; -// // set COSY information to LSD input file -// for (final Spectrum spectrum : spectra.values()) { -// if((spectrum.getNDim() != 2) || !spectrum.getSpecType().startsWith(CDKConstants.NMRSPECTYPE_2D_HHCOSY)){ -// continue; -// } -// indicesInAtomContainerDim1 = this.getAssignedAtomIndices(spectrum, 0); -// indicesInAtomContainerDim2 = this.getAssignedAtomIndices(spectrum, 1); -// COSY += ";\t " + spectrum.getSpecType() + " " + Utils.getSpectrumNucleiAsString(spectrum) + "\n"; -// for (int i = 0; i < spectrum.getSignalCount(); i++) { -// if((indicesInAtomContainerDim1.get(i) > -1) && (indicesInAtomContainerDim2.get(i) > -1)){ -// // set signal only if it is not already covered by BOND or HMBC -// if((ac.getBond(ac.getAtom(indicesInAtomContainerDim1.get(i)), ac.getAtom(indicesInAtomContainerDim2.get(i))) != null) -// || 
HMBCTable[indicesInAtomContainerDim1.get(i)][indicesInAtomContainerDim2.get(i)]){ -// COSY += ";"; -// } -// COSY += "COSY " + (indicesInAtomContainerDim1.get(i) + 1) + " " + (indicesInAtomContainerDim2.get(i) + 1) + ";\t" + ac.getAtom(indicesInAtomContainerDim1.get(i)).getSymbol() + "H" + ac.getAtom(indicesInAtomContainerDim1.get(i)).getImplicitHydrogenCount() + " - " + ac.getAtom(indicesInAtomContainerDim2.get(i)).getSymbol() + "H" + ac.getAtom(indicesInAtomContainerDim2.get(i)).getImplicitHydrogenCount() + "\n"; -// } -// } -// } -// wholeContent += COSY + "\n"; - // set filter definitions - String DEFF = ""; - String FEXP = ""; - if((pathsToFilters != null) && pathsToFilters.length > 0){ - int fragmentCounter = 1; - for (final String pathToFilter : pathsToFilters) { - File folder = new File(pathToFilter); - File[] listOfFiles = folder.listFiles(); - for (final File file : listOfFiles) { - if (file.isFile() && !file.getName().toLowerCase().contains(".")) { - DEFF += "DEFF F" + fragmentCounter + " \"" + file.getAbsolutePath() + "\"\n"; - fragmentCounter++; - } - } - } - FEXP = "FEXP \"NOT F1"; - for (int i = 2; i < fragmentCounter; i++) { - FEXP += " and NOT F" + i; - } - FEXP += "\""; - } - - wholeContent += DEFF + "\n"; - wholeContent += FEXP + "\n"; - - Utils.writeTextFile(pathToOutputFile, wholeContent); - } - - - -// public static void countNeighborhoodBonds(final String pathToNMRShiftDB, final String[] bondsSet, final String nucleus, final ArrayList neighborElems, final int minShift, final int maxShift, final int stepSize) throws FileNotFoundException, IOException, CDKException { -// -// if (stepSize < 1) { -// System.err.println("stepSize < 1 not allowed!!!"); -// return; -// } -// // creation of frequency counting matrix and shift indices holder -// final int[][] neighborhoodCountsMatrix = new int[stepSize * (maxShift - minShift + 1)][3 + 4 + neighborElems.size() * bondsSet.length]; -// final IAtomContainerSet acSet = 
NMRShiftDB.getStructuresFromSDFile(pathToNMRShiftDB, true); -// final HashMap> shiftIndicesInACSet = new HashMap<>(); -// for (int i = 0; i < stepSize * maxShift; i++) { -// for (int j = 0; j < 3 + 4 + neighborElems.size() * bondsSet.length; j++) { -// neighborhoodCountsMatrix[i][j] = 0; -// } -// shiftIndicesInACSet.put(i, new ArrayList<>()); -// } -// int atomIndexDB, shiftDBInt; double shiftDBDouble; IAtomContainer acDB; -// // go through all molecules in MongoDB -// for (int k = 0; k < acSet.getAtomContainerCount(); k++) { -// acDB = acSet.getAtomContainer(k); -// // for all MongoDB entries containing a spectrum for the current query atom type -// for (final String shiftsDB : NMRShiftDB.getSpectraFromNMRShiftDB(pathToNMRShiftDB, nucleus)) { -// if (shiftsDB == null) { -// continue; -// } -// String[][] shiftsDBvalues = casekit.casekit.nmr.dbservice.NMRShiftDB.parseNMRShiftDBSpectrum(shiftsDB); -// for (String[] shiftsDBvalue : shiftsDBvalues) { -// atomIndexDB = Integer.parseInt(shiftsDBvalue[2]); -// // sometimes the MongoDB atom index is wrong and out of array range -// if (atomIndexDB > acDB.getAtomCount() - 1) { -// continue; -// } -// shiftDBDouble = Math.round(Double.parseDouble(shiftsDBvalue[0]) * stepSize) / (double) stepSize; -// // if MongoDB shift value out of min-max-range then skip this shift -// if(shiftDBDouble < minShift || shiftDBDouble > maxShift - 1){ -// continue; -// } -// shiftDBInt = (int) (shiftDBDouble * stepSize); -// neighborhoodCountsMatrix[shiftDBInt - minShift][0] += 1; // increase number of this shift occurence -// neighborhoodCountsMatrix[shiftDBInt - minShift][1] += (acDB.getAtom(atomIndexDB).isInRing()) ? 1 : 0; // increase if atom is a ring member -// neighborhoodCountsMatrix[shiftDBInt - minShift][2] += (acDB.getAtom(atomIndexDB).isAromatic()) ? 
1 : 0; // increase if atom is aromatic -// neighborhoodCountsMatrix[shiftDBInt - minShift][3] += ((acDB.getAtom(atomIndexDB).getImplicitHydrogenCount() != null) && (acDB.getAtom(atomIndexDB).getImplicitHydrogenCount() == 0)) ? 1 : 0; // qC count or equivalents, e.g. qN -// neighborhoodCountsMatrix[shiftDBInt - minShift][4] += ((acDB.getAtom(atomIndexDB).getImplicitHydrogenCount() != null) && (acDB.getAtom(atomIndexDB).getImplicitHydrogenCount() == 1)) ? 1 : 0; // CH count or equivalents, e.g. NH -// neighborhoodCountsMatrix[shiftDBInt - minShift][5] += ((acDB.getAtom(atomIndexDB).getImplicitHydrogenCount() != null) && (acDB.getAtom(atomIndexDB).getImplicitHydrogenCount() == 2)) ? 1 : 0; // CH2 count or equivalents, e.g. NH2 -// neighborhoodCountsMatrix[shiftDBInt - minShift][6] += ((acDB.getAtom(atomIndexDB).getImplicitHydrogenCount() != null) && (acDB.getAtom(atomIndexDB).getImplicitHydrogenCount() == 3)) ? 1 : 0; // CH3 count or equivalents, e.g. NH3 -// // add counts for a specific atom to matrix m -// int[] counts = casekit.casekit.nmr.Utils.getNeighborhoodBondsCount(acDB, atomIndexDB, bondsSet, neighborElems); -// for (int i = 0; i < counts.length; i++) { -// neighborhoodCountsMatrix[shiftDBInt - minShift][3 + 4 + i] += counts[i]; -// } -// // add this atom container index and atom index within it to belonging hash map -// shiftIndicesInACSet.get(shiftDBInt).add(new Integer[]{k, atomIndexDB}); -// } -// } -// } -// } - -} diff --git a/src/casekit/nmr/dbservice/NMRShiftDB.java b/src/casekit/nmr/dbservice/NMRShiftDB.java index c8f7578..65ba4fc 100644 --- a/src/casekit/nmr/dbservice/NMRShiftDB.java +++ b/src/casekit/nmr/dbservice/NMRShiftDB.java @@ -299,6 +299,7 @@ public static String[][] parseNMRShiftDBSpectrum(final String NMRShiftDBSpectrum return values; } + @Deprecated public static String NMRShiftDBSpectrumToBasicTextSpectrum(final String NMRShiftDBSpectrum, final String nucleus, final String description) { if ((NMRShiftDBSpectrum == null) || 
NMRShiftDBSpectrum.trim().isEmpty()) { return null; @@ -341,7 +342,7 @@ public static Spectrum NMRShiftDBSpectrumToSpectrum(final String NMRShiftDBSpect shift = Double.parseDouble(spectrumStringArray[i][0]); intensity = Double.parseDouble(spectrumStringArray[i][1]); multiplicity = spectrumStringArray[i][2]; - spectrum.addSignal(new Signal(new String[]{nucleus}, new Double[]{shift}, multiplicity, "signal", intensity, 0)); + spectrum.addSignal(new Signal(new String[]{nucleus}, new Double[]{shift}, multiplicity, "signal", intensity, 0, 0)); } } catch (Exception e) { return null; @@ -363,76 +364,4 @@ public static Assignment NMRShiftDBSpectrumToAssignment(final String NMRShiftDBS return assignment; } - - // public static Map>>> buildHybridizationDistributions(final String pathToDB) { - // // for atom type -> hybridization -> multiplicity -> shift list - // final Map>>> hybridizationDistributions = new HashMap<>(); - // - // try (final IteratingSDFReader iterator = new IteratingSDFReader(new FileReader(pathToDB), SilentChemObjectBuilder.getInstance())) { - // IAtom atom; - // String nucleus; - // IAtomContainer structure; - // List spectraProperties13C, spectraProperties1H, spectraProperties15N, spectraProperties1D; - // Spectrum spectrum; - // Assignment assignment; - // Signal signal; - // while (iterator.hasNext()) { - // structure = iterator.next(); - // AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(structure); - // Utils.setAromaticity(structure); - // - // spectraProperties13C = getSpectraProperties1D(structure, "13C"); - // spectraProperties1H = getSpectraProperties1D(structure, "1H"); - // spectraProperties15N = getSpectraProperties1D(structure, "15N"); - // - // for (int i = 0; i < structure.getAtomCount(); i++) { - // atom = structure.getAtom(i); - // if (!hybridizationDistributions.containsKey(atom.getSymbol())) { - // hybridizationDistributions.put(atom.getSymbol(), new HashMap<>()); - // } - // if 
(!hybridizationDistributions.get(atom.getSymbol()).containsKey(atom.getHybridization().name())) { - // hybridizationDistributions.get(atom.getSymbol()).put(atom.getHybridization().name(), new HashMap<>()); - // } - // - // switch (atom.getSymbol()) { - // case "C": - // spectraProperties1D = spectraProperties13C; - // nucleus = "13C"; - // break; - // case "H": - // spectraProperties1D = spectraProperties1H; - // nucleus = "1H"; - // break; - // case "N": - // spectraProperties1D = spectraProperties15N; - // nucleus = "15N"; - // break; - // default: - // spectraProperties1D = new ArrayList<>(); - // nucleus = ""; - // break; - // } - // - // for (final String spectrumProperty1D : spectraProperties1D) { - // spectrum = NMRShiftDBSpectrumToSpectrum(structure.getProperty(spectrumProperty1D), nucleus); - // assignment = NMRShiftDBSpectrumToAssignment(structure.getProperty(spectrumProperty1D), nucleus); - // signal = spectrum.getSignal(assignment.getIndex(0, i)); - // - // if (signal != null && signal.getMultiplicity() != null) { - // if (!hybridizationDistributions.get(atom.getSymbol()).get(atom.getHybridization().name()).containsKey(signal.getMultiplicity())) { - // hybridizationDistributions.get(atom.getSymbol()).get(atom.getHybridization().name()).put(signal.getMultiplicity(), new ArrayList<>()); - // } - // hybridizationDistributions.get(atom.getSymbol()).get(atom.getHybridization().name()).get(signal.getMultiplicity()).add(signal.getShift(0)); - // } - // } - // } - // } - // } catch (IOException | CDKException e) { - // e.printStackTrace(); - // } - // - // System.out.println(hybridizationDistributions); - // - // return hybridizationDistributions; - // } } diff --git a/src/casekit/nmr/interpretation/InterpretData.java b/src/casekit/nmr/interpretation/InterpretData.java deleted file mode 100644 index 7a79b69..0000000 --- a/src/casekit/nmr/interpretation/InterpretData.java +++ /dev/null @@ -1,526 +0,0 @@ -/* - * The MIT License - * - * Copyright 2019 Michael Wenk 
[https://github.com/michaelwenk] - * - * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ -package casekit.nmr.interpretation; - -/** - * @author Michael Wenk [https://github.com/michaelwenk] - */ -public class InterpretData { - - // final private IAtomContainer mol; - // final private IMolecularFormula molFormula; - // private HashMap> atomTypeIndices; - // final private HashMap spectra = new HashMap<>(); - // final private HashMap assignments = new HashMap<>(); - // - // /** - // * Creates an instances of this class with an empty class atom container. - // */ - // public InterpretData() { - // this.molFormula = null; - // this.mol = SilentChemObjectBuilder.getInstance().newAtomContainer(); - // this.updateAtomTypeIndices(); - // } - // - // /** - // * Creates an instances of this class with a class atom container consisting - // * of all heavy atoms in given molecular formula. 
- // * - // * @param molFormula IMolecularFormula object for IAtomContainer creation - // */ - // public InterpretData(final IMolecularFormula molFormula) { - // this.molFormula = molFormula; - // this.mol = Utils.removeAtoms(MolecularFormulaManipulator.getAtomContainer(this.molFormula), "H"); - // this.updateAtomTypeIndices(); - // } - // - // - // /** - // * Returns used IMolecularFormula object for this class instance. - // * - // * @return - // */ - // public final IMolecularFormula getMolecularFormula() { - // - // return this.molFormula; - // } - // - // - // /** - // * Returns used IAtomContainer object for this class instance. - // * - // * @return - // */ - // public final IAtomContainer getAtomContainer() { - // - // return this.mol; - // } - // - // - // /** - // * Returns a HashMap object with the indices of all atoms for all atom types - // * (elements) within the atom container of this class. - // * - // * @return - // */ - // public final Map> getAtomTypeIndices() { - // - // return this.atomTypeIndices; - // } - // - // - // /** - // * Sets the indices of all atoms in this class atom container. - // * - // * @see Utils#getAtomTypeIndices(org.openscience.cdk.interfaces.IAtomContainer) - // */ - // private void updateAtomTypeIndices() { - // - // this.atomTypeIndices = Utils.getAtomTypeIndices(this.mol); - // } - // - // /** - // * Returns all given and used spectra. - // * - // * @return - // */ - // public final Map getSpectra() { - // - // return this.spectra; - // } - // - // - // /** - // * Returns all created and used Assignment objects. The assigned indices - // * refer to atom indices in class atom container. - // * - // * @return - // */ - // public final Map getAssignments() { - // - // return this.assignments; - // } - // - // - // /** - // * Returns one specific created and used Assignment object. - // * The assigned indices refer to atom indices in class atom container. 
- // * - // * @param spectrum - // * - // * @return - // */ - // public final Assignment getAssignment(final Spectrum spectrum) { - // - // if (spectrum.getSpecType().equals(CDKConstants.NMRSPECTYPE_1D_DEPT90) || spectrum.getSpecType().equals(CDKConstants.NMRSPECTYPE_1D_DEPT135)) { - // - // return this.getAssignments().get(spectrum.getSpecType()); - // } - // - // return this.assignments.get(spectrum.getSpecType() + "_" + Utils.getSpectrumNucleiAsString(spectrum)); - // } - // - // - // /** - // * Sets the 1D casekit.nmr shift values for given Spectrum object to atoms of the class IAtomContainer. - // * The shift values will be assigned sequentially. - // * In case of a molecular formula is given in this class, the number of - // * shifts must be equal to the number of atoms in this molecular formula. - // * For less shifts in shift list you will be asked for entering equivalences. - // * Otherwise this function will return a false value. - // * In case of no molecular was given to this class, a new atom in the atom container - // * will be created regarding to the input shift list. - // * Each shift value is set to {@link IAtomContainer#setProperty(java.lang.Object, java.lang.Object) - // * as result of Utils#getNMRShiftConstant(java.lang.String)}, depending on - // * the specified atom type (element). - // * After usage of this function, the input Spectrum class object might be extended during - // * equivalent signal selection by user. 
- // * - // * @param spectrum Spectrum class object containing the 1D shift information - // * - // * @throws java.io.IOException - // * @throws org.openscience.cdk.exception.CDKException - // */ - // public final void assign1DSpectrum(final Spectrum spectrum) throws Exception { - // // checks whether number of signals is equal to molecular formula if given - // // if not equal then edit signal list in spectrum - // this.check1DSpectrum(spectrum); - // // assign shift values to atoms sequentially - // this.assignShiftValuesToAtoms(spectrum); - // - // final Assignment assignment = new Assignment(spectrum); - // if (this.atomTypeIndices.get(Utils.getAtomTypeFromSpectrum(spectrum, 0)) != null) { - // assignment.setAssignments(0, this.atomTypeIndices.get(Utils.getAtomTypeFromSpectrum(spectrum, 0))); - // } - // - // this.spectra.put(CDKConstants.NMRSPECTYPE_1D + "_" + Utils.getSpectrumNucleiAsString(spectrum), spectrum); - // this.assignments.put(CDKConstants.NMRSPECTYPE_1D + "_" + Utils.getSpectrumNucleiAsString(spectrum), assignment); - // } - // - // /** - // * Checks the number of signals in a spectrum against the number of atoms - // * in molecular formula of class, if given. In case of different numbers, - // * a user input for spectrum editing will be requested. - // * - // * @param spectrum - // * - // * @throws IOException - // * @see Utils#editSignalsInSpectrum(Spectrum, IMolecularFormula, int) - // */ - // private void check1DSpectrum(final Spectrum spectrum) throws Exception { - // if (this.molFormula != null) { - // final int diff = Utils.getDifferenceSpectrumSizeAndMolecularFormulaCount(spectrum, this.molFormula, 0); - // if (diff != 0) { - // // adjust Spectrum size by user - // Utils.editSignalsInSpectrum(spectrum, this.molFormula, 0); - // } - // } - // } - // - // - // /** - // * Sets shift values in atoms of class atom container as property (see below), sequentially. 
- // * - // * @param spectrum Spectrum class object which contains shifts in first - // * dimension - // * - // * @see Utils#getNMRShiftConstant(java.lang.String) - // */ - // private void assignShiftValuesToAtoms(final Spectrum spectrum) { - // final String atomType = Utils.getAtomTypeFromSpectrum(spectrum, 0); - // final List shifts = spectrum.getShifts(0); - // if ((this.molFormula == null) && !atomType.equals("H")) { - // // (re-)filling up of peaks for that atom type from given peak list in spectrum - // this.removeAtoms(atomType); - // IAtom atom; - // for (final double shift : shifts) { - // atom = new Atom(atomType); - // atom.setProperty(Utils.getNMRShiftConstant(atomType), shift); - // atom.setImplicitHydrogenCount(null); - // this.mol.addAtom(atom); - // } - // this.updateAtomTypeIndices(); - // } - // // assign shifts to atoms as property - // if (this.atomTypeIndices.get(atomType) != null) { - // int assignedShiftCount = 0; - // for (final int i : this.atomTypeIndices.get(atomType)) { - // if (assignedShiftCount < shifts.size()) { - // // shift assignment in atom - // this.mol.getAtom(i).setProperty(Utils.getNMRShiftConstant(atomType), shifts.get(assignedShiftCount)); - // } - // assignedShiftCount++; - // } - // } - // } - // - // - // /** - // * Removes atoms from a given atom type from the class' atom container. - // * - // * @param atomType Atom type (element's name, e.g. 
C or Br) - // * - // * @return IAtomContainer where the atoms were removed - // */ - // private void removeAtoms(final String atomType) { - // if (this.getAtomTypeIndices().get(atomType) == null) { - // return; - // } - // final ArrayList toRemoveList = new ArrayList<>(); - // for (final int i : this.getAtomTypeIndices().get(atomType)) { - // toRemoveList.add(this.mol.getAtom(i)); - // } - // for (IAtom iAtom : toRemoveList) { - // this.mol.removeAtom(iAtom); - // } - // - // this.updateAtomTypeIndices(); - // } - // - // /** - // * Sets the assignments of carbon atoms in class atom container - // * by usage of DEPT90 and DEPT135 information. The implicit hydrogen count - // * property is set too. - // * - // * @param spectrum1D_DEPT90 DEPT90 spectrum - // * @param spectrum1D_DEPT135 DEPT135 spectrum which has to contain intensity - // * information - // * @param tol tolerance value [ppm] for carbon shift matching - // * - // * @return false if 1-dimensional 13C spectrum is missing (not set beforehand) - // * or something is missing in one of the two input spectra - // * - // * @see InterpretData#setImplicitHydrogenCountsFromDEPT() - // */ - // public final boolean assignDEPT(final Spectrum spectrum1D_DEPT90, final Spectrum spectrum1D_DEPT135, final double tol) { - // if ((spectrum1D_DEPT90 == null) || (spectrum1D_DEPT135 == null) || (spectrum1D_DEPT135.getIntensities() == null) || (this.getSpectra().get(CDKConstants.NMRSPECTYPE_1D + "_13C") == null)) { - // return false; - // } - // - // final Assignment assignment1D_DEPT90 = new Assignment(spectrum1D_DEPT90); - // final Assignment assignment1D_DEPT135 = new Assignment(spectrum1D_DEPT135); - // final ArrayList matchesIn1DSpectrum_DEPT90 = this.findMatchesIn1DSpectra(spectrum1D_DEPT90, 0, tol); - // final ArrayList matchesIn1DSpectrum_DEPT135 = this.findMatchesIn1DSpectra(spectrum1D_DEPT135, 0, tol); - // final Assignment assignment1D_13C = this.getAssignment(this.getSpectra().get(CDKConstants.NMRSPECTYPE_1D + 
"_13C")); - // - // for (int i = 0; i < assignment1D_DEPT90.getAssignmentsCount(); i++) { - // if (assignment1D_13C.getAssignment(0, matchesIn1DSpectrum_DEPT90.get(i)) >= 0) { - // assignment1D_DEPT90.setAssignment(0, i, assignment1D_13C.getAssignment(0, matchesIn1DSpectrum_DEPT90.get(i))); - // } - // } - // for (int i = 0; i < assignment1D_DEPT135.getAssignmentsCount(); i++) { - // if (assignment1D_13C.getAssignment(0, matchesIn1DSpectrum_DEPT135.get(i)) >= 0) { - // assignment1D_DEPT135.setAssignment(0, i, assignment1D_13C.getAssignment(0, matchesIn1DSpectrum_DEPT135.get(i))); - // } - // } - // - // this.spectra.put(CDKConstants.NMRSPECTYPE_1D_DEPT90, spectrum1D_DEPT90); - // this.assignments.put(CDKConstants.NMRSPECTYPE_1D_DEPT90, assignment1D_DEPT90); - // this.spectra.put(CDKConstants.NMRSPECTYPE_1D_DEPT135, spectrum1D_DEPT135); - // this.assignments.put(CDKConstants.NMRSPECTYPE_1D_DEPT135, assignment1D_DEPT135); - // - // this.setImplicitHydrogenCountsFromDEPT(); - // - // return true; - // } - // - // - // /** - // * Sets the implicitHydrogenCount() property in atoms of class atom container - // * by using the already set DEPT information. 
- // * - // * @see InterpretData#assignDEPT(casekit.nmr.model.Spectrum, casekit.nmr.model.Spectrum, double) - // */ - // private void setImplicitHydrogenCountsFromDEPT() { - // - // final List intensitiesDEPT135 = this.getSpectra().get(CDKConstants.NMRSPECTYPE_1D_DEPT135).getIntensities(); - // final List matchesDEPT90InAtomContainer = this.getAssignedAtomIndices(this.getSpectra().get(CDKConstants.NMRSPECTYPE_1D_DEPT90), 0); - // final List matchesDEPT135InAtomContainer = this.getAssignedAtomIndices(this.getSpectra().get(CDKConstants.NMRSPECTYPE_1D_DEPT135), 0); - // - // int matchDEPT90, matchDEPT135, hCount, hCountAll = 0; - // for (int i : this.atomTypeIndices.get("C")) { - // if ((this.mol.getAtom(i).getProperty(CDKConstants.NMRSHIFT_CARBON) != null) && (this.mol.getAtom(i).getImplicitHydrogenCount() == null)) { - // matchDEPT90 = matchesDEPT90InAtomContainer.indexOf(i); - // matchDEPT135 = matchesDEPT135InAtomContainer.indexOf(i); - // if (matchDEPT90 >= 0) { - // // CH - // hCount = 1; - // } else if (matchDEPT90 == -1 && matchDEPT135 >= 0) { - // // CH2 or CH3 - // if (intensitiesDEPT135.get(matchDEPT135) < 0) { - // hCount = 2; - // } else if (intensitiesDEPT135.get(matchDEPT135) > 0) { - // hCount = 3; - // } else { - // // qC - // hCount = 0; - // } - // } else { - // // qC - // hCount = 0; - // } - // this.mol.getAtom(i).setImplicitHydrogenCount(hCount); - // if (this.mol.getAtom(i).getImplicitHydrogenCount() >= 3) { - // this.mol.getAtom(i).setHybridization(IAtomType.Hybridization.SP3); - // } - // hCountAll += hCount; - // } - // } - // if (this.molFormula != null) { - // System.out.println("assigned protons to carbons: " + hCountAll + " (" + MolecularFormulaManipulator.getElementCount(this.molFormula, "H") + ") -> " + (MolecularFormulaManipulator.getElementCount(this.molFormula, "H") - hCountAll) + " protons to be attached on hetero atoms!!!"); - // } else { - // System.out.println("assigned protons to carbons: " + hCountAll + "!!!"); - // } - // - // 
} - // - // - // /** - // * @param spectrum Spectrum class object consisting of Signal class objects - // * where the proton shifts values are given in first dimension and the - // * heavy atom shifts in the second. - // * @param tolProton tolerance value [ppm] for proton shift matching - // * @param tolHeavyAtom tolerance value [ppm] for heavy atom shift matching - // */ - // public final void assignHSQC(final Spectrum spectrum, final double tolProton, final double tolHeavyAtom) { - // // assign index of matching atoms to both dimensions and save the Spectrum and Assignment objects in class - // this.assign2DSpectrum(spectrum, tolProton, tolHeavyAtom); - // // in case the 1H spectrum is given, then assign protons to same indices from belonging carbon atoms - // if (this.getAssignments().get(CDKConstants.NMRSPECTYPE_1D + "_1H") != null) { - // final Assignment assignment1D_1H = this.getAssignments().get(CDKConstants.NMRSPECTYPE_1D + "_1H"); - // final Assignment assignment2D_HSQC = this.getAssignments().get(spectrum.getSpecType() + "_" + Utils.getSpectrumNucleiAsString(spectrum)); - // final ArrayList matchesIn1DSpectrum_1H = this.findMatchesIn1DSpectra(spectrum, 0, tolProton); - // - // for (int i = 0; i < assignment2D_HSQC.getAssignmentsCount(); i++) { - // // if heavy atom i has an assignment in class atom container then assign that index i to belonging protons as index - // if (assignment2D_HSQC.getAssignment(1, i) >= 0) { - // assignment1D_1H.setAssignment(0, matchesIn1DSpectrum_1H.get(i), assignment2D_HSQC.getAssignment(1, i)); - // assignment2D_HSQC.setAssignment(0, i, assignment1D_1H.getAssignment(0, matchesIn1DSpectrum_1H.get(i))); - // } - // } - // } - // // attach protons on other heavy atoms than carbons via HSQC assignment counting - // if (!spectrum.getNuclei()[1].equals("13C")) { - // final Assignment assignment2D_HSQC = this.getAssignments().get(spectrum.getSpecType() + "_" + Utils.getSpectrumNucleiAsString(spectrum)); - // for (int i = 0; i < 
assignment2D_HSQC.getAssignmentsCount(); i++) { - // if ((assignment2D_HSQC.getAssignment(1, i) > -1)) { - // if (this.mol.getAtom(assignment2D_HSQC.getAssignment(1, i)).getImplicitHydrogenCount() == null) { - // this.mol.getAtom(assignment2D_HSQC.getAssignment(1, i)).setImplicitHydrogenCount(0); - // } - // this.mol.getAtom(assignment2D_HSQC.getAssignment(1, i)).setImplicitHydrogenCount(this.mol.getAtom(assignment2D_HSQC.getAssignment(1, i)).getImplicitHydrogenCount() + 1); - // } - // } - // } - // } - // - // - // private void assign2DSpectrum(final Spectrum spectrum, final double tolDim1, final double tolDim2) { - // - // final ArrayList matchesQueryIn1DSpectrumDim1 = this.findMatchesIn1DSpectra(spectrum, 0, tolDim1); - // final ArrayList matchesQueryIn1DSpectrumDim2 = this.findMatchesIn1DSpectra(spectrum, 1, tolDim2); - // final ArrayList matches1DInAtomContainerDim1 = this.getAssignedAtomIndices(this.getSpectra().get(CDKConstants.NMRSPECTYPE_1D + "_" + spectrum.getNuclei()[0]), 0); - // final ArrayList matches1DInAtomContainerDim2 = this.getAssignedAtomIndices(this.getSpectra().get(CDKConstants.NMRSPECTYPE_1D + "_" + spectrum.getNuclei()[1]), 0); - // - // final Assignment assignment = new Assignment(spectrum); - // for (int i = 0; i < matchesQueryIn1DSpectrumDim1.size(); i++) { - // if ((matches1DInAtomContainerDim1 != null) && (matchesQueryIn1DSpectrumDim1.get(i) >= 0)) { - // assignment.setAssignment(0, i, matches1DInAtomContainerDim1.get(matchesQueryIn1DSpectrumDim1.get(i))); - // } - // if ((matches1DInAtomContainerDim2 != null) && (matchesQueryIn1DSpectrumDim2.get(i) >= 0)) { - // assignment.setAssignment(1, i, matches1DInAtomContainerDim2.get(matchesQueryIn1DSpectrumDim2.get(i))); - // } - // } - // - // this.spectra.put(spectrum.getSpecType() + "_" + Utils.getSpectrumNucleiAsString(spectrum), spectrum); - // this.assignments.put(spectrum.getSpecType() + "_" + Utils.getSpectrumNucleiAsString(spectrum), assignment); - // } - // - // - // private 
ArrayList findMatchesIn1DSpectra(final Spectrum spectrum, final int dim, final double tol) { - // - // ArrayList matchesQueryInOrigin1DSpectrum = new ArrayList<>(); - // // final ArrayList shiftsQuery = spectrum.getShifts(dim); - // // if(this.getSpectra().get(CDKConstants.NMRSPECTYPE_1D + "_" + spectrum.getNuclei()[dim]) != null){ - // // final ArrayList shiftsOrigin1DSpectrum = this.getSpectra().get(CDKConstants.NMRSPECTYPE_1D + "_" + spectrum.getNuclei()[dim]).getShifts(0); - // // matchesQueryInOrigin1DSpectrum = Utils.findShiftMatches(shiftsOrigin1DSpectrum, shiftsQuery, tol); - // // matchesQueryInOrigin1DSpectrum = Utils.correctShiftMatches(shiftsOrigin1DSpectrum, shiftsQuery, matchesQueryInOrigin1DSpectrum, tol); - // // } else { - // // for (int i = 0; i < spectrum.getSignalCount(); i++) { - // // matchesQueryInOrigin1DSpectrum.add(-1); - // // } - // // } - // - // return matchesQueryInOrigin1DSpectrum; - // } - // - // /** - // * Returns the indices of atoms within the class atom container which match - // * to the shifts of given spectrum and dimension. - // * - // * @param spectrum - // * @param dim - // * - // * @return - // */ - // public final ArrayList getAssignedAtomIndices(final Spectrum spectrum, final int dim) { - // - // if (spectrum == null) { - // return null; - // } else if (this.getAssignment(spectrum) == null) { - // final ArrayList atomIndices = new ArrayList<>(); - // for (int i = 0; i < spectrum.getSignalCount(); i++) { - // atomIndices.add(-1); - // } - // return atomIndices; - // } - // - // return new ArrayList<>(this.getAssignment(spectrum).getAssignments(dim)); - // } - // - // - // /** - // * Sets links between two heavy atoms of H,H-COSY signals. 
- // * - // * @param spectrum Spectrum class object containing the 2D spectrum proton shift information - // * @param tolProton tolerance value [ppm] for matching belonging protons - // * of heavy atom - // * - // * @return - // */ - // public final boolean assignHHCOSY(final Spectrum spectrum, final double tolProton) { - // - // final ArrayList protonShiftMatches1 = this.findMatchesIn1DSpectra(spectrum, 0, tolProton); - // final ArrayList protonShiftMatches2 = this.findMatchesIn1DSpectra(spectrum, 1, tolProton); - // // are all signals bidirectional? - // if (!Utils.isBidirectional(protonShiftMatches1, protonShiftMatches2)) { - // return false; - // } - // this.assign2DSpectrum(spectrum, tolProton, tolProton); - // - // return true; - // } - // - // - // /** - // * Sets links between two carbon atoms in an INADEQUATE signal relationship. - // * Returns true if all signals are bidirectional, so that atom A has a - // * signal according to atom B and vice versa. - // * - // * @param spectrum Spectrum class object consisting of Signal class objects - // * @param tolCarbon tolerance value [ppm] for carbon atom shift matching - // * - // * @return - // */ - // public final boolean assignINADEQUATE(final Spectrum spectrum, final double tolCarbon) { - // - // final ArrayList carbonShiftMatches1 = this.findMatchesIn1DSpectra(spectrum, 0, tolCarbon); - // final ArrayList carbonShiftMatches2 = this.findMatchesIn1DSpectra(spectrum, 1, tolCarbon); - // // are all signals bidirectional? 
- // if (!casekit.nmr.Utils.isBidirectional(carbonShiftMatches1, carbonShiftMatches2)) { - // return false; - // } - // this.assign2DSpectrum(spectrum, tolCarbon, tolCarbon); - // - // final ArrayList indicesInAtomContainerDim1 = this.getAssignedAtomIndices(spectrum, 0); - // final ArrayList indicesInAtomContainerDim2 = this.getAssignedAtomIndices(spectrum, 1); - // for (int i = 0; i < spectrum.getSignalCount(); i++) { - // if ((indicesInAtomContainerDim1.get(i) > -1) && (indicesInAtomContainerDim2.get(i) > -1)) { - // this.setBond(indicesInAtomContainerDim1.get(i), indicesInAtomContainerDim2.get(i)); - // } - // } - // - // return true; - // } - // - // - // private void setBond(final int index1, final int index2) { - // - // if (this.mol.getBond(this.mol.getAtom(index1), this.mol.getAtom(index2)) != null) { - // this.mol.removeBond(this.mol.getAtom(index1), this.mol.getAtom(index2)); - // } - // this.mol.addBond(index1, index2, IBond.Order.UNSET); - // } - // - // - // /** - // * Sets links between heavy atoms which are in HMBC signal relationship. - // * - // * @param spectrum Spectrum class object consisting of Signal class objects - // * where the proton shift values is given first and the heavy atom shifts as the second. 
- // * @param tolProton tolerance value [ppm] for hydrogen shift matching - // * @param tolHeavy tolerance value [ppm] for heavy atom shift matching - // */ - // public final void assignHMBC(final Spectrum spectrum, final double tolProton, final double tolHeavy) { - // - // this.assign2DSpectrum(spectrum, tolProton, tolHeavy); - // } -} diff --git a/src/casekit/nmr/lsd/Constants.java b/src/casekit/nmr/lsd/Constants.java new file mode 100644 index 0000000..e81c698 --- /dev/null +++ b/src/casekit/nmr/lsd/Constants.java @@ -0,0 +1,78 @@ +package casekit.nmr.lsd; + +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; + +public class Constants { + + // valid strings from LSD webpage: C N N5 O S S4 S6 F Cl Br I P P5 Si B X + public static final Map nucleiMap = createNucleiMap(); + public static final Map defaultHybridizationMap = createDefaultHybridizationMap(); + public static final Map defaultProtonsCountPerValencyMap = createDefaultProtonsCountPerValencyMap(); + public static final Map defaultAtomLabelMap = createDefaultAtomLabelMap(); + public static final Map> hybridizationConversionMap = createHybridizationConversionMapMap(); + + private static Map createNucleiMap() { + final Map nuclei = new HashMap<>(); + nuclei.put("C", "13C"); + nuclei.put("N", "15N"); + nuclei.put("H", "1H"); + + return Collections.unmodifiableMap(nuclei); + } + + private static Map createDefaultHybridizationMap() { + final Map defaultHybridization = new HashMap<>(); + defaultHybridization.put("C", "(1 2 3)"); + defaultHybridization.put("N", "(1 2 3)"); + defaultHybridization.put("O", "(2 3)"); + defaultHybridization.put("S", "(1 2 3)"); + + return Collections.unmodifiableMap(defaultHybridization); + } + + private static Map createDefaultProtonsCountPerValencyMap() { + final Map defaultProtonsCountPerValency = new HashMap<>(); + defaultProtonsCountPerValency.put("C", "(0 1 2 3)"); + defaultProtonsCountPerValency.put("N", "(0 1 2)"); + 
defaultProtonsCountPerValency.put("N5", "(0 1 2 3)"); + defaultProtonsCountPerValency.put("N35", "(0 1 2 3)"); + defaultProtonsCountPerValency.put("S", "(0 1)"); + defaultProtonsCountPerValency.put("S4", "(0 1 2 3)"); + defaultProtonsCountPerValency.put("S6", "(0 1 2 3)"); + defaultProtonsCountPerValency.put("S246", "(0 1 2 3)"); + defaultProtonsCountPerValency.put("O", "(0 1)"); + + return defaultProtonsCountPerValency; + } + + private static Map createDefaultAtomLabelMap() { + final Map defaultAtomLabel = new HashMap<>(); + defaultAtomLabel.put("C", "C"); + defaultAtomLabel.put("N", "N35"); + defaultAtomLabel.put("O", "O"); + defaultAtomLabel.put("S", "S246"); + + return Collections.unmodifiableMap(defaultAtomLabel); + } + + private static Map> createHybridizationConversionMapMap() { + // @TODO access this information from MongoDB and store it instead of hard coding it + // possible command in MongoDB: db.hybridizations.aggregate([{$match: {nucleus: "15N"}}, {$group: {_id: null, set: {$addToSet: "$hybridization"}}}]) + // nucleus -> hybridization string -> number + final Map> hybridizationConversionMap = new HashMap<>(); + hybridizationConversionMap.put("13C", new HashMap<>()); + hybridizationConversionMap.get("13C").put("PLANAR3", 3); + hybridizationConversionMap.get("13C").put("SP3", 3); + hybridizationConversionMap.get("13C").put("SP2", 2); + hybridizationConversionMap.get("13C").put("SP1", 1); + hybridizationConversionMap.put("15N", new HashMap<>()); + hybridizationConversionMap.get("15N").put("PLANAR3", 3); + hybridizationConversionMap.get("15N").put("SP3", 3); + hybridizationConversionMap.get("15N").put("SP2", 2); + hybridizationConversionMap.get("15N").put("SP1", 1); + + return Collections.unmodifiableMap(hybridizationConversionMap); + } +} diff --git a/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java b/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java new file mode 100644 index 0000000..5650efa --- /dev/null +++ b/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java 
@@ -0,0 +1,338 @@ +package casekit.nmr.lsd; + +import casekit.io.FileOperations; +import casekit.nmr.model.nmrdisplayer.Correlation; +import casekit.nmr.model.nmrdisplayer.Data; +import casekit.nmr.model.nmrdisplayer.Link; +import casekit.nmr.utils.Utils; + +import java.io.BufferedReader; +import java.io.IOException; +import java.text.SimpleDateFormat; +import java.util.*; + +public class PyLSDInputFileBuilder { + + private static String buildHeader() { + final StringBuilder stringBuilder = new StringBuilder(); + stringBuilder.append("; PyLSD input file created by webCASE\n"); + final SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd 'at' HH:mm:ss z"); + final Date date = new Date(System.currentTimeMillis()); + stringBuilder.append("; ").append(formatter.format(date)); + + return stringBuilder.toString(); + } + + private static String buildFORM(final String mf, final Map elementCounts) { + final StringBuilder stringBuilder = new StringBuilder(); + stringBuilder.append("; Molecular Formula: ").append(mf).append("\n"); + stringBuilder.append("FORM "); + elementCounts.forEach((elem, count) -> stringBuilder.append(elem).append(" ").append(count).append(" ")); + + return stringBuilder.toString(); + } + + private static String buildPIEC() { + return "PIEC 1"; + } + + private static Map buildIndicesMap(final Data data, final Map elementCounts) { + // index in correlation data -> [atom type, indices in PyLSD file...] 
+ final Map indicesMap = new HashMap<>(); + // init element indices within correlations with same order as in correlation data input + final int totalHeavyAtomsCount = elementCounts.entrySet().stream().filter(set -> !set.getKey().equals("H")).map(Map.Entry::getValue).reduce(0, Integer::sum); + int heavyAtomIndexInPyLSDFile = 1; + int protonIndexInPyLSDFile = totalHeavyAtomsCount + 1; + int protonsToInsert; + Correlation correlation; + for (int i = 0; i < data.getCorrelations().getValues().size(); i++) { + correlation = data.getCorrelations().getValues().get(i); + // set entry for each correlation with consideration of equivalences + if (correlation.getAtomType().equals("H")) { + protonsToInsert = 1; + for (final Link link : correlation.getLink()) { + if (link.getExperimentType().equals("hsqc")) { + protonsToInsert += data.getCorrelations().getValues().get(link.getMatch().get(0)).getEquivalence(); + } + } + indicesMap.put(i, new Object[1 + protonsToInsert]); + indicesMap.get(i)[0] = correlation.getAtomType(); + for (int j = 0; j < protonsToInsert; j++) { + indicesMap.get(i)[1 + j] = protonIndexInPyLSDFile; + protonIndexInPyLSDFile++; + } + } else { + indicesMap.put(i, new Object[1 + correlation.getEquivalence() + 1]); + indicesMap.get(i)[0] = correlation.getAtomType(); + for (int j = 1; j <= correlation.getEquivalence() + 1; j++) { + indicesMap.get(i)[j] = heavyAtomIndexInPyLSDFile; + heavyAtomIndexInPyLSDFile++; + } + } + } + + return indicesMap; + } + + private static String buildMULT(final Correlation correlation, final int index, final Map indicesMap, final Map> detectedHybridizations) { + final StringBuilder stringBuilder = new StringBuilder(); + List hybridizations; + StringBuilder hybridizationStringBuilder; + StringBuilder attachedProtonsCountStringBuilder; + if (correlation.getAtomType().equals("H")) { + return null; + } + hybridizations = new ArrayList<>(); + if (correlation.getHybridization() != null && !correlation.getHybridization().isEmpty()) { + // if 
hybridization is already given + if (correlation.getHybridization().equals("SP")) { + hybridizations.add(1); + } else if (correlation.getHybridization().equals("SP2")) { + hybridizations.add(2); + } else { + hybridizations.add(3); + } + } else { + // if hybridization is not given then use the detected ones via MongoDB queries + if (detectedHybridizations.containsKey(index)) { + hybridizations = detectedHybridizations.get(index); + } + } + if (hybridizations.isEmpty()) { + hybridizationStringBuilder = new StringBuilder(Constants.defaultHybridizationMap.get(correlation.getAtomType())); + } else { + hybridizationStringBuilder = new StringBuilder(); + if (hybridizations.size() > 1) { + hybridizationStringBuilder.append("("); + } + for (int k = 0; k < hybridizations.size(); k++) { + hybridizationStringBuilder.append(hybridizations.get(k)); + if (k < hybridizations.size() - 1) { + hybridizationStringBuilder.append(" "); + } + } + if (hybridizations.size() > 1) { + hybridizationStringBuilder.append(")"); + } + } + // set attached protons count + attachedProtonsCountStringBuilder = new StringBuilder(); + // if protons count is given + if (correlation.getProtonsCount() != null && !correlation.getProtonsCount().isEmpty()) { + if (correlation.getProtonsCount().size() == 1) { + attachedProtonsCountStringBuilder.append(correlation.getProtonsCount().get(0)); + } else { + attachedProtonsCountStringBuilder.append("("); + for (final int protonsCount : correlation.getProtonsCount()) { + attachedProtonsCountStringBuilder.append(protonsCount).append(" "); + } + attachedProtonsCountStringBuilder.deleteCharAt(attachedProtonsCountStringBuilder.length() - 1); + attachedProtonsCountStringBuilder.append(")"); + } + } else { // if protons count is not given then set it to default value + attachedProtonsCountStringBuilder.append(Constants.defaultProtonsCountPerValencyMap.get(Constants.defaultAtomLabelMap.get(correlation.getAtomType()))); + } + for (int j = 1; j < indicesMap.get(index).length; 
j++) { + stringBuilder.append("MULT ").append(indicesMap.get(index)[j]).append(" ").append(correlation.getAtomType()).append(" ").append(hybridizationStringBuilder.toString()).append(" ").append(attachedProtonsCountStringBuilder.toString()).append("\n"); + } + + return stringBuilder.toString(); + } + + private static String buildHSQC(final Correlation correlation, final int index, final Map indicesMap) { + final StringBuilder stringBuilder = new StringBuilder(); + if (correlation.getAtomType().equals("H")) { + return null; + } + for (final Link link : correlation.getLink()) { + if (link.getExperimentType().equals("hsqc")) { + for (final int matchIndex : link.getMatch()) { + // for each equivalence of heavy atom and attached equivalent proton + for (int k = 1; k < indicesMap.get(index).length; k++) { + stringBuilder.append("HSQC ").append(indicesMap.get(index)[k]).append(" ").append(indicesMap.get(matchIndex)[k]).append("\n"); + } + } + } + } + + return stringBuilder.toString(); + } + + private static String buildHMBC(final Correlation correlation, final int index, final Data data, final Map indicesMap) { + final StringBuilder stringBuilder = new StringBuilder(); + final String defaultBondDistance = "2 4"; + if (correlation.getAtomType().equals("H")) { + return null; + } + for (final Link link : correlation.getLink()) { + if (link.getExperimentType().equals("hmbc")) { + for (final int matchIndex : link.getMatch()) { + for (int k = 1; k < indicesMap.get(index).length; k++) { + for (int l = 1; l < indicesMap.get(matchIndex).length; l++) { + // only add an HMBC correlation if there is no direct link via HSQC and the equivalence index is not equal + if (!(data.getCorrelations().getValues().get(matchIndex).getAttachment().containsKey(correlation.getAtomType()) && data.getCorrelations().getValues().get(matchIndex).getAttachment().get(correlation.getAtomType()).contains(index) && l == k)) { + stringBuilder.append("HMBC ").append(indicesMap.get(index)[k]).append(" 
").append(indicesMap.get(matchIndex)[l]).append(" ").append(defaultBondDistance).append("\n"); + } + } + } + } + } + } + + return stringBuilder.toString(); + } + + private static String buildCOSY(final Correlation correlation, final int index, final Data data, final Map indicesMap) { + final StringBuilder stringBuilder = new StringBuilder(); + if (!correlation.getAtomType().equals("H")) { + return null; + } + for (final Link link : correlation.getLink()) { + if (link.getExperimentType().equals("cosy")) { + for (final int matchIndex : link.getMatch()) { + // only add an COSY correlation if the two signals there is not equivalent + if (!data.getCorrelations().getValues().get(matchIndex).getId().equals(correlation.getId())) { + for (int k = 1; k < indicesMap.get(index).length; k++) { + for (int l = 1; l < indicesMap.get(matchIndex).length; l++) { + stringBuilder.append("COSY ").append(indicesMap.get(index)[k]).append(" ").append(indicesMap.get(matchIndex)[l]).append("\n"); + } + } + } + } + } + } + + return stringBuilder.toString(); + } + + private static String buildSHIX(final Correlation correlation, final int index, final Map indicesMap) { + final StringBuilder stringBuilder = new StringBuilder(); + if (correlation.getAtomType().equals("H") || correlation.isPseudo()) { + return null; + } + for (int k = 1; k < indicesMap.get(index).length; k++) { + stringBuilder.append("SHIX ").append(indicesMap.get(index)[k]).append(" ").append(correlation.getSignal().getDelta()).append("\n"); + } + + return stringBuilder.toString(); + } + + private static String buildSHIH(final Correlation correlation, final int index, final Map indicesMap) { + final StringBuilder stringBuilder = new StringBuilder(); + if (!correlation.getAtomType().equals("H") || correlation.isPseudo()) { + return null; + } + // only consider protons which are attached via HSQC/HMQC (pseudo and real links) + for (final Link link : correlation.getLink()) { + if ((link.getExperimentType().equals("hsqc") || 
link.getExperimentType().equals("hmqc")) && !link.getMatch().isEmpty()) { // && !link.isPseudo() + for (int k = 1; k < indicesMap.get(index).length; k++) { + stringBuilder.append("SHIH ").append(indicesMap.get(index)[k]).append(" ").append(correlation.getSignal().getDelta()).append("\n"); + } + } + } + + return stringBuilder.toString(); + } + + private static String buildLISTAndPROP(final boolean allowHeteroHeteroBonds) { + final StringBuilder stringBuilder = new StringBuilder(); + // LIST PROP for hetero hetero bonds allowance + if (!allowHeteroHeteroBonds) { + // create hetero atom list automatically + stringBuilder.append("HETE L1").append("; list of hetero atoms\n"); + stringBuilder.append("PROP L1 0 L1 -; no hetero-hetero bonds\n"); + } + + return stringBuilder.toString(); + } + + private static String buildFilters(final String pathToLSDFilterList) { + final StringBuilder stringBuilder = new StringBuilder(); + // DEFF + FEXP -> add filters + stringBuilder.append("; externally defined filters\n"); + final Map filters = new LinkedHashMap<>(); + int counter = 1; + try { + final BufferedReader bufferedReader = FileOperations.readFile(pathToLSDFilterList); + if (bufferedReader != null) { + String line; + while ((line = bufferedReader.readLine()) != null) { + filters.put("F" + counter, line); + counter++; + } + bufferedReader.close(); + } + } catch (IOException e) { + e.printStackTrace(); + } + if (!filters.isEmpty()) { + filters.forEach((label, filePath) -> stringBuilder.append("DEFF ").append(label).append(" \"").append(filePath).append("\"\n")); + stringBuilder.append("\n"); + + stringBuilder.append("FEXP \""); + counter = 0; + for (final String label : filters.keySet()) { + stringBuilder.append("NOT ").append(label); + if (counter < filters.size() - 1) { + stringBuilder.append(" and "); + } + counter++; + } + stringBuilder.append("\"\n"); + } + + return stringBuilder.toString(); + } + + public static String buildPyLSDFileContent(final Data data, final String mf, 
final Map> detectedHybridizations, final boolean allowHeteroHeteroBonds, final String pathToLSDFilterList) { + final HashMap> state = data.getCorrelations().getState(); + boolean hasErrors = state.keySet().stream().anyMatch(s -> state.get(s).containsKey("error")); + if (mf != null && !hasErrors) { + final Map elementCounts = new LinkedHashMap<>(Utils.getMolecularFormulaElementCounts(mf)); + final StringBuilder stringBuilder = new StringBuilder(); + // create header + stringBuilder.append(buildHeader()).append("\n\n"); + // FORM + stringBuilder.append(buildFORM(mf, elementCounts)).append("\n\n"); + // PIEC + stringBuilder.append(buildPIEC()).append("\n\n"); + + final Map> collection = new LinkedHashMap<>(); + collection.put("MULT", new ArrayList<>()); + collection.put("HSQC", new ArrayList<>()); + collection.put("HMBC", new ArrayList<>()); + collection.put("COSY", new ArrayList<>()); + collection.put("SHIX", new ArrayList<>()); + collection.put("SHIH", new ArrayList<>()); + // index in correlation data -> [atom type, index in PyLSD file] + final Map indicesMap = buildIndicesMap(data, elementCounts); + + Correlation correlation; + for (int i = 0; i < data.getCorrelations().getValues().size(); i++) { + correlation = data.getCorrelations().getValues().get(i); + collection.get("MULT").add(buildMULT(correlation, i, indicesMap, detectedHybridizations)); + collection.get("HSQC").add(buildHSQC(correlation, i, indicesMap)); + collection.get("HMBC").add(buildHMBC(correlation, i, data, indicesMap)); + collection.get("COSY").add(buildCOSY(correlation, i, data, indicesMap)); + collection.get("SHIX").add(buildSHIX(correlation, i, indicesMap)); + collection.get("SHIH").add(buildSHIH(correlation, i, indicesMap)); + } + + collection.keySet().forEach(key -> { + collection.get(key).stream().filter(Objects::nonNull).forEach(stringBuilder::append); + stringBuilder.append("\n"); + }); + + // BOND (interpretation, INADEQUATE, previous assignments) -> input fragments + + // LIST PROP for 
certain limitations or properties of atoms in lists, e.g. hetero hetero bonds allowance + stringBuilder.append(buildLISTAndPROP(allowHeteroHeteroBonds)).append("\n"); + // DEFF and FEXP as default filters (bad lists) + stringBuilder.append(buildFilters(pathToLSDFilterList)).append("\n"); + + return stringBuilder.toString(); + } + + return ""; + } +} diff --git a/src/casekit/nmr/model/Signal.java b/src/casekit/nmr/model/Signal.java index 14a709b..2f65e53 100644 --- a/src/casekit/nmr/model/Signal.java +++ b/src/casekit/nmr/model/Signal.java @@ -41,27 +41,24 @@ public class Signal extends Dimensional { private Double intensity; private String kind; private int equivalencesCount; - - //// private Integer phase; - //// public final static int PHASE_NONE = 0, PHASE_POSITIVE = 1, PHASE_NEGATIVE = 2; - //// public final static String[] PHASENAMES = {"NONE", "POSITIVE", "NEGATIVE"}; - // + private int phase; public Signal() { } public Signal(final String[] nuclei) { - this(nuclei, null, null, null, null, 0); + this(nuclei, null, null, null, null, 0, 0); } - public Signal(final String[] nuclei, final Double[] shifts, final String multiplicity, final String kind, final Double intensity, final int equivalencesCount) { + public Signal(final String[] nuclei, final Double[] shifts, final String multiplicity, final String kind, final Double intensity, final int equivalencesCount, final int phase) { super(nuclei); this.shifts = shifts; // this.initShifts(shifts, this.getNDim()); this.multiplicity = multiplicity; this.kind = kind; this.intensity = intensity; this.equivalencesCount = equivalencesCount; + this.phase = phase; } // private Double[] initShifts(final Double[] shifts, final int nDim) { @@ -126,12 +123,12 @@ public void setKind(final String kind) { public Signal buildClone() { - return new Signal(this.getNuclei(), this.shifts, this.multiplicity, this.kind, this.intensity, equivalencesCount); + return new Signal(this.getNuclei(), this.shifts, this.multiplicity, this.kind, 
this.intensity, this.equivalencesCount, this.phase); } @Override public String toString() { - return "Signal{" + "shifts=" + Arrays.toString(shifts) + ", multiplicity='" + multiplicity + '\'' + ", intensity=" + intensity + ", kind='" + kind + '\'' + ", equivalencesCount=" + equivalencesCount + '}'; + return "Signal{" + "shifts=" + Arrays.toString(shifts) + ", multiplicity='" + multiplicity + '\'' + ", intensity=" + intensity + ", kind='" + kind + '\'' + ", equivalencesCount=" + equivalencesCount + ", phase=" + phase + '}'; } public Double[] getShifts() { @@ -145,4 +142,12 @@ public int getEquivalencesCount() { public void setEquivalencesCount(final int equivalencesCount) { this.equivalencesCount = equivalencesCount; } + + public int getPhase() { + return phase; + } + + public void setPhase(final int phase) { + this.phase = phase; + } } diff --git a/src/casekit/nmr/model/nmrdisplayer/Correlation.java b/src/casekit/nmr/model/nmrdisplayer/Correlation.java new file mode 100644 index 0000000..f05573a --- /dev/null +++ b/src/casekit/nmr/model/nmrdisplayer/Correlation.java @@ -0,0 +1,54 @@ +/* + * MIT License + * + * Copyright (c) 2020 Michael Wenk (https://github.com/michaelwenk) + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package casekit.nmr.model.nmrdisplayer; + +import lombok.Getter; +import lombok.NoArgsConstructor; +import lombok.Setter; +import lombok.ToString; + +import java.util.ArrayList; +import java.util.HashMap; + +@NoArgsConstructor +@Getter +@Setter +@ToString + +public class Correlation { + private String id; + private String experimentType; + private String experimentID; + private String atomType; + private HashMap label; + private Signal1D signal; + private ArrayList link; + private int equivalence; + private HashMap> attachment; + private ArrayList protonsCount; + private String hybridization; + private boolean pseudo; + private HashMap edited; +} diff --git a/src/casekit/nmr/model/nmrdisplayer/Correlations.java b/src/casekit/nmr/model/nmrdisplayer/Correlations.java new file mode 100644 index 0000000..9f12d9e --- /dev/null +++ b/src/casekit/nmr/model/nmrdisplayer/Correlations.java @@ -0,0 +1,40 @@ +/* + * MIT License + * + * Copyright (c) 2020 Michael Wenk (https://github.com/michaelwenk) + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice 
and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package casekit.nmr.model.nmrdisplayer; + +import lombok.Getter; +import lombok.NoArgsConstructor; +import lombok.Setter; +import lombok.ToString; + +import java.util.HashMap; + +@NoArgsConstructor +@Getter +@Setter +@ToString +public class Correlations extends Default { + private HashMap> state; +} diff --git a/src/casekit/nmr/model/nmrdisplayer/Data.java b/src/casekit/nmr/model/nmrdisplayer/Data.java new file mode 100644 index 0000000..4e3b9ad --- /dev/null +++ b/src/casekit/nmr/model/nmrdisplayer/Data.java @@ -0,0 +1,45 @@ +/* + * MIT License + * + * Copyright (c) 2020 Michael Wenk (https://github.com/michaelwenk) + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package casekit.nmr.model.nmrdisplayer; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import lombok.Getter; +import lombok.NoArgsConstructor; +import lombok.Setter; +import lombok.ToString; + +import java.util.ArrayList; + +@NoArgsConstructor +@Getter +@Setter +@ToString + +@JsonIgnoreProperties(ignoreUnknown = true) +public class Data { + + private ArrayList spectra; + private Correlations correlations; +} diff --git a/src/casekit/nmr/model/nmrdisplayer/Default.java b/src/casekit/nmr/model/nmrdisplayer/Default.java new file mode 100644 index 0000000..297ce5f --- /dev/null +++ b/src/casekit/nmr/model/nmrdisplayer/Default.java @@ -0,0 +1,42 @@ +/* + * MIT License + * + * Copyright (c) 2020 Michael Wenk (https://github.com/michaelwenk) + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package casekit.nmr.model.nmrdisplayer; + +import lombok.*; + +import java.util.ArrayList; +import java.util.HashMap; + +@NoArgsConstructor +@AllArgsConstructor +@Getter +@Setter +@ToString + +public class Default { + + private HashMap options; + private ArrayList values; +} diff --git a/src/casekit/nmr/model/nmrdisplayer/Link.java b/src/casekit/nmr/model/nmrdisplayer/Link.java new file mode 100644 index 0000000..1fbecd5 --- /dev/null +++ b/src/casekit/nmr/model/nmrdisplayer/Link.java @@ -0,0 +1,48 @@ +/* + * MIT License + * + * Copyright (c) 2020 Michael Wenk (https://github.com/michaelwenk) + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package casekit.nmr.model.nmrdisplayer; + +import lombok.Getter; +import lombok.NoArgsConstructor; +import lombok.Setter; +import lombok.ToString; + +import java.util.ArrayList; + +@NoArgsConstructor +@Getter +@Setter +@ToString +public class Link { + private String experimentType; + private String experimentID; + private String[] atomType; + private Signal2D signal; + private String axis; + private ArrayList match; + private String id; + private String experimentLabel; + private boolean pseudo; +} diff --git a/src/casekit/nmr/model/nmrdisplayer/Range.java b/src/casekit/nmr/model/nmrdisplayer/Range.java new file mode 100644 index 0000000..cbaff13 --- /dev/null +++ b/src/casekit/nmr/model/nmrdisplayer/Range.java @@ -0,0 +1,44 @@ +/* + * MIT License + * + * Copyright (c) 2020 Michael Wenk (https://github.com/michaelwenk) + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package casekit.nmr.model.nmrdisplayer; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import lombok.*; + +import java.util.ArrayList; + +@NoArgsConstructor +@AllArgsConstructor +@Getter +@Setter +@ToString + +@JsonIgnoreProperties(ignoreUnknown = true) +public class Range { + + private String id; + private String kind; + private ArrayList signal; +} diff --git a/src/casekit/nmr/model/nmrdisplayer/Signal1D.java b/src/casekit/nmr/model/nmrdisplayer/Signal1D.java new file mode 100644 index 0000000..166d1d0 --- /dev/null +++ b/src/casekit/nmr/model/nmrdisplayer/Signal1D.java @@ -0,0 +1,44 @@ +/* + * MIT License + * + * Copyright (c) 2020 Michael Wenk (https://github.com/michaelwenk) + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package casekit.nmr.model.nmrdisplayer; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import lombok.*; + +@NoArgsConstructor +@AllArgsConstructor +@Getter +@Setter +@ToString + +@JsonIgnoreProperties(ignoreUnknown = true) +public class Signal1D { + + private String id; + private String kind; + private String multiplicity; + private double delta; + private int sign; +} diff --git a/src/casekit/nmr/model/nmrdisplayer/Signal2D.java b/src/casekit/nmr/model/nmrdisplayer/Signal2D.java new file mode 100644 index 0000000..1366bf4 --- /dev/null +++ b/src/casekit/nmr/model/nmrdisplayer/Signal2D.java @@ -0,0 +1,47 @@ +/* + * MIT License + * + * Copyright (c) 2020 Michael Wenk (https://github.com/michaelwenk) + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package casekit.nmr.model.nmrdisplayer; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import lombok.*; + +import java.util.HashMap; + +@NoArgsConstructor +@AllArgsConstructor +@Getter +@Setter +@ToString + +@JsonIgnoreProperties(ignoreUnknown = true) +public class Signal2D { + + private String id; + private String kind; + private String multiplicity; + private HashMap x; + private HashMap y; + private Integer sign; +} diff --git a/src/casekit/nmr/model/nmrdisplayer/Spectrum.java b/src/casekit/nmr/model/nmrdisplayer/Spectrum.java new file mode 100644 index 0000000..d6bb1df --- /dev/null +++ b/src/casekit/nmr/model/nmrdisplayer/Spectrum.java @@ -0,0 +1,87 @@ +/* + * MIT License + * + * Copyright (c) 2020 Michael Wenk (https://github.com/michaelwenk) + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package casekit.nmr.model.nmrdisplayer; + +import casekit.nmr.model.Signal; +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import lombok.Getter; +import lombok.NoArgsConstructor; +import lombok.Setter; +import lombok.ToString; + +import java.util.ArrayList; +import java.util.HashMap; + +@NoArgsConstructor +@Getter +@Setter +@ToString + +@JsonIgnoreProperties(ignoreUnknown = true) +public class Spectrum { + + private String id; + private Default ranges; + private Default zones; + private HashMap info; + + public casekit.nmr.model.Spectrum toSpectrum(final boolean considerSignalKind) { + final int dimension = (int) info.get("dimension"); + final boolean isFid = (boolean) info.get("isFid"); + + if (!isFid) { + if (dimension == 1) { + final String nucleus = (String) info.get("nucleus"); + final casekit.nmr.model.Spectrum spectrum = new casekit.nmr.model.Spectrum(new String[]{nucleus}); + ranges.getValues().forEach(range -> range.getSignal().forEach(signal1D -> { + if (considerSignalKind && signal1D.getKind().equals("signal")) { + spectrum.addSignal(new Signal(new String[]{nucleus}, new Double[]{signal1D.getDelta()}, signal1D.getMultiplicity(), signal1D.getKind(), null, 0, 0)); + } + })); + spectrum.setSolvent((String) info.get("solvent")); + spectrum.setSpecType((String) info.get("experiment")); + + return spectrum; + + } else if (dimension == 2) { + final String[] nuclei = ((ArrayList) info.get("nucleus")).toArray(new String[]{}); + final casekit.nmr.model.Spectrum spectrum = new casekit.nmr.model.Spectrum(nuclei); + + zones.getValues().forEach(zone -> zone.getSignal().forEach(signal2D -> { + if (considerSignalKind && signal2D.getKind().equals("signal")) { + 
spectrum.addSignal(new Signal(nuclei, new Double[]{(Double) signal2D.getX().get("delta"), (Double) signal2D.getY().get("delta")}, signal2D.getMultiplicity(), signal2D.getKind(), null, 0, 0)); + } + })); + spectrum.setSolvent((String) info.get("solvent")); + spectrum.setSpecType((String) info.get("experiment")); + + return spectrum; + } + } + + return null; + } + +} diff --git a/src/casekit/nmr/model/nmrdisplayer/Zone.java b/src/casekit/nmr/model/nmrdisplayer/Zone.java new file mode 100644 index 0000000..17ad0e4 --- /dev/null +++ b/src/casekit/nmr/model/nmrdisplayer/Zone.java @@ -0,0 +1,44 @@ +/* + * MIT License + * + * Copyright (c) 2020 Michael Wenk (https://github.com/michaelwenk) + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +package casekit.nmr.model.nmrdisplayer; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import lombok.*; + +import java.util.ArrayList; + +@NoArgsConstructor +@AllArgsConstructor +@Getter +@Setter +@ToString + +@JsonIgnoreProperties(ignoreUnknown = true) +public class Zone { + + private String id; + private String kind; + private ArrayList signal; +} diff --git a/src/casekit/nmr/parse/Parser.java b/src/casekit/nmr/parse/Parser.java deleted file mode 100644 index ee44d86..0000000 --- a/src/casekit/nmr/parse/Parser.java +++ /dev/null @@ -1,211 +0,0 @@ -/* - * The MIT License - * - * Copyright 2019 Michael Wenk [https://github.com/michaelwenk] - * - * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - -package casekit.nmr.parse; - -import casekit.io.FileParser; -import casekit.nmr.Utils; -import casekit.nmr.model.Signal; -import casekit.nmr.model.Spectrum; -import org.w3c.dom.Document; -import org.w3c.dom.NodeList; -import org.xml.sax.SAXException; - -import javax.xml.parsers.ParserConfigurationException; -import java.io.BufferedReader; -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; - -@Deprecated -public class Parser { - - - /** - * Creates a Spectrum class object from given 1D casekit.nmr input file in CSV or XML format. - * The extension of given file is used to determine the format. - * - * @param pathToFile path to peak table (Bruker's TopSpin csv or xml - * file format) - * @param nucleus nucleus to use for spectrum creation, e.g. "13C" - * - * @return - * - * @throws Exception - */ - public static Spectrum parse1DNMR(final String pathToFile, final String nucleus) throws Exception { - switch (Utils.getFileFormat(pathToFile)) { - case "csv": - return CSVtoSpectrum(pathToFile, new int[]{4}, new String[]{nucleus}, 6); - case "xml": - return XMLtoSpectrum(pathToFile, 1, new int[]{1}, new String[]{nucleus}); - default: - return null; - } - } - - /** - * Creates a Spectrum class object from given 2D casekit.nmr input file in CSV or XML format. - * The extension of given file is used to determine the format. - * - * @param pathToFile path to peak table (Bruker's TopSpin csv or xml - * file format) - * @param nuclei nuclei to use for spectrum creation, e.g. 
["13C", "13C] - * - * @return - * - * @throws Exception - */ - public static Spectrum parse2DNMR(final String pathToFile, final String[] nuclei) throws Exception { - switch (Utils.getFileFormat(pathToFile)) { - case "csv": - return CSVtoSpectrum(pathToFile, new int[]{5, 6}, nuclei, 9); - case "xml": - return XMLtoSpectrum(pathToFile, 2, new int[]{2, 1}, nuclei); - default: - return null; - } - } - - /** - * Reads a specific column of a casekit.nmr peak table and stores it into an - * ArrayList object. - * - * @param pathToCSV path to casekit.nmr peak table in CSV file format - * @param column column index to select in peak table - * - * @return ArrayList of Double shift values - * - * @throws IOException - */ - private static ArrayList CSVtoPeakList(final String pathToCSV, final int column) throws IOException { - final ArrayList shifts = new ArrayList<>(); - String line; - String[] tokens; - final BufferedReader fileReader = FileParser.parseText(pathToCSV); - while ((line = fileReader.readLine()) != null) { - tokens = line.split(","); - // get shift value - if (tokens[column].trim().matches("^[+|-]{0,1}\\d+\\.{0,1}\\d*")) { - shifts.add(Double.parseDouble(tokens[column].trim())); - } - } - fileReader.close(); - - return shifts; - } - - /** - * Reads specific columns of one casekit.nmr peak table to obtain a Spectrum class - * object and set intensitiy values. - * The number of columns and atom types has to be the same and defines the - * dimension of the returning spectrum. 
- * - * @param pathToCSV path to casekit.nmr peak table in CSV file format - * @param columns column indices to select in peak table - * @param nuclei nuclei for each dimension - * @param intensityColumnIndex column index for intensity values - * - * @return Spectrum class object containing the peak lists - * - * @throws Exception - */ - private static Spectrum CSVtoSpectrum(final String pathToCSV, final int[] columns, final String[] nuclei, final int intensityColumnIndex) throws Exception { - // assumes the same number of selected columns (dimensions) and atom types - if (columns.length != nuclei.length) { - return null; - } - final Spectrum spectrum = new Spectrum(nuclei); - List shiftList; - for (int col = 0; col < columns.length; col++) { - shiftList = CSVtoPeakList(pathToCSV, columns[col]); - if (col == 0) { - for (int i = 0; i < shiftList.size(); i++) { - spectrum.addSignal(new Signal(spectrum.getNuclei(), new Double[]{shiftList.get(i)}, null, null, null, 0)); - } - } - } - // spectrum.setIntensities(CSVtoPeakList(pathToCSV, intensityColumnIndex)); - - return spectrum; - } - - /** - * Reads a casekit.nmr peak XML file and returns one attribute of nodes (column) into an - * ArrayList object. - * The XML file must be in Bruker's TopSpin format. 
- * - * @param pathToXML Path to XML file - * @param dim number of dimensions of given data 1 (1D) or 2 (2D) - * @param attribute which attribute index in XML peak nodes should be used: - * 1 (shift of 1st dimension), 2 (shift of 2nd dimension if 2D data, - * intensity if 1D data) or 3 (intensity if 2D data) - * - * @return ArrayList of Double shift values - * - * @throws IOException - * @throws javax.xml.parsers.ParserConfigurationException - * @throws org.xml.sax.SAXException - */ - private static ArrayList XMLtoPeakList(final String pathToXML, final int dim, final int attribute) throws IOException, ParserConfigurationException, SAXException { - // assumes a attribute value between 1 and 3 - if (attribute < 1 || attribute > 3) { - return null; - } - - final ArrayList shifts = new ArrayList<>(); - final Document doc = FileParser.parseXML(pathToXML); - - final NodeList peakLists = doc.getElementsByTagName("Peak" + dim + "D"); - for (int i = 0; i < peakLists.getLength(); i++) { - shifts.add(Double.parseDouble(peakLists.item(i).getAttributes().item(attribute - 1).getNodeValue())); - } - - return shifts; - } - - /** - * Reads specific columns of casekit.nmr XML files to obtain a Spectrum class - * object. - * The XML file must be in Bruker's TopSpin format. 
- * - * @param pathToXML path to casekit.nmr XML file in Bruker's TopSpin XML file format - * @param ndim number of dimensions: 1 (1D) or 2 (2D) - * @param attributes which attribute indices in XML peak nodes should be used: - * 1 (shift of 1st dimension), 2 (shift of 2nd dimension if 2D data) - * @param nuclei nuclei for each dimension - * - * @return Spectrum class object containing the selected peak lists - * - * @throws Exception - */ - private static Spectrum XMLtoSpectrum(final String pathToXML, final int ndim, final int[] attributes, final String[] nuclei) throws Exception { - - // assumes the same number of dims, attributes and atom types and a maximum number of dims of 2 - if ((ndim != attributes.length) || (ndim != nuclei.length) || (attributes.length != nuclei.length) || (ndim < 1 || ndim > 2)) { - return null; - } - final Spectrum spectrum = new Spectrum(nuclei); - ArrayList shiftList; - for (int dim = 0; dim < ndim; dim++) { - shiftList = XMLtoPeakList(pathToXML, ndim, attributes[dim]); - if (dim == 0) { - for (int i = 0; i < (shiftList != null ? shiftList.size() : 0); i++) { - spectrum.addSignal(new Signal(spectrum.getNuclei(), new Double[]{shiftList.get(i)}, null, null, null, 0)); - } - } - } - // spectrum.setIntensities(XMLtoPeakList(pathToXML, ndim, ndim + 1)); - - return spectrum; - } -} diff --git a/src/casekit/nmr/predict/Predict.java b/src/casekit/nmr/predict/Predict.java index 42a1347..28f17b9 100644 --- a/src/casekit/nmr/predict/Predict.java +++ b/src/casekit/nmr/predict/Predict.java @@ -34,6 +34,7 @@ import java.util.ArrayList; import java.util.HashMap; +import java.util.Map; /** * @author Michael Wenk [https://github.com/michaelwenk] @@ -44,7 +45,7 @@ public class Predict { * Predicts a shift value for a central atom based on its HOSE code and a * given HOSE code lookup table. The prediction is done by using the median * of all occurring shifts in lookup table for the given HOSE code.
- * Specified for carbons (13C) only -> {@link casekit.nmr.Utils#getMultiplicityFromHydrogenCount(int)}. + * Specified for carbons (13C) only -> {@link casekit.nmr.Utils#getMultiplicityFromProtonsCount(int)}. * * @param HOSECodeLookupTable HashMap containing HOSE codes as keys and a list of chemical shifts * of occurring central atoms as values @@ -54,7 +55,7 @@ public class Predict { * * @see casekit.nmr.Utils#getMedian(ArrayList) */ - public static Double predictShift(final HashMap> HOSECodeLookupTable, final String HOSECode) { + public static Double predictShift(final Map> HOSECodeLookupTable, final String HOSECode) { if (HOSECodeLookupTable.containsKey(HOSECode)) { return Utils.getMedian(HOSECodeLookupTable.get(HOSECode)); // return Utils.getMean(HOSECodeLookupTable.get(HOSECode)); @@ -67,7 +68,7 @@ public static Double predictShift(final HashMap> HOSEC * Predicts a signal for a central atom based on its HOSE code and a * given HOSE code lookup table. The prediction is done by using the mean * of all occurring shifts in lookup table for the given HOSE code.
- * Specified for carbons (13C) only -> {@link casekit.nmr.Utils#getMultiplicityFromHydrogenCount(int)}. + * Specified for carbons (13C) only -> {@link casekit.nmr.Utils#getMultiplicityFromProtonsCount(int)}. * * @param HOSECodeLookupTable HashMap containing HOSE codes as keys and a list of chemical shifts * of occurring central atoms as values @@ -81,7 +82,7 @@ public static Double predictShift(final HashMap> HOSEC * @throws CDKException * @see #predictShift(HashMap, String) */ - public static Signal predictSignal(final HashMap> HOSECodeLookupTable, final IAtomContainer ac, final int atomIndex, final Integer maxSphere, final String nucleus) throws Exception { + public static Signal predictSignal(final Map> HOSECodeLookupTable, final IAtomContainer ac, final int atomIndex, final Integer maxSphere, final String nucleus) throws Exception { if (!Utils.checkIndexInAtomContainer(ac, atomIndex)) { return null; } @@ -90,13 +91,13 @@ public static Signal predictSignal(final HashMap> HOSE if (predictedShift == null) { return null; } - return new Signal(new String[]{nucleus}, new Double[]{predictedShift}, Utils.getMultiplicityFromHydrogenCount(ac.getAtom(atomIndex).getImplicitHydrogenCount()), "signal", null, 1); + return new Signal(new String[]{nucleus}, new Double[]{predictedShift}, Utils.getMultiplicityFromProtonsCount(ac.getAtom(atomIndex).getImplicitHydrogenCount()), "signal", null, 0, 0); } /** * Predicts a spectrum for a given structure based on HOSE code of atoms with specified nucleus and a * given HOSE code lookup table.
- * Specified for carbons (13C) only -> {@link casekit.nmr.Utils#getMultiplicityFromHydrogenCount(int)}. + * Specified for carbons (13C) only -> {@link casekit.nmr.Utils#getMultiplicityFromProtonsCount(int)}. * * @param HOSECodeLookupTable HashMap containing HOSE codes as keys and a list of chemical shifts * of occurring central atoms as values @@ -113,7 +114,7 @@ public static Spectrum predictSpectrum(final HashMap> final Spectrum predictedSpectrum = new Spectrum(new String[]{nucleus}); Signal signal; for (final IAtom atom : ac.atoms()) { - if (atom.getSymbol().equals(Utils.getAtomTypeFromSpectrum(predictedSpectrum, 0))) { + if (atom.getSymbol().equals(casekit.nmr.utils.Utils.getAtomTypeFromSpectrum(predictedSpectrum, 0))) { signal = Predict.predictSignal(HOSECodeLookupTable, ac, atom.getIndex(), maxSphere, nucleus); if (signal == null) { continue; diff --git a/src/casekit/nmr/utils/Utils.java b/src/casekit/nmr/utils/Utils.java new file mode 100644 index 0000000..100df32 --- /dev/null +++ b/src/casekit/nmr/utils/Utils.java @@ -0,0 +1,70 @@ +package casekit.nmr.utils; + +import casekit.nmr.model.Spectrum; +import casekit.nmr.model.nmrdisplayer.Correlation; +import org.openscience.cdk.interfaces.IMolecularFormula; +import org.openscience.cdk.silent.SilentChemObjectBuilder; +import org.openscience.cdk.tools.manipulator.MolecularFormulaManipulator; + +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +public class Utils { + + public static String getMultiplicityFromProtonsCount(final Correlation correlation) { + if (correlation.getAtomType().equals("C") && correlation.getProtonsCount().size() == 1) { + switch (correlation.getProtonsCount().get(0)) { + case 0: + return "s"; + case 1: + return "d"; + case 2: + return "t"; + case 3: + return "q"; + default: + return null; + } + } + return null; + } + + public static String 
getAtomTypeFromSpectrum(final Spectrum spectrum, final int dim) { + if (spectrum.containsDim(dim)) { + return getAtomTypeFromNucleus(spectrum.getNuclei()[dim]); + } + + return null; + } + + public static String getAtomTypeFromNucleus(final String nucleus) { + final String[] nucleusSplit = nucleus.split("\\d"); + return nucleusSplit[nucleusSplit.length - 1]; + } + + public static Map getMolecularFormulaElementCounts(final String mf) { + final LinkedHashMap counts = new LinkedHashMap<>(); + final IMolecularFormula iMolecularFormula = Utils.getMolecularFormulaFromString(mf); + final List elements = new ArrayList<>(); + final Matcher matcher = Pattern.compile("([A-Z][a-z]*)").matcher(mf); + + while (matcher.find()) { + elements.add(matcher.group(1)); + } + for (final String element : elements) { + counts.put(element, MolecularFormulaManipulator.getElementCount(iMolecularFormula, element)); + } + + return counts; + } + + public static IMolecularFormula getMolecularFormulaFromString(final String mf) { + return MolecularFormulaManipulator.getMolecularFormula(mf, SilentChemObjectBuilder.getInstance()); + } + + +} From fadb56636e5d63bbee637edd7cb5a72e56d81845 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Thu, 21 Jan 2021 11:50:19 +0100 Subject: [PATCH 151/405] rearrange some variables; filter same HMBC and COSY content which occurs multiple times --- .../nmr/lsd/PyLSDInputFileBuilder.java | 30 +++++++++---------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java b/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java index 5650efa..b33cc3d 100644 --- a/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java +++ b/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java @@ -75,14 +75,14 @@ private static Map buildIndicesMap(final Data data, final Map } private static String buildMULT(final Correlation correlation, final int index, final Map indicesMap, final Map> detectedHybridizations) { - final StringBuilder stringBuilder = new 
StringBuilder(); - List hybridizations; - StringBuilder hybridizationStringBuilder; - StringBuilder attachedProtonsCountStringBuilder; if (correlation.getAtomType().equals("H")) { return null; } - hybridizations = new ArrayList<>(); + final StringBuilder stringBuilder = new StringBuilder(); + List hybridizations = new ArrayList<>(); + StringBuilder hybridizationStringBuilder; + StringBuilder attachedProtonsCountStringBuilder; + if (correlation.getHybridization() != null && !correlation.getHybridization().isEmpty()) { // if hybridization is already given if (correlation.getHybridization().equals("SP")) { @@ -140,10 +140,10 @@ private static String buildMULT(final Correlation correlation, final int index, } private static String buildHSQC(final Correlation correlation, final int index, final Map indicesMap) { - final StringBuilder stringBuilder = new StringBuilder(); if (correlation.getAtomType().equals("H")) { return null; } + final StringBuilder stringBuilder = new StringBuilder(); for (final Link link : correlation.getLink()) { if (link.getExperimentType().equals("hsqc")) { for (final int matchIndex : link.getMatch()) { @@ -159,11 +159,11 @@ private static String buildHSQC(final Correlation correlation, final int index, } private static String buildHMBC(final Correlation correlation, final int index, final Data data, final Map indicesMap) { - final StringBuilder stringBuilder = new StringBuilder(); - final String defaultBondDistance = "2 4"; if (correlation.getAtomType().equals("H")) { return null; } + final String defaultBondDistance = "2 4"; + final Set uniqueSet = new LinkedHashSet<>(); // in case of same content exists multiple times for (final Link link : correlation.getLink()) { if (link.getExperimentType().equals("hmbc")) { for (final int matchIndex : link.getMatch()) { @@ -171,7 +171,7 @@ private static String buildHMBC(final Correlation correlation, final int index, for (int l = 1; l < indicesMap.get(matchIndex).length; l++) { // only add an HMBC 
correlation if there is no direct link via HSQC and the equivalence index is not equal if (!(data.getCorrelations().getValues().get(matchIndex).getAttachment().containsKey(correlation.getAtomType()) && data.getCorrelations().getValues().get(matchIndex).getAttachment().get(correlation.getAtomType()).contains(index) && l == k)) { - stringBuilder.append("HMBC ").append(indicesMap.get(index)[k]).append(" ").append(indicesMap.get(matchIndex)[l]).append(" ").append(defaultBondDistance).append("\n"); + uniqueSet.add(indicesMap.get(index)[k] + " " + indicesMap.get(matchIndex)[l]); } } } @@ -179,14 +179,14 @@ private static String buildHMBC(final Correlation correlation, final int index, } } - return stringBuilder.toString(); + return uniqueSet.stream().map(str -> "HMBC " + str + " " + defaultBondDistance + "\n").reduce("", (strAll, str) -> strAll + str); } private static String buildCOSY(final Correlation correlation, final int index, final Data data, final Map indicesMap) { - final StringBuilder stringBuilder = new StringBuilder(); if (!correlation.getAtomType().equals("H")) { return null; } + final Set uniqueSet = new LinkedHashSet<>(); // in case of same content exists multiple times for (final Link link : correlation.getLink()) { if (link.getExperimentType().equals("cosy")) { for (final int matchIndex : link.getMatch()) { @@ -194,7 +194,7 @@ private static String buildCOSY(final Correlation correlation, final int index, if (!data.getCorrelations().getValues().get(matchIndex).getId().equals(correlation.getId())) { for (int k = 1; k < indicesMap.get(index).length; k++) { for (int l = 1; l < indicesMap.get(matchIndex).length; l++) { - stringBuilder.append("COSY ").append(indicesMap.get(index)[k]).append(" ").append(indicesMap.get(matchIndex)[l]).append("\n"); + uniqueSet.add(indicesMap.get(index)[k] + " " + indicesMap.get(matchIndex)[l]); } } } @@ -202,14 +202,14 @@ private static String buildCOSY(final Correlation correlation, final int index, } } - return 
stringBuilder.toString(); + return uniqueSet.stream().map(str -> "COSY " + str + "\n").reduce("", (strAll, str) -> strAll + str); } private static String buildSHIX(final Correlation correlation, final int index, final Map indicesMap) { - final StringBuilder stringBuilder = new StringBuilder(); if (correlation.getAtomType().equals("H") || correlation.isPseudo()) { return null; } + final StringBuilder stringBuilder = new StringBuilder(); for (int k = 1; k < indicesMap.get(index).length; k++) { stringBuilder.append("SHIX ").append(indicesMap.get(index)[k]).append(" ").append(correlation.getSignal().getDelta()).append("\n"); } @@ -218,10 +218,10 @@ private static String buildSHIX(final Correlation correlation, final int index, } private static String buildSHIH(final Correlation correlation, final int index, final Map indicesMap) { - final StringBuilder stringBuilder = new StringBuilder(); if (!correlation.getAtomType().equals("H") || correlation.isPseudo()) { return null; } + final StringBuilder stringBuilder = new StringBuilder(); // only consider protons which are attached via HSQC/HMQC (pseudo and real links) for (final Link link : correlation.getLink()) { if ((link.getExperimentType().equals("hsqc") || link.getExperimentType().equals("hmqc")) && !link.getMatch().isEmpty()) { // && !link.isPseudo() From db09eda016191605276b6a1a19c1038965b7cb15 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Thu, 21 Jan 2021 13:20:19 +0100 Subject: [PATCH 152/405] extended list of files to ignore --- .gitignore | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..549e00a --- /dev/null +++ b/.gitignore @@ -0,0 +1,33 @@ +HELP.md +target/ +!.mvn/wrapper/maven-wrapper.jar +!**/src/main/**/target/ +!**/src/test/**/target/ + +### STS ### +.apt_generated +.classpath +.factorypath +.project +.settings +.springBeans +.sts4-cache + +### IntelliJ IDEA ### +.idea +*.iws 
+*.iml +*.ipr + +### NetBeans ### +/nbproject/private/ +/nbbuild/ +/dist/ +/nbdist/ +/.nb-gradle/ +build/ +!**/src/main/**/build/ +!**/src/test/**/build/ + +### VS Code ### +.vscode/ From 09a9ec77e96c81a218047c4cdf3bb4bb14e43f0a Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Wed, 3 Feb 2021 21:17:05 +0100 Subject: [PATCH 153/405] renaming --- src/casekit/io/{FileOperations.java => FileSystem.java} | 2 +- src/casekit/nmr/lsd/Constants.java | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) rename src/casekit/io/{FileOperations.java => FileSystem.java} (98%) diff --git a/src/casekit/io/FileOperations.java b/src/casekit/io/FileSystem.java similarity index 98% rename from src/casekit/io/FileOperations.java rename to src/casekit/io/FileSystem.java index d41eaf7..f68fcbf 100644 --- a/src/casekit/io/FileOperations.java +++ b/src/casekit/io/FileSystem.java @@ -14,7 +14,7 @@ import java.io.*; -public class FileOperations { +public class FileSystem { public static BufferedReader readFile(final String pathToFile) { try { diff --git a/src/casekit/nmr/lsd/Constants.java b/src/casekit/nmr/lsd/Constants.java index e81c698..de34ce5 100644 --- a/src/casekit/nmr/lsd/Constants.java +++ b/src/casekit/nmr/lsd/Constants.java @@ -11,7 +11,7 @@ public class Constants { public static final Map defaultHybridizationMap = createDefaultHybridizationMap(); public static final Map defaultProtonsCountPerValencyMap = createDefaultProtonsCountPerValencyMap(); public static final Map defaultAtomLabelMap = createDefaultAtomLabelMap(); - public static final Map> hybridizationConversionMap = createHybridizationConversionMapMap(); + public static final Map> hybridizationConversionMap = createHybridizationConversionMap(); private static Map createNucleiMap() { final Map nuclei = new HashMap<>(); @@ -28,6 +28,7 @@ private static Map createDefaultHybridizationMap() { defaultHybridization.put("N", "(1 2 3)"); defaultHybridization.put("O", "(2 3)"); defaultHybridization.put("S", "(1 2 3)"); + 
defaultHybridization.put("I", "3"); return Collections.unmodifiableMap(defaultHybridization); } @@ -43,6 +44,7 @@ private static Map createDefaultProtonsCountPerValencyMap() { defaultProtonsCountPerValency.put("S6", "(0 1 2 3)"); defaultProtonsCountPerValency.put("S246", "(0 1 2 3)"); defaultProtonsCountPerValency.put("O", "(0 1)"); + defaultProtonsCountPerValency.put("I", "(0 1)"); return defaultProtonsCountPerValency; } @@ -53,11 +55,12 @@ private static Map createDefaultAtomLabelMap() { defaultAtomLabel.put("N", "N35"); defaultAtomLabel.put("O", "O"); defaultAtomLabel.put("S", "S246"); + defaultAtomLabel.put("I", "I"); return Collections.unmodifiableMap(defaultAtomLabel); } - private static Map> createHybridizationConversionMapMap() { + private static Map> createHybridizationConversionMap() { // @TODO access this information from MongoDB and store it instead of hard coding it // possible command in MongoDB: db.hybridizations.aggregate([{$match: {nucleus: "15N"}}, {$group: {_id: null, set: {$addToSet: "$hybridization"}}}]) // nucleus -> hybridization string -> number From 3bf000a2d0fb240946cc9a0c956eba8ac2e6e0c7 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Wed, 3 Feb 2021 21:20:22 +0100 Subject: [PATCH 154/405] usage of UUID header in header; added "hmqc" to consider next to "hsqc"; added equivalence information in MULT part; COSY only takes correlations which are ambiguous --- .../nmr/lsd/PyLSDInputFileBuilder.java | 33 ++++++++++++------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java b/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java index b33cc3d..1d7f805 100644 --- a/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java +++ b/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java @@ -1,6 +1,6 @@ package casekit.nmr.lsd; -import casekit.io.FileOperations; +import casekit.io.FileSystem; import casekit.nmr.model.nmrdisplayer.Correlation; import casekit.nmr.model.nmrdisplayer.Data; import 
casekit.nmr.model.nmrdisplayer.Link; @@ -13,9 +13,10 @@ public class PyLSDInputFileBuilder { - private static String buildHeader() { + private static String buildHeader(final String uuid) { final StringBuilder stringBuilder = new StringBuilder(); stringBuilder.append("; PyLSD input file created by webCASE\n"); + stringBuilder.append("; ").append(uuid).append("\n"); final SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd 'at' HH:mm:ss z"); final Date date = new Date(System.currentTimeMillis()); stringBuilder.append("; ").append(formatter.format(date)); @@ -51,7 +52,7 @@ private static Map buildIndicesMap(final Data data, final Map if (correlation.getAtomType().equals("H")) { protonsToInsert = 1; for (final Link link : correlation.getLink()) { - if (link.getExperimentType().equals("hsqc")) { + if (link.getExperimentType().equals("hsqc") || link.getExperimentType().equals("hmqc")) { protonsToInsert += data.getCorrelations().getValues().get(link.getMatch().get(0)).getEquivalence(); } } @@ -133,7 +134,11 @@ private static String buildMULT(final Correlation correlation, final int index, attachedProtonsCountStringBuilder.append(Constants.defaultProtonsCountPerValencyMap.get(Constants.defaultAtomLabelMap.get(correlation.getAtomType()))); } for (int j = 1; j < indicesMap.get(index).length; j++) { - stringBuilder.append("MULT ").append(indicesMap.get(index)[j]).append(" ").append(correlation.getAtomType()).append(" ").append(hybridizationStringBuilder.toString()).append(" ").append(attachedProtonsCountStringBuilder.toString()).append("\n"); + stringBuilder.append("MULT ").append(indicesMap.get(index)[j]).append(" ").append(correlation.getAtomType()).append(" ").append(hybridizationStringBuilder.toString()).append(" ").append(attachedProtonsCountStringBuilder.toString()); + if (j >= 2) { + stringBuilder.append("; equivalent to ").append(indicesMap.get(index)[1]); + } + stringBuilder.append("\n"); } return stringBuilder.toString(); @@ -145,7 +150,7 @@ private 
static String buildHSQC(final Correlation correlation, final int index, } final StringBuilder stringBuilder = new StringBuilder(); for (final Link link : correlation.getLink()) { - if (link.getExperimentType().equals("hsqc")) { + if (link.getExperimentType().equals("hsqc") || link.getExperimentType().equals("hmqc")) { for (final int matchIndex : link.getMatch()) { // for each equivalence of heavy atom and attached equivalent proton for (int k = 1; k < indicesMap.get(index).length; k++) { @@ -186,6 +191,7 @@ private static String buildCOSY(final Correlation correlation, final int index, if (!correlation.getAtomType().equals("H")) { return null; } + final String defaultBondDistance = "3 4"; final Set uniqueSet = new LinkedHashSet<>(); // in case of same content exists multiple times for (final Link link : correlation.getLink()) { if (link.getExperimentType().equals("cosy")) { @@ -193,8 +199,13 @@ private static String buildCOSY(final Correlation correlation, final int index, // only add an COSY correlation if the two signals there is not equivalent if (!data.getCorrelations().getValues().get(matchIndex).getId().equals(correlation.getId())) { for (int k = 1; k < indicesMap.get(index).length; k++) { - for (int l = 1; l < indicesMap.get(matchIndex).length; l++) { - uniqueSet.add(indicesMap.get(index)[k] + " " + indicesMap.get(matchIndex)[l]); + // for (int l = 1; l < indicesMap.get(matchIndex).length; l++) { + // uniqueSet.add(indicesMap.get(index)[k] + " " + indicesMap.get(matchIndex)[l]); + // } + + // only allow COSY values between possible equivalent protons and only one another non-equivalent proton + if (indicesMap.get(matchIndex).length == 2) { + uniqueSet.add(indicesMap.get(index)[k] + " " + indicesMap.get(matchIndex)[1]); } } } @@ -202,7 +213,7 @@ private static String buildCOSY(final Correlation correlation, final int index, } } - return uniqueSet.stream().map(str -> "COSY " + str + "\n").reduce("", (strAll, str) -> strAll + str); + return 
uniqueSet.stream().map(str -> "COSY " + str + " " + defaultBondDistance + "\n").reduce("", (strAll, str) -> strAll + str); } private static String buildSHIX(final Correlation correlation, final int index, final Map indicesMap) { @@ -253,7 +264,7 @@ private static String buildFilters(final String pathToLSDFilterList) { final Map filters = new LinkedHashMap<>(); int counter = 1; try { - final BufferedReader bufferedReader = FileOperations.readFile(pathToLSDFilterList); + final BufferedReader bufferedReader = FileSystem.readFile(pathToLSDFilterList); if (bufferedReader != null) { String line; while ((line = bufferedReader.readLine()) != null) { @@ -284,14 +295,14 @@ private static String buildFilters(final String pathToLSDFilterList) { return stringBuilder.toString(); } - public static String buildPyLSDFileContent(final Data data, final String mf, final Map> detectedHybridizations, final boolean allowHeteroHeteroBonds, final String pathToLSDFilterList) { + public static String buildPyLSDFileContent(final Data data, final String mf, final Map> detectedHybridizations, final boolean allowHeteroHeteroBonds, final String pathToLSDFilterList, final String uuid) { final HashMap> state = data.getCorrelations().getState(); boolean hasErrors = state.keySet().stream().anyMatch(s -> state.get(s).containsKey("error")); if (mf != null && !hasErrors) { final Map elementCounts = new LinkedHashMap<>(Utils.getMolecularFormulaElementCounts(mf)); final StringBuilder stringBuilder = new StringBuilder(); // create header - stringBuilder.append(buildHeader()).append("\n\n"); + stringBuilder.append(buildHeader(uuid)).append("\n\n"); // FORM stringBuilder.append(buildFORM(mf, elementCounts)).append("\n\n"); // PIEC From 441e2b76488f19672ff5bcc1e0a81ad389ebecf7 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Wed, 24 Feb 2021 21:50:09 +0100 Subject: [PATCH 155/405] set equivalence count default value to 1 (instead of 0) --- src/casekit/nmr/lsd/PyLSDInputFileBuilder.java | 6 +++--- 1 file 
changed, 3 insertions(+), 3 deletions(-) diff --git a/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java b/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java index 1d7f805..ed22821 100644 --- a/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java +++ b/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java @@ -50,7 +50,7 @@ private static Map buildIndicesMap(final Data data, final Map correlation = data.getCorrelations().getValues().get(i); // set entry for each correlation with consideration of equivalences if (correlation.getAtomType().equals("H")) { - protonsToInsert = 1; + protonsToInsert = 0; for (final Link link : correlation.getLink()) { if (link.getExperimentType().equals("hsqc") || link.getExperimentType().equals("hmqc")) { protonsToInsert += data.getCorrelations().getValues().get(link.getMatch().get(0)).getEquivalence(); @@ -63,9 +63,9 @@ private static Map buildIndicesMap(final Data data, final Map protonIndexInPyLSDFile++; } } else { - indicesMap.put(i, new Object[1 + correlation.getEquivalence() + 1]); + indicesMap.put(i, new Object[1 + correlation.getEquivalence()]); indicesMap.get(i)[0] = correlation.getAtomType(); - for (int j = 1; j <= correlation.getEquivalence() + 1; j++) { + for (int j = 1; j <= correlation.getEquivalence(); j++) { indicesMap.get(i)[j] = heavyAtomIndexInPyLSDFile; heavyAtomIndexInPyLSDFile++; } From 8e5cda64fee7a73e041429bd6972d3be20dbe7d2 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Wed, 24 Feb 2021 21:51:25 +0100 Subject: [PATCH 156/405] disabled MongoDB dependency and methods --- pom.xml | 10 +-- src/casekit/nmr/dbservice/MongoDB.java | 95 ++++++++++++-------------- 2 files changed, 48 insertions(+), 57 deletions(-) diff --git a/pom.xml b/pom.xml index 5b8a92b..a9cadc3 100644 --- a/pom.xml +++ b/pom.xml @@ -60,11 +60,11 @@ HOSECodeBuilder 1.0 - - org.mongodb - mongo-java-driver - 3.10.0 - + + + + + org.projectlombok lombok diff --git a/src/casekit/nmr/dbservice/MongoDB.java b/src/casekit/nmr/dbservice/MongoDB.java index 3ced8eb..373d904 100644 
--- a/src/casekit/nmr/dbservice/MongoDB.java +++ b/src/casekit/nmr/dbservice/MongoDB.java @@ -12,62 +12,53 @@ package casekit.nmr.dbservice; -import com.mongodb.MongoClient; -import com.mongodb.MongoClientOptions; -import com.mongodb.MongoCredential; -import com.mongodb.ServerAddress; -import com.mongodb.client.MongoCollection; -import com.mongodb.client.MongoDatabase; -import org.bson.Document; - /** - * * @author Michael Wenk [https://github.com/michaelwenk] */ public class MongoDB { - public static MongoClient login(final String mongoUser, final String mongoPassword, final String mongoAuthDB) { - MongoClient mongo; - try { - // Creating a Mongo client - mongo = new MongoClient( - new ServerAddress("127.0.0.1", 27017), - MongoCredential.createCredential( - mongoUser, - mongoAuthDB, - mongoPassword.toCharArray()), - MongoClientOptions.builder().build()); - System.out.println("Login to MongoDB was successfull"); - // Accessing the database - } catch (Exception e) { - e.printStackTrace(); - System.err.println(Thread.currentThread().getStackTrace()[1].getMethodName() + ": could not connect to MongoDB!"); - - return null; - } - - return mongo; - } - - public static MongoDatabase getDatabase(final MongoClient mongo, final String mongoDBName){ - return mongo.getDatabase(mongoDBName); - } - - public static MongoCollection getCollection(final MongoClient mongo, final String mongoDBName, final String mongoDBCollection) { - final MongoDatabase database = MongoDB.getDatabase(mongo, mongoDBName); -// if (database == null) { -// return null; -// } - System.out.println("Access to database \"" + mongoDBName + "\" was successfull"); - // Retrieving a collection - final MongoCollection collection = database.getCollection(mongoDBCollection); - System.out.println("Retrieval of collection \"" + mongoDBCollection + "\" was successfull -> size: " + collection.countDocuments()); - - return collection; - } - - public static void logout(final MongoClient mongo) { - mongo.close(); - } + // 
public static MongoClient login(final String mongoUser, final String mongoPassword, final String mongoAuthDB) { + // MongoClient mongo; + // try { + // // Creating a Mongo client + // mongo = new MongoClient( + // new ServerAddress("127.0.0.1", 27017), + // MongoCredential.createCredential( + // mongoUser, + // mongoAuthDB, + // mongoPassword.toCharArray()), + // MongoClientOptions.builder().build()); + // System.out.println("Login to MongoDB was successfull"); + // // Accessing the database + // } catch (Exception e) { + // e.printStackTrace(); + // System.err.println(Thread.currentThread().getStackTrace()[1].getMethodName() + ": could not connect to MongoDB!"); + // + // return null; + // } + // + // return mongo; + // } + // + // public static MongoDatabase getDatabase(final MongoClient mongo, final String mongoDBName){ + // return mongo.getDatabase(mongoDBName); + // } + // + // public static MongoCollection getCollection(final MongoClient mongo, final String mongoDBName, final String mongoDBCollection) { + // final MongoDatabase database = MongoDB.getDatabase(mongo, mongoDBName); + //// if (database == null) { + //// return null; + //// } + // System.out.println("Access to database \"" + mongoDBName + "\" was successfull"); + // // Retrieving a collection + // final MongoCollection collection = database.getCollection(mongoDBCollection); + // System.out.println("Retrieval of collection \"" + mongoDBCollection + "\" was successfull -> size: " + collection.countDocuments()); + // + // return collection; + // } + // + // public static void logout(final MongoClient mongo) { + // mongo.close(); + // } } From b5d978668d9e60995ca76ddac44c50c9b5ed4f08 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Mon, 1 Mar 2021 00:39:27 +0100 Subject: [PATCH 157/405] added SMILES as meta info in getDataSetsFromNMRShiftDB(); added SMILES generator method in Utils --- .DS_Store | Bin 0 -> 6148 bytes src/.DS_Store | Bin 0 -> 6148 bytes src/casekit/nmr/Utils.java | 253 
++++++++++++++++------ src/casekit/nmr/dbservice/NMRShiftDB.java | 151 +++++++++---- src/casekit/nmr/utils/Utils.java | 26 ++- 5 files changed, 317 insertions(+), 113 deletions(-) create mode 100644 .DS_Store create mode 100644 src/.DS_Store diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..fc336baa6b79e156cb621ae94f623e78cff45373 GIT binary patch literal 6148 zcmeHKO>Wab6#ga&O@IYaS8R}cf=1;8CYVK0#U?6Z+5AzW#5i&iMeN)QpdO`CZ@?-N z2Vjf3V9OQw-pnZVIH4m4=S5)tI!+P#^tT6UCK|ha`k-?LH&o3)URWK9K)b? z?Q`eOQ>lCX^Tyt@&#Tm|c!PTNt5UuBa&Wp@EOVM(qK8kM#gZJP=-@3f%&?+9rnQ3v zefs*;&UiiMY{i;lP5mRL)Yr_&t+t@{p3!T@=k#}Q!WbEO#@__5X&KTd<4omg^V;4& zamxkg_QubxS&0S?>|AobE93B8sWrd8;~0TI`;4UKp9X5whYMn5U~ly9t($d>0oD%0K^8(R;b&(ofwIM7<()n(n2vVmFQB9 zKVle{PJ86zVvmJGmkwhqa~xmU_!Ek;)oG7xI85wNdu2cw2pOom%eL(QuYQ0250mst z8BhlPD+Wv>=_Os>lHXf*Zcg^vi0zzBMB)mEHiezvj;$hF@l!S{^hKf&V~>SHS}6KQ Nz|x?dGVoU!_yzkQpe+CZ literal 0 HcmV?d00001 diff --git a/src/.DS_Store b/src/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..5008ddfcf53c02e82d7eee2e57c38e5672ef89f6 GIT binary patch literal 6148 zcmeH~Jr2S!425mzP>H1@V-^m;4Wg<&0T*E43hX&L&p$$qDprKhvt+--jT7}7np#A3 zem<@ulZcFPQ@L2!n>{z**++&mCkOWA81W14cNZlEfg7;MkzE(HCqgga^y>{tEnwC%0;vJ&^%eQ zLs35+`xjp>T0> getAtomTypeIndices(final IAtomContainer public static List getAtomTypeIndicesByElement(final IAtomContainer ac, final String atomType) { final ArrayList indices = new ArrayList<>(); - for (int i = 0; i < ac.getAtomCount(); i++) { - if (ac.getAtom(i).getSymbol().equals(atomType)) { + for (int i = 0; i + < ac.getAtomCount(); i++) { + if (ac.getAtom(i) + .getSymbol() + .equals(atomType)) { indices.add(i); } } @@ -88,21 +91,30 @@ public static IMolecularFormula getMolecularFormulaFromAtomContainer(final IAtom return MolecularFormulaManipulator.getMolecularFormula(ac); } - + public static String molecularFormularToString(final IMolecularFormula molecularFormula) { return 
MolecularFormulaManipulator.getString(molecularFormula); } - public static int getDifferenceSpectrumSizeAndMolecularFormulaCount(final Spectrum spectrum, final IMolecularFormula molFormula, final int dim) throws CDKException { + public static int getDifferenceSpectrumSizeAndMolecularFormulaCount(final Spectrum spectrum, + final IMolecularFormula molFormula, + final int dim) throws CDKException { if (!spectrum.containsDim(dim)) { - throw new CDKException(Thread.currentThread().getStackTrace()[2].getClassName() + "." + Thread.currentThread().getStackTrace()[2].getMethodName() + ": invalid dimension in spectrum given"); + throw new CDKException(Thread.currentThread() + .getStackTrace()[2].getClassName() + + "." + + Thread.currentThread() + .getStackTrace()[2].getMethodName() + + ": invalid dimension in spectrum given"); } final String atomType = casekit.nmr.utils.Utils.getAtomTypeFromSpectrum(spectrum, dim); int atomsInMolFormula = 0; - if (molFormula != null) { + if (molFormula + != null) { atomsInMolFormula = MolecularFormulaManipulator.getElementCount(molFormula, atomType); } - return atomsInMolFormula - spectrum.getSignalCountWithEquivalences(); + return atomsInMolFormula + - spectrum.getSignalCountWithEquivalences(); } /** @@ -198,24 +210,40 @@ public static List removeOutliers(final List input, final double */ public static List getOutliers(final List input, final double multiplierIQR) { final ArrayList outliers = new ArrayList<>(); - if (input.size() <= 1) { + if (input.size() + <= 1) { return outliers; } Collections.sort(input); - final ArrayList data1 = new ArrayList<>(input.subList(0, input.size() / 2)); + final ArrayList data1 = new ArrayList<>(input.subList(0, input.size() + / 2)); final ArrayList data2; - if (input.size() % 2 == 0) { - data2 = new ArrayList<>(input.subList(input.size() / 2, input.size())); + if (input.size() + % 2 + == 0) { + data2 = new ArrayList<>(input.subList(input.size() + / 2, input.size())); } else { - data2 = new 
ArrayList<>(input.subList(input.size() / 2 + 1, input.size())); + data2 = new ArrayList<>(input.subList(input.size() + / 2 + + 1, input.size())); } final double q1 = getMedian(new ArrayList<>(data1)); final double q3 = getMedian(new ArrayList<>(data2)); - final double iqr = q3 - q1; - final double lowerBound = q1 - multiplierIQR * iqr; - final double upperBound = q3 + multiplierIQR * iqr; - for (int i = 0; i < input.size(); i++) { - if ((input.get(i) < lowerBound) || (input.get(i) > upperBound)) { + final double iqr = q3 + - q1; + final double lowerBound = q1 + - multiplierIQR + * iqr; + final double upperBound = q3 + + multiplierIQR + * iqr; + for (int i = 0; i + < input.size(); i++) { + if ((input.get(i) + < lowerBound) + || (input.get(i) + > upperBound)) { outliers.add(input.get(i)); } } @@ -231,17 +259,28 @@ public static List getOutliers(final List input, final double mu * @return */ public static Double getMedian(final List data) { - if ((data == null) || data.isEmpty()) { + if ((data + == null) + || data.isEmpty()) { return null; } - if (data.size() == 1) { + if (data.size() + == 1) { return data.get(0); } Collections.sort(data); - if (data.size() % 2 == 1) { - return data.get(data.size() / 2); + if (data.size() + % 2 + == 1) { + return data.get(data.size() + / 2); } else { - return (data.get(data.size() / 2 - 1) + data.get(data.size() / 2)) / 2.0; + return (data.get(data.size() + / 2 + - 1) + + data.get(data.size() + / 2)) + / 2.0; } } @@ -252,19 +291,28 @@ public static Double getMedian(final List data) { * @return */ public static Double getMean(final Collection data) { - if ((data == null) || data.isEmpty()) { + if ((data + == null) + || data.isEmpty()) { return null; } double sum = 0; int nullCounter = 0; for (final Double d : data) { - if (d != null) { + if (d + != null) { sum += d; } else { nullCounter++; } } - return ((data.size() - nullCounter) != 0) ? (sum / (data.size() - nullCounter)) : null; + return ((data.size() + - nullCounter) + != 0) + ? 
(sum + / (data.size() + - nullCounter)) + : null; } /** @@ -273,31 +321,47 @@ public static Double getMean(final Collection data) { * @return */ public static Double getStandardDeviation(final List data) { - if ((data == null) || data.isEmpty()) { + if ((data + == null) + || data.isEmpty()) { return null; } final Double variance = Utils.getVariance(data); - return (variance != null) ? Math.sqrt(variance) : null; + return (variance + != null) + ? Math.sqrt(variance) + : null; } public static Double getVariance(final Collection data) { - if ((data == null) || data.isEmpty()) { + if ((data + == null) + || data.isEmpty()) { return null; } final int nullCounter = Collections.frequency(data, null); double quadrSum = 0.0; final Double mean = Utils.getMean(data); - if (mean == null) { + if (mean + == null) { return null; } for (final Double d : data) { - if (d != null) { - quadrSum += Math.pow(d - mean, 2); + if (d + != null) { + quadrSum += Math.pow(d + - mean, 2); } } - return ((data.size() - nullCounter) != 0) ? (quadrSum / (data.size() - nullCounter)) : null; + return ((data.size() + - nullCounter) + != 0) + ? (quadrSum + / (data.size() + - nullCounter)) + : null; } @@ -307,19 +371,29 @@ public static Double getVariance(final Collection data) { * @return */ public static Double getMean(final Double[] data) { - if ((data == null) || (data.length == 0)) { + if ((data + == null) + || (data.length + == 0)) { return null; } double sum = 0; int nullCounter = 0; for (final Double d : data) { - if (d != null) { + if (d + != null) { sum += d; } else { nullCounter++; } } - return ((data.length - nullCounter) != 0) ? (sum / (data.length - nullCounter)) : null; + return ((data.length + - nullCounter) + != 0) + ? 
(sum + / (data.length + - nullCounter)) + : null; } public static Map getMean(final Map> lookup) { @@ -328,7 +402,8 @@ public static Map getMean(final Map> l Double meanInList; for (final String key : lookup.keySet()) { meanInList = Utils.getMean(lookup.get(key)); - if (meanInList != null) { + if (meanInList + != null) { means.put(key, meanInList); } } @@ -343,14 +418,17 @@ public static boolean isValidBondAddition(final IAtomContainer ac, final int ato // System.out.print(atomIndex + " --> " + Utils.getBondOrderSum(ac, atomIndex, true) + " + " + Utils.getBondOrderAsNumeric(bondToAdd)); final IAtom atom = ac.getAtom(atomIndex); // -1 for cases with heterocyclic aromatics, like the N in the small aromatic ring in coffein if we want to add the bond to the CH3 group - if (atom.isAromatic() && (!atom.getSymbol().equals("C"))) { + if (atom.isAromatic() + && (!atom.getSymbol() + .equals("C"))) { // System.out.print("[ -1 ]"); bondOrderSum -= 1; } // System.out.print(" = " + bondOrderSum + " <= " + atom.getValency() + " ? 
-> " + (bondOrderSum <= atom.getValency()) + "\n"); // @TODO including charges - return bondOrderSum <= atom.getValency(); + return bondOrderSum + <= atom.getValency(); } @@ -361,12 +439,16 @@ public static boolean isValidBondAddition(final IAtomContainer ac, final int ato */ public static String getFileFormat(final String pathToFile) { - if (pathToFile == null || pathToFile.trim().isEmpty()) { + if (pathToFile + == null + || pathToFile.trim() + .isEmpty()) { return ""; } final String[] split = pathToFile.split("\\."); - return split[split.length - 1]; + return split[split.length + - 1]; } @@ -376,23 +458,34 @@ public static String getFileFormat(final String pathToFile) { * @return */ public static Double getRMS(final ArrayList data) { - if ((data == null) || data.isEmpty()) { + if ((data + == null) + || data.isEmpty()) { return null; } - if (data.size() == 1) { + if (data.size() + == 1) { return data.get(0); } int nullCounter = 0; double qSum = 0; for (final Double d : data) { - if (d != null) { - qSum += d * d; + if (d + != null) { + qSum += d + * d; } else { nullCounter++; } } - return ((data.size() - nullCounter) != 0) ? Math.sqrt(qSum / (data.size() - nullCounter)) : null; + return ((data.size() + - nullCounter) + != 0) + ? 
Math.sqrt(qSum + / (data.size() + - nullCounter)) + : null; } @@ -406,7 +499,8 @@ public static Map getRMS(final Map> lo Double rmsInList; for (final String key : lookup.keySet()) { rmsInList = Utils.getRMS(lookup.get(key)); - if (rmsInList != null) { + if (rmsInList + != null) { rms.put(key, rmsInList); } } @@ -418,7 +512,10 @@ public static Boolean isSaturated(final IAtomContainer ac, final int atomIndex) if (!Utils.checkIndexInAtomContainer(ac, atomIndex)) { return null; } - return Utils.getBondOrderSum(ac, atomIndex, true).intValue() >= ac.getAtom(atomIndex).getValency(); + return Utils.getBondOrderSum(ac, atomIndex, true) + .intValue() + >= ac.getAtom(atomIndex) + .getValency(); } public static void addImplicitHydrogens(final IAtomContainer ac) throws CDKException { @@ -443,7 +540,8 @@ public static Map getMedian(final Map> look Double medianInList; for (final String key : lookup.keySet()) { medianInList = Utils.getMedian(lookup.get(key)); - if (medianInList != null) { + if (medianInList + != null) { medians.put(key, medianInList); } } @@ -457,22 +555,27 @@ public static Map getMedian(final Map> look * * @deprecated */ - public static void combineHashMaps(final Map> hoseLookupToExtend, final Map> hoseLookup) { + public static void combineHashMaps(final Map> hoseLookupToExtend, + final Map> hoseLookup) { for (final String hose : hoseLookup.keySet()) { if (!hoseLookupToExtend.containsKey(hose)) { hoseLookupToExtend.put(hose, new ArrayList<>()); } - hoseLookupToExtend.get(hose).addAll(hoseLookup.get(hose)); + hoseLookupToExtend.get(hose) + .addAll(hoseLookup.get(hose)); } } public static Double roundDouble(final Double value, final int decimalPlaces) { - if (value == null) { + if (value + == null) { return null; } final int decimalFactor = (int) (Math.pow(10, decimalPlaces)); - return (Math.round(value * decimalFactor) / (double) decimalFactor); + return (Math.round(value + * decimalFactor) + / (double) decimalFactor); } /** @@ -483,7 +586,8 @@ public static 
Double roundDouble(final Double value, final int decimalPlaces) { * @return */ public static boolean containsExplicitHydrogens(final IAtomContainer ac) { - return getExplicitHydrogenCount(ac) > 0; + return getExplicitHydrogenCount(ac) + > 0; } /** @@ -508,12 +612,16 @@ public static Map convertExplicitToImplicitHydrogens(final IAtom // check each atom whether it is an hydrogen; // if yes then store (increase) the number of implicit hydrogens // for its bonded heavy atom - if (atomA.getSymbol().equals("H")) { - atomB = ac.getConnectedAtomsList(atomA).get(0); - if (atomB.getImplicitHydrogenCount() == null) { + if (atomA.getSymbol() + .equals("H")) { + atomB = ac.getConnectedAtomsList(atomA) + .get(0); + if (atomB.getImplicitHydrogenCount() + == null) { atomB.setImplicitHydrogenCount(0); } - atomB.setImplicitHydrogenCount(atomB.getImplicitHydrogenCount() + 1); + atomB.setImplicitHydrogenCount(atomB.getImplicitHydrogenCount() + + 1); toRemoveList.add(atomA); } else { // store all non-hydrogen atoms and their indices @@ -536,8 +644,11 @@ public static Map convertExplicitToImplicitHydrogens(final IAtom */ public static List getExplicitHydrogenIndices(final IAtomContainer ac) { final List explicitHydrogenIndicesList = new ArrayList<>(); - for (int i = 0; i < ac.getAtomCount(); i++) { - if (ac.getAtom(i).getSymbol().equals("H")) { + for (int i = 0; i + < ac.getAtomCount(); i++) { + if (ac.getAtom(i) + .getSymbol() + .equals("H")) { explicitHydrogenIndicesList.add(i); } } @@ -581,7 +692,8 @@ public static IAtomContainer removeAtoms(final IAtomContainer ac, final String a final ArrayList toRemoveList = new ArrayList<>(); for (IAtom atomA : ac.atoms()) { - if (atomA.getSymbol().equals(atomType)) {// detect whether the current atom A is a from the given atom type + if (atomA.getSymbol() + .equals(atomType)) {// detect whether the current atom A is a from the given atom type toRemoveList.add(atomA); } } @@ -605,7 +717,10 @@ public static IAtomContainer removeAtoms(final 
IAtomContainer ac, final String a // } public static boolean checkIndexInAtomContainer(final IAtomContainer ac, final int atomIndex) { - return ((atomIndex >= 0) && atomIndex < ac.getAtomCount()); + return ((atomIndex + >= 0) + && atomIndex + < ac.getAtomCount()); } public static ExecutorService initExecuter(final int nThreads) { @@ -634,7 +749,8 @@ public static void stopExecuter(final ExecutorService executor, final long secon */ public static IBond.Order getBondOrder(final int orderAsNumeric) { for (IBond.Order order : IBond.Order.values()) { - if (order.numeric() == orderAsNumeric) { + if (order.numeric() + == orderAsNumeric) { return order; } } @@ -643,20 +759,23 @@ public static IBond.Order getBondOrder(final int orderAsNumeric) { } public static Float getBondOrderAsNumeric(final IBond bond) { - if (bond == null) { + if (bond + == null) { return null; } float bondOrderAsNumeric; if (bond.isAromatic()) { bondOrderAsNumeric = (float) 1.5; } else { - bondOrderAsNumeric = bond.getOrder().numeric(); + bondOrderAsNumeric = bond.getOrder() + .numeric(); } return bondOrderAsNumeric; } - public static Float getBondOrderSum(final IAtomContainer ac, final int atomIndex, final boolean includeImplicitHydrogenCount) { + public static Float getBondOrderSum(final IAtomContainer ac, final int atomIndex, + final boolean includeImplicitHydrogenCount) { if (!Utils.checkIndexInAtomContainer(ac, atomIndex)) { return null; } @@ -665,7 +784,9 @@ public static Float getBondOrderSum(final IAtomContainer ac, final int atomIndex for (final IBond bond : ac.getConnectedBondsList(atom)) { bondsOrderSum += Utils.getBondOrderAsNumeric(bond); } - if (includeImplicitHydrogenCount && (atom.getImplicitHydrogenCount() != null)) { + if (includeImplicitHydrogenCount + && (atom.getImplicitHydrogenCount() + != null)) { bondsOrderSum += atom.getImplicitHydrogenCount(); } diff --git a/src/casekit/nmr/dbservice/NMRShiftDB.java b/src/casekit/nmr/dbservice/NMRShiftDB.java index 65ba4fc..5b237e4 100644 --- 
a/src/casekit/nmr/dbservice/NMRShiftDB.java +++ b/src/casekit/nmr/dbservice/NMRShiftDB.java @@ -27,21 +27,32 @@ import java.io.FileNotFoundException; import java.io.FileReader; -import java.util.*; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; public class NMRShiftDB { public static String getSolvent(final String solventPropertyString, final String spectrumIndexInRecord) { final String[] solventPropertyStringSplit = solventPropertyString.split(":"); String solvent; - for (int i = 0; i < solventPropertyStringSplit.length; i++) { + for (int i = 0; i + < solventPropertyStringSplit.length; i++) { if (solventPropertyStringSplit[i].endsWith(spectrumIndexInRecord)) { - solvent = solventPropertyStringSplit[i + 1]; - if (solvent.substring(solvent.length() - 1).matches("\\d")) { - solvent = solvent.substring(0, solvent.length() - 1); + solvent = solventPropertyStringSplit[i + + 1]; + if (solvent.substring(solvent.length() + - 1) + .matches("\\d")) { + solvent = solvent.substring(0, solvent.length() + - 1); } - if (solvent.substring(solvent.length() - 1).matches("\\d")) { - solvent = solvent.substring(0, solvent.length() - 1); + if (solvent.substring(solvent.length() + - 1) + .matches("\\d")) { + solvent = solvent.substring(0, solvent.length() + - 1); } // solvent = solvent.substring(0, solvent.length() - 1); solvent = solvent.trim(); @@ -55,8 +66,11 @@ public static String getSolvent(final String solventPropertyString, final String public static List getSpectraProperties1D(final IAtomContainer ac, final String nucleus) { final List spectraProperties1D = new ArrayList<>(); - for (final Object obj : ac.getProperties().keySet()) { - if (obj instanceof String && ((String) obj).startsWith("Spectrum " + nucleus)) { + for (final Object obj : ac.getProperties() + .keySet()) { + if (obj instanceof String + && ((String) obj).startsWith("Spectrum " + + nucleus)) { spectraProperties1D.add((String) obj); } } @@ -80,9 +94,11 
@@ public static List getSpectraProperties1D(final IAtomContainer ac, final * @throws CDKException * @see DataSet */ - public static Collection getDataSetsFromNMRShiftDB(final String pathToNMRShiftDB, final String[] nuclei) throws FileNotFoundException, CDKException { - final Collection dataSets = new ArrayList<>(); - final IteratingSDFReader iterator = new IteratingSDFReader(new FileReader(pathToNMRShiftDB), SilentChemObjectBuilder.getInstance()); + public static List getDataSetsFromNMRShiftDB(final String pathToNMRShiftDB, + final String[] nuclei) throws FileNotFoundException, CDKException { + final List dataSets = new ArrayList<>(); + final IteratingSDFReader iterator = new IteratingSDFReader(new FileReader(pathToNMRShiftDB), + SilentChemObjectBuilder.getInstance()); IAtomContainer structure; Spectrum spectrum; Assignment assignment; @@ -112,28 +128,46 @@ public static Collection getDataSetsFromNMRShiftDB(final String pathToN meta.put("id", structure.getProperty("nmrshiftdb2 ID")); mf = Utils.getMolecularFormulaFromAtomContainer(structure); meta.put("mf", Utils.molecularFormularToString(mf)); + try { + final String smiles = casekit.nmr.utils.Utils.getSmilesFromAtomContainer(structure); + meta.put("smiles", smiles); + } catch (final CDKException e) { + e.printStackTrace(); + } + for (final String nucleus : nuclei) { spectraProperties1D = getSpectraProperties1D(structure, nucleus); for (final String spectrumProperty1D : spectraProperties1D) { split = spectrumProperty1D.split("\\s"); - spectrumIndexInRecord = split[split.length - 1]; + spectrumIndexInRecord = split[split.length + - 1]; // skip molecules which do not contain any of requested spectrum information spectrum = NMRShiftDBSpectrumToSpectrum(structure.getProperty(spectrumProperty1D), nucleus); // if no spectrum could be built or the number of signals in spectrum is different than the atom number in molecule - if ((spectrum == null) || Utils.getDifferenceSpectrumSizeAndMolecularFormulaCount(spectrum, mf, 0) 
!= 0) { + if ((spectrum + == null) + || Utils.getDifferenceSpectrumSizeAndMolecularFormulaCount(spectrum, mf, 0) + != 0) { continue; } - if (structure.getProperty("Solvent") != null) { + if (structure.getProperty("Solvent") + != null) { spectrum.setSolvent(getSolvent(structure.getProperty("Solvent"), spectrumIndexInRecord)); } - if (structure.getProperty("Field Strength [MHz]") != null) { - for (final String fieldStrength : structure.getProperty("Field Strength [MHz]").toString().split("\\s")) { - if (fieldStrength.startsWith(spectrumIndexInRecord + ":")) { + if (structure.getProperty("Field Strength [MHz]") + != null) { + for (final String fieldStrength : structure.getProperty("Field Strength [MHz]") + .toString() + .split("\\s")) { + if (fieldStrength.startsWith(spectrumIndexInRecord + + ":")) { try { - spectrum.setSpectrometerFrequency(Double.parseDouble(fieldStrength.split(spectrumIndexInRecord + ":")[1])); + spectrum.setSpectrometerFrequency(Double.parseDouble(fieldStrength.split( + spectrumIndexInRecord + + ":")[1])); } catch (NumberFormatException e) { // e.printStackTrace(); } @@ -143,17 +177,23 @@ public static Collection getDataSetsFromNMRShiftDB(final String pathToN } assignment = NMRShiftDBSpectrumToAssignment(structure.getProperty(spectrumProperty1D), nucleus); - if (assignment != null && !explicitHydrogenIndices.isEmpty()) { + if (assignment + != null + && !explicitHydrogenIndices.isEmpty()) { int hCount; - for (int i = 0; i < assignment.getAssignmentsCount(); i++) { + for (int i = 0; i + < assignment.getAssignmentsCount(); i++) { hCount = 0; - for (int j = 0; j < explicitHydrogenIndices.size(); j++) { - if (explicitHydrogenIndices.get(j) >= assignment.getAssignment(0, i)) { + for (int j = 0; j + < explicitHydrogenIndices.size(); j++) { + if (explicitHydrogenIndices.get(j) + >= assignment.getAssignment(0, i)) { break; } hCount++; } - assignment.setAssignment(0, i, assignment.getAssignment(0, i) - hCount); + assignment.setAssignment(0, i, 
assignment.getAssignment(0, i) + - hCount); } } @@ -282,17 +322,23 @@ public static Collection getDataSetsFromNMRShiftDB(final String pathToN * signal multiplicity (column 3), atom index in structure (column 4) */ public static String[][] parseNMRShiftDBSpectrum(final String NMRShiftDBSpectrum) { - if (NMRShiftDBSpectrum.trim().isEmpty()) { + if (NMRShiftDBSpectrum.trim() + .isEmpty()) { return new String[][]{}; } String[] signalSplit; final String[] shiftsSplit = NMRShiftDBSpectrum.split("\\|"); final String[][] values = new String[shiftsSplit.length][4]; - for (int i = 0; i < shiftsSplit.length; i++) { + for (int i = 0; i + < shiftsSplit.length; i++) { signalSplit = shiftsSplit[i].split(";"); values[i][0] = signalSplit[0]; // shift value - values[i][1] = signalSplit[1].toLowerCase().split("[a-z]")[0]; // intensity - values[i][2] = signalSplit[1].split("\\d+\\.\\d+").length > 0 ? signalSplit[1].split("\\d+\\.\\d+")[1].toLowerCase() : ""; // multiplicity + values[i][1] = signalSplit[1].toLowerCase() + .split("[a-z]")[0]; // intensity + values[i][2] = signalSplit[1].split("\\d+\\.\\d+").length + > 0 + ? 
signalSplit[1].split("\\d+\\.\\d+")[1].toLowerCase() + : ""; // multiplicity values[i][3] = signalSplit[2]; // atom index } @@ -300,27 +346,38 @@ public static String[][] parseNMRShiftDBSpectrum(final String NMRShiftDBSpectrum } @Deprecated - public static String NMRShiftDBSpectrumToBasicTextSpectrum(final String NMRShiftDBSpectrum, final String nucleus, final String description) { - if ((NMRShiftDBSpectrum == null) || NMRShiftDBSpectrum.trim().isEmpty()) { + public static String NMRShiftDBSpectrumToBasicTextSpectrum(final String NMRShiftDBSpectrum, final String nucleus, + final String description) { + if ((NMRShiftDBSpectrum + == null) + || NMRShiftDBSpectrum.trim() + .isEmpty()) { return null; } final StringBuilder basicSpectrum = new StringBuilder(); // append description - if (!description.trim().startsWith("//")) { + if (!description.trim() + .startsWith("//")) { basicSpectrum.append("// "); } - basicSpectrum.append(description).append("\n"); + basicSpectrum.append(description) + .append("\n"); final String[][] spectrumStringArray = NMRShiftDB.parseNMRShiftDBSpectrum(NMRShiftDBSpectrum); try { - for (int i = 0; i < spectrumStringArray.length; i++) { + for (int i = 0; i + < spectrumStringArray.length; i++) { // append nucleus - basicSpectrum.append(nucleus).append(", "); + basicSpectrum.append(nucleus) + .append(", "); // append chemical shift - basicSpectrum.append(Double.parseDouble(spectrumStringArray[i][0])).append(", "); + basicSpectrum.append(Double.parseDouble(spectrumStringArray[i][0])) + .append(", "); // append multiplicity - basicSpectrum.append(spectrumStringArray[i][2]).append(", "); + basicSpectrum.append(spectrumStringArray[i][2]) + .append(", "); // append intensity - basicSpectrum.append(Double.parseDouble(spectrumStringArray[i][1])).append("\n"); + basicSpectrum.append(Double.parseDouble(spectrumStringArray[i][1])) + .append("\n"); } } catch (Exception e) { return null; @@ -330,7 +387,10 @@ public static String 
NMRShiftDBSpectrumToBasicTextSpectrum(final String NMRShift } public static Spectrum NMRShiftDBSpectrumToSpectrum(final String NMRShiftDBSpectrum, final String nucleus) { - if ((NMRShiftDBSpectrum == null) || NMRShiftDBSpectrum.trim().isEmpty()) { + if ((NMRShiftDBSpectrum + == null) + || NMRShiftDBSpectrum.trim() + .isEmpty()) { return null; } final String[][] spectrumStringArray = parseNMRShiftDBSpectrum(NMRShiftDBSpectrum); @@ -338,11 +398,14 @@ public static Spectrum NMRShiftDBSpectrumToSpectrum(final String NMRShiftDBSpect String multiplicity; Double shift, intensity; try { - for (int i = 0; i < spectrumStringArray.length; i++) { + for (int i = 0; i + < spectrumStringArray.length; i++) { shift = Double.parseDouble(spectrumStringArray[i][0]); intensity = Double.parseDouble(spectrumStringArray[i][1]); multiplicity = spectrumStringArray[i][2]; - spectrum.addSignal(new Signal(new String[]{nucleus}, new Double[]{shift}, multiplicity, "signal", intensity, 0, 0)); + spectrum.addSignal( + new Signal(new String[]{nucleus}, new Double[]{shift}, multiplicity, "signal", intensity, 0, + 0)); } } catch (Exception e) { return null; @@ -352,13 +415,17 @@ public static Spectrum NMRShiftDBSpectrumToSpectrum(final String NMRShiftDBSpect } public static Assignment NMRShiftDBSpectrumToAssignment(final String NMRShiftDBSpectrum, final String nucleus) { - if ((NMRShiftDBSpectrum == null) || NMRShiftDBSpectrum.trim().isEmpty()) { + if ((NMRShiftDBSpectrum + == null) + || NMRShiftDBSpectrum.trim() + .isEmpty()) { return null; } final String[][] NMRShiftDBSpectrumStringArray = parseNMRShiftDBSpectrum(NMRShiftDBSpectrum); final Spectrum spectrum = NMRShiftDBSpectrumToSpectrum(NMRShiftDBSpectrum, nucleus); final Assignment assignment = new Assignment(spectrum); - for (int i = 0; i < NMRShiftDBSpectrumStringArray.length; i++) { + for (int i = 0; i + < NMRShiftDBSpectrumStringArray.length; i++) { assignment.setAssignment(0, i, new Integer(NMRShiftDBSpectrumStringArray[i][3])); } diff --git 
a/src/casekit/nmr/utils/Utils.java b/src/casekit/nmr/utils/Utils.java index 100df32..7133ab4 100644 --- a/src/casekit/nmr/utils/Utils.java +++ b/src/casekit/nmr/utils/Utils.java @@ -2,8 +2,12 @@ import casekit.nmr.model.Spectrum; import casekit.nmr.model.nmrdisplayer.Correlation; +import org.openscience.cdk.exception.CDKException; +import org.openscience.cdk.interfaces.IAtomContainer; import org.openscience.cdk.interfaces.IMolecularFormula; import org.openscience.cdk.silent.SilentChemObjectBuilder; +import org.openscience.cdk.smiles.SmiFlavor; +import org.openscience.cdk.smiles.SmilesGenerator; import org.openscience.cdk.tools.manipulator.MolecularFormulaManipulator; import java.util.ArrayList; @@ -16,8 +20,13 @@ public class Utils { public static String getMultiplicityFromProtonsCount(final Correlation correlation) { - if (correlation.getAtomType().equals("C") && correlation.getProtonsCount().size() == 1) { - switch (correlation.getProtonsCount().get(0)) { + if (correlation.getAtomType() + .equals("C") + && correlation.getProtonsCount() + .size() + == 1) { + switch (correlation.getProtonsCount() + .get(0)) { case 0: return "s"; case 1: @@ -43,14 +52,16 @@ public static String getAtomTypeFromSpectrum(final Spectrum spectrum, final int public static String getAtomTypeFromNucleus(final String nucleus) { final String[] nucleusSplit = nucleus.split("\\d"); - return nucleusSplit[nucleusSplit.length - 1]; + return nucleusSplit[nucleusSplit.length + - 1]; } public static Map getMolecularFormulaElementCounts(final String mf) { final LinkedHashMap counts = new LinkedHashMap<>(); final IMolecularFormula iMolecularFormula = Utils.getMolecularFormulaFromString(mf); final List elements = new ArrayList<>(); - final Matcher matcher = Pattern.compile("([A-Z][a-z]*)").matcher(mf); + final Matcher matcher = Pattern.compile("([A-Z][a-z]*)") + .matcher(mf); while (matcher.find()) { elements.add(matcher.group(1)); @@ -65,6 +76,11 @@ public static Map 
getMolecularFormulaElementCounts(final String public static IMolecularFormula getMolecularFormulaFromString(final String mf) { return MolecularFormulaManipulator.getMolecularFormula(mf, SilentChemObjectBuilder.getInstance()); } - + + public static String getSmilesFromAtomContainer(final IAtomContainer ac) throws CDKException { + final SmilesGenerator smilesGenerator = new SmilesGenerator(SmiFlavor.Absolute); + + return smilesGenerator.create(ac); + } } From 0a9f50980866e712703c25f944f017d941523be4 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Mon, 1 Mar 2021 00:45:07 +0100 Subject: [PATCH 158/405] extended gitignore file by .DS_Store --- .DS_Store | Bin 6148 -> 0 bytes .gitignore | 3 +++ 2 files changed, 3 insertions(+) delete mode 100644 .DS_Store diff --git a/.DS_Store b/.DS_Store deleted file mode 100644 index fc336baa6b79e156cb621ae94f623e78cff45373..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 zcmeHKO>Wab6#ga&O@IYaS8R}cf=1;8CYVK0#U?6Z+5AzW#5i&iMeN)QpdO`CZ@?-N z2Vjf3V9OQw-pnZVIH4m4=S5)tI!+P#^tT6UCK|ha`k-?LH&o3)URWK9K)b? 
z?Q`eOQ>lCX^Tyt@&#Tm|c!PTNt5UuBa&Wp@EOVM(qK8kM#gZJP=-@3f%&?+9rnQ3v zefs*;&UiiMY{i;lP5mRL)Yr_&t+t@{p3!T@=k#}Q!WbEO#@__5X&KTd<4omg^V;4& zamxkg_QubxS&0S?>|AobE93B8sWrd8;~0TI`;4UKp9X5whYMn5U~ly9t($d>0oD%0K^8(R;b&(ofwIM7<()n(n2vVmFQB9 zKVle{PJ86zVvmJGmkwhqa~xmU_!Ek;)oG7xI85wNdu2cw2pOom%eL(QuYQ0250mst z8BhlPD+Wv>=_Os>lHXf*Zcg^vi0zzBMB)mEHiezvj;$hF@l!S{^hKf&V~>SHS}6KQ Nz|x?dGVoU!_yzkQpe+CZ diff --git a/.gitignore b/.gitignore index 549e00a..dcb312f 100644 --- a/.gitignore +++ b/.gitignore @@ -31,3 +31,6 @@ build/ ### VS Code ### .vscode/ + +### MacOS ### +.DS_Store From 210c5bb5e8cd91e6136f986683f6808a36924c74 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Mon, 1 Mar 2021 00:48:27 +0100 Subject: [PATCH 159/405] deleted .DS_Store in source folder --- src/.DS_Store | Bin 6148 -> 0 bytes 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 src/.DS_Store diff --git a/src/.DS_Store b/src/.DS_Store deleted file mode 100644 index 5008ddfcf53c02e82d7eee2e57c38e5672ef89f6..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 zcmeH~Jr2S!425mzP>H1@V-^m;4Wg<&0T*E43hX&L&p$$qDprKhvt+--jT7}7np#A3 zem<@ulZcFPQ@L2!n>{z**++&mCkOWA81W14cNZlEfg7;MkzE(HCqgga^y>{tEnwC%0;vJ&^%eQ zLs35+`xjp>T0 Date: Mon, 1 Mar 2021 22:31:02 +0100 Subject: [PATCH 160/405] set equivalence default value to 1; renamed Matcher class to Match and moved together with Predict class to package utils --- src/casekit/nmr/core/Dereplication.java | 13 +- src/casekit/nmr/core/Elucidation.java | 11 +- src/casekit/nmr/dbservice/NMRShiftDB.java | 2 +- src/casekit/nmr/model/Signal.java | 2 +- src/casekit/nmr/model/Spectrum.java | 114 +++++++++++++----- .../{match/Matcher.java => utils/Match.java} | 87 ++++++++----- .../nmr/{predict => utils}/Predict.java | 23 ++-- 7 files changed, 176 insertions(+), 76 deletions(-) rename src/casekit/nmr/{match/Matcher.java => utils/Match.java} (77%) rename src/casekit/nmr/{predict => utils}/Predict.java (86%) diff --git 
a/src/casekit/nmr/core/Dereplication.java b/src/casekit/nmr/core/Dereplication.java index 5bc14a9..c1034a5 100644 --- a/src/casekit/nmr/core/Dereplication.java +++ b/src/casekit/nmr/core/Dereplication.java @@ -1,9 +1,9 @@ package casekit.nmr.core; -import casekit.nmr.match.Matcher; import casekit.nmr.model.Assignment; import casekit.nmr.model.DataSet; import casekit.nmr.model.Spectrum; +import casekit.nmr.utils.Match; import org.openscience.cdk.exception.CDKException; import java.util.ArrayList; @@ -11,18 +11,21 @@ public class Dereplication { - public static List dereplicate1D(final Spectrum querySpectrum, final List compoundDataSets, final double shiftTol) { + public static List dereplicate1D(final Spectrum querySpectrum, final List compoundDataSets, + final double shiftTol) { final List solutions = new ArrayList<>(); for (final DataSet dataSet : compoundDataSets) { - final Assignment matchAssignment = Matcher.matchSpectra(dataSet.getSpectrum(), querySpectrum, 0, 0, 1); + final Assignment matchAssignment = Match.matchSpectra(dataSet.getSpectrum(), querySpectrum, 0, 0, 1); if (matchAssignment.isFullyAssigned(0)) { try { - dataSet.addMetaInfo("tanimoto", String.valueOf(Matcher.calculateTanimotoCoefficient(dataSet.getSpectrum(), querySpectrum, 0, 0))); + dataSet.addMetaInfo("tanimoto", String.valueOf( + Match.calculateTanimotoCoefficient(dataSet.getSpectrum(), querySpectrum, 0, 0))); } catch (CDKException e) { e.printStackTrace(); } - dataSet.addMetaInfo("avgDev", String.valueOf(Matcher.calculateAverageDeviation(dataSet.getSpectrum(), querySpectrum, 0, 0, shiftTol))); + dataSet.addMetaInfo("avgDev", String.valueOf( + Match.calculateAverageDeviation(dataSet.getSpectrum(), querySpectrum, 0, 0, shiftTol))); solutions.add(dataSet); } } diff --git a/src/casekit/nmr/core/Elucidation.java b/src/casekit/nmr/core/Elucidation.java index 19f4d51..cb2d454 100644 --- a/src/casekit/nmr/core/Elucidation.java +++ b/src/casekit/nmr/core/Elucidation.java @@ -1,28 +1,29 @@ package 
casekit.nmr.core; -import casekit.nmr.match.Matcher; import casekit.nmr.model.Assignment; import casekit.nmr.model.DataSet; import casekit.nmr.model.Spectrum; +import casekit.nmr.utils.Match; import java.util.ArrayList; import java.util.List; public class Elucidation { - public static List findFragments(final Spectrum querySpectrum, final List compoundDataSets, final double shiftTol){ + public static List findFragments(final Spectrum querySpectrum, final List compoundDataSets, + final double shiftTol) { final List fragments = new ArrayList<>(); Assignment matchAssignment; - for (final DataSet dataSet : compoundDataSets){ - matchAssignment = Matcher.matchSpectra(dataSet.getSpectrum(), querySpectrum, 0, 0, shiftTol); + for (final DataSet dataSet : compoundDataSets) { + matchAssignment = Match.matchSpectra(dataSet.getSpectrum(), querySpectrum, 0, 0, shiftTol); } return fragments; } - public static List elucidate(){ + public static List elucidate() { final List solutions = new ArrayList<>(); return solutions; diff --git a/src/casekit/nmr/dbservice/NMRShiftDB.java b/src/casekit/nmr/dbservice/NMRShiftDB.java index 5b237e4..a4f3f3a 100644 --- a/src/casekit/nmr/dbservice/NMRShiftDB.java +++ b/src/casekit/nmr/dbservice/NMRShiftDB.java @@ -404,7 +404,7 @@ public static Spectrum NMRShiftDBSpectrumToSpectrum(final String NMRShiftDBSpect intensity = Double.parseDouble(spectrumStringArray[i][1]); multiplicity = spectrumStringArray[i][2]; spectrum.addSignal( - new Signal(new String[]{nucleus}, new Double[]{shift}, multiplicity, "signal", intensity, 0, + new Signal(new String[]{nucleus}, new Double[]{shift}, multiplicity, "signal", intensity, 1, 0)); } } catch (Exception e) { diff --git a/src/casekit/nmr/model/Signal.java b/src/casekit/nmr/model/Signal.java index 2f65e53..c20a346 100644 --- a/src/casekit/nmr/model/Signal.java +++ b/src/casekit/nmr/model/Signal.java @@ -48,7 +48,7 @@ public Signal() { } public Signal(final String[] nuclei) { - this(nuclei, null, null, null, null, 
0, 0); + this(nuclei, null, null, null, null, 1, 0); } public Signal(final String[] nuclei, final Double[] shifts, final String multiplicity, final String kind, final Double intensity, final int equivalencesCount, final int phase) { diff --git a/src/casekit/nmr/model/Spectrum.java b/src/casekit/nmr/model/Spectrum.java index d6f5710..a8adee8 100644 --- a/src/casekit/nmr/model/Spectrum.java +++ b/src/casekit/nmr/model/Spectrum.java @@ -38,7 +38,8 @@ /** * @author Michael Wenk [https://github.com/michaelwenk] */ -public class Spectrum extends Dimensional { +public class Spectrum + extends Dimensional { /** * An arbitrary name or description that can be assigned to this spectrum for identification purposes. @@ -68,7 +69,8 @@ public Spectrum(final String[] nuclei) { this.signalCount = 0; } - public Spectrum(String[] nuclei, String description, String specType, Double spectrometerFrequency, String solvent, String standard, List signals, int signalCount, List equivalences) { + public Spectrum(String[] nuclei, String description, String specType, Double spectrometerFrequency, String solvent, + String standard, List signals, int signalCount) { super(nuclei); this.description = description; this.specType = specType; @@ -102,7 +104,7 @@ public int getSignalCount() { public int getSignalCountWithEquivalences() { int sum = 0; for (final Signal signal : this.getSignals()) { - sum += 1 + signal.getEquivalencesCount(); + sum += signal.getEquivalencesCount(); } return sum; } @@ -127,13 +129,16 @@ public boolean addSignal(final Signal signal) { * @return */ public boolean addSignal(final Signal signal, final double pickPrecision) { - if ((signal == null) || !this.compareNuclei(signal.getNuclei())) { + if ((signal + == null) + || !this.compareNuclei(signal.getNuclei())) { return false; } // check for equivalent signals in all dimensions final List closestSignalList = this.pickClosestSignal(signal.getShift(0), 0, pickPrecision); - for (int dim = 1; dim < this.getNDim(); dim++) { + for 
(int dim = 1; dim + < this.getNDim(); dim++) { closestSignalList.retainAll(this.pickClosestSignal(signal.getShift(dim), dim, pickPrecision)); } @@ -145,8 +150,10 @@ public boolean addSignal(final Signal signal, final double pickPrecision) { Signal closestSignal; for (final Integer closestSignalIndex : closestSignalList) { closestSignal = this.getSignal(closestSignalIndex); - if (closestSignal.getMultiplicity().equals(signal.getMultiplicity())) { - closestSignal.setEquivalencesCount(closestSignal.getEquivalencesCount() + 1); + if (closestSignal.getMultiplicity() + .equals(signal.getMultiplicity())) { + closestSignal.setEquivalencesCount(closestSignal.getEquivalencesCount() + + 1); } } } @@ -163,7 +170,8 @@ public boolean removeSignal(final int signalIndex) { if (!this.checkSignalIndex(signalIndex)) { return false; } - if (this.signals.remove(signalIndex) != null) { + if (this.signals.remove(signalIndex) + != null) { this.signalCount--; return true; @@ -173,7 +181,12 @@ public boolean removeSignal(final int signalIndex) { } private boolean checkSignalIndex(final Integer signalIndex) { - return (signalIndex != null) && (signalIndex >= 0) && (signalIndex < this.getSignalCount()); + return (signalIndex + != null) + && (signalIndex + >= 0) + && (signalIndex + < this.getSignalCount()); } /** @@ -200,11 +213,15 @@ public Double getShift(final int signalIndex, final int dim) { return null; } - return this.getSignal(signalIndex).getShift(dim); + return this.getSignal(signalIndex) + .getShift(dim); } public List getShifts(final int dim) { - return this.getSignals().stream().map(signal -> signal.getShift(dim)).collect(Collectors.toList()); + return this.getSignals() + .stream() + .map(signal -> signal.getShift(dim)) + .collect(Collectors.toList()); } public String getMultiplicity(final int signalIndex) { @@ -212,7 +229,8 @@ public String getMultiplicity(final int signalIndex) { return null; } - return this.getSignal(signalIndex).getMultiplicity(); + return 
this.getSignal(signalIndex) + .getMultiplicity(); } public Boolean hasEquivalences(final int signalIndex) { @@ -220,7 +238,8 @@ public Boolean hasEquivalences(final int signalIndex) { return null; } - return this.getEquivalencesCount(signalIndex) > 0; + return this.getEquivalencesCount(signalIndex) + > 1; } public Integer getEquivalencesCount(final int signalIndex) { @@ -228,11 +247,15 @@ public Integer getEquivalencesCount(final int signalIndex) { return null; } - return this.getSignal(signalIndex).getEquivalencesCount(); + return this.getSignal(signalIndex) + .getEquivalencesCount(); } public List getEquivalencesCounts() { - return this.getSignals().stream().map(Signal::getEquivalencesCount).collect(Collectors.toList()); + return this.getSignals() + .stream() + .map(Signal::getEquivalencesCount) + .collect(Collectors.toList()); } /** @@ -243,8 +266,10 @@ public List getEquivalencesCounts() { * @return */ public int getSignalIndex(final Signal signal) { - for (int s = 0; s < this.signals.size(); s++) { - if (this.signals.get(s) == signal) { + for (int s = 0; s + < this.signals.size(); s++) { + if (this.signals.get(s) + == signal) { return s; } } @@ -293,13 +318,20 @@ public List pickClosestSignal(final double shift, final int dim, final } double minDiff = pickPrecision; // detect the minimal difference between a signal shift to the given query shift - for (int s = 0; s < this.getSignalCount(); s++) { - if (Math.abs(this.getShift(s, dim) - shift) < minDiff) { - minDiff = Math.abs(this.getShift(s, dim) - shift); + for (int s = 0; s + < this.getSignalCount(); s++) { + if (Math.abs(this.getShift(s, dim) + - shift) + < minDiff) { + minDiff = Math.abs(this.getShift(s, dim) + - shift); } } - for (int s = 0; s < this.getSignalCount(); s++) { - if (Math.abs(this.getShift(s, dim) - shift) == minDiff) { + for (int s = 0; s + < this.getSignalCount(); s++) { + if (Math.abs(this.getShift(s, dim) + - shift) + == minDiff) { matchIndices.add(s); } } @@ -323,21 +355,28 @@ public 
List pickSignals(final Double shift, final int dim, final double if (!this.containsDim(dim)) { return pickedSignals; } - for (int s = 0; s < this.getSignalCount(); s++) { - if (Math.abs(this.getShift(s, dim) - shift) <= pickPrecision) { + for (int s = 0; s + < this.getSignalCount(); s++) { + if (Math.abs(this.getShift(s, dim) + - shift) + <= pickPrecision) { pickedSignals.add(s); } } // sort signal indices by distance to query shift - pickedSignals.sort(Comparator.comparingDouble(pickedSignalIndex -> Math.abs(shift - this.getShift(pickedSignalIndex, dim)))); + pickedSignals.sort(Comparator.comparingDouble(pickedSignalIndex -> Math.abs(shift + - this.getShift( + pickedSignalIndex, dim)))); return pickedSignals; } public Spectrum buildClone() { final Spectrum clone = new Spectrum(this.getNuclei()); - for (int i = 0; i < this.getSignalCount(); i++) { - clone.addSignal(this.getSignal(i).buildClone()); + for (int i = 0; i + < this.getSignalCount(); i++) { + clone.addSignal(this.getSignal(i) + .buildClone()); } clone.setSpecDescription(this.description); clone.setSolvent(this.solvent); @@ -350,7 +389,26 @@ public Spectrum buildClone() { @Override public String toString() { - return "Spectrum{" + "description='" + description + '\'' + ", specType='" + specType + '\'' + ", spectrometerFrequency=" + spectrometerFrequency + ", solvent='" + solvent + '\'' + ", standard='" + standard + '\'' + ", signals=" + signals + ", signalCount=" + signalCount + '}'; + return "Spectrum{" + + "description='" + + description + + '\'' + + ", specType='" + + specType + + '\'' + + ", spectrometerFrequency=" + + spectrometerFrequency + + ", solvent='" + + solvent + + '\'' + + ", standard='" + + standard + + '\'' + + ", signals=" + + signals + + ", signalCount=" + + signalCount + + '}'; } public String getDescription() { diff --git a/src/casekit/nmr/match/Matcher.java b/src/casekit/nmr/utils/Match.java similarity index 77% rename from src/casekit/nmr/match/Matcher.java rename to 
src/casekit/nmr/utils/Match.java index 11c23a8..57d2430 100644 --- a/src/casekit/nmr/match/Matcher.java +++ b/src/casekit/nmr/utils/Match.java @@ -10,7 +10,7 @@ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -package casekit.nmr.match; +package casekit.nmr.utils; import casekit.nmr.Utils; import casekit.nmr.model.Assignment; @@ -22,7 +22,7 @@ import java.util.*; -public class Matcher { +public class Match { /** @@ -35,8 +35,10 @@ public class Matcher { * * @return true if both spectra contain the selected dimension */ - private static boolean checkDimensions(final Spectrum spectrum1, final Spectrum spectrum2, final int dim1, final int dim2) { - return spectrum1.containsDim(dim1) && spectrum2.containsDim(dim2); + private static boolean checkDimensions(final Spectrum spectrum1, final Spectrum spectrum2, final int dim1, + final int dim2) { + return spectrum1.containsDim(dim1) + && spectrum2.containsDim(dim2); } /** @@ -53,8 +55,9 @@ private static boolean checkDimensions(final Spectrum spectrum1, final Spectrum * * @return null if one spectrum does not contain the selected dimension */ - public static Spectrum combineSpectra(final Spectrum spectrum1, final Spectrum spectrum2, final int dim1, final int dim2, final double pickPrecision) throws Exception { - if (!Matcher.checkDimensions(spectrum1, spectrum2, dim1, dim2)) { + public static Spectrum combineSpectra(final Spectrum spectrum1, final Spectrum spectrum2, final int dim1, + final int dim2, final double pickPrecision) throws Exception { + if (!Match.checkDimensions(spectrum1, spectrum2, dim1, dim2)) { 
return null; } // create new spectra which is to fill with signals of both spectra @@ -79,13 +82,18 @@ public static Spectrum combineSpectra(final Spectrum spectrum1, final Spectrum s * * @throws CDKException */ - public static Float calculateTanimotoCoefficient(final Spectrum spectrum1, final Spectrum spectrum2, final int dim1, final int dim2) throws CDKException { - if (!Matcher.checkDimensions(spectrum1, spectrum2, dim1, dim2)) { + public static Float calculateTanimotoCoefficient(final Spectrum spectrum1, final Spectrum spectrum2, final int dim1, + final int dim2) throws CDKException { + if (!Match.checkDimensions(spectrum1, spectrum2, dim1, dim2)) { return null; } - final double[] shiftsSpectrum1 = ArrayUtils.toPrimitive(spectrum1.getShifts(dim1).toArray(new Double[spectrum1.getSignalCount()])); + final double[] shiftsSpectrum1 = ArrayUtils.toPrimitive(spectrum1.getShifts(dim1) + .toArray( + new Double[spectrum1.getSignalCount()])); Arrays.parallelSort(shiftsSpectrum1); - final double[] shiftsSpectrum2 = ArrayUtils.toPrimitive(spectrum2.getShifts(dim2).toArray(new Double[spectrum2.getSignalCount()])); + final double[] shiftsSpectrum2 = ArrayUtils.toPrimitive(spectrum2.getShifts(dim2) + .toArray( + new Double[spectrum2.getSignalCount()])); Arrays.parallelSort(shiftsSpectrum2); return Tanimoto.calculate(shiftsSpectrum1, shiftsSpectrum2); @@ -105,16 +113,21 @@ public static Float calculateTanimotoCoefficient(final Spectrum spectrum1, final * * @see #matchSpectra(Spectrum, Spectrum, int, int, double) */ - public static Double[] getDeviations(final Spectrum spectrum1, final Spectrum spectrum2, final int dim1, final int dim2, final double shiftTol) { + public static Double[] getDeviations(final Spectrum spectrum1, final Spectrum spectrum2, final int dim1, + final int dim2, final double shiftTol) { final Double[] deviations = new Double[spectrum1.getSignalCount()]; - final Assignment matchAssignments = Matcher.matchSpectra(spectrum1, spectrum2, dim1, dim2, shiftTol); 
+ final Assignment matchAssignments = Match.matchSpectra(spectrum1, spectrum2, dim1, dim2, shiftTol); Signal matchedSignalInSpectrum2; - for (int i = 0; i < spectrum1.getSignalCount(); i++) { - if (matchAssignments.getAssignment(0, i) == -1) { + for (int i = 0; i + < spectrum1.getSignalCount(); i++) { + if (matchAssignments.getAssignment(0, i) + == -1) { deviations[i] = null; } else { matchedSignalInSpectrum2 = spectrum2.getSignal(matchAssignments.getAssignment(0, i)); - deviations[i] = Math.abs(spectrum1.getSignal(i).getShift(dim1) - matchedSignalInSpectrum2.getShift(dim2)); + deviations[i] = Math.abs(spectrum1.getSignal(i) + .getShift(dim1) + - matchedSignalInSpectrum2.getShift(dim2)); } } return deviations; @@ -130,7 +143,8 @@ public static Double[] getDeviations(final Spectrum spectrum1, final Spectrum sp public static Double calculateAverageDeviation(final Double[] deviations) { // every signal has to have a match for (final Double deviation : deviations) { - if (deviation == null) { + if (deviation + == null) { return null; } } @@ -154,8 +168,9 @@ public static Double calculateAverageDeviation(final Double[] deviations) { * @see #getDeviations(Spectrum, Spectrum, int, int, double) * @see #calculateAverageDeviation(Double[]) */ - public static Double calculateAverageDeviation(final Spectrum spectrum1, final Spectrum spectrum2, final int dim1, final int dim2, final double shiftTol) { - return Matcher.calculateAverageDeviation(Matcher.getDeviations(spectrum1, spectrum2, dim1, dim2, shiftTol)); + public static Double calculateAverageDeviation(final Spectrum spectrum1, final Spectrum spectrum2, final int dim1, + final int dim2, final double shiftTol) { + return Match.calculateAverageDeviation(Match.getDeviations(spectrum1, spectrum2, dim1, dim2, shiftTol)); } /** @@ -174,23 +189,31 @@ public static Double calculateAverageDeviation(final Spectrum spectrum1, final S * in query spectrum; null if one of the spectra does not * contain the selected dimension */ - public 
static Assignment matchSpectra(final Spectrum spectrum, final Spectrum querySpectrum, final int dim1, final int dim2, final double shiftTol) { - if (!Matcher.checkDimensions(spectrum, querySpectrum, dim1, dim2)) { + public static Assignment matchSpectra(final Spectrum spectrum, final Spectrum querySpectrum, final int dim1, + final int dim2, final double shiftTol) { + if (!Match.checkDimensions(spectrum, querySpectrum, dim1, dim2)) { return null; } final Assignment matchAssignments = new Assignment(spectrum); final Set assigned = new HashSet<>(); List pickedSignalIndicesSpectrum2; - for (int i = 0; i < spectrum.getSignalCount(); i++) { - if (spectrum.getShift(i, dim1) == null) + for (int i = 0; i + < spectrum.getSignalCount(); i++) { + if (spectrum.getShift(i, dim1) + == null) { continue; + } // @TODO add solvent deviation value for picking closest signal(s) pickedSignalIndicesSpectrum2 = new ArrayList<>(); - for (final int pickedSignalIndexSpectrum2 : querySpectrum.pickClosestSignal(spectrum.getShift(i, dim1), dim2, shiftTol)) { + for (final int pickedSignalIndexSpectrum2 : querySpectrum.pickClosestSignal(spectrum.getShift(i, dim1), + dim2, shiftTol)) { // @TODO maybe consider further parameters to check ? e.g. 
intensity - if (querySpectrum.getMultiplicity(pickedSignalIndexSpectrum2).equals(spectrum.getMultiplicity(i)) && querySpectrum.getEquivalencesCount(pickedSignalIndexSpectrum2) <= spectrum.getEquivalencesCount(i)) { + if (querySpectrum.getMultiplicity(pickedSignalIndexSpectrum2) + .equals(spectrum.getMultiplicity(i)) + && querySpectrum.getEquivalencesCount(pickedSignalIndexSpectrum2) + <= spectrum.getEquivalencesCount(i)) { pickedSignalIndicesSpectrum2.add(pickedSignalIndexSpectrum2); } } @@ -227,13 +250,19 @@ public static Assignment matchSpectra(final Spectrum spectrum, final Spectrum qu * * @see #matchSpectra(Spectrum, Spectrum, int, int, double) */ - public static Assignment matchSpectra(final Spectrum spectrum1, final Spectrum spectrum2, final double[] shiftTols) { - if ((spectrum1.getNDim() != spectrum2.getNDim()) || (spectrum1.getNDim() != shiftTols.length)) { + public static Assignment matchSpectra(final Spectrum spectrum1, final Spectrum spectrum2, + final double[] shiftTols) { + if ((spectrum1.getNDim() + != spectrum2.getNDim()) + || (spectrum1.getNDim() + != shiftTols.length)) { return null; } final Assignment matchAssignment = new Assignment(spectrum1); - for (int dim = 0; dim < spectrum1.getNDim(); dim++) { - matchAssignment.setAssignments(dim, Matcher.matchSpectra(spectrum1, spectrum2, dim, dim, shiftTols[dim]).getAssignments(0)); + for (int dim = 0; dim + < spectrum1.getNDim(); dim++) { + matchAssignment.setAssignments(dim, Match.matchSpectra(spectrum1, spectrum2, dim, dim, shiftTols[dim]) + .getAssignments(0)); } return matchAssignment; @@ -253,7 +282,7 @@ public static Assignment matchSpectra(final Spectrum spectrum1, final Spectrum s // * // * @param shiftList1 Shift value list to search in // * @param shiftList2 Shift value list to match in shiftList1 - // * @param matchesInShiftList1 Matcher list to correct + // * @param matchesInShiftList1 Match list to correct // * @param tol Tolerance value // * @return // */ diff --git 
a/src/casekit/nmr/predict/Predict.java b/src/casekit/nmr/utils/Predict.java similarity index 86% rename from src/casekit/nmr/predict/Predict.java rename to src/casekit/nmr/utils/Predict.java index 28f17b9..b001f4a 100644 --- a/src/casekit/nmr/predict/Predict.java +++ b/src/casekit/nmr/utils/Predict.java @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ -package casekit.nmr.predict; +package casekit.nmr.utils; import casekit.nmr.Utils; @@ -82,16 +82,21 @@ public static Double predictShift(final Map> HOSECodeL * @throws CDKException * @see #predictShift(HashMap, String) */ - public static Signal predictSignal(final Map> HOSECodeLookupTable, final IAtomContainer ac, final int atomIndex, final Integer maxSphere, final String nucleus) throws Exception { + public static Signal predictSignal(final Map> HOSECodeLookupTable, + final IAtomContainer ac, final int atomIndex, final Integer maxSphere, + final String nucleus) throws Exception { if (!Utils.checkIndexInAtomContainer(ac, atomIndex)) { return null; } final String HOSECode = HOSECodeBuilder.buildHOSECode(ac, atomIndex, maxSphere, false); final Double predictedShift = Predict.predictShift(HOSECodeLookupTable, HOSECode); - if (predictedShift == null) { + if (predictedShift + == null) { return null; } - return new Signal(new String[]{nucleus}, new Double[]{predictedShift}, Utils.getMultiplicityFromProtonsCount(ac.getAtom(atomIndex).getImplicitHydrogenCount()), "signal", null, 0, 0); + return new Signal(new String[]{nucleus}, new Double[]{predictedShift}, Utils.getMultiplicityFromProtonsCount( + ac.getAtom(atomIndex) + .getImplicitHydrogenCount()), "signal", null, 1, 0); } /** @@ -110,13 +115,17 @@ public static Signal predictSignal(final Map> HOSECode * @throws org.openscience.cdk.exception.CDKException * @see #predictSignal(HashMap, IAtomContainer, int, Integer, String) */ - public static Spectrum predictSpectrum(final HashMap> HOSECodeLookupTable, final 
IAtomContainer ac, final Integer maxSphere, final String nucleus) throws Exception { + public static Spectrum predictSpectrum(final HashMap> HOSECodeLookupTable, + final IAtomContainer ac, final Integer maxSphere, + final String nucleus) throws Exception { final Spectrum predictedSpectrum = new Spectrum(new String[]{nucleus}); Signal signal; for (final IAtom atom : ac.atoms()) { - if (atom.getSymbol().equals(casekit.nmr.utils.Utils.getAtomTypeFromSpectrum(predictedSpectrum, 0))) { + if (atom.getSymbol() + .equals(casekit.nmr.utils.Utils.getAtomTypeFromSpectrum(predictedSpectrum, 0))) { signal = Predict.predictSignal(HOSECodeLookupTable, ac, atom.getIndex(), maxSphere, nucleus); - if (signal == null) { + if (signal + == null) { continue; // return null; } From 0b32ca86d655b41829be7faf54a0d1b0e3cad66d Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Tue, 30 Mar 2021 00:43:19 +0200 Subject: [PATCH 161/405] decoupled Signal, Spectrum and Assignment class from Dimensional; added RMSD calculation --- src/casekit/nmr/Utils.java | 87 +++----- src/casekit/nmr/model/Assignment.java | 197 +++++++++++------- src/casekit/nmr/model/Signal.java | 65 ++++-- src/casekit/nmr/model/Spectrum.java | 39 +++- .../nmr/model/dimensional/Dimensional.java | 75 ------- src/casekit/nmr/utils/Match.java | 40 ++++ src/casekit/nmr/utils/Utils.java | 36 ++++ 7 files changed, 302 insertions(+), 237 deletions(-) delete mode 100644 src/casekit/nmr/model/dimensional/Dimensional.java diff --git a/src/casekit/nmr/Utils.java b/src/casekit/nmr/Utils.java index fdbd49c..bead5ed 100644 --- a/src/casekit/nmr/Utils.java +++ b/src/casekit/nmr/Utils.java @@ -177,7 +177,7 @@ public static String getIsotopeIdentifier(final String element) { public static Set getAtomTypesInAtomContainer(final IAtomContainer ac) { final HashSet atomTypes = new HashSet<>(); - for (IAtom atom : ac.atoms()) { + for (final IAtom atom : ac.atoms()) { atomTypes.add(atom.getSymbol()); } @@ -452,61 +452,24 @@ public static String 
getFileFormat(final String pathToFile) { } - /** - * @param data - * - * @return - */ - public static Double getRMS(final ArrayList data) { - if ((data - == null) - || data.isEmpty()) { - return null; - } - if (data.size() - == 1) { - return data.get(0); - } - int nullCounter = 0; - double qSum = 0; - for (final Double d : data) { - if (d - != null) { - qSum += d - * d; - } else { - nullCounter++; - } - } - - return ((data.size() - - nullCounter) - != 0) - ? Math.sqrt(qSum - / (data.size() - - nullCounter)) - : null; - } - - - /** - * @param lookup - * - * @return - */ - public static Map getRMS(final Map> lookup) { - final HashMap rms = new HashMap<>(); - Double rmsInList; - for (final String key : lookup.keySet()) { - rmsInList = Utils.getRMS(lookup.get(key)); - if (rmsInList - != null) { - rms.put(key, rmsInList); - } - } - - return rms; - } + // /** + // * @param lookup + // * + // * @return + // */ + // public static Map getRMSD(final Map> lookup) { + // final HashMap rmsd = new HashMap<>(); + // Double rmsdInList; + // for (final String key : lookup.keySet()) { + // rmsdInList = casekit.nmr.utils.Utils.getRMSD(lookup.get(key)); + // if (rmsdInList + // != null) { + // rmsd.put(key, rmsdInList); + // } + // } + // + // return rmsd; + // } public static Boolean isSaturated(final IAtomContainer ac, final int atomIndex) { if (!Utils.checkIndexInAtomContainer(ac, atomIndex)) { @@ -521,7 +484,7 @@ public static Boolean isSaturated(final IAtomContainer ac, final int atomIndex) public static void addImplicitHydrogens(final IAtomContainer ac) throws CDKException { final CDKAtomTypeMatcher matcher = CDKAtomTypeMatcher.getInstance(ac.getBuilder()); IAtomType type; - for (IAtom atom : ac.atoms()) { + for (final IAtom atom : ac.atoms()) { type = matcher.findMatchingAtomType(ac, atom); AtomTypeManipulator.configure(atom, type); } @@ -691,13 +654,13 @@ public static void setAromaticityAndKekulize(final IAtomContainer ac) throws CDK public static IAtomContainer 
removeAtoms(final IAtomContainer ac, final String atomType) { final ArrayList toRemoveList = new ArrayList<>(); - for (IAtom atomA : ac.atoms()) { + for (final IAtom atomA : ac.atoms()) { if (atomA.getSymbol() .equals(atomType)) {// detect whether the current atom A is a from the given atom type toRemoveList.add(atomA); } } - for (IAtom iAtom : toRemoveList) { + for (final IAtom iAtom : toRemoveList) { ac.removeAtom(iAtom); } @@ -734,7 +697,7 @@ public static void stopExecuter(final ExecutorService executor, final long secon System.err.println("killing non-finished tasks!"); executor.shutdownNow(); } - } catch (InterruptedException e) { + } catch (final InterruptedException e) { System.err.println("killing non-finished tasks!"); executor.shutdownNow(); } @@ -748,7 +711,7 @@ public static void stopExecuter(final ExecutorService executor, final long secon * @return */ public static IBond.Order getBondOrder(final int orderAsNumeric) { - for (IBond.Order order : IBond.Order.values()) { + for (final IBond.Order order : IBond.Order.values()) { if (order.numeric() == orderAsNumeric) { return order; @@ -763,7 +726,7 @@ public static Float getBondOrderAsNumeric(final IBond bond) { == null) { return null; } - float bondOrderAsNumeric; + final float bondOrderAsNumeric; if (bond.isAromatic()) { bondOrderAsNumeric = (float) 1.5; } else { diff --git a/src/casekit/nmr/model/Assignment.java b/src/casekit/nmr/model/Assignment.java index 53378ff..e6fb979 100644 --- a/src/casekit/nmr/model/Assignment.java +++ b/src/casekit/nmr/model/Assignment.java @@ -23,7 +23,6 @@ */ package casekit.nmr.model; -import casekit.nmr.model.dimensional.Dimensional; import org.apache.commons.lang3.ArrayUtils; import java.util.ArrayList; @@ -31,11 +30,12 @@ import java.util.List; /** - * * @author Michael Wenk [https://github.com/michaelwenk] */ -public class Assignment extends Dimensional implements Cloneable { - +public class Assignment + implements Cloneable { + + private String[] nuclei; private 
int[][] assignments; @@ -43,118 +43,155 @@ public Assignment() { } public Assignment(final String[] nuclei, final int[][] assignments) { - super(nuclei); + this.nuclei = nuclei; this.assignments = assignments; } public Assignment(final Spectrum spectrum) { - super(spectrum.getNuclei()); - this.assignments = this.initAssignments(this.getNDim(), spectrum.getSignalCount()); + this.nuclei = spectrum.getNuclei(); + this.assignments = this.initAssignments(this.nuclei.length, spectrum.getSignalCount()); } - - private int[][] initAssignments(final int nDim, final int nSignals){ + + private int[][] initAssignments(final int nDim, final int nSignals) { final int[][] temp = new int[nDim][nSignals]; - for (int i = 0; i < nDim; i++) { - for (int j = 0; j < nSignals; j++) { + for (int i = 0; i + < nDim; i++) { + for (int j = 0; j + < nSignals; j++) { temp[i][j] = -1; } } - + return temp; } - + + public String[] getNuclei() { + return this.nuclei; + } + + public void setNuclei(final String[] nuclei) { + this.nuclei = nuclei; + } + + public int getNDim() { + return this.getNuclei().length; + } + + public boolean containsDim(final int dim) { + return dim + >= 0 + && dim + <= this.getNDim(); + } + + public boolean compareNuclei(final String[] nuclei) { + return Arrays.equals(this.getNuclei(), nuclei); + } + /** * Sets an assignment with value for an index position. 
* * @param dim * @param index * @param assignment + * * @return */ - public boolean setAssignment(final int dim, final int index, final int assignment){ - if(!this.containsDim(dim) || !this.checkIndex(dim, index)){ + public boolean setAssignment(final int dim, final int index, final int assignment) { + if (!this.containsDim(dim) + || !this.checkIndex(dim, index)) { return false; } this.assignments[dim][index] = assignment; - + return true; } - - public boolean setAssignments(final int dim, final List assignments){ - if(!this.containsDim(dim) || !this.checkInputListSize(assignments.size())){ + + public boolean setAssignments(final int dim, final List assignments) { + if (!this.containsDim(dim) + || !this.checkInputListSize(assignments.size())) { return false; } - for (int i = 0; i < this.getAssignmentsCount(); i++) { + for (int i = 0; i + < this.getAssignmentsCount(); i++) { this.setAssignment(dim, i, assignments.get(i)); } - + return true; } - - public Integer getAssignment(final int dim, final int index){ - if(!this.containsDim(dim) || !this.checkIndex(dim, index)){ + + public Integer getAssignment(final int dim, final int index) { + if (!this.containsDim(dim) + || !this.checkIndex(dim, index)) { return null; } return this.assignments[dim][index]; } - - public Integer getIndex(final int dim, final int assignment){ - if(!this.containsDim(dim)){ + + public Integer getIndex(final int dim, final int assignment) { + if (!this.containsDim(dim)) { return null; } - for (int index = 0; index < this.assignments[dim].length; index++) { - if(this.getAssignment(dim, index) == assignment){ + for (int index = 0; index + < this.assignments[dim].length; index++) { + if (this.getAssignment(dim, index) + == assignment) { return index; } } - + return -1; } - - public ArrayList getAssignments(final int dim){ - if(!this.containsDim(dim)){ + + public ArrayList getAssignments(final int dim) { + if (!this.containsDim(dim)) { return null; } return new 
ArrayList<>(Arrays.asList(ArrayUtils.toObject(this.assignments[dim]))); } - - public int getAssignmentsCount(){ - if(this.getNDim() > 0){ + + public int getAssignmentsCount() { + if (this.getNDim() + > 0) { return this.assignments[0].length; } return 0; } - - public int getSetAssignmentsCount(final int dim){ + + public int getSetAssignmentsCount(final int dim) { int setAssignmentsCounter = 0; - if(this.containsDim(dim)){ - for (int j = 0; j < this.assignments[dim].length; j++) { - if(this.assignments[dim][j] != -1){ + if (this.containsDim(dim)) { + for (int j = 0; j + < this.assignments[dim].length; j++) { + if (this.assignments[dim][j] + != -1) { setAssignmentsCounter++; } } } return setAssignmentsCounter; } - - public Boolean isFullyAssigned(final int dim){ - if(!this.containsDim(dim)){ + + public Boolean isFullyAssigned(final int dim) { + if (!this.containsDim(dim)) { return null; } - - return this.getSetAssignmentsCount(dim) == this.getAssignmentsCount(); + + return this.getSetAssignmentsCount(dim) + == this.getAssignmentsCount(); } - public Boolean isAssigned(final int dim, final int index){ - if(!this.containsDim(dim)){ + public Boolean isAssigned(final int dim, final int index) { + if (!this.containsDim(dim)) { return null; } - return this.getAssignment(dim, index) != -1; + return this.getAssignment(dim, index) + != -1; } - + /** * Adds a new assignment entry (index), e.g. for a new signal. The given assignment indices * will be stored for each dimension of the new assignment entry (index). 
@@ -163,33 +200,41 @@ public Boolean isAssigned(final int dim, final int index){ * * @return */ - public boolean addAssignment(final int[] assignment){ - if(!this.compareNDim(assignments.length)){ + public boolean addAssignment(final int[] assignment) { + if (this.getNDim() + != assignments.length) { return false; } - final int[][] extendedAssignments = new int[this.getNDim()][this.getAssignmentsCount() + 1]; - for (int dim = 0; dim < this.getNDim(); dim++) { - for (int i = 0; i < this.getAssignmentsCount(); i++) { + final int[][] extendedAssignments = new int[this.getNDim()][this.getAssignmentsCount() + + 1]; + for (int dim = 0; dim + < this.getNDim(); dim++) { + for (int i = 0; i + < this.getAssignmentsCount(); i++) { extendedAssignments[dim][i] = this.getAssignment(dim, i); } extendedAssignments[dim][this.getAssignmentsCount()] = assignment[dim]; } this.assignments = extendedAssignments; - + return true; } - public boolean removeAssignment(final int index){ - if(!this.checkIndex(0, index)){ + public boolean removeAssignment(final int index) { + if (!this.checkIndex(0, index)) { return false; } - final int[][] reducedAssignments = new int[this.getNDim()][this.getAssignmentsCount() - 1]; + final int[][] reducedAssignments = new int[this.getNDim()][this.getAssignmentsCount() + - 1]; int nextIndexToInsertCounter = 0; - for (int i = 0; i < this.getAssignmentsCount(); i++) { - if(i == index){ + for (int i = 0; i + < this.getAssignmentsCount(); i++) { + if (i + == index) { continue; } - for (int dim = 0; dim < this.getNDim(); dim++) { + for (int dim = 0; dim + < this.getNDim(); dim++) { reducedAssignments[dim][nextIndexToInsertCounter] = this.getAssignment(dim, i); } nextIndexToInsertCounter++; @@ -199,16 +244,20 @@ public boolean removeAssignment(final int index){ return true; } - private boolean checkIndex(final int dim, final int index){ - return (index >= 0) && (index < this.assignments[dim].length); + private boolean checkIndex(final int dim, final int index) { + 
return (index + >= 0) + && (index + < this.assignments[dim].length); + } + + private boolean checkInputListSize(final int size) { + return (size + == this.getAssignmentsCount()); } - - private boolean checkInputListSize(final int size){ - return (size == this.getAssignmentsCount()); - } - + @Override - public Assignment clone() throws CloneNotSupportedException{ + public Assignment clone() throws CloneNotSupportedException { return (Assignment) super.clone(); } @@ -217,11 +266,13 @@ public String toString() { final StringBuilder stringBuilder = new StringBuilder(); stringBuilder.append("Assignments:\n"); - for (int i = 0; i < this.getNDim(); i++) { - stringBuilder.append(Arrays.toString(this.assignments[i])).append("\n"); + for (int i = 0; i + < this.getNDim(); i++) { + stringBuilder.append(Arrays.toString(this.assignments[i])) + .append("\n"); } - return stringBuilder.toString(); + return stringBuilder.toString(); } public int[][] getAssignments() { diff --git a/src/casekit/nmr/model/Signal.java b/src/casekit/nmr/model/Signal.java index c20a346..08c68aa 100644 --- a/src/casekit/nmr/model/Signal.java +++ b/src/casekit/nmr/model/Signal.java @@ -27,15 +27,14 @@ */ package casekit.nmr.model; -import casekit.nmr.model.dimensional.Dimensional; - import java.util.Arrays; /** * @author Michael Wenk [https://github.com/michaelwenk] */ -public class Signal extends Dimensional { +public class Signal { + private String[] nuclei; private Double[] shifts; private String multiplicity; private Double intensity; @@ -51,9 +50,10 @@ public Signal(final String[] nuclei) { this(nuclei, null, null, null, null, 1, 0); } - public Signal(final String[] nuclei, final Double[] shifts, final String multiplicity, final String kind, final Double intensity, final int equivalencesCount, final int phase) { - super(nuclei); - this.shifts = shifts; // this.initShifts(shifts, this.getNDim()); + public Signal(final String[] nuclei, final Double[] shifts, final String multiplicity, final String kind, 
+ final Double intensity, final int equivalencesCount, final int phase) { + this.nuclei = nuclei; + this.shifts = shifts; this.multiplicity = multiplicity; this.kind = kind; this.intensity = intensity; @@ -61,17 +61,28 @@ public Signal(final String[] nuclei, final Double[] shifts, final String multipl this.phase = phase; } - // private Double[] initShifts(final Double[] shifts, final int nDim) { - //// if((shifts == null) || (shifts.length != nDim)){ - //// throw new Exception("Number of given nuclei (" + nDim + ") and shifts (" + shifts.length + ") is not the same!!!"); - //// } - // final Double[] tempShifts = new Double[nDim]; - // for (int d = 0; d < nDim; d++) { - // tempShifts[d] = shifts[d]; - // } - // - // return tempShifts; - // } + public String[] getNuclei() { + return this.nuclei; + } + + public void setNuclei(final String[] nuclei) { + this.nuclei = nuclei; + } + + public int getNDim() { + return this.getNuclei().length; + } + + public boolean containsDim(final int dim) { + return dim + >= 0 + && dim + <= this.getNDim(); + } + + public boolean compareNuclei(final String[] nuclei) { + return Arrays.equals(this.getNuclei(), nuclei); + } public boolean setShift(final Double shift, final int dim) { if (!this.containsDim(dim)) { @@ -123,12 +134,28 @@ public void setKind(final String kind) { public Signal buildClone() { - return new Signal(this.getNuclei(), this.shifts, this.multiplicity, this.kind, this.intensity, this.equivalencesCount, this.phase); + return new Signal(this.getNuclei(), this.shifts, this.multiplicity, this.kind, this.intensity, + this.equivalencesCount, this.phase); } @Override public String toString() { - return "Signal{" + "shifts=" + Arrays.toString(shifts) + ", multiplicity='" + multiplicity + '\'' + ", intensity=" + intensity + ", kind='" + kind + '\'' + ", equivalencesCount=" + equivalencesCount + ", phase=" + phase + '}'; + return "Signal{" + + "shifts=" + + Arrays.toString(shifts) + + ", multiplicity='" + + multiplicity + + '\'' + 
+ ", intensity=" + + intensity + + ", kind='" + + kind + + '\'' + + ", equivalencesCount=" + + equivalencesCount + + ", phase=" + + phase + + '}'; } public Double[] getShifts() { diff --git a/src/casekit/nmr/model/Spectrum.java b/src/casekit/nmr/model/Spectrum.java index a8adee8..c1ef3ce 100644 --- a/src/casekit/nmr/model/Spectrum.java +++ b/src/casekit/nmr/model/Spectrum.java @@ -28,9 +28,8 @@ */ package casekit.nmr.model; -import casekit.nmr.model.dimensional.Dimensional; - import java.util.ArrayList; +import java.util.Arrays; import java.util.Comparator; import java.util.List; import java.util.stream.Collectors; @@ -38,9 +37,9 @@ /** * @author Michael Wenk [https://github.com/michaelwenk] */ -public class Spectrum - extends Dimensional { +public class Spectrum { + private String[] nuclei; /** * An arbitrary name or description that can be assigned to this spectrum for identification purposes. */ @@ -64,14 +63,15 @@ public Spectrum() { } public Spectrum(final String[] nuclei) { - super(nuclei); + this.nuclei = nuclei; this.signals = new ArrayList<>(); this.signalCount = 0; } - public Spectrum(String[] nuclei, String description, String specType, Double spectrometerFrequency, String solvent, - String standard, List signals, int signalCount) { - super(nuclei); + public Spectrum(final String[] nuclei, final String description, final String specType, + final Double spectrometerFrequency, final String solvent, final String standard, + final List signals, final int signalCount) { + this.nuclei = nuclei; this.description = description; this.specType = specType; this.spectrometerFrequency = spectrometerFrequency; @@ -81,6 +81,29 @@ public Spectrum(String[] nuclei, String description, String specType, Double spe this.signalCount = signalCount; } + public String[] getNuclei() { + return this.nuclei; + } + + public void setNuclei(final String[] nuclei) { + this.nuclei = nuclei; + } + + public int getNDim() { + return this.getNuclei().length; + } + + public boolean 
containsDim(final int dim) { + return dim + >= 0 + && dim + <= this.getNDim(); + } + + public boolean compareNuclei(final String[] nuclei) { + return Arrays.equals(this.getNuclei(), nuclei); + } + public void setSpecType(final String specType) { this.specType = specType; } diff --git a/src/casekit/nmr/model/dimensional/Dimensional.java b/src/casekit/nmr/model/dimensional/Dimensional.java deleted file mode 100644 index e37a00d..0000000 --- a/src/casekit/nmr/model/dimensional/Dimensional.java +++ /dev/null @@ -1,75 +0,0 @@ -/* - * The MIT License - * - * Copyright 2019 Michael Wenk [https://github.com/michaelwenk] - * - * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - -package casekit.nmr.model.dimensional; - -import java.util.Arrays; - -public class Dimensional { - - private String[] nuclei; - private int nDim; - - protected Dimensional() { - } - - protected Dimensional(final String[] nuclei) { - this.nuclei = nuclei; - this.nDim = nuclei.length; - } - - public final String[] getNuclei(){ - return this.nuclei; - } - - public void setNuclei(String[] nuclei) { - this.nuclei = nuclei; - } - - public boolean compareNuclei(final String[] nuclei){ - return Arrays.equals(this.getNuclei(), nuclei); - } - - /** - * Returns the number of dimensions. - * - * @return - */ - public final int getNDim() { - return this.nDim; - } - - public void setNDim(int nDim) { - this.nDim = nDim; - } - - /** - * Checks whether the input dimension exists by dimension number. The dimension - * indexing starts at 0. - * - * @param dim input dimension number - * @return - */ - public final boolean containsDim(final int dim){ - return (dim >= 0) && (dim < this.getNDim()); - } - - /** - * Checks whether the input dimension count is equal to the number of dimensions of this object. - * - * @param nDim number of input dimensions - * @return - */ - public final boolean compareNDim(final int nDim){ - return nDim == this.getNDim(); - } -} diff --git a/src/casekit/nmr/utils/Match.java b/src/casekit/nmr/utils/Match.java index 57d2430..d7b6432 100644 --- a/src/casekit/nmr/utils/Match.java +++ b/src/casekit/nmr/utils/Match.java @@ -173,6 +173,46 @@ public static Double calculateAverageDeviation(final Spectrum spectrum1, final S return Match.calculateAverageDeviation(Match.getDeviations(spectrum1, spectrum2, dim1, dim2, shiftTol)); } + /** + * Returns the average of all deviations within a given input array. 
+ * + * @param data array of deviations + * + * @return + */ + public static Double calculateRMSD(final Double[] data) { + // every signal has to have a match + for (final Double value : data) { + if (value + == null) { + return null; + } + } + + return casekit.nmr.utils.Utils.getRMSD(data); + } + + /** + * Returns the average of all deviations of matched shifts between two + * spectra. + * + * @param spectrum1 first spectrum + * @param spectrum2 second spectrum + * @param dim1 dimension in first spectrum to take the shifts from + * @param dim2 dimension in second spectrum to take the shifts from + * @param shiftTol Tolerance value [ppm] used during peak picking in + * shift comparison + * + * @return + * + * @see #getDeviations(Spectrum, Spectrum, int, int, double) + * @see #calculateAverageDeviation(Double[]) + */ + public static Double calculateRMSD(final Spectrum spectrum1, final Spectrum spectrum2, final int dim1, + final int dim2, final double shiftTol) { + return Match.calculateRMSD(Match.getDeviations(spectrum1, spectrum2, dim1, dim2, shiftTol)); + } + /** * Returns the closest shift matches between two spectra in selected dimensions * as an Assignment object with one set dimension only.
diff --git a/src/casekit/nmr/utils/Utils.java b/src/casekit/nmr/utils/Utils.java index 7133ab4..30653ad 100644 --- a/src/casekit/nmr/utils/Utils.java +++ b/src/casekit/nmr/utils/Utils.java @@ -83,4 +83,40 @@ public static String getSmilesFromAtomContainer(final IAtomContainer ac) throws return smilesGenerator.create(ac); } + /** + * @param data + * + * @return + */ + public static Double getRMSD(final Double[] data) { + if (data + == null + || data.length + == 0) { + return null; + } + if (data.length + == 1) { + return data[0]; + } + int nullCounter = 0; + double qSum = 0; + for (final Double d : data) { + if (d + != null) { + qSum += d + * d; + } else { + nullCounter++; + } + } + + return ((data.length + - nullCounter) + != 0) + ? Math.sqrt(qSum + / (data.length + - nullCounter)) + : null; + } } From e20eae33637e70b28eef0032f0d2135a33208e0e Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Tue, 30 Mar 2021 02:22:00 +0200 Subject: [PATCH 162/405] adaptions from PyLSDInputFileBuilder from WebCASE --- .../nmr/lsd/PyLSDInputFileBuilder.java | 379 +++++++++++++----- .../nmr/model/nmrdisplayer/Correlations.java | 7 +- src/casekit/nmr/utils/Utils.java | 42 +- 3 files changed, 310 insertions(+), 118 deletions(-) diff --git a/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java b/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java index ed22821..dad1aa6 100644 --- a/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java +++ b/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java @@ -13,22 +13,27 @@ public class PyLSDInputFileBuilder { - private static String buildHeader(final String uuid) { + private static String buildHeader() { final StringBuilder stringBuilder = new StringBuilder(); stringBuilder.append("; PyLSD input file created by webCASE\n"); - stringBuilder.append("; ").append(uuid).append("\n"); final SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd 'at' HH:mm:ss z"); final Date date = new Date(System.currentTimeMillis()); - stringBuilder.append("; 
").append(formatter.format(date)); + stringBuilder.append("; ") + .append(formatter.format(date)); return stringBuilder.toString(); } private static String buildFORM(final String mf, final Map elementCounts) { final StringBuilder stringBuilder = new StringBuilder(); - stringBuilder.append("; Molecular Formula: ").append(mf).append("\n"); + stringBuilder.append("; Molecular Formula: ") + .append(mf) + .append("\n"); stringBuilder.append("FORM "); - elementCounts.forEach((elem, count) -> stringBuilder.append(elem).append(" ").append(count).append(" ")); + elementCounts.forEach((elem, count) -> stringBuilder.append(elem) + .append(" ") + .append(count) + .append(" ")); return stringBuilder.toString(); } @@ -41,31 +46,55 @@ private static Map buildIndicesMap(final Data data, final Map // index in correlation data -> [atom type, indices in PyLSD file...] final Map indicesMap = new HashMap<>(); // init element indices within correlations with same order as in correlation data input - final int totalHeavyAtomsCount = elementCounts.entrySet().stream().filter(set -> !set.getKey().equals("H")).map(Map.Entry::getValue).reduce(0, Integer::sum); + final int totalHeavyAtomsCount = elementCounts.entrySet() + .stream() + .filter(set -> !set.getKey() + .equals("H")) + .map(Map.Entry::getValue) + .reduce(0, Integer::sum); int heavyAtomIndexInPyLSDFile = 1; - int protonIndexInPyLSDFile = totalHeavyAtomsCount + 1; + int protonIndexInPyLSDFile = totalHeavyAtomsCount + + 1; int protonsToInsert; Correlation correlation; - for (int i = 0; i < data.getCorrelations().getValues().size(); i++) { - correlation = data.getCorrelations().getValues().get(i); + for (int i = 0; i + < data.getCorrelations() + .getValues() + .size(); i++) { + correlation = data.getCorrelations() + .getValues() + .get(i); // set entry for each correlation with consideration of equivalences - if (correlation.getAtomType().equals("H")) { + if (correlation.getAtomType() + .equals("H")) { protonsToInsert = 0; for (final 
Link link : correlation.getLink()) { - if (link.getExperimentType().equals("hsqc") || link.getExperimentType().equals("hmqc")) { - protonsToInsert += data.getCorrelations().getValues().get(link.getMatch().get(0)).getEquivalence(); + if (link.getExperimentType() + .equals("hsqc") + || link.getExperimentType() + .equals("hmqc")) { + protonsToInsert += data.getCorrelations() + .getValues() + .get(link.getMatch() + .get(0)) + .getEquivalence(); } } - indicesMap.put(i, new Object[1 + protonsToInsert]); + indicesMap.put(i, new Object[1 + + protonsToInsert]); indicesMap.get(i)[0] = correlation.getAtomType(); - for (int j = 0; j < protonsToInsert; j++) { - indicesMap.get(i)[1 + j] = protonIndexInPyLSDFile; + for (int j = 0; j + < protonsToInsert; j++) { + indicesMap.get(i)[1 + + j] = protonIndexInPyLSDFile; protonIndexInPyLSDFile++; } } else { - indicesMap.put(i, new Object[1 + correlation.getEquivalence()]); + indicesMap.put(i, new Object[1 + + correlation.getEquivalence()]); indicesMap.get(i)[0] = correlation.getAtomType(); - for (int j = 1; j <= correlation.getEquivalence(); j++) { + for (int j = 1; j + <= correlation.getEquivalence(); j++) { indicesMap.get(i)[j] = heavyAtomIndexInPyLSDFile; heavyAtomIndexInPyLSDFile++; } @@ -75,20 +104,28 @@ private static Map buildIndicesMap(final Data data, final Map return indicesMap; } - private static String buildMULT(final Correlation correlation, final int index, final Map indicesMap, final Map> detectedHybridizations) { - if (correlation.getAtomType().equals("H")) { + private static String buildMULT(final Correlation correlation, final int index, + final Map indicesMap, + final Map> detectedHybridizations) { + if (correlation.getAtomType() + .equals("H")) { return null; } final StringBuilder stringBuilder = new StringBuilder(); List hybridizations = new ArrayList<>(); - StringBuilder hybridizationStringBuilder; - StringBuilder attachedProtonsCountStringBuilder; + final StringBuilder hybridizationStringBuilder; + final 
StringBuilder attachedProtonsCountStringBuilder; - if (correlation.getHybridization() != null && !correlation.getHybridization().isEmpty()) { + if (correlation.getHybridization() + != null + && !correlation.getHybridization() + .isEmpty()) { // if hybridization is already given - if (correlation.getHybridization().equals("SP")) { + if (correlation.getHybridization() + .equals("SP")) { hybridizations.add(1); - } else if (correlation.getHybridization().equals("SP2")) { + } else if (correlation.getHybridization() + .equals("SP2")) { hybridizations.add(2); } else { hybridizations.add(3); @@ -100,43 +137,68 @@ private static String buildMULT(final Correlation correlation, final int index, } } if (hybridizations.isEmpty()) { - hybridizationStringBuilder = new StringBuilder(Constants.defaultHybridizationMap.get(correlation.getAtomType())); + hybridizationStringBuilder = new StringBuilder( + Constants.defaultHybridizationMap.get(correlation.getAtomType())); } else { hybridizationStringBuilder = new StringBuilder(); - if (hybridizations.size() > 1) { + if (hybridizations.size() + > 1) { hybridizationStringBuilder.append("("); } - for (int k = 0; k < hybridizations.size(); k++) { + for (int k = 0; k + < hybridizations.size(); k++) { hybridizationStringBuilder.append(hybridizations.get(k)); - if (k < hybridizations.size() - 1) { + if (k + < hybridizations.size() + - 1) { hybridizationStringBuilder.append(" "); } } - if (hybridizations.size() > 1) { + if (hybridizations.size() + > 1) { hybridizationStringBuilder.append(")"); } } // set attached protons count attachedProtonsCountStringBuilder = new StringBuilder(); // if protons count is given - if (correlation.getProtonsCount() != null && !correlation.getProtonsCount().isEmpty()) { - if (correlation.getProtonsCount().size() == 1) { - attachedProtonsCountStringBuilder.append(correlation.getProtonsCount().get(0)); + if (correlation.getProtonsCount() + != null + && !correlation.getProtonsCount() + .isEmpty()) { + if 
(correlation.getProtonsCount() + .size() + == 1) { + attachedProtonsCountStringBuilder.append(correlation.getProtonsCount() + .get(0)); } else { attachedProtonsCountStringBuilder.append("("); for (final int protonsCount : correlation.getProtonsCount()) { - attachedProtonsCountStringBuilder.append(protonsCount).append(" "); + attachedProtonsCountStringBuilder.append(protonsCount) + .append(" "); } - attachedProtonsCountStringBuilder.deleteCharAt(attachedProtonsCountStringBuilder.length() - 1); + attachedProtonsCountStringBuilder.deleteCharAt(attachedProtonsCountStringBuilder.length() + - 1); attachedProtonsCountStringBuilder.append(")"); } } else { // if protons count is not given then set it to default value - attachedProtonsCountStringBuilder.append(Constants.defaultProtonsCountPerValencyMap.get(Constants.defaultAtomLabelMap.get(correlation.getAtomType()))); + attachedProtonsCountStringBuilder.append(Constants.defaultProtonsCountPerValencyMap.get( + Constants.defaultAtomLabelMap.get(correlation.getAtomType()))); } - for (int j = 1; j < indicesMap.get(index).length; j++) { - stringBuilder.append("MULT ").append(indicesMap.get(index)[j]).append(" ").append(correlation.getAtomType()).append(" ").append(hybridizationStringBuilder.toString()).append(" ").append(attachedProtonsCountStringBuilder.toString()); - if (j >= 2) { - stringBuilder.append("; equivalent to ").append(indicesMap.get(index)[1]); + for (int j = 1; j + < indicesMap.get(index).length; j++) { + stringBuilder.append("MULT ") + .append(indicesMap.get(index)[j]) + .append(" ") + .append(correlation.getAtomType()) + .append(" ") + .append(hybridizationStringBuilder.toString()) + .append(" ") + .append(attachedProtonsCountStringBuilder.toString()); + if (j + >= 2) { + stringBuilder.append("; equivalent to ") + .append(indicesMap.get(index)[1]); } stringBuilder.append("\n"); } @@ -144,17 +206,27 @@ private static String buildMULT(final Correlation correlation, final int index, return stringBuilder.toString(); 
} - private static String buildHSQC(final Correlation correlation, final int index, final Map indicesMap) { - if (correlation.getAtomType().equals("H")) { + private static String buildHSQC(final Correlation correlation, final int index, + final Map indicesMap) { + if (correlation.getAtomType() + .equals("H")) { return null; } final StringBuilder stringBuilder = new StringBuilder(); for (final Link link : correlation.getLink()) { - if (link.getExperimentType().equals("hsqc") || link.getExperimentType().equals("hmqc")) { + if (link.getExperimentType() + .equals("hsqc") + || link.getExperimentType() + .equals("hmqc")) { for (final int matchIndex : link.getMatch()) { // for each equivalence of heavy atom and attached equivalent proton - for (int k = 1; k < indicesMap.get(index).length; k++) { - stringBuilder.append("HSQC ").append(indicesMap.get(index)[k]).append(" ").append(indicesMap.get(matchIndex)[k]).append("\n"); + for (int k = 1; k + < indicesMap.get(index).length; k++) { + stringBuilder.append("HSQC ") + .append(indicesMap.get(index)[k]) + .append(" ") + .append(indicesMap.get(matchIndex)[k]) + .append("\n"); } } } @@ -163,20 +235,39 @@ private static String buildHSQC(final Correlation correlation, final int index, return stringBuilder.toString(); } - private static String buildHMBC(final Correlation correlation, final int index, final Data data, final Map indicesMap) { - if (correlation.getAtomType().equals("H")) { + private static String buildHMBC(final Correlation correlation, final int index, final Data data, + final Map indicesMap) { + if (correlation.getAtomType() + .equals("H")) { return null; } final String defaultBondDistance = "2 4"; final Set uniqueSet = new LinkedHashSet<>(); // in case of same content exists multiple times for (final Link link : correlation.getLink()) { - if (link.getExperimentType().equals("hmbc")) { + if (link.getExperimentType() + .equals("hmbc")) { for (final int matchIndex : link.getMatch()) { - for (int k = 1; k < 
indicesMap.get(index).length; k++) { - for (int l = 1; l < indicesMap.get(matchIndex).length; l++) { + for (int k = 1; k + < indicesMap.get(index).length; k++) { + for (int l = 1; l + < indicesMap.get(matchIndex).length; l++) { // only add an HMBC correlation if there is no direct link via HSQC and the equivalence index is not equal - if (!(data.getCorrelations().getValues().get(matchIndex).getAttachment().containsKey(correlation.getAtomType()) && data.getCorrelations().getValues().get(matchIndex).getAttachment().get(correlation.getAtomType()).contains(index) && l == k)) { - uniqueSet.add(indicesMap.get(index)[k] + " " + indicesMap.get(matchIndex)[l]); + if (!(data.getCorrelations() + .getValues() + .get(matchIndex) + .getAttachment() + .containsKey(correlation.getAtomType()) + && data.getCorrelations() + .getValues() + .get(matchIndex) + .getAttachment() + .get(correlation.getAtomType()) + .contains(index) + && l + == k)) { + uniqueSet.add(indicesMap.get(index)[k] + + " " + + indicesMap.get(matchIndex)[l]); } } } @@ -184,28 +275,46 @@ private static String buildHMBC(final Correlation correlation, final int index, } } - return uniqueSet.stream().map(str -> "HMBC " + str + " " + defaultBondDistance + "\n").reduce("", (strAll, str) -> strAll + str); + return uniqueSet.stream() + .map(str -> "HMBC " + + str + + " " + + defaultBondDistance + + "\n") + .reduce("", (strAll, str) -> strAll + + str); } - private static String buildCOSY(final Correlation correlation, final int index, final Data data, final Map indicesMap) { - if (!correlation.getAtomType().equals("H")) { + private static String buildCOSY(final Correlation correlation, final int index, final Data data, + final Map indicesMap) { + if (!correlation.getAtomType() + .equals("H")) { return null; } final String defaultBondDistance = "3 4"; final Set uniqueSet = new LinkedHashSet<>(); // in case of same content exists multiple times for (final Link link : correlation.getLink()) { - if 
(link.getExperimentType().equals("cosy")) { + if (link.getExperimentType() + .equals("cosy")) { for (final int matchIndex : link.getMatch()) { // only add an COSY correlation if the two signals there is not equivalent - if (!data.getCorrelations().getValues().get(matchIndex).getId().equals(correlation.getId())) { - for (int k = 1; k < indicesMap.get(index).length; k++) { + if (!data.getCorrelations() + .getValues() + .get(matchIndex) + .getId() + .equals(correlation.getId())) { + for (int k = 1; k + < indicesMap.get(index).length; k++) { // for (int l = 1; l < indicesMap.get(matchIndex).length; l++) { // uniqueSet.add(indicesMap.get(index)[k] + " " + indicesMap.get(matchIndex)[l]); // } // only allow COSY values between possible equivalent protons and only one another non-equivalent proton - if (indicesMap.get(matchIndex).length == 2) { - uniqueSet.add(indicesMap.get(index)[k] + " " + indicesMap.get(matchIndex)[1]); + if (indicesMap.get(matchIndex).length + == 2) { + uniqueSet.add(indicesMap.get(index)[k] + + " " + + indicesMap.get(matchIndex)[1]); } } } @@ -213,31 +322,61 @@ private static String buildCOSY(final Correlation correlation, final int index, } } - return uniqueSet.stream().map(str -> "COSY " + str + " " + defaultBondDistance + "\n").reduce("", (strAll, str) -> strAll + str); + return uniqueSet.stream() + .map(str -> "COSY " + + str + + " " + + defaultBondDistance + + "\n") + .reduce("", (strAll, str) -> strAll + + str); } - private static String buildSHIX(final Correlation correlation, final int index, final Map indicesMap) { - if (correlation.getAtomType().equals("H") || correlation.isPseudo()) { + private static String buildSHIX(final Correlation correlation, final int index, + final Map indicesMap) { + if (correlation.getAtomType() + .equals("H") + || correlation.isPseudo()) { return null; } final StringBuilder stringBuilder = new StringBuilder(); - for (int k = 1; k < indicesMap.get(index).length; k++) { - stringBuilder.append("SHIX 
").append(indicesMap.get(index)[k]).append(" ").append(correlation.getSignal().getDelta()).append("\n"); + for (int k = 1; k + < indicesMap.get(index).length; k++) { + stringBuilder.append("SHIX ") + .append(indicesMap.get(index)[k]) + .append(" ") + .append(correlation.getSignal() + .getDelta()) + .append("\n"); } return stringBuilder.toString(); } - private static String buildSHIH(final Correlation correlation, final int index, final Map indicesMap) { - if (!correlation.getAtomType().equals("H") || correlation.isPseudo()) { + private static String buildSHIH(final Correlation correlation, final int index, + final Map indicesMap) { + if (!correlation.getAtomType() + .equals("H") + || correlation.isPseudo()) { return null; } final StringBuilder stringBuilder = new StringBuilder(); // only consider protons which are attached via HSQC/HMQC (pseudo and real links) for (final Link link : correlation.getLink()) { - if ((link.getExperimentType().equals("hsqc") || link.getExperimentType().equals("hmqc")) && !link.getMatch().isEmpty()) { // && !link.isPseudo() - for (int k = 1; k < indicesMap.get(index).length; k++) { - stringBuilder.append("SHIH ").append(indicesMap.get(index)[k]).append(" ").append(correlation.getSignal().getDelta()).append("\n"); + if ((link.getExperimentType() + .equals("hsqc") + || link.getExperimentType() + .equals("hmqc")) + && !link.getMatch() + .isEmpty()) { // && !link.isPseudo() + for (int k = 1; k + < indicesMap.get(index).length; k++) { + stringBuilder.append("SHIH ") + .append(indicesMap.get(index)[k]) + .append(" ") + .append(correlation.getSignal() + .getDelta()) + .append("\n"); } } } @@ -250,7 +389,8 @@ private static String buildLISTAndPROP(final boolean allowHeteroHeteroBonds) { // LIST PROP for hetero hetero bonds allowance if (!allowHeteroHeteroBonds) { // create hetero atom list automatically - stringBuilder.append("HETE L1").append("; list of hetero atoms\n"); + stringBuilder.append("HETE L1") + .append("; list of hetero atoms\n"); 
stringBuilder.append("PROP L1 0 L1 -; no hetero-hetero bonds\n"); } @@ -265,26 +405,36 @@ private static String buildFilters(final String pathToLSDFilterList) { int counter = 1; try { final BufferedReader bufferedReader = FileSystem.readFile(pathToLSDFilterList); - if (bufferedReader != null) { + if (bufferedReader + != null) { String line; - while ((line = bufferedReader.readLine()) != null) { - filters.put("F" + counter, line); + while ((line = bufferedReader.readLine()) + != null) { + filters.put("F" + + counter, line); counter++; } bufferedReader.close(); } - } catch (IOException e) { + } catch (final IOException e) { e.printStackTrace(); } if (!filters.isEmpty()) { - filters.forEach((label, filePath) -> stringBuilder.append("DEFF ").append(label).append(" \"").append(filePath).append("\"\n")); + filters.forEach((label, filePath) -> stringBuilder.append("DEFF ") + .append(label) + .append(" \"") + .append(filePath) + .append("\"\n")); stringBuilder.append("\n"); stringBuilder.append("FEXP \""); counter = 0; for (final String label : filters.keySet()) { - stringBuilder.append("NOT ").append(label); - if (counter < filters.size() - 1) { + stringBuilder.append("NOT ") + .append(label); + if (counter + < filters.size() + - 1) { stringBuilder.append(" and "); } counter++; @@ -295,18 +445,32 @@ private static String buildFilters(final String pathToLSDFilterList) { return stringBuilder.toString(); } - public static String buildPyLSDFileContent(final Data data, final String mf, final Map> detectedHybridizations, final boolean allowHeteroHeteroBonds, final String pathToLSDFilterList, final String uuid) { - final HashMap> state = data.getCorrelations().getState(); - boolean hasErrors = state.keySet().stream().anyMatch(s -> state.get(s).containsKey("error")); - if (mf != null && !hasErrors) { + public static String buildPyLSDInputFileContent(final Data data, final String mf, + final Map> detectedHybridizations, + final boolean allowHeteroHeteroBonds, + final String 
pathToLSDFilterList) { + final Map> state = data.getCorrelations() + .getState(); + final boolean hasErrors = state.keySet() + .stream() + .anyMatch(atomType -> state.get(atomType) + .containsKey("error") + && !((Map) state.get(atomType) + .get("error")).isEmpty()); + if (mf + != null + && !hasErrors) { final Map elementCounts = new LinkedHashMap<>(Utils.getMolecularFormulaElementCounts(mf)); final StringBuilder stringBuilder = new StringBuilder(); // create header - stringBuilder.append(buildHeader(uuid)).append("\n\n"); + stringBuilder.append(buildHeader()) + .append("\n\n"); // FORM - stringBuilder.append(buildFORM(mf, elementCounts)).append("\n\n"); + stringBuilder.append(buildFORM(mf, elementCounts)) + .append("\n\n"); // PIEC - stringBuilder.append(buildPIEC()).append("\n\n"); + stringBuilder.append(buildPIEC()) + .append("\n\n"); final Map> collection = new LinkedHashMap<>(); collection.put("MULT", new ArrayList<>()); @@ -319,27 +483,46 @@ public static String buildPyLSDFileContent(final Data data, final String mf, fin final Map indicesMap = buildIndicesMap(data, elementCounts); Correlation correlation; - for (int i = 0; i < data.getCorrelations().getValues().size(); i++) { - correlation = data.getCorrelations().getValues().get(i); - collection.get("MULT").add(buildMULT(correlation, i, indicesMap, detectedHybridizations)); - collection.get("HSQC").add(buildHSQC(correlation, i, indicesMap)); - collection.get("HMBC").add(buildHMBC(correlation, i, data, indicesMap)); - collection.get("COSY").add(buildCOSY(correlation, i, data, indicesMap)); - collection.get("SHIX").add(buildSHIX(correlation, i, indicesMap)); - collection.get("SHIH").add(buildSHIH(correlation, i, indicesMap)); + for (int i = 0; i + < data.getCorrelations() + .getValues() + .size(); i++) { + correlation = data.getCorrelations() + .getValues() + .get(i); + collection.get("MULT") + .add(buildMULT(correlation, i, indicesMap, detectedHybridizations)); + collection.get("HSQC") + 
.add(buildHSQC(correlation, i, indicesMap)); + collection.get("HMBC") + .add(buildHMBC(correlation, i, data, indicesMap)); + collection.get("COSY") + .add(buildCOSY(correlation, i, data, indicesMap)); + collection.get("SHIX") + .add(buildSHIX(correlation, i, indicesMap)); + collection.get("SHIH") + .add(buildSHIH(correlation, i, indicesMap)); } - collection.keySet().forEach(key -> { - collection.get(key).stream().filter(Objects::nonNull).forEach(stringBuilder::append); - stringBuilder.append("\n"); - }); + collection.keySet() + .forEach(key -> { + collection.get(key) + .stream() + .filter(Objects::nonNull) + .forEach(stringBuilder::append); + stringBuilder.append("\n"); + }); // BOND (interpretation, INADEQUATE, previous assignments) -> input fragments // LIST PROP for certain limitations or properties of atoms in lists, e.g. hetero hetero bonds allowance - stringBuilder.append(buildLISTAndPROP(allowHeteroHeteroBonds)).append("\n"); + stringBuilder.append(buildLISTAndPROP(allowHeteroHeteroBonds)) + .append("\n"); // DEFF and FEXP as default filters (bad lists) - stringBuilder.append(buildFilters(pathToLSDFilterList)).append("\n"); + stringBuilder.append(buildFilters(pathToLSDFilterList)) + .append("\n"); + + // stringBuilder.append("\n").append("MAXT 30").append("\n"); return stringBuilder.toString(); } diff --git a/src/casekit/nmr/model/nmrdisplayer/Correlations.java b/src/casekit/nmr/model/nmrdisplayer/Correlations.java index 9f12d9e..76bc96c 100644 --- a/src/casekit/nmr/model/nmrdisplayer/Correlations.java +++ b/src/casekit/nmr/model/nmrdisplayer/Correlations.java @@ -29,12 +29,13 @@ import lombok.Setter; import lombok.ToString; -import java.util.HashMap; +import java.util.Map; @NoArgsConstructor @Getter @Setter @ToString -public class Correlations extends Default { - private HashMap> state; +public class Correlations + extends Default { + private Map> state; } diff --git a/src/casekit/nmr/utils/Utils.java b/src/casekit/nmr/utils/Utils.java index 
30653ad..dff108c 100644 --- a/src/casekit/nmr/utils/Utils.java +++ b/src/casekit/nmr/utils/Utils.java @@ -56,23 +56,6 @@ public static String getAtomTypeFromNucleus(final String nucleus) { - 1]; } - public static Map getMolecularFormulaElementCounts(final String mf) { - final LinkedHashMap counts = new LinkedHashMap<>(); - final IMolecularFormula iMolecularFormula = Utils.getMolecularFormulaFromString(mf); - final List elements = new ArrayList<>(); - final Matcher matcher = Pattern.compile("([A-Z][a-z]*)") - .matcher(mf); - - while (matcher.find()) { - elements.add(matcher.group(1)); - } - for (final String element : elements) { - counts.put(element, MolecularFormulaManipulator.getElementCount(iMolecularFormula, element)); - } - - return counts; - } - public static IMolecularFormula getMolecularFormulaFromString(final String mf) { return MolecularFormulaManipulator.getMolecularFormula(mf, SilentChemObjectBuilder.getInstance()); } @@ -119,4 +102,29 @@ public static Double getRMSD(final Double[] data) { - nullCounter)) : null; } + + public static Map getMolecularFormulaElementCounts(final String mf) { + final LinkedHashMap counts = new LinkedHashMap<>(); + final List elements = new ArrayList<>(); + Matcher matcher = Pattern.compile("([A-Z][a-z]{0,1})") + .matcher(mf); + while (matcher.find()) { + elements.add(matcher.group(1)); + } + int count; + for (final String element : elements) { + matcher = Pattern.compile("(" + + element + + "\\d+)") + .matcher(mf); + count = 1; + if (matcher.find()) { + count = Integer.parseInt(matcher.group(1) + .split(element)[1]); + } + counts.put(element, count); + } + + return counts; + } } From b3badc3a92677a082b99a14b1da585dab27e3658 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Tue, 30 Mar 2021 02:43:37 +0200 Subject: [PATCH 163/405] minor --- src/casekit/nmr/core/Elucidation.java | 1 + src/casekit/nmr/dbservice/MongoDB.java | 1 + src/casekit/nmr/remarks | 16 ---------------- 3 files changed, 2 insertions(+), 16 deletions(-) 
delete mode 100644 src/casekit/nmr/remarks diff --git a/src/casekit/nmr/core/Elucidation.java b/src/casekit/nmr/core/Elucidation.java index cb2d454..11d6fd6 100644 --- a/src/casekit/nmr/core/Elucidation.java +++ b/src/casekit/nmr/core/Elucidation.java @@ -8,6 +8,7 @@ import java.util.ArrayList; import java.util.List; +@Deprecated public class Elucidation { public static List findFragments(final Spectrum querySpectrum, final List compoundDataSets, diff --git a/src/casekit/nmr/dbservice/MongoDB.java b/src/casekit/nmr/dbservice/MongoDB.java index 373d904..8b06bf6 100644 --- a/src/casekit/nmr/dbservice/MongoDB.java +++ b/src/casekit/nmr/dbservice/MongoDB.java @@ -15,6 +15,7 @@ /** * @author Michael Wenk [https://github.com/michaelwenk] */ +@Deprecated public class MongoDB { diff --git a/src/casekit/nmr/remarks b/src/casekit/nmr/remarks deleted file mode 100644 index dccc0b3..0000000 --- a/src/casekit/nmr/remarks +++ /dev/null @@ -1,16 +0,0 @@ -general things: -- JUnit -- PMD: CDK GitHub PMD example -- JCoCo -- yourkit (license received) - -coding: - -- in parseRawData class: - - new: usage of Assignment class objects: - -> atom properties for signals, e.g. HMBC, are not longer set - - control function should be implemented for assigning of protons: - -> no. of assigned protons (HSQC, HMBC, ...) vs. DEPT information and for other heavy atoms too - -> how many protons for one shift value (carbons or other heavy atoms) are still left? 
- - some function descriptions (javadoc) are deprecated and have to be updated -- add exceptions where needed \ No newline at end of file From a7ed157d5336c79d4dede88f101830015804986d Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Thu, 1 Apr 2021 17:43:57 +0200 Subject: [PATCH 164/405] assignments can now hold more than only one single assignment index (equivalences); small optimisations --- src/casekit/nmr/Utils.java | 22 --- src/casekit/nmr/core/Dereplication.java | 35 ---- src/casekit/nmr/core/Elucidation.java | 33 ---- src/casekit/nmr/dbservice/NMRShiftDB.java | 45 +++-- src/casekit/nmr/model/Assignment.java | 160 ++++++------------ src/casekit/nmr/model/Signal.java | 82 ++++----- src/casekit/nmr/model/Spectrum.java | 70 ++++---- .../nmr/model/nmrdisplayer/Spectrum.java | 66 +++++--- src/casekit/nmr/utils/Match.java | 127 ++++++-------- src/casekit/nmr/utils/Predict.java | 25 +-- src/casekit/nmr/utils/Utils.java | 37 ++-- 11 files changed, 294 insertions(+), 408 deletions(-) delete mode 100644 src/casekit/nmr/core/Dereplication.java delete mode 100644 src/casekit/nmr/core/Elucidation.java diff --git a/src/casekit/nmr/Utils.java b/src/casekit/nmr/Utils.java index bead5ed..38a17b3 100644 --- a/src/casekit/nmr/Utils.java +++ b/src/casekit/nmr/Utils.java @@ -117,28 +117,6 @@ public static int getDifferenceSpectrumSizeAndMolecularFormulaCount(final Spectr - spectrum.getSignalCountWithEquivalences(); } - /** - * Specified for carbons only -> not generic!!! - * - * @param protonsCount - * - * @return - */ - public static String getMultiplicityFromProtonsCount(final int protonsCount) { - switch (protonsCount) { - case 0: - return "S"; - case 1: - return "D"; - case 2: - return "T"; - case 3: - return "Q"; - default: - return null; - } - } - /** * Returns the casekit.nmr isotope identifier for a given element, e.g. C -> 13C. * Elements defined so far: C, H, N, P, F, D, O, S, Si, B, Pt. 
diff --git a/src/casekit/nmr/core/Dereplication.java b/src/casekit/nmr/core/Dereplication.java deleted file mode 100644 index c1034a5..0000000 --- a/src/casekit/nmr/core/Dereplication.java +++ /dev/null @@ -1,35 +0,0 @@ -package casekit.nmr.core; - -import casekit.nmr.model.Assignment; -import casekit.nmr.model.DataSet; -import casekit.nmr.model.Spectrum; -import casekit.nmr.utils.Match; -import org.openscience.cdk.exception.CDKException; - -import java.util.ArrayList; -import java.util.List; - -public class Dereplication { - - public static List dereplicate1D(final Spectrum querySpectrum, final List compoundDataSets, - final double shiftTol) { - final List solutions = new ArrayList<>(); - - for (final DataSet dataSet : compoundDataSets) { - final Assignment matchAssignment = Match.matchSpectra(dataSet.getSpectrum(), querySpectrum, 0, 0, 1); - if (matchAssignment.isFullyAssigned(0)) { - try { - dataSet.addMetaInfo("tanimoto", String.valueOf( - Match.calculateTanimotoCoefficient(dataSet.getSpectrum(), querySpectrum, 0, 0))); - } catch (CDKException e) { - e.printStackTrace(); - } - dataSet.addMetaInfo("avgDev", String.valueOf( - Match.calculateAverageDeviation(dataSet.getSpectrum(), querySpectrum, 0, 0, shiftTol))); - solutions.add(dataSet); - } - } - - return solutions; - } -} diff --git a/src/casekit/nmr/core/Elucidation.java b/src/casekit/nmr/core/Elucidation.java deleted file mode 100644 index 11d6fd6..0000000 --- a/src/casekit/nmr/core/Elucidation.java +++ /dev/null @@ -1,33 +0,0 @@ -package casekit.nmr.core; - -import casekit.nmr.model.Assignment; -import casekit.nmr.model.DataSet; -import casekit.nmr.model.Spectrum; -import casekit.nmr.utils.Match; - -import java.util.ArrayList; -import java.util.List; - -@Deprecated -public class Elucidation { - - public static List findFragments(final Spectrum querySpectrum, final List compoundDataSets, - final double shiftTol) { - final List fragments = new ArrayList<>(); - - Assignment matchAssignment; - for (final 
DataSet dataSet : compoundDataSets) { - matchAssignment = Match.matchSpectra(dataSet.getSpectrum(), querySpectrum, 0, 0, shiftTol); - - } - - return fragments; - } - - public static List elucidate() { - final List solutions = new ArrayList<>(); - - return solutions; - } - -} diff --git a/src/casekit/nmr/dbservice/NMRShiftDB.java b/src/casekit/nmr/dbservice/NMRShiftDB.java index a4f3f3a..54fe2d7 100644 --- a/src/casekit/nmr/dbservice/NMRShiftDB.java +++ b/src/casekit/nmr/dbservice/NMRShiftDB.java @@ -110,6 +110,7 @@ public static List getDataSetsFromNMRShiftDB(final String pathToNMRShif String spectrumIndexInRecord; IMolecularFormula mf; List explicitHydrogenIndices; + int[] temp; while (iterator.hasNext()) { structure = iterator.next(); @@ -168,7 +169,7 @@ public static List getDataSetsFromNMRShiftDB(final String pathToNMRShif spectrum.setSpectrometerFrequency(Double.parseDouble(fieldStrength.split( spectrumIndexInRecord + ":")[1])); - } catch (NumberFormatException e) { + } catch (final NumberFormatException e) { // e.printStackTrace(); } break; @@ -182,18 +183,23 @@ public static List getDataSetsFromNMRShiftDB(final String pathToNMRShif && !explicitHydrogenIndices.isEmpty()) { int hCount; for (int i = 0; i - < assignment.getAssignmentsCount(); i++) { - hCount = 0; - for (int j = 0; j - < explicitHydrogenIndices.size(); j++) { - if (explicitHydrogenIndices.get(j) - >= assignment.getAssignment(0, i)) { - break; + < assignment.getSize(); i++) { + for (int k = 0; k + < assignment.getAssignment(0, i).length; k++) { + hCount = 0; + for (int j = 0; j + < explicitHydrogenIndices.size(); j++) { + if (explicitHydrogenIndices.get(j) + >= assignment.getAssignment(0, i, k)) { + break; + } + hCount++; } - hCount++; + temp = assignment.getAssignment(0, i); + temp[k] = assignment.getAssignment(0, i, k) + - hCount; + assignment.setAssignment(0, i, temp); } - assignment.setAssignment(0, i, assignment.getAssignment(0, i) - - hCount); } } @@ -379,7 +385,7 @@ public static String 
NMRShiftDBSpectrumToBasicTextSpectrum(final String NMRShift basicSpectrum.append(Double.parseDouble(spectrumStringArray[i][1])) .append("\n"); } - } catch (Exception e) { + } catch (final Exception e) { return null; } @@ -394,7 +400,9 @@ public static Spectrum NMRShiftDBSpectrumToSpectrum(final String NMRShiftDBSpect return null; } final String[][] spectrumStringArray = parseNMRShiftDBSpectrum(NMRShiftDBSpectrum); - final Spectrum spectrum = new Spectrum(new String[]{nucleus}); + final Spectrum spectrum = new Spectrum(); + spectrum.setNuclei(new String[]{nucleus}); + spectrum.setSignals(new ArrayList<>()); String multiplicity; Double shift, intensity; try { @@ -407,7 +415,7 @@ public static Spectrum NMRShiftDBSpectrumToSpectrum(final String NMRShiftDBSpect new Signal(new String[]{nucleus}, new Double[]{shift}, multiplicity, "signal", intensity, 1, 0)); } - } catch (Exception e) { + } catch (final Exception e) { return null; } @@ -423,10 +431,15 @@ public static Assignment NMRShiftDBSpectrumToAssignment(final String NMRShiftDBS } final String[][] NMRShiftDBSpectrumStringArray = parseNMRShiftDBSpectrum(NMRShiftDBSpectrum); final Spectrum spectrum = NMRShiftDBSpectrumToSpectrum(NMRShiftDBSpectrum, nucleus); - final Assignment assignment = new Assignment(spectrum); + final Assignment assignment = new Assignment(); + assignment.setNuclei(spectrum.getNuclei()); + assignment.initAssignments(spectrum.getSignalCount()); + int signalIndex; for (int i = 0; i < NMRShiftDBSpectrumStringArray.length; i++) { - assignment.setAssignment(0, i, new Integer(NMRShiftDBSpectrumStringArray[i][3])); + signalIndex = spectrum.pickClosestSignal(Double.parseDouble(NMRShiftDBSpectrumStringArray[i][0]), 0, 0.0) + .get(0); + assignment.addAssignmentEquivalence(0, signalIndex, Integer.parseInt(NMRShiftDBSpectrumStringArray[i][3])); } return assignment; diff --git a/src/casekit/nmr/model/Assignment.java b/src/casekit/nmr/model/Assignment.java index e6fb979..0163274 100644 --- 
a/src/casekit/nmr/model/Assignment.java +++ b/src/casekit/nmr/model/Assignment.java @@ -23,11 +23,7 @@ */ package casekit.nmr.model; -import org.apache.commons.lang3.ArrayUtils; - -import java.util.ArrayList; import java.util.Arrays; -import java.util.List; /** * @author Michael Wenk [https://github.com/michaelwenk] @@ -36,33 +32,28 @@ public class Assignment implements Cloneable { private String[] nuclei; - private int[][] assignments; + private int[][][] assignments; public Assignment() { } - public Assignment(final String[] nuclei, final int[][] assignments) { + public Assignment(final String[] nuclei, final int[][][] assignments) { this.nuclei = nuclei; this.assignments = assignments; } - public Assignment(final Spectrum spectrum) { - this.nuclei = spectrum.getNuclei(); - this.assignments = this.initAssignments(this.nuclei.length, spectrum.getSignalCount()); - } - - private int[][] initAssignments(final int nDim, final int nSignals) { - final int[][] temp = new int[nDim][nSignals]; + public void initAssignments(final int length) { + final int[][][] temp = new int[this.getNDim()][length][1]; for (int i = 0; i - < nDim; i++) { + < this.getNDim(); i++) { for (int j = 0; j - < nSignals; j++) { - temp[i][j] = -1; + < length; j++) { + temp[i][j] = new int[]{}; } } - return temp; + this.assignments = temp; } public String[] getNuclei() { @@ -89,7 +80,7 @@ public boolean compareNuclei(final String[] nuclei) { } /** - * Sets an assignment with value for an index position. + * Sets an assignment array with value for an index position. 
* * @param dim * @param index @@ -97,7 +88,7 @@ public boolean compareNuclei(final String[] nuclei) { * * @return */ - public boolean setAssignment(final int dim, final int index, final int assignment) { + public boolean setAssignment(final int dim, final int index, final int[] assignment) { if (!this.containsDim(dim) || !this.checkIndex(dim, index)) { return false; @@ -107,20 +98,21 @@ public boolean setAssignment(final int dim, final int index, final int assignmen return true; } - public boolean setAssignments(final int dim, final List assignments) { + public boolean setAssignments(final int dim, final int[][] assignments) { if (!this.containsDim(dim) - || !this.checkInputListSize(assignments.size())) { + || this.getSize() + != assignments.length) { return false; } for (int i = 0; i - < this.getAssignmentsCount(); i++) { - this.setAssignment(dim, i, assignments.get(i)); + < this.getSize(); i++) { + this.setAssignment(dim, i, assignments[i]); } return true; } - public Integer getAssignment(final int dim, final int index) { + public int[] getAssignment(final int dim, final int index) { if (!this.containsDim(dim) || !this.checkIndex(dim, index)) { return null; @@ -129,14 +121,38 @@ public Integer getAssignment(final int dim, final int index) { return this.assignments[dim][index]; } + public int getAssignment(final int dim, final int index, final int equivalenceIndex) { + if (!this.containsDim(dim) + || !this.checkIndex(dim, index)) { + return -1; + } + + return this.assignments[dim][index][equivalenceIndex]; + } + + public void addAssignmentEquivalence(final int dim, final int index, final int equivalenceIndex) { + final int[] temp = this.getAssignment(dim, index); + final int[] equivalenceIndices = new int[temp.length + + 1]; + for (int j = 0; j + < temp.length; j++) { + equivalenceIndices[j] = temp[j]; + } + equivalenceIndices[equivalenceIndices.length + - 1] = equivalenceIndex; + + this.setAssignment(dim, index, equivalenceIndices); + } + public Integer 
getIndex(final int dim, final int assignment) { if (!this.containsDim(dim)) { return null; } for (int index = 0; index < this.assignments[dim].length; index++) { - if (this.getAssignment(dim, index) - == assignment) { + if (Arrays.stream(this.getAssignment(dim, index)) + .anyMatch(value -> value + == assignment)) { return index; } } @@ -144,15 +160,15 @@ public Integer getIndex(final int dim, final int assignment) { return -1; } - public ArrayList getAssignments(final int dim) { + public int[][] getAssignments(final int dim) { if (!this.containsDim(dim)) { return null; } - return new ArrayList<>(Arrays.asList(ArrayUtils.toObject(this.assignments[dim]))); + return this.assignments[dim]; } - public int getAssignmentsCount() { + public int getSize() { if (this.getNDim() > 0) { return this.assignments[0].length; @@ -165,8 +181,8 @@ public int getSetAssignmentsCount(final int dim) { if (this.containsDim(dim)) { for (int j = 0; j < this.assignments[dim].length; j++) { - if (this.assignments[dim][j] - != -1) { + if (this.assignments[dim][j].length + > 0) { setAssignmentsCounter++; } } @@ -174,74 +190,15 @@ public int getSetAssignmentsCount(final int dim) { return setAssignmentsCounter; } - public Boolean isFullyAssigned(final int dim) { - if (!this.containsDim(dim)) { - return null; - } - - return this.getSetAssignmentsCount(dim) - == this.getAssignmentsCount(); - } - - public Boolean isAssigned(final int dim, final int index) { - if (!this.containsDim(dim)) { - return null; - } - - return this.getAssignment(dim, index) - != -1; - } - - /** - * Adds a new assignment entry (index), e.g. for a new signal. The given assignment indices - * will be stored for each dimension of the new assignment entry (index). 
- * - * @param assignment assignment indices to store in each dimension of new assignment entry - * - * @return - */ - public boolean addAssignment(final int[] assignment) { - if (this.getNDim() - != assignments.length) { - return false; - } - final int[][] extendedAssignments = new int[this.getNDim()][this.getAssignmentsCount() - + 1]; - for (int dim = 0; dim - < this.getNDim(); dim++) { - for (int i = 0; i - < this.getAssignmentsCount(); i++) { - extendedAssignments[dim][i] = this.getAssignment(dim, i); - } - extendedAssignments[dim][this.getAssignmentsCount()] = assignment[dim]; - } - this.assignments = extendedAssignments; - - return true; - } - - public boolean removeAssignment(final int index) { - if (!this.checkIndex(0, index)) { - return false; - } - final int[][] reducedAssignments = new int[this.getNDim()][this.getAssignmentsCount() - - 1]; - int nextIndexToInsertCounter = 0; - for (int i = 0; i - < this.getAssignmentsCount(); i++) { - if (i - == index) { - continue; - } - for (int dim = 0; dim - < this.getNDim(); dim++) { - reducedAssignments[dim][nextIndexToInsertCounter] = this.getAssignment(dim, i); + public int getSetAssignmentsCountWithEquivalences(final int dim) { + int setAssignmentsCounter = 0; + if (this.containsDim(dim)) { + for (int j = 0; j + < this.assignments[dim].length; j++) { + setAssignmentsCounter += this.assignments[dim][j].length; } - nextIndexToInsertCounter++; } - this.assignments = reducedAssignments; - - return true; + return setAssignmentsCounter; } private boolean checkIndex(final int dim, final int index) { @@ -251,9 +208,8 @@ private boolean checkIndex(final int dim, final int index) { < this.assignments[dim].length); } - private boolean checkInputListSize(final int size) { - return (size - == this.getAssignmentsCount()); + public int[][][] getAssignments() { + return this.assignments; } @Override @@ -274,8 +230,4 @@ public String toString() { return stringBuilder.toString(); } - - public int[][] getAssignments() { - return 
assignments; - } } diff --git a/src/casekit/nmr/model/Signal.java b/src/casekit/nmr/model/Signal.java index 08c68aa..ecefbd8 100644 --- a/src/casekit/nmr/model/Signal.java +++ b/src/casekit/nmr/model/Signal.java @@ -46,10 +46,6 @@ public class Signal { public Signal() { } - public Signal(final String[] nuclei) { - this(nuclei, null, null, null, null, 1, 0); - } - public Signal(final String[] nuclei, final Double[] shifts, final String multiplicity, final String kind, final Double intensity, final int equivalencesCount, final int phase) { this.nuclei = nuclei; @@ -100,41 +96,57 @@ public Double getShift(final int dim) { return this.shifts[dim]; } - public void setIntensity(final Double intensity) { - this.intensity = intensity; - } - public Double getIntensity() { return this.intensity; } - public void setMultiplicity(final String multiplicity) { - this.multiplicity = multiplicity; + public void setIntensity(final Double intensity) { + this.intensity = intensity; } public String getMultiplicity() { return this.multiplicity; } + public void setMultiplicity(final String multiplicity) { + this.multiplicity = multiplicity; + } + public String getKind() { - return kind; + return this.kind; } public void setKind(final String kind) { this.kind = kind; } - // public void setPhase(final Integer phase) { - // this.phase = phase; - // } - // - // public Integer getPhase() { - // return this.phase; - // } + public Double[] getShifts() { + return this.shifts; + } + + public void setShifts(final Double[] shifts) { + this.shifts = shifts; + } + + public int getEquivalencesCount() { + return this.equivalencesCount; + } + public void setEquivalencesCount(final int equivalencesCount) { + this.equivalencesCount = equivalencesCount; + } + + public int getPhase() { + return this.phase; + } + + public void setPhase(final int phase) { + this.phase = phase; + } public Signal buildClone() { - return new Signal(this.getNuclei(), this.shifts, this.multiplicity, this.kind, this.intensity, + 
return new Signal(this.getNuclei() + .clone(), this.shifts.clone(), this.multiplicity, this.kind, this.intensity, this.equivalencesCount, this.phase); } @@ -142,39 +154,19 @@ public Signal buildClone() { public String toString() { return "Signal{" + "shifts=" - + Arrays.toString(shifts) + + Arrays.toString(this.shifts) + ", multiplicity='" - + multiplicity + + this.multiplicity + '\'' + ", intensity=" - + intensity + + this.intensity + ", kind='" - + kind + + this.kind + '\'' + ", equivalencesCount=" - + equivalencesCount + + this.equivalencesCount + ", phase=" - + phase + + this.phase + '}'; } - - public Double[] getShifts() { - return shifts; - } - - public int getEquivalencesCount() { - return equivalencesCount; - } - - public void setEquivalencesCount(final int equivalencesCount) { - this.equivalencesCount = equivalencesCount; - } - - public int getPhase() { - return phase; - } - - public void setPhase(final int phase) { - this.phase = phase; - } } diff --git a/src/casekit/nmr/model/Spectrum.java b/src/casekit/nmr/model/Spectrum.java index c1ef3ce..c1b0e91 100644 --- a/src/casekit/nmr/model/Spectrum.java +++ b/src/casekit/nmr/model/Spectrum.java @@ -62,12 +62,6 @@ public class Spectrum { public Spectrum() { } - public Spectrum(final String[] nuclei) { - this.nuclei = nuclei; - this.signals = new ArrayList<>(); - this.signalCount = 0; - } - public Spectrum(final String[] nuclei, final String description, final String specType, final Double spectrometerFrequency, final String solvent, final String standard, final List signals, final int signalCount) { @@ -77,7 +71,7 @@ public Spectrum(final String[] nuclei, final String description, final String sp this.spectrometerFrequency = spectrometerFrequency; this.solvent = solvent; this.standard = standard; - this.signals = new ArrayList<>(signals); + this.signals = signals; this.signalCount = signalCount; } @@ -104,22 +98,22 @@ public boolean compareNuclei(final String[] nuclei) { return Arrays.equals(this.getNuclei(), 
nuclei); } - public void setSpecType(final String specType) { - this.specType = specType; - } - public String getSpecType() { return this.specType; } - public void setSpecDescription(final String description) { - this.description = description; + public void setSpecType(final String specType) { + this.specType = specType; } public String getSpecDescription() { return this.description; } + public void setSpecDescription(final String description) { + this.description = description; + } + public int getSignalCount() { return this.signalCount; } @@ -133,11 +127,13 @@ public int getSignalCountWithEquivalences() { } /** - * Adds a signal to this spectrum. + * Adds a signal to this spectrum with pickPrecision of 0. * * @param signal signal to add * * @return + * + * @see #addSignal(Signal, double) */ public boolean addSignal(final Signal signal) { return this.addSignal(signal, 0.0); @@ -231,6 +227,10 @@ public List getSignals() { return this.signals; } + public void setSignals(final List signals) { + this.signals = signals; + } + public Double getShift(final int signalIndex, final int dim) { if (!this.checkSignalIndex(signalIndex)) { return null; @@ -299,30 +299,37 @@ public int getSignalIndex(final Signal signal) { return -1; } + public Double getSpectrometerFrequency() { + return this.spectrometerFrequency; + } + public void setSpectrometerFrequency(final Double sf) { this.spectrometerFrequency = sf; } - public Double getSpectrometerFrequency() { - return spectrometerFrequency; + public String getSolvent() { + return this.solvent; } public void setSolvent(final String solvent) { this.solvent = solvent; } - public String getSolvent() { - return solvent; + public String getStandard() { + return this.standard; } public void setStandard(final String standard) { this.standard = standard; } - public String getStandard() { - return standard; + public String getDescription() { + return this.description; } + public void setDescription(final String description) { + 
this.description = description; + } /** * Returns the signal index (or indices) closest to the given shift. If no signal is found within the interval @@ -395,7 +402,10 @@ public List pickSignals(final Double shift, final int dim, final double } public Spectrum buildClone() { - final Spectrum clone = new Spectrum(this.getNuclei()); + final Spectrum clone = new Spectrum(); + clone.setNuclei(this.getNuclei() + .clone()); + clone.setSignals(new ArrayList<>()); for (int i = 0; i < this.getSignalCount(); i++) { clone.addSignal(this.getSignal(i) @@ -414,27 +424,23 @@ public Spectrum buildClone() { public String toString() { return "Spectrum{" + "description='" - + description + + this.description + '\'' + ", specType='" - + specType + + this.specType + '\'' + ", spectrometerFrequency=" - + spectrometerFrequency + + this.spectrometerFrequency + ", solvent='" - + solvent + + this.solvent + '\'' + ", standard='" - + standard + + this.standard + '\'' + ", signals=" - + signals + + this.signals + ", signalCount=" - + signalCount + + this.signalCount + '}'; } - - public String getDescription() { - return description; - } } diff --git a/src/casekit/nmr/model/nmrdisplayer/Spectrum.java b/src/casekit/nmr/model/nmrdisplayer/Spectrum.java index d6bb1df..c79fa82 100644 --- a/src/casekit/nmr/model/nmrdisplayer/Spectrum.java +++ b/src/casekit/nmr/model/nmrdisplayer/Spectrum.java @@ -48,34 +48,56 @@ public class Spectrum { private HashMap info; public casekit.nmr.model.Spectrum toSpectrum(final boolean considerSignalKind) { - final int dimension = (int) info.get("dimension"); - final boolean isFid = (boolean) info.get("isFid"); + final int dimension = (int) this.info.get("dimension"); + final boolean isFid = (boolean) this.info.get("isFid"); if (!isFid) { - if (dimension == 1) { - final String nucleus = (String) info.get("nucleus"); - final casekit.nmr.model.Spectrum spectrum = new casekit.nmr.model.Spectrum(new String[]{nucleus}); - ranges.getValues().forEach(range -> 
range.getSignal().forEach(signal1D -> { - if (considerSignalKind && signal1D.getKind().equals("signal")) { - spectrum.addSignal(new Signal(new String[]{nucleus}, new Double[]{signal1D.getDelta()}, signal1D.getMultiplicity(), signal1D.getKind(), null, 0, 0)); - } - })); - spectrum.setSolvent((String) info.get("solvent")); - spectrum.setSpecType((String) info.get("experiment")); + if (dimension + == 1) { + final String nucleus = (String) this.info.get("nucleus"); + final casekit.nmr.model.Spectrum spectrum = new casekit.nmr.model.Spectrum(); + spectrum.setNuclei(new String[]{nucleus}); + this.ranges.getValues() + .forEach(range -> range.getSignal() + .forEach(signal1D -> { + if (considerSignalKind + && signal1D.getKind() + .equals("signal")) { + spectrum.addSignal(new Signal(new String[]{nucleus}, + new Double[]{ + signal1D.getDelta()}, + signal1D.getMultiplicity(), + signal1D.getKind(), null, 0, + 0)); + } + })); + spectrum.setSolvent((String) this.info.get("solvent")); + spectrum.setSpecType((String) this.info.get("experiment")); return spectrum; - } else if (dimension == 2) { - final String[] nuclei = ((ArrayList) info.get("nucleus")).toArray(new String[]{}); - final casekit.nmr.model.Spectrum spectrum = new casekit.nmr.model.Spectrum(nuclei); + } else if (dimension + == 2) { + final String[] nuclei = ((ArrayList) this.info.get("nucleus")).toArray(new String[]{}); + final casekit.nmr.model.Spectrum spectrum = new casekit.nmr.model.Spectrum(); + spectrum.setNuclei(nuclei); - zones.getValues().forEach(zone -> zone.getSignal().forEach(signal2D -> { - if (considerSignalKind && signal2D.getKind().equals("signal")) { - spectrum.addSignal(new Signal(nuclei, new Double[]{(Double) signal2D.getX().get("delta"), (Double) signal2D.getY().get("delta")}, signal2D.getMultiplicity(), signal2D.getKind(), null, 0, 0)); - } - })); - spectrum.setSolvent((String) info.get("solvent")); - spectrum.setSpecType((String) info.get("experiment")); + this.zones.getValues() + .forEach(zone -> 
zone.getSignal() + .forEach(signal2D -> { + if (considerSignalKind + && signal2D.getKind() + .equals("signal")) { + spectrum.addSignal(new Signal(nuclei, new Double[]{ + (Double) signal2D.getX() + .get("delta"), (Double) signal2D.getY() + .get("delta")}, + signal2D.getMultiplicity(), + signal2D.getKind(), null, 0, 0)); + } + })); + spectrum.setSolvent((String) this.info.get("solvent")); + spectrum.setSpecType((String) this.info.get("experiment")); return spectrum; } diff --git a/src/casekit/nmr/utils/Match.java b/src/casekit/nmr/utils/Match.java index d7b6432..459d908 100644 --- a/src/casekit/nmr/utils/Match.java +++ b/src/casekit/nmr/utils/Match.java @@ -111,20 +111,22 @@ public static Float calculateTanimotoCoefficient(final Spectrum spectrum1, final * * @return * - * @see #matchSpectra(Spectrum, Spectrum, int, int, double) + * @see #matchSpectra(Spectrum, Spectrum, int, int, double, boolean, boolean) */ public static Double[] getDeviations(final Spectrum spectrum1, final Spectrum spectrum2, final int dim1, - final int dim2, final double shiftTol) { + final int dim2, final double shiftTol, final boolean checkMultiplicity, + final boolean checkEquivalencesCount) { final Double[] deviations = new Double[spectrum1.getSignalCount()]; - final Assignment matchAssignments = Match.matchSpectra(spectrum1, spectrum2, dim1, dim2, shiftTol); + final Assignment matchAssignments = matchSpectra(spectrum1, spectrum2, dim1, dim2, shiftTol, checkMultiplicity, + checkEquivalencesCount); Signal matchedSignalInSpectrum2; for (int i = 0; i < spectrum1.getSignalCount(); i++) { - if (matchAssignments.getAssignment(0, i) - == -1) { + if (matchAssignments.getAssignment(0, i).length + == 0) { deviations[i] = null; } else { - matchedSignalInSpectrum2 = spectrum2.getSignal(matchAssignments.getAssignment(0, i)); + matchedSignalInSpectrum2 = spectrum2.getSignal(matchAssignments.getAssignment(0, i)[0]); deviations[i] = Math.abs(spectrum1.getSignal(i) .getShift(dim1) - 
matchedSignalInSpectrum2.getShift(dim2)); @@ -165,12 +167,16 @@ public static Double calculateAverageDeviation(final Double[] deviations) { * * @return * - * @see #getDeviations(Spectrum, Spectrum, int, int, double) + * @see #getDeviations(Spectrum, Spectrum, int, int, double, boolean, boolean) * @see #calculateAverageDeviation(Double[]) */ public static Double calculateAverageDeviation(final Spectrum spectrum1, final Spectrum spectrum2, final int dim1, - final int dim2, final double shiftTol) { - return Match.calculateAverageDeviation(Match.getDeviations(spectrum1, spectrum2, dim1, dim2, shiftTol)); + final int dim2, final double shiftTol, + final boolean checkMultiplicity, + final boolean checkEquivalencesCount) { + return Match.calculateAverageDeviation( + Match.getDeviations(spectrum1, spectrum2, dim1, dim2, shiftTol, checkMultiplicity, + checkEquivalencesCount)); } /** @@ -205,12 +211,14 @@ public static Double calculateRMSD(final Double[] data) { * * @return * - * @see #getDeviations(Spectrum, Spectrum, int, int, double) + * @see #getDeviations(Spectrum, Spectrum, int, int, double, boolean, boolean) * @see #calculateAverageDeviation(Double[]) */ public static Double calculateRMSD(final Spectrum spectrum1, final Spectrum spectrum2, final int dim1, - final int dim2, final double shiftTol) { - return Match.calculateRMSD(Match.getDeviations(spectrum1, spectrum2, dim1, dim2, shiftTol)); + final int dim2, final double shiftTol, final boolean checkMultiplicity, + final boolean checkEquivalencesCount) { + return Match.calculateRMSD(Match.getDeviations(spectrum1, spectrum2, dim1, dim2, shiftTol, checkMultiplicity, + checkEquivalencesCount)); } /** @@ -230,13 +238,17 @@ public static Double calculateRMSD(final Spectrum spectrum1, final Spectrum spec * contain the selected dimension */ public static Assignment matchSpectra(final Spectrum spectrum, final Spectrum querySpectrum, final int dim1, - final int dim2, final double shiftTol) { + final int dim2, final double 
shiftTol, final boolean checkMultiplicity, + final boolean checkEquivalencesCount) { if (!Match.checkDimensions(spectrum, querySpectrum, dim1, dim2)) { return null; } - final Assignment matchAssignments = new Assignment(spectrum); + final Assignment matchAssignments = new Assignment(); + matchAssignments.setNuclei(new String[]{spectrum.getNuclei()[dim1]}); + matchAssignments.initAssignments(spectrum.getSignalCount()); final Set assigned = new HashSet<>(); List pickedSignalIndicesSpectrum2; + boolean passed; for (int i = 0; i < spectrum.getSignalCount(); i++) { @@ -247,13 +259,20 @@ public static Assignment matchSpectra(final Spectrum spectrum, final Spectrum qu // @TODO add solvent deviation value for picking closest signal(s) pickedSignalIndicesSpectrum2 = new ArrayList<>(); - for (final int pickedSignalIndexSpectrum2 : querySpectrum.pickClosestSignal(spectrum.getShift(i, dim1), - dim2, shiftTol)) { + for (final int pickedSignalIndexSpectrum2 : querySpectrum.pickSignals(spectrum.getShift(i, dim1), dim2, + shiftTol)) { + passed = true; // @TODO maybe consider further parameters to check ? e.g. 
intensity - if (querySpectrum.getMultiplicity(pickedSignalIndexSpectrum2) - .equals(spectrum.getMultiplicity(i)) - && querySpectrum.getEquivalencesCount(pickedSignalIndexSpectrum2) - <= spectrum.getEquivalencesCount(i)) { + if (checkMultiplicity) { + passed = querySpectrum.getMultiplicity(pickedSignalIndexSpectrum2) + .equals(spectrum.getMultiplicity(i)); + } + if (checkEquivalencesCount) { + passed = querySpectrum.getEquivalencesCount(pickedSignalIndexSpectrum2) + <= spectrum.getEquivalencesCount(i); + } + + if (passed) { pickedSignalIndicesSpectrum2.add(pickedSignalIndexSpectrum2); } } @@ -261,9 +280,10 @@ public static Assignment matchSpectra(final Spectrum spectrum, final Spectrum qu if (!assigned.contains(pickedSignalIndexSpectrum2)) { // add signal to list of already assigned signals assigned.add(pickedSignalIndexSpectrum2); - // set picked signal index in assignment object - matchAssignments.setAssignment(0, i, pickedSignalIndexSpectrum2); - + for (int k = 0; k + < spectrum.getEquivalencesCount(i); k++) { + matchAssignments.addAssignmentEquivalence(0, i, pickedSignalIndexSpectrum2); + } break; } } @@ -288,69 +308,26 @@ public static Assignment matchSpectra(final Spectrum spectrum, final Spectrum qu * dimensions in both spectra is not the same or is different than the number of given * shift tolerances * - * @see #matchSpectra(Spectrum, Spectrum, int, int, double) + * @see #matchSpectra(Spectrum, Spectrum, int, int, double, boolean, boolean) */ - public static Assignment matchSpectra(final Spectrum spectrum1, final Spectrum spectrum2, - final double[] shiftTols) { + public static Assignment matchSpectra(final Spectrum spectrum1, final Spectrum spectrum2, final double[] shiftTols, + final boolean checkMultiplicity, final boolean checkEquivalencesCount) { if ((spectrum1.getNDim() != spectrum2.getNDim()) || (spectrum1.getNDim() != shiftTols.length)) { return null; } - final Assignment matchAssignment = new Assignment(spectrum1); + final Assignment 
matchAssignment = new Assignment(); + matchAssignment.setNuclei(spectrum1.getNuclei()); + matchAssignment.initAssignments(spectrum1.getSignalCount()); for (int dim = 0; dim < spectrum1.getNDim(); dim++) { - matchAssignment.setAssignments(dim, Match.matchSpectra(spectrum1, spectrum2, dim, dim, shiftTols[dim]) - .getAssignments(0)); + matchAssignment.setAssignments(dim, matchSpectra(spectrum1, spectrum2, dim, dim, shiftTols[dim], + checkMultiplicity, checkEquivalencesCount).getAssignments( + 0)); } return matchAssignment; } - - - // might be useful in future to correct matches between spectra - - // /** - // * Corrects a match list regarding a given shift list and an atom container. - // * This is useful when two ore more shift values (e.g. DEPT shifts) match - // * with the same atom in the atom container. So the purpose here is to - // * enable more unambiguous matches. This method first looks for unambiguous - // * matches and calculates the median of the difference values between the - // * shift list values and the shifts of atom container. Then, all shift list - // * values are adjusted (+/-) with this median value. 
- // * - // * @param shiftList1 Shift value list to search in - // * @param shiftList2 Shift value list to match in shiftList1 - // * @param matchesInShiftList1 Match list to correct - // * @param tol Tolerance value - // * @return - // */ - // public static ArrayList correctShiftMatches(final ArrayList shiftList1, final ArrayList shiftList2, final ArrayList matchesInShiftList1, final double tol) { - // - // int matchIndex; - // // get differences of unique matches between query shift and ac shifts - // ArrayList diffs = new ArrayList<>(); - // final HashSet uniqueMatchIndicesSet = new HashSet<>(matchesInShiftList1); - // for (final int uniqueMatchIndex : uniqueMatchIndicesSet) { - // if (Collections.frequency(matchesInShiftList1, uniqueMatchIndex) == 1) { - // matchIndex = matchesInShiftList1.indexOf(uniqueMatchIndex); - // if (matchesInShiftList1.get(matchIndex) >= 0) { - // diffs.add(shiftList2.get(matchIndex) - shiftList1.get(matchesInShiftList1.get(matchIndex))); - // } - // } - // } - // // calculate the median of found unique match differences - // if (diffs.size() > 0) { - // final double median = casekit.casekit.nmr.Utils.getMedian(diffs); - // // add or subtract the median of the differences to all shift list values (input) and match again then - // for (int i = 0; i < shiftList2.size(); i++) { - // shiftList2.set(i, shiftList2.get(i) - median); - // } - // // rematch - // return casekit.casekit.nmr.Utils.findShiftMatches(shiftList1, shiftList2, tol); - // } - // - // return matchesInShiftList1; - // } } diff --git a/src/casekit/nmr/utils/Predict.java b/src/casekit/nmr/utils/Predict.java index b001f4a..20069ac 100644 --- a/src/casekit/nmr/utils/Predict.java +++ b/src/casekit/nmr/utils/Predict.java @@ -34,6 +34,7 @@ import java.util.ArrayList; import java.util.HashMap; +import java.util.List; import java.util.Map; /** @@ -45,7 +46,7 @@ public class Predict { * Predicts a shift value for a central atom based on its HOSE code and a * given HOSE code lookup 
table. The prediction is done by using the median * of all occurring shifts in lookup table for the given HOSE code.
- * Specified for carbons (13C) only -> {@link casekit.nmr.Utils#getMultiplicityFromProtonsCount(int)}. + * Specified for carbons (13C) only -> {@link casekit.nmr.utils.Utils#getMultiplicityFromProtonsCount(int)}. * * @param HOSECodeLookupTable HashMap containing HOSE codes as keys and a list of chemical shifts * of occurring central atoms as values @@ -53,12 +54,11 @@ public class Predict { * * @return null if HOSE code does not exist in lookup table * - * @see casekit.nmr.Utils#getMedian(ArrayList) + * @see casekit.nmr.Utils#getMedian(List) */ public static Double predictShift(final Map> HOSECodeLookupTable, final String HOSECode) { if (HOSECodeLookupTable.containsKey(HOSECode)) { return Utils.getMedian(HOSECodeLookupTable.get(HOSECode)); - // return Utils.getMean(HOSECodeLookupTable.get(HOSECode)); } return null; @@ -68,7 +68,7 @@ public static Double predictShift(final Map> HOSECodeL * Predicts a signal for a central atom based on its HOSE code and a * given HOSE code lookup table. The prediction is done by using the mean * of all occurring shifts in lookup table for the given HOSE code.
- * Specified for carbons (13C) only -> {@link casekit.nmr.Utils#getMultiplicityFromProtonsCount(int)}. + * Specified for carbons (13C) only -> {@link casekit.nmr.utils.Utils#getMultiplicityFromProtonsCount(int)}. * * @param HOSECodeLookupTable HashMap containing HOSE codes as keys and a list of chemical shifts * of occurring central atoms as values @@ -80,7 +80,7 @@ public static Double predictShift(final Map> HOSECodeL * @return null if HOSE code of selected atom does not exist in lookup table * * @throws CDKException - * @see #predictShift(HashMap, String) + * @see #predictShift(Map, String) */ public static Signal predictSignal(final Map> HOSECodeLookupTable, final IAtomContainer ac, final int atomIndex, final Integer maxSphere, @@ -94,15 +94,16 @@ public static Signal predictSignal(final Map> HOSECode == null) { return null; } - return new Signal(new String[]{nucleus}, new Double[]{predictedShift}, Utils.getMultiplicityFromProtonsCount( - ac.getAtom(atomIndex) - .getImplicitHydrogenCount()), "signal", null, 1, 0); + return new Signal(new String[]{nucleus}, new Double[]{predictedShift}, + casekit.nmr.utils.Utils.getMultiplicityFromProtonsCount(ac.getAtom(atomIndex) + .getImplicitHydrogenCount()), + "signal", null, 1, 0); } /** * Predicts a spectrum for a given structure based on HOSE code of atoms with specified nucleus and a * given HOSE code lookup table.
- * Specified for carbons (13C) only -> {@link casekit.nmr.Utils#getMultiplicityFromProtonsCount(int)}. + * Specified for carbons (13C) only -> {@link casekit.nmr.utils.Utils#getMultiplicityFromProtonsCount(int)}. * * @param HOSECodeLookupTable HashMap containing HOSE codes as keys and a list of chemical shifts * of occurring central atoms as values @@ -113,12 +114,14 @@ public static Signal predictSignal(final Map> HOSECode * @return null if a HOSE code of one atom does not exist in lookup table * * @throws org.openscience.cdk.exception.CDKException - * @see #predictSignal(HashMap, IAtomContainer, int, Integer, String) + * @see #predictSignal(Map, IAtomContainer, int, Integer, String) */ public static Spectrum predictSpectrum(final HashMap> HOSECodeLookupTable, final IAtomContainer ac, final Integer maxSphere, final String nucleus) throws Exception { - final Spectrum predictedSpectrum = new Spectrum(new String[]{nucleus}); + final Spectrum predictedSpectrum = new Spectrum(); + predictedSpectrum.setNuclei(new String[]{nucleus}); + predictedSpectrum.setSignals(new ArrayList<>()); Signal signal; for (final IAtom atom : ac.atoms()) { if (atom.getSymbol() diff --git a/src/casekit/nmr/utils/Utils.java b/src/casekit/nmr/utils/Utils.java index dff108c..aae856a 100644 --- a/src/casekit/nmr/utils/Utils.java +++ b/src/casekit/nmr/utils/Utils.java @@ -19,25 +19,36 @@ public class Utils { + /** + * Specified for carbons only -> not generic!!! 
+ * + * @param protonsCount + * + * @return + */ + public static String getMultiplicityFromProtonsCount(final int protonsCount) { + switch (protonsCount) { + case 0: + return "S"; + case 1: + return "D"; + case 2: + return "T"; + case 3: + return "Q"; + default: + return null; + } + } + public static String getMultiplicityFromProtonsCount(final Correlation correlation) { if (correlation.getAtomType() .equals("C") && correlation.getProtonsCount() .size() == 1) { - switch (correlation.getProtonsCount() - .get(0)) { - case 0: - return "s"; - case 1: - return "d"; - case 2: - return "t"; - case 3: - return "q"; - default: - return null; - } + return getMultiplicityFromProtonsCount(correlation.getProtonsCount() + .get(0)); } return null; } From 6f531ff0ca8f3b56c47f5e29a5ad91165b15f90a Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Thu, 1 Apr 2021 17:44:49 +0200 Subject: [PATCH 165/405] disabled SHIH in PyLSDInputFileBuilder.java --- src/casekit/nmr/lsd/PyLSDInputFileBuilder.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java b/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java index dad1aa6..37c9f3b 100644 --- a/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java +++ b/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java @@ -500,8 +500,8 @@ public static String buildPyLSDInputFileContent(final Data data, final String mf .add(buildCOSY(correlation, i, data, indicesMap)); collection.get("SHIX") .add(buildSHIX(correlation, i, indicesMap)); - collection.get("SHIH") - .add(buildSHIH(correlation, i, indicesMap)); + // collection.get("SHIH") + // .add(buildSHIH(correlation, i, indicesMap)); } collection.keySet() From da49058712682d6259da38aa68feb6e673fb7353 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Thu, 1 Apr 2021 17:45:58 +0200 Subject: [PATCH 166/405] added RankedResultSDFParser --- .../nmr/lsd/RankedResultSDFParser.java | 163 ++++++++++++++++++ 1 file changed, 163 insertions(+) create mode 100644 
src/casekit/nmr/lsd/RankedResultSDFParser.java diff --git a/src/casekit/nmr/lsd/RankedResultSDFParser.java b/src/casekit/nmr/lsd/RankedResultSDFParser.java new file mode 100644 index 0000000..a5301bc --- /dev/null +++ b/src/casekit/nmr/lsd/RankedResultSDFParser.java @@ -0,0 +1,163 @@ +package casekit.nmr.lsd; + +import casekit.nmr.model.Assignment; +import casekit.nmr.model.DataSet; +import casekit.nmr.model.Signal; +import casekit.nmr.model.Spectrum; +import casekit.nmr.utils.Match; +import casekit.nmr.utils.Utils; +import org.openscience.cdk.exception.CDKException; +import org.openscience.cdk.interfaces.IAtomContainer; +import org.openscience.cdk.interfaces.IMolecularFormula; +import org.openscience.cdk.io.iterator.IteratingSDFReader; +import org.openscience.cdk.silent.SilentChemObjectBuilder; +import org.openscience.cdk.tools.CDKHydrogenAdder; +import org.openscience.cdk.tools.manipulator.AtomContainerManipulator; + +import java.io.FileNotFoundException; +import java.io.FileReader; +import java.util.*; + +public class RankedResultSDFParser { + + public static List parseRankedResultSDF(final String pathToFile, + final String nucleus) throws CDKException, FileNotFoundException { + final List dataSetList = new ArrayList<>(); + final IteratingSDFReader iterator = new IteratingSDFReader(new FileReader(pathToFile), + SilentChemObjectBuilder.getInstance()); + IAtomContainer structure; + Spectrum experimentalSpectrum, predictedSpectrum; + Assignment assignment; + HashMap meta; + final CDKHydrogenAdder hydrogenAdder = CDKHydrogenAdder.getInstance(SilentChemObjectBuilder.getInstance()); + IMolecularFormula mf; + LinkedHashMap shiftProperties1D; + String[] split; + Signal experimentalSignal; + double experimentalShift, predictedShift; + String multiplicity; + Map> signalShiftList; + DataSet dataSet; + Double[] deviations; + int signalCounter, matchedSignalIndex; + + while (iterator.hasNext()) { + structure = iterator.next(); + 
AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(structure); + hydrogenAdder.addImplicitHydrogens(structure); + casekit.nmr.Utils.setAromaticityAndKekulize(structure); + meta = new HashMap<>(); + meta.put("title", structure.getTitle()); + meta.put("id", structure.getProperty("nmrshiftdb2 ID")); + mf = casekit.nmr.Utils.getMolecularFormulaFromAtomContainer(structure); + meta.put("mf", casekit.nmr.Utils.molecularFormularToString(mf)); + try { + final String smiles = casekit.nmr.utils.Utils.getSmilesFromAtomContainer(structure); + meta.put("smiles", smiles); + } catch (final CDKException e) { + e.printStackTrace(); + } + shiftProperties1D = getShiftProperties1D(structure); + + experimentalSpectrum = new Spectrum(); + experimentalSpectrum.setNuclei(new String[]{nucleus}); + experimentalSpectrum.setSignals(new ArrayList<>()); + + + for (final Map.Entry shiftProperty1D : shiftProperties1D.entrySet()) { + split = shiftProperty1D.getValue() + .split("\\s"); + multiplicity = Utils.getMultiplicityFromProtonsCount(structure.getAtom(Integer.parseInt(split[0]) + - 1) + .getImplicitHydrogenCount()); + experimentalSignal = new Signal(); + experimentalSignal.setNuclei(new String[]{nucleus}); + experimentalSignal.setShifts(new Double[experimentalSignal.getNDim()]); + experimentalShift = Double.parseDouble(split[1]); // exp. 
shift + experimentalSignal.setShift(experimentalShift, 0); + experimentalSignal.setEquivalencesCount(1); + experimentalSignal.setMultiplicity(multiplicity); + experimentalSpectrum.addSignal(experimentalSignal); + } + assignment = new Assignment(); + assignment.setNuclei(new String[]{nucleus}); + assignment.initAssignments(experimentalSpectrum.getSignalCount()); + + predictedSpectrum = experimentalSpectrum.buildClone(); + + deviations = new Double[predictedSpectrum.getSignalCountWithEquivalences()]; + signalCounter = 0; + signalShiftList = new HashMap<>(); + for (final Map.Entry shiftProperty1D : shiftProperties1D.entrySet()) { + split = shiftProperty1D.getValue() + .split("\\s"); + experimentalShift = Double.parseDouble(split[1]); // exp. shift + predictedShift = Double.parseDouble(split[3]); // pred. shift + + matchedSignalIndex = experimentalSpectrum.pickClosestSignal(experimentalShift, 0, 0.0) + .get(0); + deviations[signalCounter] = Math.abs(predictedShift + - experimentalShift); + signalShiftList.putIfAbsent(matchedSignalIndex, new ArrayList<>()); + signalShiftList.get(matchedSignalIndex) + .add(predictedShift); + assignment.addAssignmentEquivalence(0, matchedSignalIndex, Integer.parseInt(split[0]) + - 1); + signalCounter++; + } + for (final int signalIndex : signalShiftList.keySet()) { + predictedSpectrum.getSignal(signalIndex) + .setShift(casekit.nmr.Utils.getMean(signalShiftList.get(signalIndex)), 0); + predictedSpectrum.getSignal(signalIndex) + .setEquivalencesCount(signalShiftList.get(signalIndex) + .size()); + } + // if no spectrum could be built or the number of signals in spectrum is different than the atom number in molecule + if (casekit.nmr.Utils.getDifferenceSpectrumSizeAndMolecularFormulaCount(predictedSpectrum, mf, 0) + != 0) { + continue; + } + dataSet = new DataSet(structure, predictedSpectrum, assignment, meta); + dataSet.addMetaInfo("rmsd", String.valueOf(Match.calculateRMSD(deviations))); + try { + dataSet.addMetaInfo("tanimoto", 
String.valueOf( + Match.calculateTanimotoCoefficient(dataSet.getSpectrum(), experimentalSpectrum, 0, 0))); + } catch (final CDKException e) { + e.printStackTrace(); + } + dataSetList.add(dataSet); + } + // pre-sort by RMSD value + dataSetList.sort((dataSet1, dataSet2) -> { + if (Double.parseDouble(dataSet1.getMeta() + .get("rmsd")) + < Double.parseDouble(dataSet2.getMeta() + .get("rmsd"))) { + return -1; + } else if (Double.parseDouble(dataSet1.getMeta() + .get("rmsd")) + > Double.parseDouble(dataSet2.getMeta() + .get("rmsd"))) { + return 1; + } + return 0; + }); + + return dataSetList; + } + + public static LinkedHashMap getShiftProperties1D(final IAtomContainer ac) { + final LinkedHashMap shiftProperties1D = new LinkedHashMap<>(); + String[] split; + for (final Object key : ac.getProperties() + .keySet()) { + if (key instanceof String + && ((String) key).startsWith("CS")) { + split = ((String) key).split("CS"); + shiftProperties1D.put(split[1], ac.getProperty(key)); + } + } + + return shiftProperties1D; + } +} From 93861d05451664a08894ef6f1e1507a17e4523f3 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Thu, 1 Apr 2021 18:59:43 +0200 Subject: [PATCH 167/405] Match: when checkEquivalencesCount is set to true then check for equal number instead of less or equal --- src/casekit/nmr/utils/Match.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/casekit/nmr/utils/Match.java b/src/casekit/nmr/utils/Match.java index 459d908..7cbb01b 100644 --- a/src/casekit/nmr/utils/Match.java +++ b/src/casekit/nmr/utils/Match.java @@ -269,7 +269,7 @@ public static Assignment matchSpectra(final Spectrum spectrum, final Spectrum qu } if (checkEquivalencesCount) { passed = querySpectrum.getEquivalencesCount(pickedSignalIndexSpectrum2) - <= spectrum.getEquivalencesCount(i); + == spectrum.getEquivalencesCount(i); } if (passed) { From e731835dca41bd2d9d0b6bcf85e04e54bb9ace1e Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Thu, 1 Apr 2021 19:11:54 +0200 Subject: 
[PATCH 168/405] fix: an error when passing conditions --- src/casekit/nmr/utils/Match.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/casekit/nmr/utils/Match.java b/src/casekit/nmr/utils/Match.java index 7cbb01b..ecd64a0 100644 --- a/src/casekit/nmr/utils/Match.java +++ b/src/casekit/nmr/utils/Match.java @@ -267,7 +267,8 @@ public static Assignment matchSpectra(final Spectrum spectrum, final Spectrum qu passed = querySpectrum.getMultiplicity(pickedSignalIndexSpectrum2) .equals(spectrum.getMultiplicity(i)); } - if (checkEquivalencesCount) { + if (passed + && checkEquivalencesCount) { passed = querySpectrum.getEquivalencesCount(pickedSignalIndexSpectrum2) == spectrum.getEquivalencesCount(i); } From 716dcff4bdaca76aa1521e8f0871953ebc2546e0 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Sun, 4 Apr 2021 11:52:30 +0200 Subject: [PATCH 169/405] feat: changed "mf" to "mfOriginal" and added alphabetic sorted mf as "mf" --- src/casekit/nmr/dbservice/NMRShiftDB.java | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/src/casekit/nmr/dbservice/NMRShiftDB.java b/src/casekit/nmr/dbservice/NMRShiftDB.java index 54fe2d7..7006d74 100644 --- a/src/casekit/nmr/dbservice/NMRShiftDB.java +++ b/src/casekit/nmr/dbservice/NMRShiftDB.java @@ -27,10 +27,7 @@ import java.io.FileNotFoundException; import java.io.FileReader; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; +import java.util.*; public class NMRShiftDB { @@ -102,7 +99,7 @@ public static List getDataSetsFromNMRShiftDB(final String pathToNMRShif IAtomContainer structure; Spectrum spectrum; Assignment assignment; - HashMap meta; + Map meta; final CDKHydrogenAdder hydrogenAdder = CDKHydrogenAdder.getInstance(SilentChemObjectBuilder.getInstance()); List spectraProperties1D; @@ -111,6 +108,8 @@ public static List getDataSetsFromNMRShiftDB(final String pathToNMRShif IMolecularFormula mf; List 
explicitHydrogenIndices; int[] temp; + StringBuilder mfAlphabetic; + Map mfAlphabeticMap; while (iterator.hasNext()) { structure = iterator.next(); @@ -128,7 +127,18 @@ public static List getDataSetsFromNMRShiftDB(final String pathToNMRShif meta.put("title", structure.getTitle()); meta.put("id", structure.getProperty("nmrshiftdb2 ID")); mf = Utils.getMolecularFormulaFromAtomContainer(structure); - meta.put("mf", Utils.molecularFormularToString(mf)); + meta.put("mfOriginal", Utils.molecularFormularToString(mf)); + mfAlphabetic = new StringBuilder(); + mfAlphabeticMap = new TreeMap<>( + casekit.nmr.utils.Utils.getMolecularFormulaElementCounts(Utils.molecularFormularToString(mf))); + for (final Map.Entry entry : mfAlphabeticMap.entrySet()) { + mfAlphabetic.append(entry.getKey()); + if (entry.getValue() + > 1) { + mfAlphabetic.append(entry.getValue()); + } + } + meta.put("mf", mfAlphabetic.toString()); try { final String smiles = casekit.nmr.utils.Utils.getSmilesFromAtomContainer(structure); meta.put("smiles", smiles); From bde083d344a619ab2966b737dfadfd2168a4529a Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Sun, 4 Apr 2021 15:41:15 +0200 Subject: [PATCH 170/405] moved try and catch around Tanimoto coefficient calculation --- src/casekit/nmr/lsd/RankedResultSDFParser.java | 9 +++------ src/casekit/nmr/utils/Match.java | 11 +++++++---- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/casekit/nmr/lsd/RankedResultSDFParser.java b/src/casekit/nmr/lsd/RankedResultSDFParser.java index a5301bc..d6f8b47 100644 --- a/src/casekit/nmr/lsd/RankedResultSDFParser.java +++ b/src/casekit/nmr/lsd/RankedResultSDFParser.java @@ -119,12 +119,9 @@ public static List parseRankedResultSDF(final String pathToFile, } dataSet = new DataSet(structure, predictedSpectrum, assignment, meta); dataSet.addMetaInfo("rmsd", String.valueOf(Match.calculateRMSD(deviations))); - try { - dataSet.addMetaInfo("tanimoto", String.valueOf( - 
Match.calculateTanimotoCoefficient(dataSet.getSpectrum(), experimentalSpectrum, 0, 0))); - } catch (final CDKException e) { - e.printStackTrace(); - } + dataSet.addMetaInfo("tanimoto", String.valueOf( + Match.calculateTanimotoCoefficient(dataSet.getSpectrum(), experimentalSpectrum, 0, 0))); + dataSetList.add(dataSet); } // pre-sort by RMSD value diff --git a/src/casekit/nmr/utils/Match.java b/src/casekit/nmr/utils/Match.java index ecd64a0..ba3b1d7 100644 --- a/src/casekit/nmr/utils/Match.java +++ b/src/casekit/nmr/utils/Match.java @@ -79,11 +79,9 @@ public static Spectrum combineSpectra(final Spectrum spectrum1, final Spectrum s * @param dim2 dimension in second spectrum to take the shifts from * * @return - * - * @throws CDKException */ public static Float calculateTanimotoCoefficient(final Spectrum spectrum1, final Spectrum spectrum2, final int dim1, - final int dim2) throws CDKException { + final int dim2) { if (!Match.checkDimensions(spectrum1, spectrum2, dim1, dim2)) { return null; } @@ -96,7 +94,12 @@ public static Float calculateTanimotoCoefficient(final Spectrum spectrum1, final new Double[spectrum2.getSignalCount()])); Arrays.parallelSort(shiftsSpectrum2); - return Tanimoto.calculate(shiftsSpectrum1, shiftsSpectrum2); + try { + return Tanimoto.calculate(shiftsSpectrum1, shiftsSpectrum2); + } catch (final CDKException e) { + e.printStackTrace(); + } + return null; } /** From 6b1b4ec9e4a8ce2257f914cfd5af48610d7f3580 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Wed, 7 Apr 2021 13:34:12 +0200 Subject: [PATCH 171/405] feat: multiplicity check when adding a signal to a spectrum --- src/casekit/nmr/dbservice/NMRShiftDB.java | 2 +- src/casekit/nmr/model/Spectrum.java | 51 +++++++++++++++++++---- src/casekit/nmr/utils/Match.java | 17 ++++---- 3 files changed, 53 insertions(+), 17 deletions(-) diff --git a/src/casekit/nmr/dbservice/NMRShiftDB.java b/src/casekit/nmr/dbservice/NMRShiftDB.java index 7006d74..9b62ee0 100644 --- 
a/src/casekit/nmr/dbservice/NMRShiftDB.java +++ b/src/casekit/nmr/dbservice/NMRShiftDB.java @@ -447,7 +447,7 @@ public static Assignment NMRShiftDBSpectrumToAssignment(final String NMRShiftDBS int signalIndex; for (int i = 0; i < NMRShiftDBSpectrumStringArray.length; i++) { - signalIndex = spectrum.pickClosestSignal(Double.parseDouble(NMRShiftDBSpectrumStringArray[i][0]), 0, 0.0) + signalIndex = spectrum.pickByClosestShift(Double.parseDouble(NMRShiftDBSpectrumStringArray[i][0]), 0, 0.0) .get(0); assignment.addAssignmentEquivalence(0, signalIndex, Integer.parseInt(NMRShiftDBSpectrumStringArray[i][3])); } diff --git a/src/casekit/nmr/model/Spectrum.java b/src/casekit/nmr/model/Spectrum.java index c1b0e91..1e5f6db 100644 --- a/src/casekit/nmr/model/Spectrum.java +++ b/src/casekit/nmr/model/Spectrum.java @@ -133,21 +133,23 @@ public int getSignalCountWithEquivalences() { * * @return * - * @see #addSignal(Signal, double) + * @see #addSignal(Signal, double, boolean) */ public boolean addSignal(final Signal signal) { - return this.addSignal(signal, 0.0); + return this.addSignal(signal, 0.0, true); } /** * Adds a signal to this spectrum and stores an equivalent signal index. 
* - * @param signal signal to add - * @param pickPrecision precision to find equivalent signals to store in + * @param signal signal to add + * @param pickPrecision precision to find equivalent signals to store in + * @param checkMultiplicity indicates whether to compare the multiplicity of signal + * to add while searching for equivalences * * @return */ - public boolean addSignal(final Signal signal, final double pickPrecision) { + public boolean addSignal(final Signal signal, final double pickPrecision, final boolean checkMultiplicity) { if ((signal == null) || !this.compareNuclei(signal.getNuclei())) { @@ -155,10 +157,14 @@ public boolean addSignal(final Signal signal, final double pickPrecision) { } // check for equivalent signals in all dimensions - final List closestSignalList = this.pickClosestSignal(signal.getShift(0), 0, pickPrecision); + final List closestSignalList = this.pickByClosestShift(signal.getShift(0), 0, pickPrecision); for (int dim = 1; dim < this.getNDim(); dim++) { - closestSignalList.retainAll(this.pickClosestSignal(signal.getShift(dim), dim, pickPrecision)); + closestSignalList.retainAll(this.pickByClosestShift(signal.getShift(dim), dim, pickPrecision)); + } + + if (checkMultiplicity) { + closestSignalList.retainAll(this.pickByMultiplicity(signal.getMultiplicity())); } if (closestSignalList.isEmpty()) { @@ -331,6 +337,35 @@ public void setDescription(final String description) { this.description = description; } + /** + * Returns the indices of signals with same multiplicity. 
+ * + * @param multiplicity multiplicity to search for + * + * @return + */ + public List pickByMultiplicity(final String multiplicity) { + final List matchIndices = new ArrayList<>(); + for (int s = 0; s + < this.getSignalCount(); s++) { + if ((this.getSignal(s) + .getMultiplicity() + == null + && multiplicity + == null) + || (this.getSignal(s) + .getMultiplicity() + != null + && this.getSignal(s) + .getMultiplicity() + .equals(multiplicity))) { + matchIndices.add(s); + } + } + + return matchIndices; + } + /** * Returns the signal index (or indices) closest to the given shift. If no signal is found within the interval * defined by {@code pickPrecision}, an empty list is returned. @@ -341,7 +376,7 @@ public void setDescription(final String description) { * * @return */ - public List pickClosestSignal(final double shift, final int dim, final double pickPrecision) { + public List pickByClosestShift(final double shift, final int dim, final double pickPrecision) { final List matchIndices = new ArrayList<>(); if (!this.containsDim(dim)) { return matchIndices; diff --git a/src/casekit/nmr/utils/Match.java b/src/casekit/nmr/utils/Match.java index ba3b1d7..488ce3f 100644 --- a/src/casekit/nmr/utils/Match.java +++ b/src/casekit/nmr/utils/Match.java @@ -46,17 +46,18 @@ private static boolean checkDimensions(final Spectrum spectrum1, final Spectrum * via the {@code pickPrecision} parameter and multiplicity comparison. * In {@code spectrum1}, the equivalent signals have to be set. * - * @param spectrum1 first spectrum, incl. equivalent signals - * @param spectrum2 second spectrum - * @param pickPrecision tolerance value used for signal shift matching to - * find equivalent signals - * @param dim1 dimension of first spectrum to combine - * @param dim2 dimension of second spectrum to combine + * @param spectrum1 first spectrum, incl. 
equivalent signals + * @param spectrum2 second spectrum + * @param dim1 dimension of first spectrum to combine + * @param dim2 dimension of second spectrum to combine + * @param pickPrecision tolerance value used for signal shift matching to + * find equivalent signals + * @param checkMultiplicity indicates whether to compare the multiplicity of matched signals * * @return null if one spectrum does not contain the selected dimension */ public static Spectrum combineSpectra(final Spectrum spectrum1, final Spectrum spectrum2, final int dim1, - final int dim2, final double pickPrecision) throws Exception { + final int dim2, final double pickPrecision, final boolean checkMultiplicity) { if (!Match.checkDimensions(spectrum1, spectrum2, dim1, dim2)) { return null; } @@ -65,7 +66,7 @@ public static Spectrum combineSpectra(final Spectrum spectrum1, final Spectrum s // fill in signals from spectrum2 // consider the possibility of potential equivalent signals here for (final Signal signalSpectrum2 : spectrum2.getSignals()) { - combinedSpectrum.addSignal(signalSpectrum2.buildClone(), pickPrecision); + combinedSpectrum.addSignal(signalSpectrum2.buildClone(), pickPrecision, checkMultiplicity); } return combinedSpectrum; } From 354594065a8f6088c62d254775d593e1189de2d7 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Wed, 7 Apr 2021 14:15:34 +0200 Subject: [PATCH 172/405] fix: multiplicity check when searching for equivalence signals; use median for predicted shift calculation instead of mean --- src/casekit/nmr/lsd/RankedResultSDFParser.java | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/casekit/nmr/lsd/RankedResultSDFParser.java b/src/casekit/nmr/lsd/RankedResultSDFParser.java index d6f8b47..d01a80e 100644 --- a/src/casekit/nmr/lsd/RankedResultSDFParser.java +++ b/src/casekit/nmr/lsd/RankedResultSDFParser.java @@ -40,6 +40,7 @@ public static List parseRankedResultSDF(final String pathToFile, DataSet dataSet; Double[] deviations; int 
signalCounter, matchedSignalIndex; + List closestSignalList; while (iterator.hasNext()) { structure = iterator.next(); @@ -91,11 +92,17 @@ public static List parseRankedResultSDF(final String pathToFile, for (final Map.Entry shiftProperty1D : shiftProperties1D.entrySet()) { split = shiftProperty1D.getValue() .split("\\s"); + multiplicity = Utils.getMultiplicityFromProtonsCount(structure.getAtom(Integer.parseInt(split[0]) + - 1) + .getImplicitHydrogenCount()); experimentalShift = Double.parseDouble(split[1]); // exp. shift predictedShift = Double.parseDouble(split[3]); // pred. shift - matchedSignalIndex = experimentalSpectrum.pickClosestSignal(experimentalShift, 0, 0.0) - .get(0); + // just to be sure that we take the right signal if equivalences are present + closestSignalList = experimentalSpectrum.pickByClosestShift(experimentalShift, 0, 0.0); + closestSignalList.retainAll(experimentalSpectrum.pickByMultiplicity(multiplicity)); + matchedSignalIndex = closestSignalList.get(0); + deviations[signalCounter] = Math.abs(predictedShift - experimentalShift); signalShiftList.putIfAbsent(matchedSignalIndex, new ArrayList<>()); @@ -107,7 +114,7 @@ public static List parseRankedResultSDF(final String pathToFile, } for (final int signalIndex : signalShiftList.keySet()) { predictedSpectrum.getSignal(signalIndex) - .setShift(casekit.nmr.Utils.getMean(signalShiftList.get(signalIndex)), 0); + .setShift(casekit.nmr.Utils.getMedian(signalShiftList.get(signalIndex)), 0); predictedSpectrum.getSignal(signalIndex) .setEquivalencesCount(signalShiftList.get(signalIndex) .size()); From 519bb94943b8111175ac892c3866ac1573af2174 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Wed, 7 Apr 2021 15:15:24 +0200 Subject: [PATCH 173/405] fix: added missing multiplicity check when searching for equivalence signals --- src/casekit/nmr/dbservice/NMRShiftDB.java | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/casekit/nmr/dbservice/NMRShiftDB.java 
b/src/casekit/nmr/dbservice/NMRShiftDB.java index 9b62ee0..67ef510 100644 --- a/src/casekit/nmr/dbservice/NMRShiftDB.java +++ b/src/casekit/nmr/dbservice/NMRShiftDB.java @@ -445,10 +445,17 @@ public static Assignment NMRShiftDBSpectrumToAssignment(final String NMRShiftDBS assignment.setNuclei(spectrum.getNuclei()); assignment.initAssignments(spectrum.getSignalCount()); int signalIndex; + String multiplicity; + List closestSignalList; for (int i = 0; i < NMRShiftDBSpectrumStringArray.length; i++) { - signalIndex = spectrum.pickByClosestShift(Double.parseDouble(NMRShiftDBSpectrumStringArray[i][0]), 0, 0.0) - .get(0); + // just to be sure that we take the right signal if equivalences are present + closestSignalList = spectrum.pickByClosestShift(Double.parseDouble(NMRShiftDBSpectrumStringArray[i][0]), 0, + 0.0); + multiplicity = NMRShiftDBSpectrumStringArray[i][2]; + closestSignalList.retainAll(spectrum.pickByMultiplicity(multiplicity)); + signalIndex = closestSignalList.get(0); + assignment.addAssignmentEquivalence(0, signalIndex, Integer.parseInt(NMRShiftDBSpectrumStringArray[i][3])); } From 2cbcec5d9b9c67af307876fa089df9c1a506df6a Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Wed, 7 Apr 2021 21:17:44 +0200 Subject: [PATCH 174/405] feat: added PyLSD ELIM P1 and P2 parameters --- .../nmr/lsd/PyLSDInputFileBuilder.java | 18 ++++++-- .../nmr/lsd/model/ElucidationOptions.java | 45 +++++++++++++++++++ 2 files changed, 59 insertions(+), 4 deletions(-) create mode 100644 src/casekit/nmr/lsd/model/ElucidationOptions.java diff --git a/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java b/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java index 37c9f3b..c2831fd 100644 --- a/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java +++ b/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java @@ -1,6 +1,7 @@ package casekit.nmr.lsd; import casekit.io.FileSystem; +import casekit.nmr.lsd.model.ElucidationOptions; import casekit.nmr.model.nmrdisplayer.Correlation; import 
casekit.nmr.model.nmrdisplayer.Data; import casekit.nmr.model.nmrdisplayer.Link; @@ -42,6 +43,13 @@ private static String buildPIEC() { return "PIEC 1"; } + private static String buildELIM(final int elimP1, final int elimP2) { + return "ELIM " + + elimP1 + + " " + + elimP2; + } + private static Map buildIndicesMap(final Data data, final Map elementCounts) { // index in correlation data -> [atom type, indices in PyLSD file...] final Map indicesMap = new HashMap<>(); @@ -447,8 +455,7 @@ private static String buildFilters(final String pathToLSDFilterList) { public static String buildPyLSDInputFileContent(final Data data, final String mf, final Map> detectedHybridizations, - final boolean allowHeteroHeteroBonds, - final String pathToLSDFilterList) { + final ElucidationOptions elucidationOptions) { final Map> state = data.getCorrelations() .getState(); final boolean hasErrors = state.keySet() @@ -471,6 +478,9 @@ public static String buildPyLSDInputFileContent(final Data data, final String mf // PIEC stringBuilder.append(buildPIEC()) .append("\n\n"); + // ELIM + stringBuilder.append(buildELIM(elucidationOptions.getElimP1(), elucidationOptions.getElimP2())) + .append("\n\n"); final Map> collection = new LinkedHashMap<>(); collection.put("MULT", new ArrayList<>()); @@ -516,10 +526,10 @@ public static String buildPyLSDInputFileContent(final Data data, final String mf // BOND (interpretation, INADEQUATE, previous assignments) -> input fragments // LIST PROP for certain limitations or properties of atoms in lists, e.g. 
hetero hetero bonds allowance - stringBuilder.append(buildLISTAndPROP(allowHeteroHeteroBonds)) + stringBuilder.append(buildLISTAndPROP(elucidationOptions.isAllowHeteroHeteroBonds())) .append("\n"); // DEFF and FEXP as default filters (bad lists) - stringBuilder.append(buildFilters(pathToLSDFilterList)) + stringBuilder.append(buildFilters(elucidationOptions.getPathToLSDFilterList())) .append("\n"); // stringBuilder.append("\n").append("MAXT 30").append("\n"); diff --git a/src/casekit/nmr/lsd/model/ElucidationOptions.java b/src/casekit/nmr/lsd/model/ElucidationOptions.java new file mode 100644 index 0000000..813bf67 --- /dev/null +++ b/src/casekit/nmr/lsd/model/ElucidationOptions.java @@ -0,0 +1,45 @@ +package casekit.nmr.lsd.model; + +public class ElucidationOptions { + + // PyLSD options + private String pathToLSDFilterList; + private boolean allowHeteroHeteroBonds; + private int elimP1; + private int elimP2; + + public ElucidationOptions() { + } + + public String getPathToLSDFilterList() { + return this.pathToLSDFilterList; + } + + public void setPathToLSDFilterList(final String pathToLSDFilterList) { + this.pathToLSDFilterList = pathToLSDFilterList; + } + + public boolean isAllowHeteroHeteroBonds() { + return this.allowHeteroHeteroBonds; + } + + public void setAllowHeteroHeteroBonds(final boolean allowHeteroHeteroBonds) { + this.allowHeteroHeteroBonds = allowHeteroHeteroBonds; + } + + public int getElimP1() { + return this.elimP1; + } + + public void setElimP1(final int elimP1) { + this.elimP1 = elimP1; + } + + public int getElimP2() { + return this.elimP2; + } + + public void setElimP2(final int elimP2) { + this.elimP2 = elimP2; + } +} From b5d758abe122cfdd251413ec24da32de4f507389 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Wed, 7 Apr 2021 23:38:41 +0200 Subject: [PATCH 175/405] feat: added useElim parameter and changed filter paths to array --- .../nmr/lsd/PyLSDInputFileBuilder.java | 42 ++++++++++--------- .../nmr/lsd/model/ElucidationOptions.java | 19 
++++++--- 2 files changed, 37 insertions(+), 24 deletions(-) diff --git a/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java b/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java index c2831fd..3d9ae4d 100644 --- a/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java +++ b/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java @@ -405,28 +405,32 @@ private static String buildLISTAndPROP(final boolean allowHeteroHeteroBonds) { return stringBuilder.toString(); } - private static String buildFilters(final String pathToLSDFilterList) { + private static String buildFilters(final String[] filterPaths) { final StringBuilder stringBuilder = new StringBuilder(); // DEFF + FEXP -> add filters stringBuilder.append("; externally defined filters\n"); final Map filters = new LinkedHashMap<>(); int counter = 1; - try { - final BufferedReader bufferedReader = FileSystem.readFile(pathToLSDFilterList); - if (bufferedReader - != null) { - String line; - while ((line = bufferedReader.readLine()) + BufferedReader bufferedReader; + for (final String filterPath : filterPaths) { + try { + bufferedReader = FileSystem.readFile(filterPath); + if (bufferedReader != null) { - filters.put("F" - + counter, line); - counter++; + String line; + while ((line = bufferedReader.readLine()) + != null) { + filters.put("F" + + counter, line); + counter++; + } + bufferedReader.close(); } - bufferedReader.close(); + } catch (final IOException e) { + e.printStackTrace(); } - } catch (final IOException e) { - e.printStackTrace(); } + if (!filters.isEmpty()) { filters.forEach((label, filePath) -> stringBuilder.append("DEFF ") .append(label) @@ -479,8 +483,10 @@ public static String buildPyLSDInputFileContent(final Data data, final String mf stringBuilder.append(buildPIEC()) .append("\n\n"); // ELIM - stringBuilder.append(buildELIM(elucidationOptions.getElimP1(), elucidationOptions.getElimP2())) - .append("\n\n"); + if (elucidationOptions.isUseElim()) { + stringBuilder.append(buildELIM(elucidationOptions.getElimP1(), 
elucidationOptions.getElimP2())) + .append("\n\n"); + } final Map> collection = new LinkedHashMap<>(); collection.put("MULT", new ArrayList<>()); @@ -528,12 +534,10 @@ public static String buildPyLSDInputFileContent(final Data data, final String mf // LIST PROP for certain limitations or properties of atoms in lists, e.g. hetero hetero bonds allowance stringBuilder.append(buildLISTAndPROP(elucidationOptions.isAllowHeteroHeteroBonds())) .append("\n"); - // DEFF and FEXP as default filters (bad lists) - stringBuilder.append(buildFilters(elucidationOptions.getPathToLSDFilterList())) + // DEFF and FEXP as filters (bad lists) + stringBuilder.append(buildFilters(elucidationOptions.getFilterPaths())) .append("\n"); - // stringBuilder.append("\n").append("MAXT 30").append("\n"); - return stringBuilder.toString(); } diff --git a/src/casekit/nmr/lsd/model/ElucidationOptions.java b/src/casekit/nmr/lsd/model/ElucidationOptions.java index 813bf67..d30a6a1 100644 --- a/src/casekit/nmr/lsd/model/ElucidationOptions.java +++ b/src/casekit/nmr/lsd/model/ElucidationOptions.java @@ -3,20 +3,21 @@ public class ElucidationOptions { // PyLSD options - private String pathToLSDFilterList; + private String[] filterPaths; private boolean allowHeteroHeteroBonds; + private boolean useElim; private int elimP1; private int elimP2; public ElucidationOptions() { } - public String getPathToLSDFilterList() { - return this.pathToLSDFilterList; + public String[] getFilterPaths() { + return this.filterPaths; } - public void setPathToLSDFilterList(final String pathToLSDFilterList) { - this.pathToLSDFilterList = pathToLSDFilterList; + public void setFilterPaths(final String[] filterPaths) { + this.filterPaths = filterPaths; } public boolean isAllowHeteroHeteroBonds() { @@ -27,6 +28,14 @@ public void setAllowHeteroHeteroBonds(final boolean allowHeteroHeteroBonds) { this.allowHeteroHeteroBonds = allowHeteroHeteroBonds; } + public boolean isUseElim() { + return this.useElim; + } + + public void 
setUseElim(final boolean useElim) { + this.useElim = useElim; + } + public int getElimP1() { return this.elimP1; } From 090cc43b217f4664b35102f59a79fd7cb2572eb7 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Thu, 8 Apr 2021 15:41:57 +0200 Subject: [PATCH 176/405] feat: added atom type check when parsing ranked SDF by PyLSD --- src/casekit/nmr/lsd/RankedResultSDFParser.java | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/casekit/nmr/lsd/RankedResultSDFParser.java b/src/casekit/nmr/lsd/RankedResultSDFParser.java index d01a80e..1ac7b3f 100644 --- a/src/casekit/nmr/lsd/RankedResultSDFParser.java +++ b/src/casekit/nmr/lsd/RankedResultSDFParser.java @@ -58,7 +58,7 @@ public static List parseRankedResultSDF(final String pathToFile, } catch (final CDKException e) { e.printStackTrace(); } - shiftProperties1D = getShiftProperties1D(structure); + shiftProperties1D = getShiftProperties1D(structure, Utils.getAtomTypeFromNucleus(nucleus)); experimentalSpectrum = new Spectrum(); experimentalSpectrum.setNuclei(new String[]{nucleus}); @@ -150,7 +150,7 @@ public static List parseRankedResultSDF(final String pathToFile, return dataSetList; } - public static LinkedHashMap getShiftProperties1D(final IAtomContainer ac) { + public static LinkedHashMap getShiftProperties1D(final IAtomContainer ac, final String atomType) { final LinkedHashMap shiftProperties1D = new LinkedHashMap<>(); String[] split; for (final Object key : ac.getProperties() @@ -158,7 +158,12 @@ public static LinkedHashMap getShiftProperties1D(final IAtomCont if (key instanceof String && ((String) key).startsWith("CS")) { split = ((String) key).split("CS"); - shiftProperties1D.put(split[1], ac.getProperty(key)); + if (ac.getAtom(Integer.parseInt(split[1]) + - 1) + .getSymbol() + .equals(atomType)) { + shiftProperties1D.put(split[1], ac.getProperty(key)); + } } } From bf82a1aeadb1417988b10d91cc1157f169e9b242 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Thu, 8 Apr 2021 15:46:55 +0200 
Subject: [PATCH 177/405] feat: re-added SHIH in PyLSD input file --- src/casekit/nmr/lsd/PyLSDInputFileBuilder.java | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java b/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java index 3d9ae4d..4b44b6b 100644 --- a/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java +++ b/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java @@ -313,10 +313,6 @@ private static String buildCOSY(final Correlation correlation, final int index, .equals(correlation.getId())) { for (int k = 1; k < indicesMap.get(index).length; k++) { - // for (int l = 1; l < indicesMap.get(matchIndex).length; l++) { - // uniqueSet.add(indicesMap.get(index)[k] + " " + indicesMap.get(matchIndex)[l]); - // } - // only allow COSY values between possible equivalent protons and only one another non-equivalent proton if (indicesMap.get(matchIndex).length == 2) { @@ -516,8 +512,8 @@ public static String buildPyLSDInputFileContent(final Data data, final String mf .add(buildCOSY(correlation, i, data, indicesMap)); collection.get("SHIX") .add(buildSHIX(correlation, i, indicesMap)); - // collection.get("SHIH") - // .add(buildSHIH(correlation, i, indicesMap)); + collection.get("SHIH") + .add(buildSHIH(correlation, i, indicesMap)); } collection.keySet() From 82c8f1951d58247ddf2b789923a112f74150ffd7 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Fri, 9 Apr 2021 14:35:24 +0200 Subject: [PATCH 178/405] feat: added PyLSD HMBC P3, HMBC P4, COSY P3 and COSY P4 parameters --- .../nmr/lsd/PyLSDInputFileBuilder.java | 18 ++++++---- .../nmr/lsd/model/ElucidationOptions.java | 36 +++++++++++++++++++ 2 files changed, 48 insertions(+), 6 deletions(-) diff --git a/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java b/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java index 4b44b6b..c4446e8 100644 --- a/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java +++ b/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java @@ -244,12 +244,14 @@ private static String 
buildHSQC(final Correlation correlation, final int index, } private static String buildHMBC(final Correlation correlation, final int index, final Data data, - final Map indicesMap) { + final Map indicesMap, final int hmbcP3, final int hmbcP4) { if (correlation.getAtomType() .equals("H")) { return null; } - final String defaultBondDistance = "2 4"; + final String defaultBondDistance = hmbcP3 + + " " + + hmbcP4; final Set uniqueSet = new LinkedHashSet<>(); // in case of same content exists multiple times for (final Link link : correlation.getLink()) { if (link.getExperimentType() @@ -294,12 +296,14 @@ private static String buildHMBC(final Correlation correlation, final int index, } private static String buildCOSY(final Correlation correlation, final int index, final Data data, - final Map indicesMap) { + final Map indicesMap, final int cosyP3, final int cosyP4) { if (!correlation.getAtomType() .equals("H")) { return null; } - final String defaultBondDistance = "3 4"; + final String defaultBondDistance = cosyP3 + + " " + + cosyP4; final Set uniqueSet = new LinkedHashSet<>(); // in case of same content exists multiple times for (final Link link : correlation.getLink()) { if (link.getExperimentType() @@ -507,9 +511,11 @@ public static String buildPyLSDInputFileContent(final Data data, final String mf collection.get("HSQC") .add(buildHSQC(correlation, i, indicesMap)); collection.get("HMBC") - .add(buildHMBC(correlation, i, data, indicesMap)); + .add(buildHMBC(correlation, i, data, indicesMap, elucidationOptions.getHmbcP3(), + elucidationOptions.getHmbcP4())); collection.get("COSY") - .add(buildCOSY(correlation, i, data, indicesMap)); + .add(buildCOSY(correlation, i, data, indicesMap, elucidationOptions.getCosyP3(), + elucidationOptions.getCosyP4())); collection.get("SHIX") .add(buildSHIX(correlation, i, indicesMap)); collection.get("SHIH") diff --git a/src/casekit/nmr/lsd/model/ElucidationOptions.java b/src/casekit/nmr/lsd/model/ElucidationOptions.java index 
d30a6a1..b5e060c 100644 --- a/src/casekit/nmr/lsd/model/ElucidationOptions.java +++ b/src/casekit/nmr/lsd/model/ElucidationOptions.java @@ -8,6 +8,10 @@ public class ElucidationOptions { private boolean useElim; private int elimP1; private int elimP2; + private int hmbcP3; + private int hmbcP4; + private int cosyP3; + private int cosyP4; public ElucidationOptions() { } @@ -51,4 +55,36 @@ public int getElimP2() { public void setElimP2(final int elimP2) { this.elimP2 = elimP2; } + + public int getHmbcP3() { + return this.hmbcP3; + } + + public void setHmbcP3(final int hmbcP3) { + this.hmbcP3 = hmbcP3; + } + + public int getHmbcP4() { + return this.hmbcP4; + } + + public void setHmbcP4(final int hmbcP4) { + this.hmbcP4 = hmbcP4; + } + + public int getCosyP3() { + return this.cosyP3; + } + + public void setCosyP3(final int cosyP3) { + this.cosyP3 = cosyP3; + } + + public int getCosyP4() { + return this.cosyP4; + } + + public void setCosyP4(final int cosyP4) { + this.cosyP4 = cosyP4; + } } From bd3818fd68a792a6fdfc0072c334efbb81d7d6e4 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Sat, 10 Apr 2021 16:07:55 +0200 Subject: [PATCH 179/405] chore: replace default constructors, setters and getters by lombok annotations --- .../nmr/lsd/model/ElucidationOptions.java | 84 +---- src/casekit/nmr/model/Assignment.java | 28 +- src/casekit/nmr/model/DataSet.java | 75 ++--- .../nmr/model/ExtendedConnectionMatrix.java | 315 +++++++++--------- src/casekit/nmr/model/Signal.java | 81 +---- src/casekit/nmr/model/Spectrum.java | 87 +---- .../nmr/model/nmrdisplayer/Correlation.java | 15 +- .../nmr/model/nmrdisplayer/Correlations.java | 1 + src/casekit/nmr/model/nmrdisplayer/Data.java | 4 +- .../nmr/model/nmrdisplayer/Default.java | 8 +- src/casekit/nmr/model/nmrdisplayer/Link.java | 5 +- src/casekit/nmr/model/nmrdisplayer/Range.java | 5 +- .../nmr/model/nmrdisplayer/Signal1D.java | 1 - .../nmr/model/nmrdisplayer/Signal2D.java | 7 +- .../nmr/model/nmrdisplayer/Spectrum.java | 6 +- 
src/casekit/nmr/model/nmrdisplayer/Zone.java | 5 +- 16 files changed, 245 insertions(+), 482 deletions(-) diff --git a/src/casekit/nmr/lsd/model/ElucidationOptions.java b/src/casekit/nmr/lsd/model/ElucidationOptions.java index b5e060c..84822d8 100644 --- a/src/casekit/nmr/lsd/model/ElucidationOptions.java +++ b/src/casekit/nmr/lsd/model/ElucidationOptions.java @@ -1,5 +1,14 @@ package casekit.nmr.lsd.model; +import lombok.AllArgsConstructor; +import lombok.Getter; +import lombok.NoArgsConstructor; +import lombok.Setter; + +@NoArgsConstructor +@AllArgsConstructor +@Getter +@Setter public class ElucidationOptions { // PyLSD options @@ -12,79 +21,4 @@ public class ElucidationOptions { private int hmbcP4; private int cosyP3; private int cosyP4; - - public ElucidationOptions() { - } - - public String[] getFilterPaths() { - return this.filterPaths; - } - - public void setFilterPaths(final String[] filterPaths) { - this.filterPaths = filterPaths; - } - - public boolean isAllowHeteroHeteroBonds() { - return this.allowHeteroHeteroBonds; - } - - public void setAllowHeteroHeteroBonds(final boolean allowHeteroHeteroBonds) { - this.allowHeteroHeteroBonds = allowHeteroHeteroBonds; - } - - public boolean isUseElim() { - return this.useElim; - } - - public void setUseElim(final boolean useElim) { - this.useElim = useElim; - } - - public int getElimP1() { - return this.elimP1; - } - - public void setElimP1(final int elimP1) { - this.elimP1 = elimP1; - } - - public int getElimP2() { - return this.elimP2; - } - - public void setElimP2(final int elimP2) { - this.elimP2 = elimP2; - } - - public int getHmbcP3() { - return this.hmbcP3; - } - - public void setHmbcP3(final int hmbcP3) { - this.hmbcP3 = hmbcP3; - } - - public int getHmbcP4() { - return this.hmbcP4; - } - - public void setHmbcP4(final int hmbcP4) { - this.hmbcP4 = hmbcP4; - } - - public int getCosyP3() { - return this.cosyP3; - } - - public void setCosyP3(final int cosyP3) { - this.cosyP3 = cosyP3; - } - - public int 
getCosyP4() { - return this.cosyP4; - } - - public void setCosyP4(final int cosyP4) { - this.cosyP4 = cosyP4; - } } diff --git a/src/casekit/nmr/model/Assignment.java b/src/casekit/nmr/model/Assignment.java index 0163274..edd9df1 100644 --- a/src/casekit/nmr/model/Assignment.java +++ b/src/casekit/nmr/model/Assignment.java @@ -23,11 +23,20 @@ */ package casekit.nmr.model; +import lombok.AllArgsConstructor; +import lombok.Getter; +import lombok.NoArgsConstructor; +import lombok.Setter; + import java.util.Arrays; /** * @author Michael Wenk [https://github.com/michaelwenk] */ +@NoArgsConstructor +@AllArgsConstructor +@Getter +@Setter public class Assignment implements Cloneable { @@ -35,14 +44,6 @@ public class Assignment private int[][][] assignments; - public Assignment() { - } - - public Assignment(final String[] nuclei, final int[][][] assignments) { - this.nuclei = nuclei; - this.assignments = assignments; - } - public void initAssignments(final int length) { final int[][][] temp = new int[this.getNDim()][length][1]; for (int i = 0; i @@ -56,14 +57,6 @@ public void initAssignments(final int length) { this.assignments = temp; } - public String[] getNuclei() { - return this.nuclei; - } - - public void setNuclei(final String[] nuclei) { - this.nuclei = nuclei; - } - public int getNDim() { return this.getNuclei().length; } @@ -208,9 +201,6 @@ private boolean checkIndex(final int dim, final int index) { < this.assignments[dim].length); } - public int[][][] getAssignments() { - return this.assignments; - } @Override public Assignment clone() throws CloneNotSupportedException { diff --git a/src/casekit/nmr/model/DataSet.java b/src/casekit/nmr/model/DataSet.java index 60c37c5..18eb077 100644 --- a/src/casekit/nmr/model/DataSet.java +++ b/src/casekit/nmr/model/DataSet.java @@ -1,81 +1,52 @@ package casekit.nmr.model; +import lombok.AllArgsConstructor; +import lombok.Getter; +import lombok.NoArgsConstructor; +import lombok.Setter; import 
org.openscience.cdk.interfaces.IAtomContainer; import java.util.HashMap; import java.util.Map; +@NoArgsConstructor +@AllArgsConstructor +@Getter +@Setter public class DataSet { private ExtendedConnectionMatrix structure; private Spectrum spectrum; private Assignment assignment; - private HashMap meta; + private Map meta; - public DataSet() { - } - - public DataSet(final ExtendedConnectionMatrix structure, final Spectrum spectrum, final Assignment assignment, Map meta) { - this.structure = structure; - this.spectrum = spectrum; - this.assignment = assignment; - this.meta = new HashMap<>(meta); - } - - public DataSet(final IAtomContainer structure, final Spectrum spectrum, final Assignment assignment, Map meta) { + public DataSet(final IAtomContainer structure, final Spectrum spectrum, final Assignment assignment, + final Map meta) { this.structure = new ExtendedConnectionMatrix(structure); this.spectrum = spectrum; this.assignment = assignment; this.meta = new HashMap<>(meta); } - public void addMetaInfo(final String key, final String value){ + public void addMetaInfo(final String key, final String value) { this.meta.put(key, value); } - public void removeMetaInfo(final String key){ + public void removeMetaInfo(final String key) { this.meta.remove(key); } - public ExtendedConnectionMatrix getStructure() { - return structure; - } - - public void setStructure(final ExtendedConnectionMatrix structure) { - this.structure = structure; - } - - public Spectrum getSpectrum() { - return spectrum; - } - - public void setSpectrum(final Spectrum spectrum) { - this.spectrum = spectrum; - } - - public Assignment getAssignment() { - return assignment; - } - - public void setAssignment(final Assignment assignment) { - this.assignment = assignment; - } - - public Map getMeta() { - return meta; - } - - public void setMeta(Map meta) { - this.meta = new HashMap<>(meta); - } - @Override public String toString() { - return "DataSet{" + - "structure=" + structure + - ", spectrum=" + 
spectrum + - ", assignment=" + assignment + - ", meta=" + meta + - '}'; + return "DataSet{" + + "structure=" + + this.structure + + ", spectrum=" + + this.spectrum + + ", assignment=" + + this.assignment + + ", meta=" + + this.meta + + '}'; } } diff --git a/src/casekit/nmr/model/ExtendedConnectionMatrix.java b/src/casekit/nmr/model/ExtendedConnectionMatrix.java index dfd43dd..ffa4ee0 100644 --- a/src/casekit/nmr/model/ExtendedConnectionMatrix.java +++ b/src/casekit/nmr/model/ExtendedConnectionMatrix.java @@ -12,6 +12,10 @@ package casekit.nmr.model; import casekit.nmr.Utils; +import lombok.AllArgsConstructor; +import lombok.Getter; +import lombok.NoArgsConstructor; +import lombok.Setter; import org.openscience.cdk.graph.matrix.ConnectionMatrix; import org.openscience.cdk.interfaces.IAtom; import org.openscience.cdk.interfaces.IAtomContainer; @@ -24,9 +28,12 @@ import java.util.Arrays; /** - * * @author Michael Wenk [https://github.com/michaelwenk] */ +@NoArgsConstructor +@AllArgsConstructor +@Getter +@Setter public class ExtendedConnectionMatrix { private double[][] connectionMatrix; @@ -38,20 +45,7 @@ public class ExtendedConnectionMatrix { private int bondCount; - public ExtendedConnectionMatrix() { - } - - public ExtendedConnectionMatrix(double[][] connectionMatrix, String[] atomTypes, Integer[][] atomPropertiesNumeric, Hybridization[] hybridizations, Boolean[][] atomPropertiesBoolean, Boolean[][][] bondProperties, int bondCount) { - this.connectionMatrix = connectionMatrix; - this.atomTypes = atomTypes; - this.atomPropertiesNumeric = atomPropertiesNumeric; - this.hybridizations = hybridizations; - this.atomPropertiesBoolean = atomPropertiesBoolean; - this.bondProperties = bondProperties; - this.bondCount = bondCount; - } - - public ExtendedConnectionMatrix(final IAtomContainer ac){ + public ExtendedConnectionMatrix(final IAtomContainer ac) { this.connectionMatrix = ConnectionMatrix.getMatrix(ac); this.atomTypes = new String[this.connectionMatrix.length]; 
this.hybridizations = new Hybridization[this.connectionMatrix.length]; @@ -62,18 +56,23 @@ public ExtendedConnectionMatrix(final IAtomContainer ac){ this.init(ac); } - private void init(final IAtomContainer ac){ + private void init(final IAtomContainer ac) { IAtom atom1, atom2; IBond bond; - for (int i = 0; i < this.connectionMatrix.length; i++) { + for (int i = 0; i + < this.connectionMatrix.length; i++) { atom1 = ac.getAtom(i); - this.setAtomProperties(i, atom1.getSymbol(), atom1.getImplicitHydrogenCount(), atom1.getValency(), atom1.getFormalCharge(), atom1.isInRing(), atom1.isAromatic(), atom1.getHybridization()); + this.setAtomProperties(i, atom1.getSymbol(), atom1.getImplicitHydrogenCount(), atom1.getValency(), + atom1.getFormalCharge(), atom1.isInRing(), atom1.isAromatic(), + atom1.getHybridization()); this.bondProperties[i] = new Boolean[this.connectionMatrix.length][2]; - for (int k = 0; k < this.connectionMatrix.length; k++) { + for (int k = 0; k + < this.connectionMatrix.length; k++) { atom2 = ac.getAtom(k); bond = ac.getBond(atom1, atom2); - if(bond != null){ + if (bond + != null) { this.setBondProperty(i, k, bond.isInRing(), bond.isAromatic()); } } @@ -81,42 +80,49 @@ private void init(final IAtomContainer ac){ this.updateBondCount(); } - private void init(final ExtendedConnectionMatrix extendedConnectionMatrix){ - for (int i = 0; i < this.getAtomCount(); i++) { - if(i < extendedConnectionMatrix.getAtomCount()){ + private void init(final ExtendedConnectionMatrix extendedConnectionMatrix) { + for (int i = 0; i + < this.getAtomCount(); i++) { + if (i + < extendedConnectionMatrix.getAtomCount()) { this.setAtomProperties(i, extendedConnectionMatrix.getAtomType(i), - extendedConnectionMatrix.getHydrogenCount(i), - extendedConnectionMatrix.getValency(i), - extendedConnectionMatrix.getFormalCharge(i), - extendedConnectionMatrix.isInRing(i), - extendedConnectionMatrix.isAromatic(i), - extendedConnectionMatrix.getHybridization(i)); + 
extendedConnectionMatrix.getHydrogenCount(i), + extendedConnectionMatrix.getValency(i), + extendedConnectionMatrix.getFormalCharge(i), + extendedConnectionMatrix.isInRing(i), extendedConnectionMatrix.isAromatic(i), + extendedConnectionMatrix.getHybridization(i)); } this.bondProperties[i] = new Boolean[this.getAtomCount()][2]; - if(i < extendedConnectionMatrix.getAtomCount()){ - for (int k = 0; k < extendedConnectionMatrix.getAtomCount(); k++) { + if (i + < extendedConnectionMatrix.getAtomCount()) { + for (int k = 0; k + < extendedConnectionMatrix.getAtomCount(); k++) { this.connectionMatrix[i][k] = extendedConnectionMatrix.getBondOrder(i, k); - this.setBondProperty(i, k, extendedConnectionMatrix.isInRing(i, k), extendedConnectionMatrix.isAromatic(i, k)); + this.setBondProperty(i, k, extendedConnectionMatrix.isInRing(i, k), + extendedConnectionMatrix.isAromatic(i, k)); } } else { - for (int k = 0; k < this.getAtomCount(); k++) { + for (int k = 0; k + < this.getAtomCount(); k++) { this.connectionMatrix[i][k] = 0.0; -// this.setBondProperty(i, k, null, null); + // this.setBondProperty(i, k, null, null); } } } this.updateBondCount(); } - private void extendConnectionMatrix(){ + private void extendConnectionMatrix() { this.extendConnectionMatrix(1); } - private void extendConnectionMatrix(final int extensionSize){ - this.connectionMatrix = new double[this.getAtomCount() + extensionSize][this.getAtomCount() + extensionSize]; + private void extendConnectionMatrix(final int extensionSize) { + this.connectionMatrix = new double[this.getAtomCount() + + extensionSize][this.getAtomCount() + + extensionSize]; this.atomTypes = new String[this.connectionMatrix.length]; this.hybridizations = new Hybridization[this.connectionMatrix.length]; this.atomPropertiesNumeric = new Integer[this.connectionMatrix.length][]; @@ -124,7 +130,9 @@ private void extendConnectionMatrix(final int extensionSize){ this.bondProperties = new Boolean[this.connectionMatrix.length][][]; } - public void 
addAtom(final String atomType, final Integer implicitHydrogenCount, final Integer valency, final Integer formalCharge, final Boolean isInRing, final Boolean isAromatic, final Hybridization hybridization){ + public void addAtom(final String atomType, final Integer implicitHydrogenCount, final Integer valency, + final Integer formalCharge, final Boolean isInRing, final Boolean isAromatic, + final Hybridization hybridization) { // create backup object final ExtendedConnectionMatrix extendedConnectionMatrixBackup = this.buildClone(); // extend the sizes of all matrices by one @@ -132,14 +140,18 @@ public void addAtom(final String atomType, final Integer implicitHydrogenCount, // fill all information in again from backup object this.init(extendedConnectionMatrixBackup); // set information for new atom - this.setAtomProperties(this.getAtomCount() - 1, atomType, implicitHydrogenCount, valency, formalCharge, isInRing, isAromatic, hybridization); + this.setAtomProperties(this.getAtomCount() + - 1, atomType, implicitHydrogenCount, valency, formalCharge, isInRing, + isAromatic, hybridization); } - public boolean addBond(final int atomIndex1, final int atomIndex2, final double order, final Boolean isInRing, final Boolean isAromatic){ - if(!this.hasAtom(atomIndex1) || !this.hasAtom(atomIndex2)){ + public boolean addBond(final int atomIndex1, final int atomIndex2, final double order, final Boolean isInRing, + final Boolean isAromatic) { + if (!this.hasAtom(atomIndex1) + || !this.hasAtom(atomIndex2)) { return false; } - if(!this.isValidBondAddition(atomIndex1, atomIndex2, order, isAromatic)){ + if (!this.isValidBondAddition(atomIndex1, atomIndex2, order, isAromatic)) { return false; } this.connectionMatrix[atomIndex1][atomIndex2] = order; @@ -152,30 +164,38 @@ public boolean addBond(final int atomIndex1, final int atomIndex2, final double return true; } - public boolean isValidBondAddition(final int atomIndex1, final int atomIndex2, final double order, final boolean isAromatic){ 
- if(!this.hasAtom(atomIndex1) || !this.hasAtom(atomIndex2)){ + public boolean isValidBondAddition(final int atomIndex1, final int atomIndex2, final double order, + final boolean isAromatic) { + if (!this.hasAtom(atomIndex1) + || !this.hasAtom(atomIndex2)) { return false; } - return this.isValidBondAddition(atomIndex1, order, isAromatic) && this.isValidBondAddition(atomIndex2, order, isAromatic); + return this.isValidBondAddition(atomIndex1, order, isAromatic) + && this.isValidBondAddition(atomIndex2, order, isAromatic); } - public boolean isValidBondAddition(final int atomIndex, final double order, final boolean isAromatic){ + public boolean isValidBondAddition(final int atomIndex, final double order, final boolean isAromatic) { float bondOrderSum = this.getBondOrderSum(atomIndex, true); - if(isAromatic){ + if (isAromatic) { bondOrderSum += 1.5; } else { bondOrderSum += order; } // -1 for cases with heterocyclic aromatics, like the N in the small aromatic ring in coffein if we want to add the bond to the CH3 group - if(this.isAromatic(atomIndex) && (!this.getAtomType(atomIndex).equals("C"))){ + if (this.isAromatic(atomIndex) + && (!this.getAtomType(atomIndex) + .equals("C"))) { bondOrderSum -= 1; } - return bondOrderSum <= this.getValency(atomIndex); + return bondOrderSum + <= this.getValency(atomIndex); } - private void setAtomProperties(final int atomIndex, final String atomType, final Integer implicitHydrogenCount, final Integer valency, final Integer formalCharge, final Boolean isInRing, final Boolean isAromatic, final Hybridization hybridization){ + private void setAtomProperties(final int atomIndex, final String atomType, final Integer implicitHydrogenCount, + final Integer valency, final Integer formalCharge, final Boolean isInRing, + final Boolean isAromatic, final Hybridization hybridization) { this.atomTypes[atomIndex] = atomType; this.atomPropertiesNumeric[atomIndex] = new Integer[3]; this.atomPropertiesNumeric[atomIndex][0] = implicitHydrogenCount; @@ 
-187,16 +207,21 @@ private void setAtomProperties(final int atomIndex, final String atomType, final this.hybridizations[atomIndex] = hybridization; } - private void setBondProperty(final int atomIndex1, final int atomIndex2, final Boolean isInRing, final Boolean isAromatic){ + private void setBondProperty(final int atomIndex1, final int atomIndex2, final Boolean isInRing, + final Boolean isAromatic) { this.bondProperties[atomIndex1][atomIndex2][0] = isInRing; this.bondProperties[atomIndex1][atomIndex2][1] = isAromatic; } - private void updateBondCount(){ + private void updateBondCount() { int bondCounter = 0; - for (int i = 0; i < this.getAtomCount(); i++) { - for (int j = i + 1; j < this.getAtomCount(); j++) { - if(this.connectionMatrix[i][j] > 0.0){ + for (int i = 0; i + < this.getAtomCount(); i++) { + for (int j = i + + 1; j + < this.getAtomCount(); j++) { + if (this.connectionMatrix[i][j] + > 0.0) { bondCounter++; } } @@ -204,137 +229,151 @@ private void updateBondCount(){ this.bondCount = bondCounter; } - public Boolean hasBond(final int atomIndex1, final int atomIndex2){ - if(!this.hasAtom(atomIndex1) || !this.hasAtom(atomIndex2)){ + public Boolean hasBond(final int atomIndex1, final int atomIndex2) { + if (!this.hasAtom(atomIndex1) + || !this.hasAtom(atomIndex2)) { return null; } - return this.getBondOrder(atomIndex1, atomIndex2) > 0.0; + return this.getBondOrder(atomIndex1, atomIndex2) + > 0.0; } - public Double getBondOrder(final int atomIndex1, final int atomIndex2){ - if(!this.hasAtom(atomIndex1) || !this.hasAtom(atomIndex2)){ + public Double getBondOrder(final int atomIndex1, final int atomIndex2) { + if (!this.hasAtom(atomIndex1) + || !this.hasAtom(atomIndex2)) { return null; } return this.connectionMatrix[atomIndex1][atomIndex2]; } - public Float getBondOrderSum(final int atomIndex, final boolean includeHydrogens){ - if(!this.hasAtom(atomIndex)){ + public Float getBondOrderSum(final int atomIndex, final boolean includeHydrogens) { + if 
(!this.hasAtom(atomIndex)) { return null; } float bondOrderSum = (float) 0.0; - for (int j = 0; j < this.connectionMatrix[atomIndex].length; j++) { - if((this.isAromatic(atomIndex, j) != null) && this.isAromatic(atomIndex, j)){ + for (int j = 0; j + < this.connectionMatrix[atomIndex].length; j++) { + if ((this.isAromatic(atomIndex, j) + != null) + && this.isAromatic(atomIndex, j)) { bondOrderSum += 1.5; } else { bondOrderSum += this.getBondOrder(atomIndex, j); } } - if(includeHydrogens){ + if (includeHydrogens) { bondOrderSum += this.getHydrogenCount(atomIndex); } return bondOrderSum; } - public String getAtomType(final int atomIndex){ - if(!this.hasAtom(atomIndex)){ + public String getAtomType(final int atomIndex) { + if (!this.hasAtom(atomIndex)) { return null; } return this.atomTypes[atomIndex]; } - public Integer getHydrogenCount(final int atomIndex){ - if(!this.hasAtom(atomIndex)){ + public Integer getHydrogenCount(final int atomIndex) { + if (!this.hasAtom(atomIndex)) { return null; } return this.atomPropertiesNumeric[atomIndex][0]; } - public Integer getValency(final int atomIndex){ - if(!this.hasAtom(atomIndex)){ + public Integer getValency(final int atomIndex) { + if (!this.hasAtom(atomIndex)) { return null; } return this.atomPropertiesNumeric[atomIndex][1]; } - public Integer getFormalCharge(final int atomIndex){ - if(!this.hasAtom(atomIndex)){ + public Integer getFormalCharge(final int atomIndex) { + if (!this.hasAtom(atomIndex)) { return null; } return this.atomPropertiesNumeric[atomIndex][2]; } - public Boolean isInRing(final int atomIndex){ - if(!this.hasAtom(atomIndex)){ + public Boolean isInRing(final int atomIndex) { + if (!this.hasAtom(atomIndex)) { return null; } return this.atomPropertiesBoolean[atomIndex][0]; } - public Boolean isAromatic(final int atomIndex){ - if(!this.hasAtom(atomIndex)){ + public Boolean isAromatic(final int atomIndex) { + if (!this.hasAtom(atomIndex)) { return null; } return this.atomPropertiesBoolean[atomIndex][1]; } - 
public Hybridization getHybridization(final int atomIndex){ - if(!this.hasAtom(atomIndex)){ + public Hybridization getHybridization(final int atomIndex) { + if (!this.hasAtom(atomIndex)) { return null; } return this.hybridizations[atomIndex]; } - public Boolean isInRing(final int atomIndex1, final int atomIndex2){ - if(!this.hasAtom(atomIndex1) || !this.hasAtom(atomIndex2)){ + public Boolean isInRing(final int atomIndex1, final int atomIndex2) { + if (!this.hasAtom(atomIndex1) + || !this.hasAtom(atomIndex2)) { return null; } return this.bondProperties[atomIndex1][atomIndex2][0]; } - public Boolean isAromatic(final int atomIndex1, final int atomIndex2){ - if(!this.hasAtom(atomIndex1) || !this.hasAtom(atomIndex2)){ + public Boolean isAromatic(final int atomIndex1, final int atomIndex2) { + if (!this.hasAtom(atomIndex1) + || !this.hasAtom(atomIndex2)) { return null; } return this.bondProperties[atomIndex1][atomIndex2][1]; } - public int getAtomCount(){ + public int getAtomCount() { return this.connectionMatrix.length; } - public int getBondCount(){ + public int getBondCount() { return this.bondCount; } - public Boolean isUnsaturated(final int atomIndex){ - if(!this.hasAtom(atomIndex)){ + public Boolean isUnsaturated(final int atomIndex) { + if (!this.hasAtom(atomIndex)) { return null; } - return this.getBondOrderSum(atomIndex, true) < this.getValency(atomIndex); + return this.getBondOrderSum(atomIndex, true) + < this.getValency(atomIndex); } - public boolean hasAtom(final int atomIndex){ - return (atomIndex >= 0) && (atomIndex < this.getAtomCount()); + public boolean hasAtom(final int atomIndex) { + return (atomIndex + >= 0) + && (atomIndex + < this.getAtomCount()); } - public IAtomContainer toAtomContainer(){ - final IAtomContainer ac = SilentChemObjectBuilder.getInstance().newAtomContainer(); + public IAtomContainer toAtomContainer() { + final IAtomContainer ac = SilentChemObjectBuilder.getInstance() + .newAtomContainer(); IAtom atom; - for (int i = 0; i < 
this.connectionMatrix.length; i++) { + for (int i = 0; i + < this.connectionMatrix.length; i++) { atom = new Atom(this.atomTypes[i]); atom.setImplicitHydrogenCount(this.atomPropertiesNumeric[i][0]); atom.setValency(this.atomPropertiesNumeric[i][1]); @@ -346,10 +385,15 @@ public IAtomContainer toAtomContainer(){ ac.addAtom(atom); } IBond bond; - for (int i = 0; i < this.bondProperties.length; i++) { - for (int k = i + 1; k < this.bondProperties.length; k++) { - if(this.connectionMatrix[i][k] > 0.0){ - bond = new Bond(ac.getAtom(i), ac.getAtom(k), Utils.getBondOrder((int) this.connectionMatrix[i][k])); + for (int i = 0; i + < this.bondProperties.length; i++) { + for (int k = i + + 1; k + < this.bondProperties.length; k++) { + if (this.connectionMatrix[i][k] + > 0.0) { + bond = new Bond(ac.getAtom(i), ac.getAtom(k), + Utils.getBondOrder((int) this.connectionMatrix[i][k])); bond.setIsInRing(this.bondProperties[i][k][0]); bond.setIsAromatic(this.bondProperties[i][k][1]); ac.addBond(bond); @@ -366,66 +410,21 @@ public ExtendedConnectionMatrix buildClone() { @Override public String toString() { - return "ExtendedConnectionMatrix{" + - "connectionMatrix=" + Arrays.toString(connectionMatrix) + - ", atomTypes=" + Arrays.toString(atomTypes) + - ", atomPropertiesNumeric=" + Arrays.toString(atomPropertiesNumeric) + - ", hybridizations=" + Arrays.toString(hybridizations) + - ", atomPropertiesBoolean=" + Arrays.toString(atomPropertiesBoolean) + - ", bondProperties=" + Arrays.toString(bondProperties) + - ", bondCount=" + bondCount + - '}'; - } - - public double[][] getConnectionMatrix() { - return connectionMatrix; - } - - public String[] getAtomTypes() { - return atomTypes; - } - - public Integer[][] getAtomPropertiesNumeric() { - return atomPropertiesNumeric; - } - - public Hybridization[] getHybridizations() { - return hybridizations; - } - - public Boolean[][] getAtomPropertiesBoolean() { - return atomPropertiesBoolean; - } - - public Boolean[][][] getBondProperties() { - 
return bondProperties; - } - - public void setConnectionMatrix(double[][] connectionMatrix) { - this.connectionMatrix = connectionMatrix; - } - - public void setAtomTypes(String[] atomTypes) { - this.atomTypes = atomTypes; - } - - public void setAtomPropertiesNumeric(Integer[][] atomPropertiesNumeric) { - this.atomPropertiesNumeric = atomPropertiesNumeric; - } - - public void setHybridizations(Hybridization[] hybridizations) { - this.hybridizations = hybridizations; - } - - public void setAtomPropertiesBoolean(Boolean[][] atomPropertiesBoolean) { - this.atomPropertiesBoolean = atomPropertiesBoolean; - } - - public void setBondProperties(Boolean[][][] bondProperties) { - this.bondProperties = bondProperties; - } - - public void setBondCount(int bondCount) { - this.bondCount = bondCount; + return "ExtendedConnectionMatrix{" + + "connectionMatrix=" + + Arrays.toString(this.connectionMatrix) + + ", atomTypes=" + + Arrays.toString(this.atomTypes) + + ", atomPropertiesNumeric=" + + Arrays.toString(this.atomPropertiesNumeric) + + ", hybridizations=" + + Arrays.toString(this.hybridizations) + + ", atomPropertiesBoolean=" + + Arrays.toString(this.atomPropertiesBoolean) + + ", bondProperties=" + + Arrays.toString(this.bondProperties) + + ", bondCount=" + + this.bondCount + + '}'; } } diff --git a/src/casekit/nmr/model/Signal.java b/src/casekit/nmr/model/Signal.java index ecefbd8..4d14e00 100644 --- a/src/casekit/nmr/model/Signal.java +++ b/src/casekit/nmr/model/Signal.java @@ -27,44 +27,31 @@ */ package casekit.nmr.model; +import lombok.AllArgsConstructor; +import lombok.Getter; +import lombok.NoArgsConstructor; +import lombok.Setter; + import java.util.Arrays; /** * @author Michael Wenk [https://github.com/michaelwenk] */ +@NoArgsConstructor +@AllArgsConstructor +@Getter +@Setter public class Signal { private String[] nuclei; private Double[] shifts; private String multiplicity; - private Double intensity; private String kind; + private Double intensity; private int 
equivalencesCount; private int phase; - public Signal() { - } - - public Signal(final String[] nuclei, final Double[] shifts, final String multiplicity, final String kind, - final Double intensity, final int equivalencesCount, final int phase) { - this.nuclei = nuclei; - this.shifts = shifts; - this.multiplicity = multiplicity; - this.kind = kind; - this.intensity = intensity; - this.equivalencesCount = equivalencesCount; - this.phase = phase; - } - - public String[] getNuclei() { - return this.nuclei; - } - - public void setNuclei(final String[] nuclei) { - this.nuclei = nuclei; - } - public int getNDim() { return this.getNuclei().length; } @@ -96,54 +83,6 @@ public Double getShift(final int dim) { return this.shifts[dim]; } - public Double getIntensity() { - return this.intensity; - } - - public void setIntensity(final Double intensity) { - this.intensity = intensity; - } - - public String getMultiplicity() { - return this.multiplicity; - } - - public void setMultiplicity(final String multiplicity) { - this.multiplicity = multiplicity; - } - - public String getKind() { - return this.kind; - } - - public void setKind(final String kind) { - this.kind = kind; - } - - public Double[] getShifts() { - return this.shifts; - } - - public void setShifts(final Double[] shifts) { - this.shifts = shifts; - } - - public int getEquivalencesCount() { - return this.equivalencesCount; - } - - public void setEquivalencesCount(final int equivalencesCount) { - this.equivalencesCount = equivalencesCount; - } - - public int getPhase() { - return this.phase; - } - - public void setPhase(final int phase) { - this.phase = phase; - } - public Signal buildClone() { return new Signal(this.getNuclei() .clone(), this.shifts.clone(), this.multiplicity, this.kind, this.intensity, diff --git a/src/casekit/nmr/model/Spectrum.java b/src/casekit/nmr/model/Spectrum.java index 1e5f6db..10007e7 100644 --- a/src/casekit/nmr/model/Spectrum.java +++ b/src/casekit/nmr/model/Spectrum.java @@ -28,6 +28,11 
@@ */ package casekit.nmr.model; +import lombok.AllArgsConstructor; +import lombok.Getter; +import lombok.NoArgsConstructor; +import lombok.Setter; + import java.util.ArrayList; import java.util.Arrays; import java.util.Comparator; @@ -37,6 +42,10 @@ /** * @author Michael Wenk [https://github.com/michaelwenk] */ +@NoArgsConstructor +@AllArgsConstructor +@Getter +@Setter public class Spectrum { private String[] nuclei; @@ -59,30 +68,6 @@ public class Spectrum { private List signals; private int signalCount; - public Spectrum() { - } - - public Spectrum(final String[] nuclei, final String description, final String specType, - final Double spectrometerFrequency, final String solvent, final String standard, - final List signals, final int signalCount) { - this.nuclei = nuclei; - this.description = description; - this.specType = specType; - this.spectrometerFrequency = spectrometerFrequency; - this.solvent = solvent; - this.standard = standard; - this.signals = signals; - this.signalCount = signalCount; - } - - public String[] getNuclei() { - return this.nuclei; - } - - public void setNuclei(final String[] nuclei) { - this.nuclei = nuclei; - } - public int getNDim() { return this.getNuclei().length; } @@ -98,26 +83,6 @@ public boolean compareNuclei(final String[] nuclei) { return Arrays.equals(this.getNuclei(), nuclei); } - public String getSpecType() { - return this.specType; - } - - public void setSpecType(final String specType) { - this.specType = specType; - } - - public String getSpecDescription() { - return this.description; - } - - public void setSpecDescription(final String description) { - this.description = description; - } - - public int getSignalCount() { - return this.signalCount; - } - public int getSignalCountWithEquivalences() { int sum = 0; for (final Signal signal : this.getSignals()) { @@ -305,38 +270,6 @@ public int getSignalIndex(final Signal signal) { return -1; } - public Double getSpectrometerFrequency() { - return this.spectrometerFrequency; - } 
- - public void setSpectrometerFrequency(final Double sf) { - this.spectrometerFrequency = sf; - } - - public String getSolvent() { - return this.solvent; - } - - public void setSolvent(final String solvent) { - this.solvent = solvent; - } - - public String getStandard() { - return this.standard; - } - - public void setStandard(final String standard) { - this.standard = standard; - } - - public String getDescription() { - return this.description; - } - - public void setDescription(final String description) { - this.description = description; - } - /** * Returns the indices of signals with same multiplicity. * @@ -446,7 +379,7 @@ public Spectrum buildClone() { clone.addSignal(this.getSignal(i) .buildClone()); } - clone.setSpecDescription(this.description); + clone.setDescription(this.description); clone.setSolvent(this.solvent); clone.setSpecType(this.specType); clone.setSpectrometerFrequency(this.spectrometerFrequency); diff --git a/src/casekit/nmr/model/nmrdisplayer/Correlation.java b/src/casekit/nmr/model/nmrdisplayer/Correlation.java index f05573a..6b8b23c 100644 --- a/src/casekit/nmr/model/nmrdisplayer/Correlation.java +++ b/src/casekit/nmr/model/nmrdisplayer/Correlation.java @@ -29,8 +29,8 @@ import lombok.Setter; import lombok.ToString; -import java.util.ArrayList; -import java.util.HashMap; +import java.util.List; +import java.util.Map; @NoArgsConstructor @Getter @@ -38,17 +38,18 @@ @ToString public class Correlation { + private String id; private String experimentType; private String experimentID; private String atomType; - private HashMap label; + private Map label; private Signal1D signal; - private ArrayList link; + private List link; private int equivalence; - private HashMap> attachment; - private ArrayList protonsCount; + private Map> attachment; + private List protonsCount; private String hybridization; private boolean pseudo; - private HashMap edited; + private Map edited; } diff --git a/src/casekit/nmr/model/nmrdisplayer/Correlations.java 
b/src/casekit/nmr/model/nmrdisplayer/Correlations.java index 76bc96c..5070b64 100644 --- a/src/casekit/nmr/model/nmrdisplayer/Correlations.java +++ b/src/casekit/nmr/model/nmrdisplayer/Correlations.java @@ -37,5 +37,6 @@ @ToString public class Correlations extends Default { + private Map> state; } diff --git a/src/casekit/nmr/model/nmrdisplayer/Data.java b/src/casekit/nmr/model/nmrdisplayer/Data.java index 4e3b9ad..53c42b9 100644 --- a/src/casekit/nmr/model/nmrdisplayer/Data.java +++ b/src/casekit/nmr/model/nmrdisplayer/Data.java @@ -30,7 +30,7 @@ import lombok.Setter; import lombok.ToString; -import java.util.ArrayList; +import java.util.List; @NoArgsConstructor @Getter @@ -40,6 +40,6 @@ @JsonIgnoreProperties(ignoreUnknown = true) public class Data { - private ArrayList spectra; + private List spectra; private Correlations correlations; } diff --git a/src/casekit/nmr/model/nmrdisplayer/Default.java b/src/casekit/nmr/model/nmrdisplayer/Default.java index 297ce5f..f2117c4 100644 --- a/src/casekit/nmr/model/nmrdisplayer/Default.java +++ b/src/casekit/nmr/model/nmrdisplayer/Default.java @@ -26,8 +26,8 @@ import lombok.*; -import java.util.ArrayList; -import java.util.HashMap; +import java.util.List; +import java.util.Map; @NoArgsConstructor @AllArgsConstructor @@ -37,6 +37,6 @@ public class Default { - private HashMap options; - private ArrayList values; + private Map options; + private List values; } diff --git a/src/casekit/nmr/model/nmrdisplayer/Link.java b/src/casekit/nmr/model/nmrdisplayer/Link.java index 1fbecd5..da575da 100644 --- a/src/casekit/nmr/model/nmrdisplayer/Link.java +++ b/src/casekit/nmr/model/nmrdisplayer/Link.java @@ -29,19 +29,20 @@ import lombok.Setter; import lombok.ToString; -import java.util.ArrayList; +import java.util.List; @NoArgsConstructor @Getter @Setter @ToString public class Link { + private String experimentType; private String experimentID; private String[] atomType; private Signal2D signal; private String axis; - private ArrayList 
match; + private List match; private String id; private String experimentLabel; private boolean pseudo; diff --git a/src/casekit/nmr/model/nmrdisplayer/Range.java b/src/casekit/nmr/model/nmrdisplayer/Range.java index cbaff13..faa62d4 100644 --- a/src/casekit/nmr/model/nmrdisplayer/Range.java +++ b/src/casekit/nmr/model/nmrdisplayer/Range.java @@ -27,18 +27,17 @@ import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import lombok.*; -import java.util.ArrayList; +import java.util.List; @NoArgsConstructor @AllArgsConstructor @Getter @Setter @ToString - @JsonIgnoreProperties(ignoreUnknown = true) public class Range { private String id; private String kind; - private ArrayList signal; + private List signal; } diff --git a/src/casekit/nmr/model/nmrdisplayer/Signal1D.java b/src/casekit/nmr/model/nmrdisplayer/Signal1D.java index 166d1d0..9733610 100644 --- a/src/casekit/nmr/model/nmrdisplayer/Signal1D.java +++ b/src/casekit/nmr/model/nmrdisplayer/Signal1D.java @@ -32,7 +32,6 @@ @Getter @Setter @ToString - @JsonIgnoreProperties(ignoreUnknown = true) public class Signal1D { diff --git a/src/casekit/nmr/model/nmrdisplayer/Signal2D.java b/src/casekit/nmr/model/nmrdisplayer/Signal2D.java index 1366bf4..162806e 100644 --- a/src/casekit/nmr/model/nmrdisplayer/Signal2D.java +++ b/src/casekit/nmr/model/nmrdisplayer/Signal2D.java @@ -27,21 +27,20 @@ import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import lombok.*; -import java.util.HashMap; +import java.util.Map; @NoArgsConstructor @AllArgsConstructor @Getter @Setter @ToString - @JsonIgnoreProperties(ignoreUnknown = true) public class Signal2D { private String id; private String kind; private String multiplicity; - private HashMap x; - private HashMap y; + private Map x; + private Map y; private Integer sign; } diff --git a/src/casekit/nmr/model/nmrdisplayer/Spectrum.java b/src/casekit/nmr/model/nmrdisplayer/Spectrum.java index c79fa82..816d132 100644 --- a/src/casekit/nmr/model/nmrdisplayer/Spectrum.java +++ 
b/src/casekit/nmr/model/nmrdisplayer/Spectrum.java @@ -32,20 +32,19 @@ import lombok.ToString; import java.util.ArrayList; -import java.util.HashMap; +import java.util.Map; @NoArgsConstructor @Getter @Setter @ToString - @JsonIgnoreProperties(ignoreUnknown = true) public class Spectrum { private String id; private Default ranges; private Default zones; - private HashMap info; + private Map info; public casekit.nmr.model.Spectrum toSpectrum(final boolean considerSignalKind) { final int dimension = (int) this.info.get("dimension"); @@ -105,5 +104,4 @@ public casekit.nmr.model.Spectrum toSpectrum(final boolean considerSignalKind) { return null; } - } diff --git a/src/casekit/nmr/model/nmrdisplayer/Zone.java b/src/casekit/nmr/model/nmrdisplayer/Zone.java index 17ad0e4..dd5521c 100644 --- a/src/casekit/nmr/model/nmrdisplayer/Zone.java +++ b/src/casekit/nmr/model/nmrdisplayer/Zone.java @@ -27,18 +27,17 @@ import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import lombok.*; -import java.util.ArrayList; +import java.util.List; @NoArgsConstructor @AllArgsConstructor @Getter @Setter @ToString - @JsonIgnoreProperties(ignoreUnknown = true) public class Zone { private String id; private String kind; - private ArrayList signal; + private List signal; } From d0419351273416639a1a4a8a42c1486345b4d45b Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Sat, 10 Apr 2021 16:13:06 +0200 Subject: [PATCH 180/405] chore: replace default constructors, setters and getters by lombok annotations (2) --- src/casekit/nmr/model/ExtendedConnectionMatrix.java | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/casekit/nmr/model/ExtendedConnectionMatrix.java b/src/casekit/nmr/model/ExtendedConnectionMatrix.java index ffa4ee0..1199bf8 100644 --- a/src/casekit/nmr/model/ExtendedConnectionMatrix.java +++ b/src/casekit/nmr/model/ExtendedConnectionMatrix.java @@ -348,10 +348,6 @@ public int getAtomCount() { return this.connectionMatrix.length; } - public int getBondCount() { - return 
this.bondCount; - } - public Boolean isUnsaturated(final int atomIndex) { if (!this.hasAtom(atomIndex)) { return null; From 17d1ca9e14d924e2f1836e8ffa3910b657ece144 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Sat, 10 Apr 2021 17:53:09 +0200 Subject: [PATCH 181/405] feat: added allowLowerEquivalencesCount to spectra matching --- src/casekit/nmr/utils/Match.java | 133 ++++++++++++++++++------------- 1 file changed, 79 insertions(+), 54 deletions(-) diff --git a/src/casekit/nmr/utils/Match.java b/src/casekit/nmr/utils/Match.java index 488ce3f..12d8e95 100644 --- a/src/casekit/nmr/utils/Match.java +++ b/src/casekit/nmr/utils/Match.java @@ -107,22 +107,26 @@ public static Float calculateTanimotoCoefficient(final Spectrum spectrum1, final * Returns deviations between matched shifts of two spectra. * The matching procedure is already included here. * - * @param spectrum1 first spectrum - * @param spectrum2 second spectrum - * @param dim1 dimension in first spectrum to take the shifts from - * @param dim2 dimension in second spectrum to take the shifts from - * @param shiftTol + * @param spectrum1 first spectrum + * @param spectrum2 second spectrum + * @param dim1 dimension in first spectrum to take the shifts from + * @param dim2 dimension in second spectrum to take the shifts from + * @param shiftTol shift tolerance + * @param checkMultiplicity indicates whether to compare the multiplicity of matched signals + * @param checkEquivalencesCount indicates whether to compare the equivalences counts of matched signals + * @param allowLowerEquivalencesCount indicates to allow a lower equivalences counts spectrum 2 * * @return * - * @see #matchSpectra(Spectrum, Spectrum, int, int, double, boolean, boolean) + * @see #matchSpectra(Spectrum, Spectrum, int, int, double, boolean, boolean, boolean) */ public static Double[] getDeviations(final Spectrum spectrum1, final Spectrum spectrum2, final int dim1, final int dim2, final double shiftTol, final boolean checkMultiplicity, - 
final boolean checkEquivalencesCount) { + final boolean checkEquivalencesCount, + final boolean allowLowerEquivalencesCount) { final Double[] deviations = new Double[spectrum1.getSignalCount()]; final Assignment matchAssignments = matchSpectra(spectrum1, spectrum2, dim1, dim2, shiftTol, checkMultiplicity, - checkEquivalencesCount); + checkEquivalencesCount, allowLowerEquivalencesCount); Signal matchedSignalInSpectrum2; for (int i = 0; i < spectrum1.getSignalCount(); i++) { @@ -162,25 +166,29 @@ public static Double calculateAverageDeviation(final Double[] deviations) { * Returns the average of all deviations of matched shifts between two * spectra. * - * @param spectrum1 first spectrum - * @param spectrum2 second spectrum - * @param dim1 dimension in first spectrum to take the shifts from - * @param dim2 dimension in second spectrum to take the shifts from - * @param shiftTol Tolerance value [ppm] used during peak picking in - * shift comparison + * @param spectrum1 first spectrum + * @param spectrum2 second spectrum + * @param dim1 dimension in first spectrum to take the shifts from + * @param dim2 dimension in second spectrum to take the shifts from + * @param shiftTol Tolerance value [ppm] used during peak picking in + * shift comparison + * @param checkMultiplicity indicates whether to compare the multiplicity of matched signals + * @param checkEquivalencesCount indicates whether to compare the equivalences counts of matched signals + * @param allowLowerEquivalencesCount indicates to allow a lower equivalences counts spectrum 2 * * @return * - * @see #getDeviations(Spectrum, Spectrum, int, int, double, boolean, boolean) + * @see #getDeviations(Spectrum, Spectrum, int, int, double, boolean, boolean, boolean) * @see #calculateAverageDeviation(Double[]) */ public static Double calculateAverageDeviation(final Spectrum spectrum1, final Spectrum spectrum2, final int dim1, final int dim2, final double shiftTol, final boolean checkMultiplicity, - final boolean 
checkEquivalencesCount) { + final boolean checkEquivalencesCount, + final boolean allowLowerEquivalencesCount) { return Match.calculateAverageDeviation( Match.getDeviations(spectrum1, spectrum2, dim1, dim2, shiftTol, checkMultiplicity, - checkEquivalencesCount)); + checkEquivalencesCount, allowLowerEquivalencesCount)); } /** @@ -206,23 +214,27 @@ public static Double calculateRMSD(final Double[] data) { * Returns the average of all deviations of matched shifts between two * spectra. * - * @param spectrum1 first spectrum - * @param spectrum2 second spectrum - * @param dim1 dimension in first spectrum to take the shifts from - * @param dim2 dimension in second spectrum to take the shifts from - * @param shiftTol Tolerance value [ppm] used during peak picking in - * shift comparison + * @param spectrum1 first spectrum + * @param spectrum2 second spectrum + * @param dim1 dimension in first spectrum to take the shifts from + * @param dim2 dimension in second spectrum to take the shifts from + * @param shiftTol Tolerance value [ppm] used during peak picking in + * shift comparison + * @param checkMultiplicity indicates whether to compare the multiplicity of matched signals + * @param checkEquivalencesCount indicates whether to compare the equivalences counts of matched signals + * @param allowLowerEquivalencesCount indicates to allow a lower equivalences counts spectrum 2 * * @return * - * @see #getDeviations(Spectrum, Spectrum, int, int, double, boolean, boolean) + * @see #getDeviations(Spectrum, Spectrum, int, int, double, boolean, boolean, boolean) * @see #calculateAverageDeviation(Double[]) */ public static Double calculateRMSD(final Spectrum spectrum1, final Spectrum spectrum2, final int dim1, final int dim2, final double shiftTol, final boolean checkMultiplicity, - final boolean checkEquivalencesCount) { + final boolean checkEquivalencesCount, + final boolean allowLowerEquivalencesCount) { return Match.calculateRMSD(Match.getDeviations(spectrum1, spectrum2, dim1, 
dim2, shiftTol, checkMultiplicity, - checkEquivalencesCount)); + checkEquivalencesCount, allowLowerEquivalencesCount)); } /** @@ -230,51 +242,60 @@ public static Double calculateRMSD(final Spectrum spectrum1, final Spectrum spec * as an Assignment object with one set dimension only.
* Despite intensities are expected, they are still not considered here. * - * @param spectrum first spectrum - * @param querySpectrum query spectrum (Subspectrum) - * @param dim1 dimension in first spectrum to take the shifts from - * @param dim2 dimension in second spectrum to take the shifts from - * @param shiftTol Tolerance value [ppm] used during spectra shift - * comparison + * @param spectrum1 first spectrum + * @param spectrum2 second spectrum (query as exact or subspectrum to check) + * @param dim1 dimension in first spectrum to take the shifts from + * @param dim2 dimension in second spectrum to take the shifts from + * @param shiftTol Tolerance value [ppm] used during spectra shift + * comparison + * @param checkMultiplicity indicates whether to compare the multiplicity of matched signals + * @param checkEquivalencesCount indicates whether to compare the equivalences counts of matched signals + * @param allowLowerEquivalencesCount indicates to allow a lower equivalences counts spectrum 2 * * @return Assignments with signal indices of spectrum and matched indices * in query spectrum; null if one of the spectra does not * contain the selected dimension */ - public static Assignment matchSpectra(final Spectrum spectrum, final Spectrum querySpectrum, final int dim1, + public static Assignment matchSpectra(final Spectrum spectrum1, final Spectrum spectrum2, final int dim1, final int dim2, final double shiftTol, final boolean checkMultiplicity, - final boolean checkEquivalencesCount) { - if (!Match.checkDimensions(spectrum, querySpectrum, dim1, dim2)) { + final boolean checkEquivalencesCount, + final boolean allowLowerEquivalencesCount) { + if (!Match.checkDimensions(spectrum1, spectrum2, dim1, dim2)) { return null; } final Assignment matchAssignments = new Assignment(); - matchAssignments.setNuclei(new String[]{spectrum.getNuclei()[dim1]}); - matchAssignments.initAssignments(spectrum.getSignalCount()); + matchAssignments.setNuclei(new 
String[]{spectrum1.getNuclei()[dim1]}); + matchAssignments.initAssignments(spectrum1.getSignalCount()); final Set assigned = new HashSet<>(); List pickedSignalIndicesSpectrum2; boolean passed; for (int i = 0; i - < spectrum.getSignalCount(); i++) { - if (spectrum.getShift(i, dim1) + < spectrum1.getSignalCount(); i++) { + if (spectrum1.getShift(i, dim1) == null) { continue; } // @TODO add solvent deviation value for picking closest signal(s) pickedSignalIndicesSpectrum2 = new ArrayList<>(); - for (final int pickedSignalIndexSpectrum2 : querySpectrum.pickSignals(spectrum.getShift(i, dim1), dim2, - shiftTol)) { + for (final int pickedSignalIndexSpectrum2 : spectrum2.pickSignals(spectrum1.getShift(i, dim1), dim2, + shiftTol)) { passed = true; // @TODO maybe consider further parameters to check ? e.g. intensity if (checkMultiplicity) { - passed = querySpectrum.getMultiplicity(pickedSignalIndexSpectrum2) - .equals(spectrum.getMultiplicity(i)); + passed = spectrum2.getMultiplicity(pickedSignalIndexSpectrum2) + .equals(spectrum1.getMultiplicity(i)); } if (passed && checkEquivalencesCount) { - passed = querySpectrum.getEquivalencesCount(pickedSignalIndexSpectrum2) - == spectrum.getEquivalencesCount(i); + if (allowLowerEquivalencesCount) { + passed = spectrum2.getEquivalencesCount(pickedSignalIndexSpectrum2) + <= spectrum1.getEquivalencesCount(i); + } else { + passed = spectrum2.getEquivalencesCount(pickedSignalIndexSpectrum2) + == spectrum1.getEquivalencesCount(i); + } } if (passed) { @@ -286,7 +307,7 @@ public static Assignment matchSpectra(final Spectrum spectrum, final Spectrum qu // add signal to list of already assigned signals assigned.add(pickedSignalIndexSpectrum2); for (int k = 0; k - < spectrum.getEquivalencesCount(i); k++) { + < spectrum1.getEquivalencesCount(i); k++) { matchAssignments.addAssignmentEquivalence(0, i, pickedSignalIndexSpectrum2); } break; @@ -303,20 +324,24 @@ public static Assignment matchSpectra(final Spectrum spectrum, final Spectrum qu * N 
here means the number of dimensions in both spectra.
* Despite intensities are expected, they are still not considered here. * - * @param spectrum1 first spectrum - * @param spectrum2 second spectrum (query) - * @param shiftTols tolerance values [ppm] per each dimension used during spectra shift - * comparisons + * @param spectrum1 first spectrum + * @param spectrum2 second spectrum (query as exact or subspectrum to check) + * @param shiftTols tolerance values [ppm] per each dimension used during spectra shift + * comparisons + * @param checkMultiplicity indicates whether to compare the multiplicity of matched signals + * @param checkEquivalencesCount indicates whether to compare the equivalences counts of matched signals + * @param allowLowerEquivalencesCount indicates to allow a lower equivalences counts spectrum 2 * * @return Assignments with signal indices of spectrum1 and matched indices * in spectrum2 for each dimension; null if the number of * dimensions in both spectra is not the same or is different than the number of given * shift tolerances * - * @see #matchSpectra(Spectrum, Spectrum, int, int, double, boolean, boolean) + * @see #matchSpectra(Spectrum, Spectrum, int, int, double, boolean, boolean, boolean) */ public static Assignment matchSpectra(final Spectrum spectrum1, final Spectrum spectrum2, final double[] shiftTols, - final boolean checkMultiplicity, final boolean checkEquivalencesCount) { + final boolean checkMultiplicity, final boolean checkEquivalencesCount, + final boolean allowLowerEquivalencesCount) { if ((spectrum1.getNDim() != spectrum2.getNDim()) || (spectrum1.getNDim() @@ -329,8 +354,8 @@ public static Assignment matchSpectra(final Spectrum spectrum1, final Spectrum s for (int dim = 0; dim < spectrum1.getNDim(); dim++) { matchAssignment.setAssignments(dim, matchSpectra(spectrum1, spectrum2, dim, dim, shiftTols[dim], - checkMultiplicity, checkEquivalencesCount).getAssignments( - 0)); + checkMultiplicity, checkEquivalencesCount, + allowLowerEquivalencesCount).getAssignments(0)); } return 
matchAssignment; From 6bb4077a0f1ee3eedc5ac9336c7942afbddbe98f Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Sat, 10 Apr 2021 18:26:31 +0200 Subject: [PATCH 182/405] feat: added setSignal method to Spectrum --- src/casekit/nmr/model/Spectrum.java | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/src/casekit/nmr/model/Spectrum.java b/src/casekit/nmr/model/Spectrum.java index 10007e7..1e082af 100644 --- a/src/casekit/nmr/model/Spectrum.java +++ b/src/casekit/nmr/model/Spectrum.java @@ -180,7 +180,7 @@ private boolean checkSignalIndex(final Integer signalIndex) { } /** - * Returns a NMR Signal at position number in the List + * Returns a NMR Signal at position number in the signal list * * @param signalIndex * @@ -194,12 +194,23 @@ public Signal getSignal(final int signalIndex) { return this.signals.get(signalIndex); } - public List getSignals() { - return this.signals; - } + /** + * Sets a NMR Signal at position number in the signal list + * + * @param signalIndex signal index in list + * @param signal signal + * + * @return + */ + public boolean setSignal(final int signalIndex, final Signal signal) { + if (!this.checkSignalIndex(signalIndex) + || !this.compareNuclei(signal.getNuclei())) { + return false; + } - public void setSignals(final List signals) { - this.signals = signals; + this.signals.set(signalIndex, signal); + + return true; } public Double getShift(final int signalIndex, final int dim) { From f2711a3292af830d9c47ffc6a2841f484d6d6710 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Sat, 10 Apr 2021 18:52:43 +0200 Subject: [PATCH 183/405] fix: add equivalences count of newly added signal instead of one only --- src/casekit/nmr/model/Spectrum.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/casekit/nmr/model/Spectrum.java b/src/casekit/nmr/model/Spectrum.java index 1e082af..db4542a 100644 --- a/src/casekit/nmr/model/Spectrum.java +++ b/src/casekit/nmr/model/Spectrum.java @@ -143,7 +143,7 
@@ public boolean addSignal(final Signal signal, final double pickPrecision, final if (closestSignal.getMultiplicity() .equals(signal.getMultiplicity())) { closestSignal.setEquivalencesCount(closestSignal.getEquivalencesCount() - + 1); + + signal.getEquivalencesCount()); } } } From df12f851fea6f5cfeb7d4c19091bd0b992adef29 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Sat, 10 Apr 2021 20:10:10 +0200 Subject: [PATCH 184/405] feat: added addAssignment method to Assignment --- src/casekit/nmr/model/Assignment.java | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/src/casekit/nmr/model/Assignment.java b/src/casekit/nmr/model/Assignment.java index edd9df1..fc2fe98 100644 --- a/src/casekit/nmr/model/Assignment.java +++ b/src/casekit/nmr/model/Assignment.java @@ -194,6 +194,27 @@ public int getSetAssignmentsCountWithEquivalences(final int dim) { return setAssignmentsCounter; } + public boolean addAssignment(final int dim, final int[] assignment) { + if (!this.containsDim(dim)) { + return false; + } + + final int[][][] newAssignments = new int[this.getNDim()][][]; + for (int d = 0; d + < this.getNDim(); d++) { + newAssignments[d] = new int[this.assignments[d].length + + 1][]; + for (int i = 0; i + < this.assignments[d].length; i++) { + newAssignments[d][i] = this.assignments[d][i]; + } + } + newAssignments[dim][this.assignments[dim].length] = assignment; + this.assignments = newAssignments; + + return true; + } + private boolean checkIndex(final int dim, final int index) { return (index >= 0) @@ -201,7 +222,6 @@ private boolean checkIndex(final int dim, final int index) { < this.assignments[dim].length); } - @Override public Assignment clone() throws CloneNotSupportedException { return (Assignment) super.clone(); From 561c0df75631601654a6bcd2541d9dffb8c321d1 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Sat, 10 Apr 2021 20:12:05 +0200 Subject: [PATCH 185/405] fix: too high index when checking the dimension --- 
src/casekit/nmr/model/Assignment.java | 2 +- src/casekit/nmr/model/Signal.java | 2 +- src/casekit/nmr/model/Spectrum.java | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/casekit/nmr/model/Assignment.java b/src/casekit/nmr/model/Assignment.java index fc2fe98..5bcd0f9 100644 --- a/src/casekit/nmr/model/Assignment.java +++ b/src/casekit/nmr/model/Assignment.java @@ -65,7 +65,7 @@ public boolean containsDim(final int dim) { return dim >= 0 && dim - <= this.getNDim(); + < this.getNDim(); } public boolean compareNuclei(final String[] nuclei) { diff --git a/src/casekit/nmr/model/Signal.java b/src/casekit/nmr/model/Signal.java index 4d14e00..972b53e 100644 --- a/src/casekit/nmr/model/Signal.java +++ b/src/casekit/nmr/model/Signal.java @@ -60,7 +60,7 @@ public boolean containsDim(final int dim) { return dim >= 0 && dim - <= this.getNDim(); + < this.getNDim(); } public boolean compareNuclei(final String[] nuclei) { diff --git a/src/casekit/nmr/model/Spectrum.java b/src/casekit/nmr/model/Spectrum.java index db4542a..3f02338 100644 --- a/src/casekit/nmr/model/Spectrum.java +++ b/src/casekit/nmr/model/Spectrum.java @@ -76,7 +76,7 @@ public boolean containsDim(final int dim) { return dim >= 0 && dim - <= this.getNDim(); + < this.getNDim(); } public boolean compareNuclei(final String[] nuclei) { From 61b9d3be05daef887e2e43073cd9398b74326c9d Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Tue, 13 Apr 2021 15:23:39 +0200 Subject: [PATCH 186/405] fix: init non-empty arrays for equivalences --- src/casekit/nmr/model/Assignment.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/casekit/nmr/model/Assignment.java b/src/casekit/nmr/model/Assignment.java index 5bcd0f9..d71230b 100644 --- a/src/casekit/nmr/model/Assignment.java +++ b/src/casekit/nmr/model/Assignment.java @@ -45,7 +45,7 @@ public class Assignment public void initAssignments(final int length) { - final int[][][] temp = new int[this.getNDim()][length][1]; + final int[][][] 
temp = new int[this.getNDim()][length][0]; for (int i = 0; i < this.getNDim(); i++) { for (int j = 0; j @@ -53,7 +53,6 @@ public void initAssignments(final int length) { temp[i][j] = new int[]{}; } } - this.assignments = temp; } From 14c9469f5953efaa183fc5552f635d56a350094f Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Mon, 19 Apr 2021 02:23:22 +0200 Subject: [PATCH 187/405] fix: use own deep clone method Assignment --- src/casekit/nmr/model/Assignment.java | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/src/casekit/nmr/model/Assignment.java b/src/casekit/nmr/model/Assignment.java index d71230b..0c4db9b 100644 --- a/src/casekit/nmr/model/Assignment.java +++ b/src/casekit/nmr/model/Assignment.java @@ -37,8 +37,7 @@ @AllArgsConstructor @Getter @Setter -public class Assignment - implements Cloneable { +public class Assignment { private String[] nuclei; private int[][][] assignments; @@ -221,9 +220,26 @@ private boolean checkIndex(final int dim, final int index) { < this.assignments[dim].length); } - @Override - public Assignment clone() throws CloneNotSupportedException { - return (Assignment) super.clone(); + public Assignment buildClone() { + final Assignment clone = new Assignment(); + clone.setNuclei(this.getNuclei() + .clone()); + final int[][][] values = new int[this.getNDim()][][]; + for (int dim = 0; dim + < this.getNDim(); dim++) { + values[dim] = new int[this.getSize()][]; + for (int i = 0; i + < this.assignments[dim].length; i++) { + values[dim][i] = new int[this.assignments[dim][i].length]; + for (int equiv = 0; equiv + < this.assignments[dim][i].length; equiv++) { + values[dim][i][equiv] = this.assignments[dim][i][equiv]; + } + } + } + clone.setAssignments(values); + + return clone; } @Override From 3be9b47d738a2b09c1c3b1b4b4f513d0b9c616cb Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Fri, 23 Apr 2021 10:07:41 +0200 Subject: [PATCH 188/405] fix: extended hetero atom list in aromatic cycles in 
isValidBondAddition --- src/casekit/nmr/Utils.java | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/casekit/nmr/Utils.java b/src/casekit/nmr/Utils.java index 38a17b3..1df3486 100644 --- a/src/casekit/nmr/Utils.java +++ b/src/casekit/nmr/Utils.java @@ -395,16 +395,21 @@ public static boolean isValidBondAddition(final IAtomContainer ac, final int ato // System.out.print(atomIndex + " --> " + Utils.getBondOrderSum(ac, atomIndex, true) + " + " + Utils.getBondOrderAsNumeric(bondToAdd)); final IAtom atom = ac.getAtom(atomIndex); + // @TODO include different valencies: N3, N5, S2, S4, S6 etc. // -1 for cases with heterocyclic aromatics, like the N in the small aromatic ring in coffein if we want to add the bond to the CH3 group if (atom.isAromatic() - && (!atom.getSymbol() - .equals("C"))) { + && (atom.getSymbol() + .equals("N") + || atom.getSymbol() + .equals("S") + || atom.getSymbol() + .equals("P"))) { // System.out.print("[ -1 ]"); bondOrderSum -= 1; } // System.out.print(" = " + bondOrderSum + " <= " + atom.getValency() + " ? 
-> " + (bondOrderSum <= atom.getValency()) + "\n"); - // @TODO including charges + // @TODO include charges return bondOrderSum <= atom.getValency(); } From 287baf4738931950b57dcaea2db480b2dd33dce5 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Fri, 23 Apr 2021 11:06:51 +0200 Subject: [PATCH 189/405] chore: added UTF-8 encoding and updated two dependencies --- pom.xml | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/pom.xml b/pom.xml index a9cadc3..eff59ea 100644 --- a/pom.xml +++ b/pom.xml @@ -1,5 +1,5 @@ - 4.0.0 org.openscience @@ -7,12 +7,16 @@ 1.0-SNAPSHOT casekit + + UTF-8 + + src maven-compiler-plugin - 3.3 + 3.8.1 1.8 1.8 @@ -20,8 +24,9 @@ + org.apache.maven.plugins maven-assembly-plugin - 3.0.0 + 3.3.0 jar-with-dependencies From ec921de1fc486b3b9bd8ed070d6629abd0cafa2d Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Fri, 23 Apr 2021 11:07:40 +0200 Subject: [PATCH 190/405] fix: switched from SmiFlavor.Absolute to SmiFlavor.Unique --- src/casekit/nmr/utils/Utils.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/casekit/nmr/utils/Utils.java b/src/casekit/nmr/utils/Utils.java index aae856a..cc98f29 100644 --- a/src/casekit/nmr/utils/Utils.java +++ b/src/casekit/nmr/utils/Utils.java @@ -72,7 +72,8 @@ public static IMolecularFormula getMolecularFormulaFromString(final String mf) { } public static String getSmilesFromAtomContainer(final IAtomContainer ac) throws CDKException { - final SmilesGenerator smilesGenerator = new SmilesGenerator(SmiFlavor.Absolute); + // SmiFlavor.Unique instead of SmiFlavor.Absolute because current errors with InChI generator + final SmilesGenerator smilesGenerator = new SmilesGenerator(SmiFlavor.Unique); return smilesGenerator.create(ac); } From 63ef95d6b58487887c6858f13fbdf0938d75f463 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Sat, 24 Apr 2021 12:35:37 +0200 Subject: [PATCH 191/405] fix: buildFilters expects direct paths to filters --- 
.../nmr/lsd/PyLSDInputFileBuilder.java | 23 +++---------------- 1 file changed, 3 insertions(+), 20 deletions(-) diff --git a/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java b/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java index c4446e8..a461c14 100644 --- a/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java +++ b/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java @@ -1,14 +1,11 @@ package casekit.nmr.lsd; -import casekit.io.FileSystem; import casekit.nmr.lsd.model.ElucidationOptions; import casekit.nmr.model.nmrdisplayer.Correlation; import casekit.nmr.model.nmrdisplayer.Data; import casekit.nmr.model.nmrdisplayer.Link; import casekit.nmr.utils.Utils; -import java.io.BufferedReader; -import java.io.IOException; import java.text.SimpleDateFormat; import java.util.*; @@ -411,24 +408,10 @@ private static String buildFilters(final String[] filterPaths) { stringBuilder.append("; externally defined filters\n"); final Map filters = new LinkedHashMap<>(); int counter = 1; - BufferedReader bufferedReader; for (final String filterPath : filterPaths) { - try { - bufferedReader = FileSystem.readFile(filterPath); - if (bufferedReader - != null) { - String line; - while ((line = bufferedReader.readLine()) - != null) { - filters.put("F" - + counter, line); - counter++; - } - bufferedReader.close(); - } - } catch (final IOException e) { - e.printStackTrace(); - } + filters.put("F" + + counter, filterPath); + counter++; } if (!filters.isEmpty()) { From 54347886a04f575fe3c8c051f2579a285a89f3bd Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Sun, 25 Apr 2021 22:49:02 +0200 Subject: [PATCH 192/405] feat: added parseRankedResultSDFile methods --- .../nmr/lsd/RankedResultSDFParser.java | 22 ++++++++++++++----- 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/src/casekit/nmr/lsd/RankedResultSDFParser.java b/src/casekit/nmr/lsd/RankedResultSDFParser.java index 1ac7b3f..3a6a574 100644 --- a/src/casekit/nmr/lsd/RankedResultSDFParser.java +++ 
b/src/casekit/nmr/lsd/RankedResultSDFParser.java @@ -14,17 +14,27 @@ import org.openscience.cdk.tools.CDKHydrogenAdder; import org.openscience.cdk.tools.manipulator.AtomContainerManipulator; -import java.io.FileNotFoundException; -import java.io.FileReader; +import java.io.*; +import java.nio.charset.StandardCharsets; import java.util.*; public class RankedResultSDFParser { - public static List parseRankedResultSDF(final String pathToFile, - final String nucleus) throws CDKException, FileNotFoundException { + public static List parseRankedResultSDFile(final String pathToFile, + final String nucleus) throws CDKException, FileNotFoundException { + return parseRankedResultSDFile(new FileReader(pathToFile), nucleus); + } + + public static List parseRankedResultSDFileContent(final String fileContent, + final String nucleus) throws CDKException { + final InputStream inputStream = new ByteArrayInputStream(fileContent.getBytes(StandardCharsets.UTF_8)); + return parseRankedResultSDFile(new InputStreamReader(inputStream), nucleus); + } + + public static List parseRankedResultSDFile(final Reader fileReader, + final String nucleus) throws CDKException { final List dataSetList = new ArrayList<>(); - final IteratingSDFReader iterator = new IteratingSDFReader(new FileReader(pathToFile), - SilentChemObjectBuilder.getInstance()); + final IteratingSDFReader iterator = new IteratingSDFReader(fileReader, SilentChemObjectBuilder.getInstance()); IAtomContainer structure; Spectrum experimentalSpectrum, predictedSpectrum; Assignment assignment; From acc54bad4f00470f9a1b13c65d3797c35d47b5f2 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Thu, 29 Apr 2021 12:03:41 +0200 Subject: [PATCH 193/405] feat: integration of HOSECodeBuilder project --- pom.xml | 10 - src/casekit/nmr/hose/HOSECodeBuilder.java | 679 ++++++++++++++++++ src/casekit/nmr/hose/Utils.java | 524 ++++++++++++++ .../nmr/hose/model/ConnectionTree.java | 441 ++++++++++++ .../nmr/hose/model/ConnectionTreeNode.java | 190 +++++ 
src/casekit/nmr/utils/Predict.java | 2 +- 6 files changed, 1835 insertions(+), 11 deletions(-) create mode 100644 src/casekit/nmr/hose/HOSECodeBuilder.java create mode 100644 src/casekit/nmr/hose/Utils.java create mode 100644 src/casekit/nmr/hose/model/ConnectionTree.java create mode 100644 src/casekit/nmr/hose/model/ConnectionTreeNode.java diff --git a/pom.xml b/pom.xml index eff59ea..30c2ba0 100644 --- a/pom.xml +++ b/pom.xml @@ -60,16 +60,6 @@ commons-lang3 3.5
- - org.openscience - HOSECodeBuilder - 1.0 - - - - - - org.projectlombok lombok diff --git a/src/casekit/nmr/hose/HOSECodeBuilder.java b/src/casekit/nmr/hose/HOSECodeBuilder.java new file mode 100644 index 0000000..a685121 --- /dev/null +++ b/src/casekit/nmr/hose/HOSECodeBuilder.java @@ -0,0 +1,679 @@ +/* + * The MIT License + * + * Copyright (c) 2019 Michael Wenk [https://github.com/michaelwenk] + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ +package casekit.nmr.hose; + + +import casekit.nmr.hose.model.ConnectionTree; +import casekit.nmr.hose.model.ConnectionTreeNode; +import org.openscience.cdk.exception.CDKException; +import org.openscience.cdk.interfaces.IAtom; +import org.openscience.cdk.interfaces.IAtomContainer; +import org.openscience.cdk.interfaces.IBond; +import org.openscience.cdk.silent.Atom; +import org.openscience.cdk.silent.Bond; +import org.openscience.cdk.silent.SilentChemObjectBuilder; + +import java.util.*; + +/** + * Class to build HOSE code strings from molecules and vice versa + * by using connection trees as intermediate forms. + * + * @author Michael Wenk [https://github.com/michaelwenk] + */ +public class HOSECodeBuilder { + + /** + * Creates a partial sphere string content from the children of a given parent node. + * + * @param nodeInPrevSphere parent node to create a partial sphere string content from + * @param useBremserElementNotation whether to use Bremser notation + * + * @return + * + * @throws CDKException + */ + private static List buildPositionsInSphere(final ConnectionTreeNode nodeInPrevSphere, + final boolean useBremserElementNotation) throws CDKException { + final List nodesInSphere = nodeInPrevSphere.getChildNodes(); + final List positionsInSphere = new ArrayList<>(); + ConnectionTreeNode nodeInSphere; + IBond bond; + String position; + for (int j = 0; j + < nodesInSphere.size(); j++) { + nodeInSphere = nodesInSphere.get(j); + bond = nodeInPrevSphere.getBondsToChildren() + .get(j); + position = ""; + if (Utils.getSymbolForBond(bond) + == null) { + throw new CDKException(Thread.currentThread() + .getStackTrace()[1].getMethodName() + + ": no bond information"); + } + position += Utils.getSymbolForBond(bond); + if (nodeInSphere.isRingClosureNode()) { + position += "&"; + } else { + if (useBremserElementNotation) { + position += Utils.toHOSECodeSymbol(nodeInSphere.getAtom() + .getSymbol()); + } else { + position += nodeInSphere.getAtom() + .getSymbol(); + } + // 
if(nodeInSphere.getAtom().getImplicitHydrogenCount() != null){ + // position += "[" + nodeInSphere.getAtom().getImplicitHydrogenCount() + "]"; + // } + position += buildFormalChargeCode(nodeInSphere.getAtom()); + } + positionsInSphere.add(position); + } + + return positionsInSphere; + } + + /** + * Builds the content of a sphere of the HOSE code which is to generate. + * + * @param connectionTree connection tree to use + * @param sphere sphere to selected from connection tree + * @param delimiter sphere's delimiter + * @param useBremserElementNotation whether to use Bremser notation + * + * @return + * + * @throws CDKException + */ + private static String buildSphereString(final ConnectionTree connectionTree, final int sphere, + final String delimiter, + final boolean useBremserElementNotation) throws CDKException { + StringBuilder sphereString = new StringBuilder(); + final List nodesInPrevSphere = connectionTree.getNodesInSphere(sphere + - 1, true); + ConnectionTreeNode nodeInPrevSphere; + // for all nodes in previous sphere + for (int i = 0; i + < nodesInPrevSphere.size(); i++) { + nodeInPrevSphere = nodesInPrevSphere.get(i); + // skip ring closure nodes + if (nodeInPrevSphere.isRingClosureNode()) { + if ((i + == nodesInPrevSphere.size() + - 1) + && sphereString.toString() + .endsWith(",")) { + sphereString = new StringBuilder(sphereString.substring(0, sphereString.length() + - 1)); + } + continue; + } + // for all child nodes in the requested sphere + if (nodeInPrevSphere.hasChildren()) { + for (final String position : buildPositionsInSphere(nodeInPrevSphere, useBremserElementNotation)) { + sphereString.append(position); + } + } + // add delimiter + if (i + < nodesInPrevSphere.size() + - 1) { + sphereString.append(delimiter); + } + } + + return sphereString.toString(); + } + + private static String buildFormalChargeCode(final IAtom atom) { + if ((atom + == null) + || (atom.getFormalCharge() + == null) + || (atom.getFormalCharge() + == 0)) { + return ""; + } + 
final String sign = atom.getFormalCharge() + < 0 + ? "-" + : "+"; + + return Math.abs(atom.getFormalCharge()) + == 1 + ? sign + : "'" + + sign + + Math.abs(atom.getFormalCharge()) + + "'"; + } + + /** + * Actual function to build a HOSE code. + * + * @param connectionTree connection tree to use + * @param useBremserElementNotation whether to use Bremser notation + * + * @return + * + * @throws CDKException + */ + private static String buildHOSECodeString(final ConnectionTree connectionTree, + final boolean useBremserElementNotation) throws CDKException { + final IAtom rootAtom = connectionTree.getRootNode() + .getAtom(); + final int maxSphere = connectionTree.getMaxSphere(); + // zeroth sphere + final StringBuilder HOSECode = new StringBuilder(rootAtom.getSymbol() + + "-" + + (connectionTree.getRootNode() + .getChildNodes() + .size() + + (rootAtom.getImplicitHydrogenCount() + == null + ? 0 + : rootAtom.getImplicitHydrogenCount())) + + buildFormalChargeCode(rootAtom)); + HOSECode.append(";"); + String delimiter; + // go through each sphere of the connection tree + for (int s = 1; s + <= maxSphere; s++) { + if (s + == 1) { + delimiter = ""; + } else { + delimiter = ","; + } + // create sphere string and add it to HOSE code string + HOSECode.append(buildSphereString(connectionTree, s, delimiter, useBremserElementNotation)); + if (s + == 1) { + HOSECode.append("("); + } + if (s + > 1 + && s + < maxSphere) { + HOSECode.append("/"); + } + } + if (maxSphere + == 0) { + HOSECode.append("("); + } + HOSECode.append(")"); + + return HOSECode.toString(); + } + + public static String buildHOSECode(final ConnectionTree connectionTree, + final boolean useBremserElementNotation) throws CDKException { + return buildHOSECodeString(connectionTree, useBremserElementNotation); + } + + public static String buildHOSECode(final IAtomContainer ac, final int rootAtomIndex, final Integer maxSphere, + final boolean useBremserElementNotation) throws CDKException { + return 
HOSECodeBuilder.buildHOSECode(HOSECodeBuilder.buildConnectionTree(ac, rootAtomIndex, maxSphere), + useBremserElementNotation); + } + + /** + * Builds a connection tree of an atom container with specific start atom + * and maximum number of spheres. + * If the atoms in the atom container are not fully connected, then the + * connection tree will be built until the last atom of all connected atoms + * to the start atom is reached. + * + * @param ac atom container + * @param rootAtomIndex starting atom + * @param maxSphere if this is set to null, then the connection tree of whole + * structure will be created + * + * @return + * + * @see ConnectionTree + */ + public static ConnectionTree buildConnectionTree(final IAtomContainer ac, final int rootAtomIndex, + final Integer maxSphere) { + return HOSECodeBuilder.buildConnectionTree(ac, rootAtomIndex, maxSphere, new HashSet<>()); + } + + /** + * Builds a connection tree of an atom container with specific start atom + * and maximum number of spheres. + * If the atoms in the atom container are not fully connected, then the + * connection tree will be built until the last atom of all connected atoms + * to the start atom is reached. 
+ * + * @param ac atom container + * @param rootAtomIndex starting atom + * @param maxSphere if this is set to null, then the connection tree of whole + * structure will be created + * @param visited certain atom indices can be given here to ignore atoms + * in BFS; they are then seen as already visited and not included in + * the connection tree + * + * @return + * + * @see ConnectionTree + */ + public static ConnectionTree buildConnectionTree(final IAtomContainer ac, final int rootAtomIndex, + final Integer maxSphere, final HashSet visited) { + // create queue for BFS and add root atom index + final Queue queue = new LinkedList<>(); + queue.add(rootAtomIndex); + final ConnectionTree connectionTree = new ConnectionTree(ac.getAtom(rootAtomIndex), rootAtomIndex); + BFS(ac, connectionTree, queue, new HashSet<>(visited), maxSphere); + + Utils.rankChildNodes(connectionTree); + + return connectionTree; + } + + /** + * Builds a connection tree from a given HOSE code.
+ * IMPORTANT: At the moment, ring closures can not be restored + * from an HOSE code because of ambiguities. + * So only a structural skeleton will be generated. + * + * @param HOSECode HOSE code + * @param useBremserElementNotation whether given HOSE code contains Bremser notation + * + * @return + * + * @throws CDKException + */ + public static ConnectionTree buildConnectionTree(final String HOSECode, + final boolean useBremserElementNotation) throws CDKException { + final Map> ringClosures = new HashMap<>(); + final List sphereStrings = Utils.splitHOSECodeIntoSpheres(HOSECode); + IAtom atom; + IBond bond; + final int maxSphere; + String bondTypeString, atomTypeString, childElementCore; + Map> positionsInSphere; + ConnectionTreeNode parentNodeInPrevSphere; + // set maxSphere + maxSphere = sphereStrings.size() + - 1; + // zeroth sphere + positionsInSphere = Utils.splitHOSECodeSphereIntoPositions(sphereStrings.get(0), true); + // create root atom + atom = new Atom(positionsInSphere.get(0) + .get(0)); + // add charge to root atom + atom.setFormalCharge(Integer.parseInt(positionsInSphere.get(0) + .get(1))); + final ConnectionTree connectionTree = new ConnectionTree(atom, 0); + // higher spheres + for (int sphere = 1; sphere + <= maxSphere; sphere++) { + // get positions (sections separated by comma) of current sphere + positionsInSphere = Utils.splitHOSECodeSphereIntoPositions(sphereStrings.get(sphere), false); + // for all positions + for (final int positionIndex : positionsInSphere.keySet()) { + // for each child elements (symbols) in position + for (final String childElement : positionsInSphere.get(positionIndex)) { + // ignore children containing null value from previous nodes; previous node has no further (unvisited in BFS) connected atoms + if (childElement + == null) { + continue; + } + bondTypeString = ""; + atomTypeString = ""; + childElementCore = childElement; + childElementCore = childElementCore.replace("+", ""); + childElementCore = 
childElementCore.replace("-", ""); + childElementCore = childElementCore.replaceAll("\\d", ""); + // add new node and set bond to parent node or set a ring closure + if (childElementCore.contains("&")) { // ring closure + if (childElementCore.length() + == 2) { + bondTypeString = String.valueOf(childElementCore.charAt(0)); + } + // the parent node/atom in previous sphere and its key we already have of a ring closure; + // the bond information we already have too (see below) + parentNodeInPrevSphere = connectionTree.getNodesInSphere(sphere + - 1, true) + .get(positionIndex); + + if (!ringClosures.containsKey(sphere)) { + ringClosures.put(sphere, new ArrayList<>()); + } + bond = SilentChemObjectBuilder.getInstance() + .newBond(); + bond.setOrder(Utils.getBondOrderForSymbol(bondTypeString)); + if (bondTypeString.equals("*")) { + bond.setIsInRing(true); + bond.setIsAromatic(true); + } else { + bond.setIsAromatic(false); + } + // store the ring closures and use them after looking at the HOSE code string + ringClosures.get(sphere) + .add(new Object[]{parentNodeInPrevSphere, bond}); + + + // // check whether the node in previous sphere is already involved in a ring closure; that should be not valid + // if(ConnectionTree.isAtRingClosure(parentNodeInPrevSphere)){ + // continue; + // } + // + // // TODO: what we still not can detect for sure is the correct second node/atom of a ring closure + // ConnectionTreeNode parentNodeInSphere = null; // null is just a dummy value and should be replaced by the correct ConnectionTreeNode object + // + // // TODO: check that the detected node in sphere is not null; could be removed after the implementation of detection of that node + // if(parentNodeInSphere == null){ + // continue; + // } + // // after that both node detections, check if that ring closure was already set beforehand by the reversed node order case + // if(ConnectionTree.nodesFormRingClosure(parentNodeInPrevSphere, parentNodeInSphere)){ + // continue; + // } + // // 
otherwise build a new bond and fill it with + // bond = SilentChemObjectBuilder.getInstance().newBond(); + // bond.setAtom(parentNodeInPrevSphere.getAtom(), 0); + // bond.setAtom(parentNodeInSphere.getAtom(), 1); + // bond.setOrder(Utils.getBondOrderForSymbol(bondTypeString)); + // if (bondTypeString.equals("*")) { + // bond.setIsAromatic(true); + // } else { + // bond.setIsAromatic(false); + // } + // // set parent nodes as parents to each other, that one can detect them as ring closure afterwards + // parentNodeInPrevSphere.addParentNode(parentNodeInSphere, bond); + // connectionTree.addNode(null, -1 * parentNodeInPrevSphere.getKey(), parentNodeInPrevSphere.getKey(), bond, sphere + 1, true); + // parentNodeInSphere.addParentNode(parentNodeInPrevSphere, bond); + // connectionTree.addNode(null, -1 * parentNodeInSphere.getKey(), parentNodeInSphere.getKey(), bond, sphere + 1, true); + + } else if (Utils.countAtoms(childElementCore) + == 1) { // each position contains either ring closures (&) or one element (e.g. C, Br), plus the bond information + if (childElementCore.length() + == 3) { // in case of bond type and an element with two letters, e.g. *Cl or =Br + bondTypeString = String.valueOf(childElementCore.charAt(0)); + atomTypeString = String.valueOf(childElementCore.charAt(1)); + atomTypeString += String.valueOf(childElementCore.charAt(2)); + } else if (childElementCore.length() + == 2) { // in case of bond type and an element with one letter or an element with two letters, e.g. 
Cl or =N + if (Character.isLetter(childElementCore.charAt(0))) { + atomTypeString = String.valueOf(childElementCore.charAt(0)); + } else { + bondTypeString = String.valueOf(childElementCore.charAt(0)); + } + atomTypeString += String.valueOf(childElementCore.charAt(1)); + } else if (childElementCore.length() + == 1) { // in case of an element with only one letter + atomTypeString = String.valueOf(childElementCore.charAt(0)); + } + // there has to be some information (at least an element) + if (atomTypeString.isEmpty()) { + throw new CDKException(Thread.currentThread() + .getStackTrace()[1].getMethodName() + + ": no atom information in child element"); + } + // otherwise set a new bond + bond = SilentChemObjectBuilder.getInstance() + .newBond(); + if (useBremserElementNotation) { + atomTypeString = Utils.toElementSymbol(atomTypeString); + } + atom = new Atom(atomTypeString); + bond.setAtom(atom, 0); + bond.setAtom(connectionTree.getNodesInSphere(sphere + - 1, true) + .get(positionIndex) + .getAtom(), 1); + bond.setOrder(Utils.getBondOrderForSymbol(bondTypeString)); + bond.setIsAromatic(bondTypeString.equals("*")); + // set formal charge to atom + if (childElement.contains("-")) { + final String[] splitAtSign = childElement.split("-"); + if (splitAtSign.length + == 1) { + atom.setFormalCharge(-1); + } else { + atom.setFormalCharge(-1 + * Integer.parseInt(splitAtSign[1])); + } + } + if (childElement.contains("+")) { + final String[] splitAtSign = childElement.split("\\+"); + if (splitAtSign.length + == 1) { + atom.setFormalCharge(1); + } else { + atom.setFormalCharge(Integer.parseInt(splitAtSign[1])); + } + } + // create a new node with the new build bond information to its parent node in the connection tree + connectionTree.addNode(atom, connectionTree.getNodesCount(false), + connectionTree.getNodesInSphere(sphere + - 1, true) + .get(positionIndex) + .getKey(), bond); + } else { + throw new CDKException(Thread.currentThread() + .getStackTrace()[1].getMethodName() + + 
": no valid components in child element"); + } + } + } + } + + // @TODO after storing the ring closures, try to close the rings + // System.out.println(" -> spheres count with ring closures: " + ringClosures.size()); + // for (final int sphere : ringClosures.keySet()){ + // System.out.println(" -> number of ring closures in sphere: " + sphere + " -> " + ringClosures.get(sphere).size()); + // } + + Utils.rankChildNodes(connectionTree); + + return connectionTree; + } + + /** + * Function for extending a given connection tree only containing + * its root node (0th sphere) by means of Breadth-First-Search (BFS). + * Until a certain maximum sphere, each reachable next neighbor atom + * is stored in a parent-child-relationship. + * + * @param ac atom container to go through + * @param connectionTree connection tree to expand, incl. the root node + * @param queue queue to use containing the atom index of the root node + * @param visited optional: atom indices which are already "visited" and + * should be ignored + * @param maxSphere maximum number of spheres for connection tree extension + */ + private static void BFS(final IAtomContainer ac, final ConnectionTree connectionTree, final Queue queue, + final Set visited, final Integer maxSphere) { + // all nodes visited? 
+ if (queue.isEmpty()) { + return; + } + final int atomIndex = queue.remove(); + final IAtom atom = ac.getAtom(atomIndex); + final ConnectionTreeNode node = connectionTree.getNode(atomIndex); + final int sphere = node.getSphere(); + // check whether the current sphere is to high, if maxSphere parameter is set + if ((maxSphere + != null) + && (sphere + > maxSphere)) { + return; + } + // mark atom as visited + visited.add(atomIndex); + + IBond bond; + ConnectionTreeNode connectedAtomNode; + if ((maxSphere + != null) + && (sphere + == maxSphere)) { + // set connections (parent nodes) in last sphere nodes which have to be connected -> ring closures + // only parent nodes will be set to detect those ring closures again + for (final ConnectionTreeNode nodeInLastSphere : connectionTree.getNodesInSphere(maxSphere, false)) { + if ((ac.getBond(atom, nodeInLastSphere.getAtom()) + != null) + && !ConnectionTree.hasRingClosureParent(node, nodeInLastSphere) + && !ConnectionTree.hasRingClosureParent(nodeInLastSphere, node)) { + bond = ac.getBond(node.getAtom(), nodeInLastSphere.getAtom()); + connectionTree.addRingClosureNode(node.getKey(), nodeInLastSphere.getKey(), bond); + connectionTree.addRingClosureNode(nodeInLastSphere.getKey(), node.getKey(), bond); + } + } + } else { + // add nodes and bonds in lower spheres + // go to all child nodes + int connectedAtomIndex; + for (final IAtom connectedAtom : ac.getConnectedAtomsList(atom)) { + connectedAtomIndex = ac.indexOf(connectedAtom); + bond = ac.getBond(atom, connectedAtom); + // add children to queue if not already visited + if (!visited.contains(connectedAtomIndex)) { + // and not already waiting in queue + if (!queue.contains(connectedAtomIndex)) { + queue.add(connectedAtomIndex); + connectionTree.addNode(connectedAtom, connectedAtomIndex, node.getKey(), bond); + } else { + // node already exists in tree; add a further parent to connected atom (for ring closures) + connectedAtomNode = 
connectionTree.getNode(connectedAtomIndex); + if (!ConnectionTree.hasRingClosureParent(node, connectedAtomNode) + && !ConnectionTree.hasRingClosureParent(connectedAtomNode, node)) { + connectionTree.addRingClosureNode(connectedAtomIndex, node.getKey(), bond); + connectionTree.addRingClosureNode(node.getKey(), connectedAtomIndex, bond); + } + + } + } + } + } + // further extension of connectivity tree + BFS(ac, connectionTree, queue, visited, maxSphere); + } + + /** + * Reconstructs a structure from a given connection tree, + * including ring closures. + * + * @param connectionTree connection tree + * + * @return IAtomContainer + */ + public static IAtomContainer buildAtomContainer(final ConnectionTree connectionTree) { + // create new atom container and add the connection trees structure, beginning at the root atom + final IAtomContainer ac = SilentChemObjectBuilder.getInstance() + .newAtomContainer(); + HOSECodeBuilder.addToAtomContainer(connectionTree, ac, null, null); + + return ac; + } + + /** + * Adds the substructure of a connection tree to an atom container.
+ * The substructure can be linked via a bond and an atom index in the container, but this is optional. + * If both, the bond and atom index to link, are not given (null) then the substructure will just be added + * to the atom container without linkage. + * + * @param connectionTree + * @param ac + * @param atomIndexInStructureToLink + * @param bondToLink + */ + public static void addToAtomContainer(final ConnectionTree connectionTree, final IAtomContainer ac, + final Integer atomIndexInStructureToLink, final IBond bondToLink) { + List nodesInSphere; + ConnectionTreeNode nodeInSphere, parentNode, partnerNode; + IBond bond, bondToParent; + // add root atom to given atom container and link it via a given linking bond + ac.addAtom(connectionTree.getRootNode() + .getAtom()); + if ((atomIndexInStructureToLink + != null) + && (bondToLink + != null)) { + final IBond bondToAdd = new Bond(ac.getAtom(atomIndexInStructureToLink), ac.getAtom(ac.getAtomCount() + - 1)); + bondToAdd.setOrder(bondToLink.getOrder()); + bondToAdd.setIsInRing(bondToLink.isInRing()); + bondToAdd.setIsAromatic(bondToLink.isAromatic()); + bondToAdd.setAtom(ac.getAtom(atomIndexInStructureToLink), 0); + bondToAdd.setAtom(ac.getAtom(ac.getAtomCount() + - 1), 1); + ac.addBond(bondToAdd); + } + // for each sphere: add the atom which is stored as node to atom container and set bonds between parent nodes + for (int s = 1; s + <= connectionTree.getMaxSphere(); s++) { + // first add all atoms and its parents (previous sphere only, incl. 
bonds) to structure + nodesInSphere = connectionTree.getNodesInSphere(s, false); + for (int i = 0; i + < nodesInSphere.size(); i++) { + nodeInSphere = nodesInSphere.get(i); + if (nodeInSphere.isRingClosureNode()) { + continue; + } + ac.addAtom(nodeInSphere.getAtom()); + parentNode = nodeInSphere.getParent(); + bondToParent = nodeInSphere.getBondToParent(); + bond = new Bond(nodeInSphere.getAtom(), parentNode.getAtom(), bondToParent.getOrder()); + bond.setIsInRing(bondToParent.isInRing()); + bond.setIsAromatic(bondToParent.isAromatic()); + ac.addBond(bond); + } + } + for (int s = 1; s + <= connectionTree.getMaxSphere(); s++) { + // and as second add the remaining bonds (ring closures) to structure + nodesInSphere = connectionTree.getNodesInSphere(s, true); + for (int i = 0; i + < nodesInSphere.size(); i++) { + nodeInSphere = nodesInSphere.get(i); + if (!nodeInSphere.isRingClosureNode()) { + continue; + } + parentNode = nodeInSphere.getParent(); + partnerNode = nodeInSphere.getRingClosureParent(); + if (ac.getBond(ac.getAtom(ac.indexOf(partnerNode.getAtom())), + ac.getAtom(ac.indexOf(parentNode.getAtom()))) + == null) { + bondToParent = nodeInSphere.getBondToParent(); + bond = new Bond(parentNode.getAtom(), partnerNode.getAtom(), bondToParent.getOrder()); + bond.setIsInRing(bondToParent.isInRing()); + bond.setIsAromatic(bondToParent.isAromatic()); + ac.addBond(bond); + } + } + } + } + + /** + * Reconstructs a structure from a given HOSE code string.
+ * IMPORTANT: Ring closures are not restored, see + * {@link #buildConnectionTree(String, boolean)}. + * + * @param HOSECode HOSE code + * @param useBremserElementNotation whether the HOSE code includes Bremser notation + * + * @return IAtomContainer + * + * @see #buildConnectionTree(String, boolean) + * @see #buildAtomContainer(ConnectionTree) + */ + public static IAtomContainer buildAtomContainer(final String HOSECode, + final boolean useBremserElementNotation) throws CDKException { + return HOSECodeBuilder.buildAtomContainer( + HOSECodeBuilder.buildConnectionTree(HOSECode, useBremserElementNotation)); + } +} \ No newline at end of file diff --git a/src/casekit/nmr/hose/Utils.java b/src/casekit/nmr/hose/Utils.java new file mode 100644 index 0000000..c4ddfb6 --- /dev/null +++ b/src/casekit/nmr/hose/Utils.java @@ -0,0 +1,524 @@ +/* + * The MIT License + * + * Copyright (c) 2019 Michael Wenk [https://github.com/michaelwenk] + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +package casekit.nmr.hose; + +import casekit.nmr.hose.model.ConnectionTree; +import casekit.nmr.hose.model.ConnectionTreeNode; +import org.openscience.cdk.interfaces.IBond; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +public class Utils { + + /** + * Returns the summed subtree weight starting at a specific node in a connection + * tree. The weight of starting node is included here. + * + * @param node node to start und calculate the weight for + * + * @return + */ + public static int calculateSubtreeWeight(final ConnectionTreeNode node) { + return getSubtreeWeight(node, null); + } + + /** + * Returns the summed subtree weight starting at a specific node in a connection + * tree. The weight of starting node and the weight of bond to its given parent + * node is included here. + * + * @param node + * @param parentNode + * + * @return + */ + private static int getSubtreeWeight(final ConnectionTreeNode node, final ConnectionTreeNode parentNode) { + int weight = getNodeWeight(node, parentNode); + for (final ConnectionTreeNode childNode : node.getChildNodes()) { + weight += getSubtreeWeight(childNode, node); + } + + return weight; + } + + /** + * Returns the weight for a node and its connection to a parent node + * (optional). 
+ * + * @param node node to get the weight from + * @param parentNode parent node of node or null + * + * @return the priority weight for node; plus the weight of + * the bond to its parent node if the parent node is not null + * + * @see Utils#getSymbolPriorityWeight(String) + */ + public static Integer getNodeWeight(final ConnectionTreeNode node, final ConnectionTreeNode parentNode) { + int weight = 0; + if (parentNode + != null) { + final String bondSymbol = getSymbolForBond(node.getBondToParent()); + if (bondSymbol + == null) { + return null; + } + // add weight for bond type priority + if (!bondSymbol.isEmpty()) { + weight += getSymbolPriorityWeight(bondSymbol); + } + + } + // add weight for further symbol priority + if (node.isRingClosureNode()) { + weight += getSymbolPriorityWeight("&"); + } else { + weight += getSymbolPriorityWeight(node.getAtom() + .getSymbol()); + } + + return weight; + } + + /** + * Returns an ArrayList of ranked child node indices for a tree node. + * + * @param node node to rank the children + * + * @return + * + * @see #getNodeWeight(ConnectionTreeNode, ConnectionTreeNode) + */ + private static List getRankedChildNodesIndices(final ConnectionTreeNode node) { + final List childNodes = node.getChildNodes(); + final List rankedChildNodesIndices = new ArrayList<>(); + for (int i = 0; i + < childNodes.size(); i++) { + rankedChildNodesIndices.add(i); + } + rankedChildNodesIndices.sort((childNodeIndex1, childNodeIndex2) -> { + final int nodeWeightsComp = -1 + * Integer.compare(getNodeWeight(childNodes.get(childNodeIndex1), node), + getNodeWeight(childNodes.get(childNodeIndex2), node)); + if (nodeWeightsComp + != 0) { + return nodeWeightsComp; + } + return -1 + * Integer.compare(calculateSubtreeWeight(childNodes.get(childNodeIndex1)), + calculateSubtreeWeight(childNodes.get(childNodeIndex2))); + }); + + return rankedChildNodesIndices; + } + + /** + * Sorts the child nodes of a node by HOSE code priority and weight. 
+ * + * @param node node with child nodes to rank + * + * @see #getNodeWeight(ConnectionTreeNode, ConnectionTreeNode) + */ + private static void rankChildNodes(final ConnectionTreeNode node) { + final List rankedChildNodesIndices = getRankedChildNodesIndices(node); + final List rankedChildNodes = new ArrayList<>(); + final List rankedChildNodeBonds = new ArrayList<>(); + for (int i = 0; i + < rankedChildNodesIndices.size(); i++) { + rankedChildNodes.add(node.getChildNodes() + .get(rankedChildNodesIndices.get(i))); + rankedChildNodeBonds.add(node.getBondsToChildren() + .get(rankedChildNodesIndices.get(i))); + } + node.getChildNodes() + .clear(); + node.getBondsToChildren() + .clear(); + node.getChildNodes() + .addAll(rankedChildNodes); + node.getBondsToChildren() + .addAll(rankedChildNodeBonds); + } + + /** + * Sorts the child nodes of each node in the connection tree by HOSE code + * priority and weight. + * + * @param connectionTree connection tree where to rank the child nodes of + * each node. + * + * @see #rankChildNodes(ConnectionTreeNode) + */ + public static void rankChildNodes(final ConnectionTree connectionTree) { + List nodesInSphere; + for (int sphere = 0; sphere + < connectionTree.getMaxSphere(); sphere++) { + nodesInSphere = connectionTree.getNodesInSphere(sphere, true); + // for all nodes in sphere + for (int i = 0; i + < nodesInSphere.size(); i++) { + // findHits all child nodes of that node + if (nodesInSphere.get(i) + .hasChildren()) { + rankChildNodes(nodesInSphere.get(i)); + } + } + } + } + + /** + * Returns the number of non-empty spheres. + * For example: C-3;() -> 1, C-3;=N(C/) -> 3 + * + * @param HOSECode + * + * @return + */ + public static int getSpheresCount(final String HOSECode) { + int spheresCount = 0; + for (final String sphere : splitHOSECodeIntoSpheres(HOSECode)) { + if (!sphere.trim() + .isEmpty()) { + spheresCount++; + } + } + return spheresCount; + } + + /** + * Splits a HOSE code into a list of spheres as strings. 
+ * + * @param HOSECode HOSE code + * + * @return ArrayList of all sphere strings + */ + public static List splitHOSECodeIntoSpheres(final String HOSECode) { + final List HOSECodeSpheres = new ArrayList<>(); + final String[] splitSpheres_0_1 = HOSECode.split(";"); + final String[] splitSpheres_1_2 = splitSpheres_0_1[1].split("\\("); + final String[] splitSpheres_2_n = splitSpheres_1_2[1].substring(0, splitSpheres_1_2[1].length() + - 1) + .split("/"); + HOSECodeSpheres.add(splitSpheres_0_1[0]); + HOSECodeSpheres.add(splitSpheres_1_2[0]); + for (int s = 0; s + < splitSpheres_2_n.length; s++) { + HOSECodeSpheres.add(splitSpheres_2_n[s]); + } + + return HOSECodeSpheres; + } + + /** + * Splits a HOSE code sphere into its positions. Each position includes all + * its elements. + * Example: {@code /CC,*N&,C/} results in: {@code {0: [C,C], 1: [*N,&], 2: [C]}} + * + * @param HOSECodeSphere HOSE code sphere + * @param isCenterSphere whether center (zeroth) sphere is given + * + * @return HashMap of ArrayLists containing elements for each position of that HOSE code sphere + */ + public static Map> splitHOSECodeSphereIntoPositions(final String HOSECodeSphere, + final boolean isCenterSphere) { + final Map> positions = new HashMap<>(); + // zeroth sphere + if (isCenterSphere) { + positions.put(0, new ArrayList<>()); + // add element + positions.get(0) + .add(HOSECodeSphere.split("-")[0]); + // zeroth sphere contains charges + if (HOSECodeSphere.endsWith("-")) { // add negative formal charge with value 1 + positions.get(0) + .add("-1"); + } else if (HOSECodeSphere.endsWith("+")) {// add positive formal charge with value 1 + positions.get(0) + .add("1"); + } else if (HOSECodeSphere.endsWith("'")) { // add formal charge with a higher value + positions.get(0) + .add(HOSECodeSphere.split("'")[1]); + } else { + positions.get(0) + .add("0"); + } + + return positions; + } + // higher spheres + char c; + StringBuilder elem = new StringBuilder(); + int positionCounter = 0; + boolean 
formalChargeDetected = false; + for (int i = 0; i + < HOSECodeSphere.length(); i++) { + c = HOSECodeSphere.charAt(i); + if ((c + == '=') + || (c + == '%') + || (c + == '*')) { + if (elem.length() + > 0) { + if (!positions.containsKey(positionCounter)) { + positions.put(positionCounter, new ArrayList<>()); + } + positions.get(positionCounter) + .add(elem.toString()); + elem = new StringBuilder(); + } + elem.append(c); + } else if (Character.isUpperCase(c) + || (c + == '&')) { + if ((elem.length() + > 0) + && (Character.isLetter(elem.charAt(elem.length() + - 1)) + || (elem.charAt(elem.length() + - 1) + == '&'))) { + if (!positions.containsKey(positionCounter)) { + positions.put(positionCounter, new ArrayList<>()); + } + positions.get(positionCounter) + .add(elem.toString()); + elem = new StringBuilder(); + elem.append(c); + } else if (formalChargeDetected) { + if (!positions.containsKey(positionCounter)) { + positions.put(positionCounter, new ArrayList<>()); + } + positions.get(positionCounter) + .add(elem.toString()); + elem = new StringBuilder(); + elem.append(c); + formalChargeDetected = false; + } else { + elem.append(c); + } + } else if (Character.isLowerCase(c)) { + elem.append(c); + } else if ((c + == '-') + || (c + == '+') + || Character.isDigit(c)) { + elem.append(c); + formalChargeDetected = true; + } else if (c + == ',') { + if (!positions.containsKey(positionCounter)) { + positions.put(positionCounter, new ArrayList<>()); + } + if (elem.length() + == 0) { + positions.get(positionCounter) + .add(null); + } else { + positions.get(positionCounter) + .add(elem.toString()); + elem = new StringBuilder(); + } + positionCounter++; + formalChargeDetected = false; + } + } + // add last element + if (elem.length() + > 0) { + if (!positions.containsKey(positionCounter)) { + positions.put(positionCounter, new ArrayList<>()); + } + positions.get(positionCounter) + .add(elem.toString()); + } else if (HOSECodeSphere.endsWith(",")) { + if 
(!positions.containsKey(positionCounter)) { + positions.put(positionCounter, new ArrayList<>()); + } + positions.get(positionCounter) + .add(null); + } + + return positions; + } + + /** + * Counts the number of occurring atoms within a given HOSE code. + * + * @param HOSECode HOSE code to analyse + * + * @return number of atoms within HOSE code + */ + public static int countAtoms(final String HOSECode) { + int counter = 0; + for (int k = 0; k + < HOSECode.length(); k++) { + // Check for uppercase letters + if (Character.isLetter(HOSECode.charAt(k)) + && Character.isUpperCase(HOSECode.charAt(k))) { + counter++; + } + } + + return counter; + } + + /** + * Returns the weight/cost for an HOSE code symbol regarding its priority. + * + * @param symbol HOSE code symbol + * + * @return weight/cost for the symbol + */ + public static int getSymbolPriorityWeight(final String symbol) { + switch (symbol) { + case "%": + return 15; + case "=": + return 14; + case "*": + return 13; + case "C": + return 12; + case "O": + return 11; + case "N": + return 10; + case "S": + return 9; + case "P": + return 8; + case "Si": + case "Q": + return 7; + case "B": + return 6; + case "F": + return 5; + case "Cl": + case "X": + return 4; + case "Br": + case "Y": + return 3; + case "I": + return 2; + case "&": + return 1; + } + + return 0; + } + + /** + * Converts an element symbol into notation as shown in origin article by + * Bremser. + * That includes: Si -> Q, Cl -> X, Br -> Y + * + * @param element + * + * @return HOSE code symbol as in origin article by Bremser + */ + public static String toHOSECodeSymbol(final String element) { + if (element.equals("Si")) { + return "Q"; + } + if (element.equals("Cl")) { + return "X"; + } + if (element.equals("Br")) { + return "Y"; + } + + return element; + } + + /** + * Converts an HOSE code symbol as shown in origin article by + * Bremser into default element notation. 
+ * That includes: Q -> Si, X -> Cl, Y -> Br + * + * @param symbol + * + * @return default element notation + */ + public static String toElementSymbol(final String symbol) { + if (symbol.equals("Q")) { + return "Si"; + } + if (symbol.equals("X")) { + return "Cl"; + } + if (symbol.equals("Y")) { + return "Br"; + } + + return symbol; + } + + /** + * Returns the notation of bond information used in HOSE code. + * The bond has to contain its bond order and aromaticity information. + * + * @param bond bond containing bond order and aromaticity information + * + * @return HOSE code symbol for a bond + */ + public static String getSymbolForBond(final IBond bond) { + if (bond + != null) { + if (bond.isAromatic()) { + return "*"; + } + switch (bond.getOrder()) { + case SINGLE: + return ""; + case DOUBLE: + return "="; + case TRIPLE: + return "%"; + } + } + + return null; + } + + /** + * Returns the bond order from an HOSE code bond symbol. + * One has to consider that in this direction the aromatic HOSE code symbol + * (*) is ambiguous. It means either a single or a double aromatic bond. + * For that case, a single bond will always be returned. 
+ * + * @param symbol HOSE code bond symbol + * + * @return bond order for a bond symbol or if symbol is unknown + */ + public static IBond.Order getBondOrderForSymbol(final String symbol) { + + switch (symbol) { + case "": + case "*": + return IBond.Order.SINGLE; + case "=": + return IBond.Order.DOUBLE; + case "%": + return IBond.Order.TRIPLE; + } + + return null; + } +} diff --git a/src/casekit/nmr/hose/model/ConnectionTree.java b/src/casekit/nmr/hose/model/ConnectionTree.java new file mode 100644 index 0000000..b24911a --- /dev/null +++ b/src/casekit/nmr/hose/model/ConnectionTree.java @@ -0,0 +1,441 @@ +/* + * The MIT License + * + * Copyright (c) 2019 Michael Wenk [https://github.com/michaelwenk] + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ +package casekit.nmr.hose.model; + +import casekit.nmr.hose.Utils; +import org.openscience.cdk.interfaces.IAtom; +import org.openscience.cdk.interfaces.IBond; + +import java.util.*; + +/** + * Represents a tree of connected atoms (nodes) of a molecule + * in a parent-child-relationship. + * + * @author Michael Wenk [https://github.com/michaelwenk] + * @see ConnectionTreeNode + */ +public class ConnectionTree { + private final ConnectionTreeNode root; + private final Set keySet; + private int maxSphere; + + public ConnectionTree(final IAtom rootAtom, final int key) { + this.root = new ConnectionTreeNode(rootAtom, key, 0, null, null); + this.keySet = new HashSet<>(); + this.keySet.add(this.root.getKey()); + this.maxSphere = 0; + } + + /** + * Checks whether two nodes form a ring closures in one connection tree. + * + * @param node1 first node + * @param node2 second node + * + * @return + */ + public static boolean nodesFormRingClosure(final ConnectionTreeNode node1, final ConnectionTreeNode node2) { + return ConnectionTree.hasRingClosureParent(node1, node2) + && ConnectionTree.hasRingClosureParent(node2, node1); + } + + public static boolean hasRingClosureParent(final ConnectionTreeNode node, + final ConnectionTreeNode ringClosureParentNode) { + for (final ConnectionTreeNode childNode : node.getChildNodes()) { + if (childNode.isRingClosureNode() + && (childNode.getRingClosureParent() + .getKey() + == ringClosureParentNode.getKey())) { + return true; + } + } + + return false; + } + + /** + * Checks whether a node is at a ring closures. + * + * @param node node to check + * + * @return + */ + public static boolean isAtRingClosure(final ConnectionTreeNode node) { + for (final ConnectionTreeNode childNode : node.getChildNodes()) { + if (childNode.isRingClosureNode()) { + return true; + } + } + return false; + } + + /** + * Returns a subtree of a given connection tree by using a given subtree root node. 
+ * + * @param connectionTree connection tree + * @param rootNodeKey root node key for subtree to create + * + * @return + */ + public static ConnectionTree buildSubtree(final ConnectionTree connectionTree, final int rootNodeKey) { + if (!connectionTree.containsKey(rootNodeKey)) { + return null; + } + + final ConnectionTreeNode rootNode = connectionTree.getNode(rootNodeKey); + final ConnectionTree subtree = new ConnectionTree(rootNode.getAtom(), rootNode.getKey()); + + buildSubtree(subtree, rootNode, 1); + + return subtree; + } + + private static void buildSubtree(final ConnectionTree subtree, final ConnectionTreeNode parentNode, + final int sphere) { + int childNodeIndex = 0; + for (final ConnectionTreeNode childNode : parentNode.getChildNodes()) { + if (childNode.isRingClosureNode()) { + subtree.addRingClosureNode(parentNode.getKey(), childNode.getRingClosureParent() + .getKey(), childNode.getBondToParent()); + } else { + subtree.addNode(childNode.getAtom(), childNode.getKey(), parentNode.getKey(), + parentNode.getBondsToChildren() + .get(childNodeIndex)); + } + buildSubtree(subtree, childNode, sphere + + 1); + childNodeIndex++; + } + } + + public ConnectionTreeNode getRootNode() { + return this.root; + } + + public boolean addNode(final IAtom newNodeAtomData, final int newNodeKey, final int parentNodeKey, + final IBond bondToParent) { + if (this.containsKey(newNodeKey)) { + return false; + } + final ConnectionTreeNode parentNode = this.getNode(parentNodeKey); + this.addNode(new ConnectionTreeNode(newNodeAtomData, newNodeKey, parentNode.getSphere() + + 1, parentNode, bondToParent), parentNode); + + return true; + } + + public boolean addRingClosureNode(final int parentNodeKey, final int ringClosureParentNodeKey, + final IBond bondToParent) { + if (!this.containsKey(parentNodeKey) + || !this.containsKey(ringClosureParentNodeKey)) { + return false; + } + final ConnectionTreeNode parentNode = this.getNode(parentNodeKey); + this.addNode(new 
ConnectionTreeNode(this.getNode(ringClosureParentNodeKey), parentNode.getSphere() + + 1, parentNode, bondToParent), parentNode); + + return true; + } + + private void addNode(final ConnectionTreeNode newNode, final ConnectionTreeNode parentNode) { + parentNode.addChildNode(newNode, newNode.getBondToParent()); + + if (!newNode.isRingClosureNode()) { + this.keySet.add(newNode.getKey()); + } + if (newNode.getSphere() + > this.maxSphere) { + this.maxSphere = newNode.getSphere(); + } + } + + public int getMaxSphere() { + return this.maxSphere; + } + + public int getNodesCount(final boolean withRingClosureNodes) { + return this.getNodes(withRingClosureNodes) + .size(); + } + + public int getNodesCountInSphere(final int sphere, final boolean withRingClosureNodes) { + return this.getNodesInSphere(sphere, withRingClosureNodes) + .size(); + } + + public List getKeys() { + final ArrayList keys = new ArrayList<>(); + for (int s = 0; s + <= this.getMaxSphere(); s++) { + for (final ConnectionTreeNode nodeInSphere : this.getNodesInSphere(s, false)) { + keys.add(nodeInSphere.getKey()); + } + } + + return keys; + } + + public List getNodes(final boolean withRingClosureNodes) { + final List nodes = new ArrayList<>(); + for (int s = 0; s + <= this.getMaxSphere(); s++) { + nodes.addAll(this.getNodesInSphere(s, withRingClosureNodes)); + } + + return nodes; + } + + public boolean containsKey(final int key) { + return this.keySet.contains(key); + } + + public ConnectionTreeNode getNode(final int key) { + if (!this.containsKey(key)) { + return null; + } + + return this.findNode(key, this.root); + } + + private ConnectionTreeNode findNode(final int key, final ConnectionTreeNode currentNode) { + if (currentNode.isRingClosureNode()) { + return null; + } + if (currentNode.getKey() + == key) { + return currentNode; + } + ConnectionTreeNode result = null; + for (final ConnectionTreeNode childNode : currentNode.getChildNodes()) { + result = this.findNode(key, childNode); + if ((result + != null) 
+ && (result.getKey() + == key)) { + break; + } + } + + return result; + } + + public int getNodeIndexInSphere(final ConnectionTreeNode node, final int sphere) { + + return this.getNodesInSphere(sphere, true) + .indexOf(node); + } + + public List getNodeKeysInSphere(final int sphere) { + final List keys = new ArrayList<>(); + for (final ConnectionTreeNode treeNode : this.getNodesInSphere(sphere, false)) { + if (!treeNode.isRingClosureNode()) { + keys.add(treeNode.getKey()); + } + } + + return keys; + } + + public List getNodesInSphere(final int sphere, final boolean withRingClosureNodes) { + final List nodesInSphere = this.findNodesInSphere(sphere, this.root, new ArrayList<>()); + if (withRingClosureNodes) { + return nodesInSphere; + } + // remove ring closure nodes + final List nodesInSphereToRemove = new ArrayList<>(); + for (final ConnectionTreeNode nodeInSphere : nodesInSphere) { + if (nodeInSphere.isRingClosureNode()) { + nodesInSphereToRemove.add(nodeInSphere); + } + } + nodesInSphere.removeAll(nodesInSphereToRemove); + + return nodesInSphere; + } + + private List findNodesInSphere(final int sphere, final ConnectionTreeNode currentNode, + final List indicesInSphere) { + if (currentNode.getSphere() + == sphere) { + indicesInSphere.add(currentNode); + return indicesInSphere; + } + for (final ConnectionTreeNode childNode : currentNode.getChildNodes()) { + this.findNodesInSphere(sphere, childNode, indicesInSphere); + } + + return indicesInSphere; + } + + public IBond getBond(final int nodeKey1, final int nodeKey2) { + if (!this.containsKey(nodeKey1) + || !this.containsKey(nodeKey2)) { + return null; + } + // node1 and node2 have parent-child-relationship + final ConnectionTreeNode node1 = this.getNode(nodeKey1); + for (final ConnectionTreeNode childNode : node1.getChildNodes()) { + if (!childNode.isRingClosureNode() + && (childNode.getKey() + == nodeKey2)) { + return childNode.getBondToParent(); + } + } + // if nodes form a ring closure + if 
(ConnectionTree.nodesFormRingClosure(node1, this.getNode(nodeKey2))) { + for (final ConnectionTreeNode childNode : node1.getChildNodes()) { + if (childNode.isRingClosureNode() + && (childNode.getRingClosureParent() + .getKey() + == nodeKey2)) { + return childNode.getBondToParent(); + } + } + } + + return null; + } + + public boolean hasParent(final int key, final int parentKey) { + if (!this.containsKey(key) + || !this.containsKey(parentKey)) { + return false; + } + + return this.getNode(key) + .getParent() + .getKey() + == parentKey; + } + + public boolean hasChild(final int key, final int childKey) { + if (!this.containsKey(key) + || !this.containsKey(childKey)) { + return false; + } + + return this.getNode(key) + .hasChild(childKey); + } + + /** + * @param parentKey + * @param childKey1 + * @param childKey2 + * + * @return + */ + public boolean swapChildNodes(final int parentKey, final int childKey1, final int childKey2) { + if (!this.containsKey(parentKey) + || !this.containsKey(childKey1) + || !this.containsKey(childKey2) + || !this.hasChild(parentKey, childKey1) + || !this.hasChild(parentKey, childKey2) + || !this.hasParent(childKey1, parentKey) + || !this.hasParent(childKey2, parentKey)) { + return false; + } + final ConnectionTreeNode parentNode = this.getNode(parentKey); + final ConnectionTreeNode childNode1 = this.getNode(childKey1); + final ConnectionTreeNode childNode2 = this.getNode(childKey2); + + final int indexChildNode1 = parentNode.getChildNodes() + .indexOf(childNode1); + final int indexChildNode2 = parentNode.getChildNodes() + .indexOf(childNode2); + Collections.swap(parentNode.getChildNodes(), indexChildNode1, indexChildNode2); + Collections.swap(parentNode.getBondsToChildren(), indexChildNode1, indexChildNode2); + + + return (parentNode.getChildNodes() + .indexOf(childNode1) + == indexChildNode2) + && (parentNode.getChildNodes() + .indexOf(childNode2) + == indexChildNode1); + } + + @Override + public String toString() { + final StringBuilder 
treeStringBuilder = new StringBuilder(); + for (int s = 0; s + <= this.maxSphere; s++) { + treeStringBuilder.append("[") + .append(s) + .append("]"); + for (final ConnectionTreeNode nodeInSphere : this.getNodesInSphere(s, true)) { + treeStringBuilder.append(" "); + if (nodeInSphere.isRingClosureNode()) { + treeStringBuilder.append("-") + .append(nodeInSphere.getParent() + .getKey()) + .append(": "); + } else { + treeStringBuilder.append(nodeInSphere.getKey()) + .append(": "); + } + if (nodeInSphere.hasAParent()) { + treeStringBuilder.append(Utils.getSymbolForBond(nodeInSphere.getBondToParent())); + } + if (nodeInSphere.isRingClosureNode()) { + treeStringBuilder.append("&"); + } else { + treeStringBuilder.append(nodeInSphere.getAtom() + .getSymbol()); + } + treeStringBuilder.append(" {"); + if (nodeInSphere.isRingClosureNode()) { + treeStringBuilder.append(nodeInSphere.getRingClosureParent() + .getKey()); + } else { + if (nodeInSphere.getChildNodes() + .size() + > 1) { + for (final ConnectionTreeNode childNode : nodeInSphere.getChildNodes()) { + if (childNode.isRingClosureNode()) { + treeStringBuilder.append("-") + .append(childNode.getRingClosureParent() + .getKey()) + .append(" "); + } else { + treeStringBuilder.append(childNode.getKey()) + .append(" "); + } + } + } else if (nodeInSphere.getChildNodes() + .size() + == 1) { + if (nodeInSphere.getChildNodes() + .get(0) + .isRingClosureNode()) { + treeStringBuilder.append("-") + .append(nodeInSphere.getChildNodes() + .get(0) + .getRingClosureParent() + .getKey()); + } else { + treeStringBuilder.append(nodeInSphere.getChildNodes() + .get(0) + .getKey()); + } + + } + } + + treeStringBuilder.append("} "); + } + } + + return treeStringBuilder.toString(); + } +} diff --git a/src/casekit/nmr/hose/model/ConnectionTreeNode.java b/src/casekit/nmr/hose/model/ConnectionTreeNode.java new file mode 100644 index 0000000..87871f7 --- /dev/null +++ b/src/casekit/nmr/hose/model/ConnectionTreeNode.java @@ -0,0 +1,190 @@ +/* + * The 
MIT License + * + * Copyright (c) 2019 Michael Wenk [https://github.com/michaelwenk] + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +package casekit.nmr.hose.model; + +import org.openscience.cdk.interfaces.IAtom; +import org.openscience.cdk.interfaces.IBond; + +import java.util.ArrayList; +import java.util.List; + +/** + * Represents a node in a connection tree {@link ConnectionTree}. + * + * @author Michael Wenk [https://github.com/michaelwenk] + */ +public class ConnectionTreeNode { + + private final List children; + private final List bondsToChildren; + private final ConnectionTreeNode parent; + private final IBond bondToParent; + private IAtom atom; + private Integer key; + private int sphere; + private boolean isRingClosure; + private ConnectionTreeNode ringClosureParent; + + + /** + * Pre-defined constructor for creating a non-ring closure node. 
+ * + * @param atom + * @param key + * @param sphere + */ + public ConnectionTreeNode(final IAtom atom, final int key, final int sphere, final ConnectionTreeNode parent, + final IBond bondToParent) { + this.atom = atom; + this.key = key; + this.sphere = sphere; + this.parent = parent; + this.bondToParent = bondToParent; + this.children = new ArrayList<>(); + this.bondsToChildren = new ArrayList<>(); + this.isRingClosure = false; + } + + /** + * Pre-defined constructor for creating a ring closure node. + * + * @param sphere + * @param ringClosurePartner + */ + public ConnectionTreeNode(final ConnectionTreeNode ringClosurePartner, final int sphere, + final ConnectionTreeNode parent, final IBond bondToParent) { + this.sphere = sphere; + this.parent = parent; + this.bondToParent = bondToParent; + this.children = new ArrayList<>(); + this.bondsToChildren = new ArrayList<>(); + this.isRingClosure = true; + this.ringClosureParent = ringClosurePartner; + } + + public IAtom getAtom() { + return this.atom; + } + + public void setAtom(final IAtom atom) { + this.atom = atom; + } + + public ConnectionTreeNode getParent() { + return this.parent; + } + + public IBond getBondToParent() { + return this.bondToParent; + } + + public List getChildNodes() { + return this.children; + } + + public List getBondsToChildren() { + return this.bondsToChildren; + } + + public Integer getKey() { + return this.key; + } + + public void setKey(final int key) { + this.key = key; + } + + public int getSphere() { + return this.sphere; + } + + public void setSphere(final int sphere) { + this.sphere = sphere; + } + + public void setIsRingClosureNode(final boolean isRingClosureNode) { + this.isRingClosure = isRingClosureNode; + } + + public boolean isRingClosureNode() { + return this.isRingClosure; + } + + public ConnectionTreeNode getRingClosureParent() { + return this.ringClosureParent; + } + + public void setRingClosureParent(final ConnectionTreeNode ringClosureParent) { + this.ringClosureParent = 
ringClosureParent; + } + + public boolean addChildNode(final ConnectionTreeNode childNode, final IBond bondToChild) { + return this.addChildNode(childNode, bondToChild, this.getChildNodes() + .size()); + } + + public boolean addChildNode(final ConnectionTreeNode childNode, final IBond bondToChild, final int pos) { + if (!this.checkListIndex(this.getChildNodes() + .size(), pos)) { + return false; + } + this.getChildNodes() + .add(pos, childNode); + this.getBondsToChildren() + .add(pos, bondToChild); + + return true; + } + + private boolean checkListIndex(final int listLength, final int pos) { + return (pos + >= 0) + && (pos + <= listLength); + } + + public boolean removeChildNode(final ConnectionTreeNode childNode) { + final int indexOfChildNode = this.getChildNodes() + .indexOf(childNode); + if (indexOfChildNode + == -1) { + return false; + } + this.getChildNodes() + .remove(indexOfChildNode); + this.getBondsToChildren() + .remove(indexOfChildNode); + + return true; + } + + public boolean hasAParent() { + return this.parent + != null; + } + + public boolean hasChild(final int childKey) { + for (final ConnectionTreeNode childNode : this.children) { + if (!childNode.isRingClosureNode() + && (childNode.getKey() + == childKey)) { + return true; + } + } + + return false; + } + + public boolean hasChildren() { + return !this.children.isEmpty(); + } +} diff --git a/src/casekit/nmr/utils/Predict.java b/src/casekit/nmr/utils/Predict.java index 20069ac..dcfb4d9 100644 --- a/src/casekit/nmr/utils/Predict.java +++ b/src/casekit/nmr/utils/Predict.java @@ -25,9 +25,9 @@ import casekit.nmr.Utils; +import casekit.nmr.hose.HOSECodeBuilder; import casekit.nmr.model.Signal; import casekit.nmr.model.Spectrum; -import hose.HOSECodeBuilder; import org.openscience.cdk.exception.CDKException; import org.openscience.cdk.interfaces.IAtom; import org.openscience.cdk.interfaces.IAtomContainer; From 54afba9d955c118491d9b0ae16bfb2ba5ef51cd7 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Thu, 
29 Apr 2021 12:15:01 +0200 Subject: [PATCH 194/405] feat: updated README.md --- README.md | 66 +++++++------------------------------------------------ 1 file changed, 8 insertions(+), 58 deletions(-) diff --git a/README.md b/README.md index 4594f8f..9840a35 100644 --- a/README.md +++ b/README.md @@ -1,19 +1,20 @@ -# +# + # The Computer-Assisted-Structure-Elucidation Kit (CASEkit) - + Copyright 2017 Christoph Steinbeck License: MIT, see doc/mit.license ## Introduction -This project hosts various Java classes for teaching and research dealing with spectral data in chemistry and metabolomics. -This project depends on the Chemistry Development Project (CDK), hosted under http://cdk.github.casekit.io/ -Please refer to these pages for updated information and the latest version of the CDK. CDK's API documentation is available though our [Github site](http://cdk.github.io/cdk/). +This project depends on the Chemistry Development Project (CDK), hosted under https://cdk.github.io/ +Please refer to these pages for updated information and the latest version of the CDK. CDK's API documentation is +available though our [Github site](http://cdk.github.io/cdk/). ## Download Source code -This assumes that you have git working on your system and you have initialised your local repository. +This assumes that you have git working on your system and you have initialised your local repository. Then, downloading casekit is just a matter of @@ -23,65 +24,14 @@ $ git clone https://github.com/michaelwenk/casekit ## Compiling -The package HOSECodeBuilder (https://github.com/michaelwenk/HOSECodeBuilder) has to be installed on the local machine. -It is a dependency in casekit's pom.xml and has to ready to use, e.g. installed in Maven's .m2 folder.
-This can be done something like: - - mvn install:install-file -Dfile=PATH/TO/HOSECodeBuilder-1.0-SNAPSHOT-jar-with-dependencies.jar -DgroupId=org.openscience -DartifactId=HOSECodeBuilder -Dversion=1.0 -Dpackaging=jar - Compiling the library is performed with Apache Maven and requires Java 1.7 or later: ```bash cd casekit mvn clean package ``` -will create an all-in-one-jar under ./target - -## Usage - -### Shift Prediction with HOSE codes - -The following classes are to demonstrate the prediction of Carbon-13 casekit.nmr spectra with HOSE codes. They only demonstrate the basic working principle and ignore, for example, stereochemistry, which can lead to large errors in, for example, the prediction of shifts for E/Z configurations of double bonds. If you want to know more about the details and a sophisticated system implementing them, please refer to Schutz V, Purtuc V, Felsinger S, Robien W (1997) CSEARCH-STEREO: A new generation of casekit.nmr database systems allowing three-dimensional spectrum prediction. Fresenius Journal of Analytical Chemistry 359:33–41. doi: 10.1007/s002160050531. - -#### NMRShiftDBSDFParser - -Takes the NMRShiftDB SDF with assigned spectra (download from help section of NMRShiftDB.org) and produces a Tab-separated file with HOSE codes and assigned shift values. This file can then be read by HOSECodePredictor. - -```bash -usage: java -jar casekit.jar casekit.NMRShiftDBSDFParser -i -o - -m [-v] [-d ] -Generates a table of HOSE codes and assigned shifts from an NMRShiftDB SDF -file from http://nmrshiftdb.nmr.uni-koeln.de/portal/js_pane/P-Help. 
- - -i,--infile filename of NMRShiftDB SDF with spectra - (required) - -o,--outfile filename of generated HOSE code table (required) - -m,--maxspheres maximum sphere size up to which to generate HOSE - codes (required) - -v,--verbose generate messages about progress of operation - -d,--picdir store pictures in given directory -``` -#### HOSECodePredictor - -Predicts a spectrum (chemical shifts, to be precise) for a given molecule provided as SDF file. -It needs the TSV file generated by NMRShiftDBSDFParser as input. - -```bash -usage: java -jar casekit.jar casekit.HOSECodePredictor -s -i - -d -m [-v] -Predict casekit.nmr chemical shifts for a given molecule based on table of HOSE -codes and assigned shifts. - - -s,--hosecodes filename of TSV file with HOSE codes (required) - -i,--infile filename of with SDF/MOL file of a structure to - be predicted (required) - -d,--picdir store picture of structure with assigned shifts - in given directory (required) - -m,--maxspheres maximum sphere size up to which to generate HOSE - codes (required) - -v,--verbose generate messages about progress of operation -``` +will create an all-in-one-jar under ./target which you can use in your Java project. 
From bcc849e2b98ff6fa8f2a269f5883fa75ad57de19 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Wed, 5 May 2021 23:46:18 +0200 Subject: [PATCH 195/405] fix: replaced HashSet by Set --- src/casekit/nmr/hose/HOSECodeBuilder.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/casekit/nmr/hose/HOSECodeBuilder.java b/src/casekit/nmr/hose/HOSECodeBuilder.java index a685121..32389bb 100644 --- a/src/casekit/nmr/hose/HOSECodeBuilder.java +++ b/src/casekit/nmr/hose/HOSECodeBuilder.java @@ -269,7 +269,7 @@ public static ConnectionTree buildConnectionTree(final IAtomContainer ac, final * @see ConnectionTree */ public static ConnectionTree buildConnectionTree(final IAtomContainer ac, final int rootAtomIndex, - final Integer maxSphere, final HashSet visited) { + final Integer maxSphere, final Set visited) { // create queue for BFS and add root atom index final Queue queue = new LinkedList<>(); queue.add(rootAtomIndex); From 9b57ef0e474c89a0fdc18088183225c03a1cac15 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Mon, 10 May 2021 23:11:00 +0200 Subject: [PATCH 196/405] fix: wrong order if allowLowerEquivalencesCount is checked --- src/casekit/nmr/utils/Match.java | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/src/casekit/nmr/utils/Match.java b/src/casekit/nmr/utils/Match.java index 12d8e95..a8684f8 100644 --- a/src/casekit/nmr/utils/Match.java +++ b/src/casekit/nmr/utils/Match.java @@ -240,10 +240,9 @@ public static Double calculateRMSD(final Spectrum spectrum1, final Spectrum spec /** * Returns the closest shift matches between two spectra in selected dimensions * as an Assignment object with one set dimension only.
- * Despite intensities are expected, they are still not considered here. * - * @param spectrum1 first spectrum - * @param spectrum2 second spectrum (query as exact or subspectrum to check) + * @param spectrum1 first spectrum (possible subspectrum) + * @param spectrum2 second spectrum * @param dim1 dimension in first spectrum to take the shifts from * @param dim2 dimension in second spectrum to take the shifts from * @param shiftTol Tolerance value [ppm] used during spectra shift @@ -284,17 +283,17 @@ public static Assignment matchSpectra(final Spectrum spectrum1, final Spectrum s passed = true; // @TODO maybe consider further parameters to check ? e.g. intensity if (checkMultiplicity) { - passed = spectrum2.getMultiplicity(pickedSignalIndexSpectrum2) - .equals(spectrum1.getMultiplicity(i)); + passed = spectrum1.getMultiplicity(i) + .equals(spectrum2.getMultiplicity(pickedSignalIndexSpectrum2)); } if (passed && checkEquivalencesCount) { if (allowLowerEquivalencesCount) { - passed = spectrum2.getEquivalencesCount(pickedSignalIndexSpectrum2) - <= spectrum1.getEquivalencesCount(i); + passed = spectrum1.getEquivalencesCount(i) + <= spectrum2.getEquivalencesCount(pickedSignalIndexSpectrum2); } else { - passed = spectrum2.getEquivalencesCount(pickedSignalIndexSpectrum2) - == spectrum1.getEquivalencesCount(i); + passed = spectrum1.getEquivalencesCount(i) + == spectrum2.getEquivalencesCount(pickedSignalIndexSpectrum2); } } From 0327450ea0960702e368ca13fb0b1153688e87ec Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Fri, 4 Jun 2021 08:57:06 +0200 Subject: [PATCH 197/405] feat: added method to remove a node from connection tree --- .../nmr/hose/model/ConnectionTree.java | 28 ++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/src/casekit/nmr/hose/model/ConnectionTree.java b/src/casekit/nmr/hose/model/ConnectionTree.java index b24911a..77733ef 100644 --- a/src/casekit/nmr/hose/model/ConnectionTree.java +++ 
b/src/casekit/nmr/hose/model/ConnectionTree.java @@ -174,7 +174,7 @@ public int getNodesCountInSphere(final int sphere, final boolean withRingClosure } public List getKeys() { - final ArrayList keys = new ArrayList<>(); + final List keys = new ArrayList<>(); for (int s = 0; s <= this.getMaxSphere(); s++) { for (final ConnectionTreeNode nodeInSphere : this.getNodesInSphere(s, false)) { @@ -328,12 +328,38 @@ public boolean hasChild(final int key, final int childKey) { .hasChild(childKey); } + public void removeNode(final int key) { + final ConnectionTreeNode node = this.getNode(key); + if (node + != null) { + final List children = new ArrayList<>(node.getChildNodes()); + for (final ConnectionTreeNode childNode : children) { + if (childNode.isRingClosureNode()) { + childNode.getRingClosureParent() + .removeChildNode(childNode); + } else { + this.removeNode(childNode.getKey()); + } + node.removeChildNode(childNode); + this.keySet.remove(childNode.getKey()); + } + final ConnectionTreeNode parent = node.getParent(); + if (parent + != null) { + parent.removeChildNode(node); + } + this.keySet.remove(node.getKey()); + } + } + /** * @param parentKey * @param childKey1 * @param childKey2 * * @return + * + * @deprecated */ public boolean swapChildNodes(final int parentKey, final int childKey1, final int childKey2) { if (!this.containsKey(parentKey) From 3f174f754203faff690f0440e2b9120951a760e9 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Fri, 4 Jun 2021 08:58:16 +0200 Subject: [PATCH 198/405] fix: parse multiplicities with toLowerCase --- src/casekit/nmr/dbservice/NMRShiftDB.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/casekit/nmr/dbservice/NMRShiftDB.java b/src/casekit/nmr/dbservice/NMRShiftDB.java index 67ef510..7599498 100644 --- a/src/casekit/nmr/dbservice/NMRShiftDB.java +++ b/src/casekit/nmr/dbservice/NMRShiftDB.java @@ -420,7 +420,7 @@ public static Spectrum NMRShiftDBSpectrumToSpectrum(final String NMRShiftDBSpect < 
spectrumStringArray.length; i++) { shift = Double.parseDouble(spectrumStringArray[i][0]); intensity = Double.parseDouble(spectrumStringArray[i][1]); - multiplicity = spectrumStringArray[i][2]; + multiplicity = spectrumStringArray[i][2].toLowerCase(); spectrum.addSignal( new Signal(new String[]{nucleus}, new Double[]{shift}, multiplicity, "signal", intensity, 1, 0)); @@ -452,7 +452,7 @@ public static Assignment NMRShiftDBSpectrumToAssignment(final String NMRShiftDBS // just to be sure that we take the right signal if equivalences are present closestSignalList = spectrum.pickByClosestShift(Double.parseDouble(NMRShiftDBSpectrumStringArray[i][0]), 0, 0.0); - multiplicity = NMRShiftDBSpectrumStringArray[i][2]; + multiplicity = NMRShiftDBSpectrumStringArray[i][2].toLowerCase(); closestSignalList.retainAll(spectrum.pickByMultiplicity(multiplicity)); signalIndex = closestSignalList.get(0); From 881fad3194e79892d77693cc0de3d0b580b733f3 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Fri, 4 Jun 2021 09:01:17 +0200 Subject: [PATCH 199/405] feat: added H to HOSE code node weight --- src/casekit/nmr/hose/Utils.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/casekit/nmr/hose/Utils.java b/src/casekit/nmr/hose/Utils.java index c4ddfb6..806ee70 100644 --- a/src/casekit/nmr/hose/Utils.java +++ b/src/casekit/nmr/hose/Utils.java @@ -87,6 +87,9 @@ public static Integer getNodeWeight(final ConnectionTreeNode node, final Connect } else { weight += getSymbolPriorityWeight(node.getAtom() .getSymbol()); + // weight -= node.getAtom() + // .getImplicitHydrogenCount() + // * getSymbolPriorityWeight("H"); } return weight; @@ -418,6 +421,7 @@ public static int getSymbolPriorityWeight(final String symbol) { case "I": return 2; case "&": + case "H": return 1; } From b3a0a6ac3815c0c4557ad8a89576b06b06f2f314 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Fri, 4 Jun 2021 09:05:51 +0200 Subject: [PATCH 200/405] feat: added methods to add and convert implicit to explicit hydrogens and 
to calculate the RMS value --- src/casekit/nmr/Utils.java | 57 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/src/casekit/nmr/Utils.java b/src/casekit/nmr/Utils.java index 1df3486..f10489d 100644 --- a/src/casekit/nmr/Utils.java +++ b/src/casekit/nmr/Utils.java @@ -298,6 +298,39 @@ public static Double getMean(final Collection data) { * * @return */ + public static Double getRMS(final Collection data) { + if ((data + == null) + || data.isEmpty()) { + return null; + } + double sum = 0; + int nullCounter = 0; + for (final Double d : data) { + if (d + != null) { + sum += d + * d; + } else { + nullCounter++; + } + } + return ((data.size() + - nullCounter) + != 0) + ? Math.sqrt(sum + / (data.size() + - nullCounter)) + : null; + } + + /** + * @param data + * + * @return + * + * @deprecated + */ public static Double getStandardDeviation(final List data) { if ((data == null) @@ -312,6 +345,14 @@ public static Double getStandardDeviation(final List data) { : null; } + + /** + * @param data + * + * @return + * + * @deprecated + */ public static Double getVariance(final Collection data) { if ((data == null) @@ -374,6 +415,13 @@ public static Double getMean(final Double[] data) { : null; } + /** + * @param lookup + * + * @return + * + * @deprecated + */ public static Map getMean(final Map> lookup) { final HashMap means = new HashMap<>(); @@ -475,6 +523,15 @@ public static void addImplicitHydrogens(final IAtomContainer ac) throws CDKExcep adder.addImplicitHydrogens(ac); } + public static void addExplicitHydrogens(final IAtomContainer ac) throws CDKException { + addImplicitHydrogens(ac); + convertImplicitToExplicitHydrogens(ac); + } + + public static void convertImplicitToExplicitHydrogens(final IAtomContainer ac) { + AtomContainerManipulator.convertImplicitToExplicitHydrogens(ac); + } + /** * @param lookup * From af7a27e32e5154295db9df64e139c5da21a63b3e Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Fri, 4 Jun 2021 09:11:42 +0200 Subject: 
[PATCH 201/405] fix: take implicit hydrogens into account when building root sphere --- src/casekit/nmr/hose/HOSECodeBuilder.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/casekit/nmr/hose/HOSECodeBuilder.java b/src/casekit/nmr/hose/HOSECodeBuilder.java index 32389bb..bd7cd06 100644 --- a/src/casekit/nmr/hose/HOSECodeBuilder.java +++ b/src/casekit/nmr/hose/HOSECodeBuilder.java @@ -176,9 +176,7 @@ private static String buildHOSECodeString(final ConnectionTree connectionTree, // zeroth sphere final StringBuilder HOSECode = new StringBuilder(rootAtom.getSymbol() + "-" - + (connectionTree.getRootNode() - .getChildNodes() - .size() + + (rootAtom.getBondCount() + (rootAtom.getImplicitHydrogenCount() == null ? 0 From aa194fa1ed2659ac9f0c360b2fe9717bfd0e77d9 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Fri, 4 Jun 2021 09:42:47 +0200 Subject: [PATCH 202/405] feat: added method to build HOSE code statistics --- .../HOSECodeShiftStatisticsBuilder.java | 67 +++++++++++++++++++ 1 file changed, 67 insertions(+) create mode 100644 src/casekit/nmr/analysis/HOSECodeShiftStatisticsBuilder.java diff --git a/src/casekit/nmr/analysis/HOSECodeShiftStatisticsBuilder.java b/src/casekit/nmr/analysis/HOSECodeShiftStatisticsBuilder.java new file mode 100644 index 0000000..2b321e6 --- /dev/null +++ b/src/casekit/nmr/analysis/HOSECodeShiftStatisticsBuilder.java @@ -0,0 +1,67 @@ +package casekit.nmr.analysis; + +import casekit.nmr.hose.HOSECodeBuilder; +import casekit.nmr.model.DataSet; +import casekit.nmr.model.Signal; +import org.openscience.cdk.exception.CDKException; +import org.openscience.cdk.interfaces.IAtomContainer; + +import java.util.*; + +public class HOSECodeShiftStatisticsBuilder { + + public static Map> buildHOSECodeShiftStatistics(final List dataSetList, + final int maxSphere) { + final Map>> hoseCodeShifts = new HashMap<>(); + IAtomContainer structure; + Signal signal; + String hoseCode; + String solvent; + for (final DataSet dataSet : 
dataSetList) { + structure = dataSet.getStructure() + .toAtomContainer(); + solvent = dataSet.getSpectrum() + .getSolvent(); + for (int i = 0; i + < structure.getAtomCount(); i++) { + signal = dataSet.getSpectrum() + .getSignal(dataSet.getAssignment() + .getIndex(0, i)); + if (signal + != null) { + try { + for (int sphere = 1; sphere + <= maxSphere; sphere++) { + hoseCode = HOSECodeBuilder.buildHOSECode(structure, i, sphere, false); + hoseCodeShifts.putIfAbsent(hoseCode, new HashMap<>()); + hoseCodeShifts.get(hoseCode) + .putIfAbsent(solvent, new ArrayList<>()); + hoseCodeShifts.get(hoseCode) + .get(solvent) + .add(signal.getShift(0)); + } + } catch (final CDKException e) { + e.printStackTrace(); + } + } + } + } + final Map> hoseCodeShiftStatistics = new HashMap<>(); + List values; + for (final Map.Entry>> hoseCodes : hoseCodeShifts.entrySet()) { + hoseCodeShiftStatistics.put(hoseCodes.getKey(), new HashMap<>()); + for (final Map.Entry> solvents : hoseCodes.getValue() + .entrySet()) { + values = solvents.getValue(); //casekit.nmr.Utils.removeOutliers(solvents.getValue(), 1.5); + hoseCodeShiftStatistics.get(hoseCodes.getKey()) + .put(solvents.getKey(), + new Double[]{Double.valueOf(values.size()), Collections.min(values), + casekit.nmr.Utils.getMean(values), + // casekit.nmr.Utils.getRMS(values), + casekit.nmr.Utils.getMedian(values), Collections.max(values)}); + } + } + + return hoseCodeShiftStatistics; + } +} From ad3c42ff7e6fd709a84c254508b543097ec8dc20 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Sat, 5 Jun 2021 18:57:56 +0200 Subject: [PATCH 203/405] feat: added ExtendedHOSEGenerator --- pom.xml | 5 +++++ .../nmr/analysis/HOSECodeShiftStatisticsBuilder.java | 12 ++++++++++-- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 30c2ba0..35d4662 100644 --- a/pom.xml +++ b/pom.xml @@ -50,6 +50,11 @@ cdk-bundle 2.3
+ + org.openscience.nmrshiftdb + predictorh + 1.0 + commons-cli commons-cli diff --git a/src/casekit/nmr/analysis/HOSECodeShiftStatisticsBuilder.java b/src/casekit/nmr/analysis/HOSECodeShiftStatisticsBuilder.java index 2b321e6..23a3d79 100644 --- a/src/casekit/nmr/analysis/HOSECodeShiftStatisticsBuilder.java +++ b/src/casekit/nmr/analysis/HOSECodeShiftStatisticsBuilder.java @@ -5,14 +5,17 @@ import casekit.nmr.model.Signal; import org.openscience.cdk.exception.CDKException; import org.openscience.cdk.interfaces.IAtomContainer; +import org.openscience.nmrshiftdb.util.ExtendedHOSECodeGenerator; import java.util.*; public class HOSECodeShiftStatisticsBuilder { public static Map> buildHOSECodeShiftStatistics(final List dataSetList, - final int maxSphere) { + final int maxSphere, + final boolean use3D) { final Map>> hoseCodeShifts = new HashMap<>(); + final ExtendedHOSECodeGenerator extendedHOSECodeGenerator = new ExtendedHOSECodeGenerator(); IAtomContainer structure; Signal signal; String hoseCode; @@ -32,7 +35,12 @@ public static Map> buildHOSECodeShiftStatistics(fi try { for (int sphere = 1; sphere <= maxSphere; sphere++) { - hoseCode = HOSECodeBuilder.buildHOSECode(structure, i, sphere, false); + if (use3D) { + hoseCode = extendedHOSECodeGenerator.getHOSECode(structure, structure.getAtom(i), + maxSphere); + } else { + hoseCode = HOSECodeBuilder.buildHOSECode(structure, i, sphere, false); + } hoseCodeShifts.putIfAbsent(hoseCode, new HashMap<>()); hoseCodeShifts.get(hoseCode) .putIfAbsent(solvent, new ArrayList<>()); From ec2d00cc6c2d9294189f7a69b31308f31d842bde Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Sat, 5 Jun 2021 18:58:36 +0200 Subject: [PATCH 204/405] feat: added Utils class for DB services --- src/casekit/nmr/dbservice/Utils.java | 53 ++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 src/casekit/nmr/dbservice/Utils.java diff --git a/src/casekit/nmr/dbservice/Utils.java b/src/casekit/nmr/dbservice/Utils.java new file mode 
100644 index 0000000..4fb1bf2 --- /dev/null +++ b/src/casekit/nmr/dbservice/Utils.java @@ -0,0 +1,53 @@ +package casekit.nmr.dbservice; + +import org.openscience.cdk.exception.CDKException; +import org.openscience.cdk.io.SDFWriter; +import org.openscience.cdk.io.iterator.IteratingSDFReader; +import org.openscience.cdk.silent.SilentChemObjectBuilder; + +import java.io.BufferedWriter; +import java.io.FileReader; +import java.io.FileWriter; +import java.io.IOException; + +public class Utils { + + public static int splitSDFile(final String pathToSDFile, final int maxMolPerFile) throws IOException, CDKException { + final IteratingSDFReader iterator = new IteratingSDFReader(new FileReader(pathToSDFile), + SilentChemObjectBuilder.getInstance()); + final String fileEnding = pathToSDFile.split("\\.")[1]; + BufferedWriter bufferedWriter; + SDFWriter sdfWriter; + int counter = 0; + int part = 1; + bufferedWriter = new BufferedWriter(new FileWriter(pathToSDFile.split("\\.")[0] + + "_" + + part + + "." + + fileEnding)); + sdfWriter = new SDFWriter(bufferedWriter); + while (iterator.hasNext()) { + if (counter + >= maxMolPerFile) { + sdfWriter.close(); + + part++; + bufferedWriter = new BufferedWriter(new FileWriter(pathToSDFile.split("\\.")[0] + + "_" + + part + + "." 
+ + fileEnding)); + sdfWriter = new SDFWriter(bufferedWriter); + sdfWriter.write(iterator.next()); + counter = 1; + } else { + sdfWriter.write(iterator.next()); + } + counter++; + } + sdfWriter.close(); + + return part + - 1; + } +} From 76e24dbbb3bf45c30542ff124558d95b43f4471d Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Sat, 5 Jun 2021 19:14:48 +0200 Subject: [PATCH 205/405] feat: added COCONUT data set import --- src/casekit/nmr/dbservice/COCONUT.java | 167 +++++++++++++++++++++++++ 1 file changed, 167 insertions(+) create mode 100644 src/casekit/nmr/dbservice/COCONUT.java diff --git a/src/casekit/nmr/dbservice/COCONUT.java b/src/casekit/nmr/dbservice/COCONUT.java new file mode 100644 index 0000000..e97539e --- /dev/null +++ b/src/casekit/nmr/dbservice/COCONUT.java @@ -0,0 +1,167 @@ +package casekit.nmr.dbservice; + +import casekit.nmr.model.Assignment; +import casekit.nmr.model.DataSet; +import casekit.nmr.model.Signal; +import casekit.nmr.model.Spectrum; +import casekit.nmr.utils.Utils; +import org.openscience.cdk.exception.CDKException; +import org.openscience.cdk.interfaces.IAtomContainer; +import org.openscience.cdk.interfaces.IMolecularFormula; +import org.openscience.cdk.io.iterator.IteratingSDFReader; +import org.openscience.cdk.silent.SilentChemObjectBuilder; +import org.openscience.cdk.tools.CDKHydrogenAdder; +import org.openscience.cdk.tools.manipulator.AtomContainerManipulator; + +import java.io.FileNotFoundException; +import java.io.FileReader; +import java.util.*; + +public class COCONUT { + + public static List getDataSetsWithShiftPredictionFromCOCONUT(final String pathToCOCONUT, + final String[] nuclei) throws CDKException, FileNotFoundException { + final List dataSets = new ArrayList<>(); + final IteratingSDFReader iterator = new IteratingSDFReader(new FileReader(pathToCOCONUT), + SilentChemObjectBuilder.getInstance()); + IAtomContainer structure; + Spectrum spectrum; + Assignment assignment; + Map meta; + final CDKHydrogenAdder 
hydrogenAdder = CDKHydrogenAdder.getInstance(SilentChemObjectBuilder.getInstance()); + + String[] split, split2; + String spectrumPropertyString, multiplicity; + IMolecularFormula mf; + double calcShift; + List explicitHydrogenIndices, closestSignalList; + StringBuilder mfAlphabetic; + Map mfAlphabeticMap; + int atomIndex; + + while (iterator.hasNext()) { + structure = iterator.next(); + AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(structure); + explicitHydrogenIndices = casekit.nmr.Utils.getExplicitHydrogenIndices(structure); + Collections.sort(explicitHydrogenIndices); + if (!explicitHydrogenIndices.isEmpty()) { + // remove explicit hydrogens + casekit.nmr.Utils.removeAtoms(structure, "H"); + } + hydrogenAdder.addImplicitHydrogens(structure); + casekit.nmr.Utils.setAromaticityAndKekulize(structure); + + meta = new HashMap<>(); + meta.put("title", structure.getTitle()); + meta.put("id", structure.getProperty("nmrshiftdb2 ID")); + mf = casekit.nmr.Utils.getMolecularFormulaFromAtomContainer(structure); + meta.put("mfOriginal", casekit.nmr.Utils.molecularFormularToString(mf)); + mfAlphabetic = new StringBuilder(); + mfAlphabeticMap = new TreeMap<>(casekit.nmr.utils.Utils.getMolecularFormulaElementCounts( + casekit.nmr.Utils.molecularFormularToString(mf))); + for (final Map.Entry entry : mfAlphabeticMap.entrySet()) { + mfAlphabetic.append(entry.getKey()); + if (entry.getValue() + > 1) { + mfAlphabetic.append(entry.getValue()); + } + } + meta.put("mf", mfAlphabetic.toString()); + try { + final String smiles = casekit.nmr.utils.Utils.getSmilesFromAtomContainer(structure); + meta.put("smiles", smiles); + } catch (final CDKException e) { + e.printStackTrace(); + } + + for (final String nucleus : nuclei) { + final String atomType = casekit.nmr.utils.Utils.getAtomTypeFromNucleus(nucleus); + final List atomIndices = casekit.nmr.Utils.getAtomTypeIndicesByElement(structure, atomType); + // spectrumPropertyString = ((String) 
structure.getProperty("CNMR_CALC_SHIFTS")).replaceAll("[\\n\\r]", + // ""); + // split = spectrumPropertyString.split("\\d+:"); + // spectrumPropertyString = structure.getProperty("Predicted " + // + nucleus + // + " shifts", String.class); + spectrumPropertyString = structure.getProperty("Predicted " + + nucleus + + " shifts", String.class); + if (spectrumPropertyString + == null) { + spectrumPropertyString = structure.getProperty("Predicted_" + + nucleus + + "_shifts", String.class); + } + spectrumPropertyString = spectrumPropertyString.replaceAll("[\\n\\r]", ";"); + split = spectrumPropertyString.split(";"); + spectrum = new Spectrum(); + spectrum.setNuclei(new String[]{nucleus}); + spectrum.setSignals(new ArrayList<>()); + assignment = new Assignment(); + assignment.setNuclei(spectrum.getNuclei()); + assignment.initAssignments(spectrum.getSignalCount()); + // for (int i = 1; i + for (int i = 0; i + < split.length; i++) { + // split2 = split[i].split(","); + split2 = split[i].split("\\s+"); + // calcShift = Double.parseDouble(split2[0].split("Exact = ")[1]); + atomIndex = atomIndices.get(i); //Integer.parseInt(split2[0].split("\\[")[0]) + //- 1; + // System.out.println("// COCONUT " + // + structure.getProperty("cdk:Title")); + // System.out.println(atomIndex); + calcShift = Double.parseDouble(split2[1]); + // System.out.println(calcShift); + // System.out.println(structure.getAtomCount()); + multiplicity = Utils.getMultiplicityFromProtonsCount(structure.getAtom(atomIndex) + .getImplicitHydrogenCount()) + .toLowerCase(); + // add assignment (at first here because of search for already existing equivalent signals) + // just to be sure that we take the right signal if equivalences are present + closestSignalList = spectrum.pickByClosestShift(calcShift, 0, 0.0); + closestSignalList.retainAll(spectrum.pickByMultiplicity(multiplicity)); + if (closestSignalList.isEmpty()) { + assignment.addAssignment(0, new int[]{atomIndex}); + } else { + 
assignment.addAssignmentEquivalence(0, closestSignalList.get(0), atomIndex); + } + // add signal + spectrum.addSignal( + new Signal(new String[]{nucleus}, new Double[]{calcShift}, multiplicity, "signal", null, 1, + 0)); + } + + // System.out.println("// COCONUT " + // + structure.getTitle()); + // System.out.println("// ???"); + // System.out.println("// " + // + casekit.nmr.Utils.molecularFormularToString(mf)); + // for (int i = 0; i + // < spectrum.getSignalCount(); i++) { + // System.out.println(nucleus + // + ", " + // + spectrum.getSignal(i) + // .getShift(0) + // + ", " + // + spectrum.getSignal(i) + // .getMultiplicity() + // + ", 0.0, " + // + spectrum.getSignal(i) + // .getEquivalencesCount()); + // } + + // if no spectrum could be built or the number of signals in spectrum is different than the atom number in molecule + if (casekit.nmr.Utils.getDifferenceSpectrumSizeAndMolecularFormulaCount(spectrum, mf, 0) + != 0) { + continue; + } + + dataSets.add(new DataSet(structure, spectrum, assignment, meta)); + } + } + + return dataSets; + } + +} From 82c0f9959425bb3277b0fd811d274182a6aa420e Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Sun, 6 Jun 2021 13:38:51 +0200 Subject: [PATCH 206/405] chore: update of toString method --- src/casekit/nmr/model/Signal.java | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/casekit/nmr/model/Signal.java b/src/casekit/nmr/model/Signal.java index 972b53e..acadc83 100644 --- a/src/casekit/nmr/model/Signal.java +++ b/src/casekit/nmr/model/Signal.java @@ -92,16 +92,18 @@ public Signal buildClone() { @Override public String toString() { return "Signal{" - + "shifts=" + + "nuclei=" + + Arrays.toString(this.nuclei) + + ", shifts=" + Arrays.toString(this.shifts) + ", multiplicity='" + this.multiplicity + '\'' - + ", intensity=" - + this.intensity + ", kind='" + this.kind + '\'' + + ", intensity=" + + this.intensity + ", equivalencesCount=" + this.equivalencesCount + ", phase=" From 
aa7ff8a9862ee7876421a22e4f38db5d223a4b4e Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Sun, 6 Jun 2021 16:47:49 +0200 Subject: [PATCH 207/405] chore: update of toString method --- src/casekit/nmr/model/Spectrum.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/casekit/nmr/model/Spectrum.java b/src/casekit/nmr/model/Spectrum.java index 3f02338..a3605d3 100644 --- a/src/casekit/nmr/model/Spectrum.java +++ b/src/casekit/nmr/model/Spectrum.java @@ -402,7 +402,9 @@ public Spectrum buildClone() { @Override public String toString() { return "Spectrum{" - + "description='" + + "nuclei=" + + Arrays.toString(this.nuclei) + + ", description='" + this.description + '\'' + ", specType='" From 337fc52776bffee42cd7c2b5ec57fd78b33ddaca Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Sun, 6 Jun 2021 17:00:13 +0200 Subject: [PATCH 208/405] fix: improvement in multiplicity picking --- src/casekit/nmr/model/Spectrum.java | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/src/casekit/nmr/model/Spectrum.java b/src/casekit/nmr/model/Spectrum.java index a3605d3..4e27d23 100644 --- a/src/casekit/nmr/model/Spectrum.java +++ b/src/casekit/nmr/model/Spectrum.java @@ -290,20 +290,21 @@ public int getSignalIndex(final Signal signal) { */ public List pickByMultiplicity(final String multiplicity) { final List matchIndices = new ArrayList<>(); - for (int s = 0; s - < this.getSignalCount(); s++) { - if ((this.getSignal(s) - .getMultiplicity() - == null - && multiplicity - == null) - || (this.getSignal(s) - .getMultiplicity() - != null - && this.getSignal(s) - .getMultiplicity() - .equals(multiplicity))) { - matchIndices.add(s); + if (multiplicity + != null) { + for (int s = 0; s + < this.getSignalCount(); s++) { + if (this.getSignal(s) + .getMultiplicity() + == null + || (this.getSignal(s) + .getMultiplicity() + != null + && this.getSignal(s) + .getMultiplicity() + .equals(multiplicity))) { + matchIndices.add(s); + } } } 
From e5794d1b0faf335493c37cf8342b216d9557e92a Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Sun, 6 Jun 2021 17:01:59 +0200 Subject: [PATCH 209/405] fix: return lower case multiplicity string --- src/casekit/nmr/utils/Utils.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/casekit/nmr/utils/Utils.java b/src/casekit/nmr/utils/Utils.java index cc98f29..a11ac83 100644 --- a/src/casekit/nmr/utils/Utils.java +++ b/src/casekit/nmr/utils/Utils.java @@ -29,13 +29,13 @@ public class Utils { public static String getMultiplicityFromProtonsCount(final int protonsCount) { switch (protonsCount) { case 0: - return "S"; + return "s"; case 1: - return "D"; + return "d"; case 2: - return "T"; + return "t"; case 3: - return "Q"; + return "q"; default: return null; } From 46aa80460aaf603d0358b9a0bd71d66dc099fa1c Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Sun, 6 Jun 2021 17:13:49 +0200 Subject: [PATCH 210/405] fix: wrong explicit H removals; optional second sprectrum property string --- src/casekit/nmr/dbservice/COCONUT.java | 37 ++++++++++---------------- 1 file changed, 14 insertions(+), 23 deletions(-) diff --git a/src/casekit/nmr/dbservice/COCONUT.java b/src/casekit/nmr/dbservice/COCONUT.java index e97539e..a1537fd 100644 --- a/src/casekit/nmr/dbservice/COCONUT.java +++ b/src/casekit/nmr/dbservice/COCONUT.java @@ -15,7 +15,10 @@ import java.io.FileNotFoundException; import java.io.FileReader; -import java.util.*; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; public class COCONUT { @@ -34,39 +37,22 @@ public static List getDataSetsWithShiftPredictionFromCOCONUT(final Stri String spectrumPropertyString, multiplicity; IMolecularFormula mf; double calcShift; - List explicitHydrogenIndices, closestSignalList; - StringBuilder mfAlphabetic; - Map mfAlphabeticMap; + List closestSignalList; int atomIndex; while (iterator.hasNext()) { structure = iterator.next(); 
AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(structure); - explicitHydrogenIndices = casekit.nmr.Utils.getExplicitHydrogenIndices(structure); - Collections.sort(explicitHydrogenIndices); - if (!explicitHydrogenIndices.isEmpty()) { - // remove explicit hydrogens - casekit.nmr.Utils.removeAtoms(structure, "H"); + if (!casekit.nmr.Utils.containsExplicitHydrogens(structure)) { + hydrogenAdder.addImplicitHydrogens(structure); } - hydrogenAdder.addImplicitHydrogens(structure); casekit.nmr.Utils.setAromaticityAndKekulize(structure); meta = new HashMap<>(); meta.put("title", structure.getTitle()); - meta.put("id", structure.getProperty("nmrshiftdb2 ID")); + meta.put("id", structure.getProperty("ID")); mf = casekit.nmr.Utils.getMolecularFormulaFromAtomContainer(structure); - meta.put("mfOriginal", casekit.nmr.Utils.molecularFormularToString(mf)); - mfAlphabetic = new StringBuilder(); - mfAlphabeticMap = new TreeMap<>(casekit.nmr.utils.Utils.getMolecularFormulaElementCounts( - casekit.nmr.Utils.molecularFormularToString(mf))); - for (final Map.Entry entry : mfAlphabeticMap.entrySet()) { - mfAlphabetic.append(entry.getKey()); - if (entry.getValue() - > 1) { - mfAlphabetic.append(entry.getValue()); - } - } - meta.put("mf", mfAlphabetic.toString()); + meta.put("mf", casekit.nmr.Utils.molecularFormularToString(mf)); try { final String smiles = casekit.nmr.utils.Utils.getSmilesFromAtomContainer(structure); meta.put("smiles", smiles); @@ -92,6 +78,11 @@ public static List getDataSetsWithShiftPredictionFromCOCONUT(final Stri + nucleus + "_shifts", String.class); } + if (spectrumPropertyString + == null) { + continue; + } + spectrumPropertyString = spectrumPropertyString.replaceAll("[\\n\\r]", ";"); split = spectrumPropertyString.split(";"); spectrum = new Spectrum(); From e2185a65b70bb945abe9267d3be64429175b247e Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Sun, 6 Jun 2021 17:15:53 +0200 Subject: [PATCH 211/405] feat: added HOSECodeShiftStatistics --- 
.../nmr/analysis/HOSECodeShiftStatistics.java | 192 ++++++++++++++++++ .../HOSECodeShiftStatisticsBuilder.java | 75 ------- 2 files changed, 192 insertions(+), 75 deletions(-) create mode 100644 src/casekit/nmr/analysis/HOSECodeShiftStatistics.java delete mode 100644 src/casekit/nmr/analysis/HOSECodeShiftStatisticsBuilder.java diff --git a/src/casekit/nmr/analysis/HOSECodeShiftStatistics.java b/src/casekit/nmr/analysis/HOSECodeShiftStatistics.java new file mode 100644 index 0000000..bca1613 --- /dev/null +++ b/src/casekit/nmr/analysis/HOSECodeShiftStatistics.java @@ -0,0 +1,192 @@ +package casekit.nmr.analysis; + +import casekit.nmr.hose.HOSECodeBuilder; +import casekit.nmr.model.DataSet; +import casekit.nmr.model.Signal; +import com.google.gson.Gson; +import com.google.gson.GsonBuilder; +import com.google.gson.JsonObject; +import com.google.gson.JsonParser; +import com.google.gson.reflect.TypeToken; +import org.bson.Document; +import org.openscience.cdk.exception.CDKException; +import org.openscience.cdk.interfaces.IAtomContainer; + +import java.io.*; +import java.util.*; + +public class HOSECodeShiftStatistics { + + private final static Gson GSON = new GsonBuilder().setLenient() + .create(); //.setPrettyPrinting() + + public static Map>> collectHOSECodeShifts(final List dataSetList, + final int maxSphere, + final boolean use3D) { + return collectHOSECodeShifts(dataSetList, maxSphere, use3D, new HashMap<>()); + } + + public static Map>> collectHOSECodeShifts(final List dataSetList, + final int maxSphere, final boolean use3D, + final Map>> hoseCodeShifts) { + // final ExtendedHOSECodeGenerator extendedHOSECodeGenerator = new ExtendedHOSECodeGenerator(); + IAtomContainer structure; + Signal signal; + String hoseCode; + String solvent; + + for (final DataSet dataSet : dataSetList) { + structure = dataSet.getStructure() + .toAtomContainer(); + // if (use3D) { + // try { + // /* !!! No explicit H in mol !!! 
*/ + // Utils.convertExplicitToImplicitHydrogens(structure); + // /* add explicit H atoms */ + // AtomUtils.addAndPlaceHydrogens(structure); + // /* detect aromaticity */ + // Utils.setAromaticityAndKekulize(structure); + // } catch (final IOException | ClassNotFoundException | CDKException e) { + // e.printStackTrace(); + // continue; + // } + // } + solvent = dataSet.getSpectrum() + .getSolvent(); + if (solvent + == null + || solvent.equals("")) { + solvent = "Unknown"; + } + for (int i = 0; i + < structure.getAtomCount(); i++) { + signal = dataSet.getSpectrum() + .getSignal(dataSet.getAssignment() + .getIndex(0, i)); + if (signal + != null) { + try { + for (int sphere = 1; sphere + <= maxSphere; sphere++) { + // if (use3D) { + // hoseCode = extendedHOSECodeGenerator.getHOSECode(structure, structure.getAtom(i), + // maxSphere); + // } else { + hoseCode = HOSECodeBuilder.buildHOSECode(structure, i, sphere, false); + // } + hoseCodeShifts.putIfAbsent(hoseCode, new HashMap<>()); + hoseCodeShifts.get(hoseCode) + .putIfAbsent(solvent, new ArrayList<>()); + hoseCodeShifts.get(hoseCode) + .get(solvent) + .add(signal.getShift(0)); + } + } catch (final CDKException e) { + e.printStackTrace(); + } + } + } + } + + return hoseCodeShifts; + } + + public static Map> buildHOSECodeShiftStatistics( + final Map>> hoseCodeShifts) { + + final Map> hoseCodeShiftStatistics = new HashMap<>(); + List values; + for (final Map.Entry>> hoseCodes : hoseCodeShifts.entrySet()) { + hoseCodeShiftStatistics.put(hoseCodes.getKey(), new HashMap<>()); + for (final Map.Entry> solvents : hoseCodes.getValue() + .entrySet()) { + values = solvents.getValue(); //casekit.nmr.Utils.removeOutliers(solvents.getValue(), 1.5); + hoseCodeShiftStatistics.get(hoseCodes.getKey()) + .put(solvents.getKey(), + new Double[]{(double) values.size(), Collections.min(values), + casekit.nmr.Utils.getMean(values), + // casekit.nmr.Utils.getRMS(values), + casekit.nmr.Utils.getMedian(values), Collections.max(values)}); + } + 
} + + return hoseCodeShiftStatistics; + } + + public static boolean writeHOSECodeShiftStatistics(final Map> hoseCodeShifts, + final String pathToJsonFile) { + try { + final BufferedWriter bw = new BufferedWriter(new FileWriter(pathToJsonFile)); + bw.append("{"); + bw.newLine(); + bw.flush(); + + Document subDocument; + String json; + long counter = 0; + for (final Map.Entry> entry : hoseCodeShifts.entrySet()) { + subDocument = new Document(); + subDocument.append("HOSECode", entry.getKey()); + subDocument.append("values", GSON.toJson(entry.getValue())); + json = new Document(String.valueOf(counter), subDocument).toJson(); + bw.append(json, 1, json.length() + - 1); + if (counter + < hoseCodeShifts.size() + - 1) { + bw.append(","); + } + bw.newLine(); + bw.flush(); + + counter++; + } + + bw.append("}"); + bw.flush(); + bw.close(); + + return true; + } catch (final IOException e) { + e.printStackTrace(); + } + + return false; + } + + public static Map> readHOSECodeShiftStatistics( + final String pathToJsonFile) throws FileNotFoundException { + final BufferedReader br = new BufferedReader(new FileReader(pathToJsonFile)); + final Map> hoseCodeShiftStatistics = new HashMap<>(); + // add all task to do + br.lines() + .forEach(line -> { + if ((line.trim() + .length() + > 1) + || (!line.trim() + .startsWith("{") + && !line.trim() + .endsWith("}"))) { + final StringBuilder hoseCodeShiftsStatisticInJSON = new StringBuilder(); + if (line.endsWith(",")) { + hoseCodeShiftsStatisticInJSON.append(line, 0, line.length() + - 1); + } else { + hoseCodeShiftsStatisticInJSON.append(line); + } + final JsonObject jsonObject = JsonParser.parseString(hoseCodeShiftsStatisticInJSON.substring( + hoseCodeShiftsStatisticInJSON.toString() + .indexOf("{"))) + .getAsJsonObject(); + hoseCodeShiftStatistics.put(jsonObject.get("HOSECode") + .getAsString(), GSON.fromJson(jsonObject.get("values") + .getAsString(), + new TypeToken>() { + }.getType())); + } + }); + + return hoseCodeShiftStatistics; + } +} 
diff --git a/src/casekit/nmr/analysis/HOSECodeShiftStatisticsBuilder.java b/src/casekit/nmr/analysis/HOSECodeShiftStatisticsBuilder.java deleted file mode 100644 index 23a3d79..0000000 --- a/src/casekit/nmr/analysis/HOSECodeShiftStatisticsBuilder.java +++ /dev/null @@ -1,75 +0,0 @@ -package casekit.nmr.analysis; - -import casekit.nmr.hose.HOSECodeBuilder; -import casekit.nmr.model.DataSet; -import casekit.nmr.model.Signal; -import org.openscience.cdk.exception.CDKException; -import org.openscience.cdk.interfaces.IAtomContainer; -import org.openscience.nmrshiftdb.util.ExtendedHOSECodeGenerator; - -import java.util.*; - -public class HOSECodeShiftStatisticsBuilder { - - public static Map> buildHOSECodeShiftStatistics(final List dataSetList, - final int maxSphere, - final boolean use3D) { - final Map>> hoseCodeShifts = new HashMap<>(); - final ExtendedHOSECodeGenerator extendedHOSECodeGenerator = new ExtendedHOSECodeGenerator(); - IAtomContainer structure; - Signal signal; - String hoseCode; - String solvent; - for (final DataSet dataSet : dataSetList) { - structure = dataSet.getStructure() - .toAtomContainer(); - solvent = dataSet.getSpectrum() - .getSolvent(); - for (int i = 0; i - < structure.getAtomCount(); i++) { - signal = dataSet.getSpectrum() - .getSignal(dataSet.getAssignment() - .getIndex(0, i)); - if (signal - != null) { - try { - for (int sphere = 1; sphere - <= maxSphere; sphere++) { - if (use3D) { - hoseCode = extendedHOSECodeGenerator.getHOSECode(structure, structure.getAtom(i), - maxSphere); - } else { - hoseCode = HOSECodeBuilder.buildHOSECode(structure, i, sphere, false); - } - hoseCodeShifts.putIfAbsent(hoseCode, new HashMap<>()); - hoseCodeShifts.get(hoseCode) - .putIfAbsent(solvent, new ArrayList<>()); - hoseCodeShifts.get(hoseCode) - .get(solvent) - .add(signal.getShift(0)); - } - } catch (final CDKException e) { - e.printStackTrace(); - } - } - } - } - final Map> hoseCodeShiftStatistics = new HashMap<>(); - List values; - for (final Map.Entry>> 
hoseCodes : hoseCodeShifts.entrySet()) { - hoseCodeShiftStatistics.put(hoseCodes.getKey(), new HashMap<>()); - for (final Map.Entry> solvents : hoseCodes.getValue() - .entrySet()) { - values = solvents.getValue(); //casekit.nmr.Utils.removeOutliers(solvents.getValue(), 1.5); - hoseCodeShiftStatistics.get(hoseCodes.getKey()) - .put(solvents.getKey(), - new Double[]{Double.valueOf(values.size()), Collections.min(values), - casekit.nmr.Utils.getMean(values), - // casekit.nmr.Utils.getRMS(values), - casekit.nmr.Utils.getMedian(values), Collections.max(values)}); - } - } - - return hoseCodeShiftStatistics; - } -} From 14c5f0b4bafeba4ba1a73db8fcd9b3c227e43363 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Sun, 6 Jun 2021 17:17:52 +0200 Subject: [PATCH 212/405] chore: added GSON and BSON as dependencies --- pom.xml | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/pom.xml b/pom.xml index 35d4662..778124f 100644 --- a/pom.xml +++ b/pom.xml @@ -50,11 +50,6 @@ cdk-bundle 2.3 - - org.openscience.nmrshiftdb - predictorh - 1.0 - commons-cli commons-cli @@ -76,5 +71,15 @@ jackson-databind 2.11.3 + + com.google.code.gson + gson + 2.8.6 + + + org.mongodb + bson + 4.2.3 + From ef42758ff4caf7a09da30995a8e157cfd0aeee56 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Sun, 6 Jun 2021 17:18:13 +0200 Subject: [PATCH 213/405] chore: use CDK@2.5 --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 778124f..164cc32 100644 --- a/pom.xml +++ b/pom.xml @@ -48,7 +48,7 @@ org.openscience.cdk cdk-bundle - 2.3 + 2.5 commons-cli From 7022ed9dda9124618b765db54f8597bac9b19aaa Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Sun, 6 Jun 2021 17:49:41 +0200 Subject: [PATCH 214/405] feat: added method for simple signal addition --- src/casekit/nmr/model/Spectrum.java | 30 ++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/src/casekit/nmr/model/Spectrum.java 
b/src/casekit/nmr/model/Spectrum.java index 4e27d23..57b8a0c 100644 --- a/src/casekit/nmr/model/Spectrum.java +++ b/src/casekit/nmr/model/Spectrum.java @@ -92,7 +92,29 @@ public int getSignalCountWithEquivalences() { } /** - * Adds a signal to this spectrum with pickPrecision of 0. + * Simply adds a signal without equivalence checks. + * + * @param signal signal to add + * + * @return + * + * @see #addSignal(Signal, double, boolean) + */ + public boolean addSignalWithoutEquivalenceSearch(final Signal signal) { + if ((signal + == null) + || !this.compareNuclei(signal.getNuclei())) { + return false; + } + // add signal at the end of signal list + this.signals.add(signal); + this.signalCount++; + + return true; + } + + /** + * Adds a signal to this spectrum with pickPrecision of 0 and enabled multiplicity check for equivalence search. * * @param signal signal to add * @@ -133,9 +155,7 @@ public boolean addSignal(final Signal signal, final double pickPrecision, final } if (closestSignalList.isEmpty()) { - // add signal at the end of signal list - this.signals.add(signal); - this.signalCount++; + this.addSignalWithoutEquivalenceSearch(signal); } else { Signal closestSignal; for (final Integer closestSignalIndex : closestSignalList) { @@ -282,7 +302,7 @@ public int getSignalIndex(final Signal signal) { } /** - * Returns the indices of signals with same multiplicity. + * Returns the indices of signals with same multiplicity (even null values). 
* * @param multiplicity multiplicity to search for * From b0ad16a79fb4ac3a1a41ec76dc6ee245a6de35cf Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Sun, 6 Jun 2021 17:50:22 +0200 Subject: [PATCH 215/405] fix: restore to previous multiplicity picking --- src/casekit/nmr/model/Spectrum.java | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/src/casekit/nmr/model/Spectrum.java b/src/casekit/nmr/model/Spectrum.java index 57b8a0c..2356d68 100644 --- a/src/casekit/nmr/model/Spectrum.java +++ b/src/casekit/nmr/model/Spectrum.java @@ -310,21 +310,20 @@ public int getSignalIndex(final Signal signal) { */ public List pickByMultiplicity(final String multiplicity) { final List matchIndices = new ArrayList<>(); - if (multiplicity - != null) { - for (int s = 0; s - < this.getSignalCount(); s++) { - if (this.getSignal(s) - .getMultiplicity() - == null - || (this.getSignal(s) - .getMultiplicity() - != null - && this.getSignal(s) - .getMultiplicity() - .equals(multiplicity))) { - matchIndices.add(s); - } + for (int s = 0; s + < this.getSignalCount(); s++) { + if ((this.getSignal(s) + .getMultiplicity() + == null + && multiplicity + == null) + || (this.getSignal(s) + .getMultiplicity() + != null + && this.getSignal(s) + .getMultiplicity() + .equals(multiplicity))) { + matchIndices.add(s); } } From 7188b4e93f2a4fff97c5e75227dc5f0650f21fb0 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Sun, 6 Jun 2021 17:59:07 +0200 Subject: [PATCH 216/405] fix: multiple multiplicity filter when adding a signal --- src/casekit/nmr/model/Spectrum.java | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/casekit/nmr/model/Spectrum.java b/src/casekit/nmr/model/Spectrum.java index 2356d68..6514775 100644 --- a/src/casekit/nmr/model/Spectrum.java +++ b/src/casekit/nmr/model/Spectrum.java @@ -160,11 +160,8 @@ public boolean addSignal(final Signal signal, final double pickPrecision, final Signal closestSignal; for (final Integer 
closestSignalIndex : closestSignalList) { closestSignal = this.getSignal(closestSignalIndex); - if (closestSignal.getMultiplicity() - .equals(signal.getMultiplicity())) { - closestSignal.setEquivalencesCount(closestSignal.getEquivalencesCount() - + signal.getEquivalencesCount()); - } + closestSignal.setEquivalencesCount(closestSignal.getEquivalencesCount() + + signal.getEquivalencesCount()); } } From 8da8922f2b794d2ad9b7fe8099dfe9bd7b959095 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Sun, 6 Jun 2021 21:30:42 +0200 Subject: [PATCH 217/405] fix: set node weight for explicit H to 0 --- src/casekit/nmr/hose/Utils.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/casekit/nmr/hose/Utils.java b/src/casekit/nmr/hose/Utils.java index 806ee70..ae03ae5 100644 --- a/src/casekit/nmr/hose/Utils.java +++ b/src/casekit/nmr/hose/Utils.java @@ -421,11 +421,12 @@ public static int getSymbolPriorityWeight(final String symbol) { case "I": return 2; case "&": - case "H": return 1; + case "H": + return 0; + default: + return 0; } - - return 0; } /** From ecd4c0813a0e9480f5b519a4da8e542dc04aed68 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Mon, 7 Jun 2021 16:26:03 +0200 Subject: [PATCH 218/405] fix: crash when adding assignments in higher dimension --- src/casekit/nmr/model/Assignment.java | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/casekit/nmr/model/Assignment.java b/src/casekit/nmr/model/Assignment.java index 0c4db9b..32972bb 100644 --- a/src/casekit/nmr/model/Assignment.java +++ b/src/casekit/nmr/model/Assignment.java @@ -196,15 +196,19 @@ public boolean addAssignment(final int dim, final int[] assignment) { if (!this.containsDim(dim)) { return false; } - final int[][][] newAssignments = new int[this.getNDim()][][]; for (int d = 0; d < this.getNDim(); d++) { - newAssignments[d] = new int[this.assignments[d].length - + 1][]; - for (int i = 0; i - < this.assignments[d].length; i++) { - 
newAssignments[d][i] = this.assignments[d][i]; + if (d + == dim) { + newAssignments[d] = new int[this.assignments[d].length + + 1][]; + for (int i = 0; i + < this.assignments[d].length; i++) { + newAssignments[d][i] = this.assignments[d][i]; + } + } else { + newAssignments[d] = this.assignments[d]; } } newAssignments[dim][this.assignments[dim].length] = assignment; From 7f3abee44bef6b2c76329aa7feda1b4d18f384cd Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Tue, 8 Jun 2021 09:34:59 +0200 Subject: [PATCH 219/405] feat: addSignal now returns the index of added signal and enables pick precision for each dimension --- src/casekit/nmr/model/Spectrum.java | 48 ++++++++++++++--------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/src/casekit/nmr/model/Spectrum.java b/src/casekit/nmr/model/Spectrum.java index 6514775..ab48d7e 100644 --- a/src/casekit/nmr/model/Spectrum.java +++ b/src/casekit/nmr/model/Spectrum.java @@ -92,13 +92,11 @@ public int getSignalCountWithEquivalences() { } /** - * Simply adds a signal without equivalence checks. + * Simply adds a signal to the end of the signal list without equivalence checks. * * @param signal signal to add * * @return - * - * @see #addSignal(Signal, double, boolean) */ public boolean addSignalWithoutEquivalenceSearch(final Signal signal) { if ((signal @@ -120,53 +118,55 @@ public boolean addSignalWithoutEquivalenceSearch(final Signal signal) { * * @return * - * @see #addSignal(Signal, double, boolean) + * @see #addSignal(Signal, double[], boolean) */ - public boolean addSignal(final Signal signal) { - return this.addSignal(signal, 0.0, true); + public Integer addSignal(final Signal signal) { + final double[] pickPrecisions = new double[signal.getNDim()]; + for (int dim = 0; dim + < signal.getNDim(); dim++) { + pickPrecisions[dim] = 0.0; + } + return this.addSignal(signal, pickPrecisions, true); } /** * Adds a signal to this spectrum and stores an equivalent signal index. 
* * @param signal signal to add - * @param pickPrecision precision to find equivalent signals to store in + * @param pickPrecisions precisions per dimension to find equivalent signals to store in * @param checkMultiplicity indicates whether to compare the multiplicity of signal * to add while searching for equivalences * * @return */ - public boolean addSignal(final Signal signal, final double pickPrecision, final boolean checkMultiplicity) { + public Integer addSignal(final Signal signal, final double[] pickPrecisions, final boolean checkMultiplicity) { if ((signal == null) || !this.compareNuclei(signal.getNuclei())) { - return false; + return null; } // check for equivalent signals in all dimensions - final List closestSignalList = this.pickByClosestShift(signal.getShift(0), 0, pickPrecision); + final List closestSignalIndexList = this.pickByClosestShift(signal.getShift(0), 0, pickPrecisions[0]); for (int dim = 1; dim < this.getNDim(); dim++) { - closestSignalList.retainAll(this.pickByClosestShift(signal.getShift(dim), dim, pickPrecision)); + closestSignalIndexList.retainAll(this.pickByClosestShift(signal.getShift(dim), dim, pickPrecisions[dim])); } - if (checkMultiplicity) { - closestSignalList.retainAll(this.pickByMultiplicity(signal.getMultiplicity())); + closestSignalIndexList.retainAll(this.pickByMultiplicity(signal.getMultiplicity())); } - if (closestSignalList.isEmpty()) { + // if no equivalent signal was found then just add as new signal + if (closestSignalIndexList.isEmpty()) { this.addSignalWithoutEquivalenceSearch(signal); - } else { - Signal closestSignal; - for (final Integer closestSignalIndex : closestSignalList) { - closestSignal = this.getSignal(closestSignalIndex); - closestSignal.setEquivalencesCount(closestSignal.getEquivalencesCount() - + signal.getEquivalencesCount()); - } + return this.getSignalCount() + - 1; } - - return true; - + // otherwise store as equivalence (in first hit only) + final Signal closestSignal = 
this.getSignal(closestSignalIndexList.get(0)); + closestSignal.setEquivalencesCount(closestSignal.getEquivalencesCount() + + signal.getEquivalencesCount()); + return closestSignalIndexList.get(0); } public boolean removeSignal(final Signal signal) { From c40b9439f1cc8faad6492df7edbafa57ab5a9d20 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Tue, 8 Jun 2021 09:37:46 +0200 Subject: [PATCH 220/405] chore: removed unused method --- src/casekit/nmr/utils/Match.java | 32 +------------------------------- 1 file changed, 1 insertion(+), 31 deletions(-) diff --git a/src/casekit/nmr/utils/Match.java b/src/casekit/nmr/utils/Match.java index a8684f8..e428112 100644 --- a/src/casekit/nmr/utils/Match.java +++ b/src/casekit/nmr/utils/Match.java @@ -42,37 +42,7 @@ private static boolean checkDimensions(final Spectrum spectrum1, final Spectrum } /** - * Combines selected dimensions of two spectra while considering possible equivalent signals - * via the {@code pickPrecision} parameter and multiplicity comparison. - * In {@code spectrum1}, the equivalent signals have to be set. - * - * @param spectrum1 first spectrum, incl. 
equivalent signals - * @param spectrum2 second spectrum - * @param dim1 dimension of first spectrum to combine - * @param dim2 dimension of second spectrum to combine - * @param pickPrecision tolerance value used for signal shift matching to - * find equivalent signals - * @param checkMultiplicity indicates whether to compare the multiplicity of matched signals - * - * @return null if one spectrum does not contain the selected dimension - */ - public static Spectrum combineSpectra(final Spectrum spectrum1, final Spectrum spectrum2, final int dim1, - final int dim2, final double pickPrecision, final boolean checkMultiplicity) { - if (!Match.checkDimensions(spectrum1, spectrum2, dim1, dim2)) { - return null; - } - // create new spectra which is to fill with signals of both spectra - final Spectrum combinedSpectrum = spectrum1.buildClone(); - // fill in signals from spectrum2 - // consider the possibility of potential equivalent signals here - for (final Signal signalSpectrum2 : spectrum2.getSignals()) { - combinedSpectrum.addSignal(signalSpectrum2.buildClone(), pickPrecision, checkMultiplicity); - } - return combinedSpectrum; - } - - /** - * Calculates the Tanimoto coefficient between two spectra in given dimensions. + * Calculates the continuous Tanimoto coefficient between two spectra in given dimensions. 
* * @param spectrum1 first spectrum * @param spectrum2 second spectrum From 51e35d1432e3852e99ea8e00d631060aa5501098 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Tue, 8 Jun 2021 14:08:03 +0200 Subject: [PATCH 221/405] feat: integration of 1D and 2D spectrum prediction based on HOSE code (no diastereotopic distinctions for now) --- .../nmr/analysis/HOSECodeShiftStatistics.java | 129 +++++++--- src/casekit/nmr/utils/Predict.java | 233 ++++++++++++------ 2 files changed, 260 insertions(+), 102 deletions(-) diff --git a/src/casekit/nmr/analysis/HOSECodeShiftStatistics.java b/src/casekit/nmr/analysis/HOSECodeShiftStatistics.java index bca1613..3a3d886 100644 --- a/src/casekit/nmr/analysis/HOSECodeShiftStatistics.java +++ b/src/casekit/nmr/analysis/HOSECodeShiftStatistics.java @@ -1,6 +1,10 @@ package casekit.nmr.analysis; +import casekit.nmr.Utils; +import casekit.nmr.dbservice.COCONUT; +import casekit.nmr.dbservice.NMRShiftDB; import casekit.nmr.hose.HOSECodeBuilder; +import casekit.nmr.hose.model.ConnectionTree; import casekit.nmr.model.DataSet; import casekit.nmr.model.Signal; import com.google.gson.Gson; @@ -21,36 +25,63 @@ public class HOSECodeShiftStatistics { .create(); //.setPrettyPrinting() public static Map>> collectHOSECodeShifts(final List dataSetList, - final int maxSphere, - final boolean use3D) { - return collectHOSECodeShifts(dataSetList, maxSphere, use3D, new HashMap<>()); + final Integer maxSphere) { + return collectHOSECodeShifts(dataSetList, maxSphere, new HashMap<>()); } + /** + * This method expects datasets containing structures without explicit hydrogens. 
+ * + * @param dataSetList + * @param maxSphere + * @param hoseCodeShifts + * + * @return + */ public static Map>> collectHOSECodeShifts(final List dataSetList, - final int maxSphere, final boolean use3D, + final Integer maxSphere, final Map>> hoseCodeShifts) { - // final ExtendedHOSECodeGenerator extendedHOSECodeGenerator = new ExtendedHOSECodeGenerator(); IAtomContainer structure; Signal signal; - String hoseCode; + String hoseCode, atomTypeSpectrum; String solvent; - + Map atomIndexMap; // from explicit H to heavy atom + ConnectionTree connectionTree; + int maxSphereTemp; for (final DataSet dataSet : dataSetList) { + System.out.println(dataSet.getSpectrum()); + System.out.println(Arrays.deepToString(dataSet.getAssignment() + .getAssignments())); structure = dataSet.getStructure() .toAtomContainer(); - // if (use3D) { - // try { - // /* !!! No explicit H in mol !!! */ - // Utils.convertExplicitToImplicitHydrogens(structure); - // /* add explicit H atoms */ - // AtomUtils.addAndPlaceHydrogens(structure); - // /* detect aromaticity */ - // Utils.setAromaticityAndKekulize(structure); - // } catch (final IOException | ClassNotFoundException | CDKException e) { - // e.printStackTrace(); - // continue; - // } - // } + if (Utils.containsExplicitHydrogens(structure)) { + System.out.println("!!!Dataset skipped because of previously set explicit hydrogens!!!"); + continue; + } + try { + // create atom index map to know which indices the explicit hydrogens will have + atomIndexMap = new HashMap<>(); + int nextAtomIndexExplicitH = structure.getAtomCount(); + for (int i = 0; i + < structure.getAtomCount(); i++) { + if (structure.getAtom(i) + .getImplicitHydrogenCount() + != null) { + for (int j = 0; j + < structure.getAtom(i) + .getImplicitHydrogenCount(); j++) { + atomIndexMap.put(nextAtomIndexExplicitH, i); + nextAtomIndexExplicitH++; + } + } + } + + casekit.nmr.Utils.convertImplicitToExplicitHydrogens(structure); + casekit.nmr.Utils.setAromaticityAndKekulize(structure); + 
} catch (final CDKException e) { + e.printStackTrace(); + continue; + } solvent = dataSet.getSpectrum() .getSolvent(); if (solvent @@ -58,22 +89,37 @@ public static Map>> collectHOSECodeShifts(final || solvent.equals("")) { solvent = "Unknown"; } + atomTypeSpectrum = casekit.nmr.utils.Utils.getAtomTypeFromNucleus(dataSet.getSpectrum() + .getNuclei()[0]); for (int i = 0; i < structure.getAtomCount(); i++) { - signal = dataSet.getSpectrum() - .getSignal(dataSet.getAssignment() - .getIndex(0, i)); + signal = null; + if (structure.getAtom(i) + .getSymbol() + .equals(atomTypeSpectrum)) { + if (atomTypeSpectrum.equals("H")) { + signal = dataSet.getSpectrum() + .getSignal(dataSet.getAssignment() + .getIndex(0, atomIndexMap.get(i))); + } else { + signal = dataSet.getSpectrum() + .getSignal(dataSet.getAssignment() + .getIndex(0, i)); + } + } if (signal != null) { try { + if (maxSphere + == null) { + connectionTree = HOSECodeBuilder.buildConnectionTree(structure, i, null); + maxSphereTemp = connectionTree.getMaxSphere(); + } else { + maxSphereTemp = maxSphere; + } for (int sphere = 1; sphere - <= maxSphere; sphere++) { - // if (use3D) { - // hoseCode = extendedHOSECodeGenerator.getHOSECode(structure, structure.getAtom(i), - // maxSphere); - // } else { + <= maxSphereTemp; sphere++) { hoseCode = HOSECodeBuilder.buildHOSECode(structure, i, sphere, false); - // } hoseCodeShifts.putIfAbsent(hoseCode, new HashMap<>()); hoseCodeShifts.get(hoseCode) .putIfAbsent(solvent, new ArrayList<>()); @@ -113,6 +159,31 @@ public static Map> buildHOSECodeShiftStatistics( return hoseCodeShiftStatistics; } + public static Map> buildHOSECodeShiftStatistics(final String[] pathsToNMRShiftDBs, + final String[] pathsToCOCONUTs, + final String[] nuclei, + final Integer maxSphere) { + try { + final Map>> hoseCodeShifts = new HashMap<>(); + for (int i = 0; i + < pathsToNMRShiftDBs.length; i++) { + HOSECodeShiftStatistics.collectHOSECodeShifts( + 
NMRShiftDB.getDataSetsFromNMRShiftDB(pathsToNMRShiftDBs[i], nuclei), maxSphere, hoseCodeShifts); + } + for (int i = 0; i + < pathsToCOCONUTs.length; i++) { + HOSECodeShiftStatistics.collectHOSECodeShifts( + COCONUT.getDataSetsWithShiftPredictionFromCOCONUT(pathsToCOCONUTs[i], nuclei), maxSphere, + hoseCodeShifts); + } + return HOSECodeShiftStatistics.buildHOSECodeShiftStatistics(hoseCodeShifts); + } catch (final FileNotFoundException | CDKException e) { + e.printStackTrace(); + } + + return new HashMap<>(); + } + public static boolean writeHOSECodeShiftStatistics(final Map> hoseCodeShifts, final String pathToJsonFile) { try { diff --git a/src/casekit/nmr/utils/Predict.java b/src/casekit/nmr/utils/Predict.java index dcfb4d9..16334e5 100644 --- a/src/casekit/nmr/utils/Predict.java +++ b/src/casekit/nmr/utils/Predict.java @@ -24,13 +24,19 @@ package casekit.nmr.utils; -import casekit.nmr.Utils; import casekit.nmr.hose.HOSECodeBuilder; +import casekit.nmr.hose.model.ConnectionTree; +import casekit.nmr.hose.model.ConnectionTreeNode; +import casekit.nmr.model.Assignment; +import casekit.nmr.model.DataSet; import casekit.nmr.model.Signal; import casekit.nmr.model.Spectrum; import org.openscience.cdk.exception.CDKException; import org.openscience.cdk.interfaces.IAtom; import org.openscience.cdk.interfaces.IAtomContainer; +import org.openscience.cdk.silent.SilentChemObjectBuilder; +import org.openscience.cdk.tools.CDKHydrogenAdder; +import org.openscience.cdk.tools.manipulator.AtomContainerManipulator; import java.util.ArrayList; import java.util.HashMap; @@ -43,99 +49,180 @@ public class Predict { /** - * Predicts a shift value for a central atom based on its HOSE code and a - * given HOSE code lookup table. The prediction is done by using the median - * of all occurring shifts in lookup table for the given HOSE code.
- * Specified for carbons (13C) only -> {@link casekit.nmr.utils.Utils#getMultiplicityFromProtonsCount(int)}. + * Diastereotopic distinctions are not provided yet. * - * @param HOSECodeLookupTable HashMap containing HOSE codes as keys and a list of chemical shifts - * of occurring central atoms as values - * @param HOSECode specific HOSE code to use for shift prediction + * @param hoseCodeShiftStatistics + * @param structure + * @param solvent + * @param nucleus * - * @return null if HOSE code does not exist in lookup table - * - * @see casekit.nmr.Utils#getMedian(List) + * @return */ - public static Double predictShift(final Map> HOSECodeLookupTable, final String HOSECode) { - if (HOSECodeLookupTable.containsKey(HOSECode)) { - return Utils.getMedian(HOSECodeLookupTable.get(HOSECode)); + public static DataSet predict1D(final Map> hoseCodeShiftStatistics, + final IAtomContainer structure, final String solvent, final String nucleus) { + final int minMatchingSphere = 1; + final Spectrum spectrum = new Spectrum(); + spectrum.setNuclei(new String[]{nucleus}); + spectrum.setSolvent(solvent); + spectrum.setSignals(new ArrayList<>()); + final Assignment assignment = new Assignment(); + assignment.setNuclei(spectrum.getNuclei()); + assignment.initAssignments(0); + + final CDKHydrogenAdder hydrogenAdder = CDKHydrogenAdder.getInstance(SilentChemObjectBuilder.getInstance()); + String hoseCode, atomTypeSpectrum; + Signal signal; + Double shift; + Integer addedSignalIndex; + ConnectionTree connectionTree; + + try { + AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(structure); + casekit.nmr.Utils.convertExplicitToImplicitHydrogens(structure); + hydrogenAdder.addImplicitHydrogens(structure); + casekit.nmr.Utils.convertImplicitToExplicitHydrogens(structure); + casekit.nmr.Utils.setAromaticityAndKekulize(structure); + + for (int i = 0; i + < structure.getAtomCount(); i++) { + atomTypeSpectrum = casekit.nmr.utils.Utils.getAtomTypeFromNucleus(nucleus); + if 
(structure.getAtom(i) + .getSymbol() + .equals(atomTypeSpectrum)) { + connectionTree = HOSECodeBuilder.buildConnectionTree(structure, i, null); + shift = null; + for (int s = connectionTree.getMaxSphere(); s + >= minMatchingSphere; s--) { + hoseCode = HOSECodeBuilder.buildHOSECode(structure, i, s, false); + if (hoseCodeShiftStatistics.containsKey(hoseCode) + && hoseCodeShiftStatistics.get(hoseCode) + .containsKey(solvent)) { + shift = hoseCodeShiftStatistics.get(hoseCode) + .get(solvent)[3]; // take median value + break; + } + } + signal = new Signal(); + signal.setNuclei(spectrum.getNuclei()); + signal.setEquivalencesCount(1); + // signal.setMultiplicity(); + signal.setKind("signal"); + signal.setShifts(new Double[]{shift}); + addedSignalIndex = spectrum.addSignal(signal); + if (addedSignalIndex + >= assignment.getSetAssignmentsCount(0)) { + assignment.addAssignment(0, new int[]{i}); + } else { + assignment.addAssignmentEquivalence(0, addedSignalIndex, i); + } + } + } + } catch (final CDKException e) { + e.printStackTrace(); + return null; } - return null; + return new DataSet(structure, spectrum, assignment, new HashMap<>()); } /** - * Predicts a signal for a central atom based on its HOSE code and a - * given HOSE code lookup table. The prediction is done by using the mean - * of all occurring shifts in lookup table for the given HOSE code.
- * Specified for carbons (13C) only -> {@link casekit.nmr.utils.Utils#getMultiplicityFromProtonsCount(int)}. + * Predicts a 2D spectrum from two 1D spectra. Each 1D spectra needs to contain the same solvent information. + * Diastereotopic distinctions are not provided yet. * - * @param HOSECodeLookupTable HashMap containing HOSE codes as keys and a list of chemical shifts - * of occurring central atoms as values - * @param ac structure to predict from - * @param atomIndex index of central atom in structure for HOSE code generation - * @param maxSphere maximum sphere to use for HOSE code generation or null for unlimited - * @param nucleus nucleus (e.g. "13C") for signal creation + * @param hoseCodeShiftStatistics HOSE code shift statistics + * @param structure structure to use for prediction + * @param nuclei nuclei for 2D spectrum to predict + * @param solvent solvent + * @param minPathLength minimal path length + * @param maxPathLength maximal path length * - * @return null if HOSE code of selected atom does not exist in lookup table - * - * @throws CDKException - * @see #predictShift(Map, String) + * @return */ - public static Signal predictSignal(final Map> HOSECodeLookupTable, - final IAtomContainer ac, final int atomIndex, final Integer maxSphere, - final String nucleus) throws Exception { - if (!Utils.checkIndexInAtomContainer(ac, atomIndex)) { - return null; - } - final String HOSECode = HOSECodeBuilder.buildHOSECode(ac, atomIndex, maxSphere, false); - final Double predictedShift = Predict.predictShift(HOSECodeLookupTable, HOSECode); - if (predictedShift - == null) { - return null; - } - return new Signal(new String[]{nucleus}, new Double[]{predictedShift}, - casekit.nmr.utils.Utils.getMultiplicityFromProtonsCount(ac.getAtom(atomIndex) - .getImplicitHydrogenCount()), - "signal", null, 1, 0); + public static DataSet predict2D(final Map> hoseCodeShiftStatistics, + final IAtomContainer structure, final String[] nuclei, final String solvent, + final int 
minPathLength, final int maxPathLength) { + final DataSet predictionDim1 = predict1D(hoseCodeShiftStatistics, structure, solvent, nuclei[0]); + final DataSet predictionDim2 = predict1D(hoseCodeShiftStatistics, structure, solvent, nuclei[1]); + return Predict.predict2D(structure, predictionDim1.getSpectrum(), predictionDim2.getSpectrum(), + predictionDim1.getAssignment(), predictionDim2.getAssignment(), minPathLength, + maxPathLength); } /** - * Predicts a spectrum for a given structure based on HOSE code of atoms with specified nucleus and a - * given HOSE code lookup table.
- * Specified for carbons (13C) only -> {@link casekit.nmr.utils.Utils#getMultiplicityFromProtonsCount(int)}. + * Predicts a 2D spectrum from two 1D spectra. Each 1D spectra needs to contain the same solvent information. + * Diastereotopic distinctions are not provided yet. * - * @param HOSECodeLookupTable HashMap containing HOSE codes as keys and a list of chemical shifts - * of occurring central atoms as values - * @param ac structure to predict from - * @param maxSphere maximum sphere to use for HOSE code generation or null for unlimited - * @param nucleus nucleus (e.g. "13C") for signal creation + * @param structure structure to use for prediction + * @param spectrumDim1 1D spectrum of first dimension + * @param spectrumDim2 1D spectrum of second dimension + * @param assignmentDim1 1D assignment of first dimension + * @param assignmentDim2 1D assignment of second dimension + * @param minPathLength minimal path length + * @param maxPathLength maximal path length * - * @return null if a HOSE code of one atom does not exist in lookup table - * - * @throws org.openscience.cdk.exception.CDKException - * @see #predictSignal(Map, IAtomContainer, int, Integer, String) + * @return */ - public static Spectrum predictSpectrum(final HashMap> HOSECodeLookupTable, - final IAtomContainer ac, final Integer maxSphere, - final String nucleus) throws Exception { - final Spectrum predictedSpectrum = new Spectrum(); - predictedSpectrum.setNuclei(new String[]{nucleus}); - predictedSpectrum.setSignals(new ArrayList<>()); - Signal signal; - for (final IAtom atom : ac.atoms()) { + public static DataSet predict2D(final IAtomContainer structure, final Spectrum spectrumDim1, + final Spectrum spectrumDim2, final Assignment assignmentDim1, + final Assignment assignmentDim2, final int minPathLength, final int maxPathLength) { + if (!spectrumDim1.getSolvent() + .equals(spectrumDim2.getSolvent())) { + return null; + } + final String[] nuclei2D = new String[]{spectrumDim1.getNuclei()[0], 
spectrumDim2.getNuclei()[0]}; + final String atomTypeDim1 = casekit.nmr.utils.Utils.getAtomTypeFromNucleus(spectrumDim1.getNuclei()[0]); + final String atomTypeDim2 = casekit.nmr.utils.Utils.getAtomTypeFromNucleus(spectrumDim2.getNuclei()[0]); + + final Spectrum predictedSpectrum2D = new Spectrum(); + predictedSpectrum2D.setNuclei(nuclei2D); + predictedSpectrum2D.setSignals(new ArrayList<>()); + predictedSpectrum2D.setSolvent(spectrumDim1.getSolvent()); + final Assignment assignment2D = new Assignment(); + assignment2D.setNuclei(predictedSpectrum2D.getNuclei()); + assignment2D.initAssignments(0); + + Signal signal2D; + IAtom atom; + Double shiftDim1, shiftDim2; + int addedSignalIndex; + ConnectionTree connectionTree; + List nodesInSphere; + for (int i = 0; i + < structure.getAtomCount(); i++) { + atom = structure.getAtom(i); if (atom.getSymbol() - .equals(casekit.nmr.utils.Utils.getAtomTypeFromSpectrum(predictedSpectrum, 0))) { - signal = Predict.predictSignal(HOSECodeLookupTable, ac, atom.getIndex(), maxSphere, nucleus); - if (signal - == null) { - continue; - // return null; + .equals(atomTypeDim1)) { + connectionTree = HOSECodeBuilder.buildConnectionTree(structure, i, maxPathLength); + for (int s = minPathLength; s + <= connectionTree.getMaxSphere(); s++) { + nodesInSphere = connectionTree.getNodesInSphere(s, false); + for (final ConnectionTreeNode nodeInSphere : nodesInSphere) { + if (nodeInSphere.getAtom() + .getSymbol() + .equals(atomTypeDim2)) { + signal2D = new Signal(); + signal2D.setNuclei(nuclei2D); + signal2D.setKind("signal"); + signal2D.setEquivalencesCount(1); + shiftDim1 = spectrumDim1.getShift(assignmentDim1.getIndex(0, i), 0); + shiftDim2 = spectrumDim2.getShift(assignmentDim2.getIndex(0, nodeInSphere.getKey()), 0); + signal2D.setShifts(new Double[]{shiftDim1, shiftDim2}); + + addedSignalIndex = predictedSpectrum2D.addSignal(signal2D); + if (addedSignalIndex + >= assignment2D.getSetAssignmentsCount(0)) { + assignment2D.addAssignment(0, new 
int[]{i}); + assignment2D.addAssignment(1, new int[]{nodeInSphere.getKey()}); + } else { + assignment2D.addAssignmentEquivalence(0, addedSignalIndex, i); + assignment2D.addAssignmentEquivalence(1, addedSignalIndex, nodeInSphere.getKey()); + } + } + } } - predictedSpectrum.addSignal(signal); } } - return predictedSpectrum; + return new DataSet(structure, predictedSpectrum2D, assignment2D, new HashMap<>()); } } From a89327c5aa7c263ca92789d11928faed03ed553a Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Tue, 8 Jun 2021 14:10:27 +0200 Subject: [PATCH 222/405] chore: removed outputs --- src/casekit/nmr/analysis/HOSECodeShiftStatistics.java | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/casekit/nmr/analysis/HOSECodeShiftStatistics.java b/src/casekit/nmr/analysis/HOSECodeShiftStatistics.java index 3a3d886..1c1a215 100644 --- a/src/casekit/nmr/analysis/HOSECodeShiftStatistics.java +++ b/src/casekit/nmr/analysis/HOSECodeShiftStatistics.java @@ -49,9 +49,6 @@ public static Map>> collectHOSECodeShifts(final ConnectionTree connectionTree; int maxSphereTemp; for (final DataSet dataSet : dataSetList) { - System.out.println(dataSet.getSpectrum()); - System.out.println(Arrays.deepToString(dataSet.getAssignment() - .getAssignments())); structure = dataSet.getStructure() .toAtomContainer(); if (Utils.containsExplicitHydrogens(structure)) { From bba778433fb9ceb4b8a89fc5b19f9abf083af603 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Thu, 10 Jun 2021 13:05:29 +0200 Subject: [PATCH 223/405] feat: specified HSQC und edited HSQC methods --- src/casekit/nmr/utils/Predict.java | 42 ++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/src/casekit/nmr/utils/Predict.java b/src/casekit/nmr/utils/Predict.java index 16334e5..5c93f63 100644 --- a/src/casekit/nmr/utils/Predict.java +++ b/src/casekit/nmr/utils/Predict.java @@ -225,4 +225,46 @@ public static DataSet predict2D(final IAtomContainer structure, final Spectrum s return new DataSet(structure, 
predictedSpectrum2D, assignment2D, new HashMap<>()); } + + public static DataSet predictHSQC(final IAtomContainer structure, final Spectrum spectrumDim1, + final Spectrum spectrumDim2, final Assignment assignmentDim1, + final Assignment assignmentDim2) { + return predict2D(structure, spectrumDim1, spectrumDim2, assignmentDim1, assignmentDim2, 1, 1); + } + + public static DataSet predictHSQCEdited(final IAtomContainer structure, final Spectrum spectrumDim1, + final Spectrum spectrumDim2, final Assignment assignmentDim1, + final Assignment assignmentDim2) { + final DataSet dataSet = predictHSQC(structure, spectrumDim1, spectrumDim2, assignmentDim1, assignmentDim2); + + final String atomTypeDim2 = Utils.getAtomTypeFromSpectrum(spectrumDim2, 0); + IAtom atom; + Integer explicitHydrogensCount; + for (int i = 0; i + < dataSet.getSpectrum() + .getSignalCount(); i++) { + atom = structure.getAtom(dataSet.getAssignment() + .getAssignment(1, i, 0)); + if (!atom.getSymbol() + .equals(atomTypeDim2)) { + continue; + } + explicitHydrogensCount = AtomContainerManipulator.countExplicitHydrogens(structure, atom); + if (explicitHydrogensCount + == 2) { + dataSet.getSpectrum() + .getSignal(i) + .setPhase(-1); + } else if (explicitHydrogensCount + == 1 + || explicitHydrogensCount + == 3) { + dataSet.getSpectrum() + .getSignal(i) + .setPhase(1); + } + } + + return dataSet; + } } From fefb9ef1aa608852fef24db42c8460767c10de71 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Tue, 15 Jun 2021 20:35:59 +0200 Subject: [PATCH 224/405] feat: new fragmentation functionality and movement of connection tree (node) model classes --- .../nmr/analysis/HOSECodeShiftStatistics.java | 4 +- .../nmr/fragmentation/Fragmentation.java | 274 ++++++++++++++++++ .../model/ConnectionTree.java | 14 +- .../model/ConnectionTreeNode.java | 2 +- src/casekit/nmr/hose/HOSECodeBuilder.java | 107 +------ src/casekit/nmr/hose/Utils.java | 6 +- src/casekit/nmr/utils/Predict.java | 8 +- 7 files changed, 300 insertions(+), 
115 deletions(-) create mode 100644 src/casekit/nmr/fragmentation/Fragmentation.java rename src/casekit/nmr/{hose => fragmentation}/model/ConnectionTree.java (97%) rename src/casekit/nmr/{hose => fragmentation}/model/ConnectionTreeNode.java (99%) diff --git a/src/casekit/nmr/analysis/HOSECodeShiftStatistics.java b/src/casekit/nmr/analysis/HOSECodeShiftStatistics.java index 1c1a215..5e1209f 100644 --- a/src/casekit/nmr/analysis/HOSECodeShiftStatistics.java +++ b/src/casekit/nmr/analysis/HOSECodeShiftStatistics.java @@ -3,8 +3,8 @@ import casekit.nmr.Utils; import casekit.nmr.dbservice.COCONUT; import casekit.nmr.dbservice.NMRShiftDB; +import casekit.nmr.fragmentation.model.ConnectionTree; import casekit.nmr.hose.HOSECodeBuilder; -import casekit.nmr.hose.model.ConnectionTree; import casekit.nmr.model.DataSet; import casekit.nmr.model.Signal; import com.google.gson.Gson; @@ -110,7 +110,7 @@ public static Map>> collectHOSECodeShifts(final if (maxSphere == null) { connectionTree = HOSECodeBuilder.buildConnectionTree(structure, i, null); - maxSphereTemp = connectionTree.getMaxSphere(); + maxSphereTemp = connectionTree.getMaxSphere(true); } else { maxSphereTemp = maxSphere; } diff --git a/src/casekit/nmr/fragmentation/Fragmentation.java b/src/casekit/nmr/fragmentation/Fragmentation.java new file mode 100644 index 0000000..ceee971 --- /dev/null +++ b/src/casekit/nmr/fragmentation/Fragmentation.java @@ -0,0 +1,274 @@ +package casekit.nmr.fragmentation; + +import casekit.nmr.fragmentation.model.ConnectionTree; +import casekit.nmr.fragmentation.model.ConnectionTreeNode; +import org.openscience.cdk.interfaces.IAtom; +import org.openscience.cdk.interfaces.IAtomContainer; +import org.openscience.cdk.interfaces.IBond; +import org.openscience.cdk.silent.Bond; +import org.openscience.cdk.silent.PseudoAtom; +import org.openscience.cdk.silent.SilentChemObjectBuilder; + +import java.util.*; + +public class Fragmentation { + + /** + * Function for extending a given connection tree only 
containing + * its root node (0th sphere) by means of Breadth-First-Search (BFS). + * Until a certain maximum sphere, each reachable next neighbor atom + * is stored in a parent-child-relationship. + * In addition, bonds within rings or between hetero atoms will be kept. + * + * @param ac atom container to go through + * @param rootAtomIndex root atom index to start from + * @param maxSphere spherical limit + * @param exclude atom indices which to exclude from search + * @param withPseudoAtoms places pseudo atoms in the "outer" sphere + * + * @return connection tree + */ + public static ConnectionTree BFS(final IAtomContainer ac, final int rootAtomIndex, final int maxSphere, + final Set exclude, final boolean withPseudoAtoms) { + // create queue and connection tree for BFS + final Queue queue = new LinkedList<>(); + queue.add(new int[]{rootAtomIndex, 0}); + final ConnectionTree connectionTree = new ConnectionTree(ac.getAtom(rootAtomIndex), rootAtomIndex); + + BFS(ac, connectionTree, queue, new HashSet<>(), exclude, maxSphere, withPseudoAtoms); + + return connectionTree; + } + + /** + * Function for extending a given connection tree only containing + * its root node (0th sphere) by means of Breadth-First-Search (BFS). + * Until a certain maximum sphere, each reachable next neighbor atom + * is stored in a parent-child-relationship. + * In addition, bonds within rings or between hetero atoms will be kept. + * + * @param ac atom container to go through + * @param connectionTree connection tree to expand, incl. 
the root node + * @param queue queue to use containing the atom index of the root node and start sphere + * @param visited atom indices which are already "visited" and + * should be ignored + * @param exclude atom indices which to exclude from search + * @param maxSphere spherical limit + * @param withPseudoAtoms places pseudo atoms in the "outer" sphere + */ + private static void BFS(final IAtomContainer ac, final ConnectionTree connectionTree, final Queue queue, + final Set visited, final Set exclude, final int maxSphere, + final boolean withPseudoAtoms) { + // all nodes visited? + if (queue.isEmpty()) { + return; + } + final int[] queueValue = queue.remove(); + final int atomIndex = queueValue[0]; + final int sphere = queueValue[1]; + final IAtom atom = ac.getAtom(atomIndex); + final ConnectionTreeNode node = connectionTree.getNode(atomIndex); + // mark atom as visited + visited.add(atomIndex); + + IBond bond; + ConnectionTreeNode connectedAtomNode; + // add nodes and bonds in lower spheres + // go to all child nodes + int connectedAtomIndex; + for (final IAtom connectedAtom : ac.getConnectedAtomsList(atom)) { + connectedAtomIndex = ac.indexOf(connectedAtom); + bond = ac.getBond(atom, connectedAtom); + // add children to queue if not already visited and connection is allowed or maxSphere is not reached yet + if ((keepConnection(node.getAtom(), connectedAtom, bond) + || sphere + < maxSphere) + && !exclude.contains(connectedAtomIndex)) { + // and not already waiting in queue + if (!visited.contains(connectedAtomIndex) + && !queue.contains(connectedAtomIndex)) { + queue.add(new int[]{connectedAtomIndex, sphere + + 1}); + connectionTree.addNode(connectedAtom, connectedAtomIndex, node.getKey(), bond); + } else { + // node already exists in tree; add a further parent to connected atom (for ring closures) + connectedAtomNode = connectionTree.getNode(connectedAtomIndex); + if (connectedAtomNode + != null + && !ConnectionTree.hasRingClosureParent(node, connectedAtomNode) 
+ && !ConnectionTree.hasRingClosureParent(connectedAtomNode, node)) { + connectionTree.addRingClosureNode(connectedAtomIndex, node.getKey(), bond); + connectionTree.addRingClosureNode(node.getKey(), connectedAtomIndex, bond); + } + } + } else if (withPseudoAtoms) { + connectionTree.addNode(new PseudoAtom(connectedAtom), connectedAtomIndex, node.getKey(), bond); + } + } + + // further extension of connection tree + BFS(ac, connectionTree, queue, visited, exclude, maxSphere, withPseudoAtoms); + } + + public static boolean keepConnection(final IAtom atom1, final IAtom atom2, final IBond bond) { + // hetero-hetero or carbon-hetero + if ((isHeteroAtom(atom1) + && isHeteroAtom(atom2)) + || (isCarbonAtom(atom1) + && isHeteroAtom(atom2)) + || (isHeteroAtom(atom1) + && isCarbonAtom(atom2))) { + return true; + } + // do not cut ring bonds + if (bond.isInRing()) { + return true; + } + // carbon-carbon or carbon-hetero with higher bond order + return ((isCarbonAtom(atom1) + && isHeteroAtom(atom2)) + || (isHeteroAtom(atom1) + && isCarbonAtom(atom2)) + || (isCarbonAtom(atom1) + && isCarbonAtom(atom2))) + && bond.getOrder() + .numeric() + >= 2 + && !bond.isAromatic(); + + // // one carbon has bonds to multiple hetero atoms + // if (isCarbonAtom(atom1) + // && isHeteroAtom(atom2)) { + // int heteroAtomCount = 0; + // for (final IAtom atom3 : atom1.getContainer() + // .getConnectedAtomsList(atom1)) { + // if (isHeteroAtom(atom3)) { + // heteroAtomCount++; + // } + // } + // if (heteroAtomCount + // >= 2) { + // return true; + // } + // } else if (isHeteroAtom(atom1) + // && isCarbonAtom(atom2)) { + // int heteroAtomCount = 0; + // for (final IAtom atom3 : atom2.getContainer() + // .getConnectedAtomsList(atom2)) { + // if (isHeteroAtom(atom3)) { + // heteroAtomCount++; + // } + // } + // if (heteroAtomCount + // >= 2) { + // return true; + // } + // } + } + + public static boolean isHeteroAtom(final IAtom atom) { + return !atom.getSymbol() + .equals("H") + && !isCarbonAtom(atom); + 
} + + public static boolean isCarbonAtom(final IAtom atom) { + return atom.getSymbol() + .equals("C"); + } + + /** + * Reconstructs a structure from a given connection tree, + * including ring closures. + * + * @param connectionTree connection tree + * + * @return IAtomContainer + */ + public static IAtomContainer buildAtomContainer(final ConnectionTree connectionTree) { + // create new atom container and add the connection trees structure, beginning at the root atom + final IAtomContainer ac = SilentChemObjectBuilder.getInstance() + .newAtomContainer(); + addToAtomContainer(connectionTree, ac, null, null); + + return ac; + } + + /** + * Adds the substructure of a connection tree to an atom container.
+ * The substructure can be linked via a bond and an atom index in the container, but this is optional. + * If both, the bond and atom index to link, are not given (null) then the substructure will just be added + * to the atom container without linkage. + * + * @param connectionTree + * @param ac + * @param atomIndexInStructureToLink + * @param bondToLink + */ + public static void addToAtomContainer(final ConnectionTree connectionTree, final IAtomContainer ac, + final Integer atomIndexInStructureToLink, final IBond bondToLink) { + List nodesInSphere; + ConnectionTreeNode nodeInSphere, parentNode, partnerNode; + IBond bond, bondToParent; + // add root atom to given atom container and link it via a given linking bond + ac.addAtom(connectionTree.getRootNode() + .getAtom()); + if ((atomIndexInStructureToLink + != null) + && (bondToLink + != null)) { + final IBond bondToAdd = new Bond(ac.getAtom(atomIndexInStructureToLink), ac.getAtom(ac.getAtomCount() + - 1)); + bondToAdd.setOrder(bondToLink.getOrder()); + bondToAdd.setIsInRing(bondToLink.isInRing()); + bondToAdd.setIsAromatic(bondToLink.isAromatic()); + bondToAdd.setAtom(ac.getAtom(atomIndexInStructureToLink), 0); + bondToAdd.setAtom(ac.getAtom(ac.getAtomCount() + - 1), 1); + ac.addBond(bondToAdd); + } + // for each sphere: add the atom which is stored as node to atom container and set bonds between parent nodes + for (int s = 1; s + <= connectionTree.getMaxSphere(false); s++) { + // first add all atoms and its parents (previous sphere only, incl. 
bonds) to structure + nodesInSphere = connectionTree.getNodesInSphere(s, false); + for (int i = 0; i + < nodesInSphere.size(); i++) { + nodeInSphere = nodesInSphere.get(i); + if (nodeInSphere.isRingClosureNode()) { + continue; + } + ac.addAtom(nodeInSphere.getAtom()); + parentNode = nodeInSphere.getParent(); + bondToParent = nodeInSphere.getBondToParent(); + bond = new Bond(nodeInSphere.getAtom(), parentNode.getAtom(), bondToParent.getOrder()); + bond.setIsInRing(bondToParent.isInRing()); + bond.setIsAromatic(bondToParent.isAromatic()); + ac.addBond(bond); + } + } + for (int s = 1; s + <= connectionTree.getMaxSphere(true); s++) { + // and as second add the remaining bonds (ring closures) to structure + nodesInSphere = connectionTree.getNodesInSphere(s, true); + for (int i = 0; i + < nodesInSphere.size(); i++) { + nodeInSphere = nodesInSphere.get(i); + if (!nodeInSphere.isRingClosureNode()) { + continue; + } + parentNode = nodeInSphere.getParent(); + partnerNode = nodeInSphere.getRingClosureParent(); + if (ac.getBond(ac.getAtom(ac.indexOf(partnerNode.getAtom())), + ac.getAtom(ac.indexOf(parentNode.getAtom()))) + == null) { + bondToParent = nodeInSphere.getBondToParent(); + bond = new Bond(parentNode.getAtom(), partnerNode.getAtom(), bondToParent.getOrder()); + bond.setIsInRing(bondToParent.isInRing()); + bond.setIsAromatic(bondToParent.isAromatic()); + ac.addBond(bond); + } + } + } + } +} diff --git a/src/casekit/nmr/hose/model/ConnectionTree.java b/src/casekit/nmr/fragmentation/model/ConnectionTree.java similarity index 97% rename from src/casekit/nmr/hose/model/ConnectionTree.java rename to src/casekit/nmr/fragmentation/model/ConnectionTree.java index 77733ef..3bed867 100644 --- a/src/casekit/nmr/hose/model/ConnectionTree.java +++ b/src/casekit/nmr/fragmentation/model/ConnectionTree.java @@ -9,7 +9,7 @@ * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR 
A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -package casekit.nmr.hose.model; +package casekit.nmr.fragmentation.model; import casekit.nmr.hose.Utils; import org.openscience.cdk.interfaces.IAtom; @@ -159,7 +159,13 @@ private void addNode(final ConnectionTreeNode newNode, final ConnectionTreeNode } } - public int getMaxSphere() { + public int getMaxSphere(final boolean withRingClosureNodes) { + if (!withRingClosureNodes + && this.getNodesInSphere(this.maxSphere, false) + .isEmpty()) { + return this.maxSphere + - 1; + } return this.maxSphere; } @@ -176,7 +182,7 @@ public int getNodesCountInSphere(final int sphere, final boolean withRingClosure public List getKeys() { final List keys = new ArrayList<>(); for (int s = 0; s - <= this.getMaxSphere(); s++) { + <= this.getMaxSphere(false); s++) { for (final ConnectionTreeNode nodeInSphere : this.getNodesInSphere(s, false)) { keys.add(nodeInSphere.getKey()); } @@ -188,7 +194,7 @@ public List getKeys() { public List getNodes(final boolean withRingClosureNodes) { final List nodes = new ArrayList<>(); for (int s = 0; s - <= this.getMaxSphere(); s++) { + <= this.getMaxSphere(withRingClosureNodes); s++) { nodes.addAll(this.getNodesInSphere(s, withRingClosureNodes)); } diff --git a/src/casekit/nmr/hose/model/ConnectionTreeNode.java b/src/casekit/nmr/fragmentation/model/ConnectionTreeNode.java similarity index 99% rename from src/casekit/nmr/hose/model/ConnectionTreeNode.java rename to src/casekit/nmr/fragmentation/model/ConnectionTreeNode.java index 87871f7..38b867b 100644 --- a/src/casekit/nmr/hose/model/ConnectionTreeNode.java +++ b/src/casekit/nmr/fragmentation/model/ConnectionTreeNode.java @@ -9,7 +9,7 @@ * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -package casekit.nmr.hose.model; +package casekit.nmr.fragmentation.model; import org.openscience.cdk.interfaces.IAtom; import org.openscience.cdk.interfaces.IBond; diff --git a/src/casekit/nmr/hose/HOSECodeBuilder.java b/src/casekit/nmr/hose/HOSECodeBuilder.java index bd7cd06..e77a689 100644 --- a/src/casekit/nmr/hose/HOSECodeBuilder.java +++ b/src/casekit/nmr/hose/HOSECodeBuilder.java @@ -12,14 +12,14 @@ package casekit.nmr.hose; -import casekit.nmr.hose.model.ConnectionTree; -import casekit.nmr.hose.model.ConnectionTreeNode; +import casekit.nmr.fragmentation.Fragmentation; +import casekit.nmr.fragmentation.model.ConnectionTree; +import casekit.nmr.fragmentation.model.ConnectionTreeNode; import org.openscience.cdk.exception.CDKException; import org.openscience.cdk.interfaces.IAtom; import org.openscience.cdk.interfaces.IAtomContainer; import org.openscience.cdk.interfaces.IBond; import org.openscience.cdk.silent.Atom; -import org.openscience.cdk.silent.Bond; import org.openscience.cdk.silent.SilentChemObjectBuilder; import java.util.*; @@ -172,7 +172,7 @@ private static String buildHOSECodeString(final ConnectionTree connectionTree, final boolean useBremserElementNotation) throws CDKException { final IAtom rootAtom = connectionTree.getRootNode() .getAtom(); - final int maxSphere = connectionTree.getMaxSphere(); + final int maxSphere = connectionTree.getMaxSphere(true); // zeroth sphere final StringBuilder HOSECode = new StringBuilder(rootAtom.getSymbol() + "-" @@ -561,101 +561,6 @@ private static void BFS(final IAtomContainer ac, final ConnectionTree connection BFS(ac, 
connectionTree, queue, visited, maxSphere); } - /** - * Reconstructs a structure from a given connection tree, - * including ring closures. - * - * @param connectionTree connection tree - * - * @return IAtomContainer - */ - public static IAtomContainer buildAtomContainer(final ConnectionTree connectionTree) { - // create new atom container and add the connection trees structure, beginning at the root atom - final IAtomContainer ac = SilentChemObjectBuilder.getInstance() - .newAtomContainer(); - HOSECodeBuilder.addToAtomContainer(connectionTree, ac, null, null); - - return ac; - } - - /** - * Adds the substructure of a connection tree to an atom container.
- * The substructure can be linked via a bond and an atom index in the container, but this is optional. - * If both, the bond and atom index to link, are not given (null) then the substructure will just be added - * to the atom container without linkage. - * - * @param connectionTree - * @param ac - * @param atomIndexInStructureToLink - * @param bondToLink - */ - public static void addToAtomContainer(final ConnectionTree connectionTree, final IAtomContainer ac, - final Integer atomIndexInStructureToLink, final IBond bondToLink) { - List nodesInSphere; - ConnectionTreeNode nodeInSphere, parentNode, partnerNode; - IBond bond, bondToParent; - // add root atom to given atom container and link it via a given linking bond - ac.addAtom(connectionTree.getRootNode() - .getAtom()); - if ((atomIndexInStructureToLink - != null) - && (bondToLink - != null)) { - final IBond bondToAdd = new Bond(ac.getAtom(atomIndexInStructureToLink), ac.getAtom(ac.getAtomCount() - - 1)); - bondToAdd.setOrder(bondToLink.getOrder()); - bondToAdd.setIsInRing(bondToLink.isInRing()); - bondToAdd.setIsAromatic(bondToLink.isAromatic()); - bondToAdd.setAtom(ac.getAtom(atomIndexInStructureToLink), 0); - bondToAdd.setAtom(ac.getAtom(ac.getAtomCount() - - 1), 1); - ac.addBond(bondToAdd); - } - // for each sphere: add the atom which is stored as node to atom container and set bonds between parent nodes - for (int s = 1; s - <= connectionTree.getMaxSphere(); s++) { - // first add all atoms and its parents (previous sphere only, incl. 
bonds) to structure - nodesInSphere = connectionTree.getNodesInSphere(s, false); - for (int i = 0; i - < nodesInSphere.size(); i++) { - nodeInSphere = nodesInSphere.get(i); - if (nodeInSphere.isRingClosureNode()) { - continue; - } - ac.addAtom(nodeInSphere.getAtom()); - parentNode = nodeInSphere.getParent(); - bondToParent = nodeInSphere.getBondToParent(); - bond = new Bond(nodeInSphere.getAtom(), parentNode.getAtom(), bondToParent.getOrder()); - bond.setIsInRing(bondToParent.isInRing()); - bond.setIsAromatic(bondToParent.isAromatic()); - ac.addBond(bond); - } - } - for (int s = 1; s - <= connectionTree.getMaxSphere(); s++) { - // and as second add the remaining bonds (ring closures) to structure - nodesInSphere = connectionTree.getNodesInSphere(s, true); - for (int i = 0; i - < nodesInSphere.size(); i++) { - nodeInSphere = nodesInSphere.get(i); - if (!nodeInSphere.isRingClosureNode()) { - continue; - } - parentNode = nodeInSphere.getParent(); - partnerNode = nodeInSphere.getRingClosureParent(); - if (ac.getBond(ac.getAtom(ac.indexOf(partnerNode.getAtom())), - ac.getAtom(ac.indexOf(parentNode.getAtom()))) - == null) { - bondToParent = nodeInSphere.getBondToParent(); - bond = new Bond(parentNode.getAtom(), partnerNode.getAtom(), bondToParent.getOrder()); - bond.setIsInRing(bondToParent.isInRing()); - bond.setIsAromatic(bondToParent.isAromatic()); - ac.addBond(bond); - } - } - } - } - /** * Reconstructs a structure from a given HOSE code string.
* IMPORTANT: Ring closures are not restored, see @@ -667,11 +572,11 @@ public static void addToAtomContainer(final ConnectionTree connectionTree, final * @return IAtomContainer * * @see #buildConnectionTree(String, boolean) - * @see #buildAtomContainer(ConnectionTree) + * @see Fragmentation#buildAtomContainer(ConnectionTree) */ public static IAtomContainer buildAtomContainer(final String HOSECode, final boolean useBremserElementNotation) throws CDKException { - return HOSECodeBuilder.buildAtomContainer( + return Fragmentation.buildAtomContainer( HOSECodeBuilder.buildConnectionTree(HOSECode, useBremserElementNotation)); } } \ No newline at end of file diff --git a/src/casekit/nmr/hose/Utils.java b/src/casekit/nmr/hose/Utils.java index ae03ae5..80725e9 100644 --- a/src/casekit/nmr/hose/Utils.java +++ b/src/casekit/nmr/hose/Utils.java @@ -12,8 +12,8 @@ package casekit.nmr.hose; -import casekit.nmr.hose.model.ConnectionTree; -import casekit.nmr.hose.model.ConnectionTreeNode; +import casekit.nmr.fragmentation.model.ConnectionTree; +import casekit.nmr.fragmentation.model.ConnectionTreeNode; import org.openscience.cdk.interfaces.IBond; import java.util.ArrayList; @@ -167,7 +167,7 @@ private static void rankChildNodes(final ConnectionTreeNode node) { public static void rankChildNodes(final ConnectionTree connectionTree) { List nodesInSphere; for (int sphere = 0; sphere - < connectionTree.getMaxSphere(); sphere++) { + < connectionTree.getMaxSphere(true); sphere++) { nodesInSphere = connectionTree.getNodesInSphere(sphere, true); // for all nodes in sphere for (int i = 0; i diff --git a/src/casekit/nmr/utils/Predict.java b/src/casekit/nmr/utils/Predict.java index 5c93f63..cdc96fe 100644 --- a/src/casekit/nmr/utils/Predict.java +++ b/src/casekit/nmr/utils/Predict.java @@ -24,9 +24,9 @@ package casekit.nmr.utils; +import casekit.nmr.fragmentation.model.ConnectionTree; +import casekit.nmr.fragmentation.model.ConnectionTreeNode; import casekit.nmr.hose.HOSECodeBuilder; -import 
casekit.nmr.hose.model.ConnectionTree; -import casekit.nmr.hose.model.ConnectionTreeNode; import casekit.nmr.model.Assignment; import casekit.nmr.model.DataSet; import casekit.nmr.model.Signal; @@ -91,7 +91,7 @@ public static DataSet predict1D(final Map> hoseCod .equals(atomTypeSpectrum)) { connectionTree = HOSECodeBuilder.buildConnectionTree(structure, i, null); shift = null; - for (int s = connectionTree.getMaxSphere(); s + for (int s = connectionTree.getMaxSphere(true); s >= minMatchingSphere; s--) { hoseCode = HOSECodeBuilder.buildHOSECode(structure, i, s, false); if (hoseCodeShiftStatistics.containsKey(hoseCode) @@ -194,7 +194,7 @@ public static DataSet predict2D(final IAtomContainer structure, final Spectrum s .equals(atomTypeDim1)) { connectionTree = HOSECodeBuilder.buildConnectionTree(structure, i, maxPathLength); for (int s = minPathLength; s - <= connectionTree.getMaxSphere(); s++) { + <= connectionTree.getMaxSphere(false); s++) { nodesInSphere = connectionTree.getNodesInSphere(s, false); for (final ConnectionTreeNode nodeInSphere : nodesInSphere) { if (nodeInSphere.getAtom() From f153caf617174966120761b503d82ac09599c803 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Tue, 15 Jun 2021 20:37:11 +0200 Subject: [PATCH 225/405] feat: added getUnsaturatedAtomIndices method --- src/casekit/nmr/Utils.java | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/src/casekit/nmr/Utils.java b/src/casekit/nmr/Utils.java index f10489d..2c5a3b7 100644 --- a/src/casekit/nmr/Utils.java +++ b/src/casekit/nmr/Utils.java @@ -506,12 +506,27 @@ public static Boolean isSaturated(final IAtomContainer ac, final int atomIndex) if (!Utils.checkIndexInAtomContainer(ac, atomIndex)) { return null; } - return Utils.getBondOrderSum(ac, atomIndex, true) - .intValue() + return ac.getAtom(atomIndex) + .getValency() + != null + && Utils.getBondOrderSum(ac, atomIndex, true) + .intValue() >= ac.getAtom(atomIndex) .getValency(); } + public static List 
getUnsaturatedAtomIndices(final IAtomContainer ac) { + final List unsaturatedAtomIndices = new ArrayList<>(); + for (int i = 0; i + < ac.getAtomCount(); i++) { + // set the indices of unsaturated atoms in substructure + if (!isSaturated(ac, i)) { + unsaturatedAtomIndices.add(i); + } + } + return unsaturatedAtomIndices; + } + public static void addImplicitHydrogens(final IAtomContainer ac) throws CDKException { final CDKAtomTypeMatcher matcher = CDKAtomTypeMatcher.getInstance(ac.getBuilder()); IAtomType type; From 9fbcbf13651a160a5c770586e40e5e4be1a986f5 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Wed, 16 Jun 2021 00:20:22 +0200 Subject: [PATCH 226/405] feat: added a wrapper method to build a fragment --- .../nmr/fragmentation/Fragmentation.java | 28 ++++++++++++++++--- src/casekit/nmr/hose/HOSECodeBuilder.java | 5 ++-- 2 files changed, 26 insertions(+), 7 deletions(-) diff --git a/src/casekit/nmr/fragmentation/Fragmentation.java b/src/casekit/nmr/fragmentation/Fragmentation.java index ceee971..043e2ee 100644 --- a/src/casekit/nmr/fragmentation/Fragmentation.java +++ b/src/casekit/nmr/fragmentation/Fragmentation.java @@ -13,6 +13,26 @@ public class Fragmentation { + /** + * Function for extending a given connection tree only containing + * its root node (0th sphere) by means of Breadth-First-Search (BFS). + * Until a certain maximum sphere, each reachable next neighbor atom + * is stored in a parent-child-relationship. + * In addition, bonds within rings or between hetero atoms will be kept. 
+ * + * @param ac atom container to go through + * @param rootAtomIndex root atom index to start from + * @param maxSphere spherical limit + * @param exclude atom indices which to exclude from search + * @param withPseudoAtoms places pseudo atoms in the "outer" sphere + * + * @return connection tree + */ + public static IAtomContainer buildFragment(final IAtomContainer ac, final int rootAtomIndex, final int maxSphere, + final Set exclude, final boolean withPseudoAtoms) { + return toAtomContainer(BFS(ac, rootAtomIndex, maxSphere, exclude, withPseudoAtoms)); + } + /** * Function for extending a given connection tree only containing * its root node (0th sphere) by means of Breadth-First-Search (BFS). @@ -110,7 +130,7 @@ private static void BFS(final IAtomContainer ac, final ConnectionTree connection BFS(ac, connectionTree, queue, visited, exclude, maxSphere, withPseudoAtoms); } - public static boolean keepConnection(final IAtom atom1, final IAtom atom2, final IBond bond) { + private static boolean keepConnection(final IAtom atom1, final IAtom atom2, final IBond bond) { // hetero-hetero or carbon-hetero if ((isHeteroAtom(atom1) && isHeteroAtom(atom2)) @@ -166,13 +186,13 @@ && isCarbonAtom(atom2))) // } } - public static boolean isHeteroAtom(final IAtom atom) { + private static boolean isHeteroAtom(final IAtom atom) { return !atom.getSymbol() .equals("H") && !isCarbonAtom(atom); } - public static boolean isCarbonAtom(final IAtom atom) { + private static boolean isCarbonAtom(final IAtom atom) { return atom.getSymbol() .equals("C"); } @@ -185,7 +205,7 @@ public static boolean isCarbonAtom(final IAtom atom) { * * @return IAtomContainer */ - public static IAtomContainer buildAtomContainer(final ConnectionTree connectionTree) { + public static IAtomContainer toAtomContainer(final ConnectionTree connectionTree) { // create new atom container and add the connection trees structure, beginning at the root atom final IAtomContainer ac = SilentChemObjectBuilder.getInstance() 
.newAtomContainer(); diff --git a/src/casekit/nmr/hose/HOSECodeBuilder.java b/src/casekit/nmr/hose/HOSECodeBuilder.java index e77a689..70f7bb0 100644 --- a/src/casekit/nmr/hose/HOSECodeBuilder.java +++ b/src/casekit/nmr/hose/HOSECodeBuilder.java @@ -572,11 +572,10 @@ private static void BFS(final IAtomContainer ac, final ConnectionTree connection * @return IAtomContainer * * @see #buildConnectionTree(String, boolean) - * @see Fragmentation#buildAtomContainer(ConnectionTree) + * @see Fragmentation#toAtomContainer(ConnectionTree) */ public static IAtomContainer buildAtomContainer(final String HOSECode, final boolean useBremserElementNotation) throws CDKException { - return Fragmentation.buildAtomContainer( - HOSECodeBuilder.buildConnectionTree(HOSECode, useBremserElementNotation)); + return Fragmentation.toAtomContainer(HOSECodeBuilder.buildConnectionTree(HOSECode, useBremserElementNotation)); } } \ No newline at end of file From f67787cbab7b71228c8772347d3366685ec86cd5 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Wed, 16 Jun 2021 17:01:16 +0200 Subject: [PATCH 227/405] feat: added isPseudoNode property to ConnectionTreeNode --- .../nmr/fragmentation/model/ConnectionTreeNode.java | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/casekit/nmr/fragmentation/model/ConnectionTreeNode.java b/src/casekit/nmr/fragmentation/model/ConnectionTreeNode.java index 38b867b..6f69120 100644 --- a/src/casekit/nmr/fragmentation/model/ConnectionTreeNode.java +++ b/src/casekit/nmr/fragmentation/model/ConnectionTreeNode.java @@ -33,7 +33,7 @@ public class ConnectionTreeNode { private int sphere; private boolean isRingClosure; private ConnectionTreeNode ringClosureParent; - + private boolean isPseudoNode; /** * Pre-defined constructor for creating a non-ring closure node. 
@@ -52,6 +52,7 @@ public ConnectionTreeNode(final IAtom atom, final int key, final int sphere, fin this.children = new ArrayList<>(); this.bondsToChildren = new ArrayList<>(); this.isRingClosure = false; + this.isPseudoNode = false; } /** @@ -69,6 +70,7 @@ public ConnectionTreeNode(final ConnectionTreeNode ringClosurePartner, final int this.bondsToChildren = new ArrayList<>(); this.isRingClosure = true; this.ringClosureParent = ringClosurePartner; + this.isPseudoNode = false; } public IAtom getAtom() { @@ -127,6 +129,14 @@ public void setRingClosureParent(final ConnectionTreeNode ringClosureParent) { this.ringClosureParent = ringClosureParent; } + public void setIsPseudoNode(final boolean isPseudoNode) { + this.isPseudoNode = isPseudoNode; + } + + public boolean isPseudoNode() { + return this.isPseudoNode; + } + public boolean addChildNode(final ConnectionTreeNode childNode, final IBond bondToChild) { return this.addChildNode(childNode, bondToChild, this.getChildNodes() .size()); From 62a1e3a6030c1e46042bd60afb1f9170be5b28cc Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Wed, 16 Jun 2021 17:05:38 +0200 Subject: [PATCH 228/405] fix: bugfixes and improvements when building fragments --- .../nmr/fragmentation/Fragmentation.java | 210 +++++++++++------- 1 file changed, 127 insertions(+), 83 deletions(-) diff --git a/src/casekit/nmr/fragmentation/Fragmentation.java b/src/casekit/nmr/fragmentation/Fragmentation.java index 043e2ee..38da051 100644 --- a/src/casekit/nmr/fragmentation/Fragmentation.java +++ b/src/casekit/nmr/fragmentation/Fragmentation.java @@ -14,11 +14,7 @@ public class Fragmentation { /** - * Function for extending a given connection tree only containing - * its root node (0th sphere) by means of Breadth-First-Search (BFS). - * Until a certain maximum sphere, each reachable next neighbor atom - * is stored in a parent-child-relationship. - * In addition, bonds within rings or between hetero atoms will be kept. 
+ * Creates an atom container from a given connection tree built by using {@link #BFS(IAtomContainer, int, int, Set, boolean)}. * * @param ac atom container to go through * @param rootAtomIndex root atom index to start from @@ -55,7 +51,52 @@ public static ConnectionTree BFS(final IAtomContainer ac, final int rootAtomInde queue.add(new int[]{rootAtomIndex, 0}); final ConnectionTree connectionTree = new ConnectionTree(ac.getAtom(rootAtomIndex), rootAtomIndex); - BFS(ac, connectionTree, queue, new HashSet<>(), exclude, maxSphere, withPseudoAtoms); + BFS(ac, connectionTree, queue, new HashSet<>(), exclude, maxSphere); + + // close rings + IBond bond; + for (int s = 0; s + < connectionTree.getMaxSphere(false); s++) { + for (final ConnectionTreeNode nodeInSphere1 : connectionTree.getNodesInSphere(s, false)) { + // set connections (parent nodes) in sphere nodes which have to be connected -> ring closures + for (final ConnectionTreeNode nodeInSphere2 : connectionTree.getNodesInSphere(s, false)) { + if ((ac.getBond(nodeInSphere1.getAtom(), nodeInSphere2.getAtom()) + != null) + && !ConnectionTree.hasRingClosureParent(nodeInSphere1, nodeInSphere2) + && !ConnectionTree.hasRingClosureParent(nodeInSphere2, nodeInSphere1)) { + bond = ac.getBond(nodeInSphere1.getAtom(), nodeInSphere2.getAtom()); + connectionTree.addRingClosureNode(nodeInSphere1.getKey(), nodeInSphere2.getKey(), bond); + connectionTree.addRingClosureNode(nodeInSphere2.getKey(), nodeInSphere1.getKey(), bond); + } + } + for (final ConnectionTreeNode nodeInSphere2 : connectionTree.getNodesInSphere(s + + 1, false)) { + if ((ac.getBond(nodeInSphere1.getAtom(), nodeInSphere2.getAtom()) + != null) + && !ConnectionTree.hasRingClosureParent(nodeInSphere1, nodeInSphere2) + && !ConnectionTree.hasRingClosureParent(nodeInSphere2, nodeInSphere1)) { + bond = ac.getBond(nodeInSphere1.getAtom(), nodeInSphere2.getAtom()); + connectionTree.addRingClosureNode(nodeInSphere1.getKey(), nodeInSphere2.getKey(), bond); + 
connectionTree.addRingClosureNode(nodeInSphere2.getKey(), nodeInSphere1.getKey(), bond); + } + } + } + } + + // add pseudo atoms + if (withPseudoAtoms) { + for (final ConnectionTreeNode node : connectionTree.getNodes(false)) { + for (final IAtom connectedAtom : ac.getConnectedAtomsList(node.getAtom())) { + if (connectionTree.getBond(node.getKey(), connectedAtom.getIndex()) + == null) { + addPseudoNode(connectionTree, ac.getAtomCount() + + connectionTree.getNodesCount(false), node.getKey(), + ac.getBond(node.getAtom(), connectedAtom)); + } + } + } + } + return connectionTree; } @@ -67,18 +108,16 @@ public static ConnectionTree BFS(final IAtomContainer ac, final int rootAtomInde * is stored in a parent-child-relationship. * In addition, bonds within rings or between hetero atoms will be kept. * - * @param ac atom container to go through - * @param connectionTree connection tree to expand, incl. the root node - * @param queue queue to use containing the atom index of the root node and start sphere - * @param visited atom indices which are already "visited" and - * should be ignored - * @param exclude atom indices which to exclude from search - * @param maxSphere spherical limit - * @param withPseudoAtoms places pseudo atoms in the "outer" sphere + * @param ac atom container to go through + * @param connectionTree connection tree to expand, incl. the root node + * @param queue queue to use containing the atom index of the root node and start sphere + * @param visited atom indices which are already "visited" and + * should be ignored + * @param exclude atom indices which to exclude from search + * @param maxSphere spherical limit */ private static void BFS(final IAtomContainer ac, final ConnectionTree connectionTree, final Queue queue, - final Set visited, final Set exclude, final int maxSphere, - final boolean withPseudoAtoms) { + final Set visited, final Set exclude, final int maxSphere) { // all nodes visited? 
if (queue.isEmpty()) { return; @@ -87,50 +126,50 @@ private static void BFS(final IAtomContainer ac, final ConnectionTree connection final int atomIndex = queueValue[0]; final int sphere = queueValue[1]; final IAtom atom = ac.getAtom(atomIndex); - final ConnectionTreeNode node = connectionTree.getNode(atomIndex); // mark atom as visited visited.add(atomIndex); IBond bond; - ConnectionTreeNode connectedAtomNode; // add nodes and bonds in lower spheres // go to all child nodes int connectedAtomIndex; for (final IAtom connectedAtom : ac.getConnectedAtomsList(atom)) { - connectedAtomIndex = ac.indexOf(connectedAtom); + connectedAtomIndex = connectedAtom.getIndex(); bond = ac.getBond(atom, connectedAtom); // add children to queue if not already visited and connection is allowed or maxSphere is not reached yet - if ((keepConnection(node.getAtom(), connectedAtom, bond) - || sphere - < maxSphere) - && !exclude.contains(connectedAtomIndex)) { - // and not already waiting in queue - if (!visited.contains(connectedAtomIndex) - && !queue.contains(connectedAtomIndex)) { - queue.add(new int[]{connectedAtomIndex, sphere - + 1}); - connectionTree.addNode(connectedAtom, connectedAtomIndex, node.getKey(), bond); - } else { - // node already exists in tree; add a further parent to connected atom (for ring closures) - connectedAtomNode = connectionTree.getNode(connectedAtomIndex); - if (connectedAtomNode - != null - && !ConnectionTree.hasRingClosureParent(node, connectedAtomNode) - && !ConnectionTree.hasRingClosureParent(connectedAtomNode, node)) { - connectionTree.addRingClosureNode(connectedAtomIndex, node.getKey(), bond); - connectionTree.addRingClosureNode(node.getKey(), connectedAtomIndex, bond); + if (!exclude.contains(connectedAtomIndex)) { + if (keepBond(atom, connectedAtom, bond) + || sphere + < maxSphere) { + // add children to queue if not already visited and not already waiting in queue + if (!visited.contains(connectedAtomIndex) + && !queue.contains(connectedAtomIndex)) { 
+ queue.add(new int[]{connectedAtomIndex, sphere + + 1}); + connectionTree.addNode(connectedAtom, connectedAtomIndex, atomIndex, bond); } } - } else if (withPseudoAtoms) { - connectionTree.addNode(new PseudoAtom(connectedAtom), connectedAtomIndex, node.getKey(), bond); } } // further extension of connection tree - BFS(ac, connectionTree, queue, visited, exclude, maxSphere, withPseudoAtoms); + BFS(ac, connectionTree, queue, visited, exclude, maxSphere); + } + + private static boolean addPseudoNode(final ConnectionTree connectionTree, final int pseudoNodeKey, + final int parentNodeKey, final IBond bondToParent) { + if (!connectionTree.addNode(new PseudoAtom("R"), pseudoNodeKey, parentNodeKey, bondToParent)) { + return false; + } + final ConnectionTreeNode pseudoNode = connectionTree.getNode(pseudoNodeKey); + pseudoNode.setIsPseudoNode(true); + // pseudoNode.getAtom() + // .setImplicitHydrogenCount(connectedAtom.getImplicitHydrogenCount()); + + return true; } - private static boolean keepConnection(final IAtom atom1, final IAtom atom2, final IBond bond) { + private static boolean keepBond(final IAtom atom1, final IAtom atom2, final IBond bond) { // hetero-hetero or carbon-hetero if ((isHeteroAtom(atom1) && isHeteroAtom(atom2)) @@ -140,50 +179,55 @@ && isHeteroAtom(atom2)) && isCarbonAtom(atom2))) { return true; } - // do not cut ring bonds - if (bond.isInRing()) { + + // // do not cut ring bonds + // if (bond.isInRing()) { + // return true; + // } + + // carbon-carbon or carbon-hetero with higher bond order + if ( + // ((isCarbonAtom(atom1) + // && isHeteroAtom(atom2)) + // || (isHeteroAtom(atom1) + // && isCarbonAtom(atom2)) + // || (isCarbonAtom(atom1) + // && isCarbonAtom(atom2))) + // && + bond.getOrder() + .numeric() + >= 3 + // && !bond.isAromatic() + ) { return true; } - // carbon-carbon or carbon-hetero with higher bond order - return ((isCarbonAtom(atom1) - && isHeteroAtom(atom2)) - || (isHeteroAtom(atom1) - && isCarbonAtom(atom2)) - || (isCarbonAtom(atom1) - 
&& isCarbonAtom(atom2))) - && bond.getOrder() - .numeric() - >= 2 - && !bond.isAromatic(); - // // one carbon has bonds to multiple hetero atoms - // if (isCarbonAtom(atom1) - // && isHeteroAtom(atom2)) { - // int heteroAtomCount = 0; - // for (final IAtom atom3 : atom1.getContainer() - // .getConnectedAtomsList(atom1)) { - // if (isHeteroAtom(atom3)) { - // heteroAtomCount++; - // } - // } - // if (heteroAtomCount - // >= 2) { - // return true; - // } - // } else if (isHeteroAtom(atom1) - // && isCarbonAtom(atom2)) { - // int heteroAtomCount = 0; - // for (final IAtom atom3 : atom2.getContainer() - // .getConnectedAtomsList(atom2)) { - // if (isHeteroAtom(atom3)) { - // heteroAtomCount++; - // } - // } - // if (heteroAtomCount - // >= 2) { - // return true; - // } - // } + // one carbon has bonds to multiple hetero atoms + if (isCarbonAtom(atom1) + && isHeteroAtom(atom2)) { + int heteroAtomCount = 0; + for (final IAtom atom3 : atom1.getContainer() + .getConnectedAtomsList(atom1)) { + if (isHeteroAtom(atom3)) { + heteroAtomCount++; + } + } + return heteroAtomCount + >= 2; + } else if (isHeteroAtom(atom1) + && isCarbonAtom(atom2)) { + int heteroAtomCount = 0; + for (final IAtom atom3 : atom2.getContainer() + .getConnectedAtomsList(atom2)) { + if (isHeteroAtom(atom3)) { + heteroAtomCount++; + } + } + return heteroAtomCount + >= 2; + } + + return false; } private static boolean isHeteroAtom(final IAtom atom) { From 3c171d053f364633daf52428e9cc4a6e9d9bfe32 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Wed, 16 Jun 2021 17:51:31 +0200 Subject: [PATCH 229/405] fix: fixed an issue when closing rings --- .../nmr/fragmentation/Fragmentation.java | 50 +++++++++---------- 1 file changed, 24 insertions(+), 26 deletions(-) diff --git a/src/casekit/nmr/fragmentation/Fragmentation.java b/src/casekit/nmr/fragmentation/Fragmentation.java index 38da051..d300ab7 100644 --- a/src/casekit/nmr/fragmentation/Fragmentation.java +++ b/src/casekit/nmr/fragmentation/Fragmentation.java @@ 
-14,7 +14,7 @@ public class Fragmentation { /** - * Creates an atom container from a given connection tree built by using {@link #BFS(IAtomContainer, int, int, Set, boolean)}. + * Creates an atom container from a given connection tree built by using {@link #buildConnectionTree(IAtomContainer, int, int, Set, boolean)}. * * @param ac atom container to go through * @param rootAtomIndex root atom index to start from @@ -26,7 +26,7 @@ public class Fragmentation { */ public static IAtomContainer buildFragment(final IAtomContainer ac, final int rootAtomIndex, final int maxSphere, final Set exclude, final boolean withPseudoAtoms) { - return toAtomContainer(BFS(ac, rootAtomIndex, maxSphere, exclude, withPseudoAtoms)); + return toAtomContainer(buildConnectionTree(ac, rootAtomIndex, maxSphere, exclude, withPseudoAtoms)); } /** @@ -44,8 +44,9 @@ public static IAtomContainer buildFragment(final IAtomContainer ac, final int ro * * @return connection tree */ - public static ConnectionTree BFS(final IAtomContainer ac, final int rootAtomIndex, final int maxSphere, - final Set exclude, final boolean withPseudoAtoms) { + public static ConnectionTree buildConnectionTree(final IAtomContainer ac, final int rootAtomIndex, + final int maxSphere, final Set exclude, + final boolean withPseudoAtoms) { // create queue and connection tree for BFS final Queue queue = new LinkedList<>(); queue.add(new int[]{rootAtomIndex, 0}); @@ -55,34 +56,26 @@ public static ConnectionTree BFS(final IAtomContainer ac, final int rootAtomInde // close rings IBond bond; + final int maxSphereTree = connectionTree.getMaxSphere(false); for (int s = 0; s - < connectionTree.getMaxSphere(false); s++) { + <= maxSphereTree; s++) { for (final ConnectionTreeNode nodeInSphere1 : connectionTree.getNodesInSphere(s, false)) { // set connections (parent nodes) in sphere nodes which have to be connected -> ring closures - for (final ConnectionTreeNode nodeInSphere2 : connectionTree.getNodesInSphere(s, false)) { - if 
((ac.getBond(nodeInSphere1.getAtom(), nodeInSphere2.getAtom()) - != null) - && !ConnectionTree.hasRingClosureParent(nodeInSphere1, nodeInSphere2) - && !ConnectionTree.hasRingClosureParent(nodeInSphere2, nodeInSphere1)) { - bond = ac.getBond(nodeInSphere1.getAtom(), nodeInSphere2.getAtom()); - connectionTree.addRingClosureNode(nodeInSphere1.getKey(), nodeInSphere2.getKey(), bond); - connectionTree.addRingClosureNode(nodeInSphere2.getKey(), nodeInSphere1.getKey(), bond); - } - } - for (final ConnectionTreeNode nodeInSphere2 : connectionTree.getNodesInSphere(s - + 1, false)) { - if ((ac.getBond(nodeInSphere1.getAtom(), nodeInSphere2.getAtom()) - != null) - && !ConnectionTree.hasRingClosureParent(nodeInSphere1, nodeInSphere2) - && !ConnectionTree.hasRingClosureParent(nodeInSphere2, nodeInSphere1)) { - bond = ac.getBond(nodeInSphere1.getAtom(), nodeInSphere2.getAtom()); - connectionTree.addRingClosureNode(nodeInSphere1.getKey(), nodeInSphere2.getKey(), bond); - connectionTree.addRingClosureNode(nodeInSphere2.getKey(), nodeInSphere1.getKey(), bond); + for (int s2 = s; s2 + <= maxSphereTree; s2++) { + for (final ConnectionTreeNode nodeInSphere2 : connectionTree.getNodesInSphere(s2, false)) { + if ((ac.getBond(nodeInSphere1.getAtom(), nodeInSphere2.getAtom()) + != null) + && !ConnectionTree.hasRingClosureParent(nodeInSphere1, nodeInSphere2) + && !ConnectionTree.hasRingClosureParent(nodeInSphere2, nodeInSphere1)) { + bond = ac.getBond(nodeInSphere1.getAtom(), nodeInSphere2.getAtom()); + connectionTree.addRingClosureNode(nodeInSphere1.getKey(), nodeInSphere2.getKey(), bond); + connectionTree.addRingClosureNode(nodeInSphere2.getKey(), nodeInSphere1.getKey(), bond); + } } } } } - // add pseudo atoms if (withPseudoAtoms) { for (final ConnectionTreeNode node : connectionTree.getNodes(false)) { @@ -97,7 +90,6 @@ public static ConnectionTree BFS(final IAtomContainer ac, final int rootAtomInde } } - return connectionTree; } @@ -335,4 +327,10 @@ public static void 
addToAtomContainer(final ConnectionTree connectionTree, final } } } + + public static List buildFragmentAtomIndicesList(final IAtomContainer structure, final int rootAtomIndex, + final Integer maxSphere, final Set exclude, + final boolean withPseudoAtoms) { + return buildConnectionTree(structure, rootAtomIndex, maxSphere, exclude, withPseudoAtoms).getKeys(); + } } From 0e74ce27b222c8230343bf7bb0a34b7f8adfe774 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Fri, 18 Jun 2021 19:43:02 +0200 Subject: [PATCH 230/405] feat: added toString method to ConnectionTreeNode and enabled to set parent node and bond to parent node --- .../model/ConnectionTreeNode.java | 39 ++++++++++++++++++- 1 file changed, 37 insertions(+), 2 deletions(-) diff --git a/src/casekit/nmr/fragmentation/model/ConnectionTreeNode.java b/src/casekit/nmr/fragmentation/model/ConnectionTreeNode.java index 6f69120..e54cff9 100644 --- a/src/casekit/nmr/fragmentation/model/ConnectionTreeNode.java +++ b/src/casekit/nmr/fragmentation/model/ConnectionTreeNode.java @@ -26,8 +26,8 @@ public class ConnectionTreeNode { private final List children; private final List bondsToChildren; - private final ConnectionTreeNode parent; - private final IBond bondToParent; + private ConnectionTreeNode parent; + private IBond bondToParent; private IAtom atom; private Integer key; private int sphere; @@ -85,10 +85,18 @@ public ConnectionTreeNode getParent() { return this.parent; } + public void setParent(final ConnectionTreeNode parent) { + this.parent = parent; + } + public IBond getBondToParent() { return this.bondToParent; } + public void setBondToParent(final IBond bondToParent) { + this.bondToParent = bondToParent; + } + public List getChildNodes() { return this.children; } @@ -197,4 +205,31 @@ public boolean hasChild(final int childKey) { public boolean hasChildren() { return !this.children.isEmpty(); } + + @Override + public String toString() { + return "ConnectionTreeNode{" + + "key=" + + this.key + + ", sphere=" + + 
this.sphere + + ", isRingClosure=" + + this.isRingClosure + + ", isPseudoNode=" + + this.isPseudoNode + + ", ..." + // + ", children=" + // + this.children + // + ", bondsToChildren=" + // + this.bondsToChildren + // + ", parent=" + // + this.parent + // + ", bondToParent=" + // + this.bondToParent + // + ", atom=" + // + this.atom + // + ", ringClosureParent=" + // + this.ringClosureParent + + '}'; + } } From ad054e2ebda22f43cd9895d84a1a25fd34b826de Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Sat, 19 Jun 2021 00:04:37 +0200 Subject: [PATCH 231/405] feat: added addSubtree method to ConnectionTree class --- .../fragmentation/model/ConnectionTree.java | 76 ++++++++++++++++++- 1 file changed, 75 insertions(+), 1 deletion(-) diff --git a/src/casekit/nmr/fragmentation/model/ConnectionTree.java b/src/casekit/nmr/fragmentation/model/ConnectionTree.java index 3bed867..8d11866 100644 --- a/src/casekit/nmr/fragmentation/model/ConnectionTree.java +++ b/src/casekit/nmr/fragmentation/model/ConnectionTree.java @@ -118,6 +118,72 @@ private static void buildSubtree(final ConnectionTree subtree, final ConnectionT } } + public static boolean addSubtree(final ConnectionTree connectionTree, final int parentNodeKey, + final ConnectionTree subtree, final IBond bondToLink) { + if (!connectionTree.containsKey(parentNodeKey)) { + return false; + } + for (final int key : subtree.getKeys()) { + if (connectionTree.containsKey(key)) { + return false; + } + } + // check ring closure nodes in subtree whether their ring closure parents (on the other side) still exist + for (final ConnectionTreeNode node : subtree.getNodes(true)) { + if (node.isRingClosureNode() + && !subtree.containsKey(node.getRingClosureParent() + .getKey()) + // && !connectionTree.containsKey(node.getRingClosureParent() + // .getKey()) + ) { + if (node.getRingClosureParent() + != null) { + node.getRingClosureParent() + .setRingClosureParent(null); + } + node.getParent() + .removeChildNode(node); + } + } + + final 
ConnectionTreeNode parentNode = connectionTree.getNode(parentNodeKey); + for (final ConnectionTreeNode subtreeNode : subtree.getNodes(true)) { + if (subtreeNode + == subtree.getRootNode()) { + parentNode.addChildNode(subtree.getRootNode(), bondToLink); + subtree.getRootNode() + .setParent(parentNode); + subtree.getRootNode() + .setBondToParent(bondToLink); + connectionTree.addKey(subtree.getRootNode() + .getKey()); + subtree.getRootNode() + .setSphere(parentNode.getSphere() + + 1); + continue; + } + if (!subtreeNode.isRingClosureNode() + && !connectionTree.containsKey(subtreeNode.getKey())) { + connectionTree.addKey(subtreeNode.getKey()); + } else { + continue; + } + subtreeNode.setSphere(parentNode.getSphere() + + subtreeNode.getSphere() + + 1); + if (subtreeNode.getSphere() + > connectionTree.getMaxSphere(true)) { + connectionTree.maxSphere = subtreeNode.getSphere(); + } + } + + return true; + } + + public boolean addKey(final int key) { + return this.keySet.add(key); + } + public ConnectionTreeNode getRootNode() { return this.root; } @@ -425,7 +491,15 @@ public String toString() { treeStringBuilder.append(nodeInSphere.getAtom() .getSymbol()); } - treeStringBuilder.append(" {"); + if (s + > 0) { + treeStringBuilder.append(" ("); + treeStringBuilder.append(nodeInSphere.getParent() + .getKey()); + treeStringBuilder.append(") {"); + } else { + treeStringBuilder.append(" {"); + } if (nodeInSphere.isRingClosureNode()) { treeStringBuilder.append(nodeInSphere.getRingClosureParent() .getKey()); From 8fc804487d3d32ec2750a8a38781ab9edea47a2b Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Sat, 19 Jun 2021 00:07:02 +0200 Subject: [PATCH 232/405] feat: improvement of fragmentation by ring extension via a given maxSphereRing and bugfixes --- .../nmr/fragmentation/Fragmentation.java | 174 +++++++++++++++--- 1 file changed, 145 insertions(+), 29 deletions(-) diff --git a/src/casekit/nmr/fragmentation/Fragmentation.java b/src/casekit/nmr/fragmentation/Fragmentation.java index 
d300ab7..7f34dc7 100644 --- a/src/casekit/nmr/fragmentation/Fragmentation.java +++ b/src/casekit/nmr/fragmentation/Fragmentation.java @@ -2,30 +2,119 @@ import casekit.nmr.fragmentation.model.ConnectionTree; import casekit.nmr.fragmentation.model.ConnectionTreeNode; +import org.openscience.cdk.exception.CDKException; +import org.openscience.cdk.graph.Cycles; import org.openscience.cdk.interfaces.IAtom; import org.openscience.cdk.interfaces.IAtomContainer; import org.openscience.cdk.interfaces.IBond; +import org.openscience.cdk.interfaces.IRingSet; import org.openscience.cdk.silent.Bond; import org.openscience.cdk.silent.PseudoAtom; import org.openscience.cdk.silent.SilentChemObjectBuilder; +import org.openscience.cdk.smiles.SmilesGenerator; import java.util.*; public class Fragmentation { + public static List buildFragments(final IAtomContainer structure, final Integer maxSphere, + final Integer maxSphereRing, final boolean withPseudoAtoms) { + final List fragments = new ArrayList<>(); + try { + // build fragments from detected rings and extend by given maximum sphere for rings + final Set smilesSet = new HashSet<>(); + String smiles; + ConnectionTree connectionTreeRing, connectionTreeOuterSphere, subtreeToAdd; + final IRingSet ringSet = Cycles.all(structure)//essential(structure) + .toRingSet(); + List atomIndicesInRing; + Set atomIndicesOutOfRing; + IAtomContainer ringAtomContainer; + for (int i = 0; i + < ringSet.getAtomContainerCount(); i++) { + ringAtomContainer = ringSet.getAtomContainer(i); + // add already "visited" ring nodes + atomIndicesInRing = new ArrayList<>(); + for (int k = 0; k + < ringAtomContainer.getAtomCount(); k++) { + atomIndicesInRing.add(structure.indexOf(ringAtomContainer.getAtom(k))); + } + atomIndicesOutOfRing = new HashSet<>(); + for (int j = 0; j + < structure.getAtomCount(); j++) { + if (!atomIndicesInRing.contains(j)) { + atomIndicesOutOfRing.add(j); + } + } + connectionTreeRing = buildConnectionTree(structure, 
atomIndicesInRing.get(0), null, + atomIndicesOutOfRing, false); + // add missing outer sphere nodes to ring + for (int k = 0; k + < ringAtomContainer.getAtomCount(); k++) { + connectionTreeOuterSphere = Fragmentation.buildConnectionTree(structure, structure.indexOf( + ringAtomContainer.getAtom(k)), maxSphereRing, new HashSet<>(atomIndicesInRing), false); + if (connectionTreeOuterSphere.getMaxSphere(false) + == 0) { + continue; + } + for (final int key : connectionTreeOuterSphere.getNodeKeysInSphere(1)) { + subtreeToAdd = ConnectionTree.buildSubtree(connectionTreeOuterSphere, key); + if (!addToConnectionTree(connectionTreeRing, connectionTreeOuterSphere.getRootNode() + .getKey(), subtreeToAdd, + connectionTreeOuterSphere.getBond( + connectionTreeOuterSphere.getRootNode() + .getKey(), key))) { + continue; + } + atomIndicesInRing.addAll(subtreeToAdd.getKeys()); + } + } + // close rings + closeRings(connectionTreeRing, structure); + // attach pseudo atoms if desired + if (withPseudoAtoms) { + attachPseudoAtoms(connectionTreeRing, structure); + } + smiles = SmilesGenerator.absolute() + .create(ringAtomContainer); + if (!smilesSet.contains(smiles)) { + smilesSet.add(smiles); + fragments.add(toAtomContainer(connectionTreeRing)); + } + } + // build fragment for each non-ring atom + for (int i = 0; i + < structure.getAtomCount(); i++) { + final IAtomContainer fragment = Fragmentation.buildFragment(structure, i, maxSphere, new HashSet<>(), + withPseudoAtoms); + smiles = SmilesGenerator.absolute() + .create(fragment); + if (!smilesSet.contains(smiles)) { + smilesSet.add(smiles); + fragments.add(fragment); + } + } + } catch (final CDKException e) { + e.printStackTrace(); + } + + return fragments; + } + /** - * Creates an atom container from a given connection tree built by using {@link #buildConnectionTree(IAtomContainer, int, int, Set, boolean)}. 
+ * Creates an atom container from a given connection tree built by using {@link #buildConnectionTree(IAtomContainer, int, Integer, Set, boolean)}. * * @param ac atom container to go through * @param rootAtomIndex root atom index to start from - * @param maxSphere spherical limit + * @param maxSphere spherical limit, a null value means no limit * @param exclude atom indices which to exclude from search * @param withPseudoAtoms places pseudo atoms in the "outer" sphere * * @return connection tree */ - public static IAtomContainer buildFragment(final IAtomContainer ac, final int rootAtomIndex, final int maxSphere, - final Set exclude, final boolean withPseudoAtoms) { + public static IAtomContainer buildFragment(final IAtomContainer ac, final int rootAtomIndex, + final Integer maxSphere, final Set exclude, + final boolean withPseudoAtoms) { return toAtomContainer(buildConnectionTree(ac, rootAtomIndex, maxSphere, exclude, withPseudoAtoms)); } @@ -36,24 +125,35 @@ public static IAtomContainer buildFragment(final IAtomContainer ac, final int ro * is stored in a parent-child-relationship. * In addition, bonds within rings or between hetero atoms will be kept. 
* - * @param ac atom container to go through + * @param structure atom container to go through * @param rootAtomIndex root atom index to start from - * @param maxSphere spherical limit + * @param maxSphere spherical limit, a null value means no limit * @param exclude atom indices which to exclude from search * @param withPseudoAtoms places pseudo atoms in the "outer" sphere * * @return connection tree */ - public static ConnectionTree buildConnectionTree(final IAtomContainer ac, final int rootAtomIndex, - final int maxSphere, final Set exclude, + public static ConnectionTree buildConnectionTree(final IAtomContainer structure, final int rootAtomIndex, + final Integer maxSphere, final Set exclude, final boolean withPseudoAtoms) { // create queue and connection tree for BFS final Queue queue = new LinkedList<>(); queue.add(new int[]{rootAtomIndex, 0}); - final ConnectionTree connectionTree = new ConnectionTree(ac.getAtom(rootAtomIndex), rootAtomIndex); + final ConnectionTree connectionTree = new ConnectionTree(structure.getAtom(rootAtomIndex), rootAtomIndex); - BFS(ac, connectionTree, queue, new HashSet<>(), exclude, maxSphere); + BFS(structure, connectionTree, queue, new HashSet<>(), exclude, maxSphere); + // close rings + closeRings(connectionTree, structure); + // add pseudo atoms + if (withPseudoAtoms) { + attachPseudoAtoms(connectionTree, structure); + } + + return connectionTree; + } + + public static void closeRings(final ConnectionTree connectionTree, final IAtomContainer structure) { // close rings IBond bond; final int maxSphereTree = connectionTree.getMaxSphere(false); @@ -64,11 +164,10 @@ public static ConnectionTree buildConnectionTree(final IAtomContainer ac, final for (int s2 = s; s2 <= maxSphereTree; s2++) { for (final ConnectionTreeNode nodeInSphere2 : connectionTree.getNodesInSphere(s2, false)) { - if ((ac.getBond(nodeInSphere1.getAtom(), nodeInSphere2.getAtom()) + if ((structure.getBond(nodeInSphere1.getAtom(), nodeInSphere2.getAtom()) != null) - && 
!ConnectionTree.hasRingClosureParent(nodeInSphere1, nodeInSphere2) - && !ConnectionTree.hasRingClosureParent(nodeInSphere2, nodeInSphere1)) { - bond = ac.getBond(nodeInSphere1.getAtom(), nodeInSphere2.getAtom()); + && !ConnectionTree.nodesFormRingClosure(nodeInSphere1, nodeInSphere2)) { + bond = structure.getBond(nodeInSphere1.getAtom(), nodeInSphere2.getAtom()); connectionTree.addRingClosureNode(nodeInSphere1.getKey(), nodeInSphere2.getKey(), bond); connectionTree.addRingClosureNode(nodeInSphere2.getKey(), nodeInSphere1.getKey(), bond); } @@ -76,21 +175,23 @@ public static ConnectionTree buildConnectionTree(final IAtomContainer ac, final } } } - // add pseudo atoms - if (withPseudoAtoms) { - for (final ConnectionTreeNode node : connectionTree.getNodes(false)) { - for (final IAtom connectedAtom : ac.getConnectedAtomsList(node.getAtom())) { - if (connectionTree.getBond(node.getKey(), connectedAtom.getIndex()) - == null) { - addPseudoNode(connectionTree, ac.getAtomCount() - + connectionTree.getNodesCount(false), node.getKey(), - ac.getBond(node.getAtom(), connectedAtom)); - } + } + + public static void attachPseudoAtoms(final ConnectionTree connectionTree, final IAtomContainer structure) { + int atomIndexInStructure; + for (final ConnectionTreeNode node : connectionTree.getNodes(false)) { + for (final IAtom connectedAtom : structure.getConnectedAtomsList(node.getAtom())) { + atomIndexInStructure = structure.indexOf(connectedAtom); + if (connectionTree.getBond(node.getKey(), atomIndexInStructure) + == null + && connectionTree.getBond(atomIndexInStructure, node.getKey()) + == null) { + addPseudoNode(connectionTree, structure.getAtomCount() + + connectionTree.getNodesCount(false), node.getKey(), + structure.getBond(node.getAtom(), connectedAtom)); } } } - - return connectionTree; } /** @@ -106,10 +207,10 @@ public static ConnectionTree buildConnectionTree(final IAtomContainer ac, final * @param visited atom indices which are already "visited" and * should be ignored * 
@param exclude atom indices which to exclude from search - * @param maxSphere spherical limit + * @param maxSphere spherical limit, a null value means no limit */ private static void BFS(final IAtomContainer ac, final ConnectionTree connectionTree, final Queue queue, - final Set visited, final Set exclude, final int maxSphere) { + final Set visited, final Set exclude, final Integer maxSphere) { // all nodes visited? if (queue.isEmpty()) { return; @@ -126,11 +227,13 @@ private static void BFS(final IAtomContainer ac, final ConnectionTree connection // go to all child nodes int connectedAtomIndex; for (final IAtom connectedAtom : ac.getConnectedAtomsList(atom)) { - connectedAtomIndex = connectedAtom.getIndex(); + connectedAtomIndex = ac.indexOf(connectedAtom); bond = ac.getBond(atom, connectedAtom); // add children to queue if not already visited and connection is allowed or maxSphere is not reached yet if (!exclude.contains(connectedAtomIndex)) { if (keepBond(atom, connectedAtom, bond) + || maxSphere + == null || sphere < maxSphere) { // add children to queue if not already visited and not already waiting in queue @@ -250,6 +353,19 @@ public static IAtomContainer toAtomContainer(final ConnectionTree connectionTree return ac; } + /** + * Adds a subtree to a node in another connection tree. + * + * @param connectionTree connection tree + * @param parentNodeKey parent node key in connection tree + * @param subtree subtree to add + * @param bondToLink bond + */ + public static boolean addToConnectionTree(final ConnectionTree connectionTree, final int parentNodeKey, + final ConnectionTree subtree, final IBond bondToLink) { + return ConnectionTree.addSubtree(connectionTree, parentNodeKey, subtree, bondToLink); + } + /** * Adds the substructure of a connection tree to an atom container.
* The substructure can be linked via a bond and an atom index in the container, but this is optional. From 34ee70538222496c95cb59ef3491eebbe8b2ecf5 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Sat, 19 Jun 2021 13:25:43 +0200 Subject: [PATCH 233/405] feat: added method for checking equivalences --- src/casekit/nmr/model/Spectrum.java | 36 +++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/src/casekit/nmr/model/Spectrum.java b/src/casekit/nmr/model/Spectrum.java index ab48d7e..85b730b 100644 --- a/src/casekit/nmr/model/Spectrum.java +++ b/src/casekit/nmr/model/Spectrum.java @@ -129,6 +129,30 @@ public Integer addSignal(final Signal signal) { return this.addSignal(signal, pickPrecisions, true); } + /** + * Checks for equivalent signals in all dimensions. + * + * @param signal signal + * @param pickPrecisions picking precision per dimension + * @param checkMultiplicity whether multiplicity has to be checked too + * + * @return + */ + public List checkForEquivalences(final Signal signal, final double[] pickPrecisions, + final boolean checkMultiplicity) { + // check for equivalent signals in all dimensions + final List closestSignalIndexList = this.pickByClosestShift(signal.getShift(0), 0, pickPrecisions[0]); + for (int dim = 1; dim + < this.getNDim(); dim++) { + closestSignalIndexList.retainAll(this.pickByClosestShift(signal.getShift(dim), dim, pickPrecisions[dim])); + } + if (checkMultiplicity) { + closestSignalIndexList.retainAll(this.pickByMultiplicity(signal.getMultiplicity())); + } + + return closestSignalIndexList; + } + /** * Adds a signal to this spectrum and stores an equivalent signal index. 
* @@ -146,16 +170,8 @@ public Integer addSignal(final Signal signal, final double[] pickPrecisions, fin return null; } - // check for equivalent signals in all dimensions - final List closestSignalIndexList = this.pickByClosestShift(signal.getShift(0), 0, pickPrecisions[0]); - for (int dim = 1; dim - < this.getNDim(); dim++) { - closestSignalIndexList.retainAll(this.pickByClosestShift(signal.getShift(dim), dim, pickPrecisions[dim])); - } - if (checkMultiplicity) { - closestSignalIndexList.retainAll(this.pickByMultiplicity(signal.getMultiplicity())); - } - + final List closestSignalIndexList = this.checkForEquivalences(signal, pickPrecisions, + checkMultiplicity); // if no equivalent signal was found then just add as new signal if (closestSignalIndexList.isEmpty()) { this.addSignalWithoutEquivalenceSearch(signal); From e3f6f768baa8d0babea73c43a994298473e2a190 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Sat, 19 Jun 2021 13:34:31 +0200 Subject: [PATCH 234/405] feat: added methods for building fragment trees and fragment datasets --- .../nmr/fragmentation/Fragmentation.java | 136 ++++++++++++++---- 1 file changed, 109 insertions(+), 27 deletions(-) diff --git a/src/casekit/nmr/fragmentation/Fragmentation.java b/src/casekit/nmr/fragmentation/Fragmentation.java index 7f34dc7..ff2b95d 100644 --- a/src/casekit/nmr/fragmentation/Fragmentation.java +++ b/src/casekit/nmr/fragmentation/Fragmentation.java @@ -2,6 +2,8 @@ import casekit.nmr.fragmentation.model.ConnectionTree; import casekit.nmr.fragmentation.model.ConnectionTreeNode; +import casekit.nmr.model.*; +import casekit.nmr.utils.Utils; import org.openscience.cdk.exception.CDKException; import org.openscience.cdk.graph.Cycles; import org.openscience.cdk.interfaces.IAtom; @@ -11,19 +13,110 @@ import org.openscience.cdk.silent.Bond; import org.openscience.cdk.silent.PseudoAtom; import org.openscience.cdk.silent.SilentChemObjectBuilder; -import org.openscience.cdk.smiles.SmilesGenerator; import java.util.*; +import 
java.util.stream.Collectors; public class Fragmentation { + public static List buildFragments(final DataSet dataSet, final Integer maxSphere, + final Integer maxSphereRing, final boolean withPseudoAtoms) { + final List fragmentDataSetList = new ArrayList<>(); + final List fragmentTrees = buildFragmentTrees(dataSet.getStructure() + .toAtomContainer(), maxSphere, + maxSphereRing, withPseudoAtoms); + final IAtomContainer structure = dataSet.getStructure() + .toAtomContainer(); + final String spectrumAtomType = Utils.getAtomTypeFromSpectrum(dataSet.getSpectrum(), 0); + List substructureAtomIndices; + Spectrum subspectrum; + Assignment subassignment; + IAtom atomInStructure; + Signal signal; + DataSet subDataSet; + for (final ConnectionTree fragmentTree : fragmentTrees) { + substructureAtomIndices = fragmentTree.getKeys(); + subspectrum = new Spectrum(); + subspectrum.setNuclei(dataSet.getSpectrum() + .getNuclei()); + subspectrum.setSignals(new ArrayList<>()); + subassignment = new Assignment(); + subassignment.setNuclei(subspectrum.getNuclei()); + subassignment.initAssignments(0); + for (int j = 0; j + < substructureAtomIndices.size(); j++) { + atomInStructure = structure.getAtom(substructureAtomIndices.get(j)); + if (atomInStructure.getSymbol() + .equals(spectrumAtomType)) { + if (dataSet.getAssignment() + .getIndex(0, substructureAtomIndices.get(j)) + == null + || dataSet.getSpectrum() + .getSignal(dataSet.getAssignment() + .getIndex(0, substructureAtomIndices.get(j))) + == null) { + return null; + } + + signal = dataSet.getSpectrum() + .getSignal(dataSet.getAssignment() + .getIndex(0, substructureAtomIndices.get(j))); + if (signal + != null) { + signal = signal.buildClone(); + final int atomIndex = j; + final List closestSignalIndexList = subspectrum.checkForEquivalences(signal, + new double[]{0.0}, + true); + if (closestSignalIndexList.isEmpty()) { + signal.setEquivalencesCount(1); + subspectrum.addSignal(signal); + subassignment.addAssignment(0, new 
int[]{atomIndex}); + } else { + final int signalIndex = closestSignalIndexList.get(0); + if (Arrays.stream(subassignment.getAssignment(0, signalIndex)) + .noneMatch(equiv -> equiv + == atomIndex)) { + subspectrum.getSignal(signalIndex) + .setEquivalencesCount(subspectrum.getSignal(signalIndex) + .getEquivalencesCount() + + 1); // + 1 because we add one atom only + subassignment.addAssignmentEquivalence(0, signalIndex, atomIndex); + } + } + } + } + } + subspectrum.setSolvent(dataSet.getSpectrum() + .getSolvent()); + subspectrum.setSpectrometerFrequency(dataSet.getSpectrum() + .getSpectrometerFrequency()); + + subDataSet = new DataSet(); + subDataSet.setStructure(new ExtendedConnectionMatrix(toAtomContainer(fragmentTree))); + subDataSet.setSpectrum(subspectrum); + subDataSet.setAssignment(subassignment); + + fragmentDataSetList.add(subDataSet); + } + + return fragmentDataSetList; + } + public static List buildFragments(final IAtomContainer structure, final Integer maxSphere, final Integer maxSphereRing, final boolean withPseudoAtoms) { - final List fragments = new ArrayList<>(); + final List fragmentTrees = buildFragmentTrees(structure, maxSphere, maxSphereRing, + withPseudoAtoms); + return fragmentTrees.stream() + .map(Fragmentation::toAtomContainer) + .collect(Collectors.toList()); + } + + public static List buildFragmentTrees(final IAtomContainer structure, final Integer maxSphere, + final Integer maxSphereRing, final boolean withPseudoAtoms) { + final List fragments = new ArrayList<>(); try { // build fragments from detected rings and extend by given maximum sphere for rings - final Set smilesSet = new HashSet<>(); - String smiles; ConnectionTree connectionTreeRing, connectionTreeOuterSphere, subtreeToAdd; final IRingSet ringSet = Cycles.all(structure)//essential(structure) .toRingSet(); @@ -46,12 +139,12 @@ public static List buildFragments(final IAtomContainer structure atomIndicesOutOfRing.add(j); } } - connectionTreeRing = buildConnectionTree(structure, 
atomIndicesInRing.get(0), null, - atomIndicesOutOfRing, false); + connectionTreeRing = buildFragmentTree(structure, atomIndicesInRing.get(0), null, atomIndicesOutOfRing, + false); // add missing outer sphere nodes to ring for (int k = 0; k < ringAtomContainer.getAtomCount(); k++) { - connectionTreeOuterSphere = Fragmentation.buildConnectionTree(structure, structure.indexOf( + connectionTreeOuterSphere = Fragmentation.buildFragmentTree(structure, structure.indexOf( ringAtomContainer.getAtom(k)), maxSphereRing, new HashSet<>(atomIndicesInRing), false); if (connectionTreeOuterSphere.getMaxSphere(false) == 0) { @@ -75,24 +168,13 @@ public static List buildFragments(final IAtomContainer structure if (withPseudoAtoms) { attachPseudoAtoms(connectionTreeRing, structure); } - smiles = SmilesGenerator.absolute() - .create(ringAtomContainer); - if (!smilesSet.contains(smiles)) { - smilesSet.add(smiles); - fragments.add(toAtomContainer(connectionTreeRing)); - } + fragments.add(connectionTreeRing); } // build fragment for each non-ring atom for (int i = 0; i < structure.getAtomCount(); i++) { - final IAtomContainer fragment = Fragmentation.buildFragment(structure, i, maxSphere, new HashSet<>(), - withPseudoAtoms); - smiles = SmilesGenerator.absolute() - .create(fragment); - if (!smilesSet.contains(smiles)) { - smilesSet.add(smiles); - fragments.add(fragment); - } + fragments.add( + Fragmentation.buildFragmentTree(structure, i, maxSphere, new HashSet<>(), withPseudoAtoms)); } } catch (final CDKException e) { e.printStackTrace(); @@ -102,7 +184,7 @@ public static List buildFragments(final IAtomContainer structure } /** - * Creates an atom container from a given connection tree built by using {@link #buildConnectionTree(IAtomContainer, int, Integer, Set, boolean)}. + * Creates an atom container from a given connection tree built by using {@link #buildFragmentTree(IAtomContainer, int, Integer, Set, boolean)}. 
* * @param ac atom container to go through * @param rootAtomIndex root atom index to start from @@ -115,7 +197,7 @@ public static List buildFragments(final IAtomContainer structure public static IAtomContainer buildFragment(final IAtomContainer ac, final int rootAtomIndex, final Integer maxSphere, final Set exclude, final boolean withPseudoAtoms) { - return toAtomContainer(buildConnectionTree(ac, rootAtomIndex, maxSphere, exclude, withPseudoAtoms)); + return toAtomContainer(buildFragmentTree(ac, rootAtomIndex, maxSphere, exclude, withPseudoAtoms)); } /** @@ -133,9 +215,9 @@ public static IAtomContainer buildFragment(final IAtomContainer ac, final int ro * * @return connection tree */ - public static ConnectionTree buildConnectionTree(final IAtomContainer structure, final int rootAtomIndex, - final Integer maxSphere, final Set exclude, - final boolean withPseudoAtoms) { + public static ConnectionTree buildFragmentTree(final IAtomContainer structure, final int rootAtomIndex, + final Integer maxSphere, final Set exclude, + final boolean withPseudoAtoms) { // create queue and connection tree for BFS final Queue queue = new LinkedList<>(); queue.add(new int[]{rootAtomIndex, 0}); @@ -447,6 +529,6 @@ public static void addToAtomContainer(final ConnectionTree connectionTree, final public static List buildFragmentAtomIndicesList(final IAtomContainer structure, final int rootAtomIndex, final Integer maxSphere, final Set exclude, final boolean withPseudoAtoms) { - return buildConnectionTree(structure, rootAtomIndex, maxSphere, exclude, withPseudoAtoms).getKeys(); + return buildFragmentTree(structure, rootAtomIndex, maxSphere, exclude, withPseudoAtoms).getKeys(); } } From b19035c626f91914c7aaf0b8ccb495b5a0c9cc97 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Sun, 20 Jun 2021 13:00:06 +0200 Subject: [PATCH 235/405] fix: fixed invalid atom index usage --- src/casekit/nmr/fragmentation/Fragmentation.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git 
a/src/casekit/nmr/fragmentation/Fragmentation.java b/src/casekit/nmr/fragmentation/Fragmentation.java index ff2b95d..6e21dde 100644 --- a/src/casekit/nmr/fragmentation/Fragmentation.java +++ b/src/casekit/nmr/fragmentation/Fragmentation.java @@ -45,6 +45,11 @@ public static List buildFragments(final DataSet dataSet, final Integer subassignment.initAssignments(0); for (int j = 0; j < substructureAtomIndices.size(); j++) { + if (substructureAtomIndices.get(j) + >= structure.getAtomCount()) { + // current node/atom is pseudo + continue; + } atomInStructure = structure.getAtom(substructureAtomIndices.get(j)); if (atomInStructure.getSymbol() .equals(spectrumAtomType)) { From 6f682a347b38fe5f188260ed78d290d838b3813d Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Sun, 20 Jun 2021 13:12:16 +0200 Subject: [PATCH 236/405] fix: ensure pseudo atom when converting ExtendedConnectionMatrix to IAtomContainer --- src/casekit/nmr/model/ExtendedConnectionMatrix.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/casekit/nmr/model/ExtendedConnectionMatrix.java b/src/casekit/nmr/model/ExtendedConnectionMatrix.java index 1199bf8..630b44b 100644 --- a/src/casekit/nmr/model/ExtendedConnectionMatrix.java +++ b/src/casekit/nmr/model/ExtendedConnectionMatrix.java @@ -23,6 +23,7 @@ import org.openscience.cdk.interfaces.IBond; import org.openscience.cdk.silent.Atom; import org.openscience.cdk.silent.Bond; +import org.openscience.cdk.silent.PseudoAtom; import org.openscience.cdk.silent.SilentChemObjectBuilder; import java.util.Arrays; @@ -370,7 +371,11 @@ public IAtomContainer toAtomContainer() { IAtom atom; for (int i = 0; i < this.connectionMatrix.length; i++) { - atom = new Atom(this.atomTypes[i]); + if (this.atomTypes[i].equals("R")) { + atom = new PseudoAtom("R"); + } else { + atom = new Atom(this.atomTypes[i]); + } atom.setImplicitHydrogenCount(this.atomPropertiesNumeric[i][0]); atom.setValency(this.atomPropertiesNumeric[i][1]); 
atom.setFormalCharge(this.atomPropertiesNumeric[i][2]); From d45b897bfaec4f286affab24f257f6b1ce9f80f7 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Sun, 20 Jun 2021 14:38:52 +0200 Subject: [PATCH 237/405] feat: addition and usage of method for removing identical fragment trees --- .../nmr/fragmentation/Fragmentation.java | 33 ++++++++++++++++--- 1 file changed, 28 insertions(+), 5 deletions(-) diff --git a/src/casekit/nmr/fragmentation/Fragmentation.java b/src/casekit/nmr/fragmentation/Fragmentation.java index 6e21dde..7d8c28d 100644 --- a/src/casekit/nmr/fragmentation/Fragmentation.java +++ b/src/casekit/nmr/fragmentation/Fragmentation.java @@ -119,9 +119,9 @@ public static List buildFragments(final IAtomContainer structure public static List buildFragmentTrees(final IAtomContainer structure, final Integer maxSphere, final Integer maxSphereRing, final boolean withPseudoAtoms) { - final List fragments = new ArrayList<>(); + final List fragmentTrees = new ArrayList<>(); try { - // build fragments from detected rings and extend by given maximum sphere for rings + // build fragmentTrees from detected rings and extend by given maximum sphere for rings ConnectionTree connectionTreeRing, connectionTreeOuterSphere, subtreeToAdd; final IRingSet ringSet = Cycles.all(structure)//essential(structure) .toRingSet(); @@ -173,19 +173,42 @@ public static List buildFragmentTrees(final IAtomContainer struc if (withPseudoAtoms) { attachPseudoAtoms(connectionTreeRing, structure); } - fragments.add(connectionTreeRing); + fragmentTrees.add(connectionTreeRing); } // build fragment for each non-ring atom for (int i = 0; i < structure.getAtomCount(); i++) { - fragments.add( + fragmentTrees.add( Fragmentation.buildFragmentTree(structure, i, maxSphere, new HashSet<>(), withPseudoAtoms)); } } catch (final CDKException e) { e.printStackTrace(); } + removeDuplicates(fragmentTrees); - return fragments; + return fragmentTrees; + } + + public static void removeDuplicates(final List 
fragmentTrees) { + final List> keySets = new ArrayList<>(); + final List fragmentsToRemove = new ArrayList<>(); + for (final ConnectionTree fragment : fragmentTrees) { + // ignore pseudo nodes + final Set keySet = fragment.getNodes(false) + .stream() + .filter(node -> !node.isPseudoNode()) + .map(ConnectionTreeNode::getKey) + .collect(Collectors.toSet()); + if (keySets.stream() + .noneMatch(keySetTemp -> keySetTemp.size() + == keySet.size() + && keySetTemp.containsAll(keySet))) { + keySets.add(keySet); + } else { + fragmentsToRemove.add(fragment); + } + } + fragmentTrees.removeAll(fragmentsToRemove); } /** From 31c5c8b94e37be432f37ef66ba4d065e820ea841 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Mon, 21 Jun 2021 11:07:13 +0200 Subject: [PATCH 238/405] feat: consider fused ring systems in fragmentation too --- src/casekit/nmr/fragmentation/Fragmentation.java | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/casekit/nmr/fragmentation/Fragmentation.java b/src/casekit/nmr/fragmentation/Fragmentation.java index 7d8c28d..c82f150 100644 --- a/src/casekit/nmr/fragmentation/Fragmentation.java +++ b/src/casekit/nmr/fragmentation/Fragmentation.java @@ -10,6 +10,7 @@ import org.openscience.cdk.interfaces.IAtomContainer; import org.openscience.cdk.interfaces.IBond; import org.openscience.cdk.interfaces.IRingSet; +import org.openscience.cdk.ringsearch.RingSearch; import org.openscience.cdk.silent.Bond; import org.openscience.cdk.silent.PseudoAtom; import org.openscience.cdk.silent.SilentChemObjectBuilder; @@ -125,12 +126,20 @@ public static List buildFragmentTrees(final IAtomContainer struc ConnectionTree connectionTreeRing, connectionTreeOuterSphere, subtreeToAdd; final IRingSet ringSet = Cycles.all(structure)//essential(structure) .toRingSet(); + final List ringFragments = new ArrayList<>(); + for (int i = 0; i + < ringSet.getAtomContainerCount(); i++) { + ringFragments.add(ringSet.getAtomContainer(i)); + } + // add missing fused rings + 
final RingSearch ringSearch = new RingSearch(structure); + ringFragments.addAll(ringSearch.fusedRingFragments()); List atomIndicesInRing; Set atomIndicesOutOfRing; IAtomContainer ringAtomContainer; for (int i = 0; i - < ringSet.getAtomContainerCount(); i++) { - ringAtomContainer = ringSet.getAtomContainer(i); + < ringFragments.size(); i++) { + ringAtomContainer = ringFragments.get(i); // add already "visited" ring nodes atomIndicesInRing = new ArrayList<>(); for (int k = 0; k From 97311d11b3785dfe0710d7bc962ded1b9397661d Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Mon, 21 Jun 2021 11:49:27 +0200 Subject: [PATCH 239/405] feat: implemented separated method buildRingFragmentTrees and method descriptions were added --- .../nmr/fragmentation/Fragmentation.java | 81 +++++++++++++++---- 1 file changed, 66 insertions(+), 15 deletions(-) diff --git a/src/casekit/nmr/fragmentation/Fragmentation.java b/src/casekit/nmr/fragmentation/Fragmentation.java index c82f150..5b368fa 100644 --- a/src/casekit/nmr/fragmentation/Fragmentation.java +++ b/src/casekit/nmr/fragmentation/Fragmentation.java @@ -20,6 +20,19 @@ public class Fragmentation { + /** + * Builds connection trees starting at each atom in structure, including ring atoms. + * Duplicates will be removed and subspectra and assignments set. 
+ * + * @param dataSet dataset with structure to build the fragments from + * @param maxSphere maximum spherical limit for single atom fragment creation + * @param maxSphereRing maximum spherical limit for ring atom fragment creation + * @param withPseudoAtoms whether to place pseudo atoms in "outer" sphere + * + * @return + * + * @see #buildFragmentTrees(IAtomContainer, Integer, Integer, boolean) + */ public static List buildFragments(final DataSet dataSet, final Integer maxSphere, final Integer maxSphereRing, final boolean withPseudoAtoms) { final List fragmentDataSetList = new ArrayList<>(); @@ -109,6 +122,20 @@ public static List buildFragments(final DataSet dataSet, final Integer return fragmentDataSetList; } + /** + * Builds fragments as atom containers starting at each atom in structure, including ring atoms. + * Duplicates will be removed. + * + * @param structure structure to build the fragments from + * @param maxSphere maximum spherical limit for single atom fragment creation + * @param maxSphereRing maximum spherical limit for ring atom fragment creation + * @param withPseudoAtoms whether to place pseudo atoms in "outer" sphere + * + * @return + * + * @see #buildFragmentTrees(IAtomContainer, Integer, Integer, boolean) + * @see #toAtomContainer(ConnectionTree) + */ public static List buildFragments(final IAtomContainer structure, final Integer maxSphere, final Integer maxSphereRing, final boolean withPseudoAtoms) { final List fragmentTrees = buildFragmentTrees(structure, maxSphere, maxSphereRing, @@ -118,11 +145,12 @@ public static List buildFragments(final IAtomContainer structure .collect(Collectors.toList()); } - public static List buildFragmentTrees(final IAtomContainer structure, final Integer maxSphere, - final Integer maxSphereRing, final boolean withPseudoAtoms) { - final List fragmentTrees = new ArrayList<>(); + public static List buildRingFragmentTrees(final IAtomContainer structure, + final Integer maxSphereRing, + final boolean 
withPseudoAtoms) { + final List ringFragmentTrees = new ArrayList<>(); try { - // build fragmentTrees from detected rings and extend by given maximum sphere for rings + // build ring fragment trees from detected rings and extend by given maximum sphere for rings ConnectionTree connectionTreeRing, connectionTreeOuterSphere, subtreeToAdd; final IRingSet ringSet = Cycles.all(structure)//essential(structure) .toRingSet(); @@ -182,17 +210,40 @@ public static List buildFragmentTrees(final IAtomContainer struc if (withPseudoAtoms) { attachPseudoAtoms(connectionTreeRing, structure); } - fragmentTrees.add(connectionTreeRing); - } - // build fragment for each non-ring atom - for (int i = 0; i - < structure.getAtomCount(); i++) { - fragmentTrees.add( - Fragmentation.buildFragmentTree(structure, i, maxSphere, new HashSet<>(), withPseudoAtoms)); + ringFragmentTrees.add(connectionTreeRing); } } catch (final CDKException e) { e.printStackTrace(); } + + return ringFragmentTrees; + } + + /** + * Builds connection trees starting at each atom in structure, including ring atoms. + * Duplicates are removed. 
+ * + * @param structure structure to build the fragments from + * @param maxSphere maximum spherical limit for single atom fragment creation + * @param maxSphereRing maximum spherical limit for ring atom fragment creation + * @param withPseudoAtoms whether to place pseudo atoms in "outer" sphere + * + * @return + * + * @see #buildRingFragmentTrees(IAtomContainer, Integer, boolean) + * @see #buildFragmentTree(IAtomContainer, int, Integer, Set, boolean) + * @see #removeDuplicates(List) + */ + public static List buildFragmentTrees(final IAtomContainer structure, final Integer maxSphere, + final Integer maxSphereRing, final boolean withPseudoAtoms) { + // build fragment trees for rings + final List fragmentTrees = buildRingFragmentTrees(structure, maxSphereRing, withPseudoAtoms); + // build fragment for each single atom + for (int i = 0; i + < structure.getAtomCount(); i++) { + fragmentTrees.add( + Fragmentation.buildFragmentTree(structure, i, maxSphere, new HashSet<>(), withPseudoAtoms)); + } removeDuplicates(fragmentTrees); return fragmentTrees; @@ -238,11 +289,11 @@ public static IAtomContainer buildFragment(final IAtomContainer ac, final int ro } /** - * Function for extending a given connection tree only containing - * its root node (0th sphere) by means of Breadth-First-Search (BFS). + * Builds a fragment as connection tree from a given structure.
* Until a certain maximum sphere, each reachable next neighbor atom - * is stored in a parent-child-relationship. - * In addition, bonds within rings or between hetero atoms will be kept. + * is stored in a parent-child-relationship.
+ * And in addition, bonds between hetero atoms or carbon-hetero bonds will be kept. In such cases + * the maximum spherical limit will be ignored. * * @param structure atom container to go through * @param rootAtomIndex root atom index to start from From b01d927ed45cfdd9c189b27ef02c5447a860c366 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Wed, 23 Jun 2021 14:41:01 +0200 Subject: [PATCH 240/405] feat: added closeRings for IAtomContainer as input too --- src/casekit/nmr/fragmentation/Fragmentation.java | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/casekit/nmr/fragmentation/Fragmentation.java b/src/casekit/nmr/fragmentation/Fragmentation.java index 5b368fa..4e466f8 100644 --- a/src/casekit/nmr/fragmentation/Fragmentation.java +++ b/src/casekit/nmr/fragmentation/Fragmentation.java @@ -323,6 +323,14 @@ public static ConnectionTree buildFragmentTree(final IAtomContainer structure, f return connectionTree; } + public static IAtomContainer closeRings(final IAtomContainer substructure, final IAtomContainer structure) { + final ConnectionTree fragmentTree = Fragmentation.buildFragmentTree(substructure, 0, null, new HashSet<>(), + false); + Fragmentation.closeRings(fragmentTree, structure); + + return Fragmentation.toAtomContainer(fragmentTree); + } + public static void closeRings(final ConnectionTree connectionTree, final IAtomContainer structure) { // close rings IBond bond; From 0d0652b1932543da54bf01b8d05db2b4314c5253 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Wed, 23 Jun 2021 16:42:11 +0200 Subject: [PATCH 241/405] feat: added initKeySet method to ConnectionTree --- src/casekit/nmr/fragmentation/model/ConnectionTree.java | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/casekit/nmr/fragmentation/model/ConnectionTree.java b/src/casekit/nmr/fragmentation/model/ConnectionTree.java index 8d11866..bc735b9 100644 --- a/src/casekit/nmr/fragmentation/model/ConnectionTree.java +++ b/src/casekit/nmr/fragmentation/model/ConnectionTree.java @@ 
-16,6 +16,7 @@ import org.openscience.cdk.interfaces.IBond; import java.util.*; +import java.util.stream.Collectors; /** * Represents a tree of connected atoms (nodes) of a molecule @@ -180,6 +181,14 @@ public static boolean addSubtree(final ConnectionTree connectionTree, final int return true; } + public void initKeySet() { + this.keySet.clear(); + this.keySet.addAll(this.getNodes(false) + .stream() + .map(ConnectionTreeNode::getKey) + .collect(Collectors.toSet())); + } + public boolean addKey(final int key) { return this.keySet.add(key); } From 03f4d2ff53c50035867a0003ba2d664855e599e2 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Wed, 23 Jun 2021 16:43:55 +0200 Subject: [PATCH 242/405] chore: moved additional methods to FragmentationUtils --- .../nmr/fragmentation/Fragmentation.java | 208 +++--------------- .../nmr/fragmentation/FragmentationUtils.java | 198 +++++++++++++++++ src/casekit/nmr/hose/HOSECodeBuilder.java | 7 +- 3 files changed, 227 insertions(+), 186 deletions(-) create mode 100644 src/casekit/nmr/fragmentation/FragmentationUtils.java diff --git a/src/casekit/nmr/fragmentation/Fragmentation.java b/src/casekit/nmr/fragmentation/Fragmentation.java index 4e466f8..7e06f6d 100644 --- a/src/casekit/nmr/fragmentation/Fragmentation.java +++ b/src/casekit/nmr/fragmentation/Fragmentation.java @@ -11,9 +11,7 @@ import org.openscience.cdk.interfaces.IBond; import org.openscience.cdk.interfaces.IRingSet; import org.openscience.cdk.ringsearch.RingSearch; -import org.openscience.cdk.silent.Bond; import org.openscience.cdk.silent.PseudoAtom; -import org.openscience.cdk.silent.SilentChemObjectBuilder; import java.util.*; import java.util.stream.Collectors; @@ -33,12 +31,18 @@ public class Fragmentation { * * @see #buildFragmentTrees(IAtomContainer, Integer, Integer, boolean) */ - public static List buildFragments(final DataSet dataSet, final Integer maxSphere, - final Integer maxSphereRing, final boolean withPseudoAtoms) { - final List fragmentDataSetList = new 
ArrayList<>(); + public static List buildFragmentDataSets(final DataSet dataSet, final Integer maxSphere, + final Integer maxSphereRing, final boolean withPseudoAtoms) { + final List fragmentTrees = buildFragmentTrees(dataSet.getStructure() .toAtomContainer(), maxSphere, maxSphereRing, withPseudoAtoms); + return fragmentTreesToSubDataSets(dataSet, fragmentTrees); + } + + public static List fragmentTreesToSubDataSets(final DataSet dataSet, + final List fragmentTrees) { + final List fragmentDataSetList = new ArrayList<>(); final IAtomContainer structure = dataSet.getStructure() .toAtomContainer(); final String spectrumAtomType = Utils.getAtomTypeFromSpectrum(dataSet.getSpectrum(), 0); @@ -112,7 +116,7 @@ public static List buildFragments(final DataSet dataSet, final Integer .getSpectrometerFrequency()); subDataSet = new DataSet(); - subDataSet.setStructure(new ExtendedConnectionMatrix(toAtomContainer(fragmentTree))); + subDataSet.setStructure(new ExtendedConnectionMatrix(FragmentationUtils.toAtomContainer(fragmentTree))); subDataSet.setSpectrum(subspectrum); subDataSet.setAssignment(subassignment); @@ -134,14 +138,14 @@ public static List buildFragments(final DataSet dataSet, final Integer * @return * * @see #buildFragmentTrees(IAtomContainer, Integer, Integer, boolean) - * @see #toAtomContainer(ConnectionTree) + * @see FragmentationUtils#toAtomContainer(ConnectionTree) */ public static List buildFragments(final IAtomContainer structure, final Integer maxSphere, final Integer maxSphereRing, final boolean withPseudoAtoms) { final List fragmentTrees = buildFragmentTrees(structure, maxSphere, maxSphereRing, withPseudoAtoms); return fragmentTrees.stream() - .map(Fragmentation::toAtomContainer) + .map(FragmentationUtils::toAtomContainer) .collect(Collectors.toList()); } @@ -194,18 +198,19 @@ public static List buildRingFragmentTrees(final IAtomContainer s } for (final int key : connectionTreeOuterSphere.getNodeKeysInSphere(1)) { subtreeToAdd = 
ConnectionTree.buildSubtree(connectionTreeOuterSphere, key); - if (!addToConnectionTree(connectionTreeRing, connectionTreeOuterSphere.getRootNode() - .getKey(), subtreeToAdd, - connectionTreeOuterSphere.getBond( - connectionTreeOuterSphere.getRootNode() - .getKey(), key))) { + if (!FragmentationUtils.addToConnectionTree(connectionTreeRing, + connectionTreeOuterSphere.getRootNode() + .getKey(), subtreeToAdd, + connectionTreeOuterSphere.getBond( + connectionTreeOuterSphere.getRootNode() + .getKey(), key))) { continue; } atomIndicesInRing.addAll(subtreeToAdd.getKeys()); } } // close rings - closeRings(connectionTreeRing, structure); + FragmentationUtils.closeRings(connectionTreeRing, structure); // attach pseudo atoms if desired if (withPseudoAtoms) { attachPseudoAtoms(connectionTreeRing, structure); @@ -232,7 +237,7 @@ public static List buildRingFragmentTrees(final IAtomContainer s * * @see #buildRingFragmentTrees(IAtomContainer, Integer, boolean) * @see #buildFragmentTree(IAtomContainer, int, Integer, Set, boolean) - * @see #removeDuplicates(List) + * @see FragmentationUtils#removeDuplicates(List) */ public static List buildFragmentTrees(final IAtomContainer structure, final Integer maxSphere, final Integer maxSphereRing, final boolean withPseudoAtoms) { @@ -244,33 +249,11 @@ public static List buildFragmentTrees(final IAtomContainer struc fragmentTrees.add( Fragmentation.buildFragmentTree(structure, i, maxSphere, new HashSet<>(), withPseudoAtoms)); } - removeDuplicates(fragmentTrees); + FragmentationUtils.removeDuplicates(fragmentTrees); return fragmentTrees; } - public static void removeDuplicates(final List fragmentTrees) { - final List> keySets = new ArrayList<>(); - final List fragmentsToRemove = new ArrayList<>(); - for (final ConnectionTree fragment : fragmentTrees) { - // ignore pseudo nodes - final Set keySet = fragment.getNodes(false) - .stream() - .filter(node -> !node.isPseudoNode()) - .map(ConnectionTreeNode::getKey) - .collect(Collectors.toSet()); - 
if (keySets.stream() - .noneMatch(keySetTemp -> keySetTemp.size() - == keySet.size() - && keySetTemp.containsAll(keySet))) { - keySets.add(keySet); - } else { - fragmentsToRemove.add(fragment); - } - } - fragmentTrees.removeAll(fragmentsToRemove); - } - /** * Creates an atom container from a given connection tree built by using {@link #buildFragmentTree(IAtomContainer, int, Integer, Set, boolean)}. * @@ -285,7 +268,8 @@ public static void removeDuplicates(final List fragmentTrees) { public static IAtomContainer buildFragment(final IAtomContainer ac, final int rootAtomIndex, final Integer maxSphere, final Set exclude, final boolean withPseudoAtoms) { - return toAtomContainer(buildFragmentTree(ac, rootAtomIndex, maxSphere, exclude, withPseudoAtoms)); + return FragmentationUtils.toAtomContainer( + buildFragmentTree(ac, rootAtomIndex, maxSphere, exclude, withPseudoAtoms)); } /** @@ -314,7 +298,7 @@ public static ConnectionTree buildFragmentTree(final IAtomContainer structure, f BFS(structure, connectionTree, queue, new HashSet<>(), exclude, maxSphere); // close rings - closeRings(connectionTree, structure); + FragmentationUtils.closeRings(connectionTree, structure); // add pseudo atoms if (withPseudoAtoms) { attachPseudoAtoms(connectionTree, structure); @@ -323,39 +307,7 @@ public static ConnectionTree buildFragmentTree(final IAtomContainer structure, f return connectionTree; } - public static IAtomContainer closeRings(final IAtomContainer substructure, final IAtomContainer structure) { - final ConnectionTree fragmentTree = Fragmentation.buildFragmentTree(substructure, 0, null, new HashSet<>(), - false); - Fragmentation.closeRings(fragmentTree, structure); - - return Fragmentation.toAtomContainer(fragmentTree); - } - - public static void closeRings(final ConnectionTree connectionTree, final IAtomContainer structure) { - // close rings - IBond bond; - final int maxSphereTree = connectionTree.getMaxSphere(false); - for (int s = 0; s - <= maxSphereTree; s++) { - for 
(final ConnectionTreeNode nodeInSphere1 : connectionTree.getNodesInSphere(s, false)) { - // set connections (parent nodes) in sphere nodes which have to be connected -> ring closures - for (int s2 = s; s2 - <= maxSphereTree; s2++) { - for (final ConnectionTreeNode nodeInSphere2 : connectionTree.getNodesInSphere(s2, false)) { - if ((structure.getBond(nodeInSphere1.getAtom(), nodeInSphere2.getAtom()) - != null) - && !ConnectionTree.nodesFormRingClosure(nodeInSphere1, nodeInSphere2)) { - bond = structure.getBond(nodeInSphere1.getAtom(), nodeInSphere2.getAtom()); - connectionTree.addRingClosureNode(nodeInSphere1.getKey(), nodeInSphere2.getKey(), bond); - connectionTree.addRingClosureNode(nodeInSphere2.getKey(), nodeInSphere1.getKey(), bond); - } - } - } - } - } - } - - public static void attachPseudoAtoms(final ConnectionTree connectionTree, final IAtomContainer structure) { + private static void attachPseudoAtoms(final ConnectionTree connectionTree, final IAtomContainer structure) { int atomIndexInStructure; for (final ConnectionTreeNode node : connectionTree.getNodes(false)) { for (final IAtom connectedAtom : structure.getConnectedAtomsList(node.getAtom())) { @@ -436,8 +388,6 @@ private static boolean addPseudoNode(final ConnectionTree connectionTree, final } final ConnectionTreeNode pseudoNode = connectionTree.getNode(pseudoNodeKey); pseudoNode.setIsPseudoNode(true); - // pseudoNode.getAtom() - // .setImplicitHydrogenCount(connectedAtom.getImplicitHydrogenCount()); return true; } @@ -514,114 +464,6 @@ private static boolean isCarbonAtom(final IAtom atom) { .equals("C"); } - /** - * Reconstructs a structure from a given connection tree, - * including ring closures. 
- * - * @param connectionTree connection tree - * - * @return IAtomContainer - */ - public static IAtomContainer toAtomContainer(final ConnectionTree connectionTree) { - // create new atom container and add the connection trees structure, beginning at the root atom - final IAtomContainer ac = SilentChemObjectBuilder.getInstance() - .newAtomContainer(); - addToAtomContainer(connectionTree, ac, null, null); - - return ac; - } - - /** - * Adds a subtree to a node in another connection tree. - * - * @param connectionTree connection tree - * @param parentNodeKey parent node key in connection tree - * @param subtree subtree to add - * @param bondToLink bond - */ - public static boolean addToConnectionTree(final ConnectionTree connectionTree, final int parentNodeKey, - final ConnectionTree subtree, final IBond bondToLink) { - return ConnectionTree.addSubtree(connectionTree, parentNodeKey, subtree, bondToLink); - } - - /** - * Adds the substructure of a connection tree to an atom container.
- * The substructure can be linked via a bond and an atom index in the container, but this is optional. - * If both, the bond and atom index to link, are not given (null) then the substructure will just be added - * to the atom container without linkage. - * - * @param connectionTree - * @param ac - * @param atomIndexInStructureToLink - * @param bondToLink - */ - public static void addToAtomContainer(final ConnectionTree connectionTree, final IAtomContainer ac, - final Integer atomIndexInStructureToLink, final IBond bondToLink) { - List nodesInSphere; - ConnectionTreeNode nodeInSphere, parentNode, partnerNode; - IBond bond, bondToParent; - // add root atom to given atom container and link it via a given linking bond - ac.addAtom(connectionTree.getRootNode() - .getAtom()); - if ((atomIndexInStructureToLink - != null) - && (bondToLink - != null)) { - final IBond bondToAdd = new Bond(ac.getAtom(atomIndexInStructureToLink), ac.getAtom(ac.getAtomCount() - - 1)); - bondToAdd.setOrder(bondToLink.getOrder()); - bondToAdd.setIsInRing(bondToLink.isInRing()); - bondToAdd.setIsAromatic(bondToLink.isAromatic()); - bondToAdd.setAtom(ac.getAtom(atomIndexInStructureToLink), 0); - bondToAdd.setAtom(ac.getAtom(ac.getAtomCount() - - 1), 1); - ac.addBond(bondToAdd); - } - // for each sphere: add the atom which is stored as node to atom container and set bonds between parent nodes - for (int s = 1; s - <= connectionTree.getMaxSphere(false); s++) { - // first add all atoms and its parents (previous sphere only, incl. 
bonds) to structure - nodesInSphere = connectionTree.getNodesInSphere(s, false); - for (int i = 0; i - < nodesInSphere.size(); i++) { - nodeInSphere = nodesInSphere.get(i); - if (nodeInSphere.isRingClosureNode()) { - continue; - } - ac.addAtom(nodeInSphere.getAtom()); - parentNode = nodeInSphere.getParent(); - bondToParent = nodeInSphere.getBondToParent(); - bond = new Bond(nodeInSphere.getAtom(), parentNode.getAtom(), bondToParent.getOrder()); - bond.setIsInRing(bondToParent.isInRing()); - bond.setIsAromatic(bondToParent.isAromatic()); - ac.addBond(bond); - } - } - for (int s = 1; s - <= connectionTree.getMaxSphere(true); s++) { - // and as second add the remaining bonds (ring closures) to structure - nodesInSphere = connectionTree.getNodesInSphere(s, true); - for (int i = 0; i - < nodesInSphere.size(); i++) { - nodeInSphere = nodesInSphere.get(i); - if (!nodeInSphere.isRingClosureNode()) { - continue; - } - parentNode = nodeInSphere.getParent(); - partnerNode = nodeInSphere.getRingClosureParent(); - if (ac.getBond(ac.getAtom(ac.indexOf(partnerNode.getAtom())), - ac.getAtom(ac.indexOf(parentNode.getAtom()))) - == null) { - bondToParent = nodeInSphere.getBondToParent(); - bond = new Bond(parentNode.getAtom(), partnerNode.getAtom(), bondToParent.getOrder()); - bond.setIsInRing(bondToParent.isInRing()); - bond.setIsAromatic(bondToParent.isAromatic()); - ac.addBond(bond); - } - } - } - } - public static List buildFragmentAtomIndicesList(final IAtomContainer structure, final int rootAtomIndex, final Integer maxSphere, final Set exclude, final boolean withPseudoAtoms) { diff --git a/src/casekit/nmr/fragmentation/FragmentationUtils.java b/src/casekit/nmr/fragmentation/FragmentationUtils.java new file mode 100644 index 0000000..8920efb --- /dev/null +++ b/src/casekit/nmr/fragmentation/FragmentationUtils.java @@ -0,0 +1,198 @@ +package casekit.nmr.fragmentation; + +import casekit.nmr.fragmentation.model.ConnectionTree; +import 
casekit.nmr.fragmentation.model.ConnectionTreeNode; +import org.openscience.cdk.interfaces.IAtomContainer; +import org.openscience.cdk.interfaces.IBond; +import org.openscience.cdk.silent.Bond; +import org.openscience.cdk.silent.SilentChemObjectBuilder; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; + +public class FragmentationUtils { + + public static boolean replaceNodeKeys(final ConnectionTree fragmentTree, final IAtomContainer structure) { + System.out.println("before: " + + fragmentTree.getKeys()); + int atomIndex; + for (final ConnectionTreeNode node : fragmentTree.getNodes(false)) { + atomIndex = structure.indexOf(node.getAtom()); + if (atomIndex + < 0) { + return false; + } + node.setKey(atomIndex); + } + fragmentTree.initKeySet(); + System.out.println("after: " + + fragmentTree.getKeys()); + + return true; + } + + public static void removeDuplicates(final List fragmentTrees) { + final List> keySets = new ArrayList<>(); + final List fragmentsToRemove = new ArrayList<>(); + for (final ConnectionTree fragment : fragmentTrees) { + // ignore pseudo nodes + final Set keySet = fragment.getNodes(false) + .stream() + .filter(node -> !node.isPseudoNode()) + .map(ConnectionTreeNode::getKey) + .collect(Collectors.toSet()); + if (keySets.stream() + .noneMatch(keySetTemp -> keySetTemp.size() + == keySet.size() + && keySetTemp.containsAll(keySet))) { + keySets.add(keySet); + } else { + fragmentsToRemove.add(fragment); + } + } + fragmentTrees.removeAll(fragmentsToRemove); + } + + public static IAtomContainer closeRings(final IAtomContainer substructure, final IAtomContainer structure) { + final ConnectionTree fragmentTree = Fragmentation.buildFragmentTree(substructure, 0, null, new HashSet<>(), + false); + closeRings(fragmentTree, structure); + + return toAtomContainer(fragmentTree); + } + + public static void closeRings(final ConnectionTree connectionTree, final IAtomContainer 
structure) { + // close rings + IBond bond; + final int maxSphereTree = connectionTree.getMaxSphere(false); + for (int s = 0; s + <= maxSphereTree; s++) { + for (final ConnectionTreeNode nodeInSphere1 : connectionTree.getNodesInSphere(s, false)) { + // set connections (parent nodes) in sphere nodes which have to be connected -> ring closures + for (int s2 = s; s2 + <= maxSphereTree; s2++) { + for (final ConnectionTreeNode nodeInSphere2 : connectionTree.getNodesInSphere(s2, false)) { + if ((structure.getBond(nodeInSphere1.getAtom(), nodeInSphere2.getAtom()) + != null) + && !ConnectionTree.nodesFormRingClosure(nodeInSphere1, nodeInSphere2)) { + bond = structure.getBond(nodeInSphere1.getAtom(), nodeInSphere2.getAtom()); + connectionTree.addRingClosureNode(nodeInSphere1.getKey(), nodeInSphere2.getKey(), bond); + connectionTree.addRingClosureNode(nodeInSphere2.getKey(), nodeInSphere1.getKey(), bond); + } + } + } + } + } + } + + /** + * Reconstructs a structure from a given connection tree, + * including ring closures. + * + * @param connectionTree connection tree + * + * @return IAtomContainer + */ + public static IAtomContainer toAtomContainer(final ConnectionTree connectionTree) { + // create new atom container and add the connection trees structure, beginning at the root atom + final IAtomContainer ac = SilentChemObjectBuilder.getInstance() + .newAtomContainer(); + addToAtomContainer(connectionTree, ac, null, null); + + return ac; + } + + /** + * Adds a subtree to a node in another connection tree. 
+ * + * @param connectionTree connection tree + * @param parentNodeKey parent node key in connection tree + * @param subtree subtree to add + * @param bondToLink bond + */ + public static boolean addToConnectionTree(final ConnectionTree connectionTree, final int parentNodeKey, + final ConnectionTree subtree, final IBond bondToLink) { + return ConnectionTree.addSubtree(connectionTree, parentNodeKey, subtree, bondToLink); + } + + /** + * Adds the substructure of a connection tree to an atom container.
+ * The substructure can be linked via a bond and an atom index in the container, but this is optional. + * If both, the bond and atom index to link, are not given (null) then the substructure will just be added + * to the atom container without linkage. + * + * @param connectionTree + * @param ac + * @param atomIndexInStructureToLink + * @param bondToLink + */ + public static void addToAtomContainer(final ConnectionTree connectionTree, final IAtomContainer ac, + final Integer atomIndexInStructureToLink, final IBond bondToLink) { + List nodesInSphere; + ConnectionTreeNode nodeInSphere, parentNode, partnerNode; + IBond bond, bondToParent; + // add root atom to given atom container and link it via a given linking bond + ac.addAtom(connectionTree.getRootNode() + .getAtom()); + if ((atomIndexInStructureToLink + != null) + && (bondToLink + != null)) { + final IBond bondToAdd = new Bond(ac.getAtom(atomIndexInStructureToLink), ac.getAtom(ac.getAtomCount() + - 1)); + bondToAdd.setOrder(bondToLink.getOrder()); + bondToAdd.setIsInRing(bondToLink.isInRing()); + bondToAdd.setIsAromatic(bondToLink.isAromatic()); + bondToAdd.setAtom(ac.getAtom(atomIndexInStructureToLink), 0); + bondToAdd.setAtom(ac.getAtom(ac.getAtomCount() + - 1), 1); + ac.addBond(bondToAdd); + } + // for each sphere: add the atom which is stored as node to atom container and set bonds between parent nodes + for (int s = 1; s + <= connectionTree.getMaxSphere(false); s++) { + // first add all atoms and its parents (previous sphere only, incl. 
bonds) to structure + nodesInSphere = connectionTree.getNodesInSphere(s, false); + for (int i = 0; i + < nodesInSphere.size(); i++) { + nodeInSphere = nodesInSphere.get(i); + if (nodeInSphere.isRingClosureNode()) { + continue; + } + ac.addAtom(nodeInSphere.getAtom()); + parentNode = nodeInSphere.getParent(); + bondToParent = nodeInSphere.getBondToParent(); + bond = new Bond(nodeInSphere.getAtom(), parentNode.getAtom(), bondToParent.getOrder()); + bond.setIsInRing(bondToParent.isInRing()); + bond.setIsAromatic(bondToParent.isAromatic()); + ac.addBond(bond); + } + } + for (int s = 1; s + <= connectionTree.getMaxSphere(true); s++) { + // and as second add the remaining bonds (ring closures) to structure + nodesInSphere = connectionTree.getNodesInSphere(s, true); + for (int i = 0; i + < nodesInSphere.size(); i++) { + nodeInSphere = nodesInSphere.get(i); + if (!nodeInSphere.isRingClosureNode()) { + continue; + } + parentNode = nodeInSphere.getParent(); + partnerNode = nodeInSphere.getRingClosureParent(); + if (ac.getBond(ac.getAtom(ac.indexOf(partnerNode.getAtom())), + ac.getAtom(ac.indexOf(parentNode.getAtom()))) + == null) { + bondToParent = nodeInSphere.getBondToParent(); + bond = new Bond(parentNode.getAtom(), partnerNode.getAtom(), bondToParent.getOrder()); + bond.setIsInRing(bondToParent.isInRing()); + bond.setIsAromatic(bondToParent.isAromatic()); + ac.addBond(bond); + } + } + } + } +} diff --git a/src/casekit/nmr/hose/HOSECodeBuilder.java b/src/casekit/nmr/hose/HOSECodeBuilder.java index 70f7bb0..74d2a57 100644 --- a/src/casekit/nmr/hose/HOSECodeBuilder.java +++ b/src/casekit/nmr/hose/HOSECodeBuilder.java @@ -12,7 +12,7 @@ package casekit.nmr.hose; -import casekit.nmr.fragmentation.Fragmentation; +import casekit.nmr.fragmentation.FragmentationUtils; import casekit.nmr.fragmentation.model.ConnectionTree; import casekit.nmr.fragmentation.model.ConnectionTreeNode; import org.openscience.cdk.exception.CDKException; @@ -572,10 +572,11 @@ private static void BFS(final 
IAtomContainer ac, final ConnectionTree connection * @return IAtomContainer * * @see #buildConnectionTree(String, boolean) - * @see Fragmentation#toAtomContainer(ConnectionTree) + * @see FragmentationUtils#toAtomContainer(ConnectionTree) */ public static IAtomContainer buildAtomContainer(final String HOSECode, final boolean useBremserElementNotation) throws CDKException { - return Fragmentation.toAtomContainer(HOSECodeBuilder.buildConnectionTree(HOSECode, useBremserElementNotation)); + return FragmentationUtils.toAtomContainer( + HOSECodeBuilder.buildConnectionTree(HOSECode, useBremserElementNotation)); } } \ No newline at end of file From 1d7269f61983cffda2c4623fd711a315289b1987 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Wed, 23 Jun 2021 17:00:52 +0200 Subject: [PATCH 243/405] chore: moved attachPseudoAtoms and addPseudoNode to FragmentationUtils --- .../nmr/fragmentation/Fragmentation.java | 34 ++---------------- .../nmr/fragmentation/FragmentationUtils.java | 36 ++++++++++++++++--- 2 files changed, 33 insertions(+), 37 deletions(-) diff --git a/src/casekit/nmr/fragmentation/Fragmentation.java b/src/casekit/nmr/fragmentation/Fragmentation.java index 7e06f6d..c225774 100644 --- a/src/casekit/nmr/fragmentation/Fragmentation.java +++ b/src/casekit/nmr/fragmentation/Fragmentation.java @@ -1,7 +1,6 @@ package casekit.nmr.fragmentation; import casekit.nmr.fragmentation.model.ConnectionTree; -import casekit.nmr.fragmentation.model.ConnectionTreeNode; import casekit.nmr.model.*; import casekit.nmr.utils.Utils; import org.openscience.cdk.exception.CDKException; @@ -11,7 +10,6 @@ import org.openscience.cdk.interfaces.IBond; import org.openscience.cdk.interfaces.IRingSet; import org.openscience.cdk.ringsearch.RingSearch; -import org.openscience.cdk.silent.PseudoAtom; import java.util.*; import java.util.stream.Collectors; @@ -213,7 +211,7 @@ public static List buildRingFragmentTrees(final IAtomContainer s FragmentationUtils.closeRings(connectionTreeRing, structure); 
// attach pseudo atoms if desired if (withPseudoAtoms) { - attachPseudoAtoms(connectionTreeRing, structure); + FragmentationUtils.attachPseudoAtoms(connectionTreeRing, structure); } ringFragmentTrees.add(connectionTreeRing); } @@ -301,29 +299,12 @@ public static ConnectionTree buildFragmentTree(final IAtomContainer structure, f FragmentationUtils.closeRings(connectionTree, structure); // add pseudo atoms if (withPseudoAtoms) { - attachPseudoAtoms(connectionTree, structure); + FragmentationUtils.attachPseudoAtoms(connectionTree, structure); } return connectionTree; } - private static void attachPseudoAtoms(final ConnectionTree connectionTree, final IAtomContainer structure) { - int atomIndexInStructure; - for (final ConnectionTreeNode node : connectionTree.getNodes(false)) { - for (final IAtom connectedAtom : structure.getConnectedAtomsList(node.getAtom())) { - atomIndexInStructure = structure.indexOf(connectedAtom); - if (connectionTree.getBond(node.getKey(), atomIndexInStructure) - == null - && connectionTree.getBond(atomIndexInStructure, node.getKey()) - == null) { - addPseudoNode(connectionTree, structure.getAtomCount() - + connectionTree.getNodesCount(false), node.getKey(), - structure.getBond(node.getAtom(), connectedAtom)); - } - } - } - } - /** * Function for extending a given connection tree only containing * its root node (0th sphere) by means of Breadth-First-Search (BFS). 
@@ -381,17 +362,6 @@ private static void BFS(final IAtomContainer ac, final ConnectionTree connection BFS(ac, connectionTree, queue, visited, exclude, maxSphere); } - private static boolean addPseudoNode(final ConnectionTree connectionTree, final int pseudoNodeKey, - final int parentNodeKey, final IBond bondToParent) { - if (!connectionTree.addNode(new PseudoAtom("R"), pseudoNodeKey, parentNodeKey, bondToParent)) { - return false; - } - final ConnectionTreeNode pseudoNode = connectionTree.getNode(pseudoNodeKey); - pseudoNode.setIsPseudoNode(true); - - return true; - } - private static boolean keepBond(final IAtom atom1, final IAtom atom2, final IBond bond) { // hetero-hetero or carbon-hetero if ((isHeteroAtom(atom1) diff --git a/src/casekit/nmr/fragmentation/FragmentationUtils.java b/src/casekit/nmr/fragmentation/FragmentationUtils.java index 8920efb..316341a 100644 --- a/src/casekit/nmr/fragmentation/FragmentationUtils.java +++ b/src/casekit/nmr/fragmentation/FragmentationUtils.java @@ -2,9 +2,11 @@ import casekit.nmr.fragmentation.model.ConnectionTree; import casekit.nmr.fragmentation.model.ConnectionTreeNode; +import org.openscience.cdk.interfaces.IAtom; import org.openscience.cdk.interfaces.IAtomContainer; import org.openscience.cdk.interfaces.IBond; import org.openscience.cdk.silent.Bond; +import org.openscience.cdk.silent.PseudoAtom; import org.openscience.cdk.silent.SilentChemObjectBuilder; import java.util.ArrayList; @@ -15,9 +17,7 @@ public class FragmentationUtils { - public static boolean replaceNodeKeys(final ConnectionTree fragmentTree, final IAtomContainer structure) { - System.out.println("before: " - + fragmentTree.getKeys()); + public static boolean adjustNodeKeys(final ConnectionTree fragmentTree, final IAtomContainer structure) { int atomIndex; for (final ConnectionTreeNode node : fragmentTree.getNodes(false)) { atomIndex = structure.indexOf(node.getAtom()); @@ -28,8 +28,6 @@ public static boolean replaceNodeKeys(final ConnectionTree 
fragmentTree, final I node.setKey(atomIndex); } fragmentTree.initKeySet(); - System.out.println("after: " - + fragmentTree.getKeys()); return true; } @@ -195,4 +193,32 @@ public static void addToAtomContainer(final ConnectionTree connectionTree, final } } } + + public static void attachPseudoAtoms(final ConnectionTree connectionTree, final IAtomContainer structure) { + int atomIndexInStructure; + for (final ConnectionTreeNode node : connectionTree.getNodes(false)) { + for (final IAtom connectedAtom : structure.getConnectedAtomsList(node.getAtom())) { + atomIndexInStructure = structure.indexOf(connectedAtom); + if (connectionTree.getBond(node.getKey(), atomIndexInStructure) + == null + && connectionTree.getBond(atomIndexInStructure, node.getKey()) + == null) { + addPseudoNode(connectionTree, structure.getAtomCount() + + connectionTree.getNodesCount(false), node.getKey(), + structure.getBond(node.getAtom(), connectedAtom)); + } + } + } + } + + private static boolean addPseudoNode(final ConnectionTree connectionTree, final int pseudoNodeKey, + final int parentNodeKey, final IBond bondToParent) { + if (!connectionTree.addNode(new PseudoAtom("R"), pseudoNodeKey, parentNodeKey, bondToParent)) { + return false; + } + final ConnectionTreeNode pseudoNode = connectionTree.getNode(pseudoNodeKey); + pseudoNode.setIsPseudoNode(true); + + return true; + } } From 36781bca01d1e368ddc3e03934f72cba2a1c6994 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Sun, 27 Jun 2021 15:01:14 +0200 Subject: [PATCH 244/405] fix: getIndex method in Assignment did not return all indices --- .../nmr/fragmentation/Fragmentation.java | 47 +++++++++++++------ src/casekit/nmr/model/Assignment.java | 11 +++-- 2 files changed, 40 insertions(+), 18 deletions(-) diff --git a/src/casekit/nmr/fragmentation/Fragmentation.java b/src/casekit/nmr/fragmentation/Fragmentation.java index c225774..9622634 100644 --- a/src/casekit/nmr/fragmentation/Fragmentation.java +++ 
b/src/casekit/nmr/fragmentation/Fragmentation.java @@ -45,11 +45,14 @@ public static List fragmentTreesToSubDataSets(final DataSet dataSet, .toAtomContainer(); final String spectrumAtomType = Utils.getAtomTypeFromSpectrum(dataSet.getSpectrum(), 0); List substructureAtomIndices; + IAtomContainer substructure; Spectrum subspectrum; Assignment subassignment; IAtom atomInStructure; Signal signal; DataSet subDataSet; + Map meta; + String smiles; for (final ConnectionTree fragmentTree : fragmentTrees) { substructureAtomIndices = fragmentTree.getKeys(); subspectrum = new Spectrum(); @@ -68,23 +71,22 @@ public static List fragmentTreesToSubDataSets(final DataSet dataSet, } atomInStructure = structure.getAtom(substructureAtomIndices.get(j)); if (atomInStructure.getSymbol() - .equals(spectrumAtomType)) { - if (dataSet.getAssignment() - .getIndex(0, substructureAtomIndices.get(j)) + .equals(spectrumAtomType) + || (spectrumAtomType.equals("H") + && atomInStructure.getImplicitHydrogenCount() + > 0)) { + final List indices = dataSet.getAssignment() + .getIndices(0, substructureAtomIndices.get(j)); + if (indices == null - || dataSet.getSpectrum() - .getSignal(dataSet.getAssignment() - .getIndex(0, substructureAtomIndices.get(j))) - == null) { + || indices.isEmpty()) { return null; } - signal = dataSet.getSpectrum() - .getSignal(dataSet.getAssignment() - .getIndex(0, substructureAtomIndices.get(j))); - if (signal - != null) { - signal = signal.buildClone(); + for (final int index : indices) { + signal = dataSet.getSpectrum() + .getSignal(index) + .buildClone(); final int atomIndex = j; final List closestSignalIndexList = subspectrum.checkForEquivalences(signal, new double[]{0.0}, @@ -113,11 +115,28 @@ public static List fragmentTreesToSubDataSets(final DataSet dataSet, subspectrum.setSpectrometerFrequency(dataSet.getSpectrum() .getSpectrometerFrequency()); + substructure = FragmentationUtils.toAtomContainer(fragmentTree); subDataSet = new DataSet(); - subDataSet.setStructure(new 
ExtendedConnectionMatrix(FragmentationUtils.toAtomContainer(fragmentTree))); + subDataSet.setStructure(new ExtendedConnectionMatrix(substructure)); subDataSet.setSpectrum(subspectrum); subDataSet.setAssignment(subassignment); + meta = new HashMap<>(); + try { + smiles = casekit.nmr.utils.Utils.getSmilesFromAtomContainer(substructure); + meta.put("smiles", smiles); + + } catch (final CDKException e) { + e.printStackTrace(); + } + meta.put("title", dataSet.getMeta() + .get("title")); + meta.put("id", dataSet.getMeta() + .get("id")); + meta.put("mf", casekit.nmr.Utils.molecularFormularToString( + casekit.nmr.Utils.getMolecularFormulaFromAtomContainer(substructure))); + subDataSet.setMeta(meta); + fragmentDataSetList.add(subDataSet); } diff --git a/src/casekit/nmr/model/Assignment.java b/src/casekit/nmr/model/Assignment.java index 32972bb..f2fb63f 100644 --- a/src/casekit/nmr/model/Assignment.java +++ b/src/casekit/nmr/model/Assignment.java @@ -28,7 +28,9 @@ import lombok.NoArgsConstructor; import lombok.Setter; +import java.util.ArrayList; import java.util.Arrays; +import java.util.List; /** * @author Michael Wenk [https://github.com/michaelwenk] @@ -135,20 +137,21 @@ public void addAssignmentEquivalence(final int dim, final int index, final int e this.setAssignment(dim, index, equivalenceIndices); } - public Integer getIndex(final int dim, final int assignment) { + public List getIndices(final int dim, final int assignment) { if (!this.containsDim(dim)) { return null; } + final List indices = new ArrayList<>(); for (int index = 0; index < this.assignments[dim].length; index++) { if (Arrays.stream(this.getAssignment(dim, index)) - .anyMatch(value -> value + .anyMatch(equiv -> equiv == assignment)) { - return index; + indices.add(index); } } - return -1; + return indices; } public int[][] getAssignments(final int dim) { From 47b9fadec27040ea1d1e5e2595db6bad4c2b76fa Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Sun, 27 Jun 2021 15:03:09 +0200 Subject: [PATCH 245/405] 
fix: thrown exception if both multiplicities are null while checking them --- src/casekit/nmr/utils/Match.java | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/casekit/nmr/utils/Match.java b/src/casekit/nmr/utils/Match.java index e428112..d42d7b0 100644 --- a/src/casekit/nmr/utils/Match.java +++ b/src/casekit/nmr/utils/Match.java @@ -253,8 +253,14 @@ public static Assignment matchSpectra(final Spectrum spectrum1, final Spectrum s passed = true; // @TODO maybe consider further parameters to check ? e.g. intensity if (checkMultiplicity) { - passed = spectrum1.getMultiplicity(i) - .equals(spectrum2.getMultiplicity(pickedSignalIndexSpectrum2)); + passed = (spectrum1.getMultiplicity(i) + == null + && spectrum2.getMultiplicity(pickedSignalIndexSpectrum2) + == null) + || (spectrum1.getMultiplicity(i) + != null + && spectrum1.getMultiplicity(i) + .equals(spectrum2.getMultiplicity(pickedSignalIndexSpectrum2))); } if (passed && checkEquivalencesCount) { @@ -293,8 +299,8 @@ public static Assignment matchSpectra(final Spectrum spectrum1, final Spectrum s * N here means the number of dimensions in both spectra.
* Despite intensities are expected, they are still not considered here. * - * @param spectrum1 first spectrum - * @param spectrum2 second spectrum (query as exact or subspectrum to check) + * @param spectrum1 first spectrum (possible subspectrum) + * @param spectrum2 second spectrum * @param shiftTols tolerance values [ppm] per each dimension used during spectra shift * comparisons * @param checkMultiplicity indicates whether to compare the multiplicity of matched signals From 7f521eded177010201d5ae2e686e7ca2ebe89b47 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Sun, 27 Jun 2021 15:04:04 +0200 Subject: [PATCH 246/405] fix: parse empty multiplicity as null --- src/casekit/nmr/dbservice/NMRShiftDB.java | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/casekit/nmr/dbservice/NMRShiftDB.java b/src/casekit/nmr/dbservice/NMRShiftDB.java index 7599498..2a5e277 100644 --- a/src/casekit/nmr/dbservice/NMRShiftDB.java +++ b/src/casekit/nmr/dbservice/NMRShiftDB.java @@ -420,7 +420,11 @@ public static Spectrum NMRShiftDBSpectrumToSpectrum(final String NMRShiftDBSpect < spectrumStringArray.length; i++) { shift = Double.parseDouble(spectrumStringArray[i][0]); intensity = Double.parseDouble(spectrumStringArray[i][1]); - multiplicity = spectrumStringArray[i][2].toLowerCase(); + multiplicity = spectrumStringArray[i][2].trim() + .isEmpty() + ? null + : spectrumStringArray[i][2].trim() + .toLowerCase(); spectrum.addSignal( new Signal(new String[]{nucleus}, new Double[]{shift}, multiplicity, "signal", intensity, 1, 0)); @@ -452,7 +456,11 @@ public static Assignment NMRShiftDBSpectrumToAssignment(final String NMRShiftDBS // just to be sure that we take the right signal if equivalences are present closestSignalList = spectrum.pickByClosestShift(Double.parseDouble(NMRShiftDBSpectrumStringArray[i][0]), 0, 0.0); - multiplicity = NMRShiftDBSpectrumStringArray[i][2].toLowerCase(); + multiplicity = NMRShiftDBSpectrumStringArray[i][2].trim() + .isEmpty() + ? 
null + : NMRShiftDBSpectrumStringArray[i][2].trim() + .toLowerCase(); closestSignalList.retainAll(spectrum.pickByMultiplicity(multiplicity)); signalIndex = closestSignalList.get(0); From 43a23aa4afb7127f23e988305cb950763eb6270f Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Sun, 27 Jun 2021 15:06:08 +0200 Subject: [PATCH 247/405] feat: added methods for MF comparisons and allow different aromaticity models --- src/casekit/nmr/Utils.java | 11 +++++++++ src/casekit/nmr/utils/Utils.java | 41 ++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+) diff --git a/src/casekit/nmr/Utils.java b/src/casekit/nmr/Utils.java index 2c5a3b7..7193aa6 100644 --- a/src/casekit/nmr/Utils.java +++ b/src/casekit/nmr/Utils.java @@ -697,6 +697,17 @@ public static void setAromaticityAndKekulize(final IAtomContainer ac) throws CDK Kekulization.kekulize(ac); } + public static void setAromaticity(final IAtomContainer ac, final Aromaticity aromaticity) throws CDKException { + AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(ac); + aromaticity.apply(ac); + } + + public static void setAromaticityAndKekulize(final IAtomContainer ac, + final Aromaticity aromaticity) throws CDKException { + Utils.setAromaticity(ac, aromaticity); + Kekulization.kekulize(ac); + } + /** * Removes atoms from a given atom type from an atom container. 
diff --git a/src/casekit/nmr/utils/Utils.java b/src/casekit/nmr/utils/Utils.java index a11ac83..a913a1d 100644 --- a/src/casekit/nmr/utils/Utils.java +++ b/src/casekit/nmr/utils/Utils.java @@ -139,4 +139,45 @@ public static Map getMolecularFormulaElementCounts(final String return counts; } + + public static int getAtomTypeCount(final IAtomContainer structure, final String atomType) { + return casekit.nmr.Utils.getAtomTypeIndicesByElement(structure, atomType) + .size(); + } + + public static int getAtomTypeCount(final String mf, final String atomType) { + return MolecularFormulaManipulator.getElementCount(getMolecularFormulaFromString(mf), atomType); + } + + public static boolean compareWithMolecularFormulaLessOrEqual(final IAtomContainer structure, final String mf) { + if (mf + != null + && !mf.trim() + .isEmpty()) { + for (final String atomType : casekit.nmr.Utils.getAtomTypesInAtomContainer(structure)) { + if (getAtomTypeCount(structure, atomType) + > getAtomTypeCount(mf, atomType)) { + return false; + } + } + } + + return true; + } + + public static boolean compareWithMolecularFormulaEqual(final IAtomContainer structure, final String mf) { + if (mf + != null + && !mf.trim() + .isEmpty()) { + for (final String atomType : casekit.nmr.Utils.getAtomTypesInAtomContainer(structure)) { + if (getAtomTypeCount(structure, atomType) + != getAtomTypeCount(mf, atomType)) { + return false; + } + } + } + + return true; + } } From c3df5ba3b3b4b54a2c0cb6ef4dc903fae7b67b4e Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Sun, 27 Jun 2021 15:13:16 +0200 Subject: [PATCH 248/405] feat: added ErtlFunctionalGroup class containing helper methods regarding ErtlFunctionalGroupsFinder --- .../ErtlFunctionalGroupsUtilities.java | 237 ++++++++++++++++++ 1 file changed, 237 insertions(+) create mode 100644 src/casekit/nmr/fragmentation/functionalgroup/ErtlFunctionalGroupsUtilities.java diff --git a/src/casekit/nmr/fragmentation/functionalgroup/ErtlFunctionalGroupsUtilities.java 
b/src/casekit/nmr/fragmentation/functionalgroup/ErtlFunctionalGroupsUtilities.java new file mode 100644 index 0000000..7444530 --- /dev/null +++ b/src/casekit/nmr/fragmentation/functionalgroup/ErtlFunctionalGroupsUtilities.java @@ -0,0 +1,237 @@ +package casekit.nmr.fragmentation.functionalgroup; + +import casekit.nmr.Utils; +import casekit.nmr.dbservice.NMRShiftDB; +import casekit.nmr.fragmentation.Fragmentation; +import casekit.nmr.fragmentation.FragmentationUtils; +import casekit.nmr.fragmentation.model.ConnectionTree; +import casekit.nmr.model.DataSet; +import casekit.nmr.model.Spectrum; +import casekit.nmr.utils.Match; +import org.openscience.cdk.aromaticity.Aromaticity; +import org.openscience.cdk.aromaticity.ElectronDonation; +import org.openscience.cdk.exception.CDKException; +import org.openscience.cdk.graph.CycleFinder; +import org.openscience.cdk.graph.Cycles; +import org.openscience.cdk.interfaces.IAtom; +import org.openscience.cdk.interfaces.IAtomContainer; +import org.openscience.cdk.interfaces.IMolecularFormula; +import org.openscience.cdk.silent.SilentChemObjectBuilder; +import org.openscience.cdk.smiles.SmilesParser; +import org.openscience.cdk.tools.manipulator.AtomContainerManipulator; +import org.openscience.cdk.tools.manipulator.MolecularFormulaManipulator; + +import java.io.IOException; +import java.util.*; +import java.util.stream.Collectors; + +public class ErtlFunctionalGroupsUtilities { + + public static final Map> buildFunctionalGroupDataSets(final String pathToNMRShiftDB, + final String[] nuclei) { + final Map> functionalGroupDataSets = new HashMap<>(); + try { + final ErtlFunctionalGroupsFinder ertlFunctionalGroupsFinder = new ErtlFunctionalGroupsFinder( + ErtlFunctionalGroupsFinder.Mode.NO_GENERALIZATION); + final List dataSetsFromNMRShiftDB = NMRShiftDB.getDataSetsFromNMRShiftDB(pathToNMRShiftDB, nuclei); + List dataSetList; + List groups; + List fragmentTrees; + ConnectionTree fragmentTree; + IAtomContainer structure, fragment; + 
String atomTypeInSpectrum, smiles; + Aromaticity[] aromaticities; + for (final DataSet dataSet : dataSetsFromNMRShiftDB) { + structure = dataSet.getStructure() + .toAtomContainer(); + aromaticities = buildDefaultAromaticities(structure); + fragmentTrees = new ArrayList<>(); + for (final Aromaticity aromaticity : aromaticities) { + try { + Utils.setAromaticityAndKekulize(structure, aromaticity); + groups = ertlFunctionalGroupsFinder.find(structure, false); + } catch (final IllegalArgumentException | CDKException e) { + e.printStackTrace(); + continue; + } + restoreOriginalEnvironmentalCarbons(groups, structure); + fragmentTrees = new ArrayList<>(); + for (final IAtomContainer group : groups) { + // each group has to contain at least one atom of specific spectrum + atomTypeInSpectrum = casekit.nmr.utils.Utils.getAtomTypeFromNucleus(dataSet.getSpectrum() + .getNuclei()[0]); + if (atomTypeInSpectrum.equals("H")) { + if (AtomContainerManipulator.getImplicitHydrogenCount(group) + == 0) { + continue; + } + } else if (Utils.getAtomTypeIndicesByElement(group, atomTypeInSpectrum) + .isEmpty()) { + continue; + } + fragmentTree = Fragmentation.buildFragmentTree(group, 0, null, new HashSet<>(), false); + FragmentationUtils.adjustNodeKeys(fragmentTree, structure); + FragmentationUtils.closeRings(fragmentTree, structure); + + fragmentTrees.add(fragmentTree); + } + } + FragmentationUtils.removeDuplicates(fragmentTrees); + dataSetList = Fragmentation.fragmentTreesToSubDataSets(dataSet, fragmentTrees); + if (dataSetList + != null) { + for (final DataSet dataSetTemp : dataSetList) { + fragment = dataSetTemp.getStructure() + .toAtomContainer(); + smiles = casekit.nmr.utils.Utils.getSmilesFromAtomContainer(fragment); + functionalGroupDataSets.putIfAbsent(smiles, new ArrayList<>()); + functionalGroupDataSets.get(smiles) + .add(dataSetTemp); + } + } + } + } catch (final IOException | CDKException e) { + e.printStackTrace(); + } + + return functionalGroupDataSets; + } + + public static 
List>> sortByFrequency( + final Map> functionalGroupDataSets) { + return functionalGroupDataSets.entrySet() + .stream() + .sorted(Map.Entry.comparingByValue((list1, list2) -> -1 + * Integer.compare(list1.size(), list2.size()))) + .collect(Collectors.toList()); + } + + public static Map> findMatches(final Map> functionalGroupDataSets, + final Spectrum querySpectrum, final String mf, + final double shiftTol, final double maxAverageDeviation) { + final Map> matches = new HashMap<>(); + final SmilesParser smilesParser = new SmilesParser(SilentChemObjectBuilder.getInstance()); + List matchesInGroup; + final IMolecularFormula iMolecularFormula = MolecularFormulaManipulator.getMolecularFormula(mf, + SilentChemObjectBuilder.getInstance()); + for (final Map.Entry> entry : functionalGroupDataSets.entrySet()) { + try { + final IAtomContainer group = smilesParser.parseSmiles(entry.getKey()); + Utils.setAromaticity(group); + + matchesInGroup = entry.getValue() + .stream() + .filter(dataSet -> { + if (!dataSet.getSpectrum() + .getNuclei()[0].equals(querySpectrum.getNuclei()[0])) { + return false; + } + final String atomTypeInSpectrum = casekit.nmr.utils.Utils.getAtomTypeFromNucleus( + dataSet.getSpectrum() + .getNuclei()[0]); + if (atomTypeInSpectrum.equals("H")) { + if (AtomContainerManipulator.getImplicitHydrogenCount( + dataSet.getStructure() + .toAtomContainer()) + > MolecularFormulaManipulator.getElementCount(iMolecularFormula, + atomTypeInSpectrum)) { + return false; + } + } else { + // check molecular formula with atom types in group + if (!casekit.nmr.utils.Utils.compareWithMolecularFormulaLessOrEqual(group, + mf)) { + return false; + } + // do not allow unsaturated fragments with different size than given molecular formula + if (Utils.getUnsaturatedAtomIndices(group) + .isEmpty() + && !casekit.nmr.utils.Utils.compareWithMolecularFormulaEqual( + group, mf)) { + return false; + } + } + // check average deviation + final Double averageDeviation = 
Match.calculateAverageDeviation( + dataSet.getSpectrum(), querySpectrum, 0, 0, shiftTol, true, true, + true); + return averageDeviation + != null + && averageDeviation + <= maxAverageDeviation; + }) + .collect(Collectors.toList()); + if (matchesInGroup.size() + > 0) { + matches.put(entry.getKey(), matchesInGroup); + } + } catch (final CDKException e) { + e.printStackTrace(); + } + } + + return matches; + } + + /** + * Replaces the inserted environmental carbon atoms (new IAtom objects) by the + * carbon IAtom objects from original structure. + * + * @param groups groups created by ErtlFunctionalGroupFinder + * @param structure original structure used to create the groups + */ + private static void restoreOriginalEnvironmentalCarbons(final List groups, + final IAtomContainer structure) { + IAtomContainer group; + IAtom connectedAtomInGroup; + List atomList; + for (int i = 0; i + < groups.size(); i++) { + group = groups.get(i); + // convert explicit hydrogens back to implicit + Utils.convertExplicitToImplicitHydrogens(group); + atomList = new ArrayList<>(); + // create a list (copy) of all atoms of the group because of atom removals and additions in group atom container + group.atoms() + .spliterator() + .forEachRemaining(atomList::add); + for (final IAtom atom : atomList) { + // detect whether the current atom is an "unknown" one, inserted as new environmental IAtom object + if (!structure.contains(atom)) { + // take its single parent from which should be in original + connectedAtomInGroup = group.getConnectedAtomsList(atom) + .get(0); + // remove the inserted atom and the bond to it + group.removeBond(atom, connectedAtomInGroup); + group.removeAtom(atom); + // from the parent node search for neighboring carbons which are not already in the group + // and add them + for (final IAtom connectedAtomInOriginalStructure : structure.getConnectedAtomsList( + connectedAtomInGroup)) { + if (connectedAtomInOriginalStructure.getSymbol() + .equals("C") + && 
!group.contains(connectedAtomInOriginalStructure)) { + group.addAtom(connectedAtomInOriginalStructure); + group.addBond(structure.getBond(connectedAtomInGroup, connectedAtomInOriginalStructure)); + } + } + } + } + } + } + + public static Aromaticity[] buildDefaultAromaticities(final IAtomContainer structure) { + final CycleFinder cycles = Cycles.all(structure.getAtomCount()); + final ElectronDonation[] models = new ElectronDonation[]{ElectronDonation.cdk(), + ElectronDonation.cdkAllowingExocyclic(), + ElectronDonation.daylight(), + ElectronDonation.piBonds()}; + final Aromaticity[] aromaticities = new Aromaticity[models.length]; + for (int i = 0; i + < models.length; i++) { + aromaticities[i] = new Aromaticity(models[i], cycles); + } + + return aromaticities; + } +} From 9565873af794f083743e01f3a08d2dcbf70b409e Mon Sep 17 00:00:00 2001 From: Michael Wenk Date: Sun, 27 Jun 2021 15:16:06 +0200 Subject: [PATCH 249/405] Delete LICENSE --- LICENSE | 165 -------------------------------------------------------- 1 file changed, 165 deletions(-) delete mode 100644 LICENSE diff --git a/LICENSE b/LICENSE deleted file mode 100644 index 65c5ca8..0000000 --- a/LICENSE +++ /dev/null @@ -1,165 +0,0 @@ - GNU LESSER GENERAL PUBLIC LICENSE - Version 3, 29 June 2007 - - Copyright (C) 2007 Free Software Foundation, Inc. - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - - - This version of the GNU Lesser General Public License incorporates -the terms and conditions of version 3 of the GNU General Public -License, supplemented by the additional permissions listed below. - - 0. Additional Definitions. - - As used herein, "this License" refers to version 3 of the GNU Lesser -General Public License, and the "GNU GPL" refers to version 3 of the GNU -General Public License. - - "The Library" refers to a covered work governed by this License, -other than an Application or a Combined Work as defined below. 
- - An "Application" is any work that makes use of an interface provided -by the Library, but which is not otherwise based on the Library. -Defining a subclass of a class defined by the Library is deemed a mode -of using an interface provided by the Library. - - A "Combined Work" is a work produced by combining or linking an -Application with the Library. The particular version of the Library -with which the Combined Work was made is also called the "Linked -Version". - - The "Minimal Corresponding Source" for a Combined Work means the -Corresponding Source for the Combined Work, excluding any source code -for portions of the Combined Work that, considered in isolation, are -based on the Application, and not on the Linked Version. - - The "Corresponding Application Code" for a Combined Work means the -object code and/or source code for the Application, including any data -and utility programs needed for reproducing the Combined Work from the -Application, but excluding the System Libraries of the Combined Work. - - 1. Exception to Section 3 of the GNU GPL. - - You may convey a covered work under sections 3 and 4 of this License -without being bound by section 3 of the GNU GPL. - - 2. Conveying Modified Versions. - - If you modify a copy of the Library, and, in your modifications, a -facility refers to a function or data to be supplied by an Application -that uses the facility (other than as an argument passed when the -facility is invoked), then you may convey a copy of the modified -version: - - a) under this License, provided that you make a good faith effort to - ensure that, in the event an Application does not supply the - function or data, the facility still operates, and performs - whatever part of its purpose remains meaningful, or - - b) under the GNU GPL, with none of the additional permissions of - this License applicable to that copy. - - 3. Object Code Incorporating Material from Library Header Files. 
- - The object code form of an Application may incorporate material from -a header file that is part of the Library. You may convey such object -code under terms of your choice, provided that, if the incorporated -material is not limited to numerical parameters, data structure -layouts and accessors, or small macros, inline functions and templates -(ten or fewer lines in length), you do both of the following: - - a) Give prominent notice with each copy of the object code that the - Library is used in it and that the Library and its use are - covered by this License. - - b) Accompany the object code with a copy of the GNU GPL and this license - document. - - 4. Combined Works. - - You may convey a Combined Work under terms of your choice that, -taken together, effectively do not restrict modification of the -portions of the Library contained in the Combined Work and reverse -engineering for debugging such modifications, if you also do each of -the following: - - a) Give prominent notice with each copy of the Combined Work that - the Library is used in it and that the Library and its use are - covered by this License. - - b) Accompany the Combined Work with a copy of the GNU GPL and this license - document. - - c) For a Combined Work that displays copyright notices during - execution, include the copyright notice for the Library among - these notices, as well as a reference directing the user to the - copies of the GNU GPL and this license document. - - d) Do one of the following: - - 0) Convey the Minimal Corresponding Source under the terms of this - License, and the Corresponding Application Code in a form - suitable for, and under terms that permit, the user to - recombine or relink the Application with a modified version of - the Linked Version to produce a modified Combined Work, in the - manner specified by section 6 of the GNU GPL for conveying - Corresponding Source. - - 1) Use a suitable shared library mechanism for linking with the - Library. 
A suitable mechanism is one that (a) uses at run time - a copy of the Library already present on the user's computer - system, and (b) will operate properly with a modified version - of the Library that is interface-compatible with the Linked - Version. - - e) Provide Installation Information, but only if you would otherwise - be required to provide such information under section 6 of the - GNU GPL, and only to the extent that such information is - necessary to install and execute a modified version of the - Combined Work produced by recombining or relinking the - Application with a modified version of the Linked Version. (If - you use option 4d0, the Installation Information must accompany - the Minimal Corresponding Source and Corresponding Application - Code. If you use option 4d1, you must provide the Installation - Information in the manner specified by section 6 of the GNU GPL - for conveying Corresponding Source.) - - 5. Combined Libraries. - - You may place library facilities that are a work based on the -Library side by side in a single library together with other library -facilities that are not Applications and are not covered by this -License, and convey such a combined library under terms of your -choice, if you do both of the following: - - a) Accompany the combined library with a copy of the same work based - on the Library, uncombined with any other library facilities, - conveyed under the terms of this License. - - b) Give prominent notice with the combined library that part of it - is a work based on the Library, and explaining where to find the - accompanying uncombined form of the same work. - - 6. Revised Versions of the GNU Lesser General Public License. - - The Free Software Foundation may publish revised and/or new versions -of the GNU Lesser General Public License from time to time. Such new -versions will be similar in spirit to the present version, but may -differ in detail to address new problems or concerns. 
- - Each version is given a distinguishing version number. If the -Library as you received it specifies that a certain numbered version -of the GNU Lesser General Public License "or any later version" -applies to it, you have the option of following the terms and -conditions either of that published version or of any later version -published by the Free Software Foundation. If the Library as you -received it does not specify a version number of the GNU Lesser -General Public License, you may choose any version of the GNU Lesser -General Public License ever published by the Free Software Foundation. - - If the Library as you received it specifies that a proxy can decide -whether future versions of the GNU Lesser General Public License shall -apply, that proxy's public statement of acceptance of any version is -permanent authorization for you to choose that version for the -Library. From 5e225d580ef867207accbc55c62aafbcae40d505 Mon Sep 17 00:00:00 2001 From: Michael Wenk Date: Sun, 27 Jun 2021 15:21:57 +0200 Subject: [PATCH 250/405] Create LICENSE --- LICENSE | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 LICENSE diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..4824617 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2018-2021 Michael Wenk + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. From 184c7609c17a1ae22b83ac05f3478b083d484f42 Mon Sep 17 00:00:00 2001 From: Michael Wenk Date: Sun, 27 Jun 2021 15:23:14 +0200 Subject: [PATCH 251/405] Update README.md --- README.md | 4 ---- 1 file changed, 4 deletions(-) diff --git a/README.md b/README.md index 9840a35..5b33ee5 100644 --- a/README.md +++ b/README.md @@ -2,10 +2,6 @@ # The Computer-Assisted-Structure-Elucidation Kit (CASEkit) -Copyright 2017 Christoph Steinbeck - -License: MIT, see doc/mit.license - ## Introduction This project depends on the Chemistry Development Project (CDK), hosted under https://cdk.github.io/ From f64bf3791c33596d97ae3db4101835e6a9501801 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Mon, 28 Jun 2021 02:09:58 +0200 Subject: [PATCH 252/405] fix: adaption to changes of getIndices() in Assignment --- .../nmr/analysis/HOSECodeShiftStatistics.java | 59 ++++++++++--------- .../nmr/fragmentation/Fragmentation.java | 12 ++-- src/casekit/nmr/utils/Predict.java | 44 +++++++++----- 3 files changed, 67 insertions(+), 48 deletions(-) diff --git a/src/casekit/nmr/analysis/HOSECodeShiftStatistics.java b/src/casekit/nmr/analysis/HOSECodeShiftStatistics.java index 5e1209f..b726d07 100644 --- a/src/casekit/nmr/analysis/HOSECodeShiftStatistics.java +++ b/src/casekit/nmr/analysis/HOSECodeShiftStatistics.java @@ -48,6 +48,7 @@ public static Map>> collectHOSECodeShifts(final Map atomIndexMap; // from explicit H to heavy atom ConnectionTree connectionTree; int maxSphereTemp; + List signalIndices; for (final DataSet dataSet : 
dataSetList) { structure = dataSet.getStructure() .toAtomContainer(); @@ -90,42 +91,46 @@ public static Map>> collectHOSECodeShifts(final .getNuclei()[0]); for (int i = 0; i < structure.getAtomCount(); i++) { - signal = null; + signalIndices = null; if (structure.getAtom(i) .getSymbol() .equals(atomTypeSpectrum)) { if (atomTypeSpectrum.equals("H")) { - signal = dataSet.getSpectrum() - .getSignal(dataSet.getAssignment() - .getIndex(0, atomIndexMap.get(i))); + // could be multiple signals + signalIndices = dataSet.getAssignment() + .getIndices(0, atomIndexMap.get(i)); } else { - signal = dataSet.getSpectrum() - .getSignal(dataSet.getAssignment() - .getIndex(0, i)); + // should be one only + signalIndices = dataSet.getAssignment() + .getIndices(0, i); } } - if (signal + if (signalIndices != null) { - try { - if (maxSphere - == null) { - connectionTree = HOSECodeBuilder.buildConnectionTree(structure, i, null); - maxSphereTemp = connectionTree.getMaxSphere(true); - } else { - maxSphereTemp = maxSphere; - } - for (int sphere = 1; sphere - <= maxSphereTemp; sphere++) { - hoseCode = HOSECodeBuilder.buildHOSECode(structure, i, sphere, false); - hoseCodeShifts.putIfAbsent(hoseCode, new HashMap<>()); - hoseCodeShifts.get(hoseCode) - .putIfAbsent(solvent, new ArrayList<>()); - hoseCodeShifts.get(hoseCode) - .get(solvent) - .add(signal.getShift(0)); + for (final Integer signalIndex : signalIndices) { + signal = dataSet.getSpectrum() + .getSignal(signalIndex); + try { + if (maxSphere + == null) { + connectionTree = HOSECodeBuilder.buildConnectionTree(structure, i, null); + maxSphereTemp = connectionTree.getMaxSphere(true); + } else { + maxSphereTemp = maxSphere; + } + for (int sphere = 1; sphere + <= maxSphereTemp; sphere++) { + hoseCode = HOSECodeBuilder.buildHOSECode(structure, i, sphere, false); + hoseCodeShifts.putIfAbsent(hoseCode, new HashMap<>()); + hoseCodeShifts.get(hoseCode) + .putIfAbsent(solvent, new ArrayList<>()); + hoseCodeShifts.get(hoseCode) + .get(solvent) + 
.add(signal.getShift(0)); + } + } catch (final CDKException e) { + e.printStackTrace(); } - } catch (final CDKException e) { - e.printStackTrace(); } } } diff --git a/src/casekit/nmr/fragmentation/Fragmentation.java b/src/casekit/nmr/fragmentation/Fragmentation.java index 9622634..58a7f49 100644 --- a/src/casekit/nmr/fragmentation/Fragmentation.java +++ b/src/casekit/nmr/fragmentation/Fragmentation.java @@ -44,7 +44,7 @@ public static List fragmentTreesToSubDataSets(final DataSet dataSet, final IAtomContainer structure = dataSet.getStructure() .toAtomContainer(); final String spectrumAtomType = Utils.getAtomTypeFromSpectrum(dataSet.getSpectrum(), 0); - List substructureAtomIndices; + List substructureAtomIndices, signalIndices; IAtomContainer substructure; Spectrum subspectrum; Assignment subassignment; @@ -75,15 +75,15 @@ public static List fragmentTreesToSubDataSets(final DataSet dataSet, || (spectrumAtomType.equals("H") && atomInStructure.getImplicitHydrogenCount() > 0)) { - final List indices = dataSet.getAssignment() - .getIndices(0, substructureAtomIndices.get(j)); - if (indices + signalIndices = dataSet.getAssignment() + .getIndices(0, substructureAtomIndices.get(j)); + if (signalIndices == null - || indices.isEmpty()) { + || signalIndices.isEmpty()) { return null; } - for (final int index : indices) { + for (final int index : signalIndices) { signal = dataSet.getSpectrum() .getSignal(index) .buildClone(); diff --git a/src/casekit/nmr/utils/Predict.java b/src/casekit/nmr/utils/Predict.java index cdc96fe..5764a96 100644 --- a/src/casekit/nmr/utils/Predict.java +++ b/src/casekit/nmr/utils/Predict.java @@ -127,7 +127,7 @@ public static DataSet predict1D(final Map> hoseCod /** * Predicts a 2D spectrum from two 1D spectra. Each 1D spectra needs to contain the same solvent information. - * Diastereotopic distinctions are not provided yet. + * Diastereotopic distinctions are not provided yet ({@link #predict1D(Map, IAtomContainer, String, String)}). 
* * @param hoseCodeShiftStatistics HOSE code shift statistics * @param structure structure to use for prediction @@ -137,6 +137,9 @@ public static DataSet predict1D(final Map> hoseCod * @param maxPathLength maximal path length * * @return + * + * @see #predict1D(Map, IAtomContainer, String, String) + * @see #predict2D(IAtomContainer, Spectrum, Spectrum, Assignment, Assignment, int, int) */ public static DataSet predict2D(final Map> hoseCodeShiftStatistics, final IAtomContainer structure, final String[] nuclei, final String solvent, @@ -149,8 +152,9 @@ public static DataSet predict2D(final Map> hoseCod } /** - * Predicts a 2D spectrum from two 1D spectra. Each 1D spectra needs to contain the same solvent information. - * Diastereotopic distinctions are not provided yet. + * Predicts a 2D spectrum from two 1D spectra.
+ * Each 1D spectra needs to contain the same solvent information.
+ * Note: If 1H is used then it needs to be in first dimension, e.g. 1H, 13C. * * @param structure structure to use for prediction * @param spectrumDim1 1D spectrum of first dimension @@ -187,6 +191,7 @@ public static DataSet predict2D(final IAtomContainer structure, final Spectrum s int addedSignalIndex; ConnectionTree connectionTree; List nodesInSphere; + List signalIndicesDim1, signalIndicesDim2; for (int i = 0; i < structure.getAtomCount(); i++) { atom = structure.getAtom(i); @@ -204,18 +209,27 @@ public static DataSet predict2D(final IAtomContainer structure, final Spectrum s signal2D.setNuclei(nuclei2D); signal2D.setKind("signal"); signal2D.setEquivalencesCount(1); - shiftDim1 = spectrumDim1.getShift(assignmentDim1.getIndex(0, i), 0); - shiftDim2 = spectrumDim2.getShift(assignmentDim2.getIndex(0, nodeInSphere.getKey()), 0); - signal2D.setShifts(new Double[]{shiftDim1, shiftDim2}); - - addedSignalIndex = predictedSpectrum2D.addSignal(signal2D); - if (addedSignalIndex - >= assignment2D.getSetAssignmentsCount(0)) { - assignment2D.addAssignment(0, new int[]{i}); - assignment2D.addAssignment(1, new int[]{nodeInSphere.getKey()}); - } else { - assignment2D.addAssignmentEquivalence(0, addedSignalIndex, i); - assignment2D.addAssignmentEquivalence(1, addedSignalIndex, nodeInSphere.getKey()); + // on first axis go through all possible assignments, i.e. in case of 1H + signalIndicesDim1 = assignmentDim1.getIndices(0, i); + for (final int signalIndexDim1 : signalIndicesDim1) { + shiftDim1 = spectrumDim1.getShift(signalIndexDim1, 0); + // on second axis go through all possible assignments, i.e. 
in case of 1H + signalIndicesDim2 = assignmentDim2.getIndices(0, nodeInSphere.getKey()); + for (final int signalIndexDim2 : signalIndicesDim2) { + shiftDim2 = spectrumDim2.getShift(signalIndexDim2, 0); + signal2D.setShifts(new Double[]{shiftDim1, shiftDim2}); + // add 2D signal + addedSignalIndex = predictedSpectrum2D.addSignal(signal2D); + if (addedSignalIndex + >= assignment2D.getSetAssignmentsCount(0)) { + assignment2D.addAssignment(0, new int[]{i}); + assignment2D.addAssignment(1, new int[]{nodeInSphere.getKey()}); + } else { + assignment2D.addAssignmentEquivalence(0, addedSignalIndex, i); + assignment2D.addAssignmentEquivalence(1, addedSignalIndex, + nodeInSphere.getKey()); + } + } } } } From 287e84cfd9aa71d6a5da5c1e099b66d94d7e6ec2 Mon Sep 17 00:00:00 2001 From: Michael Wenk Date: Mon, 28 Jun 2021 09:49:05 +0200 Subject: [PATCH 253/405] Delete LICENSE --- LICENSE | 21 --------------------- 1 file changed, 21 deletions(-) delete mode 100644 LICENSE diff --git a/LICENSE b/LICENSE deleted file mode 100644 index 4824617..0000000 --- a/LICENSE +++ /dev/null @@ -1,21 +0,0 @@ -MIT License - -Copyright (c) 2018-2021 Michael Wenk - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. From 4a37bddcacb6afa61046306b7856be22f9ac1d6f Mon Sep 17 00:00:00 2001 From: Michael Wenk Date: Mon, 28 Jun 2021 09:58:00 +0200 Subject: [PATCH 254/405] Create LICENSE --- LICENSE | 504 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 504 insertions(+) create mode 100644 LICENSE diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..8000a6f --- /dev/null +++ b/LICENSE @@ -0,0 +1,504 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 2.1, February 1999 + + Copyright (C) 1991, 1999 Free Software Foundation, Inc. + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + +[This is the first released version of the Lesser GPL. It also counts + as the successor of the GNU Library Public License, version 2, hence + the version number 2.1.] + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +Licenses are intended to guarantee your freedom to share and change +free software--to make sure the software is free for all its users. + + This license, the Lesser General Public License, applies to some +specially designated software packages--typically libraries--of the +Free Software Foundation and other authors who decide to use it. You +can use it too, but we suggest you first think carefully about whether +this license or the ordinary General Public License is the better +strategy to use in any particular case, based on the explanations below. + + When we speak of free software, we are referring to freedom of use, +not price. 
Our General Public Licenses are designed to make sure that +you have the freedom to distribute copies of free software (and charge +for this service if you wish); that you receive source code or can get +it if you want it; that you can change the software and use pieces of +it in new free programs; and that you are informed that you can do +these things. + + To protect your rights, we need to make restrictions that forbid +distributors to deny you these rights or to ask you to surrender these +rights. These restrictions translate to certain responsibilities for +you if you distribute copies of the library or if you modify it. + + For example, if you distribute copies of the library, whether gratis +or for a fee, you must give the recipients all the rights that we gave +you. You must make sure that they, too, receive or can get the source +code. If you link other code with the library, you must provide +complete object files to the recipients, so that they can relink them +with the library after making changes to the library and recompiling +it. And you must show them these terms so they know their rights. + + We protect your rights with a two-step method: (1) we copyright the +library, and (2) we offer you this license, which gives you legal +permission to copy, distribute and/or modify the library. + + To protect each distributor, we want to make it very clear that +there is no warranty for the free library. Also, if the library is +modified by someone else and passed on, the recipients should know +that what they have is not the original version, so that the original +author's reputation will not be affected by problems that might be +introduced by others. + + Finally, software patents pose a constant threat to the existence of +any free program. We wish to make sure that a company cannot +effectively restrict the users of a free program by obtaining a +restrictive license from a patent holder. 
Therefore, we insist that +any patent license obtained for a version of the library must be +consistent with the full freedom of use specified in this license. + + Most GNU software, including some libraries, is covered by the +ordinary GNU General Public License. This license, the GNU Lesser +General Public License, applies to certain designated libraries, and +is quite different from the ordinary General Public License. We use +this license for certain libraries in order to permit linking those +libraries into non-free programs. + + When a program is linked with a library, whether statically or using +a shared library, the combination of the two is legally speaking a +combined work, a derivative of the original library. The ordinary +General Public License therefore permits such linking only if the +entire combination fits its criteria of freedom. The Lesser General +Public License permits more lax criteria for linking other code with +the library. + + We call this license the "Lesser" General Public License because it +does Less to protect the user's freedom than the ordinary General +Public License. It also provides other free software developers Less +of an advantage over competing non-free programs. These disadvantages +are the reason we use the ordinary General Public License for many +libraries. However, the Lesser license provides advantages in certain +special circumstances. + + For example, on rare occasions, there may be a special need to +encourage the widest possible use of a certain library, so that it becomes +a de-facto standard. To achieve this, non-free programs must be +allowed to use the library. A more frequent case is that a free +library does the same job as widely used non-free libraries. In this +case, there is little to gain by limiting the free library to free +software only, so we use the Lesser General Public License. 
+ + In other cases, permission to use a particular library in non-free +programs enables a greater number of people to use a large body of +free software. For example, permission to use the GNU C Library in +non-free programs enables many more people to use the whole GNU +operating system, as well as its variant, the GNU/Linux operating +system. + + Although the Lesser General Public License is Less protective of the +users' freedom, it does ensure that the user of a program that is +linked with the Library has the freedom and the wherewithal to run +that program using a modified version of the Library. + + The precise terms and conditions for copying, distribution and +modification follow. Pay close attention to the difference between a +"work based on the library" and a "work that uses the library". The +former contains code derived from the library, whereas the latter must +be combined with the library in order to run. + + GNU LESSER GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License Agreement applies to any software library or other +program which contains a notice placed by the copyright holder or +other authorized party saying it may be distributed under the terms of +this Lesser General Public License (also called "this License"). +Each licensee is addressed as "you". + + A "library" means a collection of software functions and/or data +prepared so as to be conveniently linked with application programs +(which use some of those functions and data) to form executables. + + The "Library", below, refers to any such software library or work +which has been distributed under these terms. A "work based on the +Library" means either the Library or any derivative work under +copyright law: that is to say, a work containing the Library or a +portion of it, either verbatim or with modifications and/or translated +straightforwardly into another language. 
(Hereinafter, translation is +included without limitation in the term "modification".) + + "Source code" for a work means the preferred form of the work for +making modifications to it. For a library, complete source code means +all the source code for all modules it contains, plus any associated +interface definition files, plus the scripts used to control compilation +and installation of the library. + + Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running a program using the Library is not restricted, and output from +such a program is covered only if its contents constitute a work based +on the Library (independent of the use of the Library in a tool for +writing it). Whether that is true depends on what the Library does +and what the program that uses the Library does. + + 1. You may copy and distribute verbatim copies of the Library's +complete source code as you receive it, in any medium, provided that +you conspicuously and appropriately publish on each copy an +appropriate copyright notice and disclaimer of warranty; keep intact +all the notices that refer to this License and to the absence of any +warranty; and distribute a copy of this License along with the +Library. + + You may charge a fee for the physical act of transferring a copy, +and you may at your option offer warranty protection in exchange for a +fee. + + 2. You may modify your copy or copies of the Library or any portion +of it, thus forming a work based on the Library, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) The modified work must itself be a software library. + + b) You must cause the files modified to carry prominent notices + stating that you changed the files and the date of any change. 
+ + c) You must cause the whole of the work to be licensed at no + charge to all third parties under the terms of this License. + + d) If a facility in the modified Library refers to a function or a + table of data to be supplied by an application program that uses + the facility, other than as an argument passed when the facility + is invoked, then you must make a good faith effort to ensure that, + in the event an application does not supply such function or + table, the facility still operates, and performs whatever part of + its purpose remains meaningful. + + (For example, a function in a library to compute square roots has + a purpose that is entirely well-defined independent of the + application. Therefore, Subsection 2d requires that any + application-supplied function or table used by this function must + be optional: if the application does not supply it, the square + root function must still compute square roots.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Library, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Library, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote +it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Library. 
+ +In addition, mere aggregation of another work not based on the Library +with the Library (or with a work based on the Library) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may opt to apply the terms of the ordinary GNU General Public +License instead of this License to a given copy of the Library. To do +this, you must alter all the notices that refer to this License, so +that they refer to the ordinary GNU General Public License, version 2, +instead of to this License. (If a newer version than version 2 of the +ordinary GNU General Public License has appeared, then you can specify +that version instead if you wish.) Do not make any other change in +these notices. + + Once this change is made in a given copy, it is irreversible for +that copy, so the ordinary GNU General Public License applies to all +subsequent copies and derivative works made from that copy. + + This option is useful when you wish to copy part of the code of +the Library into a program that is not a library. + + 4. You may copy and distribute the Library (or a portion or +derivative of it, under Section 2) in object code or executable form +under the terms of Sections 1 and 2 above provided that you accompany +it with the complete corresponding machine-readable source code, which +must be distributed under the terms of Sections 1 and 2 above on a +medium customarily used for software interchange. + + If distribution of object code is made by offering access to copy +from a designated place, then offering equivalent access to copy the +source code from the same place satisfies the requirement to +distribute the source code, even though third parties are not +compelled to copy the source along with the object code. + + 5. A program that contains no derivative of any portion of the +Library, but is designed to work with the Library by being compiled or +linked with it, is called a "work that uses the Library". 
Such a +work, in isolation, is not a derivative work of the Library, and +therefore falls outside the scope of this License. + + However, linking a "work that uses the Library" with the Library +creates an executable that is a derivative of the Library (because it +contains portions of the Library), rather than a "work that uses the +library". The executable is therefore covered by this License. +Section 6 states terms for distribution of such executables. + + When a "work that uses the Library" uses material from a header file +that is part of the Library, the object code for the work may be a +derivative work of the Library even though the source code is not. +Whether this is true is especially significant if the work can be +linked without the Library, or if the work is itself a library. The +threshold for this to be true is not precisely defined by law. + + If such an object file uses only numerical parameters, data +structure layouts and accessors, and small macros and small inline +functions (ten lines or less in length), then the use of the object +file is unrestricted, regardless of whether it is legally a derivative +work. (Executables containing this object code plus portions of the +Library will still fall under Section 6.) + + Otherwise, if the work is a derivative of the Library, you may +distribute the object code for the work under the terms of Section 6. +Any executables containing that work also fall under Section 6, +whether or not they are linked directly with the Library itself. + + 6. As an exception to the Sections above, you may also combine or +link a "work that uses the Library" with the Library to produce a +work containing portions of the Library, and distribute that work +under terms of your choice, provided that the terms permit +modification of the work for the customer's own use and reverse +engineering for debugging such modifications. 
+ + You must give prominent notice with each copy of the work that the +Library is used in it and that the Library and its use are covered by +this License. You must supply a copy of this License. If the work +during execution displays copyright notices, you must include the +copyright notice for the Library among them, as well as a reference +directing the user to the copy of this License. Also, you must do one +of these things: + + a) Accompany the work with the complete corresponding + machine-readable source code for the Library including whatever + changes were used in the work (which must be distributed under + Sections 1 and 2 above); and, if the work is an executable linked + with the Library, with the complete machine-readable "work that + uses the Library", as object code and/or source code, so that the + user can modify the Library and then relink to produce a modified + executable containing the modified Library. (It is understood + that the user who changes the contents of definitions files in the + Library will not necessarily be able to recompile the application + to use the modified definitions.) + + b) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (1) uses at run time a + copy of the library already present on the user's computer system, + rather than copying library functions into the executable, and (2) + will operate properly with a modified version of the library, if + the user installs one, as long as the modified version is + interface-compatible with the version that the work was made with. + + c) Accompany the work with a written offer, valid for at + least three years, to give the same user the materials + specified in Subsection 6a, above, for a charge no more + than the cost of performing this distribution. 
+ + d) If distribution of the work is made by offering access to copy + from a designated place, offer equivalent access to copy the above + specified materials from the same place. + + e) Verify that the user has already received a copy of these + materials or that you have already sent this user a copy. + + For an executable, the required form of the "work that uses the +Library" must include any data and utility programs needed for +reproducing the executable from it. However, as a special exception, +the materials to be distributed need not include anything that is +normally distributed (in either source or binary form) with the major +components (compiler, kernel, and so on) of the operating system on +which the executable runs, unless that component itself accompanies +the executable. + + It may happen that this requirement contradicts the license +restrictions of other proprietary libraries that do not normally +accompany the operating system. Such a contradiction means you cannot +use both them and the Library together in an executable that you +distribute. + + 7. You may place library facilities that are a work based on the +Library side-by-side in a single library together with other library +facilities not covered by this License, and distribute such a combined +library, provided that the separate distribution of the work based on +the Library and of the other library facilities is otherwise +permitted, and provided that you do these two things: + + a) Accompany the combined library with a copy of the same work + based on the Library, uncombined with any other library + facilities. This must be distributed under the terms of the + Sections above. + + b) Give prominent notice with the combined library of the fact + that part of it is a work based on the Library, and explaining + where to find the accompanying uncombined form of the same work. + + 8. 
You may not copy, modify, sublicense, link with, or distribute +the Library except as expressly provided under this License. Any +attempt otherwise to copy, modify, sublicense, link with, or +distribute the Library is void, and will automatically terminate your +rights under this License. However, parties who have received copies, +or rights, from you under this License will not have their licenses +terminated so long as such parties remain in full compliance. + + 9. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Library or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Library (or any work based on the +Library), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Library or works based on it. + + 10. Each time you redistribute the Library (or any work based on the +Library), the recipient automatically receives a license from the +original licensor to copy, distribute, link with or modify the Library +subject to these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties with +this License. + + 11. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Library at all. 
For example, if a patent +license would not permit royalty-free redistribution of the Library by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Library. + +If any portion of this section is held invalid or unenforceable under any +particular circumstance, the balance of the section is intended to apply, +and the section as a whole is intended to apply in other circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 12. If the distribution and/or use of the Library is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Library under this License may add +an explicit geographical distribution limitation excluding those countries, +so that distribution is permitted only in or among countries not thus +excluded. In such case, this License incorporates the limitation as if +written in the body of this License. + + 13. The Free Software Foundation may publish revised and/or new +versions of the Lesser General Public License from time to time. 
+Such new versions will be similar in spirit to the present version, +but may differ in detail to address new problems or concerns. + +Each version is given a distinguishing version number. If the Library +specifies a version number of this License which applies to it and +"any later version", you have the option of following the terms and +conditions either of that version or of any later version published by +the Free Software Foundation. If the Library does not specify a +license version number, you may choose any version ever published by +the Free Software Foundation. + + 14. If you wish to incorporate parts of the Library into other free +programs whose distribution conditions are incompatible with these, +write to the author to ask for permission. For software which is +copyrighted by the Free Software Foundation, write to the Free +Software Foundation; we sometimes make exceptions for this. Our +decision will be guided by the two goals of preserving the free status +of all derivatives of our free software and of promoting the sharing +and reuse of software generally. + + NO WARRANTY + + 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO +WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. +EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR +OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY +KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE +LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME +THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. 
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN +WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY +AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU +FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR +CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE +LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING +RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A +FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF +SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH +DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Libraries + + If you develop a new library, and you want it to be of the greatest +possible use to the public, we recommend making it free software that +everyone can redistribute and change. You can do so by permitting +redistribution under these terms (or, alternatively, under the terms of the +ordinary General Public License). + + To apply these terms, attach the following notices to the library. It is +safest to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least the +"copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 + USA + +Also add information on how to contact you by electronic and paper mail. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the library, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the + library `Frob' (a library for tweaking knobs) written by James Random + Hacker. + + , 1 April 1990 + Ty Coon, President of Vice + +That's all there is to it! From 95a8c777290ae32089b9ca95dd9b6befbe43d1f9 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Mon, 28 Jun 2021 09:59:53 +0200 Subject: [PATCH 255/405] feat: pasted ErtlFunctionalGroupsFinder class --- .../ErtlFunctionalGroupsFinder.java | 979 ++++++++++++++++++ 1 file changed, 979 insertions(+) create mode 100644 src/casekit/nmr/fragmentation/functionalgroup/ErtlFunctionalGroupsFinder.java diff --git a/src/casekit/nmr/fragmentation/functionalgroup/ErtlFunctionalGroupsFinder.java b/src/casekit/nmr/fragmentation/functionalgroup/ErtlFunctionalGroupsFinder.java new file mode 100644 index 0000000..7d765e6 --- /dev/null +++ b/src/casekit/nmr/fragmentation/functionalgroup/ErtlFunctionalGroupsFinder.java @@ -0,0 +1,979 @@ +/** + * ErtlFunctionalGroupsFinder for CDK + * Copyright (C) 2019 Sebastian Fritsch + *

+ * Source code is available at + *

+ * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + *

+ * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + *

+ * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +package casekit.nmr.fragmentation.functionalgroup; + +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import com.google.common.collect.Sets; +import org.openscience.cdk.graph.ConnectedComponents; +import org.openscience.cdk.graph.GraphUtil; +import org.openscience.cdk.graph.GraphUtil.EdgeToBondMap; +import org.openscience.cdk.interfaces.*; +import org.openscience.cdk.interfaces.IBond.Order; +import org.openscience.cdk.tools.ILoggingTool; +import org.openscience.cdk.tools.LoggingToolFactory; + +import java.util.*; + +/** + * Finds and extracts a molecules's functional groups in a purely rule-based manner. + *

+ * This class implements Peter Ertl's algorithm for the automated detection and extraction + * of functional groups in organic molecules + * [Ertl P. An algorithm to identify functional groups in organic molecules. J Cheminform. 2017; 9:36.]. + * + * @author Sebastian Fritsch + * @version 1.0.0.0 + */ +public class ErtlFunctionalGroupsFinder { + + private final static String CARBONYL_C_MARKER = "Carbonyl-C"; + private static final ILoggingTool log = LoggingToolFactory.createLoggingTool(ErtlFunctionalGroupsFinder.class); + private final Set nonmetalAtomicNumbers; + private final Mode mode; + private EdgeToBondMap bondMap; + private int[][] adjList; + private HashSet markedAtoms; + private HashMap aromaticHeteroAtoms; // key: atom idx, value: isInGroup + private Map> environmentsMap; + + /** + * Default constructor for ErtlFunctionalGroupsFinder. + */ + public ErtlFunctionalGroupsFinder() { + this(Mode.DEFAULT); + } + + /** + * Constructor for ErtlFunctionalGroupsFinder. + * + * @param mode working mode (see {@code ErtlFunctionalGroupsFinder.Mode}). + */ + public ErtlFunctionalGroupsFinder(final Mode mode) { + this.mode = mode; + + // init non-metal and non-metalloid atom numbers + this.nonmetalAtomicNumbers = ImmutableSet.of(1, 2, 6, 7, 8, 9, 10, 15, 16, 17, 18, 34, 35, 36, 53, 54, 86); + } + + private static final boolean isHeteroatom(final IAtom atom) { + final int atomicNr = atom.getAtomicNumber(); + return atomicNr + != 1 + && atomicNr + != 6; + } + + /** + * Find all functional groups contained in a molecule. + *

+ * NOTE: The input must consist of one connected structure and may not contain charged atoms, metals or metalloids. + * + * @param container the molecule which contains the functional groups (may not contain charged atoms, metals, + * metalloids or unconnected components!) + * + * @return a list with all functional groups found in the molecule. + */ + public List find(final IAtomContainer container) { + return this.find(container, true); + } + + /** + * Find all functional groups contained in a molecule. + *

+ * NOTE: The input must consist of one connected structure and may not contain charged atoms, metals or metalloids. + * + * @param container the molecule which contains the functional groups (may not contain charged atoms, metals, + * metalloids or unconnected components!) + * @param clone Use 'false' to reuse the input container's bonds and atoms in the extraction of the functional + * groups. This may speed up the extraction and lower the memory consumption for processing large + * amounts of data but corrupts the original input container. + * Use 'true' to work with a clone and leave the input container intact (default). + * + * @return a list with all functional groups found in the molecule. + */ + public List find(final IAtomContainer container, final boolean clone) { + // work with a clone? + final IAtomContainer mol; + if (clone) { + try { + mol = container.clone(); + } catch (final CloneNotSupportedException e) { + throw new IllegalStateException("Atom container could not be cloned"); + } + } else { + mol = container; + } + + // init GraphUtil & EdgeToBondMap + this.bondMap = EdgeToBondMap.withSpaceFor(mol); + this.adjList = GraphUtil.toAdjList(mol, this.bondMap); + + this.checkConstraints(mol); + + // atom marking + this.markAtoms(mol); + + // extract raw groups + final List groups = this.extractGroups(mol); + + // handle environment + if (this.mode + == Mode.DEFAULT) { + this.expandGeneralizedEnvironments(groups); + } else if (this.mode + == Mode.NO_GENERALIZATION) { + this.expandFullEnvironments(groups); + } else { + throw new IllegalStateException("Unknown mode."); + } + + // clear fields + this.bondMap = null; + this.adjList = null; + this.markedAtoms = null; + this.aromaticHeteroAtoms = null; + this.environmentsMap = null; + + return groups; + } + + /** + * Mark all atoms and store them in a set for further processing. 
+ * + * @param molecule Molecule with atoms to mark + */ + private void markAtoms(final IAtomContainer molecule) { + if (this.isDbg()) { + log.debug("########## Starting search for atoms to mark ... ##########"); + } + + // store marked atoms + this.markedAtoms = Sets.newHashSetWithExpectedSize(molecule.getAtomCount()); + // store aromatic heteroatoms + this.aromaticHeteroAtoms = new HashMap<>(); + + for (int idx = 0; idx + < molecule.getAtomCount(); idx++) { + // skip atoms that already got marked in a previous iteration + if (this.markedAtoms.contains(idx)) { + continue; + } + final IAtom cAtom = molecule.getAtom(idx); + // skip aromatic atoms but add them to set + if (cAtom.isAromatic()) { + if (isHeteroatom(cAtom)) { + this.aromaticHeteroAtoms.put(idx, false); + } + continue; + } + + final int atomicNr = cAtom.getAtomicNumber(); + + // if C... + if (atomicNr + == 6) { + boolean isMarked = false; // to detect if foor loop ran with or without marking the C atom + int oNSCounter = 0; // count for the number of connected O, N & S atoms + for (final int connectedIdx : this.adjList[idx]) { + final IAtom connectedAtom = molecule.getAtom(connectedIdx); + final IBond connectedBond = this.bondMap.get(idx, connectedIdx); + + // if connected to Heteroatom or C in aliphatic double or triple bond... [CONDITIONS 2.1 & 2.2] + if (connectedAtom.getAtomicNumber() + != 1 + && ((connectedBond.getOrder() + == Order.DOUBLE + || connectedBond.getOrder() + == Order.TRIPLE) + && !connectedBond.isAromatic())) { + + // set the connected atom as marked + if (this.markedAtoms.add(connectedIdx)) { + final String connectedAtomCondition = connectedAtom.getAtomicNumber() + == 6 + ? 
"2.1/2.2" + : "1"; + if (this.isDbg()) { + log.debug(String.format("Marking Atom #%d (%s) - Met condition %s", connectedIdx, + connectedAtom.getSymbol(), connectedAtomCondition)); + } + } + + // set the current atom as marked and break out of connected atoms + if (this.isDbg()) { + log.debug(String.format("Marking Atom #%d (%s) - Met condition 2.1/2.2", idx, + cAtom.getSymbol())); + } + isMarked = true; + + // but check for carbonyl-C before break + if (connectedAtom.getAtomicNumber() + == 8 + && connectedBond.getOrder() + == Order.DOUBLE + && this.adjList[idx].length + == 3) { + if (this.isDbg()) { + log.debug(" - was flagged as Carbonly-C"); + } + cAtom.setProperty(CARBONYL_C_MARKER, true); + } + + break; + } + // if connected to O/N/S in single bond... + else if ((connectedAtom.getAtomicNumber() + == 7 + || connectedAtom.getAtomicNumber() + == 8 + || connectedAtom.getAtomicNumber() + == 16) + && connectedBond.getOrder() + == Order.SINGLE) { + // if connected O/N/S is not aromatic... + if (!connectedAtom.isAromatic()) { + // set the connected O/N/S atom as marked + if (this.isDbg()) { + log.debug(String.format("Marking Atom #%d (%s) - Met condition 1", connectedIdx, + connectedAtom.getSymbol())); + } + this.markedAtoms.add(connectedIdx); + + // if "acetal C" (2+ O/N/S in single bonds connected to sp3-C)... 
[CONDITION 2.3] + boolean isAllSingleBonds = true; + for (final int connectedInSphere2Idx : this.adjList[connectedIdx]) { + final IBond sphere2Bond = this.bondMap.get(connectedIdx, connectedInSphere2Idx); + if (sphere2Bond.getOrder() + != Order.SINGLE) { + isAllSingleBonds = false; + break; + } + } + if (isAllSingleBonds) { + oNSCounter++; + if (oNSCounter + > 1 + && this.adjList[idx].length + + cAtom.getImplicitHydrogenCount() + == 4) { + // set as marked and break out of connected atoms + if (this.isDbg()) { + log.debug(String.format("Marking Atom #%d (%s) - Met condition 2.3", idx, + cAtom.getSymbol())); + } + isMarked = true; + break; + } + } + } + // if part of oxirane, aziridine and thiirane ring... [CONDITION 2.4] + for (final int connectedInSphere2Idx : this.adjList[connectedIdx]) { + final IAtom connectedInSphere2Atom = molecule.getAtom(connectedInSphere2Idx); + if (connectedInSphere2Atom.getAtomicNumber() + == 6) { + for (final int connectedInSphere3Idx : this.adjList[connectedInSphere2Idx]) { + final IAtom connectedInSphere3Atom = molecule.getAtom(connectedInSphere3Idx); + if (connectedInSphere3Atom.equals(cAtom)) { + // set connected atoms as marked + if (this.isDbg()) { + log.debug(String.format("Marking Atom #%d (%s) - Met condition 2.4", + connectedInSphere2Idx, + connectedInSphere2Atom.getSymbol())); + } + if (this.isDbg()) { + log.debug(String.format("Marking Atom #%d (%s) - Met condition 2.4", + connectedInSphere3Idx, + connectedInSphere3Atom.getSymbol())); + } + this.markedAtoms.add(connectedInSphere2Idx); + this.markedAtoms.add(connectedInSphere3Idx); + // set current atom as marked and break out of connected atoms + if (this.isDbg()) { + log.debug(String.format("Marking Atom #%d (%s) - Met condition 2.4", idx, + cAtom.getSymbol())); + } + isMarked = true; + break; + } + } + } + } + } + } + if (isMarked) { + this.markedAtoms.add(idx); + continue; + } + // if none of the conditions 2.X apply, we have an unmarked C (not relevant here) + } + // if 
H... + else if (atomicNr + == 1) { + // convert to implicit H + final IAtom connectedAtom; + try { + connectedAtom = molecule.getAtom(this.adjList[idx][0]); + } catch (final ArrayIndexOutOfBoundsException e) { + break; + } + + + if (connectedAtom.getImplicitHydrogenCount() + == null) { + connectedAtom.setImplicitHydrogenCount(1); + } else { + connectedAtom.setImplicitHydrogenCount(connectedAtom.getImplicitHydrogenCount() + + 1); + } + continue; + } + // if heteroatom... (CONDITION 1) + else { + if (this.isDbg()) { + log.debug(String.format("Marking Atom #%d (%s) - Met condition 1", idx, cAtom.getSymbol())); + } + this.markedAtoms.add(idx); + continue; + } + } + if (this.isDbg()) { + log.debug(String.format("########## End of search. Marked %d/%d atoms. ##########", this.markedAtoms.size(), + molecule.getAtomCount())); + } + } + + /** + * Searches the molecule for groups of connected marked atoms and extracts each as a new functional group. + * The extraction process includes marked atom's "environments". Connected H's are captured implicitly. + * + * @param molecule the molecule which contains the functional groups + * + * @return a list of all functional groups (including "environments") extracted from the molecule + */ + private List extractGroups(final IAtomContainer molecule) { + if (this.isDbg()) { + log.debug("########## Starting identification & extraction of functional groups... 
##########"); + } + + this.environmentsMap = Maps.newHashMapWithExpectedSize(molecule.getAtomCount()); + final int[] atomIdxToFGMap = new int[molecule.getAtomCount()]; + Arrays.fill(atomIdxToFGMap, -1); + int fGroupIdx = -1; + + while (!this.markedAtoms.isEmpty()) { + // search for another functional group + fGroupIdx++; + + // get next markedAtom as the starting node for the search + final int beginIdx = this.markedAtoms.iterator() + .next(); + if (this.isDbg()) { + log.debug(String.format("Searching new functional group from atom #%d (%s)...", beginIdx, + molecule.getAtom(beginIdx) + .getSymbol())); + } + + // do a BFS from there + final Queue queue = new ArrayDeque<>(); + queue.add(beginIdx); + + while (!queue.isEmpty()) { + final int currentIdx = queue.poll(); + + // we are only interested in marked atoms that are not yet included in a group + if (!this.markedAtoms.contains(currentIdx)) { + continue; + } + + // if it isn't... + final IAtom currentAtom = molecule.getAtom(currentIdx); + if (this.isDbg()) { + log.debug(String.format(" visiting marked atom: #%d (%s)", currentIdx, currentAtom.getSymbol())); + } + + // add its index to the functional group + atomIdxToFGMap[currentIdx] = fGroupIdx; + // also scratch the index from markedAtoms + this.markedAtoms.remove(currentIdx); + + // and take look at the connected atoms + final List currentEnvironment = new ArrayList<>(); + for (final int connectedIdx : this.adjList[currentIdx]) { + // add connected marked atoms to queue + if (this.markedAtoms.contains(connectedIdx)) { + queue.add(connectedIdx); + continue; + } + + // ignore already handled connected atoms + if (atomIdxToFGMap[connectedIdx] + >= 0) { + continue; + } + + // add unmarked connected aromatic heteroatoms + final IAtom connectedAtom = molecule.getAtom(connectedIdx); + if (isHeteroatom(connectedAtom) + && connectedAtom.isAromatic()) { + if (this.isDbg()) { + log.debug(" added connected aromatic heteroatom " + + connectedAtom.getSymbol()); + } + 
atomIdxToFGMap[connectedIdx] = fGroupIdx; + // note that this aromatic heteroatom has been added to a group + this.aromaticHeteroAtoms.put(connectedIdx, true); + } + + // add unmarked connected atoms to current marked atom's environment + final IBond connectedBond = this.bondMap.get(currentIdx, connectedIdx); + + final EnvironmentCalCType type; + if (connectedAtom.getAtomicNumber() + == 6) { + if (connectedAtom.isAromatic()) { + type = EnvironmentCalCType.C_AROMATIC; + } else { + type = EnvironmentCalCType.C_ALIPHATIC; + } + } else { + // aromatic heteroatom, so just ignore + continue; + } + currentEnvironment.add(new EnvironmentalC(type, connectedBond, connectedBond.getBegin() + == connectedAtom + ? 0 + : 1)); + } + this.environmentsMap.put(currentAtom, currentEnvironment); + + // debug logging + if (this.isDbg()) { + int cAromCount = 0, cAliphCount = 0; + for (final EnvironmentalC comp : currentEnvironment) { + if (comp.getType() + == EnvironmentCalCType.C_AROMATIC) { + cAromCount++; + } else if (comp.getType() + == EnvironmentCalCType.C_ALIPHATIC) { + cAliphCount++; + } + } + log.debug(String.format( + " logged marked atom's environment: C_ar:%d, C_al:%d (and %d implicit hydrogens)", + cAromCount, cAliphCount, currentAtom.getImplicitHydrogenCount())); + } + } + + if (this.isDbg()) { + log.debug(" search completed."); + } + } + + // also create FG for lone aromatic heteroatoms, not connected to a FG yet. + for (final int atomIdx : this.aromaticHeteroAtoms.keySet()) { + if (!this.aromaticHeteroAtoms.get(atomIdx)) { + fGroupIdx++; + atomIdxToFGMap[atomIdx] = fGroupIdx; + if (this.isDbg()) { + log.debug("Created FG for lone aromatic heteroatom: " + + molecule.getAtom(atomIdx) + .getSymbol()); + } + } + } + + final List fGs = this.partitionIntoGroups(molecule, atomIdxToFGMap, fGroupIdx + + 1); + + if (this.isDbg()) { + log.debug(String.format("########## Found & extracted %d functional groups. 
##########", fGroupIdx + + 1)); + } + return fGs; + } + + /** + * Generalizes the full environments of functional groups, providing a good balance between preserving + * meaningful detail and generalization. + * + * @param fGroups the list of functional groups including "environments" + */ + private void expandGeneralizedEnvironments(final List fGroups) { + if (this.isDbg()) { + log.debug("########## Starting generalization of functional groups... ##########"); + } + + for (final IAtomContainer fGroup : fGroups) { + final int atomCount = fGroup.getAtomCount(); + + if (this.isDbg()) { + log.debug(String.format("Generalizing functional group (%d atoms)...", atomCount)); + } + + // prechecking for special cases... + if (fGroup.getAtomCount() + == 1) { + final IAtom atom = fGroup.getAtom(0); + final List environment = this.environmentsMap.get(atom); + + if (environment + != null) { + final int envCCount = environment.size(); + + // for H2N-C_env & HO-C_env -> do not replace H & C_env by R! + if ((atom.getAtomicNumber() + == 8 + && envCCount + == 1) + || (atom.getAtomicNumber() + == 7 + && envCCount + == 1)) { + if (this.isDbg()) { + log.debug(String.format( + " - found single atomic N or O FG with one env. C. Expanding environment...", + atom.getSymbol())); + } + this.expandEnvironment(atom, fGroup); + + final int hCount = atom.getImplicitHydrogenCount(); + if (hCount + != 0) { + if (this.isDbg()) { + log.debug(String.format(" - adding %d hydrogens...", hCount)); + } + this.addHydrogens(atom, hCount, fGroup); + atom.setImplicitHydrogenCount(0); + } + continue; + } + // for HN-(C_env)-C_env & HS-C_env -> do not replace H by R! (only C_env!) + if ((atom.getAtomicNumber() + == 7 + && envCCount + == 2) + || (atom.getAtomicNumber() + == 16 + && envCCount + == 1)) { + if (this.isDbg()) { + log.debug(" - found sec. 
amine or simple thiol"); + } + final int hCount = atom.getImplicitHydrogenCount(); + if (hCount + != 0) { + if (this.isDbg()) { + log.debug(String.format(" - adding %d hydrogens...", hCount)); + } + this.addHydrogens(atom, hCount, fGroup); + atom.setImplicitHydrogenCount(0); + } + if (this.isDbg()) { + log.debug(" - expanding environment..."); + } + this.expandEnvironmentGeneralized(atom, fGroup); + continue; + } + } else if (isHeteroatom(atom)) { + final int rAtomCount = atom.getValency(); + final Integer hCount = atom.getImplicitHydrogenCount(); + if (hCount + != null + && hCount + != 0) { + atom.setImplicitHydrogenCount(0); + } + final String atomTypeName = atom.getAtomTypeName(); + if (this.isDbg()) { + log.debug(String.format( + " - found single aromatic heteroatom (%s, Atomtype %s). Adding %d R-Atoms...", + atom.getSymbol(), atomTypeName, rAtomCount)); + } + this.addRAtoms(atom, rAtomCount, fGroup); + continue; + } + } + + // get atoms to process + final List fGroupAtoms = Lists.newArrayList(fGroup.atoms()); + + // process atoms... + for (final IAtom atom : fGroupAtoms) { + final List environment = this.environmentsMap.get(atom); + + if (environment + == null) { + if (atom.getImplicitHydrogenCount() + != 0) { + atom.setImplicitHydrogenCount(0); + } + final int rAtomCount = atom.getValency() + - 1; + if (this.isDbg()) { + log.debug(String.format(" - found connected aromatic heteroatom (%s). Adding %d R-Atoms...", + atom.getSymbol(), rAtomCount)); + } + this.addRAtoms(atom, rAtomCount, fGroup); + } + + // processing carbons... + if (atom.getAtomicNumber() + == 6) { + if (atom.getProperty(CARBONYL_C_MARKER) + == null) { + if (atom.getImplicitHydrogenCount() + != 0) { + atom.setImplicitHydrogenCount(0); + } + if (this.isDbg()) { + log.debug(" - ignoring environment for marked carbon atom"); + } + continue; + } else { + if (this.isDbg()) { + log.debug(" - found carbonyl-carbon. 
Expanding environment..."); + } + this.expandEnvironmentGeneralized(atom, fGroup); + continue; + } + } + // processing heteroatoms... + else { + if (this.isDbg()) { + log.debug(String.format(" - found heteroatom (%s). Expanding environment...", + atom.getSymbol())); + } + this.expandEnvironmentGeneralized(atom, fGroup); + continue; + } + } + } + + if (this.isDbg()) { + log.debug("########## Generalization of functional groups completed. ##########"); + } + } + + /** + * Expands the full environments of functional groups, converted into atoms and bonds. + * + * @param fGroups the list of functional groups including "environments" + */ + private void expandFullEnvironments(final List fGroups) { + if (this.isDbg()) { + log.debug("########## Starting expansion of full environments for functional groups... ##########"); + } + + for (final IAtomContainer fGroup : fGroups) { + final int atomCount = fGroup.getAtomCount(); + if (this.isDbg()) { + log.debug(String.format("Expanding environment on functional group (%d atoms)...", atomCount)); + } + + for (int i = 0; i + < atomCount; i++) { + final IAtom atom = fGroup.getAtom(i); + + if (this.isDbg()) { + log.debug(String.format(" - Atom #%d:% - Expanding environment...", i)); + } + this.expandEnvironment(atom, fGroup); + + final int hCount = atom.getImplicitHydrogenCount(); + if (hCount + != 0) { + if (this.isDbg()) { + log.debug(String.format(" - adding %d hydrogens...", hCount)); + } + this.addHydrogens(atom, hCount, fGroup); + atom.setImplicitHydrogenCount(0); + } + } + } + + if (this.isDbg()) { + log.debug("########## Expansion of full environments for functional groups completed. 
##########"); + } + } + + private void expandEnvironment(final IAtom atom, final IAtomContainer container) { + final List environment = this.environmentsMap.get(atom); + + if (environment + == null + || environment.isEmpty()) { + if (this.isDbg()) { + log.debug(" found no environment to expand."); + } + return; + } + + int cAromCount = 0, cAliphCount = 0; + for (final EnvironmentalC envC : environment) { + final IAtom cAtom = atom.getBuilder() + .newInstance(IAtom.class, "C"); + cAtom.setAtomTypeName("C"); + cAtom.setImplicitHydrogenCount(0); + if (envC.getType() + == EnvironmentCalCType.C_AROMATIC) { + cAtom.setIsAromatic(true); + cAromCount++; + } else { + cAliphCount++; + } + + final IBond bond = envC.createBond(atom, cAtom); + + container.addAtom(cAtom); + container.addBond(bond); + } + + if (this.isDbg()) { + log.debug(String.format(" expanded environment: %dx C_ar and %dx C_al", cAromCount, cAliphCount)); + } + } + + // only call this on marked heteroatoms / carbonyl-C's! + private void expandEnvironmentGeneralized(final IAtom atom, final IAtomContainer container) { + + final List environment = this.environmentsMap.get(atom); + + if (environment + == null) { + if (this.isDbg()) { + log.debug(" found no environment to expand."); + } + return; + } + + int rAtomCount = environment.size(); + final int rAtomsForCCount = rAtomCount; + if (atom.getAtomicNumber() + == 8 + && atom.getImplicitHydrogenCount() + == 1) { + this.addHydrogens(atom, 1, container); + atom.setImplicitHydrogenCount(0); + if (this.isDbg()) { + log.debug(" expanded hydrogen on connected OH-Group"); + } + } else if (isHeteroatom(atom)) { + rAtomCount += atom.getImplicitHydrogenCount(); + } + this.addRAtoms(atom, rAtomCount, container); + + if (atom.getImplicitHydrogenCount() + != 0) { + atom.setImplicitHydrogenCount(0); + } + + if (this.isDbg()) { + log.debug(String.format(" expanded environment: %dx R-atom (incl. 
%d for H replacement)", rAtomCount, + rAtomCount + - rAtomsForCCount)); + } + } + + private final boolean isNonmetal(final IAtom atom) { + return this.nonmetalAtomicNumbers.contains(atom.getAtomicNumber()); + } + + private void addHydrogens(final IAtom atom, final int number, final IAtomContainer container) { + for (int i = 0; i + < number; i++) { + final IAtom hydrogen = atom.getBuilder() + .newInstance(IAtom.class, "H"); + hydrogen.setAtomTypeName("H"); + hydrogen.setImplicitHydrogenCount(0); + + container.addAtom(hydrogen); + container.addBond(atom.getBuilder() + .newInstance(IBond.class, atom, hydrogen, Order.SINGLE)); + } + } + + private void addRAtoms(final IAtom atom, final int number, final IAtomContainer container) { + for (int i = 0; i + < number; i++) { + final IPseudoAtom rAtom = atom.getBuilder() + .newInstance(IPseudoAtom.class, "R"); + rAtom.setAttachPointNum(1); + rAtom.setImplicitHydrogenCount(0); + + container.addAtom(rAtom); + container.addBond(atom.getBuilder() + .newInstance(IBond.class, atom, rAtom, Order.SINGLE)); + } + } + + private List partitionIntoGroups(final IAtomContainer sourceContainer, final int[] atomIdxToFGMap, + final int fGroupCount) { + final List groups = new ArrayList<>(fGroupCount); + for (int i = 0; i + < fGroupCount; i++) { + groups.add(sourceContainer.getBuilder() + .newInstance(IAtomContainer.class)); + } + + final Map atomtoFGMap = Maps.newHashMapWithExpectedSize(sourceContainer.getAtomCount()); + + // atoms + for (int atomIdx = 0; atomIdx + < sourceContainer.getAtomCount(); atomIdx++) { + final int fGroupId = atomIdxToFGMap[atomIdx]; + + if (fGroupId + == -1) { + continue; + } + + final IAtom atom = sourceContainer.getAtom(atomIdx); + final IAtomContainer myGroup = groups.get(fGroupId); + myGroup.addAtom(atom); + atomtoFGMap.put(atom, myGroup); + } + + // bonds + for (final IBond bond : sourceContainer.bonds()) { + final IAtomContainer beginGroup = atomtoFGMap.get(bond.getBegin()); + final IAtomContainer endGroup = 
atomtoFGMap.get(bond.getEnd()); + + if (beginGroup + == null + || endGroup + == null + || beginGroup + != endGroup) { + continue; + } + + beginGroup.addBond(bond); + } + + // single electrons + for (final ISingleElectron electron : sourceContainer.singleElectrons()) { + final IAtomContainer group = atomtoFGMap.get(electron.getAtom()); + if (group + != null) { + group.addSingleElectron(electron); + } + } + + // lone pairs + for (final ILonePair lonePair : sourceContainer.lonePairs()) { + final IAtomContainer group = atomtoFGMap.get(lonePair.getAtom()); + if (group + != null) { + group.addLonePair(lonePair); + } + } + + return groups; + } + + private boolean isDbg() { + return log.isDebugEnabled(); + } + + private boolean checkConstraints(final IAtomContainer molecule) { + for (final IAtom atom : molecule.atoms()) { + if (atom.getFormalCharge() + != null + && atom.getFormalCharge() + != 0) { + throw new IllegalArgumentException("Input molecule must not contain any charges."); + } + if (!this.isNonmetal(atom)) { + throw new IllegalArgumentException("Input molecule must not contain metals or metalloids."); + } + if (atom.getImplicitHydrogenCount() + == null) { + atom.setImplicitHydrogenCount(0); + } + } + + final ConnectedComponents cc = new ConnectedComponents(this.adjList); + if (cc.nComponents() + != 1) { + throw new IllegalArgumentException("Input molecule must consist of only a single connected stucture."); + } + + return true; + } + + /** + * Defines the working mode. + */ + public enum Mode { + /** + * Default mode including the generalization step. + */ + DEFAULT, + /** + * Skips the generalization step. Functional groups will keep their full "environment". + */ + NO_GENERALIZATION + } + + private enum EnvironmentCalCType {C_AROMATIC, C_ALIPHATIC} + + /** + * Describes one carbon atom in the environment of a marked atom. It can either be aromatic + * or aliphatic and also contains a clone of its connecting bond. 
+ */ + private class EnvironmentalC { + private final EnvironmentCalCType type; + private final int bondIndex; + private final Order bondOrder; + private final IBond.Stereo bondStereo; + private final boolean[] bondFlags; + + public EnvironmentalC(final EnvironmentCalCType type, final IBond bond, final int indexInBond) { + this.type = type; + + this.bondIndex = indexInBond; + this.bondOrder = bond.getOrder(); + this.bondStereo = bond.getStereo(); + this.bondFlags = bond.getFlags(); + } + + public EnvironmentCalCType getType() { + return this.type; + } + + public IBond createBond(final IAtom targetAtom, final IAtom cAtom) { + final IBond bond = targetAtom.getBuilder() + .newInstance(IBond.class); + if (this.bondIndex + == 0) { + bond.setAtoms(new IAtom[]{cAtom, targetAtom}); + } else { + bond.setAtoms(new IAtom[]{targetAtom, cAtom}); + } + bond.setOrder(this.bondOrder); + bond.setStereo(this.bondStereo); + bond.setFlags(this.bondFlags); + + return bond; + } + } +} \ No newline at end of file From ffa81d817dd5853cde5cd27fbfb90fd138417b4d Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Mon, 28 Jun 2021 15:34:50 +0200 Subject: [PATCH 256/405] feat: store RMSD and average deviation in sub-datasets via findMatches method --- .../ErtlFunctionalGroupsUtilities.java | 184 +++++++++++------- 1 file changed, 110 insertions(+), 74 deletions(-) diff --git a/src/casekit/nmr/fragmentation/functionalgroup/ErtlFunctionalGroupsUtilities.java b/src/casekit/nmr/fragmentation/functionalgroup/ErtlFunctionalGroupsUtilities.java index 7444530..925a3a7 100644 --- a/src/casekit/nmr/fragmentation/functionalgroup/ErtlFunctionalGroupsUtilities.java +++ b/src/casekit/nmr/fragmentation/functionalgroup/ErtlFunctionalGroupsUtilities.java @@ -17,7 +17,6 @@ import org.openscience.cdk.interfaces.IAtomContainer; import org.openscience.cdk.interfaces.IMolecularFormula; import org.openscience.cdk.silent.SilentChemObjectBuilder; -import org.openscience.cdk.smiles.SmilesParser; import 
org.openscience.cdk.tools.manipulator.AtomContainerManipulator; import org.openscience.cdk.tools.manipulator.MolecularFormulaManipulator; @@ -27,9 +26,9 @@ public class ErtlFunctionalGroupsUtilities { - public static final Map> buildFunctionalGroupDataSets(final String pathToNMRShiftDB, - final String[] nuclei) { - final Map> functionalGroupDataSets = new HashMap<>(); + public static final List buildFunctionalGroupDataSets(final String pathToNMRShiftDB, + final String[] nuclei) { + final List functionalGroupDataSets = new ArrayList<>(); try { final ErtlFunctionalGroupsFinder ertlFunctionalGroupsFinder = new ErtlFunctionalGroupsFinder( ErtlFunctionalGroupsFinder.Mode.NO_GENERALIZATION); @@ -38,8 +37,8 @@ public static final Map> buildFunctionalGroupDataSets(fina List groups; List fragmentTrees; ConnectionTree fragmentTree; - IAtomContainer structure, fragment; - String atomTypeInSpectrum, smiles; + IAtomContainer structure; + String atomTypeInSpectrum; Aromaticity[] aromaticities; for (final DataSet dataSet : dataSetsFromNMRShiftDB) { structure = dataSet.getStructure() @@ -80,14 +79,7 @@ public static final Map> buildFunctionalGroupDataSets(fina dataSetList = Fragmentation.fragmentTreesToSubDataSets(dataSet, fragmentTrees); if (dataSetList != null) { - for (final DataSet dataSetTemp : dataSetList) { - fragment = dataSetTemp.getStructure() - .toAtomContainer(); - smiles = casekit.nmr.utils.Utils.getSmilesFromAtomContainer(fragment); - functionalGroupDataSets.putIfAbsent(smiles, new ArrayList<>()); - functionalGroupDataSets.get(smiles) - .add(dataSetTemp); - } + functionalGroupDataSets.addAll(dataSetList); } } } catch (final IOException | CDKException e) { @@ -97,7 +89,19 @@ public static final Map> buildFunctionalGroupDataSets(fina return functionalGroupDataSets; } - public static List>> sortByFrequency( + public static LinkedHashMap> sortByFrequencies( + final Map> functionalGroupDataSetsMap) { + final LinkedHashMap> sortedCollection = new LinkedHashMap<>(); + final 
List>> sortedFrequencies = getSortedFrequencies( + functionalGroupDataSetsMap); + for (final Map.Entry> frequency : sortedFrequencies) { + sortedCollection.put(frequency.getKey(), frequency.getValue()); + } + + return sortedCollection; + } + + public static List>> getSortedFrequencies( final Map> functionalGroupDataSets) { return functionalGroupDataSets.entrySet() .stream() @@ -106,71 +110,103 @@ public static List>> sortByFrequency( .collect(Collectors.toList()); } - public static Map> findMatches(final Map> functionalGroupDataSets, - final Spectrum querySpectrum, final String mf, - final double shiftTol, final double maxAverageDeviation) { - final Map> matches = new HashMap<>(); - final SmilesParser smilesParser = new SmilesParser(SilentChemObjectBuilder.getInstance()); - List matchesInGroup; + public static Map countFrequencies(final List functionalGroupDataSets) { + final Map frequencies = new HashMap<>(); + String smiles; + for (final DataSet functionalGroupDataSet : functionalGroupDataSets) { + smiles = functionalGroupDataSet.getMeta() + .get("smiles"); + if (smiles + != null) { + frequencies.putIfAbsent(smiles, 0); + frequencies.put(smiles, frequencies.get(smiles) + + 1); + } + } + + return frequencies; + } + + public static Map> collectBySmiles(final List functionalGroupDataSets) { + final Map> collection = new HashMap<>(); + String smiles; + for (final DataSet functionalGroupDataSet : functionalGroupDataSets) { + smiles = functionalGroupDataSet.getMeta() + .get("smiles"); + if (smiles + != null) { + collection.putIfAbsent(smiles, new ArrayList<>()); + collection.get(smiles) + .add(functionalGroupDataSet); + } + } + + return collection; + } + + public static List findMatches(final List functionalGroupDataSets, final Spectrum querySpectrum, + final String mf, final double shiftTol, final double maxAverageDeviation, + final boolean checkMultiplicity) { + final List matches = new ArrayList<>(); + for (final DataSet dataSet : functionalGroupDataSets) { + if 
(isValidMatch(dataSet, querySpectrum, mf, shiftTol, maxAverageDeviation, checkMultiplicity)) { + matches.add(dataSet); + } + } + + return matches; + } + + public static boolean isValidMatch(final DataSet dataSet, final Spectrum querySpectrum, final String mf, + final double shiftTol, final double maxAverageDeviation, + final boolean checkMultiplicity) { final IMolecularFormula iMolecularFormula = MolecularFormulaManipulator.getMolecularFormula(mf, SilentChemObjectBuilder.getInstance()); - for (final Map.Entry> entry : functionalGroupDataSets.entrySet()) { - try { - final IAtomContainer group = smilesParser.parseSmiles(entry.getKey()); - Utils.setAromaticity(group); + final IAtomContainer group = dataSet.getStructure() + .toAtomContainer(); - matchesInGroup = entry.getValue() - .stream() - .filter(dataSet -> { - if (!dataSet.getSpectrum() - .getNuclei()[0].equals(querySpectrum.getNuclei()[0])) { - return false; - } - final String atomTypeInSpectrum = casekit.nmr.utils.Utils.getAtomTypeFromNucleus( - dataSet.getSpectrum() - .getNuclei()[0]); - if (atomTypeInSpectrum.equals("H")) { - if (AtomContainerManipulator.getImplicitHydrogenCount( - dataSet.getStructure() - .toAtomContainer()) - > MolecularFormulaManipulator.getElementCount(iMolecularFormula, - atomTypeInSpectrum)) { - return false; - } - } else { - // check molecular formula with atom types in group - if (!casekit.nmr.utils.Utils.compareWithMolecularFormulaLessOrEqual(group, - mf)) { - return false; - } - // do not allow unsaturated fragments with different size than given molecular formula - if (Utils.getUnsaturatedAtomIndices(group) - .isEmpty() - && !casekit.nmr.utils.Utils.compareWithMolecularFormulaEqual( - group, mf)) { - return false; - } - } - // check average deviation - final Double averageDeviation = Match.calculateAverageDeviation( - dataSet.getSpectrum(), querySpectrum, 0, 0, shiftTol, true, true, - true); - return averageDeviation - != null - && averageDeviation - <= maxAverageDeviation; - }) - 
.collect(Collectors.toList()); - if (matchesInGroup.size() - > 0) { - matches.put(entry.getKey(), matchesInGroup); - } - } catch (final CDKException e) { - e.printStackTrace(); + if (!dataSet.getSpectrum() + .getNuclei()[0].equals(querySpectrum.getNuclei()[0])) { + return false; + } + final String atomTypeInSpectrum = casekit.nmr.utils.Utils.getAtomTypeFromNucleus(dataSet.getSpectrum() + .getNuclei()[0]); + if (atomTypeInSpectrum.equals("H")) { + if (AtomContainerManipulator.getImplicitHydrogenCount(dataSet.getStructure() + .toAtomContainer()) + > MolecularFormulaManipulator.getElementCount(iMolecularFormula, atomTypeInSpectrum)) { + return false; + } + } else { + // check molecular formula with atom types in group + if (!casekit.nmr.utils.Utils.compareWithMolecularFormulaLessOrEqual(group, mf)) { + return false; + } + // do not allow unsaturated fragments with different size than given molecular formula + if (Utils.getUnsaturatedAtomIndices(group) + .isEmpty() + && !casekit.nmr.utils.Utils.compareWithMolecularFormulaEqual(group, mf)) { + return false; } } + // check average deviation + final Double averageDeviation = Match.calculateAverageDeviation(dataSet.getSpectrum(), querySpectrum, 0, 0, + shiftTol, checkMultiplicity, true, true); - return matches; + if (averageDeviation + == null + || averageDeviation + > maxAverageDeviation) { + return false; + } + final Double rmsd = Match.calculateRMSD(dataSet.getSpectrum(), querySpectrum, 0, 0, shiftTol, checkMultiplicity, + true, true); + dataSet.getMeta() + .put("avgDev", Double.toString(averageDeviation)); + dataSet.getMeta() + .put("rmsd", Double.toString(rmsd)); + + return true; } /** From 3dd950c1f8f1ec5ee2b5dcd12a157ba38d7c1a29 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Mon, 28 Jun 2021 15:35:16 +0200 Subject: [PATCH 257/405] chore: removed unused MongoDB class --- src/casekit/nmr/dbservice/MongoDB.java | 65 -------------------------- 1 file changed, 65 deletions(-) delete mode 100644 
src/casekit/nmr/dbservice/MongoDB.java diff --git a/src/casekit/nmr/dbservice/MongoDB.java b/src/casekit/nmr/dbservice/MongoDB.java deleted file mode 100644 index 8b06bf6..0000000 --- a/src/casekit/nmr/dbservice/MongoDB.java +++ /dev/null @@ -1,65 +0,0 @@ -/* - * The MIT License - * - * Copyright 2019 Michael Wenk [https://github.com/michaelwenk] - * - * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ -package casekit.nmr.dbservice; - - -/** - * @author Michael Wenk [https://github.com/michaelwenk] - */ -@Deprecated -public class MongoDB { - - - // public static MongoClient login(final String mongoUser, final String mongoPassword, final String mongoAuthDB) { - // MongoClient mongo; - // try { - // // Creating a Mongo client - // mongo = new MongoClient( - // new ServerAddress("127.0.0.1", 27017), - // MongoCredential.createCredential( - // mongoUser, - // mongoAuthDB, - // mongoPassword.toCharArray()), - // MongoClientOptions.builder().build()); - // System.out.println("Login to MongoDB was successfull"); - // // Accessing the database - // } catch (Exception e) { - // e.printStackTrace(); - // System.err.println(Thread.currentThread().getStackTrace()[1].getMethodName() + ": could not connect to MongoDB!"); - // - // return null; - // } - // - // return mongo; - // } - // - // public static MongoDatabase getDatabase(final MongoClient mongo, final String mongoDBName){ - // return mongo.getDatabase(mongoDBName); - // } - // - // public static MongoCollection getCollection(final MongoClient mongo, final String mongoDBName, final String mongoDBCollection) { - // final MongoDatabase database = MongoDB.getDatabase(mongo, mongoDBName); - //// if (database == null) { - //// return null; - //// } - // System.out.println("Access to database \"" + mongoDBName + "\" was successfull"); - // // Retrieving a collection - // final MongoCollection collection = database.getCollection(mongoDBCollection); - // System.out.println("Retrieval of collection \"" + mongoDBCollection + "\" was successfull -> size: " + collection.countDocuments()); - // - // return collection; - // } - // - // public static void logout(final MongoClient mongo) { - // mongo.close(); - // } -} From 0c5a1bdccfb85d279611f2db5a3a04c9d1fee042 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Mon, 28 Jun 2021 15:53:48 +0200 Subject: [PATCH 258/405] chore: moved methods from old Utils class to new Statistics 
and Utils class --- src/casekit/nmr/Utils.java | 825 ------------------ .../nmr/analysis/HOSECodeShiftStatistics.java | 17 +- src/casekit/nmr/dbservice/COCONUT.java | 14 +- src/casekit/nmr/dbservice/NMRShiftDB.java | 19 +- .../nmr/fragmentation/Fragmentation.java | 3 +- .../ErtlFunctionalGroupsUtilities.java | 12 +- .../fragmentation/model/ConnectionTree.java | 4 +- src/casekit/nmr/hose/HOSECodeBuilder.java | 28 +- .../{Utils.java => HOSECodeUtilities.java} | 7 +- .../nmr/lsd/RankedResultSDFParser.java | 11 +- .../nmr/model/ExtendedConnectionMatrix.java | 2 +- src/casekit/nmr/utils/Match.java | 5 +- src/casekit/nmr/utils/Predict.java | 6 +- src/casekit/nmr/utils/Statistics.java | 271 ++++++ src/casekit/nmr/utils/Utils.java | 446 +++++++++- 15 files changed, 733 insertions(+), 937 deletions(-) delete mode 100644 src/casekit/nmr/Utils.java rename src/casekit/nmr/hose/{Utils.java => HOSECodeUtilities.java} (98%) create mode 100644 src/casekit/nmr/utils/Statistics.java diff --git a/src/casekit/nmr/Utils.java b/src/casekit/nmr/Utils.java deleted file mode 100644 index 7193aa6..0000000 --- a/src/casekit/nmr/Utils.java +++ /dev/null @@ -1,825 +0,0 @@ -/* - * The MIT License - * - * Copyright 2019 Michael Wenk [https://github.com/michaelwenk] - * - * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ -package casekit.nmr; - - -import casekit.nmr.model.Spectrum; -import org.openscience.cdk.aromaticity.Aromaticity; -import org.openscience.cdk.aromaticity.ElectronDonation; -import org.openscience.cdk.aromaticity.Kekulization; -import org.openscience.cdk.atomtype.CDKAtomTypeMatcher; -import org.openscience.cdk.exception.CDKException; -import org.openscience.cdk.graph.CycleFinder; -import org.openscience.cdk.graph.Cycles; -import org.openscience.cdk.interfaces.*; -import org.openscience.cdk.tools.CDKHydrogenAdder; -import org.openscience.cdk.tools.manipulator.AtomContainerManipulator; -import org.openscience.cdk.tools.manipulator.AtomTypeManipulator; -import org.openscience.cdk.tools.manipulator.MolecularFormulaManipulator; - -import java.util.*; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.TimeUnit; - -/** - * @author Michael Wenk [https://github.com/michaelwenk] - * @deprecated - */ -public class Utils { - - /** - * Returns a hashmap consisting of lists of atom indices in an atom container. - * This is done for all atom types (e.g. C or Br) in given atom container. 
- * - * @param ac IAtomContainer to look in - * - * @return - * - * @see #getAtomTypeIndicesByElement(org.openscience.cdk.interfaces.IAtomContainer, java.lang.String) - */ - public static Map> getAtomTypeIndices(final IAtomContainer ac) { - - final Map> atomTypeIndices = new HashMap<>(); - final Set atomTypes = new HashSet<>(); - for (final IAtom heavyAtom : AtomContainerManipulator.getHeavyAtoms(ac)) { - atomTypes.add(heavyAtom.getSymbol()); - } - for (final String atomType : atomTypes) { - atomTypeIndices.put(atomType, Utils.getAtomTypeIndicesByElement(ac, atomType)); - } - - return atomTypeIndices; - } - - - /** - * Returns a list of atom indices in an atom container for a given atom - * type (e.g. C or Br) - * - * @param ac IAtomContainer to use for search - * @param atomType Atom type to find in atom container - * - * @return - */ - public static List getAtomTypeIndicesByElement(final IAtomContainer ac, final String atomType) { - - final ArrayList indices = new ArrayList<>(); - for (int i = 0; i - < ac.getAtomCount(); i++) { - if (ac.getAtom(i) - .getSymbol() - .equals(atomType)) { - indices.add(i); - } - } - - return indices; - } - - - public static IMolecularFormula getMolecularFormulaFromAtomContainer(final IAtomContainer ac) { - return MolecularFormulaManipulator.getMolecularFormula(ac); - } - - - public static String molecularFormularToString(final IMolecularFormula molecularFormula) { - return MolecularFormulaManipulator.getString(molecularFormula); - } - - public static int getDifferenceSpectrumSizeAndMolecularFormulaCount(final Spectrum spectrum, - final IMolecularFormula molFormula, - final int dim) throws CDKException { - if (!spectrum.containsDim(dim)) { - throw new CDKException(Thread.currentThread() - .getStackTrace()[2].getClassName() - + "." 
- + Thread.currentThread() - .getStackTrace()[2].getMethodName() - + ": invalid dimension in spectrum given"); - } - final String atomType = casekit.nmr.utils.Utils.getAtomTypeFromSpectrum(spectrum, dim); - int atomsInMolFormula = 0; - if (molFormula - != null) { - atomsInMolFormula = MolecularFormulaManipulator.getElementCount(molFormula, atomType); - } - return atomsInMolFormula - - spectrum.getSignalCountWithEquivalences(); - } - - /** - * Returns the casekit.nmr isotope identifier for a given element, e.g. C -> 13C. - * Elements defined so far: C, H, N, P, F, D, O, S, Si, B, Pt. - * - * @param element element's symbol (e.g. "C") - * - * @return - */ - public static String getIsotopeIdentifier(final String element) { - switch (element) { - case "C": - return "13C"; - case "H": - return "1H"; - case "N": - return "15N"; - case "P": - return "31P"; - case "F": - return "19F"; - case "O": - return "17O"; - case "S": - return "33S"; - case "Si": - return "29Si"; - case "B": - return "11B"; - case "Pt": - return "195Pt"; - default: - return element; - } - } - - - public static Set getAtomTypesInAtomContainer(final IAtomContainer ac) { - final HashSet atomTypes = new HashSet<>(); - for (final IAtom atom : ac.atoms()) { - atomTypes.add(atom.getSymbol()); - } - - return atomTypes; - } - - - /** - * Detects outliers in given array list of input values and removes them.
- * Here, outliers are those which are outside of a calculated lower and upper bound (whisker). - * The interquartile range (IQR) of the input values is therefore multiplied with a given value - * for whisker creation. - * - * @param input list of values to process - * @param multiplierIQR multiplier for IQR to use for lower and upper bound creation - * - * @return new array list without values outside the generated boundaries - */ - public static List removeOutliers(final List input, final double multiplierIQR) { - final ArrayList inputWithoutOutliers = new ArrayList<>(input); - inputWithoutOutliers.removeAll(Utils.getOutliers(inputWithoutOutliers, multiplierIQR)); - - return inputWithoutOutliers; - } - - /** - * @param input - * - * @return - */ - public static List getOutliers(final List input, final double multiplierIQR) { - final ArrayList outliers = new ArrayList<>(); - if (input.size() - <= 1) { - return outliers; - } - Collections.sort(input); - final ArrayList data1 = new ArrayList<>(input.subList(0, input.size() - / 2)); - final ArrayList data2; - if (input.size() - % 2 - == 0) { - data2 = new ArrayList<>(input.subList(input.size() - / 2, input.size())); - } else { - data2 = new ArrayList<>(input.subList(input.size() - / 2 - + 1, input.size())); - } - final double q1 = getMedian(new ArrayList<>(data1)); - final double q3 = getMedian(new ArrayList<>(data2)); - final double iqr = q3 - - q1; - final double lowerBound = q1 - - multiplierIQR - * iqr; - final double upperBound = q3 - + multiplierIQR - * iqr; - for (int i = 0; i - < input.size(); i++) { - if ((input.get(i) - < lowerBound) - || (input.get(i) - > upperBound)) { - outliers.add(input.get(i)); - } - } - // System.out.println("input size: " + input.size()); - // System.out.println("output size: " + outliers.size()); - return outliers; - } - - - /** - * @param data - * - * @return - */ - public static Double getMedian(final List data) { - if ((data - == null) - || data.isEmpty()) { - return null; - } - 
if (data.size() - == 1) { - return data.get(0); - } - Collections.sort(data); - if (data.size() - % 2 - == 1) { - return data.get(data.size() - / 2); - } else { - return (data.get(data.size() - / 2 - - 1) - + data.get(data.size() - / 2)) - / 2.0; - } - } - - - /** - * @param data - * - * @return - */ - public static Double getMean(final Collection data) { - if ((data - == null) - || data.isEmpty()) { - return null; - } - double sum = 0; - int nullCounter = 0; - for (final Double d : data) { - if (d - != null) { - sum += d; - } else { - nullCounter++; - } - } - return ((data.size() - - nullCounter) - != 0) - ? (sum - / (data.size() - - nullCounter)) - : null; - } - - /** - * @param data - * - * @return - */ - public static Double getRMS(final Collection data) { - if ((data - == null) - || data.isEmpty()) { - return null; - } - double sum = 0; - int nullCounter = 0; - for (final Double d : data) { - if (d - != null) { - sum += d - * d; - } else { - nullCounter++; - } - } - return ((data.size() - - nullCounter) - != 0) - ? Math.sqrt(sum - / (data.size() - - nullCounter)) - : null; - } - - /** - * @param data - * - * @return - * - * @deprecated - */ - public static Double getStandardDeviation(final List data) { - if ((data - == null) - || data.isEmpty()) { - return null; - } - final Double variance = Utils.getVariance(data); - - return (variance - != null) - ? Math.sqrt(variance) - : null; - } - - - /** - * @param data - * - * @return - * - * @deprecated - */ - public static Double getVariance(final Collection data) { - if ((data - == null) - || data.isEmpty()) { - return null; - } - final int nullCounter = Collections.frequency(data, null); - double quadrSum = 0.0; - final Double mean = Utils.getMean(data); - if (mean - == null) { - return null; - } - for (final Double d : data) { - if (d - != null) { - quadrSum += Math.pow(d - - mean, 2); - } - } - - return ((data.size() - - nullCounter) - != 0) - ? 
(quadrSum - / (data.size() - - nullCounter)) - : null; - } - - - /** - * @param data - * - * @return - */ - public static Double getMean(final Double[] data) { - if ((data - == null) - || (data.length - == 0)) { - return null; - } - double sum = 0; - int nullCounter = 0; - for (final Double d : data) { - if (d - != null) { - sum += d; - } else { - nullCounter++; - } - } - return ((data.length - - nullCounter) - != 0) - ? (sum - / (data.length - - nullCounter)) - : null; - } - - /** - * @param lookup - * - * @return - * - * @deprecated - */ - public static Map getMean(final Map> lookup) { - - final HashMap means = new HashMap<>(); - Double meanInList; - for (final String key : lookup.keySet()) { - meanInList = Utils.getMean(lookup.get(key)); - if (meanInList - != null) { - means.put(key, meanInList); - } - } - - return means; - } - - public static boolean isValidBondAddition(final IAtomContainer ac, final int atomIndex, final IBond bondToAdd) { - float bondOrderSum = Utils.getBondOrderSum(ac, atomIndex, true); - bondOrderSum += Utils.getBondOrderAsNumeric(bondToAdd); - - // System.out.print(atomIndex + " --> " + Utils.getBondOrderSum(ac, atomIndex, true) + " + " + Utils.getBondOrderAsNumeric(bondToAdd)); - final IAtom atom = ac.getAtom(atomIndex); - // @TODO include different valencies: N3, N5, S2, S4, S6 etc. - // -1 for cases with heterocyclic aromatics, like the N in the small aromatic ring in coffein if we want to add the bond to the CH3 group - if (atom.isAromatic() - && (atom.getSymbol() - .equals("N") - || atom.getSymbol() - .equals("S") - || atom.getSymbol() - .equals("P"))) { - // System.out.print("[ -1 ]"); - bondOrderSum -= 1; - } - // System.out.print(" = " + bondOrderSum + " <= " + atom.getValency() + " ? 
-> " + (bondOrderSum <= atom.getValency()) + "\n"); - - // @TODO include charges - return bondOrderSum - <= atom.getValency(); - } - - - /** - * @param pathToFile - * - * @return - */ - public static String getFileFormat(final String pathToFile) { - - if (pathToFile - == null - || pathToFile.trim() - .isEmpty()) { - return ""; - } - final String[] split = pathToFile.split("\\."); - - return split[split.length - - 1]; - } - - - // /** - // * @param lookup - // * - // * @return - // */ - // public static Map getRMSD(final Map> lookup) { - // final HashMap rmsd = new HashMap<>(); - // Double rmsdInList; - // for (final String key : lookup.keySet()) { - // rmsdInList = casekit.nmr.utils.Utils.getRMSD(lookup.get(key)); - // if (rmsdInList - // != null) { - // rmsd.put(key, rmsdInList); - // } - // } - // - // return rmsd; - // } - - public static Boolean isSaturated(final IAtomContainer ac, final int atomIndex) { - if (!Utils.checkIndexInAtomContainer(ac, atomIndex)) { - return null; - } - return ac.getAtom(atomIndex) - .getValency() - != null - && Utils.getBondOrderSum(ac, atomIndex, true) - .intValue() - >= ac.getAtom(atomIndex) - .getValency(); - } - - public static List getUnsaturatedAtomIndices(final IAtomContainer ac) { - final List unsaturatedAtomIndices = new ArrayList<>(); - for (int i = 0; i - < ac.getAtomCount(); i++) { - // set the indices of unsaturated atoms in substructure - if (!isSaturated(ac, i)) { - unsaturatedAtomIndices.add(i); - } - } - return unsaturatedAtomIndices; - } - - public static void addImplicitHydrogens(final IAtomContainer ac) throws CDKException { - final CDKAtomTypeMatcher matcher = CDKAtomTypeMatcher.getInstance(ac.getBuilder()); - IAtomType type; - for (final IAtom atom : ac.atoms()) { - type = matcher.findMatchingAtomType(ac, atom); - AtomTypeManipulator.configure(atom, type); - } - final CDKHydrogenAdder adder = CDKHydrogenAdder.getInstance(ac.getBuilder()); - adder.addImplicitHydrogens(ac); - } - - public static void 
addExplicitHydrogens(final IAtomContainer ac) throws CDKException { - addImplicitHydrogens(ac); - convertImplicitToExplicitHydrogens(ac); - } - - public static void convertImplicitToExplicitHydrogens(final IAtomContainer ac) { - AtomContainerManipulator.convertImplicitToExplicitHydrogens(ac); - } - - /** - * @param lookup - * - * @return - */ - public static Map getMedian(final Map> lookup) { - - final Map medians = new HashMap<>(); - Double medianInList; - for (final String key : lookup.keySet()) { - medianInList = Utils.getMedian(lookup.get(key)); - if (medianInList - != null) { - medians.put(key, medianInList); - } - } - - return medians; - } - - /** - * @param hoseLookupToExtend - * @param hoseLookup - * - * @deprecated - */ - public static void combineHashMaps(final Map> hoseLookupToExtend, - final Map> hoseLookup) { - for (final String hose : hoseLookup.keySet()) { - if (!hoseLookupToExtend.containsKey(hose)) { - hoseLookupToExtend.put(hose, new ArrayList<>()); - } - hoseLookupToExtend.get(hose) - .addAll(hoseLookup.get(hose)); - } - } - - public static Double roundDouble(final Double value, final int decimalPlaces) { - if (value - == null) { - return null; - } - final int decimalFactor = (int) (Math.pow(10, decimalPlaces)); - - return (Math.round(value - * decimalFactor) - / (double) decimalFactor); - } - - /** - * Checks whether a structure contains explicit hydrogen atoms or not. - * - * @param ac structure to check - * - * @return - */ - public static boolean containsExplicitHydrogens(final IAtomContainer ac) { - return getExplicitHydrogenCount(ac) - > 0; - } - - /** - * Stores all explicit hydrogens as implicit counter for the bonded heavy - * atoms and removes those from the atom container.
- * Also, a HashMap containing non-hydrogen atoms and its indices - * before the removals will be returned which one can use for atom index - * comparison (before and after the removals). - * - * @param ac the structure to lsd - * - * @return - * - * @see #containsExplicitHydrogens(org.openscience.cdk.interfaces.IAtomContainer) - */ - public static Map convertExplicitToImplicitHydrogens(final IAtomContainer ac) { - // create a list of atom indices which one can use for index comparison (before vs. after) after removing the explicit hydrogens - final Map atomIndices = new HashMap<>(); - final List toRemoveList = new ArrayList<>(); - IAtom atomB; - for (final IAtom atomA : ac.atoms()) { - // check each atom whether it is an hydrogen; - // if yes then store (increase) the number of implicit hydrogens - // for its bonded heavy atom - if (atomA.getSymbol() - .equals("H")) { - atomB = ac.getConnectedAtomsList(atomA) - .get(0); - if (atomB.getImplicitHydrogenCount() - == null) { - atomB.setImplicitHydrogenCount(0); - } - atomB.setImplicitHydrogenCount(atomB.getImplicitHydrogenCount() - + 1); - toRemoveList.add(atomA); - } else { - // store all non-hydrogen atoms and their indices - atomIndices.put(atomA, atomA.getIndex()); - } - - } - // remove all explicit hydrogen atoms - for (final IAtom iAtom : toRemoveList) { - ac.removeAtom(iAtom); - } - - return atomIndices; - } - - /** - * @param ac - * - * @return - */ - public static List getExplicitHydrogenIndices(final IAtomContainer ac) { - final List explicitHydrogenIndicesList = new ArrayList<>(); - for (int i = 0; i - < ac.getAtomCount(); i++) { - if (ac.getAtom(i) - .getSymbol() - .equals("H")) { - explicitHydrogenIndicesList.add(i); - } - } - - return explicitHydrogenIndicesList; - } - - /** - * @param ac - * - * @return - */ - public static int getExplicitHydrogenCount(final IAtomContainer ac) { - return getExplicitHydrogenIndices(ac).size(); - } - - - public static void setAromaticity(final IAtomContainer ac) throws 
CDKException { - AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(ac); - final ElectronDonation model = ElectronDonation.cdkAllowingExocyclic(); - final CycleFinder cycles = Cycles.all(ac.getAtomCount()); - final Aromaticity aromaticity = new Aromaticity(model, cycles); - aromaticity.apply(ac); - } - - public static void setAromaticityAndKekulize(final IAtomContainer ac) throws CDKException { - Utils.setAromaticity(ac); - Kekulization.kekulize(ac); - } - - public static void setAromaticity(final IAtomContainer ac, final Aromaticity aromaticity) throws CDKException { - AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(ac); - aromaticity.apply(ac); - } - - public static void setAromaticityAndKekulize(final IAtomContainer ac, - final Aromaticity aromaticity) throws CDKException { - Utils.setAromaticity(ac, aromaticity); - Kekulization.kekulize(ac); - } - - - /** - * Removes atoms from a given atom type from an atom container. - * - * @param ac IAtomContainer object where to remove the atoms - * @param atomType Atom type (element's name, e.g. 
C or Br) - * - * @return IAtomContainer where the atoms were removed - */ - public static IAtomContainer removeAtoms(final IAtomContainer ac, final String atomType) { - - final ArrayList toRemoveList = new ArrayList<>(); - for (final IAtom atomA : ac.atoms()) { - if (atomA.getSymbol() - .equals(atomType)) {// detect whether the current atom A is a from the given atom type - toRemoveList.add(atomA); - } - } - for (final IAtom iAtom : toRemoveList) { - ac.removeAtom(iAtom); - } - - return ac; - } - - // public static String getSpectrumNucleiAsString(final Spectrum spectrum) { - // String specID = ""; - // for (int i = 0; i < spectrum.getNDim(); i++) { - // specID += spectrum.getNuclei()[i]; - // if (i < spectrum.getNDim() - 1) { - // specID += "-"; - // } - // } - // - // return specID; - // } - - public static boolean checkIndexInAtomContainer(final IAtomContainer ac, final int atomIndex) { - return ((atomIndex - >= 0) - && atomIndex - < ac.getAtomCount()); - } - - public static ExecutorService initExecuter(final int nThreads) { - return Executors.newFixedThreadPool(nThreads); - } - - public static void stopExecuter(final ExecutorService executor, final long seconds) { - executor.shutdown(); - try { - if (!executor.awaitTermination(seconds, TimeUnit.SECONDS)) { - System.err.println("killing non-finished tasks!"); - executor.shutdownNow(); - } - } catch (final InterruptedException e) { - System.err.println("killing non-finished tasks!"); - executor.shutdownNow(); - } - } - - /** - * Returns the bond order for a numeric order value. 
- * - * @param orderAsNumeric - * - * @return - */ - public static IBond.Order getBondOrder(final int orderAsNumeric) { - for (final IBond.Order order : IBond.Order.values()) { - if (order.numeric() - == orderAsNumeric) { - return order; - } - } - - return null; - } - - public static Float getBondOrderAsNumeric(final IBond bond) { - if (bond - == null) { - return null; - } - final float bondOrderAsNumeric; - if (bond.isAromatic()) { - bondOrderAsNumeric = (float) 1.5; - } else { - bondOrderAsNumeric = bond.getOrder() - .numeric(); - } - - return bondOrderAsNumeric; - } - - public static Float getBondOrderSum(final IAtomContainer ac, final int atomIndex, - final boolean includeImplicitHydrogenCount) { - if (!Utils.checkIndexInAtomContainer(ac, atomIndex)) { - return null; - } - float bondsOrderSum = 0; - final IAtom atom = ac.getAtom(atomIndex); - for (final IBond bond : ac.getConnectedBondsList(atom)) { - bondsOrderSum += Utils.getBondOrderAsNumeric(bond); - } - if (includeImplicitHydrogenCount - && (atom.getImplicitHydrogenCount() - != null)) { - bondsOrderSum += atom.getImplicitHydrogenCount(); - } - - return bondsOrderSum; - } - -} diff --git a/src/casekit/nmr/analysis/HOSECodeShiftStatistics.java b/src/casekit/nmr/analysis/HOSECodeShiftStatistics.java index b726d07..b217a5a 100644 --- a/src/casekit/nmr/analysis/HOSECodeShiftStatistics.java +++ b/src/casekit/nmr/analysis/HOSECodeShiftStatistics.java @@ -1,12 +1,13 @@ package casekit.nmr.analysis; -import casekit.nmr.Utils; import casekit.nmr.dbservice.COCONUT; import casekit.nmr.dbservice.NMRShiftDB; import casekit.nmr.fragmentation.model.ConnectionTree; import casekit.nmr.hose.HOSECodeBuilder; import casekit.nmr.model.DataSet; import casekit.nmr.model.Signal; +import casekit.nmr.utils.Statistics; +import casekit.nmr.utils.Utils; import com.google.gson.Gson; import com.google.gson.GsonBuilder; import com.google.gson.JsonObject; @@ -52,7 +53,7 @@ public static Map>> collectHOSECodeShifts(final for (final DataSet 
dataSet : dataSetList) { structure = dataSet.getStructure() .toAtomContainer(); - if (Utils.containsExplicitHydrogens(structure)) { + if (casekit.nmr.utils.Utils.containsExplicitHydrogens(structure)) { System.out.println("!!!Dataset skipped because of previously set explicit hydrogens!!!"); continue; } @@ -74,8 +75,8 @@ public static Map>> collectHOSECodeShifts(final } } - casekit.nmr.Utils.convertImplicitToExplicitHydrogens(structure); - casekit.nmr.Utils.setAromaticityAndKekulize(structure); + casekit.nmr.utils.Utils.convertImplicitToExplicitHydrogens(structure); + Utils.setAromaticityAndKekulize(structure); } catch (final CDKException e) { e.printStackTrace(); continue; @@ -148,13 +149,13 @@ public static Map> buildHOSECodeShiftStatistics( hoseCodeShiftStatistics.put(hoseCodes.getKey(), new HashMap<>()); for (final Map.Entry> solvents : hoseCodes.getValue() .entrySet()) { - values = solvents.getValue(); //casekit.nmr.Utils.removeOutliers(solvents.getValue(), 1.5); + values = solvents.getValue(); //casekit.nmr.HOSECodeUtilities.removeOutliers(solvents.getValue(), 1.5); hoseCodeShiftStatistics.get(hoseCodes.getKey()) .put(solvents.getKey(), new Double[]{(double) values.size(), Collections.min(values), - casekit.nmr.Utils.getMean(values), - // casekit.nmr.Utils.getRMS(values), - casekit.nmr.Utils.getMedian(values), Collections.max(values)}); + Statistics.getMean(values), + // casekit.nmr.HOSECodeUtilities.getRMS(values), + Statistics.getMedian(values), Collections.max(values)}); } } diff --git a/src/casekit/nmr/dbservice/COCONUT.java b/src/casekit/nmr/dbservice/COCONUT.java index a1537fd..e65692c 100644 --- a/src/casekit/nmr/dbservice/COCONUT.java +++ b/src/casekit/nmr/dbservice/COCONUT.java @@ -43,16 +43,16 @@ public static List getDataSetsWithShiftPredictionFromCOCONUT(final Stri while (iterator.hasNext()) { structure = iterator.next(); AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(structure); - if 
(!casekit.nmr.Utils.containsExplicitHydrogens(structure)) { + if (!Utils.containsExplicitHydrogens(structure)) { hydrogenAdder.addImplicitHydrogens(structure); } - casekit.nmr.Utils.setAromaticityAndKekulize(structure); + Utils.setAromaticityAndKekulize(structure); meta = new HashMap<>(); meta.put("title", structure.getTitle()); meta.put("id", structure.getProperty("ID")); - mf = casekit.nmr.Utils.getMolecularFormulaFromAtomContainer(structure); - meta.put("mf", casekit.nmr.Utils.molecularFormularToString(mf)); + mf = Utils.getMolecularFormulaFromAtomContainer(structure); + meta.put("mf", Utils.molecularFormularToString(mf)); try { final String smiles = casekit.nmr.utils.Utils.getSmilesFromAtomContainer(structure); meta.put("smiles", smiles); @@ -62,7 +62,7 @@ public static List getDataSetsWithShiftPredictionFromCOCONUT(final Stri for (final String nucleus : nuclei) { final String atomType = casekit.nmr.utils.Utils.getAtomTypeFromNucleus(nucleus); - final List atomIndices = casekit.nmr.Utils.getAtomTypeIndicesByElement(structure, atomType); + final List atomIndices = Utils.getAtomTypeIndicesByElement(structure, atomType); // spectrumPropertyString = ((String) structure.getProperty("CNMR_CALC_SHIFTS")).replaceAll("[\\n\\r]", // ""); // split = spectrumPropertyString.split("\\d+:"); @@ -127,7 +127,7 @@ public static List getDataSetsWithShiftPredictionFromCOCONUT(final Stri // + structure.getTitle()); // System.out.println("// ???"); // System.out.println("// " - // + casekit.nmr.Utils.molecularFormularToString(mf)); + // + casekit.nmr.HOSECodeUtilities.molecularFormularToString(mf)); // for (int i = 0; i // < spectrum.getSignalCount(); i++) { // System.out.println(nucleus @@ -143,7 +143,7 @@ public static List getDataSetsWithShiftPredictionFromCOCONUT(final Stri // } // if no spectrum could be built or the number of signals in spectrum is different than the atom number in molecule - if (casekit.nmr.Utils.getDifferenceSpectrumSizeAndMolecularFormulaCount(spectrum, mf, 
0) + if (Utils.getDifferenceSpectrumSizeAndMolecularFormulaCount(spectrum, mf, 0) != 0) { continue; } diff --git a/src/casekit/nmr/dbservice/NMRShiftDB.java b/src/casekit/nmr/dbservice/NMRShiftDB.java index 2a5e277..f10c3a0 100644 --- a/src/casekit/nmr/dbservice/NMRShiftDB.java +++ b/src/casekit/nmr/dbservice/NMRShiftDB.java @@ -12,11 +12,11 @@ package casekit.nmr.dbservice; -import casekit.nmr.Utils; import casekit.nmr.model.Assignment; import casekit.nmr.model.DataSet; import casekit.nmr.model.Signal; import casekit.nmr.model.Spectrum; +import casekit.nmr.utils.Utils; import org.openscience.cdk.exception.CDKException; import org.openscience.cdk.interfaces.IAtomContainer; import org.openscience.cdk.interfaces.IMolecularFormula; @@ -114,23 +114,23 @@ public static List getDataSetsFromNMRShiftDB(final String pathToNMRShif while (iterator.hasNext()) { structure = iterator.next(); AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(structure); - explicitHydrogenIndices = Utils.getExplicitHydrogenIndices(structure); + explicitHydrogenIndices = casekit.nmr.utils.Utils.getExplicitHydrogenIndices(structure); Collections.sort(explicitHydrogenIndices); if (!explicitHydrogenIndices.isEmpty()) { // remove explicit hydrogens Utils.removeAtoms(structure, "H"); } hydrogenAdder.addImplicitHydrogens(structure); - Utils.setAromaticityAndKekulize(structure); + casekit.nmr.utils.Utils.setAromaticityAndKekulize(structure); meta = new HashMap<>(); meta.put("title", structure.getTitle()); meta.put("id", structure.getProperty("nmrshiftdb2 ID")); - mf = Utils.getMolecularFormulaFromAtomContainer(structure); - meta.put("mfOriginal", Utils.molecularFormularToString(mf)); + mf = casekit.nmr.utils.Utils.getMolecularFormulaFromAtomContainer(structure); + meta.put("mfOriginal", casekit.nmr.utils.Utils.molecularFormularToString(mf)); mfAlphabetic = new StringBuilder(); - mfAlphabeticMap = new TreeMap<>( - 
casekit.nmr.utils.Utils.getMolecularFormulaElementCounts(Utils.molecularFormularToString(mf))); + mfAlphabeticMap = new TreeMap<>(casekit.nmr.utils.Utils.getMolecularFormulaElementCounts( + casekit.nmr.utils.Utils.molecularFormularToString(mf))); for (final Map.Entry entry : mfAlphabeticMap.entrySet()) { mfAlphabetic.append(entry.getKey()); if (entry.getValue() @@ -160,7 +160,8 @@ public static List getDataSetsFromNMRShiftDB(final String pathToNMRShif // if no spectrum could be built or the number of signals in spectrum is different than the atom number in molecule if ((spectrum == null) - || Utils.getDifferenceSpectrumSizeAndMolecularFormulaCount(spectrum, mf, 0) + || casekit.nmr.utils.Utils.getDifferenceSpectrumSizeAndMolecularFormulaCount(spectrum, mf, + 0) != 0) { continue; } @@ -320,7 +321,7 @@ public static List getDataSetsFromNMRShiftDB(final String pathToNMRShif // final HashSet atomTypes = new HashSet<>(); // final IteratingSDFReader iterator = new IteratingSDFReader(new FileReader(pathToDB), SilentChemObjectBuilder.getInstance()); // while (iterator.hasNext()) { - // atomTypes.addAll(Utils.getAtomTypesInAtomContainer(iterator.next())); + // atomTypes.addAll(HOSECodeUtilities.getAtomTypesInAtomContainer(iterator.next())); // } // // return atomTypes; diff --git a/src/casekit/nmr/fragmentation/Fragmentation.java b/src/casekit/nmr/fragmentation/Fragmentation.java index 58a7f49..3e69ec9 100644 --- a/src/casekit/nmr/fragmentation/Fragmentation.java +++ b/src/casekit/nmr/fragmentation/Fragmentation.java @@ -133,8 +133,7 @@ public static List fragmentTreesToSubDataSets(final DataSet dataSet, .get("title")); meta.put("id", dataSet.getMeta() .get("id")); - meta.put("mf", casekit.nmr.Utils.molecularFormularToString( - casekit.nmr.Utils.getMolecularFormulaFromAtomContainer(substructure))); + meta.put("mf", Utils.molecularFormularToString(Utils.getMolecularFormulaFromAtomContainer(substructure))); subDataSet.setMeta(meta); fragmentDataSetList.add(subDataSet); diff 
--git a/src/casekit/nmr/fragmentation/functionalgroup/ErtlFunctionalGroupsUtilities.java b/src/casekit/nmr/fragmentation/functionalgroup/ErtlFunctionalGroupsUtilities.java index 925a3a7..7f2121c 100644 --- a/src/casekit/nmr/fragmentation/functionalgroup/ErtlFunctionalGroupsUtilities.java +++ b/src/casekit/nmr/fragmentation/functionalgroup/ErtlFunctionalGroupsUtilities.java @@ -1,6 +1,5 @@ package casekit.nmr.fragmentation.functionalgroup; -import casekit.nmr.Utils; import casekit.nmr.dbservice.NMRShiftDB; import casekit.nmr.fragmentation.Fragmentation; import casekit.nmr.fragmentation.FragmentationUtils; @@ -8,6 +7,7 @@ import casekit.nmr.model.DataSet; import casekit.nmr.model.Spectrum; import casekit.nmr.utils.Match; +import casekit.nmr.utils.Utils; import org.openscience.cdk.aromaticity.Aromaticity; import org.openscience.cdk.aromaticity.ElectronDonation; import org.openscience.cdk.exception.CDKException; @@ -64,8 +64,8 @@ public static final List buildFunctionalGroupDataSets(final String path == 0) { continue; } - } else if (Utils.getAtomTypeIndicesByElement(group, atomTypeInSpectrum) - .isEmpty()) { + } else if (casekit.nmr.utils.Utils.getAtomTypeIndicesByElement(group, atomTypeInSpectrum) + .isEmpty()) { continue; } fragmentTree = Fragmentation.buildFragmentTree(group, 0, null, new HashSet<>(), false); @@ -183,8 +183,8 @@ public static boolean isValidMatch(final DataSet dataSet, final Spectrum querySp return false; } // do not allow unsaturated fragments with different size than given molecular formula - if (Utils.getUnsaturatedAtomIndices(group) - .isEmpty() + if (casekit.nmr.utils.Utils.getUnsaturatedAtomIndices(group) + .isEmpty() && !casekit.nmr.utils.Utils.compareWithMolecularFormulaEqual(group, mf)) { return false; } @@ -225,7 +225,7 @@ private static void restoreOriginalEnvironmentalCarbons(final List(); // create a list (copy) of all atoms of the group because of atom removals and additions in group atom container group.atoms() diff --git 
a/src/casekit/nmr/fragmentation/model/ConnectionTree.java b/src/casekit/nmr/fragmentation/model/ConnectionTree.java index bc735b9..91cd6ab 100644 --- a/src/casekit/nmr/fragmentation/model/ConnectionTree.java +++ b/src/casekit/nmr/fragmentation/model/ConnectionTree.java @@ -11,7 +11,7 @@ */ package casekit.nmr.fragmentation.model; -import casekit.nmr.hose.Utils; +import casekit.nmr.hose.HOSECodeUtilities; import org.openscience.cdk.interfaces.IAtom; import org.openscience.cdk.interfaces.IBond; @@ -492,7 +492,7 @@ public String toString() { .append(": "); } if (nodeInSphere.hasAParent()) { - treeStringBuilder.append(Utils.getSymbolForBond(nodeInSphere.getBondToParent())); + treeStringBuilder.append(HOSECodeUtilities.getSymbolForBond(nodeInSphere.getBondToParent())); } if (nodeInSphere.isRingClosureNode()) { treeStringBuilder.append("&"); diff --git a/src/casekit/nmr/hose/HOSECodeBuilder.java b/src/casekit/nmr/hose/HOSECodeBuilder.java index 74d2a57..ae94d25 100644 --- a/src/casekit/nmr/hose/HOSECodeBuilder.java +++ b/src/casekit/nmr/hose/HOSECodeBuilder.java @@ -55,19 +55,19 @@ private static List buildPositionsInSphere(final ConnectionTreeNode node bond = nodeInPrevSphere.getBondsToChildren() .get(j); position = ""; - if (Utils.getSymbolForBond(bond) + if (HOSECodeUtilities.getSymbolForBond(bond) == null) { throw new CDKException(Thread.currentThread() .getStackTrace()[1].getMethodName() + ": no bond information"); } - position += Utils.getSymbolForBond(bond); + position += HOSECodeUtilities.getSymbolForBond(bond); if (nodeInSphere.isRingClosureNode()) { position += "&"; } else { if (useBremserElementNotation) { - position += Utils.toHOSECodeSymbol(nodeInSphere.getAtom() - .getSymbol()); + position += HOSECodeUtilities.toHOSECodeSymbol(nodeInSphere.getAtom() + .getSymbol()); } else { position += nodeInSphere.getAtom() .getSymbol(); @@ -274,7 +274,7 @@ public static ConnectionTree buildConnectionTree(final IAtomContainer ac, final final ConnectionTree connectionTree 
= new ConnectionTree(ac.getAtom(rootAtomIndex), rootAtomIndex); BFS(ac, connectionTree, queue, new HashSet<>(visited), maxSphere); - Utils.rankChildNodes(connectionTree); + HOSECodeUtilities.rankChildNodes(connectionTree); return connectionTree; } @@ -295,7 +295,7 @@ public static ConnectionTree buildConnectionTree(final IAtomContainer ac, final public static ConnectionTree buildConnectionTree(final String HOSECode, final boolean useBremserElementNotation) throws CDKException { final Map> ringClosures = new HashMap<>(); - final List sphereStrings = Utils.splitHOSECodeIntoSpheres(HOSECode); + final List sphereStrings = HOSECodeUtilities.splitHOSECodeIntoSpheres(HOSECode); IAtom atom; IBond bond; final int maxSphere; @@ -306,7 +306,7 @@ public static ConnectionTree buildConnectionTree(final String HOSECode, maxSphere = sphereStrings.size() - 1; // zeroth sphere - positionsInSphere = Utils.splitHOSECodeSphereIntoPositions(sphereStrings.get(0), true); + positionsInSphere = HOSECodeUtilities.splitHOSECodeSphereIntoPositions(sphereStrings.get(0), true); // create root atom atom = new Atom(positionsInSphere.get(0) .get(0)); @@ -318,7 +318,7 @@ public static ConnectionTree buildConnectionTree(final String HOSECode, for (int sphere = 1; sphere <= maxSphere; sphere++) { // get positions (sections separated by comma) of current sphere - positionsInSphere = Utils.splitHOSECodeSphereIntoPositions(sphereStrings.get(sphere), false); + positionsInSphere = HOSECodeUtilities.splitHOSECodeSphereIntoPositions(sphereStrings.get(sphere), false); // for all positions for (final int positionIndex : positionsInSphere.keySet()) { // for each child elements (symbols) in position @@ -351,7 +351,7 @@ public static ConnectionTree buildConnectionTree(final String HOSECode, } bond = SilentChemObjectBuilder.getInstance() .newBond(); - bond.setOrder(Utils.getBondOrderForSymbol(bondTypeString)); + bond.setOrder(HOSECodeUtilities.getBondOrderForSymbol(bondTypeString)); if (bondTypeString.equals("*")) 
{ bond.setIsInRing(true); bond.setIsAromatic(true); @@ -383,7 +383,7 @@ public static ConnectionTree buildConnectionTree(final String HOSECode, // bond = SilentChemObjectBuilder.getInstance().newBond(); // bond.setAtom(parentNodeInPrevSphere.getAtom(), 0); // bond.setAtom(parentNodeInSphere.getAtom(), 1); - // bond.setOrder(Utils.getBondOrderForSymbol(bondTypeString)); + // bond.setOrder(HOSECodeUtilities.getBondOrderForSymbol(bondTypeString)); // if (bondTypeString.equals("*")) { // bond.setIsAromatic(true); // } else { @@ -395,7 +395,7 @@ public static ConnectionTree buildConnectionTree(final String HOSECode, // parentNodeInSphere.addParentNode(parentNodeInPrevSphere, bond); // connectionTree.addNode(null, -1 * parentNodeInSphere.getKey(), parentNodeInSphere.getKey(), bond, sphere + 1, true); - } else if (Utils.countAtoms(childElementCore) + } else if (HOSECodeUtilities.countAtoms(childElementCore) == 1) { // each position contains either ring closures (&) or one element (e.g. C, Br), plus the bond information if (childElementCore.length() == 3) { // in case of bond type and an element with two letters, e.g. 
*Cl or =Br @@ -424,7 +424,7 @@ public static ConnectionTree buildConnectionTree(final String HOSECode, bond = SilentChemObjectBuilder.getInstance() .newBond(); if (useBremserElementNotation) { - atomTypeString = Utils.toElementSymbol(atomTypeString); + atomTypeString = HOSECodeUtilities.toElementSymbol(atomTypeString); } atom = new Atom(atomTypeString); bond.setAtom(atom, 0); @@ -432,7 +432,7 @@ public static ConnectionTree buildConnectionTree(final String HOSECode, - 1, true) .get(positionIndex) .getAtom(), 1); - bond.setOrder(Utils.getBondOrderForSymbol(bondTypeString)); + bond.setOrder(HOSECodeUtilities.getBondOrderForSymbol(bondTypeString)); bond.setIsAromatic(bondTypeString.equals("*")); // set formal charge to atom if (childElement.contains("-")) { @@ -475,7 +475,7 @@ public static ConnectionTree buildConnectionTree(final String HOSECode, // System.out.println(" -> number of ring closures in sphere: " + sphere + " -> " + ringClosures.get(sphere).size()); // } - Utils.rankChildNodes(connectionTree); + HOSECodeUtilities.rankChildNodes(connectionTree); return connectionTree; } diff --git a/src/casekit/nmr/hose/Utils.java b/src/casekit/nmr/hose/HOSECodeUtilities.java similarity index 98% rename from src/casekit/nmr/hose/Utils.java rename to src/casekit/nmr/hose/HOSECodeUtilities.java index 80725e9..0e66dad 100644 --- a/src/casekit/nmr/hose/Utils.java +++ b/src/casekit/nmr/hose/HOSECodeUtilities.java @@ -21,7 +21,7 @@ import java.util.List; import java.util.Map; -public class Utils { +public class HOSECodeUtilities { /** * Returns the summed subtree weight starting at a specific node in a connection @@ -64,7 +64,7 @@ private static int getSubtreeWeight(final ConnectionTreeNode node, final Connect * @return the priority weight for node; plus the weight of * the bond to its parent node if the parent node is not null * - * @see Utils#getSymbolPriorityWeight(String) + * @see HOSECodeUtilities#getSymbolPriorityWeight(String) */ public static Integer getNodeWeight(final 
ConnectionTreeNode node, final ConnectionTreeNode parentNode) { int weight = 0; @@ -87,9 +87,6 @@ public static Integer getNodeWeight(final ConnectionTreeNode node, final Connect } else { weight += getSymbolPriorityWeight(node.getAtom() .getSymbol()); - // weight -= node.getAtom() - // .getImplicitHydrogenCount() - // * getSymbolPriorityWeight("H"); } return weight; diff --git a/src/casekit/nmr/lsd/RankedResultSDFParser.java b/src/casekit/nmr/lsd/RankedResultSDFParser.java index 3a6a574..d38c5a8 100644 --- a/src/casekit/nmr/lsd/RankedResultSDFParser.java +++ b/src/casekit/nmr/lsd/RankedResultSDFParser.java @@ -5,6 +5,7 @@ import casekit.nmr.model.Signal; import casekit.nmr.model.Spectrum; import casekit.nmr.utils.Match; +import casekit.nmr.utils.Statistics; import casekit.nmr.utils.Utils; import org.openscience.cdk.exception.CDKException; import org.openscience.cdk.interfaces.IAtomContainer; @@ -56,12 +57,12 @@ public static List parseRankedResultSDFile(final Reader fileReader, structure = iterator.next(); AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(structure); hydrogenAdder.addImplicitHydrogens(structure); - casekit.nmr.Utils.setAromaticityAndKekulize(structure); + Utils.setAromaticityAndKekulize(structure); meta = new HashMap<>(); meta.put("title", structure.getTitle()); meta.put("id", structure.getProperty("nmrshiftdb2 ID")); - mf = casekit.nmr.Utils.getMolecularFormulaFromAtomContainer(structure); - meta.put("mf", casekit.nmr.Utils.molecularFormularToString(mf)); + mf = Utils.getMolecularFormulaFromAtomContainer(structure); + meta.put("mf", Utils.molecularFormularToString(mf)); try { final String smiles = casekit.nmr.utils.Utils.getSmilesFromAtomContainer(structure); meta.put("smiles", smiles); @@ -124,13 +125,13 @@ public static List parseRankedResultSDFile(final Reader fileReader, } for (final int signalIndex : signalShiftList.keySet()) { predictedSpectrum.getSignal(signalIndex) - 
.setShift(casekit.nmr.Utils.getMedian(signalShiftList.get(signalIndex)), 0); + .setShift(Statistics.getMedian(signalShiftList.get(signalIndex)), 0); predictedSpectrum.getSignal(signalIndex) .setEquivalencesCount(signalShiftList.get(signalIndex) .size()); } // if no spectrum could be built or the number of signals in spectrum is different than the atom number in molecule - if (casekit.nmr.Utils.getDifferenceSpectrumSizeAndMolecularFormulaCount(predictedSpectrum, mf, 0) + if (Utils.getDifferenceSpectrumSizeAndMolecularFormulaCount(predictedSpectrum, mf, 0) != 0) { continue; } diff --git a/src/casekit/nmr/model/ExtendedConnectionMatrix.java b/src/casekit/nmr/model/ExtendedConnectionMatrix.java index 630b44b..a5bd652 100644 --- a/src/casekit/nmr/model/ExtendedConnectionMatrix.java +++ b/src/casekit/nmr/model/ExtendedConnectionMatrix.java @@ -11,7 +11,7 @@ */ package casekit.nmr.model; -import casekit.nmr.Utils; +import casekit.nmr.utils.Utils; import lombok.AllArgsConstructor; import lombok.Getter; import lombok.NoArgsConstructor; diff --git a/src/casekit/nmr/utils/Match.java b/src/casekit/nmr/utils/Match.java index d42d7b0..20329d4 100644 --- a/src/casekit/nmr/utils/Match.java +++ b/src/casekit/nmr/utils/Match.java @@ -12,7 +12,6 @@ package casekit.nmr.utils; -import casekit.nmr.Utils; import casekit.nmr.model.Assignment; import casekit.nmr.model.Signal; import casekit.nmr.model.Spectrum; @@ -129,7 +128,7 @@ public static Double calculateAverageDeviation(final Double[] deviations) { } } - return Utils.getMean(deviations); + return Statistics.getMean(deviations); } /** @@ -177,7 +176,7 @@ public static Double calculateRMSD(final Double[] data) { } } - return casekit.nmr.utils.Utils.getRMSD(data); + return Statistics.getRMSD(data); } /** diff --git a/src/casekit/nmr/utils/Predict.java b/src/casekit/nmr/utils/Predict.java index 5764a96..4d846d9 100644 --- a/src/casekit/nmr/utils/Predict.java +++ b/src/casekit/nmr/utils/Predict.java @@ -78,10 +78,10 @@ public static 
DataSet predict1D(final Map> hoseCod try { AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(structure); - casekit.nmr.Utils.convertExplicitToImplicitHydrogens(structure); + Utils.convertExplicitToImplicitHydrogens(structure); hydrogenAdder.addImplicitHydrogens(structure); - casekit.nmr.Utils.convertImplicitToExplicitHydrogens(structure); - casekit.nmr.Utils.setAromaticityAndKekulize(structure); + Utils.convertImplicitToExplicitHydrogens(structure); + Utils.setAromaticityAndKekulize(structure); for (int i = 0; i < structure.getAtomCount(); i++) { diff --git a/src/casekit/nmr/utils/Statistics.java b/src/casekit/nmr/utils/Statistics.java new file mode 100644 index 0000000..b90fe6b --- /dev/null +++ b/src/casekit/nmr/utils/Statistics.java @@ -0,0 +1,271 @@ +package casekit.nmr.utils; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.List; + +public class Statistics { + /** + * Detects outliers in given array list of input values and removes them.
+ * Here, outliers are those which are outside of a calculated lower and upper bound (whisker). + * The interquartile range (IQR) of the input values is therefore multiplied with a given value + * for whisker creation. + * + * @param input list of values to process + * @param multiplierIQR multiplier for IQR to use for lower and upper bound creation + * + * @return new array list without values outside the generated boundaries + */ + public static List removeOutliers(final List input, final double multiplierIQR) { + final ArrayList inputWithoutOutliers = new ArrayList<>(input); + inputWithoutOutliers.removeAll(getOutliers(inputWithoutOutliers, multiplierIQR)); + + return inputWithoutOutliers; + } + + /** + * @param input + * @param multiplierIQR + * + * @return + */ + public static List getOutliers(final List input, final double multiplierIQR) { + final ArrayList outliers = new ArrayList<>(); + if (input.size() + <= 1) { + return outliers; + } + Collections.sort(input); + final ArrayList data1 = new ArrayList<>(input.subList(0, input.size() + / 2)); + final ArrayList data2; + if (input.size() + % 2 + == 0) { + data2 = new ArrayList<>(input.subList(input.size() + / 2, input.size())); + } else { + data2 = new ArrayList<>(input.subList(input.size() + / 2 + + 1, input.size())); + } + final double q1 = getMedian(new ArrayList<>(data1)); + final double q3 = getMedian(new ArrayList<>(data2)); + final double iqr = q3 + - q1; + final double lowerBound = q1 + - multiplierIQR + * iqr; + final double upperBound = q3 + + multiplierIQR + * iqr; + for (int i = 0; i + < input.size(); i++) { + if ((input.get(i) + < lowerBound) + || (input.get(i) + > upperBound)) { + outliers.add(input.get(i)); + } + } + // System.out.println("input size: " + input.size()); + // System.out.println("output size: " + outliers.size()); + return outliers; + } + + /** + * @param data + * + * @return + */ + public static Double getMedian(final List data) { + if ((data + == null) + || data.isEmpty()) { + 
return null; + } + if (data.size() + == 1) { + return data.get(0); + } + Collections.sort(data); + if (data.size() + % 2 + == 1) { + return data.get(data.size() + / 2); + } else { + return (data.get(data.size() + / 2 + - 1) + + data.get(data.size() + / 2)) + / 2.0; + } + } + + /** + * @param data + * + * @return + */ + public static Double getMean(final Collection data) { + if ((data + == null) + || data.isEmpty()) { + return null; + } + double sum = 0; + int nullCounter = 0; + for (final Double d : data) { + if (d + != null) { + sum += d; + } else { + nullCounter++; + } + } + return ((data.size() + - nullCounter) + != 0) + ? (sum + / (data.size() + - nullCounter)) + : null; + } + + /** + * @param data + * + * @return + */ + public static Double getStandardDeviation(final List data) { + if ((data + == null) + || data.isEmpty()) { + return null; + } + final Double variance = getVariance(data); + + return (variance + != null) + ? Math.sqrt(variance) + : null; + } + + /** + * @param data + * + * @return + */ + public static Double getVariance(final Collection data) { + if ((data + == null) + || data.isEmpty()) { + return null; + } + final int nullCounter = Collections.frequency(data, null); + double quadrSum = 0.0; + final Double mean = getMean(data); + if (mean + == null) { + return null; + } + for (final Double d : data) { + if (d + != null) { + quadrSum += Math.pow(d + - mean, 2); + } + } + + return ((data.size() + - nullCounter) + != 0) + ? (quadrSum + / (data.size() + - nullCounter)) + : null; + } + + /** + * @param data + * + * @return + */ + public static Double getMean(final Double[] data) { + if ((data + == null) + || (data.length + == 0)) { + return null; + } + double sum = 0; + int nullCounter = 0; + for (final Double d : data) { + if (d + != null) { + sum += d; + } else { + nullCounter++; + } + } + return ((data.length + - nullCounter) + != 0) + ? 
(sum + / (data.length + - nullCounter)) + : null; + } + + public static Double roundDouble(final Double value, final int decimalPlaces) { + if (value + == null) { + return null; + } + final int decimalFactor = (int) (Math.pow(10, decimalPlaces)); + + return (Math.round(value + * decimalFactor) + / (double) decimalFactor); + } + + /** + * @param data + * + * @return + */ + public static Double getRMSD(final Double[] data) { + if (data + == null + || data.length + == 0) { + return null; + } + if (data.length + == 1) { + return data[0]; + } + int nullCounter = 0; + double qSum = 0; + for (final Double d : data) { + if (d + != null) { + qSum += d + * d; + } else { + nullCounter++; + } + } + + return ((data.length + - nullCounter) + != 0) + ? Math.sqrt(qSum + / (data.length + - nullCounter)) + : null; + } +} diff --git a/src/casekit/nmr/utils/Utils.java b/src/casekit/nmr/utils/Utils.java index a913a1d..65350d4 100644 --- a/src/casekit/nmr/utils/Utils.java +++ b/src/casekit/nmr/utils/Utils.java @@ -2,18 +2,23 @@ import casekit.nmr.model.Spectrum; import casekit.nmr.model.nmrdisplayer.Correlation; +import org.openscience.cdk.aromaticity.Aromaticity; +import org.openscience.cdk.aromaticity.ElectronDonation; +import org.openscience.cdk.aromaticity.Kekulization; +import org.openscience.cdk.atomtype.CDKAtomTypeMatcher; import org.openscience.cdk.exception.CDKException; -import org.openscience.cdk.interfaces.IAtomContainer; -import org.openscience.cdk.interfaces.IMolecularFormula; +import org.openscience.cdk.graph.CycleFinder; +import org.openscience.cdk.graph.Cycles; +import org.openscience.cdk.interfaces.*; import org.openscience.cdk.silent.SilentChemObjectBuilder; import org.openscience.cdk.smiles.SmiFlavor; import org.openscience.cdk.smiles.SmilesGenerator; +import org.openscience.cdk.tools.CDKHydrogenAdder; +import org.openscience.cdk.tools.manipulator.AtomContainerManipulator; +import org.openscience.cdk.tools.manipulator.AtomTypeManipulator; import 
org.openscience.cdk.tools.manipulator.MolecularFormulaManipulator; -import java.util.ArrayList; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; +import java.util.*; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -78,43 +83,6 @@ public static String getSmilesFromAtomContainer(final IAtomContainer ac) throws return smilesGenerator.create(ac); } - /** - * @param data - * - * @return - */ - public static Double getRMSD(final Double[] data) { - if (data - == null - || data.length - == 0) { - return null; - } - if (data.length - == 1) { - return data[0]; - } - int nullCounter = 0; - double qSum = 0; - for (final Double d : data) { - if (d - != null) { - qSum += d - * d; - } else { - nullCounter++; - } - } - - return ((data.length - - nullCounter) - != 0) - ? Math.sqrt(qSum - / (data.length - - nullCounter)) - : null; - } - public static Map getMolecularFormulaElementCounts(final String mf) { final LinkedHashMap counts = new LinkedHashMap<>(); final List elements = new ArrayList<>(); @@ -141,8 +109,7 @@ public static Map getMolecularFormulaElementCounts(final String } public static int getAtomTypeCount(final IAtomContainer structure, final String atomType) { - return casekit.nmr.Utils.getAtomTypeIndicesByElement(structure, atomType) - .size(); + return getAtomTypeIndicesByElement(structure, atomType).size(); } public static int getAtomTypeCount(final String mf, final String atomType) { @@ -154,7 +121,7 @@ public static boolean compareWithMolecularFormulaLessOrEqual(final IAtomContaine != null && !mf.trim() .isEmpty()) { - for (final String atomType : casekit.nmr.Utils.getAtomTypesInAtomContainer(structure)) { + for (final String atomType : getAtomTypesInAtomContainer(structure)) { if (getAtomTypeCount(structure, atomType) > getAtomTypeCount(mf, atomType)) { return false; @@ -170,7 +137,7 @@ public static boolean compareWithMolecularFormulaEqual(final IAtomContainer stru != null && !mf.trim() .isEmpty()) { - for (final 
String atomType : casekit.nmr.Utils.getAtomTypesInAtomContainer(structure)) { + for (final String atomType : getAtomTypesInAtomContainer(structure)) { if (getAtomTypeCount(structure, atomType) != getAtomTypeCount(mf, atomType)) { return false; @@ -180,4 +147,389 @@ public static boolean compareWithMolecularFormulaEqual(final IAtomContainer stru return true; } + + /** + * Returns a hashmap consisting of lists of atom indices in an atom container. + * This is done for all atom types (e.g. C or Br) in given atom container. + * + * @param ac IAtomContainer to look in + * + * @return + * + * @see #getAtomTypeIndicesByElement(IAtomContainer, String) + */ + public static Map> getAtomTypeIndices(final IAtomContainer ac) { + + final Map> atomTypeIndices = new HashMap<>(); + final Set atomTypes = new HashSet<>(); + for (final IAtom heavyAtom : AtomContainerManipulator.getHeavyAtoms(ac)) { + atomTypes.add(heavyAtom.getSymbol()); + } + for (final String atomType : atomTypes) { + atomTypeIndices.put(atomType, getAtomTypeIndicesByElement(ac, atomType)); + } + + return atomTypeIndices; + } + + /** + * Returns a list of atom indices in an atom container for a given atom + * type (e.g. 
C or Br) + * + * @param ac IAtomContainer to use for search + * @param atomType Atom type to find in atom container + * + * @return + */ + public static List getAtomTypeIndicesByElement(final IAtomContainer ac, final String atomType) { + + final ArrayList indices = new ArrayList<>(); + for (int i = 0; i + < ac.getAtomCount(); i++) { + if (ac.getAtom(i) + .getSymbol() + .equals(atomType)) { + indices.add(i); + } + } + + return indices; + } + + public static IMolecularFormula getMolecularFormulaFromAtomContainer(final IAtomContainer ac) { + return MolecularFormulaManipulator.getMolecularFormula(ac); + } + + public static String molecularFormularToString(final IMolecularFormula molecularFormula) { + return MolecularFormulaManipulator.getString(molecularFormula); + } + + public static int getDifferenceSpectrumSizeAndMolecularFormulaCount(final Spectrum spectrum, + final IMolecularFormula molFormula, + final int dim) throws CDKException { + if (!spectrum.containsDim(dim)) { + throw new CDKException(Thread.currentThread() + .getStackTrace()[2].getClassName() + + "." + + Thread.currentThread() + .getStackTrace()[2].getMethodName() + + ": invalid dimension in spectrum given"); + } + final String atomType = getAtomTypeFromSpectrum(spectrum, dim); + int atomsInMolFormula = 0; + if (molFormula + != null) { + atomsInMolFormula = MolecularFormulaManipulator.getElementCount(molFormula, atomType); + } + return atomsInMolFormula + - spectrum.getSignalCountWithEquivalences(); + } + + /** + * Returns the casekit.nmr isotope identifier for a given element, e.g. C -> 13C. + * Elements defined so far: C, H, N, P, F, D, O, S, Si, B, Pt. + * + * @param element element's symbol (e.g. 
"C") + * + * @return + */ + public static String getIsotopeIdentifier(final String element) { + switch (element) { + case "C": + return "13C"; + case "H": + return "1H"; + case "N": + return "15N"; + case "P": + return "31P"; + case "F": + return "19F"; + case "O": + return "17O"; + case "S": + return "33S"; + case "Si": + return "29Si"; + case "B": + return "11B"; + case "Pt": + return "195Pt"; + default: + return element; + } + } + + public static Set getAtomTypesInAtomContainer(final IAtomContainer ac) { + final HashSet atomTypes = new HashSet<>(); + for (final IAtom atom : ac.atoms()) { + atomTypes.add(atom.getSymbol()); + } + + return atomTypes; + } + + public static boolean isValidBondAddition(final IAtomContainer ac, final int atomIndex, final IBond bondToAdd) { + float bondOrderSum = getBondOrderSum(ac, atomIndex, true); + bondOrderSum += getBondOrderAsNumeric(bondToAdd); + + // System.out.print(atomIndex + " --> " + HOSECodeUtilities.getBondOrderSum(ac, atomIndex, true) + " + " + HOSECodeUtilities.getBondOrderAsNumeric(bondToAdd)); + final IAtom atom = ac.getAtom(atomIndex); + // @TODO include different valencies: N3, N5, S2, S4, S6 etc. + // -1 for cases with heterocyclic aromatics, like the N in the small aromatic ring in coffein if we want to add the bond to the CH3 group + if (atom.isAromatic() + && (atom.getSymbol() + .equals("N") + || atom.getSymbol() + .equals("S") + || atom.getSymbol() + .equals("P"))) { + // System.out.print("[ -1 ]"); + bondOrderSum -= 1; + } + // System.out.print(" = " + bondOrderSum + " <= " + atom.getValency() + " ? 
-> " + (bondOrderSum <= atom.getValency()) + "\n"); + + // @TODO include charges + return bondOrderSum + <= atom.getValency(); + } + + public static Boolean isSaturated(final IAtomContainer ac, final int atomIndex) { + if (!checkIndexInAtomContainer(ac, atomIndex)) { + return null; + } + return ac.getAtom(atomIndex) + .getValency() + != null + && getBondOrderSum(ac, atomIndex, true).intValue() + >= ac.getAtom(atomIndex) + .getValency(); + } + + public static List getUnsaturatedAtomIndices(final IAtomContainer ac) { + final List unsaturatedAtomIndices = new ArrayList<>(); + for (int i = 0; i + < ac.getAtomCount(); i++) { + // set the indices of unsaturated atoms in substructure + if (!isSaturated(ac, i)) { + unsaturatedAtomIndices.add(i); + } + } + return unsaturatedAtomIndices; + } + + public static void addImplicitHydrogens(final IAtomContainer ac) throws CDKException { + final CDKAtomTypeMatcher matcher = CDKAtomTypeMatcher.getInstance(ac.getBuilder()); + IAtomType type; + for (final IAtom atom : ac.atoms()) { + type = matcher.findMatchingAtomType(ac, atom); + AtomTypeManipulator.configure(atom, type); + } + final CDKHydrogenAdder adder = CDKHydrogenAdder.getInstance(ac.getBuilder()); + adder.addImplicitHydrogens(ac); + } + + public static void addExplicitHydrogens(final IAtomContainer ac) throws CDKException { + addImplicitHydrogens(ac); + convertImplicitToExplicitHydrogens(ac); + } + + public static void convertImplicitToExplicitHydrogens(final IAtomContainer ac) { + AtomContainerManipulator.convertImplicitToExplicitHydrogens(ac); + } + + /** + * Checks whether a structure contains explicit hydrogen atoms or not. + * + * @param ac structure to check + * + * @return + */ + public static boolean containsExplicitHydrogens(final IAtomContainer ac) { + return getExplicitHydrogenCount(ac) + > 0; + } + + /** + * Stores all explicit hydrogens as implicit counter for the bonded heavy + * atoms and removes those from the atom container.
+ * Also, a HashMap containing non-hydrogen atoms and its indices + * before the removals will be returned which one can use for atom index + * comparison (before and after the removals). + * + * @param ac the structure to lsd + * + * @return + * + * @see #containsExplicitHydrogens(IAtomContainer) + */ + public static Map convertExplicitToImplicitHydrogens(final IAtomContainer ac) { + // create a list of atom indices which one can use for index comparison (before vs. after) after removing the explicit hydrogens + final Map atomIndices = new HashMap<>(); + final List toRemoveList = new ArrayList<>(); + IAtom atomB; + for (final IAtom atomA : ac.atoms()) { + // check each atom whether it is an hydrogen; + // if yes then store (increase) the number of implicit hydrogens + // for its bonded heavy atom + if (atomA.getSymbol() + .equals("H")) { + atomB = ac.getConnectedAtomsList(atomA) + .get(0); + if (atomB.getImplicitHydrogenCount() + == null) { + atomB.setImplicitHydrogenCount(0); + } + atomB.setImplicitHydrogenCount(atomB.getImplicitHydrogenCount() + + 1); + toRemoveList.add(atomA); + } else { + // store all non-hydrogen atoms and their indices + atomIndices.put(atomA, atomA.getIndex()); + } + + } + // remove all explicit hydrogen atoms + for (final IAtom iAtom : toRemoveList) { + ac.removeAtom(iAtom); + } + + return atomIndices; + } + + /** + * @param ac + * + * @return + */ + public static List getExplicitHydrogenIndices(final IAtomContainer ac) { + final List explicitHydrogenIndicesList = new ArrayList<>(); + for (int i = 0; i + < ac.getAtomCount(); i++) { + if (ac.getAtom(i) + .getSymbol() + .equals("H")) { + explicitHydrogenIndicesList.add(i); + } + } + + return explicitHydrogenIndicesList; + } + + /** + * @param ac + * + * @return + */ + public static int getExplicitHydrogenCount(final IAtomContainer ac) { + return getExplicitHydrogenIndices(ac).size(); + } + + public static void setAromaticity(final IAtomContainer ac) throws CDKException { + 
AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(ac); + final ElectronDonation model = ElectronDonation.cdkAllowingExocyclic(); + final CycleFinder cycles = Cycles.all(ac.getAtomCount()); + final Aromaticity aromaticity = new Aromaticity(model, cycles); + aromaticity.apply(ac); + } + + public static void setAromaticityAndKekulize(final IAtomContainer ac) throws CDKException { + setAromaticity(ac); + Kekulization.kekulize(ac); + } + + public static void setAromaticity(final IAtomContainer ac, final Aromaticity aromaticity) throws CDKException { + AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(ac); + aromaticity.apply(ac); + } + + public static void setAromaticityAndKekulize(final IAtomContainer ac, + final Aromaticity aromaticity) throws CDKException { + setAromaticity(ac, aromaticity); + Kekulization.kekulize(ac); + } + + /** + * Removes atoms from a given atom type from an atom container. + * + * @param ac IAtomContainer object where to remove the atoms + * @param atomType Atom type (element's name, e.g. C or Br) + * + * @return IAtomContainer where the atoms were removed + */ + public static IAtomContainer removeAtoms(final IAtomContainer ac, final String atomType) { + + final ArrayList toRemoveList = new ArrayList<>(); + for (final IAtom atomA : ac.atoms()) { + if (atomA.getSymbol() + .equals(atomType)) {// detect whether the current atom A is a from the given atom type + toRemoveList.add(atomA); + } + } + for (final IAtom iAtom : toRemoveList) { + ac.removeAtom(iAtom); + } + + return ac; + } + + public static boolean checkIndexInAtomContainer(final IAtomContainer ac, final int atomIndex) { + return ((atomIndex + >= 0) + && atomIndex + < ac.getAtomCount()); + } + + /** + * Returns the bond order for a numeric order value. 
+ * + * @param orderAsNumeric + * + * @return + */ + public static IBond.Order getBondOrder(final int orderAsNumeric) { + for (final IBond.Order order : IBond.Order.values()) { + if (order.numeric() + == orderAsNumeric) { + return order; + } + } + + return null; + } + + public static Float getBondOrderAsNumeric(final IBond bond) { + if (bond + == null) { + return null; + } + final float bondOrderAsNumeric; + if (bond.isAromatic()) { + bondOrderAsNumeric = (float) 1.5; + } else { + bondOrderAsNumeric = bond.getOrder() + .numeric(); + } + + return bondOrderAsNumeric; + } + + public static Float getBondOrderSum(final IAtomContainer ac, final int atomIndex, + final boolean includeImplicitHydrogenCount) { + if (!checkIndexInAtomContainer(ac, atomIndex)) { + return null; + } + float bondsOrderSum = 0; + final IAtom atom = ac.getAtom(atomIndex); + for (final IBond bond : ac.getConnectedBondsList(atom)) { + bondsOrderSum += getBondOrderAsNumeric(bond); + } + if (includeImplicitHydrogenCount + && (atom.getImplicitHydrogenCount() + != null)) { + bondsOrderSum += atom.getImplicitHydrogenCount(); + } + + return bondsOrderSum; + } } From a555a9d08c7a438621634685ef2127952753126b Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Mon, 28 Jun 2021 15:55:38 +0200 Subject: [PATCH 259/405] chore: moved calculation methods for RMSD and average deviation to Statistics class --- .../nmr/lsd/RankedResultSDFParser.java | 2 +- src/casekit/nmr/utils/Match.java | 49 +++---------------- src/casekit/nmr/utils/Statistics.java | 38 ++++++++++++++ 3 files changed, 45 insertions(+), 44 deletions(-) diff --git a/src/casekit/nmr/lsd/RankedResultSDFParser.java b/src/casekit/nmr/lsd/RankedResultSDFParser.java index d38c5a8..765def9 100644 --- a/src/casekit/nmr/lsd/RankedResultSDFParser.java +++ b/src/casekit/nmr/lsd/RankedResultSDFParser.java @@ -136,7 +136,7 @@ public static List parseRankedResultSDFile(final Reader fileReader, continue; } dataSet = new DataSet(structure, predictedSpectrum, assignment, 
meta); - dataSet.addMetaInfo("rmsd", String.valueOf(Match.calculateRMSD(deviations))); + dataSet.addMetaInfo("rmsd", String.valueOf(Statistics.calculateRMSD(deviations))); dataSet.addMetaInfo("tanimoto", String.valueOf( Match.calculateTanimotoCoefficient(dataSet.getSpectrum(), experimentalSpectrum, 0, 0))); diff --git a/src/casekit/nmr/utils/Match.java b/src/casekit/nmr/utils/Match.java index 20329d4..4b17337 100644 --- a/src/casekit/nmr/utils/Match.java +++ b/src/casekit/nmr/utils/Match.java @@ -112,25 +112,6 @@ public static Double[] getDeviations(final Spectrum spectrum1, final Spectrum sp return deviations; } - /** - * Returns the average of all deviations within a given input array. - * - * @param deviations array of deviations - * - * @return - */ - public static Double calculateAverageDeviation(final Double[] deviations) { - // every signal has to have a match - for (final Double deviation : deviations) { - if (deviation - == null) { - return null; - } - } - - return Statistics.getMean(deviations); - } - /** * Returns the average of all deviations of matched shifts between two * spectra. @@ -148,37 +129,18 @@ public static Double calculateAverageDeviation(final Double[] deviations) { * @return * * @see #getDeviations(Spectrum, Spectrum, int, int, double, boolean, boolean, boolean) - * @see #calculateAverageDeviation(Double[]) + * @see Statistics#calculateAverageDeviation(Double[]) */ public static Double calculateAverageDeviation(final Spectrum spectrum1, final Spectrum spectrum2, final int dim1, final int dim2, final double shiftTol, final boolean checkMultiplicity, final boolean checkEquivalencesCount, final boolean allowLowerEquivalencesCount) { - return Match.calculateAverageDeviation( + return Statistics.calculateAverageDeviation( Match.getDeviations(spectrum1, spectrum2, dim1, dim2, shiftTol, checkMultiplicity, checkEquivalencesCount, allowLowerEquivalencesCount)); } - /** - * Returns the average of all deviations within a given input array. 
- * - * @param data array of deviations - * - * @return - */ - public static Double calculateRMSD(final Double[] data) { - // every signal has to have a match - for (final Double value : data) { - if (value - == null) { - return null; - } - } - - return Statistics.getRMSD(data); - } - /** * Returns the average of all deviations of matched shifts between two * spectra. @@ -196,14 +158,15 @@ public static Double calculateRMSD(final Double[] data) { * @return * * @see #getDeviations(Spectrum, Spectrum, int, int, double, boolean, boolean, boolean) - * @see #calculateAverageDeviation(Double[]) + * @see Statistics#calculateAverageDeviation(Double[]) */ public static Double calculateRMSD(final Spectrum spectrum1, final Spectrum spectrum2, final int dim1, final int dim2, final double shiftTol, final boolean checkMultiplicity, final boolean checkEquivalencesCount, final boolean allowLowerEquivalencesCount) { - return Match.calculateRMSD(Match.getDeviations(spectrum1, spectrum2, dim1, dim2, shiftTol, checkMultiplicity, - checkEquivalencesCount, allowLowerEquivalencesCount)); + return Statistics.calculateRMSD( + Match.getDeviations(spectrum1, spectrum2, dim1, dim2, shiftTol, checkMultiplicity, + checkEquivalencesCount, allowLowerEquivalencesCount)); } /** diff --git a/src/casekit/nmr/utils/Statistics.java b/src/casekit/nmr/utils/Statistics.java index b90fe6b..1c28853 100644 --- a/src/casekit/nmr/utils/Statistics.java +++ b/src/casekit/nmr/utils/Statistics.java @@ -268,4 +268,42 @@ public static Double getRMSD(final Double[] data) { - nullCounter)) : null; } + + /** + * Returns the average of all deviations within a given input array. 
+ * + * @param deviations array of deviations + * + * @return + */ + public static Double calculateAverageDeviation(final Double[] deviations) { + // every signal has to have a match + for (final Double deviation : deviations) { + if (deviation + == null) { + return null; + } + } + + return getMean(deviations); + } + + /** + * Returns the average of all deviations within a given input array. + * + * @param data array of deviations + * + * @return + */ + public static Double calculateRMSD(final Double[] data) { + // every signal has to have a match + for (final Double value : data) { + if (value + == null) { + return null; + } + } + + return getRMSD(data); + } } From 6bb045a227b29951a0ec8a590aeedd28d9dc8905 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Mon, 28 Jun 2021 16:52:05 +0200 Subject: [PATCH 260/405] chore: moved "fragmentation" into new "fragments" package; moved general utility methods for fragments to new FragmentUtilities class --- .../nmr/analysis/HOSECodeShiftStatistics.java | 2 +- .../nmr/fragments/FragmentUtilities.java | 70 +++++++++ .../fragmentation/Fragmentation.java | 36 ++--- .../FragmentationUtilities.java} | 8 +- .../ErtlFunctionalGroupsFinder.java | 2 +- .../ErtlFunctionalGroupsUtilities.java | 144 ++---------------- .../model/ConnectionTree.java | 2 +- .../model/ConnectionTreeNode.java | 2 +- src/casekit/nmr/hose/HOSECodeBuilder.java | 10 +- src/casekit/nmr/hose/HOSECodeUtilities.java | 4 +- src/casekit/nmr/utils/Match.java | 71 +++++++++ src/casekit/nmr/utils/Predict.java | 4 +- 12 files changed, 186 insertions(+), 169 deletions(-) create mode 100644 src/casekit/nmr/fragments/FragmentUtilities.java rename src/casekit/nmr/{ => fragments}/fragmentation/Fragmentation.java (94%) rename src/casekit/nmr/{fragmentation/FragmentationUtils.java => fragments/fragmentation/FragmentationUtilities.java} (98%) rename src/casekit/nmr/{fragmentation => fragments}/functionalgroup/ErtlFunctionalGroupsFinder.java (99%) rename 
src/casekit/nmr/{fragmentation => fragments}/functionalgroup/ErtlFunctionalGroupsUtilities.java (52%) rename src/casekit/nmr/{fragmentation => fragments}/model/ConnectionTree.java (99%) rename src/casekit/nmr/{fragmentation => fragments}/model/ConnectionTreeNode.java (99%) diff --git a/src/casekit/nmr/analysis/HOSECodeShiftStatistics.java b/src/casekit/nmr/analysis/HOSECodeShiftStatistics.java index b217a5a..8692784 100644 --- a/src/casekit/nmr/analysis/HOSECodeShiftStatistics.java +++ b/src/casekit/nmr/analysis/HOSECodeShiftStatistics.java @@ -2,7 +2,7 @@ import casekit.nmr.dbservice.COCONUT; import casekit.nmr.dbservice.NMRShiftDB; -import casekit.nmr.fragmentation.model.ConnectionTree; +import casekit.nmr.fragments.model.ConnectionTree; import casekit.nmr.hose.HOSECodeBuilder; import casekit.nmr.model.DataSet; import casekit.nmr.model.Signal; diff --git a/src/casekit/nmr/fragments/FragmentUtilities.java b/src/casekit/nmr/fragments/FragmentUtilities.java new file mode 100644 index 0000000..52aa2e5 --- /dev/null +++ b/src/casekit/nmr/fragments/FragmentUtilities.java @@ -0,0 +1,70 @@ +package casekit.nmr.fragments; + +import casekit.nmr.model.DataSet; + +import java.util.*; +import java.util.stream.Collectors; + +public class FragmentUtilities { + public static LinkedHashMap> sortByFrequencies( + final Map> functionalGroupDataSetsMap) { + final LinkedHashMap> sortedCollection = new LinkedHashMap<>(); + final List>> sortedFrequencies = getSortedFrequencies( + functionalGroupDataSetsMap); + for (final Map.Entry> frequency : sortedFrequencies) { + sortedCollection.put(frequency.getKey(), frequency.getValue()); + } + + return sortedCollection; + } + + private static List>> getSortedFrequencies( + final Map> functionalGroupDataSets) { + return functionalGroupDataSets.entrySet() + .stream() + .sorted(Map.Entry.comparingByValue((list1, list2) -> -1 + * Integer.compare(list1.size(), list2.size()))) + .collect(Collectors.toList()); + } + + public static Map> 
collectBySmiles(final List functionalGroupDataSets) { + final Map> collection = new HashMap<>(); + String smiles; + for (final DataSet functionalGroupDataSet : functionalGroupDataSets) { + smiles = functionalGroupDataSet.getMeta() + .get("smiles"); + if (smiles + != null) { + collection.putIfAbsent(smiles, new ArrayList<>()); + collection.get(smiles) + .add(functionalGroupDataSet); + } + } + + return collection; + } + + public static void filterByCommonSubstructures(final Map> smilesCollection1, + final Map> smilesCollection2) { + // filter first by second collection + Set keysToRemove = new HashSet<>(); + for (final String keyCollection1 : smilesCollection1.keySet()) { + if (!smilesCollection2.containsKey(keyCollection1)) { + keysToRemove.add(keyCollection1); + } + } + for (final String keyCollection1 : keysToRemove) { + smilesCollection1.remove(keyCollection1); + } + // filter second by first collection + keysToRemove = new HashSet<>(); + for (final String keyCollection2 : smilesCollection2.keySet()) { + if (!smilesCollection1.containsKey(keyCollection2)) { + keysToRemove.add(keyCollection2); + } + } + for (final String keyCollection2 : keysToRemove) { + smilesCollection2.remove(keyCollection2); + } + } +} diff --git a/src/casekit/nmr/fragmentation/Fragmentation.java b/src/casekit/nmr/fragments/fragmentation/Fragmentation.java similarity index 94% rename from src/casekit/nmr/fragmentation/Fragmentation.java rename to src/casekit/nmr/fragments/fragmentation/Fragmentation.java index 3e69ec9..9ba7cdf 100644 --- a/src/casekit/nmr/fragmentation/Fragmentation.java +++ b/src/casekit/nmr/fragments/fragmentation/Fragmentation.java @@ -1,6 +1,6 @@ -package casekit.nmr.fragmentation; +package casekit.nmr.fragments.fragmentation; -import casekit.nmr.fragmentation.model.ConnectionTree; +import casekit.nmr.fragments.model.ConnectionTree; import casekit.nmr.model.*; import casekit.nmr.utils.Utils; import org.openscience.cdk.exception.CDKException; @@ -115,7 +115,7 @@ public 
static List fragmentTreesToSubDataSets(final DataSet dataSet, subspectrum.setSpectrometerFrequency(dataSet.getSpectrum() .getSpectrometerFrequency()); - substructure = FragmentationUtils.toAtomContainer(fragmentTree); + substructure = FragmentationUtilities.toAtomContainer(fragmentTree); subDataSet = new DataSet(); subDataSet.setStructure(new ExtendedConnectionMatrix(substructure)); subDataSet.setSpectrum(subspectrum); @@ -154,14 +154,14 @@ public static List fragmentTreesToSubDataSets(final DataSet dataSet, * @return * * @see #buildFragmentTrees(IAtomContainer, Integer, Integer, boolean) - * @see FragmentationUtils#toAtomContainer(ConnectionTree) + * @see FragmentationUtilities#toAtomContainer(ConnectionTree) */ public static List buildFragments(final IAtomContainer structure, final Integer maxSphere, final Integer maxSphereRing, final boolean withPseudoAtoms) { final List fragmentTrees = buildFragmentTrees(structure, maxSphere, maxSphereRing, withPseudoAtoms); return fragmentTrees.stream() - .map(FragmentationUtils::toAtomContainer) + .map(FragmentationUtilities::toAtomContainer) .collect(Collectors.toList()); } @@ -214,22 +214,22 @@ public static List buildRingFragmentTrees(final IAtomContainer s } for (final int key : connectionTreeOuterSphere.getNodeKeysInSphere(1)) { subtreeToAdd = ConnectionTree.buildSubtree(connectionTreeOuterSphere, key); - if (!FragmentationUtils.addToConnectionTree(connectionTreeRing, - connectionTreeOuterSphere.getRootNode() - .getKey(), subtreeToAdd, - connectionTreeOuterSphere.getBond( - connectionTreeOuterSphere.getRootNode() - .getKey(), key))) { + if (!FragmentationUtilities.addToConnectionTree(connectionTreeRing, + connectionTreeOuterSphere.getRootNode() + .getKey(), + subtreeToAdd, connectionTreeOuterSphere.getBond( + connectionTreeOuterSphere.getRootNode() + .getKey(), key))) { continue; } atomIndicesInRing.addAll(subtreeToAdd.getKeys()); } } // close rings - FragmentationUtils.closeRings(connectionTreeRing, structure); + 
FragmentationUtilities.closeRings(connectionTreeRing, structure); // attach pseudo atoms if desired if (withPseudoAtoms) { - FragmentationUtils.attachPseudoAtoms(connectionTreeRing, structure); + FragmentationUtilities.attachPseudoAtoms(connectionTreeRing, structure); } ringFragmentTrees.add(connectionTreeRing); } @@ -253,7 +253,7 @@ public static List buildRingFragmentTrees(final IAtomContainer s * * @see #buildRingFragmentTrees(IAtomContainer, Integer, boolean) * @see #buildFragmentTree(IAtomContainer, int, Integer, Set, boolean) - * @see FragmentationUtils#removeDuplicates(List) + * @see FragmentationUtilities#removeDuplicates(List) */ public static List buildFragmentTrees(final IAtomContainer structure, final Integer maxSphere, final Integer maxSphereRing, final boolean withPseudoAtoms) { @@ -265,7 +265,7 @@ public static List buildFragmentTrees(final IAtomContainer struc fragmentTrees.add( Fragmentation.buildFragmentTree(structure, i, maxSphere, new HashSet<>(), withPseudoAtoms)); } - FragmentationUtils.removeDuplicates(fragmentTrees); + FragmentationUtilities.removeDuplicates(fragmentTrees); return fragmentTrees; } @@ -284,7 +284,7 @@ public static List buildFragmentTrees(final IAtomContainer struc public static IAtomContainer buildFragment(final IAtomContainer ac, final int rootAtomIndex, final Integer maxSphere, final Set exclude, final boolean withPseudoAtoms) { - return FragmentationUtils.toAtomContainer( + return FragmentationUtilities.toAtomContainer( buildFragmentTree(ac, rootAtomIndex, maxSphere, exclude, withPseudoAtoms)); } @@ -314,10 +314,10 @@ public static ConnectionTree buildFragmentTree(final IAtomContainer structure, f BFS(structure, connectionTree, queue, new HashSet<>(), exclude, maxSphere); // close rings - FragmentationUtils.closeRings(connectionTree, structure); + FragmentationUtilities.closeRings(connectionTree, structure); // add pseudo atoms if (withPseudoAtoms) { - FragmentationUtils.attachPseudoAtoms(connectionTree, structure); + 
FragmentationUtilities.attachPseudoAtoms(connectionTree, structure); } return connectionTree; diff --git a/src/casekit/nmr/fragmentation/FragmentationUtils.java b/src/casekit/nmr/fragments/fragmentation/FragmentationUtilities.java similarity index 98% rename from src/casekit/nmr/fragmentation/FragmentationUtils.java rename to src/casekit/nmr/fragments/fragmentation/FragmentationUtilities.java index 316341a..c45f6ab 100644 --- a/src/casekit/nmr/fragmentation/FragmentationUtils.java +++ b/src/casekit/nmr/fragments/fragmentation/FragmentationUtilities.java @@ -1,7 +1,7 @@ -package casekit.nmr.fragmentation; +package casekit.nmr.fragments.fragmentation; -import casekit.nmr.fragmentation.model.ConnectionTree; -import casekit.nmr.fragmentation.model.ConnectionTreeNode; +import casekit.nmr.fragments.model.ConnectionTree; +import casekit.nmr.fragments.model.ConnectionTreeNode; import org.openscience.cdk.interfaces.IAtom; import org.openscience.cdk.interfaces.IAtomContainer; import org.openscience.cdk.interfaces.IBond; @@ -15,7 +15,7 @@ import java.util.Set; import java.util.stream.Collectors; -public class FragmentationUtils { +public class FragmentationUtilities { public static boolean adjustNodeKeys(final ConnectionTree fragmentTree, final IAtomContainer structure) { int atomIndex; diff --git a/src/casekit/nmr/fragmentation/functionalgroup/ErtlFunctionalGroupsFinder.java b/src/casekit/nmr/fragments/functionalgroup/ErtlFunctionalGroupsFinder.java similarity index 99% rename from src/casekit/nmr/fragmentation/functionalgroup/ErtlFunctionalGroupsFinder.java rename to src/casekit/nmr/fragments/functionalgroup/ErtlFunctionalGroupsFinder.java index 7d765e6..a35da6b 100644 --- a/src/casekit/nmr/fragmentation/functionalgroup/ErtlFunctionalGroupsFinder.java +++ b/src/casekit/nmr/fragments/functionalgroup/ErtlFunctionalGroupsFinder.java @@ -17,7 +17,7 @@ * You should have received a copy of the GNU General Public License * along with this program. If not, see . 
*/ -package casekit.nmr.fragmentation.functionalgroup; +package casekit.nmr.fragments.functionalgroup; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Lists; diff --git a/src/casekit/nmr/fragmentation/functionalgroup/ErtlFunctionalGroupsUtilities.java b/src/casekit/nmr/fragments/functionalgroup/ErtlFunctionalGroupsUtilities.java similarity index 52% rename from src/casekit/nmr/fragmentation/functionalgroup/ErtlFunctionalGroupsUtilities.java rename to src/casekit/nmr/fragments/functionalgroup/ErtlFunctionalGroupsUtilities.java index 7f2121c..818b586 100644 --- a/src/casekit/nmr/fragmentation/functionalgroup/ErtlFunctionalGroupsUtilities.java +++ b/src/casekit/nmr/fragments/functionalgroup/ErtlFunctionalGroupsUtilities.java @@ -1,12 +1,10 @@ -package casekit.nmr.fragmentation.functionalgroup; +package casekit.nmr.fragments.functionalgroup; import casekit.nmr.dbservice.NMRShiftDB; -import casekit.nmr.fragmentation.Fragmentation; -import casekit.nmr.fragmentation.FragmentationUtils; -import casekit.nmr.fragmentation.model.ConnectionTree; +import casekit.nmr.fragments.fragmentation.Fragmentation; +import casekit.nmr.fragments.fragmentation.FragmentationUtilities; +import casekit.nmr.fragments.model.ConnectionTree; import casekit.nmr.model.DataSet; -import casekit.nmr.model.Spectrum; -import casekit.nmr.utils.Match; import casekit.nmr.utils.Utils; import org.openscience.cdk.aromaticity.Aromaticity; import org.openscience.cdk.aromaticity.ElectronDonation; @@ -15,14 +13,12 @@ import org.openscience.cdk.graph.Cycles; import org.openscience.cdk.interfaces.IAtom; import org.openscience.cdk.interfaces.IAtomContainer; -import org.openscience.cdk.interfaces.IMolecularFormula; -import org.openscience.cdk.silent.SilentChemObjectBuilder; import org.openscience.cdk.tools.manipulator.AtomContainerManipulator; -import org.openscience.cdk.tools.manipulator.MolecularFormulaManipulator; import java.io.IOException; -import java.util.*; -import 
java.util.stream.Collectors; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; public class ErtlFunctionalGroupsUtilities { @@ -69,13 +65,13 @@ public static final List buildFunctionalGroupDataSets(final String path continue; } fragmentTree = Fragmentation.buildFragmentTree(group, 0, null, new HashSet<>(), false); - FragmentationUtils.adjustNodeKeys(fragmentTree, structure); - FragmentationUtils.closeRings(fragmentTree, structure); + FragmentationUtilities.adjustNodeKeys(fragmentTree, structure); + FragmentationUtilities.closeRings(fragmentTree, structure); fragmentTrees.add(fragmentTree); } } - FragmentationUtils.removeDuplicates(fragmentTrees); + FragmentationUtilities.removeDuplicates(fragmentTrees); dataSetList = Fragmentation.fragmentTreesToSubDataSets(dataSet, fragmentTrees); if (dataSetList != null) { @@ -89,126 +85,6 @@ public static final List buildFunctionalGroupDataSets(final String path return functionalGroupDataSets; } - public static LinkedHashMap> sortByFrequencies( - final Map> functionalGroupDataSetsMap) { - final LinkedHashMap> sortedCollection = new LinkedHashMap<>(); - final List>> sortedFrequencies = getSortedFrequencies( - functionalGroupDataSetsMap); - for (final Map.Entry> frequency : sortedFrequencies) { - sortedCollection.put(frequency.getKey(), frequency.getValue()); - } - - return sortedCollection; - } - - public static List>> getSortedFrequencies( - final Map> functionalGroupDataSets) { - return functionalGroupDataSets.entrySet() - .stream() - .sorted(Map.Entry.comparingByValue((list1, list2) -> -1 - * Integer.compare(list1.size(), list2.size()))) - .collect(Collectors.toList()); - } - - public static Map countFrequencies(final List functionalGroupDataSets) { - final Map frequencies = new HashMap<>(); - String smiles; - for (final DataSet functionalGroupDataSet : functionalGroupDataSets) { - smiles = functionalGroupDataSet.getMeta() - .get("smiles"); - if (smiles - != null) { - 
frequencies.putIfAbsent(smiles, 0); - frequencies.put(smiles, frequencies.get(smiles) - + 1); - } - } - - return frequencies; - } - - public static Map> collectBySmiles(final List functionalGroupDataSets) { - final Map> collection = new HashMap<>(); - String smiles; - for (final DataSet functionalGroupDataSet : functionalGroupDataSets) { - smiles = functionalGroupDataSet.getMeta() - .get("smiles"); - if (smiles - != null) { - collection.putIfAbsent(smiles, new ArrayList<>()); - collection.get(smiles) - .add(functionalGroupDataSet); - } - } - - return collection; - } - - public static List findMatches(final List functionalGroupDataSets, final Spectrum querySpectrum, - final String mf, final double shiftTol, final double maxAverageDeviation, - final boolean checkMultiplicity) { - final List matches = new ArrayList<>(); - for (final DataSet dataSet : functionalGroupDataSets) { - if (isValidMatch(dataSet, querySpectrum, mf, shiftTol, maxAverageDeviation, checkMultiplicity)) { - matches.add(dataSet); - } - } - - return matches; - } - - public static boolean isValidMatch(final DataSet dataSet, final Spectrum querySpectrum, final String mf, - final double shiftTol, final double maxAverageDeviation, - final boolean checkMultiplicity) { - final IMolecularFormula iMolecularFormula = MolecularFormulaManipulator.getMolecularFormula(mf, - SilentChemObjectBuilder.getInstance()); - final IAtomContainer group = dataSet.getStructure() - .toAtomContainer(); - - if (!dataSet.getSpectrum() - .getNuclei()[0].equals(querySpectrum.getNuclei()[0])) { - return false; - } - final String atomTypeInSpectrum = casekit.nmr.utils.Utils.getAtomTypeFromNucleus(dataSet.getSpectrum() - .getNuclei()[0]); - if (atomTypeInSpectrum.equals("H")) { - if (AtomContainerManipulator.getImplicitHydrogenCount(dataSet.getStructure() - .toAtomContainer()) - > MolecularFormulaManipulator.getElementCount(iMolecularFormula, atomTypeInSpectrum)) { - return false; - } - } else { - // check molecular formula with atom 
types in group - if (!casekit.nmr.utils.Utils.compareWithMolecularFormulaLessOrEqual(group, mf)) { - return false; - } - // do not allow unsaturated fragments with different size than given molecular formula - if (casekit.nmr.utils.Utils.getUnsaturatedAtomIndices(group) - .isEmpty() - && !casekit.nmr.utils.Utils.compareWithMolecularFormulaEqual(group, mf)) { - return false; - } - } - // check average deviation - final Double averageDeviation = Match.calculateAverageDeviation(dataSet.getSpectrum(), querySpectrum, 0, 0, - shiftTol, checkMultiplicity, true, true); - - if (averageDeviation - == null - || averageDeviation - > maxAverageDeviation) { - return false; - } - final Double rmsd = Match.calculateRMSD(dataSet.getSpectrum(), querySpectrum, 0, 0, shiftTol, checkMultiplicity, - true, true); - dataSet.getMeta() - .put("avgDev", Double.toString(averageDeviation)); - dataSet.getMeta() - .put("rmsd", Double.toString(rmsd)); - - return true; - } - /** * Replaces the inserted environmental carbon atoms (new IAtom objects) by the * carbon IAtom objects from original structure. diff --git a/src/casekit/nmr/fragmentation/model/ConnectionTree.java b/src/casekit/nmr/fragments/model/ConnectionTree.java similarity index 99% rename from src/casekit/nmr/fragmentation/model/ConnectionTree.java rename to src/casekit/nmr/fragments/model/ConnectionTree.java index 91cd6ab..83adf85 100644 --- a/src/casekit/nmr/fragmentation/model/ConnectionTree.java +++ b/src/casekit/nmr/fragments/model/ConnectionTree.java @@ -9,7 +9,7 @@ * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ -package casekit.nmr.fragmentation.model; +package casekit.nmr.fragments.model; import casekit.nmr.hose.HOSECodeUtilities; import org.openscience.cdk.interfaces.IAtom; diff --git a/src/casekit/nmr/fragmentation/model/ConnectionTreeNode.java b/src/casekit/nmr/fragments/model/ConnectionTreeNode.java similarity index 99% rename from src/casekit/nmr/fragmentation/model/ConnectionTreeNode.java rename to src/casekit/nmr/fragments/model/ConnectionTreeNode.java index e54cff9..1ccd5f1 100644 --- a/src/casekit/nmr/fragmentation/model/ConnectionTreeNode.java +++ b/src/casekit/nmr/fragments/model/ConnectionTreeNode.java @@ -9,7 +9,7 @@ * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ -package casekit.nmr.fragmentation.model; +package casekit.nmr.fragments.model; import org.openscience.cdk.interfaces.IAtom; import org.openscience.cdk.interfaces.IBond; diff --git a/src/casekit/nmr/hose/HOSECodeBuilder.java b/src/casekit/nmr/hose/HOSECodeBuilder.java index ae94d25..f95fdcf 100644 --- a/src/casekit/nmr/hose/HOSECodeBuilder.java +++ b/src/casekit/nmr/hose/HOSECodeBuilder.java @@ -12,9 +12,9 @@ package casekit.nmr.hose; -import casekit.nmr.fragmentation.FragmentationUtils; -import casekit.nmr.fragmentation.model.ConnectionTree; -import casekit.nmr.fragmentation.model.ConnectionTreeNode; +import casekit.nmr.fragments.fragmentation.FragmentationUtilities; +import casekit.nmr.fragments.model.ConnectionTree; +import casekit.nmr.fragments.model.ConnectionTreeNode; import org.openscience.cdk.exception.CDKException; import org.openscience.cdk.interfaces.IAtom; import org.openscience.cdk.interfaces.IAtomContainer; @@ -572,11 +572,11 @@ private static void BFS(final IAtomContainer ac, final ConnectionTree connection * @return IAtomContainer * * @see #buildConnectionTree(String, boolean) - * @see FragmentationUtils#toAtomContainer(ConnectionTree) + * @see FragmentationUtilities#toAtomContainer(ConnectionTree) */ public static IAtomContainer buildAtomContainer(final String HOSECode, final boolean useBremserElementNotation) throws CDKException { - return FragmentationUtils.toAtomContainer( + return FragmentationUtilities.toAtomContainer( HOSECodeBuilder.buildConnectionTree(HOSECode, useBremserElementNotation)); } } \ No newline at end of file diff --git a/src/casekit/nmr/hose/HOSECodeUtilities.java b/src/casekit/nmr/hose/HOSECodeUtilities.java index 0e66dad..9165324 100644 --- a/src/casekit/nmr/hose/HOSECodeUtilities.java +++ b/src/casekit/nmr/hose/HOSECodeUtilities.java @@ -12,8 +12,8 @@ package casekit.nmr.hose; -import casekit.nmr.fragmentation.model.ConnectionTree; -import casekit.nmr.fragmentation.model.ConnectionTreeNode; +import 
casekit.nmr.fragments.model.ConnectionTree; +import casekit.nmr.fragments.model.ConnectionTreeNode; import org.openscience.cdk.interfaces.IBond; import java.util.ArrayList; diff --git a/src/casekit/nmr/utils/Match.java b/src/casekit/nmr/utils/Match.java index 4b17337..8a9aa49 100644 --- a/src/casekit/nmr/utils/Match.java +++ b/src/casekit/nmr/utils/Match.java @@ -13,11 +13,17 @@ package casekit.nmr.utils; import casekit.nmr.model.Assignment; +import casekit.nmr.model.DataSet; import casekit.nmr.model.Signal; import casekit.nmr.model.Spectrum; import org.apache.commons.lang3.ArrayUtils; import org.openscience.cdk.exception.CDKException; +import org.openscience.cdk.interfaces.IAtomContainer; +import org.openscience.cdk.interfaces.IMolecularFormula; +import org.openscience.cdk.silent.SilentChemObjectBuilder; import org.openscience.cdk.similarity.Tanimoto; +import org.openscience.cdk.tools.manipulator.AtomContainerManipulator; +import org.openscience.cdk.tools.manipulator.MolecularFormulaManipulator; import java.util.*; @@ -297,4 +303,69 @@ public static Assignment matchSpectra(final Spectrum spectrum1, final Spectrum s return matchAssignment; } + + public static List findMatches(final List dataSetList, final Spectrum querySpectrum, + final String mf, final double shiftTol, final double maxAverageDeviation, + final boolean checkMultiplicity) { + final List matches = new ArrayList<>(); + for (final DataSet dataSet : dataSetList) { + if (isValidMatch(dataSet, querySpectrum, mf, shiftTol, maxAverageDeviation, checkMultiplicity)) { + matches.add(dataSet); + } + } + + return matches; + } + + public static boolean isValidMatch(final DataSet dataSet, final Spectrum querySpectrum, final String mf, + final double shiftTol, final double maxAverageDeviation, + final boolean checkMultiplicity) { + final IMolecularFormula iMolecularFormula = MolecularFormulaManipulator.getMolecularFormula(mf, + SilentChemObjectBuilder.getInstance()); + final IAtomContainer group = 
dataSet.getStructure() + .toAtomContainer(); + + if (!dataSet.getSpectrum() + .getNuclei()[0].equals(querySpectrum.getNuclei()[0])) { + return false; + } + final String atomTypeInSpectrum = Utils.getAtomTypeFromNucleus(dataSet.getSpectrum() + .getNuclei()[0]); + if (atomTypeInSpectrum.equals("H")) { + if (AtomContainerManipulator.getImplicitHydrogenCount(dataSet.getStructure() + .toAtomContainer()) + > MolecularFormulaManipulator.getElementCount(iMolecularFormula, atomTypeInSpectrum)) { + return false; + } + } else { + // check molecular formula with atom types in group + if (!Utils.compareWithMolecularFormulaLessOrEqual(group, mf)) { + return false; + } + // do not allow unsaturated fragments with different size than given molecular formula + if (Utils.getUnsaturatedAtomIndices(group) + .isEmpty() + && !Utils.compareWithMolecularFormulaEqual(group, mf)) { + return false; + } + } + // check average deviation + final Double averageDeviation = calculateAverageDeviation(dataSet.getSpectrum(), querySpectrum, 0, 0, shiftTol, + checkMultiplicity, true, true); + + if (averageDeviation + == null + || averageDeviation + > maxAverageDeviation) { + return false; + } + final Double rmsd = calculateRMSD(dataSet.getSpectrum(), querySpectrum, 0, 0, shiftTol, checkMultiplicity, true, + true); + dataSet.getMeta() + .put("avgDev", Double.toString(averageDeviation)); + dataSet.getMeta() + .put("rmsd", Double.toString(rmsd)); + + return true; + } } diff --git a/src/casekit/nmr/utils/Predict.java b/src/casekit/nmr/utils/Predict.java index 4d846d9..e8e5792 100644 --- a/src/casekit/nmr/utils/Predict.java +++ b/src/casekit/nmr/utils/Predict.java @@ -24,8 +24,8 @@ package casekit.nmr.utils; -import casekit.nmr.fragmentation.model.ConnectionTree; -import casekit.nmr.fragmentation.model.ConnectionTreeNode; +import casekit.nmr.fragments.model.ConnectionTree; +import casekit.nmr.fragments.model.ConnectionTreeNode; import casekit.nmr.hose.HOSECodeBuilder; import casekit.nmr.model.Assignment; 
import casekit.nmr.model.DataSet; From 2cf185dcf4f7508daeace0cdfee13355d9ca4537 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Mon, 28 Jun 2021 22:30:47 +0200 Subject: [PATCH 261/405] chore: put classes from "utils" package to separate packages --- .../nmr/fragments/FragmentUtilities.java | 73 ++++++++++++++++ .../nmr/lsd/RankedResultSDFParser.java | 4 +- .../Prediction.java} | 11 +-- .../Match.java => similarity/Similarity.java} | 87 ++----------------- 4 files changed, 90 insertions(+), 85 deletions(-) rename src/casekit/nmr/{utils/Predict.java => prediction/Prediction.java} (97%) rename src/casekit/nmr/{utils/Match.java => similarity/Similarity.java} (78%) diff --git a/src/casekit/nmr/fragments/FragmentUtilities.java b/src/casekit/nmr/fragments/FragmentUtilities.java index 52aa2e5..af6a467 100644 --- a/src/casekit/nmr/fragments/FragmentUtilities.java +++ b/src/casekit/nmr/fragments/FragmentUtilities.java @@ -1,6 +1,14 @@ package casekit.nmr.fragments; import casekit.nmr.model.DataSet; +import casekit.nmr.model.Spectrum; +import casekit.nmr.similarity.Similarity; +import casekit.nmr.utils.Utils; +import org.openscience.cdk.interfaces.IAtomContainer; +import org.openscience.cdk.interfaces.IMolecularFormula; +import org.openscience.cdk.silent.SilentChemObjectBuilder; +import org.openscience.cdk.tools.manipulator.AtomContainerManipulator; +import org.openscience.cdk.tools.manipulator.MolecularFormulaManipulator; import java.util.*; import java.util.stream.Collectors; @@ -67,4 +75,69 @@ public static void filterByCommonSubstructures(final Map> smilesCollection2.remove(keyCollection2); } } + + public static List findMatches(final List dataSetList, final Spectrum querySpectrum, + final String mf, final double shiftTol, final double maxAverageDeviation, + final boolean checkMultiplicity) { + final List matches = new ArrayList<>(); + for (final DataSet dataSet : dataSetList) { + if (isValidMatch(dataSet, querySpectrum, mf, shiftTol, maxAverageDeviation, 
checkMultiplicity)) { + matches.add(dataSet); + } + } + + return matches; + } + + public static boolean isValidMatch(final DataSet dataSet, final Spectrum querySpectrum, final String mf, + final double shiftTol, final double maxAverageDeviation, + final boolean checkMultiplicity) { + final IMolecularFormula iMolecularFormula = MolecularFormulaManipulator.getMolecularFormula(mf, + SilentChemObjectBuilder.getInstance()); + final IAtomContainer group = dataSet.getStructure() + .toAtomContainer(); + + if (!dataSet.getSpectrum() + .getNuclei()[0].equals(querySpectrum.getNuclei()[0])) { + return false; + } + final String atomTypeInSpectrum = Utils.getAtomTypeFromNucleus(dataSet.getSpectrum() + .getNuclei()[0]); + if (atomTypeInSpectrum.equals("H")) { + if (AtomContainerManipulator.getImplicitHydrogenCount(dataSet.getStructure() + .toAtomContainer()) + > MolecularFormulaManipulator.getElementCount(iMolecularFormula, atomTypeInSpectrum)) { + return false; + } + } else { + // check molecular formula with atom types in group + if (!Utils.compareWithMolecularFormulaLessOrEqual(group, mf)) { + return false; + } + // do not allow unsaturated fragments with different size than given molecular formula + if (Utils.getUnsaturatedAtomIndices(group) + .isEmpty() + && !Utils.compareWithMolecularFormulaEqual(group, mf)) { + return false; + } + } + // check average deviation + final Double averageDeviation = Similarity.calculateAverageDeviation(dataSet.getSpectrum(), querySpectrum, 0, 0, + shiftTol, checkMultiplicity, true, true); + + if (averageDeviation + == null + || averageDeviation + > maxAverageDeviation) { + return false; + } + final Double rmsd = Similarity.calculateRMSD(dataSet.getSpectrum(), querySpectrum, 0, 0, shiftTol, + checkMultiplicity, true, true); + dataSet.getMeta() + .put("avgDev", Double.toString(averageDeviation)); + dataSet.getMeta() + .put("rmsd", Double.toString(rmsd)); + + return true; + } } diff --git a/src/casekit/nmr/lsd/RankedResultSDFParser.java 
b/src/casekit/nmr/lsd/RankedResultSDFParser.java index 765def9..1043102 100644 --- a/src/casekit/nmr/lsd/RankedResultSDFParser.java +++ b/src/casekit/nmr/lsd/RankedResultSDFParser.java @@ -4,7 +4,7 @@ import casekit.nmr.model.DataSet; import casekit.nmr.model.Signal; import casekit.nmr.model.Spectrum; -import casekit.nmr.utils.Match; +import casekit.nmr.similarity.Similarity; import casekit.nmr.utils.Statistics; import casekit.nmr.utils.Utils; import org.openscience.cdk.exception.CDKException; @@ -138,7 +138,7 @@ public static List parseRankedResultSDFile(final Reader fileReader, dataSet = new DataSet(structure, predictedSpectrum, assignment, meta); dataSet.addMetaInfo("rmsd", String.valueOf(Statistics.calculateRMSD(deviations))); dataSet.addMetaInfo("tanimoto", String.valueOf( - Match.calculateTanimotoCoefficient(dataSet.getSpectrum(), experimentalSpectrum, 0, 0))); + Similarity.calculateTanimotoCoefficient(dataSet.getSpectrum(), experimentalSpectrum, 0, 0))); dataSetList.add(dataSet); } diff --git a/src/casekit/nmr/utils/Predict.java b/src/casekit/nmr/prediction/Prediction.java similarity index 97% rename from src/casekit/nmr/utils/Predict.java rename to src/casekit/nmr/prediction/Prediction.java index e8e5792..0a86982 100644 --- a/src/casekit/nmr/utils/Predict.java +++ b/src/casekit/nmr/prediction/Prediction.java @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. 
*/ -package casekit.nmr.utils; +package casekit.nmr.prediction; import casekit.nmr.fragments.model.ConnectionTree; @@ -31,6 +31,7 @@ import casekit.nmr.model.DataSet; import casekit.nmr.model.Signal; import casekit.nmr.model.Spectrum; +import casekit.nmr.utils.Utils; import org.openscience.cdk.exception.CDKException; import org.openscience.cdk.interfaces.IAtom; import org.openscience.cdk.interfaces.IAtomContainer; @@ -46,7 +47,7 @@ /** * @author Michael Wenk [https://github.com/michaelwenk] */ -public class Predict { +public class Prediction { /** * Diastereotopic distinctions are not provided yet. @@ -146,9 +147,9 @@ public static DataSet predict2D(final Map> hoseCod final int minPathLength, final int maxPathLength) { final DataSet predictionDim1 = predict1D(hoseCodeShiftStatistics, structure, solvent, nuclei[0]); final DataSet predictionDim2 = predict1D(hoseCodeShiftStatistics, structure, solvent, nuclei[1]); - return Predict.predict2D(structure, predictionDim1.getSpectrum(), predictionDim2.getSpectrum(), - predictionDim1.getAssignment(), predictionDim2.getAssignment(), minPathLength, - maxPathLength); + return Prediction.predict2D(structure, predictionDim1.getSpectrum(), predictionDim2.getSpectrum(), + predictionDim1.getAssignment(), predictionDim2.getAssignment(), minPathLength, + maxPathLength); } /** diff --git a/src/casekit/nmr/utils/Match.java b/src/casekit/nmr/similarity/Similarity.java similarity index 78% rename from src/casekit/nmr/utils/Match.java rename to src/casekit/nmr/similarity/Similarity.java index 8a9aa49..c529cda 100644 --- a/src/casekit/nmr/utils/Match.java +++ b/src/casekit/nmr/similarity/Similarity.java @@ -10,24 +10,19 @@ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -package casekit.nmr.utils; +package casekit.nmr.similarity; import casekit.nmr.model.Assignment; -import casekit.nmr.model.DataSet; import casekit.nmr.model.Signal; import casekit.nmr.model.Spectrum; +import casekit.nmr.utils.Statistics; import org.apache.commons.lang3.ArrayUtils; import org.openscience.cdk.exception.CDKException; -import org.openscience.cdk.interfaces.IAtomContainer; -import org.openscience.cdk.interfaces.IMolecularFormula; -import org.openscience.cdk.silent.SilentChemObjectBuilder; import org.openscience.cdk.similarity.Tanimoto; -import org.openscience.cdk.tools.manipulator.AtomContainerManipulator; -import org.openscience.cdk.tools.manipulator.MolecularFormulaManipulator; import java.util.*; -public class Match { +public class Similarity { /** @@ -58,7 +53,7 @@ private static boolean checkDimensions(final Spectrum spectrum1, final Spectrum */ public static Float calculateTanimotoCoefficient(final Spectrum spectrum1, final Spectrum spectrum2, final int dim1, final int dim2) { - if (!Match.checkDimensions(spectrum1, spectrum2, dim1, dim2)) { + if (!Similarity.checkDimensions(spectrum1, spectrum2, dim1, dim2)) { return null; } final double[] shiftsSpectrum1 = ArrayUtils.toPrimitive(spectrum1.getShifts(dim1) @@ -143,8 +138,8 @@ public static Double calculateAverageDeviation(final Spectrum spectrum1, final S final boolean checkEquivalencesCount, final boolean allowLowerEquivalencesCount) { return Statistics.calculateAverageDeviation( - Match.getDeviations(spectrum1, spectrum2, dim1, dim2, shiftTol, checkMultiplicity, - checkEquivalencesCount, allowLowerEquivalencesCount)); + Similarity.getDeviations(spectrum1, spectrum2, dim1, dim2, shiftTol, checkMultiplicity, + checkEquivalencesCount, 
allowLowerEquivalencesCount)); } /** @@ -171,8 +166,8 @@ public static Double calculateRMSD(final Spectrum spectrum1, final Spectrum spec final boolean checkEquivalencesCount, final boolean allowLowerEquivalencesCount) { return Statistics.calculateRMSD( - Match.getDeviations(spectrum1, spectrum2, dim1, dim2, shiftTol, checkMultiplicity, - checkEquivalencesCount, allowLowerEquivalencesCount)); + Similarity.getDeviations(spectrum1, spectrum2, dim1, dim2, shiftTol, checkMultiplicity, + checkEquivalencesCount, allowLowerEquivalencesCount)); } /** @@ -197,7 +192,7 @@ public static Assignment matchSpectra(final Spectrum spectrum1, final Spectrum s final int dim2, final double shiftTol, final boolean checkMultiplicity, final boolean checkEquivalencesCount, final boolean allowLowerEquivalencesCount) { - if (!Match.checkDimensions(spectrum1, spectrum2, dim1, dim2)) { + if (!Similarity.checkDimensions(spectrum1, spectrum2, dim1, dim2)) { return null; } final Assignment matchAssignments = new Assignment(); @@ -304,68 +299,4 @@ public static Assignment matchSpectra(final Spectrum spectrum1, final Spectrum s return matchAssignment; } - public static List findMatches(final List dataSetList, final Spectrum querySpectrum, - final String mf, final double shiftTol, final double maxAverageDeviation, - final boolean checkMultiplicity) { - final List matches = new ArrayList<>(); - for (final DataSet dataSet : dataSetList) { - if (isValidMatch(dataSet, querySpectrum, mf, shiftTol, maxAverageDeviation, checkMultiplicity)) { - matches.add(dataSet); - } - } - - return matches; - } - - public static boolean isValidMatch(final DataSet dataSet, final Spectrum querySpectrum, final String mf, - final double shiftTol, final double maxAverageDeviation, - final boolean checkMultiplicity) { - final IMolecularFormula iMolecularFormula = MolecularFormulaManipulator.getMolecularFormula(mf, - SilentChemObjectBuilder.getInstance()); - final IAtomContainer group = dataSet.getStructure() - 
.toAtomContainer(); - - if (!dataSet.getSpectrum() - .getNuclei()[0].equals(querySpectrum.getNuclei()[0])) { - return false; - } - final String atomTypeInSpectrum = Utils.getAtomTypeFromNucleus(dataSet.getSpectrum() - .getNuclei()[0]); - if (atomTypeInSpectrum.equals("H")) { - if (AtomContainerManipulator.getImplicitHydrogenCount(dataSet.getStructure() - .toAtomContainer()) - > MolecularFormulaManipulator.getElementCount(iMolecularFormula, atomTypeInSpectrum)) { - return false; - } - } else { - // check molecular formula with atom types in group - if (!Utils.compareWithMolecularFormulaLessOrEqual(group, mf)) { - return false; - } - // do not allow unsaturated fragments with different size than given molecular formula - if (Utils.getUnsaturatedAtomIndices(group) - .isEmpty() - && !Utils.compareWithMolecularFormulaEqual(group, mf)) { - return false; - } - } - // check average deviation - final Double averageDeviation = calculateAverageDeviation(dataSet.getSpectrum(), querySpectrum, 0, 0, shiftTol, - checkMultiplicity, true, true); - - if (averageDeviation - == null - || averageDeviation - > maxAverageDeviation) { - return false; - } - final Double rmsd = calculateRMSD(dataSet.getSpectrum(), querySpectrum, 0, 0, shiftTol, checkMultiplicity, true, - true); - dataSet.getMeta() - .put("avgDev", Double.toString(averageDeviation)); - dataSet.getMeta() - .put("rmsd", Double.toString(rmsd)); - - return true; - } } From 65de333f3890328c1a01019dceba8a42684b7e24 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Tue, 29 Jun 2021 00:27:53 +0200 Subject: [PATCH 262/405] feat: extended findMatches in FragmentUtilities by goodlist and badlist --- .../nmr/fragments/FragmentUtilities.java | 96 ++++++++++++------- 1 file changed, 60 insertions(+), 36 deletions(-) diff --git a/src/casekit/nmr/fragments/FragmentUtilities.java b/src/casekit/nmr/fragments/FragmentUtilities.java index af6a467..04fcbfd 100644 --- a/src/casekit/nmr/fragments/FragmentUtilities.java +++ 
b/src/casekit/nmr/fragments/FragmentUtilities.java @@ -1,14 +1,12 @@ package casekit.nmr.fragments; +import casekit.nmr.model.Assignment; import casekit.nmr.model.DataSet; import casekit.nmr.model.Spectrum; import casekit.nmr.similarity.Similarity; import casekit.nmr.utils.Utils; import org.openscience.cdk.interfaces.IAtomContainer; -import org.openscience.cdk.interfaces.IMolecularFormula; -import org.openscience.cdk.silent.SilentChemObjectBuilder; import org.openscience.cdk.tools.manipulator.AtomContainerManipulator; -import org.openscience.cdk.tools.manipulator.MolecularFormulaManipulator; import java.util.*; import java.util.stream.Collectors; @@ -76,55 +74,42 @@ public static void filterByCommonSubstructures(final Map> } } - public static List findMatches(final List dataSetList, final Spectrum querySpectrum, - final String mf, final double shiftTol, final double maxAverageDeviation, - final boolean checkMultiplicity) { + public static Map> getGoodlistAndBadlist(final List dataSetList, + final Spectrum querySpectrum, final String mf, + final double shiftTol, + final double maxAverageDeviation, + final boolean checkMultiplicity) { final List matches = new ArrayList<>(); + final List nonMatches = new ArrayList<>(); for (final DataSet dataSet : dataSetList) { - if (isValidMatch(dataSet, querySpectrum, mf, shiftTol, maxAverageDeviation, checkMultiplicity)) { + if (isMatch(dataSet, querySpectrum, mf, shiftTol, maxAverageDeviation, checkMultiplicity)) { matches.add(dataSet); + } else if (isNonMatch(dataSet, querySpectrum, mf, shiftTol, checkMultiplicity)) { + nonMatches.add(dataSet); } } + final Map> lists = new HashMap<>(); + lists.put("goodlist", matches); + lists.put("badlist", nonMatches); - return matches; + return lists; } - public static boolean isValidMatch(final DataSet dataSet, final Spectrum querySpectrum, final String mf, - final double shiftTol, final double maxAverageDeviation, - final boolean checkMultiplicity) { - final IMolecularFormula 
iMolecularFormula = MolecularFormulaManipulator.getMolecularFormula(mf, - SilentChemObjectBuilder.getInstance()); - final IAtomContainer group = dataSet.getStructure() - .toAtomContainer(); - + public static boolean isMatch(final DataSet dataSet, final Spectrum querySpectrum, final String mf, + final double shiftTol, final double maxAverageDeviation, + final boolean checkMultiplicity) { + // check for nuclei if (!dataSet.getSpectrum() .getNuclei()[0].equals(querySpectrum.getNuclei()[0])) { return false; } - final String atomTypeInSpectrum = Utils.getAtomTypeFromNucleus(dataSet.getSpectrum() - .getNuclei()[0]); - if (atomTypeInSpectrum.equals("H")) { - if (AtomContainerManipulator.getImplicitHydrogenCount(dataSet.getStructure() - .toAtomContainer()) - > MolecularFormulaManipulator.getElementCount(iMolecularFormula, atomTypeInSpectrum)) { - return false; - } - } else { - // check molecular formula with atom types in group - if (!Utils.compareWithMolecularFormulaLessOrEqual(group, mf)) { - return false; - } - // do not allow unsaturated fragments with different size than given molecular formula - if (Utils.getUnsaturatedAtomIndices(group) - .isEmpty() - && !Utils.compareWithMolecularFormulaEqual(group, mf)) { - return false; - } + // check for structural problems in fragment regarding the molecular formula + if (!isStructuralMatch(dataSet, mf)) { + return false; } // check average deviation final Double averageDeviation = Similarity.calculateAverageDeviation(dataSet.getSpectrum(), querySpectrum, 0, 0, shiftTol, checkMultiplicity, true, true); - if (averageDeviation == null || averageDeviation @@ -140,4 +125,43 @@ public static boolean isValidMatch(final DataSet dataSet, final Spectrum querySp return true; } + + public static boolean isNonMatch(final DataSet dataSet, final Spectrum querySpectrum, final String mf, + final double shiftTol, final boolean checkMultiplicity) { + boolean isSpectralMatch = false; + if (dataSet.getSpectrum() + 
.getNuclei()[0].equals(querySpectrum.getNuclei()[0])) { + final Assignment matchAssigment = Similarity.matchSpectra(dataSet.getSpectrum(), querySpectrum, 0, 0, + shiftTol, checkMultiplicity, true, true); + if (matchAssigment + != null + && matchAssigment.getSetAssignmentsCount(0) + > 0) { + isSpectralMatch = true; + } + } + + return !isSpectralMatch + && !isStructuralMatch(dataSet, mf); + } + + public static boolean isStructuralMatch(final DataSet dataSet, final String mf) { + final IAtomContainer fragment = dataSet.getStructure() + .toAtomContainer(); + if (Utils.getAtomTypeFromNucleus(dataSet.getSpectrum() + .getNuclei()[0]) + .equals("H")) { + return AtomContainerManipulator.getImplicitHydrogenCount(fragment) + <= Utils.getAtomTypeCount(mf, "H"); + } else { + // check molecular formula with atom types in group + if (!Utils.compareWithMolecularFormulaLessOrEqual(fragment, mf)) { + return false; + } + // do not allow unsaturated fragments with different size than given molecular formula + return !Utils.getUnsaturatedAtomIndices(fragment) + .isEmpty() + || Utils.compareWithMolecularFormulaEqual(fragment, mf); + } + } } From 2506cf4a8fa5ebe80aa8bdfe22f6bad3e2c9817f Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Wed, 30 Jun 2021 16:32:04 +0200 Subject: [PATCH 263/405] fix: allow unsaturated fragments only --- .../nmr/fragments/FragmentUtilities.java | 27 +++++++------------ src/casekit/nmr/utils/Utils.java | 4 +++ 2 files changed, 13 insertions(+), 18 deletions(-) diff --git a/src/casekit/nmr/fragments/FragmentUtilities.java b/src/casekit/nmr/fragments/FragmentUtilities.java index 04fcbfd..342e6bb 100644 --- a/src/casekit/nmr/fragments/FragmentUtilities.java +++ b/src/casekit/nmr/fragments/FragmentUtilities.java @@ -6,7 +6,6 @@ import casekit.nmr.similarity.Similarity; import casekit.nmr.utils.Utils; import org.openscience.cdk.interfaces.IAtomContainer; -import org.openscience.cdk.tools.manipulator.AtomContainerManipulator; import java.util.*; import 
java.util.stream.Collectors; @@ -128,6 +127,9 @@ public static boolean isMatch(final DataSet dataSet, final Spectrum querySpectru public static boolean isNonMatch(final DataSet dataSet, final Spectrum querySpectrum, final String mf, final double shiftTol, final boolean checkMultiplicity) { + if (!isStructuralMatch(dataSet, mf)) { + return false; + } boolean isSpectralMatch = false; if (dataSet.getSpectrum() .getNuclei()[0].equals(querySpectrum.getNuclei()[0])) { @@ -141,27 +143,16 @@ public static boolean isNonMatch(final DataSet dataSet, final Spectrum querySpec } } - return !isSpectralMatch - && !isStructuralMatch(dataSet, mf); + return !isSpectralMatch; } public static boolean isStructuralMatch(final DataSet dataSet, final String mf) { final IAtomContainer fragment = dataSet.getStructure() .toAtomContainer(); - if (Utils.getAtomTypeFromNucleus(dataSet.getSpectrum() - .getNuclei()[0]) - .equals("H")) { - return AtomContainerManipulator.getImplicitHydrogenCount(fragment) - <= Utils.getAtomTypeCount(mf, "H"); - } else { - // check molecular formula with atom types in group - if (!Utils.compareWithMolecularFormulaLessOrEqual(fragment, mf)) { - return false; - } - // do not allow unsaturated fragments with different size than given molecular formula - return !Utils.getUnsaturatedAtomIndices(fragment) - .isEmpty() - || Utils.compareWithMolecularFormulaEqual(fragment, mf); - } + // check molecular formula with atom types in group + // do not allow unsaturated fragments + return Utils.compareWithMolecularFormulaLessOrEqual(fragment, mf) + && !Utils.getUnsaturatedAtomIndices(fragment) + .isEmpty(); } } diff --git a/src/casekit/nmr/utils/Utils.java b/src/casekit/nmr/utils/Utils.java index 65350d4..04f633b 100644 --- a/src/casekit/nmr/utils/Utils.java +++ b/src/casekit/nmr/utils/Utils.java @@ -127,6 +127,8 @@ > getAtomTypeCount(mf, atomType)) { return false; } } + return AtomContainerManipulator.getImplicitHydrogenCount(structure) + <= getAtomTypeCount(mf, "H"); } return 
true; @@ -143,6 +145,8 @@ public static boolean compareWithMolecularFormulaEqual(final IAtomContainer stru return false; } } + return AtomContainerManipulator.getImplicitHydrogenCount(structure) + == Utils.getAtomTypeCount(mf, "H"); } return true; From adc838ad2856208373f932f1da895f0b106bf8a5 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Wed, 30 Jun 2021 17:06:17 +0200 Subject: [PATCH 264/405] feat: added general getDeviation method and possibility to use previous assignments --- src/casekit/nmr/similarity/Similarity.java | 91 ++++++++++++++++++---- 1 file changed, 76 insertions(+), 15 deletions(-) diff --git a/src/casekit/nmr/similarity/Similarity.java b/src/casekit/nmr/similarity/Similarity.java index c529cda..d5a66f5 100644 --- a/src/casekit/nmr/similarity/Similarity.java +++ b/src/casekit/nmr/similarity/Similarity.java @@ -50,6 +50,8 @@ private static boolean checkDimensions(final Spectrum spectrum1, final Spectrum * @param dim2 dimension in second spectrum to take the shifts from * * @return + * + * @deprecated */ public static Float calculateTanimotoCoefficient(final Spectrum spectrum1, final Spectrum spectrum2, final int dim1, final int dim2) { @@ -73,6 +75,37 @@ public static Float calculateTanimotoCoefficient(final Spectrum spectrum1, final return null; } + /** + * Returns deviations between two already matched spectra. 
+ * + * @param spectrum1 first spectrum + * @param spectrum2 second spectrum + * @param dim1 dimension in first spectrum to take the shifts from + * @param dim2 dimension in second spectrum to take the shifts from + * @param assignments assignments from previous matching + * + * @return + */ + public static Double[] getDeviations(final Spectrum spectrum1, final Spectrum spectrum2, final int dim1, + final int dim2, final Assignment assignments) { + final Double[] deviations = new Double[spectrum1.getSignalCount()]; + Signal matchedSignalInSpectrum2; + for (int i = 0; i + < spectrum1.getSignalCount(); i++) { + if (assignments.getAssignment(0, i).length + == 0) { + deviations[i] = null; + } else { + matchedSignalInSpectrum2 = spectrum2.getSignal(assignments.getAssignment(0, i)[0]); + deviations[i] = Math.abs(spectrum1.getSignal(i) + .getShift(dim1) + - matchedSignalInSpectrum2.getShift(dim2)); + } + } + + return deviations; + } + /** * Returns deviations between matched shifts of two spectra. * The matching procedure is already included here. 
@@ -94,23 +127,10 @@ public static Double[] getDeviations(final Spectrum spectrum1, final Spectrum sp final int dim2, final double shiftTol, final boolean checkMultiplicity, final boolean checkEquivalencesCount, final boolean allowLowerEquivalencesCount) { - final Double[] deviations = new Double[spectrum1.getSignalCount()]; final Assignment matchAssignments = matchSpectra(spectrum1, spectrum2, dim1, dim2, shiftTol, checkMultiplicity, checkEquivalencesCount, allowLowerEquivalencesCount); - Signal matchedSignalInSpectrum2; - for (int i = 0; i - < spectrum1.getSignalCount(); i++) { - if (matchAssignments.getAssignment(0, i).length - == 0) { - deviations[i] = null; - } else { - matchedSignalInSpectrum2 = spectrum2.getSignal(matchAssignments.getAssignment(0, i)[0]); - deviations[i] = Math.abs(spectrum1.getSignal(i) - .getShift(dim1) - - matchedSignalInSpectrum2.getShift(dim2)); - } - } - return deviations; + + return getDeviations(spectrum1, spectrum2, dim1, dim2, matchAssignments); } /** @@ -142,6 +162,27 @@ public static Double calculateAverageDeviation(final Spectrum spectrum1, final S checkEquivalencesCount, allowLowerEquivalencesCount)); } + /** + * Returns the average of all deviations of matched shifts between two + * spectra. 
+ * + * @param spectrum1 first spectrum + * @param spectrum2 second spectrum + * @param dim1 dimension in first spectrum to take the shifts from + * @param dim2 dimension in second spectrum to take the shifts from + * @param assignments assignments from previous matching + * + * @return + * + * @see #getDeviations(Spectrum, Spectrum, int, int, double, boolean, boolean, boolean) + * @see Statistics#calculateAverageDeviation(Double[]) + */ + public static Double calculateAverageDeviation(final Spectrum spectrum1, final Spectrum spectrum2, final int dim1, + final int dim2, final Assignment assignments) { + return Statistics.calculateAverageDeviation( + Similarity.getDeviations(spectrum1, spectrum2, dim1, dim2, assignments)); + } + /** * Returns the average of all deviations of matched shifts between two * spectra. @@ -170,6 +211,26 @@ public static Double calculateRMSD(final Spectrum spectrum1, final Spectrum spec checkEquivalencesCount, allowLowerEquivalencesCount)); } + /** + * Returns the average of all deviations of matched shifts between two + * spectra. + * + * @param spectrum1 first spectrum + * @param spectrum2 second spectrum + * @param dim1 dimension in first spectrum to take the shifts from + * @param dim2 dimension in second spectrum to take the shifts from + * @param assignments assignments from previous matching + * + * @return + * + * @see #getDeviations(Spectrum, Spectrum, int, int, double, boolean, boolean, boolean) + * @see Statistics#calculateAverageDeviation(Double[]) + */ + public static Double calculateRMSD(final Spectrum spectrum1, final Spectrum spectrum2, final int dim1, + final int dim2, final Assignment assignments) { + return Statistics.calculateRMSD(Similarity.getDeviations(spectrum1, spectrum2, dim1, dim2, assignments)); + } + /** * Returns the closest shift matches between two spectra in selected dimensions * as an Assignment object with one set dimension only.
From 17b9b089f8eaf89d981ac9488524722e7ffb06e7 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Wed, 30 Jun 2021 18:00:49 +0200 Subject: [PATCH 265/405] chore: added notice to BFS method (book by Elyashberg, Williams and Blinov) --- .../nmr/fragments/fragmentation/Fragmentation.java | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/casekit/nmr/fragments/fragmentation/Fragmentation.java b/src/casekit/nmr/fragments/fragmentation/Fragmentation.java index 9ba7cdf..6cc0910 100644 --- a/src/casekit/nmr/fragments/fragmentation/Fragmentation.java +++ b/src/casekit/nmr/fragments/fragmentation/Fragmentation.java @@ -280,6 +280,9 @@ public static List buildFragmentTrees(final IAtomContainer struc * @param withPseudoAtoms places pseudo atoms in the "outer" sphere * * @return connection tree + * + * @see #BFS(IAtomContainer, ConnectionTree, Queue, Set, Set, Integer) + * @see #buildFragmentTree(IAtomContainer, int, Integer, Set, boolean) */ public static IAtomContainer buildFragment(final IAtomContainer ac, final int rootAtomIndex, final Integer maxSphere, final Set exclude, @@ -328,7 +331,11 @@ public static ConnectionTree buildFragmentTree(final IAtomContainer structure, f * its root node (0th sphere) by means of Breadth-First-Search (BFS). * Until a certain maximum sphere, each reachable next neighbor atom * is stored in a parent-child-relationship. - * In addition, bonds within rings or between hetero atoms will be kept. + * And in addition, bonds between hetero atoms, carbon-hetero bonds and triple bonds will be kept. + * In such cases the maximum spherical limit will be ignored. + *

+ * This method follows the rules given in section 7.4.1 of
+ * "Contemporary Computer-Assisted Approaches to Molecular Structure Elucidation (New Developments in NMR) by Mikhail E Elyashberg, Antony Williams and Kirill Blinov. Edited by William Price. RSC Publishing, 2012. ISBN: 978-1-84973-432-5; eISBN: 978-1-84973-457-8" * * @param ac atom container to go through * @param connectionTree connection tree to expand, incl. the root node From 42722b0e2adcb272b34dc6b48e9ad6fd866bc767 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Tue, 6 Jul 2021 13:17:59 +0200 Subject: [PATCH 266/405] feat: introduction of maximum average deviation for dereplication and elucidation --- .../nmr/lsd/RankedResultSDFParser.java | 33 +++++++++++-------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/src/casekit/nmr/lsd/RankedResultSDFParser.java b/src/casekit/nmr/lsd/RankedResultSDFParser.java index 1043102..d202e89 100644 --- a/src/casekit/nmr/lsd/RankedResultSDFParser.java +++ b/src/casekit/nmr/lsd/RankedResultSDFParser.java @@ -4,7 +4,6 @@ import casekit.nmr.model.DataSet; import casekit.nmr.model.Signal; import casekit.nmr.model.Spectrum; -import casekit.nmr.similarity.Similarity; import casekit.nmr.utils.Statistics; import casekit.nmr.utils.Utils; import org.openscience.cdk.exception.CDKException; @@ -21,19 +20,19 @@ public class RankedResultSDFParser { - public static List parseRankedResultSDFile(final String pathToFile, - final String nucleus) throws CDKException, FileNotFoundException { - return parseRankedResultSDFile(new FileReader(pathToFile), nucleus); + public static List parseRankedResultSDFile(final String pathToFile, final String nucleus, + final double maxAverageDeviation) throws CDKException, FileNotFoundException { + return parseRankedResultSDFile(new FileReader(pathToFile), nucleus, maxAverageDeviation); } - public static List parseRankedResultSDFileContent(final String fileContent, - final String nucleus) throws CDKException { + public static List parseRankedResultSDFileContent(final String fileContent, final 
String nucleus, + final double maxAverageDeviation) throws CDKException { final InputStream inputStream = new ByteArrayInputStream(fileContent.getBytes(StandardCharsets.UTF_8)); - return parseRankedResultSDFile(new InputStreamReader(inputStream), nucleus); + return parseRankedResultSDFile(new InputStreamReader(inputStream), nucleus, maxAverageDeviation); } - public static List parseRankedResultSDFile(final Reader fileReader, - final String nucleus) throws CDKException { + public static List parseRankedResultSDFile(final Reader fileReader, final String nucleus, + final double maxAverageDeviation) throws CDKException { final List dataSetList = new ArrayList<>(); final IteratingSDFReader iterator = new IteratingSDFReader(fileReader, SilentChemObjectBuilder.getInstance()); IAtomContainer structure; @@ -52,6 +51,7 @@ public static List parseRankedResultSDFile(final Reader fileReader, Double[] deviations; int signalCounter, matchedSignalIndex; List closestSignalList; + Double rmsd, averageDeviation; while (iterator.hasNext()) { structure = iterator.next(); @@ -136,11 +136,18 @@ public static List parseRankedResultSDFile(final Reader fileReader, continue; } dataSet = new DataSet(structure, predictedSpectrum, assignment, meta); - dataSet.addMetaInfo("rmsd", String.valueOf(Statistics.calculateRMSD(deviations))); - dataSet.addMetaInfo("tanimoto", String.valueOf( - Similarity.calculateTanimotoCoefficient(dataSet.getSpectrum(), experimentalSpectrum, 0, 0))); - dataSetList.add(dataSet); + averageDeviation = Statistics.calculateAverageDeviation(deviations); + if (averageDeviation + != null + && averageDeviation + <= maxAverageDeviation) { + dataSet.addMetaInfo("averageDeviation", String.valueOf(averageDeviation)); + rmsd = Statistics.calculateRMSD(deviations); + dataSet.addMetaInfo("rmsd", String.valueOf(rmsd)); + + dataSetList.add(dataSet); + } } // pre-sort by RMSD value dataSetList.sort((dataSet1, dataSet2) -> { From 866ccf4276cadc3e5eeabc6b3c9c521be09d99d0 Mon Sep 17 
00:00:00 2001 From: michaelwenk Date: Tue, 13 Jul 2021 21:21:10 +0200 Subject: [PATCH 267/405] feat: added usePrediction to ElucidationOptions --- src/casekit/nmr/lsd/PyLSDInputFileBuilder.java | 14 ++++++++------ src/casekit/nmr/lsd/model/ElucidationOptions.java | 1 + 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java b/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java index a461c14..1524623 100644 --- a/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java +++ b/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java @@ -197,9 +197,9 @@ private static String buildMULT(final Correlation correlation, final int index, .append(" ") .append(correlation.getAtomType()) .append(" ") - .append(hybridizationStringBuilder.toString()) + .append(hybridizationStringBuilder) .append(" ") - .append(attachedProtonsCountStringBuilder.toString()); + .append(attachedProtonsCountStringBuilder); if (j >= 2) { stringBuilder.append("; equivalent to ") @@ -499,10 +499,12 @@ public static String buildPyLSDInputFileContent(final Data data, final String mf collection.get("COSY") .add(buildCOSY(correlation, i, data, indicesMap, elucidationOptions.getCosyP3(), elucidationOptions.getCosyP4())); - collection.get("SHIX") - .add(buildSHIX(correlation, i, indicesMap)); - collection.get("SHIH") - .add(buildSHIH(correlation, i, indicesMap)); + if (elucidationOptions.isUsePrediction()) { + collection.get("SHIX") + .add(buildSHIX(correlation, i, indicesMap)); + collection.get("SHIH") + .add(buildSHIH(correlation, i, indicesMap)); + } } collection.keySet() diff --git a/src/casekit/nmr/lsd/model/ElucidationOptions.java b/src/casekit/nmr/lsd/model/ElucidationOptions.java index 84822d8..820e463 100644 --- a/src/casekit/nmr/lsd/model/ElucidationOptions.java +++ b/src/casekit/nmr/lsd/model/ElucidationOptions.java @@ -21,4 +21,5 @@ public class ElucidationOptions { private int hmbcP4; private int cosyP3; private int cosyP4; + private boolean usePrediction; } From 
d9c1a6ddee1d5b265dee80662afff21c5e2577ad Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Tue, 13 Jul 2021 21:22:54 +0200 Subject: [PATCH 268/405] chore: improvement of outlier detection and removal methods --- src/casekit/nmr/utils/Statistics.java | 29 ++++++++++++--------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/src/casekit/nmr/utils/Statistics.java b/src/casekit/nmr/utils/Statistics.java index 1c28853..75c1227 100644 --- a/src/casekit/nmr/utils/Statistics.java +++ b/src/casekit/nmr/utils/Statistics.java @@ -17,11 +17,8 @@ public class Statistics { * * @return new array list without values outside the generated boundaries */ - public static List removeOutliers(final List input, final double multiplierIQR) { - final ArrayList inputWithoutOutliers = new ArrayList<>(input); - inputWithoutOutliers.removeAll(getOutliers(inputWithoutOutliers, multiplierIQR)); - - return inputWithoutOutliers; + public static void removeOutliers(final List input, final double multiplierIQR) { + input.removeAll(getOutliers(input, multiplierIQR)); } /** @@ -31,27 +28,27 @@ public static List removeOutliers(final List input, final double * @return */ public static List getOutliers(final List input, final double multiplierIQR) { - final ArrayList outliers = new ArrayList<>(); + final List outliers = new ArrayList<>(); if (input.size() <= 1) { return outliers; } Collections.sort(input); - final ArrayList data1 = new ArrayList<>(input.subList(0, input.size() - / 2)); - final ArrayList data2; + final List data1 = input.subList(0, input.size() + / 2); + final List data2; if (input.size() % 2 == 0) { - data2 = new ArrayList<>(input.subList(input.size() - / 2, input.size())); + data2 = input.subList(input.size() + / 2, input.size()); } else { - data2 = new ArrayList<>(input.subList(input.size() - / 2 - + 1, input.size())); + data2 = input.subList(input.size() + / 2 + + 1, input.size()); } - final double q1 = getMedian(new ArrayList<>(data1)); - final double q3 = 
getMedian(new ArrayList<>(data2)); + final double q1 = getMedian(data1); + final double q3 = getMedian(data2); final double iqr = q3 - q1; final double lowerBound = q1 From 3d4a7f165ec074e566859572c64ce30653abc7e5 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Tue, 13 Jul 2021 21:23:41 +0200 Subject: [PATCH 269/405] feat: added general SDF parser --- src/casekit/nmr/utils/SDFParser.java | 64 ++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) create mode 100644 src/casekit/nmr/utils/SDFParser.java diff --git a/src/casekit/nmr/utils/SDFParser.java b/src/casekit/nmr/utils/SDFParser.java new file mode 100644 index 0000000..4b41129 --- /dev/null +++ b/src/casekit/nmr/utils/SDFParser.java @@ -0,0 +1,64 @@ +package casekit.nmr.utils; + +import casekit.nmr.model.DataSet; +import casekit.nmr.model.ExtendedConnectionMatrix; +import org.openscience.cdk.exception.CDKException; +import org.openscience.cdk.interfaces.IAtomContainer; +import org.openscience.cdk.interfaces.IMolecularFormula; +import org.openscience.cdk.io.iterator.IteratingSDFReader; +import org.openscience.cdk.silent.SilentChemObjectBuilder; +import org.openscience.cdk.tools.CDKHydrogenAdder; +import org.openscience.cdk.tools.manipulator.AtomContainerManipulator; + +import java.io.*; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +public class SDFParser { + + public static List parseSDFile(final String pathToFile) throws CDKException, FileNotFoundException { + return parseSDFile(new FileReader(pathToFile)); + } + + public static List parseSDFileContent(final String fileContent) throws CDKException { + final InputStream inputStream = new ByteArrayInputStream(fileContent.getBytes(StandardCharsets.UTF_8)); + return parseSDFile(new InputStreamReader(inputStream)); + } + + public static List parseSDFile(final Reader fileReader) throws CDKException { + final List dataSetList = new ArrayList<>(); + final 
IteratingSDFReader iterator = new IteratingSDFReader(fileReader, SilentChemObjectBuilder.getInstance()); + IAtomContainer structure; + Map meta; + final CDKHydrogenAdder hydrogenAdder = CDKHydrogenAdder.getInstance(SilentChemObjectBuilder.getInstance()); + IMolecularFormula mf; + DataSet dataSet; + + while (iterator.hasNext()) { + structure = iterator.next(); + AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(structure); + hydrogenAdder.addImplicitHydrogens(structure); + Utils.setAromaticityAndKekulize(structure); + meta = new HashMap<>(); + meta.put("title", structure.getTitle()); + mf = Utils.getMolecularFormulaFromAtomContainer(structure); + meta.put("mf", Utils.molecularFormularToString(mf)); + try { + final String smiles = Utils.getSmilesFromAtomContainer(structure); + meta.put("smiles", smiles); + } catch (final CDKException e) { + e.printStackTrace(); + } + dataSet = new DataSet(); + dataSet.setStructure(new ExtendedConnectionMatrix(structure)); + dataSet.setMeta(meta); + + dataSetList.add(dataSet); + } + + return dataSetList; + } +} From abddd24d7017ad96450d802bfd23efbaf89ffda1 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Tue, 13 Jul 2021 21:25:50 +0200 Subject: [PATCH 270/405] chore: switched solvent and nucleus parameter --- src/casekit/nmr/prediction/Prediction.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/casekit/nmr/prediction/Prediction.java b/src/casekit/nmr/prediction/Prediction.java index 0a86982..1c988a9 100644 --- a/src/casekit/nmr/prediction/Prediction.java +++ b/src/casekit/nmr/prediction/Prediction.java @@ -60,7 +60,7 @@ public class Prediction { * @return */ public static DataSet predict1D(final Map> hoseCodeShiftStatistics, - final IAtomContainer structure, final String solvent, final String nucleus) { + final IAtomContainer structure, final String nucleus, final String solvent) { final int minMatchingSphere = 1; final Spectrum spectrum = new Spectrum(); spectrum.setNuclei(new 
String[]{nucleus}); @@ -145,8 +145,8 @@ public static DataSet predict1D(final Map> hoseCod public static DataSet predict2D(final Map> hoseCodeShiftStatistics, final IAtomContainer structure, final String[] nuclei, final String solvent, final int minPathLength, final int maxPathLength) { - final DataSet predictionDim1 = predict1D(hoseCodeShiftStatistics, structure, solvent, nuclei[0]); - final DataSet predictionDim2 = predict1D(hoseCodeShiftStatistics, structure, solvent, nuclei[1]); + final DataSet predictionDim1 = predict1D(hoseCodeShiftStatistics, structure, nuclei[0], solvent); + final DataSet predictionDim2 = predict1D(hoseCodeShiftStatistics, structure, nuclei[1], solvent); return Prediction.predict2D(structure, predictionDim1.getSpectrum(), predictionDim2.getSpectrum(), predictionDim1.getAssignment(), predictionDim2.getAssignment(), minPathLength, maxPathLength); From cd4df4d36bc0b54ae835faad872780c9d2fe3c07 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Tue, 13 Jul 2021 21:27:21 +0200 Subject: [PATCH 271/405] feat: added withExplicitH parameter to HOSE code statistic collection and usage of outlier removal --- .../nmr/analysis/HOSECodeShiftStatistics.java | 72 ++++++++++--------- 1 file changed, 39 insertions(+), 33 deletions(-) diff --git a/src/casekit/nmr/analysis/HOSECodeShiftStatistics.java b/src/casekit/nmr/analysis/HOSECodeShiftStatistics.java index 8692784..c10ea3b 100644 --- a/src/casekit/nmr/analysis/HOSECodeShiftStatistics.java +++ b/src/casekit/nmr/analysis/HOSECodeShiftStatistics.java @@ -26,8 +26,9 @@ public class HOSECodeShiftStatistics { .create(); //.setPrettyPrinting() public static Map>> collectHOSECodeShifts(final List dataSetList, - final Integer maxSphere) { - return collectHOSECodeShifts(dataSetList, maxSphere, new HashMap<>()); + final Integer maxSphere, + final boolean withExplicitH) { + return collectHOSECodeShifts(dataSetList, maxSphere, withExplicitH, new HashMap<>()); } /** @@ -41,6 +42,7 @@ public static Map>> 
collectHOSECodeShifts(final */ public static Map>> collectHOSECodeShifts(final List dataSetList, final Integer maxSphere, + final boolean withExplicitH, final Map>> hoseCodeShifts) { IAtomContainer structure; Signal signal; @@ -53,33 +55,35 @@ public static Map>> collectHOSECodeShifts(final for (final DataSet dataSet : dataSetList) { structure = dataSet.getStructure() .toAtomContainer(); - if (casekit.nmr.utils.Utils.containsExplicitHydrogens(structure)) { - System.out.println("!!!Dataset skipped because of previously set explicit hydrogens!!!"); + if (Utils.containsExplicitHydrogens(structure)) { + System.out.println("!!!Dataset skipped must not contain (previously set) explicit hydrogens!!!"); continue; } - try { - // create atom index map to know which indices the explicit hydrogens will have - atomIndexMap = new HashMap<>(); - int nextAtomIndexExplicitH = structure.getAtomCount(); - for (int i = 0; i - < structure.getAtomCount(); i++) { - if (structure.getAtom(i) - .getImplicitHydrogenCount() - != null) { - for (int j = 0; j - < structure.getAtom(i) - .getImplicitHydrogenCount(); j++) { - atomIndexMap.put(nextAtomIndexExplicitH, i); - nextAtomIndexExplicitH++; + // create atom index map to know which indices the explicit hydrogens will have + atomIndexMap = new HashMap<>(); + if (withExplicitH) { + try { + int nextAtomIndexExplicitH = structure.getAtomCount(); + for (int i = 0; i + < structure.getAtomCount(); i++) { + if (structure.getAtom(i) + .getImplicitHydrogenCount() + != null) { + for (int j = 0; j + < structure.getAtom(i) + .getImplicitHydrogenCount(); j++) { + atomIndexMap.put(nextAtomIndexExplicitH, i); + nextAtomIndexExplicitH++; + } } } - } - casekit.nmr.utils.Utils.convertImplicitToExplicitHydrogens(structure); - Utils.setAromaticityAndKekulize(structure); - } catch (final CDKException e) { - e.printStackTrace(); - continue; + Utils.convertImplicitToExplicitHydrogens(structure); + Utils.setAromaticityAndKekulize(structure); + } catch (final 
CDKException e) { + e.printStackTrace(); + continue; + } } solvent = dataSet.getSpectrum() .getSolvent(); @@ -88,8 +92,8 @@ public static Map>> collectHOSECodeShifts(final || solvent.equals("")) { solvent = "Unknown"; } - atomTypeSpectrum = casekit.nmr.utils.Utils.getAtomTypeFromNucleus(dataSet.getSpectrum() - .getNuclei()[0]); + atomTypeSpectrum = Utils.getAtomTypeFromNucleus(dataSet.getSpectrum() + .getNuclei()[0]); for (int i = 0; i < structure.getAtomCount(); i++) { signalIndices = null; @@ -149,13 +153,13 @@ public static Map> buildHOSECodeShiftStatistics( hoseCodeShiftStatistics.put(hoseCodes.getKey(), new HashMap<>()); for (final Map.Entry> solvents : hoseCodes.getValue() .entrySet()) { - values = solvents.getValue(); //casekit.nmr.HOSECodeUtilities.removeOutliers(solvents.getValue(), 1.5); + values = solvents.getValue(); + Statistics.removeOutliers(values, 1.5); hoseCodeShiftStatistics.get(hoseCodes.getKey()) .put(solvents.getKey(), new Double[]{(double) values.size(), Collections.min(values), - Statistics.getMean(values), - // casekit.nmr.HOSECodeUtilities.getRMS(values), - Statistics.getMedian(values), Collections.max(values)}); + Statistics.getMean(values), Statistics.getMedian(values), + Collections.max(values)}); } } @@ -165,19 +169,21 @@ public static Map> buildHOSECodeShiftStatistics( public static Map> buildHOSECodeShiftStatistics(final String[] pathsToNMRShiftDBs, final String[] pathsToCOCONUTs, final String[] nuclei, - final Integer maxSphere) { + final Integer maxSphere, + final boolean withExplicitH) { try { final Map>> hoseCodeShifts = new HashMap<>(); for (int i = 0; i < pathsToNMRShiftDBs.length; i++) { HOSECodeShiftStatistics.collectHOSECodeShifts( - NMRShiftDB.getDataSetsFromNMRShiftDB(pathsToNMRShiftDBs[i], nuclei), maxSphere, hoseCodeShifts); + NMRShiftDB.getDataSetsFromNMRShiftDB(pathsToNMRShiftDBs[i], nuclei), maxSphere, withExplicitH, + hoseCodeShifts); } for (int i = 0; i < pathsToCOCONUTs.length; i++) { 
HOSECodeShiftStatistics.collectHOSECodeShifts( COCONUT.getDataSetsWithShiftPredictionFromCOCONUT(pathsToCOCONUTs[i], nuclei), maxSphere, - hoseCodeShifts); + withExplicitH, hoseCodeShifts); } return HOSECodeShiftStatistics.buildHOSECodeShiftStatistics(hoseCodeShifts); } catch (final FileNotFoundException | CDKException e) { From 0f2b27fd23cdc3da934005da56b0b09f2d44e25c Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Tue, 20 Jul 2021 18:05:54 +0200 Subject: [PATCH 272/405] feat: added default values for fluorine --- src/casekit/nmr/lsd/Constants.java | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/src/casekit/nmr/lsd/Constants.java b/src/casekit/nmr/lsd/Constants.java index de34ce5..eb60abb 100644 --- a/src/casekit/nmr/lsd/Constants.java +++ b/src/casekit/nmr/lsd/Constants.java @@ -29,6 +29,7 @@ private static Map createDefaultHybridizationMap() { defaultHybridization.put("O", "(2 3)"); defaultHybridization.put("S", "(1 2 3)"); defaultHybridization.put("I", "3"); + defaultHybridization.put("F", "3"); return Collections.unmodifiableMap(defaultHybridization); } @@ -44,7 +45,8 @@ private static Map createDefaultProtonsCountPerValencyMap() { defaultProtonsCountPerValency.put("S6", "(0 1 2 3)"); defaultProtonsCountPerValency.put("S246", "(0 1 2 3)"); defaultProtonsCountPerValency.put("O", "(0 1)"); - defaultProtonsCountPerValency.put("I", "(0 1)"); + defaultProtonsCountPerValency.put("I", "0"); + defaultProtonsCountPerValency.put("F", "0"); return defaultProtonsCountPerValency; } @@ -56,6 +58,7 @@ private static Map createDefaultAtomLabelMap() { defaultAtomLabel.put("O", "O"); defaultAtomLabel.put("S", "S246"); defaultAtomLabel.put("I", "I"); + defaultAtomLabel.put("F", "F"); return Collections.unmodifiableMap(defaultAtomLabel); } @@ -66,15 +69,23 @@ private static Map> createHybridizationConversionMa // nucleus -> hybridization string -> number final Map> hybridizationConversionMap = new HashMap<>(); 
hybridizationConversionMap.put("13C", new HashMap<>()); - hybridizationConversionMap.get("13C").put("PLANAR3", 3); - hybridizationConversionMap.get("13C").put("SP3", 3); - hybridizationConversionMap.get("13C").put("SP2", 2); - hybridizationConversionMap.get("13C").put("SP1", 1); + hybridizationConversionMap.get("13C") + .put("PLANAR3", 3); + hybridizationConversionMap.get("13C") + .put("SP3", 3); + hybridizationConversionMap.get("13C") + .put("SP2", 2); + hybridizationConversionMap.get("13C") + .put("SP1", 1); hybridizationConversionMap.put("15N", new HashMap<>()); - hybridizationConversionMap.get("15N").put("PLANAR3", 3); - hybridizationConversionMap.get("15N").put("SP3", 3); - hybridizationConversionMap.get("15N").put("SP2", 2); - hybridizationConversionMap.get("15N").put("SP1", 1); + hybridizationConversionMap.get("15N") + .put("PLANAR3", 3); + hybridizationConversionMap.get("15N") + .put("SP3", 3); + hybridizationConversionMap.get("15N") + .put("SP2", 2); + hybridizationConversionMap.get("15N") + .put("SP1", 1); return Collections.unmodifiableMap(hybridizationConversionMap); } From 01a681ce2f34e6f7114944a2b680cebedfb1b1c5 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Tue, 20 Jul 2021 18:22:49 +0200 Subject: [PATCH 273/405] fix: wrong number of proton equivalences, i.e. 
in case of CH3 groups --- .../nmr/lsd/PyLSDInputFileBuilder.java | 151 +++++++++++------- 1 file changed, 97 insertions(+), 54 deletions(-) diff --git a/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java b/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java index 1524623..091be2e 100644 --- a/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java +++ b/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java @@ -4,6 +4,7 @@ import casekit.nmr.model.nmrdisplayer.Correlation; import casekit.nmr.model.nmrdisplayer.Data; import casekit.nmr.model.nmrdisplayer.Link; +import casekit.nmr.utils.Statistics; import casekit.nmr.utils.Utils; import java.text.SimpleDateFormat; @@ -47,7 +48,8 @@ private static String buildELIM(final int elimP1, final int elimP2) { + elimP2; } - private static Map buildIndicesMap(final Data data, final Map elementCounts) { + private static Map buildIndicesMap(final List correlationList, + final Map elementCounts) { // index in correlation data -> [atom type, indices in PyLSD file...] final Map indicesMap = new HashMap<>(); // init element indices within correlations with same order as in correlation data input @@ -60,15 +62,11 @@ private static Map buildIndicesMap(final Data data, final Map int heavyAtomIndexInPyLSDFile = 1; int protonIndexInPyLSDFile = totalHeavyAtomsCount + 1; - int protonsToInsert; + int protonsToInsert, protonsCount; Correlation correlation; for (int i = 0; i - < data.getCorrelations() - .getValues() - .size(); i++) { - correlation = data.getCorrelations() - .getValues() - .get(i); + < correlationList.size(); i++) { + correlation = correlationList.get(i); // set entry for each correlation with consideration of equivalences if (correlation.getAtomType() .equals("H")) { @@ -78,11 +76,22 @@ private static Map buildIndicesMap(final Data data, final Map .equals("hsqc") || link.getExperimentType() .equals("hmqc")) { - protonsToInsert += data.getCorrelations() - .getValues() - .get(link.getMatch() - .get(0)) - .getEquivalence(); + for (final int matchIndex : 
link.getMatch()) { + protonsCount = correlationList.get(matchIndex) + .getProtonsCount() + .get(0); + if (protonsCount + == 3) { + protonsCount = 1; + } + protonsToInsert += correlationList.get(matchIndex) + .getEquivalence() + * (protonsCount + / correlationList.get(matchIndex) + .getAttachment() + .get("H") + .size()); + } } } indicesMap.put(i, new Object[1 @@ -200,6 +209,10 @@ private static String buildMULT(final Correlation correlation, final int index, .append(hybridizationStringBuilder) .append(" ") .append(attachedProtonsCountStringBuilder); + if (!correlation.isPseudo()) { + stringBuilder.append("; ") + .append(buildShiftString(correlation)); + } if (j >= 2) { stringBuilder.append("; equivalent to ") @@ -211,27 +224,59 @@ private static String buildMULT(final Correlation correlation, final int index, return stringBuilder.toString(); } - private static String buildHSQC(final Correlation correlation, final int index, + private static String buildShiftString(final Correlation correlation) { + return correlation.isPseudo() + ? "?" 
+ : String.valueOf(Statistics.roundDouble(correlation.getSignal() + .getDelta(), 2)); + } + + private static String buildShiftsComment(final Correlation correlation1, final Correlation correlation2) { + return "; " + + buildShiftString(correlation1) + + " -> " + + buildShiftString(correlation2); + } + + private static String buildHSQC(final List correlationList, final int index, final Map indicesMap) { + final Correlation correlation = correlationList.get(index); if (correlation.getAtomType() .equals("H")) { return null; } final StringBuilder stringBuilder = new StringBuilder(); + final Map protonEquivalenceIndexMap = new HashMap<>(); + int protonsCount; for (final Link link : correlation.getLink()) { if (link.getExperimentType() .equals("hsqc") || link.getExperimentType() .equals("hmqc")) { for (final int matchIndex : link.getMatch()) { - // for each equivalence of heavy atom and attached equivalent proton + protonEquivalenceIndexMap.putIfAbsent(matchIndex, 1); // k = 1 in indicesMap + // for each equivalence of heavy atom and attached protons for (int k = 1; k < indicesMap.get(index).length; k++) { - stringBuilder.append("HSQC ") - .append(indicesMap.get(index)[k]) - .append(" ") - .append(indicesMap.get(matchIndex)[k]) - .append("\n"); + protonsCount = correlation.getProtonsCount() + .get(0); + // consider CH3 same as CH (avoid multiple entries in PyLSD input file) + if (protonsCount + == 3) { + protonsCount = 1; + } + for (int p = 0; p + < Math.min(protonsCount, correlationList.get(matchIndex) + .getEquivalence()); p++) { + stringBuilder.append("HSQC ") + .append(indicesMap.get(index)[k]) + .append(" ") + .append(indicesMap.get(matchIndex)[protonEquivalenceIndexMap.get(matchIndex)]) + .append(buildShiftsComment(correlation, correlationList.get(matchIndex))) + .append("\n"); + protonEquivalenceIndexMap.put(matchIndex, protonEquivalenceIndexMap.get(matchIndex) + + 1); + } } } } @@ -240,8 +285,9 @@ private static String buildHSQC(final Correlation correlation, 
final int index, return stringBuilder.toString(); } - private static String buildHMBC(final Correlation correlation, final int index, final Data data, + private static String buildHMBC(final List correlationList, final int index, final Map indicesMap, final int hmbcP3, final int hmbcP4) { + final Correlation correlation = correlationList.get(index); if (correlation.getAtomType() .equals("H")) { return null; @@ -259,22 +305,22 @@ private static String buildHMBC(final Correlation correlation, final int index, for (int l = 1; l < indicesMap.get(matchIndex).length; l++) { // only add an HMBC correlation if there is no direct link via HSQC and the equivalence index is not equal - if (!(data.getCorrelations() - .getValues() - .get(matchIndex) - .getAttachment() - .containsKey(correlation.getAtomType()) - && data.getCorrelations() - .getValues() - .get(matchIndex) - .getAttachment() - .get(correlation.getAtomType()) - .contains(index) + if (!(correlationList.get(matchIndex) + .getAttachment() + .containsKey(correlation.getAtomType()) + && correlationList.get(matchIndex) + .getAttachment() + .get(correlation.getAtomType()) + .contains(index) && l == k)) { uniqueSet.add(indicesMap.get(index)[k] + " " - + indicesMap.get(matchIndex)[l]); + + indicesMap.get(matchIndex)[l] + + " " + + defaultBondDistance + + buildShiftsComment(correlation, + correlationList.get(matchIndex))); } } } @@ -285,15 +331,14 @@ private static String buildHMBC(final Correlation correlation, final int index, return uniqueSet.stream() .map(str -> "HMBC " + str - + " " - + defaultBondDistance + "\n") .reduce("", (strAll, str) -> strAll + str); } - private static String buildCOSY(final Correlation correlation, final int index, final Data data, + private static String buildCOSY(final List correlationList, final int index, final Map indicesMap, final int cosyP3, final int cosyP4) { + final Correlation correlation = correlationList.get(index); if (!correlation.getAtomType() .equals("H")) { return null; @@ 
-307,11 +352,9 @@ private static String buildCOSY(final Correlation correlation, final int index, .equals("cosy")) { for (final int matchIndex : link.getMatch()) { // only add an COSY correlation if the two signals there is not equivalent - if (!data.getCorrelations() - .getValues() - .get(matchIndex) - .getId() - .equals(correlation.getId())) { + if (!correlationList.get(matchIndex) + .getId() + .equals(correlation.getId())) { for (int k = 1; k < indicesMap.get(index).length; k++) { // only allow COSY values between possible equivalent protons and only one another non-equivalent proton @@ -319,7 +362,11 @@ private static String buildCOSY(final Correlation correlation, final int index, == 2) { uniqueSet.add(indicesMap.get(index)[k] + " " - + indicesMap.get(matchIndex)[1]); + + indicesMap.get(matchIndex)[1] + + " " + + defaultBondDistance + + buildShiftsComment(correlation, + correlationList.get(matchIndex))); } } } @@ -330,8 +377,6 @@ private static String buildCOSY(final Correlation correlation, final int index, return uniqueSet.stream() .map(str -> "COSY " + str - + " " - + defaultBondDistance + "\n") .reduce("", (strAll, str) -> strAll + str); @@ -454,6 +499,8 @@ public static String buildPyLSDInputFileContent(final Data data, final String mf if (mf != null && !hasErrors) { + final List correlationList = data.getCorrelations() + .getValues(); final Map elementCounts = new LinkedHashMap<>(Utils.getMolecularFormulaElementCounts(mf)); final StringBuilder stringBuilder = new StringBuilder(); // create header @@ -479,25 +526,21 @@ public static String buildPyLSDInputFileContent(final Data data, final String mf collection.put("SHIX", new ArrayList<>()); collection.put("SHIH", new ArrayList<>()); // index in correlation data -> [atom type, index in PyLSD file] - final Map indicesMap = buildIndicesMap(data, elementCounts); + final Map indicesMap = buildIndicesMap(correlationList, elementCounts); Correlation correlation; for (int i = 0; i - < data.getCorrelations() - 
.getValues() - .size(); i++) { - correlation = data.getCorrelations() - .getValues() - .get(i); + < correlationList.size(); i++) { + correlation = correlationList.get(i); collection.get("MULT") .add(buildMULT(correlation, i, indicesMap, detectedHybridizations)); collection.get("HSQC") - .add(buildHSQC(correlation, i, indicesMap)); + .add(buildHSQC(correlationList, i, indicesMap)); collection.get("HMBC") - .add(buildHMBC(correlation, i, data, indicesMap, elucidationOptions.getHmbcP3(), + .add(buildHMBC(correlationList, i, indicesMap, elucidationOptions.getHmbcP3(), elucidationOptions.getHmbcP4())); collection.get("COSY") - .add(buildCOSY(correlation, i, data, indicesMap, elucidationOptions.getCosyP3(), + .add(buildCOSY(correlationList, i, indicesMap, elucidationOptions.getCosyP3(), elucidationOptions.getCosyP4())); if (elucidationOptions.isUsePrediction()) { collection.get("SHIX") From 7638f88e307ed5b86a2ae285b5c9622c47245b3f Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Wed, 21 Jul 2021 12:35:12 +0200 Subject: [PATCH 274/405] feat: added methods for files cleanup and SMILES file parsing --- src/casekit/io/FileSystem.java | 71 +++++++++++++++++++++++++++++++- src/casekit/nmr/utils/Utils.java | 14 +++++++ 2 files changed, 83 insertions(+), 2 deletions(-) diff --git a/src/casekit/io/FileSystem.java b/src/casekit/io/FileSystem.java index f68fcbf..7e42144 100644 --- a/src/casekit/io/FileSystem.java +++ b/src/casekit/io/FileSystem.java @@ -12,14 +12,23 @@ package casekit.io; +import casekit.nmr.model.DataSet; + import java.io.*; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; public class FileSystem { public static BufferedReader readFile(final String pathToFile) { try { return new BufferedReader(new FileReader(pathToFile)); - } catch (IOException e) { + } catch (final IOException e) { e.printStackTrace(); } @@ -34,10 +43,68 @@ 
public static boolean writeFile(final String pathToFile, final String content) { bufferedWriter.close(); return true; - } catch (IOException e) { + } catch (final IOException e) { e.printStackTrace(); } return false; } + + public static boolean cleanup(final String[] directoriesToCheck, final String requestID) { + boolean cleaned = false; + + for (final String dir : directoriesToCheck) { + try { + cleaned = Files.walk(Paths.get(dir)) + .map(Path::toFile) + .filter(file -> file.getAbsolutePath() + .contains(requestID)) + .allMatch(File::delete); + + } catch (final IOException e) { + System.out.println("Not all files could be deleted!"); + e.printStackTrace(); + } + } + + return cleaned; + } + + public static List getSmilesListFromFile(final String pathToSmilesFile) { + final List smilesList = new ArrayList<>(); + try { + final BufferedReader bufferedReader = FileSystem.readFile(pathToSmilesFile); + if (bufferedReader + != null) { + String line; + while ((line = bufferedReader.readLine()) + != null) { + smilesList.add(line); + } + bufferedReader.close(); + } + } catch (final IOException e) { + e.printStackTrace(); + } + + return smilesList; + } + + public static List retrieveFromSmilesFile(final String pathToResultsFile) { + final List dataSetList = new ArrayList<>(); + final List smilesList = FileSystem.getSmilesListFromFile(pathToResultsFile); + + DataSet dataSet; + Map meta; + for (final String smiles : smilesList) { + meta = new HashMap<>(); + meta.put("smiles", smiles); + dataSet = new DataSet(); + dataSet.setMeta(meta); + + dataSetList.add(dataSet); + } + + return dataSetList; + } } diff --git a/src/casekit/nmr/utils/Utils.java b/src/casekit/nmr/utils/Utils.java index 04f633b..3609625 100644 --- a/src/casekit/nmr/utils/Utils.java +++ b/src/casekit/nmr/utils/Utils.java @@ -83,6 +83,20 @@ public static String getSmilesFromAtomContainer(final IAtomContainer ac) throws return smilesGenerator.create(ac); } + public static String getAlphabeticMF(final String mf) { + 
final StringBuilder mfAlphabeticStringBuilder = new StringBuilder(); + final Map mfAlphabeticMap = new TreeMap<>(getMolecularFormulaElementCounts(mf)); + for (final Map.Entry entry : mfAlphabeticMap.entrySet()) { + mfAlphabeticStringBuilder.append(entry.getKey()); + if (entry.getValue() + > 1) { + mfAlphabeticStringBuilder.append(entry.getValue()); + } + } + + return mfAlphabeticStringBuilder.toString(); + } + public static Map getMolecularFormulaElementCounts(final String mf) { final LinkedHashMap counts = new LinkedHashMap<>(); final List elements = new ArrayList<>(); From eb12c6a2cd00dc964089231fc5e8d1c9c9168586 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Wed, 21 Jul 2021 13:45:49 +0200 Subject: [PATCH 275/405] chore: marked RankedResultSDFParser.java as deprecated --- src/casekit/nmr/lsd/RankedResultSDFParser.java | 1 + 1 file changed, 1 insertion(+) diff --git a/src/casekit/nmr/lsd/RankedResultSDFParser.java b/src/casekit/nmr/lsd/RankedResultSDFParser.java index d202e89..6a6457a 100644 --- a/src/casekit/nmr/lsd/RankedResultSDFParser.java +++ b/src/casekit/nmr/lsd/RankedResultSDFParser.java @@ -18,6 +18,7 @@ import java.nio.charset.StandardCharsets; import java.util.*; +@Deprecated public class RankedResultSDFParser { public static List parseRankedResultSDFile(final String pathToFile, final String nucleus, From abfa9ca3c4f7e85908e7d63e445cabda8fb7384e Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Thu, 29 Jul 2021 09:31:17 +0200 Subject: [PATCH 276/405] feat: added a method to build HOSE codes statistics by list of datasets --- src/casekit/nmr/analysis/HOSECodeShiftStatistics.java | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/casekit/nmr/analysis/HOSECodeShiftStatistics.java b/src/casekit/nmr/analysis/HOSECodeShiftStatistics.java index c10ea3b..c338db5 100644 --- a/src/casekit/nmr/analysis/HOSECodeShiftStatistics.java +++ b/src/casekit/nmr/analysis/HOSECodeShiftStatistics.java @@ -193,6 +193,13 @@ public static Map> 
buildHOSECodeShiftStatistics(fi return new HashMap<>(); } + public static Map> buildHOSECodeShiftStatistics(final List dataSetList, + final Integer maxSphere, + final boolean withExplicitH) { + return HOSECodeShiftStatistics.buildHOSECodeShiftStatistics( + collectHOSECodeShifts(dataSetList, maxSphere, withExplicitH)); + } + public static boolean writeHOSECodeShiftStatistics(final Map> hoseCodeShifts, final String pathToJsonFile) { try { From 0bfc272c94704ae91f884d57e145b7263a9bf262 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Thu, 29 Jul 2021 09:32:42 +0200 Subject: [PATCH 277/405] feat: set the multiplicity of carbon atoms when predicting spectra --- src/casekit/nmr/model/Spectrum.java | 8 ++++++++ src/casekit/nmr/prediction/Prediction.java | 10 ++++++++-- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/src/casekit/nmr/model/Spectrum.java b/src/casekit/nmr/model/Spectrum.java index 85b730b..2db2734 100644 --- a/src/casekit/nmr/model/Spectrum.java +++ b/src/casekit/nmr/model/Spectrum.java @@ -140,6 +140,10 @@ public Integer addSignal(final Signal signal) { */ public List checkForEquivalences(final Signal signal, final double[] pickPrecisions, final boolean checkMultiplicity) { + if (signal.getShift(0) + == null) { + return null; + } // check for equivalent signals in all dimensions final List closestSignalIndexList = this.pickByClosestShift(signal.getShift(0), 0, pickPrecisions[0]); for (int dim = 1; dim @@ -172,6 +176,10 @@ public Integer addSignal(final Signal signal, final double[] pickPrecisions, fin final List closestSignalIndexList = this.checkForEquivalences(signal, pickPrecisions, checkMultiplicity); + if (closestSignalIndexList + == null) { + return null; + } // if no equivalent signal was found then just add as new signal if (closestSignalIndexList.isEmpty()) { this.addSignalWithoutEquivalenceSearch(signal); diff --git a/src/casekit/nmr/prediction/Prediction.java b/src/casekit/nmr/prediction/Prediction.java index 1c988a9..3d13253 100644 
--- a/src/casekit/nmr/prediction/Prediction.java +++ b/src/casekit/nmr/prediction/Prediction.java @@ -86,7 +86,7 @@ public static DataSet predict1D(final Map> hoseCod for (int i = 0; i < structure.getAtomCount(); i++) { - atomTypeSpectrum = casekit.nmr.utils.Utils.getAtomTypeFromNucleus(nucleus); + atomTypeSpectrum = Utils.getAtomTypeFromNucleus(nucleus); if (structure.getAtom(i) .getSymbol() .equals(atomTypeSpectrum)) { @@ -106,11 +106,17 @@ public static DataSet predict1D(final Map> hoseCod signal = new Signal(); signal.setNuclei(spectrum.getNuclei()); signal.setEquivalencesCount(1); - // signal.setMultiplicity(); + if (atomTypeSpectrum.equals("C")) { + signal.setMultiplicity(Utils.getMultiplicityFromProtonsCount( + AtomContainerManipulator.countHydrogens(structure, structure.getAtom(i)))); + } + signal.setKind("signal"); signal.setShifts(new Double[]{shift}); addedSignalIndex = spectrum.addSignal(signal); if (addedSignalIndex + == null + || addedSignalIndex >= assignment.getSetAssignmentsCount(0)) { assignment.addAssignment(0, new int[]{i}); } else { From 7190371add32c472ef4f9d99c446fa42cdd82ea1 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Thu, 29 Jul 2021 09:33:29 +0200 Subject: [PATCH 278/405] chore: improvements in MultiplicitySectionsBuilder --- .../analysis/MultiplicitySectionsBuilder.java | 85 +++++++++++++++---- 1 file changed, 69 insertions(+), 16 deletions(-) diff --git a/src/casekit/nmr/analysis/MultiplicitySectionsBuilder.java b/src/casekit/nmr/analysis/MultiplicitySectionsBuilder.java index 3d773ff..465a503 100644 --- a/src/casekit/nmr/analysis/MultiplicitySectionsBuilder.java +++ b/src/casekit/nmr/analysis/MultiplicitySectionsBuilder.java @@ -32,14 +32,15 @@ public MultiplicitySectionsBuilder() { private void init() { this.multiplicities.clear(); - this.multiplicities.add("S"); - this.multiplicities.add("D"); - this.multiplicities.add("T"); - this.multiplicities.add("Q"); + this.multiplicities.add("s"); + this.multiplicities.add("d"); + 
this.multiplicities.add("t"); + this.multiplicities.add("q"); + this.multiplicities.add("unknown"); this.minLimit = -20; this.maxLimit = 260; this.stepSize = 5; - this.steps = (this.maxLimit - this.minLimit) / this.stepSize; // ppm range from -20 to 260 in 5 ppm steps + this.updateSteps(); // ppm range from -20 to 260 in 5 ppm steps } /** @@ -53,25 +54,64 @@ public void reset() { this.init(); } - public Map> buildMultiplicitySections(final Spectrum spectrum) throws CDKException { - final HashMap> multSections = new HashMap<>(); + public Map> buildMultiplicitySections(final Spectrum spectrum) throws CDKException { + final HashMap> multiplicitySections = new HashMap<>(); // init - for (final String mult : this.multiplicities) { - multSections.put(mult, new ArrayList<>()); + for (final String multiplicity : this.multiplicities) { + multiplicitySections.put(multiplicity, new ArrayList<>()); } // set the mult. sections Signal signal; - int shiftSection; - for (int i = 0; i < spectrum.getSignalCount(); i++) { + Integer shiftSection; + String multiplicity; + for (int i = 0; i + < spectrum.getSignalCount(); i++) { signal = spectrum.getSignal(i); - if ((signal == null) || (signal.getShift(0) == null) || (signal.getMultiplicity() == null) || (signal.getIntensity() == null) || (!this.multiplicities.contains(signal.getMultiplicity()))) { - throw new CDKException(Thread.currentThread().getStackTrace()[1].getMethodName() + ": signal, shift or multiplicity is missing"); + shiftSection = this.calculateShiftSection(signal); + if (shiftSection + == null) { + throw new CDKException(Thread.currentThread() + .getStackTrace()[1].getMethodName() + + ": signal or its chemical shift is missing: " + + signal); } - shiftSection = (int) ((signal.getShift(0) - this.minLimit) / this.stepSize); - multSections.get(signal.getMultiplicity()).add(shiftSection); + multiplicity = this.checkMultiplicity(signal); + if (multiplicity + == null) { + throw new CDKException(Thread.currentThread() + 
.getStackTrace()[1].getMethodName() + + ": signal multiplicity is not in list: " + + signal); + } + multiplicitySections.get(multiplicity) + .add(shiftSection); + } + + return multiplicitySections; + } + + public Integer calculateShiftSection(final Signal signal) { + if (signal + == null + || signal.getShift(0) + == null) { + return null; + } + return (int) ((signal.getShift(0) + - this.minLimit) + / this.stepSize); + } + + public String checkMultiplicity(final Signal signal) { + final String multiplicity = signal.getMultiplicity() + != null + ? signal.getMultiplicity() + : "unknown"; + if (!this.multiplicities.contains(multiplicity)) { + return null; } - return multSections; + return multiplicity; } public Set getMultiplicities() { @@ -96,6 +136,7 @@ public int getMinLimit() { public void setMinLimit(final int minLimit) { this.minLimit = minLimit; + this.updateSteps(); } public int getMaxLimit() { @@ -104,6 +145,7 @@ public int getMaxLimit() { public void setMaxLimit(final int maxLimit) { this.maxLimit = maxLimit; + this.updateSteps(); } public int getStepSize() { @@ -112,5 +154,16 @@ public int getStepSize() { public void setStepSize(final int stepSize) { this.stepSize = stepSize; + this.updateSteps(); + } + + public int getSteps() { + return this.steps; + } + + private void updateSteps() { + this.steps = (this.maxLimit + - this.minLimit) + / this.stepSize; } } From 9d00efd53acaff9e27b00912217ff7d43dc89d01 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Thu, 29 Jul 2021 12:34:47 +0200 Subject: [PATCH 279/405] chore: added dimension of spectrum --- .../nmr/analysis/MultiplicitySectionsBuilder.java | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/casekit/nmr/analysis/MultiplicitySectionsBuilder.java b/src/casekit/nmr/analysis/MultiplicitySectionsBuilder.java index 465a503..79d8213 100644 --- a/src/casekit/nmr/analysis/MultiplicitySectionsBuilder.java +++ b/src/casekit/nmr/analysis/MultiplicitySectionsBuilder.java @@ -54,8 +54,9 
@@ public void reset() { this.init(); } - public Map> buildMultiplicitySections(final Spectrum spectrum) throws CDKException { - final HashMap> multiplicitySections = new HashMap<>(); + public Map> buildMultiplicitySections(final Spectrum spectrum, + final int dim) throws CDKException { + final Map> multiplicitySections = new HashMap<>(); // init for (final String multiplicity : this.multiplicities) { multiplicitySections.put(multiplicity, new ArrayList<>()); @@ -67,7 +68,7 @@ public Map> buildMultiplicitySections(final Spectrum spect for (int i = 0; i < spectrum.getSignalCount(); i++) { signal = spectrum.getSignal(i); - shiftSection = this.calculateShiftSection(signal); + shiftSection = this.calculateShiftSection(signal, dim); if (shiftSection == null) { throw new CDKException(Thread.currentThread() @@ -90,14 +91,14 @@ public Map> buildMultiplicitySections(final Spectrum spect return multiplicitySections; } - public Integer calculateShiftSection(final Signal signal) { + public Integer calculateShiftSection(final Signal signal, final int dim) { if (signal == null - || signal.getShift(0) + || signal.getShift(dim) == null) { return null; } - return (int) ((signal.getShift(0) + return (int) ((signal.getShift(dim) - this.minLimit) / this.stepSize); } From 5da296d709e0498cc6633ac992d1be7979db0d01 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Thu, 29 Jul 2021 12:35:27 +0200 Subject: [PATCH 280/405] feat: added method to calculate the Tanimoto coefficient between two spectra --- src/casekit/nmr/similarity/Similarity.java | 39 +++++++++++++--------- 1 file changed, 23 insertions(+), 16 deletions(-) diff --git a/src/casekit/nmr/similarity/Similarity.java b/src/casekit/nmr/similarity/Similarity.java index d5a66f5..7220880 100644 --- a/src/casekit/nmr/similarity/Similarity.java +++ b/src/casekit/nmr/similarity/Similarity.java @@ -12,19 +12,19 @@ package casekit.nmr.similarity; +import casekit.nmr.analysis.MultiplicitySectionsBuilder; import casekit.nmr.model.Assignment; 
import casekit.nmr.model.Signal; import casekit.nmr.model.Spectrum; import casekit.nmr.utils.Statistics; -import org.apache.commons.lang3.ArrayUtils; import org.openscience.cdk.exception.CDKException; +import org.openscience.cdk.fingerprint.BitSetFingerprint; import org.openscience.cdk.similarity.Tanimoto; import java.util.*; public class Similarity { - /** * Checks whether two spectra contain given dimensions. * @@ -50,31 +50,38 @@ private static boolean checkDimensions(final Spectrum spectrum1, final Spectrum * @param dim2 dimension in second spectrum to take the shifts from * * @return - * - * @deprecated */ - public static Float calculateTanimotoCoefficient(final Spectrum spectrum1, final Spectrum spectrum2, final int dim1, - final int dim2) { - if (!Similarity.checkDimensions(spectrum1, spectrum2, dim1, dim2)) { + public static Double calculateTanimotoCoefficient(final Spectrum spectrum1, final Spectrum spectrum2, + final int dim1, final int dim2, + final MultiplicitySectionsBuilder multiplicitySectionsBuilder) { + if (!checkDimensions(spectrum1, spectrum2, dim1, dim2)) { return null; } - final double[] shiftsSpectrum1 = ArrayUtils.toPrimitive(spectrum1.getShifts(dim1) - .toArray( - new Double[spectrum1.getSignalCount()])); - Arrays.parallelSort(shiftsSpectrum1); - final double[] shiftsSpectrum2 = ArrayUtils.toPrimitive(spectrum2.getShifts(dim2) - .toArray( - new Double[spectrum2.getSignalCount()])); - Arrays.parallelSort(shiftsSpectrum2); try { - return Tanimoto.calculate(shiftsSpectrum1, shiftsSpectrum2); + return Tanimoto.calculate(getFingerprint(spectrum1, dim1, multiplicitySectionsBuilder), + getFingerprint(spectrum2, dim2, multiplicitySectionsBuilder)); } catch (final CDKException e) { e.printStackTrace(); } + return null; } + public static BitSetFingerprint getFingerprint(final Spectrum spectrum, final int dim, + final MultiplicitySectionsBuilder multiplicitySectionsBuilder) throws CDKException { + final BitSetFingerprint bitSetFingerprint = new 
BitSetFingerprint(multiplicitySectionsBuilder.getSteps()); + final Map> multiplicitySections = multiplicitySectionsBuilder.buildMultiplicitySections( + spectrum, dim); + for (final Map.Entry> entry : multiplicitySections.entrySet()) { + for (final int section : entry.getValue()) { + bitSetFingerprint.set(section, true); + } + } + + return bitSetFingerprint; + } + /** * Returns deviations between two already matched spectra. * From 743d5b07444ad0e7fb753e0447c2d6adc9ad9441 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Fri, 6 Aug 2021 19:20:05 +0200 Subject: [PATCH 281/405] feat: improvement of matching between two spectra by using minimal distances --- src/casekit/nmr/similarity/Distance.java | 38 ++++++++++ src/casekit/nmr/similarity/Similarity.java | 83 +++++++--------------- src/casekit/nmr/similarity/Utilities.java | 73 +++++++++++++++++++ 3 files changed, 135 insertions(+), 59 deletions(-) create mode 100644 src/casekit/nmr/similarity/Distance.java create mode 100644 src/casekit/nmr/similarity/Utilities.java diff --git a/src/casekit/nmr/similarity/Distance.java b/src/casekit/nmr/similarity/Distance.java new file mode 100644 index 0000000..36a0067 --- /dev/null +++ b/src/casekit/nmr/similarity/Distance.java @@ -0,0 +1,38 @@ +package casekit.nmr.similarity; + +public class Distance { + + private final int signalIndexSpectrum1; + private final int signalIndexSpectrum2; + private final double value; + + public Distance(final int signalIndexSpectrum1, final int signalIndexSpectrum2, final double value) { + this.signalIndexSpectrum1 = signalIndexSpectrum1; + this.signalIndexSpectrum2 = signalIndexSpectrum2; + this.value = value; + } + + public int getSignalIndexSpectrum1() { + return this.signalIndexSpectrum1; + } + + public int getSignalIndexSpectrum2() { + return this.signalIndexSpectrum2; + } + + public double getValue() { + return this.value; + } + + @Override + public String toString() { + return "Distance{" + + "signalIndexSpectrum1=" + + 
this.signalIndexSpectrum1 + + ", signalIndexSpectrum2=" + + this.signalIndexSpectrum2 + + ", value=" + + this.value + + '}'; + } +} diff --git a/src/casekit/nmr/similarity/Similarity.java b/src/casekit/nmr/similarity/Similarity.java index 7220880..b714716 100644 --- a/src/casekit/nmr/similarity/Similarity.java +++ b/src/casekit/nmr/similarity/Similarity.java @@ -21,7 +21,10 @@ import org.openscience.cdk.fingerprint.BitSetFingerprint; import org.openscience.cdk.similarity.Tanimoto; -import java.util.*; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; public class Similarity { @@ -246,7 +249,7 @@ public static Double calculateRMSD(final Spectrum spectrum1, final Spectrum spec * @param spectrum2 second spectrum * @param dim1 dimension in first spectrum to take the shifts from * @param dim2 dimension in second spectrum to take the shifts from - * @param shiftTol Tolerance value [ppm] used during spectra shift + * @param shiftTolerance Tolerance value [ppm] used during spectra shift * comparison * @param checkMultiplicity indicates whether to compare the multiplicity of matched signals * @param checkEquivalencesCount indicates whether to compare the equivalences counts of matched signals @@ -257,73 +260,35 @@ public static Double calculateRMSD(final Spectrum spectrum1, final Spectrum spec * contain the selected dimension */ public static Assignment matchSpectra(final Spectrum spectrum1, final Spectrum spectrum2, final int dim1, - final int dim2, final double shiftTol, final boolean checkMultiplicity, + final int dim2, final double shiftTolerance, final boolean checkMultiplicity, final boolean checkEquivalencesCount, final boolean allowLowerEquivalencesCount) { if (!Similarity.checkDimensions(spectrum1, spectrum2, dim1, dim2)) { return null; } - final Assignment matchAssignments = new Assignment(); - matchAssignments.setNuclei(new String[]{spectrum1.getNuclei()[dim1]}); - 
matchAssignments.initAssignments(spectrum1.getSignalCount()); - final Set assigned = new HashSet<>(); - List pickedSignalIndicesSpectrum2; - boolean passed; - - for (int i = 0; i - < spectrum1.getSignalCount(); i++) { - if (spectrum1.getShift(i, dim1) - == null) { - continue; - } - - // @TODO add solvent deviation value for picking closest signal(s) - pickedSignalIndicesSpectrum2 = new ArrayList<>(); - for (final int pickedSignalIndexSpectrum2 : spectrum2.pickSignals(spectrum1.getShift(i, dim1), dim2, - shiftTol)) { - passed = true; - // @TODO maybe consider further parameters to check ? e.g. intensity - if (checkMultiplicity) { - passed = (spectrum1.getMultiplicity(i) - == null - && spectrum2.getMultiplicity(pickedSignalIndexSpectrum2) - == null) - || (spectrum1.getMultiplicity(i) - != null - && spectrum1.getMultiplicity(i) - .equals(spectrum2.getMultiplicity(pickedSignalIndexSpectrum2))); - } - if (passed - && checkEquivalencesCount) { - if (allowLowerEquivalencesCount) { - passed = spectrum1.getEquivalencesCount(i) - <= spectrum2.getEquivalencesCount(pickedSignalIndexSpectrum2); - } else { - passed = spectrum1.getEquivalencesCount(i) - == spectrum2.getEquivalencesCount(pickedSignalIndexSpectrum2); - } - } - - if (passed) { - pickedSignalIndicesSpectrum2.add(pickedSignalIndexSpectrum2); - } - } - for (final int pickedSignalIndexSpectrum2 : pickedSignalIndicesSpectrum2) { - if (!assigned.contains(pickedSignalIndexSpectrum2)) { - // add signal to list of already assigned signals - assigned.add(pickedSignalIndexSpectrum2); - for (int k = 0; k - < spectrum1.getEquivalencesCount(i); k++) { - matchAssignments.addAssignmentEquivalence(0, i, pickedSignalIndexSpectrum2); - } - break; - } + final List distanceList = Utilities.buildDistanceList(spectrum1, spectrum2, dim1, dim2, + shiftTolerance, checkMultiplicity, + checkEquivalencesCount, + allowLowerEquivalencesCount); + final Assignment matchAssignment = new Assignment(); + 
matchAssignment.setNuclei(spectrum1.getNuclei()); + matchAssignment.initAssignments(spectrum1.getSignalCount()); + final Set assignedSpectrum1 = new HashSet<>(); + final Set assignedSpectrum2 = new HashSet<>(); + for (final Distance distance : distanceList) { + if (!assignedSpectrum1.contains(distance.getSignalIndexSpectrum1()) + && !assignedSpectrum2.contains(distance.getSignalIndexSpectrum2())) { + matchAssignment.addAssignmentEquivalence(0, distance.getSignalIndexSpectrum1(), + distance.getSignalIndexSpectrum2()); + assignedSpectrum1.add(distance.getSignalIndexSpectrum1()); + assignedSpectrum2.add(distance.getSignalIndexSpectrum2()); } } - return matchAssignments; + return matchAssignment; } + /** * Returns the closest shift matches between two spectra in all dimensions * as one Assignment object with N set dimensions. diff --git a/src/casekit/nmr/similarity/Utilities.java b/src/casekit/nmr/similarity/Utilities.java new file mode 100644 index 0000000..0f6e4bc --- /dev/null +++ b/src/casekit/nmr/similarity/Utilities.java @@ -0,0 +1,73 @@ +package casekit.nmr.similarity; + +import casekit.nmr.model.Signal; +import casekit.nmr.model.Spectrum; + +import java.util.ArrayList; +import java.util.Comparator; +import java.util.List; + +public class Utilities { + + public static List buildDistanceList(final Spectrum spectrum1, final Spectrum spectrum2, final int dim1, + final int dim2, final double shiftTolerance, + final boolean checkMultiplicity, + final boolean checkEquivalencesCount, + final boolean allowLowerEquivalencesCount) { + final List distanceList = new ArrayList<>(); + Double distanceValue; + for (int i = 0; i + < spectrum1.getSignalCount(); i++) { + for (int j = 0; j + < spectrum2.getSignalCount(); j++) { + distanceValue = getDistanceValue(spectrum1.getSignal(i), spectrum2.getSignal(j), dim1, dim2, + checkMultiplicity, checkEquivalencesCount, allowLowerEquivalencesCount, + shiftTolerance); + if (distanceValue + != null) { + distanceList.add(new Distance(i, j, 
distanceValue)); + } + } + } + distanceList.sort(Comparator.comparingDouble(Distance::getValue)); + + return distanceList; + } + + public static Double getDistanceValue(final Signal signal1, final Signal signal2, final int dim1, final int dim2, + final boolean checkMultiplicity, final boolean checkEquivalencesCount, + final boolean allowLowerEquivalencesCount, final double shiftTolerance) { + boolean passed = true; + // @TODO maybe consider further parameters to check ? e.g. intensity + if (checkMultiplicity) { + passed = (signal1.getMultiplicity() + == null + && signal2.getMultiplicity() + == null) + || (signal1.getMultiplicity() + != null + && signal1.getMultiplicity() + .equals(signal2.getMultiplicity())); + } + if (passed + && checkEquivalencesCount) { + if (allowLowerEquivalencesCount) { + passed = signal1.getEquivalencesCount() + <= signal2.getEquivalencesCount(); + } else { + passed = signal1.getEquivalencesCount() + == signal2.getEquivalencesCount(); + } + } + if (!passed) { + return null; + } + final double distanceValue = Math.abs(signal1.getShift(dim1) + - signal2.getShift(dim2)); + + return distanceValue + > shiftTolerance + ? 
null + : distanceValue; + } +} From cc41b55568b2403e87b6d6e313a975bb84ca12d8 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Tue, 10 Aug 2021 12:58:36 +0200 Subject: [PATCH 282/405] fix: insert missing equivalences in matchSpectra & small optimisations --- src/casekit/nmr/similarity/Similarity.java | 29 ++++++++++++++++------ 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/src/casekit/nmr/similarity/Similarity.java b/src/casekit/nmr/similarity/Similarity.java index b714716..91160d7 100644 --- a/src/casekit/nmr/similarity/Similarity.java +++ b/src/casekit/nmr/similarity/Similarity.java @@ -17,7 +17,6 @@ import casekit.nmr.model.Signal; import casekit.nmr.model.Spectrum; import casekit.nmr.utils.Statistics; -import org.openscience.cdk.exception.CDKException; import org.openscience.cdk.fingerprint.BitSetFingerprint; import org.openscience.cdk.similarity.Tanimoto; @@ -61,18 +60,29 @@ public static Double calculateTanimotoCoefficient(final Spectrum spectrum1, fina return null; } + return calculateTanimotoCoefficient(getBitSetFingerprint(spectrum1, dim1, multiplicitySectionsBuilder), + getBitSetFingerprint(spectrum2, dim2, multiplicitySectionsBuilder)); + } + + public static Double calculateTanimotoCoefficient(final BitSetFingerprint bitSetFingerprint1, + final BitSetFingerprint bitSetFingerprint2) { + if (bitSetFingerprint1 + == null + || bitSetFingerprint2 + == null) { + return null; + } try { - return Tanimoto.calculate(getFingerprint(spectrum1, dim1, multiplicitySectionsBuilder), - getFingerprint(spectrum2, dim2, multiplicitySectionsBuilder)); - } catch (final CDKException e) { + return Tanimoto.calculate(bitSetFingerprint1, bitSetFingerprint2); + } catch (final IllegalArgumentException e) { e.printStackTrace(); } return null; } - public static BitSetFingerprint getFingerprint(final Spectrum spectrum, final int dim, - final MultiplicitySectionsBuilder multiplicitySectionsBuilder) throws CDKException { + public static BitSetFingerprint 
getBitSetFingerprint(final Spectrum spectrum, final int dim, + final MultiplicitySectionsBuilder multiplicitySectionsBuilder) { final BitSetFingerprint bitSetFingerprint = new BitSetFingerprint(multiplicitySectionsBuilder.getSteps()); final Map> multiplicitySections = multiplicitySectionsBuilder.buildMultiplicitySections( spectrum, dim); @@ -278,8 +288,11 @@ public static Assignment matchSpectra(final Spectrum spectrum1, final Spectrum s for (final Distance distance : distanceList) { if (!assignedSpectrum1.contains(distance.getSignalIndexSpectrum1()) && !assignedSpectrum2.contains(distance.getSignalIndexSpectrum2())) { - matchAssignment.addAssignmentEquivalence(0, distance.getSignalIndexSpectrum1(), - distance.getSignalIndexSpectrum2()); + for (int equiv = 0; equiv + < spectrum2.getEquivalencesCount(distance.getSignalIndexSpectrum2()); equiv++) { + matchAssignment.addAssignmentEquivalence(0, distance.getSignalIndexSpectrum1(), + distance.getSignalIndexSpectrum2()); + } assignedSpectrum1.add(distance.getSignalIndexSpectrum1()); assignedSpectrum2.add(distance.getSignalIndexSpectrum2()); } From c3a77d895e046f81be65f1afd51bef6456c99041 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Tue, 10 Aug 2021 13:01:32 +0200 Subject: [PATCH 283/405] feat: added public method to calculate the steps & small optimisations --- .../analysis/MultiplicitySectionsBuilder.java | 38 +++++++++---------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/src/casekit/nmr/analysis/MultiplicitySectionsBuilder.java b/src/casekit/nmr/analysis/MultiplicitySectionsBuilder.java index 79d8213..04dc01d 100644 --- a/src/casekit/nmr/analysis/MultiplicitySectionsBuilder.java +++ b/src/casekit/nmr/analysis/MultiplicitySectionsBuilder.java @@ -13,7 +13,6 @@ import casekit.nmr.model.Signal; import casekit.nmr.model.Spectrum; -import org.openscience.cdk.exception.CDKException; import java.util.*; @@ -37,8 +36,8 @@ private void init() { this.multiplicities.add("t"); this.multiplicities.add("q"); 
this.multiplicities.add("unknown"); - this.minLimit = -20; - this.maxLimit = 260; + this.minLimit = -50; + this.maxLimit = 300; this.stepSize = 5; this.updateSteps(); // ppm range from -20 to 260 in 5 ppm steps } @@ -46,22 +45,21 @@ private void init() { /** * Resets to following default values:

* multiplicties: S, D, T. Q
- * min. ppm limit: -20
- * max. ppm limit: 260
+ * min. ppm limit: -50
+ * max. ppm limit: 300
* step size: 5 */ public void reset() { this.init(); } - public Map> buildMultiplicitySections(final Spectrum spectrum, - final int dim) throws CDKException { + public Map> buildMultiplicitySections(final Spectrum spectrum, final int dim) { final Map> multiplicitySections = new HashMap<>(); // init for (final String multiplicity : this.multiplicities) { multiplicitySections.put(multiplicity, new ArrayList<>()); } - // set the mult. sections + // set the mult. sections Signal signal; Integer shiftSection; String multiplicity; @@ -71,18 +69,16 @@ public Map> buildMultiplicitySections(final Spectrum spect shiftSection = this.calculateShiftSection(signal, dim); if (shiftSection == null) { - throw new CDKException(Thread.currentThread() - .getStackTrace()[1].getMethodName() - + ": signal or its chemical shift is missing: " - + signal); + System.err.println("MultiplicitySectionsBuilder: signal or its chemical shift is missing: " + + signal); + continue; } multiplicity = this.checkMultiplicity(signal); if (multiplicity == null) { - throw new CDKException(Thread.currentThread() - .getStackTrace()[1].getMethodName() - + ": signal multiplicity is not in list: " - + signal); + System.err.println("MultiplicitySectionsBuilder: signal multiplicity is not in list: " + + signal); + continue; } multiplicitySections.get(multiplicity) .add(shiftSection); @@ -163,8 +159,12 @@ public int getSteps() { } private void updateSteps() { - this.steps = (this.maxLimit - - this.minLimit) - / this.stepSize; + this.steps = this.calculateSteps(this.minLimit, this.maxLimit, this.stepSize); + } + + public int calculateSteps(final int minLimit, final int maxLimit, final int stepSize) { + return (maxLimit + - minLimit) + / stepSize; } } From fe85d823adeb5bd2cd435c94030ac075e567e937 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Tue, 17 Aug 2021 00:44:44 +0200 Subject: [PATCH 284/405] chore: optimisation of toString method in Assignment --- src/casekit/nmr/model/Assignment.java | 16 ++++++---------- 
1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/src/casekit/nmr/model/Assignment.java b/src/casekit/nmr/model/Assignment.java index f2fb63f..2e1a7df 100644 --- a/src/casekit/nmr/model/Assignment.java +++ b/src/casekit/nmr/model/Assignment.java @@ -251,15 +251,11 @@ public Assignment buildClone() { @Override public String toString() { - final StringBuilder stringBuilder = new StringBuilder(); - stringBuilder.append("Assignments:\n"); - - for (int i = 0; i - < this.getNDim(); i++) { - stringBuilder.append(Arrays.toString(this.assignments[i])) - .append("\n"); - } - - return stringBuilder.toString(); + return "Assignment{" + + "nuclei=" + + Arrays.toString(this.nuclei) + + ", assignments=" + + Arrays.deepToString(this.assignments) + + '}'; } } From bdd00531cd2deccd7df8d87426cb62c280b1a4c6 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Tue, 17 Aug 2021 00:45:48 +0200 Subject: [PATCH 285/405] fix: initialise meta map when adding an info in DataSet --- src/casekit/nmr/model/DataSet.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/casekit/nmr/model/DataSet.java b/src/casekit/nmr/model/DataSet.java index 18eb077..3dd0ae7 100644 --- a/src/casekit/nmr/model/DataSet.java +++ b/src/casekit/nmr/model/DataSet.java @@ -29,6 +29,10 @@ public DataSet(final IAtomContainer structure, final Spectrum spectrum, final As } public void addMetaInfo(final String key, final String value) { + if (this.meta + == null) { + this.meta = new HashMap<>(); + } this.meta.put(key, value); } From ebd336ee5b7e61ef083a5b6ebe87bf875a48f074 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Tue, 17 Aug 2021 00:47:38 +0200 Subject: [PATCH 286/405] fix: use case insensitive comparison of multiplicities --- src/casekit/nmr/similarity/Utilities.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/casekit/nmr/similarity/Utilities.java b/src/casekit/nmr/similarity/Utilities.java index 0f6e4bc..4d35655 100644 --- a/src/casekit/nmr/similarity/Utilities.java +++ 
b/src/casekit/nmr/similarity/Utilities.java @@ -47,7 +47,7 @@ public static Double getDistanceValue(final Signal signal1, final Signal signal2 || (signal1.getMultiplicity() != null && signal1.getMultiplicity() - .equals(signal2.getMultiplicity())); + .equalsIgnoreCase(signal2.getMultiplicity())); } if (passed && checkEquivalencesCount) { From d996b77e2301fb16ca678e972402c52aa597ca13 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Tue, 17 Aug 2021 00:48:28 +0200 Subject: [PATCH 287/405] feat: added atomContainerToDataSet method to Utils --- src/casekit/nmr/utils/Utils.java | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/src/casekit/nmr/utils/Utils.java b/src/casekit/nmr/utils/Utils.java index 3609625..28d9d9b 100644 --- a/src/casekit/nmr/utils/Utils.java +++ b/src/casekit/nmr/utils/Utils.java @@ -1,5 +1,7 @@ package casekit.nmr.utils; +import casekit.nmr.model.DataSet; +import casekit.nmr.model.ExtendedConnectionMatrix; import casekit.nmr.model.Spectrum; import casekit.nmr.model.nmrdisplayer.Correlation; import org.openscience.cdk.aromaticity.Aromaticity; @@ -550,4 +552,25 @@ public static Float getBondOrderSum(final IAtomContainer ac, final int atomIndex return bondsOrderSum; } + + public static DataSet atomContainerToDataSet(final IAtomContainer structure) throws CDKException { + final CDKHydrogenAdder hydrogenAdder = CDKHydrogenAdder.getInstance(SilentChemObjectBuilder.getInstance()); + AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(structure); + hydrogenAdder.addImplicitHydrogens(structure); + setAromaticityAndKekulize(structure); + final Map meta = new HashMap<>(); + meta.put("title", structure.getTitle()); + meta.put("mf", molecularFormularToString(getMolecularFormulaFromAtomContainer(structure))); + try { + final String smiles = getSmilesFromAtomContainer(structure); + meta.put("smiles", smiles); + } catch (final CDKException e) { + e.printStackTrace(); + } + final DataSet dataSet = new DataSet(); + 
dataSet.setStructure(new ExtendedConnectionMatrix(structure)); + dataSet.setMeta(meta); + + return dataSet; + } } From f89a92c8e12c0cc5fe4741c08566620d5338be2c Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Tue, 17 Aug 2021 00:49:14 +0200 Subject: [PATCH 288/405] feat: added methods to parse SMILES from file in Parser --- src/casekit/nmr/utils/Parser.java | 79 ++++++++++++++++++++++++++++ src/casekit/nmr/utils/SDFParser.java | 64 ---------------------- 2 files changed, 79 insertions(+), 64 deletions(-) create mode 100644 src/casekit/nmr/utils/Parser.java delete mode 100644 src/casekit/nmr/utils/SDFParser.java diff --git a/src/casekit/nmr/utils/Parser.java b/src/casekit/nmr/utils/Parser.java new file mode 100644 index 0000000..3a1a574 --- /dev/null +++ b/src/casekit/nmr/utils/Parser.java @@ -0,0 +1,79 @@ +package casekit.nmr.utils; + +import casekit.nmr.model.DataSet; +import org.openscience.cdk.exception.CDKException; +import org.openscience.cdk.io.iterator.IteratingSDFReader; +import org.openscience.cdk.silent.SilentChemObjectBuilder; +import org.openscience.cdk.smiles.SmilesParser; + +import java.io.*; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.List; + +public class Parser { + + public static List parseSDFile(final String pathToFile) throws CDKException, FileNotFoundException { + return parseSDFile(new FileReader(pathToFile)); + } + + public static List parseSDFileContent(final String fileContent) throws CDKException { + final InputStream inputStream = new ByteArrayInputStream(fileContent.getBytes(StandardCharsets.UTF_8)); + return parseSDFile(new InputStreamReader(inputStream)); + } + + public static List parseSDFile(final Reader fileReader) throws CDKException { + final List dataSetList = new ArrayList<>(); + final IteratingSDFReader iterator = new IteratingSDFReader(fileReader, SilentChemObjectBuilder.getInstance()); + + while (iterator.hasNext()) { + 
dataSetList.add(Utils.atomContainerToDataSet(iterator.next())); + } + + return dataSetList; + } + + public static List parseSmilesFile(final String pathToFile) throws FileNotFoundException { + return parseSmilesFile(new FileReader(pathToFile)); + } + + public static List parseSmilesFileContent(final String fileContent) { + final InputStream inputStream = new ByteArrayInputStream(fileContent.getBytes(StandardCharsets.UTF_8)); + return parseSmilesFile(new InputStreamReader(inputStream)); + } + + public static List parseSmilesFile(final Reader fileReader) { + final List dataSetList = new ArrayList<>(); + final BufferedReader bufferedReader = new BufferedReader(fileReader); + final SmilesParser smilesParser = new SmilesParser(SilentChemObjectBuilder.getInstance()); + bufferedReader.lines() + .forEach(smiles -> { + try { + dataSetList.add(Utils.atomContainerToDataSet(smilesParser.parseSmiles(smiles))); + } catch (final CDKException e) { + e.printStackTrace(); + } + }); + + return dataSetList; + } + + public static List smilesFileToList(final String pathToFile) throws FileNotFoundException { + return smilesFileToList(new FileReader(pathToFile)); + } + + public static List smilesFileContentToList(final String fileContent) { + final InputStream inputStream = new ByteArrayInputStream(fileContent.getBytes(StandardCharsets.UTF_8)); + return smilesFileToList(new InputStreamReader(inputStream)); + } + + public static List smilesFileToList(final Reader fileReader) { + final List smilesList = new ArrayList<>(); + final BufferedReader bufferedReader = new BufferedReader(fileReader); + bufferedReader.lines() + .forEach(smilesList::add); + + return smilesList; + } + +} diff --git a/src/casekit/nmr/utils/SDFParser.java b/src/casekit/nmr/utils/SDFParser.java deleted file mode 100644 index 4b41129..0000000 --- a/src/casekit/nmr/utils/SDFParser.java +++ /dev/null @@ -1,64 +0,0 @@ -package casekit.nmr.utils; - -import casekit.nmr.model.DataSet; -import 
casekit.nmr.model.ExtendedConnectionMatrix; -import org.openscience.cdk.exception.CDKException; -import org.openscience.cdk.interfaces.IAtomContainer; -import org.openscience.cdk.interfaces.IMolecularFormula; -import org.openscience.cdk.io.iterator.IteratingSDFReader; -import org.openscience.cdk.silent.SilentChemObjectBuilder; -import org.openscience.cdk.tools.CDKHydrogenAdder; -import org.openscience.cdk.tools.manipulator.AtomContainerManipulator; - -import java.io.*; -import java.nio.charset.StandardCharsets; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -public class SDFParser { - - public static List parseSDFile(final String pathToFile) throws CDKException, FileNotFoundException { - return parseSDFile(new FileReader(pathToFile)); - } - - public static List parseSDFileContent(final String fileContent) throws CDKException { - final InputStream inputStream = new ByteArrayInputStream(fileContent.getBytes(StandardCharsets.UTF_8)); - return parseSDFile(new InputStreamReader(inputStream)); - } - - public static List parseSDFile(final Reader fileReader) throws CDKException { - final List dataSetList = new ArrayList<>(); - final IteratingSDFReader iterator = new IteratingSDFReader(fileReader, SilentChemObjectBuilder.getInstance()); - IAtomContainer structure; - Map meta; - final CDKHydrogenAdder hydrogenAdder = CDKHydrogenAdder.getInstance(SilentChemObjectBuilder.getInstance()); - IMolecularFormula mf; - DataSet dataSet; - - while (iterator.hasNext()) { - structure = iterator.next(); - AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(structure); - hydrogenAdder.addImplicitHydrogens(structure); - Utils.setAromaticityAndKekulize(structure); - meta = new HashMap<>(); - meta.put("title", structure.getTitle()); - mf = Utils.getMolecularFormulaFromAtomContainer(structure); - meta.put("mf", Utils.molecularFormularToString(mf)); - try { - final String smiles = Utils.getSmilesFromAtomContainer(structure); - 
meta.put("smiles", smiles); - } catch (final CDKException e) { - e.printStackTrace(); - } - dataSet = new DataSet(); - dataSet.setStructure(new ExtendedConnectionMatrix(structure)); - dataSet.setMeta(meta); - - dataSetList.add(dataSet); - } - - return dataSetList; - } -} From 04a3969fdc7d39ee229a6d44f011db4bd5a89bf3 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Tue, 17 Aug 2021 00:51:05 +0200 Subject: [PATCH 289/405] feat: added optional hybridizations to check when searching for goodlist and badlist elements; added matchAssignment to meta info --- .../nmr/fragments/FragmentUtilities.java | 77 ++++++++++++++----- 1 file changed, 59 insertions(+), 18 deletions(-) diff --git a/src/casekit/nmr/fragments/FragmentUtilities.java b/src/casekit/nmr/fragments/FragmentUtilities.java index 342e6bb..12ab169 100644 --- a/src/casekit/nmr/fragments/FragmentUtilities.java +++ b/src/casekit/nmr/fragments/FragmentUtilities.java @@ -5,12 +5,16 @@ import casekit.nmr.model.Spectrum; import casekit.nmr.similarity.Similarity; import casekit.nmr.utils.Utils; +import com.google.gson.Gson; import org.openscience.cdk.interfaces.IAtomContainer; import java.util.*; import java.util.stream.Collectors; public class FragmentUtilities { + + private final static Gson gson = new Gson(); + public static LinkedHashMap> sortByFrequencies( final Map> functionalGroupDataSetsMap) { final LinkedHashMap> sortedCollection = new LinkedHashMap<>(); @@ -77,13 +81,17 @@ public static Map> getGoodlistAndBadlist(final List> queryHybridizationList) { final List matches = new ArrayList<>(); final List nonMatches = new ArrayList<>(); + Assignment matchAssignment; for (final DataSet dataSet : dataSetList) { - if (isMatch(dataSet, querySpectrum, mf, shiftTol, maxAverageDeviation, checkMultiplicity)) { + matchAssignment = Similarity.matchSpectra(dataSet.getSpectrum(), querySpectrum, 0, 0, shiftTol, + checkMultiplicity, true, true); + if (isMatch(dataSet, querySpectrum, mf, matchAssignment, maxAverageDeviation, 
queryHybridizationList)) { matches.add(dataSet); - } else if (isNonMatch(dataSet, querySpectrum, mf, shiftTol, checkMultiplicity)) { + } else if (isNonMatch(dataSet, querySpectrum, mf, matchAssignment)) { nonMatches.add(dataSet); } } @@ -94,9 +102,9 @@ public static Map> getGoodlistAndBadlist(final List> queryHybridizationList) { // check for nuclei if (!dataSet.getSpectrum() .getNuclei()[0].equals(querySpectrum.getNuclei()[0])) { @@ -108,33 +116,34 @@ public static boolean isMatch(final DataSet dataSet, final Spectrum querySpectru } // check average deviation final Double averageDeviation = Similarity.calculateAverageDeviation(dataSet.getSpectrum(), querySpectrum, 0, 0, - shiftTol, checkMultiplicity, true, true); + matchAssignment); if (averageDeviation == null || averageDeviation > maxAverageDeviation) { return false; } - final Double rmsd = Similarity.calculateRMSD(dataSet.getSpectrum(), querySpectrum, 0, 0, shiftTol, - checkMultiplicity, true, true); - dataSet.getMeta() - .put("avgDev", Double.toString(averageDeviation)); - dataSet.getMeta() - .put("rmsd", Double.toString(rmsd)); + // check hybridazations after knowing that all signals in dataset have an assignment + if (!checkHybridizations(dataSet, matchAssignment, queryHybridizationList)) { + return false; + } + dataSet.addMetaInfo("matchAssignment", gson.toJson(matchAssignment, Assignment.class)); + final Double rmsd = Similarity.calculateRMSD(dataSet.getSpectrum(), querySpectrum, 0, 0, matchAssignment); + dataSet.addMetaInfo("averageDeviation", Double.toString(averageDeviation)); + dataSet.addMetaInfo("rmsd", Double.toString(rmsd)); + return true; } - public static boolean isNonMatch(final DataSet dataSet, final Spectrum querySpectrum, final String mf, - final double shiftTol, final boolean checkMultiplicity) { + private static boolean isNonMatch(final DataSet dataSet, final Spectrum querySpectrum, final String mf, + final Assignment matchAssigment) { if (!isStructuralMatch(dataSet, mf)) { return false; } 
boolean isSpectralMatch = false; if (dataSet.getSpectrum() .getNuclei()[0].equals(querySpectrum.getNuclei()[0])) { - final Assignment matchAssigment = Similarity.matchSpectra(dataSet.getSpectrum(), querySpectrum, 0, 0, - shiftTol, checkMultiplicity, true, true); if (matchAssigment != null && matchAssigment.getSetAssignmentsCount(0) @@ -146,7 +155,7 @@ public static boolean isNonMatch(final DataSet dataSet, final Spectrum querySpec return !isSpectralMatch; } - public static boolean isStructuralMatch(final DataSet dataSet, final String mf) { + private static boolean isStructuralMatch(final DataSet dataSet, final String mf) { final IAtomContainer fragment = dataSet.getStructure() .toAtomContainer(); // check molecular formula with atom types in group @@ -155,4 +164,36 @@ public static boolean isStructuralMatch(final DataSet dataSet, final String mf) && !Utils.getUnsaturatedAtomIndices(fragment) .isEmpty(); } + + private static boolean checkHybridizations(final DataSet dataSet, final Assignment matchAssignment, + final List> queryHybridizationList) { + if (queryHybridizationList.isEmpty()) { + return true; + } + + final IAtomContainer fragment = dataSet.getStructure() + .toAtomContainer(); + final String atomType = Utils.getAtomTypeFromNucleus(dataSet.getSpectrum() + .getNuclei()[0]); + int signalIndexInDataSetSpectrum, signalIndexInQuerySpectrum; + for (int i = 0; i + < fragment.getAtomCount(); i++) { + if (fragment.getAtom(i) + .getSymbol() + .equals(atomType)) { + signalIndexInDataSetSpectrum = dataSet.getAssignment() + .getIndices(0, i) + .get(0); + signalIndexInQuerySpectrum = matchAssignment.getAssignment(0, signalIndexInDataSetSpectrum, 0); + if (!queryHybridizationList.get(signalIndexInQuerySpectrum) + .contains(fragment.getAtom(i) + .getHybridization() + .name())) { + return false; + } + } + } + + return true; + } } From cc5505d07d888b0fa99671be8e9b4afc1d92db10 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Tue, 17 Aug 2021 01:21:01 +0200 Subject: [PATCH 
290/405] feat: added ConnectivityStatistics class --- .../nmr/analysis/ConnectivityStatistics.java | 148 ++++++++++++++++++ 1 file changed, 148 insertions(+) create mode 100644 src/casekit/nmr/analysis/ConnectivityStatistics.java diff --git a/src/casekit/nmr/analysis/ConnectivityStatistics.java b/src/casekit/nmr/analysis/ConnectivityStatistics.java new file mode 100644 index 0000000..f36c236 --- /dev/null +++ b/src/casekit/nmr/analysis/ConnectivityStatistics.java @@ -0,0 +1,148 @@ +package casekit.nmr.analysis; + +import casekit.nmr.model.DataSet; +import casekit.nmr.utils.Utils; +import org.openscience.cdk.interfaces.IAtom; +import org.openscience.cdk.interfaces.IAtomContainer; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; + +public class ConnectivityStatistics { + + + /** + * @param dataSetList + * @param nucleus + * @param connectivityStatistics multiplicity -> hybridization -> shift (int) -> connected atom symbol -> connected atom hybridization -> connected atom protons count -> occurrence + */ + public static void buildConnectivityStatistics(final List dataSetList, final String nucleus, + final Map>>>>> connectivityStatistics) { + IAtomContainer structure; + IAtom atom; + final String atomType = Utils.getAtomTypeFromNucleus(nucleus); + String multiplicity; + String hybridization; + String connectedAtomType; + String connectedAtomHybridization; + int shift, atomIndex; + for (final DataSet dataSet : dataSetList) { + if (!dataSet.getSpectrum() + .getNuclei()[0].equals(nucleus)) { + continue; + } + structure = dataSet.getStructure() + .toAtomContainer(); + for (int signalIndex = 0; signalIndex + < dataSet.getSpectrum() + .getSignals() + .size(); signalIndex++) { + shift = dataSet.getSpectrum() + .getShift(signalIndex, 0) + .intValue(); + for (int equivalenceIndex = 0; equivalenceIndex + < dataSet.getAssignment() + .getAssignment(0, signalIndex).length; equivalenceIndex++) { + atomIndex = dataSet.getAssignment() 
+ .getAssignment(0, signalIndex, equivalenceIndex); + atom = structure.getAtom(atomIndex); + if (atom.getSymbol() + .equals(atomType)) { + multiplicity = Utils.getMultiplicityFromProtonsCount(atom.getImplicitHydrogenCount()); + if (multiplicity + == null) { + continue; + } + multiplicity = multiplicity.toLowerCase(); + hybridization = atom.getHybridization() + .name(); + connectivityStatistics.putIfAbsent(multiplicity, new HashMap<>()); + connectivityStatistics.get(multiplicity) + .putIfAbsent(hybridization, new HashMap<>()); + // check for connected hetero atoms + for (final IAtom connectedAtom : structure.getConnectedAtomsList(atom)) { + if (connectedAtom.getSymbol() + .equals("H")) { + continue; + } + connectedAtomType = connectedAtom.getSymbol(); + if (connectedAtom.getHybridization() + == null) { + continue; + } + connectedAtomHybridization = connectedAtom.getHybridization() + .name(); + connectivityStatistics.get(multiplicity) + .get(hybridization) + .putIfAbsent(shift, new HashMap<>()); + connectivityStatistics.get(multiplicity) + .get(hybridization) + .get(shift) + .putIfAbsent(connectedAtomType, new HashMap<>()); + connectivityStatistics.get(multiplicity) + .get(hybridization) + .get(shift) + .get(connectedAtomType) + .putIfAbsent(connectedAtomHybridization, new HashMap<>()); + connectivityStatistics.get(multiplicity) + .get(hybridization) + .get(shift) + .get(connectedAtomType) + .get(connectedAtomHybridization) + .putIfAbsent(connectedAtom.getImplicitHydrogenCount(), 0); + connectivityStatistics.get(multiplicity) + .get(hybridization) + .get(shift) + .get(connectedAtomType) + .get(connectedAtomHybridization) + .put(connectedAtom.getImplicitHydrogenCount(), + connectivityStatistics.get(multiplicity) + .get(hybridization) + .get(shift) + .get(connectedAtomType) + .get(connectedAtomHybridization) + .get(connectedAtom.getImplicitHydrogenCount()) + + 1); + } + } + } + } + } + } + + /** + * @param connectivityStatistics multiplicity -> hybridization -> shift 
(int) -> connected atom symbol -> connected atom hybridization -> connected atom protons count -> occurrence + * @param multiplicity + * @param hybridization + * @param shift + * @param molecularFormulaElements + * + * @return + */ + public static Map>> extractConnectivities( + final Map>>>>> connectivityStatistics, + final String multiplicity, final String hybridization, final int shift, + final Set molecularFormulaElements) { + final Map>> extractedConnectivities = new HashMap<>(); + if (connectivityStatistics.containsKey(multiplicity) + && connectivityStatistics.get(multiplicity) + .containsKey(hybridization) + && connectivityStatistics.get(multiplicity) + .get(hybridization) + .containsKey(shift)) { + for (final Map.Entry>> entry : connectivityStatistics.get( + multiplicity) + .get(hybridization) + .get(shift) + .entrySet()) { + if (molecularFormulaElements.contains(entry.getKey())) { + extractedConnectivities.put(entry.getKey(), entry.getValue()); + } + } + } + + return extractedConnectivities; + } +} From ef293a3087291fc92a86254be1635382f1168f75 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Wed, 18 Aug 2021 15:43:55 +0200 Subject: [PATCH 291/405] chore: use Utils.atomContainerToDataSet() in COCONUT.java --- src/casekit/nmr/dbservice/COCONUT.java | 42 +++++++------------------- 1 file changed, 11 insertions(+), 31 deletions(-) diff --git a/src/casekit/nmr/dbservice/COCONUT.java b/src/casekit/nmr/dbservice/COCONUT.java index e65692c..e50ec60 100644 --- a/src/casekit/nmr/dbservice/COCONUT.java +++ b/src/casekit/nmr/dbservice/COCONUT.java @@ -7,58 +7,34 @@ import casekit.nmr.utils.Utils; import org.openscience.cdk.exception.CDKException; import org.openscience.cdk.interfaces.IAtomContainer; -import org.openscience.cdk.interfaces.IMolecularFormula; import org.openscience.cdk.io.iterator.IteratingSDFReader; import org.openscience.cdk.silent.SilentChemObjectBuilder; -import org.openscience.cdk.tools.CDKHydrogenAdder; -import 
org.openscience.cdk.tools.manipulator.AtomContainerManipulator; import java.io.FileNotFoundException; import java.io.FileReader; import java.util.ArrayList; -import java.util.HashMap; import java.util.List; -import java.util.Map; public class COCONUT { public static List getDataSetsWithShiftPredictionFromCOCONUT(final String pathToCOCONUT, final String[] nuclei) throws CDKException, FileNotFoundException { - final List dataSets = new ArrayList<>(); + final List dataSetList = new ArrayList<>(); final IteratingSDFReader iterator = new IteratingSDFReader(new FileReader(pathToCOCONUT), SilentChemObjectBuilder.getInstance()); IAtomContainer structure; + DataSet dataSet; Spectrum spectrum; Assignment assignment; - Map meta; - final CDKHydrogenAdder hydrogenAdder = CDKHydrogenAdder.getInstance(SilentChemObjectBuilder.getInstance()); - String[] split, split2; String spectrumPropertyString, multiplicity; - IMolecularFormula mf; double calcShift; List closestSignalList; int atomIndex; while (iterator.hasNext()) { structure = iterator.next(); - AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(structure); - if (!Utils.containsExplicitHydrogens(structure)) { - hydrogenAdder.addImplicitHydrogens(structure); - } - Utils.setAromaticityAndKekulize(structure); - - meta = new HashMap<>(); - meta.put("title", structure.getTitle()); - meta.put("id", structure.getProperty("ID")); - mf = Utils.getMolecularFormulaFromAtomContainer(structure); - meta.put("mf", Utils.molecularFormularToString(mf)); - try { - final String smiles = casekit.nmr.utils.Utils.getSmilesFromAtomContainer(structure); - meta.put("smiles", smiles); - } catch (final CDKException e) { - e.printStackTrace(); - } + dataSet = Utils.atomContainerToDataSet(structure); for (final String nucleus : nuclei) { final String atomType = casekit.nmr.utils.Utils.getAtomTypeFromNucleus(nucleus); @@ -141,18 +117,22 @@ public static List getDataSetsWithShiftPredictionFromCOCONUT(final Stri // + spectrum.getSignal(i) // 
.getEquivalencesCount()); // } - // if no spectrum could be built or the number of signals in spectrum is different than the atom number in molecule - if (Utils.getDifferenceSpectrumSizeAndMolecularFormulaCount(spectrum, mf, 0) + if (Utils.getDifferenceSpectrumSizeAndMolecularFormulaCount(spectrum, + Utils.getMolecularFormulaFromString( + dataSet.getMeta() + .get("mf")), 0) != 0) { continue; } + dataSet.setSpectrum(spectrum); + dataSet.setAssignment(assignment); - dataSets.add(new DataSet(structure, spectrum, assignment, meta)); + dataSetList.add(dataSet); } } - return dataSets; + return dataSetList; } } From 8a1fe388f0fddd7fba8ccf3bfc01223cb3fa36fd Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Wed, 18 Aug 2021 15:45:47 +0200 Subject: [PATCH 292/405] chore: receive general dataSet list when collecting functional groups (instead of path to NMRShiftDB) --- .../ErtlFunctionalGroupsUtilities.java | 97 +++++++++---------- 1 file changed, 45 insertions(+), 52 deletions(-) diff --git a/src/casekit/nmr/fragments/functionalgroup/ErtlFunctionalGroupsUtilities.java b/src/casekit/nmr/fragments/functionalgroup/ErtlFunctionalGroupsUtilities.java index 818b586..bb5e9f6 100644 --- a/src/casekit/nmr/fragments/functionalgroup/ErtlFunctionalGroupsUtilities.java +++ b/src/casekit/nmr/fragments/functionalgroup/ErtlFunctionalGroupsUtilities.java @@ -1,6 +1,5 @@ package casekit.nmr.fragments.functionalgroup; -import casekit.nmr.dbservice.NMRShiftDB; import casekit.nmr.fragments.fragmentation.Fragmentation; import casekit.nmr.fragments.fragmentation.FragmentationUtilities; import casekit.nmr.fragments.model.ConnectionTree; @@ -15,71 +14,65 @@ import org.openscience.cdk.interfaces.IAtomContainer; import org.openscience.cdk.tools.manipulator.AtomContainerManipulator; -import java.io.IOException; import java.util.ArrayList; import java.util.HashSet; import java.util.List; public class ErtlFunctionalGroupsUtilities { - public static final List buildFunctionalGroupDataSets(final String 
pathToNMRShiftDB, + public static final List buildFunctionalGroupDataSets(final List dataSetList, final String[] nuclei) { final List functionalGroupDataSets = new ArrayList<>(); - try { - final ErtlFunctionalGroupsFinder ertlFunctionalGroupsFinder = new ErtlFunctionalGroupsFinder( - ErtlFunctionalGroupsFinder.Mode.NO_GENERALIZATION); - final List dataSetsFromNMRShiftDB = NMRShiftDB.getDataSetsFromNMRShiftDB(pathToNMRShiftDB, nuclei); - List dataSetList; - List groups; - List fragmentTrees; - ConnectionTree fragmentTree; - IAtomContainer structure; - String atomTypeInSpectrum; - Aromaticity[] aromaticities; - for (final DataSet dataSet : dataSetsFromNMRShiftDB) { - structure = dataSet.getStructure() - .toAtomContainer(); - aromaticities = buildDefaultAromaticities(structure); + final ErtlFunctionalGroupsFinder ertlFunctionalGroupsFinder = new ErtlFunctionalGroupsFinder( + ErtlFunctionalGroupsFinder.Mode.NO_GENERALIZATION); + List subDataSetList; + List groups; + List fragmentTrees; + ConnectionTree fragmentTree; + IAtomContainer structure; + String atomTypeInSpectrum; + Aromaticity[] aromaticities; + for (final DataSet dataSet : dataSetList) { + structure = dataSet.getStructure() + .toAtomContainer(); + aromaticities = buildDefaultAromaticities(structure); + fragmentTrees = new ArrayList<>(); + for (final Aromaticity aromaticity : aromaticities) { + try { + Utils.setAromaticityAndKekulize(structure, aromaticity); + groups = ertlFunctionalGroupsFinder.find(structure, false); + } catch (final IllegalArgumentException | CDKException e) { + e.printStackTrace(); + continue; + } + restoreOriginalEnvironmentalCarbons(groups, structure); fragmentTrees = new ArrayList<>(); - for (final Aromaticity aromaticity : aromaticities) { - try { - Utils.setAromaticityAndKekulize(structure, aromaticity); - groups = ertlFunctionalGroupsFinder.find(structure, false); - } catch (final IllegalArgumentException | CDKException e) { - e.printStackTrace(); - continue; - } - 
restoreOriginalEnvironmentalCarbons(groups, structure); - fragmentTrees = new ArrayList<>(); - for (final IAtomContainer group : groups) { - // each group has to contain at least one atom of specific spectrum - atomTypeInSpectrum = casekit.nmr.utils.Utils.getAtomTypeFromNucleus(dataSet.getSpectrum() - .getNuclei()[0]); - if (atomTypeInSpectrum.equals("H")) { - if (AtomContainerManipulator.getImplicitHydrogenCount(group) - == 0) { - continue; - } - } else if (casekit.nmr.utils.Utils.getAtomTypeIndicesByElement(group, atomTypeInSpectrum) - .isEmpty()) { + for (final IAtomContainer group : groups) { + // each group has to contain at least one atom of specific spectrum + atomTypeInSpectrum = casekit.nmr.utils.Utils.getAtomTypeFromNucleus(dataSet.getSpectrum() + .getNuclei()[0]); + if (atomTypeInSpectrum.equals("H")) { + if (AtomContainerManipulator.getImplicitHydrogenCount(group) + == 0) { continue; } - fragmentTree = Fragmentation.buildFragmentTree(group, 0, null, new HashSet<>(), false); - FragmentationUtilities.adjustNodeKeys(fragmentTree, structure); - FragmentationUtilities.closeRings(fragmentTree, structure); - - fragmentTrees.add(fragmentTree); + } else if (casekit.nmr.utils.Utils.getAtomTypeIndicesByElement(group, atomTypeInSpectrum) + .isEmpty()) { + continue; } + fragmentTree = Fragmentation.buildFragmentTree(group, 0, null, new HashSet<>(), false); + FragmentationUtilities.adjustNodeKeys(fragmentTree, structure); + FragmentationUtilities.closeRings(fragmentTree, structure); + + fragmentTrees.add(fragmentTree); } - FragmentationUtilities.removeDuplicates(fragmentTrees); - dataSetList = Fragmentation.fragmentTreesToSubDataSets(dataSet, fragmentTrees); - if (dataSetList - != null) { - functionalGroupDataSets.addAll(dataSetList); - } } - } catch (final IOException | CDKException e) { - e.printStackTrace(); + FragmentationUtilities.removeDuplicates(fragmentTrees); + subDataSetList = Fragmentation.fragmentTreesToSubDataSets(dataSet, fragmentTrees); + if 
(subDataSetList + != null) { + functionalGroupDataSets.addAll(subDataSetList); + } } return functionalGroupDataSets; From b9cfc9fc34bb6cca0f8406b7acd6d544ef7fcc3b Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Wed, 18 Aug 2021 19:31:44 +0200 Subject: [PATCH 293/405] chore: simplification and renaming of ExtendedConnectionMatrix into ExtendedAdjacencyList --- .../fragmentation/Fragmentation.java | 2 +- src/casekit/nmr/model/DataSet.java | 4 +- .../nmr/model/ExtendedAdjacencyList.java | 187 ++++++++ .../nmr/model/ExtendedConnectionMatrix.java | 431 ------------------ src/casekit/nmr/utils/Utils.java | 4 +- 5 files changed, 192 insertions(+), 436 deletions(-) create mode 100644 src/casekit/nmr/model/ExtendedAdjacencyList.java delete mode 100644 src/casekit/nmr/model/ExtendedConnectionMatrix.java diff --git a/src/casekit/nmr/fragments/fragmentation/Fragmentation.java b/src/casekit/nmr/fragments/fragmentation/Fragmentation.java index 6cc0910..504366e 100644 --- a/src/casekit/nmr/fragments/fragmentation/Fragmentation.java +++ b/src/casekit/nmr/fragments/fragmentation/Fragmentation.java @@ -117,7 +117,7 @@ public static List fragmentTreesToSubDataSets(final DataSet dataSet, substructure = FragmentationUtilities.toAtomContainer(fragmentTree); subDataSet = new DataSet(); - subDataSet.setStructure(new ExtendedConnectionMatrix(substructure)); + subDataSet.setStructure(new ExtendedAdjacencyList(substructure)); subDataSet.setSpectrum(subspectrum); subDataSet.setAssignment(subassignment); diff --git a/src/casekit/nmr/model/DataSet.java b/src/casekit/nmr/model/DataSet.java index 3dd0ae7..22f0603 100644 --- a/src/casekit/nmr/model/DataSet.java +++ b/src/casekit/nmr/model/DataSet.java @@ -15,14 +15,14 @@ @Setter public class DataSet { - private ExtendedConnectionMatrix structure; + private ExtendedAdjacencyList structure; private Spectrum spectrum; private Assignment assignment; private Map meta; public DataSet(final IAtomContainer structure, final Spectrum spectrum, final 
Assignment assignment, final Map meta) { - this.structure = new ExtendedConnectionMatrix(structure); + this.structure = new ExtendedAdjacencyList(structure); this.spectrum = spectrum; this.assignment = assignment; this.meta = new HashMap<>(meta); diff --git a/src/casekit/nmr/model/ExtendedAdjacencyList.java b/src/casekit/nmr/model/ExtendedAdjacencyList.java new file mode 100644 index 0000000..ab47cbf --- /dev/null +++ b/src/casekit/nmr/model/ExtendedAdjacencyList.java @@ -0,0 +1,187 @@ +/* + * The MIT License + * + * Copyright (c) 2019 Michael Wenk [https://github.com/michaelwenk] + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ +package casekit.nmr.model; + +import casekit.nmr.utils.Utils; +import lombok.AllArgsConstructor; +import lombok.Getter; +import lombok.NoArgsConstructor; +import lombok.Setter; +import org.openscience.cdk.graph.matrix.ConnectionMatrix; +import org.openscience.cdk.interfaces.IAtom; +import org.openscience.cdk.interfaces.IAtomContainer; +import org.openscience.cdk.interfaces.IAtomType.Hybridization; +import org.openscience.cdk.interfaces.IBond; +import org.openscience.cdk.silent.Atom; +import org.openscience.cdk.silent.Bond; +import org.openscience.cdk.silent.PseudoAtom; +import org.openscience.cdk.silent.SilentChemObjectBuilder; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +/** + * @author Michael Wenk [https://github.com/michaelwenk] + */ +@NoArgsConstructor +@AllArgsConstructor +@Getter +@Setter +public class ExtendedAdjacencyList { + + private int[][][] adjacencyList; // connected atom index, bond order, bond is in ring, bond is aromatic + private String[] atomTypes; + private Integer[][] atomProperties;// hydrogenCounts, valencies, formalCharges, isInRingAtoms, isAromaticAtoms + private Hybridization[] hybridizations; + private int bondCount; + + + public ExtendedAdjacencyList(final IAtomContainer ac) { + final double[][] connectionMatrix = ConnectionMatrix.getMatrix(ac); + this.adjacencyList = new int[connectionMatrix.length][][]; + List connectedAtomsList; + int[][] temp; + IBond bond; + for (int i = 0; i + < connectionMatrix.length; i++) { + connectedAtomsList = new ArrayList<>(); + for (int j = 0; j + < connectionMatrix[i].length; j++) { + if (connectionMatrix[i][j] + >= 1) { + bond = ac.getBond(ac.getAtom(i), ac.getAtom(j)); + connectedAtomsList.add(new int[]{j, (int) connectionMatrix[i][j], bond.isInRing() + ? 1 + : 0, bond.isAromatic() + ? 
1 + : 0}); + } + } + temp = new int[connectedAtomsList.size()][]; + for (int k = 0; k + < connectedAtomsList.size(); k++) { + temp[k] = connectedAtomsList.get(k); + } + this.adjacencyList[i] = temp; + } + this.atomTypes = new String[this.adjacencyList.length]; + this.hybridizations = new Hybridization[this.adjacencyList.length]; + this.atomProperties = new Integer[this.adjacencyList.length][]; + + IAtom atom; + for (int i = 0; i + < this.adjacencyList.length; i++) { + atom = ac.getAtom(i); + this.setAtomProperties(i, atom.getSymbol(), atom.getImplicitHydrogenCount(), atom.getValency(), + atom.getFormalCharge(), atom.isInRing(), atom.isAromatic(), atom.getHybridization()); + } + this.updateBondCount(); + } + + private void setAtomProperties(final int atomIndex, final String atomType, final Integer implicitHydrogenCount, + final Integer valency, final Integer formalCharge, final Boolean isInRing, + final Boolean isAromatic, final Hybridization hybridization) { + this.atomTypes[atomIndex] = atomType; + this.atomProperties[atomIndex] = new Integer[5]; + this.atomProperties[atomIndex][0] = implicitHydrogenCount; + this.atomProperties[atomIndex][1] = valency; + this.atomProperties[atomIndex][2] = formalCharge; + this.atomProperties[atomIndex][3] = isInRing + ? 1 + : 0; + this.atomProperties[atomIndex][4] = isAromatic + ? 
1 + : 0; + this.hybridizations[atomIndex] = hybridization; + } + + private void updateBondCount() { + int bondCounter = 0; + for (int i = 0; i + < this.adjacencyList.length; i++) { + bondCounter += this.adjacencyList[i].length; + } + this.bondCount = bondCounter + / 2; + } + + public int getAtomCount() { + return this.adjacencyList.length; + } + + + public IAtomContainer toAtomContainer() { + final IAtomContainer ac = SilentChemObjectBuilder.getInstance() + .newAtomContainer(); + IAtom atom; + for (int i = 0; i + < this.adjacencyList.length; i++) { + if (this.atomTypes[i].equals("R")) { + atom = new PseudoAtom("R"); + } else { + atom = new Atom(this.atomTypes[i]); + } + atom.setImplicitHydrogenCount(this.atomProperties[i][0]); + atom.setValency(this.atomProperties[i][1]); + atom.setFormalCharge(this.atomProperties[i][2]); + atom.setIsInRing(this.atomProperties[i][3] + == 1); + atom.setIsAromatic(this.atomProperties[i][4] + == 1); + atom.setHybridization(this.hybridizations[i]); + + ac.addAtom(atom); + } + IBond bond; + for (int i = 0; i + < this.adjacencyList.length; i++) { + for (int k = 0; k + < this.adjacencyList[i].length; k++) { + if (ac.getBond(ac.getAtom(i), ac.getAtom(this.adjacencyList[i][k][0])) + == null) { + bond = new Bond(ac.getAtom(i), ac.getAtom(this.adjacencyList[i][k][0]), + Utils.getBondOrder(this.adjacencyList[i][k][1])); + bond.setIsInRing(this.adjacencyList[i][k][2] + == 1); + bond.setIsAromatic(this.adjacencyList[i][k][3] + == 1); + ac.addBond(bond); + } + } + } + + return ac; + } + + public ExtendedAdjacencyList buildClone() { + return new ExtendedAdjacencyList(this.toAtomContainer()); + } + + @Override + public String toString() { + return "ExtendedAdjacencyList{" + + "atomCount=" + + this.getAtomCount() + + ", bondCount=" + + this.bondCount + + ", adjacencyList=" + + Arrays.deepToString(this.adjacencyList) + + ", atomTypes=" + + Arrays.toString(this.atomTypes) + + ", atomProperties=" + + Arrays.deepToString(this.atomProperties) + + ", 
hybridizations=" + + Arrays.toString(this.hybridizations) + + '}'; + } +} diff --git a/src/casekit/nmr/model/ExtendedConnectionMatrix.java b/src/casekit/nmr/model/ExtendedConnectionMatrix.java deleted file mode 100644 index a5bd652..0000000 --- a/src/casekit/nmr/model/ExtendedConnectionMatrix.java +++ /dev/null @@ -1,431 +0,0 @@ -/* - * The MIT License - * - * Copyright (c) 2019 Michael Wenk [https://github.com/michaelwenk] - * - * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ -package casekit.nmr.model; - -import casekit.nmr.utils.Utils; -import lombok.AllArgsConstructor; -import lombok.Getter; -import lombok.NoArgsConstructor; -import lombok.Setter; -import org.openscience.cdk.graph.matrix.ConnectionMatrix; -import org.openscience.cdk.interfaces.IAtom; -import org.openscience.cdk.interfaces.IAtomContainer; -import org.openscience.cdk.interfaces.IAtomType.Hybridization; -import org.openscience.cdk.interfaces.IBond; -import org.openscience.cdk.silent.Atom; -import org.openscience.cdk.silent.Bond; -import org.openscience.cdk.silent.PseudoAtom; -import org.openscience.cdk.silent.SilentChemObjectBuilder; - -import java.util.Arrays; - -/** - * @author Michael Wenk [https://github.com/michaelwenk] - */ -@NoArgsConstructor -@AllArgsConstructor -@Getter -@Setter -public class ExtendedConnectionMatrix { - - private double[][] connectionMatrix; - private String[] atomTypes; - private Integer[][] atomPropertiesNumeric;// hydrogenCounts, valencies, formalCharges; - private Hybridization[] hybridizations; - private Boolean[][] atomPropertiesBoolean;// isInRingAtoms, isAromaticAtoms; - private Boolean[][][] bondProperties; - private int bondCount; - - - public ExtendedConnectionMatrix(final IAtomContainer ac) { - this.connectionMatrix = ConnectionMatrix.getMatrix(ac); - this.atomTypes = new String[this.connectionMatrix.length]; - this.hybridizations = new Hybridization[this.connectionMatrix.length]; - this.atomPropertiesNumeric = new Integer[this.connectionMatrix.length][]; - this.atomPropertiesBoolean = new Boolean[this.connectionMatrix.length][]; - this.bondProperties = new Boolean[this.connectionMatrix.length][][]; - - this.init(ac); - } - - private void init(final IAtomContainer ac) { - IAtom atom1, atom2; - IBond bond; - for (int i = 0; i - < this.connectionMatrix.length; i++) { - atom1 = ac.getAtom(i); - this.setAtomProperties(i, atom1.getSymbol(), atom1.getImplicitHydrogenCount(), atom1.getValency(), - atom1.getFormalCharge(), 
atom1.isInRing(), atom1.isAromatic(), - atom1.getHybridization()); - - this.bondProperties[i] = new Boolean[this.connectionMatrix.length][2]; - for (int k = 0; k - < this.connectionMatrix.length; k++) { - atom2 = ac.getAtom(k); - bond = ac.getBond(atom1, atom2); - if (bond - != null) { - this.setBondProperty(i, k, bond.isInRing(), bond.isAromatic()); - } - } - } - this.updateBondCount(); - } - - private void init(final ExtendedConnectionMatrix extendedConnectionMatrix) { - for (int i = 0; i - < this.getAtomCount(); i++) { - if (i - < extendedConnectionMatrix.getAtomCount()) { - this.setAtomProperties(i, extendedConnectionMatrix.getAtomType(i), - extendedConnectionMatrix.getHydrogenCount(i), - extendedConnectionMatrix.getValency(i), - extendedConnectionMatrix.getFormalCharge(i), - extendedConnectionMatrix.isInRing(i), extendedConnectionMatrix.isAromatic(i), - extendedConnectionMatrix.getHybridization(i)); - - - } - this.bondProperties[i] = new Boolean[this.getAtomCount()][2]; - if (i - < extendedConnectionMatrix.getAtomCount()) { - for (int k = 0; k - < extendedConnectionMatrix.getAtomCount(); k++) { - this.connectionMatrix[i][k] = extendedConnectionMatrix.getBondOrder(i, k); - this.setBondProperty(i, k, extendedConnectionMatrix.isInRing(i, k), - extendedConnectionMatrix.isAromatic(i, k)); - - } - } else { - for (int k = 0; k - < this.getAtomCount(); k++) { - this.connectionMatrix[i][k] = 0.0; - // this.setBondProperty(i, k, null, null); - } - } - } - this.updateBondCount(); - } - - private void extendConnectionMatrix() { - this.extendConnectionMatrix(1); - } - - private void extendConnectionMatrix(final int extensionSize) { - this.connectionMatrix = new double[this.getAtomCount() - + extensionSize][this.getAtomCount() - + extensionSize]; - this.atomTypes = new String[this.connectionMatrix.length]; - this.hybridizations = new Hybridization[this.connectionMatrix.length]; - this.atomPropertiesNumeric = new Integer[this.connectionMatrix.length][]; - 
this.atomPropertiesBoolean = new Boolean[this.connectionMatrix.length][]; - this.bondProperties = new Boolean[this.connectionMatrix.length][][]; - } - - public void addAtom(final String atomType, final Integer implicitHydrogenCount, final Integer valency, - final Integer formalCharge, final Boolean isInRing, final Boolean isAromatic, - final Hybridization hybridization) { - // create backup object - final ExtendedConnectionMatrix extendedConnectionMatrixBackup = this.buildClone(); - // extend the sizes of all matrices by one - this.extendConnectionMatrix(); - // fill all information in again from backup object - this.init(extendedConnectionMatrixBackup); - // set information for new atom - this.setAtomProperties(this.getAtomCount() - - 1, atomType, implicitHydrogenCount, valency, formalCharge, isInRing, - isAromatic, hybridization); - } - - public boolean addBond(final int atomIndex1, final int atomIndex2, final double order, final Boolean isInRing, - final Boolean isAromatic) { - if (!this.hasAtom(atomIndex1) - || !this.hasAtom(atomIndex2)) { - return false; - } - if (!this.isValidBondAddition(atomIndex1, atomIndex2, order, isAromatic)) { - return false; - } - this.connectionMatrix[atomIndex1][atomIndex2] = order; - this.connectionMatrix[atomIndex2][atomIndex1] = order; - this.setBondProperty(atomIndex1, atomIndex2, isInRing, isAromatic); - this.setBondProperty(atomIndex2, atomIndex1, isInRing, isAromatic); - - this.updateBondCount(); - - return true; - } - - public boolean isValidBondAddition(final int atomIndex1, final int atomIndex2, final double order, - final boolean isAromatic) { - if (!this.hasAtom(atomIndex1) - || !this.hasAtom(atomIndex2)) { - return false; - } - - return this.isValidBondAddition(atomIndex1, order, isAromatic) - && this.isValidBondAddition(atomIndex2, order, isAromatic); - } - - public boolean isValidBondAddition(final int atomIndex, final double order, final boolean isAromatic) { - float bondOrderSum = this.getBondOrderSum(atomIndex, 
true); - if (isAromatic) { - bondOrderSum += 1.5; - } else { - bondOrderSum += order; - } - // -1 for cases with heterocyclic aromatics, like the N in the small aromatic ring in coffein if we want to add the bond to the CH3 group - if (this.isAromatic(atomIndex) - && (!this.getAtomType(atomIndex) - .equals("C"))) { - bondOrderSum -= 1; - } - - return bondOrderSum - <= this.getValency(atomIndex); - } - - private void setAtomProperties(final int atomIndex, final String atomType, final Integer implicitHydrogenCount, - final Integer valency, final Integer formalCharge, final Boolean isInRing, - final Boolean isAromatic, final Hybridization hybridization) { - this.atomTypes[atomIndex] = atomType; - this.atomPropertiesNumeric[atomIndex] = new Integer[3]; - this.atomPropertiesNumeric[atomIndex][0] = implicitHydrogenCount; - this.atomPropertiesNumeric[atomIndex][1] = valency; - this.atomPropertiesNumeric[atomIndex][2] = formalCharge; - this.atomPropertiesBoolean[atomIndex] = new Boolean[2]; - this.atomPropertiesBoolean[atomIndex][0] = isInRing; - this.atomPropertiesBoolean[atomIndex][1] = isAromatic; - this.hybridizations[atomIndex] = hybridization; - } - - private void setBondProperty(final int atomIndex1, final int atomIndex2, final Boolean isInRing, - final Boolean isAromatic) { - this.bondProperties[atomIndex1][atomIndex2][0] = isInRing; - this.bondProperties[atomIndex1][atomIndex2][1] = isAromatic; - } - - private void updateBondCount() { - int bondCounter = 0; - for (int i = 0; i - < this.getAtomCount(); i++) { - for (int j = i - + 1; j - < this.getAtomCount(); j++) { - if (this.connectionMatrix[i][j] - > 0.0) { - bondCounter++; - } - } - } - this.bondCount = bondCounter; - } - - public Boolean hasBond(final int atomIndex1, final int atomIndex2) { - if (!this.hasAtom(atomIndex1) - || !this.hasAtom(atomIndex2)) { - return null; - } - - return this.getBondOrder(atomIndex1, atomIndex2) - > 0.0; - } - - public Double getBondOrder(final int atomIndex1, final int 
atomIndex2) { - if (!this.hasAtom(atomIndex1) - || !this.hasAtom(atomIndex2)) { - return null; - } - - return this.connectionMatrix[atomIndex1][atomIndex2]; - } - - public Float getBondOrderSum(final int atomIndex, final boolean includeHydrogens) { - if (!this.hasAtom(atomIndex)) { - return null; - } - float bondOrderSum = (float) 0.0; - for (int j = 0; j - < this.connectionMatrix[atomIndex].length; j++) { - if ((this.isAromatic(atomIndex, j) - != null) - && this.isAromatic(atomIndex, j)) { - bondOrderSum += 1.5; - } else { - bondOrderSum += this.getBondOrder(atomIndex, j); - } - } - if (includeHydrogens) { - bondOrderSum += this.getHydrogenCount(atomIndex); - } - - return bondOrderSum; - } - - public String getAtomType(final int atomIndex) { - if (!this.hasAtom(atomIndex)) { - return null; - } - - return this.atomTypes[atomIndex]; - } - - public Integer getHydrogenCount(final int atomIndex) { - if (!this.hasAtom(atomIndex)) { - return null; - } - - return this.atomPropertiesNumeric[atomIndex][0]; - } - - public Integer getValency(final int atomIndex) { - if (!this.hasAtom(atomIndex)) { - return null; - } - - return this.atomPropertiesNumeric[atomIndex][1]; - } - - public Integer getFormalCharge(final int atomIndex) { - if (!this.hasAtom(atomIndex)) { - return null; - } - - return this.atomPropertiesNumeric[atomIndex][2]; - } - - public Boolean isInRing(final int atomIndex) { - if (!this.hasAtom(atomIndex)) { - return null; - } - - return this.atomPropertiesBoolean[atomIndex][0]; - } - - public Boolean isAromatic(final int atomIndex) { - if (!this.hasAtom(atomIndex)) { - return null; - } - - return this.atomPropertiesBoolean[atomIndex][1]; - } - - public Hybridization getHybridization(final int atomIndex) { - if (!this.hasAtom(atomIndex)) { - return null; - } - - return this.hybridizations[atomIndex]; - } - - public Boolean isInRing(final int atomIndex1, final int atomIndex2) { - if (!this.hasAtom(atomIndex1) - || !this.hasAtom(atomIndex2)) { - return null; - } - - 
return this.bondProperties[atomIndex1][atomIndex2][0]; - } - - public Boolean isAromatic(final int atomIndex1, final int atomIndex2) { - if (!this.hasAtom(atomIndex1) - || !this.hasAtom(atomIndex2)) { - return null; - } - - return this.bondProperties[atomIndex1][atomIndex2][1]; - } - - public int getAtomCount() { - return this.connectionMatrix.length; - } - - public Boolean isUnsaturated(final int atomIndex) { - if (!this.hasAtom(atomIndex)) { - return null; - } - - return this.getBondOrderSum(atomIndex, true) - < this.getValency(atomIndex); - } - - public boolean hasAtom(final int atomIndex) { - return (atomIndex - >= 0) - && (atomIndex - < this.getAtomCount()); - } - - public IAtomContainer toAtomContainer() { - final IAtomContainer ac = SilentChemObjectBuilder.getInstance() - .newAtomContainer(); - IAtom atom; - for (int i = 0; i - < this.connectionMatrix.length; i++) { - if (this.atomTypes[i].equals("R")) { - atom = new PseudoAtom("R"); - } else { - atom = new Atom(this.atomTypes[i]); - } - atom.setImplicitHydrogenCount(this.atomPropertiesNumeric[i][0]); - atom.setValency(this.atomPropertiesNumeric[i][1]); - atom.setFormalCharge(this.atomPropertiesNumeric[i][2]); - atom.setHybridization(this.hybridizations[i]); - atom.setIsInRing(this.atomPropertiesBoolean[i][0]); - atom.setIsAromatic(this.atomPropertiesBoolean[i][1]); - - ac.addAtom(atom); - } - IBond bond; - for (int i = 0; i - < this.bondProperties.length; i++) { - for (int k = i - + 1; k - < this.bondProperties.length; k++) { - if (this.connectionMatrix[i][k] - > 0.0) { - bond = new Bond(ac.getAtom(i), ac.getAtom(k), - Utils.getBondOrder((int) this.connectionMatrix[i][k])); - bond.setIsInRing(this.bondProperties[i][k][0]); - bond.setIsAromatic(this.bondProperties[i][k][1]); - ac.addBond(bond); - } - } - } - - return ac; - } - - public ExtendedConnectionMatrix buildClone() { - return new ExtendedConnectionMatrix(this.toAtomContainer()); - } - - @Override - public String toString() { - return 
"ExtendedConnectionMatrix{" - + "connectionMatrix=" - + Arrays.toString(this.connectionMatrix) - + ", atomTypes=" - + Arrays.toString(this.atomTypes) - + ", atomPropertiesNumeric=" - + Arrays.toString(this.atomPropertiesNumeric) - + ", hybridizations=" - + Arrays.toString(this.hybridizations) - + ", atomPropertiesBoolean=" - + Arrays.toString(this.atomPropertiesBoolean) - + ", bondProperties=" - + Arrays.toString(this.bondProperties) - + ", bondCount=" - + this.bondCount - + '}'; - } -} diff --git a/src/casekit/nmr/utils/Utils.java b/src/casekit/nmr/utils/Utils.java index 28d9d9b..7c73619 100644 --- a/src/casekit/nmr/utils/Utils.java +++ b/src/casekit/nmr/utils/Utils.java @@ -1,7 +1,7 @@ package casekit.nmr.utils; import casekit.nmr.model.DataSet; -import casekit.nmr.model.ExtendedConnectionMatrix; +import casekit.nmr.model.ExtendedAdjacencyList; import casekit.nmr.model.Spectrum; import casekit.nmr.model.nmrdisplayer.Correlation; import org.openscience.cdk.aromaticity.Aromaticity; @@ -568,7 +568,7 @@ public static DataSet atomContainerToDataSet(final IAtomContainer structure) thr e.printStackTrace(); } final DataSet dataSet = new DataSet(); - dataSet.setStructure(new ExtendedConnectionMatrix(structure)); + dataSet.setStructure(new ExtendedAdjacencyList(structure)); dataSet.setMeta(meta); return dataSet; From e03a4040db8211e0e20b4470627bf46aebc70548 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Thu, 19 Aug 2021 09:57:54 +0200 Subject: [PATCH 294/405] chore: optimisation of ExtendedAdjacencyList --- .../nmr/model/ExtendedAdjacencyList.java | 112 ++++++++---------- 1 file changed, 49 insertions(+), 63 deletions(-) diff --git a/src/casekit/nmr/model/ExtendedAdjacencyList.java b/src/casekit/nmr/model/ExtendedAdjacencyList.java index ab47cbf..a7120e5 100644 --- a/src/casekit/nmr/model/ExtendedAdjacencyList.java +++ b/src/casekit/nmr/model/ExtendedAdjacencyList.java @@ -19,7 +19,7 @@ import org.openscience.cdk.graph.matrix.ConnectionMatrix; import 
org.openscience.cdk.interfaces.IAtom; import org.openscience.cdk.interfaces.IAtomContainer; -import org.openscience.cdk.interfaces.IAtomType.Hybridization; +import org.openscience.cdk.interfaces.IAtomType; import org.openscience.cdk.interfaces.IBond; import org.openscience.cdk.silent.Atom; import org.openscience.cdk.silent.Bond; @@ -39,16 +39,12 @@ @Setter public class ExtendedAdjacencyList { - private int[][][] adjacencyList; // connected atom index, bond order, bond is in ring, bond is aromatic - private String[] atomTypes; - private Integer[][] atomProperties;// hydrogenCounts, valencies, formalCharges, isInRingAtoms, isAromaticAtoms - private Hybridization[] hybridizations; - private int bondCount; - + private int[][][] bondProperties; // connected atom index, bond order, bond is in ring, bond is aromatic + private Integer[][] atomProperties; // element symbol, hybridization, implicitHydrogenCount, valency, formalCharge, isInRingAtom, isAromaticAtom public ExtendedAdjacencyList(final IAtomContainer ac) { final double[][] connectionMatrix = ConnectionMatrix.getMatrix(ac); - this.adjacencyList = new int[connectionMatrix.length][][]; + this.bondProperties = new int[connectionMatrix.length][][]; List connectedAtomsList; int[][] temp; IBond bond; @@ -72,88 +68,82 @@ public ExtendedAdjacencyList(final IAtomContainer ac) { < connectedAtomsList.size(); k++) { temp[k] = connectedAtomsList.get(k); } - this.adjacencyList[i] = temp; + this.bondProperties[i] = temp; } - this.atomTypes = new String[this.adjacencyList.length]; - this.hybridizations = new Hybridization[this.adjacencyList.length]; - this.atomProperties = new Integer[this.adjacencyList.length][]; + this.atomProperties = new Integer[this.bondProperties.length][]; IAtom atom; for (int i = 0; i - < this.adjacencyList.length; i++) { + < this.bondProperties.length; i++) { atom = ac.getAtom(i); - this.setAtomProperties(i, atom.getSymbol(), atom.getImplicitHydrogenCount(), atom.getValency(), - atom.getFormalCharge(), 
atom.isInRing(), atom.isAromatic(), atom.getHybridization()); + this.atomProperties[i] = new Integer[7]; + this.atomProperties[i][0] = atom.getSymbol() + .equals("R") + ? -1 + : atom.getAtomicNumber(); + this.atomProperties[i][1] = atom.getHybridization() + .ordinal(); + this.atomProperties[i][2] = atom.getImplicitHydrogenCount(); + this.atomProperties[i][3] = atom.getValency(); + this.atomProperties[i][4] = atom.getFormalCharge(); + this.atomProperties[i][5] = atom.isInRing() + ? 1 + : 0; + this.atomProperties[i][6] = atom.isAromatic() + ? 1 + : 0; } - this.updateBondCount(); } - private void setAtomProperties(final int atomIndex, final String atomType, final Integer implicitHydrogenCount, - final Integer valency, final Integer formalCharge, final Boolean isInRing, - final Boolean isAromatic, final Hybridization hybridization) { - this.atomTypes[atomIndex] = atomType; - this.atomProperties[atomIndex] = new Integer[5]; - this.atomProperties[atomIndex][0] = implicitHydrogenCount; - this.atomProperties[atomIndex][1] = valency; - this.atomProperties[atomIndex][2] = formalCharge; - this.atomProperties[atomIndex][3] = isInRing - ? 1 - : 0; - this.atomProperties[atomIndex][4] = isAromatic - ? 
1 - : 0; - this.hybridizations[atomIndex] = hybridization; + public int getAtomCount() { + return this.bondProperties.length; } - private void updateBondCount() { + public int getBondCount() { int bondCounter = 0; for (int i = 0; i - < this.adjacencyList.length; i++) { - bondCounter += this.adjacencyList[i].length; + < this.bondProperties.length; i++) { + bondCounter += this.bondProperties[i].length; } - this.bondCount = bondCounter + return bondCounter / 2; } - public int getAtomCount() { - return this.adjacencyList.length; - } - - public IAtomContainer toAtomContainer() { final IAtomContainer ac = SilentChemObjectBuilder.getInstance() .newAtomContainer(); IAtom atom; for (int i = 0; i - < this.adjacencyList.length; i++) { - if (this.atomTypes[i].equals("R")) { + < this.bondProperties.length; i++) { + if (this.atomProperties[i][0] + == -1) { atom = new PseudoAtom("R"); } else { - atom = new Atom(this.atomTypes[i]); + atom = new Atom(this.atomProperties[i][0]); } - atom.setImplicitHydrogenCount(this.atomProperties[i][0]); - atom.setValency(this.atomProperties[i][1]); - atom.setFormalCharge(this.atomProperties[i][2]); - atom.setIsInRing(this.atomProperties[i][3] + atom.setHybridization(IAtomType.Hybridization.values()[this.atomProperties[i][1]]); + atom.setImplicitHydrogenCount(this.atomProperties[i][2]); + atom.setValency(this.atomProperties[i][3]); + atom.setFormalCharge(this.atomProperties[i][4]); + atom.setIsInRing(this.atomProperties[i][5] == 1); - atom.setIsAromatic(this.atomProperties[i][4] + atom.setIsAromatic(this.atomProperties[i][6] == 1); - atom.setHybridization(this.hybridizations[i]); ac.addAtom(atom); } IBond bond; for (int i = 0; i - < this.adjacencyList.length; i++) { + < this.bondProperties.length; i++) { for (int k = 0; k - < this.adjacencyList[i].length; k++) { - if (ac.getBond(ac.getAtom(i), ac.getAtom(this.adjacencyList[i][k][0])) + < this.bondProperties[i].length; k++) { + if (ac.getBond(ac.getAtom(i), ac.getAtom(this.bondProperties[i][k][0])) 
== null) { - bond = new Bond(ac.getAtom(i), ac.getAtom(this.adjacencyList[i][k][0]), - Utils.getBondOrder(this.adjacencyList[i][k][1])); - bond.setIsInRing(this.adjacencyList[i][k][2] + bond = new Bond(ac.getAtom(i), ac.getAtom(this.bondProperties[i][k][0]), + Utils.getBondOrder(this.bondProperties[i][k][1])); + bond.setIsInRing(this.bondProperties[i][k][2] == 1); - bond.setIsAromatic(this.adjacencyList[i][k][3] + bond.setIsAromatic(this.bondProperties[i][k][3] == 1); ac.addBond(bond); } @@ -173,15 +163,11 @@ public String toString() { + "atomCount=" + this.getAtomCount() + ", bondCount=" - + this.bondCount - + ", adjacencyList=" - + Arrays.deepToString(this.adjacencyList) - + ", atomTypes=" - + Arrays.toString(this.atomTypes) + + this.getBondCount() + + ", bondProperties=" + + Arrays.deepToString(this.bondProperties) + ", atomProperties=" + Arrays.deepToString(this.atomProperties) - + ", hybridizations=" - + Arrays.toString(this.hybridizations) + '}'; } } From 9eecafe288e2c42d7c69e5891490a99fee34932b Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Thu, 19 Aug 2021 10:19:46 +0200 Subject: [PATCH 295/405] fix: check for null value of hybridisation state --- .../nmr/model/ExtendedAdjacencyList.java | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/src/casekit/nmr/model/ExtendedAdjacencyList.java b/src/casekit/nmr/model/ExtendedAdjacencyList.java index a7120e5..d2c8250 100644 --- a/src/casekit/nmr/model/ExtendedAdjacencyList.java +++ b/src/casekit/nmr/model/ExtendedAdjacencyList.java @@ -82,7 +82,10 @@ public ExtendedAdjacencyList(final IAtomContainer ac) { ? -1 : atom.getAtomicNumber(); this.atomProperties[i][1] = atom.getHybridization() - .ordinal(); + == null + ? 
-1 + : atom.getHybridization() + .ordinal(); this.atomProperties[i][2] = atom.getImplicitHydrogenCount(); this.atomProperties[i][3] = atom.getValency(); this.atomProperties[i][4] = atom.getFormalCharge(); @@ -115,13 +118,14 @@ public IAtomContainer toAtomContainer() { IAtom atom; for (int i = 0; i < this.bondProperties.length; i++) { - if (this.atomProperties[i][0] - == -1) { - atom = new PseudoAtom("R"); - } else { - atom = new Atom(this.atomProperties[i][0]); - } - atom.setHybridization(IAtomType.Hybridization.values()[this.atomProperties[i][1]]); + atom = this.atomProperties[i][0] + == -1 + ? new PseudoAtom("R") + : new Atom(this.atomProperties[i][0]); + atom.setHybridization(this.atomProperties[i][1] + == -1 + ? null + : IAtomType.Hybridization.values()[this.atomProperties[i][1]]); atom.setImplicitHydrogenCount(this.atomProperties[i][2]); atom.setValency(this.atomProperties[i][3]); atom.setFormalCharge(this.atomProperties[i][4]); From 856fe5ac8099f8edb76708eccf8d9fd3137b573e Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Sun, 22 Aug 2021 18:11:54 +0200 Subject: [PATCH 296/405] fix: renaming to atomCount and bondCount to avoid storage in JSON format --- src/casekit/nmr/model/ExtendedAdjacencyList.java | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/src/casekit/nmr/model/ExtendedAdjacencyList.java b/src/casekit/nmr/model/ExtendedAdjacencyList.java index d2c8250..5e7f115 100644 --- a/src/casekit/nmr/model/ExtendedAdjacencyList.java +++ b/src/casekit/nmr/model/ExtendedAdjacencyList.java @@ -98,11 +98,11 @@ public ExtendedAdjacencyList(final IAtomContainer ac) { } } - public int getAtomCount() { + public int atomCount() { return this.bondProperties.length; } - public int getBondCount() { + public int bondCount() { int bondCounter = 0; for (int i = 0; i < this.bondProperties.length; i++) { @@ -164,11 +164,7 @@ public ExtendedAdjacencyList buildClone() { @Override public String toString() { return "ExtendedAdjacencyList{" - + 
"atomCount=" - + this.getAtomCount() - + ", bondCount=" - + this.getBondCount() - + ", bondProperties=" + + "bondProperties=" + Arrays.deepToString(this.bondProperties) + ", atomProperties=" + Arrays.deepToString(this.atomProperties) From ec9252179737854e4f26828432397a83581d5d61 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Sun, 22 Aug 2021 18:18:22 +0200 Subject: [PATCH 297/405] chore: optimisations on Spectrum class --- .../nmr/analysis/HOSECodeShiftStatistics.java | 3 +- src/casekit/nmr/dbservice/NMRShiftDB.java | 9 +-- .../fragmentation/Fragmentation.java | 14 +++-- src/casekit/nmr/model/Spectrum.java | 56 ++++++------------- .../nmr/model/nmrdisplayer/Spectrum.java | 8 +-- src/casekit/nmr/prediction/Prediction.java | 11 ++-- src/casekit/nmr/utils/Utils.java | 2 +- 7 files changed, 44 insertions(+), 59 deletions(-) diff --git a/src/casekit/nmr/analysis/HOSECodeShiftStatistics.java b/src/casekit/nmr/analysis/HOSECodeShiftStatistics.java index c338db5..a9b674e 100644 --- a/src/casekit/nmr/analysis/HOSECodeShiftStatistics.java +++ b/src/casekit/nmr/analysis/HOSECodeShiftStatistics.java @@ -86,7 +86,8 @@ public static Map>> collectHOSECodeShifts(final } } solvent = dataSet.getSpectrum() - .getSolvent(); + .getMeta() + .get("solvent"); if (solvent == null || solvent.equals("")) { diff --git a/src/casekit/nmr/dbservice/NMRShiftDB.java b/src/casekit/nmr/dbservice/NMRShiftDB.java index f10c3a0..db717df 100644 --- a/src/casekit/nmr/dbservice/NMRShiftDB.java +++ b/src/casekit/nmr/dbservice/NMRShiftDB.java @@ -124,7 +124,7 @@ public static List getDataSetsFromNMRShiftDB(final String pathToNMRShif casekit.nmr.utils.Utils.setAromaticityAndKekulize(structure); meta = new HashMap<>(); - meta.put("title", structure.getTitle()); + // meta.put("title", structure.getTitle()); meta.put("id", structure.getProperty("nmrshiftdb2 ID")); mf = casekit.nmr.utils.Utils.getMolecularFormulaFromAtomContainer(structure); meta.put("mfOriginal", 
casekit.nmr.utils.Utils.molecularFormularToString(mf)); @@ -167,7 +167,8 @@ public static List getDataSetsFromNMRShiftDB(final String pathToNMRShif } if (structure.getProperty("Solvent") != null) { - spectrum.setSolvent(getSolvent(structure.getProperty("Solvent"), spectrumIndexInRecord)); + spectrum.addMetaInfo("solvent", + getSolvent(structure.getProperty("Solvent"), spectrumIndexInRecord)); } if (structure.getProperty("Field Strength [MHz]") != null) { @@ -177,9 +178,9 @@ public static List getDataSetsFromNMRShiftDB(final String pathToNMRShif if (fieldStrength.startsWith(spectrumIndexInRecord + ":")) { try { - spectrum.setSpectrometerFrequency(Double.parseDouble(fieldStrength.split( + spectrum.addMetaInfo("spectrometerFrequency", fieldStrength.split( spectrumIndexInRecord - + ":")[1])); + + ":")[1]); } catch (final NumberFormatException e) { // e.printStackTrace(); } diff --git a/src/casekit/nmr/fragments/fragmentation/Fragmentation.java b/src/casekit/nmr/fragments/fragmentation/Fragmentation.java index 504366e..aa46fe7 100644 --- a/src/casekit/nmr/fragments/fragmentation/Fragmentation.java +++ b/src/casekit/nmr/fragments/fragmentation/Fragmentation.java @@ -110,10 +110,12 @@ public static List fragmentTreesToSubDataSets(final DataSet dataSet, } } } - subspectrum.setSolvent(dataSet.getSpectrum() - .getSolvent()); - subspectrum.setSpectrometerFrequency(dataSet.getSpectrum() - .getSpectrometerFrequency()); + subspectrum.addMetaInfo("solvent", dataSet.getSpectrum() + .getMeta() + .get("solvent")); + subspectrum.addMetaInfo("spectrometerFrequency", dataSet.getSpectrum() + .getMeta() + .get("spectrometerFrequency")); substructure = FragmentationUtilities.toAtomContainer(fragmentTree); subDataSet = new DataSet(); @@ -129,8 +131,8 @@ public static List fragmentTreesToSubDataSets(final DataSet dataSet, } catch (final CDKException e) { e.printStackTrace(); } - meta.put("title", dataSet.getMeta() - .get("title")); + // meta.put("title", dataSet.getMeta() + // 
.get("title")); meta.put("id", dataSet.getMeta() .get("id")); meta.put("mf", Utils.molecularFormularToString(Utils.getMolecularFormulaFromAtomContainer(substructure))); diff --git a/src/casekit/nmr/model/Spectrum.java b/src/casekit/nmr/model/Spectrum.java index 2db2734..5b5c905 100644 --- a/src/casekit/nmr/model/Spectrum.java +++ b/src/casekit/nmr/model/Spectrum.java @@ -33,10 +33,7 @@ import lombok.NoArgsConstructor; import lombok.Setter; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Comparator; -import java.util.List; +import java.util.*; import java.util.stream.Collectors; /** @@ -49,25 +46,22 @@ public class Spectrum { private String[] nuclei; - /** - * An arbitrary name or description that can be assigned to this spectrum for identification purposes. - */ - private String description; - /** - * An arbitrary name to identify the type of this spectrum, like COSY, NOESY, HSQC, etc. I - * decided not to provide static Strings with given experiment type since the there are - * numerous experiments yielding basically identical information having different names - */ - private String specType; - /** - * The proton frequency of the spectrometer used to record this spectrum. 
- */ - private Double spectrometerFrequency; - private String solvent; - private String standard; + private Map meta; private List signals; private int signalCount; + public void addMetaInfo(final String key, final String value) { + if (this.meta + == null) { + this.meta = new HashMap<>(); + } + this.meta.put(key, value); + } + + public void removeMetaInfo(final String key) { + this.meta.remove(key); + } + public int getNDim() { return this.getNuclei().length; } @@ -431,11 +425,7 @@ public Spectrum buildClone() { clone.addSignal(this.getSignal(i) .buildClone()); } - clone.setDescription(this.description); - clone.setSolvent(this.solvent); - clone.setSpecType(this.specType); - clone.setSpectrometerFrequency(this.spectrometerFrequency); - clone.setStandard(this.standard); + clone.setMeta(new HashMap<>(this.getMeta())); return clone; } @@ -445,20 +435,8 @@ public String toString() { return "Spectrum{" + "nuclei=" + Arrays.toString(this.nuclei) - + ", description='" - + this.description - + '\'' - + ", specType='" - + this.specType - + '\'' - + ", spectrometerFrequency=" - + this.spectrometerFrequency - + ", solvent='" - + this.solvent - + '\'' - + ", standard='" - + this.standard - + '\'' + + ", meta=" + + this.meta + ", signals=" + this.signals + ", signalCount=" diff --git a/src/casekit/nmr/model/nmrdisplayer/Spectrum.java b/src/casekit/nmr/model/nmrdisplayer/Spectrum.java index 816d132..f92d25c 100644 --- a/src/casekit/nmr/model/nmrdisplayer/Spectrum.java +++ b/src/casekit/nmr/model/nmrdisplayer/Spectrum.java @@ -70,8 +70,8 @@ public casekit.nmr.model.Spectrum toSpectrum(final boolean considerSignalKind) { 0)); } })); - spectrum.setSolvent((String) this.info.get("solvent")); - spectrum.setSpecType((String) this.info.get("experiment")); + spectrum.addMetaInfo("solvent", (String) this.info.get("solvent")); + spectrum.addMetaInfo("spectrumType", (String) this.info.get("experiment")); return spectrum; @@ -95,8 +95,8 @@ public casekit.nmr.model.Spectrum toSpectrum(final 
boolean considerSignalKind) { signal2D.getKind(), null, 0, 0)); } })); - spectrum.setSolvent((String) this.info.get("solvent")); - spectrum.setSpecType((String) this.info.get("experiment")); + spectrum.addMetaInfo("solvent", (String) this.info.get("solvent")); + spectrum.addMetaInfo("spectrumType", (String) this.info.get("experiment")); return spectrum; } diff --git a/src/casekit/nmr/prediction/Prediction.java b/src/casekit/nmr/prediction/Prediction.java index 3d13253..f8aecc7 100644 --- a/src/casekit/nmr/prediction/Prediction.java +++ b/src/casekit/nmr/prediction/Prediction.java @@ -64,7 +64,7 @@ public static DataSet predict1D(final Map> hoseCod final int minMatchingSphere = 1; final Spectrum spectrum = new Spectrum(); spectrum.setNuclei(new String[]{nucleus}); - spectrum.setSolvent(solvent); + spectrum.addMetaInfo("solvent", solvent); spectrum.setSignals(new ArrayList<>()); final Assignment assignment = new Assignment(); assignment.setNuclei(spectrum.getNuclei()); @@ -176,8 +176,10 @@ public static DataSet predict2D(final Map> hoseCod public static DataSet predict2D(final IAtomContainer structure, final Spectrum spectrumDim1, final Spectrum spectrumDim2, final Assignment assignmentDim1, final Assignment assignmentDim2, final int minPathLength, final int maxPathLength) { - if (!spectrumDim1.getSolvent() - .equals(spectrumDim2.getSolvent())) { + if (!spectrumDim1.getMeta() + .get("solvent") + .equals(spectrumDim2.getMeta() + .get("solvent"))) { return null; } final String[] nuclei2D = new String[]{spectrumDim1.getNuclei()[0], spectrumDim2.getNuclei()[0]}; @@ -187,7 +189,8 @@ public static DataSet predict2D(final IAtomContainer structure, final Spectrum s final Spectrum predictedSpectrum2D = new Spectrum(); predictedSpectrum2D.setNuclei(nuclei2D); predictedSpectrum2D.setSignals(new ArrayList<>()); - predictedSpectrum2D.setSolvent(spectrumDim1.getSolvent()); + predictedSpectrum2D.addMetaInfo("solvent", spectrumDim1.getMeta() + .get("solvent")); final Assignment 
assignment2D = new Assignment(); assignment2D.setNuclei(predictedSpectrum2D.getNuclei()); assignment2D.initAssignments(0); diff --git a/src/casekit/nmr/utils/Utils.java b/src/casekit/nmr/utils/Utils.java index 7c73619..3371687 100644 --- a/src/casekit/nmr/utils/Utils.java +++ b/src/casekit/nmr/utils/Utils.java @@ -559,7 +559,7 @@ public static DataSet atomContainerToDataSet(final IAtomContainer structure) thr hydrogenAdder.addImplicitHydrogens(structure); setAromaticityAndKekulize(structure); final Map meta = new HashMap<>(); - meta.put("title", structure.getTitle()); + // meta.put("title", structure.getTitle()); meta.put("mf", molecularFormularToString(getMolecularFormulaFromAtomContainer(structure))); try { final String smiles = getSmilesFromAtomContainer(structure); From d74ee7ec0936056fa73b94705eea080b8692b4b8 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Mon, 23 Aug 2021 22:02:17 +0200 Subject: [PATCH 298/405] feat: usage of Concurrent class when building HOSE code statistics --- .../nmr/analysis/HOSECodeShiftStatistics.java | 206 +++++++++--------- 1 file changed, 109 insertions(+), 97 deletions(-) diff --git a/src/casekit/nmr/analysis/HOSECodeShiftStatistics.java b/src/casekit/nmr/analysis/HOSECodeShiftStatistics.java index a9b674e..f2d96b7 100644 --- a/src/casekit/nmr/analysis/HOSECodeShiftStatistics.java +++ b/src/casekit/nmr/analysis/HOSECodeShiftStatistics.java @@ -19,16 +19,17 @@ import java.io.*; import java.util.*; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentLinkedQueue; public class HOSECodeShiftStatistics { private final static Gson GSON = new GsonBuilder().setLenient() .create(); //.setPrettyPrinting() - public static Map>> collectHOSECodeShifts(final List dataSetList, - final Integer maxSphere, - final boolean withExplicitH) { - return collectHOSECodeShifts(dataSetList, maxSphere, withExplicitH, new HashMap<>()); + public static Map>> collectHOSECodeShifts( + final List dataSetList, final Integer 
maxSphere, final boolean withExplicitH) { + return collectHOSECodeShifts(dataSetList, maxSphere, withExplicitH, new ConcurrentHashMap<>()); } /** @@ -40,121 +41,132 @@ public static Map>> collectHOSECodeShifts(final * * @return */ - public static Map>> collectHOSECodeShifts(final List dataSetList, - final Integer maxSphere, - final boolean withExplicitH, - final Map>> hoseCodeShifts) { - IAtomContainer structure; + public static Map>> collectHOSECodeShifts( + final List dataSetList, final Integer maxSphere, final boolean withExplicitH, + final Map>> hoseCodeShifts) { + for (final DataSet dataSet : dataSetList) { + insert(dataSet, maxSphere, withExplicitH, hoseCodeShifts); + } + + return hoseCodeShifts; + } + + public static boolean insert(final DataSet dataSet, final Integer maxSphere, final boolean withExplicitH, + final Map>> hoseCodeShifts) { + final IAtomContainer structure; Signal signal; - String hoseCode, atomTypeSpectrum; + String hoseCode; + final String atomTypeSpectrum; String solvent; - Map atomIndexMap; // from explicit H to heavy atom + final Map atomIndexMap; // from explicit H to heavy atom ConnectionTree connectionTree; int maxSphereTemp; List signalIndices; - for (final DataSet dataSet : dataSetList) { - structure = dataSet.getStructure() - .toAtomContainer(); - if (Utils.containsExplicitHydrogens(structure)) { - System.out.println("!!!Dataset skipped must not contain (previously set) explicit hydrogens!!!"); - continue; - } - // create atom index map to know which indices the explicit hydrogens will have - atomIndexMap = new HashMap<>(); - if (withExplicitH) { - try { - int nextAtomIndexExplicitH = structure.getAtomCount(); - for (int i = 0; i - < structure.getAtomCount(); i++) { - if (structure.getAtom(i) - .getImplicitHydrogenCount() - != null) { - for (int j = 0; j - < structure.getAtom(i) - .getImplicitHydrogenCount(); j++) { - atomIndexMap.put(nextAtomIndexExplicitH, i); - nextAtomIndexExplicitH++; - } + structure = dataSet.getStructure() + 
.toAtomContainer(); + if (Utils.containsExplicitHydrogens(structure)) { + System.out.println("!!!Dataset skipped must not contain (previously set) explicit hydrogens!!!"); + return false; + } + // create atom index map to know which indices the explicit hydrogens will have + atomIndexMap = new HashMap<>(); + if (withExplicitH) { + try { + int nextAtomIndexExplicitH = structure.getAtomCount(); + for (int i = 0; i + < structure.getAtomCount(); i++) { + if (structure.getAtom(i) + .getImplicitHydrogenCount() + != null) { + for (int j = 0; j + < structure.getAtom(i) + .getImplicitHydrogenCount(); j++) { + atomIndexMap.put(nextAtomIndexExplicitH, i); + nextAtomIndexExplicitH++; } } - - Utils.convertImplicitToExplicitHydrogens(structure); - Utils.setAromaticityAndKekulize(structure); - } catch (final CDKException e) { - e.printStackTrace(); - continue; } + + Utils.convertImplicitToExplicitHydrogens(structure); + Utils.setAromaticityAndKekulize(structure); + } catch (final CDKException e) { + e.printStackTrace(); + return false; } - solvent = dataSet.getSpectrum() - .getMeta() - .get("solvent"); - if (solvent - == null - || solvent.equals("")) { - solvent = "Unknown"; - } - atomTypeSpectrum = Utils.getAtomTypeFromNucleus(dataSet.getSpectrum() - .getNuclei()[0]); - for (int i = 0; i - < structure.getAtomCount(); i++) { - signalIndices = null; - if (structure.getAtom(i) - .getSymbol() - .equals(atomTypeSpectrum)) { - if (atomTypeSpectrum.equals("H")) { - // could be multiple signals - signalIndices = dataSet.getAssignment() - .getIndices(0, atomIndexMap.get(i)); - } else { - // should be one only - signalIndices = dataSet.getAssignment() - .getIndices(0, i); - } + } + solvent = dataSet.getSpectrum() + .getMeta() + == null + ? 
null + : dataSet.getSpectrum() + .getMeta() + .get("solvent"); + if (solvent + == null + || solvent.equals("")) { + solvent = "Unknown"; + } + atomTypeSpectrum = Utils.getAtomTypeFromNucleus(dataSet.getSpectrum() + .getNuclei()[0]); + for (int i = 0; i + < structure.getAtomCount(); i++) { + signalIndices = null; + if (structure.getAtom(i) + .getSymbol() + .equals(atomTypeSpectrum)) { + if (atomTypeSpectrum.equals("H")) { + // could be multiple signals + signalIndices = dataSet.getAssignment() + .getIndices(0, atomIndexMap.get(i)); + } else { + // should be one only + signalIndices = dataSet.getAssignment() + .getIndices(0, i); } - if (signalIndices - != null) { - for (final Integer signalIndex : signalIndices) { - signal = dataSet.getSpectrum() - .getSignal(signalIndex); - try { - if (maxSphere - == null) { - connectionTree = HOSECodeBuilder.buildConnectionTree(structure, i, null); - maxSphereTemp = connectionTree.getMaxSphere(true); - } else { - maxSphereTemp = maxSphere; - } - for (int sphere = 1; sphere - <= maxSphereTemp; sphere++) { - hoseCode = HOSECodeBuilder.buildHOSECode(structure, i, sphere, false); - hoseCodeShifts.putIfAbsent(hoseCode, new HashMap<>()); - hoseCodeShifts.get(hoseCode) - .putIfAbsent(solvent, new ArrayList<>()); - hoseCodeShifts.get(hoseCode) - .get(solvent) - .add(signal.getShift(0)); - } - } catch (final CDKException e) { - e.printStackTrace(); + } + if (signalIndices + != null) { + for (final Integer signalIndex : signalIndices) { + signal = dataSet.getSpectrum() + .getSignal(signalIndex); + try { + if (maxSphere + == null) { + connectionTree = HOSECodeBuilder.buildConnectionTree(structure, i, null); + maxSphereTemp = connectionTree.getMaxSphere(true); + } else { + maxSphereTemp = maxSphere; } + for (int sphere = 1; sphere + <= maxSphereTemp; sphere++) { + hoseCode = HOSECodeBuilder.buildHOSECode(structure, i, sphere, false); + hoseCodeShifts.putIfAbsent(hoseCode, new ConcurrentHashMap<>()); + hoseCodeShifts.get(hoseCode) + 
.putIfAbsent(solvent, new ConcurrentLinkedQueue<>()); + hoseCodeShifts.get(hoseCode) + .get(solvent) + .add(signal.getShift(0)); + } + } catch (final CDKException e) { + e.printStackTrace(); } } } } - return hoseCodeShifts; + return true; } public static Map> buildHOSECodeShiftStatistics( - final Map>> hoseCodeShifts) { + final Map>> hoseCodeShifts) { final Map> hoseCodeShiftStatistics = new HashMap<>(); List values; - for (final Map.Entry>> hoseCodes : hoseCodeShifts.entrySet()) { + for (final Map.Entry>> hoseCodes : hoseCodeShifts.entrySet()) { hoseCodeShiftStatistics.put(hoseCodes.getKey(), new HashMap<>()); - for (final Map.Entry> solvents : hoseCodes.getValue() - .entrySet()) { - values = solvents.getValue(); + for (final Map.Entry> solvents : hoseCodes.getValue() + .entrySet()) { + values = new ArrayList<>(solvents.getValue()); Statistics.removeOutliers(values, 1.5); hoseCodeShiftStatistics.get(hoseCodes.getKey()) .put(solvents.getKey(), @@ -173,7 +185,7 @@ public static Map> buildHOSECodeShiftStatistics(fi final Integer maxSphere, final boolean withExplicitH) { try { - final Map>> hoseCodeShifts = new HashMap<>(); + final Map>> hoseCodeShifts = new HashMap<>(); for (int i = 0; i < pathsToNMRShiftDBs.length; i++) { HOSECodeShiftStatistics.collectHOSECodeShifts( From ac7e74ad7272c8aedec5a39a9957e7bc4988965f Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Mon, 23 Aug 2021 23:12:34 +0200 Subject: [PATCH 299/405] fix: pre-set carbon hybridization states when having at least two protons --- src/casekit/nmr/lsd/PyLSDInputFileBuilder.java | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java b/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java index 091be2e..ae3961e 100644 --- a/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java +++ b/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java @@ -149,6 +149,18 @@ private static String buildMULT(final Correlation correlation, final int index, if 
(detectedHybridizations.containsKey(index)) { hybridizations = detectedHybridizations.get(index); } + if (hybridizations.isEmpty() + && correlation.getAtomType() + .equals("C") + && !correlation.getProtonsCount() + .isEmpty() + && correlation.getProtonsCount() + .get(0) + >= 2) { + // a carbon with at least two protons can only be SP2 or SP3 + hybridizations.add(2); + hybridizations.add(3); + } } if (hybridizations.isEmpty()) { hybridizationStringBuilder = new StringBuilder( From fbd8f518494e9a5ef3e466201e4eca6b1be38e47 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Wed, 25 Aug 2021 13:48:49 +0200 Subject: [PATCH 300/405] chore: use common parser method for COCONUT and NMRShiftDB --- src/casekit/nmr/dbservice/NMRShiftDB.java | 53 +++++------------------ src/casekit/nmr/utils/Utils.java | 33 +++++++++++++- 2 files changed, 44 insertions(+), 42 deletions(-) diff --git a/src/casekit/nmr/dbservice/NMRShiftDB.java b/src/casekit/nmr/dbservice/NMRShiftDB.java index db717df..5c551b3 100644 --- a/src/casekit/nmr/dbservice/NMRShiftDB.java +++ b/src/casekit/nmr/dbservice/NMRShiftDB.java @@ -19,15 +19,15 @@ import casekit.nmr.utils.Utils; import org.openscience.cdk.exception.CDKException; import org.openscience.cdk.interfaces.IAtomContainer; -import org.openscience.cdk.interfaces.IMolecularFormula; import org.openscience.cdk.io.iterator.IteratingSDFReader; import org.openscience.cdk.silent.SilentChemObjectBuilder; -import org.openscience.cdk.tools.CDKHydrogenAdder; import org.openscience.cdk.tools.manipulator.AtomContainerManipulator; import java.io.FileNotFoundException; import java.io.FileReader; -import java.util.*; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; public class NMRShiftDB { @@ -99,53 +99,19 @@ public static List getDataSetsFromNMRShiftDB(final String pathToNMRShif IAtomContainer structure; Spectrum spectrum; Assignment assignment; - Map meta; - final CDKHydrogenAdder hydrogenAdder = 
CDKHydrogenAdder.getInstance(SilentChemObjectBuilder.getInstance()); - + DataSet dataSet; List spectraProperties1D; String[] split; String spectrumIndexInRecord; - IMolecularFormula mf; List explicitHydrogenIndices; int[] temp; - StringBuilder mfAlphabetic; - Map mfAlphabeticMap; while (iterator.hasNext()) { structure = iterator.next(); AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(structure); explicitHydrogenIndices = casekit.nmr.utils.Utils.getExplicitHydrogenIndices(structure); Collections.sort(explicitHydrogenIndices); - if (!explicitHydrogenIndices.isEmpty()) { - // remove explicit hydrogens - Utils.removeAtoms(structure, "H"); - } - hydrogenAdder.addImplicitHydrogens(structure); - casekit.nmr.utils.Utils.setAromaticityAndKekulize(structure); - - meta = new HashMap<>(); - // meta.put("title", structure.getTitle()); - meta.put("id", structure.getProperty("nmrshiftdb2 ID")); - mf = casekit.nmr.utils.Utils.getMolecularFormulaFromAtomContainer(structure); - meta.put("mfOriginal", casekit.nmr.utils.Utils.molecularFormularToString(mf)); - mfAlphabetic = new StringBuilder(); - mfAlphabeticMap = new TreeMap<>(casekit.nmr.utils.Utils.getMolecularFormulaElementCounts( - casekit.nmr.utils.Utils.molecularFormularToString(mf))); - for (final Map.Entry entry : mfAlphabeticMap.entrySet()) { - mfAlphabetic.append(entry.getKey()); - if (entry.getValue() - > 1) { - mfAlphabetic.append(entry.getValue()); - } - } - meta.put("mf", mfAlphabetic.toString()); - try { - final String smiles = casekit.nmr.utils.Utils.getSmilesFromAtomContainer(structure); - meta.put("smiles", smiles); - } catch (final CDKException e) { - e.printStackTrace(); - } - + dataSet = Utils.atomContainerToDataSet(structure); for (final String nucleus : nuclei) { spectraProperties1D = getSpectraProperties1D(structure, nucleus); @@ -160,7 +126,10 @@ public static List getDataSetsFromNMRShiftDB(final String pathToNMRShif // if no spectrum could be built or the number of signals in spectrum is 
different than the atom number in molecule if ((spectrum == null) - || casekit.nmr.utils.Utils.getDifferenceSpectrumSizeAndMolecularFormulaCount(spectrum, mf, + || casekit.nmr.utils.Utils.getDifferenceSpectrumSizeAndMolecularFormulaCount(spectrum, + Utils.getMolecularFormulaFromString( + dataSet.getMeta() + .get("mf")), 0) != 0) { continue; @@ -214,8 +183,10 @@ public static List getDataSetsFromNMRShiftDB(final String pathToNMRShif } } } + dataSet.setSpectrum(spectrum); + dataSet.setAssignment(assignment); - dataSets.add(new DataSet(structure, spectrum, assignment, meta)); + dataSets.add(dataSet); } } diff --git a/src/casekit/nmr/utils/Utils.java b/src/casekit/nmr/utils/Utils.java index 3371687..100add6 100644 --- a/src/casekit/nmr/utils/Utils.java +++ b/src/casekit/nmr/utils/Utils.java @@ -556,11 +556,42 @@ public static Float getBondOrderSum(final IAtomContainer ac, final int atomIndex public static DataSet atomContainerToDataSet(final IAtomContainer structure) throws CDKException { final CDKHydrogenAdder hydrogenAdder = CDKHydrogenAdder.getInstance(SilentChemObjectBuilder.getInstance()); AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(structure); + if (Utils.containsExplicitHydrogens(structure)) { + // remove explicit hydrogens + Utils.removeAtoms(structure, "H"); + } hydrogenAdder.addImplicitHydrogens(structure); setAromaticityAndKekulize(structure); final Map meta = new HashMap<>(); // meta.put("title", structure.getTitle()); - meta.put("mf", molecularFormularToString(getMolecularFormulaFromAtomContainer(structure))); + final String source = structure.getProperty("nmrshiftdb2 ID", String.class) + != null + ? "nmrshiftdb" + : structure.getProperty("SMILES_ID", String.class) + != null + ? "coconut" + : null; + if (source + != null) { + meta.put("source", source); + meta.put("id", source.equals("nmrshiftdb") + ? 
structure.getProperty("nmrshiftdb2 ID", String.class) + : structure.getProperty("SMILES_ID", String.class) + .split("\\.")[0]); + } + final IMolecularFormula mf = casekit.nmr.utils.Utils.getMolecularFormulaFromAtomContainer(structure); + meta.put("mfOriginal", casekit.nmr.utils.Utils.molecularFormularToString(mf)); + final StringBuilder mfAlphabetic = new StringBuilder(); + final Map mfAlphabeticMap = new TreeMap<>( + Utils.getMolecularFormulaElementCounts(Utils.molecularFormularToString(mf))); + for (final Map.Entry entry : mfAlphabeticMap.entrySet()) { + mfAlphabetic.append(entry.getKey()); + if (entry.getValue() + > 1) { + mfAlphabetic.append(entry.getValue()); + } + } + meta.put("mf", mfAlphabetic.toString()); try { final String smiles = getSmilesFromAtomContainer(structure); meta.put("smiles", smiles); From 5e5aa8b1448feadc4679b61c5dd471721294e451 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Thu, 26 Aug 2021 15:29:24 +0200 Subject: [PATCH 301/405] chore: usage of more compact classes in DataSet --- .../nmr/analysis/ConnectivityStatistics.java | 13 +-- .../nmr/analysis/HOSECodeShiftStatistics.java | 6 +- src/casekit/nmr/dbservice/COCONUT.java | 9 +- src/casekit/nmr/dbservice/NMRShiftDB.java | 12 +-- .../nmr/fragments/FragmentUtilities.java | 9 +- .../fragmentation/Fragmentation.java | 13 +-- src/casekit/nmr/model/DataSet.java | 17 +++- src/casekit/nmr/model/SignalCompact.java | 89 +++++++++++++++++++ src/casekit/nmr/model/SpectrumCompact.java | 80 +++++++++++++++++ ...jacencyList.java => StructureCompact.java} | 49 +++++----- src/casekit/nmr/prediction/Prediction.java | 19 ++-- src/casekit/nmr/utils/Utils.java | 4 +- 12 files changed, 248 insertions(+), 72 deletions(-) create mode 100644 src/casekit/nmr/model/SignalCompact.java create mode 100644 src/casekit/nmr/model/SpectrumCompact.java rename src/casekit/nmr/model/{ExtendedAdjacencyList.java => StructureCompact.java} (83%) diff --git a/src/casekit/nmr/analysis/ConnectivityStatistics.java 
b/src/casekit/nmr/analysis/ConnectivityStatistics.java index f36c236..d8192eb 100644 --- a/src/casekit/nmr/analysis/ConnectivityStatistics.java +++ b/src/casekit/nmr/analysis/ConnectivityStatistics.java @@ -1,6 +1,7 @@ package casekit.nmr.analysis; import casekit.nmr.model.DataSet; +import casekit.nmr.model.Spectrum; import casekit.nmr.utils.Utils; import org.openscience.cdk.interfaces.IAtom; import org.openscience.cdk.interfaces.IAtomContainer; @@ -21,6 +22,7 @@ public class ConnectivityStatistics { public static void buildConnectivityStatistics(final List dataSetList, final String nucleus, final Map>>>>> connectivityStatistics) { IAtomContainer structure; + Spectrum spectrum; IAtom atom; final String atomType = Utils.getAtomTypeFromNucleus(nucleus); String multiplicity; @@ -35,13 +37,12 @@ public static void buildConnectivityStatistics(final List dataSetList, } structure = dataSet.getStructure() .toAtomContainer(); + spectrum = dataSet.getSpectrum() + .toSpectrum(); for (int signalIndex = 0; signalIndex - < dataSet.getSpectrum() - .getSignals() - .size(); signalIndex++) { - shift = dataSet.getSpectrum() - .getShift(signalIndex, 0) - .intValue(); + < spectrum.getSignalCount(); signalIndex++) { + shift = spectrum.getShift(signalIndex, 0) + .intValue(); for (int equivalenceIndex = 0; equivalenceIndex < dataSet.getAssignment() .getAssignment(0, signalIndex).length; equivalenceIndex++) { diff --git a/src/casekit/nmr/analysis/HOSECodeShiftStatistics.java b/src/casekit/nmr/analysis/HOSECodeShiftStatistics.java index f2d96b7..bd318d5 100644 --- a/src/casekit/nmr/analysis/HOSECodeShiftStatistics.java +++ b/src/casekit/nmr/analysis/HOSECodeShiftStatistics.java @@ -6,6 +6,7 @@ import casekit.nmr.hose.HOSECodeBuilder; import casekit.nmr.model.DataSet; import casekit.nmr.model.Signal; +import casekit.nmr.model.Spectrum; import casekit.nmr.utils.Statistics; import casekit.nmr.utils.Utils; import com.google.gson.Gson; @@ -64,6 +65,8 @@ public static boolean insert(final DataSet 
dataSet, final Integer maxSphere, fin List signalIndices; structure = dataSet.getStructure() .toAtomContainer(); + final Spectrum spectrum = dataSet.getSpectrum() + .toSpectrum(); if (Utils.containsExplicitHydrogens(structure)) { System.out.println("!!!Dataset skipped must not contain (previously set) explicit hydrogens!!!"); return false; @@ -127,8 +130,7 @@ public static boolean insert(final DataSet dataSet, final Integer maxSphere, fin if (signalIndices != null) { for (final Integer signalIndex : signalIndices) { - signal = dataSet.getSpectrum() - .getSignal(signalIndex); + signal = spectrum.getSignal(signalIndex); try { if (maxSphere == null) { diff --git a/src/casekit/nmr/dbservice/COCONUT.java b/src/casekit/nmr/dbservice/COCONUT.java index e50ec60..64b4c24 100644 --- a/src/casekit/nmr/dbservice/COCONUT.java +++ b/src/casekit/nmr/dbservice/COCONUT.java @@ -1,9 +1,6 @@ package casekit.nmr.dbservice; -import casekit.nmr.model.Assignment; -import casekit.nmr.model.DataSet; -import casekit.nmr.model.Signal; -import casekit.nmr.model.Spectrum; +import casekit.nmr.model.*; import casekit.nmr.utils.Utils; import org.openscience.cdk.exception.CDKException; import org.openscience.cdk.interfaces.IAtomContainer; @@ -125,10 +122,10 @@ public static List getDataSetsWithShiftPredictionFromCOCONUT(final Stri != 0) { continue; } - dataSet.setSpectrum(spectrum); + dataSet.setSpectrum(new SpectrumCompact(spectrum)); dataSet.setAssignment(assignment); - dataSetList.add(dataSet); + dataSetList.add(dataSet.buildClone()); } } diff --git a/src/casekit/nmr/dbservice/NMRShiftDB.java b/src/casekit/nmr/dbservice/NMRShiftDB.java index 5c551b3..6c95474 100644 --- a/src/casekit/nmr/dbservice/NMRShiftDB.java +++ b/src/casekit/nmr/dbservice/NMRShiftDB.java @@ -12,10 +12,7 @@ package casekit.nmr.dbservice; -import casekit.nmr.model.Assignment; -import casekit.nmr.model.DataSet; -import casekit.nmr.model.Signal; -import casekit.nmr.model.Spectrum; +import casekit.nmr.model.*; import 
casekit.nmr.utils.Utils; import org.openscience.cdk.exception.CDKException; import org.openscience.cdk.interfaces.IAtomContainer; @@ -116,7 +113,6 @@ public static List getDataSetsFromNMRShiftDB(final String pathToNMRShif for (final String nucleus : nuclei) { spectraProperties1D = getSpectraProperties1D(structure, nucleus); for (final String spectrumProperty1D : spectraProperties1D) { - split = spectrumProperty1D.split("\\s"); spectrumIndexInRecord = split[split.length - 1]; @@ -183,14 +179,12 @@ public static List getDataSetsFromNMRShiftDB(final String pathToNMRShif } } } - dataSet.setSpectrum(spectrum); + dataSet.setSpectrum(new SpectrumCompact(spectrum)); dataSet.setAssignment(assignment); - dataSets.add(dataSet); + dataSets.add(dataSet.buildClone()); } } - - } return dataSets; diff --git a/src/casekit/nmr/fragments/FragmentUtilities.java b/src/casekit/nmr/fragments/FragmentUtilities.java index 12ab169..9d32336 100644 --- a/src/casekit/nmr/fragments/FragmentUtilities.java +++ b/src/casekit/nmr/fragments/FragmentUtilities.java @@ -87,7 +87,8 @@ public static Map> getGoodlistAndBadlist(final List nonMatches = new ArrayList<>(); Assignment matchAssignment; for (final DataSet dataSet : dataSetList) { - matchAssignment = Similarity.matchSpectra(dataSet.getSpectrum(), querySpectrum, 0, 0, shiftTol, + matchAssignment = Similarity.matchSpectra(dataSet.getSpectrum() + .toSpectrum(), querySpectrum, 0, 0, shiftTol, checkMultiplicity, true, true); if (isMatch(dataSet, querySpectrum, mf, matchAssignment, maxAverageDeviation, queryHybridizationList)) { matches.add(dataSet); @@ -114,8 +115,10 @@ private static boolean isMatch(final DataSet dataSet, final Spectrum querySpectr if (!isStructuralMatch(dataSet, mf)) { return false; } + final Spectrum spectrum = dataSet.getSpectrum() + .toSpectrum(); // check average deviation - final Double averageDeviation = Similarity.calculateAverageDeviation(dataSet.getSpectrum(), querySpectrum, 0, 0, + final Double averageDeviation = 
Similarity.calculateAverageDeviation(spectrum, querySpectrum, 0, 0, matchAssignment); if (averageDeviation == null @@ -128,7 +131,7 @@ private static boolean isMatch(final DataSet dataSet, final Spectrum querySpectr return false; } dataSet.addMetaInfo("matchAssignment", gson.toJson(matchAssignment, Assignment.class)); - final Double rmsd = Similarity.calculateRMSD(dataSet.getSpectrum(), querySpectrum, 0, 0, matchAssignment); + final Double rmsd = Similarity.calculateRMSD(spectrum, querySpectrum, 0, 0, matchAssignment); dataSet.addMetaInfo("averageDeviation", Double.toString(averageDeviation)); dataSet.addMetaInfo("rmsd", Double.toString(rmsd)); diff --git a/src/casekit/nmr/fragments/fragmentation/Fragmentation.java b/src/casekit/nmr/fragments/fragmentation/Fragmentation.java index aa46fe7..6dc6c2d 100644 --- a/src/casekit/nmr/fragments/fragmentation/Fragmentation.java +++ b/src/casekit/nmr/fragments/fragmentation/Fragmentation.java @@ -43,7 +43,9 @@ public static List fragmentTreesToSubDataSets(final DataSet dataSet, final List fragmentDataSetList = new ArrayList<>(); final IAtomContainer structure = dataSet.getStructure() .toAtomContainer(); - final String spectrumAtomType = Utils.getAtomTypeFromSpectrum(dataSet.getSpectrum(), 0); + final Spectrum spectrum = dataSet.getSpectrum() + .toSpectrum(); + final String spectrumAtomType = Utils.getAtomTypeFromSpectrum(spectrum, 0); List substructureAtomIndices, signalIndices; IAtomContainer substructure; Spectrum subspectrum; @@ -84,9 +86,8 @@ public static List fragmentTreesToSubDataSets(final DataSet dataSet, } for (final int index : signalIndices) { - signal = dataSet.getSpectrum() - .getSignal(index) - .buildClone(); + signal = spectrum.getSignal(index) + .buildClone(); final int atomIndex = j; final List closestSignalIndexList = subspectrum.checkForEquivalences(signal, new double[]{0.0}, @@ -119,8 +120,8 @@ public static List fragmentTreesToSubDataSets(final DataSet dataSet, substructure = 
FragmentationUtilities.toAtomContainer(fragmentTree); subDataSet = new DataSet(); - subDataSet.setStructure(new ExtendedAdjacencyList(substructure)); - subDataSet.setSpectrum(subspectrum); + subDataSet.setStructure(new StructureCompact(substructure)); + subDataSet.setSpectrum(new SpectrumCompact(subspectrum)); subDataSet.setAssignment(subassignment); meta = new HashMap<>(); diff --git a/src/casekit/nmr/model/DataSet.java b/src/casekit/nmr/model/DataSet.java index 22f0603..a707bd8 100644 --- a/src/casekit/nmr/model/DataSet.java +++ b/src/casekit/nmr/model/DataSet.java @@ -15,15 +15,15 @@ @Setter public class DataSet { - private ExtendedAdjacencyList structure; - private Spectrum spectrum; + private StructureCompact structure; + private SpectrumCompact spectrum; private Assignment assignment; private Map meta; public DataSet(final IAtomContainer structure, final Spectrum spectrum, final Assignment assignment, final Map meta) { - this.structure = new ExtendedAdjacencyList(structure); - this.spectrum = spectrum; + this.structure = new StructureCompact(structure); + this.spectrum = new SpectrumCompact(spectrum); this.assignment = assignment; this.meta = new HashMap<>(meta); } @@ -40,6 +40,15 @@ public void removeMetaInfo(final String key) { this.meta.remove(key); } + public DataSet buildClone() { + final Map metaTemp = this.meta + == null + ? 
new HashMap<>() + : new HashMap<>(this.meta); + return new DataSet(this.structure.buildClone(), this.spectrum.buildClone(), this.assignment.buildClone(), + new HashMap<>(metaTemp)); + } + @Override public String toString() { return "DataSet{" diff --git a/src/casekit/nmr/model/SignalCompact.java b/src/casekit/nmr/model/SignalCompact.java new file mode 100644 index 0000000..17e0611 --- /dev/null +++ b/src/casekit/nmr/model/SignalCompact.java @@ -0,0 +1,89 @@ +package casekit.nmr.model; + +import lombok.AllArgsConstructor; +import lombok.Getter; +import lombok.NoArgsConstructor; +import lombok.Setter; + +import java.util.Arrays; + +/** + * @author Michael Wenk [https://github.com/michaelwenk] + */ +@NoArgsConstructor +@AllArgsConstructor +@Getter +@Setter +public class SignalCompact { + + private String[] strings; // nucleus dim 1, nucleus dim 2, ... , multiplicity, signal kind + private Double[] doubles; // shift dim 1, shift dim 2, ... , intensity + private Integer[] integers; // dimensions, equivalence count, phase + + public SignalCompact(final Signal signal) { + this.strings = new String[signal.getNDim() + + 2]; + this.doubles = new Double[signal.getNDim() + + 1]; + for (int dim = 0; dim + < signal.getNDim(); dim++) { + this.strings[dim] = signal.getNuclei()[dim]; + this.doubles[dim] = signal.getShift(dim); + } + this.strings[signal.getNDim()] = signal.getMultiplicity(); + this.strings[signal.getNDim() + + 1] = signal.getKind(); + this.doubles[signal.getNDim()] = signal.getIntensity(); + this.integers = new Integer[]{signal.getNDim(), signal.getEquivalencesCount(), signal.getPhase()}; + } + + public int dimensions() { + return this.integers[0]; + } + + public String[] nuclei() { + final int nDim = this.integers[0]; + final String[] nuclei = new String[nDim]; + for (int dim = 0; dim + < nDim; dim++) { + nuclei[dim] = this.strings[dim]; + } + return nuclei; + } + + public Signal toSignal() { + final Signal signal = new Signal(); + signal.setNuclei(this.nuclei()); 
+ signal.setMultiplicity(this.strings[this.dimensions()]); + signal.setKind(this.strings[this.dimensions() + + 1]); + signal.setShifts(new Double[this.dimensions()]); + for (int dim = 0; dim + < this.dimensions(); dim++) { + signal.setShift(this.doubles[dim], dim); + } + signal.setIntensity(this.doubles[this.dimensions()]); + signal.setEquivalencesCount(this.integers[1]); + signal.setPhase(this.integers[2]); + + return signal; + } + + + public SignalCompact buildClone() { + return new SignalCompact(this.strings.clone(), this.doubles.clone(), this.integers.clone()); + } + + @Override + public String toString() { + return "SignalCompact{" + + "strings=" + + Arrays.toString(this.strings) + + ", doubles=" + + Arrays.toString(this.doubles) + + ", integers=" + + Arrays.toString(this.integers) + + '}'; + } +} + diff --git a/src/casekit/nmr/model/SpectrumCompact.java b/src/casekit/nmr/model/SpectrumCompact.java new file mode 100644 index 0000000..3f7b31a --- /dev/null +++ b/src/casekit/nmr/model/SpectrumCompact.java @@ -0,0 +1,80 @@ +package casekit.nmr.model; + +import lombok.AllArgsConstructor; +import lombok.Getter; +import lombok.NoArgsConstructor; +import lombok.Setter; + +import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; +import java.util.stream.Collectors; + +/** + * @author Michael Wenk [https://github.com/michaelwenk] + */ +@NoArgsConstructor +@AllArgsConstructor +@Getter +@Setter +public class SpectrumCompact { + + private String[] nuclei; + private Map meta; + private SignalCompact[] signals; + + public SpectrumCompact(final Spectrum spectrum) { + this.nuclei = spectrum.getNuclei(); + this.meta = spectrum.getMeta(); + this.signals = spectrum.getSignals() + .stream() + .map(SignalCompact::new) + .toArray(SignalCompact[]::new); + } + + public void addMetaInfo(final String key, final String value) { + if (this.meta + == null) { + this.meta = new HashMap<>(); + } + this.meta.put(key, value); + } + + public void removeMetaInfo(final String 
key) { + this.meta.remove(key); + } + + public Spectrum toSpectrum() { + final Spectrum spectrum = new Spectrum(); + spectrum.setNuclei(this.nuclei); + spectrum.setMeta(this.meta); + spectrum.setSignals(Arrays.stream(this.signals) + .map(SignalCompact::toSignal) + .collect(Collectors.toList())); + spectrum.setSignalCount(this.signals.length); + + return spectrum; + } + + public SpectrumCompact buildClone() { + final Map metaTemp = this.meta + == null + ? new HashMap<>() + : new HashMap<>(this.meta); + return new SpectrumCompact(this.nuclei.clone(), metaTemp, Arrays.stream(this.signals) + .map(SignalCompact::buildClone) + .toArray(SignalCompact[]::new)); + } + + @Override + public String toString() { + return "SpectrumCompact{" + + "nuclei=" + + Arrays.toString(this.nuclei) + + ", meta=" + + this.meta + + ", signals=" + + Arrays.toString(this.signals) + + '}'; + } +} diff --git a/src/casekit/nmr/model/ExtendedAdjacencyList.java b/src/casekit/nmr/model/StructureCompact.java similarity index 83% rename from src/casekit/nmr/model/ExtendedAdjacencyList.java rename to src/casekit/nmr/model/StructureCompact.java index 5e7f115..18421d2 100644 --- a/src/casekit/nmr/model/ExtendedAdjacencyList.java +++ b/src/casekit/nmr/model/StructureCompact.java @@ -37,12 +37,12 @@ @AllArgsConstructor @Getter @Setter -public class ExtendedAdjacencyList { +public class StructureCompact { private int[][][] bondProperties; // connected atom index, bond order, bond is in ring, bond is aromatic private Integer[][] atomProperties; // element symbol, hybridization, implicitHydrogenCount, valency, formalCharge, isInRingAtom, isAromaticAtom - public ExtendedAdjacencyList(final IAtomContainer ac) { + public StructureCompact(final IAtomContainer ac) { final double[][] connectionMatrix = ConnectionMatrix.getMatrix(ac); this.bondProperties = new int[connectionMatrix.length][][]; List connectedAtomsList; @@ -51,8 +51,9 @@ public ExtendedAdjacencyList(final IAtomContainer ac) { for (int i = 0; i < 
connectionMatrix.length; i++) { connectedAtomsList = new ArrayList<>(); - for (int j = 0; j - < connectionMatrix[i].length; j++) { + for (int j = i + + 1; j + < connectionMatrix[i].length; j++) { if (connectionMatrix[i][j] >= 1) { bond = ac.getBond(ac.getAtom(i), ac.getAtom(j)); @@ -70,11 +71,11 @@ public ExtendedAdjacencyList(final IAtomContainer ac) { } this.bondProperties[i] = temp; } - this.atomProperties = new Integer[this.bondProperties.length][]; + this.atomProperties = new Integer[connectionMatrix.length][]; IAtom atom; for (int i = 0; i - < this.bondProperties.length; i++) { + < connectionMatrix.length; i++) { atom = ac.getAtom(i); this.atomProperties[i] = new Integer[7]; this.atomProperties[i][0] = atom.getSymbol() @@ -99,17 +100,16 @@ public ExtendedAdjacencyList(final IAtomContainer ac) { } public int atomCount() { - return this.bondProperties.length; + return this.atomProperties.length; } public int bondCount() { - int bondCounter = 0; + int bondCount = 0; for (int i = 0; i < this.bondProperties.length; i++) { - bondCounter += this.bondProperties[i].length; + bondCount += this.bondProperties[i].length; } - return bondCounter - / 2; + return bondCount; } public IAtomContainer toAtomContainer() { @@ -117,7 +117,7 @@ public IAtomContainer toAtomContainer() { .newAtomContainer(); IAtom atom; for (int i = 0; i - < this.bondProperties.length; i++) { + < this.atomProperties.length; i++) { atom = this.atomProperties[i][0] == -1 ? 
new PseudoAtom("R") @@ -141,29 +141,28 @@ public IAtomContainer toAtomContainer() { < this.bondProperties.length; i++) { for (int k = 0; k < this.bondProperties[i].length; k++) { - if (ac.getBond(ac.getAtom(i), ac.getAtom(this.bondProperties[i][k][0])) - == null) { - bond = new Bond(ac.getAtom(i), ac.getAtom(this.bondProperties[i][k][0]), - Utils.getBondOrder(this.bondProperties[i][k][1])); - bond.setIsInRing(this.bondProperties[i][k][2] - == 1); - bond.setIsAromatic(this.bondProperties[i][k][3] - == 1); - ac.addBond(bond); - } + bond = new Bond(ac.getAtom(i), ac.getAtom(this.bondProperties[i][k][0]), + Utils.getBondOrder(this.bondProperties[i][k][1])); + bond.setIsInRing(this.bondProperties[i][k][2] + == 1); + bond.setIsAromatic(this.bondProperties[i][k][3] + == 1); + ac.addBond(bond); } } return ac; } - public ExtendedAdjacencyList buildClone() { - return new ExtendedAdjacencyList(this.toAtomContainer()); + public StructureCompact buildClone() { + return new StructureCompact(Arrays.copyOf(this.bondProperties, this.bondProperties.length), + Arrays.copyOf(this.atomProperties, this.atomProperties.length)); + } @Override public String toString() { - return "ExtendedAdjacencyList{" + return "StructureCompact{" + "bondProperties=" + Arrays.deepToString(this.bondProperties) + ", atomProperties=" diff --git a/src/casekit/nmr/prediction/Prediction.java b/src/casekit/nmr/prediction/Prediction.java index f8aecc7..094e454 100644 --- a/src/casekit/nmr/prediction/Prediction.java +++ b/src/casekit/nmr/prediction/Prediction.java @@ -153,7 +153,9 @@ public static DataSet predict2D(final Map> hoseCod final int minPathLength, final int maxPathLength) { final DataSet predictionDim1 = predict1D(hoseCodeShiftStatistics, structure, nuclei[0], solvent); final DataSet predictionDim2 = predict1D(hoseCodeShiftStatistics, structure, nuclei[1], solvent); - return Prediction.predict2D(structure, predictionDim1.getSpectrum(), predictionDim2.getSpectrum(), + return 
Prediction.predict2D(structure, predictionDim1.getSpectrum() + .toSpectrum(), predictionDim2.getSpectrum() + .toSpectrum(), predictionDim1.getAssignment(), predictionDim2.getAssignment(), minPathLength, maxPathLength); } @@ -260,13 +262,14 @@ public static DataSet predictHSQCEdited(final IAtomContainer structure, final Sp final Spectrum spectrumDim2, final Assignment assignmentDim1, final Assignment assignmentDim2) { final DataSet dataSet = predictHSQC(structure, spectrumDim1, spectrumDim2, assignmentDim1, assignmentDim2); + final Spectrum spectrum = dataSet.getSpectrum() + .toSpectrum(); final String atomTypeDim2 = Utils.getAtomTypeFromSpectrum(spectrumDim2, 0); IAtom atom; Integer explicitHydrogensCount; for (int i = 0; i - < dataSet.getSpectrum() - .getSignalCount(); i++) { + < spectrum.getSignalCount(); i++) { atom = structure.getAtom(dataSet.getAssignment() .getAssignment(1, i, 0)); if (!atom.getSymbol() @@ -276,16 +279,14 @@ public static DataSet predictHSQCEdited(final IAtomContainer structure, final Sp explicitHydrogensCount = AtomContainerManipulator.countExplicitHydrogens(structure, atom); if (explicitHydrogensCount == 2) { - dataSet.getSpectrum() - .getSignal(i) - .setPhase(-1); + spectrum.getSignal(i) + .setPhase(-1); } else if (explicitHydrogensCount == 1 || explicitHydrogensCount == 3) { - dataSet.getSpectrum() - .getSignal(i) - .setPhase(1); + spectrum.getSignal(i) + .setPhase(1); } } diff --git a/src/casekit/nmr/utils/Utils.java b/src/casekit/nmr/utils/Utils.java index 100add6..f673438 100644 --- a/src/casekit/nmr/utils/Utils.java +++ b/src/casekit/nmr/utils/Utils.java @@ -1,8 +1,8 @@ package casekit.nmr.utils; import casekit.nmr.model.DataSet; -import casekit.nmr.model.ExtendedAdjacencyList; import casekit.nmr.model.Spectrum; +import casekit.nmr.model.StructureCompact; import casekit.nmr.model.nmrdisplayer.Correlation; import org.openscience.cdk.aromaticity.Aromaticity; import org.openscience.cdk.aromaticity.ElectronDonation; @@ -599,7 +599,7 @@ 
public static DataSet atomContainerToDataSet(final IAtomContainer structure) thr e.printStackTrace(); } final DataSet dataSet = new DataSet(); - dataSet.setStructure(new ExtendedAdjacencyList(structure)); + dataSet.setStructure(new StructureCompact(structure)); dataSet.setMeta(meta); return dataSet; From 145c1c700403140003442e671668f7586d167921 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Fri, 27 Aug 2021 17:37:04 +0200 Subject: [PATCH 302/405] chore: extracted buildConnectivityStatistics method --- .../nmr/analysis/ConnectivityStatistics.java | 158 +++++++++--------- 1 file changed, 82 insertions(+), 76 deletions(-) diff --git a/src/casekit/nmr/analysis/ConnectivityStatistics.java b/src/casekit/nmr/analysis/ConnectivityStatistics.java index d8192eb..826d518 100644 --- a/src/casekit/nmr/analysis/ConnectivityStatistics.java +++ b/src/casekit/nmr/analysis/ConnectivityStatistics.java @@ -10,6 +10,7 @@ import java.util.List; import java.util.Map; import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; public class ConnectivityStatistics { @@ -21,92 +22,97 @@ public class ConnectivityStatistics { */ public static void buildConnectivityStatistics(final List dataSetList, final String nucleus, final Map>>>>> connectivityStatistics) { - IAtomContainer structure; - Spectrum spectrum; - IAtom atom; final String atomType = Utils.getAtomTypeFromNucleus(nucleus); - String multiplicity; - String hybridization; - String connectedAtomType; - String connectedAtomHybridization; - int shift, atomIndex; for (final DataSet dataSet : dataSetList) { if (!dataSet.getSpectrum() .getNuclei()[0].equals(nucleus)) { continue; } - structure = dataSet.getStructure() - .toAtomContainer(); - spectrum = dataSet.getSpectrum() - .toSpectrum(); - for (int signalIndex = 0; signalIndex - < spectrum.getSignalCount(); signalIndex++) { - shift = spectrum.getShift(signalIndex, 0) - .intValue(); - for (int equivalenceIndex = 0; equivalenceIndex - < dataSet.getAssignment() - .getAssignment(0, 
signalIndex).length; equivalenceIndex++) { - atomIndex = dataSet.getAssignment() - .getAssignment(0, signalIndex, equivalenceIndex); - atom = structure.getAtom(atomIndex); - if (atom.getSymbol() - .equals(atomType)) { - multiplicity = Utils.getMultiplicityFromProtonsCount(atom.getImplicitHydrogenCount()); - if (multiplicity + buildConnectivityStatistics(dataSet, atomType, connectivityStatistics); + } + } + + /** + * @param dataSet + * @param atomType + * @param connectivityStatistics multiplicity -> hybridization -> shift (int) -> connected atom symbol -> connected atom hybridization -> connected atom protons count -> occurrence + */ + public static void buildConnectivityStatistics(final DataSet dataSet, final String atomType, + final Map>>>>> connectivityStatistics) { + final IAtomContainer structure = dataSet.getStructure() + .toAtomContainer(); + final Spectrum spectrum = dataSet.getSpectrum() + .toSpectrum(); + int shift, atomIndex; + IAtom atom; + String multiplicity, hybridization, connectedAtomType, connectedAtomHybridization; + for (int signalIndex = 0; signalIndex + < spectrum.getSignalCount(); signalIndex++) { + shift = spectrum.getShift(signalIndex, 0) + .intValue(); + for (int equivalenceIndex = 0; equivalenceIndex + < dataSet.getAssignment() + .getAssignment(0, signalIndex).length; equivalenceIndex++) { + atomIndex = dataSet.getAssignment() + .getAssignment(0, signalIndex, equivalenceIndex); + atom = structure.getAtom(atomIndex); + if (atom.getSymbol() + .equals(atomType)) { + multiplicity = Utils.getMultiplicityFromProtonsCount(atom.getImplicitHydrogenCount()); + if (multiplicity + == null) { + continue; + } + multiplicity = multiplicity.toLowerCase(); + hybridization = atom.getHybridization() + .name(); + connectivityStatistics.putIfAbsent(multiplicity, new ConcurrentHashMap<>()); + connectivityStatistics.get(multiplicity) + .putIfAbsent(hybridization, new ConcurrentHashMap<>()); + // check for connected hetero atoms + for (final IAtom connectedAtom 
: structure.getConnectedAtomsList(atom)) { + if (connectedAtom.getSymbol() + .equals("H")) { + continue; + } + connectedAtomType = connectedAtom.getSymbol(); + if (connectedAtom.getHybridization() == null) { continue; } - multiplicity = multiplicity.toLowerCase(); - hybridization = atom.getHybridization() - .name(); - connectivityStatistics.putIfAbsent(multiplicity, new HashMap<>()); + connectedAtomHybridization = connectedAtom.getHybridization() + .name(); connectivityStatistics.get(multiplicity) - .putIfAbsent(hybridization, new HashMap<>()); - // check for connected hetero atoms - for (final IAtom connectedAtom : structure.getConnectedAtomsList(atom)) { - if (connectedAtom.getSymbol() - .equals("H")) { - continue; - } - connectedAtomType = connectedAtom.getSymbol(); - if (connectedAtom.getHybridization() - == null) { - continue; - } - connectedAtomHybridization = connectedAtom.getHybridization() - .name(); - connectivityStatistics.get(multiplicity) - .get(hybridization) - .putIfAbsent(shift, new HashMap<>()); - connectivityStatistics.get(multiplicity) - .get(hybridization) - .get(shift) - .putIfAbsent(connectedAtomType, new HashMap<>()); - connectivityStatistics.get(multiplicity) - .get(hybridization) - .get(shift) - .get(connectedAtomType) - .putIfAbsent(connectedAtomHybridization, new HashMap<>()); - connectivityStatistics.get(multiplicity) - .get(hybridization) - .get(shift) - .get(connectedAtomType) - .get(connectedAtomHybridization) - .putIfAbsent(connectedAtom.getImplicitHydrogenCount(), 0); - connectivityStatistics.get(multiplicity) - .get(hybridization) - .get(shift) - .get(connectedAtomType) - .get(connectedAtomHybridization) - .put(connectedAtom.getImplicitHydrogenCount(), - connectivityStatistics.get(multiplicity) - .get(hybridization) - .get(shift) - .get(connectedAtomType) - .get(connectedAtomHybridization) - .get(connectedAtom.getImplicitHydrogenCount()) - + 1); - } + .get(hybridization) + .putIfAbsent(shift, new ConcurrentHashMap<>()); + 
connectivityStatistics.get(multiplicity) + .get(hybridization) + .get(shift) + .putIfAbsent(connectedAtomType, new ConcurrentHashMap<>()); + connectivityStatistics.get(multiplicity) + .get(hybridization) + .get(shift) + .get(connectedAtomType) + .putIfAbsent(connectedAtomHybridization, new ConcurrentHashMap<>()); + connectivityStatistics.get(multiplicity) + .get(hybridization) + .get(shift) + .get(connectedAtomType) + .get(connectedAtomHybridization) + .putIfAbsent(connectedAtom.getImplicitHydrogenCount(), 0); + connectivityStatistics.get(multiplicity) + .get(hybridization) + .get(shift) + .get(connectedAtomType) + .get(connectedAtomHybridization) + .put(connectedAtom.getImplicitHydrogenCount(), connectivityStatistics.get( + multiplicity) + .get(hybridization) + .get(shift) + .get(connectedAtomType) + .get(connectedAtomHybridization) + .get(connectedAtom.getImplicitHydrogenCount()) + + 1); } } } From 7dea28c56b022251c69eb92aecddbfcfa853f440 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Sat, 28 Aug 2021 12:10:02 +0200 Subject: [PATCH 303/405] feat: methods for extraction of neighbor atom types and hybridizations --- .../nmr/analysis/ConnectivityStatistics.java | 70 +++++++++++++++++++ 1 file changed, 70 insertions(+) diff --git a/src/casekit/nmr/analysis/ConnectivityStatistics.java b/src/casekit/nmr/analysis/ConnectivityStatistics.java index 826d518..9e9ef04 100644 --- a/src/casekit/nmr/analysis/ConnectivityStatistics.java +++ b/src/casekit/nmr/analysis/ConnectivityStatistics.java @@ -11,6 +11,7 @@ import java.util.Map; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; +import java.util.stream.Collectors; public class ConnectivityStatistics { @@ -152,4 +153,73 @@ public static Map>> extractConnectivit return extractedConnectivities; } + + public static List extractNeighborAtomTypes( + final Map>> extractedConnectivities, final double threshold) { + final Map totalCounts = new HashMap<>(); + for (final String connectedAtomType : 
extractedConnectivities.keySet()) { + totalCounts.putIfAbsent(connectedAtomType, 0); + for (final String connectedAtomHybridization : extractedConnectivities.get(connectedAtomType) + .keySet()) { + for (final Map.Entry countsEntry : extractedConnectivities.get(connectedAtomType) + .get(connectedAtomHybridization) + .entrySet()) { + totalCounts.put(connectedAtomType, totalCounts.get(connectedAtomType) + + countsEntry.getValue()); + } + } + } + final int totalCountsSum = totalCounts.values() + .stream() + .reduce(0, (total, current) -> total += current); + return totalCounts.keySet() + .stream() + .filter(atomType -> (totalCounts.get(atomType) + / (double) totalCountsSum) + >= threshold) + .collect(Collectors.toList()); + } + + public static Map> extractNeighborHybridizations( + final Map>> extractedConnectivities, + final String neighborAtomType, final double threshold) { + if (!extractedConnectivities.containsKey(neighborAtomType)) { + return new HashMap<>(); + } + final Map countsPerHybridization = new HashMap<>(); + extractedConnectivities.get(neighborAtomType) + .keySet() + .forEach(hybridizationNeighbor -> { + countsPerHybridization.put(hybridizationNeighbor, extractedConnectivities.get( + neighborAtomType) + .get(hybridizationNeighbor) + .keySet() + .stream() + .reduce(0, + (protonsCountSum, protonsCount) -> protonsCountSum += extractedConnectivities.get( + neighborAtomType) + .get(hybridizationNeighbor) + .get(protonsCount))); + }); + final int totalCount = countsPerHybridization.keySet() + .stream() + .map(countsPerHybridization::get) + + .reduce(0, (sum, current) -> sum += current); + final List allowedHybridizationList = countsPerHybridization.keySet() + .stream() + .filter(hybridizationNeighbor -> countsPerHybridization.get( + hybridizationNeighbor) + / (double) totalCount + >= threshold) + .collect(Collectors.toList()); + + final Map> extractedNeighborHybridizationMap = new HashMap<>(); + for (final String allowedHybridization : allowedHybridizationList) 
{ + extractedNeighborHybridizationMap.put(allowedHybridization, extractedConnectivities.get(neighborAtomType) + .get(allowedHybridization)); + } + + return extractedNeighborHybridizationMap; + } } From 22d2bd10063ed080506e9e8aa25a27725dacc256 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Mon, 30 Aug 2021 12:06:36 +0200 Subject: [PATCH 304/405] feat: use connectivity information to forbid bonds between an atom and a group of atoms --- .../nmr/lsd/PyLSDInputFileBuilder.java | 73 ++++++++++++++++++- 1 file changed, 71 insertions(+), 2 deletions(-) diff --git a/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java b/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java index ae3961e..48fe529 100644 --- a/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java +++ b/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java @@ -446,7 +446,11 @@ private static String buildSHIH(final Correlation correlation, final int index, return stringBuilder.toString(); } - private static String buildLISTAndPROP(final boolean allowHeteroHeteroBonds) { + private static String buildLISTsAndPROPs(final List correlationList, + final Map indicesMap, + final Map elementCounts, + final Map>>> detectedConnectivities, + final boolean allowHeteroHeteroBonds) { final StringBuilder stringBuilder = new StringBuilder(); // LIST PROP for hetero hetero bonds allowance if (!allowHeteroHeteroBonds) { @@ -456,6 +460,69 @@ private static String buildLISTAndPROP(final boolean allowHeteroHeteroBonds) { stringBuilder.append("PROP L1 0 L1 -; no hetero-hetero bonds\n"); } + final Set atomTypesByMf = new HashSet<>(elementCounts.keySet()); + atomTypesByMf.remove("H"); + final Map listMap = new HashMap<>(); + listMap.put("HETE", "L1"); + for (final String atomType : atomTypesByMf) { + listMap.put(atomType, "L" + + (listMap.size() + + 1)); + stringBuilder.append("ELEM") + .append(" ") + .append(listMap.get(atomType)) + .append(" ") + .append(atomType) + .append("\n"); + } + + Correlation correlation; + String atomType; + int indexInPyLSD; + 
Map>> connectivities; + Set nonNeighborAtomTypes; + for (int i = 0; i + < correlationList.size(); i++) { + correlation = correlationList.get(i); + atomType = correlation.getAtomType(); + connectivities = detectedConnectivities.get(i); + if (atomType.equals("H") + || connectivities + == null + || connectivities.isEmpty()) { + continue; + } + nonNeighborAtomTypes = new HashSet<>(elementCounts.keySet()); + nonNeighborAtomTypes.removeAll(connectivities.keySet()); + nonNeighborAtomTypes.remove("H"); + + for (int k = 1; k + < indicesMap.get(i).length; k++) { + indexInPyLSD = (int) indicesMap.get(i)[k]; + for (final String nonNeighborAtomType : nonNeighborAtomTypes) { + stringBuilder.append("PROP ") + .append(indexInPyLSD) + .append(" 0 ") + .append(listMap.get(nonNeighborAtomType)) + .append(" -") + .append("; no bonds between ") + .append(indexInPyLSD) + .append(" (") + .append(atomType) + .append(", ") + .append(Statistics.roundDouble(correlation.getSignal() + .getDelta(), 2)) + .append(") and ") + .append(listMap.get(nonNeighborAtomType)) + .append(" (") + .append(nonNeighborAtomType) + .append(")") + .append("\n"); + } + } + } + + return stringBuilder.toString(); } @@ -499,6 +566,7 @@ private static String buildFilters(final String[] filterPaths) { public static String buildPyLSDInputFileContent(final Data data, final String mf, final Map> detectedHybridizations, + final Map>>> detectedConnectivities, final ElucidationOptions elucidationOptions) { final Map> state = data.getCorrelations() .getState(); @@ -574,7 +642,8 @@ public static String buildPyLSDInputFileContent(final Data data, final String mf // BOND (interpretation, INADEQUATE, previous assignments) -> input fragments // LIST PROP for certain limitations or properties of atoms in lists, e.g. 
hetero hetero bonds allowance - stringBuilder.append(buildLISTAndPROP(elucidationOptions.isAllowHeteroHeteroBonds())) + stringBuilder.append(buildLISTsAndPROPs(correlationList, indicesMap, elementCounts, detectedConnectivities, + elucidationOptions.isAllowHeteroHeteroBonds())) .append("\n"); // DEFF and FEXP as filters (bad lists) stringBuilder.append(buildFilters(elucidationOptions.getFilterPaths())) From 6850f5760c890e46108a35d81f16c48d30294bd4 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Wed, 1 Sep 2021 11:54:34 +0200 Subject: [PATCH 305/405] chore: renamed nmrdisplayer package to nmrium --- src/casekit/nmr/lsd/PyLSDInputFileBuilder.java | 6 +++--- .../nmr/model/{nmrdisplayer => nmrium}/Correlation.java | 2 +- .../nmr/model/{nmrdisplayer => nmrium}/Correlations.java | 2 +- src/casekit/nmr/model/{nmrdisplayer => nmrium}/Data.java | 2 +- src/casekit/nmr/model/{nmrdisplayer => nmrium}/Default.java | 2 +- src/casekit/nmr/model/{nmrdisplayer => nmrium}/Link.java | 2 +- src/casekit/nmr/model/{nmrdisplayer => nmrium}/Range.java | 2 +- .../nmr/model/{nmrdisplayer => nmrium}/Signal1D.java | 2 +- .../nmr/model/{nmrdisplayer => nmrium}/Signal2D.java | 2 +- .../nmr/model/{nmrdisplayer => nmrium}/Spectrum.java | 2 +- src/casekit/nmr/model/{nmrdisplayer => nmrium}/Zone.java | 2 +- src/casekit/nmr/utils/Utils.java | 2 +- 12 files changed, 14 insertions(+), 14 deletions(-) rename src/casekit/nmr/model/{nmrdisplayer => nmrium}/Correlation.java (97%) rename src/casekit/nmr/model/{nmrdisplayer => nmrium}/Correlations.java (97%) rename src/casekit/nmr/model/{nmrdisplayer => nmrium}/Data.java (97%) rename src/casekit/nmr/model/{nmrdisplayer => nmrium}/Default.java (97%) rename src/casekit/nmr/model/{nmrdisplayer => nmrium}/Link.java (97%) rename src/casekit/nmr/model/{nmrdisplayer => nmrium}/Range.java (97%) rename src/casekit/nmr/model/{nmrdisplayer => nmrium}/Signal1D.java (97%) rename src/casekit/nmr/model/{nmrdisplayer => nmrium}/Signal2D.java (97%) rename 
src/casekit/nmr/model/{nmrdisplayer => nmrium}/Spectrum.java (99%) rename src/casekit/nmr/model/{nmrdisplayer => nmrium}/Zone.java (97%) diff --git a/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java b/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java index 48fe529..235ab4c 100644 --- a/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java +++ b/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java @@ -1,9 +1,9 @@ package casekit.nmr.lsd; import casekit.nmr.lsd.model.ElucidationOptions; -import casekit.nmr.model.nmrdisplayer.Correlation; -import casekit.nmr.model.nmrdisplayer.Data; -import casekit.nmr.model.nmrdisplayer.Link; +import casekit.nmr.model.nmrium.Correlation; +import casekit.nmr.model.nmrium.Data; +import casekit.nmr.model.nmrium.Link; import casekit.nmr.utils.Statistics; import casekit.nmr.utils.Utils; diff --git a/src/casekit/nmr/model/nmrdisplayer/Correlation.java b/src/casekit/nmr/model/nmrium/Correlation.java similarity index 97% rename from src/casekit/nmr/model/nmrdisplayer/Correlation.java rename to src/casekit/nmr/model/nmrium/Correlation.java index 6b8b23c..ec61752 100644 --- a/src/casekit/nmr/model/nmrdisplayer/Correlation.java +++ b/src/casekit/nmr/model/nmrium/Correlation.java @@ -22,7 +22,7 @@ * SOFTWARE. */ -package casekit.nmr.model.nmrdisplayer; +package casekit.nmr.model.nmrium; import lombok.Getter; import lombok.NoArgsConstructor; diff --git a/src/casekit/nmr/model/nmrdisplayer/Correlations.java b/src/casekit/nmr/model/nmrium/Correlations.java similarity index 97% rename from src/casekit/nmr/model/nmrdisplayer/Correlations.java rename to src/casekit/nmr/model/nmrium/Correlations.java index 5070b64..4789e18 100644 --- a/src/casekit/nmr/model/nmrdisplayer/Correlations.java +++ b/src/casekit/nmr/model/nmrium/Correlations.java @@ -22,7 +22,7 @@ * SOFTWARE. 
*/ -package casekit.nmr.model.nmrdisplayer; +package casekit.nmr.model.nmrium; import lombok.Getter; import lombok.NoArgsConstructor; diff --git a/src/casekit/nmr/model/nmrdisplayer/Data.java b/src/casekit/nmr/model/nmrium/Data.java similarity index 97% rename from src/casekit/nmr/model/nmrdisplayer/Data.java rename to src/casekit/nmr/model/nmrium/Data.java index 53c42b9..937090e 100644 --- a/src/casekit/nmr/model/nmrdisplayer/Data.java +++ b/src/casekit/nmr/model/nmrium/Data.java @@ -22,7 +22,7 @@ * SOFTWARE. */ -package casekit.nmr.model.nmrdisplayer; +package casekit.nmr.model.nmrium; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import lombok.Getter; diff --git a/src/casekit/nmr/model/nmrdisplayer/Default.java b/src/casekit/nmr/model/nmrium/Default.java similarity index 97% rename from src/casekit/nmr/model/nmrdisplayer/Default.java rename to src/casekit/nmr/model/nmrium/Default.java index f2117c4..12fb15e 100644 --- a/src/casekit/nmr/model/nmrdisplayer/Default.java +++ b/src/casekit/nmr/model/nmrium/Default.java @@ -22,7 +22,7 @@ * SOFTWARE. */ -package casekit.nmr.model.nmrdisplayer; +package casekit.nmr.model.nmrium; import lombok.*; diff --git a/src/casekit/nmr/model/nmrdisplayer/Link.java b/src/casekit/nmr/model/nmrium/Link.java similarity index 97% rename from src/casekit/nmr/model/nmrdisplayer/Link.java rename to src/casekit/nmr/model/nmrium/Link.java index da575da..ec9e8e8 100644 --- a/src/casekit/nmr/model/nmrdisplayer/Link.java +++ b/src/casekit/nmr/model/nmrium/Link.java @@ -22,7 +22,7 @@ * SOFTWARE. 
*/ -package casekit.nmr.model.nmrdisplayer; +package casekit.nmr.model.nmrium; import lombok.Getter; import lombok.NoArgsConstructor; diff --git a/src/casekit/nmr/model/nmrdisplayer/Range.java b/src/casekit/nmr/model/nmrium/Range.java similarity index 97% rename from src/casekit/nmr/model/nmrdisplayer/Range.java rename to src/casekit/nmr/model/nmrium/Range.java index faa62d4..90335e8 100644 --- a/src/casekit/nmr/model/nmrdisplayer/Range.java +++ b/src/casekit/nmr/model/nmrium/Range.java @@ -22,7 +22,7 @@ * SOFTWARE. */ -package casekit.nmr.model.nmrdisplayer; +package casekit.nmr.model.nmrium; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import lombok.*; diff --git a/src/casekit/nmr/model/nmrdisplayer/Signal1D.java b/src/casekit/nmr/model/nmrium/Signal1D.java similarity index 97% rename from src/casekit/nmr/model/nmrdisplayer/Signal1D.java rename to src/casekit/nmr/model/nmrium/Signal1D.java index 9733610..8002a45 100644 --- a/src/casekit/nmr/model/nmrdisplayer/Signal1D.java +++ b/src/casekit/nmr/model/nmrium/Signal1D.java @@ -22,7 +22,7 @@ * SOFTWARE. */ -package casekit.nmr.model.nmrdisplayer; +package casekit.nmr.model.nmrium; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import lombok.*; diff --git a/src/casekit/nmr/model/nmrdisplayer/Signal2D.java b/src/casekit/nmr/model/nmrium/Signal2D.java similarity index 97% rename from src/casekit/nmr/model/nmrdisplayer/Signal2D.java rename to src/casekit/nmr/model/nmrium/Signal2D.java index 162806e..4ed31b8 100644 --- a/src/casekit/nmr/model/nmrdisplayer/Signal2D.java +++ b/src/casekit/nmr/model/nmrium/Signal2D.java @@ -22,7 +22,7 @@ * SOFTWARE. 
*/ -package casekit.nmr.model.nmrdisplayer; +package casekit.nmr.model.nmrium; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import lombok.*; diff --git a/src/casekit/nmr/model/nmrdisplayer/Spectrum.java b/src/casekit/nmr/model/nmrium/Spectrum.java similarity index 99% rename from src/casekit/nmr/model/nmrdisplayer/Spectrum.java rename to src/casekit/nmr/model/nmrium/Spectrum.java index f92d25c..369342e 100644 --- a/src/casekit/nmr/model/nmrdisplayer/Spectrum.java +++ b/src/casekit/nmr/model/nmrium/Spectrum.java @@ -22,7 +22,7 @@ * SOFTWARE. */ -package casekit.nmr.model.nmrdisplayer; +package casekit.nmr.model.nmrium; import casekit.nmr.model.Signal; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; diff --git a/src/casekit/nmr/model/nmrdisplayer/Zone.java b/src/casekit/nmr/model/nmrium/Zone.java similarity index 97% rename from src/casekit/nmr/model/nmrdisplayer/Zone.java rename to src/casekit/nmr/model/nmrium/Zone.java index dd5521c..fca899e 100644 --- a/src/casekit/nmr/model/nmrdisplayer/Zone.java +++ b/src/casekit/nmr/model/nmrium/Zone.java @@ -22,7 +22,7 @@ * SOFTWARE. 
*/ -package casekit.nmr.model.nmrdisplayer; +package casekit.nmr.model.nmrium; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import lombok.*; diff --git a/src/casekit/nmr/utils/Utils.java b/src/casekit/nmr/utils/Utils.java index f673438..44621cf 100644 --- a/src/casekit/nmr/utils/Utils.java +++ b/src/casekit/nmr/utils/Utils.java @@ -3,7 +3,7 @@ import casekit.nmr.model.DataSet; import casekit.nmr.model.Spectrum; import casekit.nmr.model.StructureCompact; -import casekit.nmr.model.nmrdisplayer.Correlation; +import casekit.nmr.model.nmrium.Correlation; import org.openscience.cdk.aromaticity.Aromaticity; import org.openscience.cdk.aromaticity.ElectronDonation; import org.openscience.cdk.aromaticity.Kekulization; From f56ab3ace046eb0998dcfc7872af95165a867b09 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Wed, 1 Sep 2021 12:02:16 +0200 Subject: [PATCH 306/405] chore: changed string values to numeric ones; hybridizationConversionMap contains atom type as key instead of nucleus --- src/casekit/nmr/lsd/Constants.java | 86 ++++++++++++------- .../nmr/lsd/PyLSDInputFileBuilder.java | 44 +++++++++- 2 files changed, 93 insertions(+), 37 deletions(-) diff --git a/src/casekit/nmr/lsd/Constants.java b/src/casekit/nmr/lsd/Constants.java index eb60abb..e8cbe68 100644 --- a/src/casekit/nmr/lsd/Constants.java +++ b/src/casekit/nmr/lsd/Constants.java @@ -8,8 +8,8 @@ public class Constants { // valid strings from LSD webpage: C N N5 O S S4 S6 F Cl Br I P P5 Si B X public static final Map nucleiMap = createNucleiMap(); - public static final Map defaultHybridizationMap = createDefaultHybridizationMap(); - public static final Map defaultProtonsCountPerValencyMap = createDefaultProtonsCountPerValencyMap(); + public static final Map defaultHybridizationMap = createDefaultHybridizationMap(); + public static final Map defaultProtonsCountPerValencyMap = createDefaultProtonsCountPerValencyMap(); public static final Map defaultAtomLabelMap = createDefaultAtomLabelMap(); public 
static final Map> hybridizationConversionMap = createHybridizationConversionMap(); @@ -22,31 +22,31 @@ private static Map createNucleiMap() { return Collections.unmodifiableMap(nuclei); } - private static Map createDefaultHybridizationMap() { - final Map defaultHybridization = new HashMap<>(); - defaultHybridization.put("C", "(1 2 3)"); - defaultHybridization.put("N", "(1 2 3)"); - defaultHybridization.put("O", "(2 3)"); - defaultHybridization.put("S", "(1 2 3)"); - defaultHybridization.put("I", "3"); - defaultHybridization.put("F", "3"); + private static Map createDefaultHybridizationMap() { + final Map defaultHybridization = new HashMap<>(); + defaultHybridization.put("C", new int[]{1, 2, 3}); + defaultHybridization.put("N", new int[]{1, 2, 3}); + defaultHybridization.put("S", new int[]{1, 2, 3}); + defaultHybridization.put("O", new int[]{2, 3}); + defaultHybridization.put("I", new int[]{3}); + defaultHybridization.put("F", new int[]{3}); return Collections.unmodifiableMap(defaultHybridization); } - private static Map createDefaultProtonsCountPerValencyMap() { - final Map defaultProtonsCountPerValency = new HashMap<>(); - defaultProtonsCountPerValency.put("C", "(0 1 2 3)"); - defaultProtonsCountPerValency.put("N", "(0 1 2)"); - defaultProtonsCountPerValency.put("N5", "(0 1 2 3)"); - defaultProtonsCountPerValency.put("N35", "(0 1 2 3)"); - defaultProtonsCountPerValency.put("S", "(0 1)"); - defaultProtonsCountPerValency.put("S4", "(0 1 2 3)"); - defaultProtonsCountPerValency.put("S6", "(0 1 2 3)"); - defaultProtonsCountPerValency.put("S246", "(0 1 2 3)"); - defaultProtonsCountPerValency.put("O", "(0 1)"); - defaultProtonsCountPerValency.put("I", "0"); - defaultProtonsCountPerValency.put("F", "0"); + private static Map createDefaultProtonsCountPerValencyMap() { + final Map defaultProtonsCountPerValency = new HashMap<>(); + defaultProtonsCountPerValency.put("C", new int[]{0, 1, 2, 3}); + defaultProtonsCountPerValency.put("N", new int[]{0, 1, 2}); + 
defaultProtonsCountPerValency.put("N5", new int[]{0, 1, 2, 3}); + defaultProtonsCountPerValency.put("N35", new int[]{0, 1, 2, 3}); + defaultProtonsCountPerValency.put("S", new int[]{0, 1}); + defaultProtonsCountPerValency.put("S4", new int[]{0, 1, 2, 3}); + defaultProtonsCountPerValency.put("S6", new int[]{0, 1, 2, 3}); + defaultProtonsCountPerValency.put("S246", new int[]{0, 1, 2, 3}); + defaultProtonsCountPerValency.put("O", new int[]{0, 1}); + defaultProtonsCountPerValency.put("I", new int[]{0}); + defaultProtonsCountPerValency.put("F", new int[]{0}); return defaultProtonsCountPerValency; } @@ -68,24 +68,44 @@ private static Map> createHybridizationConversionMa // possible command in MongoDB: db.hybridizations.aggregate([{$match: {nucleus: "15N"}}, {$group: {_id: null, set: {$addToSet: "$hybridization"}}}]) // nucleus -> hybridization string -> number final Map> hybridizationConversionMap = new HashMap<>(); - hybridizationConversionMap.put("13C", new HashMap<>()); - hybridizationConversionMap.get("13C") + hybridizationConversionMap.put("C", new HashMap<>()); + hybridizationConversionMap.get("C") .put("PLANAR3", 3); - hybridizationConversionMap.get("13C") + hybridizationConversionMap.get("C") .put("SP3", 3); - hybridizationConversionMap.get("13C") + hybridizationConversionMap.get("C") .put("SP2", 2); - hybridizationConversionMap.get("13C") + hybridizationConversionMap.get("C") .put("SP1", 1); - hybridizationConversionMap.put("15N", new HashMap<>()); - hybridizationConversionMap.get("15N") + hybridizationConversionMap.put("N", new HashMap<>()); + hybridizationConversionMap.get("N") .put("PLANAR3", 3); - hybridizationConversionMap.get("15N") + hybridizationConversionMap.get("N") .put("SP3", 3); - hybridizationConversionMap.get("15N") + hybridizationConversionMap.get("N") .put("SP2", 2); - hybridizationConversionMap.get("15N") + hybridizationConversionMap.get("N") .put("SP1", 1); + hybridizationConversionMap.put("S", new HashMap<>()); + 
hybridizationConversionMap.get("S") + .put("PLANAR3", 3); + hybridizationConversionMap.get("S") + .put("SP3", 3); + hybridizationConversionMap.get("S") + .put("SP2", 2); + hybridizationConversionMap.get("S") + .put("SP1", 1); + hybridizationConversionMap.put("O", new HashMap<>()); + hybridizationConversionMap.get("O") + .put("SP3", 3); + hybridizationConversionMap.get("O") + .put("SP2", 2); + hybridizationConversionMap.put("I", new HashMap<>()); + hybridizationConversionMap.get("I") + .put("SP3", 3); + hybridizationConversionMap.put("F", new HashMap<>()); + hybridizationConversionMap.get("F") + .put("SP3", 3); return Collections.unmodifiableMap(hybridizationConversionMap); } diff --git a/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java b/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java index 235ab4c..8765022 100644 --- a/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java +++ b/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java @@ -163,8 +163,24 @@ private static String buildMULT(final Correlation correlation, final int index, } } if (hybridizations.isEmpty()) { - hybridizationStringBuilder = new StringBuilder( - Constants.defaultHybridizationMap.get(correlation.getAtomType())); + hybridizationStringBuilder = new StringBuilder(); + if (Constants.defaultHybridizationMap.get(correlation.getAtomType()).length + > 1) { + hybridizationStringBuilder.append("("); + } + for (int i = 0; i + < Constants.defaultHybridizationMap.get(correlation.getAtomType()).length; i++) { + hybridizationStringBuilder.append(Constants.defaultHybridizationMap.get(correlation.getAtomType())[i]); + if (i + < Constants.defaultHybridizationMap.get(correlation.getAtomType()).length + - 1) { + hybridizationStringBuilder.append(" "); + } + } + if (Constants.defaultHybridizationMap.get(correlation.getAtomType()).length + > 1) { + hybridizationStringBuilder.append(")"); + } } else { hybridizationStringBuilder = new StringBuilder(); if (hybridizations.size() @@ -208,8 +224,28 @@ private static String buildMULT(final 
Correlation correlation, final int index, attachedProtonsCountStringBuilder.append(")"); } } else { // if protons count is not given then set it to default value - attachedProtonsCountStringBuilder.append(Constants.defaultProtonsCountPerValencyMap.get( - Constants.defaultAtomLabelMap.get(correlation.getAtomType()))); + if (Constants.defaultProtonsCountPerValencyMap.get( + Constants.defaultAtomLabelMap.get(correlation.getAtomType())).length + > 1) { + attachedProtonsCountStringBuilder.append("("); + } + for (int i = 0; i + < Constants.defaultProtonsCountPerValencyMap.get( + Constants.defaultAtomLabelMap.get(correlation.getAtomType())).length; i++) { + attachedProtonsCountStringBuilder.append(Constants.defaultProtonsCountPerValencyMap.get( + Constants.defaultAtomLabelMap.get(correlation.getAtomType()))[i]); + if (i + < Constants.defaultProtonsCountPerValencyMap.get( + Constants.defaultAtomLabelMap.get(correlation.getAtomType())).length + - 1) { + attachedProtonsCountStringBuilder.append(" "); + } + } + if (Constants.defaultProtonsCountPerValencyMap.get( + Constants.defaultAtomLabelMap.get(correlation.getAtomType())).length + > 1) { + attachedProtonsCountStringBuilder.append(")"); + } } for (int j = 1; j < indicesMap.get(index).length; j++) { From 53d50eb4b6d74a303a8a4e1d0f696a15218a94c4 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Wed, 1 Sep 2021 12:04:18 +0200 Subject: [PATCH 307/405] feat: use hybridization statistics too to forbid bonds --- .../nmr/lsd/PyLSDInputFileBuilder.java | 67 ++++++++++++++++++- 1 file changed, 64 insertions(+), 3 deletions(-) diff --git a/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java b/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java index 8765022..942a3de 100644 --- a/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java +++ b/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java @@ -9,6 +9,7 @@ import java.text.SimpleDateFormat; import java.util.*; +import java.util.stream.Collectors; public class PyLSDInputFileBuilder { @@ -500,6 +501,7 @@ 
private static String buildLISTsAndPROPs(final List correlationList atomTypesByMf.remove("H"); final Map listMap = new HashMap<>(); listMap.put("HETE", "L1"); + final Set defaultHybridizationStates = new HashSet<>(); for (final String atomType : atomTypesByMf) { listMap.put(atomType, "L" + (listMap.size() @@ -510,13 +512,25 @@ private static String buildLISTsAndPROPs(final List correlationList .append(" ") .append(atomType) .append("\n"); + defaultHybridizationStates.clear(); + for (final int numericHybridization : Constants.defaultHybridizationMap.get(atomType)) { + defaultHybridizationStates.add(numericHybridization); + } + for (final int hybridizationState : defaultHybridizationStates) { + listMap.put(atomType + + "_" + + hybridizationState, "L" + + (listMap.size() + + 1)); + } } Correlation correlation; String atomType; int indexInPyLSD; Map>> connectivities; - Set nonNeighborAtomTypes; + Set neighborAtomTypes, nonNeighborAtomTypes; + Map> forbiddenNeighborHybridizations; for (int i = 0; i < correlationList.size(); i++) { correlation = correlationList.get(i); @@ -528,13 +542,33 @@ private static String buildLISTsAndPROPs(final List correlationList || connectivities.isEmpty()) { continue; } + // define atom types of non-neighbors + neighborAtomTypes = new HashSet<>(connectivities.keySet()); nonNeighborAtomTypes = new HashSet<>(elementCounts.keySet()); - nonNeighborAtomTypes.removeAll(connectivities.keySet()); + nonNeighborAtomTypes.removeAll(neighborAtomTypes); nonNeighborAtomTypes.remove("H"); + // define forbidden hybridizations of possible neighbors + forbiddenNeighborHybridizations = new HashMap<>(); + for (final String neighborAtomType : neighborAtomTypes) { + for (final String neighborHybridization : connectivities.get(neighborAtomType) + .keySet()) { + forbiddenNeighborHybridizations.putIfAbsent(neighborAtomType, new HashSet<>(Arrays.stream( + Constants.defaultHybridizationMap.get(neighborAtomType)) + .boxed() + .collect( + Collectors.toSet()))); + 
forbiddenNeighborHybridizations.get(neighborAtomType) + .remove(Constants.hybridizationConversionMap.get(neighborAtomType) + .get(neighborHybridization)); + } + } + + // put in the extracted information per correlation for (int k = 1; k < indicesMap.get(i).length; k++) { indexInPyLSD = (int) indicesMap.get(i)[k]; + // forbid bonds to whole element groups for (final String nonNeighborAtomType : nonNeighborAtomTypes) { stringBuilder.append("PROP ") .append(indexInPyLSD) @@ -555,10 +589,37 @@ private static String buildLISTsAndPROPs(final List correlationList .append(")") .append("\n"); } + // forbid bonds to possible neighbors with certain hybridization states + for (final String neighborAtomType : neighborAtomTypes) { + for (final int forbiddenNeighborHybridization : forbiddenNeighborHybridizations.get( + neighborAtomType)) { + stringBuilder.append("PROP ") + .append(indexInPyLSD) + .append(" 0 ") + .append(listMap.get(neighborAtomType + + "_" + + forbiddenNeighborHybridization)) + .append(" -") + .append("; no bonds between ") + .append(indexInPyLSD) + .append(" (") + .append(atomType) + .append(", ") + .append(Statistics.roundDouble(correlation.getSignal() + .getDelta(), 2)) + .append(") and ") + .append(listMap.get(neighborAtomType)) + .append(" (") + .append(neighborAtomType) + .append(", SP") + .append(forbiddenNeighborHybridization) + .append(")") + .append("\n"); + } + } } } - return stringBuilder.toString(); } From b3dc39280dadcb896e009aad52e317130a1be275 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Fri, 3 Sep 2021 14:58:08 +0200 Subject: [PATCH 308/405] feat: use SHIX command in PyLSD to let it recognise equivalent atoms by shift values --- src/casekit/nmr/lsd/PyLSDInputFileBuilder.java | 18 ++++++++---------- .../nmr/lsd/model/ElucidationOptions.java | 1 - 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java b/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java index 942a3de..00b8f67 100644 --- 
a/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java +++ b/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java @@ -444,8 +444,8 @@ private static String buildSHIX(final Correlation correlation, final int index, stringBuilder.append("SHIX ") .append(indicesMap.get(index)[k]) .append(" ") - .append(correlation.getSignal() - .getDelta()) + .append(Statistics.roundDouble(correlation.getSignal() + .getDelta(), 2)) .append("\n"); } @@ -473,8 +473,8 @@ private static String buildSHIH(final Correlation correlation, final int index, stringBuilder.append("SHIH ") .append(indicesMap.get(index)[k]) .append(" ") - .append(correlation.getSignal() - .getDelta()) + .append(Statistics.roundDouble(correlation.getSignal() + .getDelta(), 3)) .append("\n"); } } @@ -719,12 +719,10 @@ public static String buildPyLSDInputFileContent(final Data data, final String mf collection.get("COSY") .add(buildCOSY(correlationList, i, indicesMap, elucidationOptions.getCosyP3(), elucidationOptions.getCosyP4())); - if (elucidationOptions.isUsePrediction()) { - collection.get("SHIX") - .add(buildSHIX(correlation, i, indicesMap)); - collection.get("SHIH") - .add(buildSHIH(correlation, i, indicesMap)); - } + collection.get("SHIX") + .add(buildSHIX(correlation, i, indicesMap)); + collection.get("SHIH") + .add(buildSHIH(correlation, i, indicesMap)); } collection.keySet() diff --git a/src/casekit/nmr/lsd/model/ElucidationOptions.java b/src/casekit/nmr/lsd/model/ElucidationOptions.java index 820e463..84822d8 100644 --- a/src/casekit/nmr/lsd/model/ElucidationOptions.java +++ b/src/casekit/nmr/lsd/model/ElucidationOptions.java @@ -21,5 +21,4 @@ public class ElucidationOptions { private int hmbcP4; private int cosyP3; private int cosyP4; - private boolean usePrediction; } From 373d1b57166a33941dcca2ffdcd21ee874b70ac3 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Fri, 3 Sep 2021 14:58:44 +0200 Subject: [PATCH 309/405] chore: added PLANAR3 for O to hybridizationConversionMap --- src/casekit/nmr/lsd/Constants.java | 2 ++ 
1 file changed, 2 insertions(+) diff --git a/src/casekit/nmr/lsd/Constants.java b/src/casekit/nmr/lsd/Constants.java index e8cbe68..18708aa 100644 --- a/src/casekit/nmr/lsd/Constants.java +++ b/src/casekit/nmr/lsd/Constants.java @@ -96,6 +96,8 @@ private static Map> createHybridizationConversionMa hybridizationConversionMap.get("S") .put("SP1", 1); hybridizationConversionMap.put("O", new HashMap<>()); + hybridizationConversionMap.get("O") + .put("PLANAR3", 3); hybridizationConversionMap.get("O") .put("SP3", 3); hybridizationConversionMap.get("O") From e272d43aea828be4ff82af3e7519f583a8cb846a Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Fri, 3 Sep 2021 16:07:22 +0200 Subject: [PATCH 310/405] feat: simplified connectivity statistics filtering --- .../nmr/analysis/ConnectivityStatistics.java | 129 ++++++++++-------- 1 file changed, 69 insertions(+), 60 deletions(-) diff --git a/src/casekit/nmr/analysis/ConnectivityStatistics.java b/src/casekit/nmr/analysis/ConnectivityStatistics.java index 9e9ef04..604478f 100644 --- a/src/casekit/nmr/analysis/ConnectivityStatistics.java +++ b/src/casekit/nmr/analysis/ConnectivityStatistics.java @@ -11,7 +11,6 @@ import java.util.Map; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; -import java.util.stream.Collectors; public class ConnectivityStatistics { @@ -154,72 +153,82 @@ public static Map>> extractConnectivit return extractedConnectivities; } - public static List extractNeighborAtomTypes( - final Map>> extractedConnectivities, final double threshold) { + public static Map>> filterExtractedConnectivities( + final Map>> extractedConnectivities, + final double thresholdHybridizationCount, final double thresholdProtonsCount) { + final Map totalCounts = getTotalCounts(extractedConnectivities); + final int totalCountsSum = getTotalCount(totalCounts); + + final Map>> filteredExtractedConnectivities = new HashMap<>(); + extractedConnectivities.keySet() + .forEach(neighborAtomType -> { + 
extractedConnectivities.get(neighborAtomType) + .keySet() + .forEach(neighborHybridization -> { + final int countHybridization = extractedConnectivities.get( + neighborAtomType) + .get(neighborHybridization) + .keySet() + .stream() + .reduce(0, + (protonsCountSum, protonsCount) -> protonsCountSum += extractedConnectivities.get( + neighborAtomType) + .get(neighborHybridization) + .get(protonsCount)); + if (countHybridization + / (double) totalCountsSum + >= thresholdHybridizationCount) { + for (final Map.Entry entryProtonsCount : extractedConnectivities.get( + neighborAtomType) + .get(neighborHybridization) + .entrySet()) { + if (entryProtonsCount.getValue() + / (double) countHybridization + >= thresholdProtonsCount) { + filteredExtractedConnectivities.putIfAbsent( + neighborAtomType, new HashMap<>()); + filteredExtractedConnectivities.get( + neighborAtomType) + .putIfAbsent( + neighborHybridization, + new HashMap<>()); + filteredExtractedConnectivities.get( + neighborAtomType) + .get(neighborHybridization) + .putIfAbsent( + entryProtonsCount.getKey(), + entryProtonsCount.getValue()); + } + } + } + }); + }); + + return filteredExtractedConnectivities; + } + + private static Map getTotalCounts( + final Map>> extractedConnectivities) { final Map totalCounts = new HashMap<>(); - for (final String connectedAtomType : extractedConnectivities.keySet()) { - totalCounts.putIfAbsent(connectedAtomType, 0); - for (final String connectedAtomHybridization : extractedConnectivities.get(connectedAtomType) - .keySet()) { - for (final Map.Entry countsEntry : extractedConnectivities.get(connectedAtomType) - .get(connectedAtomHybridization) + for (final String key1 : extractedConnectivities.keySet()) { + totalCounts.putIfAbsent(key1, 0); + for (final String key2 : extractedConnectivities.get(key1) + .keySet()) { + for (final Map.Entry countsEntry : extractedConnectivities.get(key1) + .get(key2) .entrySet()) { - totalCounts.put(connectedAtomType, totalCounts.get(connectedAtomType) + 
totalCounts.put(key1, totalCounts.get(key1) + countsEntry.getValue()); } } } - final int totalCountsSum = totalCounts.values() - .stream() - .reduce(0, (total, current) -> total += current); - return totalCounts.keySet() - .stream() - .filter(atomType -> (totalCounts.get(atomType) - / (double) totalCountsSum) - >= threshold) - .collect(Collectors.toList()); - } - - public static Map> extractNeighborHybridizations( - final Map>> extractedConnectivities, - final String neighborAtomType, final double threshold) { - if (!extractedConnectivities.containsKey(neighborAtomType)) { - return new HashMap<>(); - } - final Map countsPerHybridization = new HashMap<>(); - extractedConnectivities.get(neighborAtomType) - .keySet() - .forEach(hybridizationNeighbor -> { - countsPerHybridization.put(hybridizationNeighbor, extractedConnectivities.get( - neighborAtomType) - .get(hybridizationNeighbor) - .keySet() - .stream() - .reduce(0, - (protonsCountSum, protonsCount) -> protonsCountSum += extractedConnectivities.get( - neighborAtomType) - .get(hybridizationNeighbor) - .get(protonsCount))); - }); - final int totalCount = countsPerHybridization.keySet() - .stream() - .map(countsPerHybridization::get) - .reduce(0, (sum, current) -> sum += current); - final List allowedHybridizationList = countsPerHybridization.keySet() - .stream() - .filter(hybridizationNeighbor -> countsPerHybridization.get( - hybridizationNeighbor) - / (double) totalCount - >= threshold) - .collect(Collectors.toList()); - - final Map> extractedNeighborHybridizationMap = new HashMap<>(); - for (final String allowedHybridization : allowedHybridizationList) { - extractedNeighborHybridizationMap.put(allowedHybridization, extractedConnectivities.get(neighborAtomType) - .get(allowedHybridization)); - } + return totalCounts; + } - return extractedNeighborHybridizationMap; + private static int getTotalCount(final Map totalCounts) { + return totalCounts.values() + .stream() + .reduce(0, (total, current) -> total += current); 
} } From 168277e7a0f521761c5e510ff37da59a22779267 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Sat, 11 Sep 2021 12:16:39 +0200 Subject: [PATCH 311/405] chore: simplification in Constants --- src/casekit/nmr/lsd/Constants.java | 50 +++++------------------------- 1 file changed, 7 insertions(+), 43 deletions(-) diff --git a/src/casekit/nmr/lsd/Constants.java b/src/casekit/nmr/lsd/Constants.java index 18708aa..f1a5de3 100644 --- a/src/casekit/nmr/lsd/Constants.java +++ b/src/casekit/nmr/lsd/Constants.java @@ -11,7 +11,7 @@ public class Constants { public static final Map defaultHybridizationMap = createDefaultHybridizationMap(); public static final Map defaultProtonsCountPerValencyMap = createDefaultProtonsCountPerValencyMap(); public static final Map defaultAtomLabelMap = createDefaultAtomLabelMap(); - public static final Map> hybridizationConversionMap = createHybridizationConversionMap(); + public static final Map hybridizationConversionMap = createHybridizationConversionMap(); private static Map createNucleiMap() { final Map nuclei = new HashMap<>(); @@ -63,51 +63,15 @@ private static Map createDefaultAtomLabelMap() { return Collections.unmodifiableMap(defaultAtomLabel); } - private static Map> createHybridizationConversionMap() { + private static Map createHybridizationConversionMap() { // @TODO access this information from MongoDB and store it instead of hard coding it // possible command in MongoDB: db.hybridizations.aggregate([{$match: {nucleus: "15N"}}, {$group: {_id: null, set: {$addToSet: "$hybridization"}}}]) // nucleus -> hybridization string -> number - final Map> hybridizationConversionMap = new HashMap<>(); - hybridizationConversionMap.put("C", new HashMap<>()); - hybridizationConversionMap.get("C") - .put("PLANAR3", 3); - hybridizationConversionMap.get("C") - .put("SP3", 3); - hybridizationConversionMap.get("C") - .put("SP2", 2); - hybridizationConversionMap.get("C") - .put("SP1", 1); - hybridizationConversionMap.put("N", new HashMap<>()); - 
hybridizationConversionMap.get("N") - .put("PLANAR3", 3); - hybridizationConversionMap.get("N") - .put("SP3", 3); - hybridizationConversionMap.get("N") - .put("SP2", 2); - hybridizationConversionMap.get("N") - .put("SP1", 1); - hybridizationConversionMap.put("S", new HashMap<>()); - hybridizationConversionMap.get("S") - .put("PLANAR3", 3); - hybridizationConversionMap.get("S") - .put("SP3", 3); - hybridizationConversionMap.get("S") - .put("SP2", 2); - hybridizationConversionMap.get("S") - .put("SP1", 1); - hybridizationConversionMap.put("O", new HashMap<>()); - hybridizationConversionMap.get("O") - .put("PLANAR3", 3); - hybridizationConversionMap.get("O") - .put("SP3", 3); - hybridizationConversionMap.get("O") - .put("SP2", 2); - hybridizationConversionMap.put("I", new HashMap<>()); - hybridizationConversionMap.get("I") - .put("SP3", 3); - hybridizationConversionMap.put("F", new HashMap<>()); - hybridizationConversionMap.get("F") - .put("SP3", 3); + final Map hybridizationConversionMap = new HashMap<>(); + hybridizationConversionMap.put("PLANAR3", 3); + hybridizationConversionMap.put("SP3", 3); + hybridizationConversionMap.put("SP2", 2); + hybridizationConversionMap.put("SP1", 1); return Collections.unmodifiableMap(hybridizationConversionMap); } From 04e407f39227d7e76b9ae6ff07a7b1b286759dee Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Sat, 11 Sep 2021 13:08:10 +0200 Subject: [PATCH 312/405] feat: use utility classes for LIST and PROP creation and extension of connection forbids; allow multiple hybridizations --- src/casekit/nmr/lsd/LISTAndPROPUtilities.java | 208 ++++++++++++++++++ .../nmr/lsd/PyLSDInputFileBuilder.java | 153 ++----------- src/casekit/nmr/lsd/Utilities.java | 146 ++++++++++++ src/casekit/nmr/model/nmrium/Correlation.java | 2 +- 4 files changed, 369 insertions(+), 140 deletions(-) create mode 100644 src/casekit/nmr/lsd/LISTAndPROPUtilities.java create mode 100644 src/casekit/nmr/lsd/Utilities.java diff --git 
a/src/casekit/nmr/lsd/LISTAndPROPUtilities.java b/src/casekit/nmr/lsd/LISTAndPROPUtilities.java new file mode 100644 index 0000000..850a335 --- /dev/null +++ b/src/casekit/nmr/lsd/LISTAndPROPUtilities.java @@ -0,0 +1,208 @@ +package casekit.nmr.lsd; + +import casekit.nmr.model.nmrium.Correlation; +import casekit.nmr.utils.Statistics; + +import java.util.*; + +public class LISTAndPROPUtilities { + + public static void insertELEM(final StringBuilder stringBuilder, final Map listMap, + final Set atomTypesByMf) { + final Set atomTypes = new HashSet<>(atomTypesByMf); + atomTypes.remove("H"); + for (final String atomType : atomTypes) { + listMap.put(atomType, "L" + + (listMap.size() + + 1)); + stringBuilder.append("ELEM") + .append(" ") + .append(listMap.get(atomType)) + .append(" ") + .append(atomType) + .append("\n"); + } + } + + public static void insertNoHeteroHeteroBonds(final StringBuilder stringBuilder, final Map listMap) { + // create hetero atom list automatically to forbid hetero-hetero bonds + stringBuilder.append("HETE L1") + .append("; list of hetero atoms\n"); + stringBuilder.append("PROP L1 0 L1 -; no hetero-hetero bonds\n"); + listMap.put("HETE", "L1"); + } + + public static void insertCarbonCombinationLISTs(final StringBuilder stringBuilder, + final Map listMap, + final List correlationList, + final Map indicesMap, + final Map>>> detectedConnectivities) { + final Map> atomIndicesMap = new HashMap<>(); + Correlation correlation; + int indexInPyLSD; + String listKey; + for (int i = 0; i + < correlationList.size(); i++) { + for (int k = 1; k + < indicesMap.get(i).length; k++) { + correlation = correlationList.get(i); + if (!correlation.getAtomType() + .equals("C") + || correlation.getHybridization() + .size() + != 1 + || correlation.getProtonsCount() + .size() + != 1) { + continue; + } + listKey = correlation.getAtomType() + + "_" + + correlation.getHybridization() + .get(0) + + "_" + + correlation.getProtonsCount() + .get(0); + indexInPyLSD = (int) 
indicesMap.get(i)[k]; + atomIndicesMap.putIfAbsent(listKey, new HashSet<>()); + atomIndicesMap.get(listKey) + .add(indexInPyLSD); + } + } + String[] split; + for (final Map.Entry> combinationEntry : atomIndicesMap.entrySet()) { + stringBuilder.append("LIST ") + .append("L") + .append(listMap.size() + + 1); + + for (final Integer pyLSDAtomIndex : combinationEntry.getValue()) { + stringBuilder.append(" ") + .append(pyLSDAtomIndex); + } + split = combinationEntry.getKey() + .split("_"); + stringBuilder.append("; list of ") + .append(split[0]) + .append("H") + .append(split[2]) + .append(" and ") + .append(split[1]) + .append("\n"); + + listMap.put(combinationEntry.getKey(), "L" + + (listMap.size() + + 1)); + } + } + + public static void insertForbiddenConnectionLISTsAndPROPs(final StringBuilder stringBuilder, + final Map listMap, + final List correlationList, + final Map indicesMap, + final Map>>> detectedConnectivities, + final Set atomTypesByMf) { + // insert ELEM for each heavy atom type in MF + LISTAndPROPUtilities.insertELEM(stringBuilder, listMap, atomTypesByMf); + // insert list combinations of carbon and hybridization states + LISTAndPROPUtilities.insertCarbonCombinationLISTs(stringBuilder, listMap, correlationList, indicesMap, + detectedConnectivities); + Correlation correlation; + String atomType, listKey; + int indexInPyLSD; + Map>> connectivities; + Set nonNeighborAtomTypes, neighborAtomTypes; + Map>> forbiddenNeighborHybridizationsAndProtonCounts; + for (int i = 0; i + < correlationList.size(); i++) { + correlation = correlationList.get(i); + atomType = correlation.getAtomType(); + connectivities = detectedConnectivities.get(i); + // consider carbons here only, because of having complete connectivity information + if (!atomType.equals("C") + || connectivities + == null + || connectivities.isEmpty()) { + continue; + } + // define atom types of non-neighbors + neighborAtomTypes = connectivities.keySet(); + nonNeighborAtomTypes = new 
HashSet<>(atomTypesByMf); + nonNeighborAtomTypes.removeAll(neighborAtomTypes); + nonNeighborAtomTypes.remove("H"); + forbiddenNeighborHybridizationsAndProtonCounts = Utilities.buildForbiddenNeighborHybridizationsAndProtonCounts( + connectivities, neighborAtomTypes); + + // put in the extracted information per correlation + for (int k = 1; k + < indicesMap.get(i).length; k++) { + indexInPyLSD = (int) indicesMap.get(i)[k]; + // forbid bonds to whole element groups + for (final String nonNeighborAtomType : nonNeighborAtomTypes) { + stringBuilder.append("PROP ") + .append(indexInPyLSD) + .append(" 0 ") + .append(listMap.get(nonNeighborAtomType)) + .append(" -") + .append("; no bonds between ") + .append(indexInPyLSD) + .append(" (") + .append(atomType) + .append(", ") + .append(Statistics.roundDouble(correlation.getSignal() + .getDelta(), 2)) + .append(") and ") + .append(listMap.get(nonNeighborAtomType)) + .append(" (") + .append(nonNeighborAtomType) + .append(")") + .append("\n"); + } + // forbid bonds to possible neighbors with certain hybridization states and proton counts + for (final String neighborAtomType : forbiddenNeighborHybridizationsAndProtonCounts.keySet()) { + for (final int forbiddenNeighborHybridization : forbiddenNeighborHybridizationsAndProtonCounts.get( + neighborAtomType) + .keySet()) { + if (!neighborAtomType.equals("C")) { + continue; + } + for (final Integer forbiddenProtonsCount : forbiddenNeighborHybridizationsAndProtonCounts.get( + neighborAtomType) + .get(forbiddenNeighborHybridization)) { + listKey = neighborAtomType + + "_SP" + + forbiddenNeighborHybridization + + "_" + + forbiddenProtonsCount; + if (listMap.containsKey(listKey)) { + stringBuilder.append("PROP ") + .append(indexInPyLSD) + .append(" 0 ") + .append(listMap.get(listKey)) + .append(" -") + .append("; no bonds between ") + .append(indexInPyLSD) + .append(" (") + .append(atomType) + .append(", ") + .append(Statistics.roundDouble(correlation.getSignal() + .getDelta(), 2)) + 
.append(") and ") + .append(listMap.get(listKey)) + .append(" (") + .append(neighborAtomType) + .append(", SP") + .append(forbiddenNeighborHybridization) + .append(", ") + .append(forbiddenProtonsCount) + .append("H") + .append(")") + .append("\n"); + } + } + } + } + } + } + } +} diff --git a/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java b/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java index 00b8f67..ebebd6e 100644 --- a/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java +++ b/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java @@ -136,17 +136,12 @@ private static String buildMULT(final Correlation correlation, final int index, && !correlation.getHybridization() .isEmpty()) { // if hybridization is already given - if (correlation.getHybridization() - .equals("SP")) { - hybridizations.add(1); - } else if (correlation.getHybridization() - .equals("SP2")) { - hybridizations.add(2); - } else { - hybridizations.add(3); - } + hybridizations.addAll(correlation.getHybridization() + .stream() + .map(Constants.hybridizationConversionMap::get) + .collect(Collectors.toList())); } else { - // if hybridization is not given then use the detected ones via MongoDB queries + // if hybridization is not given then use the detected ones if (detectedHybridizations.containsKey(index)) { hybridizations = detectedHybridizations.get(index); } @@ -486,139 +481,19 @@ private static String buildSHIH(final Correlation correlation, final int index, private static String buildLISTsAndPROPs(final List correlationList, final Map indicesMap, final Map elementCounts, - final Map>>> detectedConnectivities, + final Map>>> detectedConnectivities, final boolean allowHeteroHeteroBonds) { final StringBuilder stringBuilder = new StringBuilder(); - // LIST PROP for hetero hetero bonds allowance - if (!allowHeteroHeteroBonds) { - // create hetero atom list automatically - stringBuilder.append("HETE L1") - .append("; list of hetero atoms\n"); - stringBuilder.append("PROP L1 0 L1 -; no hetero-hetero bonds\n"); - } - - 
final Set atomTypesByMf = new HashSet<>(elementCounts.keySet()); - atomTypesByMf.remove("H"); final Map listMap = new HashMap<>(); - listMap.put("HETE", "L1"); - final Set defaultHybridizationStates = new HashSet<>(); - for (final String atomType : atomTypesByMf) { - listMap.put(atomType, "L" - + (listMap.size() - + 1)); - stringBuilder.append("ELEM") - .append(" ") - .append(listMap.get(atomType)) - .append(" ") - .append(atomType) - .append("\n"); - defaultHybridizationStates.clear(); - for (final int numericHybridization : Constants.defaultHybridizationMap.get(atomType)) { - defaultHybridizationStates.add(numericHybridization); - } - for (final int hybridizationState : defaultHybridizationStates) { - listMap.put(atomType - + "_" - + hybridizationState, "L" - + (listMap.size() - + 1)); - } - } - - Correlation correlation; - String atomType; - int indexInPyLSD; - Map>> connectivities; - Set neighborAtomTypes, nonNeighborAtomTypes; - Map> forbiddenNeighborHybridizations; - for (int i = 0; i - < correlationList.size(); i++) { - correlation = correlationList.get(i); - atomType = correlation.getAtomType(); - connectivities = detectedConnectivities.get(i); - if (atomType.equals("H") - || connectivities - == null - || connectivities.isEmpty()) { - continue; - } - // define atom types of non-neighbors - neighborAtomTypes = new HashSet<>(connectivities.keySet()); - nonNeighborAtomTypes = new HashSet<>(elementCounts.keySet()); - nonNeighborAtomTypes.removeAll(neighborAtomTypes); - nonNeighborAtomTypes.remove("H"); - - // define forbidden hybridizations of possible neighbors - forbiddenNeighborHybridizations = new HashMap<>(); - for (final String neighborAtomType : neighborAtomTypes) { - for (final String neighborHybridization : connectivities.get(neighborAtomType) - .keySet()) { - forbiddenNeighborHybridizations.putIfAbsent(neighborAtomType, new HashSet<>(Arrays.stream( - Constants.defaultHybridizationMap.get(neighborAtomType)) - .boxed() - .collect( - 
Collectors.toSet()))); - forbiddenNeighborHybridizations.get(neighborAtomType) - .remove(Constants.hybridizationConversionMap.get(neighborAtomType) - .get(neighborHybridization)); - } - } - // put in the extracted information per correlation - for (int k = 1; k - < indicesMap.get(i).length; k++) { - indexInPyLSD = (int) indicesMap.get(i)[k]; - // forbid bonds to whole element groups - for (final String nonNeighborAtomType : nonNeighborAtomTypes) { - stringBuilder.append("PROP ") - .append(indexInPyLSD) - .append(" 0 ") - .append(listMap.get(nonNeighborAtomType)) - .append(" -") - .append("; no bonds between ") - .append(indexInPyLSD) - .append(" (") - .append(atomType) - .append(", ") - .append(Statistics.roundDouble(correlation.getSignal() - .getDelta(), 2)) - .append(") and ") - .append(listMap.get(nonNeighborAtomType)) - .append(" (") - .append(nonNeighborAtomType) - .append(")") - .append("\n"); - } - // forbid bonds to possible neighbors with certain hybridization states - for (final String neighborAtomType : neighborAtomTypes) { - for (final int forbiddenNeighborHybridization : forbiddenNeighborHybridizations.get( - neighborAtomType)) { - stringBuilder.append("PROP ") - .append(indexInPyLSD) - .append(" 0 ") - .append(listMap.get(neighborAtomType - + "_" - + forbiddenNeighborHybridization)) - .append(" -") - .append("; no bonds between ") - .append(indexInPyLSD) - .append(" (") - .append(atomType) - .append(", ") - .append(Statistics.roundDouble(correlation.getSignal() - .getDelta(), 2)) - .append(") and ") - .append(listMap.get(neighborAtomType)) - .append(" (") - .append(neighborAtomType) - .append(", SP") - .append(forbiddenNeighborHybridization) - .append(")") - .append("\n"); - } - } - } + // LIST and PROP for hetero hetero bonds allowance as well as hybridization states and proton counts reduction + if (!allowHeteroHeteroBonds) { + LISTAndPROPUtilities.insertNoHeteroHeteroBonds(stringBuilder, listMap); + 
Utilities.reduceDefaultHybridizationsAndProtonCountsOfHeteroAtoms(correlationList, detectedConnectivities); } + // insert forbidden connection lists and properties + LISTAndPROPUtilities.insertForbiddenConnectionLISTsAndPROPs(stringBuilder, listMap, correlationList, indicesMap, + detectedConnectivities, elementCounts.keySet()); return stringBuilder.toString(); } @@ -663,7 +538,7 @@ private static String buildFilters(final String[] filterPaths) { public static String buildPyLSDInputFileContent(final Data data, final String mf, final Map> detectedHybridizations, - final Map>>> detectedConnectivities, + final Map>>> detectedConnectivities, final ElucidationOptions elucidationOptions) { final Map> state = data.getCorrelations() .getState(); diff --git a/src/casekit/nmr/lsd/Utilities.java b/src/casekit/nmr/lsd/Utilities.java new file mode 100644 index 0000000..4132de1 --- /dev/null +++ b/src/casekit/nmr/lsd/Utilities.java @@ -0,0 +1,146 @@ +package casekit.nmr.lsd; + +import casekit.nmr.model.nmrium.Correlation; + +import java.util.*; +import java.util.stream.Collectors; + +public class Utilities { + + public static void reduceDefaultHybridizationsAndProtonCountsOfHeteroAtoms(final List correlationList, + final Map>>> detectedConnectivities) { + final Map> allowedHeteroAtomHybridizations = buildAllowedHeteroAtomHybridizations( + correlationList, detectedConnectivities); + final Map> allowedHeteroAtomProtonCounts = buildAllowedHeteroAtomProtonCounts( + correlationList, detectedConnectivities); + // hetero atoms can bond to carbons only, due to that we can use more connectivity information + // do not allow bond between carbon and hetero atoms in certain hybridization states and proton counts + for (final Correlation correlation : correlationList) { + // ignore C and H atoms + if (correlation.getAtomType() + .equals("C") + || correlation.getAtomType() + .equals("H")) { + continue; + } + // but only if we have seen the hetero atom type in connectivity statistics + // and 
hybridization states or protons count was not set beforehand + if (correlation.getEdited() + != null + && correlation.getEdited() + .containsKey("hybridization") + && correlation.getEdited() + .get("hybridization") + && allowedHeteroAtomHybridizations.containsKey(correlation.getAtomType())) { + correlation.getHybridization() + .retainAll(allowedHeteroAtomHybridizations.get(correlation.getAtomType())); + } + if (correlation.getEdited() + != null + && correlation.getEdited() + .containsKey("protonsCount") + && correlation.getEdited() + .get("protonsCount") + && allowedHeteroAtomProtonCounts.containsKey(correlation.getAtomType())) { + correlation.getProtonsCount() + .retainAll(allowedHeteroAtomProtonCounts.get(correlation.getAtomType())); + } + } + } + + public static Map> buildAllowedHeteroAtomHybridizations(final List correlationList, + final Map>>> detectedConnectivities) { + final Map> allowedHeteroAtomHybridizations = new HashMap<>(); + for (final Map.Entry>>> correlationEntry : detectedConnectivities.entrySet()) { + if (!correlationList.get(correlationEntry.getKey()) + .getAtomType() + .equals("C") + && !correlationList.get(correlationEntry.getKey()) + .getAtomType() + .equals("H")) { + continue; + } + for (final Map.Entry>> neighborAtomTypeEntry : correlationEntry.getValue() + .entrySet()) { + allowedHeteroAtomHybridizations.putIfAbsent(neighborAtomTypeEntry.getKey(), new HashSet<>()); + allowedHeteroAtomHybridizations.get(neighborAtomTypeEntry.getKey()) + .addAll(neighborAtomTypeEntry.getValue() + .keySet()); + } + } + + return allowedHeteroAtomHybridizations; + } + + public static Map> buildAllowedHeteroAtomProtonCounts(final List correlationList, + final Map>>> detectedConnectivities) { + final Map> allowedHeteroAtomProtonCounts = new HashMap<>(); + for (final Map.Entry>>> correlationEntry : detectedConnectivities.entrySet()) { + if (!correlationList.get(correlationEntry.getKey()) + .getAtomType() + .equals("C") + && 
!correlationList.get(correlationEntry.getKey()) + .getAtomType() + .equals("H")) { + continue; + } + for (final Map.Entry>> neighborAtomTypeEntry : correlationEntry.getValue() + .entrySet()) { + allowedHeteroAtomProtonCounts.putIfAbsent(neighborAtomTypeEntry.getKey(), new HashSet<>()); + for (final Map.Entry> neighborHybridizationEntry : neighborAtomTypeEntry.getValue() + .entrySet()) { + allowedHeteroAtomProtonCounts.get(neighborAtomTypeEntry.getKey()) + .addAll(neighborHybridizationEntry.getValue()); + } + } + } + + return allowedHeteroAtomProtonCounts; + } + + public static Map>> buildForbiddenNeighborHybridizationsAndProtonCounts( + final Map>> connectivities, final Set neighborAtomTypes) { + + // define forbidden hybridizations and proton counts (carbons only) of possible neighbors + final Map>> forbiddenNeighborHybridizationsAndProtonCounts = new HashMap<>(); + for (final String neighborAtomType : neighborAtomTypes) { + forbiddenNeighborHybridizationsAndProtonCounts.put(neighborAtomType, new HashMap<>()); + for (final int defaultHybridization : Arrays.stream(Constants.defaultHybridizationMap.get(neighborAtomType)) + .boxed() + .collect(Collectors.toList())) { + forbiddenNeighborHybridizationsAndProtonCounts.get(neighborAtomType) + .put(defaultHybridization, Arrays.stream( + Constants.defaultProtonsCountPerValencyMap.get( + neighborAtomType)) + .boxed() + .collect( + Collectors.toSet())); + } + for (final String neighborHybridization : connectivities.get(neighborAtomType) + .keySet()) { + // remove found protons count per hybridzations from list of forbidden ones + for (final int forbiddenNeighborHybridization : new HashSet<>( + forbiddenNeighborHybridizationsAndProtonCounts.get(neighborAtomType) + .keySet())) { + forbiddenNeighborHybridizationsAndProtonCounts.get(neighborAtomType) + .get(forbiddenNeighborHybridization) + .removeAll(connectivities.get(neighborAtomType) + .get(neighborHybridization)); + if 
(forbiddenNeighborHybridizationsAndProtonCounts.get(neighborAtomType) + .get(forbiddenNeighborHybridization) + .isEmpty()) { + forbiddenNeighborHybridizationsAndProtonCounts.get(neighborAtomType) + .remove(forbiddenNeighborHybridization); + } + } + if (forbiddenNeighborHybridizationsAndProtonCounts.get(neighborAtomType) + .isEmpty()) { + forbiddenNeighborHybridizationsAndProtonCounts.remove(neighborAtomType); + break; + } + } + } + + return forbiddenNeighborHybridizationsAndProtonCounts; + } +} diff --git a/src/casekit/nmr/model/nmrium/Correlation.java b/src/casekit/nmr/model/nmrium/Correlation.java index ec61752..7bdfea7 100644 --- a/src/casekit/nmr/model/nmrium/Correlation.java +++ b/src/casekit/nmr/model/nmrium/Correlation.java @@ -49,7 +49,7 @@ public class Correlation { private int equivalence; private Map> attachment; private List protonsCount; - private String hybridization; + private List hybridization; private boolean pseudo; private Map edited; } From deda39cbcebb409155863905f464c14a9be230cb Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Sat, 11 Sep 2021 22:34:34 +0200 Subject: [PATCH 313/405] feat: ability to use of INADEQUATE information --- src/casekit/nmr/lsd/LISTAndPROPUtilities.java | 6 +- .../nmr/lsd/PyLSDInputFileBuilder.java | 59 +++++++++++++++++++ 2 files changed, 62 insertions(+), 3 deletions(-) diff --git a/src/casekit/nmr/lsd/LISTAndPROPUtilities.java b/src/casekit/nmr/lsd/LISTAndPROPUtilities.java index 850a335..3e069bc 100644 --- a/src/casekit/nmr/lsd/LISTAndPROPUtilities.java +++ b/src/casekit/nmr/lsd/LISTAndPROPUtilities.java @@ -27,7 +27,7 @@ public static void insertELEM(final StringBuilder stringBuilder, final Map listMap) { // create hetero atom list automatically to forbid hetero-hetero bonds stringBuilder.append("HETE L1") - .append("; list of hetero atoms\n"); + .append("; hetero atoms\n"); stringBuilder.append("PROP L1 0 L1 -; no hetero-hetero bonds\n"); listMap.put("HETE", "L1"); } @@ -82,11 +82,11 @@ public static void 
insertCarbonCombinationLISTs(final StringBuilder stringBuilde } split = combinationEntry.getKey() .split("_"); - stringBuilder.append("; list of ") + stringBuilder.append("; ") .append(split[0]) .append("H") .append(split[2]) - .append(" and ") + .append(", ") .append(split[1]) .append("\n"); diff --git a/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java b/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java index ebebd6e..0a945dd 100644 --- a/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java +++ b/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java @@ -536,6 +536,63 @@ private static String buildFilters(final String[] filterPaths) { return stringBuilder.toString(); } + private static String buildBONDByINADEQUATE(final List correlationList, + final Map indicesMap) { + final StringBuilder stringBuilder = new StringBuilder(); + + final Set uniqueSet = new HashSet<>(); + Correlation correlation; + for (int i = 0; i + < correlationList.size(); i++) { + correlation = correlationList.get(i); + // @TODO for now use INADEQUATE information of atoms without equivalences only + if (!correlation.getAtomType() + .equals("C") + || correlation.getEquivalence() + > 1) { + continue; + } + for (final Link link : correlation.getLink()) { + if (link.getExperimentType() + .equals("inadequate")) { + for (final int matchIndex : link.getMatch()) { + // insert BOND pair once only and not if equivalences exist + if (!uniqueSet.contains(indicesMap.get(i)[1] + + " " + + indicesMap.get(matchIndex)[1]) + && correlationList.get(matchIndex) + .getEquivalence() + == 1) { + stringBuilder.append("BOND ") + .append(indicesMap.get(i)[1]) + .append(" ") + .append(indicesMap.get(matchIndex)[1]) + .append(buildShiftsComment(correlation, correlationList.get(matchIndex))) + .append("\n"); + uniqueSet.add(indicesMap.get(i)[1] + + " " + + indicesMap.get(matchIndex)[1]); + uniqueSet.add(indicesMap.get(matchIndex)[1] + + " " + + indicesMap.get(i)[1]); + } + } + } + } + } + + return stringBuilder.toString(); + } + + private 
static String buildBOND(final List correlationList, final Map indicesMap) { + final StringBuilder stringBuilder = new StringBuilder(); + + stringBuilder.append(buildBONDByINADEQUATE(correlationList, indicesMap)) + .append("\n"); + + return stringBuilder.toString(); + } + public static String buildPyLSDInputFileContent(final Data data, final String mf, final Map> detectedHybridizations, final Map>>> detectedConnectivities, @@ -610,6 +667,8 @@ public static String buildPyLSDInputFileContent(final Data data, final String mf }); // BOND (interpretation, INADEQUATE, previous assignments) -> input fragments + stringBuilder.append(buildBOND(correlationList, indicesMap)) + .append("\n"); // LIST PROP for certain limitations or properties of atoms in lists, e.g. hetero hetero bonds allowance stringBuilder.append(buildLISTsAndPROPs(correlationList, indicesMap, elementCounts, detectedConnectivities, From d08bc3ae807ee533e83539c2aea3c8e719b26e00 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Mon, 13 Sep 2021 21:56:09 +0200 Subject: [PATCH 314/405] fix: use unique hybridization states in MULT --- src/casekit/nmr/lsd/PyLSDInputFileBuilder.java | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java b/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java index 0a945dd..d2d2288 100644 --- a/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java +++ b/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java @@ -139,7 +139,7 @@ private static String buildMULT(final Correlation correlation, final int index, hybridizations.addAll(correlation.getHybridization() .stream() .map(Constants.hybridizationConversionMap::get) - .collect(Collectors.toList())); + .collect(Collectors.toSet())); } else { // if hybridization is not given then use the detected ones if (detectedHybridizations.containsKey(index)) { @@ -486,10 +486,9 @@ private static String buildLISTsAndPROPs(final List correlationList final StringBuilder stringBuilder = new StringBuilder(); 
final Map listMap = new HashMap<>(); - // LIST and PROP for hetero hetero bonds allowance as well as hybridization states and proton counts reduction + // LIST and PROP for hetero hetero bonds to disallow if (!allowHeteroHeteroBonds) { LISTAndPROPUtilities.insertNoHeteroHeteroBonds(stringBuilder, listMap); - Utilities.reduceDefaultHybridizationsAndProtonCountsOfHeteroAtoms(correlationList, detectedConnectivities); } // insert forbidden connection lists and properties LISTAndPROPUtilities.insertForbiddenConnectionLISTsAndPROPs(stringBuilder, listMap, correlationList, indicesMap, From dd39bcda7962b8c01ed70dc3b89e3dda58664d2f Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Mon, 13 Sep 2021 21:57:55 +0200 Subject: [PATCH 315/405] fix: wrong if conditions for setting possible hybridizations of hetero atoms --- src/casekit/nmr/lsd/Utilities.java | 44 ++++++++++++++++++------------ 1 file changed, 27 insertions(+), 17 deletions(-) diff --git a/src/casekit/nmr/lsd/Utilities.java b/src/casekit/nmr/lsd/Utilities.java index 4132de1..48a6003 100644 --- a/src/casekit/nmr/lsd/Utilities.java +++ b/src/casekit/nmr/lsd/Utilities.java @@ -9,11 +9,11 @@ public class Utilities { public static void reduceDefaultHybridizationsAndProtonCountsOfHeteroAtoms(final List correlationList, final Map>>> detectedConnectivities) { - final Map> allowedHeteroAtomHybridizations = buildAllowedHeteroAtomHybridizations( + final Map> allowedNeighborAtomHybridizations = buildAllowedNeighborAtomHybridizations( correlationList, detectedConnectivities); - final Map> allowedHeteroAtomProtonCounts = buildAllowedHeteroAtomProtonCounts( + final Map> allowedNeighborAtomProtonCounts = buildAllowedNeighborAtomProtonCounts( correlationList, detectedConnectivities); - // hetero atoms can bond to carbons only, due to that we can use more connectivity information + // hetero atoms can bond to carbons only, due to that we can use further connectivity information // do not allow bond between carbon and hetero atoms in 
certain hybridization states and proton counts for (final Correlation correlation : correlationList) { // ignore C and H atoms @@ -25,31 +25,40 @@ public static void reduceDefaultHybridizationsAndProtonCountsOfHeteroAtoms(final } // but only if we have seen the hetero atom type in connectivity statistics // and hybridization states or protons count was not set beforehand - if (correlation.getEdited() + if (correlation.getHybridization() + .isEmpty()) { + correlation.getHybridization() + .addAll(allowedNeighborAtomHybridizations.get(correlation.getAtomType())); + } else if (correlation.getEdited() != null && correlation.getEdited() .containsKey("hybridization") - && correlation.getEdited() - .get("hybridization") - && allowedHeteroAtomHybridizations.containsKey(correlation.getAtomType())) { + && !correlation.getEdited() + .get("hybridization") + && allowedNeighborAtomHybridizations.containsKey(correlation.getAtomType())) { correlation.getHybridization() - .retainAll(allowedHeteroAtomHybridizations.get(correlation.getAtomType())); + .retainAll(allowedNeighborAtomHybridizations.get(correlation.getAtomType())); } - if (correlation.getEdited() + if (correlation.getProtonsCount() + .isEmpty()) { + correlation.getProtonsCount() + .addAll(allowedNeighborAtomProtonCounts.get(correlation.getAtomType())); + } else if (correlation.getEdited() != null && correlation.getEdited() .containsKey("protonsCount") - && correlation.getEdited() - .get("protonsCount") - && allowedHeteroAtomProtonCounts.containsKey(correlation.getAtomType())) { + && !correlation.getEdited() + .get("protonsCount") + && allowedNeighborAtomProtonCounts.containsKey(correlation.getAtomType())) { correlation.getProtonsCount() - .retainAll(allowedHeteroAtomProtonCounts.get(correlation.getAtomType())); + .retainAll(allowedNeighborAtomProtonCounts.get(correlation.getAtomType())); } } } - public static Map> buildAllowedHeteroAtomHybridizations(final List correlationList, - final Map>>> detectedConnectivities) { + 
public static Map> buildAllowedNeighborAtomHybridizations( + final List correlationList, + final Map>>> detectedConnectivities) { final Map> allowedHeteroAtomHybridizations = new HashMap<>(); for (final Map.Entry>>> correlationEntry : detectedConnectivities.entrySet()) { if (!correlationList.get(correlationEntry.getKey()) @@ -72,8 +81,9 @@ public static Map> buildAllowedHeteroAtomHybridizations(fina return allowedHeteroAtomHybridizations; } - public static Map> buildAllowedHeteroAtomProtonCounts(final List correlationList, - final Map>>> detectedConnectivities) { + public static Map> buildAllowedNeighborAtomProtonCounts( + final List correlationList, + final Map>>> detectedConnectivities) { final Map> allowedHeteroAtomProtonCounts = new HashMap<>(); for (final Map.Entry>>> correlationEntry : detectedConnectivities.entrySet()) { if (!correlationList.get(correlationEntry.getKey()) From 68aa8d27eb50a9fe0853b34140949a2d2437dc49 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Thu, 16 Sep 2021 11:00:38 +0200 Subject: [PATCH 316/405] fix: enable max ring size when detecting rings for ring fragment trees building --- .../fragmentation/Fragmentation.java | 26 +++++++++++-------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/src/casekit/nmr/fragments/fragmentation/Fragmentation.java b/src/casekit/nmr/fragments/fragmentation/Fragmentation.java index 6dc6c2d..80f87f8 100644 --- a/src/casekit/nmr/fragments/fragmentation/Fragmentation.java +++ b/src/casekit/nmr/fragments/fragmentation/Fragmentation.java @@ -27,14 +27,15 @@ public class Fragmentation { * * @return * - * @see #buildFragmentTrees(IAtomContainer, Integer, Integer, boolean) + * @see #buildFragmentTrees(IAtomContainer, Integer, Integer, int, boolean) */ public static List buildFragmentDataSets(final DataSet dataSet, final Integer maxSphere, - final Integer maxSphereRing, final boolean withPseudoAtoms) { + final Integer maxSphereRing, final int maxRingSize, + final boolean withPseudoAtoms) { final 
List fragmentTrees = buildFragmentTrees(dataSet.getStructure() .toAtomContainer(), maxSphere, - maxSphereRing, withPseudoAtoms); + maxSphereRing, maxRingSize, withPseudoAtoms); return fragmentTreesToSubDataSets(dataSet, fragmentTrees); } @@ -156,12 +157,13 @@ public static List fragmentTreesToSubDataSets(final DataSet dataSet, * * @return * - * @see #buildFragmentTrees(IAtomContainer, Integer, Integer, boolean) + * @see #buildFragmentTrees(IAtomContainer, Integer, Integer, int, boolean) * @see FragmentationUtilities#toAtomContainer(ConnectionTree) */ public static List buildFragments(final IAtomContainer structure, final Integer maxSphere, - final Integer maxSphereRing, final boolean withPseudoAtoms) { - final List fragmentTrees = buildFragmentTrees(structure, maxSphere, maxSphereRing, + final Integer maxSphereRing, final int maxRingSize, + final boolean withPseudoAtoms) { + final List fragmentTrees = buildFragmentTrees(structure, maxSphere, maxSphereRing, maxRingSize, withPseudoAtoms); return fragmentTrees.stream() .map(FragmentationUtilities::toAtomContainer) @@ -169,13 +171,13 @@ public static List buildFragments(final IAtomContainer structure } public static List buildRingFragmentTrees(final IAtomContainer structure, - final Integer maxSphereRing, + final Integer maxSphereRing, final int maxRingSize, final boolean withPseudoAtoms) { final List ringFragmentTrees = new ArrayList<>(); try { // build ring fragment trees from detected rings and extend by given maximum sphere for rings ConnectionTree connectionTreeRing, connectionTreeOuterSphere, subtreeToAdd; - final IRingSet ringSet = Cycles.all(structure)//essential(structure) + final IRingSet ringSet = Cycles.all(structure, maxRingSize)//essential(structure) .toRingSet(); final List ringFragments = new ArrayList<>(); for (int i = 0; i @@ -254,14 +256,16 @@ public static List buildRingFragmentTrees(final IAtomContainer s * * @return * - * @see #buildRingFragmentTrees(IAtomContainer, Integer, boolean) + * @see 
#buildRingFragmentTrees(IAtomContainer, Integer, int, boolean) * @see #buildFragmentTree(IAtomContainer, int, Integer, Set, boolean) * @see FragmentationUtilities#removeDuplicates(List) */ public static List buildFragmentTrees(final IAtomContainer structure, final Integer maxSphere, - final Integer maxSphereRing, final boolean withPseudoAtoms) { + final Integer maxSphereRing, final int maxRingSize, + final boolean withPseudoAtoms) { // build fragment trees for rings - final List fragmentTrees = buildRingFragmentTrees(structure, maxSphereRing, withPseudoAtoms); + final List fragmentTrees = buildRingFragmentTrees(structure, maxSphereRing, maxRingSize, + withPseudoAtoms); // build fragment for each single atom for (int i = 0; i < structure.getAtomCount(); i++) { From 664fa34e77d98f9c1eac4938ad16fa7138fb3307 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Thu, 16 Sep 2021 14:25:44 +0200 Subject: [PATCH 317/405] feat: added correlationListToSpectrum1D method to Utils --- src/casekit/nmr/utils/Utils.java | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/src/casekit/nmr/utils/Utils.java b/src/casekit/nmr/utils/Utils.java index 44621cf..68d9a1f 100644 --- a/src/casekit/nmr/utils/Utils.java +++ b/src/casekit/nmr/utils/Utils.java @@ -1,6 +1,7 @@ package casekit.nmr.utils; import casekit.nmr.model.DataSet; +import casekit.nmr.model.Signal; import casekit.nmr.model.Spectrum; import casekit.nmr.model.StructureCompact; import casekit.nmr.model.nmrium.Correlation; @@ -23,6 +24,7 @@ import java.util.*; import java.util.regex.Matcher; import java.util.regex.Pattern; +import java.util.stream.Collectors; public class Utils { @@ -604,4 +606,27 @@ public static DataSet atomContainerToDataSet(final IAtomContainer structure) thr return dataSet; } + + public static Spectrum correlationListToSpectrum1D(final List correlationList, final String nucleus) { + final String atomType = Utils.getAtomTypeFromNucleus(nucleus); + final List correlationListAtomType = 
correlationList.stream() + .filter(correlation -> correlation.getAtomType() + .equals(atomType) + && !correlation.isPseudo()) + .collect(Collectors.toList()); + final Spectrum spectrum = new Spectrum(); + spectrum.setNuclei(new String[]{nucleus}); + spectrum.setSignals(new ArrayList<>()); + Signal signal; + for (final Correlation correlation : correlationListAtomType) { + signal = new Signal(spectrum.getNuclei(), new Double[]{correlation.getSignal().getDelta()}, + Utils.getMultiplicityFromProtonsCount(correlation), correlation.getSignal() + .getKind(), null, + correlation.getEquivalence(), correlation.getSignal() + .getSign()); + spectrum.addSignalWithoutEquivalenceSearch(signal); + } + + return spectrum; + } } From e1bc6840de2fff9b3cbbb100e31b528cf77b1982 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Mon, 11 Oct 2021 15:58:21 +0200 Subject: [PATCH 318/405] feat: added getFileContent method to FileSystem --- src/casekit/io/FileSystem.java | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/casekit/io/FileSystem.java b/src/casekit/io/FileSystem.java index 7e42144..4498902 100644 --- a/src/casekit/io/FileSystem.java +++ b/src/casekit/io/FileSystem.java @@ -70,6 +70,16 @@ public static boolean cleanup(final String[] directoriesToCheck, final String re return cleaned; } + public static String getFileContent(final String pathToJsonFile) { + final BufferedReader bufferedReader = FileSystem.readFile(pathToJsonFile); + return bufferedReader + == null + ? 
null + : bufferedReader.lines() + .reduce("", (content, line) -> content + + line); + } + public static List getSmilesListFromFile(final String pathToSmilesFile) { final List smilesList = new ArrayList<>(); try { From af01c1e43fd278b1538534807ee3f2c39be19cee Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Mon, 11 Oct 2021 15:58:56 +0200 Subject: [PATCH 319/405] feat: added MultiThreading class --- src/casekit/threading/MultiThreading.java | 44 +++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 src/casekit/threading/MultiThreading.java diff --git a/src/casekit/threading/MultiThreading.java b/src/casekit/threading/MultiThreading.java new file mode 100644 index 0000000..85713fd --- /dev/null +++ b/src/casekit/threading/MultiThreading.java @@ -0,0 +1,44 @@ +package casekit.threading; + +import java.util.Collection; +import java.util.concurrent.*; +import java.util.function.Consumer; + +public class MultiThreading { + + public static ExecutorService initExecuter(final int nThreads) { + return Executors.newFixedThreadPool(nThreads); + } + + public static void stopExecuter(final ExecutorService executor, final long seconds) { + executor.shutdown(); + try { + if (!executor.awaitTermination(seconds, TimeUnit.SECONDS)) { + System.err.println("killing non-finished tasks!"); + executor.shutdownNow(); + } + } catch (final InterruptedException e) { + System.err.println("killing non-finished tasks!"); + executor.shutdownNow(); + } + } + + public static void processTasks(final Collection> callables, final Consumer consumer, + final int nThreads, final long seconds) throws InterruptedException { + // initialize an executor for parallelization + final ExecutorService executor = initExecuter(nThreads); + // execute all task in parallel + executor.invokeAll(callables) + .stream() + .map(future -> { + try { + return future.get(); + } catch (final InterruptedException | ExecutionException e) { + throw new IllegalStateException(e); + } + }) + .forEach(consumer); 
+ // shut down the executor service + stopExecuter(executor, seconds); + } +} From fdc155ba5c0d62a8475ad42cd764110ef007eb18 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Mon, 11 Oct 2021 20:59:20 +0200 Subject: [PATCH 320/405] chore: NMRium signal classes use a common parent class --- src/casekit/nmr/model/nmrium/Signal.java | 18 ++++++++++++++++++ src/casekit/nmr/model/nmrium/Signal1D.java | 12 +++++++----- src/casekit/nmr/model/nmrium/Signal2D.java | 11 ++++++----- 3 files changed, 31 insertions(+), 10 deletions(-) create mode 100644 src/casekit/nmr/model/nmrium/Signal.java diff --git a/src/casekit/nmr/model/nmrium/Signal.java b/src/casekit/nmr/model/nmrium/Signal.java new file mode 100644 index 0000000..63223ca --- /dev/null +++ b/src/casekit/nmr/model/nmrium/Signal.java @@ -0,0 +1,18 @@ +package casekit.nmr.model.nmrium; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import lombok.*; + +@NoArgsConstructor +@AllArgsConstructor +@Getter +@Setter +@ToString +@JsonIgnoreProperties(ignoreUnknown = true) +public class Signal { + + private String id; + private String kind; + private String multiplicity; + private Integer sign; +} diff --git a/src/casekit/nmr/model/nmrium/Signal1D.java b/src/casekit/nmr/model/nmrium/Signal1D.java index 8002a45..7141dac 100644 --- a/src/casekit/nmr/model/nmrium/Signal1D.java +++ b/src/casekit/nmr/model/nmrium/Signal1D.java @@ -33,11 +33,13 @@ @Setter @ToString @JsonIgnoreProperties(ignoreUnknown = true) -public class Signal1D { +public class Signal1D + extends Signal { - private String id; - private String kind; - private String multiplicity; private double delta; - private int sign; + + public Signal1D(final Signal signal) { + super(signal.getId(), signal.getKind(), signal.getMultiplicity(), signal.getSign()); + } + } diff --git a/src/casekit/nmr/model/nmrium/Signal2D.java b/src/casekit/nmr/model/nmrium/Signal2D.java index 4ed31b8..ae21f5c 100644 --- a/src/casekit/nmr/model/nmrium/Signal2D.java +++ 
b/src/casekit/nmr/model/nmrium/Signal2D.java @@ -35,12 +35,13 @@ @Setter @ToString @JsonIgnoreProperties(ignoreUnknown = true) -public class Signal2D { +public class Signal2D + extends Signal { - private String id; - private String kind; - private String multiplicity; private Map x; private Map y; - private Integer sign; + + public Signal2D(final Signal signal) { + super(signal.getId(), signal.getKind(), signal.getMultiplicity(), signal.getSign()); + } } From ad465b0d6d9138857af2b84136044806e7eccc23 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Mon, 11 Oct 2021 21:04:38 +0200 Subject: [PATCH 321/405] chore: adoptions regarding change in correlation data structure in nmr-correlation package --- src/casekit/nmr/lsd/LISTAndPROPUtilities.java | 19 ++++-- .../nmr/lsd/PyLSDInputFileBuilder.java | 27 ++++++-- src/casekit/nmr/model/nmrium/Correlation.java | 3 - src/casekit/nmr/model/nmrium/Link.java | 4 +- src/casekit/nmr/utils/Utils.java | 64 +++++++++++++++++-- 5 files changed, 94 insertions(+), 23 deletions(-) diff --git a/src/casekit/nmr/lsd/LISTAndPROPUtilities.java b/src/casekit/nmr/lsd/LISTAndPROPUtilities.java index 3e069bc..a97a642 100644 --- a/src/casekit/nmr/lsd/LISTAndPROPUtilities.java +++ b/src/casekit/nmr/lsd/LISTAndPROPUtilities.java @@ -1,7 +1,9 @@ package casekit.nmr.lsd; +import casekit.nmr.model.Signal; import casekit.nmr.model.nmrium.Correlation; import casekit.nmr.utils.Statistics; +import casekit.nmr.utils.Utils; import java.util.*; @@ -108,6 +110,7 @@ public static void insertForbiddenConnectionLISTsAndPROPs(final StringBuilder st LISTAndPROPUtilities.insertCarbonCombinationLISTs(stringBuilder, listMap, correlationList, indicesMap, detectedConnectivities); Correlation correlation; + Signal signal; String atomType, listKey; int indexInPyLSD; Map>> connectivities; @@ -116,13 +119,17 @@ public static void insertForbiddenConnectionLISTsAndPROPs(final StringBuilder st for (int i = 0; i < correlationList.size(); i++) { correlation = 
correlationList.get(i); + signal = Utils.extractSignalFromCorrelation(correlation); + atomType = correlation.getAtomType(); connectivities = detectedConnectivities.get(i); // consider carbons here only, because of having complete connectivity information if (!atomType.equals("C") || connectivities == null - || connectivities.isEmpty()) { + || connectivities.isEmpty() + || signal + == null) { continue; } // define atom types of non-neighbors @@ -149,8 +156,7 @@ public static void insertForbiddenConnectionLISTsAndPROPs(final StringBuilder st .append(" (") .append(atomType) .append(", ") - .append(Statistics.roundDouble(correlation.getSignal() - .getDelta(), 2)) + .append(Statistics.roundDouble(signal.getShift(0), 2)) .append(") and ") .append(listMap.get(nonNeighborAtomType)) .append(" (") @@ -161,13 +167,13 @@ public static void insertForbiddenConnectionLISTsAndPROPs(final StringBuilder st // forbid bonds to possible neighbors with certain hybridization states and proton counts for (final String neighborAtomType : forbiddenNeighborHybridizationsAndProtonCounts.keySet()) { for (final int forbiddenNeighborHybridization : forbiddenNeighborHybridizationsAndProtonCounts.get( - neighborAtomType) + neighborAtomType) .keySet()) { if (!neighborAtomType.equals("C")) { continue; } for (final Integer forbiddenProtonsCount : forbiddenNeighborHybridizationsAndProtonCounts.get( - neighborAtomType) + neighborAtomType) .get(forbiddenNeighborHybridization)) { listKey = neighborAtomType + "_SP" @@ -185,8 +191,7 @@ public static void insertForbiddenConnectionLISTsAndPROPs(final StringBuilder st .append(" (") .append(atomType) .append(", ") - .append(Statistics.roundDouble(correlation.getSignal() - .getDelta(), 2)) + .append(Statistics.roundDouble(signal.getShift(0), 2)) .append(") and ") .append(listMap.get(listKey)) .append(" (") diff --git a/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java b/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java index d2d2288..343f19b 100644 --- 
a/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java +++ b/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java @@ -1,6 +1,7 @@ package casekit.nmr.lsd; import casekit.nmr.lsd.model.ElucidationOptions; +import casekit.nmr.model.Signal; import casekit.nmr.model.nmrium.Correlation; import casekit.nmr.model.nmrium.Data; import casekit.nmr.model.nmrium.Link; @@ -269,10 +270,16 @@ private static String buildMULT(final Correlation correlation, final int index, } private static String buildShiftString(final Correlation correlation) { + + final Signal signal = Utils.extractSignalFromCorrelation(correlation); + if (signal + != null) { + return "?"; + } + return correlation.isPseudo() ? "?" - : String.valueOf(Statistics.roundDouble(correlation.getSignal() - .getDelta(), 2)); + : String.valueOf(Statistics.roundDouble(signal.getShift(0), 2)); } private static String buildShiftsComment(final Correlation correlation1, final Correlation correlation2) { @@ -433,14 +440,18 @@ private static String buildSHIX(final Correlation correlation, final int index, || correlation.isPseudo()) { return null; } + final Signal signal = Utils.extractSignalFromCorrelation(correlation); + if (signal + != null) { + return null; + } final StringBuilder stringBuilder = new StringBuilder(); for (int k = 1; k < indicesMap.get(index).length; k++) { stringBuilder.append("SHIX ") .append(indicesMap.get(index)[k]) .append(" ") - .append(Statistics.roundDouble(correlation.getSignal() - .getDelta(), 2)) + .append(Statistics.roundDouble(signal.getShift(0), 2)) .append("\n"); } @@ -454,6 +465,11 @@ private static String buildSHIH(final Correlation correlation, final int index, || correlation.isPseudo()) { return null; } + final Signal signal = Utils.extractSignalFromCorrelation(correlation); + if (signal + != null) { + return null; + } final StringBuilder stringBuilder = new StringBuilder(); // only consider protons which are attached via HSQC/HMQC (pseudo and real links) for (final Link link : correlation.getLink()) { @@ 
-468,8 +484,7 @@ private static String buildSHIH(final Correlation correlation, final int index, stringBuilder.append("SHIH ") .append(indicesMap.get(index)[k]) .append(" ") - .append(Statistics.roundDouble(correlation.getSignal() - .getDelta(), 3)) + .append(Statistics.roundDouble(signal.getShift(0), 3)) .append("\n"); } } diff --git a/src/casekit/nmr/model/nmrium/Correlation.java b/src/casekit/nmr/model/nmrium/Correlation.java index 7bdfea7..7b66728 100644 --- a/src/casekit/nmr/model/nmrium/Correlation.java +++ b/src/casekit/nmr/model/nmrium/Correlation.java @@ -40,11 +40,8 @@ public class Correlation { private String id; - private String experimentType; - private String experimentID; private String atomType; private Map label; - private Signal1D signal; private List link; private int equivalence; private Map> attachment; diff --git a/src/casekit/nmr/model/nmrium/Link.java b/src/casekit/nmr/model/nmrium/Link.java index ec9e8e8..b12fa59 100644 --- a/src/casekit/nmr/model/nmrium/Link.java +++ b/src/casekit/nmr/model/nmrium/Link.java @@ -30,6 +30,7 @@ import lombok.ToString; import java.util.List; +import java.util.Map; @NoArgsConstructor @Getter @@ -40,10 +41,11 @@ public class Link { private String experimentType; private String experimentID; private String[] atomType; - private Signal2D signal; + private Object signal; private String axis; private List match; private String id; private String experimentLabel; private boolean pseudo; + private Map edited; } diff --git a/src/casekit/nmr/utils/Utils.java b/src/casekit/nmr/utils/Utils.java index 68d9a1f..8751d58 100644 --- a/src/casekit/nmr/utils/Utils.java +++ b/src/casekit/nmr/utils/Utils.java @@ -1,10 +1,14 @@ package casekit.nmr.utils; +import casekit.nmr.lsd.Constants; import casekit.nmr.model.DataSet; import casekit.nmr.model.Signal; import casekit.nmr.model.Spectrum; import casekit.nmr.model.StructureCompact; import casekit.nmr.model.nmrium.Correlation; +import casekit.nmr.model.nmrium.Link; +import 
casekit.nmr.model.nmrium.Signal1D; +import casekit.nmr.model.nmrium.Signal2D; import org.openscience.cdk.aromaticity.Aromaticity; import org.openscience.cdk.aromaticity.ElectronDonation; import org.openscience.cdk.aromaticity.Kekulization; @@ -607,6 +611,54 @@ public static DataSet atomContainerToDataSet(final IAtomContainer structure) thr return dataSet; } + public static Signal extractSignalFromCorrelation(final Correlation correlation) { + if (correlation.isPseudo()) { + return null; + } + final List nonPseudoLinks = correlation.getLink() + .stream() + .filter(linkTemp -> !linkTemp.isPseudo()) + .collect(Collectors.toList()); + if (nonPseudoLinks.isEmpty()) { + return null; + } + final Link link = nonPseudoLinks.get(0); + final Map signalMap = (Map) link.getSignal(); + final String multiplicity = Utils.getMultiplicityFromProtonsCount(correlation); + final casekit.nmr.model.nmrium.Signal signal = new casekit.nmr.model.nmrium.Signal((String) signalMap.get("id"), + (String) signalMap.get( + "kind"), + multiplicity, + signalMap.containsKey("sign") + ? (Integer) signalMap.get( + "sign") + : null); + if (signalMap.containsKey("delta")) { + final Signal1D signal1D = new Signal1D(signal); + signal1D.setDelta((double) signalMap.get("delta")); + + return new Signal(new String[]{Constants.nucleiMap.get(correlation.getAtomType())}, + new Double[]{signal1D.getDelta()}, signal1D.getMultiplicity(), signal1D.getKind(), null, + correlation.getEquivalence(), signal1D.getSign()); + } else if (signalMap.containsKey("x")) { + final Signal2D signal2D = new Signal2D(signal); + signal2D.setX((Map) signalMap.get("x")); + signal2D.setY((Map) signalMap.get("y")); + final double shift = link.getAxis() + .equals("x") + ? 
(double) signal2D.getX() + .get("delta") + : (double) signal2D.getY() + .get("delta"); + + return new Signal(new String[]{Constants.nucleiMap.get(correlation.getAtomType())}, new Double[]{shift}, + signal2D.getMultiplicity(), signal2D.getKind(), null, correlation.getEquivalence(), + signal2D.getSign()); + } + + return null; + } + public static Spectrum correlationListToSpectrum1D(final List correlationList, final String nucleus) { final String atomType = Utils.getAtomTypeFromNucleus(nucleus); final List correlationListAtomType = correlationList.stream() @@ -617,14 +669,14 @@ public static Spectrum correlationListToSpectrum1D(final List corre final Spectrum spectrum = new Spectrum(); spectrum.setNuclei(new String[]{nucleus}); spectrum.setSignals(new ArrayList<>()); + Signal signal; for (final Correlation correlation : correlationListAtomType) { - signal = new Signal(spectrum.getNuclei(), new Double[]{correlation.getSignal().getDelta()}, - Utils.getMultiplicityFromProtonsCount(correlation), correlation.getSignal() - .getKind(), null, - correlation.getEquivalence(), correlation.getSignal() - .getSign()); - spectrum.addSignalWithoutEquivalenceSearch(signal); + signal = extractSignalFromCorrelation(correlation); + if (signal + != null) { + spectrum.addSignalWithoutEquivalenceSearch(signal); + } } return spectrum; From 1e716908b55ddbff06f8ef0ac162c42d06a07a02 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Mon, 11 Oct 2021 21:05:19 +0200 Subject: [PATCH 322/405] chore: changed "phase" class member to Integer --- src/casekit/nmr/model/Signal.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/casekit/nmr/model/Signal.java b/src/casekit/nmr/model/Signal.java index acadc83..828dff8 100644 --- a/src/casekit/nmr/model/Signal.java +++ b/src/casekit/nmr/model/Signal.java @@ -49,7 +49,7 @@ public class Signal { private String kind; private Double intensity; private int equivalencesCount; - private int phase; + private Integer phase; public int getNDim() { 
From 341f8471ad08d126bd141bf40b233fc83934d466 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Mon, 11 Oct 2021 21:30:33 +0200 Subject: [PATCH 323/405] fix: wrong if condition when checking signal for null value --- src/casekit/nmr/lsd/LISTAndPROPUtilities.java | 1 - src/casekit/nmr/lsd/PyLSDInputFileBuilder.java | 6 +++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/casekit/nmr/lsd/LISTAndPROPUtilities.java b/src/casekit/nmr/lsd/LISTAndPROPUtilities.java index a97a642..17d5db9 100644 --- a/src/casekit/nmr/lsd/LISTAndPROPUtilities.java +++ b/src/casekit/nmr/lsd/LISTAndPROPUtilities.java @@ -120,7 +120,6 @@ public static void insertForbiddenConnectionLISTsAndPROPs(final StringBuilder st < correlationList.size(); i++) { correlation = correlationList.get(i); signal = Utils.extractSignalFromCorrelation(correlation); - atomType = correlation.getAtomType(); connectivities = detectedConnectivities.get(i); // consider carbons here only, because of having complete connectivity information diff --git a/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java b/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java index 343f19b..183328a 100644 --- a/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java +++ b/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java @@ -273,7 +273,7 @@ private static String buildShiftString(final Correlation correlation) { final Signal signal = Utils.extractSignalFromCorrelation(correlation); if (signal - != null) { + == null) { return "?"; } @@ -442,7 +442,7 @@ private static String buildSHIX(final Correlation correlation, final int index, } final Signal signal = Utils.extractSignalFromCorrelation(correlation); if (signal - != null) { + == null) { return null; } final StringBuilder stringBuilder = new StringBuilder(); @@ -467,7 +467,7 @@ private static String buildSHIH(final Correlation correlation, final int index, } final Signal signal = Utils.extractSignalFromCorrelation(correlation); if (signal - != null) { + == null) { return null; } final 
StringBuilder stringBuilder = new StringBuilder(); From b97c87b01cefb2bbdd27f1bd974b43c61f18a67c Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Tue, 19 Oct 2021 17:57:38 +0200 Subject: [PATCH 324/405] chore: made buildForbiddenNeighborHybridizationsAndProtonCounts more general to re-use it --- src/casekit/nmr/lsd/LISTAndPROPUtilities.java | 123 +++++++++--------- src/casekit/nmr/lsd/Utilities.java | 70 +++++----- 2 files changed, 97 insertions(+), 96 deletions(-) diff --git a/src/casekit/nmr/lsd/LISTAndPROPUtilities.java b/src/casekit/nmr/lsd/LISTAndPROPUtilities.java index 17d5db9..139dea1 100644 --- a/src/casekit/nmr/lsd/LISTAndPROPUtilities.java +++ b/src/casekit/nmr/lsd/LISTAndPROPUtilities.java @@ -114,7 +114,6 @@ public static void insertForbiddenConnectionLISTsAndPROPs(final StringBuilder st String atomType, listKey; int indexInPyLSD; Map>> connectivities; - Set nonNeighborAtomTypes, neighborAtomTypes; Map>> forbiddenNeighborHybridizationsAndProtonCounts; for (int i = 0; i < correlationList.size(); i++) { @@ -131,77 +130,71 @@ public static void insertForbiddenConnectionLISTsAndPROPs(final StringBuilder st == null) { continue; } - // define atom types of non-neighbors - neighborAtomTypes = connectivities.keySet(); - nonNeighborAtomTypes = new HashSet<>(atomTypesByMf); - nonNeighborAtomTypes.removeAll(neighborAtomTypes); - nonNeighborAtomTypes.remove("H"); forbiddenNeighborHybridizationsAndProtonCounts = Utilities.buildForbiddenNeighborHybridizationsAndProtonCounts( - connectivities, neighborAtomTypes); + connectivities, atomTypesByMf); // put in the extracted information per correlation for (int k = 1; k < indicesMap.get(i).length; k++) { indexInPyLSD = (int) indicesMap.get(i)[k]; - // forbid bonds to whole element groups - for (final String nonNeighborAtomType : nonNeighborAtomTypes) { - stringBuilder.append("PROP ") - .append(indexInPyLSD) - .append(" 0 ") - .append(listMap.get(nonNeighborAtomType)) - .append(" -") - .append("; no bonds between ") - 
.append(indexInPyLSD) - .append(" (") - .append(atomType) - .append(", ") - .append(Statistics.roundDouble(signal.getShift(0), 2)) - .append(") and ") - .append(listMap.get(nonNeighborAtomType)) - .append(" (") - .append(nonNeighborAtomType) - .append(")") - .append("\n"); - } - // forbid bonds to possible neighbors with certain hybridization states and proton counts for (final String neighborAtomType : forbiddenNeighborHybridizationsAndProtonCounts.keySet()) { - for (final int forbiddenNeighborHybridization : forbiddenNeighborHybridizationsAndProtonCounts.get( - neighborAtomType) - .keySet()) { - if (!neighborAtomType.equals("C")) { - continue; - } - for (final Integer forbiddenProtonsCount : forbiddenNeighborHybridizationsAndProtonCounts.get( - neighborAtomType) - .get(forbiddenNeighborHybridization)) { - listKey = neighborAtomType - + "_SP" - + forbiddenNeighborHybridization - + "_" - + forbiddenProtonsCount; - if (listMap.containsKey(listKey)) { - stringBuilder.append("PROP ") - .append(indexInPyLSD) - .append(" 0 ") - .append(listMap.get(listKey)) - .append(" -") - .append("; no bonds between ") - .append(indexInPyLSD) - .append(" (") - .append(atomType) - .append(", ") - .append(Statistics.roundDouble(signal.getShift(0), 2)) - .append(") and ") - .append(listMap.get(listKey)) - .append(" (") - .append(neighborAtomType) - .append(", SP") - .append(forbiddenNeighborHybridization) - .append(", ") - .append(forbiddenProtonsCount) - .append("H") - .append(")") - .append("\n"); + // forbid bonds to whole element groups if there is an empty map for an atom type + if (forbiddenNeighborHybridizationsAndProtonCounts.get(neighborAtomType) + .isEmpty()) { + stringBuilder.append("PROP ") + .append(indexInPyLSD) + .append(" 0 ") + .append(listMap.get(neighborAtomType)) + .append(" -") + .append("; no bonds between ") + .append(indexInPyLSD) + .append(" (") + .append(atomType) + .append(", ") + .append(Statistics.roundDouble(signal.getShift(0), 2)) + .append(") and ") + 
.append(listMap.get(neighborAtomType)) + .append(" (") + .append(neighborAtomType) + .append(")") + .append("\n"); + } else { + // forbid bonds to possible neighbors with certain hybridization states and proton counts + for (final int forbiddenNeighborHybridization : forbiddenNeighborHybridizationsAndProtonCounts.get( + neighborAtomType) + .keySet()) { + for (final Integer forbiddenProtonsCount : forbiddenNeighborHybridizationsAndProtonCounts.get( + neighborAtomType) + .get(forbiddenNeighborHybridization)) { + listKey = neighborAtomType + + "_SP" + + forbiddenNeighborHybridization + + "_" + + forbiddenProtonsCount; + if (listMap.containsKey(listKey)) { + stringBuilder.append("PROP ") + .append(indexInPyLSD) + .append(" 0 ") + .append(listMap.get(listKey)) + .append(" -") + .append("; no bonds between ") + .append(indexInPyLSD) + .append(" (") + .append(atomType) + .append(", ") + .append(Statistics.roundDouble(signal.getShift(0), 2)) + .append(") and ") + .append(listMap.get(listKey)) + .append(" (") + .append(neighborAtomType) + .append(", SP") + .append(forbiddenNeighborHybridization) + .append(", ") + .append(forbiddenProtonsCount) + .append("H") + .append(")") + .append("\n"); + } } } } diff --git a/src/casekit/nmr/lsd/Utilities.java b/src/casekit/nmr/lsd/Utilities.java index 48a6003..18a2585 100644 --- a/src/casekit/nmr/lsd/Utilities.java +++ b/src/casekit/nmr/lsd/Utilities.java @@ -109,45 +109,53 @@ public static Map> buildAllowedNeighborAtomProtonCounts( } public static Map>> buildForbiddenNeighborHybridizationsAndProtonCounts( - final Map>> connectivities, final Set neighborAtomTypes) { + final Map>> connectivities, final Set possibleNeighborAtomTypes) { // define forbidden hybridizations and proton counts (carbons only) of possible neighbors + // or put just an empty map which stands for all hybridizations and proton counts final Map>> forbiddenNeighborHybridizationsAndProtonCounts = new HashMap<>(); - for (final String neighborAtomType : 
neighborAtomTypes) { - forbiddenNeighborHybridizationsAndProtonCounts.put(neighborAtomType, new HashMap<>()); - for (final int defaultHybridization : Arrays.stream(Constants.defaultHybridizationMap.get(neighborAtomType)) - .boxed() - .collect(Collectors.toList())) { - forbiddenNeighborHybridizationsAndProtonCounts.get(neighborAtomType) - .put(defaultHybridization, Arrays.stream( - Constants.defaultProtonsCountPerValencyMap.get( - neighborAtomType)) - .boxed() - .collect( - Collectors.toSet())); + for (final String possibleNeighborAtomType : possibleNeighborAtomTypes) { + if (possibleNeighborAtomType.equals("H")) { + continue; } - for (final String neighborHybridization : connectivities.get(neighborAtomType) - .keySet()) { - // remove found protons count per hybridzations from list of forbidden ones - for (final int forbiddenNeighborHybridization : new HashSet<>( - forbiddenNeighborHybridizationsAndProtonCounts.get(neighborAtomType) - .keySet())) { - forbiddenNeighborHybridizationsAndProtonCounts.get(neighborAtomType) - .get(forbiddenNeighborHybridization) - .removeAll(connectivities.get(neighborAtomType) - .get(neighborHybridization)); - if (forbiddenNeighborHybridizationsAndProtonCounts.get(neighborAtomType) + forbiddenNeighborHybridizationsAndProtonCounts.put(possibleNeighborAtomType, new HashMap<>()); + if (connectivities.containsKey(possibleNeighborAtomType)) { + for (final int defaultHybridization : Arrays.stream( + Constants.defaultHybridizationMap.get(possibleNeighborAtomType)) + .boxed() + .collect(Collectors.toList())) { + forbiddenNeighborHybridizationsAndProtonCounts.get(possibleNeighborAtomType) + .put(defaultHybridization, Arrays.stream( + Constants.defaultProtonsCountPerValencyMap.get( + possibleNeighborAtomType)) + .boxed() + .collect( + Collectors.toSet())); + } + for (final String neighborHybridization : connectivities.get(possibleNeighborAtomType) + .keySet()) { + // remove found protons count per hybridzations from list of forbidden ones + for 
(final int forbiddenNeighborHybridization : new HashSet<>( + forbiddenNeighborHybridizationsAndProtonCounts.get(possibleNeighborAtomType) + .keySet())) { + forbiddenNeighborHybridizationsAndProtonCounts.get(possibleNeighborAtomType) .get(forbiddenNeighborHybridization) + .removeAll(connectivities.get( + possibleNeighborAtomType) + .get(neighborHybridization)); + if (forbiddenNeighborHybridizationsAndProtonCounts.get(possibleNeighborAtomType) + .get(forbiddenNeighborHybridization) + .isEmpty()) { + forbiddenNeighborHybridizationsAndProtonCounts.get(possibleNeighborAtomType) + .remove(forbiddenNeighborHybridization); + } + } + if (forbiddenNeighborHybridizationsAndProtonCounts.get(possibleNeighborAtomType) .isEmpty()) { - forbiddenNeighborHybridizationsAndProtonCounts.get(neighborAtomType) - .remove(forbiddenNeighborHybridization); + forbiddenNeighborHybridizationsAndProtonCounts.remove(possibleNeighborAtomType); + break; } } - if (forbiddenNeighborHybridizationsAndProtonCounts.get(neighborAtomType) - .isEmpty()) { - forbiddenNeighborHybridizationsAndProtonCounts.remove(neighborAtomType); - break; - } } } From 0f8b38a805df362fad965f71f58f8adf87d3d1b1 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Fri, 22 Oct 2021 18:11:02 +0200 Subject: [PATCH 325/405] fix: do not start proton atom count at heavy atom count --- src/casekit/nmr/lsd/PyLSDInputFileBuilder.java | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java b/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java index 183328a..b78354a 100644 --- a/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java +++ b/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java @@ -55,15 +55,8 @@ private static Map buildIndicesMap(final List co // index in correlation data -> [atom type, indices in PyLSD file...] 
final Map indicesMap = new HashMap<>(); // init element indices within correlations with same order as in correlation data input - final int totalHeavyAtomsCount = elementCounts.entrySet() - .stream() - .filter(set -> !set.getKey() - .equals("H")) - .map(Map.Entry::getValue) - .reduce(0, Integer::sum); int heavyAtomIndexInPyLSDFile = 1; - int protonIndexInPyLSDFile = totalHeavyAtomsCount - + 1; + int protonIndexInPyLSDFile = 1; int protonsToInsert, protonsCount; Correlation correlation; for (int i = 0; i @@ -279,7 +272,7 @@ private static String buildShiftString(final Correlation correlation) { return correlation.isPseudo() ? "?" - : String.valueOf(Statistics.roundDouble(signal.getShift(0), 2)); + : String.valueOf(Statistics.roundDouble(signal.getShift(0), 3)); } private static String buildShiftsComment(final Correlation correlation1, final Correlation correlation2) { @@ -451,7 +444,7 @@ private static String buildSHIX(final Correlation correlation, final int index, stringBuilder.append("SHIX ") .append(indicesMap.get(index)[k]) .append(" ") - .append(Statistics.roundDouble(signal.getShift(0), 2)) + .append(Statistics.roundDouble(signal.getShift(0), 3)) .append("\n"); } @@ -484,7 +477,7 @@ private static String buildSHIH(final Correlation correlation, final int index, stringBuilder.append("SHIH ") .append(indicesMap.get(index)[k]) .append(" ") - .append(Statistics.roundDouble(signal.getShift(0), 3)) + .append(Statistics.roundDouble(signal.getShift(0), 5)) .append("\n"); } } From 78f16f9b1fa7620abceb994c79e5ee493b817345 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Fri, 22 Oct 2021 21:18:28 +0200 Subject: [PATCH 326/405] chore: receive forbidden neighbors from external --- src/casekit/nmr/lsd/LISTAndPROPUtilities.java | 4 ++-- src/casekit/nmr/lsd/PyLSDInputFileBuilder.java | 7 +++++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/casekit/nmr/lsd/LISTAndPROPUtilities.java b/src/casekit/nmr/lsd/LISTAndPROPUtilities.java index 139dea1..db06434 
100644 --- a/src/casekit/nmr/lsd/LISTAndPROPUtilities.java +++ b/src/casekit/nmr/lsd/LISTAndPROPUtilities.java @@ -103,6 +103,7 @@ public static void insertForbiddenConnectionLISTsAndPROPs(final StringBuilder st final List correlationList, final Map indicesMap, final Map>>> detectedConnectivities, + final Map>>> forbiddenNeighbors, final Set atomTypesByMf) { // insert ELEM for each heavy atom type in MF LISTAndPROPUtilities.insertELEM(stringBuilder, listMap, atomTypesByMf); @@ -130,8 +131,7 @@ public static void insertForbiddenConnectionLISTsAndPROPs(final StringBuilder st == null) { continue; } - forbiddenNeighborHybridizationsAndProtonCounts = Utilities.buildForbiddenNeighborHybridizationsAndProtonCounts( - connectivities, atomTypesByMf); + forbiddenNeighborHybridizationsAndProtonCounts = forbiddenNeighbors.get(i); // put in the extracted information per correlation for (int k = 1; k diff --git a/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java b/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java index b78354a..dd651a0 100644 --- a/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java +++ b/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java @@ -490,6 +490,7 @@ private static String buildLISTsAndPROPs(final List correlationList final Map indicesMap, final Map elementCounts, final Map>>> detectedConnectivities, + final Map>>> forbiddenNeighbors, final boolean allowHeteroHeteroBonds) { final StringBuilder stringBuilder = new StringBuilder(); final Map listMap = new HashMap<>(); @@ -500,7 +501,8 @@ private static String buildLISTsAndPROPs(final List correlationList } // insert forbidden connection lists and properties LISTAndPROPUtilities.insertForbiddenConnectionLISTsAndPROPs(stringBuilder, listMap, correlationList, indicesMap, - detectedConnectivities, elementCounts.keySet()); + detectedConnectivities, forbiddenNeighbors, + elementCounts.keySet()); return stringBuilder.toString(); } @@ -603,6 +605,7 @@ private static String buildBOND(final List correlationList, final M public 
static String buildPyLSDInputFileContent(final Data data, final String mf, final Map> detectedHybridizations, final Map>>> detectedConnectivities, + final Map>>> forbiddenNeighbors, final ElucidationOptions elucidationOptions) { final Map> state = data.getCorrelations() .getState(); @@ -679,7 +682,7 @@ public static String buildPyLSDInputFileContent(final Data data, final String mf // LIST PROP for certain limitations or properties of atoms in lists, e.g. hetero hetero bonds allowance stringBuilder.append(buildLISTsAndPROPs(correlationList, indicesMap, elementCounts, detectedConnectivities, - elucidationOptions.isAllowHeteroHeteroBonds())) + forbiddenNeighbors, elucidationOptions.isAllowHeteroHeteroBonds())) .append("\n"); // DEFF and FEXP as filters (bad lists) stringBuilder.append(buildFilters(elucidationOptions.getFilterPaths())) From 2e21330b0f94e94306f33ad30c44a43b55a380de Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Tue, 2 Nov 2021 19:11:10 +0100 Subject: [PATCH 327/405] chore: simplification of neighborhood -> occurrences without hybridization layer & use of set neighbors --- .../nmr/analysis/ConnectivityStatistics.java | 64 ++--- src/casekit/nmr/lsd/LISTAndPROPUtilities.java | 156 +++++++---- .../nmr/lsd/PyLSDInputFileBuilder.java | 23 +- src/casekit/nmr/lsd/Utilities.java | 263 ++++++++---------- 4 files changed, 263 insertions(+), 243 deletions(-) diff --git a/src/casekit/nmr/analysis/ConnectivityStatistics.java b/src/casekit/nmr/analysis/ConnectivityStatistics.java index 604478f..e6515da 100644 --- a/src/casekit/nmr/analysis/ConnectivityStatistics.java +++ b/src/casekit/nmr/analysis/ConnectivityStatistics.java @@ -6,15 +6,11 @@ import org.openscience.cdk.interfaces.IAtom; import org.openscience.cdk.interfaces.IAtomContainer; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Set; +import java.util.*; import java.util.concurrent.ConcurrentHashMap; public class ConnectivityStatistics { - /** * @param dataSetList 
* @param nucleus @@ -106,7 +102,7 @@ public static void buildConnectivityStatistics(final DataSet dataSet, final Stri .get(connectedAtomType) .get(connectedAtomHybridization) .put(connectedAtom.getImplicitHydrogenCount(), connectivityStatistics.get( - multiplicity) + multiplicity) .get(hybridization) .get(shift) .get(connectedAtomType) @@ -140,7 +136,7 @@ public static Map>> extractConnectivit .get(hybridization) .containsKey(shift)) { for (final Map.Entry>> entry : connectivityStatistics.get( - multiplicity) + multiplicity) .get(hybridization) .get(shift) .entrySet()) { @@ -153,57 +149,35 @@ public static Map>> extractConnectivit return extractedConnectivities; } - public static Map>> filterExtractedConnectivities( + public static Map> filterExtractedConnectivities( final Map>> extractedConnectivities, - final double thresholdHybridizationCount, final double thresholdProtonsCount) { + final double thresholdElementCount) { final Map totalCounts = getTotalCounts(extractedConnectivities); final int totalCountsSum = getTotalCount(totalCounts); - final Map>> filteredExtractedConnectivities = new HashMap<>(); + final Map> filteredExtractedConnectivities = new HashMap<>(); extractedConnectivities.keySet() .forEach(neighborAtomType -> { extractedConnectivities.get(neighborAtomType) .keySet() .forEach(neighborHybridization -> { - final int countHybridization = extractedConnectivities.get( - neighborAtomType) - .get(neighborHybridization) - .keySet() - .stream() - .reduce(0, - (protonsCountSum, protonsCount) -> protonsCountSum += extractedConnectivities.get( - neighborAtomType) - .get(neighborHybridization) - .get(protonsCount)); - if (countHybridization - / (double) totalCountsSum - >= thresholdHybridizationCount) { - for (final Map.Entry entryProtonsCount : extractedConnectivities.get( - neighborAtomType) - .get(neighborHybridization) - .entrySet()) { - if (entryProtonsCount.getValue() - / (double) countHybridization - >= thresholdProtonsCount) { - 
filteredExtractedConnectivities.putIfAbsent( - neighborAtomType, new HashMap<>()); - filteredExtractedConnectivities.get( - neighborAtomType) - .putIfAbsent( - neighborHybridization, - new HashMap<>()); - filteredExtractedConnectivities.get( - neighborAtomType) - .get(neighborHybridization) - .putIfAbsent( - entryProtonsCount.getKey(), - entryProtonsCount.getValue()); - } + for (final Map.Entry entryProtonsCount : extractedConnectivities.get( + neighborAtomType) + .get(neighborHybridization) + .entrySet()) { + if (entryProtonsCount.getValue() + / (double) totalCountsSum + >= thresholdElementCount) { + filteredExtractedConnectivities.putIfAbsent( + neighborAtomType, new HashSet<>()); + filteredExtractedConnectivities.get( + neighborAtomType) + .add(entryProtonsCount.getKey()); } } }); }); - + return filteredExtractedConnectivities; } diff --git a/src/casekit/nmr/lsd/LISTAndPROPUtilities.java b/src/casekit/nmr/lsd/LISTAndPROPUtilities.java index db06434..ddc3f9e 100644 --- a/src/casekit/nmr/lsd/LISTAndPROPUtilities.java +++ b/src/casekit/nmr/lsd/LISTAndPROPUtilities.java @@ -34,11 +34,10 @@ public static void insertNoHeteroHeteroBonds(final StringBuilder stringBuilder, listMap.put("HETE", "L1"); } - public static void insertCarbonCombinationLISTs(final StringBuilder stringBuilder, - final Map listMap, - final List correlationList, - final Map indicesMap, - final Map>>> detectedConnectivities) { + public static void insertHeavyAtomCombinationLISTs(final StringBuilder stringBuilder, + final Map listMap, + final List correlationList, + final Map indicesMap) { final Map> atomIndicesMap = new HashMap<>(); Correlation correlation; int indexInPyLSD; @@ -48,11 +47,8 @@ public static void insertCarbonCombinationLISTs(final StringBuilder stringBuilde for (int k = 1; k < indicesMap.get(i).length; k++) { correlation = correlationList.get(i); - if (!correlation.getAtomType() - .equals("C") - || correlation.getHybridization() - .size() - != 1 + if (correlation.getAtomType() + 
.equals("H") || correlation.getProtonsCount() .size() != 1) { @@ -60,9 +56,6 @@ public static void insertCarbonCombinationLISTs(final StringBuilder stringBuilde } listKey = correlation.getAtomType() + "_" - + correlation.getHybridization() - .get(0) - + "_" + correlation.getProtonsCount() .get(0); indexInPyLSD = (int) indicesMap.get(i)[k]; @@ -87,8 +80,6 @@ public static void insertCarbonCombinationLISTs(final StringBuilder stringBuilde stringBuilder.append("; ") .append(split[0]) .append("H") - .append(split[2]) - .append(", ") .append(split[1]) .append("\n"); @@ -102,45 +93,39 @@ public static void insertForbiddenConnectionLISTsAndPROPs(final StringBuilder st final Map listMap, final List correlationList, final Map indicesMap, - final Map>>> detectedConnectivities, - final Map>>> forbiddenNeighbors, - final Set atomTypesByMf) { - // insert ELEM for each heavy atom type in MF - LISTAndPROPUtilities.insertELEM(stringBuilder, listMap, atomTypesByMf); - // insert list combinations of carbon and hybridization states - LISTAndPROPUtilities.insertCarbonCombinationLISTs(stringBuilder, listMap, correlationList, indicesMap, - detectedConnectivities); + final Map>> detectedConnectivities, + final Map>> forbiddenNeighbors) { Correlation correlation; Signal signal; String atomType, listKey; int indexInPyLSD; - Map>> connectivities; - Map>> forbiddenNeighborHybridizationsAndProtonCounts; + Map> connectivitiesTemp; + Map> forbiddenNeighborsTemp; for (int i = 0; i < correlationList.size(); i++) { correlation = correlationList.get(i); signal = Utils.extractSignalFromCorrelation(correlation); atomType = correlation.getAtomType(); - connectivities = detectedConnectivities.get(i); + connectivitiesTemp = detectedConnectivities.get(i); // consider carbons here only, because of having complete connectivity information if (!atomType.equals("C") - || connectivities + || connectivitiesTemp == null - || connectivities.isEmpty() + || connectivitiesTemp.isEmpty() || signal == null) { 
continue; } - forbiddenNeighborHybridizationsAndProtonCounts = forbiddenNeighbors.get(i); + forbiddenNeighborsTemp = forbiddenNeighbors.get(i); // put in the extracted information per correlation for (int k = 1; k < indicesMap.get(i).length; k++) { indexInPyLSD = (int) indicesMap.get(i)[k]; - for (final String neighborAtomType : forbiddenNeighborHybridizationsAndProtonCounts.keySet()) { + for (final String neighborAtomType : forbiddenNeighborsTemp.keySet()) { // forbid bonds to whole element groups if there is an empty map for an atom type - if (forbiddenNeighborHybridizationsAndProtonCounts.get(neighborAtomType) - .isEmpty()) { + if (forbiddenNeighborsTemp.get(neighborAtomType) + .isEmpty()) { stringBuilder.append("PROP ") .append(indexInPyLSD) .append(" 0 ") @@ -159,38 +144,111 @@ public static void insertForbiddenConnectionLISTsAndPROPs(final StringBuilder st .append(")") .append("\n"); } else { - // forbid bonds to possible neighbors with certain hybridization states and proton counts - for (final int forbiddenNeighborHybridization : forbiddenNeighborHybridizationsAndProtonCounts.get( - neighborAtomType) - .keySet()) { - for (final Integer forbiddenProtonsCount : forbiddenNeighborHybridizationsAndProtonCounts.get( - neighborAtomType) - .get(forbiddenNeighborHybridization)) { + for (final int forbiddenProtonsCount : forbiddenNeighborsTemp.get(neighborAtomType)) { + listKey = neighborAtomType + + "_" + + forbiddenProtonsCount; + if (listMap.containsKey(listKey)) { + stringBuilder.append("PROP ") + .append(indexInPyLSD) + .append(" 0 ") + .append(listMap.get(listKey)) + .append(" -") + .append("; no bonds between ") + .append(indexInPyLSD) + .append(" (") + .append(atomType) + .append(", ") + .append(Statistics.roundDouble(signal.getShift(0), 2)) + .append(") and ") + .append(listMap.get(listKey)) + .append(" (") + .append(neighborAtomType) + .append(", ") + .append(forbiddenProtonsCount) + .append("H") + .append(")") + .append("\n"); + } + } + } + } + } + } + } + 
+ public static void insertSetConnectionLISTsAndPROPs(final StringBuilder stringBuilder, + final Map listMap, + final List correlationList, + final Map indicesMap, + final Map>> setNeighbors) { + Correlation correlation; + Signal signal; + String atomType, listKey; + int indexInPyLSD; + Map> setNeighborsTemp; + for (int i = 0; i + < correlationList.size(); i++) { + if (setNeighbors.containsKey(i)) { + correlation = correlationList.get(i); + signal = Utils.extractSignalFromCorrelation(correlation); + atomType = correlation.getAtomType(); + setNeighborsTemp = setNeighbors.get(i); + + // put in the extracted information per correlation + for (int k = 1; k + < indicesMap.get(i).length; k++) { + indexInPyLSD = (int) indicesMap.get(i)[k]; + for (final String neighborAtomType : setNeighborsTemp.keySet()) { + // forbid bonds to whole element groups if there is an empty map for an atom type + if (setNeighborsTemp.get(neighborAtomType) + .isEmpty()) { + stringBuilder.append("PROP ") + .append(indexInPyLSD) + .append(" 1 ") + .append(listMap.get(neighborAtomType)) + .append(" +") + .append("; at least one bond between ") + .append(indexInPyLSD) + .append(" (") + .append(atomType) + .append(", ") + .append(signal + != null + ? 
Statistics.roundDouble(signal.getShift(0), 2) + : "?") + .append(") and ") + .append(listMap.get(neighborAtomType)) + .append(" (") + .append(neighborAtomType) + .append(")") + .append("\n"); + } else { + for (final int setProtonsCount : setNeighborsTemp.get(neighborAtomType)) { listKey = neighborAtomType - + "_SP" - + forbiddenNeighborHybridization + "_" - + forbiddenProtonsCount; + + setProtonsCount; if (listMap.containsKey(listKey)) { stringBuilder.append("PROP ") .append(indexInPyLSD) - .append(" 0 ") + .append(" 1 ") .append(listMap.get(listKey)) - .append(" -") - .append("; no bonds between ") + .append(" +") + .append("; at least one bond between ") .append(indexInPyLSD) .append(" (") .append(atomType) .append(", ") - .append(Statistics.roundDouble(signal.getShift(0), 2)) + .append(signal + != null + ? Statistics.roundDouble(signal.getShift(0), 2) + : "?") .append(") and ") .append(listMap.get(listKey)) .append(" (") .append(neighborAtomType) - .append(", SP") - .append(forbiddenNeighborHybridization) .append(", ") - .append(forbiddenProtonsCount) + .append(setProtonsCount) .append("H") .append(")") .append("\n"); diff --git a/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java b/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java index dd651a0..2284d14 100644 --- a/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java +++ b/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java @@ -489,8 +489,9 @@ private static String buildSHIH(final Correlation correlation, final int index, private static String buildLISTsAndPROPs(final List correlationList, final Map indicesMap, final Map elementCounts, - final Map>>> detectedConnectivities, - final Map>>> forbiddenNeighbors, + final Map>> detectedConnectivities, + final Map>> forbiddenNeighbors, + final Map>> setNeighbors, final boolean allowHeteroHeteroBonds) { final StringBuilder stringBuilder = new StringBuilder(); final Map listMap = new HashMap<>(); @@ -499,10 +500,16 @@ private static String buildLISTsAndPROPs(final List correlationList 
if (!allowHeteroHeteroBonds) { LISTAndPROPUtilities.insertNoHeteroHeteroBonds(stringBuilder, listMap); } + // insert ELEM for each heavy atom type in MF + LISTAndPROPUtilities.insertELEM(stringBuilder, listMap, elementCounts.keySet()); + // insert list combinations of carbon and hybridization states + LISTAndPROPUtilities.insertHeavyAtomCombinationLISTs(stringBuilder, listMap, correlationList, indicesMap); // insert forbidden connection lists and properties LISTAndPROPUtilities.insertForbiddenConnectionLISTsAndPROPs(stringBuilder, listMap, correlationList, indicesMap, - detectedConnectivities, forbiddenNeighbors, - elementCounts.keySet()); + detectedConnectivities, forbiddenNeighbors); + // insert set connection lists and properties + LISTAndPROPUtilities.insertSetConnectionLISTsAndPROPs(stringBuilder, listMap, correlationList, indicesMap, + setNeighbors); return stringBuilder.toString(); } @@ -604,8 +611,9 @@ private static String buildBOND(final List correlationList, final M public static String buildPyLSDInputFileContent(final Data data, final String mf, final Map> detectedHybridizations, - final Map>>> detectedConnectivities, - final Map>>> forbiddenNeighbors, + final Map>> detectedConnectivities, + final Map>> forbiddenNeighbors, + final Map>> setNeighbors, final ElucidationOptions elucidationOptions) { final Map> state = data.getCorrelations() .getState(); @@ -682,7 +690,8 @@ public static String buildPyLSDInputFileContent(final Data data, final String mf // LIST PROP for certain limitations or properties of atoms in lists, e.g. 
hetero hetero bonds allowance stringBuilder.append(buildLISTsAndPROPs(correlationList, indicesMap, elementCounts, detectedConnectivities, - forbiddenNeighbors, elucidationOptions.isAllowHeteroHeteroBonds())) + forbiddenNeighbors, setNeighbors, + elucidationOptions.isAllowHeteroHeteroBonds())) .append("\n"); // DEFF and FEXP as filters (bad lists) stringBuilder.append(buildFilters(elucidationOptions.getFilterPaths())) diff --git a/src/casekit/nmr/lsd/Utilities.java b/src/casekit/nmr/lsd/Utilities.java index 18a2585..3e2516c 100644 --- a/src/casekit/nmr/lsd/Utilities.java +++ b/src/casekit/nmr/lsd/Utilities.java @@ -1,164 +1,143 @@ package casekit.nmr.lsd; -import casekit.nmr.model.nmrium.Correlation; - import java.util.*; import java.util.stream.Collectors; public class Utilities { - public static void reduceDefaultHybridizationsAndProtonCountsOfHeteroAtoms(final List correlationList, - final Map>>> detectedConnectivities) { - final Map> allowedNeighborAtomHybridizations = buildAllowedNeighborAtomHybridizations( - correlationList, detectedConnectivities); - final Map> allowedNeighborAtomProtonCounts = buildAllowedNeighborAtomProtonCounts( - correlationList, detectedConnectivities); - // hetero atoms can bond to carbons only, due to that we can use further connectivity information - // do not allow bond between carbon and hetero atoms in certain hybridization states and proton counts - for (final Correlation correlation : correlationList) { - // ignore C and H atoms - if (correlation.getAtomType() - .equals("C") - || correlation.getAtomType() - .equals("H")) { - continue; - } - // but only if we have seen the hetero atom type in connectivity statistics - // and hybridization states or protons count was not set beforehand - if (correlation.getHybridization() - .isEmpty()) { - correlation.getHybridization() - .addAll(allowedNeighborAtomHybridizations.get(correlation.getAtomType())); - } else if (correlation.getEdited() - != null - && correlation.getEdited() - 
.containsKey("hybridization") - && !correlation.getEdited() - .get("hybridization") - && allowedNeighborAtomHybridizations.containsKey(correlation.getAtomType())) { - correlation.getHybridization() - .retainAll(allowedNeighborAtomHybridizations.get(correlation.getAtomType())); - } - if (correlation.getProtonsCount() - .isEmpty()) { - correlation.getProtonsCount() - .addAll(allowedNeighborAtomProtonCounts.get(correlation.getAtomType())); - } else if (correlation.getEdited() - != null - && correlation.getEdited() - .containsKey("protonsCount") - && !correlation.getEdited() - .get("protonsCount") - && allowedNeighborAtomProtonCounts.containsKey(correlation.getAtomType())) { - correlation.getProtonsCount() - .retainAll(allowedNeighborAtomProtonCounts.get(correlation.getAtomType())); - } - } - } - - public static Map> buildAllowedNeighborAtomHybridizations( - final List correlationList, - final Map>>> detectedConnectivities) { - final Map> allowedHeteroAtomHybridizations = new HashMap<>(); - for (final Map.Entry>>> correlationEntry : detectedConnectivities.entrySet()) { - if (!correlationList.get(correlationEntry.getKey()) - .getAtomType() - .equals("C") - && !correlationList.get(correlationEntry.getKey()) - .getAtomType() - .equals("H")) { - continue; - } - for (final Map.Entry>> neighborAtomTypeEntry : correlationEntry.getValue() - .entrySet()) { - allowedHeteroAtomHybridizations.putIfAbsent(neighborAtomTypeEntry.getKey(), new HashSet<>()); - allowedHeteroAtomHybridizations.get(neighborAtomTypeEntry.getKey()) - .addAll(neighborAtomTypeEntry.getValue() - .keySet()); - } - } - - return allowedHeteroAtomHybridizations; - } + // public static void reduceDefaultHybridizationsAndProtonCountsOfHeteroAtoms(final List correlationList, + // final Map>>> detectedConnectivities) { + // final Map> allowedNeighborAtomHybridizations = buildAllowedNeighborAtomHybridizations( + // correlationList, detectedConnectivities); + // final Map> allowedNeighborAtomProtonCounts = 
buildAllowedNeighborAtomProtonCounts( + // correlationList, detectedConnectivities); + // // hetero atoms can bond to carbons only, due to that we can use further connectivity information + // // do not allow bond between carbon and hetero atoms in certain hybridization states and proton counts + // for (final Correlation correlation : correlationList) { + // // ignore C and H atoms + // if (correlation.getAtomType() + // .equals("C") + // || correlation.getAtomType() + // .equals("H")) { + // continue; + // } + // // but only if we have seen the hetero atom type in connectivity statistics + // // and hybridization states or protons count was not set beforehand + // if (correlation.getHybridization() + // .isEmpty()) { + // correlation.getHybridization() + // .addAll(allowedNeighborAtomHybridizations.get(correlation.getAtomType())); + // } else if (correlation.getEdited() + // != null + // && correlation.getEdited() + // .containsKey("hybridization") + // && !correlation.getEdited() + // .get("hybridization") + // && allowedNeighborAtomHybridizations.containsKey(correlation.getAtomType())) { + // correlation.getHybridization() + // .retainAll(allowedNeighborAtomHybridizations.get(correlation.getAtomType())); + // } + // if (correlation.getProtonsCount() + // .isEmpty()) { + // correlation.getProtonsCount() + // .addAll(allowedNeighborAtomProtonCounts.get(correlation.getAtomType())); + // } else if (correlation.getEdited() + // != null + // && correlation.getEdited() + // .containsKey("protonsCount") + // && !correlation.getEdited() + // .get("protonsCount") + // && allowedNeighborAtomProtonCounts.containsKey(correlation.getAtomType())) { + // correlation.getProtonsCount() + // .retainAll(allowedNeighborAtomProtonCounts.get(correlation.getAtomType())); + // } + // } + // } + // + // public static Map> buildAllowedNeighborAtomHybridizations( + // final List correlationList, + // final Map>>> detectedConnectivities) { + // final Map> allowedHeteroAtomHybridizations = 
new HashMap<>(); + // for (final Map.Entry>>> correlationEntry : detectedConnectivities.entrySet()) { + // if (!correlationList.get(correlationEntry.getKey()) + // .getAtomType() + // .equals("C") + // && !correlationList.get(correlationEntry.getKey()) + // .getAtomType() + // .equals("H")) { + // continue; + // } + // for (final Map.Entry>> neighborAtomTypeEntry : correlationEntry.getValue() + // .entrySet()) { + // allowedHeteroAtomHybridizations.putIfAbsent(neighborAtomTypeEntry.getKey(), new HashSet<>()); + // allowedHeteroAtomHybridizations.get(neighborAtomTypeEntry.getKey()) + // .addAll(neighborAtomTypeEntry.getValue() + // .keySet()); + // } + // } + // + // return allowedHeteroAtomHybridizations; + // } + // + // public static Map> buildAllowedNeighborAtomProtonCounts( + // final List correlationList, + // final Map>>> detectedConnectivities) { + // final Map> allowedHeteroAtomProtonCounts = new HashMap<>(); + // for (final Map.Entry>>> correlationEntry : detectedConnectivities.entrySet()) { + // if (!correlationList.get(correlationEntry.getKey()) + // .getAtomType() + // .equals("C") + // && !correlationList.get(correlationEntry.getKey()) + // .getAtomType() + // .equals("H")) { + // continue; + // } + // for (final Map.Entry>> neighborAtomTypeEntry : correlationEntry.getValue() + // .entrySet()) { + // allowedHeteroAtomProtonCounts.putIfAbsent(neighborAtomTypeEntry.getKey(), new HashSet<>()); + // for (final Map.Entry> neighborHybridizationEntry : neighborAtomTypeEntry.getValue() + // .entrySet()) { + // allowedHeteroAtomProtonCounts.get(neighborAtomTypeEntry.getKey()) + // .addAll(neighborHybridizationEntry.getValue()); + // } + // } + // } + // + // return allowedHeteroAtomProtonCounts; + // } - public static Map> buildAllowedNeighborAtomProtonCounts( - final List correlationList, - final Map>>> detectedConnectivities) { - final Map> allowedHeteroAtomProtonCounts = new HashMap<>(); - for (final Map.Entry>>> correlationEntry : 
detectedConnectivities.entrySet()) { - if (!correlationList.get(correlationEntry.getKey()) - .getAtomType() - .equals("C") - && !correlationList.get(correlationEntry.getKey()) - .getAtomType() - .equals("H")) { - continue; - } - for (final Map.Entry>> neighborAtomTypeEntry : correlationEntry.getValue() - .entrySet()) { - allowedHeteroAtomProtonCounts.putIfAbsent(neighborAtomTypeEntry.getKey(), new HashSet<>()); - for (final Map.Entry> neighborHybridizationEntry : neighborAtomTypeEntry.getValue() - .entrySet()) { - allowedHeteroAtomProtonCounts.get(neighborAtomTypeEntry.getKey()) - .addAll(neighborHybridizationEntry.getValue()); - } - } - } + public static Map> buildForbiddenNeighbors(final Map> connectivities, + final Set possibleNeighborAtomTypes) { - return allowedHeteroAtomProtonCounts; - } - - public static Map>> buildForbiddenNeighborHybridizationsAndProtonCounts( - final Map>> connectivities, final Set possibleNeighborAtomTypes) { - - // define forbidden hybridizations and proton counts (carbons only) of possible neighbors - // or put just an empty map which stands for all hybridizations and proton counts - final Map>> forbiddenNeighborHybridizationsAndProtonCounts = new HashMap<>(); + // define forbidden neighbors (carbons only) + // or put just an empty map which stands for the whole element + final Map> forbiddenNeighbors = new HashMap<>(); for (final String possibleNeighborAtomType : possibleNeighborAtomTypes) { if (possibleNeighborAtomType.equals("H")) { continue; } - forbiddenNeighborHybridizationsAndProtonCounts.put(possibleNeighborAtomType, new HashMap<>()); + forbiddenNeighbors.put(possibleNeighborAtomType, new HashSet<>()); if (connectivities.containsKey(possibleNeighborAtomType)) { - for (final int defaultHybridization : Arrays.stream( - Constants.defaultHybridizationMap.get(possibleNeighborAtomType)) - .boxed() - .collect(Collectors.toList())) { - forbiddenNeighborHybridizationsAndProtonCounts.get(possibleNeighborAtomType) - 
.put(defaultHybridization, Arrays.stream( - Constants.defaultProtonsCountPerValencyMap.get( - possibleNeighborAtomType)) - .boxed() - .collect( - Collectors.toSet())); - } - for (final String neighborHybridization : connectivities.get(possibleNeighborAtomType) - .keySet()) { - // remove found protons count per hybridzations from list of forbidden ones - for (final int forbiddenNeighborHybridization : new HashSet<>( - forbiddenNeighborHybridizationsAndProtonCounts.get(possibleNeighborAtomType) - .keySet())) { - forbiddenNeighborHybridizationsAndProtonCounts.get(possibleNeighborAtomType) - .get(forbiddenNeighborHybridization) - .removeAll(connectivities.get( - possibleNeighborAtomType) - .get(neighborHybridization)); - if (forbiddenNeighborHybridizationsAndProtonCounts.get(possibleNeighborAtomType) - .get(forbiddenNeighborHybridization) - .isEmpty()) { - forbiddenNeighborHybridizationsAndProtonCounts.get(possibleNeighborAtomType) - .remove(forbiddenNeighborHybridization); - } - } - if (forbiddenNeighborHybridizationsAndProtonCounts.get(possibleNeighborAtomType) - .isEmpty()) { - forbiddenNeighborHybridizationsAndProtonCounts.remove(possibleNeighborAtomType); + + forbiddenNeighbors.get(possibleNeighborAtomType) + .addAll(Arrays.stream( + Constants.defaultProtonsCountPerValencyMap.get(possibleNeighborAtomType)) + .boxed() + .collect(Collectors.toSet())); + + for (final int neighborProtonCount : connectivities.get(possibleNeighborAtomType)) { + // remove found protons count from list of forbidden ones + forbiddenNeighbors.get(possibleNeighborAtomType) + .remove(neighborProtonCount); + if (forbiddenNeighbors.get(possibleNeighborAtomType) + .isEmpty()) { + forbiddenNeighbors.remove(possibleNeighborAtomType); break; } } } } - return forbiddenNeighborHybridizationsAndProtonCounts; + return forbiddenNeighbors; } } From 8848e4e5bbc650d224922b298afd3dca1c3ebe6e Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Tue, 2 Nov 2021 19:15:46 +0100 Subject: [PATCH 328/405] chore: 
renamed requestID to pattern --- src/casekit/io/FileSystem.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/casekit/io/FileSystem.java b/src/casekit/io/FileSystem.java index 4498902..63e0e9e 100644 --- a/src/casekit/io/FileSystem.java +++ b/src/casekit/io/FileSystem.java @@ -50,7 +50,7 @@ public static boolean writeFile(final String pathToFile, final String content) { return false; } - public static boolean cleanup(final String[] directoriesToCheck, final String requestID) { + public static boolean cleanup(final String[] directoriesToCheck, final String pattern) { boolean cleaned = false; for (final String dir : directoriesToCheck) { @@ -58,7 +58,7 @@ public static boolean cleanup(final String[] directoriesToCheck, final String re cleaned = Files.walk(Paths.get(dir)) .map(Path::toFile) .filter(file -> file.getAbsolutePath() - .contains(requestID)) + .contains(pattern)) .allMatch(File::delete); } catch (final IOException e) { From 7a760ec2dc587ae07e0644918d86ca0021360cf3 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Mon, 15 Nov 2021 12:05:46 +0100 Subject: [PATCH 329/405] feat: re-use hybridizations --- .../nmr/analysis/ConnectivityStatistics.java | 14 +- src/casekit/nmr/lsd/Constants.java | 67 ++++- src/casekit/nmr/lsd/LISTAndPROPUtilities.java | 160 ++++++----- .../nmr/lsd/PyLSDInputFileBuilder.java | 18 +- src/casekit/nmr/lsd/Utilities.java | 250 ++++++++++-------- src/casekit/nmr/model/nmrium/Correlation.java | 2 +- 6 files changed, 313 insertions(+), 198 deletions(-) diff --git a/src/casekit/nmr/analysis/ConnectivityStatistics.java b/src/casekit/nmr/analysis/ConnectivityStatistics.java index e6515da..bcc2f72 100644 --- a/src/casekit/nmr/analysis/ConnectivityStatistics.java +++ b/src/casekit/nmr/analysis/ConnectivityStatistics.java @@ -149,13 +149,13 @@ public static Map>> extractConnectivit return extractedConnectivities; } - public static Map> filterExtractedConnectivities( + public static Map>> filterExtractedConnectivities( 
final Map>> extractedConnectivities, final double thresholdElementCount) { final Map totalCounts = getTotalCounts(extractedConnectivities); final int totalCountsSum = getTotalCount(totalCounts); - final Map> filteredExtractedConnectivities = new HashMap<>(); + final Map>> filteredExtractedConnectivities = new HashMap<>(); extractedConnectivities.keySet() .forEach(neighborAtomType -> { extractedConnectivities.get(neighborAtomType) @@ -169,15 +169,21 @@ public static Map> filterExtractedConnectivities( / (double) totalCountsSum >= thresholdElementCount) { filteredExtractedConnectivities.putIfAbsent( - neighborAtomType, new HashSet<>()); + neighborAtomType, new HashMap<>()); filteredExtractedConnectivities.get( neighborAtomType) + .putIfAbsent( + neighborHybridization, + new HashSet<>()); + filteredExtractedConnectivities.get( + neighborAtomType) + .get(neighborHybridization) .add(entryProtonsCount.getKey()); } } }); }); - + return filteredExtractedConnectivities; } diff --git a/src/casekit/nmr/lsd/Constants.java b/src/casekit/nmr/lsd/Constants.java index f1a5de3..202fa30 100644 --- a/src/casekit/nmr/lsd/Constants.java +++ b/src/casekit/nmr/lsd/Constants.java @@ -1,8 +1,6 @@ package casekit.nmr.lsd; -import java.util.Collections; -import java.util.HashMap; -import java.util.Map; +import java.util.*; public class Constants { @@ -12,6 +10,7 @@ public class Constants { public static final Map defaultProtonsCountPerValencyMap = createDefaultProtonsCountPerValencyMap(); public static final Map defaultAtomLabelMap = createDefaultAtomLabelMap(); public static final Map hybridizationConversionMap = createHybridizationConversionMap(); + public static final Map>> hybridizationsByProtonsCountMap = createHybridizationsByProtonsCountMap(); private static Map createNucleiMap() { final Map nuclei = new HashMap<>(); @@ -75,4 +74,66 @@ private static Map createHybridizationConversionMap() { return Collections.unmodifiableMap(hybridizationConversionMap); } + + private static Map>> 
createHybridizationsByProtonsCountMap() { + final Map>> hybridizationsByProtonsCountMap = new HashMap<>(); + hybridizationsByProtonsCountMap.put("C", new HashMap<>()); + hybridizationsByProtonsCountMap.get("C") + .put(4, new HashSet<>()); + hybridizationsByProtonsCountMap.get("C") + .get(4) + .add(3); + hybridizationsByProtonsCountMap.get("C") + .put(3, new HashSet<>()); + hybridizationsByProtonsCountMap.get("C") + .get(3) + .add(3); + hybridizationsByProtonsCountMap.get("C") + .put(2, new HashSet<>()); + hybridizationsByProtonsCountMap.get("C") + .get(2) + .add(3); + hybridizationsByProtonsCountMap.get("C") + .get(2) + .add(2); + hybridizationsByProtonsCountMap.get("C") + .put(1, new HashSet<>()); + hybridizationsByProtonsCountMap.get("C") + .get(1) + .add(3); + hybridizationsByProtonsCountMap.get("C") + .get(1) + .add(2); + hybridizationsByProtonsCountMap.get("C") + .get(1) + .add(1); + hybridizationsByProtonsCountMap.get("C") + .put(0, new HashSet<>()); + hybridizationsByProtonsCountMap.get("C") + .get(0) + .add(3); + hybridizationsByProtonsCountMap.get("C") + .get(0) + .add(2); + hybridizationsByProtonsCountMap.get("C") + .get(0) + .add(1); + // N (3) + hybridizationsByProtonsCountMap.put("N", new HashMap<>()); + hybridizationsByProtonsCountMap.get("N") + .put(3, new HashSet<>()); + hybridizationsByProtonsCountMap.get("C") + .get(3) + .add(3); + hybridizationsByProtonsCountMap.get("N") + .put(2, new HashSet<>()); + hybridizationsByProtonsCountMap.get("C") + .get(2) + .add(3); + hybridizationsByProtonsCountMap.get("C") + .get(2) + .add(2); + + return Collections.unmodifiableMap(hybridizationsByProtonsCountMap); + } } diff --git a/src/casekit/nmr/lsd/LISTAndPROPUtilities.java b/src/casekit/nmr/lsd/LISTAndPROPUtilities.java index ddc3f9e..bde93b5 100644 --- a/src/casekit/nmr/lsd/LISTAndPROPUtilities.java +++ b/src/casekit/nmr/lsd/LISTAndPROPUtilities.java @@ -34,6 +34,14 @@ public static void insertNoHeteroHeteroBonds(final StringBuilder stringBuilder, 
listMap.put("HETE", "L1"); } + private static String buildListKey(final String atomType, final int hybridization, final int protonsCount) { + return atomType + + "_" + + hybridization + + "_" + + protonsCount; + } + public static void insertHeavyAtomCombinationLISTs(final StringBuilder stringBuilder, final Map listMap, final List correlationList, @@ -49,15 +57,17 @@ public static void insertHeavyAtomCombinationLISTs(final StringBuilder stringBui correlation = correlationList.get(i); if (correlation.getAtomType() .equals("H") + || correlation.getHybridization() + .size() + != 1 || correlation.getProtonsCount() .size() != 1) { continue; } - listKey = correlation.getAtomType() - + "_" - + correlation.getProtonsCount() - .get(0); + listKey = buildListKey(correlation.getAtomType(), correlation.getHybridization() + .get(0), correlation.getProtonsCount() + .get(0)); indexInPyLSD = (int) indicesMap.get(i)[k]; atomIndicesMap.putIfAbsent(listKey, new HashSet<>()); atomIndicesMap.get(listKey) @@ -80,12 +90,17 @@ public static void insertHeavyAtomCombinationLISTs(final StringBuilder stringBui stringBuilder.append("; ") .append(split[0]) .append("H") + .append(split[2]) + .append(", SP") .append(split[1]) .append("\n"); listMap.put(combinationEntry.getKey(), "L" + (listMap.size() + 1)); + + System.out.println("listMap: " + + listMap); } } @@ -93,14 +108,14 @@ public static void insertForbiddenConnectionLISTsAndPROPs(final StringBuilder st final Map listMap, final List correlationList, final Map indicesMap, - final Map>> detectedConnectivities, - final Map>> forbiddenNeighbors) { + final Map>>> detectedConnectivities, + final Map>>> forbiddenNeighbors) { Correlation correlation; Signal signal; String atomType, listKey; int indexInPyLSD; - Map> connectivitiesTemp; - Map> forbiddenNeighborsTemp; + Map>> connectivitiesTemp; + Map>> forbiddenNeighborsTemp; for (int i = 0; i < correlationList.size(); i++) { correlation = correlationList.get(i); @@ -144,49 +159,61 @@ public static 
void insertForbiddenConnectionLISTsAndPROPs(final StringBuilder st .append(")") .append("\n"); } else { - for (final int forbiddenProtonsCount : forbiddenNeighborsTemp.get(neighborAtomType)) { - listKey = neighborAtomType - + "_" - + forbiddenProtonsCount; - if (listMap.containsKey(listKey)) { - stringBuilder.append("PROP ") - .append(indexInPyLSD) - .append(" 0 ") - .append(listMap.get(listKey)) - .append(" -") - .append("; no bonds between ") - .append(indexInPyLSD) - .append(" (") - .append(atomType) - .append(", ") - .append(Statistics.roundDouble(signal.getShift(0), 2)) - .append(") and ") - .append(listMap.get(listKey)) - .append(" (") - .append(neighborAtomType) - .append(", ") - .append(forbiddenProtonsCount) - .append("H") - .append(")") - .append("\n"); + // forbid bonds to possible neighbors with certain hybridization states and proton counts + for (final int forbiddenNeighborHybridization : forbiddenNeighborsTemp.get(neighborAtomType) + .keySet()) { + for (final int forbiddenProtonsCount : forbiddenNeighborsTemp.get(neighborAtomType) + .get(forbiddenNeighborHybridization)) { + listKey = buildListKey(neighborAtomType, forbiddenNeighborHybridization, + forbiddenProtonsCount); + System.out.println("forbidden: " + + listKey); + if (listMap.containsKey(listKey)) { + System.out.println("-> HUHU"); + stringBuilder.append("PROP ") + .append(indexInPyLSD) + .append(" 0 ") + .append(listMap.get(listKey)) + .append(" -") + .append("; no bonds between ") + .append(indexInPyLSD) + .append(" (") + .append(atomType) + .append(", ") + .append(Statistics.roundDouble(signal.getShift(0), 2)) + .append(", SP") + .append(forbiddenNeighborHybridization) + .append(") and ") + .append(listMap.get(listKey)) + .append(" (") + .append(neighborAtomType) + .append(", SP") + .append(forbiddenNeighborHybridization) + .append(", ") + .append(forbiddenProtonsCount) + .append("H") + .append(")") + .append("\n"); + } } } } } } } + } public static void 
insertSetConnectionLISTsAndPROPs(final StringBuilder stringBuilder, final Map listMap, final List correlationList, final Map indicesMap, - final Map>> setNeighbors) { + final Map>>> setNeighbors) { Correlation correlation; Signal signal; String atomType, listKey; int indexInPyLSD; - Map> setNeighborsTemp; + Map>> setNeighborsTemp; for (int i = 0; i < correlationList.size(); i++) { if (setNeighbors.containsKey(i)) { @@ -224,34 +251,41 @@ public static void insertSetConnectionLISTsAndPROPs(final StringBuilder stringBu .append(")") .append("\n"); } else { - for (final int setProtonsCount : setNeighborsTemp.get(neighborAtomType)) { - listKey = neighborAtomType - + "_" - + setProtonsCount; - if (listMap.containsKey(listKey)) { - stringBuilder.append("PROP ") - .append(indexInPyLSD) - .append(" 1 ") - .append(listMap.get(listKey)) - .append(" +") - .append("; at least one bond between ") - .append(indexInPyLSD) - .append(" (") - .append(atomType) - .append(", ") - .append(signal - != null - ? Statistics.roundDouble(signal.getShift(0), 2) - : "?") - .append(") and ") - .append(listMap.get(listKey)) - .append(" (") - .append(neighborAtomType) - .append(", ") - .append(setProtonsCount) - .append("H") - .append(")") - .append("\n"); + for (final int setNeighborHybridization : setNeighborsTemp.get(neighborAtomType) + .keySet()) { + for (final int setProtonsCount : setNeighborsTemp.get(neighborAtomType) + .get(setNeighborHybridization)) { + listKey = buildListKey(neighborAtomType, setNeighborHybridization, setProtonsCount); + System.out.println("set: " + + listKey); + if (listMap.containsKey(listKey)) { + System.out.println("-> HUHU"); + stringBuilder.append("PROP ") + .append(indexInPyLSD) + .append(" 1 ") + .append(listMap.get(listKey)) + .append(" +") + .append("; at least one bond between ") + .append(indexInPyLSD) + .append(" (") + .append(atomType) + .append(", ") + .append(signal + != null + ? 
Statistics.roundDouble(signal.getShift(0), 2) + : "?") + .append(") and ") + .append(listMap.get(listKey)) + .append(" (") + .append(neighborAtomType) + .append(", SP") + .append(setNeighborHybridization) + .append(", ") + .append(setProtonsCount) + .append("H") + .append(")") + .append("\n"); + } } } } diff --git a/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java b/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java index 2284d14..89192d5 100644 --- a/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java +++ b/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java @@ -10,7 +10,6 @@ import java.text.SimpleDateFormat; import java.util.*; -import java.util.stream.Collectors; public class PyLSDInputFileBuilder { @@ -130,10 +129,7 @@ private static String buildMULT(final Correlation correlation, final int index, && !correlation.getHybridization() .isEmpty()) { // if hybridization is already given - hybridizations.addAll(correlation.getHybridization() - .stream() - .map(Constants.hybridizationConversionMap::get) - .collect(Collectors.toSet())); + hybridizations.addAll(correlation.getHybridization()); } else { // if hybridization is not given then use the detected ones if (detectedHybridizations.containsKey(index)) { @@ -489,9 +485,9 @@ private static String buildSHIH(final Correlation correlation, final int index, private static String buildLISTsAndPROPs(final List correlationList, final Map indicesMap, final Map elementCounts, - final Map>> detectedConnectivities, - final Map>> forbiddenNeighbors, - final Map>> setNeighbors, + final Map>>> detectedConnectivities, + final Map>>> forbiddenNeighbors, + final Map>>> setNeighbors, final boolean allowHeteroHeteroBonds) { final StringBuilder stringBuilder = new StringBuilder(); final Map listMap = new HashMap<>(); @@ -611,9 +607,9 @@ private static String buildBOND(final List correlationList, final M public static String buildPyLSDInputFileContent(final Data data, final String mf, final Map> detectedHybridizations, - final Map>> 
detectedConnectivities, - final Map>> forbiddenNeighbors, - final Map>> setNeighbors, + final Map>>> detectedConnectivities, + final Map>>> forbiddenNeighbors, + final Map>>> setNeighbors, final ElucidationOptions elucidationOptions) { final Map> state = data.getCorrelations() .getState(); diff --git a/src/casekit/nmr/lsd/Utilities.java b/src/casekit/nmr/lsd/Utilities.java index 3e2516c..673fb6e 100644 --- a/src/casekit/nmr/lsd/Utilities.java +++ b/src/casekit/nmr/lsd/Utilities.java @@ -1,134 +1,152 @@ package casekit.nmr.lsd; +import casekit.nmr.model.nmrium.Correlation; + import java.util.*; import java.util.stream.Collectors; public class Utilities { - // public static void reduceDefaultHybridizationsAndProtonCountsOfHeteroAtoms(final List correlationList, - // final Map>>> detectedConnectivities) { - // final Map> allowedNeighborAtomHybridizations = buildAllowedNeighborAtomHybridizations( - // correlationList, detectedConnectivities); - // final Map> allowedNeighborAtomProtonCounts = buildAllowedNeighborAtomProtonCounts( - // correlationList, detectedConnectivities); - // // hetero atoms can bond to carbons only, due to that we can use further connectivity information - // // do not allow bond between carbon and hetero atoms in certain hybridization states and proton counts - // for (final Correlation correlation : correlationList) { - // // ignore C and H atoms - // if (correlation.getAtomType() - // .equals("C") - // || correlation.getAtomType() - // .equals("H")) { - // continue; - // } - // // but only if we have seen the hetero atom type in connectivity statistics - // // and hybridization states or protons count was not set beforehand - // if (correlation.getHybridization() - // .isEmpty()) { - // correlation.getHybridization() - // .addAll(allowedNeighborAtomHybridizations.get(correlation.getAtomType())); - // } else if (correlation.getEdited() - // != null - // && correlation.getEdited() - // .containsKey("hybridization") - // && 
!correlation.getEdited() - // .get("hybridization") - // && allowedNeighborAtomHybridizations.containsKey(correlation.getAtomType())) { - // correlation.getHybridization() - // .retainAll(allowedNeighborAtomHybridizations.get(correlation.getAtomType())); - // } - // if (correlation.getProtonsCount() - // .isEmpty()) { - // correlation.getProtonsCount() - // .addAll(allowedNeighborAtomProtonCounts.get(correlation.getAtomType())); - // } else if (correlation.getEdited() - // != null - // && correlation.getEdited() - // .containsKey("protonsCount") - // && !correlation.getEdited() - // .get("protonsCount") - // && allowedNeighborAtomProtonCounts.containsKey(correlation.getAtomType())) { - // correlation.getProtonsCount() - // .retainAll(allowedNeighborAtomProtonCounts.get(correlation.getAtomType())); - // } - // } - // } - // - // public static Map> buildAllowedNeighborAtomHybridizations( - // final List correlationList, - // final Map>>> detectedConnectivities) { - // final Map> allowedHeteroAtomHybridizations = new HashMap<>(); - // for (final Map.Entry>>> correlationEntry : detectedConnectivities.entrySet()) { - // if (!correlationList.get(correlationEntry.getKey()) - // .getAtomType() - // .equals("C") - // && !correlationList.get(correlationEntry.getKey()) - // .getAtomType() - // .equals("H")) { - // continue; - // } - // for (final Map.Entry>> neighborAtomTypeEntry : correlationEntry.getValue() - // .entrySet()) { - // allowedHeteroAtomHybridizations.putIfAbsent(neighborAtomTypeEntry.getKey(), new HashSet<>()); - // allowedHeteroAtomHybridizations.get(neighborAtomTypeEntry.getKey()) - // .addAll(neighborAtomTypeEntry.getValue() - // .keySet()); - // } - // } - // - // return allowedHeteroAtomHybridizations; - // } - // - // public static Map> buildAllowedNeighborAtomProtonCounts( - // final List correlationList, - // final Map>>> detectedConnectivities) { - // final Map> allowedHeteroAtomProtonCounts = new HashMap<>(); - // for (final Map.Entry>>> 
correlationEntry : detectedConnectivities.entrySet()) { - // if (!correlationList.get(correlationEntry.getKey()) - // .getAtomType() - // .equals("C") - // && !correlationList.get(correlationEntry.getKey()) - // .getAtomType() - // .equals("H")) { - // continue; - // } - // for (final Map.Entry>> neighborAtomTypeEntry : correlationEntry.getValue() - // .entrySet()) { - // allowedHeteroAtomProtonCounts.putIfAbsent(neighborAtomTypeEntry.getKey(), new HashSet<>()); - // for (final Map.Entry> neighborHybridizationEntry : neighborAtomTypeEntry.getValue() - // .entrySet()) { - // allowedHeteroAtomProtonCounts.get(neighborAtomTypeEntry.getKey()) - // .addAll(neighborHybridizationEntry.getValue()); - // } - // } - // } - // - // return allowedHeteroAtomProtonCounts; - // } + public static void reduceDefaultHybridizationsAndProtonCountsOfHeteroAtoms(final List correlationList, + final Map>>> detectedConnectivities) { + final Map> allowedNeighborAtomHybridizations = buildAllowedNeighborAtomHybridizations( + correlationList, detectedConnectivities); + final Map> allowedNeighborAtomProtonCounts = buildAllowedNeighborAtomProtonCounts( + correlationList, detectedConnectivities); + // hetero atoms can bond to carbons only, due to that we can use further connectivity information + // do not allow bond between carbon and hetero atoms in certain hybridization states and proton counts + for (final Correlation correlation : correlationList) { + // ignore C and H atoms + if (correlation.getAtomType() + .equals("C") + || correlation.getAtomType() + .equals("H")) { + continue; + } + // but only if we have seen the hetero atom type in connectivity statistics + // and hybridization states or protons count was not set beforehand + if (correlation.getHybridization() + .isEmpty()) { + correlation.getHybridization() + .addAll(allowedNeighborAtomHybridizations.get(correlation.getAtomType())); + } else if (correlation.getEdited() + != null + && correlation.getEdited() + 
.containsKey("hybridization") + && !correlation.getEdited() + .get("hybridization") + && allowedNeighborAtomHybridizations.containsKey(correlation.getAtomType())) { + correlation.getHybridization() + .retainAll(allowedNeighborAtomHybridizations.get(correlation.getAtomType())); + } + if (correlation.getProtonsCount() + .isEmpty()) { + correlation.getProtonsCount() + .addAll(allowedNeighborAtomProtonCounts.get(correlation.getAtomType())); + } else if (correlation.getEdited() + != null + && correlation.getEdited() + .containsKey("protonsCount") + && !correlation.getEdited() + .get("protonsCount") + && allowedNeighborAtomProtonCounts.containsKey(correlation.getAtomType())) { + correlation.getProtonsCount() + .retainAll(allowedNeighborAtomProtonCounts.get(correlation.getAtomType())); + } + } + } + + public static Map> buildAllowedNeighborAtomHybridizations( + final List correlationList, + final Map>>> detectedConnectivities) { + final Map> allowedHeteroAtomHybridizations = new HashMap<>(); + for (final Map.Entry>>> correlationEntry : detectedConnectivities.entrySet()) { + if (!correlationList.get(correlationEntry.getKey()) + .getAtomType() + .equals("C") + && !correlationList.get(correlationEntry.getKey()) + .getAtomType() + .equals("H")) { + continue; + } + for (final Map.Entry>> neighborAtomTypeEntry : correlationEntry.getValue() + .entrySet()) { + allowedHeteroAtomHybridizations.putIfAbsent(neighborAtomTypeEntry.getKey(), new HashSet<>()); + allowedHeteroAtomHybridizations.get(neighborAtomTypeEntry.getKey()) + .addAll(neighborAtomTypeEntry.getValue() + .keySet()); + } + } + + return allowedHeteroAtomHybridizations; + } - public static Map> buildForbiddenNeighbors(final Map> connectivities, - final Set possibleNeighborAtomTypes) { + public static Map> buildAllowedNeighborAtomProtonCounts( + final List correlationList, + final Map>>> detectedConnectivities) { + final Map> allowedHeteroAtomProtonCounts = new HashMap<>(); + for (final Map.Entry>>> correlationEntry : 
detectedConnectivities.entrySet()) { + if (!correlationList.get(correlationEntry.getKey()) + .getAtomType() + .equals("C") + && !correlationList.get(correlationEntry.getKey()) + .getAtomType() + .equals("H")) { + continue; + } + for (final Map.Entry>> neighborAtomTypeEntry : correlationEntry.getValue() + .entrySet()) { + allowedHeteroAtomProtonCounts.putIfAbsent(neighborAtomTypeEntry.getKey(), new HashSet<>()); + for (final Map.Entry> neighborHybridizationEntry : neighborAtomTypeEntry.getValue() + .entrySet()) { + allowedHeteroAtomProtonCounts.get(neighborAtomTypeEntry.getKey()) + .addAll(neighborHybridizationEntry.getValue()); + } + } + } - // define forbidden neighbors (carbons only) - // or put just an empty map which stands for the whole element - final Map> forbiddenNeighbors = new HashMap<>(); + return allowedHeteroAtomProtonCounts; + } + + public static Map>> buildForbiddenNeighbors( + final Map>> connectivities, final Set possibleNeighborAtomTypes) { + + // define forbidden neighbors (for carbons only) + // or put just an empty map which means the whole element is forbidden + final Map>> forbiddenNeighbors = new HashMap<>(); for (final String possibleNeighborAtomType : possibleNeighborAtomTypes) { if (possibleNeighborAtomType.equals("H")) { continue; } - forbiddenNeighbors.put(possibleNeighborAtomType, new HashSet<>()); + forbiddenNeighbors.put(possibleNeighborAtomType, new HashMap<>()); if (connectivities.containsKey(possibleNeighborAtomType)) { - - forbiddenNeighbors.get(possibleNeighborAtomType) - .addAll(Arrays.stream( - Constants.defaultProtonsCountPerValencyMap.get(possibleNeighborAtomType)) - .boxed() - .collect(Collectors.toSet())); - - for (final int neighborProtonCount : connectivities.get(possibleNeighborAtomType)) { - // remove found protons count from list of forbidden ones + for (final int defaultHybridization : Arrays.stream( + Constants.defaultHybridizationMap.get(possibleNeighborAtomType)) + .boxed() + .collect(Collectors.toList())) { 
forbiddenNeighbors.get(possibleNeighborAtomType) - .remove(neighborProtonCount); + .put(defaultHybridization, Arrays.stream( + Constants.defaultProtonsCountPerValencyMap.get(possibleNeighborAtomType)) + .boxed() + .collect(Collectors.toSet())); + } + for (final int possibleNeighborHybridization : connectivities.get(possibleNeighborAtomType) + .keySet()) { + // remove found protons count per hybridzations from list of forbidden ones + for (final int forbiddenNeighborHybridization : new HashSet<>( + forbiddenNeighbors.get(possibleNeighborAtomType) + .keySet())) { + forbiddenNeighbors.get(possibleNeighborAtomType) + .get(forbiddenNeighborHybridization) + .removeAll(connectivities.get(possibleNeighborAtomType) + .get(possibleNeighborHybridization)); + if (forbiddenNeighbors.get(possibleNeighborAtomType) + .get(forbiddenNeighborHybridization) + .isEmpty()) { + forbiddenNeighbors.get(possibleNeighborAtomType) + .remove(forbiddenNeighborHybridization); + } + } if (forbiddenNeighbors.get(possibleNeighborAtomType) .isEmpty()) { forbiddenNeighbors.remove(possibleNeighborAtomType); diff --git a/src/casekit/nmr/model/nmrium/Correlation.java b/src/casekit/nmr/model/nmrium/Correlation.java index 7b66728..002ef81 100644 --- a/src/casekit/nmr/model/nmrium/Correlation.java +++ b/src/casekit/nmr/model/nmrium/Correlation.java @@ -46,7 +46,7 @@ public class Correlation { private int equivalence; private Map> attachment; private List protonsCount; - private List hybridization; + private List hybridization; private boolean pseudo; private Map edited; } From 02c9b83d211cf1a98f3ff5a021af8a5b59789bda Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Wed, 17 Nov 2021 15:05:13 +0100 Subject: [PATCH 330/405] chore: separated DEFF and FEXP --- .../nmr/lsd/PyLSDInputFileBuilder.java | 45 ++++++++++++++----- 1 file changed, 34 insertions(+), 11 deletions(-) diff --git a/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java b/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java index 89192d5..6711dce 100644 
--- a/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java +++ b/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java @@ -498,8 +498,8 @@ private static String buildLISTsAndPROPs(final List correlationList } // insert ELEM for each heavy atom type in MF LISTAndPROPUtilities.insertELEM(stringBuilder, listMap, elementCounts.keySet()); - // insert list combinations of carbon and hybridization states - LISTAndPROPUtilities.insertHeavyAtomCombinationLISTs(stringBuilder, listMap, correlationList, indicesMap); + // // insert list combinations of carbon and hybridization states + // LISTAndPROPUtilities.insertHeavyAtomCombinationLISTs(stringBuilder, listMap, correlationList, indicesMap); // insert forbidden connection lists and properties LISTAndPROPUtilities.insertForbiddenConnectionLISTsAndPROPs(stringBuilder, listMap, correlationList, indicesMap, detectedConnectivities, forbiddenNeighbors); @@ -510,9 +510,9 @@ private static String buildLISTsAndPROPs(final List correlationList return stringBuilder.toString(); } - private static String buildFilters(final String[] filterPaths) { + private static String buildFilterDEFFs(final Map fexpMap, final String[] filterPaths) { final StringBuilder stringBuilder = new StringBuilder(); - // DEFF + FEXP -> add filters + // DEFF -> add filters stringBuilder.append("; externally defined filters\n"); final Map filters = new LinkedHashMap<>(); int counter = 1; @@ -530,13 +530,31 @@ private static String buildFilters(final String[] filterPaths) { .append("\"\n")); stringBuilder.append("\n"); + for (int i = 0; i + < filters.size(); i++) { + fexpMap.put("F" + + (i + + 1), false); + } + } + + return stringBuilder.toString(); + } + + private static String buildFEXP(final Map fexpMap) { + final StringBuilder stringBuilder = new StringBuilder(); + + if (!fexpMap.isEmpty()) { stringBuilder.append("FEXP \""); - counter = 0; - for (final String label : filters.keySet()) { - stringBuilder.append("NOT ") - .append(label); + int counter = 0; + for (final String 
label : fexpMap.keySet()) { + if (!fexpMap.get(label)) { + stringBuilder.append("NOT "); + } + stringBuilder.append(label); if (counter - < filters.size() + < fexpMap.keySet() + .size() - 1) { stringBuilder.append(" and "); } @@ -689,8 +707,13 @@ public static String buildPyLSDInputFileContent(final Data data, final String mf forbiddenNeighbors, setNeighbors, elucidationOptions.isAllowHeteroHeteroBonds())) .append("\n"); - // DEFF and FEXP as filters (bad lists) - stringBuilder.append(buildFilters(elucidationOptions.getFilterPaths())) + // DEFF and FEXP as filters (good/bad lists) + final Map fexpMap = new HashMap<>(); + stringBuilder.append(buildFilterDEFFs(fexpMap, elucidationOptions.getFilterPaths())) + .append("\n"); + System.out.println("fexpMap: " + + fexpMap); + stringBuilder.append(buildFEXP(fexpMap)) .append("\n"); return stringBuilder.toString(); From 6dbb349db4c36fe17b631336a00710ff46a11ef4 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Wed, 17 Nov 2021 15:38:50 +0100 Subject: [PATCH 331/405] chore: use common method for forbidden or set neighbors LIST and PROP creation --- src/casekit/nmr/lsd/LISTAndPROPUtilities.java | 216 +++++------------- .../nmr/lsd/PyLSDInputFileBuilder.java | 8 +- 2 files changed, 65 insertions(+), 159 deletions(-) diff --git a/src/casekit/nmr/lsd/LISTAndPROPUtilities.java b/src/casekit/nmr/lsd/LISTAndPROPUtilities.java index bde93b5..c051bd7 100644 --- a/src/casekit/nmr/lsd/LISTAndPROPUtilities.java +++ b/src/casekit/nmr/lsd/LISTAndPROPUtilities.java @@ -98,144 +98,49 @@ public static void insertHeavyAtomCombinationLISTs(final StringBuilder stringBui listMap.put(combinationEntry.getKey(), "L" + (listMap.size() + 1)); - - System.out.println("listMap: " - + listMap); } } - public static void insertForbiddenConnectionLISTsAndPROPs(final StringBuilder stringBuilder, - final Map listMap, - final List correlationList, - final Map indicesMap, - final Map>>> detectedConnectivities, - final Map>>> forbiddenNeighbors) { - Correlation 
correlation; - Signal signal; - String atomType, listKey; - int indexInPyLSD; - Map>> connectivitiesTemp; - Map>> forbiddenNeighborsTemp; - for (int i = 0; i - < correlationList.size(); i++) { - correlation = correlationList.get(i); - signal = Utils.extractSignalFromCorrelation(correlation); - atomType = correlation.getAtomType(); - connectivitiesTemp = detectedConnectivities.get(i); - // consider carbons here only, because of having complete connectivity information - if (!atomType.equals("C") - || connectivitiesTemp - == null - || connectivitiesTemp.isEmpty() - || signal - == null) { - continue; - } - forbiddenNeighborsTemp = forbiddenNeighbors.get(i); - - // put in the extracted information per correlation - for (int k = 1; k - < indicesMap.get(i).length; k++) { - indexInPyLSD = (int) indicesMap.get(i)[k]; - for (final String neighborAtomType : forbiddenNeighborsTemp.keySet()) { - // forbid bonds to whole element groups if there is an empty map for an atom type - if (forbiddenNeighborsTemp.get(neighborAtomType) - .isEmpty()) { - stringBuilder.append("PROP ") - .append(indexInPyLSD) - .append(" 0 ") - .append(listMap.get(neighborAtomType)) - .append(" -") - .append("; no bonds between ") - .append(indexInPyLSD) - .append(" (") - .append(atomType) - .append(", ") - .append(Statistics.roundDouble(signal.getShift(0), 2)) - .append(") and ") - .append(listMap.get(neighborAtomType)) - .append(" (") - .append(neighborAtomType) - .append(")") - .append("\n"); - } else { - // forbid bonds to possible neighbors with certain hybridization states and proton counts - for (final int forbiddenNeighborHybridization : forbiddenNeighborsTemp.get(neighborAtomType) - .keySet()) { - for (final int forbiddenProtonsCount : forbiddenNeighborsTemp.get(neighborAtomType) - .get(forbiddenNeighborHybridization)) { - listKey = buildListKey(neighborAtomType, forbiddenNeighborHybridization, - forbiddenProtonsCount); - System.out.println("forbidden: " - + listKey); - if 
(listMap.containsKey(listKey)) { - System.out.println("-> HUHU"); - stringBuilder.append("PROP ") - .append(indexInPyLSD) - .append(" 0 ") - .append(listMap.get(listKey)) - .append(" -") - .append("; no bonds between ") - .append(indexInPyLSD) - .append(" (") - .append(atomType) - .append(", ") - .append(Statistics.roundDouble(signal.getShift(0), 2)) - .append(", SP") - .append(forbiddenNeighborHybridization) - .append(") and ") - .append(listMap.get(listKey)) - .append(" (") - .append(neighborAtomType) - .append(", SP") - .append(forbiddenNeighborHybridization) - .append(", ") - .append(forbiddenProtonsCount) - .append("H") - .append(")") - .append("\n"); - } - } - } - } - } - } - } - - } - public static void insertSetConnectionLISTsAndPROPs(final StringBuilder stringBuilder, - final Map listMap, - final List correlationList, - final Map indicesMap, - final Map>>> setNeighbors) { + public static void insertConnectionLISTsAndPROPs(final StringBuilder stringBuilder, + final Map listMap, + final List correlationList, + final Map indicesMap, + final Map>>> neighbors, + final String mode) { Correlation correlation; Signal signal; - String atomType, listKey; + String atomType; int indexInPyLSD; - Map>> setNeighborsTemp; + Map>> neighborsTemp; for (int i = 0; i < correlationList.size(); i++) { - if (setNeighbors.containsKey(i)) { + if (neighbors.containsKey(i)) { correlation = correlationList.get(i); signal = Utils.extractSignalFromCorrelation(correlation); atomType = correlation.getAtomType(); - setNeighborsTemp = setNeighbors.get(i); + neighborsTemp = neighbors.get(i); // put in the extracted information per correlation for (int k = 1; k < indicesMap.get(i).length; k++) { indexInPyLSD = (int) indicesMap.get(i)[k]; - for (final String neighborAtomType : setNeighborsTemp.keySet()) { + for (final String neighborAtomType : neighborsTemp.keySet()) { // forbid bonds to whole element groups if there is an empty map for an atom type - if (setNeighborsTemp.get(neighborAtomType) 
- .isEmpty()) { + if (neighborsTemp.get(neighborAtomType) + .isEmpty()) { stringBuilder.append("PROP ") .append(indexInPyLSD) - .append(" 1 ") + .append(mode.equals("forbid") + ? " 0 " + : " 1 ") .append(listMap.get(neighborAtomType)) - .append(" +") - .append("; at least one bond between ") + .append(mode.equals("forbid") + ? " -" + : " +") + .append(mode.equals("forbid") + ? "; no bonds between " + : "; at least one bond between ") .append(indexInPyLSD) .append(" (") .append(atomType) @@ -250,45 +155,46 @@ public static void insertSetConnectionLISTsAndPROPs(final StringBuilder stringBu .append(neighborAtomType) .append(")") .append("\n"); - } else { - for (final int setNeighborHybridization : setNeighborsTemp.get(neighborAtomType) - .keySet()) { - for (final int setProtonsCount : setNeighborsTemp.get(neighborAtomType) - .get(setNeighborHybridization)) { - listKey = buildListKey(neighborAtomType, setNeighborHybridization, setProtonsCount); - System.out.println("set: " - + listKey); - if (listMap.containsKey(listKey)) { - System.out.println("-> HUHU"); - stringBuilder.append("PROP ") - .append(indexInPyLSD) - .append(" 1 ") - .append(listMap.get(listKey)) - .append(" +") - .append("; at least one bond between ") - .append(indexInPyLSD) - .append(" (") - .append(atomType) - .append(", ") - .append(signal - != null - ? 
Statistics.roundDouble(signal.getShift(0), 2) - : "?") - .append(") and ") - .append(listMap.get(listKey)) - .append(" (") - .append(neighborAtomType) - .append(", SP") - .append(setNeighborHybridization) - .append(", ") - .append(setProtonsCount) - .append("H") - .append(")") - .append("\n"); - } - } - } } + // else { + // for (final int setNeighborHybridization : setNeighborsTemp.get(neighborAtomType) + // .keySet()) { + // for (final int setProtonsCount : setNeighborsTemp.get(neighborAtomType) + // .get(setNeighborHybridization)) { + // listKey = buildListKey(neighborAtomType, setNeighborHybridization, setProtonsCount); + // System.out.println("set: " + // + listKey); + // if (listMap.containsKey(listKey)) { + // System.out.println("-> HUHU"); + // stringBuilder.append("PROP ") + // .append(indexInPyLSD) + // .append(" 1 ") + // .append(listMap.get(listKey)) + // .append(" +") + // .append("; at least one bond between ") + // .append(indexInPyLSD) + // .append(" (") + // .append(atomType) + // .append(", ") + // .append(signal + // != null + // ? 
Statistics.roundDouble(signal.getShift(0), 2) + // : "?") + // .append(") and ") + // .append(listMap.get(listKey)) + // .append(" (") + // .append(neighborAtomType) + // .append(", SP") + // .append(setNeighborHybridization) + // .append(", ") + // .append(setProtonsCount) + // .append("H") + // .append(")") + // .append("\n"); + // } + // } + // } + // } } } } diff --git a/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java b/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java index 6711dce..5e7172c 100644 --- a/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java +++ b/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java @@ -501,11 +501,11 @@ private static String buildLISTsAndPROPs(final List correlationList // // insert list combinations of carbon and hybridization states // LISTAndPROPUtilities.insertHeavyAtomCombinationLISTs(stringBuilder, listMap, correlationList, indicesMap); // insert forbidden connection lists and properties - LISTAndPROPUtilities.insertForbiddenConnectionLISTsAndPROPs(stringBuilder, listMap, correlationList, indicesMap, - detectedConnectivities, forbiddenNeighbors); + LISTAndPROPUtilities.insertConnectionLISTsAndPROPs(stringBuilder, listMap, correlationList, indicesMap, + forbiddenNeighbors, "forbid"); // insert set connection lists and properties - LISTAndPROPUtilities.insertSetConnectionLISTsAndPROPs(stringBuilder, listMap, correlationList, indicesMap, - setNeighbors); + LISTAndPROPUtilities.insertConnectionLISTsAndPROPs(stringBuilder, listMap, correlationList, indicesMap, + setNeighbors, "allow"); return stringBuilder.toString(); } From 0cd0f955d17715a989c1c8c2760fa67a4623dbee Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Wed, 17 Nov 2021 23:39:12 +0100 Subject: [PATCH 332/405] chore: use common method for forbidden or set neighbors LIST and PROP creation --- src/casekit/nmr/lsd/LISTAndPROPUtilities.java | 4 +- .../nmr/lsd/PyLSDInputFileBuilder.java | 59 ++++++-- src/casekit/nmr/lsd/Utilities.java | 140 ++++++++++++++++++ 
.../nmr/lsd/model/ElucidationOptions.java | 1 + 4 files changed, 188 insertions(+), 16 deletions(-) diff --git a/src/casekit/nmr/lsd/LISTAndPROPUtilities.java b/src/casekit/nmr/lsd/LISTAndPROPUtilities.java index c051bd7..d7930d8 100644 --- a/src/casekit/nmr/lsd/LISTAndPROPUtilities.java +++ b/src/casekit/nmr/lsd/LISTAndPROPUtilities.java @@ -121,12 +121,12 @@ public static void insertConnectionLISTsAndPROPs(final StringBuilder stringBuild atomType = correlation.getAtomType(); neighborsTemp = neighbors.get(i); - // put in the extracted information per correlation + // put in the extracted information per correlation and equivalent for (int k = 1; k < indicesMap.get(i).length; k++) { indexInPyLSD = (int) indicesMap.get(i)[k]; for (final String neighborAtomType : neighborsTemp.keySet()) { - // forbid bonds to whole element groups if there is an empty map for an atom type + // forbid/set bonds to whole element groups if there is an empty map for an atom type if (neighborsTemp.get(neighborAtomType) .isEmpty()) { stringBuilder.append("PROP ") diff --git a/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java b/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java index 5e7172c..99b0fd6 100644 --- a/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java +++ b/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java @@ -510,7 +510,7 @@ private static String buildLISTsAndPROPs(final List correlationList return stringBuilder.toString(); } - private static String buildFilterDEFFs(final Map fexpMap, final String[] filterPaths) { + private static String buildDEFFs(final String[] filterPaths, final String[] pathsToNeighborsFiles) { final StringBuilder stringBuilder = new StringBuilder(); // DEFF -> add filters stringBuilder.append("; externally defined filters\n"); @@ -521,6 +521,11 @@ private static String buildFilterDEFFs(final Map fexpMap, final + counter, filterPath); counter++; } + for (final String pathToNeighborsFiles : pathsToNeighborsFiles) { + filters.put("F" + + counter, pathToNeighborsFiles); + 
counter++; + } if (!filters.isEmpty()) { filters.forEach((label, filePath) -> stringBuilder.append("DEFF ") @@ -529,13 +534,6 @@ private static String buildFilterDEFFs(final Map fexpMap, final .append(filePath) .append("\"\n")); stringBuilder.append("\n"); - - for (int i = 0; i - < filters.size(); i++) { - fexpMap.put("F" - + (i - + 1), false); - } } return stringBuilder.toString(); @@ -566,6 +564,43 @@ private static String buildFEXP(final Map fexpMap) { return stringBuilder.toString(); } + private static String buildDEFFsAndFEXP(final List correlationList, + final Map indicesMap, + final ElucidationOptions elucidationOptions, + final Map>>> forbiddenNeighbors, + final Map>>> setNeighbors) { + final StringBuilder stringBuilder = new StringBuilder(); + final Map fexpMap = new HashMap<>(); + for (int i = 0; i + < elucidationOptions.getFilterPaths().length; i++) { + fexpMap.put("F" + + (i + + 1), false); + } + // build and write neighbors files + if (Utilities.writeNeighborsFile(elucidationOptions.getPathsToNeighborsFiles()[0], correlationList, indicesMap, + forbiddenNeighbors)) { + fexpMap.put("F" + + (fexpMap.size() + + 1), false); + } + if (Utilities.writeNeighborsFile(elucidationOptions.getPathsToNeighborsFiles()[1], correlationList, indicesMap, + setNeighbors)) { + fexpMap.put("F" + + (fexpMap.size() + + 1), true); + } + // build DEFFs + stringBuilder.append( + buildDEFFs(elucidationOptions.getFilterPaths(), elucidationOptions.getPathsToNeighborsFiles())) + .append("\n"); + // build FEXP + stringBuilder.append(buildFEXP(fexpMap)) + .append("\n"); + + return stringBuilder.toString(); + } + private static String buildBONDByINADEQUATE(final List correlationList, final Map indicesMap) { final StringBuilder stringBuilder = new StringBuilder(); @@ -708,12 +743,8 @@ public static String buildPyLSDInputFileContent(final Data data, final String mf elucidationOptions.isAllowHeteroHeteroBonds())) .append("\n"); // DEFF and FEXP as filters (good/bad lists) - final Map 
fexpMap = new HashMap<>(); - stringBuilder.append(buildFilterDEFFs(fexpMap, elucidationOptions.getFilterPaths())) - .append("\n"); - System.out.println("fexpMap: " - + fexpMap); - stringBuilder.append(buildFEXP(fexpMap)) + stringBuilder.append(buildDEFFsAndFEXP(correlationList, indicesMap, elucidationOptions, forbiddenNeighbors, + setNeighbors)) .append("\n"); return stringBuilder.toString(); diff --git a/src/casekit/nmr/lsd/Utilities.java b/src/casekit/nmr/lsd/Utilities.java index 673fb6e..35bc694 100644 --- a/src/casekit/nmr/lsd/Utilities.java +++ b/src/casekit/nmr/lsd/Utilities.java @@ -1,6 +1,10 @@ package casekit.nmr.lsd; +import casekit.io.FileSystem; +import casekit.nmr.model.Signal; import casekit.nmr.model.nmrium.Correlation; +import casekit.nmr.utils.Statistics; +import casekit.nmr.utils.Utils; import java.util.*; import java.util.stream.Collectors; @@ -158,4 +162,140 @@ public static Map>> buildForbiddenNeighbors( return forbiddenNeighbors; } + + public static boolean writeNeighborsFile(final String pathToNeighborsFile, final List correlationList, + final Map indicesMap, + final Map>>> neighbors) { + final StringBuilder stringBuilder = new StringBuilder(); + Correlation correlation; + Signal signal; + String atomType; + int indexInPyLSD; + int sstrIndex = 1, sstrIndexCorrelation; + Map>> neighborsTemp; + for (int i = 0; i + < correlationList.size(); i++) { + if (neighbors.containsKey(i)) { + correlation = correlationList.get(i); + signal = Utils.extractSignalFromCorrelation(correlation); + atomType = correlation.getAtomType(); + neighborsTemp = neighbors.get(i); + + // put in the extracted information per correlation and equivalent + for (int k = 1; k + < indicesMap.get(i).length; k++) { + indexInPyLSD = (int) indicesMap.get(i)[k]; + for (final String neighborAtomType : neighborsTemp.keySet()) { + for (final Map.Entry> entryPerHybridization : neighborsTemp.get( + neighborAtomType) + .entrySet()) { + sstrIndexCorrelation = sstrIndex; + 
stringBuilder.append( + buildSSTR(sstrIndexCorrelation, atomType, correlation.getHybridization(), + correlation.getProtonsCount())); + stringBuilder.append("; ") + .append(atomType) + .append(" at ") + .append(signal + != null + ? Statistics.roundDouble(signal.getShift(0), 2) + : "?") + .append(" (") + .append(indexInPyLSD) + .append(")") + .append("\n"); + stringBuilder.append("ASGN S") + .append(sstrIndexCorrelation) + .append(" ") + .append(indexInPyLSD) + .append("\n"); + sstrIndex++; + + final List tempList = new ArrayList<>(); + if (entryPerHybridization.getKey() + != -1) { + tempList.add(entryPerHybridization.getKey()); + } + stringBuilder.append(buildSSTR(sstrIndex, neighborAtomType, tempList, + new ArrayList<>(entryPerHybridization.getValue()))) + .append("\n"); + stringBuilder.append("LINK S") + .append(sstrIndexCorrelation) + .append(" S") + .append(sstrIndex) + .append("\n"); + sstrIndex++; + } + } + } + stringBuilder.append("\n"); + } + } + + System.out.println(stringBuilder); + + + return !stringBuilder.toString() + .isEmpty() + && FileSystem.writeFile(pathToNeighborsFile, stringBuilder.toString()); + } + + private static String buildSSTR(final int sstrIndex, final String atomType, final List hybridization, + final List protonsCount) { + if (hybridization.isEmpty()) { + hybridization.addAll(Arrays.stream(Constants.defaultHybridizationMap.get(atomType)) + .boxed() + .collect(Collectors.toList())); + } + if (protonsCount.isEmpty()) { + protonsCount.addAll(Arrays.stream(Constants.defaultProtonsCountPerValencyMap.get(atomType)) + .boxed() + .collect(Collectors.toList())); + } + final StringBuilder stringBuilder = new StringBuilder(); + stringBuilder.append("SSTR S") + .append(sstrIndex) + .append(" ") + .append(atomType) + .append(" "); + if (hybridization.size() + == 1) { + stringBuilder.append(hybridization.get(0)) + .append(" "); + if (protonsCount.size() + == 1) { + stringBuilder.append(protonsCount.get(0)); + } else { + 
stringBuilder.append(buildMultipleValuesString(protonsCount)); + } + } else { + stringBuilder.append(buildMultipleValuesString(hybridization)); + stringBuilder.append(" "); + if (protonsCount.size() + == 1) { + stringBuilder.append(protonsCount.get(0)); + } else { + stringBuilder.append(buildMultipleValuesString(protonsCount)); + } + } + + return stringBuilder.toString(); + } + + private static String buildMultipleValuesString(final List values) { + final StringBuilder stringBuilder = new StringBuilder(); + stringBuilder.append("("); + for (int l = 0; l + < values.size(); l++) { + stringBuilder.append(values.get(l)); + if (l + < values.size() + - 1) { + stringBuilder.append(" "); + } + } + stringBuilder.append(")"); + + return stringBuilder.toString(); + } } diff --git a/src/casekit/nmr/lsd/model/ElucidationOptions.java b/src/casekit/nmr/lsd/model/ElucidationOptions.java index 84822d8..fe161d0 100644 --- a/src/casekit/nmr/lsd/model/ElucidationOptions.java +++ b/src/casekit/nmr/lsd/model/ElucidationOptions.java @@ -13,6 +13,7 @@ public class ElucidationOptions { // PyLSD options private String[] filterPaths; + private String[] pathsToNeighborsFiles; private boolean allowHeteroHeteroBonds; private boolean useElim; private int elimP1; From 337fb78e1a7dbb7b16a753d53ec324c7856500b3 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Thu, 18 Nov 2021 13:49:46 +0100 Subject: [PATCH 333/405] chore: moved buildFixedNeighborsByINADEQUATE method to Utilities class && created buildBONDByFixedNeighbors method --- .../nmr/lsd/PyLSDInputFileBuilder.java | 92 ++++----- .../nmr/lsd/RankedResultSDFParser.java | 191 ------------------ src/casekit/nmr/lsd/Utilities.java | 44 +++- 3 files changed, 90 insertions(+), 237 deletions(-) delete mode 100644 src/casekit/nmr/lsd/RankedResultSDFParser.java diff --git a/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java b/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java index 99b0fd6..82bf57d 100644 --- a/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java +++ 
b/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java @@ -273,8 +273,12 @@ private static String buildShiftString(final Correlation correlation) { private static String buildShiftsComment(final Correlation correlation1, final Correlation correlation2) { return "; " + + correlation1.getAtomType() + + ": " + buildShiftString(correlation1) + " -> " + + correlation2.getAtomType() + + ": " + buildShiftString(correlation2); } @@ -578,21 +582,24 @@ private static String buildDEFFsAndFEXP(final List correlationList, + 1), false); } // build and write neighbors files + final List pathsToNeighborsFilesToUse = new ArrayList<>(); if (Utilities.writeNeighborsFile(elucidationOptions.getPathsToNeighborsFiles()[0], correlationList, indicesMap, forbiddenNeighbors)) { fexpMap.put("F" + (fexpMap.size() + 1), false); + pathsToNeighborsFilesToUse.add(elucidationOptions.getPathsToNeighborsFiles()[0]); } if (Utilities.writeNeighborsFile(elucidationOptions.getPathsToNeighborsFiles()[1], correlationList, indicesMap, setNeighbors)) { fexpMap.put("F" + (fexpMap.size() + 1), true); + pathsToNeighborsFilesToUse.add(elucidationOptions.getPathsToNeighborsFiles()[1]); } // build DEFFs stringBuilder.append( - buildDEFFs(elucidationOptions.getFilterPaths(), elucidationOptions.getPathsToNeighborsFiles())) + buildDEFFs(elucidationOptions.getFilterPaths(), pathsToNeighborsFilesToUse.toArray(String[]::new))) .append("\n"); // build FEXP stringBuilder.append(buildFEXP(fexpMap)) @@ -600,48 +607,45 @@ private static String buildDEFFsAndFEXP(final List correlationList, return stringBuilder.toString(); } - - private static String buildBONDByINADEQUATE(final List correlationList, - final Map indicesMap) { + + private static String buildBONDByFixedNeighbors(final List correlationList, + final Map indicesMap, + final Map> fixedNeighbors) { final StringBuilder stringBuilder = new StringBuilder(); final Set uniqueSet = new HashSet<>(); - Correlation correlation; - for (int i = 0; i - < correlationList.size(); i++) { 
- correlation = correlationList.get(i); - // @TODO for now use INADEQUATE information of atoms without equivalences only - if (!correlation.getAtomType() - .equals("C") - || correlation.getEquivalence() + int correlationIndex1; + Correlation correlation1, correlation2; + for (final Map.Entry> entry : fixedNeighbors.entrySet()) { + correlationIndex1 = entry.getKey(); + correlation1 = correlationList.get(correlationIndex1); + if (correlation1.getEquivalence() > 1) { continue; } - for (final Link link : correlation.getLink()) { - if (link.getExperimentType() - .equals("inadequate")) { - for (final int matchIndex : link.getMatch()) { - // insert BOND pair once only and not if equivalences exist - if (!uniqueSet.contains(indicesMap.get(i)[1] - + " " - + indicesMap.get(matchIndex)[1]) - && correlationList.get(matchIndex) - .getEquivalence() - == 1) { - stringBuilder.append("BOND ") - .append(indicesMap.get(i)[1]) - .append(" ") - .append(indicesMap.get(matchIndex)[1]) - .append(buildShiftsComment(correlation, correlationList.get(matchIndex))) - .append("\n"); - uniqueSet.add(indicesMap.get(i)[1] - + " " - + indicesMap.get(matchIndex)[1]); - uniqueSet.add(indicesMap.get(matchIndex)[1] - + " " - + indicesMap.get(i)[1]); - } - } + for (final int correlationIndex2 : entry.getValue()) { + correlation2 = correlationList.get(correlationIndex2); + // @TODO for now use fixed neighbor information of atoms without equivalences only + if (correlation2.getEquivalence() + > 1) { + continue; + } + // insert BOND pair once only and not if equivalences exist + if (!uniqueSet.contains(indicesMap.get(correlationIndex1)[1] + + " " + + indicesMap.get(correlationIndex2)[1])) { + stringBuilder.append("BOND ") + .append(indicesMap.get(correlationIndex1)[1]) + .append(" ") + .append(indicesMap.get(correlationIndex2)[1]) + .append(buildShiftsComment(correlation1, correlation2)) + .append("\n"); + uniqueSet.add(indicesMap.get(correlationIndex1)[1] + + " " + + indicesMap.get(correlationIndex2)[1]); 
+ uniqueSet.add(indicesMap.get(correlationIndex2)[1] + + " " + + indicesMap.get(correlationIndex1)[1]); } } } @@ -649,13 +653,10 @@ private static String buildBONDByINADEQUATE(final List correlationL return stringBuilder.toString(); } - private static String buildBOND(final List correlationList, final Map indicesMap) { - final StringBuilder stringBuilder = new StringBuilder(); - - stringBuilder.append(buildBONDByINADEQUATE(correlationList, indicesMap)) - .append("\n"); - - return stringBuilder.toString(); + private static String buildBOND(final List correlationList, final Map indicesMap, + final Map> fixedNeighbors) { + return buildBONDByFixedNeighbors(correlationList, indicesMap, fixedNeighbors) + + "\n"; } public static String buildPyLSDInputFileContent(final Data data, final String mf, @@ -663,6 +664,7 @@ public static String buildPyLSDInputFileContent(final Data data, final String mf final Map>>> detectedConnectivities, final Map>>> forbiddenNeighbors, final Map>>> setNeighbors, + final Map> fixedNeighbors, final ElucidationOptions elucidationOptions) { final Map> state = data.getCorrelations() .getState(); @@ -734,7 +736,7 @@ public static String buildPyLSDInputFileContent(final Data data, final String mf }); // BOND (interpretation, INADEQUATE, previous assignments) -> input fragments - stringBuilder.append(buildBOND(correlationList, indicesMap)) + stringBuilder.append(buildBOND(correlationList, indicesMap, fixedNeighbors)) .append("\n"); // LIST PROP for certain limitations or properties of atoms in lists, e.g. 
hetero hetero bonds allowance diff --git a/src/casekit/nmr/lsd/RankedResultSDFParser.java b/src/casekit/nmr/lsd/RankedResultSDFParser.java deleted file mode 100644 index 6a6457a..0000000 --- a/src/casekit/nmr/lsd/RankedResultSDFParser.java +++ /dev/null @@ -1,191 +0,0 @@ -package casekit.nmr.lsd; - -import casekit.nmr.model.Assignment; -import casekit.nmr.model.DataSet; -import casekit.nmr.model.Signal; -import casekit.nmr.model.Spectrum; -import casekit.nmr.utils.Statistics; -import casekit.nmr.utils.Utils; -import org.openscience.cdk.exception.CDKException; -import org.openscience.cdk.interfaces.IAtomContainer; -import org.openscience.cdk.interfaces.IMolecularFormula; -import org.openscience.cdk.io.iterator.IteratingSDFReader; -import org.openscience.cdk.silent.SilentChemObjectBuilder; -import org.openscience.cdk.tools.CDKHydrogenAdder; -import org.openscience.cdk.tools.manipulator.AtomContainerManipulator; - -import java.io.*; -import java.nio.charset.StandardCharsets; -import java.util.*; - -@Deprecated -public class RankedResultSDFParser { - - public static List parseRankedResultSDFile(final String pathToFile, final String nucleus, - final double maxAverageDeviation) throws CDKException, FileNotFoundException { - return parseRankedResultSDFile(new FileReader(pathToFile), nucleus, maxAverageDeviation); - } - - public static List parseRankedResultSDFileContent(final String fileContent, final String nucleus, - final double maxAverageDeviation) throws CDKException { - final InputStream inputStream = new ByteArrayInputStream(fileContent.getBytes(StandardCharsets.UTF_8)); - return parseRankedResultSDFile(new InputStreamReader(inputStream), nucleus, maxAverageDeviation); - } - - public static List parseRankedResultSDFile(final Reader fileReader, final String nucleus, - final double maxAverageDeviation) throws CDKException { - final List dataSetList = new ArrayList<>(); - final IteratingSDFReader iterator = new IteratingSDFReader(fileReader, 
SilentChemObjectBuilder.getInstance()); - IAtomContainer structure; - Spectrum experimentalSpectrum, predictedSpectrum; - Assignment assignment; - HashMap meta; - final CDKHydrogenAdder hydrogenAdder = CDKHydrogenAdder.getInstance(SilentChemObjectBuilder.getInstance()); - IMolecularFormula mf; - LinkedHashMap shiftProperties1D; - String[] split; - Signal experimentalSignal; - double experimentalShift, predictedShift; - String multiplicity; - Map> signalShiftList; - DataSet dataSet; - Double[] deviations; - int signalCounter, matchedSignalIndex; - List closestSignalList; - Double rmsd, averageDeviation; - - while (iterator.hasNext()) { - structure = iterator.next(); - AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(structure); - hydrogenAdder.addImplicitHydrogens(structure); - Utils.setAromaticityAndKekulize(structure); - meta = new HashMap<>(); - meta.put("title", structure.getTitle()); - meta.put("id", structure.getProperty("nmrshiftdb2 ID")); - mf = Utils.getMolecularFormulaFromAtomContainer(structure); - meta.put("mf", Utils.molecularFormularToString(mf)); - try { - final String smiles = casekit.nmr.utils.Utils.getSmilesFromAtomContainer(structure); - meta.put("smiles", smiles); - } catch (final CDKException e) { - e.printStackTrace(); - } - shiftProperties1D = getShiftProperties1D(structure, Utils.getAtomTypeFromNucleus(nucleus)); - - experimentalSpectrum = new Spectrum(); - experimentalSpectrum.setNuclei(new String[]{nucleus}); - experimentalSpectrum.setSignals(new ArrayList<>()); - - - for (final Map.Entry shiftProperty1D : shiftProperties1D.entrySet()) { - split = shiftProperty1D.getValue() - .split("\\s"); - multiplicity = Utils.getMultiplicityFromProtonsCount(structure.getAtom(Integer.parseInt(split[0]) - - 1) - .getImplicitHydrogenCount()); - experimentalSignal = new Signal(); - experimentalSignal.setNuclei(new String[]{nucleus}); - experimentalSignal.setShifts(new Double[experimentalSignal.getNDim()]); - experimentalShift = 
Double.parseDouble(split[1]); // exp. shift - experimentalSignal.setShift(experimentalShift, 0); - experimentalSignal.setEquivalencesCount(1); - experimentalSignal.setMultiplicity(multiplicity); - experimentalSpectrum.addSignal(experimentalSignal); - } - assignment = new Assignment(); - assignment.setNuclei(new String[]{nucleus}); - assignment.initAssignments(experimentalSpectrum.getSignalCount()); - - predictedSpectrum = experimentalSpectrum.buildClone(); - - deviations = new Double[predictedSpectrum.getSignalCountWithEquivalences()]; - signalCounter = 0; - signalShiftList = new HashMap<>(); - for (final Map.Entry shiftProperty1D : shiftProperties1D.entrySet()) { - split = shiftProperty1D.getValue() - .split("\\s"); - multiplicity = Utils.getMultiplicityFromProtonsCount(structure.getAtom(Integer.parseInt(split[0]) - - 1) - .getImplicitHydrogenCount()); - experimentalShift = Double.parseDouble(split[1]); // exp. shift - predictedShift = Double.parseDouble(split[3]); // pred. shift - - // just to be sure that we take the right signal if equivalences are present - closestSignalList = experimentalSpectrum.pickByClosestShift(experimentalShift, 0, 0.0); - closestSignalList.retainAll(experimentalSpectrum.pickByMultiplicity(multiplicity)); - matchedSignalIndex = closestSignalList.get(0); - - deviations[signalCounter] = Math.abs(predictedShift - - experimentalShift); - signalShiftList.putIfAbsent(matchedSignalIndex, new ArrayList<>()); - signalShiftList.get(matchedSignalIndex) - .add(predictedShift); - assignment.addAssignmentEquivalence(0, matchedSignalIndex, Integer.parseInt(split[0]) - - 1); - signalCounter++; - } - for (final int signalIndex : signalShiftList.keySet()) { - predictedSpectrum.getSignal(signalIndex) - .setShift(Statistics.getMedian(signalShiftList.get(signalIndex)), 0); - predictedSpectrum.getSignal(signalIndex) - .setEquivalencesCount(signalShiftList.get(signalIndex) - .size()); - } - // if no spectrum could be built or the number of signals in spectrum 
is different than the atom number in molecule - if (Utils.getDifferenceSpectrumSizeAndMolecularFormulaCount(predictedSpectrum, mf, 0) - != 0) { - continue; - } - dataSet = new DataSet(structure, predictedSpectrum, assignment, meta); - - averageDeviation = Statistics.calculateAverageDeviation(deviations); - if (averageDeviation - != null - && averageDeviation - <= maxAverageDeviation) { - dataSet.addMetaInfo("averageDeviation", String.valueOf(averageDeviation)); - rmsd = Statistics.calculateRMSD(deviations); - dataSet.addMetaInfo("rmsd", String.valueOf(rmsd)); - - dataSetList.add(dataSet); - } - } - // pre-sort by RMSD value - dataSetList.sort((dataSet1, dataSet2) -> { - if (Double.parseDouble(dataSet1.getMeta() - .get("rmsd")) - < Double.parseDouble(dataSet2.getMeta() - .get("rmsd"))) { - return -1; - } else if (Double.parseDouble(dataSet1.getMeta() - .get("rmsd")) - > Double.parseDouble(dataSet2.getMeta() - .get("rmsd"))) { - return 1; - } - return 0; - }); - - return dataSetList; - } - - public static LinkedHashMap getShiftProperties1D(final IAtomContainer ac, final String atomType) { - final LinkedHashMap shiftProperties1D = new LinkedHashMap<>(); - String[] split; - for (final Object key : ac.getProperties() - .keySet()) { - if (key instanceof String - && ((String) key).startsWith("CS")) { - split = ((String) key).split("CS"); - if (ac.getAtom(Integer.parseInt(split[1]) - - 1) - .getSymbol() - .equals(atomType)) { - shiftProperties1D.put(split[1], ac.getProperty(key)); - } - } - } - - return shiftProperties1D; - } -} diff --git a/src/casekit/nmr/lsd/Utilities.java b/src/casekit/nmr/lsd/Utilities.java index 35bc694..7df7d55 100644 --- a/src/casekit/nmr/lsd/Utilities.java +++ b/src/casekit/nmr/lsd/Utilities.java @@ -3,6 +3,7 @@ import casekit.io.FileSystem; import casekit.nmr.model.Signal; import casekit.nmr.model.nmrium.Correlation; +import casekit.nmr.model.nmrium.Link; import casekit.nmr.utils.Statistics; import casekit.nmr.utils.Utils; @@ -223,12 +224,12 @@ 
public static boolean writeNeighborsFile(final String pathToNeighborsFile, final .append(sstrIndexCorrelation) .append(" S") .append(sstrIndex) + .append("\n") .append("\n"); sstrIndex++; } } } - stringBuilder.append("\n"); } } @@ -298,4 +299,45 @@ private static String buildMultipleValuesString(final List values) { return stringBuilder.toString(); } + + public static Map> buildFixedNeighborsByINADEQUATE(final List correlationList) { + final Map> fixedNeighbors = new HashMap<>(); + final Set uniqueSet = new HashSet<>(); + Correlation correlation; + for (int i = 0; i + < correlationList.size(); i++) { + correlation = correlationList.get(i); + // @TODO for now use INADEQUATE information of atoms without equivalences only + if (correlation.getEquivalence() + > 1) { + continue; + } + for (final Link link : correlation.getLink()) { + if (link.getExperimentType() + .equals("inadequate")) { + for (final int matchIndex : link.getMatch()) { + // insert BOND pair once only and not if equivalences exist + if (!uniqueSet.contains(i + + " " + + matchIndex) + && correlationList.get(matchIndex) + .getEquivalence() + == 1) { + fixedNeighbors.putIfAbsent(i, new HashSet<>()); + fixedNeighbors.get(i) + .add(matchIndex); + uniqueSet.add(i + + " " + + matchIndex); + uniqueSet.add(matchIndex + + " " + + i); + } + } + } + } + } + + return fixedNeighbors; + } } From 08376c73ea3a4104970cf609529c25c8f617609a Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Thu, 18 Nov 2021 20:29:54 +0100 Subject: [PATCH 334/405] chore: use Java 11 --- pom.xml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pom.xml b/pom.xml index 164cc32..bb1362a 100644 --- a/pom.xml +++ b/pom.xml @@ -15,13 +15,14 @@ src + org.apache.maven.plugins maven-compiler-plugin - 3.8.1 - 1.8 - 1.8 + 11 + 11 true + 3.8.1 org.apache.maven.plugins From 706b44aa0b05f418b51173255ce0008c4d5d19e1 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Tue, 23 Nov 2021 23:11:30 +0100 Subject: [PATCH 335/405] feat: use path 
length information from 2D signal if given --- .../nmr/lsd/PyLSDInputFileBuilder.java | 54 ++++++++++++++----- .../nmr/lsd/model/ElucidationOptions.java | 4 -- src/casekit/nmr/model/nmrium/PathLength.java | 15 ++++++ src/casekit/nmr/model/nmrium/Signal2D.java | 1 + 4 files changed, 57 insertions(+), 17 deletions(-) create mode 100644 src/casekit/nmr/model/nmrium/PathLength.java diff --git a/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java b/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java index 82bf57d..91f9148 100644 --- a/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java +++ b/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java @@ -330,15 +330,17 @@ private static String buildHSQC(final List correlationList, final i } private static String buildHMBC(final List correlationList, final int index, - final Map indicesMap, final int hmbcP3, final int hmbcP4) { + final Map indicesMap) { final Correlation correlation = correlationList.get(index); if (correlation.getAtomType() .equals("H")) { return null; } - final String defaultBondDistance = hmbcP3 + final String defaultBondDistanceString = 2 + " " - + hmbcP4; + + 3; + String bondDistanceString; + Map signal2DMap, pathLengthMap; final Set uniqueSet = new LinkedHashSet<>(); // in case of same content exists multiple times for (final Link link : correlation.getLink()) { if (link.getExperimentType() @@ -358,11 +360,24 @@ private static String buildHMBC(final List correlationList, final i .contains(index) && l == k)) { + bondDistanceString = null; + signal2DMap = (Map) link.getSignal(); + if (signal2DMap + != null + && signal2DMap.containsKey("pathLength")) { + pathLengthMap = (Map) signal2DMap.get("pathLength"); + bondDistanceString = pathLengthMap.get("min") + + " " + + pathLengthMap.get("max"); + } uniqueSet.add(indicesMap.get(index)[k] + " " + indicesMap.get(matchIndex)[l] + " " - + defaultBondDistance + + (bondDistanceString + != null + ? 
bondDistanceString + : defaultBondDistanceString) + buildShiftsComment(correlation, correlationList.get(matchIndex))); } @@ -381,15 +396,17 @@ private static String buildHMBC(final List correlationList, final i } private static String buildCOSY(final List correlationList, final int index, - final Map indicesMap, final int cosyP3, final int cosyP4) { + final Map indicesMap) { final Correlation correlation = correlationList.get(index); if (!correlation.getAtomType() .equals("H")) { return null; } - final String defaultBondDistance = cosyP3 + final String defaultBondDistanceString = 3 + " " - + cosyP4; + + 4; + String bondDistanceString; + Map signal2DMap, pathLengthMap; final Set uniqueSet = new LinkedHashSet<>(); // in case of same content exists multiple times for (final Link link : correlation.getLink()) { if (link.getExperimentType() @@ -404,11 +421,24 @@ private static String buildCOSY(final List correlationList, final i // only allow COSY values between possible equivalent protons and only one another non-equivalent proton if (indicesMap.get(matchIndex).length == 2) { + bondDistanceString = null; + signal2DMap = (Map) link.getSignal(); + if (signal2DMap + != null + && signal2DMap.containsKey("pathLength")) { + pathLengthMap = (Map) signal2DMap.get("pathLength"); + bondDistanceString = pathLengthMap.get("min") + + " " + + pathLengthMap.get("max"); + } uniqueSet.add(indicesMap.get(index)[k] + " " + indicesMap.get(matchIndex)[1] + " " - + defaultBondDistance + + (bondDistanceString + != null + ? 
bondDistanceString + : defaultBondDistanceString) + buildShiftsComment(correlation, correlationList.get(matchIndex))); } @@ -607,7 +637,7 @@ private static String buildDEFFsAndFEXP(final List correlationList, return stringBuilder.toString(); } - + private static String buildBONDByFixedNeighbors(final List correlationList, final Map indicesMap, final Map> fixedNeighbors) { @@ -715,11 +745,9 @@ public static String buildPyLSDInputFileContent(final Data data, final String mf collection.get("HSQC") .add(buildHSQC(correlationList, i, indicesMap)); collection.get("HMBC") - .add(buildHMBC(correlationList, i, indicesMap, elucidationOptions.getHmbcP3(), - elucidationOptions.getHmbcP4())); + .add(buildHMBC(correlationList, i, indicesMap)); collection.get("COSY") - .add(buildCOSY(correlationList, i, indicesMap, elucidationOptions.getCosyP3(), - elucidationOptions.getCosyP4())); + .add(buildCOSY(correlationList, i, indicesMap)); collection.get("SHIX") .add(buildSHIX(correlation, i, indicesMap)); collection.get("SHIH") diff --git a/src/casekit/nmr/lsd/model/ElucidationOptions.java b/src/casekit/nmr/lsd/model/ElucidationOptions.java index fe161d0..b0a4805 100644 --- a/src/casekit/nmr/lsd/model/ElucidationOptions.java +++ b/src/casekit/nmr/lsd/model/ElucidationOptions.java @@ -18,8 +18,4 @@ public class ElucidationOptions { private boolean useElim; private int elimP1; private int elimP2; - private int hmbcP3; - private int hmbcP4; - private int cosyP3; - private int cosyP4; } diff --git a/src/casekit/nmr/model/nmrium/PathLength.java b/src/casekit/nmr/model/nmrium/PathLength.java new file mode 100644 index 0000000..75e8296 --- /dev/null +++ b/src/casekit/nmr/model/nmrium/PathLength.java @@ -0,0 +1,15 @@ +package casekit.nmr.model.nmrium; + +import lombok.*; + +@NoArgsConstructor +@AllArgsConstructor +@Getter +@Setter +@ToString +public class PathLength { + + private int min; + private int max; + private String source; +} diff --git a/src/casekit/nmr/model/nmrium/Signal2D.java 
b/src/casekit/nmr/model/nmrium/Signal2D.java index ae21f5c..638b99c 100644 --- a/src/casekit/nmr/model/nmrium/Signal2D.java +++ b/src/casekit/nmr/model/nmrium/Signal2D.java @@ -40,6 +40,7 @@ public class Signal2D private Map x; private Map y; + private PathLength pathLength; public Signal2D(final Signal signal) { super(signal.getId(), signal.getKind(), signal.getMultiplicity(), signal.getSign()); From caeb7eb527b515634e2162bfa301bff5dcfff95a Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Fri, 26 Nov 2021 17:48:26 +0100 Subject: [PATCH 336/405] fix: do not check state of correlation data in buildPyLSDInputFileContent method anymore --- src/casekit/nmr/lsd/PyLSDInputFileBuilder.java | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java b/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java index 91f9148..c2e4ccf 100644 --- a/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java +++ b/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java @@ -696,17 +696,8 @@ public static String buildPyLSDInputFileContent(final Data data, final String mf final Map>>> setNeighbors, final Map> fixedNeighbors, final ElucidationOptions elucidationOptions) { - final Map> state = data.getCorrelations() - .getState(); - final boolean hasErrors = state.keySet() - .stream() - .anyMatch(atomType -> state.get(atomType) - .containsKey("error") - && !((Map) state.get(atomType) - .get("error")).isEmpty()); if (mf - != null - && !hasErrors) { + != null) { final List correlationList = data.getCorrelations() .getValues(); final Map elementCounts = new LinkedHashMap<>(Utils.getMolecularFormulaElementCounts(mf)); From aeeb3fe38ba660984cf93cd8a65e82dc59a44f70 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Wed, 8 Dec 2021 09:09:54 +0100 Subject: [PATCH 337/405] fix: simplification regarding equivalences and HSQC and HMBC section creation --- .../nmr/lsd/PyLSDInputFileBuilder.java | 68 ++++++++----------- 1 file changed, 29 insertions(+), 39 
deletions(-) diff --git a/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java b/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java index c2e4ccf..3f76c56 100644 --- a/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java +++ b/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java @@ -74,27 +74,21 @@ private static Map buildIndicesMap(final List co protonsCount = correlationList.get(matchIndex) .getProtonsCount() .get(0); - if (protonsCount - == 3) { - protonsCount = 1; - } - protonsToInsert += correlationList.get(matchIndex) - .getEquivalence() - * (protonsCount - / correlationList.get(matchIndex) + protonsToInsert += (correlation.getEquivalence() + / (double) protonsCount) + * correlationList.get(matchIndex) .getAttachment() .get("H") - .size()); + .size(); } } } indicesMap.put(i, new Object[1 + protonsToInsert]); indicesMap.get(i)[0] = correlation.getAtomType(); - for (int j = 0; j - < protonsToInsert; j++) { - indicesMap.get(i)[1 - + j] = protonIndexInPyLSDFile; + for (int j = 1; j + <= protonsToInsert; j++) { + indicesMap.get(i)[j] = protonIndexInPyLSDFile; protonIndexInPyLSDFile++; } } else { @@ -108,6 +102,17 @@ private static Map buildIndicesMap(final List co } } } + // System.out.println("\n\n"); + // for (final Map.Entry entry : indicesMap.entrySet()) { + // System.out.println(entry.getKey() + // + ": " + // + entry.getValue()[0]); + // for (int i = 1; i + // < entry.getValue().length; i++) { + // System.out.println(entry.getValue()[i]); + // } + // } + // System.out.println("\n\n"); return indicesMap; } @@ -290,37 +295,24 @@ private static String buildHSQC(final List correlationList, final i return null; } final StringBuilder stringBuilder = new StringBuilder(); - final Map protonEquivalenceIndexMap = new HashMap<>(); - int protonsCount; for (final Link link : correlation.getLink()) { if (link.getExperimentType() .equals("hsqc") || link.getExperimentType() .equals("hmqc")) { for (final int matchIndex : link.getMatch()) { - protonEquivalenceIndexMap.putIfAbsent(matchIndex, 1); 
// k = 1 in indicesMap // for each equivalence of heavy atom and attached protons for (int k = 1; k < indicesMap.get(index).length; k++) { - protonsCount = correlation.getProtonsCount() - .get(0); - // consider CH3 same as CH (avoid multiple entries in PyLSD input file) - if (protonsCount - == 3) { - protonsCount = 1; - } - for (int p = 0; p - < Math.min(protonsCount, correlationList.get(matchIndex) - .getEquivalence()); p++) { - stringBuilder.append("HSQC ") - .append(indicesMap.get(index)[k]) - .append(" ") - .append(indicesMap.get(matchIndex)[protonEquivalenceIndexMap.get(matchIndex)]) - .append(buildShiftsComment(correlation, correlationList.get(matchIndex))) - .append("\n"); - protonEquivalenceIndexMap.put(matchIndex, protonEquivalenceIndexMap.get(matchIndex) - + 1); - } + // for (int p = 1; p + // < indicesMap.get(matchIndex).length; p++) { + stringBuilder.append("HSQC ") + .append(indicesMap.get(index)[k]) + .append(" ") + .append(indicesMap.get(matchIndex)[k]) + .append(buildShiftsComment(correlation, correlationList.get(matchIndex))) + .append("\n"); + // } } } } @@ -357,9 +349,7 @@ private static String buildHMBC(final List correlationList, final i && correlationList.get(matchIndex) .getAttachment() .get(correlation.getAtomType()) - .contains(index) - && l - == k)) { + .contains(index))) { bondDistanceString = null; signal2DMap = (Map) link.getSignal(); if (signal2DMap @@ -412,7 +402,7 @@ private static String buildCOSY(final List correlationList, final i if (link.getExperimentType() .equals("cosy")) { for (final int matchIndex : link.getMatch()) { - // only add an COSY correlation if the two signals there is not equivalent + // only add a COSY entry if it is not from same signal if (!correlationList.get(matchIndex) .getId() .equals(correlation.getId())) { From acc15f3f1c320b0c20794ea8de1b8d8b6c4f2f87 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Thu, 9 Dec 2021 15:57:43 +0100 Subject: [PATCH 338/405] feat: introduction of Detections class --- 
.../nmr/lsd/PyLSDInputFileBuilder.java | 21 ++++++++----------- src/casekit/nmr/lsd/model/Detections.java | 21 +++++++++++++++++++ 2 files changed, 30 insertions(+), 12 deletions(-) create mode 100644 src/casekit/nmr/lsd/model/Detections.java diff --git a/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java b/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java index 3f76c56..7902682 100644 --- a/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java +++ b/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java @@ -1,5 +1,6 @@ package casekit.nmr.lsd; +import casekit.nmr.lsd.model.Detections; import casekit.nmr.lsd.model.ElucidationOptions; import casekit.nmr.model.Signal; import casekit.nmr.model.nmrium.Correlation; @@ -679,12 +680,7 @@ private static String buildBOND(final List correlationList, final M + "\n"; } - public static String buildPyLSDInputFileContent(final Data data, final String mf, - final Map> detectedHybridizations, - final Map>>> detectedConnectivities, - final Map>>> forbiddenNeighbors, - final Map>>> setNeighbors, - final Map> fixedNeighbors, + public static String buildPyLSDInputFileContent(final Data data, final String mf, final Detections detections, final ElucidationOptions elucidationOptions) { if (mf != null) { @@ -722,7 +718,7 @@ public static String buildPyLSDInputFileContent(final Data data, final String mf < correlationList.size(); i++) { correlation = correlationList.get(i); collection.get("MULT") - .add(buildMULT(correlation, i, indicesMap, detectedHybridizations)); + .add(buildMULT(correlation, i, indicesMap, detections.getDetectedHybridizations())); collection.get("HSQC") .add(buildHSQC(correlationList, i, indicesMap)); collection.get("HMBC") @@ -745,17 +741,18 @@ public static String buildPyLSDInputFileContent(final Data data, final String mf }); // BOND (interpretation, INADEQUATE, previous assignments) -> input fragments - stringBuilder.append(buildBOND(correlationList, indicesMap, fixedNeighbors)) + stringBuilder.append(buildBOND(correlationList, 
indicesMap, detections.getFixedNeighbors())) .append("\n"); // LIST PROP for certain limitations or properties of atoms in lists, e.g. hetero hetero bonds allowance - stringBuilder.append(buildLISTsAndPROPs(correlationList, indicesMap, elementCounts, detectedConnectivities, - forbiddenNeighbors, setNeighbors, + stringBuilder.append(buildLISTsAndPROPs(correlationList, indicesMap, elementCounts, + detections.getDetectedConnectivities(), + detections.getForbiddenNeighbors(), detections.getSetNeighbors(), elucidationOptions.isAllowHeteroHeteroBonds())) .append("\n"); // DEFF and FEXP as filters (good/bad lists) - stringBuilder.append(buildDEFFsAndFEXP(correlationList, indicesMap, elucidationOptions, forbiddenNeighbors, - setNeighbors)) + stringBuilder.append(buildDEFFsAndFEXP(correlationList, indicesMap, elucidationOptions, + detections.getForbiddenNeighbors(), detections.getSetNeighbors())) .append("\n"); return stringBuilder.toString(); diff --git a/src/casekit/nmr/lsd/model/Detections.java b/src/casekit/nmr/lsd/model/Detections.java new file mode 100644 index 0000000..0dd3cc7 --- /dev/null +++ b/src/casekit/nmr/lsd/model/Detections.java @@ -0,0 +1,21 @@ +package casekit.nmr.lsd.model; + +import lombok.*; + +import java.util.List; +import java.util.Map; +import java.util.Set; + +@NoArgsConstructor +@AllArgsConstructor +@Getter +@Setter +@ToString +public class Detections { + + private Map> detectedHybridizations; + private Map>>> detectedConnectivities; + private Map>>> forbiddenNeighbors; + private Map>>> setNeighbors; + private Map> fixedNeighbors; +} From a72534b63f51348ec5a9f7e8a13d6140f303e108 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Sun, 12 Dec 2021 23:13:41 +0100 Subject: [PATCH 339/405] chore: expect Correlations as input instead of Data in buildPyLSDInputFileContent --- src/casekit/nmr/lsd/PyLSDInputFileBuilder.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java 
b/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java index 7902682..ab7c8d8 100644 --- a/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java +++ b/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java @@ -4,7 +4,7 @@ import casekit.nmr.lsd.model.ElucidationOptions; import casekit.nmr.model.Signal; import casekit.nmr.model.nmrium.Correlation; -import casekit.nmr.model.nmrium.Data; +import casekit.nmr.model.nmrium.Correlations; import casekit.nmr.model.nmrium.Link; import casekit.nmr.utils.Statistics; import casekit.nmr.utils.Utils; @@ -680,12 +680,12 @@ private static String buildBOND(final List correlationList, final M + "\n"; } - public static String buildPyLSDInputFileContent(final Data data, final String mf, final Detections detections, + public static String buildPyLSDInputFileContent(final Correlations correlations, final String mf, + final Detections detections, final ElucidationOptions elucidationOptions) { if (mf != null) { - final List correlationList = data.getCorrelations() - .getValues(); + final List correlationList = correlations.getValues(); final Map elementCounts = new LinkedHashMap<>(Utils.getMolecularFormulaElementCounts(mf)); final StringBuilder stringBuilder = new StringBuilder(); // create header From 93549cc3035d0284b82276f5f2f73ed46a992b0f Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Tue, 14 Dec 2021 14:03:48 +0100 Subject: [PATCH 340/405] chore: extension of LSD Constants --- src/casekit/nmr/lsd/Constants.java | 74 ++++-------------------------- 1 file changed, 10 insertions(+), 64 deletions(-) diff --git a/src/casekit/nmr/lsd/Constants.java b/src/casekit/nmr/lsd/Constants.java index 202fa30..ad40f78 100644 --- a/src/casekit/nmr/lsd/Constants.java +++ b/src/casekit/nmr/lsd/Constants.java @@ -1,6 +1,8 @@ package casekit.nmr.lsd; -import java.util.*; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; public class Constants { @@ -10,13 +12,13 @@ public class Constants { public static final Map defaultProtonsCountPerValencyMap 
= createDefaultProtonsCountPerValencyMap(); public static final Map defaultAtomLabelMap = createDefaultAtomLabelMap(); public static final Map hybridizationConversionMap = createHybridizationConversionMap(); - public static final Map>> hybridizationsByProtonsCountMap = createHybridizationsByProtonsCountMap(); private static Map createNucleiMap() { final Map nuclei = new HashMap<>(); nuclei.put("C", "13C"); nuclei.put("N", "15N"); nuclei.put("H", "1H"); + nuclei.put("F", "19F"); return Collections.unmodifiableMap(nuclei); } @@ -29,6 +31,8 @@ private static Map createDefaultHybridizationMap() { defaultHybridization.put("O", new int[]{2, 3}); defaultHybridization.put("I", new int[]{3}); defaultHybridization.put("F", new int[]{3}); + defaultHybridization.put("Cl", new int[]{3}); + defaultHybridization.put("Br", new int[]{3}); return Collections.unmodifiableMap(defaultHybridization); } @@ -46,6 +50,8 @@ private static Map createDefaultProtonsCountPerValencyMap() { defaultProtonsCountPerValency.put("O", new int[]{0, 1}); defaultProtonsCountPerValency.put("I", new int[]{0}); defaultProtonsCountPerValency.put("F", new int[]{0}); + defaultProtonsCountPerValency.put("Cl", new int[]{0}); + defaultProtonsCountPerValency.put("Br", new int[]{0}); return defaultProtonsCountPerValency; } @@ -58,6 +64,8 @@ private static Map createDefaultAtomLabelMap() { defaultAtomLabel.put("S", "S246"); defaultAtomLabel.put("I", "I"); defaultAtomLabel.put("F", "F"); + defaultAtomLabel.put("Cl", "Cl"); + defaultAtomLabel.put("Br", "Br"); return Collections.unmodifiableMap(defaultAtomLabel); } @@ -74,66 +82,4 @@ private static Map createHybridizationConversionMap() { return Collections.unmodifiableMap(hybridizationConversionMap); } - - private static Map>> createHybridizationsByProtonsCountMap() { - final Map>> hybridizationsByProtonsCountMap = new HashMap<>(); - hybridizationsByProtonsCountMap.put("C", new HashMap<>()); - hybridizationsByProtonsCountMap.get("C") - .put(4, new HashSet<>()); - 
hybridizationsByProtonsCountMap.get("C") - .get(4) - .add(3); - hybridizationsByProtonsCountMap.get("C") - .put(3, new HashSet<>()); - hybridizationsByProtonsCountMap.get("C") - .get(3) - .add(3); - hybridizationsByProtonsCountMap.get("C") - .put(2, new HashSet<>()); - hybridizationsByProtonsCountMap.get("C") - .get(2) - .add(3); - hybridizationsByProtonsCountMap.get("C") - .get(2) - .add(2); - hybridizationsByProtonsCountMap.get("C") - .put(1, new HashSet<>()); - hybridizationsByProtonsCountMap.get("C") - .get(1) - .add(3); - hybridizationsByProtonsCountMap.get("C") - .get(1) - .add(2); - hybridizationsByProtonsCountMap.get("C") - .get(1) - .add(1); - hybridizationsByProtonsCountMap.get("C") - .put(0, new HashSet<>()); - hybridizationsByProtonsCountMap.get("C") - .get(0) - .add(3); - hybridizationsByProtonsCountMap.get("C") - .get(0) - .add(2); - hybridizationsByProtonsCountMap.get("C") - .get(0) - .add(1); - // N (3) - hybridizationsByProtonsCountMap.put("N", new HashMap<>()); - hybridizationsByProtonsCountMap.get("N") - .put(3, new HashSet<>()); - hybridizationsByProtonsCountMap.get("C") - .get(3) - .add(3); - hybridizationsByProtonsCountMap.get("N") - .put(2, new HashSet<>()); - hybridizationsByProtonsCountMap.get("C") - .get(2) - .add(3); - hybridizationsByProtonsCountMap.get("C") - .get(2) - .add(2); - - return Collections.unmodifiableMap(hybridizationsByProtonsCountMap); - } } From a38c3996175f9f8af5c81f4adcb372300193d9c2 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Tue, 14 Dec 2021 14:20:21 +0100 Subject: [PATCH 341/405] fix: mentioning of casekit in PyLSD header builder --- src/casekit/nmr/lsd/PyLSDInputFileBuilder.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java b/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java index ab7c8d8..359f850 100644 --- a/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java +++ b/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java @@ -16,7 +16,7 @@ public class 
PyLSDInputFileBuilder { private static String buildHeader() { final StringBuilder stringBuilder = new StringBuilder(); - stringBuilder.append("; PyLSD input file created by webCASE\n"); + stringBuilder.append("; PyLSD input file created by casekit (https://github.com/michaelwenk/casekit)\n"); final SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd 'at' HH:mm:ss z"); final Date date = new Date(System.currentTimeMillis()); stringBuilder.append("; ") From fb89655d719c94b8c3424aa0edf443279e07c46b Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Wed, 15 Dec 2021 10:11:33 +0100 Subject: [PATCH 342/405] chore: removed unused variables in PyLSDInputFileBuilder --- src/casekit/nmr/lsd/PyLSDInputFileBuilder.java | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java b/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java index 359f850..28bfefd 100644 --- a/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java +++ b/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java @@ -50,8 +50,7 @@ private static String buildELIM(final int elimP1, final int elimP2) { + elimP2; } - private static Map buildIndicesMap(final List correlationList, - final Map elementCounts) { + private static Map buildIndicesMap(final List correlationList) { // index in correlation data -> [atom type, indices in PyLSD file...] 
final Map indicesMap = new HashMap<>(); // init element indices within correlations with same order as in correlation data input @@ -510,7 +509,6 @@ private static String buildSHIH(final Correlation correlation, final int index, private static String buildLISTsAndPROPs(final List correlationList, final Map indicesMap, final Map elementCounts, - final Map>>> detectedConnectivities, final Map>>> forbiddenNeighbors, final Map>>> setNeighbors, final boolean allowHeteroHeteroBonds) { @@ -711,7 +709,7 @@ public static String buildPyLSDInputFileContent(final Correlations correlations, collection.put("SHIX", new ArrayList<>()); collection.put("SHIH", new ArrayList<>()); // index in correlation data -> [atom type, index in PyLSD file] - final Map indicesMap = buildIndicesMap(correlationList, elementCounts); + final Map indicesMap = buildIndicesMap(correlationList); Correlation correlation; for (int i = 0; i @@ -745,10 +743,9 @@ public static String buildPyLSDInputFileContent(final Correlations correlations, .append("\n"); // LIST PROP for certain limitations or properties of atoms in lists, e.g. 
hetero hetero bonds allowance - stringBuilder.append(buildLISTsAndPROPs(correlationList, indicesMap, elementCounts, - detections.getDetectedConnectivities(), - detections.getForbiddenNeighbors(), detections.getSetNeighbors(), - elucidationOptions.isAllowHeteroHeteroBonds())) + stringBuilder.append( + buildLISTsAndPROPs(correlationList, indicesMap, elementCounts, detections.getForbiddenNeighbors(), + detections.getSetNeighbors(), elucidationOptions.isAllowHeteroHeteroBonds())) .append("\n"); // DEFF and FEXP as filters (good/bad lists) stringBuilder.append(buildDEFFsAndFEXP(correlationList, indicesMap, elucidationOptions, From 4b3d4b3392ed58349a1d59de75c2fa2694cb734a Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Wed, 15 Dec 2021 19:01:28 +0100 Subject: [PATCH 343/405] chore: removed unused variables in PyLSDInputFileBuilder --- src/casekit/nmr/lsd/Utilities.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/casekit/nmr/lsd/Utilities.java b/src/casekit/nmr/lsd/Utilities.java index 7df7d55..f91c9f2 100644 --- a/src/casekit/nmr/lsd/Utilities.java +++ b/src/casekit/nmr/lsd/Utilities.java @@ -14,6 +14,11 @@ public class Utilities { public static void reduceDefaultHybridizationsAndProtonCountsOfHeteroAtoms(final List correlationList, final Map>>> detectedConnectivities) { + if (detectedConnectivities + == null + || detectedConnectivities.isEmpty()) { + return; + } final Map> allowedNeighborAtomHybridizations = buildAllowedNeighborAtomHybridizations( correlationList, detectedConnectivities); final Map> allowedNeighborAtomProtonCounts = buildAllowedNeighborAtomProtonCounts( From e854c2c7317380571f3b5a432a36a06bcb1c2164 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Thu, 23 Dec 2021 12:46:22 +0100 Subject: [PATCH 344/405] fix: set hybridizations of correlations too after detecting them --- src/casekit/nmr/lsd/Utilities.java | 36 +++++++++++++++++++++++------- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/src/casekit/nmr/lsd/Utilities.java 
b/src/casekit/nmr/lsd/Utilities.java index f91c9f2..cdb0c80 100644 --- a/src/casekit/nmr/lsd/Utilities.java +++ b/src/casekit/nmr/lsd/Utilities.java @@ -13,7 +13,8 @@ public class Utilities { public static void reduceDefaultHybridizationsAndProtonCountsOfHeteroAtoms(final List correlationList, - final Map>>> detectedConnectivities) { + final Map>>> detectedConnectivities, + final Map> detectedHybridizations) { if (detectedConnectivities == null || detectedConnectivities.isEmpty()) { @@ -25,7 +26,10 @@ public static void reduceDefaultHybridizationsAndProtonCountsOfHeteroAtoms(final correlationList, detectedConnectivities); // hetero atoms can bond to carbons only, due to that we can use further connectivity information // do not allow bond between carbon and hetero atoms in certain hybridization states and proton counts - for (final Correlation correlation : correlationList) { + Correlation correlation; + for (int i = 0; i + < correlationList.size(); i++) { + correlation = correlationList.get(i); // ignore C and H atoms if (correlation.getAtomType() .equals("C") @@ -33,12 +37,26 @@ public static void reduceDefaultHybridizationsAndProtonCountsOfHeteroAtoms(final .equals("H")) { continue; } + final Set hybridizationsToAdd = allowedNeighborAtomHybridizations.containsKey( + correlation.getAtomType()) + ? allowedNeighborAtomHybridizations.get(correlation.getAtomType()) + : Arrays.stream(Constants.defaultHybridizationMap.get( + correlation.getAtomType())) + .boxed() + .collect(Collectors.toSet()); + final Set protonCountsToAdd = allowedNeighborAtomProtonCounts.containsKey( + correlation.getAtomType()) + ? 
allowedNeighborAtomProtonCounts.get(correlation.getAtomType()) + : Arrays.stream(Constants.defaultProtonsCountPerValencyMap.get( + correlation.getAtomType())) + .boxed() + .collect(Collectors.toSet()); // but only if we have seen the hetero atom type in connectivity statistics // and hybridization states or protons count was not set beforehand if (correlation.getHybridization() .isEmpty()) { correlation.getHybridization() - .addAll(allowedNeighborAtomHybridizations.get(correlation.getAtomType())); + .addAll(hybridizationsToAdd); } else if (correlation.getEdited() != null && correlation.getEdited() @@ -47,22 +65,24 @@ public static void reduceDefaultHybridizationsAndProtonCountsOfHeteroAtoms(final .get("hybridization") && allowedNeighborAtomHybridizations.containsKey(correlation.getAtomType())) { correlation.getHybridization() - .retainAll(allowedNeighborAtomHybridizations.get(correlation.getAtomType())); + .retainAll(hybridizationsToAdd); } if (correlation.getProtonsCount() .isEmpty()) { correlation.getProtonsCount() - .addAll(allowedNeighborAtomProtonCounts.get(correlation.getAtomType())); + .addAll(protonCountsToAdd); } else if (correlation.getEdited() != null && correlation.getEdited() .containsKey("protonsCount") && !correlation.getEdited() - .get("protonsCount") - && allowedNeighborAtomProtonCounts.containsKey(correlation.getAtomType())) { + .get("protonsCount")) { correlation.getProtonsCount() - .retainAll(allowedNeighborAtomProtonCounts.get(correlation.getAtomType())); + .retainAll(protonCountsToAdd); } + detectedHybridizations.putIfAbsent(i, new ArrayList<>()); + detectedHybridizations.get(i) + .addAll(correlation.getHybridization()); } } From 1917470ca1902be046e248469509ad0a2473876b Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Thu, 23 Dec 2021 12:47:23 +0100 Subject: [PATCH 345/405] chore: do not use specific neighbor hybridization states for now && simplifications --- src/casekit/nmr/lsd/LISTAndPROPUtilities.java | 185 ++++++++++-------- 
.../nmr/lsd/PyLSDInputFileBuilder.java | 44 +++-- 2 files changed, 121 insertions(+), 108 deletions(-) diff --git a/src/casekit/nmr/lsd/LISTAndPROPUtilities.java b/src/casekit/nmr/lsd/LISTAndPROPUtilities.java index d7930d8..74b97ec 100644 --- a/src/casekit/nmr/lsd/LISTAndPROPUtilities.java +++ b/src/casekit/nmr/lsd/LISTAndPROPUtilities.java @@ -9,44 +9,53 @@ public class LISTAndPROPUtilities { - public static void insertELEM(final StringBuilder stringBuilder, final Map listMap, + public static void insertELEM(final StringBuilder stringBuilder, final Map listMap, final Set atomTypesByMf) { final Set atomTypes = new HashSet<>(atomTypesByMf); atomTypes.remove("H"); for (final String atomType : atomTypes) { - listMap.put(atomType, "L" - + (listMap.size() - + 1)); + listMap.put(atomType, new Object[]{"L" + + (listMap.size() + + 1)}); stringBuilder.append("ELEM") .append(" ") - .append(listMap.get(atomType)) + .append(listMap.get(atomType)[0]) .append(" ") .append(atomType) .append("\n"); } } - public static void insertNoHeteroHeteroBonds(final StringBuilder stringBuilder, final Map listMap) { + public static void insertNoHeteroHeteroBonds(final StringBuilder stringBuilder, + final Map listMap) { // create hetero atom list automatically to forbid hetero-hetero bonds - stringBuilder.append("HETE L1") + listMap.put("HETE", new Object[]{"L" + + (listMap.size() + + 1)}); + stringBuilder.append("HETE ") + .append((String) listMap.get("HETE")[0]) .append("; hetero atoms\n"); stringBuilder.append("PROP L1 0 L1 -; no hetero-hetero bonds\n"); - listMap.put("HETE", "L1"); } - private static String buildListKey(final String atomType, final int hybridization, final int protonsCount) { + private static String buildListKey(final String atomType, final List hybridizations, + final List protonsCounts) { return atomType + "_" - + hybridization + + (!hybridizations.isEmpty() + ? hybridizations + : "*") + "_" - + protonsCount; + + (!protonsCounts.isEmpty() + ? 
protonsCounts + : "*"); } public static void insertHeavyAtomCombinationLISTs(final StringBuilder stringBuilder, - final Map listMap, + final Map listMap, final List correlationList, final Map indicesMap) { - final Map> atomIndicesMap = new HashMap<>(); + final Map> atomIndicesMap = new LinkedHashMap<>(); Correlation correlation; int indexInPyLSD; String listKey; @@ -65,9 +74,8 @@ public static void insertHeavyAtomCombinationLISTs(final StringBuilder stringBui != 1) { continue; } - listKey = buildListKey(correlation.getAtomType(), correlation.getHybridization() - .get(0), correlation.getProtonsCount() - .get(0)); + listKey = buildListKey(correlation.getAtomType(), new ArrayList<>(), //correlation.getHybridization(), + correlation.getProtonsCount()); indexInPyLSD = (int) indicesMap.get(i)[k]; atomIndicesMap.putIfAbsent(listKey, new HashSet<>()); atomIndicesMap.get(listKey) @@ -81,7 +89,7 @@ public static void insertHeavyAtomCombinationLISTs(final StringBuilder stringBui .append(listMap.size() + 1); - for (final Integer pyLSDAtomIndex : combinationEntry.getValue()) { + for (final int pyLSDAtomIndex : combinationEntry.getValue()) { stringBuilder.append(" ") .append(pyLSDAtomIndex); } @@ -95,24 +103,68 @@ public static void insertHeavyAtomCombinationLISTs(final StringBuilder stringBui .append(split[1]) .append("\n"); - listMap.put(combinationEntry.getKey(), "L" - + (listMap.size() - + 1)); + listMap.put(combinationEntry.getKey(), new Object[]{"L" + + (listMap.size() + + 1), combinationEntry.getValue().size()}); } } + private static boolean checkSkipPROPInsertion(final Map listMap, + final Map usedPropsCount, final String listKey) { + if (!listMap.containsKey(listKey)) { + return true; + } + // LSD crashes if we try to use more atoms with specific hybridization or/and proton count + // thus count and limit that usage as following: + // list name -> number of usages + usedPropsCount.putIfAbsent((String) listMap.get(listKey)[0], 1); + return listMap.get(listKey).length + > 
1 + && usedPropsCount.get((String) listMap.get(listKey)[0]) + > (int) listMap.get(listKey)[1]; + } + + private static void insertPROP(final StringBuilder stringBuilder, final Map listMap, + final String atomType, final Signal signal, final String neighborAtomType, + final int indexInPyLSD, final String listKey, final String mode) { + stringBuilder.append("PROP ") + .append(indexInPyLSD) + .append(mode.equals("forbid") + ? " 0 " + : " 1 ") + .append(listMap.get(listKey)[0]) + .append(mode.equals("forbid") + ? " -" + : " +") + .append(mode.equals("forbid") + ? "; no bonds between " + : "; at least one bond between ") + .append(indexInPyLSD) + .append(" (") + .append(atomType) + .append(", ") + .append(signal + != null + ? Statistics.roundDouble(signal.getShift(0), 2) + : "?") + .append(") and ") + .append(Arrays.toString(listMap.get(listKey))) + .append("\n"); + } + public static void insertConnectionLISTsAndPROPs(final StringBuilder stringBuilder, - final Map listMap, + final Map listMap, final List correlationList, final Map indicesMap, final Map>>> neighbors, final String mode) { Correlation correlation; Signal signal; - String atomType; + String atomType, listKey; int indexInPyLSD; Map>> neighborsTemp; + final Map usedPropsCount = new HashMap<>(); for (int i = 0; i < correlationList.size(); i++) { if (neighbors.containsKey(i)) { @@ -129,72 +181,31 @@ public static void insertConnectionLISTsAndPROPs(final StringBuilder stringBuild // forbid/set bonds to whole element groups if there is an empty map for an atom type if (neighborsTemp.get(neighborAtomType) .isEmpty()) { - stringBuilder.append("PROP ") - .append(indexInPyLSD) - .append(mode.equals("forbid") - ? " 0 " - : " 1 ") - .append(listMap.get(neighborAtomType)) - .append(mode.equals("forbid") - ? " -" - : " +") - .append(mode.equals("forbid") - ? "; no bonds between " - : "; at least one bond between ") - .append(indexInPyLSD) - .append(" (") - .append(atomType) - .append(", ") - .append(signal - != null - ? 
Statistics.roundDouble(signal.getShift(0), 2) - : "?") - .append(") and ") - .append(listMap.get(neighborAtomType)) - .append(" (") - .append(neighborAtomType) - .append(")") - .append("\n"); + insertPROP(stringBuilder, listMap, atomType, signal, neighborAtomType, indexInPyLSD, + neighborAtomType, mode); + } else { + for (final int neighborHybridization : neighborsTemp.get(neighborAtomType) + .keySet()) { + for (final int protonsCount : neighborsTemp.get(neighborAtomType) + .get(neighborHybridization)) { + listKey = buildListKey(neighborAtomType, neighborHybridization + == -1 + ? new ArrayList<>() + : List.of(neighborHybridization), + List.of(protonsCount)); + if (checkSkipPROPInsertion(listMap, usedPropsCount, listKey)) { + continue; + } + if (listMap.containsKey(listKey)) { + insertPROP(stringBuilder, listMap, atomType, signal, neighborAtomType, + indexInPyLSD, listKey, mode); + usedPropsCount.put((String) listMap.get(listKey)[0], + usedPropsCount.get((String) listMap.get(listKey)[0]) + + 1); + } + } + } } - // else { - // for (final int setNeighborHybridization : setNeighborsTemp.get(neighborAtomType) - // .keySet()) { - // for (final int setProtonsCount : setNeighborsTemp.get(neighborAtomType) - // .get(setNeighborHybridization)) { - // listKey = buildListKey(neighborAtomType, setNeighborHybridization, setProtonsCount); - // System.out.println("set: " - // + listKey); - // if (listMap.containsKey(listKey)) { - // System.out.println("-> HUHU"); - // stringBuilder.append("PROP ") - // .append(indexInPyLSD) - // .append(" 1 ") - // .append(listMap.get(listKey)) - // .append(" +") - // .append("; at least one bond between ") - // .append(indexInPyLSD) - // .append(" (") - // .append(atomType) - // .append(", ") - // .append(signal - // != null - // ? 
Statistics.roundDouble(signal.getShift(0), 2) - // : "?") - // .append(") and ") - // .append(listMap.get(listKey)) - // .append(" (") - // .append(neighborAtomType) - // .append(", SP") - // .append(setNeighborHybridization) - // .append(", ") - // .append(setProtonsCount) - // .append("H") - // .append(")") - // .append("\n"); - // } - // } - // } - // } } } } diff --git a/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java b/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java index 28bfefd..bb3ab71 100644 --- a/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java +++ b/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java @@ -513,7 +513,8 @@ private static String buildLISTsAndPROPs(final List correlationList final Map>>> setNeighbors, final boolean allowHeteroHeteroBonds) { final StringBuilder stringBuilder = new StringBuilder(); - final Map listMap = new HashMap<>(); + // list key -> [list name, size] + final Map listMap = new HashMap<>(); // LIST and PROP for hetero hetero bonds to disallow if (!allowHeteroHeteroBonds) { @@ -521,8 +522,8 @@ private static String buildLISTsAndPROPs(final List correlationList } // insert ELEM for each heavy atom type in MF LISTAndPROPUtilities.insertELEM(stringBuilder, listMap, elementCounts.keySet()); - // // insert list combinations of carbon and hybridization states - // LISTAndPROPUtilities.insertHeavyAtomCombinationLISTs(stringBuilder, listMap, correlationList, indicesMap); + // insert list combinations of carbon and hybridization states + LISTAndPROPUtilities.insertHeavyAtomCombinationLISTs(stringBuilder, listMap, correlationList, indicesMap); // insert forbidden connection lists and properties LISTAndPROPUtilities.insertConnectionLISTsAndPROPs(stringBuilder, listMap, correlationList, indicesMap, forbiddenNeighbors, "forbid"); @@ -536,7 +537,6 @@ private static String buildLISTsAndPROPs(final List correlationList private static String buildDEFFs(final String[] filterPaths, final String[] pathsToNeighborsFiles) { final StringBuilder stringBuilder 
= new StringBuilder(); // DEFF -> add filters - stringBuilder.append("; externally defined filters\n"); final Map filters = new LinkedHashMap<>(); int counter = 1; for (final String filterPath : filterPaths) { @@ -551,6 +551,7 @@ private static String buildDEFFs(final String[] filterPaths, final String[] path } if (!filters.isEmpty()) { + stringBuilder.append("; externally defined filters\n"); filters.forEach((label, filePath) -> stringBuilder.append("DEFF ") .append(label) .append(" \"") @@ -600,25 +601,26 @@ private static String buildDEFFsAndFEXP(final List correlationList, + (i + 1), false); } - // build and write neighbors files - final List pathsToNeighborsFilesToUse = new ArrayList<>(); - if (Utilities.writeNeighborsFile(elucidationOptions.getPathsToNeighborsFiles()[0], correlationList, indicesMap, - forbiddenNeighbors)) { - fexpMap.put("F" - + (fexpMap.size() - + 1), false); - pathsToNeighborsFilesToUse.add(elucidationOptions.getPathsToNeighborsFiles()[0]); - } - if (Utilities.writeNeighborsFile(elucidationOptions.getPathsToNeighborsFiles()[1], correlationList, indicesMap, - setNeighbors)) { - fexpMap.put("F" - + (fexpMap.size() - + 1), true); - pathsToNeighborsFilesToUse.add(elucidationOptions.getPathsToNeighborsFiles()[1]); - } + // // build and write neighbors files + // final List pathsToNeighborsFilesToUse = new ArrayList<>(); + // if (Utilities.writeNeighborsFile(elucidationOptions.getPathsToNeighborsFiles()[0], correlationList, indicesMap, + // forbiddenNeighbors)) { + // fexpMap.put("F" + // + (fexpMap.size() + // + 1), false); + // pathsToNeighborsFilesToUse.add(elucidationOptions.getPathsToNeighborsFiles()[0]); + // } + // if (Utilities.writeNeighborsFile(elucidationOptions.getPathsToNeighborsFiles()[1], correlationList, indicesMap, + // setNeighbors)) { + // fexpMap.put("F" + // + (fexpMap.size() + // + 1), true); + // pathsToNeighborsFilesToUse.add(elucidationOptions.getPathsToNeighborsFiles()[1]); + // } // build DEFFs stringBuilder.append( - 
buildDEFFs(elucidationOptions.getFilterPaths(), pathsToNeighborsFilesToUse.toArray(String[]::new))) + // buildDEFFs(elucidationOptions.getFilterPaths(), pathsToNeighborsFilesToUse.toArray(String[]::new))) + buildDEFFs(elucidationOptions.getFilterPaths(), new String[]{})) .append("\n"); // build FEXP stringBuilder.append(buildFEXP(fexpMap)) From f9f6982df77674e9251807947850811f7bd703aa Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Tue, 4 Jan 2022 16:55:48 +0100 Subject: [PATCH 346/405] feat: introduction of Grouping class --- src/casekit/nmr/lsd/Utilities.java | 189 ++++++++++++------ .../{ => inputfile}/LISTAndPROPUtilities.java | 2 +- .../PyLSDInputFileBuilder.java | 3 +- src/casekit/nmr/lsd/inputfile/Utilities.java | 68 +++++++ src/casekit/nmr/lsd/model/Grouping.java | 19 ++ 5 files changed, 215 insertions(+), 66 deletions(-) rename src/casekit/nmr/lsd/{ => inputfile}/LISTAndPROPUtilities.java (99%) rename src/casekit/nmr/lsd/{ => inputfile}/PyLSDInputFileBuilder.java (99%) create mode 100644 src/casekit/nmr/lsd/inputfile/Utilities.java create mode 100644 src/casekit/nmr/lsd/model/Grouping.java diff --git a/src/casekit/nmr/lsd/Utilities.java b/src/casekit/nmr/lsd/Utilities.java index cdb0c80..b14b05e 100644 --- a/src/casekit/nmr/lsd/Utilities.java +++ b/src/casekit/nmr/lsd/Utilities.java @@ -1,6 +1,7 @@ package casekit.nmr.lsd; import casekit.io.FileSystem; +import casekit.nmr.lsd.model.Grouping; import casekit.nmr.model.Signal; import casekit.nmr.model.nmrium.Correlation; import casekit.nmr.model.nmrium.Link; @@ -217,8 +218,9 @@ public static boolean writeNeighborsFile(final String pathToNeighborsFile, final .entrySet()) { sstrIndexCorrelation = sstrIndex; stringBuilder.append( - buildSSTR(sstrIndexCorrelation, atomType, correlation.getHybridization(), - correlation.getProtonsCount())); + casekit.nmr.lsd.inputfile.Utilities.buildSSTR(sstrIndexCorrelation, atomType, + correlation.getHybridization(), + correlation.getProtonsCount())); stringBuilder.append("; ") 
.append(atomType) .append(" at ") @@ -242,8 +244,10 @@ public static boolean writeNeighborsFile(final String pathToNeighborsFile, final != -1) { tempList.add(entryPerHybridization.getKey()); } - stringBuilder.append(buildSSTR(sstrIndex, neighborAtomType, tempList, - new ArrayList<>(entryPerHybridization.getValue()))) + stringBuilder.append( + casekit.nmr.lsd.inputfile.Utilities.buildSSTR(sstrIndex, neighborAtomType, tempList, + new ArrayList<>( + entryPerHybridization.getValue()))) .append("\n"); stringBuilder.append("LINK S") .append(sstrIndexCorrelation) @@ -266,65 +270,6 @@ public static boolean writeNeighborsFile(final String pathToNeighborsFile, final && FileSystem.writeFile(pathToNeighborsFile, stringBuilder.toString()); } - private static String buildSSTR(final int sstrIndex, final String atomType, final List hybridization, - final List protonsCount) { - if (hybridization.isEmpty()) { - hybridization.addAll(Arrays.stream(Constants.defaultHybridizationMap.get(atomType)) - .boxed() - .collect(Collectors.toList())); - } - if (protonsCount.isEmpty()) { - protonsCount.addAll(Arrays.stream(Constants.defaultProtonsCountPerValencyMap.get(atomType)) - .boxed() - .collect(Collectors.toList())); - } - final StringBuilder stringBuilder = new StringBuilder(); - stringBuilder.append("SSTR S") - .append(sstrIndex) - .append(" ") - .append(atomType) - .append(" "); - if (hybridization.size() - == 1) { - stringBuilder.append(hybridization.get(0)) - .append(" "); - if (protonsCount.size() - == 1) { - stringBuilder.append(protonsCount.get(0)); - } else { - stringBuilder.append(buildMultipleValuesString(protonsCount)); - } - } else { - stringBuilder.append(buildMultipleValuesString(hybridization)); - stringBuilder.append(" "); - if (protonsCount.size() - == 1) { - stringBuilder.append(protonsCount.get(0)); - } else { - stringBuilder.append(buildMultipleValuesString(protonsCount)); - } - } - - return stringBuilder.toString(); - } - - private static String 
buildMultipleValuesString(final List values) { - final StringBuilder stringBuilder = new StringBuilder(); - stringBuilder.append("("); - for (int l = 0; l - < values.size(); l++) { - stringBuilder.append(values.get(l)); - if (l - < values.size() - - 1) { - stringBuilder.append(" "); - } - } - stringBuilder.append(")"); - - return stringBuilder.toString(); - } - public static Map> buildFixedNeighborsByINADEQUATE(final List correlationList) { final Map> fixedNeighbors = new HashMap<>(); final Set uniqueSet = new HashSet<>(); @@ -334,7 +279,7 @@ public static Map> buildFixedNeighborsByINADEQUATE(final L correlation = correlationList.get(i); // @TODO for now use INADEQUATE information of atoms without equivalences only if (correlation.getEquivalence() - > 1) { + != 1) { continue; } for (final Link link : correlation.getLink()) { @@ -365,4 +310,120 @@ public static Map> buildFixedNeighborsByINADEQUATE(final L return fixedNeighbors; } + + public static boolean hasMatch(final Correlation correlation1, final Correlation correlation2, + final double tolerance) { + final Signal signal1 = Utils.extractSignalFromCorrelation(correlation1); + final Signal signal2 = Utils.extractSignalFromCorrelation(correlation2); + if (signal1 + == null + || signal2 + == null) { + return false; + } + int dim1 = -1; + int dim2 = -1; + String atomType; + for (int i = 0; i + < signal1.getNuclei().length; i++) { + atomType = Utils.getAtomTypeFromNucleus(signal1.getNuclei()[i]); + if (atomType.equals(correlation1.getAtomType())) { + dim1 = i; + break; + } + } + for (int i = 0; i + < signal2.getNuclei().length; i++) { + atomType = Utils.getAtomTypeFromNucleus(signal2.getNuclei()[i]); + if (atomType.equals(correlation2.getAtomType())) { + dim2 = i; + break; + } + } + if (dim1 + == -1 + || dim2 + == -1) { + return false; + } + + final double shift1 = signal1.getShift(dim1); + final double shift2 = signal2.getShift(dim2); + + return Math.abs(shift1 + - shift2) + <= tolerance; + + } + + public static 
Map>> findGroups(final List correlationList, + final Map tolerances) { + // cluster group index -> list of correlation index pair + final Map>> groups = new HashMap<>(); + int groupIndex = 0; + final Set inserted = new HashSet<>(); + int foundGroupIndex; + for (int i = 0; i + < correlationList.size(); i++) { + final Correlation correlation = correlationList.get(i); + if (inserted.contains(i) + || correlation.isPseudo()) { + continue; + } + groups.putIfAbsent(correlation.getAtomType(), new HashMap<>()); + // if we have a match somewhere then add the correlation index into to group + // if not then create a new group + foundGroupIndex = -1; + for (final Map.Entry> groupEntry : groups.get(correlation.getAtomType()) + .entrySet()) { + if (groupEntry.getValue() + .stream() + .anyMatch(correlationIndex -> hasMatch(correlation, correlationList.get(correlationIndex), + tolerances.get(correlation.getAtomType())))) { + foundGroupIndex = groupEntry.getKey(); + break; + } + } + if (foundGroupIndex + != -1) { + groups.get(correlation.getAtomType()) + .get(foundGroupIndex) + .add(i); + inserted.add(i); + } else { + groups.get(correlation.getAtomType()) + .put(groupIndex, new HashSet<>()); + groups.get(correlation.getAtomType()) + .get(groupIndex) + .add(i); + inserted.add(i); + groupIndex++; + } + } + + return groups; + } + + public static Map> transformGroups( + final Map>> groups) { + final Map> transformedGroups = new HashMap<>(); + for (final Map.Entry>> atomTypeEntry : groups.entrySet()) { + transformedGroups.put(atomTypeEntry.getKey(), new HashMap<>()); + for (final Map.Entry> groupEntry : atomTypeEntry.getValue() + .entrySet()) { + for (final int correlationIndex : groupEntry.getValue()) { + transformedGroups.get(atomTypeEntry.getKey()) + .put(correlationIndex, groupEntry.getKey()); + } + } + } + + return transformedGroups; + } + + public static Grouping buildGroups(final List correlationList, final Map tolerances) { + final Map>> groups = findGroups(correlationList, 
tolerances); + + return new Grouping(tolerances, groups, transformGroups(groups)); + } } diff --git a/src/casekit/nmr/lsd/LISTAndPROPUtilities.java b/src/casekit/nmr/lsd/inputfile/LISTAndPROPUtilities.java similarity index 99% rename from src/casekit/nmr/lsd/LISTAndPROPUtilities.java rename to src/casekit/nmr/lsd/inputfile/LISTAndPROPUtilities.java index 74b97ec..d81b3f9 100644 --- a/src/casekit/nmr/lsd/LISTAndPROPUtilities.java +++ b/src/casekit/nmr/lsd/inputfile/LISTAndPROPUtilities.java @@ -1,4 +1,4 @@ -package casekit.nmr.lsd; +package casekit.nmr.lsd.inputfile; import casekit.nmr.model.Signal; import casekit.nmr.model.nmrium.Correlation; diff --git a/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java b/src/casekit/nmr/lsd/inputfile/PyLSDInputFileBuilder.java similarity index 99% rename from src/casekit/nmr/lsd/PyLSDInputFileBuilder.java rename to src/casekit/nmr/lsd/inputfile/PyLSDInputFileBuilder.java index bb3ab71..70d3545 100644 --- a/src/casekit/nmr/lsd/PyLSDInputFileBuilder.java +++ b/src/casekit/nmr/lsd/inputfile/PyLSDInputFileBuilder.java @@ -1,5 +1,6 @@ -package casekit.nmr.lsd; +package casekit.nmr.lsd.inputfile; +import casekit.nmr.lsd.Constants; import casekit.nmr.lsd.model.Detections; import casekit.nmr.lsd.model.ElucidationOptions; import casekit.nmr.model.Signal; diff --git a/src/casekit/nmr/lsd/inputfile/Utilities.java b/src/casekit/nmr/lsd/inputfile/Utilities.java new file mode 100644 index 0000000..d66ea41 --- /dev/null +++ b/src/casekit/nmr/lsd/inputfile/Utilities.java @@ -0,0 +1,68 @@ +package casekit.nmr.lsd.inputfile; + +import casekit.nmr.lsd.Constants; + +import java.util.Arrays; +import java.util.List; +import java.util.stream.Collectors; + +public class Utilities { + public static String buildSSTR(final int sstrIndex, final String atomType, final List hybridization, + final List protonsCount) { + if (hybridization.isEmpty()) { + hybridization.addAll(Arrays.stream(Constants.defaultHybridizationMap.get(atomType)) + .boxed() + 
.collect(Collectors.toList())); + } + if (protonsCount.isEmpty()) { + protonsCount.addAll(Arrays.stream(Constants.defaultProtonsCountPerValencyMap.get(atomType)) + .boxed() + .collect(Collectors.toList())); + } + final StringBuilder stringBuilder = new StringBuilder(); + stringBuilder.append("SSTR S") + .append(sstrIndex) + .append(" ") + .append(atomType) + .append(" "); + if (hybridization.size() + == 1) { + stringBuilder.append(hybridization.get(0)) + .append(" "); + if (protonsCount.size() + == 1) { + stringBuilder.append(protonsCount.get(0)); + } else { + stringBuilder.append(buildMultipleValuesString(protonsCount)); + } + } else { + stringBuilder.append(buildMultipleValuesString(hybridization)); + stringBuilder.append(" "); + if (protonsCount.size() + == 1) { + stringBuilder.append(protonsCount.get(0)); + } else { + stringBuilder.append(buildMultipleValuesString(protonsCount)); + } + } + + return stringBuilder.toString(); + } + + private static String buildMultipleValuesString(final List values) { + final StringBuilder stringBuilder = new StringBuilder(); + stringBuilder.append("("); + for (int l = 0; l + < values.size(); l++) { + stringBuilder.append(values.get(l)); + if (l + < values.size() + - 1) { + stringBuilder.append(" "); + } + } + stringBuilder.append(")"); + + return stringBuilder.toString(); + } +} diff --git a/src/casekit/nmr/lsd/model/Grouping.java b/src/casekit/nmr/lsd/model/Grouping.java new file mode 100644 index 0000000..6b9d708 --- /dev/null +++ b/src/casekit/nmr/lsd/model/Grouping.java @@ -0,0 +1,19 @@ +package casekit.nmr.lsd.model; + +import lombok.*; + +import java.util.HashMap; +import java.util.Map; +import java.util.Set; + +@AllArgsConstructor +@NoArgsConstructor +@Getter +@Setter +@ToString +public class Grouping { + + Map tolerances = new HashMap<>(); + Map>> groups; + Map> transformedGroups; +} From 953c7e3b7404e63bc5d248636778b6b81dcb495f Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Tue, 4 Jan 2022 20:57:02 +0100 Subject: 
[PATCH 347/405] fix: forbidden list was incomplete --- .../lsd/inputfile/LISTAndPROPUtilities.java | 21 ++++++++++--------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/src/casekit/nmr/lsd/inputfile/LISTAndPROPUtilities.java b/src/casekit/nmr/lsd/inputfile/LISTAndPROPUtilities.java index d81b3f9..e8dc797 100644 --- a/src/casekit/nmr/lsd/inputfile/LISTAndPROPUtilities.java +++ b/src/casekit/nmr/lsd/inputfile/LISTAndPROPUtilities.java @@ -111,23 +111,25 @@ public static void insertHeavyAtomCombinationLISTs(final StringBuilder stringBui private static boolean checkSkipPROPInsertion(final Map listMap, - final Map usedPropsCount, final String listKey) { + final Map usedPropsCount, final String listKey, + final String mode) { if (!listMap.containsKey(listKey)) { return true; } - // LSD crashes if we try to use more atoms with specific hybridization or/and proton count + // LSD crashes if we try to use more atoms with specific hybridization or/and proton count (in mode "allow") // thus count and limit that usage as following: // list name -> number of usages usedPropsCount.putIfAbsent((String) listMap.get(listKey)[0], 1); - return listMap.get(listKey).length + return mode.equals("allow") + && listMap.get(listKey).length > 1 && usedPropsCount.get((String) listMap.get(listKey)[0]) > (int) listMap.get(listKey)[1]; } private static void insertPROP(final StringBuilder stringBuilder, final Map listMap, - final String atomType, final Signal signal, final String neighborAtomType, - final int indexInPyLSD, final String listKey, final String mode) { + final String atomType, final Signal signal, final int indexInPyLSD, + final String listKey, final String mode) { stringBuilder.append("PROP ") .append(indexInPyLSD) .append(mode.equals("forbid") @@ -181,8 +183,7 @@ public static void insertConnectionLISTsAndPROPs(final StringBuilder stringBuild // forbid/set bonds to whole element groups if there is an empty map for an atom type if 
(neighborsTemp.get(neighborAtomType) .isEmpty()) { - insertPROP(stringBuilder, listMap, atomType, signal, neighborAtomType, indexInPyLSD, - neighborAtomType, mode); + insertPROP(stringBuilder, listMap, atomType, signal, indexInPyLSD, neighborAtomType, mode); } else { for (final int neighborHybridization : neighborsTemp.get(neighborAtomType) .keySet()) { @@ -193,12 +194,12 @@ public static void insertConnectionLISTsAndPROPs(final StringBuilder stringBuild ? new ArrayList<>() : List.of(neighborHybridization), List.of(protonsCount)); - if (checkSkipPROPInsertion(listMap, usedPropsCount, listKey)) { + if (checkSkipPROPInsertion(listMap, usedPropsCount, listKey, mode)) { continue; } if (listMap.containsKey(listKey)) { - insertPROP(stringBuilder, listMap, atomType, signal, neighborAtomType, - indexInPyLSD, listKey, mode); + insertPROP(stringBuilder, listMap, atomType, signal, indexInPyLSD, listKey, + mode); usedPropsCount.put((String) listMap.get(listKey)[0], usedPropsCount.get((String) listMap.get(listKey)[0]) + 1); From be04b5a1953be3967d9cf1be18022b0a42028d0e Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Thu, 6 Jan 2022 21:35:28 +0100 Subject: [PATCH 348/405] feat: allow flexible HMBC and COSY --- .../lsd/inputfile/PyLSDInputFileBuilder.java | 218 ++++++++++++------ 1 file changed, 142 insertions(+), 76 deletions(-) diff --git a/src/casekit/nmr/lsd/inputfile/PyLSDInputFileBuilder.java b/src/casekit/nmr/lsd/inputfile/PyLSDInputFileBuilder.java index 70d3545..1823335 100644 --- a/src/casekit/nmr/lsd/inputfile/PyLSDInputFileBuilder.java +++ b/src/casekit/nmr/lsd/inputfile/PyLSDInputFileBuilder.java @@ -118,9 +118,10 @@ private static Map buildIndicesMap(final List co return indicesMap; } - private static String buildMULT(final Correlation correlation, final int index, + private static String buildMULT(final List correlationList, final int index, final Map indicesMap, final Map> detectedHybridizations) { + final Correlation correlation = correlationList.get(index); if 
(correlation.getAtomType() .equals("H")) { return null; @@ -251,7 +252,7 @@ private static String buildMULT(final Correlation correlation, final int index, .append(attachedProtonsCountStringBuilder); if (!correlation.isPseudo()) { stringBuilder.append("; ") - .append(buildShiftString(correlation)); + .append(buildShiftString(correlationList, correlation)); } if (j >= 2) { @@ -264,7 +265,7 @@ private static String buildMULT(final Correlation correlation, final int index, return stringBuilder.toString(); } - private static String buildShiftString(final Correlation correlation) { + private static String buildShiftString(final List correlationList, final Correlation correlation) { final Signal signal = Utils.extractSignalFromCorrelation(correlation); if (signal @@ -272,20 +273,41 @@ private static String buildShiftString(final Correlation correlation) { return "?"; } + String heavyAtomShiftString = ""; + if (correlation.getAtomType() + .equals("H")) { + final ArrayList bondHeavyAtomTypes = new ArrayList<>(correlation.getAttachment() + .keySet()); + if (!bondHeavyAtomTypes.isEmpty()) { + final Optional firstOptional = correlation.getAttachment() + .get(bondHeavyAtomTypes.get(0)) + .stream() + .findFirst(); + if (firstOptional.isPresent()) { + heavyAtomShiftString = " (" + + buildShiftString(correlationList, correlationList.get(firstOptional.get())) + + ")"; + } + } + + } + return correlation.isPseudo() ? "?" 
- : String.valueOf(Statistics.roundDouble(signal.getShift(0), 3)); + : Statistics.roundDouble(signal.getShift(0), 3) + + heavyAtomShiftString; } - private static String buildShiftsComment(final Correlation correlation1, final Correlation correlation2) { + private static String buildShiftsComment(final List correlationList, final Correlation correlation1, + final Correlation correlation2) { return "; " + correlation1.getAtomType() + ": " - + buildShiftString(correlation1) + + buildShiftString(correlationList, correlation1) + " -> " + correlation2.getAtomType() + ": " - + buildShiftString(correlation2); + + buildShiftString(correlationList, correlation2); } private static String buildHSQC(final List correlationList, final int index, @@ -305,15 +327,13 @@ private static String buildHSQC(final List correlationList, final i // for each equivalence of heavy atom and attached protons for (int k = 1; k < indicesMap.get(index).length; k++) { - // for (int p = 1; p - // < indicesMap.get(matchIndex).length; p++) { stringBuilder.append("HSQC ") .append(indicesMap.get(index)[k]) .append(" ") .append(indicesMap.get(matchIndex)[k]) - .append(buildShiftsComment(correlation, correlationList.get(matchIndex))) + .append(buildShiftsComment(correlationList, correlation, + correlationList.get(matchIndex))) .append("\n"); - // } } } } @@ -322,6 +342,29 @@ private static String buildHSQC(final List correlationList, final i return stringBuilder.toString(); } + private static String buildPossibilitiesString(final Map indicesMap, final int index) { + final StringBuilder possibilitiesStringBuilder = new StringBuilder(); + if (indicesMap.get(index).length + > 2) { + possibilitiesStringBuilder.append("("); + } + for (int k = 1; k + < indicesMap.get(index).length; k++) { + possibilitiesStringBuilder.append((int) indicesMap.get(index)[k]); + if (k + < indicesMap.get(index).length + - 1) { + possibilitiesStringBuilder.append(" "); + } + } + if (indicesMap.get(index).length + > 2) { + 
possibilitiesStringBuilder.append(")"); + } + + return possibilitiesStringBuilder.toString(); + } + private static String buildHMBC(final List correlationList, final int index, final Map indicesMap) { final Correlation correlation = correlationList.get(index); @@ -329,6 +372,27 @@ private static String buildHMBC(final List correlationList, final i .equals("H")) { return null; } + + // final Set group = new HashSet<>(); + // if (grouping.getTransformedGroups() + // .containsKey(correlation.getAtomType()) + // && grouping.getTransformedGroups() + // .get(correlation.getAtomType()) + // .containsKey(index)) { + // final int groupIndex = grouping.getTransformedGroups() + // .get(correlation.getAtomType()) + // .get(index); + // group.addAll(grouping.getGroups() + // .get(correlation.getAtomType()) + // .get(groupIndex)); + // System.out.println("\nindex: " + // + index + // + " -> groupIndex: " + // + groupIndex + // + " -> group: " + // + group); + // } + final String defaultBondDistanceString = 2 + " " + 3; @@ -339,40 +403,41 @@ private static String buildHMBC(final List correlationList, final i if (link.getExperimentType() .equals("hmbc")) { for (final int matchIndex : link.getMatch()) { - for (int k = 1; k - < indicesMap.get(index).length; k++) { - for (int l = 1; l - < indicesMap.get(matchIndex).length; l++) { - // only add an HMBC correlation if there is no direct link via HSQC and the equivalence index is not equal - if (!(correlationList.get(matchIndex) - .getAttachment() - .containsKey(correlation.getAtomType()) - && correlationList.get(matchIndex) - .getAttachment() - .get(correlation.getAtomType()) - .contains(index))) { - bondDistanceString = null; - signal2DMap = (Map) link.getSignal(); - if (signal2DMap - != null - && signal2DMap.containsKey("pathLength")) { - pathLengthMap = (Map) signal2DMap.get("pathLength"); - bondDistanceString = pathLengthMap.get("min") - + " " - + pathLengthMap.get("max"); - } - uniqueSet.add(indicesMap.get(index)[k] - + " " - + 
indicesMap.get(matchIndex)[l] - + " " - + (bondDistanceString - != null - ? bondDistanceString - : defaultBondDistanceString) - + buildShiftsComment(correlation, - correlationList.get(matchIndex))); - } + // for (int k = 1; k + // < indicesMap.get(index).length; k++) { + for (int l = 1; l + < indicesMap.get(matchIndex).length; l++) { + // // only add an HMBC correlation if there is no direct link via HSQC and the equivalence index is not equal + // if (!(correlationList.get(matchIndex) + // .getAttachment() + // .containsKey(correlation.getAtomType()) + // && correlationList.get(matchIndex) + // .getAttachment() + // .get(correlation.getAtomType()) + // .contains(index))) { + bondDistanceString = null; + signal2DMap = (Map) link.getSignal(); + if (signal2DMap + != null + && signal2DMap.containsKey("pathLength")) { + pathLengthMap = (Map) signal2DMap.get("pathLength"); + bondDistanceString = pathLengthMap.get("min") + + " " + + pathLengthMap.get("max"); } + uniqueSet.add(buildPossibilitiesString(indicesMap, index) + //indicesMap.get(index)[k] + + " " + + indicesMap.get(matchIndex)[l] + + " " + + (bondDistanceString + != null + ? 
bondDistanceString + : defaultBondDistanceString) + + buildShiftsComment(correlationList, correlation, + correlationList.get(matchIndex))); + // } + // } } } } @@ -403,38 +468,39 @@ private static String buildCOSY(final List correlationList, final i if (link.getExperimentType() .equals("cosy")) { for (final int matchIndex : link.getMatch()) { - // only add a COSY entry if it is not from same signal - if (!correlationList.get(matchIndex) - .getId() - .equals(correlation.getId())) { - for (int k = 1; k - < indicesMap.get(index).length; k++) { - // only allow COSY values between possible equivalent protons and only one another non-equivalent proton - if (indicesMap.get(matchIndex).length - == 2) { - bondDistanceString = null; - signal2DMap = (Map) link.getSignal(); - if (signal2DMap - != null - && signal2DMap.containsKey("pathLength")) { - pathLengthMap = (Map) signal2DMap.get("pathLength"); - bondDistanceString = pathLengthMap.get("min") - + " " - + pathLengthMap.get("max"); - } - uniqueSet.add(indicesMap.get(index)[k] - + " " - + indicesMap.get(matchIndex)[1] - + " " - + (bondDistanceString - != null - ? 
bondDistanceString - : defaultBondDistanceString) - + buildShiftsComment(correlation, - correlationList.get(matchIndex))); - } + // // only add a COSY entry if it is not from same signal + // if (!correlationList.get(matchIndex) + // .getId() + // .equals(correlation.getId())) { + for (int l = 1; l + < indicesMap.get(matchIndex).length; l++) { + // // only allow COSY values between possible equivalent protons and only one another non-equivalent proton + // if (indicesMap.get(matchIndex).length + // == 2) { + bondDistanceString = null; + signal2DMap = (Map) link.getSignal(); + if (signal2DMap + != null + && signal2DMap.containsKey("pathLength")) { + pathLengthMap = (Map) signal2DMap.get("pathLength"); + bondDistanceString = pathLengthMap.get("min") + + " " + + pathLengthMap.get("max"); } + uniqueSet.add(buildPossibilitiesString(indicesMap, index) + //indicesMap.get(index)[k] + + " " + + indicesMap.get(matchIndex)[l] + + " " + + (bondDistanceString + != null + ? bondDistanceString + : defaultBondDistanceString) + + buildShiftsComment(correlationList, correlation, + correlationList.get(matchIndex))); + // } } + // } } } } @@ -660,7 +726,7 @@ private static String buildBONDByFixedNeighbors(final List correlat .append(indicesMap.get(correlationIndex1)[1]) .append(" ") .append(indicesMap.get(correlationIndex2)[1]) - .append(buildShiftsComment(correlation1, correlation2)) + .append(buildShiftsComment(correlationList, correlation1, correlation2)) .append("\n"); uniqueSet.add(indicesMap.get(correlationIndex1)[1] + " " @@ -719,7 +785,7 @@ public static String buildPyLSDInputFileContent(final Correlations correlations, < correlationList.size(); i++) { correlation = correlationList.get(i); collection.get("MULT") - .add(buildMULT(correlation, i, indicesMap, detections.getDetectedHybridizations())); + .add(buildMULT(correlationList, i, indicesMap, detections.getDetectedHybridizations())); collection.get("HSQC") .add(buildHSQC(correlationList, i, indicesMap)); 
collection.get("HMBC") From 59de93bfd4b7a0f591edfc354b6995f1e98e1ff4 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Sat, 8 Jan 2022 13:01:10 +0100 Subject: [PATCH 349/405] chore: use common method for COSY and HMBC section build --- .../lsd/inputfile/PyLSDInputFileBuilder.java | 111 ++++++------------ 1 file changed, 38 insertions(+), 73 deletions(-) diff --git a/src/casekit/nmr/lsd/inputfile/PyLSDInputFileBuilder.java b/src/casekit/nmr/lsd/inputfile/PyLSDInputFileBuilder.java index 1823335..1e3d990 100644 --- a/src/casekit/nmr/lsd/inputfile/PyLSDInputFileBuilder.java +++ b/src/casekit/nmr/lsd/inputfile/PyLSDInputFileBuilder.java @@ -402,44 +402,8 @@ private static String buildHMBC(final List correlationList, final i for (final Link link : correlation.getLink()) { if (link.getExperimentType() .equals("hmbc")) { - for (final int matchIndex : link.getMatch()) { - // for (int k = 1; k - // < indicesMap.get(index).length; k++) { - for (int l = 1; l - < indicesMap.get(matchIndex).length; l++) { - // // only add an HMBC correlation if there is no direct link via HSQC and the equivalence index is not equal - // if (!(correlationList.get(matchIndex) - // .getAttachment() - // .containsKey(correlation.getAtomType()) - // && correlationList.get(matchIndex) - // .getAttachment() - // .get(correlation.getAtomType()) - // .contains(index))) { - bondDistanceString = null; - signal2DMap = (Map) link.getSignal(); - if (signal2DMap - != null - && signal2DMap.containsKey("pathLength")) { - pathLengthMap = (Map) signal2DMap.get("pathLength"); - bondDistanceString = pathLengthMap.get("min") - + " " - + pathLengthMap.get("max"); - } - uniqueSet.add(buildPossibilitiesString(indicesMap, index) - //indicesMap.get(index)[k] - + " " - + indicesMap.get(matchIndex)[l] - + " " - + (bondDistanceString - != null - ? 
bondDistanceString - : defaultBondDistanceString) - + buildShiftsComment(correlationList, correlation, - correlationList.get(matchIndex))); - // } - // } - } - } + buildPerLink(correlationList, index, indicesMap, correlation, defaultBondDistanceString, uniqueSet, + link); } } @@ -467,41 +431,8 @@ private static String buildCOSY(final List correlationList, final i for (final Link link : correlation.getLink()) { if (link.getExperimentType() .equals("cosy")) { - for (final int matchIndex : link.getMatch()) { - // // only add a COSY entry if it is not from same signal - // if (!correlationList.get(matchIndex) - // .getId() - // .equals(correlation.getId())) { - for (int l = 1; l - < indicesMap.get(matchIndex).length; l++) { - // // only allow COSY values between possible equivalent protons and only one another non-equivalent proton - // if (indicesMap.get(matchIndex).length - // == 2) { - bondDistanceString = null; - signal2DMap = (Map) link.getSignal(); - if (signal2DMap - != null - && signal2DMap.containsKey("pathLength")) { - pathLengthMap = (Map) signal2DMap.get("pathLength"); - bondDistanceString = pathLengthMap.get("min") - + " " - + pathLengthMap.get("max"); - } - uniqueSet.add(buildPossibilitiesString(indicesMap, index) - //indicesMap.get(index)[k] - + " " - + indicesMap.get(matchIndex)[l] - + " " - + (bondDistanceString - != null - ? 
bondDistanceString - : defaultBondDistanceString) - + buildShiftsComment(correlationList, correlation, - correlationList.get(matchIndex))); - // } - } - // } - } + buildPerLink(correlationList, index, indicesMap, correlation, defaultBondDistanceString, uniqueSet, + link); } } @@ -513,6 +444,40 @@ private static String buildCOSY(final List correlationList, final i + str); } + private static void buildPerLink(final List correlationList, final int index, + final Map indicesMap, final Correlation correlation, + final String defaultBondDistanceString, final Set uniqueSet, + final Link link) { + String bondDistanceString; + Map signal2DMap; + Map pathLengthMap; + for (final int matchIndex : link.getMatch()) { + for (int l = 1; l + < indicesMap.get(matchIndex).length; l++) { + bondDistanceString = null; + signal2DMap = (Map) link.getSignal(); + if (signal2DMap + != null + && signal2DMap.containsKey("pathLength")) { + pathLengthMap = (Map) signal2DMap.get("pathLength"); + bondDistanceString = pathLengthMap.get("min") + + " " + + pathLengthMap.get("max"); + } + uniqueSet.add(buildPossibilitiesString(indicesMap, index) + + " " + + indicesMap.get(matchIndex)[l] + + " " + + (bondDistanceString + != null + ? 
bondDistanceString + : defaultBondDistanceString) + + buildShiftsComment(correlationList, correlation, + correlationList.get(matchIndex))); + } + } + } + private static String buildSHIX(final Correlation correlation, final int index, final Map indicesMap) { if (correlation.getAtomType() From f7bc1425dbc9e1124cd9c9e79e039ac1e95eac52 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Sat, 8 Jan 2022 13:03:45 +0100 Subject: [PATCH 350/405] feat: added onAtomTypeLevel parameter to filterExtractedConnectivities method & added convertToNumericHybridizationMapKeys method --- .../nmr/analysis/ConnectivityStatistics.java | 125 ++++++++++++------ 1 file changed, 88 insertions(+), 37 deletions(-) diff --git a/src/casekit/nmr/analysis/ConnectivityStatistics.java b/src/casekit/nmr/analysis/ConnectivityStatistics.java index bcc2f72..33cb154 100644 --- a/src/casekit/nmr/analysis/ConnectivityStatistics.java +++ b/src/casekit/nmr/analysis/ConnectivityStatistics.java @@ -1,5 +1,6 @@ package casekit.nmr.analysis; +import casekit.nmr.lsd.Constants; import casekit.nmr.model.DataSet; import casekit.nmr.model.Spectrum; import casekit.nmr.utils.Utils; @@ -149,51 +150,70 @@ public static Map>> extractConnectivit return extractedConnectivities; } - public static Map>> filterExtractedConnectivities( - final Map>> extractedConnectivities, - final double thresholdElementCount) { + public static Map>> filterExtractedConnectivities( + final Map>> extractedConnectivities, + final double thresholdElementCount, final boolean onAtomTypeLevel, + final Set knownCarbonHybridizations) { + // remove hybridization of carbons which we do not expect + for (final String atomType : extractedConnectivities.keySet()) { + if (atomType.equals("C")) { + for (final int hybridization : new HashSet<>(extractedConnectivities.get(atomType) + .keySet())) { + if (!knownCarbonHybridizations.contains(hybridization)) { + extractedConnectivities.get(atomType) + .remove(hybridization); + } + } + } + } final Map totalCounts = 
getTotalCounts(extractedConnectivities); - final int totalCountsSum = getTotalCount(totalCounts); - - final Map>> filteredExtractedConnectivities = new HashMap<>(); + final int totalCountsSum = getSum(new HashSet<>(totalCounts.values())); + final Map>> filteredExtractedConnectivities = new HashMap<>(); extractedConnectivities.keySet() .forEach(neighborAtomType -> { - extractedConnectivities.get(neighborAtomType) - .keySet() - .forEach(neighborHybridization -> { - for (final Map.Entry entryProtonsCount : extractedConnectivities.get( - neighborAtomType) - .get(neighborHybridization) - .entrySet()) { - if (entryProtonsCount.getValue() - / (double) totalCountsSum - >= thresholdElementCount) { - filteredExtractedConnectivities.putIfAbsent( - neighborAtomType, new HashMap<>()); - filteredExtractedConnectivities.get( - neighborAtomType) - .putIfAbsent( - neighborHybridization, - new HashSet<>()); - filteredExtractedConnectivities.get( - neighborAtomType) - .get(neighborHybridization) - .add(entryProtonsCount.getKey()); - } - } - }); + int sum = 0; + for (final Map.Entry> entryPerHybridization : extractedConnectivities.get( + neighborAtomType) + .entrySet()) { + for (final Map.Entry entryProtonsCount : extractedConnectivities.get( + neighborAtomType) + .get(entryPerHybridization.getKey()) + .entrySet()) { + if (onAtomTypeLevel) { + sum += entryProtonsCount.getValue(); + } else if (entryProtonsCount.getValue() + / (double) totalCountsSum + >= thresholdElementCount) { + filteredExtractedConnectivities.putIfAbsent(neighborAtomType, + new HashMap<>()); + filteredExtractedConnectivities.get(neighborAtomType) + .putIfAbsent( + entryPerHybridization.getKey(), + new HashSet<>()); + filteredExtractedConnectivities.get(neighborAtomType) + .get(entryPerHybridization.getKey()) + .add(entryProtonsCount.getKey()); + } + } + } + if (onAtomTypeLevel + && sum + / (double) totalCountsSum + >= thresholdElementCount) { + filteredExtractedConnectivities.putIfAbsent(neighborAtomType, new 
HashMap<>()); + } }); return filteredExtractedConnectivities; } private static Map getTotalCounts( - final Map>> extractedConnectivities) { + final Map>> extractedConnectivities) { final Map totalCounts = new HashMap<>(); for (final String key1 : extractedConnectivities.keySet()) { totalCounts.putIfAbsent(key1, 0); - for (final String key2 : extractedConnectivities.get(key1) - .keySet()) { + for (final int key2 : extractedConnectivities.get(key1) + .keySet()) { for (final Map.Entry countsEntry : extractedConnectivities.get(key1) .get(key2) .entrySet()) { @@ -206,9 +226,40 @@ private static Map getTotalCounts( return totalCounts; } - private static int getTotalCount(final Map totalCounts) { - return totalCounts.values() - .stream() - .reduce(0, (total, current) -> total += current); + private static int getSum(final Set values) { + return values.stream() + .reduce(0, (total, current) -> total += current); + } + + public static Map>> convertToNumericHybridizationMapKeys( + final Map>> map) { + final Map>> converted = new HashMap<>(); + int numericHybridization; + for (final Map.Entry>> entryPerAtomType : map.entrySet()) { + converted.put(entryPerAtomType.getKey(), new HashMap<>()); + for (final Map.Entry> entryPerHybridizationString : entryPerAtomType.getValue() + .entrySet()) { + if (Constants.hybridizationConversionMap.containsKey(entryPerHybridizationString.getKey())) { + numericHybridization = Constants.hybridizationConversionMap.get( + entryPerHybridizationString.getKey()); + converted.get(entryPerAtomType.getKey()) + .putIfAbsent(numericHybridization, new HashMap<>()); + for (final Map.Entry entryPerProtonsCount : entryPerHybridizationString.getValue() + .entrySet()) { + converted.get(entryPerAtomType.getKey()) + .get(numericHybridization) + .putIfAbsent(entryPerProtonsCount.getKey(), 0); + converted.get(entryPerAtomType.getKey()) + .get(numericHybridization) + .put(entryPerProtonsCount.getKey(), converted.get(entryPerAtomType.getKey()) + 
.get(numericHybridization) + .get(entryPerProtonsCount.getKey()) + + entryPerProtonsCount.getValue()); + } + } + } + } + + return converted; } } From cb121c52ca932bc2e014894184aa2c4f2b735bdb Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Tue, 18 Jan 2022 19:21:58 +0100 Subject: [PATCH 351/405] feat: added buildOccurrenceStatistics method in ConnectivityStatistics.java --- .../nmr/analysis/ConnectivityStatistics.java | 145 +++++++++++++----- 1 file changed, 110 insertions(+), 35 deletions(-) diff --git a/src/casekit/nmr/analysis/ConnectivityStatistics.java b/src/casekit/nmr/analysis/ConnectivityStatistics.java index 33cb154..03d759b 100644 --- a/src/casekit/nmr/analysis/ConnectivityStatistics.java +++ b/src/casekit/nmr/analysis/ConnectivityStatistics.java @@ -1,6 +1,5 @@ package casekit.nmr.analysis; -import casekit.nmr.lsd.Constants; import casekit.nmr.model.DataSet; import casekit.nmr.model.Spectrum; import casekit.nmr.utils.Utils; @@ -116,6 +115,108 @@ public static void buildConnectivityStatistics(final DataSet dataSet, final Stri } } + /** + * @param dataSet + * @param atomType + * @param occurrenceStatistics multiplicity -> hybridization -> shift (int) -> "elemental composition" -> connected atom symbol -> [#found, #notFound] + */ + public static void buildOccurrenceStatistics(final DataSet dataSet, final String atomType, + final Map>>>> occurrenceStatistics) { + final IAtomContainer structure = dataSet.getStructure() + .toAtomContainer(); + final Spectrum spectrum = dataSet.getSpectrum() + .toSpectrum(); + final List elements = new ArrayList<>(Utils.getMolecularFormulaElementCounts(dataSet.getMeta() + .get("mf")) + .keySet()); + elements.remove("H"); + Collections.sort(elements); + final String elementsString = String.join(",", elements); + + int shift, atomIndex; + IAtom atom; + String multiplicity, hybridization; + Set found, notFound; + for (int signalIndex = 0; signalIndex + < spectrum.getSignalCount(); signalIndex++) { + shift = 
spectrum.getShift(signalIndex, 0) + .intValue(); + for (int equivalenceIndex = 0; equivalenceIndex + < dataSet.getAssignment() + .getAssignment(0, signalIndex).length; equivalenceIndex++) { + atomIndex = dataSet.getAssignment() + .getAssignment(0, signalIndex, equivalenceIndex); + atom = structure.getAtom(atomIndex); + if (atom.getSymbol() + .equals(atomType)) { + multiplicity = Utils.getMultiplicityFromProtonsCount(atom.getImplicitHydrogenCount()); + if (multiplicity + == null) { + continue; + } + multiplicity = multiplicity.toLowerCase(); + hybridization = atom.getHybridization() + .name(); + occurrenceStatistics.putIfAbsent(multiplicity, new ConcurrentHashMap<>()); + occurrenceStatistics.get(multiplicity) + .putIfAbsent(hybridization, new ConcurrentHashMap<>()); + occurrenceStatistics.get(multiplicity) + .get(hybridization) + .putIfAbsent(shift, new ConcurrentHashMap<>()); + occurrenceStatistics.get(multiplicity) + .get(hybridization) + .get(shift) + .putIfAbsent(elementsString, new ConcurrentHashMap<>()); + // check for connected hetero atoms + found = new HashSet<>(); + for (final IAtom connectedAtom : structure.getConnectedAtomsList(atom)) { + if (connectedAtom.getSymbol() + .equals("H")) { + continue; + } + found.add(connectedAtom.getSymbol()); + } + for (final String connectedAtomType : found) { + occurrenceStatistics.get(multiplicity) + .get(hybridization) + .get(shift) + .get(elementsString) + .putIfAbsent(connectedAtomType, new Integer[]{0, 0}); + occurrenceStatistics.get(multiplicity) + .get(hybridization) + .get(shift) + .get(elementsString) + .get(connectedAtomType)[0] = occurrenceStatistics.get(multiplicity) + .get(hybridization) + .get(shift) + .get(elementsString) + .get(connectedAtomType)[0] + + 1; + } + notFound = new HashSet<>(elements); + notFound.removeAll(found); + for (final String notConnectedAtomType : notFound) { + occurrenceStatistics.get(multiplicity) + .get(hybridization) + .get(shift) + .get(elementsString) + 
.putIfAbsent(notConnectedAtomType, new Integer[]{0, 0}); + occurrenceStatistics.get(multiplicity) + .get(hybridization) + .get(shift) + .get(elementsString) + .get(notConnectedAtomType)[1] = occurrenceStatistics.get(multiplicity) + .get(hybridization) + .get(shift) + .get(elementsString) + .get(notConnectedAtomType)[1] + + 1; + } + } + } + } + } + /** * @param connectivityStatistics multiplicity -> hybridization -> shift (int) -> connected atom symbol -> connected atom hybridization -> connected atom protons count -> occurrence * @param multiplicity @@ -150,9 +251,8 @@ public static Map>> extractConnectivit return extractedConnectivities; } - public static Map>> filterExtractedConnectivities( + public static Map>> filterExtractedConnectivitiesByHybridizations( final Map>> extractedConnectivities, - final double thresholdElementCount, final boolean onAtomTypeLevel, final Set knownCarbonHybridizations) { // remove hybridization of carbons which we do not expect for (final String atomType : extractedConnectivities.keySet()) { @@ -166,6 +266,13 @@ public static Map>> filterExtractedConnectivit } } } + + return extractedConnectivities; + } + + public static Map>> filterExtractedConnectivitiesByCount( + final Map>> extractedConnectivities, + final double thresholdElementCount, final boolean onAtomTypeLevel) { final Map totalCounts = getTotalCounts(extractedConnectivities); final int totalCountsSum = getSum(new HashSet<>(totalCounts.values())); final Map>> filteredExtractedConnectivities = new HashMap<>(); @@ -230,36 +337,4 @@ private static int getSum(final Set values) { return values.stream() .reduce(0, (total, current) -> total += current); } - - public static Map>> convertToNumericHybridizationMapKeys( - final Map>> map) { - final Map>> converted = new HashMap<>(); - int numericHybridization; - for (final Map.Entry>> entryPerAtomType : map.entrySet()) { - converted.put(entryPerAtomType.getKey(), new HashMap<>()); - for (final Map.Entry> entryPerHybridizationString : 
entryPerAtomType.getValue() - .entrySet()) { - if (Constants.hybridizationConversionMap.containsKey(entryPerHybridizationString.getKey())) { - numericHybridization = Constants.hybridizationConversionMap.get( - entryPerHybridizationString.getKey()); - converted.get(entryPerAtomType.getKey()) - .putIfAbsent(numericHybridization, new HashMap<>()); - for (final Map.Entry entryPerProtonsCount : entryPerHybridizationString.getValue() - .entrySet()) { - converted.get(entryPerAtomType.getKey()) - .get(numericHybridization) - .putIfAbsent(entryPerProtonsCount.getKey(), 0); - converted.get(entryPerAtomType.getKey()) - .get(numericHybridization) - .put(entryPerProtonsCount.getKey(), converted.get(entryPerAtomType.getKey()) - .get(numericHybridization) - .get(entryPerProtonsCount.getKey()) - + entryPerProtonsCount.getValue()); - } - } - } - } - - return converted; - } } From e0f67b532f56a43d915ef7fbba8f1d3470201e72 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Tue, 18 Jan 2022 20:12:19 +0100 Subject: [PATCH 352/405] feat: use grouping information when building HMBC or COSY PyLSD input file section --- .../lsd/inputfile/PyLSDInputFileBuilder.java | 157 +++++++++--------- 1 file changed, 81 insertions(+), 76 deletions(-) diff --git a/src/casekit/nmr/lsd/inputfile/PyLSDInputFileBuilder.java b/src/casekit/nmr/lsd/inputfile/PyLSDInputFileBuilder.java index 1e3d990..7ee2e0a 100644 --- a/src/casekit/nmr/lsd/inputfile/PyLSDInputFileBuilder.java +++ b/src/casekit/nmr/lsd/inputfile/PyLSDInputFileBuilder.java @@ -3,6 +3,7 @@ import casekit.nmr.lsd.Constants; import casekit.nmr.lsd.model.Detections; import casekit.nmr.lsd.model.ElucidationOptions; +import casekit.nmr.lsd.model.Grouping; import casekit.nmr.model.Signal; import casekit.nmr.model.nmrium.Correlation; import casekit.nmr.model.nmrium.Correlations; @@ -342,68 +343,108 @@ private static String buildHSQC(final List correlationList, final i return stringBuilder.toString(); } - private static String buildPossibilitiesString(final 
Map indicesMap, final int index) { + private static String buildPossibilitiesString(final List correlationList, final int index, + final Map indicesMap, final Grouping grouping) { final StringBuilder possibilitiesStringBuilder = new StringBuilder(); - if (indicesMap.get(index).length - > 2) { + final Correlation correlation = correlationList.get(index); + // add PyLSD indices from grouping + final Set pyLSDIndices = new HashSet<>(); + if (grouping.getTransformedGroups() + .containsKey(correlation.getAtomType())) { + final int groupIndex = grouping.getTransformedGroups() + .get(correlation.getAtomType()) + .get(index); + for (final int groupCorrelationIndex : grouping.getGroups() + .get(correlation.getAtomType()) + .get(groupIndex)) { + // add equivalence indices as well + for (int k = 1; k + < indicesMap.get(groupCorrelationIndex).length; k++) { + pyLSDIndices.add((int) indicesMap.get(groupCorrelationIndex)[k]); + } + } + } else { + // add for equivalences only + for (int k = 1; k + < indicesMap.get(index).length; k++) { + pyLSDIndices.add((int) indicesMap.get(index)[k]); + } + } + // build the string + if (pyLSDIndices.size() + > 1) { possibilitiesStringBuilder.append("("); } - for (int k = 1; k - < indicesMap.get(index).length; k++) { - possibilitiesStringBuilder.append((int) indicesMap.get(index)[k]); + int k = 0; + for (final int pyLSDIndex : pyLSDIndices) { + possibilitiesStringBuilder.append(pyLSDIndex); if (k - < indicesMap.get(index).length + < pyLSDIndices.size() - 1) { possibilitiesStringBuilder.append(" "); } + k++; } - if (indicesMap.get(index).length - > 2) { + if (pyLSDIndices.size() + > 1) { possibilitiesStringBuilder.append(")"); } return possibilitiesStringBuilder.toString(); } + private static void buildMultipleBondCorrelationPerLink(final List correlationList, final int index, + final Map indicesMap, + final Grouping grouping, + final String defaultBondDistanceString, + final Set uniqueSet, final Link link) { + String bondDistanceString; + Map 
signal2DMap; + Map pathLengthMap; + for (final int matchIndex : link.getMatch()) { + for (int l = 1; l + < indicesMap.get(matchIndex).length; l++) { + bondDistanceString = null; + signal2DMap = (Map) link.getSignal(); + if (signal2DMap + != null + && signal2DMap.containsKey("pathLength")) { + pathLengthMap = (Map) signal2DMap.get("pathLength"); + bondDistanceString = pathLengthMap.get("min") + + " " + + pathLengthMap.get("max"); + } + uniqueSet.add(buildPossibilitiesString(correlationList, index, indicesMap, grouping) + + " " + + indicesMap.get(matchIndex)[l] + + " " + + (bondDistanceString + != null + ? bondDistanceString + : defaultBondDistanceString) + + buildShiftsComment(correlationList, correlationList.get(index), + correlationList.get(matchIndex))); + } + } + } + private static String buildHMBC(final List correlationList, final int index, - final Map indicesMap) { + final Map indicesMap, final Grouping grouping) { final Correlation correlation = correlationList.get(index); if (correlation.getAtomType() .equals("H")) { return null; } - // final Set group = new HashSet<>(); - // if (grouping.getTransformedGroups() - // .containsKey(correlation.getAtomType()) - // && grouping.getTransformedGroups() - // .get(correlation.getAtomType()) - // .containsKey(index)) { - // final int groupIndex = grouping.getTransformedGroups() - // .get(correlation.getAtomType()) - // .get(index); - // group.addAll(grouping.getGroups() - // .get(correlation.getAtomType()) - // .get(groupIndex)); - // System.out.println("\nindex: " - // + index - // + " -> groupIndex: " - // + groupIndex - // + " -> group: " - // + group); - // } - final String defaultBondDistanceString = 2 + " " + 3; - String bondDistanceString; - Map signal2DMap, pathLengthMap; final Set uniqueSet = new LinkedHashSet<>(); // in case of same content exists multiple times for (final Link link : correlation.getLink()) { if (link.getExperimentType() .equals("hmbc")) { - buildPerLink(correlationList, index, indicesMap, 
correlation, defaultBondDistanceString, uniqueSet, - link); + buildMultipleBondCorrelationPerLink(correlationList, index, indicesMap, grouping, + defaultBondDistanceString, uniqueSet, link); } } @@ -416,7 +457,7 @@ private static String buildHMBC(final List correlationList, final i } private static String buildCOSY(final List correlationList, final int index, - final Map indicesMap) { + final Map indicesMap, final Grouping grouping) { final Correlation correlation = correlationList.get(index); if (!correlation.getAtomType() .equals("H")) { @@ -425,14 +466,12 @@ private static String buildCOSY(final List correlationList, final i final String defaultBondDistanceString = 3 + " " + 4; - String bondDistanceString; - Map signal2DMap, pathLengthMap; final Set uniqueSet = new LinkedHashSet<>(); // in case of same content exists multiple times for (final Link link : correlation.getLink()) { if (link.getExperimentType() .equals("cosy")) { - buildPerLink(correlationList, index, indicesMap, correlation, defaultBondDistanceString, uniqueSet, - link); + buildMultipleBondCorrelationPerLink(correlationList, index, indicesMap, grouping, + defaultBondDistanceString, uniqueSet, link); } } @@ -444,40 +483,6 @@ private static String buildCOSY(final List correlationList, final i + str); } - private static void buildPerLink(final List correlationList, final int index, - final Map indicesMap, final Correlation correlation, - final String defaultBondDistanceString, final Set uniqueSet, - final Link link) { - String bondDistanceString; - Map signal2DMap; - Map pathLengthMap; - for (final int matchIndex : link.getMatch()) { - for (int l = 1; l - < indicesMap.get(matchIndex).length; l++) { - bondDistanceString = null; - signal2DMap = (Map) link.getSignal(); - if (signal2DMap - != null - && signal2DMap.containsKey("pathLength")) { - pathLengthMap = (Map) signal2DMap.get("pathLength"); - bondDistanceString = pathLengthMap.get("min") - + " " - + pathLengthMap.get("max"); - } - 
uniqueSet.add(buildPossibilitiesString(indicesMap, index) - + " " - + indicesMap.get(matchIndex)[l] - + " " - + (bondDistanceString - != null - ? bondDistanceString - : defaultBondDistanceString) - + buildShiftsComment(correlationList, correlation, - correlationList.get(matchIndex))); - } - } - } - private static String buildSHIX(final Correlation correlation, final int index, final Map indicesMap) { if (correlation.getAtomType() @@ -713,7 +718,7 @@ private static String buildBOND(final List correlationList, final M } public static String buildPyLSDInputFileContent(final Correlations correlations, final String mf, - final Detections detections, + final Detections detections, final Grouping grouping, final ElucidationOptions elucidationOptions) { if (mf != null) { @@ -754,9 +759,9 @@ public static String buildPyLSDInputFileContent(final Correlations correlations, collection.get("HSQC") .add(buildHSQC(correlationList, i, indicesMap)); collection.get("HMBC") - .add(buildHMBC(correlationList, i, indicesMap)); + .add(buildHMBC(correlationList, i, indicesMap, grouping)); collection.get("COSY") - .add(buildCOSY(correlationList, i, indicesMap)); + .add(buildCOSY(correlationList, i, indicesMap, grouping)); collection.get("SHIX") .add(buildSHIX(correlation, i, indicesMap)); collection.get("SHIH") From e7abbe1a9c7aa9680b8754755f7853f0cf7deabb Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Thu, 20 Jan 2022 10:13:27 +0100 Subject: [PATCH 353/405] fix: do not insert incomplete HMBC or COSY lines in PyLSD file by missing proton attachments --- .../lsd/inputfile/PyLSDInputFileBuilder.java | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/src/casekit/nmr/lsd/inputfile/PyLSDInputFileBuilder.java b/src/casekit/nmr/lsd/inputfile/PyLSDInputFileBuilder.java index 7ee2e0a..076c997 100644 --- a/src/casekit/nmr/lsd/inputfile/PyLSDInputFileBuilder.java +++ b/src/casekit/nmr/lsd/inputfile/PyLSDInputFileBuilder.java @@ -398,12 +398,32 @@ private static void 
buildMultipleBondCorrelationPerLink(final List final Grouping grouping, final String defaultBondDistanceString, final Set uniqueSet, final Link link) { + // ignore H atoms without any attachment to a heavy atom + if (correlationList.get(index) + .getAtomType() + .equals("H") + && correlationList.get(index) + .getAttachment() + .keySet() + .isEmpty()) { + return; + } String bondDistanceString; Map signal2DMap; Map pathLengthMap; for (final int matchIndex : link.getMatch()) { for (int l = 1; l < indicesMap.get(matchIndex).length; l++) { + // ignore linked H atoms without any attachment to a heavy atom + if (correlationList.get(matchIndex) + .getAtomType() + .equals("H") + && correlationList.get(matchIndex) + .getAttachment() + .keySet() + .isEmpty()) { + continue; + } bondDistanceString = null; signal2DMap = (Map) link.getSignal(); if (signal2DMap From b93c91b2261e178315b3e75527e2860695a24a3d Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Tue, 25 Jan 2022 16:30:47 +0100 Subject: [PATCH 354/405] chore: introduction of MolecularConnectivity class && extended list of default atoms and use of all valencies in MULT section --- src/casekit/nmr/lsd/Constants.java | 7 + src/casekit/nmr/lsd/Utilities.java | 382 ++++++++ .../lsd/inputfile/LISTAndPROPUtilities.java | 139 +-- .../lsd/inputfile/PyLSDInputFileBuilder.java | 927 ++++++------------ .../nmr/lsd/model/MolecularConnectivity.java | 28 + 5 files changed, 796 insertions(+), 687 deletions(-) create mode 100644 src/casekit/nmr/lsd/model/MolecularConnectivity.java diff --git a/src/casekit/nmr/lsd/Constants.java b/src/casekit/nmr/lsd/Constants.java index ad40f78..51ebe35 100644 --- a/src/casekit/nmr/lsd/Constants.java +++ b/src/casekit/nmr/lsd/Constants.java @@ -18,7 +18,9 @@ private static Map createNucleiMap() { nuclei.put("C", "13C"); nuclei.put("N", "15N"); nuclei.put("H", "1H"); + nuclei.put("S", "33S"); nuclei.put("F", "19F"); + nuclei.put("P", "31P"); return Collections.unmodifiableMap(nuclei); } @@ -33,6 +35,7 @@ 
private static Map createDefaultHybridizationMap() { defaultHybridization.put("F", new int[]{3}); defaultHybridization.put("Cl", new int[]{3}); defaultHybridization.put("Br", new int[]{3}); + defaultHybridization.put("P", new int[]{1, 2, 3}); return Collections.unmodifiableMap(defaultHybridization); } @@ -52,6 +55,9 @@ private static Map createDefaultProtonsCountPerValencyMap() { defaultProtonsCountPerValency.put("F", new int[]{0}); defaultProtonsCountPerValency.put("Cl", new int[]{0}); defaultProtonsCountPerValency.put("Br", new int[]{0}); + defaultProtonsCountPerValency.put("P", new int[]{0, 1, 2}); + defaultProtonsCountPerValency.put("P5", new int[]{0, 1, 2, 3}); + defaultProtonsCountPerValency.put("P35", new int[]{0, 1, 2, 3}); return defaultProtonsCountPerValency; } @@ -66,6 +72,7 @@ private static Map createDefaultAtomLabelMap() { defaultAtomLabel.put("F", "F"); defaultAtomLabel.put("Cl", "Cl"); defaultAtomLabel.put("Br", "Br"); + defaultAtomLabel.put("P", "P35"); return Collections.unmodifiableMap(defaultAtomLabel); } diff --git a/src/casekit/nmr/lsd/Utilities.java b/src/casekit/nmr/lsd/Utilities.java index b14b05e..8db730d 100644 --- a/src/casekit/nmr/lsd/Utilities.java +++ b/src/casekit/nmr/lsd/Utilities.java @@ -1,7 +1,9 @@ package casekit.nmr.lsd; import casekit.io.FileSystem; +import casekit.nmr.lsd.model.Detections; import casekit.nmr.lsd.model.Grouping; +import casekit.nmr.lsd.model.MolecularConnectivity; import casekit.nmr.model.Signal; import casekit.nmr.model.nmrium.Correlation; import casekit.nmr.model.nmrium.Link; @@ -426,4 +428,384 @@ public static Grouping buildGroups(final List correlationList, fina return new Grouping(tolerances, groups, transformGroups(groups)); } + + private static Set getProtonCounts(final List correlationList, final int index) { + final Correlation correlation = correlationList.get(index); + if (correlation.getProtonsCount() + != null + && !correlation.getProtonsCount() + .isEmpty()) { + // if protonCounts is already 
given + return new HashSet<>(correlation.getProtonsCount()); + } + final Set protonCounts = new HashSet<>(); + for (int i = 0; i + < Constants.defaultProtonsCountPerValencyMap.get( + Constants.defaultAtomLabelMap.get(correlation.getAtomType())).length; i++) { + protonCounts.add(Constants.defaultProtonsCountPerValencyMap.get( + Constants.defaultAtomLabelMap.get(correlation.getAtomType()))[i]); + } + + return protonCounts; + } + + private static Set getHybridizations(final List correlationList, final int index, + final Map> detectedHybridizations) { + final Correlation correlation = correlationList.get(index); + Set hybridizations = new HashSet<>(); + if (correlation.getHybridization() + != null + && !correlation.getHybridization() + .isEmpty()) { + // if hybridization is already given + return new HashSet<>(correlation.getHybridization()); + } else { + // if hybridization is not given then use the detected ones + if (detectedHybridizations.containsKey(index)) { + hybridizations = new HashSet<>(detectedHybridizations.get(index)); + } + if (hybridizations.isEmpty() + && correlation.getAtomType() + .equals("C") + && correlation.getProtonsCount() + .size() + == 1 + && correlation.getProtonsCount() + .get(0) + >= 2) { + // a carbon with at least two protons can only be SP2 or SP3 + hybridizations.add(2); + hybridizations.add(3); + } + } + if (hybridizations.isEmpty()) { + for (int i = 0; i + < Constants.defaultHybridizationMap.get(correlation.getAtomType()).length; i++) { + hybridizations.add(Constants.defaultHybridizationMap.get(correlation.getAtomType())[i]); + } + } + + return hybridizations; + } + + private static Map buildIndicesMap(final List correlationList) { + // index in correlation data -> [indices in PyLSD file...] 
+ final Map indicesMap = new HashMap<>(); + // init element indices within correlations with same order as in correlation data input + int heavyAtomIndexInPyLSDFile = 1; + int protonIndexInPyLSDFile = 1; + int protonsToInsert, protonsCount; + Correlation correlation; + for (int i = 0; i + < correlationList.size(); i++) { + correlation = correlationList.get(i); + // set entry for each correlation with consideration of equivalences + if (correlation.getAtomType() + .equals("H")) { + protonsToInsert = 0; + for (final Link link : correlation.getLink()) { + if (link.getExperimentType() + .equals("hsqc") + || link.getExperimentType() + .equals("hmqc")) { + for (final int matchIndex : link.getMatch()) { + protonsCount = correlationList.get(matchIndex) + .getProtonsCount() + .get(0); + protonsToInsert += (correlation.getEquivalence() + / (double) protonsCount) + * correlationList.get(matchIndex) + .getAttachment() + .get("H") + .size(); + } + } + } + indicesMap.put(i, new Integer[protonsToInsert]); + for (int j = 0; j + < protonsToInsert; j++) { + indicesMap.get(i)[j] = protonIndexInPyLSDFile; + protonIndexInPyLSDFile++; + } + } else { + indicesMap.put(i, new Integer[correlation.getEquivalence()]); + for (int j = 0; j + < correlation.getEquivalence(); j++) { + indicesMap.get(i)[j] = heavyAtomIndexInPyLSDFile; + heavyAtomIndexInPyLSDFile++; + } + } + } + + return indicesMap; + } + + public static MolecularConnectivity findMolecularConnectivityByIndex( + final Map> molecularConnectivityMap, final String atomType, + final boolean exclude, final int index) { + for (final int correlationIndex : molecularConnectivityMap.keySet()) { + for (final MolecularConnectivity molecularConnectivity : molecularConnectivityMap.get(correlationIndex)) { + if (((exclude + && !molecularConnectivity.getAtomType() + .equals(atomType)) + || (!exclude + && molecularConnectivity.getAtomType() + .equals(atomType))) + && molecularConnectivity.getIndex() + == index) { + return molecularConnectivity; + } 
+ } + } + + return null; + } + + private static Set buildGroupMembers(final Map indicesMap, + final List correlationList, final int correlationIndex, + final Grouping grouping) { + final Correlation correlation = correlationList.get(correlationIndex); + // add possible indices from grouping + final int groupIndex; + final Set possibilities = new HashSet<>(); + if (grouping.getTransformedGroups() + .containsKey(correlation.getAtomType())) { + groupIndex = grouping.getTransformedGroups() + .get(correlation.getAtomType()) + .get(correlationIndex); + for (final int groupCorrelationIndex : grouping.getGroups() + .get(correlation.getAtomType()) + .get(groupIndex)) { + // add equivalence indices of group members + for (int k = 0; k + < indicesMap.get(groupCorrelationIndex).length; k++) { + possibilities.add(indicesMap.get(groupCorrelationIndex)[k]); + } + } + } else { + // add for equivalences only + for (int k = 0; k + < indicesMap.get(correlationIndex).length; k++) { + possibilities.add(indicesMap.get(correlationIndex)[k]); + } + } + + return possibilities; + } + + private static void addMolecularConnectivity( + final Map> molecularConnectivityMap, + final Map indicesMap, final List correlationList, + final int correlationIndex, final int index, final Detections detections, final Grouping grouping) { + final Correlation correlation = correlationList.get(correlationIndex); + molecularConnectivityMap.putIfAbsent(correlationIndex, new ArrayList<>()); + if (molecularConnectivityMap.get(correlationIndex) + .stream() + .noneMatch(molecularConnectivityTemp -> molecularConnectivityTemp.getIndex() + == index)) { + final MolecularConnectivity molecularConnectivity = new MolecularConnectivity(); + molecularConnectivity.setIndex(index); + molecularConnectivity.setAtomType(correlation.getAtomType()); + molecularConnectivity.setSignal(Utils.extractSignalFromCorrelation(correlation)); + + if (!correlationList.get(correlationIndex) + .getAtomType() + .equals("H")) { + 
molecularConnectivity.setProtonCounts(getProtonCounts(correlationList, correlationIndex)); + molecularConnectivity.setHybridizations( + getHybridizations(correlationList, correlationIndex, detections.getDetectedHybridizations())); + } + molecularConnectivity.setGroupMembers( + buildGroupMembers(indicesMap, correlationList, correlationIndex, grouping)); + molecularConnectivityMap.get(correlationIndex) + .add(molecularConnectivity); + } + } + + public static Map> buildMolecularConnectivityMap( + final List correlationList, final Detections detections, final Grouping grouping, + final Map defaultBondDistances) { + + final Map indicesMap = buildIndicesMap(correlationList); + // correlation index -> [MolecularConnectivity] + final Map> molecularConnectivityMap = new HashMap<>(); + Correlation correlation, correlation2; + int index, protonIndex; + Map signal2DMap; + Map pathLengthMap; + MolecularConnectivity molecularConnectivity; + for (int correlationIndex = 0; correlationIndex + < correlationList.size(); correlationIndex++) { + correlation = correlationList.get(correlationIndex); + for (int k = 0; k + < indicesMap.get(correlationIndex).length; k++) { + index = indicesMap.get(correlationIndex)[k]; + addMolecularConnectivity(molecularConnectivityMap, indicesMap, correlationList, correlationIndex, index, + detections, grouping); + molecularConnectivity = findMolecularConnectivityByIndex(molecularConnectivityMap, + correlation.getAtomType(), false, index); + for (final Link link : correlation.getLink()) { + if (link.getExperimentType() + .equals("hsqc") + || link.getExperimentType() + .equals("hmqc") + && !correlation.getAtomType() + .equals("H")) { + for (final int matchIndex : link.getMatch()) { + protonIndex = indicesMap.get(matchIndex)[k]; + if (molecularConnectivity.getHsqc() + == null) { + molecularConnectivity.setHsqc(new HashSet<>()); + } + molecularConnectivity.getHsqc() + .add(protonIndex); + } + } else if (link.getExperimentType() + .equals("hmbc") + || 
link.getExperimentType() + .equals("cosy")) { + if (link.getExperimentType() + .equals("hmbc") + && correlation.getAtomType() + .equals("H")) { + continue; + } + if (link.getExperimentType() + .equals("cosy") + && !correlation.getAtomType() + .equals("H")) { + continue; + } + // ignore H atoms without any attachment to a heavy atom + if (correlationList.get(correlationIndex) + .getAtomType() + .equals("H") + && correlationList.get(correlationIndex) + .getAttachment() + .keySet() + .isEmpty()) { + continue; + } + signal2DMap = (Map) link.getSignal(); + if (signal2DMap + != null + && signal2DMap.containsKey("pathLength")) { + pathLengthMap = (Map) signal2DMap.get("pathLength"); + } else { + pathLengthMap = null; + } + for (final int matchIndex : link.getMatch()) { + // ignore linked H atoms without any attachment to a heavy atom + if (correlationList.get(matchIndex) + .getAtomType() + .equals("H") + && correlationList.get(matchIndex) + .getAttachment() + .keySet() + .isEmpty()) { + continue; + } + + if (link.getExperimentType() + .equals("hmbc")) { + for (int l = 0; l + < indicesMap.get(matchIndex).length; l++) { + protonIndex = indicesMap.get(matchIndex)[l]; + if (molecularConnectivity.getHmbc() + == null) { + molecularConnectivity.setHmbc(new HashMap<>()); + } + molecularConnectivity.getHmbc() + .put(protonIndex, pathLengthMap + == null + ? defaultBondDistances.get("hmbc") + : new Integer[]{(int) pathLengthMap.get( + "min"), (int) pathLengthMap.get( + "max")}); + } + } else { + if (k + < indicesMap.get(matchIndex).length) { + protonIndex = indicesMap.get(matchIndex)[k]; + if (molecularConnectivity.getCosy() + == null) { + molecularConnectivity.setCosy(new HashMap<>()); + } + molecularConnectivity.getCosy() + .put(protonIndex, pathLengthMap + == null + ? 
defaultBondDistances.get("cosy") + : new Integer[]{(int) pathLengthMap.get( + "min"), (int) pathLengthMap.get( + "max")}); + } + } + } + } + } + } + // set detections + if (!correlation.getAtomType() + .equals("H") + && !correlation.isPseudo()) { + for (final MolecularConnectivity molecularConnectivityTemp : molecularConnectivityMap.get( + correlationIndex)) { + molecularConnectivityTemp.setForbiddenNeighbors(detections.getForbiddenNeighbors() + .get(correlationIndex)); + molecularConnectivityTemp.setSetNeighbors(detections.getSetNeighbors() + .get(correlationIndex)); + } + } + // fill in fixed neighbors + if (correlation.getEquivalence() + == 1 + && detections.getFixedNeighbors() + .containsKey(correlationIndex)) { + for (final int correlationIndex2 : detections.getFixedNeighbors() + .get(correlationIndex)) { + correlation2 = correlationList.get(correlationIndex2); + // use fixed neighbor information of atoms without equivalence equals 1 only + if (correlation2.getEquivalence() + > 1) { + continue; + } + index = indicesMap.get(correlationIndex)[0]; + molecularConnectivity = findMolecularConnectivityByIndex(molecularConnectivityMap, + correlation.getAtomType(), false, index); + if (molecularConnectivity.getFixedNeighbors() + == null) { + molecularConnectivity.setFixedNeighbors(new HashSet<>()); + } + molecularConnectivity.getFixedNeighbors() + .add(indicesMap.get(correlationIndex2)[0]); + } + } + } + // filter out HMBC or COSY correlation to itself + for (final int correlationIndex : molecularConnectivityMap.keySet()) { + for (final MolecularConnectivity molecularConnectivityTemp : molecularConnectivityMap.get( + correlationIndex)) { + if (molecularConnectivityTemp.getHsqc() + != null) { + if (molecularConnectivityTemp.getHmbc() + != null) { + for (final int hmbcIndexInPyLSD : molecularConnectivityTemp.getHsqc()) { + molecularConnectivityTemp.getHmbc() + .remove(hmbcIndexInPyLSD); + } + } + if (molecularConnectivityTemp.getCosy() + != null) { + for (final int 
cosyIndexInPyLSD : molecularConnectivityTemp.getHsqc()) { + molecularConnectivityTemp.getCosy() + .remove(cosyIndexInPyLSD); + } + } + } + } + } + + + return molecularConnectivityMap; + } } diff --git a/src/casekit/nmr/lsd/inputfile/LISTAndPROPUtilities.java b/src/casekit/nmr/lsd/inputfile/LISTAndPROPUtilities.java index e8dc797..7663aab 100644 --- a/src/casekit/nmr/lsd/inputfile/LISTAndPROPUtilities.java +++ b/src/casekit/nmr/lsd/inputfile/LISTAndPROPUtilities.java @@ -1,31 +1,14 @@ package casekit.nmr.lsd.inputfile; +import casekit.nmr.lsd.model.MolecularConnectivity; import casekit.nmr.model.Signal; -import casekit.nmr.model.nmrium.Correlation; import casekit.nmr.utils.Statistics; -import casekit.nmr.utils.Utils; import java.util.*; +import java.util.stream.Collectors; public class LISTAndPROPUtilities { - public static void insertELEM(final StringBuilder stringBuilder, final Map listMap, - final Set atomTypesByMf) { - final Set atomTypes = new HashSet<>(atomTypesByMf); - atomTypes.remove("H"); - for (final String atomType : atomTypes) { - listMap.put(atomType, new Object[]{"L" - + (listMap.size() - + 1)}); - stringBuilder.append("ELEM") - .append(" ") - .append(listMap.get(atomType)[0]) - .append(" ") - .append(atomType) - .append("\n"); - } - } - public static void insertNoHeteroHeteroBonds(final StringBuilder stringBuilder, final Map listMap) { // create hetero atom list automatically to forbid hetero-hetero bonds @@ -38,8 +21,41 @@ public static void insertNoHeteroHeteroBonds(final StringBuilder stringBuilder, stringBuilder.append("PROP L1 0 L1 -; no hetero-hetero bonds\n"); } - private static String buildListKey(final String atomType, final List hybridizations, - final List protonsCounts) { + public static void insertGeneralLISTs(final StringBuilder stringBuilder, final Map listMap, + final Map> molecularConnectivityMap, + final Set atomTypesByMf) { + final Set atomTypes = new HashSet<>(atomTypesByMf); + atomTypes.remove("H"); + Set elementIndices; + for 
(final String atomType : atomTypes) { + listMap.put(atomType, new Object[]{"L" + + (listMap.size() + + 1)}); + elementIndices = new HashSet<>(); + for (final int correlationIndex : molecularConnectivityMap.keySet()) { + elementIndices.addAll(molecularConnectivityMap.get(correlationIndex) + .stream() + .filter(molecularConnectivity -> molecularConnectivity.getAtomType() + .equals(atomType)) + .map(MolecularConnectivity::getIndex) + .collect(Collectors.toSet())); + } + + stringBuilder.append("LIST ") + .append(listMap.get(atomType)[0]); + for (final int elementIndex : elementIndices) { + stringBuilder.append(" ") + .append(elementIndex); + } + stringBuilder.append("; list of all ") + .append(atomType) + .append(" atoms") + .append("\n"); + } + } + + private static String buildListKey(final String atomType, final Set hybridizations, + final Set protonsCounts) { return atomType + "_" + (!hybridizations.isEmpty() @@ -53,33 +69,27 @@ private static String buildListKey(final String atomType, final List hy public static void insertHeavyAtomCombinationLISTs(final StringBuilder stringBuilder, final Map listMap, - final List correlationList, - final Map indicesMap) { + final Map> molecularConnectivityMap) { final Map> atomIndicesMap = new LinkedHashMap<>(); - Correlation correlation; - int indexInPyLSD; String listKey; - for (int i = 0; i - < correlationList.size(); i++) { - for (int k = 1; k - < indicesMap.get(i).length; k++) { - correlation = correlationList.get(i); - if (correlation.getAtomType() - .equals("H") - || correlation.getHybridization() - .size() - != 1 - || correlation.getProtonsCount() - .size() + for (final int correlationIndex : molecularConnectivityMap.keySet()) { + for (final MolecularConnectivity molecularConnectivity : molecularConnectivityMap.get(correlationIndex)) { + if (molecularConnectivity.getAtomType() + .equals("H") + // || molecularConnectivity.getHybridizations() + // .size() + // != 1 + || molecularConnectivity.getProtonCounts() + .size() != 1) 
{ continue; } - listKey = buildListKey(correlation.getAtomType(), new ArrayList<>(), //correlation.getHybridization(), - correlation.getProtonsCount()); - indexInPyLSD = (int) indicesMap.get(i)[k]; + listKey = buildListKey(molecularConnectivity.getAtomType(), new HashSet<>(), + // correlation.getHybridization(), + molecularConnectivity.getProtonCounts()); atomIndicesMap.putIfAbsent(listKey, new HashSet<>()); atomIndicesMap.get(listKey) - .add(indexInPyLSD); + .add(molecularConnectivity.getIndex()); } } String[] split; @@ -157,33 +167,29 @@ private static void insertPROP(final StringBuilder stringBuilder, final Map listMap, - final List correlationList, - final Map indicesMap, - final Map>>> neighbors, + final Map> molecularConnectivityMap, final String mode) { - Correlation correlation; - Signal signal; - String atomType, listKey; - int indexInPyLSD; + String listKey; Map>> neighborsTemp; final Map usedPropsCount = new HashMap<>(); - for (int i = 0; i - < correlationList.size(); i++) { - if (neighbors.containsKey(i)) { - correlation = correlationList.get(i); - signal = Utils.extractSignalFromCorrelation(correlation); - atomType = correlation.getAtomType(); - neighborsTemp = neighbors.get(i); - - // put in the extracted information per correlation and equivalent - for (int k = 1; k - < indicesMap.get(i).length; k++) { - indexInPyLSD = (int) indicesMap.get(i)[k]; + for (final int correlationIndex : molecularConnectivityMap.keySet()) { + for (final MolecularConnectivity molecularConnectivity : molecularConnectivityMap.get(correlationIndex)) { + if (mode.equals("forbid")) { + neighborsTemp = molecularConnectivity.getForbiddenNeighbors(); + } else if (mode.equals("allow")) { + neighborsTemp = molecularConnectivity.getSetNeighbors(); + } else { + neighborsTemp = null; + } + if (neighborsTemp + != null) { for (final String neighborAtomType : neighborsTemp.keySet()) { // forbid/set bonds to whole element groups if there is an empty map for an atom type if 
(neighborsTemp.get(neighborAtomType) .isEmpty()) { - insertPROP(stringBuilder, listMap, atomType, signal, indexInPyLSD, neighborAtomType, mode); + insertPROP(stringBuilder, listMap, molecularConnectivity.getAtomType(), + molecularConnectivity.getSignal(), molecularConnectivity.getIndex(), + neighborAtomType, mode); } else { for (final int neighborHybridization : neighborsTemp.get(neighborAtomType) .keySet()) { @@ -191,15 +197,16 @@ public static void insertConnectionLISTsAndPROPs(final StringBuilder stringBuild .get(neighborHybridization)) { listKey = buildListKey(neighborAtomType, neighborHybridization == -1 - ? new ArrayList<>() - : List.of(neighborHybridization), - List.of(protonsCount)); + ? new HashSet<>() + : Set.of(neighborHybridization), + Set.of(protonsCount)); if (checkSkipPROPInsertion(listMap, usedPropsCount, listKey, mode)) { continue; } if (listMap.containsKey(listKey)) { - insertPROP(stringBuilder, listMap, atomType, signal, indexInPyLSD, listKey, - mode); + insertPROP(stringBuilder, listMap, molecularConnectivity.getAtomType(), + molecularConnectivity.getSignal(), molecularConnectivity.getIndex(), + listKey, mode); usedPropsCount.put((String) listMap.get(listKey)[0], usedPropsCount.get((String) listMap.get(listKey)[0]) + 1); diff --git a/src/casekit/nmr/lsd/inputfile/PyLSDInputFileBuilder.java b/src/casekit/nmr/lsd/inputfile/PyLSDInputFileBuilder.java index 076c997..58f18a4 100644 --- a/src/casekit/nmr/lsd/inputfile/PyLSDInputFileBuilder.java +++ b/src/casekit/nmr/lsd/inputfile/PyLSDInputFileBuilder.java @@ -1,13 +1,13 @@ package casekit.nmr.lsd.inputfile; import casekit.nmr.lsd.Constants; +import casekit.nmr.lsd.Utilities; import casekit.nmr.lsd.model.Detections; import casekit.nmr.lsd.model.ElucidationOptions; import casekit.nmr.lsd.model.Grouping; -import casekit.nmr.model.Signal; +import casekit.nmr.lsd.model.MolecularConnectivity; import casekit.nmr.model.nmrium.Correlation; import casekit.nmr.model.nmrium.Correlations; -import 
casekit.nmr.model.nmrium.Link; import casekit.nmr.utils.Statistics; import casekit.nmr.utils.Utils; @@ -52,340 +52,24 @@ private static String buildELIM(final int elimP1, final int elimP2) { + elimP2; } - private static Map buildIndicesMap(final List correlationList) { - // index in correlation data -> [atom type, indices in PyLSD file...] - final Map indicesMap = new HashMap<>(); - // init element indices within correlations with same order as in correlation data input - int heavyAtomIndexInPyLSDFile = 1; - int protonIndexInPyLSDFile = 1; - int protonsToInsert, protonsCount; - Correlation correlation; - for (int i = 0; i - < correlationList.size(); i++) { - correlation = correlationList.get(i); - // set entry for each correlation with consideration of equivalences - if (correlation.getAtomType() - .equals("H")) { - protonsToInsert = 0; - for (final Link link : correlation.getLink()) { - if (link.getExperimentType() - .equals("hsqc") - || link.getExperimentType() - .equals("hmqc")) { - for (final int matchIndex : link.getMatch()) { - protonsCount = correlationList.get(matchIndex) - .getProtonsCount() - .get(0); - protonsToInsert += (correlation.getEquivalence() - / (double) protonsCount) - * correlationList.get(matchIndex) - .getAttachment() - .get("H") - .size(); - } - } - } - indicesMap.put(i, new Object[1 - + protonsToInsert]); - indicesMap.get(i)[0] = correlation.getAtomType(); - for (int j = 1; j - <= protonsToInsert; j++) { - indicesMap.get(i)[j] = protonIndexInPyLSDFile; - protonIndexInPyLSDFile++; - } - } else { - indicesMap.put(i, new Object[1 - + correlation.getEquivalence()]); - indicesMap.get(i)[0] = correlation.getAtomType(); - for (int j = 1; j - <= correlation.getEquivalence(); j++) { - indicesMap.get(i)[j] = heavyAtomIndexInPyLSDFile; - heavyAtomIndexInPyLSDFile++; - } - } - } - // System.out.println("\n\n"); - // for (final Map.Entry entry : indicesMap.entrySet()) { - // System.out.println(entry.getKey() - // + ": " - // + entry.getValue()[0]); - 
// for (int i = 1; i - // < entry.getValue().length; i++) { - // System.out.println(entry.getValue()[i]); - // } - // } - // System.out.println("\n\n"); - - return indicesMap; - } - - private static String buildMULT(final List correlationList, final int index, - final Map indicesMap, - final Map> detectedHybridizations) { - final Correlation correlation = correlationList.get(index); - if (correlation.getAtomType() - .equals("H")) { - return null; - } - final StringBuilder stringBuilder = new StringBuilder(); - List hybridizations = new ArrayList<>(); - final StringBuilder hybridizationStringBuilder; - final StringBuilder attachedProtonsCountStringBuilder; - - if (correlation.getHybridization() - != null - && !correlation.getHybridization() - .isEmpty()) { - // if hybridization is already given - hybridizations.addAll(correlation.getHybridization()); - } else { - // if hybridization is not given then use the detected ones - if (detectedHybridizations.containsKey(index)) { - hybridizations = detectedHybridizations.get(index); - } - if (hybridizations.isEmpty() - && correlation.getAtomType() - .equals("C") - && !correlation.getProtonsCount() - .isEmpty() - && correlation.getProtonsCount() - .get(0) - >= 2) { - // a carbon with at least two protons can only be SP2 or SP3 - hybridizations.add(2); - hybridizations.add(3); - } - } - if (hybridizations.isEmpty()) { - hybridizationStringBuilder = new StringBuilder(); - if (Constants.defaultHybridizationMap.get(correlation.getAtomType()).length - > 1) { - hybridizationStringBuilder.append("("); - } - for (int i = 0; i - < Constants.defaultHybridizationMap.get(correlation.getAtomType()).length; i++) { - hybridizationStringBuilder.append(Constants.defaultHybridizationMap.get(correlation.getAtomType())[i]); - if (i - < Constants.defaultHybridizationMap.get(correlation.getAtomType()).length - - 1) { - hybridizationStringBuilder.append(" "); - } - } - if (Constants.defaultHybridizationMap.get(correlation.getAtomType()).length - > 
1) { - hybridizationStringBuilder.append(")"); - } - } else { - hybridizationStringBuilder = new StringBuilder(); - if (hybridizations.size() - > 1) { - hybridizationStringBuilder.append("("); - } - for (int k = 0; k - < hybridizations.size(); k++) { - hybridizationStringBuilder.append(hybridizations.get(k)); - if (k - < hybridizations.size() - - 1) { - hybridizationStringBuilder.append(" "); - } - } - if (hybridizations.size() - > 1) { - hybridizationStringBuilder.append(")"); - } - } - // set attached protons count - attachedProtonsCountStringBuilder = new StringBuilder(); - // if protons count is given - if (correlation.getProtonsCount() - != null - && !correlation.getProtonsCount() - .isEmpty()) { - if (correlation.getProtonsCount() - .size() - == 1) { - attachedProtonsCountStringBuilder.append(correlation.getProtonsCount() - .get(0)); - } else { - attachedProtonsCountStringBuilder.append("("); - for (final int protonsCount : correlation.getProtonsCount()) { - attachedProtonsCountStringBuilder.append(protonsCount) - .append(" "); - } - attachedProtonsCountStringBuilder.deleteCharAt(attachedProtonsCountStringBuilder.length() - - 1); - attachedProtonsCountStringBuilder.append(")"); - } - } else { // if protons count is not given then set it to default value - if (Constants.defaultProtonsCountPerValencyMap.get( - Constants.defaultAtomLabelMap.get(correlation.getAtomType())).length - > 1) { - attachedProtonsCountStringBuilder.append("("); - } - for (int i = 0; i - < Constants.defaultProtonsCountPerValencyMap.get( - Constants.defaultAtomLabelMap.get(correlation.getAtomType())).length; i++) { - attachedProtonsCountStringBuilder.append(Constants.defaultProtonsCountPerValencyMap.get( - Constants.defaultAtomLabelMap.get(correlation.getAtomType()))[i]); - if (i - < Constants.defaultProtonsCountPerValencyMap.get( - Constants.defaultAtomLabelMap.get(correlation.getAtomType())).length - - 1) { - attachedProtonsCountStringBuilder.append(" "); - } - } - if 
(Constants.defaultProtonsCountPerValencyMap.get( - Constants.defaultAtomLabelMap.get(correlation.getAtomType())).length - > 1) { - attachedProtonsCountStringBuilder.append(")"); - } - } - for (int j = 1; j - < indicesMap.get(index).length; j++) { - stringBuilder.append("MULT ") - .append(indicesMap.get(index)[j]) - .append(" ") - .append(correlation.getAtomType()) - .append(" ") - .append(hybridizationStringBuilder) - .append(" ") - .append(attachedProtonsCountStringBuilder); - if (!correlation.isPseudo()) { - stringBuilder.append("; ") - .append(buildShiftString(correlationList, correlation)); - } - if (j - >= 2) { - stringBuilder.append("; equivalent to ") - .append(indicesMap.get(index)[1]); - } - stringBuilder.append("\n"); - } - - return stringBuilder.toString(); - } - - private static String buildShiftString(final List correlationList, final Correlation correlation) { - - final Signal signal = Utils.extractSignalFromCorrelation(correlation); - if (signal - == null) { - return "?"; - } - - String heavyAtomShiftString = ""; - if (correlation.getAtomType() - .equals("H")) { - final ArrayList bondHeavyAtomTypes = new ArrayList<>(correlation.getAttachment() - .keySet()); - if (!bondHeavyAtomTypes.isEmpty()) { - final Optional firstOptional = correlation.getAttachment() - .get(bondHeavyAtomTypes.get(0)) - .stream() - .findFirst(); - if (firstOptional.isPresent()) { - heavyAtomShiftString = " (" - + buildShiftString(correlationList, correlationList.get(firstOptional.get())) - + ")"; - } - } - - } - - return correlation.isPseudo() - ? "?" 
- : Statistics.roundDouble(signal.getShift(0), 3) - + heavyAtomShiftString; - } - - private static String buildShiftsComment(final List correlationList, final Correlation correlation1, - final Correlation correlation2) { - return "; " - + correlation1.getAtomType() - + ": " - + buildShiftString(correlationList, correlation1) - + " -> " - + correlation2.getAtomType() - + ": " - + buildShiftString(correlationList, correlation2); - } - - private static String buildHSQC(final List correlationList, final int index, - final Map indicesMap) { - final Correlation correlation = correlationList.get(index); - if (correlation.getAtomType() - .equals("H")) { - return null; - } - final StringBuilder stringBuilder = new StringBuilder(); - for (final Link link : correlation.getLink()) { - if (link.getExperimentType() - .equals("hsqc") - || link.getExperimentType() - .equals("hmqc")) { - for (final int matchIndex : link.getMatch()) { - // for each equivalence of heavy atom and attached protons - for (int k = 1; k - < indicesMap.get(index).length; k++) { - stringBuilder.append("HSQC ") - .append(indicesMap.get(index)[k]) - .append(" ") - .append(indicesMap.get(matchIndex)[k]) - .append(buildShiftsComment(correlationList, correlation, - correlationList.get(matchIndex))) - .append("\n"); - } - } - } - } - - return stringBuilder.toString(); - } - - private static String buildPossibilitiesString(final List correlationList, final int index, - final Map indicesMap, final Grouping grouping) { + private static String buildPossibilitiesString(final Set possibilities) { final StringBuilder possibilitiesStringBuilder = new StringBuilder(); - final Correlation correlation = correlationList.get(index); - // add PyLSD indices from grouping - final Set pyLSDIndices = new HashSet<>(); - if (grouping.getTransformedGroups() - .containsKey(correlation.getAtomType())) { - final int groupIndex = grouping.getTransformedGroups() - .get(correlation.getAtomType()) - .get(index); - for (final int 
groupCorrelationIndex : grouping.getGroups() - .get(correlation.getAtomType()) - .get(groupIndex)) { - // add equivalence indices as well - for (int k = 1; k - < indicesMap.get(groupCorrelationIndex).length; k++) { - pyLSDIndices.add((int) indicesMap.get(groupCorrelationIndex)[k]); - } - } - } else { - // add for equivalences only - for (int k = 1; k - < indicesMap.get(index).length; k++) { - pyLSDIndices.add((int) indicesMap.get(index)[k]); - } - } - // build the string - if (pyLSDIndices.size() + + if (possibilities.size() > 1) { possibilitiesStringBuilder.append("("); } - int k = 0; - for (final int pyLSDIndex : pyLSDIndices) { - possibilitiesStringBuilder.append(pyLSDIndex); - if (k - < pyLSDIndices.size() + int counter = 0; + for (final int possibility : possibilities) { + possibilitiesStringBuilder.append(possibility); + if (counter + < possibilities.size() - 1) { possibilitiesStringBuilder.append(" "); } - k++; + counter++; } - if (pyLSDIndices.size() + if (possibilities.size() > 1) { possibilitiesStringBuilder.append(")"); } @@ -393,181 +77,253 @@ private static String buildPossibilitiesString(final List correlati return possibilitiesStringBuilder.toString(); } - private static void buildMultipleBondCorrelationPerLink(final List correlationList, final int index, - final Map indicesMap, - final Grouping grouping, - final String defaultBondDistanceString, - final Set uniqueSet, final Link link) { - // ignore H atoms without any attachment to a heavy atom - if (correlationList.get(index) - .getAtomType() - .equals("H") - && correlationList.get(index) - .getAttachment() - .keySet() - .isEmpty()) { - return; - } - String bondDistanceString; - Map signal2DMap; - Map pathLengthMap; - for (final int matchIndex : link.getMatch()) { - for (int l = 1; l - < indicesMap.get(matchIndex).length; l++) { - // ignore linked H atoms without any attachment to a heavy atom - if (correlationList.get(matchIndex) - .getAtomType() - .equals("H") - && correlationList.get(matchIndex) 
- .getAttachment() - .keySet() - .isEmpty()) { - continue; + private static Map buildStringBuilderMap( + final Map> molecularConnectivityMap) { + StringBuilder stringBuilder; + final Map stringBuilderMap = new HashMap<>(); + stringBuilderMap.put("MULT", new StringBuilder()); + stringBuilderMap.put("HSQC", new StringBuilder()); + stringBuilderMap.put("HMBC", new StringBuilder()); + stringBuilderMap.put("COSY", new StringBuilder()); + stringBuilderMap.put("BOND", new StringBuilder()); + stringBuilderMap.put("SHIX", new StringBuilder()); + stringBuilderMap.put("SHIH", new StringBuilder()); + StringBuilder hybridizationStringBuilder, attachedProtonsCountStringBuilder; + int counter; + int firstOfEquivalenceIndexPyLSD; + for (final int correlationIndex : molecularConnectivityMap.keySet()) { + firstOfEquivalenceIndexPyLSD = -1; + for (final MolecularConnectivity molecularConnectivity : molecularConnectivityMap.get(correlationIndex)) { + if (firstOfEquivalenceIndexPyLSD + == -1) { + firstOfEquivalenceIndexPyLSD = molecularConnectivity.getIndex(); } - bondDistanceString = null; - signal2DMap = (Map) link.getSignal(); - if (signal2DMap - != null - && signal2DMap.containsKey("pathLength")) { - pathLengthMap = (Map) signal2DMap.get("pathLength"); - bondDistanceString = pathLengthMap.get("min") - + " " - + pathLengthMap.get("max"); + if (!molecularConnectivity.getAtomType() + .equals("H")) { + + hybridizationStringBuilder = new StringBuilder(); + if (molecularConnectivity.getHybridizations() + .size() + > 1) { + hybridizationStringBuilder.append("("); + } + counter = 0; + for (final int hybrid : molecularConnectivity.getHybridizations()) { + hybridizationStringBuilder.append(hybrid); + if (counter + < molecularConnectivity.getHybridizations() + .size() + - 1) { + hybridizationStringBuilder.append(" "); + } + counter++; + } + if (molecularConnectivity.getHybridizations() + .size() + > 1) { + hybridizationStringBuilder.append(")"); + } + attachedProtonsCountStringBuilder = new 
StringBuilder(); + if (molecularConnectivity.getProtonCounts() + .size() + > 1) { + attachedProtonsCountStringBuilder.append("("); + } + counter = 0; + for (final int protonCount : molecularConnectivity.getProtonCounts()) { + attachedProtonsCountStringBuilder.append(protonCount); + if (counter + < molecularConnectivity.getProtonCounts() + .size() + - 1) { + attachedProtonsCountStringBuilder.append(" "); + } + counter++; + } + if (molecularConnectivity.getProtonCounts() + .size() + > 1) { + attachedProtonsCountStringBuilder.append(")"); + } + stringBuilder = stringBuilderMap.get("MULT"); + stringBuilder.append("MULT ") + .append(molecularConnectivity.getIndex()) + .append(" ") + .append(Constants.defaultAtomLabelMap.get(molecularConnectivity.getAtomType())) + .append(" ") + .append(hybridizationStringBuilder) + .append(" ") + .append(attachedProtonsCountStringBuilder); + stringBuilder.append("; ") + .append(buildShiftString(molecularConnectivityMap, molecularConnectivity)); + if (molecularConnectivityMap.get(correlationIndex) + .size() + > 1 + && molecularConnectivity.getIndex() + != firstOfEquivalenceIndexPyLSD) { + stringBuilder.append("; equivalent to ") + .append(firstOfEquivalenceIndexPyLSD); + } + stringBuilder.append("\n"); + if (molecularConnectivity.getHsqc() + != null) { + stringBuilder = stringBuilderMap.get("HSQC"); + for (final int protonIndexPyLSD : molecularConnectivity.getHsqc()) { + stringBuilder.append("HSQC ") + .append(molecularConnectivity.getIndex()) + .append(" ") + .append(protonIndexPyLSD) + .append(buildShiftsComment(molecularConnectivityMap, molecularConnectivity, + Utilities.findMolecularConnectivityByIndex( + molecularConnectivityMap, "H", false, + protonIndexPyLSD))) + .append("\n"); + } + } + if (molecularConnectivity.getHmbc() + != null) { + stringBuilder = stringBuilderMap.get("HMBC"); + for (final int protonIndexPyLSD : molecularConnectivity.getHmbc() + .keySet()) { + stringBuilder.append("HMBC ") + 
.append(buildPossibilitiesString(molecularConnectivity.getGroupMembers())) + .append(" ") + .append(protonIndexPyLSD) + .append(" ") + .append(molecularConnectivity.getHmbc() + .get(protonIndexPyLSD)[0]) + .append(" ") + .append(molecularConnectivity.getHmbc() + .get(protonIndexPyLSD)[1]) + .append(buildShiftsComment(molecularConnectivityMap, molecularConnectivity, + + Utilities.findMolecularConnectivityByIndex( + molecularConnectivityMap, "H", false, + protonIndexPyLSD))) + .append("\n"); + } + } + if (molecularConnectivity.getFixedNeighbors() + != null) { + stringBuilder = stringBuilderMap.get("BOND"); + for (final int bondedIndexInPyLSD : molecularConnectivity.getFixedNeighbors()) { + stringBuilder.append("BOND ") + .append(molecularConnectivity.getIndex()) + .append(" ") + .append(bondedIndexInPyLSD) + .append(buildShiftsComment(molecularConnectivityMap, molecularConnectivity, + Utilities.findMolecularConnectivityByIndex( + molecularConnectivityMap, "H", true, + bondedIndexInPyLSD))) + .append("\n"); + } + } + } else if (molecularConnectivity.getAtomType() + .equals("H")) { + if (molecularConnectivity.getCosy() + != null) { + stringBuilder = stringBuilderMap.get("COSY"); + for (final int protonIndexInPyLSD : molecularConnectivity.getCosy() + .keySet()) { + stringBuilder.append("COSY ") + .append(buildPossibilitiesString(molecularConnectivity.getGroupMembers())) + .append(" ") + .append(protonIndexInPyLSD) + .append(" ") + .append(molecularConnectivity.getCosy() + .get(protonIndexInPyLSD)[0]) + .append(" ") + .append(molecularConnectivity.getCosy() + .get(protonIndexInPyLSD)[1]) + .append(buildShiftsComment(molecularConnectivityMap, molecularConnectivity, + Utilities.findMolecularConnectivityByIndex( + molecularConnectivityMap, "H", false, + protonIndexInPyLSD))) + .append("\n"); + } + } + } + if (molecularConnectivity.getSignal() + != null) { + stringBuilder = stringBuilderMap.get(molecularConnectivity.getAtomType() + .equals("H") + ? 
"SHIH" + : "SHIX"); + stringBuilder.append(molecularConnectivity.getAtomType() + .equals("H") + ? "SHIH" + : "SHIX") + .append(" ") + .append(molecularConnectivity.getIndex()) + .append(" ") + .append(Statistics.roundDouble(molecularConnectivity.getSignal() + .getShift(0), 5)) + .append("\n"); } - uniqueSet.add(buildPossibilitiesString(correlationList, index, indicesMap, grouping) - + " " - + indicesMap.get(matchIndex)[l] - + " " - + (bondDistanceString - != null - ? bondDistanceString - : defaultBondDistanceString) - + buildShiftsComment(correlationList, correlationList.get(index), - correlationList.get(matchIndex))); - } - } - } - - private static String buildHMBC(final List correlationList, final int index, - final Map indicesMap, final Grouping grouping) { - final Correlation correlation = correlationList.get(index); - if (correlation.getAtomType() - .equals("H")) { - return null; - } - - final String defaultBondDistanceString = 2 - + " " - + 3; - final Set uniqueSet = new LinkedHashSet<>(); // in case of same content exists multiple times - for (final Link link : correlation.getLink()) { - if (link.getExperimentType() - .equals("hmbc")) { - buildMultipleBondCorrelationPerLink(correlationList, index, indicesMap, grouping, - defaultBondDistanceString, uniqueSet, link); - } - } - - return uniqueSet.stream() - .map(str -> "HMBC " - + str - + "\n") - .reduce("", (strAll, str) -> strAll - + str); - } - - private static String buildCOSY(final List correlationList, final int index, - final Map indicesMap, final Grouping grouping) { - final Correlation correlation = correlationList.get(index); - if (!correlation.getAtomType() - .equals("H")) { - return null; - } - final String defaultBondDistanceString = 3 - + " " - + 4; - final Set uniqueSet = new LinkedHashSet<>(); // in case of same content exists multiple times - for (final Link link : correlation.getLink()) { - if (link.getExperimentType() - .equals("cosy")) { - buildMultipleBondCorrelationPerLink(correlationList, 
index, indicesMap, grouping, - defaultBondDistanceString, uniqueSet, link); } } - return uniqueSet.stream() - .map(str -> "COSY " - + str - + "\n") - .reduce("", (strAll, str) -> strAll - + str); + return stringBuilderMap; } - private static String buildSHIX(final Correlation correlation, final int index, - final Map indicesMap) { - if (correlation.getAtomType() - .equals("H") - || correlation.isPseudo()) { - return null; - } - final Signal signal = Utils.extractSignalFromCorrelation(correlation); - if (signal + private static String buildShiftString(final Map> molecularConnectivityMap, + final MolecularConnectivity molecularConnectivity) { + if (molecularConnectivity + == null + || molecularConnectivity.getSignal() == null) { - return null; - } - final StringBuilder stringBuilder = new StringBuilder(); - for (int k = 1; k - < indicesMap.get(index).length; k++) { - stringBuilder.append("SHIX ") - .append(indicesMap.get(index)[k]) - .append(" ") - .append(Statistics.roundDouble(signal.getShift(0), 3)) - .append("\n"); + return "?"; } - return stringBuilder.toString(); - } + final String heavyAtomShiftString = ""; + // if (molecularConnectivity.getAtomType() + // .equals("H")) { + // MolecularConnectivity heavyAtomMolecularConnectivity = null; + // boolean found = false; + // for (final int correlationIndex : molecularConnectivityMap.keySet()) { + // for (final MolecularConnectivity molecularConnectivityTemp : molecularConnectivityMap.get( + // correlationIndex)) { + // if (molecularConnectivityTemp.getHsqc() + // != null + // && molecularConnectivityTemp.getHsqc() + // .contains(molecularConnectivity.getIndex())) { + // heavyAtomMolecularConnectivity = molecularConnectivityTemp; + // found = true; + // break; + // } + // } + // if (found) { + // break; + // } + // } + // if (heavyAtomMolecularConnectivity + // != null) { + // heavyAtomShiftString = " (" + // + buildShiftString(molecularConnectivityMap, heavyAtomMolecularConnectivity) + // + ")"; + // } + // } - 
private static String buildSHIH(final Correlation correlation, final int index, - final Map indicesMap) { - if (!correlation.getAtomType() - .equals("H") - || correlation.isPseudo()) { - return null; - } - final Signal signal = Utils.extractSignalFromCorrelation(correlation); - if (signal - == null) { - return null; - } - final StringBuilder stringBuilder = new StringBuilder(); - // only consider protons which are attached via HSQC/HMQC (pseudo and real links) - for (final Link link : correlation.getLink()) { - if ((link.getExperimentType() - .equals("hsqc") - || link.getExperimentType() - .equals("hmqc")) - && !link.getMatch() - .isEmpty()) { // && !link.isPseudo() - for (int k = 1; k - < indicesMap.get(index).length; k++) { - stringBuilder.append("SHIH ") - .append(indicesMap.get(index)[k]) - .append(" ") - .append(Statistics.roundDouble(signal.getShift(0), 5)) - .append("\n"); - } - } - } + return Statistics.roundDouble(molecularConnectivity.getSignal() + .getShift(0), 3) + + heavyAtomShiftString; + } - return stringBuilder.toString(); + private static String buildShiftsComment(final Map> molecularConnectivityMap, + final MolecularConnectivity molecularConnectivity1, + final MolecularConnectivity molecularConnectivity2) { + return "; " + + molecularConnectivity1.getAtomType() + + ": " + + buildShiftString(molecularConnectivityMap, molecularConnectivity1) + + " -> " + + molecularConnectivity2.getAtomType() + + ": " + + buildShiftString(molecularConnectivityMap, molecularConnectivity2); } - private static String buildLISTsAndPROPs(final List correlationList, - final Map indicesMap, + private static String buildLISTsAndPROPs(final Map> molecularConnectivityMap, final Map elementCounts, - final Map>>> forbiddenNeighbors, - final Map>>> setNeighbors, final boolean allowHeteroHeteroBonds) { final StringBuilder stringBuilder = new StringBuilder(); // list key -> [list name, size] @@ -577,16 +333,15 @@ private static String buildLISTsAndPROPs(final List correlationList 
if (!allowHeteroHeteroBonds) { LISTAndPROPUtilities.insertNoHeteroHeteroBonds(stringBuilder, listMap); } - // insert ELEM for each heavy atom type in MF - LISTAndPROPUtilities.insertELEM(stringBuilder, listMap, elementCounts.keySet()); + // insert LIST for each heavy atom type in MF + LISTAndPROPUtilities.insertGeneralLISTs(stringBuilder, listMap, molecularConnectivityMap, + elementCounts.keySet()); // insert list combinations of carbon and hybridization states - LISTAndPROPUtilities.insertHeavyAtomCombinationLISTs(stringBuilder, listMap, correlationList, indicesMap); + LISTAndPROPUtilities.insertHeavyAtomCombinationLISTs(stringBuilder, listMap, molecularConnectivityMap); // insert forbidden connection lists and properties - LISTAndPROPUtilities.insertConnectionLISTsAndPROPs(stringBuilder, listMap, correlationList, indicesMap, - forbiddenNeighbors, "forbid"); + LISTAndPROPUtilities.insertConnectionLISTsAndPROPs(stringBuilder, listMap, molecularConnectivityMap, "forbid"); // insert set connection lists and properties - LISTAndPROPUtilities.insertConnectionLISTsAndPROPs(stringBuilder, listMap, correlationList, indicesMap, - setNeighbors, "allow"); + LISTAndPROPUtilities.insertConnectionLISTsAndPROPs(stringBuilder, listMap, molecularConnectivityMap, "allow"); return stringBuilder.toString(); } @@ -645,11 +400,7 @@ private static String buildFEXP(final Map fexpMap) { return stringBuilder.toString(); } - private static String buildDEFFsAndFEXP(final List correlationList, - final Map indicesMap, - final ElucidationOptions elucidationOptions, - final Map>>> forbiddenNeighbors, - final Map>>> setNeighbors) { + private static String buildDEFFsAndFEXP(final ElucidationOptions elucidationOptions) { final StringBuilder stringBuilder = new StringBuilder(); final Map fexpMap = new HashMap<>(); for (int i = 0; i @@ -686,134 +437,68 @@ private static String buildDEFFsAndFEXP(final List correlationList, return stringBuilder.toString(); } - private static String 
buildBONDByFixedNeighbors(final List correlationList, - final Map indicesMap, - final Map> fixedNeighbors) { - final StringBuilder stringBuilder = new StringBuilder(); - - final Set uniqueSet = new HashSet<>(); - int correlationIndex1; - Correlation correlation1, correlation2; - for (final Map.Entry> entry : fixedNeighbors.entrySet()) { - correlationIndex1 = entry.getKey(); - correlation1 = correlationList.get(correlationIndex1); - if (correlation1.getEquivalence() - > 1) { - continue; - } - for (final int correlationIndex2 : entry.getValue()) { - correlation2 = correlationList.get(correlationIndex2); - // @TODO for now use fixed neighbor information of atoms without equivalences only - if (correlation2.getEquivalence() - > 1) { - continue; - } - // insert BOND pair once only and not if equivalences exist - if (!uniqueSet.contains(indicesMap.get(correlationIndex1)[1] - + " " - + indicesMap.get(correlationIndex2)[1])) { - stringBuilder.append("BOND ") - .append(indicesMap.get(correlationIndex1)[1]) - .append(" ") - .append(indicesMap.get(correlationIndex2)[1]) - .append(buildShiftsComment(correlationList, correlation1, correlation2)) - .append("\n"); - uniqueSet.add(indicesMap.get(correlationIndex1)[1] - + " " - + indicesMap.get(correlationIndex2)[1]); - uniqueSet.add(indicesMap.get(correlationIndex2)[1] - + " " - + indicesMap.get(correlationIndex1)[1]); - } - } - } - - return stringBuilder.toString(); - } - - private static String buildBOND(final List correlationList, final Map indicesMap, - final Map> fixedNeighbors) { - return buildBONDByFixedNeighbors(correlationList, indicesMap, fixedNeighbors) - + "\n"; - } - public static String buildPyLSDInputFileContent(final Correlations correlations, final String mf, final Detections detections, final Grouping grouping, final ElucidationOptions elucidationOptions) { if (mf - != null) { - final List correlationList = correlations.getValues(); - final Map elementCounts = new 
LinkedHashMap<>(Utils.getMolecularFormulaElementCounts(mf)); - final StringBuilder stringBuilder = new StringBuilder(); - // create header - stringBuilder.append(buildHeader()) - .append("\n\n"); - // FORM - stringBuilder.append(buildFORM(mf, elementCounts)) - .append("\n\n"); - // PIEC - stringBuilder.append(buildPIEC()) + == null) { + return ""; + } + final List correlationList = correlations.getValues(); + final Map elementCounts = new LinkedHashMap<>(Utils.getMolecularFormulaElementCounts(mf)); + final StringBuilder stringBuilder = new StringBuilder(); + // create header + stringBuilder.append(buildHeader()) + .append("\n\n"); + // FORM + stringBuilder.append(buildFORM(mf, elementCounts)) + .append("\n\n"); + // PIEC + stringBuilder.append(buildPIEC()) + .append("\n\n"); + // ELIM + if (elucidationOptions.isUseElim()) { + stringBuilder.append(buildELIM(elucidationOptions.getElimP1(), elucidationOptions.getElimP2())) .append("\n\n"); - // ELIM - if (elucidationOptions.isUseElim()) { - stringBuilder.append(buildELIM(elucidationOptions.getElimP1(), elucidationOptions.getElimP2())) - .append("\n\n"); - } + } - final Map> collection = new LinkedHashMap<>(); - collection.put("MULT", new ArrayList<>()); - collection.put("HSQC", new ArrayList<>()); - collection.put("HMBC", new ArrayList<>()); - collection.put("COSY", new ArrayList<>()); - collection.put("SHIX", new ArrayList<>()); - collection.put("SHIH", new ArrayList<>()); - // index in correlation data -> [atom type, index in PyLSD file] - final Map indicesMap = buildIndicesMap(correlationList); - - Correlation correlation; - for (int i = 0; i - < correlationList.size(); i++) { - correlation = correlationList.get(i); - collection.get("MULT") - .add(buildMULT(correlationList, i, indicesMap, detections.getDetectedHybridizations())); - collection.get("HSQC") - .add(buildHSQC(correlationList, i, indicesMap)); - collection.get("HMBC") - .add(buildHMBC(correlationList, i, indicesMap, grouping)); - collection.get("COSY") - 
.add(buildCOSY(correlationList, i, indicesMap, grouping)); - collection.get("SHIX") - .add(buildSHIX(correlation, i, indicesMap)); - collection.get("SHIH") - .add(buildSHIH(correlation, i, indicesMap)); - } + final Map defaultBondDistances = new HashMap<>(); + defaultBondDistances.put("hmbc", new Integer[]{2, 3}); + defaultBondDistances.put("cosy", new Integer[]{3, 4}); + final Map> molecularConnectivityMap = Utilities.buildMolecularConnectivityMap( + correlationList, detections, grouping, defaultBondDistances); + final Map stringBuilderMap = buildStringBuilderMap(molecularConnectivityMap); + stringBuilder.append(stringBuilderMap.get("MULT") + .toString()) + .append("\n"); + stringBuilder.append(stringBuilderMap.get("HSQC") + .toString()) + .append("\n"); - collection.keySet() - .forEach(key -> { - collection.get(key) - .stream() - .filter(Objects::nonNull) - .forEach(stringBuilder::append); - stringBuilder.append("\n"); - }); - - // BOND (interpretation, INADEQUATE, previous assignments) -> input fragments - stringBuilder.append(buildBOND(correlationList, indicesMap, detections.getFixedNeighbors())) - .append("\n"); - - // LIST PROP for certain limitations or properties of atoms in lists, e.g. 
hetero hetero bonds allowance - stringBuilder.append( - buildLISTsAndPROPs(correlationList, indicesMap, elementCounts, detections.getForbiddenNeighbors(), - detections.getSetNeighbors(), elucidationOptions.isAllowHeteroHeteroBonds())) - .append("\n"); - // DEFF and FEXP as filters (good/bad lists) - stringBuilder.append(buildDEFFsAndFEXP(correlationList, indicesMap, elucidationOptions, - detections.getForbiddenNeighbors(), detections.getSetNeighbors())) - .append("\n"); - - return stringBuilder.toString(); - } + stringBuilder.append(stringBuilderMap.get("BOND") + .toString()) + .append("\n"); + stringBuilder.append(stringBuilderMap.get("HMBC") + .toString()) + .append("\n"); + stringBuilder.append(stringBuilderMap.get("COSY") + .toString()) + .append("\n"); + stringBuilder.append(stringBuilderMap.get("SHIX") + .toString()) + .append("\n"); + stringBuilder.append(stringBuilderMap.get("SHIH") + .toString()) + .append("\n"); - return ""; + // LIST PROP for certain limitations or properties of atoms in lists, e.g. hetero hetero bonds allowance + stringBuilder.append(buildLISTsAndPROPs(molecularConnectivityMap, elementCounts, + elucidationOptions.isAllowHeteroHeteroBonds())) + .append("\n"); + // DEFF and FEXP as filters (good/bad lists) + stringBuilder.append(buildDEFFsAndFEXP(elucidationOptions)) + .append("\n"); + + return stringBuilder.toString(); } } diff --git a/src/casekit/nmr/lsd/model/MolecularConnectivity.java b/src/casekit/nmr/lsd/model/MolecularConnectivity.java new file mode 100644 index 0000000..b36dd93 --- /dev/null +++ b/src/casekit/nmr/lsd/model/MolecularConnectivity.java @@ -0,0 +1,28 @@ +package casekit.nmr.lsd.model; + +import casekit.nmr.model.Signal; +import lombok.*; + +import java.util.Map; +import java.util.Set; + +@NoArgsConstructor +@AllArgsConstructor +@Getter +@Setter +@ToString +public class MolecularConnectivity { + + private int index; // e.g. 
index within PyLSD + private String atomType; + private Signal signal; + private Set protonCounts; + private Set hybridizations; + private Set hsqc; + private Map hmbc; + private Map cosy; + private Map>> forbiddenNeighbors; + private Map>> setNeighbors; + private Set fixedNeighbors; + private Set groupMembers; +} From e1b3aec82745ced6200fa0c6ae28577c7ff8989b Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Wed, 26 Jan 2022 14:12:32 +0100 Subject: [PATCH 355/405] chore: added Si properties to Constants --- src/casekit/nmr/lsd/Constants.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/casekit/nmr/lsd/Constants.java b/src/casekit/nmr/lsd/Constants.java index 51ebe35..042bfae 100644 --- a/src/casekit/nmr/lsd/Constants.java +++ b/src/casekit/nmr/lsd/Constants.java @@ -21,6 +21,7 @@ private static Map createNucleiMap() { nuclei.put("S", "33S"); nuclei.put("F", "19F"); nuclei.put("P", "31P"); + nuclei.put("Si", "29Si"); return Collections.unmodifiableMap(nuclei); } @@ -36,6 +37,7 @@ private static Map createDefaultHybridizationMap() { defaultHybridization.put("Cl", new int[]{3}); defaultHybridization.put("Br", new int[]{3}); defaultHybridization.put("P", new int[]{1, 2, 3}); + defaultHybridization.put("Si", new int[]{1, 2, 3}); return Collections.unmodifiableMap(defaultHybridization); } @@ -58,6 +60,7 @@ private static Map createDefaultProtonsCountPerValencyMap() { defaultProtonsCountPerValency.put("P", new int[]{0, 1, 2}); defaultProtonsCountPerValency.put("P5", new int[]{0, 1, 2, 3}); defaultProtonsCountPerValency.put("P35", new int[]{0, 1, 2, 3}); + defaultProtonsCountPerValency.put("Si", new int[]{0, 1, 2, 3}); return defaultProtonsCountPerValency; } @@ -73,6 +76,7 @@ private static Map createDefaultAtomLabelMap() { defaultAtomLabel.put("Cl", "Cl"); defaultAtomLabel.put("Br", "Br"); defaultAtomLabel.put("P", "P35"); + defaultAtomLabel.put("Si", "Si"); return Collections.unmodifiableMap(defaultAtomLabel); } From 24e32e278bb082ae77996d9a1a9c7d95ab9d815b 
Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Thu, 27 Jan 2022 09:33:48 +0100 Subject: [PATCH 356/405] feat: filter group members when building HMBC and COSY section --- src/casekit/nmr/lsd/Utilities.java | 21 +++- .../lsd/inputfile/PyLSDInputFileBuilder.java | 113 ++++++++++++------ 2 files changed, 98 insertions(+), 36 deletions(-) diff --git a/src/casekit/nmr/lsd/Utilities.java b/src/casekit/nmr/lsd/Utilities.java index 8db730d..bab9801 100644 --- a/src/casekit/nmr/lsd/Utilities.java +++ b/src/casekit/nmr/lsd/Utilities.java @@ -805,7 +805,26 @@ public static Map> buildMolecularConnectivi } } - return molecularConnectivityMap; } + + public static MolecularConnectivity getHeavyAtomMolecularConnectivity( + final Map> molecularConnectivityMap, final int protonIndex) { + for (final Map.Entry> entry : molecularConnectivityMap.entrySet()) { + for (final MolecularConnectivity molecularConnectivityTemp : entry.getValue() + .stream() + .filter(mc -> !mc.getAtomType() + .equals("H")) + .collect(Collectors.toSet())) { + if (molecularConnectivityTemp.getHsqc() + != null + && molecularConnectivityTemp.getHsqc() + .contains(protonIndex)) { + return molecularConnectivityTemp; + } + } + } + + return null; + } } diff --git a/src/casekit/nmr/lsd/inputfile/PyLSDInputFileBuilder.java b/src/casekit/nmr/lsd/inputfile/PyLSDInputFileBuilder.java index 58f18a4..6bf7fc1 100644 --- a/src/casekit/nmr/lsd/inputfile/PyLSDInputFileBuilder.java +++ b/src/casekit/nmr/lsd/inputfile/PyLSDInputFileBuilder.java @@ -89,8 +89,9 @@ private static Map buildStringBuilderMap( stringBuilderMap.put("SHIX", new StringBuilder()); stringBuilderMap.put("SHIH", new StringBuilder()); StringBuilder hybridizationStringBuilder, attachedProtonsCountStringBuilder; - int counter; - int firstOfEquivalenceIndexPyLSD; + int counter, firstOfEquivalenceIndexPyLSD; + Set groupMembers; + MolecularConnectivity molecularConnectivityGroupMember, molecularConnectivityHeavyAtom; for (final int correlationIndex : 
molecularConnectivityMap.keySet()) { firstOfEquivalenceIndexPyLSD = -1; for (final MolecularConnectivity molecularConnectivity : molecularConnectivityMap.get(correlationIndex)) { @@ -183,24 +184,38 @@ private static Map buildStringBuilderMap( if (molecularConnectivity.getHmbc() != null) { stringBuilder = stringBuilderMap.get("HMBC"); - for (final int protonIndexPyLSD : molecularConnectivity.getHmbc() - .keySet()) { - stringBuilder.append("HMBC ") - .append(buildPossibilitiesString(molecularConnectivity.getGroupMembers())) - .append(" ") - .append(protonIndexPyLSD) - .append(" ") - .append(molecularConnectivity.getHmbc() - .get(protonIndexPyLSD)[0]) - .append(" ") - .append(molecularConnectivity.getHmbc() - .get(protonIndexPyLSD)[1]) - .append(buildShiftsComment(molecularConnectivityMap, molecularConnectivity, - - Utilities.findMolecularConnectivityByIndex( - molecularConnectivityMap, "H", false, - protonIndexPyLSD))) - .append("\n"); + for (final int protonIndexInPyLSD : molecularConnectivity.getHmbc() + .keySet()) { + // filter out group members which are directly bonded to that proton + groupMembers = new HashSet<>(molecularConnectivity.getGroupMembers()); + for (final int groupMemberIndex : new HashSet<>(groupMembers)) { + molecularConnectivityGroupMember = Utilities.findMolecularConnectivityByIndex( + molecularConnectivityMap, molecularConnectivity.getAtomType(), false, + groupMemberIndex); + if (molecularConnectivityGroupMember.getHsqc() + != null + && molecularConnectivityGroupMember.getHsqc() + .contains(protonIndexInPyLSD)) { + groupMembers.remove(groupMemberIndex); + } + } + if (!groupMembers.isEmpty()) { + stringBuilder.append("HMBC ") + .append(buildPossibilitiesString(groupMembers)) + .append(" ") + .append(protonIndexInPyLSD) + .append(" ") + .append(molecularConnectivity.getHmbc() + .get(protonIndexInPyLSD)[0]) + .append(" ") + .append(molecularConnectivity.getHmbc() + .get(protonIndexInPyLSD)[1]) + .append(buildShiftsComment(molecularConnectivityMap, 
molecularConnectivity, + Utilities.findMolecularConnectivityByIndex( + molecularConnectivityMap, "H", false, + protonIndexInPyLSD))) + .append("\n"); + } } } if (molecularConnectivity.getFixedNeighbors() @@ -225,21 +240,49 @@ private static Map buildStringBuilderMap( stringBuilder = stringBuilderMap.get("COSY"); for (final int protonIndexInPyLSD : molecularConnectivity.getCosy() .keySet()) { - stringBuilder.append("COSY ") - .append(buildPossibilitiesString(molecularConnectivity.getGroupMembers())) - .append(" ") - .append(protonIndexInPyLSD) - .append(" ") - .append(molecularConnectivity.getCosy() - .get(protonIndexInPyLSD)[0]) - .append(" ") - .append(molecularConnectivity.getCosy() - .get(protonIndexInPyLSD)[1]) - .append(buildShiftsComment(molecularConnectivityMap, molecularConnectivity, - Utilities.findMolecularConnectivityByIndex( - molecularConnectivityMap, "H", false, - protonIndexInPyLSD))) - .append("\n"); + // filter out group members which + groupMembers = new HashSet<>(molecularConnectivity.getGroupMembers()); + // 1) use only one attached proton of a CH2 group (optional) + final Set alreadyFoundHeavyAtomIndex = new HashSet<>(); + for (final int groupMemberIndex : new HashSet<>(groupMembers)) { + molecularConnectivityHeavyAtom = Utilities.getHeavyAtomMolecularConnectivity( + molecularConnectivityMap, groupMemberIndex); + if (alreadyFoundHeavyAtomIndex.contains(molecularConnectivityHeavyAtom.getIndex())) { + groupMembers.remove(groupMemberIndex); + } else { + alreadyFoundHeavyAtomIndex.add(molecularConnectivityHeavyAtom.getIndex()); + } + } + // 2) would direct to itself when using COSY correlation + molecularConnectivityHeavyAtom = Utilities.getHeavyAtomMolecularConnectivity( + molecularConnectivityMap, protonIndexInPyLSD); + if (molecularConnectivityHeavyAtom + != null) { + for (final int groupMemberIndex : new HashSet<>(groupMembers)) { + if (molecularConnectivityHeavyAtom.getHsqc() + .contains(groupMemberIndex)) { + 
groupMembers.remove(groupMemberIndex); + } + } + if (!groupMembers.isEmpty()) { + stringBuilder.append("COSY ") + .append(buildPossibilitiesString(groupMembers)) + .append(" ") + .append(protonIndexInPyLSD) + .append(" ") + .append(molecularConnectivity.getCosy() + .get(protonIndexInPyLSD)[0]) + .append(" ") + .append(molecularConnectivity.getCosy() + .get(protonIndexInPyLSD)[1]) + .append(buildShiftsComment(molecularConnectivityMap, + molecularConnectivity, + Utilities.findMolecularConnectivityByIndex( + molecularConnectivityMap, "H", + false, protonIndexInPyLSD))) + .append("\n"); + } + } } } } From 52100c521f252f2dd0e3d14078b25c061c71ab43 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Fri, 28 Jan 2022 17:46:20 +0100 Subject: [PATCH 357/405] chore: moved Distance.java to model class --- src/casekit/nmr/similarity/Similarity.java | 4 ++++ src/casekit/nmr/similarity/Utilities.java | 1 + src/casekit/nmr/similarity/{ => model}/Distance.java | 2 +- 3 files changed, 6 insertions(+), 1 deletion(-) rename src/casekit/nmr/similarity/{ => model}/Distance.java (96%) diff --git a/src/casekit/nmr/similarity/Similarity.java b/src/casekit/nmr/similarity/Similarity.java index 91160d7..2ffed58 100644 --- a/src/casekit/nmr/similarity/Similarity.java +++ b/src/casekit/nmr/similarity/Similarity.java @@ -16,6 +16,7 @@ import casekit.nmr.model.Assignment; import casekit.nmr.model.Signal; import casekit.nmr.model.Spectrum; +import casekit.nmr.similarity.model.Distance; import casekit.nmr.utils.Statistics; import org.openscience.cdk.fingerprint.BitSetFingerprint; import org.openscience.cdk.similarity.Tanimoto; @@ -280,6 +281,8 @@ public static Assignment matchSpectra(final Spectrum spectrum1, final Spectrum s shiftTolerance, checkMultiplicity, checkEquivalencesCount, allowLowerEquivalencesCount); + System.out.println("\n" + + distanceList); final Assignment matchAssignment = new Assignment(); matchAssignment.setNuclei(spectrum1.getNuclei()); 
matchAssignment.initAssignments(spectrum1.getSignalCount()); @@ -297,6 +300,7 @@ public static Assignment matchSpectra(final Spectrum spectrum1, final Spectrum s assignedSpectrum2.add(distance.getSignalIndexSpectrum2()); } } + System.out.println(matchAssignment); return matchAssignment; } diff --git a/src/casekit/nmr/similarity/Utilities.java b/src/casekit/nmr/similarity/Utilities.java index 4d35655..e160813 100644 --- a/src/casekit/nmr/similarity/Utilities.java +++ b/src/casekit/nmr/similarity/Utilities.java @@ -2,6 +2,7 @@ import casekit.nmr.model.Signal; import casekit.nmr.model.Spectrum; +import casekit.nmr.similarity.model.Distance; import java.util.ArrayList; import java.util.Comparator; diff --git a/src/casekit/nmr/similarity/Distance.java b/src/casekit/nmr/similarity/model/Distance.java similarity index 96% rename from src/casekit/nmr/similarity/Distance.java rename to src/casekit/nmr/similarity/model/Distance.java index 36a0067..f50dc34 100644 --- a/src/casekit/nmr/similarity/Distance.java +++ b/src/casekit/nmr/similarity/model/Distance.java @@ -1,4 +1,4 @@ -package casekit.nmr.similarity; +package casekit.nmr.similarity.model; public class Distance { From a5ea98c4d1d5601e9c00ddcacd9888f978413ee4 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Fri, 4 Feb 2022 14:27:13 +0100 Subject: [PATCH 358/405] chore: separation and movements to new elucidation package and lsd subpackage --- .../nmr/{lsd => elucidation}/Constants.java | 2 +- .../nmr/{lsd => elucidation}/Utilities.java | 8 +- .../lsd}/LISTAndPROPUtilities.java | 4 +- .../lsd}/PyLSDInputFileBuilder.java | 12 +- .../nmr/elucidation/lsd/Utilities.java | 149 ++++++++++++++++++ .../model/Detections.java | 2 +- .../model/ElucidationOptions.java | 2 +- .../{lsd => elucidation}/model/Grouping.java | 2 +- .../model/MolecularConnectivity.java | 2 +- src/casekit/nmr/lsd/inputfile/Utilities.java | 68 -------- src/casekit/nmr/similarity/Similarity.java | 3 - src/casekit/nmr/utils/Utils.java | 2 +- 12 files changed, 
165 insertions(+), 91 deletions(-) rename src/casekit/nmr/{lsd => elucidation}/Constants.java (99%) rename src/casekit/nmr/{lsd => elucidation}/Utilities.java (99%) rename src/casekit/nmr/{lsd/inputfile => elucidation/lsd}/LISTAndPROPUtilities.java (99%) rename src/casekit/nmr/{lsd/inputfile => elucidation/lsd}/PyLSDInputFileBuilder.java (99%) create mode 100644 src/casekit/nmr/elucidation/lsd/Utilities.java rename src/casekit/nmr/{lsd => elucidation}/model/Detections.java (93%) rename src/casekit/nmr/{lsd => elucidation}/model/ElucidationOptions.java (91%) rename src/casekit/nmr/{lsd => elucidation}/model/Grouping.java (89%) rename src/casekit/nmr/{lsd => elucidation}/model/MolecularConnectivity.java (94%) delete mode 100644 src/casekit/nmr/lsd/inputfile/Utilities.java diff --git a/src/casekit/nmr/lsd/Constants.java b/src/casekit/nmr/elucidation/Constants.java similarity index 99% rename from src/casekit/nmr/lsd/Constants.java rename to src/casekit/nmr/elucidation/Constants.java index 042bfae..02ab23b 100644 --- a/src/casekit/nmr/lsd/Constants.java +++ b/src/casekit/nmr/elucidation/Constants.java @@ -1,4 +1,4 @@ -package casekit.nmr.lsd; +package casekit.nmr.elucidation; import java.util.Collections; import java.util.HashMap; diff --git a/src/casekit/nmr/lsd/Utilities.java b/src/casekit/nmr/elucidation/Utilities.java similarity index 99% rename from src/casekit/nmr/lsd/Utilities.java rename to src/casekit/nmr/elucidation/Utilities.java index bab9801..094ad66 100644 --- a/src/casekit/nmr/lsd/Utilities.java +++ b/src/casekit/nmr/elucidation/Utilities.java @@ -1,13 +1,11 @@ package casekit.nmr.lsd; -import casekit.io.FileSystem; -import casekit.nmr.lsd.model.Detections; -import casekit.nmr.lsd.model.Grouping; -import casekit.nmr.lsd.model.MolecularConnectivity; +import casekit.nmr.elucidation.model.Detections; +import casekit.nmr.elucidation.model.Grouping; +import casekit.nmr.elucidation.model.MolecularConnectivity; import casekit.nmr.model.Signal; import 
casekit.nmr.model.nmrium.Correlation; import casekit.nmr.model.nmrium.Link; -import casekit.nmr.utils.Statistics; import casekit.nmr.utils.Utils; import java.util.*; diff --git a/src/casekit/nmr/lsd/inputfile/LISTAndPROPUtilities.java b/src/casekit/nmr/elucidation/lsd/LISTAndPROPUtilities.java similarity index 99% rename from src/casekit/nmr/lsd/inputfile/LISTAndPROPUtilities.java rename to src/casekit/nmr/elucidation/lsd/LISTAndPROPUtilities.java index 7663aab..4a0ecf3 100644 --- a/src/casekit/nmr/lsd/inputfile/LISTAndPROPUtilities.java +++ b/src/casekit/nmr/elucidation/lsd/LISTAndPROPUtilities.java @@ -1,6 +1,6 @@ -package casekit.nmr.lsd.inputfile; +package casekit.nmr.elucidation.lsd; -import casekit.nmr.lsd.model.MolecularConnectivity; +import casekit.nmr.elucidation.model.MolecularConnectivity; import casekit.nmr.model.Signal; import casekit.nmr.utils.Statistics; diff --git a/src/casekit/nmr/lsd/inputfile/PyLSDInputFileBuilder.java b/src/casekit/nmr/elucidation/lsd/PyLSDInputFileBuilder.java similarity index 99% rename from src/casekit/nmr/lsd/inputfile/PyLSDInputFileBuilder.java rename to src/casekit/nmr/elucidation/lsd/PyLSDInputFileBuilder.java index 6bf7fc1..d4f8c46 100644 --- a/src/casekit/nmr/lsd/inputfile/PyLSDInputFileBuilder.java +++ b/src/casekit/nmr/elucidation/lsd/PyLSDInputFileBuilder.java @@ -1,12 +1,10 @@ package casekit.nmr.lsd.inputfile; -import casekit.nmr.lsd.Constants; -import casekit.nmr.lsd.Utilities; -import casekit.nmr.lsd.model.Detections; -import casekit.nmr.lsd.model.ElucidationOptions; -import casekit.nmr.lsd.model.Grouping; -import casekit.nmr.lsd.model.MolecularConnectivity; -import casekit.nmr.model.nmrium.Correlation; +import casekit.nmr.elucidation.Constants; +import casekit.nmr.elucidation.model.Detections; +import casekit.nmr.elucidation.model.ElucidationOptions; +import casekit.nmr.elucidation.model.Grouping; +import casekit.nmr.elucidation.model.MolecularConnectivity; import casekit.nmr.model.nmrium.Correlations; import 
casekit.nmr.utils.Statistics; import casekit.nmr.utils.Utils; diff --git a/src/casekit/nmr/elucidation/lsd/Utilities.java b/src/casekit/nmr/elucidation/lsd/Utilities.java new file mode 100644 index 0000000..da13aa0 --- /dev/null +++ b/src/casekit/nmr/elucidation/lsd/Utilities.java @@ -0,0 +1,149 @@ +package casekit.nmr.elucidation.lsd; + +import casekit.io.FileSystem; +import casekit.nmr.elucidation.Constants; +import casekit.nmr.model.Signal; +import casekit.nmr.model.nmrium.Correlation; +import casekit.nmr.utils.Statistics; +import casekit.nmr.utils.Utils; + +import java.util.*; +import java.util.stream.Collectors; + +public class Utilities { + public static String buildSSTR(final int sstrIndex, final String atomType, final List hybridization, + final List protonsCount) { + if (hybridization.isEmpty()) { + hybridization.addAll(Arrays.stream(Constants.defaultHybridizationMap.get(atomType)) + .boxed() + .collect(Collectors.toList())); + } + if (protonsCount.isEmpty()) { + protonsCount.addAll(Arrays.stream(Constants.defaultProtonsCountPerValencyMap.get(atomType)) + .boxed() + .collect(Collectors.toList())); + } + final StringBuilder stringBuilder = new StringBuilder(); + stringBuilder.append("SSTR S") + .append(sstrIndex) + .append(" ") + .append(atomType) + .append(" "); + if (hybridization.size() + == 1) { + stringBuilder.append(hybridization.get(0)) + .append(" "); + if (protonsCount.size() + == 1) { + stringBuilder.append(protonsCount.get(0)); + } else { + stringBuilder.append(buildMultipleValuesString(protonsCount)); + } + } else { + stringBuilder.append(buildMultipleValuesString(hybridization)); + stringBuilder.append(" "); + if (protonsCount.size() + == 1) { + stringBuilder.append(protonsCount.get(0)); + } else { + stringBuilder.append(buildMultipleValuesString(protonsCount)); + } + } + + return stringBuilder.toString(); + } + + private static String buildMultipleValuesString(final List values) { + final StringBuilder stringBuilder = new StringBuilder(); + 
stringBuilder.append("("); + for (int l = 0; l + < values.size(); l++) { + stringBuilder.append(values.get(l)); + if (l + < values.size() + - 1) { + stringBuilder.append(" "); + } + } + stringBuilder.append(")"); + + return stringBuilder.toString(); + } + + public static boolean writeNeighborsFile(final String pathToNeighborsFile, final List correlationList, + final Map indicesMap, + final Map>>> neighbors) { + final StringBuilder stringBuilder = new StringBuilder(); + Correlation correlation; + Signal signal; + String atomType; + int indexInPyLSD; + int sstrIndex = 1, sstrIndexCorrelation; + Map>> neighborsTemp; + for (int i = 0; i + < correlationList.size(); i++) { + if (neighbors.containsKey(i)) { + correlation = correlationList.get(i); + signal = Utils.extractSignalFromCorrelation(correlation); + atomType = correlation.getAtomType(); + neighborsTemp = neighbors.get(i); + + // put in the extracted information per correlation and equivalent + for (int k = 1; k + < indicesMap.get(i).length; k++) { + indexInPyLSD = (int) indicesMap.get(i)[k]; + for (final String neighborAtomType : neighborsTemp.keySet()) { + for (final Map.Entry> entryPerHybridization : neighborsTemp.get( + neighborAtomType) + .entrySet()) { + sstrIndexCorrelation = sstrIndex; + stringBuilder.append( + buildSSTR(sstrIndexCorrelation, atomType, correlation.getHybridization(), + correlation.getProtonsCount())); + stringBuilder.append("; ") + .append(atomType) + .append(" at ") + .append(signal + != null + ? 
Statistics.roundDouble(signal.getShift(0), 2) + : "?") + .append(" (") + .append(indexInPyLSD) + .append(")") + .append("\n"); + stringBuilder.append("ASGN S") + .append(sstrIndexCorrelation) + .append(" ") + .append(indexInPyLSD) + .append("\n"); + sstrIndex++; + + final List tempList = new ArrayList<>(); + if (entryPerHybridization.getKey() + != -1) { + tempList.add(entryPerHybridization.getKey()); + } + stringBuilder.append(buildSSTR(sstrIndex, neighborAtomType, tempList, + new ArrayList<>(entryPerHybridization.getValue()))) + .append("\n"); + stringBuilder.append("LINK S") + .append(sstrIndexCorrelation) + .append(" S") + .append(sstrIndex) + .append("\n") + .append("\n"); + sstrIndex++; + } + } + } + } + } + + System.out.println(stringBuilder); + + + return !stringBuilder.toString() + .isEmpty() + && FileSystem.writeFile(pathToNeighborsFile, stringBuilder.toString()); + } +} diff --git a/src/casekit/nmr/lsd/model/Detections.java b/src/casekit/nmr/elucidation/model/Detections.java similarity index 93% rename from src/casekit/nmr/lsd/model/Detections.java rename to src/casekit/nmr/elucidation/model/Detections.java index 0dd3cc7..1b1c680 100644 --- a/src/casekit/nmr/lsd/model/Detections.java +++ b/src/casekit/nmr/elucidation/model/Detections.java @@ -1,4 +1,4 @@ -package casekit.nmr.lsd.model; +package casekit.nmr.elucidation.model; import lombok.*; diff --git a/src/casekit/nmr/lsd/model/ElucidationOptions.java b/src/casekit/nmr/elucidation/model/ElucidationOptions.java similarity index 91% rename from src/casekit/nmr/lsd/model/ElucidationOptions.java rename to src/casekit/nmr/elucidation/model/ElucidationOptions.java index b0a4805..86dcb1e 100644 --- a/src/casekit/nmr/lsd/model/ElucidationOptions.java +++ b/src/casekit/nmr/elucidation/model/ElucidationOptions.java @@ -1,4 +1,4 @@ -package casekit.nmr.lsd.model; +package casekit.nmr.elucidation.model; import lombok.AllArgsConstructor; import lombok.Getter; diff --git a/src/casekit/nmr/lsd/model/Grouping.java 
b/src/casekit/nmr/elucidation/model/Grouping.java similarity index 89% rename from src/casekit/nmr/lsd/model/Grouping.java rename to src/casekit/nmr/elucidation/model/Grouping.java index 6b9d708..fad5bdf 100644 --- a/src/casekit/nmr/lsd/model/Grouping.java +++ b/src/casekit/nmr/elucidation/model/Grouping.java @@ -1,4 +1,4 @@ -package casekit.nmr.lsd.model; +package casekit.nmr.elucidation.model; import lombok.*; diff --git a/src/casekit/nmr/lsd/model/MolecularConnectivity.java b/src/casekit/nmr/elucidation/model/MolecularConnectivity.java similarity index 94% rename from src/casekit/nmr/lsd/model/MolecularConnectivity.java rename to src/casekit/nmr/elucidation/model/MolecularConnectivity.java index b36dd93..6ba4128 100644 --- a/src/casekit/nmr/lsd/model/MolecularConnectivity.java +++ b/src/casekit/nmr/elucidation/model/MolecularConnectivity.java @@ -1,4 +1,4 @@ -package casekit.nmr.lsd.model; +package casekit.nmr.elucidation.model; import casekit.nmr.model.Signal; import lombok.*; diff --git a/src/casekit/nmr/lsd/inputfile/Utilities.java b/src/casekit/nmr/lsd/inputfile/Utilities.java deleted file mode 100644 index d66ea41..0000000 --- a/src/casekit/nmr/lsd/inputfile/Utilities.java +++ /dev/null @@ -1,68 +0,0 @@ -package casekit.nmr.lsd.inputfile; - -import casekit.nmr.lsd.Constants; - -import java.util.Arrays; -import java.util.List; -import java.util.stream.Collectors; - -public class Utilities { - public static String buildSSTR(final int sstrIndex, final String atomType, final List hybridization, - final List protonsCount) { - if (hybridization.isEmpty()) { - hybridization.addAll(Arrays.stream(Constants.defaultHybridizationMap.get(atomType)) - .boxed() - .collect(Collectors.toList())); - } - if (protonsCount.isEmpty()) { - protonsCount.addAll(Arrays.stream(Constants.defaultProtonsCountPerValencyMap.get(atomType)) - .boxed() - .collect(Collectors.toList())); - } - final StringBuilder stringBuilder = new StringBuilder(); - stringBuilder.append("SSTR S") - 
.append(sstrIndex) - .append(" ") - .append(atomType) - .append(" "); - if (hybridization.size() - == 1) { - stringBuilder.append(hybridization.get(0)) - .append(" "); - if (protonsCount.size() - == 1) { - stringBuilder.append(protonsCount.get(0)); - } else { - stringBuilder.append(buildMultipleValuesString(protonsCount)); - } - } else { - stringBuilder.append(buildMultipleValuesString(hybridization)); - stringBuilder.append(" "); - if (protonsCount.size() - == 1) { - stringBuilder.append(protonsCount.get(0)); - } else { - stringBuilder.append(buildMultipleValuesString(protonsCount)); - } - } - - return stringBuilder.toString(); - } - - private static String buildMultipleValuesString(final List values) { - final StringBuilder stringBuilder = new StringBuilder(); - stringBuilder.append("("); - for (int l = 0; l - < values.size(); l++) { - stringBuilder.append(values.get(l)); - if (l - < values.size() - - 1) { - stringBuilder.append(" "); - } - } - stringBuilder.append(")"); - - return stringBuilder.toString(); - } -} diff --git a/src/casekit/nmr/similarity/Similarity.java b/src/casekit/nmr/similarity/Similarity.java index 2ffed58..97a9b3b 100644 --- a/src/casekit/nmr/similarity/Similarity.java +++ b/src/casekit/nmr/similarity/Similarity.java @@ -281,8 +281,6 @@ public static Assignment matchSpectra(final Spectrum spectrum1, final Spectrum s shiftTolerance, checkMultiplicity, checkEquivalencesCount, allowLowerEquivalencesCount); - System.out.println("\n" - + distanceList); final Assignment matchAssignment = new Assignment(); matchAssignment.setNuclei(spectrum1.getNuclei()); matchAssignment.initAssignments(spectrum1.getSignalCount()); @@ -300,7 +298,6 @@ public static Assignment matchSpectra(final Spectrum spectrum1, final Spectrum s assignedSpectrum2.add(distance.getSignalIndexSpectrum2()); } } - System.out.println(matchAssignment); return matchAssignment; } diff --git a/src/casekit/nmr/utils/Utils.java b/src/casekit/nmr/utils/Utils.java index 8751d58..5bc8546 100644 
--- a/src/casekit/nmr/utils/Utils.java +++ b/src/casekit/nmr/utils/Utils.java @@ -1,6 +1,6 @@ package casekit.nmr.utils; -import casekit.nmr.lsd.Constants; +import casekit.nmr.elucidation.Constants; import casekit.nmr.model.DataSet; import casekit.nmr.model.Signal; import casekit.nmr.model.Spectrum; From 725fa4ca87ebb8900fa2ba90307678ed50dae5ad Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Fri, 4 Feb 2022 14:32:05 +0100 Subject: [PATCH 359/405] chore: use one proton index only instead of multiple ones --- src/casekit/nmr/elucidation/Utilities.java | 361 ++++++++++++------ .../lsd/PyLSDInputFileBuilder.java | 72 ++-- .../model/MolecularConnectivity.java | 2 +- 3 files changed, 276 insertions(+), 159 deletions(-) diff --git a/src/casekit/nmr/elucidation/Utilities.java b/src/casekit/nmr/elucidation/Utilities.java index 094ad66..522ccfa 100644 --- a/src/casekit/nmr/elucidation/Utilities.java +++ b/src/casekit/nmr/elucidation/Utilities.java @@ -1,4 +1,4 @@ -package casekit.nmr.lsd; +package casekit.nmr.elucidation; import casekit.nmr.elucidation.model.Detections; import casekit.nmr.elucidation.model.Grouping; @@ -190,85 +190,6 @@ public static Map>> buildForbiddenNeighbors( return forbiddenNeighbors; } - public static boolean writeNeighborsFile(final String pathToNeighborsFile, final List correlationList, - final Map indicesMap, - final Map>>> neighbors) { - final StringBuilder stringBuilder = new StringBuilder(); - Correlation correlation; - Signal signal; - String atomType; - int indexInPyLSD; - int sstrIndex = 1, sstrIndexCorrelation; - Map>> neighborsTemp; - for (int i = 0; i - < correlationList.size(); i++) { - if (neighbors.containsKey(i)) { - correlation = correlationList.get(i); - signal = Utils.extractSignalFromCorrelation(correlation); - atomType = correlation.getAtomType(); - neighborsTemp = neighbors.get(i); - - // put in the extracted information per correlation and equivalent - for (int k = 1; k - < indicesMap.get(i).length; k++) { - indexInPyLSD = 
(int) indicesMap.get(i)[k]; - for (final String neighborAtomType : neighborsTemp.keySet()) { - for (final Map.Entry> entryPerHybridization : neighborsTemp.get( - neighborAtomType) - .entrySet()) { - sstrIndexCorrelation = sstrIndex; - stringBuilder.append( - casekit.nmr.lsd.inputfile.Utilities.buildSSTR(sstrIndexCorrelation, atomType, - correlation.getHybridization(), - correlation.getProtonsCount())); - stringBuilder.append("; ") - .append(atomType) - .append(" at ") - .append(signal - != null - ? Statistics.roundDouble(signal.getShift(0), 2) - : "?") - .append(" (") - .append(indexInPyLSD) - .append(")") - .append("\n"); - stringBuilder.append("ASGN S") - .append(sstrIndexCorrelation) - .append(" ") - .append(indexInPyLSD) - .append("\n"); - sstrIndex++; - - final List tempList = new ArrayList<>(); - if (entryPerHybridization.getKey() - != -1) { - tempList.add(entryPerHybridization.getKey()); - } - stringBuilder.append( - casekit.nmr.lsd.inputfile.Utilities.buildSSTR(sstrIndex, neighborAtomType, tempList, - new ArrayList<>( - entryPerHybridization.getValue()))) - .append("\n"); - stringBuilder.append("LINK S") - .append(sstrIndexCorrelation) - .append(" S") - .append(sstrIndex) - .append("\n") - .append("\n"); - sstrIndex++; - } - } - } - } - } - - System.out.println(stringBuilder); - - - return !stringBuilder.toString() - .isEmpty() - && FileSystem.writeFile(pathToNeighborsFile, stringBuilder.toString()); - } public static Map> buildFixedNeighborsByINADEQUATE(final List correlationList) { final Map> fixedNeighbors = new HashMap<>(); @@ -311,8 +232,8 @@ public static Map> buildFixedNeighborsByINADEQUATE(final L return fixedNeighbors; } - public static boolean hasMatch(final Correlation correlation1, final Correlation correlation2, - final double tolerance) { + private static boolean hasMatch(final Correlation correlation1, final Correlation correlation2, + final double tolerance) { final Signal signal1 = Utils.extractSignalFromCorrelation(correlation1); final 
Signal signal2 = Utils.extractSignalFromCorrelation(correlation2); if (signal1 @@ -356,8 +277,8 @@ public static boolean hasMatch(final Correlation correlation1, final Correlation } - public static Map>> findGroups(final List correlationList, - final Map tolerances) { + private static Map>> findGroups(final List correlationList, + final Map tolerances) { // cluster group index -> list of correlation index pair final Map>> groups = new HashMap<>(); int groupIndex = 0; @@ -404,7 +325,7 @@ public static Map>> findGroups(final List> transformGroups( + private static Map> transformGroups( final Map>> groups) { final Map> transformedGroups = new HashMap<>(); for (final Map.Entry>> atomTypeEntry : groups.entrySet()) { @@ -486,46 +407,100 @@ private static Set getHybridizations(final List correlatio return hybridizations; } - private static Map buildIndicesMap(final List correlationList) { + // public static Map buildIndicesMap(final List correlationList) { + // // index in correlation data -> [indices in PyLSD file...] 
+ // final Map indicesMap = new HashMap<>(); + // // init element indices within correlations with same order as in correlation data input + // int heavyAtomIndexInPyLSDFile = 1; + // int protonIndexInPyLSDFile = 1; + // int protonsToInsert, protonsCount; + // Correlation correlation; + // for (int i = 0; i + // < correlationList.size(); i++) { + // correlation = correlationList.get(i); + // // set entry for each correlation with consideration of equivalences + // if (correlation.getAtomType() + // .equals("H")) { + // protonsToInsert = 0; + // for (final Link link : correlation.getLink()) { + // if (link.getExperimentType() + // .equals("hsqc") + // || link.getExperimentType() + // .equals("hmqc")) { + // for (final int matchIndex : link.getMatch()) { + // protonsCount = correlationList.get(matchIndex) + // .getProtonsCount() + // .get(0); + // protonsToInsert += (correlation.getEquivalence() + // / (double) protonsCount) + // * correlationList.get(matchIndex) + // .getAttachment() + // .get("H") + // .size(); + // } + // } + // } + // indicesMap.put(i, new Integer[protonsToInsert]); + // for (int j = 0; j + // < protonsToInsert; j++) { + // indicesMap.get(i)[j] = protonIndexInPyLSDFile; + // protonIndexInPyLSDFile++; + // } + // } else { + // indicesMap.put(i, new Integer[correlation.getEquivalence()]); + // for (int j = 0; j + // < correlation.getEquivalence(); j++) { + // indicesMap.get(i)[j] = heavyAtomIndexInPyLSDFile; + // heavyAtomIndexInPyLSDFile++; + // } + // } + // } + // + // return indicesMap; + // } + + private static List getAttachedProtonIndices(final Correlation correlation) { + return correlation.getLink() + .stream() + .filter(link -> link.getExperimentType() + .equals("hsqc") + || link.getExperimentType() + .equals("hmqc")) + .map(Link::getMatch) + .reduce(new ArrayList<>(), ((l, curr) -> { + l.addAll(curr); + return l; + })); + } + + public static Map buildIndicesMap(final List correlationList) { // index in correlation data -> [indices in 
PyLSD file...] final Map indicesMap = new HashMap<>(); // init element indices within correlations with same order as in correlation data input int heavyAtomIndexInPyLSDFile = 1; int protonIndexInPyLSDFile = 1; - int protonsToInsert, protonsCount; + int protonsToInsert; Correlation correlation; + List attachedProtonIndices; for (int i = 0; i < correlationList.size(); i++) { correlation = correlationList.get(i); // set entry for each correlation with consideration of equivalences - if (correlation.getAtomType() - .equals("H")) { - protonsToInsert = 0; - for (final Link link : correlation.getLink()) { - if (link.getExperimentType() - .equals("hsqc") - || link.getExperimentType() - .equals("hmqc")) { - for (final int matchIndex : link.getMatch()) { - protonsCount = correlationList.get(matchIndex) - .getProtonsCount() - .get(0); - protonsToInsert += (correlation.getEquivalence() - / (double) protonsCount) - * correlationList.get(matchIndex) - .getAttachment() - .get("H") - .size(); - } - } - } - indicesMap.put(i, new Integer[protonsToInsert]); + if (!correlation.getAtomType() + .equals("H")) { + // insert for protons + attachedProtonIndices = getAttachedProtonIndices(correlation); + protonsToInsert = correlation.getEquivalence(); + final Integer[] arrayToInsert = new Integer[protonsToInsert]; for (int j = 0; j - < protonsToInsert; j++) { - indicesMap.get(i)[j] = protonIndexInPyLSDFile; + < arrayToInsert.length; j++) { + arrayToInsert[j] = protonIndexInPyLSDFile; protonIndexInPyLSDFile++; } - } else { + for (final int attachedProtonIndex : attachedProtonIndices) { + indicesMap.put(attachedProtonIndex, arrayToInsert); + } + // insert for heavy atom itself indicesMap.put(i, new Integer[correlation.getEquivalence()]); for (int j = 0; j < correlation.getEquivalence(); j++) { @@ -535,6 +510,15 @@ private static Map buildIndicesMap(final List c } } + System.out.println("\n -> indicesMap:\n"); + for (final int index : indicesMap.keySet()) { + System.out.println("i: " + + index 
+ + " -> " + + Arrays.toString(indicesMap.get(index))); + } + System.out.println("\n"); + return indicesMap; } @@ -635,6 +619,10 @@ public static Map> buildMolecularConnectivi for (int correlationIndex = 0; correlationIndex < correlationList.size(); correlationIndex++) { correlation = correlationList.get(correlationIndex); + // skip in case of non-linked proton which has no index in indices map + if (!indicesMap.containsKey(correlationIndex)) { + continue; + } for (int k = 0; k < indicesMap.get(correlationIndex).length; k++) { index = indicesMap.get(correlationIndex)[k]; @@ -651,12 +639,8 @@ public static Map> buildMolecularConnectivi .equals("H")) { for (final int matchIndex : link.getMatch()) { protonIndex = indicesMap.get(matchIndex)[k]; - if (molecularConnectivity.getHsqc() - == null) { - molecularConnectivity.setHsqc(new HashSet<>()); - } - molecularConnectivity.getHsqc() - .add(protonIndex); + + molecularConnectivity.setHsqc(protonIndex); } } else if (link.getExperimentType() .equals("hmbc") @@ -787,17 +771,13 @@ public static Map> buildMolecularConnectivi != null) { if (molecularConnectivityTemp.getHmbc() != null) { - for (final int hmbcIndexInPyLSD : molecularConnectivityTemp.getHsqc()) { - molecularConnectivityTemp.getHmbc() - .remove(hmbcIndexInPyLSD); - } + molecularConnectivityTemp.getHmbc() + .remove(molecularConnectivityTemp.getHsqc()); } if (molecularConnectivityTemp.getCosy() != null) { - for (final int cosyIndexInPyLSD : molecularConnectivityTemp.getHsqc()) { - molecularConnectivityTemp.getCosy() - .remove(cosyIndexInPyLSD); - } + molecularConnectivityTemp.getCosy() + .remove(molecularConnectivityTemp.getHsqc()); } } } @@ -817,7 +797,7 @@ public static MolecularConnectivity getHeavyAtomMolecularConnectivity( if (molecularConnectivityTemp.getHsqc() != null && molecularConnectivityTemp.getHsqc() - .contains(protonIndex)) { + == protonIndex) { return molecularConnectivityTemp; } } @@ -825,4 +805,131 @@ public static MolecularConnectivity 
getHeavyAtomMolecularConnectivity( return null; } + + // private static void extendMolecularConnectivityMapCombinationList( + // // correlation index -> equivalence index -> proton index in hsqc -> proton group member index + // final Map>> combinationsMap, + // final Map> originalMolecularConnectivityMap, + // final int correlationIndex, final int equivalenceIndex) { + // + // if (!originalMolecularConnectivityMap.containsKey(correlationIndex) + // || equivalenceIndex + // >= originalMolecularConnectivityMap.get(correlationIndex) + // .size()) { + // return; + // } + // + // final MolecularConnectivity molecularConnectivity = originalMolecularConnectivityMap.get(correlationIndex) + // .get(equivalenceIndex); + // System.out.println("\n--------\n" + // + molecularConnectivity + // + "\n"); + // + // if (!molecularConnectivity.getAtomType() + // .equals("H")) { + // MolecularConnectivity molecularConnectivityGroupMemberHeavyAtom, molecularConnectivityProton; + // for (final int groupMemberHeavyAtom : molecularConnectivity.getGroupMembers()) { + // molecularConnectivityGroupMemberHeavyAtom = findMolecularConnectivityByIndex( + // originalMolecularConnectivityMap, "H", true, groupMemberHeavyAtom); + // + // if (molecularConnectivityGroupMemberHeavyAtom.getAttachedProtonIndex() + // != null) { + // int i = 0; + // for (final int protonIndexInPyLSD : molecularConnectivityGroupMemberHeavyAtom.getHsqc()) { + // molecularConnectivityProton = findMolecularConnectivityByIndex(originalMolecularConnectivityMap, + // "H", false, protonIndexInPyLSD); + // + // for (final int protonGroupMemberIndexInPyLSD : molecularConnectivityProton.getGroupMembers()) { + // System.out.println(" -> " + // + correlationIndex + // + " -> " + // + equivalenceIndex + // + " -> protonIndexInPyLSD: " + // + protonIndexInPyLSD + // + " -> " + // + molecularConnectivityProton.getGroupMembers() + // + " -> group member: " + // + protonGroupMemberIndexInPyLSD); + // + // 
combinationsMap.putIfAbsent(correlationIndex, new HashMap<>()); + // combinationsMap.get(correlationIndex) + // .putIfAbsent(equivalenceIndex, new HashMap<>()); + // if (combinationsMap.get(correlationIndex) + // + // .entrySet() + // .stream() + // .noneMatch(entryPerEquivalence -> entryPerEquivalence.getValue() + // .entrySet() + // .stream() + // .anyMatch( + // entryPerHSQCIndex -> entryPerHSQCIndex.getValue() + // == protonGroupMemberIndexInPyLSD))) { + // combinationsMap.get(correlationIndex) + // .get(equivalenceIndex) + // .putIfAbsent(i, protonGroupMemberIndexInPyLSD); + // } + // + // + // if (equivalenceIndex + // + 1 + // < originalMolecularConnectivityMap.get(correlationIndex) + // .size()) { + // System.out.println(" --> equivalenceIndex++"); + // extendMolecularConnectivityMapCombinationList(combinationsMap, + // originalMolecularConnectivityMap, + // correlationIndex, equivalenceIndex + // + 1); + // } else { + // System.out.println(" --> correlationIndex++"); + // extendMolecularConnectivityMapCombinationList(combinationsMap, + // originalMolecularConnectivityMap, + // correlationIndex + // + 1, 0); + // } + // } + // i++; + // } + // } else { + // if (equivalenceIndex + // + 1 + // < originalMolecularConnectivityMap.get(correlationIndex) + // .size()) { + // System.out.println(" --> equivalenceIndex++"); + // extendMolecularConnectivityMapCombinationList(combinationsMap, originalMolecularConnectivityMap, + // correlationIndex, equivalenceIndex + // + 1); + // } else { + // System.out.println(" --> correlationIndex++"); + // extendMolecularConnectivityMapCombinationList(combinationsMap, originalMolecularConnectivityMap, + // correlationIndex + // + 1, 0); + // } + // } + // } + // } + // } + + public static List>> buildMolecularConnectivityMapCombinationList( + final List correlationList, final Detections detections, final Grouping grouping, + final Map defaultBondDistances) { + final List>> molecularConnectivityMapCombinationList = new 
ArrayList<>(); + + final Map> originalMolecularConnectivityMap = buildMolecularConnectivityMap( + correlationList, detections, grouping, defaultBondDistances); + + molecularConnectivityMapCombinationList.add(originalMolecularConnectivityMap); + + System.out.println("\n -> originalMolecularConnectivityMap: \n"); + System.out.println(originalMolecularConnectivityMap); + + // final Map>> combinationsMap = new HashMap<>(); + // extendMolecularConnectivityMapCombinationList(combinationsMap, originalMolecularConnectivityMap, 0, 0); + // + // System.out.println("\n\n" + // + combinationsMap + // + "\n\n"); + + return molecularConnectivityMapCombinationList; + + } } diff --git a/src/casekit/nmr/elucidation/lsd/PyLSDInputFileBuilder.java b/src/casekit/nmr/elucidation/lsd/PyLSDInputFileBuilder.java index d4f8c46..c38e151 100644 --- a/src/casekit/nmr/elucidation/lsd/PyLSDInputFileBuilder.java +++ b/src/casekit/nmr/elucidation/lsd/PyLSDInputFileBuilder.java @@ -1,4 +1,4 @@ -package casekit.nmr.lsd.inputfile; +package casekit.nmr.elucidation.lsd; import casekit.nmr.elucidation.Constants; import casekit.nmr.elucidation.model.Detections; @@ -167,17 +167,16 @@ private static Map buildStringBuilderMap( if (molecularConnectivity.getHsqc() != null) { stringBuilder = stringBuilderMap.get("HSQC"); - for (final int protonIndexPyLSD : molecularConnectivity.getHsqc()) { - stringBuilder.append("HSQC ") - .append(molecularConnectivity.getIndex()) - .append(" ") - .append(protonIndexPyLSD) - .append(buildShiftsComment(molecularConnectivityMap, molecularConnectivity, - Utilities.findMolecularConnectivityByIndex( - molecularConnectivityMap, "H", false, - protonIndexPyLSD))) - .append("\n"); - } + stringBuilder.append("HSQC ") + .append(molecularConnectivity.getIndex()) + .append(" ") + .append(molecularConnectivity.getHsqc()) + .append(buildShiftsComment(molecularConnectivityMap, molecularConnectivity, + casekit.nmr.elucidation.Utilities.findMolecularConnectivityByIndex( + 
molecularConnectivityMap, "H", false, + molecularConnectivity.getHsqc()))) + .append("\n"); + } if (molecularConnectivity.getHmbc() != null) { @@ -187,13 +186,13 @@ private static Map buildStringBuilderMap( // filter out group members which are directly bonded to that proton groupMembers = new HashSet<>(molecularConnectivity.getGroupMembers()); for (final int groupMemberIndex : new HashSet<>(groupMembers)) { - molecularConnectivityGroupMember = Utilities.findMolecularConnectivityByIndex( + molecularConnectivityGroupMember = casekit.nmr.elucidation.Utilities.findMolecularConnectivityByIndex( molecularConnectivityMap, molecularConnectivity.getAtomType(), false, groupMemberIndex); if (molecularConnectivityGroupMember.getHsqc() != null && molecularConnectivityGroupMember.getHsqc() - .contains(protonIndexInPyLSD)) { + == protonIndexInPyLSD) { groupMembers.remove(groupMemberIndex); } } @@ -209,7 +208,7 @@ private static Map buildStringBuilderMap( .append(molecularConnectivity.getHmbc() .get(protonIndexInPyLSD)[1]) .append(buildShiftsComment(molecularConnectivityMap, molecularConnectivity, - Utilities.findMolecularConnectivityByIndex( + casekit.nmr.elucidation.Utilities.findMolecularConnectivityByIndex( molecularConnectivityMap, "H", false, protonIndexInPyLSD))) .append("\n"); @@ -225,7 +224,7 @@ private static Map buildStringBuilderMap( .append(" ") .append(bondedIndexInPyLSD) .append(buildShiftsComment(molecularConnectivityMap, molecularConnectivity, - Utilities.findMolecularConnectivityByIndex( + casekit.nmr.elucidation.Utilities.findMolecularConnectivityByIndex( molecularConnectivityMap, "H", true, bondedIndexInPyLSD))) .append("\n"); @@ -243,7 +242,7 @@ private static Map buildStringBuilderMap( // 1) use only one attached proton of a CH2 group (optional) final Set alreadyFoundHeavyAtomIndex = new HashSet<>(); for (final int groupMemberIndex : new HashSet<>(groupMembers)) { - molecularConnectivityHeavyAtom = Utilities.getHeavyAtomMolecularConnectivity( + 
molecularConnectivityHeavyAtom = casekit.nmr.elucidation.Utilities.getHeavyAtomMolecularConnectivity( molecularConnectivityMap, groupMemberIndex); if (alreadyFoundHeavyAtomIndex.contains(molecularConnectivityHeavyAtom.getIndex())) { groupMembers.remove(groupMemberIndex); @@ -252,13 +251,13 @@ private static Map buildStringBuilderMap( } } // 2) would direct to itself when using COSY correlation - molecularConnectivityHeavyAtom = Utilities.getHeavyAtomMolecularConnectivity( + molecularConnectivityHeavyAtom = casekit.nmr.elucidation.Utilities.getHeavyAtomMolecularConnectivity( molecularConnectivityMap, protonIndexInPyLSD); if (molecularConnectivityHeavyAtom != null) { for (final int groupMemberIndex : new HashSet<>(groupMembers)) { if (molecularConnectivityHeavyAtom.getHsqc() - .contains(groupMemberIndex)) { + == groupMemberIndex) { groupMembers.remove(groupMemberIndex); } } @@ -275,7 +274,7 @@ private static Map buildStringBuilderMap( .get(protonIndexInPyLSD)[1]) .append(buildShiftsComment(molecularConnectivityMap, molecularConnectivity, - Utilities.findMolecularConnectivityByIndex( + casekit.nmr.elucidation.Utilities.findMolecularConnectivityByIndex( molecularConnectivityMap, "H", false, protonIndexInPyLSD))) .append("\n"); @@ -478,14 +477,30 @@ private static String buildDEFFsAndFEXP(final ElucidationOptions elucidationOpti return stringBuilder.toString(); } - public static String buildPyLSDInputFileContent(final Correlations correlations, final String mf, - final Detections detections, final Grouping grouping, - final ElucidationOptions elucidationOptions) { + + public static List buildPyLSDInputFileContentList(final Correlations correlations, final String mf, + final Detections detections, final Grouping grouping, + final ElucidationOptions elucidationOptions, + final Map defaultBondDistances) { if (mf - == null) { - return ""; + == null + || mf.isEmpty()) { + return new ArrayList<>(); } - final List correlationList = correlations.getValues(); + final List 
inputFilesContentList = new ArrayList<>(); + final List>> molecularConnectivityMapCombinationList = casekit.nmr.elucidation.Utilities.buildMolecularConnectivityMapCombinationList( + correlations.getValues(), detections, grouping, defaultBondDistances); + for (final Map> molecularConnectivityMap : molecularConnectivityMapCombinationList) { + inputFilesContentList.add(buildPyLSDInputFileContent(molecularConnectivityMap, mf, elucidationOptions)); + } + + return inputFilesContentList; + } + + public static String buildPyLSDInputFileContent( + final Map> molecularConnectivityMap, final String mf, + final ElucidationOptions elucidationOptions) { + final Map elementCounts = new LinkedHashMap<>(Utils.getMolecularFormulaElementCounts(mf)); final StringBuilder stringBuilder = new StringBuilder(); // create header @@ -503,11 +518,6 @@ public static String buildPyLSDInputFileContent(final Correlations correlations, .append("\n\n"); } - final Map defaultBondDistances = new HashMap<>(); - defaultBondDistances.put("hmbc", new Integer[]{2, 3}); - defaultBondDistances.put("cosy", new Integer[]{3, 4}); - final Map> molecularConnectivityMap = Utilities.buildMolecularConnectivityMap( - correlationList, detections, grouping, defaultBondDistances); final Map stringBuilderMap = buildStringBuilderMap(molecularConnectivityMap); stringBuilder.append(stringBuilderMap.get("MULT") .toString()) diff --git a/src/casekit/nmr/elucidation/model/MolecularConnectivity.java b/src/casekit/nmr/elucidation/model/MolecularConnectivity.java index 6ba4128..ccab8bc 100644 --- a/src/casekit/nmr/elucidation/model/MolecularConnectivity.java +++ b/src/casekit/nmr/elucidation/model/MolecularConnectivity.java @@ -18,7 +18,7 @@ public class MolecularConnectivity { private Signal signal; private Set protonCounts; private Set hybridizations; - private Set hsqc; + private Integer hsqc; private Map hmbc; private Map cosy; private Map>> forbiddenNeighbors; From 020ba05f68267991cb9b0a2f9de6e4a81f6b7a5f Mon Sep 17 
00:00:00 2001 From: michaelwenk Date: Thu, 10 Feb 2022 23:27:25 +0100 Subject: [PATCH 360/405] chore: added PathLength to Signal class --- src/casekit/nmr/dbservice/COCONUT.java | 37 +------------------ src/casekit/nmr/dbservice/NMRShiftDB.java | 4 +- .../nmr/model/{nmrium => }/PathLength.java | 2 +- src/casekit/nmr/model/Signal.java | 5 ++- src/casekit/nmr/model/nmrium/Signal2D.java | 1 + src/casekit/nmr/model/nmrium/Spectrum.java | 7 ++-- 6 files changed, 14 insertions(+), 42 deletions(-) rename src/casekit/nmr/model/{nmrium => }/PathLength.java (84%) diff --git a/src/casekit/nmr/dbservice/COCONUT.java b/src/casekit/nmr/dbservice/COCONUT.java index 64b4c24..779f551 100644 --- a/src/casekit/nmr/dbservice/COCONUT.java +++ b/src/casekit/nmr/dbservice/COCONUT.java @@ -36,12 +36,6 @@ public static List getDataSetsWithShiftPredictionFromCOCONUT(final Stri for (final String nucleus : nuclei) { final String atomType = casekit.nmr.utils.Utils.getAtomTypeFromNucleus(nucleus); final List atomIndices = Utils.getAtomTypeIndicesByElement(structure, atomType); - // spectrumPropertyString = ((String) structure.getProperty("CNMR_CALC_SHIFTS")).replaceAll("[\\n\\r]", - // ""); - // split = spectrumPropertyString.split("\\d+:"); - // spectrumPropertyString = structure.getProperty("Predicted " - // + nucleus - // + " shifts", String.class); spectrumPropertyString = structure.getProperty("Predicted " + nucleus + " shifts", String.class); @@ -64,20 +58,11 @@ public static List getDataSetsWithShiftPredictionFromCOCONUT(final Stri assignment = new Assignment(); assignment.setNuclei(spectrum.getNuclei()); assignment.initAssignments(spectrum.getSignalCount()); - // for (int i = 1; i for (int i = 0; i < split.length; i++) { - // split2 = split[i].split(","); split2 = split[i].split("\\s+"); - // calcShift = Double.parseDouble(split2[0].split("Exact = ")[1]); - atomIndex = atomIndices.get(i); //Integer.parseInt(split2[0].split("\\[")[0]) - //- 1; - // System.out.println("// COCONUT " - // + 
structure.getProperty("cdk:Title")); - // System.out.println(atomIndex); + atomIndex = atomIndices.get(i); calcShift = Double.parseDouble(split2[1]); - // System.out.println(calcShift); - // System.out.println(structure.getAtomCount()); multiplicity = Utils.getMultiplicityFromProtonsCount(structure.getAtom(atomIndex) .getImplicitHydrogenCount()) .toLowerCase(); @@ -93,27 +78,9 @@ public static List getDataSetsWithShiftPredictionFromCOCONUT(final Stri // add signal spectrum.addSignal( new Signal(new String[]{nucleus}, new Double[]{calcShift}, multiplicity, "signal", null, 1, - 0)); + 0, null)); } - // System.out.println("// COCONUT " - // + structure.getTitle()); - // System.out.println("// ???"); - // System.out.println("// " - // + casekit.nmr.HOSECodeUtilities.molecularFormularToString(mf)); - // for (int i = 0; i - // < spectrum.getSignalCount(); i++) { - // System.out.println(nucleus - // + ", " - // + spectrum.getSignal(i) - // .getShift(0) - // + ", " - // + spectrum.getSignal(i) - // .getMultiplicity() - // + ", 0.0, " - // + spectrum.getSignal(i) - // .getEquivalencesCount()); - // } // if no spectrum could be built or the number of signals in spectrum is different than the atom number in molecule if (Utils.getDifferenceSpectrumSizeAndMolecularFormulaCount(spectrum, Utils.getMolecularFormulaFromString( diff --git a/src/casekit/nmr/dbservice/NMRShiftDB.java b/src/casekit/nmr/dbservice/NMRShiftDB.java index 6c95474..624f670 100644 --- a/src/casekit/nmr/dbservice/NMRShiftDB.java +++ b/src/casekit/nmr/dbservice/NMRShiftDB.java @@ -393,8 +393,8 @@ public static Spectrum NMRShiftDBSpectrumToSpectrum(final String NMRShiftDBSpect : spectrumStringArray[i][2].trim() .toLowerCase(); spectrum.addSignal( - new Signal(new String[]{nucleus}, new Double[]{shift}, multiplicity, "signal", intensity, 1, - 0)); + new Signal(new String[]{nucleus}, new Double[]{shift}, multiplicity, "signal", intensity, 1, 0, + null)); } } catch (final Exception e) { return null; diff --git 
a/src/casekit/nmr/model/nmrium/PathLength.java b/src/casekit/nmr/model/PathLength.java similarity index 84% rename from src/casekit/nmr/model/nmrium/PathLength.java rename to src/casekit/nmr/model/PathLength.java index 75e8296..1515164 100644 --- a/src/casekit/nmr/model/nmrium/PathLength.java +++ b/src/casekit/nmr/model/PathLength.java @@ -1,4 +1,4 @@ -package casekit.nmr.model.nmrium; +package casekit.nmr.model; import lombok.*; diff --git a/src/casekit/nmr/model/Signal.java b/src/casekit/nmr/model/Signal.java index 828dff8..fbbe132 100644 --- a/src/casekit/nmr/model/Signal.java +++ b/src/casekit/nmr/model/Signal.java @@ -50,6 +50,7 @@ public class Signal { private Double intensity; private int equivalencesCount; private Integer phase; + private PathLength pathLength; public int getNDim() { @@ -86,7 +87,9 @@ public Double getShift(final int dim) { public Signal buildClone() { return new Signal(this.getNuclei() .clone(), this.shifts.clone(), this.multiplicity, this.kind, this.intensity, - this.equivalencesCount, this.phase); + this.equivalencesCount, this.phase, + new PathLength(this.pathLength.getMin(), this.pathLength.getMax(), + this.pathLength.getSource())); } @Override diff --git a/src/casekit/nmr/model/nmrium/Signal2D.java b/src/casekit/nmr/model/nmrium/Signal2D.java index 638b99c..914eb12 100644 --- a/src/casekit/nmr/model/nmrium/Signal2D.java +++ b/src/casekit/nmr/model/nmrium/Signal2D.java @@ -24,6 +24,7 @@ package casekit.nmr.model.nmrium; +import casekit.nmr.model.PathLength; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import lombok.*; diff --git a/src/casekit/nmr/model/nmrium/Spectrum.java b/src/casekit/nmr/model/nmrium/Spectrum.java index 369342e..0e74389 100644 --- a/src/casekit/nmr/model/nmrium/Spectrum.java +++ b/src/casekit/nmr/model/nmrium/Spectrum.java @@ -66,8 +66,8 @@ public casekit.nmr.model.Spectrum toSpectrum(final boolean considerSignalKind) { new Double[]{ signal1D.getDelta()}, signal1D.getMultiplicity(), - 
signal1D.getKind(), null, 0, - 0)); + signal1D.getKind(), null, 0, 0, + null)); } })); spectrum.addMetaInfo("solvent", (String) this.info.get("solvent")); @@ -92,7 +92,8 @@ public casekit.nmr.model.Spectrum toSpectrum(final boolean considerSignalKind) { .get("delta"), (Double) signal2D.getY() .get("delta")}, signal2D.getMultiplicity(), - signal2D.getKind(), null, 0, 0)); + signal2D.getKind(), null, 0, 0, + signal2D.getPathLength())); } })); spectrum.addMetaInfo("solvent", (String) this.info.get("solvent")); From 43451cdc9df1b8764cf3944a1dd4a6b29e1204dc Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Thu, 10 Feb 2022 23:28:48 +0100 Subject: [PATCH 361/405] chore: added PathLength to Signal class (2) --- src/casekit/nmr/utils/Utils.java | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/casekit/nmr/utils/Utils.java b/src/casekit/nmr/utils/Utils.java index 5bc8546..065b574 100644 --- a/src/casekit/nmr/utils/Utils.java +++ b/src/casekit/nmr/utils/Utils.java @@ -633,17 +633,22 @@ public static Signal extractSignalFromCorrelation(final Correlation correlation) ? (Integer) signalMap.get( "sign") : null); + // 1D signal if (signalMap.containsKey("delta")) { final Signal1D signal1D = new Signal1D(signal); signal1D.setDelta((double) signalMap.get("delta")); return new Signal(new String[]{Constants.nucleiMap.get(correlation.getAtomType())}, new Double[]{signal1D.getDelta()}, signal1D.getMultiplicity(), signal1D.getKind(), null, - correlation.getEquivalence(), signal1D.getSign()); + correlation.getEquivalence(), signal1D.getSign(), null); } else if (signalMap.containsKey("x")) { + // 2D signal final Signal2D signal2D = new Signal2D(signal); signal2D.setX((Map) signalMap.get("x")); signal2D.setY((Map) signalMap.get("y")); + if (signalMap.containsKey("pathLength")) { + signal2D.setPathLength((PathLength) signalMap.get("pathLength")); + } final double shift = link.getAxis() .equals("x") ? 
(double) signal2D.getX() @@ -653,7 +658,7 @@ public static Signal extractSignalFromCorrelation(final Correlation correlation) return new Signal(new String[]{Constants.nucleiMap.get(correlation.getAtomType())}, new Double[]{shift}, signal2D.getMultiplicity(), signal2D.getKind(), null, correlation.getEquivalence(), - signal2D.getSign()); + signal2D.getSign(), signal2D.getPathLength()); } return null; From c54db343bcc0acbf635aa3e7b6fb92d74967a2a4 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Thu, 10 Feb 2022 23:38:59 +0100 Subject: [PATCH 362/405] chore: use Lists in MolecularConnectivity.java instead of Sets --- src/casekit/nmr/elucidation/Utilities.java | 47 +++++++++---------- .../elucidation/lsd/LISTAndPROPUtilities.java | 12 ++--- .../lsd/PyLSDInputFileBuilder.java | 26 +++++----- .../nmr/elucidation/model/Grouping.java | 4 +- .../model/MolecularConnectivity.java | 13 +++-- 5 files changed, 53 insertions(+), 49 deletions(-) diff --git a/src/casekit/nmr/elucidation/Utilities.java b/src/casekit/nmr/elucidation/Utilities.java index 522ccfa..ad72e72 100644 --- a/src/casekit/nmr/elucidation/Utilities.java +++ b/src/casekit/nmr/elucidation/Utilities.java @@ -277,10 +277,10 @@ private static boolean hasMatch(final Correlation correlation1, final Correlatio } - private static Map>> findGroups(final List correlationList, - final Map tolerances) { + private static Map>> findGroups(final List correlationList, + final Map tolerances) { // cluster group index -> list of correlation index pair - final Map>> groups = new HashMap<>(); + final Map>> groups = new HashMap<>(); int groupIndex = 0; final Set inserted = new HashSet<>(); int foundGroupIndex; @@ -295,8 +295,8 @@ private static Map>> findGroups(final List> groupEntry : groups.get(correlation.getAtomType()) - .entrySet()) { + for (final Map.Entry> groupEntry : groups.get(correlation.getAtomType()) + .entrySet()) { if (groupEntry.getValue() .stream() .anyMatch(correlationIndex -> hasMatch(correlation, 
correlationList.get(correlationIndex), @@ -313,7 +313,7 @@ private static Map>> findGroups(final List()); + .put(groupIndex, new ArrayList<>()); groups.get(correlation.getAtomType()) .get(groupIndex) .add(i); @@ -326,12 +326,12 @@ private static Map>> findGroups(final List> transformGroups( - final Map>> groups) { + final Map>> groups) { final Map> transformedGroups = new HashMap<>(); - for (final Map.Entry>> atomTypeEntry : groups.entrySet()) { + for (final Map.Entry>> atomTypeEntry : groups.entrySet()) { transformedGroups.put(atomTypeEntry.getKey(), new HashMap<>()); - for (final Map.Entry> groupEntry : atomTypeEntry.getValue() - .entrySet()) { + for (final Map.Entry> groupEntry : atomTypeEntry.getValue() + .entrySet()) { for (final int correlationIndex : groupEntry.getValue()) { transformedGroups.get(atomTypeEntry.getKey()) .put(correlationIndex, groupEntry.getKey()); @@ -343,45 +343,44 @@ private static Map> transformGroups( } public static Grouping buildGroups(final List correlationList, final Map tolerances) { - final Map>> groups = findGroups(correlationList, tolerances); + final Map>> groups = findGroups(correlationList, tolerances); return new Grouping(tolerances, groups, transformGroups(groups)); } - private static Set getProtonCounts(final List correlationList, final int index) { + private static List getProtonCounts(final List correlationList, final int index) { final Correlation correlation = correlationList.get(index); if (correlation.getProtonsCount() != null && !correlation.getProtonsCount() .isEmpty()) { // if protonCounts is already given - return new HashSet<>(correlation.getProtonsCount()); + return correlation.getProtonsCount(); } - final Set protonCounts = new HashSet<>(); - for (int i = 0; i - < Constants.defaultProtonsCountPerValencyMap.get( - Constants.defaultAtomLabelMap.get(correlation.getAtomType())).length; i++) { - protonCounts.add(Constants.defaultProtonsCountPerValencyMap.get( - 
Constants.defaultAtomLabelMap.get(correlation.getAtomType()))[i]); + final List protonCounts = new ArrayList<>(); + final int[] defaultProtonCounts = Constants.defaultProtonsCountPerValencyMap.get( + Constants.defaultAtomLabelMap.get(correlation.getAtomType())); + for (final int defaultProtonCount : defaultProtonCounts) { + protonCounts.add(defaultProtonCount); } return protonCounts; } - private static Set getHybridizations(final List correlationList, final int index, - final Map> detectedHybridizations) { + private static List getHybridizations(final List correlationList, final int index, + final Map> detectedHybridizations) { final Correlation correlation = correlationList.get(index); - Set hybridizations = new HashSet<>(); + List hybridizations = new ArrayList<>(); if (correlation.getHybridization() != null && !correlation.getHybridization() .isEmpty()) { // if hybridization is already given - return new HashSet<>(correlation.getHybridization()); + return correlation.getHybridization(); } else { // if hybridization is not given then use the detected ones if (detectedHybridizations.containsKey(index)) { - hybridizations = new HashSet<>(detectedHybridizations.get(index)); + hybridizations = new ArrayList<>(detectedHybridizations.get(index)); } if (hybridizations.isEmpty() && correlation.getAtomType() diff --git a/src/casekit/nmr/elucidation/lsd/LISTAndPROPUtilities.java b/src/casekit/nmr/elucidation/lsd/LISTAndPROPUtilities.java index 4a0ecf3..1de3426 100644 --- a/src/casekit/nmr/elucidation/lsd/LISTAndPROPUtilities.java +++ b/src/casekit/nmr/elucidation/lsd/LISTAndPROPUtilities.java @@ -54,8 +54,8 @@ public static void insertGeneralLISTs(final StringBuilder stringBuilder, final M } } - private static String buildListKey(final String atomType, final Set hybridizations, - final Set protonsCounts) { + private static String buildListKey(final String atomType, final List hybridizations, + final List protonsCounts) { return atomType + "_" + (!hybridizations.isEmpty() 
@@ -84,7 +84,7 @@ public static void insertHeavyAtomCombinationLISTs(final StringBuilder stringBui != 1) { continue; } - listKey = buildListKey(molecularConnectivity.getAtomType(), new HashSet<>(), + listKey = buildListKey(molecularConnectivity.getAtomType(), new ArrayList<>(), // correlation.getHybridization(), molecularConnectivity.getProtonCounts()); atomIndicesMap.putIfAbsent(listKey, new HashSet<>()); @@ -197,9 +197,9 @@ public static void insertConnectionLISTsAndPROPs(final StringBuilder stringBuild .get(neighborHybridization)) { listKey = buildListKey(neighborAtomType, neighborHybridization == -1 - ? new HashSet<>() - : Set.of(neighborHybridization), - Set.of(protonsCount)); + ? new ArrayList<>() + : List.of(neighborHybridization), + List.of(protonsCount)); if (checkSkipPROPInsertion(listMap, usedPropsCount, listKey, mode)) { continue; } diff --git a/src/casekit/nmr/elucidation/lsd/PyLSDInputFileBuilder.java b/src/casekit/nmr/elucidation/lsd/PyLSDInputFileBuilder.java index c38e151..8c16cac 100644 --- a/src/casekit/nmr/elucidation/lsd/PyLSDInputFileBuilder.java +++ b/src/casekit/nmr/elucidation/lsd/PyLSDInputFileBuilder.java @@ -50,9 +50,8 @@ private static String buildELIM(final int elimP1, final int elimP2) { + elimP2; } - private static String buildPossibilitiesString(final Set possibilities) { + private static String buildPossibilitiesString(final Collection possibilities) { final StringBuilder possibilitiesStringBuilder = new StringBuilder(); - if (possibilities.size() > 1) { possibilitiesStringBuilder.append("("); @@ -88,7 +87,7 @@ private static Map buildStringBuilderMap( stringBuilderMap.put("SHIH", new StringBuilder()); StringBuilder hybridizationStringBuilder, attachedProtonsCountStringBuilder; int counter, firstOfEquivalenceIndexPyLSD; - Set groupMembers; + Set groupMembers; // use as a Set to remove the actual value and not at a list index MolecularConnectivity molecularConnectivityGroupMember, molecularConnectivityHeavyAtom; for (final int 
correlationIndex : molecularConnectivityMap.keySet()) { firstOfEquivalenceIndexPyLSD = -1; @@ -166,15 +165,16 @@ private static Map buildStringBuilderMap( stringBuilder.append("\n"); if (molecularConnectivity.getHsqc() != null) { + final List hsqcList = new ArrayList<>(molecularConnectivity.getHsqc()); stringBuilder = stringBuilderMap.get("HSQC"); stringBuilder.append("HSQC ") .append(molecularConnectivity.getIndex()) .append(" ") - .append(molecularConnectivity.getHsqc()) + .append(hsqcList.get(0)) .append(buildShiftsComment(molecularConnectivityMap, molecularConnectivity, casekit.nmr.elucidation.Utilities.findMolecularConnectivityByIndex( molecularConnectivityMap, "H", false, - molecularConnectivity.getHsqc()))) + hsqcList.get(0)))) .append("\n"); } @@ -185,14 +185,14 @@ private static Map buildStringBuilderMap( .keySet()) { // filter out group members which are directly bonded to that proton groupMembers = new HashSet<>(molecularConnectivity.getGroupMembers()); - for (final int groupMemberIndex : new HashSet<>(groupMembers)) { + for (final int groupMemberIndex : new ArrayList<>(groupMembers)) { molecularConnectivityGroupMember = casekit.nmr.elucidation.Utilities.findMolecularConnectivityByIndex( molecularConnectivityMap, molecularConnectivity.getAtomType(), false, groupMemberIndex); if (molecularConnectivityGroupMember.getHsqc() != null && molecularConnectivityGroupMember.getHsqc() - == protonIndexInPyLSD) { + .contains(protonIndexInPyLSD)) { groupMembers.remove(groupMemberIndex); } } @@ -237,27 +237,29 @@ private static Map buildStringBuilderMap( stringBuilder = stringBuilderMap.get("COSY"); for (final int protonIndexInPyLSD : molecularConnectivity.getCosy() .keySet()) { - // filter out group members which groupMembers = new HashSet<>(molecularConnectivity.getGroupMembers()); // 1) use only one attached proton of a CH2 group (optional) final Set alreadyFoundHeavyAtomIndex = new HashSet<>(); - for (final int groupMemberIndex : new HashSet<>(groupMembers)) { + 
for (final int groupMemberIndex : new ArrayList<>(groupMembers)) { molecularConnectivityHeavyAtom = casekit.nmr.elucidation.Utilities.getHeavyAtomMolecularConnectivity( molecularConnectivityMap, groupMemberIndex); - if (alreadyFoundHeavyAtomIndex.contains(molecularConnectivityHeavyAtom.getIndex())) { + if (molecularConnectivityHeavyAtom + == null + || alreadyFoundHeavyAtomIndex.contains( + molecularConnectivityHeavyAtom.getIndex())) { groupMembers.remove(groupMemberIndex); } else { alreadyFoundHeavyAtomIndex.add(molecularConnectivityHeavyAtom.getIndex()); } } - // 2) would direct to itself when using COSY correlation + // 2) filter out group members which would direct to itself when using COSY correlation molecularConnectivityHeavyAtom = casekit.nmr.elucidation.Utilities.getHeavyAtomMolecularConnectivity( molecularConnectivityMap, protonIndexInPyLSD); if (molecularConnectivityHeavyAtom != null) { for (final int groupMemberIndex : new HashSet<>(groupMembers)) { if (molecularConnectivityHeavyAtom.getHsqc() - == groupMemberIndex) { + .contains(groupMemberIndex)) { groupMembers.remove(groupMemberIndex); } } diff --git a/src/casekit/nmr/elucidation/model/Grouping.java b/src/casekit/nmr/elucidation/model/Grouping.java index fad5bdf..7060471 100644 --- a/src/casekit/nmr/elucidation/model/Grouping.java +++ b/src/casekit/nmr/elucidation/model/Grouping.java @@ -3,8 +3,8 @@ import lombok.*; import java.util.HashMap; +import java.util.List; import java.util.Map; -import java.util.Set; @AllArgsConstructor @NoArgsConstructor @@ -14,6 +14,6 @@ public class Grouping { Map tolerances = new HashMap<>(); - Map>> groups; + Map>> groups; Map> transformedGroups; } diff --git a/src/casekit/nmr/elucidation/model/MolecularConnectivity.java b/src/casekit/nmr/elucidation/model/MolecularConnectivity.java index ccab8bc..95099e9 100644 --- a/src/casekit/nmr/elucidation/model/MolecularConnectivity.java +++ b/src/casekit/nmr/elucidation/model/MolecularConnectivity.java @@ -3,6 +3,7 @@ import 
casekit.nmr.model.Signal; import lombok.*; +import java.util.List; import java.util.Map; import java.util.Set; @@ -16,13 +17,15 @@ public class MolecularConnectivity { private int index; // e.g. index within PyLSD private String atomType; private Signal signal; - private Set protonCounts; - private Set hybridizations; - private Integer hsqc; + private int equivalence; + private boolean pseudo; + private List protonCounts; + private List hybridizations; + private List hsqc; private Map hmbc; private Map cosy; private Map>> forbiddenNeighbors; private Map>> setNeighbors; - private Set fixedNeighbors; - private Set groupMembers; + private List fixedNeighbors; + private List groupMembers; } From 1098b175c924e6888a4702d245244a31630b2689 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Fri, 11 Feb 2022 00:51:22 +0100 Subject: [PATCH 363/405] chore: use MolecularConnectivity class --- src/casekit/nmr/elucidation/Utilities.java | 909 +++++++++++++-------- 1 file changed, 586 insertions(+), 323 deletions(-) diff --git a/src/casekit/nmr/elucidation/Utilities.java b/src/casekit/nmr/elucidation/Utilities.java index ad72e72..16dbb88 100644 --- a/src/casekit/nmr/elucidation/Utilities.java +++ b/src/casekit/nmr/elucidation/Utilities.java @@ -3,6 +3,7 @@ import casekit.nmr.elucidation.model.Detections; import casekit.nmr.elucidation.model.Grouping; import casekit.nmr.elucidation.model.MolecularConnectivity; +import casekit.nmr.model.PathLength; import casekit.nmr.model.Signal; import casekit.nmr.model.nmrium.Correlation; import casekit.nmr.model.nmrium.Link; @@ -406,46 +407,49 @@ private static List getHybridizations(final List correlati return hybridizations; } + // private static List getAttachedProtonIndices(final Correlation correlation) { + // return correlation.getLink() + // .stream() + // .filter(link -> link.getExperimentType() + // .equals("hsqc") + // || link.getExperimentType() + // .equals("hmqc")) + // .map(Link::getMatch) + // .reduce(new ArrayList<>(), ((l, curr) 
-> { + // l.addAll(curr); + // return l; + // })); + // } + // public static Map buildIndicesMap(final List correlationList) { // // index in correlation data -> [indices in PyLSD file...] // final Map indicesMap = new HashMap<>(); // // init element indices within correlations with same order as in correlation data input // int heavyAtomIndexInPyLSDFile = 1; // int protonIndexInPyLSDFile = 1; - // int protonsToInsert, protonsCount; + // int protonsToInsert; // Correlation correlation; + // List attachedProtonIndices; + // Integer[] arrayToInsert; // for (int i = 0; i // < correlationList.size(); i++) { // correlation = correlationList.get(i); // // set entry for each correlation with consideration of equivalences - // if (correlation.getAtomType() - // .equals("H")) { - // protonsToInsert = 0; - // for (final Link link : correlation.getLink()) { - // if (link.getExperimentType() - // .equals("hsqc") - // || link.getExperimentType() - // .equals("hmqc")) { - // for (final int matchIndex : link.getMatch()) { - // protonsCount = correlationList.get(matchIndex) - // .getProtonsCount() - // .get(0); - // protonsToInsert += (correlation.getEquivalence() - // / (double) protonsCount) - // * correlationList.get(matchIndex) - // .getAttachment() - // .get("H") - // .size(); - // } - // } - // } - // indicesMap.put(i, new Integer[protonsToInsert]); + // if (!correlation.getAtomType() + // .equals("H")) { + // // insert for protons + // attachedProtonIndices = getAttachedProtonIndices(correlation); + // protonsToInsert = correlation.getEquivalence(); + // arrayToInsert = new Integer[protonsToInsert]; // for (int j = 0; j - // < protonsToInsert; j++) { - // indicesMap.get(i)[j] = protonIndexInPyLSDFile; + // < arrayToInsert.length; j++) { + // arrayToInsert[j] = protonIndexInPyLSDFile; // protonIndexInPyLSDFile++; // } - // } else { + // for (final int attachedProtonIndex : attachedProtonIndices) { + // indicesMap.put(attachedProtonIndex, arrayToInsert); + // } + // // insert 
for heavy atom itself // indicesMap.put(i, new Integer[correlation.getEquivalence()]); // for (int j = 0; j // < correlation.getEquivalence(); j++) { @@ -455,55 +459,49 @@ private static List getHybridizations(final List correlati // } // } // + // System.out.println("\n -> indicesMap:\n"); + // for (final int index : indicesMap.keySet()) { + // System.out.println("i: " + // + index + // + " -> " + // + Arrays.toString(indicesMap.get(index))); + // } + // System.out.println("\n"); + // // return indicesMap; // } - private static List getAttachedProtonIndices(final Correlation correlation) { - return correlation.getLink() - .stream() - .filter(link -> link.getExperimentType() - .equals("hsqc") - || link.getExperimentType() - .equals("hmqc")) - .map(Link::getMatch) - .reduce(new ArrayList<>(), ((l, curr) -> { - l.addAll(curr); - return l; - })); - } - - public static Map buildIndicesMap(final List correlationList) { + public static Map buildIndicesMap(final List molecularConnectivityList) { // index in correlation data -> [indices in PyLSD file...] 
final Map indicesMap = new HashMap<>(); // init element indices within correlations with same order as in correlation data input int heavyAtomIndexInPyLSDFile = 1; int protonIndexInPyLSDFile = 1; int protonsToInsert; - Correlation correlation; - List attachedProtonIndices; - for (int i = 0; i - < correlationList.size(); i++) { - correlation = correlationList.get(i); + Integer[] arrayToInsert; + for (final MolecularConnectivity molecularConnectivity : molecularConnectivityList) { // set entry for each correlation with consideration of equivalences - if (!correlation.getAtomType() - .equals("H")) { - // insert for protons - attachedProtonIndices = getAttachedProtonIndices(correlation); - protonsToInsert = correlation.getEquivalence(); - final Integer[] arrayToInsert = new Integer[protonsToInsert]; - for (int j = 0; j - < arrayToInsert.length; j++) { - arrayToInsert[j] = protonIndexInPyLSDFile; - protonIndexInPyLSDFile++; - } - for (final int attachedProtonIndex : attachedProtonIndices) { - indicesMap.put(attachedProtonIndex, arrayToInsert); + if (!molecularConnectivity.getAtomType() + .equals("H")) { + if (molecularConnectivity.getHsqc() + != null) { + // insert for protons + protonsToInsert = molecularConnectivity.getEquivalence(); + arrayToInsert = new Integer[protonsToInsert]; + for (int j = 0; j + < arrayToInsert.length; j++) { + arrayToInsert[j] = protonIndexInPyLSDFile; + protonIndexInPyLSDFile++; + } + for (final int attachedProtonIndex : molecularConnectivity.getHsqc()) { + indicesMap.put(attachedProtonIndex, arrayToInsert); + } } // insert for heavy atom itself - indicesMap.put(i, new Integer[correlation.getEquivalence()]); + indicesMap.put(molecularConnectivity.getIndex(), new Integer[molecularConnectivity.getEquivalence()]); for (int j = 0; j - < correlation.getEquivalence(); j++) { - indicesMap.get(i)[j] = heavyAtomIndexInPyLSDFile; + < molecularConnectivity.getEquivalence(); j++) { + indicesMap.get(molecularConnectivity.getIndex())[j] = 
heavyAtomIndexInPyLSDFile; heavyAtomIndexInPyLSDFile++; } } @@ -542,82 +540,71 @@ public static MolecularConnectivity findMolecularConnectivityByIndex( return null; } - private static Set buildGroupMembers(final Map indicesMap, - final List correlationList, final int correlationIndex, - final Grouping grouping) { - final Correlation correlation = correlationList.get(correlationIndex); + private static List buildPossibilities(final Map indicesMap, + final List molecularConnectivityList, + final int correlationIndex, final Grouping grouping) { + final MolecularConnectivity molecularConnectivity = molecularConnectivityList.get(correlationIndex); // add possible indices from grouping final int groupIndex; final Set possibilities = new HashSet<>(); if (grouping.getTransformedGroups() - .containsKey(correlation.getAtomType())) { + .containsKey(molecularConnectivity.getAtomType()) + && grouping.getTransformedGroups() + .get(molecularConnectivity.getAtomType()) + .containsKey(correlationIndex)) { groupIndex = grouping.getTransformedGroups() - .get(correlation.getAtomType()) + .get(molecularConnectivity.getAtomType()) .get(correlationIndex); - for (final int groupCorrelationIndex : grouping.getGroups() - .get(correlation.getAtomType()) - .get(groupIndex)) { + for (final int groupMemberIndex : grouping.getGroups() + .get(molecularConnectivity.getAtomType()) + .get(groupIndex)) { + + // if (indicesMap.containsKey(groupMemberIndex)) { // add equivalence indices of group members - for (int k = 0; k - < indicesMap.get(groupCorrelationIndex).length; k++) { - possibilities.add(indicesMap.get(groupCorrelationIndex)[k]); - } + possibilities.addAll(Arrays.asList(indicesMap.get(groupMemberIndex))); + // } } } else { // add for equivalences only - for (int k = 0; k - < indicesMap.get(correlationIndex).length; k++) { - possibilities.add(indicesMap.get(correlationIndex)[k]); - } + possibilities.addAll(Arrays.asList(indicesMap.get(correlationIndex))); } - return possibilities; + return 
new ArrayList<>(possibilities); } - private static void addMolecularConnectivity( - final Map> molecularConnectivityMap, - final Map indicesMap, final List correlationList, - final int correlationIndex, final int index, final Detections detections, final Grouping grouping) { - final Correlation correlation = correlationList.get(correlationIndex); - molecularConnectivityMap.putIfAbsent(correlationIndex, new ArrayList<>()); - if (molecularConnectivityMap.get(correlationIndex) - .stream() - .noneMatch(molecularConnectivityTemp -> molecularConnectivityTemp.getIndex() - == index)) { - final MolecularConnectivity molecularConnectivity = new MolecularConnectivity(); - molecularConnectivity.setIndex(index); - molecularConnectivity.setAtomType(correlation.getAtomType()); - molecularConnectivity.setSignal(Utils.extractSignalFromCorrelation(correlation)); - - if (!correlationList.get(correlationIndex) - .getAtomType() - .equals("H")) { - molecularConnectivity.setProtonCounts(getProtonCounts(correlationList, correlationIndex)); - molecularConnectivity.setHybridizations( - getHybridizations(correlationList, correlationIndex, detections.getDetectedHybridizations())); - } - molecularConnectivity.setGroupMembers( - buildGroupMembers(indicesMap, correlationList, correlationIndex, grouping)); - molecularConnectivityMap.get(correlationIndex) - .add(molecularConnectivity); - } - } + // private static void addMolecularConnectivity( + // final Map> molecularConnectivityMap, + // final MolecularConnectivity molecularConnectivity, final int index) { + // molecularConnectivityMap.putIfAbsent(molecularConnectivity.getIndex(), new ArrayList<>()); + // // do not insert duplicates + // if (molecularConnectivityMap.get(molecularConnectivity.getIndex()) + // .stream() + // .noneMatch(molecularConnectivityTemp -> molecularConnectivityTemp.getIndex() + // == index)) { + // final MolecularConnectivity newMolecularConnectivity = Utils.cloneObject(molecularConnectivity, + // 
MolecularConnectivity.class) + // newMolecularConnectivity.setIndex(index); + // molecularConnectivity.setGroupMembers( + // buildGroupMembers(indicesMap, correlationList, correlationIndex, grouping)); + // + // molecularConnectivityMap.get(molecularConnectivity.getIndex()) + // .add(newMolecularConnectivity); + // } + // } public static Map> buildMolecularConnectivityMap( - final List correlationList, final Detections detections, final Grouping grouping, - final Map defaultBondDistances) { + final List molecularConnectivityList, final Detections detections, + final Grouping grouping, final Map defaultBondDistances) { - final Map indicesMap = buildIndicesMap(correlationList); + final Map indicesMap = buildIndicesMap(molecularConnectivityList); // correlation index -> [MolecularConnectivity] final Map> molecularConnectivityMap = new HashMap<>(); - Correlation correlation, correlation2; - int index, protonIndex; - Map signal2DMap; - Map pathLengthMap; - MolecularConnectivity molecularConnectivity; - for (int correlationIndex = 0; correlationIndex - < correlationList.size(); correlationIndex++) { - correlation = correlationList.get(correlationIndex); + MolecularConnectivity newMolecularConnectivity; + + int index, correlationIndex, protonCorrelationIndex, protonIndex; + PathLength pathLength; + for (final MolecularConnectivity molecularConnectivity : molecularConnectivityList) { + correlationIndex = molecularConnectivity.getIndex(); // skip in case of non-linked proton which has no index in indices map if (!indicesMap.containsKey(correlationIndex)) { continue; @@ -625,147 +612,142 @@ public static Map> buildMolecularConnectivi for (int k = 0; k < indicesMap.get(correlationIndex).length; k++) { index = indicesMap.get(correlationIndex)[k]; - addMolecularConnectivity(molecularConnectivityMap, indicesMap, correlationList, correlationIndex, index, - detections, grouping); - molecularConnectivity = findMolecularConnectivityByIndex(molecularConnectivityMap, - 
correlation.getAtomType(), false, index); - for (final Link link : correlation.getLink()) { - if (link.getExperimentType() - .equals("hsqc") - || link.getExperimentType() - .equals("hmqc") - && !correlation.getAtomType() - .equals("H")) { - for (final int matchIndex : link.getMatch()) { - protonIndex = indicesMap.get(matchIndex)[k]; + newMolecularConnectivity = new MolecularConnectivity(); + newMolecularConnectivity.setIndex(index); + newMolecularConnectivity.setAtomType(molecularConnectivity.getAtomType()); + newMolecularConnectivity.setEquivalence(1); + newMolecularConnectivity.setPseudo(molecularConnectivity.isPseudo()); + newMolecularConnectivity.setProtonCounts(molecularConnectivity.getProtonCounts()); + newMolecularConnectivity.setHybridizations(molecularConnectivity.getHybridizations()); + newMolecularConnectivity.setSignal(molecularConnectivity.getSignal()); - molecularConnectivity.setHsqc(protonIndex); - } - } else if (link.getExperimentType() - .equals("hmbc") - || link.getExperimentType() - .equals("cosy")) { - if (link.getExperimentType() - .equals("hmbc") - && correlation.getAtomType() - .equals("H")) { - continue; - } - if (link.getExperimentType() - .equals("cosy") - && !correlation.getAtomType() - .equals("H")) { - continue; - } - // ignore H atoms without any attachment to a heavy atom - if (correlationList.get(correlationIndex) - .getAtomType() - .equals("H") - && correlationList.get(correlationIndex) - .getAttachment() - .keySet() - .isEmpty()) { - continue; - } - signal2DMap = (Map) link.getSignal(); - if (signal2DMap - != null - && signal2DMap.containsKey("pathLength")) { - pathLengthMap = (Map) signal2DMap.get("pathLength"); - } else { - pathLengthMap = null; - } - for (final int matchIndex : link.getMatch()) { - // ignore linked H atoms without any attachment to a heavy atom - if (correlationList.get(matchIndex) - .getAtomType() - .equals("H") - && correlationList.get(matchIndex) - .getAttachment() - .keySet() - .isEmpty()) { - continue; - } 
- - if (link.getExperimentType() - .equals("hmbc")) { - for (int l = 0; l - < indicesMap.get(matchIndex).length; l++) { - protonIndex = indicesMap.get(matchIndex)[l]; - if (molecularConnectivity.getHmbc() - == null) { - molecularConnectivity.setHmbc(new HashMap<>()); - } - molecularConnectivity.getHmbc() - .put(protonIndex, pathLengthMap - == null - ? defaultBondDistances.get("hmbc") - : new Integer[]{(int) pathLengthMap.get( - "min"), (int) pathLengthMap.get( - "max")}); - } - } else { - if (k - < indicesMap.get(matchIndex).length) { - protonIndex = indicesMap.get(matchIndex)[k]; - if (molecularConnectivity.getCosy() - == null) { - molecularConnectivity.setCosy(new HashMap<>()); - } - molecularConnectivity.getCosy() - .put(protonIndex, pathLengthMap - == null - ? defaultBondDistances.get("cosy") - : new Integer[]{(int) pathLengthMap.get( - "min"), (int) pathLengthMap.get( - "max")}); + if (!molecularConnectivity.getAtomType() + .equals("H") + && molecularConnectivity.getHsqc() + != null) { + // using the first proton correlation index from HSQC list is enough because show will direct to same heavy atom index + protonIndex = indicesMap.get(molecularConnectivity.getHsqc() + .get(0))[k]; + newMolecularConnectivity.setHsqc(new ArrayList<>()); + newMolecularConnectivity.getHsqc() + .add(protonIndex); + } + if (!molecularConnectivity.getAtomType() + .equals("H") + && molecularConnectivity.getHmbc() + != null) { + pathLength = molecularConnectivity.getSignal() + .getPathLength(); + for (final Map.Entry entry : molecularConnectivity.getHmbc() + .entrySet()) { + protonCorrelationIndex = entry.getKey(); + // // ignore linked H atoms without any attachment to a heavy atom + // if (molecularConnectivityList.get(entry.getKey()) + // .getAtomType() + // .equals("H") + // && correlationList.get(matchIndex) + // .getAttachment() + // .keySet() + // .isEmpty()) { + // continue; + // } + if (indicesMap.containsKey(protonCorrelationIndex)) { + for (int l = 0; l + < 
indicesMap.get(protonCorrelationIndex).length; l++) { + protonIndex = indicesMap.get(protonCorrelationIndex)[l]; + if (newMolecularConnectivity.getHmbc() + == null) { + newMolecularConnectivity.setHmbc(new HashMap<>()); } + newMolecularConnectivity.getHmbc() + .put(protonIndex, pathLength + == null + ? defaultBondDistances.get("hmbc") + : new Integer[]{pathLength.getMin(), + pathLength.getMax()}); + } + } + } + } + if (molecularConnectivity.getAtomType() + .equals("H") + && molecularConnectivity.getCosy() + != null) { + pathLength = molecularConnectivity.getSignal() + .getPathLength(); + for (final Map.Entry entry : molecularConnectivity.getCosy() + .entrySet()) { + protonCorrelationIndex = entry.getKey(); + if (indicesMap.containsKey(protonCorrelationIndex) + && k + < indicesMap.get(protonCorrelationIndex).length) { + protonIndex = indicesMap.get(protonCorrelationIndex)[k]; + if (newMolecularConnectivity.getCosy() + == null) { + newMolecularConnectivity.setCosy(new HashMap<>()); } + newMolecularConnectivity.getCosy() + .put(protonIndex, pathLength + == null + ? 
defaultBondDistances.get("cosy") + : new Integer[]{pathLength.getMin(), + pathLength.getMax()}); } } } + + molecularConnectivityMap.putIfAbsent(correlationIndex, new ArrayList<>()); + molecularConnectivityMap.get(correlationIndex) + .add(newMolecularConnectivity); } // set detections - if (!correlation.getAtomType() - .equals("H") - && !correlation.isPseudo()) { - for (final MolecularConnectivity molecularConnectivityTemp : molecularConnectivityMap.get( - correlationIndex)) { + for (final MolecularConnectivity molecularConnectivityTemp : molecularConnectivityMap.get( + correlationIndex)) { + if (detections.getForbiddenNeighbors() + .containsKey(correlationIndex)) { molecularConnectivityTemp.setForbiddenNeighbors(detections.getForbiddenNeighbors() .get(correlationIndex)); + } + if (detections.getSetNeighbors() + .containsKey(correlationIndex)) { molecularConnectivityTemp.setSetNeighbors(detections.getSetNeighbors() .get(correlationIndex)); } + molecularConnectivityTemp.setGroupMembers( + buildPossibilities(indicesMap, molecularConnectivityList, correlationIndex, grouping)); } // fill in fixed neighbors - if (correlation.getEquivalence() + if (molecularConnectivity.getEquivalence() == 1 && detections.getFixedNeighbors() .containsKey(correlationIndex)) { + MolecularConnectivity molecularConnectivityTemp; for (final int correlationIndex2 : detections.getFixedNeighbors() .get(correlationIndex)) { - correlation2 = correlationList.get(correlationIndex2); + molecularConnectivityTemp = molecularConnectivityList.get(correlationIndex2); // use fixed neighbor information of atoms without equivalence equals 1 only - if (correlation2.getEquivalence() + if (molecularConnectivityTemp.getEquivalence() > 1) { continue; } index = indicesMap.get(correlationIndex)[0]; - molecularConnectivity = findMolecularConnectivityByIndex(molecularConnectivityMap, - correlation.getAtomType(), false, index); - if (molecularConnectivity.getFixedNeighbors() + newMolecularConnectivity = 
findMolecularConnectivityByIndex(molecularConnectivityMap, + molecularConnectivity.getAtomType(), + false, index); + if (newMolecularConnectivity.getFixedNeighbors() == null) { - molecularConnectivity.setFixedNeighbors(new HashSet<>()); + newMolecularConnectivity.setFixedNeighbors(new ArrayList<>()); } - molecularConnectivity.getFixedNeighbors() - .add(indicesMap.get(correlationIndex2)[0]); + newMolecularConnectivity.getFixedNeighbors() + .add(indicesMap.get(correlationIndex2)[0]); } } } + // filter out HMBC or COSY correlation to itself - for (final int correlationIndex : molecularConnectivityMap.keySet()) { + for (final int correlationIndexTemp : molecularConnectivityMap.keySet()) { for (final MolecularConnectivity molecularConnectivityTemp : molecularConnectivityMap.get( - correlationIndex)) { + correlationIndexTemp)) { if (molecularConnectivityTemp.getHsqc() != null) { if (molecularConnectivityTemp.getHmbc() @@ -785,6 +767,156 @@ public static Map> buildMolecularConnectivi return molecularConnectivityMap; } + public static List buildMolecularConnectivityList(final List correlationList, + final Detections detections, + final Grouping grouping, + final Map defaultBondDistances) { + final List molecularConnectivityList = new ArrayList<>(); + Correlation correlation; + int groupIndex; + Map signal2DMap; + Map pathLengthMap; + MolecularConnectivity molecularConnectivity; + for (int correlationIndex = 0; correlationIndex + < correlationList.size(); correlationIndex++) { + correlation = correlationList.get(correlationIndex); + molecularConnectivity = new MolecularConnectivity(); + molecularConnectivity.setIndex(correlationIndex); + molecularConnectivity.setAtomType(correlation.getAtomType()); + molecularConnectivity.setSignal(Utils.extractSignalFromCorrelation(correlation)); + molecularConnectivity.setEquivalence(correlation.getEquivalence()); + molecularConnectivity.setPseudo(correlation.isPseudo()); + + if (!correlation.getAtomType() + .equals("H")) { + 
molecularConnectivity.setProtonCounts(getProtonCounts(correlationList, correlationIndex)); + molecularConnectivity.setHybridizations( + getHybridizations(correlationList, correlationIndex, detections.getDetectedHybridizations())); + } + if (grouping.getGroups() + .containsKey(correlation.getAtomType())) { + groupIndex = grouping.getTransformedGroups() + .get(correlation.getAtomType()) + .get(correlationIndex); + molecularConnectivity.setGroupMembers(grouping.getGroups() + .get(correlation.getAtomType()) + .get(groupIndex)); + } + for (final Link link : correlation.getLink()) { + if (link.getExperimentType() + .equals("hsqc") + || link.getExperimentType() + .equals("hmqc") + && !correlation.getAtomType() + .equals("H")) { + if (molecularConnectivity.getHsqc() + == null) { + molecularConnectivity.setHsqc(new ArrayList<>()); + } + for (final int matchIndex : link.getMatch()) { + molecularConnectivity.getHsqc() + .add(matchIndex); + } + } else if (link.getExperimentType() + .equals("hmbc") + || link.getExperimentType() + .equals("cosy")) { + if (link.getExperimentType() + .equals("hmbc") + && correlation.getAtomType() + .equals("H")) { + continue; + } + if (link.getExperimentType() + .equals("cosy") + && !correlation.getAtomType() + .equals("H")) { + continue; + } + // ignore H atoms without any attachment to a heavy atom + if (correlationList.get(correlationIndex) + .getAtomType() + .equals("H") + && correlationList.get(correlationIndex) + .getAttachment() + .keySet() + .isEmpty()) { + continue; + } + signal2DMap = (Map) link.getSignal(); + if (signal2DMap + != null + && signal2DMap.containsKey("pathLength")) { + pathLengthMap = (Map) signal2DMap.get("pathLength"); + } else { + pathLengthMap = null; + } + for (final int matchIndex : link.getMatch()) { + // ignore linked H atoms without any attachment to a heavy atom + if (correlationList.get(matchIndex) + .getAtomType() + .equals("H") + && correlationList.get(matchIndex) + .getAttachment() + .keySet() + .isEmpty()) { 
+ continue; + } + if (link.getExperimentType() + .equals("hmbc")) { + if (molecularConnectivity.getHmbc() + == null) { + molecularConnectivity.setHmbc(new HashMap<>()); + } + molecularConnectivity.getHmbc() + .put(matchIndex, pathLengthMap + == null + ? defaultBondDistances.get("hmbc") + : new Integer[]{(int) pathLengthMap.get("min"), + (int) pathLengthMap.get("max")}); + } else { + if (molecularConnectivity.getCosy() + == null) { + molecularConnectivity.setCosy(new HashMap<>()); + } + molecularConnectivity.getCosy() + .put(matchIndex, pathLengthMap + == null + ? defaultBondDistances.get("cosy") + : new Integer[]{(int) pathLengthMap.get("min"), + (int) pathLengthMap.get("max")}); + } + } + } + } + + // set detections + if (detections.getForbiddenNeighbors() + .containsKey(correlationIndex)) { + molecularConnectivity.setForbiddenNeighbors(detections.getForbiddenNeighbors() + .get(correlationIndex)); + } + if (detections.getSetNeighbors() + .containsKey(correlationIndex)) { + molecularConnectivity.setSetNeighbors(detections.getSetNeighbors() + .get(correlationIndex)); + } + // fill in fixed neighbors + if (correlation.getEquivalence() + == 1 + && detections.getFixedNeighbors() + .containsKey(correlationIndex)) { + molecularConnectivity.setFixedNeighbors(new ArrayList<>()); + molecularConnectivity.getFixedNeighbors() + .add(correlationIndex); + } + + molecularConnectivityList.add(molecularConnectivity); + } + + return molecularConnectivityList; + } + public static MolecularConnectivity getHeavyAtomMolecularConnectivity( final Map> molecularConnectivityMap, final int protonIndex) { for (final Map.Entry> entry : molecularConnectivityMap.entrySet()) { @@ -796,7 +928,7 @@ public static MolecularConnectivity getHeavyAtomMolecularConnectivity( if (molecularConnectivityTemp.getHsqc() != null && molecularConnectivityTemp.getHsqc() - == protonIndex) { + .contains(protonIndex)) { return molecularConnectivityTemp; } } @@ -805,107 +937,224 @@ public static MolecularConnectivity 
getHeavyAtomMolecularConnectivity( return null; } - // private static void extendMolecularConnectivityMapCombinationList( - // // correlation index -> equivalence index -> proton index in hsqc -> proton group member index - // final Map>> combinationsMap, - // final Map> originalMolecularConnectivityMap, - // final int correlationIndex, final int equivalenceIndex) { - // - // if (!originalMolecularConnectivityMap.containsKey(correlationIndex) - // || equivalenceIndex - // >= originalMolecularConnectivityMap.get(correlationIndex) - // .size()) { + + // private static void buildCombinations(final Stack correlationListStack, + // final List> correlationListList, final Grouping grouping) { + // if (correlationListStack.isEmpty()) { // return; // } + // final Object[] objects = correlationListStack.pop(); + // final List correlationList = (List) objects[0]; + // final int correlationIndex = (int) objects[1]; + // final Set alreadySwapped = (Set) objects[2]; + // if (correlationIndex + // >= correlationList.size()) { + // correlationListList.add(correlationList); + // buildCombinations(correlationListStack, correlationListList, grouping); + // return; + // } + // final Correlation correlation = correlationList.get(correlationIndex); + // if (!grouping.getGroups() + // .containsKey(correlation.getAtomType()) + // || correlation.getAtomType() + // .equals("H")) { + // correlationListStack.push(new Object[]{correlationList, correlationIndex + // + 1, alreadySwapped}); + // buildCombinations(correlationListStack, correlationListList, grouping); + // return; + // } + // final List linkList = correlation.getLink() + // .stream() + // .filter(link -> link.getExperimentType() + // .equals("hsqc") + // || link.getExperimentType() + // .equals("hmqc")) + // .collect(Collectors.toList()); + // final List linkIndicesList = new ArrayList<>(); + // for (final Link link : linkList) { + // linkIndicesList.add(correlation.getLink() + // .indexOf(link)); + // } + // for (int l = 0; l + // 
< linkList.size(); l++) { + // final Link link = linkList.get(l); + // final int protonIndex = link.getMatch() + // .get(0); + // final int protonGroupIndex = grouping.getTransformedGroups() + // .get("H") + // .get(protonIndex); + // final List protonGroupMemberList = new ArrayList<>(grouping.getGroups() + // .get("H") + // .get(protonGroupIndex)); // - // final MolecularConnectivity molecularConnectivity = originalMolecularConnectivityMap.get(correlationIndex) - // .get(equivalenceIndex); - // System.out.println("\n--------\n" - // + molecularConnectivity - // + "\n"); + // for (final int protonGroupMemberIndex : protonGroupMemberList) { + // if (protonGroupMemberIndex + // == protonIndex) { + // continue; + // } + // System.out.println("\n\n swap at: " + // + correlationIndex + // + " -> " + // + l + // + " ---> " + // + protonIndex + // + " <-> " + // + protonGroupMemberIndex); + // final List indicesToSwap = new ArrayList<>(); + // indicesToSwap.add(protonIndex); + // indicesToSwap.add(protonGroupMemberIndex); + // indicesToSwap.sort(Integer::compare); + // System.out.println("-> indicesToSwap: " + // + indicesToSwap); + // final String swapKey = indicesToSwap.stream() + // .map(String::valueOf) + // .collect(Collectors.joining("_")); + // System.out.println("-> swapKey: " + // + swapKey); + // if (alreadySwapped.contains(swapKey)) { + // continue; + // } // - // if (!molecularConnectivity.getAtomType() - // .equals("H")) { - // MolecularConnectivity molecularConnectivityGroupMemberHeavyAtom, molecularConnectivityProton; - // for (final int groupMemberHeavyAtom : molecularConnectivity.getGroupMembers()) { - // molecularConnectivityGroupMemberHeavyAtom = findMolecularConnectivityByIndex( - // originalMolecularConnectivityMap, "H", true, groupMemberHeavyAtom); // - // if (molecularConnectivityGroupMemberHeavyAtom.getAttachedProtonIndex() - // != null) { - // int i = 0; - // for (final int protonIndexInPyLSD : molecularConnectivityGroupMemberHeavyAtom.getHsqc()) 
{ - // molecularConnectivityProton = findMolecularConnectivityByIndex(originalMolecularConnectivityMap, - // "H", false, protonIndexInPyLSD); + // // replace current link by new one in heavy atom correlation + // final Link newLink = Utils.cloneObject(link, Link.class); + // newLink.setId(Utils.generateID()); + // newLink.setAtomType(new String[]{link.getAtomType()[0], + // correlation.getAtomType()}); // H and current heavy atom type + // newLink.getMatch() + // .set(0, protonGroupMemberIndex); + // final Correlation clonedCorrelation = Utils.cloneObject(correlation, Correlation.class); + // clonedCorrelation.getLink() + // .set(linkIndicesList.get(l), newLink); + // System.out.println(" ------> replaced proton " + // + protonIndex + // + " by " + // + protonGroupMemberIndex); + // // remove link from link list in current proton correlation + // final Correlation clonedCorrelationProton = Utils.cloneObject(correlationList.get(protonIndex), + // Correlation.class); + // System.out.println(" ------> remove heavy atom " + // + correlationIndex + // + " from " + // + protonIndex); + // System.out.println(" --> link array size before: " + // + clonedCorrelationProton.getLink() + // .size()); + // clonedCorrelationProton.setLink(clonedCorrelationProton.getLink() + // .stream() + // .filter(linkTemp -> !(linkTemp.getExperimentID() + // .equals(link.getExperimentID()) + // && Objects.equals( + // ((Map) linkTemp.getSignal()).get( + // "id"), + // ((Map) link.getSignal()).get( + // "id")))) + // .collect(Collectors.toList())); + // System.out.println(" --> link array size after: " + // + clonedCorrelationProton.getLink() + // .size()); + // // add new link in proton group member correlation + // final Correlation clonedCorrelationGroupMemberProton = Utils.cloneObject( + // correlationList.get(protonGroupMemberIndex), Correlation.class); + // final Link clonedNewLink = Utils.cloneObject(newLink, Link.class); + // clonedNewLink.setId(Utils.generateID()); + // 
clonedNewLink.setAxis("x"); + // clonedNewLink.getMatch() + // .set(0, correlationIndex); + // clonedCorrelationGroupMemberProton.getLink() + // .add(clonedNewLink); + // System.out.println(" ------> added heavy atom " + // + correlationIndex + // + " to " + // + protonGroupMemberIndex); // - // for (final int protonGroupMemberIndexInPyLSD : molecularConnectivityProton.getGroupMembers()) { - // System.out.println(" -> " - // + correlationIndex - // + " -> " - // + equivalenceIndex - // + " -> protonIndexInPyLSD: " - // + protonIndexInPyLSD - // + " -> " - // + molecularConnectivityProton.getGroupMembers() - // + " -> group member: " - // + protonGroupMemberIndexInPyLSD); + // // clone the current correlation list and set the modified correlations + // final List clonedCorrelationList = Utils.cloneList(correlationList, Correlation.class); + // clonedCorrelationList.set(correlationIndex, clonedCorrelation); + // clonedCorrelationList.set(protonGroupMemberIndex, clonedCorrelationGroupMemberProton); // - // combinationsMap.putIfAbsent(correlationIndex, new HashMap<>()); - // combinationsMap.get(correlationIndex) - // .putIfAbsent(equivalenceIndex, new HashMap<>()); - // if (combinationsMap.get(correlationIndex) // - // .entrySet() - // .stream() - // .noneMatch(entryPerEquivalence -> entryPerEquivalence.getValue() - // .entrySet() - // .stream() - // .anyMatch( - // entryPerHSQCIndex -> entryPerHSQCIndex.getValue() - // == protonGroupMemberIndexInPyLSD))) { - // combinationsMap.get(correlationIndex) - // .get(equivalenceIndex) - // .putIfAbsent(i, protonGroupMemberIndexInPyLSD); - // } + // // if proton group member is attached to another heavy atom(s) then replace it there by current proton + // final List linkListGroupMemberProton = clonedCorrelationGroupMemberProton.getLink() + // .stream() + // .filter(linkTemp -> (linkTemp.getExperimentType() + // .equals("hsqc") + // || linkTemp.getExperimentType() + // .equals("hmqc")) + // && !linkTemp.getId() + // 
.equals(clonedNewLink.getId())) + // .collect( + // Collectors.toList()); + // for (final Link linkGroupMemberProton : linkListGroupMemberProton) { + // final int heavyAtomIndexGroupMemberProton = linkGroupMemberProton.getMatch() + // .get(0); + // final Correlation clonedLinkedHeavyAtomCorrelationOfGroupMemberProton = Utils.cloneObject( + // correlationList.get(heavyAtomIndexGroupMemberProton), Correlation.class); + // System.out.println(" ------> remove group member proton " + // + protonGroupMemberIndex + // + " from " + // + linkGroupMemberProton.getMatch() + // .get(0)); + // System.out.println(" --> link array size before: " + // + clonedLinkedHeavyAtomCorrelationOfGroupMemberProton.getLink() + // .size()); + // // remove link to previously added proton group member + // clonedLinkedHeavyAtomCorrelationOfGroupMemberProton.setLink( + // clonedLinkedHeavyAtomCorrelationOfGroupMemberProton.getLink() + // .stream() + // .filter(linkTemp -> !(linkTemp.getExperimentID() + // .equals(linkGroupMemberProton.getExperimentID()) + // && Objects.equals( + // ((Map) linkTemp.getSignal()).get( + // "id"), + // ((Map) linkGroupMemberProton.getSignal()).get( + // "id")))) + // .collect(Collectors.toList())); + // System.out.println(" --> link array size after: " + // + clonedLinkedHeavyAtomCorrelationOfGroupMemberProton.getLink() + // .size()); + // // add new link to current proton to heavy atom correlation and to current proton correlation + // Link clonedLink = Utils.cloneObject(link, Link.class); + // clonedLink.getMatch() + // .set(0, protonIndex); + // clonedLink.setId(Utils.generateID()); + // clonedLink.setAxis("y"); + // clonedLink.setAtomType(new String[]{link.getAtomType()[0], + // clonedLinkedHeavyAtomCorrelationOfGroupMemberProton.getAtomType()}); + // clonedLinkedHeavyAtomCorrelationOfGroupMemberProton.getLink() + // .add(clonedLink); + // System.out.println(" ------> added proton " + // + protonIndex + // + " to " + // + heavyAtomIndexGroupMemberProton); + // 
clonedLink = Utils.cloneObject(link, Link.class); + // clonedLink.getMatch() + // .set(0, heavyAtomIndexGroupMemberProton); + // clonedLink.setId(Utils.generateID()); + // clonedCorrelationProton.getLink() + // .add(clonedLink); + // System.out.println(" ------> added heavy atom " + // + heavyAtomIndexGroupMemberProton + // + " to " + // + protonIndex); // // - // if (equivalenceIndex - // + 1 - // < originalMolecularConnectivityMap.get(correlationIndex) - // .size()) { - // System.out.println(" --> equivalenceIndex++"); - // extendMolecularConnectivityMapCombinationList(combinationsMap, - // originalMolecularConnectivityMap, - // correlationIndex, equivalenceIndex - // + 1); - // } else { - // System.out.println(" --> correlationIndex++"); - // extendMolecularConnectivityMapCombinationList(combinationsMap, - // originalMolecularConnectivityMap, - // correlationIndex - // + 1, 0); - // } - // } - // i++; - // } - // } else { - // if (equivalenceIndex - // + 1 - // < originalMolecularConnectivityMap.get(correlationIndex) - // .size()) { - // System.out.println(" --> equivalenceIndex++"); - // extendMolecularConnectivityMapCombinationList(combinationsMap, originalMolecularConnectivityMap, - // correlationIndex, equivalenceIndex - // + 1); - // } else { - // System.out.println(" --> correlationIndex++"); - // extendMolecularConnectivityMapCombinationList(combinationsMap, originalMolecularConnectivityMap, - // correlationIndex - // + 1, 0); - // } + // // add it to new correlation list + // clonedCorrelationList.set(linkGroupMemberProton.getMatch() + // .get(0), + // clonedLinkedHeavyAtomCorrelationOfGroupMemberProton); // } + // clonedCorrelationList.set(protonIndex, clonedCorrelationProton); + // + // alreadySwapped.add(swapKey); + // + // correlationListStack.push(new Object[]{clonedCorrelationList, correlationIndex + // + 1, alreadySwapped}); // } // } + // correlationListStack.push(new Object[]{correlationList, correlationIndex + // + 1, alreadySwapped}); + // 
buildCombinations(correlationListStack, correlationListList, grouping); // } public static List>> buildMolecularConnectivityMapCombinationList( @@ -913,22 +1162,36 @@ public static List>> buildMolecularConn final Map defaultBondDistances) { final List>> molecularConnectivityMapCombinationList = new ArrayList<>(); - final Map> originalMolecularConnectivityMap = buildMolecularConnectivityMap( + final List originalMolecularConnectivityList = buildMolecularConnectivityList( correlationList, detections, grouping, defaultBondDistances); + System.out.println("\n -> originalMolecularConnectivityList: \n"); + System.out.println(originalMolecularConnectivityList); + + final Map> originalMolecularConnectivityMap = buildMolecularConnectivityMap( + originalMolecularConnectivityList, detections, grouping, defaultBondDistances); molecularConnectivityMapCombinationList.add(originalMolecularConnectivityMap); System.out.println("\n -> originalMolecularConnectivityMap: \n"); System.out.println(originalMolecularConnectivityMap); - // final Map>> combinationsMap = new HashMap<>(); - // extendMolecularConnectivityMapCombinationList(combinationsMap, originalMolecularConnectivityMap, 0, 0); + + // final Stack stack = new Stack<>(); + // // experiment -> [swapKey] + // final Map> swapped = new HashMap<>(); + // stack.push(new Object[]{correlationList, 0, swapped}); + // final List> correlationListList = new ArrayList<>(); + // buildCombinations(stack, correlationListList, grouping); // - // System.out.println("\n\n" - // + combinationsMap + // for (final List correlationListTemp : correlationListList) { + // molecularConnectivityMapCombinationList.add( + // buildMolecularConnectivityMapWithIndices(correlationListTemp, detections, grouping, + // defaultBondDistances)); + // } + // System.out.println("\n\n -> correlationListList size: " + // + correlationListList.size() // + "\n\n"); return molecularConnectivityMapCombinationList; - } } From caec67719503c783a2b35c571cc35b6e6c1812f3 Mon Sep 
17 00:00:00 2001 From: michaelwenk Date: Fri, 11 Feb 2022 13:56:55 +0100 Subject: [PATCH 364/405] fix: avoid NullPointerException and duplicated entries in PYLSD input file --- src/casekit/nmr/elucidation/Utilities.java | 5 +- .../lsd/PyLSDInputFileBuilder.java | 90 ++++++++++++++----- 2 files changed, 72 insertions(+), 23 deletions(-) diff --git a/src/casekit/nmr/elucidation/Utilities.java b/src/casekit/nmr/elucidation/Utilities.java index 16dbb88..c4693e0 100644 --- a/src/casekit/nmr/elucidation/Utilities.java +++ b/src/casekit/nmr/elucidation/Utilities.java @@ -794,7 +794,10 @@ public static List buildMolecularConnectivityList(final L getHybridizations(correlationList, correlationIndex, detections.getDetectedHybridizations())); } if (grouping.getGroups() - .containsKey(correlation.getAtomType())) { + .containsKey(correlation.getAtomType()) + && grouping.getTransformedGroups() + .get(correlation.getAtomType()) + .containsKey(correlationIndex)) { groupIndex = grouping.getTransformedGroups() .get(correlation.getAtomType()) .get(correlationIndex); diff --git a/src/casekit/nmr/elucidation/lsd/PyLSDInputFileBuilder.java b/src/casekit/nmr/elucidation/lsd/PyLSDInputFileBuilder.java index 8c16cac..11dd710 100644 --- a/src/casekit/nmr/elucidation/lsd/PyLSDInputFileBuilder.java +++ b/src/casekit/nmr/elucidation/lsd/PyLSDInputFileBuilder.java @@ -76,19 +76,21 @@ private static String buildPossibilitiesString(final Collection possibi private static Map buildStringBuilderMap( final Map> molecularConnectivityMap) { - StringBuilder stringBuilder; final Map stringBuilderMap = new HashMap<>(); - stringBuilderMap.put("MULT", new StringBuilder()); - stringBuilderMap.put("HSQC", new StringBuilder()); - stringBuilderMap.put("HMBC", new StringBuilder()); - stringBuilderMap.put("COSY", new StringBuilder()); - stringBuilderMap.put("BOND", new StringBuilder()); - stringBuilderMap.put("SHIX", new StringBuilder()); - stringBuilderMap.put("SHIH", new StringBuilder()); - StringBuilder 
hybridizationStringBuilder, attachedProtonsCountStringBuilder; + final Map> stringListMap = new HashMap<>(); + stringListMap.put("MULT", new ArrayList<>()); + stringListMap.put("HSQC", new ArrayList<>()); + stringListMap.put("HMBC", new ArrayList<>()); + stringListMap.put("COSY", new ArrayList<>()); + stringListMap.put("BOND", new ArrayList<>()); + stringListMap.put("SHIX", new ArrayList<>()); + stringListMap.put("SHIH", new ArrayList<>()); + StringBuilder stringBuilder, hybridizationStringBuilder, attachedProtonsCountStringBuilder; + List stringList; int counter, firstOfEquivalenceIndexPyLSD; Set groupMembers; // use as a Set to remove the actual value and not at a list index MolecularConnectivity molecularConnectivityGroupMember, molecularConnectivityHeavyAtom; + final Set addedKeysSHIH = new HashSet<>(); for (final int correlationIndex : molecularConnectivityMap.keySet()) { firstOfEquivalenceIndexPyLSD = -1; for (final MolecularConnectivity molecularConnectivity : molecularConnectivityMap.get(correlationIndex)) { @@ -143,7 +145,9 @@ private static Map buildStringBuilderMap( > 1) { attachedProtonsCountStringBuilder.append(")"); } - stringBuilder = stringBuilderMap.get("MULT"); + // MULT section + stringList = stringListMap.get("MULT"); + stringBuilder = new StringBuilder(); stringBuilder.append("MULT ") .append(molecularConnectivity.getIndex()) .append(" ") @@ -163,24 +167,29 @@ private static Map buildStringBuilderMap( .append(firstOfEquivalenceIndexPyLSD); } stringBuilder.append("\n"); + stringList.add(stringBuilder.toString()); + // HSQC section if (molecularConnectivity.getHsqc() != null) { - final List hsqcList = new ArrayList<>(molecularConnectivity.getHsqc()); - stringBuilder = stringBuilderMap.get("HSQC"); + stringList = stringListMap.get("HSQC"); + stringBuilder = new StringBuilder(); stringBuilder.append("HSQC ") .append(molecularConnectivity.getIndex()) .append(" ") - .append(hsqcList.get(0)) + .append(molecularConnectivity.getHsqc() + .get(0)) 
.append(buildShiftsComment(molecularConnectivityMap, molecularConnectivity, casekit.nmr.elucidation.Utilities.findMolecularConnectivityByIndex( molecularConnectivityMap, "H", false, - hsqcList.get(0)))) + molecularConnectivity.getHsqc() + .get(0)))) .append("\n"); - + stringList.add(stringBuilder.toString()); } + // HMBC section if (molecularConnectivity.getHmbc() != null) { - stringBuilder = stringBuilderMap.get("HMBC"); + stringList = stringListMap.get("HMBC"); for (final int protonIndexInPyLSD : molecularConnectivity.getHmbc() .keySet()) { // filter out group members which are directly bonded to that proton @@ -197,6 +206,7 @@ private static Map buildStringBuilderMap( } } if (!groupMembers.isEmpty()) { + stringBuilder = new StringBuilder(); stringBuilder.append("HMBC ") .append(buildPossibilitiesString(groupMembers)) .append(" ") @@ -212,13 +222,18 @@ private static Map buildStringBuilderMap( molecularConnectivityMap, "H", false, protonIndexInPyLSD))) .append("\n"); + if (!stringList.contains(stringBuilder.toString())) { + stringList.add(stringBuilder.toString()); + } } } } + // BOND section if (molecularConnectivity.getFixedNeighbors() != null) { - stringBuilder = stringBuilderMap.get("BOND"); + stringList = stringListMap.get("BOND"); for (final int bondedIndexInPyLSD : molecularConnectivity.getFixedNeighbors()) { + stringBuilder = new StringBuilder(); stringBuilder.append("BOND ") .append(molecularConnectivity.getIndex()) .append(" ") @@ -228,13 +243,17 @@ private static Map buildStringBuilderMap( molecularConnectivityMap, "H", true, bondedIndexInPyLSD))) .append("\n"); + if (!stringList.contains(stringBuilder.toString())) { + stringList.add(stringBuilder.toString()); + } } } } else if (molecularConnectivity.getAtomType() .equals("H")) { + // COSY section if (molecularConnectivity.getCosy() != null) { - stringBuilder = stringBuilderMap.get("COSY"); + stringList = stringListMap.get("COSY"); for (final int protonIndexInPyLSD : molecularConnectivity.getCosy() 
.keySet()) { groupMembers = new HashSet<>(molecularConnectivity.getGroupMembers()); @@ -264,6 +283,7 @@ private static Map buildStringBuilderMap( } } if (!groupMembers.isEmpty()) { + stringBuilder = new StringBuilder(); stringBuilder.append("COSY ") .append(buildPossibilitiesString(groupMembers)) .append(" ") @@ -280,17 +300,22 @@ private static Map buildStringBuilderMap( molecularConnectivityMap, "H", false, protonIndexInPyLSD))) .append("\n"); + if (!stringList.contains(stringBuilder.toString())) { + stringList.add(stringBuilder.toString()); + } } } } } } + // SHIH/SHIX section if (molecularConnectivity.getSignal() != null) { - stringBuilder = stringBuilderMap.get(molecularConnectivity.getAtomType() - .equals("H") - ? "SHIH" - : "SHIX"); + stringList = stringListMap.get(molecularConnectivity.getAtomType() + .equals("H") + ? "SHIH" + : "SHIX"); + stringBuilder = new StringBuilder(); stringBuilder.append(molecularConnectivity.getAtomType() .equals("H") ? "SHIH" @@ -301,9 +326,30 @@ private static Map buildStringBuilderMap( .append(Statistics.roundDouble(molecularConnectivity.getSignal() .getShift(0), 5)) .append("\n"); + if (!stringList.contains(stringBuilder.toString())) { + if (!molecularConnectivity.getAtomType() + .equals("H")) { + stringList.add(stringBuilder.toString()); + } else if (!addedKeysSHIH.contains(molecularConnectivity.getIndex())) { + stringList.add(stringBuilder.toString()); + addedKeysSHIH.add(molecularConnectivity.getIndex()); + } + } } } } + // put sections into stringBuilderMap + System.out.println("\n -> stringListMap: " + + stringListMap); + for (final String sectionKey : stringListMap.keySet()) { + stringBuilder = new StringBuilder(); + for (final String sectionLine : stringListMap.get(sectionKey)) { + stringBuilder.append(sectionLine); + } + stringBuilderMap.put(sectionKey, stringBuilder); + } + System.out.println("\n -> stringBuilderMap: " + + stringBuilderMap); return stringBuilderMap; } From e462d6860c2626671d24a1042d94f2c75c2e9e43 Mon 
Sep 17 00:00:00 2001 From: michaelwenk Date: Mon, 14 Feb 2022 16:39:54 +0100 Subject: [PATCH 365/405] feat: enabling of flexibility in HSQC --- src/casekit/nmr/elucidation/Utilities.java | 535 ++++++------------ .../lsd/PyLSDInputFileBuilder.java | 4 - src/casekit/nmr/utils/Utils.java | 18 +- 3 files changed, 173 insertions(+), 384 deletions(-) diff --git a/src/casekit/nmr/elucidation/Utilities.java b/src/casekit/nmr/elucidation/Utilities.java index c4693e0..6607eaf 100644 --- a/src/casekit/nmr/elucidation/Utilities.java +++ b/src/casekit/nmr/elucidation/Utilities.java @@ -407,70 +407,6 @@ private static List getHybridizations(final List correlati return hybridizations; } - // private static List getAttachedProtonIndices(final Correlation correlation) { - // return correlation.getLink() - // .stream() - // .filter(link -> link.getExperimentType() - // .equals("hsqc") - // || link.getExperimentType() - // .equals("hmqc")) - // .map(Link::getMatch) - // .reduce(new ArrayList<>(), ((l, curr) -> { - // l.addAll(curr); - // return l; - // })); - // } - - // public static Map buildIndicesMap(final List correlationList) { - // // index in correlation data -> [indices in PyLSD file...] 
- // final Map indicesMap = new HashMap<>(); - // // init element indices within correlations with same order as in correlation data input - // int heavyAtomIndexInPyLSDFile = 1; - // int protonIndexInPyLSDFile = 1; - // int protonsToInsert; - // Correlation correlation; - // List attachedProtonIndices; - // Integer[] arrayToInsert; - // for (int i = 0; i - // < correlationList.size(); i++) { - // correlation = correlationList.get(i); - // // set entry for each correlation with consideration of equivalences - // if (!correlation.getAtomType() - // .equals("H")) { - // // insert for protons - // attachedProtonIndices = getAttachedProtonIndices(correlation); - // protonsToInsert = correlation.getEquivalence(); - // arrayToInsert = new Integer[protonsToInsert]; - // for (int j = 0; j - // < arrayToInsert.length; j++) { - // arrayToInsert[j] = protonIndexInPyLSDFile; - // protonIndexInPyLSDFile++; - // } - // for (final int attachedProtonIndex : attachedProtonIndices) { - // indicesMap.put(attachedProtonIndex, arrayToInsert); - // } - // // insert for heavy atom itself - // indicesMap.put(i, new Integer[correlation.getEquivalence()]); - // for (int j = 0; j - // < correlation.getEquivalence(); j++) { - // indicesMap.get(i)[j] = heavyAtomIndexInPyLSDFile; - // heavyAtomIndexInPyLSDFile++; - // } - // } - // } - // - // System.out.println("\n -> indicesMap:\n"); - // for (final int index : indicesMap.keySet()) { - // System.out.println("i: " - // + index - // + " -> " - // + Arrays.toString(indicesMap.get(index))); - // } - // System.out.println("\n"); - // - // return indicesMap; - // } - public static Map buildIndicesMap(final List molecularConnectivityList) { // index in correlation data -> [indices in PyLSD file...] 
final Map indicesMap = new HashMap<>(); @@ -507,15 +443,6 @@ public static Map buildIndicesMap(final List indicesMap:\n"); - for (final int index : indicesMap.keySet()) { - System.out.println("i: " - + index - + " -> " - + Arrays.toString(indicesMap.get(index))); - } - System.out.println("\n"); - return indicesMap; } @@ -543,55 +470,35 @@ public static MolecularConnectivity findMolecularConnectivityByIndex( private static List buildPossibilities(final Map indicesMap, final List molecularConnectivityList, final int correlationIndex, final Grouping grouping) { - final MolecularConnectivity molecularConnectivity = molecularConnectivityList.get(correlationIndex); - // add possible indices from grouping - final int groupIndex; + // final MolecularConnectivity molecularConnectivity = molecularConnectivityList.get(correlationIndex); + // // add possible indices from grouping + // final int groupIndex; final Set possibilities = new HashSet<>(); - if (grouping.getTransformedGroups() - .containsKey(molecularConnectivity.getAtomType()) - && grouping.getTransformedGroups() - .get(molecularConnectivity.getAtomType()) - .containsKey(correlationIndex)) { - groupIndex = grouping.getTransformedGroups() - .get(molecularConnectivity.getAtomType()) - .get(correlationIndex); - for (final int groupMemberIndex : grouping.getGroups() - .get(molecularConnectivity.getAtomType()) - .get(groupIndex)) { - - // if (indicesMap.containsKey(groupMemberIndex)) { - // add equivalence indices of group members - possibilities.addAll(Arrays.asList(indicesMap.get(groupMemberIndex))); - // } - } - } else { - // add for equivalences only - possibilities.addAll(Arrays.asList(indicesMap.get(correlationIndex))); - } + // if (grouping.getTransformedGroups() + // .containsKey(molecularConnectivity.getAtomType()) + // && grouping.getTransformedGroups() + // .get(molecularConnectivity.getAtomType()) + // .containsKey(correlationIndex)) { + // groupIndex = grouping.getTransformedGroups() + // 
.get(molecularConnectivity.getAtomType()) + // .get(correlationIndex); + // for (final int groupMemberIndex : grouping.getGroups() + // .get(molecularConnectivity.getAtomType()) + // .get(groupIndex)) { + // + // if (indicesMap.containsKey(groupMemberIndex)) { + // // add equivalence indices of group members + // possibilities.addAll(Arrays.asList(indicesMap.get(groupMemberIndex))); + // } + // } + // } else { + // add for equivalences only + possibilities.addAll(Arrays.asList(indicesMap.get(correlationIndex))); + // } return new ArrayList<>(possibilities); } - // private static void addMolecularConnectivity( - // final Map> molecularConnectivityMap, - // final MolecularConnectivity molecularConnectivity, final int index) { - // molecularConnectivityMap.putIfAbsent(molecularConnectivity.getIndex(), new ArrayList<>()); - // // do not insert duplicates - // if (molecularConnectivityMap.get(molecularConnectivity.getIndex()) - // .stream() - // .noneMatch(molecularConnectivityTemp -> molecularConnectivityTemp.getIndex() - // == index)) { - // final MolecularConnectivity newMolecularConnectivity = Utils.cloneObject(molecularConnectivity, - // MolecularConnectivity.class) - // newMolecularConnectivity.setIndex(index); - // molecularConnectivity.setGroupMembers( - // buildGroupMembers(indicesMap, correlationList, correlationIndex, grouping)); - // - // molecularConnectivityMap.get(molecularConnectivity.getIndex()) - // .add(newMolecularConnectivity); - // } - // } - public static Map> buildMolecularConnectivityMap( final List molecularConnectivityList, final Detections detections, final Grouping grouping, final Map defaultBondDistances) { @@ -641,16 +548,6 @@ public static Map> buildMolecularConnectivi for (final Map.Entry entry : molecularConnectivity.getHmbc() .entrySet()) { protonCorrelationIndex = entry.getKey(); - // // ignore linked H atoms without any attachment to a heavy atom - // if (molecularConnectivityList.get(entry.getKey()) - // .getAtomType() - // 
.equals("H") - // && correlationList.get(matchIndex) - // .getAttachment() - // .keySet() - // .isEmpty()) { - // continue; - // } if (indicesMap.containsKey(protonCorrelationIndex)) { for (int l = 0; l < indicesMap.get(protonCorrelationIndex).length; l++) { @@ -940,260 +837,150 @@ public static MolecularConnectivity getHeavyAtomMolecularConnectivity( return null; } + public static int getHeavyAtomMolecularConnectivityIndex( + final List molecularConnectivityList, final int protonIndex) { + for (final MolecularConnectivity molecularConnectivityTemp : molecularConnectivityList.stream() + .filter(mc -> !mc.getAtomType() + .equals("H")) + .collect( + Collectors.toSet())) { + if (molecularConnectivityTemp.getHsqc() + != null + && molecularConnectivityTemp.getHsqc() + .contains(protonIndex)) { + return molecularConnectivityTemp.getIndex(); + } + } + + return -1; + } + + private static List> buildCombinations( + final List initialMolecularConnectivityList, final Grouping grouping) { + + final List> molecularConnectivityListList = new ArrayList<>(); + final Stack stack = new Stack<>(); + stack.push(new Object[]{initialMolecularConnectivityList, 0, new HashSet<>()}); + + Object[] objects; + List molecularConnectivityList, clonedMolecularConnectivityList; + int correlationIndex, protonGroupIndex, molecularConnectivityIndexBondedToGroupMember; + Set swapped, swappedCopy; + MolecularConnectivity molecularConnectivity, clonedMolecularConnectivity, clonedMolecularConnectivityBondedToGroupMember; + List protonGroupMemberList, indicesToSwap; + String swapKey; + while (!stack.isEmpty()) { + objects = stack.pop(); + molecularConnectivityList = (List) objects[0]; + correlationIndex = (int) objects[1]; + swapped = (Set) objects[2]; + if (correlationIndex + >= molecularConnectivityList.size()) { + molecularConnectivityListList.add(molecularConnectivityList); + continue; + } + molecularConnectivity = molecularConnectivityList.get(correlationIndex); + if (!grouping.getGroups() + 
.containsKey(molecularConnectivity.getAtomType()) + || molecularConnectivity.getAtomType() + .equals("H")) { + stack.push(new Object[]{molecularConnectivityList, correlationIndex + + 1, swapped}); + continue; + } + + // HSQC + if (molecularConnectivity.getHsqc() + != null) { + // loop over all matching protons in HSQC + for (final int protonIndex : molecularConnectivity.getHsqc()) { + // only change if it has a group entry + if (grouping.getTransformedGroups() + .get("H") + .containsKey(protonIndex)) { + protonGroupIndex = grouping.getTransformedGroups() + .get("H") + .get(protonIndex); + protonGroupMemberList = grouping.getGroups() + .get("H") + .get(protonGroupIndex) + .stream() + .filter(protonGroupMemberIndex -> protonGroupMemberIndex + != protonIndex) + .collect(Collectors.toList()); + for (final int protonGroupMemberIndex : protonGroupMemberList) { + indicesToSwap = new ArrayList<>(); + indicesToSwap.add(protonIndex); + indicesToSwap.add(protonGroupMemberIndex); + indicesToSwap.sort(Integer::compare); // to always keep the same order + swapKey = "HSQC_" + + indicesToSwap.stream() + .map(String::valueOf) + .collect(Collectors.joining("_")); + if (swapped.contains(swapKey)) { + continue; + } + // clone current list element + clonedMolecularConnectivity = Utils.cloneObject(molecularConnectivity, + MolecularConnectivity.class); + // remove the current proton and add the group member proton + clonedMolecularConnectivity.getHsqc() + .remove((Integer) protonIndex); + clonedMolecularConnectivity.getHsqc() + .add(protonGroupMemberIndex); + // copy current list + clonedMolecularConnectivityList = new ArrayList<>(molecularConnectivityList); + + // set cloned and changed list element + clonedMolecularConnectivityList.set(correlationIndex, clonedMolecularConnectivity); + // check whether group member proton is attached to a heavy atom + molecularConnectivityIndexBondedToGroupMember = getHeavyAtomMolecularConnectivityIndex( + molecularConnectivityList, 
protonGroupMemberIndex); + if (molecularConnectivityIndexBondedToGroupMember + >= 0) { + // remove the group member proton from heavy atom and add the current proton + clonedMolecularConnectivityBondedToGroupMember = Utils.cloneObject( + molecularConnectivityList.get(molecularConnectivityIndexBondedToGroupMember), + MolecularConnectivity.class); + clonedMolecularConnectivityBondedToGroupMember.getHsqc() + .remove((Integer) protonGroupMemberIndex); + clonedMolecularConnectivityBondedToGroupMember.getHsqc() + .add(protonIndex); + clonedMolecularConnectivityList.set(molecularConnectivityIndexBondedToGroupMember, + clonedMolecularConnectivityBondedToGroupMember); + } + + swappedCopy = new HashSet<>(swapped); + swappedCopy.add(swapKey); + stack.push(new Object[]{clonedMolecularConnectivityList, correlationIndex + + 1, swappedCopy}); + } + } + } + } + // push unchanged stack data as well + stack.push(new Object[]{molecularConnectivityList, correlationIndex + + 1, swapped}); + } - // private static void buildCombinations(final Stack correlationListStack, - // final List> correlationListList, final Grouping grouping) { - // if (correlationListStack.isEmpty()) { - // return; - // } - // final Object[] objects = correlationListStack.pop(); - // final List correlationList = (List) objects[0]; - // final int correlationIndex = (int) objects[1]; - // final Set alreadySwapped = (Set) objects[2]; - // if (correlationIndex - // >= correlationList.size()) { - // correlationListList.add(correlationList); - // buildCombinations(correlationListStack, correlationListList, grouping); - // return; - // } - // final Correlation correlation = correlationList.get(correlationIndex); - // if (!grouping.getGroups() - // .containsKey(correlation.getAtomType()) - // || correlation.getAtomType() - // .equals("H")) { - // correlationListStack.push(new Object[]{correlationList, correlationIndex - // + 1, alreadySwapped}); - // buildCombinations(correlationListStack, correlationListList, grouping); 
- // return; - // } - // final List linkList = correlation.getLink() - // .stream() - // .filter(link -> link.getExperimentType() - // .equals("hsqc") - // || link.getExperimentType() - // .equals("hmqc")) - // .collect(Collectors.toList()); - // final List linkIndicesList = new ArrayList<>(); - // for (final Link link : linkList) { - // linkIndicesList.add(correlation.getLink() - // .indexOf(link)); - // } - // for (int l = 0; l - // < linkList.size(); l++) { - // final Link link = linkList.get(l); - // final int protonIndex = link.getMatch() - // .get(0); - // final int protonGroupIndex = grouping.getTransformedGroups() - // .get("H") - // .get(protonIndex); - // final List protonGroupMemberList = new ArrayList<>(grouping.getGroups() - // .get("H") - // .get(protonGroupIndex)); - // - // for (final int protonGroupMemberIndex : protonGroupMemberList) { - // if (protonGroupMemberIndex - // == protonIndex) { - // continue; - // } - // System.out.println("\n\n swap at: " - // + correlationIndex - // + " -> " - // + l - // + " ---> " - // + protonIndex - // + " <-> " - // + protonGroupMemberIndex); - // final List indicesToSwap = new ArrayList<>(); - // indicesToSwap.add(protonIndex); - // indicesToSwap.add(protonGroupMemberIndex); - // indicesToSwap.sort(Integer::compare); - // System.out.println("-> indicesToSwap: " - // + indicesToSwap); - // final String swapKey = indicesToSwap.stream() - // .map(String::valueOf) - // .collect(Collectors.joining("_")); - // System.out.println("-> swapKey: " - // + swapKey); - // if (alreadySwapped.contains(swapKey)) { - // continue; - // } - // - // - // // replace current link by new one in heavy atom correlation - // final Link newLink = Utils.cloneObject(link, Link.class); - // newLink.setId(Utils.generateID()); - // newLink.setAtomType(new String[]{link.getAtomType()[0], - // correlation.getAtomType()}); // H and current heavy atom type - // newLink.getMatch() - // .set(0, protonGroupMemberIndex); - // final Correlation 
clonedCorrelation = Utils.cloneObject(correlation, Correlation.class); - // clonedCorrelation.getLink() - // .set(linkIndicesList.get(l), newLink); - // System.out.println(" ------> replaced proton " - // + protonIndex - // + " by " - // + protonGroupMemberIndex); - // // remove link from link list in current proton correlation - // final Correlation clonedCorrelationProton = Utils.cloneObject(correlationList.get(protonIndex), - // Correlation.class); - // System.out.println(" ------> remove heavy atom " - // + correlationIndex - // + " from " - // + protonIndex); - // System.out.println(" --> link array size before: " - // + clonedCorrelationProton.getLink() - // .size()); - // clonedCorrelationProton.setLink(clonedCorrelationProton.getLink() - // .stream() - // .filter(linkTemp -> !(linkTemp.getExperimentID() - // .equals(link.getExperimentID()) - // && Objects.equals( - // ((Map) linkTemp.getSignal()).get( - // "id"), - // ((Map) link.getSignal()).get( - // "id")))) - // .collect(Collectors.toList())); - // System.out.println(" --> link array size after: " - // + clonedCorrelationProton.getLink() - // .size()); - // // add new link in proton group member correlation - // final Correlation clonedCorrelationGroupMemberProton = Utils.cloneObject( - // correlationList.get(protonGroupMemberIndex), Correlation.class); - // final Link clonedNewLink = Utils.cloneObject(newLink, Link.class); - // clonedNewLink.setId(Utils.generateID()); - // clonedNewLink.setAxis("x"); - // clonedNewLink.getMatch() - // .set(0, correlationIndex); - // clonedCorrelationGroupMemberProton.getLink() - // .add(clonedNewLink); - // System.out.println(" ------> added heavy atom " - // + correlationIndex - // + " to " - // + protonGroupMemberIndex); - // - // // clone the current correlation list and set the modified correlations - // final List clonedCorrelationList = Utils.cloneList(correlationList, Correlation.class); - // clonedCorrelationList.set(correlationIndex, clonedCorrelation); - // 
clonedCorrelationList.set(protonGroupMemberIndex, clonedCorrelationGroupMemberProton); - // - // - // // if proton group member is attached to another heavy atom(s) then replace it there by current proton - // final List linkListGroupMemberProton = clonedCorrelationGroupMemberProton.getLink() - // .stream() - // .filter(linkTemp -> (linkTemp.getExperimentType() - // .equals("hsqc") - // || linkTemp.getExperimentType() - // .equals("hmqc")) - // && !linkTemp.getId() - // .equals(clonedNewLink.getId())) - // .collect( - // Collectors.toList()); - // for (final Link linkGroupMemberProton : linkListGroupMemberProton) { - // final int heavyAtomIndexGroupMemberProton = linkGroupMemberProton.getMatch() - // .get(0); - // final Correlation clonedLinkedHeavyAtomCorrelationOfGroupMemberProton = Utils.cloneObject( - // correlationList.get(heavyAtomIndexGroupMemberProton), Correlation.class); - // System.out.println(" ------> remove group member proton " - // + protonGroupMemberIndex - // + " from " - // + linkGroupMemberProton.getMatch() - // .get(0)); - // System.out.println(" --> link array size before: " - // + clonedLinkedHeavyAtomCorrelationOfGroupMemberProton.getLink() - // .size()); - // // remove link to previously added proton group member - // clonedLinkedHeavyAtomCorrelationOfGroupMemberProton.setLink( - // clonedLinkedHeavyAtomCorrelationOfGroupMemberProton.getLink() - // .stream() - // .filter(linkTemp -> !(linkTemp.getExperimentID() - // .equals(linkGroupMemberProton.getExperimentID()) - // && Objects.equals( - // ((Map) linkTemp.getSignal()).get( - // "id"), - // ((Map) linkGroupMemberProton.getSignal()).get( - // "id")))) - // .collect(Collectors.toList())); - // System.out.println(" --> link array size after: " - // + clonedLinkedHeavyAtomCorrelationOfGroupMemberProton.getLink() - // .size()); - // // add new link to current proton to heavy atom correlation and to current proton correlation - // Link clonedLink = Utils.cloneObject(link, Link.class); - // 
clonedLink.getMatch() - // .set(0, protonIndex); - // clonedLink.setId(Utils.generateID()); - // clonedLink.setAxis("y"); - // clonedLink.setAtomType(new String[]{link.getAtomType()[0], - // clonedLinkedHeavyAtomCorrelationOfGroupMemberProton.getAtomType()}); - // clonedLinkedHeavyAtomCorrelationOfGroupMemberProton.getLink() - // .add(clonedLink); - // System.out.println(" ------> added proton " - // + protonIndex - // + " to " - // + heavyAtomIndexGroupMemberProton); - // clonedLink = Utils.cloneObject(link, Link.class); - // clonedLink.getMatch() - // .set(0, heavyAtomIndexGroupMemberProton); - // clonedLink.setId(Utils.generateID()); - // clonedCorrelationProton.getLink() - // .add(clonedLink); - // System.out.println(" ------> added heavy atom " - // + heavyAtomIndexGroupMemberProton - // + " to " - // + protonIndex); - // - // - // // add it to new correlation list - // clonedCorrelationList.set(linkGroupMemberProton.getMatch() - // .get(0), - // clonedLinkedHeavyAtomCorrelationOfGroupMemberProton); - // } - // clonedCorrelationList.set(protonIndex, clonedCorrelationProton); - // - // alreadySwapped.add(swapKey); - // - // correlationListStack.push(new Object[]{clonedCorrelationList, correlationIndex - // + 1, alreadySwapped}); - // } - // } - // correlationListStack.push(new Object[]{correlationList, correlationIndex - // + 1, alreadySwapped}); - // buildCombinations(correlationListStack, correlationListList, grouping); - // } + return molecularConnectivityListList; + } public static List>> buildMolecularConnectivityMapCombinationList( final List correlationList, final Detections detections, final Grouping grouping, final Map defaultBondDistances) { final List>> molecularConnectivityMapCombinationList = new ArrayList<>(); - - final List originalMolecularConnectivityList = buildMolecularConnectivityList( + final List initialMolecularConnectivityList = buildMolecularConnectivityList( correlationList, detections, grouping, defaultBondDistances); - 
System.out.println("\n -> originalMolecularConnectivityList: \n"); - System.out.println(originalMolecularConnectivityList); - - final Map> originalMolecularConnectivityMap = buildMolecularConnectivityMap( - originalMolecularConnectivityList, detections, grouping, defaultBondDistances); - - molecularConnectivityMapCombinationList.add(originalMolecularConnectivityMap); - - System.out.println("\n -> originalMolecularConnectivityMap: \n"); - System.out.println(originalMolecularConnectivityMap); - - - // final Stack stack = new Stack<>(); - // // experiment -> [swapKey] - // final Map> swapped = new HashMap<>(); - // stack.push(new Object[]{correlationList, 0, swapped}); - // final List> correlationListList = new ArrayList<>(); - // buildCombinations(stack, correlationListList, grouping); - // - // for (final List correlationListTemp : correlationListList) { - // molecularConnectivityMapCombinationList.add( - // buildMolecularConnectivityMapWithIndices(correlationListTemp, detections, grouping, - // defaultBondDistances)); - // } - // System.out.println("\n\n -> correlationListList size: " - // + correlationListList.size() - // + "\n\n"); + // System.out.println("\n -> initialMolecularConnectivityList: \n"); + // System.out.println(initialMolecularConnectivityList); + final List> molecularConnectivityListList = buildCombinations( + initialMolecularConnectivityList, grouping); + for (final List molecularConnectivityList : molecularConnectivityListList) { + molecularConnectivityMapCombinationList.add( + buildMolecularConnectivityMap(molecularConnectivityList, detections, grouping, + defaultBondDistances)); + } return molecularConnectivityMapCombinationList; } diff --git a/src/casekit/nmr/elucidation/lsd/PyLSDInputFileBuilder.java b/src/casekit/nmr/elucidation/lsd/PyLSDInputFileBuilder.java index 11dd710..5e9a110 100644 --- a/src/casekit/nmr/elucidation/lsd/PyLSDInputFileBuilder.java +++ b/src/casekit/nmr/elucidation/lsd/PyLSDInputFileBuilder.java @@ -339,8 +339,6 @@ 
private static Map buildStringBuilderMap( } } // put sections into stringBuilderMap - System.out.println("\n -> stringListMap: " - + stringListMap); for (final String sectionKey : stringListMap.keySet()) { stringBuilder = new StringBuilder(); for (final String sectionLine : stringListMap.get(sectionKey)) { @@ -348,8 +346,6 @@ private static Map buildStringBuilderMap( } stringBuilderMap.put(sectionKey, stringBuilder); } - System.out.println("\n -> stringBuilderMap: " - + stringBuilderMap); return stringBuilderMap; } diff --git a/src/casekit/nmr/utils/Utils.java b/src/casekit/nmr/utils/Utils.java index 065b574..062abb8 100644 --- a/src/casekit/nmr/utils/Utils.java +++ b/src/casekit/nmr/utils/Utils.java @@ -1,14 +1,12 @@ package casekit.nmr.utils; import casekit.nmr.elucidation.Constants; -import casekit.nmr.model.DataSet; -import casekit.nmr.model.Signal; -import casekit.nmr.model.Spectrum; -import casekit.nmr.model.StructureCompact; +import casekit.nmr.model.*; import casekit.nmr.model.nmrium.Correlation; import casekit.nmr.model.nmrium.Link; import casekit.nmr.model.nmrium.Signal1D; import casekit.nmr.model.nmrium.Signal2D; +import com.google.gson.Gson; import org.openscience.cdk.aromaticity.Aromaticity; import org.openscience.cdk.aromaticity.ElectronDonation; import org.openscience.cdk.aromaticity.Kekulization; @@ -630,8 +628,10 @@ public static Signal extractSignalFromCorrelation(final Correlation correlation) "kind"), multiplicity, signalMap.containsKey("sign") - ? (Integer) signalMap.get( - "sign") + ? 
(int) Double.parseDouble( + String.valueOf( + signalMap.get( + "sign"))) : null); // 1D signal if (signalMap.containsKey("delta")) { @@ -686,4 +686,10 @@ public static Spectrum correlationListToSpectrum1D(final List corre return spectrum; } + + public static T cloneObject(final T object, final Class clazz) { + final Gson gson = new Gson(); + final String jsonString = gson.toJson(object, clazz); + return gson.fromJson(jsonString, clazz); + } } From db93021036aa13e6e1edf60bb5012a4ff60a2e80 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Thu, 17 Feb 2022 12:51:26 +0100 Subject: [PATCH 366/405] feat: check whether hetero atoms are present before disallowing hetero hetero bonds --- .../nmr/elucidation/lsd/PyLSDInputFileBuilder.java | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/casekit/nmr/elucidation/lsd/PyLSDInputFileBuilder.java b/src/casekit/nmr/elucidation/lsd/PyLSDInputFileBuilder.java index 5e9a110..b90b477 100644 --- a/src/casekit/nmr/elucidation/lsd/PyLSDInputFileBuilder.java +++ b/src/casekit/nmr/elucidation/lsd/PyLSDInputFileBuilder.java @@ -413,8 +413,15 @@ private static String buildLISTsAndPROPs(final Map [list name, size] final Map listMap = new HashMap<>(); - // LIST and PROP for hetero hetero bonds to disallow - if (!allowHeteroHeteroBonds) { + // LIST and PROP for hetero hetero bonds to disallow in case hetero atoms are present + final boolean containsHeteroAtoms = elementCounts.keySet() + .stream() + .anyMatch(atomType -> !atomType.equals("C") + && !atomType.equals("H")); + System.out.println("containsHeteroAtoms: " + + containsHeteroAtoms); + if (containsHeteroAtoms + && !allowHeteroHeteroBonds) { LISTAndPROPUtilities.insertNoHeteroHeteroBonds(stringBuilder, listMap); } // insert LIST for each heavy atom type in MF From 608c3a7bd6fa7dcd613a6ad90dd694cf6330fb54 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Thu, 17 Feb 2022 22:37:35 +0100 Subject: [PATCH 367/405] feat: added HMBC combinations --- 
src/casekit/nmr/elucidation/Utilities.java | 383 ++++++++++++------ .../lsd/PyLSDInputFileBuilder.java | 4 +- 2 files changed, 257 insertions(+), 130 deletions(-) diff --git a/src/casekit/nmr/elucidation/Utilities.java b/src/casekit/nmr/elucidation/Utilities.java index 6607eaf..26bfc20 100644 --- a/src/casekit/nmr/elucidation/Utilities.java +++ b/src/casekit/nmr/elucidation/Utilities.java @@ -469,32 +469,34 @@ public static MolecularConnectivity findMolecularConnectivityByIndex( private static List buildPossibilities(final Map indicesMap, final List molecularConnectivityList, - final int correlationIndex, final Grouping grouping) { - // final MolecularConnectivity molecularConnectivity = molecularConnectivityList.get(correlationIndex); - // // add possible indices from grouping - // final int groupIndex; + final int correlationIndex, final Grouping grouping, + final boolean useGrouping) { + final MolecularConnectivity molecularConnectivity = molecularConnectivityList.get(correlationIndex); + // add possible indices from grouping + final int groupIndex; final Set possibilities = new HashSet<>(); - // if (grouping.getTransformedGroups() - // .containsKey(molecularConnectivity.getAtomType()) - // && grouping.getTransformedGroups() - // .get(molecularConnectivity.getAtomType()) - // .containsKey(correlationIndex)) { - // groupIndex = grouping.getTransformedGroups() - // .get(molecularConnectivity.getAtomType()) - // .get(correlationIndex); - // for (final int groupMemberIndex : grouping.getGroups() - // .get(molecularConnectivity.getAtomType()) - // .get(groupIndex)) { - // - // if (indicesMap.containsKey(groupMemberIndex)) { - // // add equivalence indices of group members - // possibilities.addAll(Arrays.asList(indicesMap.get(groupMemberIndex))); - // } - // } - // } else { - // add for equivalences only - possibilities.addAll(Arrays.asList(indicesMap.get(correlationIndex))); - // } + if (useGrouping + && grouping.getTransformedGroups() + 
.containsKey(molecularConnectivity.getAtomType()) + && grouping.getTransformedGroups() + .get(molecularConnectivity.getAtomType()) + .containsKey(correlationIndex)) { + groupIndex = grouping.getTransformedGroups() + .get(molecularConnectivity.getAtomType()) + .get(correlationIndex); + for (final int groupMemberIndex : grouping.getGroups() + .get(molecularConnectivity.getAtomType()) + .get(groupIndex)) { + + if (indicesMap.containsKey(groupMemberIndex)) { + // add equivalence indices of group members + possibilities.addAll(Arrays.asList(indicesMap.get(groupMemberIndex))); + } + } + } else { + // add for equivalences only + possibilities.addAll(Arrays.asList(indicesMap.get(correlationIndex))); + } return new ArrayList<>(possibilities); } @@ -531,7 +533,10 @@ public static Map> buildMolecularConnectivi if (!molecularConnectivity.getAtomType() .equals("H") && molecularConnectivity.getHsqc() - != null) { + != null + && k + < indicesMap.get(molecularConnectivity.getHsqc() + .get(0)).length) { // using the first proton correlation index from HSQC list is enough because show will direct to same heavy atom index protonIndex = indicesMap.get(molecularConnectivity.getHsqc() .get(0))[k]; @@ -611,7 +616,10 @@ public static Map> buildMolecularConnectivi .get(correlationIndex)); } molecularConnectivityTemp.setGroupMembers( - buildPossibilities(indicesMap, molecularConnectivityList, correlationIndex, grouping)); + buildPossibilities(indicesMap, molecularConnectivityList, correlationIndex, grouping, + // !molecularConnectivity.getAtomType() + // .equals("H")) + true)); } // fill in fixed neighbors if (molecularConnectivity.getEquivalence() @@ -837,7 +845,7 @@ public static MolecularConnectivity getHeavyAtomMolecularConnectivity( return null; } - public static int getHeavyAtomMolecularConnectivityIndex( + public static int findBondedHeavyAtomMolecularConnectivityIndex( final List molecularConnectivityList, final int protonIndex) { for (final MolecularConnectivity 
molecularConnectivityTemp : molecularConnectivityList.stream() .filter(mc -> !mc.getAtomType() @@ -855,114 +863,237 @@ public static int getHeavyAtomMolecularConnectivityIndex( return -1; } - private static List> buildCombinations( - final List initialMolecularConnectivityList, final Grouping grouping) { + private static boolean checkDistance(final List molecularConnectivityList, final int index1, + final int index2, final Grouping grouping) { + final Double distanceValue = casekit.nmr.similarity.Utilities.getDistanceValue( + molecularConnectivityList.get(index1) + .getSignal(), molecularConnectivityList.get(index2) + .getSignal(), 0, 0, false, false, false, + grouping.getTolerances() + .get("H")); - final List> molecularConnectivityListList = new ArrayList<>(); - final Stack stack = new Stack<>(); - stack.push(new Object[]{initialMolecularConnectivityList, 0, new HashSet<>()}); - - Object[] objects; - List molecularConnectivityList, clonedMolecularConnectivityList; - int correlationIndex, protonGroupIndex, molecularConnectivityIndexBondedToGroupMember; - Set swapped, swappedCopy; - MolecularConnectivity molecularConnectivity, clonedMolecularConnectivity, clonedMolecularConnectivityBondedToGroupMember; + return distanceValue + != null; + } + + private static List swap(final MolecularConnectivity molecularConnectivity, final Grouping grouping, + final String experiment, + final List molecularConnectivityList, + final int correlationIndex, final Set swapped) { + final List newStatesList = new ArrayList<>(); + + List clonedMolecularConnectivityList; + int protonGroupIndex, molecularConnectivityIndexBondedToGroupMember; + Set swappedCopy; + MolecularConnectivity clonedMolecularConnectivity, clonedMolecularConnectivityBondedToGroupMember; List protonGroupMemberList, indicesToSwap; String swapKey; - while (!stack.isEmpty()) { - objects = stack.pop(); - molecularConnectivityList = (List) objects[0]; - correlationIndex = (int) objects[1]; - swapped = (Set) objects[2]; - if 
(correlationIndex - >= molecularConnectivityList.size()) { - molecularConnectivityListList.add(molecularConnectivityList); - continue; - } - molecularConnectivity = molecularConnectivityList.get(correlationIndex); - if (!grouping.getGroups() - .containsKey(molecularConnectivity.getAtomType()) - || molecularConnectivity.getAtomType() - .equals("H")) { - stack.push(new Object[]{molecularConnectivityList, correlationIndex - + 1, swapped}); - continue; - } + Integer[] pathLengthProton; + if (experiment.equals("hsqc") + && molecularConnectivity.getHsqc() + != null) { + // loop over all matching protons in HSQC + for (final Integer protonIndex : molecularConnectivity.getHsqc()) { + // only change if it has a group entry + if (grouping.getTransformedGroups() + .containsKey("H") + && grouping.getTransformedGroups() + .get("H") + .containsKey(protonIndex)) { + protonGroupIndex = grouping.getTransformedGroups() + .get("H") + .get(protonIndex); + protonGroupMemberList = grouping.getGroups() + .get("H") + .get(protonGroupIndex) + .stream() + .filter(protonGroupMemberIndex -> !Objects.equals( + protonGroupMemberIndex, protonIndex) + && !molecularConnectivity.getHsqc() + .contains(protonGroupMemberIndex)) + .collect(Collectors.toList()); + for (final Integer protonGroupMemberIndex : protonGroupMemberList) { + indicesToSwap = new ArrayList<>(); + indicesToSwap.add(protonIndex); + indicesToSwap.add(protonGroupMemberIndex); + indicesToSwap.sort(Integer::compare); // to always keep the same order + swapKey = "hsqc_" + + indicesToSwap.stream() + .map(String::valueOf) + .collect(Collectors.joining("_")); + if (swapped.contains(swapKey) + || !checkDistance(molecularConnectivityList, protonIndex, protonGroupMemberIndex, + grouping)) { + continue; + } + // clone current list element + clonedMolecularConnectivity = Utils.cloneObject(molecularConnectivity, + MolecularConnectivity.class); + // remove the current proton and add the group member proton + clonedMolecularConnectivity.getHsqc() 
+ .remove(protonIndex); + clonedMolecularConnectivity.getHsqc() + .add(protonGroupMemberIndex); + // copy current list + clonedMolecularConnectivityList = new ArrayList<>(molecularConnectivityList); + // set cloned and changed list element + clonedMolecularConnectivityList.set(correlationIndex, clonedMolecularConnectivity); + // check whether group member proton is attached to a heavy atom + molecularConnectivityIndexBondedToGroupMember = findBondedHeavyAtomMolecularConnectivityIndex( + molecularConnectivityList, protonGroupMemberIndex); + if (molecularConnectivityIndexBondedToGroupMember + >= 0) { + // remove the group member proton from heavy atom and add the current proton + clonedMolecularConnectivityBondedToGroupMember = Utils.cloneObject( + molecularConnectivityList.get(molecularConnectivityIndexBondedToGroupMember), + MolecularConnectivity.class); + clonedMolecularConnectivityBondedToGroupMember.getHsqc() + .remove(protonGroupMemberIndex); + clonedMolecularConnectivityBondedToGroupMember.getHsqc() + .add(protonIndex); + clonedMolecularConnectivityList.set(molecularConnectivityIndexBondedToGroupMember, + clonedMolecularConnectivityBondedToGroupMember); + } - // HSQC - if (molecularConnectivity.getHsqc() - != null) { - // loop over all matching protons in HSQC - for (final int protonIndex : molecularConnectivity.getHsqc()) { - // only change if it has a group entry - if (grouping.getTransformedGroups() - .get("H") - .containsKey(protonIndex)) { - protonGroupIndex = grouping.getTransformedGroups() - .get("H") - .get(protonIndex); - protonGroupMemberList = grouping.getGroups() - .get("H") - .get(protonGroupIndex) - .stream() - .filter(protonGroupMemberIndex -> protonGroupMemberIndex - != protonIndex) - .collect(Collectors.toList()); - for (final int protonGroupMemberIndex : protonGroupMemberList) { - indicesToSwap = new ArrayList<>(); - indicesToSwap.add(protonIndex); - indicesToSwap.add(protonGroupMemberIndex); - indicesToSwap.sort(Integer::compare); // to 
always keep the same order - swapKey = "HSQC_" - + indicesToSwap.stream() - .map(String::valueOf) - .collect(Collectors.joining("_")); - if (swapped.contains(swapKey)) { - continue; - } - // clone current list element - clonedMolecularConnectivity = Utils.cloneObject(molecularConnectivity, - MolecularConnectivity.class); - // remove the current proton and add the group member proton - clonedMolecularConnectivity.getHsqc() - .remove((Integer) protonIndex); - clonedMolecularConnectivity.getHsqc() - .add(protonGroupMemberIndex); - // copy current list - clonedMolecularConnectivityList = new ArrayList<>(molecularConnectivityList); - - // set cloned and changed list element - clonedMolecularConnectivityList.set(correlationIndex, clonedMolecularConnectivity); - // check whether group member proton is attached to a heavy atom - molecularConnectivityIndexBondedToGroupMember = getHeavyAtomMolecularConnectivityIndex( - molecularConnectivityList, protonGroupMemberIndex); - if (molecularConnectivityIndexBondedToGroupMember - >= 0) { - // remove the group member proton from heavy atom and add the current proton - clonedMolecularConnectivityBondedToGroupMember = Utils.cloneObject( - molecularConnectivityList.get(molecularConnectivityIndexBondedToGroupMember), - MolecularConnectivity.class); - clonedMolecularConnectivityBondedToGroupMember.getHsqc() - .remove((Integer) protonGroupMemberIndex); - clonedMolecularConnectivityBondedToGroupMember.getHsqc() - .add(protonIndex); - clonedMolecularConnectivityList.set(molecularConnectivityIndexBondedToGroupMember, - clonedMolecularConnectivityBondedToGroupMember); - } + // System.out.println(" --> swapped HSQC at i: " + // + molecularConnectivity.getIndex() + // + " -> " + // + swapKey + // + " -> " + // + swapped); + swappedCopy = new HashSet<>(swapped); + swappedCopy.add(swapKey); - swappedCopy = new HashSet<>(swapped); - swappedCopy.add(swapKey); - stack.push(new Object[]{clonedMolecularConnectivityList, correlationIndex - + 1, 
swappedCopy}); + newStatesList.add(new Object[]{clonedMolecularConnectivityList, correlationIndex + + 1, swappedCopy}); + } + } + } + } else if (experiment.equals("hmbc") + && molecularConnectivity.getHmbc() + != null) { + + // loop over all matching protons in HMBC + for (final Integer protonIndex : molecularConnectivity.getHmbc() + .keySet()) { + // only change if it has a group entry + if (grouping.getTransformedGroups() + .containsKey("H") + && grouping.getTransformedGroups() + .get("H") + .containsKey(protonIndex)) { + protonGroupIndex = grouping.getTransformedGroups() + .get("H") + .get(protonIndex); + protonGroupMemberList = grouping.getGroups() + .get("H") + .get(protonGroupIndex) + .stream() + .filter(protonGroupMemberIndex -> !Objects.equals( + protonGroupMemberIndex, protonIndex) + && (molecularConnectivity.getHsqc() + == null + || !molecularConnectivity.getHsqc() + .contains(protonGroupMemberIndex)) + && !molecularConnectivity.getHmbc() + .containsKey( + protonGroupMemberIndex)) + .collect(Collectors.toList()); + for (final Integer protonGroupMemberIndex : protonGroupMemberList) { + if (!checkDistance(molecularConnectivityList, protonIndex, protonGroupMemberIndex, grouping)) { + continue; } + + // copy current list + clonedMolecularConnectivityList = new ArrayList<>(molecularConnectivityList); + // clone current list element + clonedMolecularConnectivity = Utils.cloneObject(molecularConnectivity, + MolecularConnectivity.class); + // remove the current proton and add the group member proton + pathLengthProton = clonedMolecularConnectivity.getHmbc() + .get(protonIndex); + clonedMolecularConnectivity.getHmbc() + .remove(protonIndex); + clonedMolecularConnectivity.getHmbc() + .putIfAbsent(protonGroupMemberIndex, pathLengthProton); + // set cloned and changed list element + clonedMolecularConnectivityList.set(correlationIndex, clonedMolecularConnectivity); + + // System.out.println(" --> swapped HMBC at i: " + // + molecularConnectivity.getIndex() + // + " 
-> " + // + swapped); + swappedCopy = new HashSet<>(swapped); + + newStatesList.add(new Object[]{clonedMolecularConnectivityList, correlationIndex + + 1, swappedCopy}); } } } - // push unchanged stack data as well - stack.push(new Object[]{molecularConnectivityList, correlationIndex - + 1, swapped}); } + return newStatesList; + } + + private static List> buildCombinations( + final List initialMolecularConnectivityList, final Grouping grouping) { + + final List> molecularConnectivityListList = new ArrayList<>(); + molecularConnectivityListList.add(initialMolecularConnectivityList); + + // final Stack stack = new Stack<>(); + // stack.push(new Object[]{initialMolecularConnectivityList, 0, new HashSet<>()}); + // + // Object[] objects; + // List molecularConnectivityList; + // int correlationIndex; + // Set swapped; + // MolecularConnectivity molecularConnectivity; + // List newStateList, newStateList2; + // while (!stack.isEmpty()) { + // objects = stack.pop(); + // molecularConnectivityList = (List) objects[0]; + // correlationIndex = (int) objects[1]; + // swapped = (Set) objects[2]; + // if (correlationIndex + // >= molecularConnectivityList.size()) { + // molecularConnectivityListList.add(molecularConnectivityList); + // continue; + // } + // molecularConnectivity = molecularConnectivityList.get(correlationIndex); + // if (!grouping.getGroups() + // .containsKey(molecularConnectivity.getAtomType()) + // || molecularConnectivity.getAtomType() + // .equals("H")) { + // stack.push(new Object[]{molecularConnectivityList, correlationIndex + // + 1, swapped}); + // continue; + // } + // + // // HSQC + // newStateList = swap(molecularConnectivity, grouping, "hsqc", molecularConnectivityList, correlationIndex, + // swapped); + // // HMBC + // newStateList2 = new ArrayList<>(newStateList); + // // push unchanged stack data that HMBC can swap from previous data too + // newStateList2.add(new Object[]{molecularConnectivityList, correlationIndex, swapped}); + // for (final 
Object[] stateObjects : newStateList2) { + // newStateList.addAll( + // swap(molecularConnectivity, grouping, "hmbc", (List) stateObjects[0], + // correlationIndex, (Set) stateObjects[2])); + // } + // + // // push unchanged stack data as well + // newStateList.add(new Object[]{molecularConnectivityList, correlationIndex + // + 1, swapped}); + // + // // push all new states into the stack + // for (final Object[] newStateObjects : newStateList) { + // stack.push(newStateObjects); + // } + // } + + return molecularConnectivityListList; } @@ -972,8 +1103,6 @@ public static List>> buildMolecularConn final List>> molecularConnectivityMapCombinationList = new ArrayList<>(); final List initialMolecularConnectivityList = buildMolecularConnectivityList( correlationList, detections, grouping, defaultBondDistances); - // System.out.println("\n -> initialMolecularConnectivityList: \n"); - // System.out.println(initialMolecularConnectivityList); final List> molecularConnectivityListList = buildCombinations( initialMolecularConnectivityList, grouping); for (final List molecularConnectivityList : molecularConnectivityListList) { diff --git a/src/casekit/nmr/elucidation/lsd/PyLSDInputFileBuilder.java b/src/casekit/nmr/elucidation/lsd/PyLSDInputFileBuilder.java index b90b477..1709270 100644 --- a/src/casekit/nmr/elucidation/lsd/PyLSDInputFileBuilder.java +++ b/src/casekit/nmr/elucidation/lsd/PyLSDInputFileBuilder.java @@ -417,9 +417,7 @@ private static String buildLISTsAndPROPs(final Map !atomType.equals("C") - && !atomType.equals("H")); - System.out.println("containsHeteroAtoms: " - + containsHeteroAtoms); + && !atomType.equals("H")); if (containsHeteroAtoms && !allowHeteroHeteroBonds) { LISTAndPROPUtilities.insertNoHeteroHeteroBonds(stringBuilder, listMap); From 4979d8fb4f7af1e84d80d4abeb70ed92737a1de6 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Fri, 18 Feb 2022 16:34:08 +0100 Subject: [PATCH 368/405] feat: added attachment of objects to DataSet --- 
src/casekit/nmr/model/DataSet.java | 18 ++++++++++++++++-- src/casekit/nmr/prediction/Prediction.java | 4 ++-- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/src/casekit/nmr/model/DataSet.java b/src/casekit/nmr/model/DataSet.java index a707bd8..e5b285e 100644 --- a/src/casekit/nmr/model/DataSet.java +++ b/src/casekit/nmr/model/DataSet.java @@ -19,13 +19,15 @@ public class DataSet { private SpectrumCompact spectrum; private Assignment assignment; private Map meta; + private Map attachment; public DataSet(final IAtomContainer structure, final Spectrum spectrum, final Assignment assignment, - final Map meta) { + final Map meta, final Map attachment) { this.structure = new StructureCompact(structure); this.spectrum = new SpectrumCompact(spectrum); this.assignment = assignment; this.meta = new HashMap<>(meta); + this.attachment = new HashMap<>(attachment); } public void addMetaInfo(final String key, final String value) { @@ -40,13 +42,25 @@ public void removeMetaInfo(final String key) { this.meta.remove(key); } + public void addAttachment(final String key, final Object object) { + if (this.attachment + == null) { + this.attachment = new HashMap<>(); + } + this.attachment.put(key, object); + } + + public void removeAttachment(final String key) { + this.attachment.remove(key); + } + public DataSet buildClone() { final Map metaTemp = this.meta == null ? 
new HashMap<>() : new HashMap<>(this.meta); return new DataSet(this.structure.buildClone(), this.spectrum.buildClone(), this.assignment.buildClone(), - new HashMap<>(metaTemp)); + new HashMap<>(metaTemp), new HashMap<>(this.attachment)); } @Override diff --git a/src/casekit/nmr/prediction/Prediction.java b/src/casekit/nmr/prediction/Prediction.java index 094e454..bb1dab1 100644 --- a/src/casekit/nmr/prediction/Prediction.java +++ b/src/casekit/nmr/prediction/Prediction.java @@ -129,7 +129,7 @@ public static DataSet predict1D(final Map> hoseCod return null; } - return new DataSet(structure, spectrum, assignment, new HashMap<>()); + return new DataSet(structure, spectrum, assignment, new HashMap<>(), new HashMap<>()); } /** @@ -249,7 +249,7 @@ public static DataSet predict2D(final IAtomContainer structure, final Spectrum s } } - return new DataSet(structure, predictedSpectrum2D, assignment2D, new HashMap<>()); + return new DataSet(structure, predictedSpectrum2D, assignment2D, new HashMap<>(), new HashMap<>()); } public static DataSet predictHSQC(final IAtomContainer structure, final Spectrum spectrumDim1, From 5afdcb17b290f68ce539c7173fbec1efa48d1822 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Tue, 22 Feb 2022 18:59:56 +0100 Subject: [PATCH 369/405] feat: enable 3D HOSE code in HOSECodeShiftStatistics --- pom.xml | 5 ++ .../nmr/analysis/HOSECodeShiftStatistics.java | 70 ++++++++++++++----- src/casekit/nmr/dbservice/NMRShiftDB.java | 41 ----------- src/casekit/nmr/utils/Utils.java | 45 +++++++++--- 4 files changed, 93 insertions(+), 68 deletions(-) diff --git a/pom.xml b/pom.xml index bb1362a..2502a92 100644 --- a/pom.xml +++ b/pom.xml @@ -51,6 +51,11 @@ cdk-bundle 2.5 + + org.openscience.nmrshiftdb + predictorc + 1.0 + commons-cli commons-cli diff --git a/src/casekit/nmr/analysis/HOSECodeShiftStatistics.java b/src/casekit/nmr/analysis/HOSECodeShiftStatistics.java index bd318d5..a993b6e 100644 --- a/src/casekit/nmr/analysis/HOSECodeShiftStatistics.java +++ 
b/src/casekit/nmr/analysis/HOSECodeShiftStatistics.java @@ -17,6 +17,9 @@ import org.bson.Document; import org.openscience.cdk.exception.CDKException; import org.openscience.cdk.interfaces.IAtomContainer; +import org.openscience.cdk.layout.StructureDiagramGenerator; +import org.openscience.nmrshiftdb.util.AtomUtils; +import org.openscience.nmrshiftdb.util.ExtendedHOSECodeGenerator; import java.io.*; import java.util.*; @@ -26,11 +29,12 @@ public class HOSECodeShiftStatistics { private final static Gson GSON = new GsonBuilder().setLenient() - .create(); //.setPrettyPrinting() + .create(); public static Map>> collectHOSECodeShifts( - final List dataSetList, final Integer maxSphere, final boolean withExplicitH) { - return collectHOSECodeShifts(dataSetList, maxSphere, withExplicitH, new ConcurrentHashMap<>()); + final List dataSetList, final Integer maxSphere, final boolean use3D, + final boolean withExplicitH) { + return collectHOSECodeShifts(dataSetList, maxSphere, use3D, withExplicitH, new ConcurrentHashMap<>()); } /** @@ -43,17 +47,20 @@ public static Map>> collectHOS * @return */ public static Map>> collectHOSECodeShifts( - final List dataSetList, final Integer maxSphere, final boolean withExplicitH, + final List dataSetList, final Integer maxSphere, final boolean use3D, final boolean withExplicitH, final Map>> hoseCodeShifts) { for (final DataSet dataSet : dataSetList) { - insert(dataSet, maxSphere, withExplicitH, hoseCodeShifts); + insert(dataSet, maxSphere, use3D, withExplicitH, hoseCodeShifts); } return hoseCodeShifts; } - public static boolean insert(final DataSet dataSet, final Integer maxSphere, final boolean withExplicitH, + public static boolean insert(final DataSet dataSet, final Integer maxSphere, final boolean use3D, + final boolean withExplicitH, final Map>> hoseCodeShifts) { + final StructureDiagramGenerator structureDiagramGenerator = new StructureDiagramGenerator(); + final ExtendedHOSECodeGenerator extendedHOSECodeGenerator = new 
ExtendedHOSECodeGenerator(); final IAtomContainer structure; Signal signal; String hoseCode; @@ -73,7 +80,8 @@ public static boolean insert(final DataSet dataSet, final Integer maxSphere, fin } // create atom index map to know which indices the explicit hydrogens will have atomIndexMap = new HashMap<>(); - if (withExplicitH) { + if (use3D + || withExplicitH) { try { int nextAtomIndexExplicitH = structure.getAtomCount(); for (int i = 0; i @@ -90,7 +98,21 @@ public static boolean insert(final DataSet dataSet, final Integer maxSphere, fin } } - Utils.convertImplicitToExplicitHydrogens(structure); + if (use3D) { + try { + // set 2D coordinates + structureDiagramGenerator.setMolecule(structure); + structureDiagramGenerator.generateCoordinates(structure); + /* !!! No explicit H in mol !!! */ + Utils.convertExplicitToImplicitHydrogens(structure); + /* add explicit H atoms */ + AtomUtils.addAndPlaceHydrogens(structure); + } catch (final CDKException | IOException | ClassNotFoundException e) { + e.printStackTrace(); + } + } else { + Utils.convertImplicitToExplicitHydrogens(structure); + } Utils.setAromaticityAndKekulize(structure); } catch (final CDKException e) { e.printStackTrace(); @@ -141,7 +163,17 @@ public static boolean insert(final DataSet dataSet, final Integer maxSphere, fin } for (int sphere = 1; sphere <= maxSphereTemp; sphere++) { - hoseCode = HOSECodeBuilder.buildHOSECode(structure, i, sphere, false); + if (use3D) { + try { + hoseCode = extendedHOSECodeGenerator.getHOSECode(structure, structure.getAtom(i), + sphere); + } catch (final Exception e) { + // e.printStackTrace(); + continue; + } + } else { + hoseCode = HOSECodeBuilder.buildHOSECode(structure, i, sphere, false); + } hoseCodeShifts.putIfAbsent(hoseCode, new ConcurrentHashMap<>()); hoseCodeShifts.get(hoseCode) .putIfAbsent(solvent, new ConcurrentLinkedQueue<>()); @@ -185,19 +217,18 @@ public static Map> buildHOSECodeShiftStatistics(fi final String[] pathsToCOCONUTs, final String[] nuclei, final Integer 
maxSphere, + final boolean use3D, final boolean withExplicitH) { try { final Map>> hoseCodeShifts = new HashMap<>(); - for (int i = 0; i - < pathsToNMRShiftDBs.length; i++) { + for (final String pathsToNMRShiftDB : pathsToNMRShiftDBs) { HOSECodeShiftStatistics.collectHOSECodeShifts( - NMRShiftDB.getDataSetsFromNMRShiftDB(pathsToNMRShiftDBs[i], nuclei), maxSphere, withExplicitH, - hoseCodeShifts); + NMRShiftDB.getDataSetsFromNMRShiftDB(pathsToNMRShiftDB, nuclei), maxSphere, use3D, + withExplicitH, hoseCodeShifts); } - for (int i = 0; i - < pathsToCOCONUTs.length; i++) { + for (final String pathsToCOCONUT : pathsToCOCONUTs) { HOSECodeShiftStatistics.collectHOSECodeShifts( - COCONUT.getDataSetsWithShiftPredictionFromCOCONUT(pathsToCOCONUTs[i], nuclei), maxSphere, + COCONUT.getDataSetsWithShiftPredictionFromCOCONUT(pathsToCOCONUT, nuclei), maxSphere, use3D, withExplicitH, hoseCodeShifts); } return HOSECodeShiftStatistics.buildHOSECodeShiftStatistics(hoseCodeShifts); @@ -210,9 +241,10 @@ public static Map> buildHOSECodeShiftStatistics(fi public static Map> buildHOSECodeShiftStatistics(final List dataSetList, final Integer maxSphere, + final boolean use3D, final boolean withExplicitH) { return HOSECodeShiftStatistics.buildHOSECodeShiftStatistics( - collectHOSECodeShifts(dataSetList, maxSphere, withExplicitH)); + collectHOSECodeShifts(dataSetList, maxSphere, use3D, withExplicitH)); } public static boolean writeHOSECodeShiftStatistics(final Map> hoseCodeShifts, @@ -278,8 +310,8 @@ public static Map> readHOSECodeShiftStatistics( hoseCodeShiftsStatisticInJSON.append(line); } final JsonObject jsonObject = JsonParser.parseString(hoseCodeShiftsStatisticInJSON.substring( - hoseCodeShiftsStatisticInJSON.toString() - .indexOf("{"))) + hoseCodeShiftsStatisticInJSON.toString() + .indexOf("{"))) .getAsJsonObject(); hoseCodeShiftStatistics.put(jsonObject.get("HOSECode") .getAsString(), GSON.fromJson(jsonObject.get("values") diff --git a/src/casekit/nmr/dbservice/NMRShiftDB.java 
b/src/casekit/nmr/dbservice/NMRShiftDB.java index 624f670..75baef8 100644 --- a/src/casekit/nmr/dbservice/NMRShiftDB.java +++ b/src/casekit/nmr/dbservice/NMRShiftDB.java @@ -328,47 +328,6 @@ public static String[][] parseNMRShiftDBSpectrum(final String NMRShiftDBSpectrum return values; } - @Deprecated - public static String NMRShiftDBSpectrumToBasicTextSpectrum(final String NMRShiftDBSpectrum, final String nucleus, - final String description) { - if ((NMRShiftDBSpectrum - == null) - || NMRShiftDBSpectrum.trim() - .isEmpty()) { - return null; - } - final StringBuilder basicSpectrum = new StringBuilder(); - // append description - if (!description.trim() - .startsWith("//")) { - basicSpectrum.append("// "); - } - basicSpectrum.append(description) - .append("\n"); - final String[][] spectrumStringArray = NMRShiftDB.parseNMRShiftDBSpectrum(NMRShiftDBSpectrum); - try { - for (int i = 0; i - < spectrumStringArray.length; i++) { - // append nucleus - basicSpectrum.append(nucleus) - .append(", "); - // append chemical shift - basicSpectrum.append(Double.parseDouble(spectrumStringArray[i][0])) - .append(", "); - // append multiplicity - basicSpectrum.append(spectrumStringArray[i][2]) - .append(", "); - // append intensity - basicSpectrum.append(Double.parseDouble(spectrumStringArray[i][1])) - .append("\n"); - } - } catch (final Exception e) { - return null; - } - - return basicSpectrum.toString(); - } - public static Spectrum NMRShiftDBSpectrumToSpectrum(final String NMRShiftDBSpectrum, final String nucleus) { if ((NMRShiftDBSpectrum == null) diff --git a/src/casekit/nmr/utils/Utils.java b/src/casekit/nmr/utils/Utils.java index 062abb8..99b7093 100644 --- a/src/casekit/nmr/utils/Utils.java +++ b/src/casekit/nmr/utils/Utils.java @@ -557,15 +557,43 @@ public static Float getBondOrderSum(final IAtomContainer ac, final int atomIndex return bondsOrderSum; } + /** + * @param structure molecule to build the DataSet from and + * 1) all atom types and configuration will be 
perceived, + * 2) explicit hydrogens will be converted to implicit ones, + * 3) setting of aromaticity and Kekulization + * + * @return + * + * @throws CDKException + */ public static DataSet atomContainerToDataSet(final IAtomContainer structure) throws CDKException { - final CDKHydrogenAdder hydrogenAdder = CDKHydrogenAdder.getInstance(SilentChemObjectBuilder.getInstance()); - AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(structure); - if (Utils.containsExplicitHydrogens(structure)) { - // remove explicit hydrogens - Utils.removeAtoms(structure, "H"); - } - hydrogenAdder.addImplicitHydrogens(structure); - setAromaticityAndKekulize(structure); + return atomContainerToDataSet(structure, true); + } + + /** + * @param structure molecule to build the DataSet from + * @param manipulate if set to true then + * 1) all atom types and configuration will be perceived, + * 2) explicit hydrogens will be converted to implicit ones, + * 3) setting of aromaticity and Kekulization + * + * @return + * + * @throws CDKException + */ + public static DataSet atomContainerToDataSet(final IAtomContainer structure, + final boolean manipulate) throws CDKException { + if (manipulate) { + final CDKHydrogenAdder hydrogenAdder = CDKHydrogenAdder.getInstance(SilentChemObjectBuilder.getInstance()); + AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(structure); + if (Utils.containsExplicitHydrogens(structure)) { + // remove explicit hydrogens + Utils.removeAtoms(structure, "H"); + } + hydrogenAdder.addImplicitHydrogens(structure); + setAromaticityAndKekulize(structure); + } final Map meta = new HashMap<>(); // meta.put("title", structure.getTitle()); final String source = structure.getProperty("nmrshiftdb2 ID", String.class) @@ -605,6 +633,7 @@ public static DataSet atomContainerToDataSet(final IAtomContainer structure) thr final DataSet dataSet = new DataSet(); dataSet.setStructure(new StructureCompact(structure)); dataSet.setMeta(meta); + dataSet.setAttachment(new 
HashMap<>()); return dataSet; } From 35d66d15bacf437e4abddd69112f9674186c2a69 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Sat, 26 Feb 2022 10:04:04 +0100 Subject: [PATCH 370/405] feat: moved prediction, filter and rank methods from Sherlock to here --- .../elucidation/model/ElucidationOptions.java | 2 + .../nmr/filterandrank/FilterAndRank.java | 196 ++++++++++++++++++ src/casekit/nmr/prediction/Prediction.java | 181 +++++++++++++++- 3 files changed, 374 insertions(+), 5 deletions(-) create mode 100644 src/casekit/nmr/filterandrank/FilterAndRank.java diff --git a/src/casekit/nmr/elucidation/model/ElucidationOptions.java b/src/casekit/nmr/elucidation/model/ElucidationOptions.java index 86dcb1e..e18996e 100644 --- a/src/casekit/nmr/elucidation/model/ElucidationOptions.java +++ b/src/casekit/nmr/elucidation/model/ElucidationOptions.java @@ -18,4 +18,6 @@ public class ElucidationOptions { private boolean useElim; private int elimP1; private int elimP2; + private double shiftTolerance; + private double maximumAverageDeviation; } diff --git a/src/casekit/nmr/filterandrank/FilterAndRank.java b/src/casekit/nmr/filterandrank/FilterAndRank.java new file mode 100644 index 0000000..b4d40db --- /dev/null +++ b/src/casekit/nmr/filterandrank/FilterAndRank.java @@ -0,0 +1,196 @@ +package casekit.nmr.filterandrank; + +import casekit.nmr.analysis.MultiplicitySectionsBuilder; +import casekit.nmr.model.Assignment; +import casekit.nmr.model.DataSet; +import casekit.nmr.model.Spectrum; +import casekit.nmr.similarity.Similarity; +import casekit.nmr.utils.Statistics; +import org.openscience.cdk.exception.CDKException; +import org.openscience.cdk.fingerprint.BitSetFingerprint; +import org.openscience.cdk.io.MDLV3000Writer; + +import java.io.ByteArrayOutputStream; +import java.util.Arrays; +import java.util.List; +import java.util.Objects; +import java.util.stream.Collectors; + +public class FilterAndRank { + + public static List filterAndRank(final List dataSetList, final Spectrum 
querySpectrum, + final double shiftTolerance, final double maxAverageDeviation, + final boolean checkMultiplicity, final boolean checkEquivalencesCount, + final MultiplicitySectionsBuilder multiplicitySectionsBuilder, + final boolean allowIncompleteMatch) { + return rank(filter(dataSetList, querySpectrum, shiftTolerance, maxAverageDeviation, checkMultiplicity, + checkEquivalencesCount, multiplicitySectionsBuilder, allowIncompleteMatch)); + } + + public static List filter(List dataSetList, final Spectrum querySpectrum, + final double shiftTolerance, final double maxAverageDeviation, + final boolean checkMultiplicity, final boolean checkEquivalencesCount, + final MultiplicitySectionsBuilder multiplicitySectionsBuilder, + final boolean allowIncompleteMatch) { + if (querySpectrum.getNDim() + == 1 + && querySpectrum.getNuclei()[0].equals("13C")) { + // @TODO get shift tolerance as arguments + + dataSetList = dataSetList.stream() + .filter(dataSet -> checkDataSet(dataSet, querySpectrum, shiftTolerance, + maxAverageDeviation, checkMultiplicity, + checkEquivalencesCount, + multiplicitySectionsBuilder, allowIncompleteMatch) + != null) + .collect(Collectors.toList()); + } + + return dataSetList; + } + + public static DataSet checkDataSet(final DataSet dataSet, final Spectrum querySpectrum, final double shiftTolerance, + final double maxAverageDeviation, final boolean checkMultiplicity, + final boolean checkEquivalencesCount, + final MultiplicitySectionsBuilder multiplicitySectionsBuilder, + final boolean allowIncompleteMatch) { + try { + final BitSetFingerprint bitSetFingerprintQuerySpectrum = Similarity.getBitSetFingerprint(querySpectrum, 0, + multiplicitySectionsBuilder); + final Spectrum spectrum = dataSet.getSpectrum() + .toSpectrum(); + final Assignment spectralMatchAssignment = Similarity.matchSpectra(spectrum, querySpectrum, 0, 0, + shiftTolerance, checkMultiplicity, + checkEquivalencesCount, false); + + dataSet.addMetaInfo("querySpectrumSignalCount", 
String.valueOf(querySpectrum.getSignalCount())); + dataSet.addMetaInfo("querySpectrumSignalCountWithEquivalences", + String.valueOf(querySpectrum.getSignalCountWithEquivalences())); + dataSet.addMetaInfo("setAssignmentsCountWithEquivalences", + String.valueOf(spectralMatchAssignment.getSetAssignmentsCountWithEquivalences(0))); + final boolean isCompleteSpectralMatch = querySpectrum.getSignalCount() + == spectralMatchAssignment.getSetAssignmentsCount(0); + final boolean isCompleteSpectralMatchWithEquivalences = querySpectrum.getSignalCountWithEquivalences() + == spectralMatchAssignment.getSetAssignmentsCountWithEquivalences(0); + dataSet.addMetaInfo("setAssignmentsCount", + String.valueOf(spectralMatchAssignment.getSetAssignmentsCount(0))); + dataSet.addMetaInfo("setAssignmentsCountWithEquivalences", + String.valueOf(spectralMatchAssignment.getSetAssignmentsCountWithEquivalences(0))); + dataSet.addMetaInfo("isCompleteSpectralMatch", String.valueOf(isCompleteSpectralMatch)); + dataSet.addMetaInfo("isCompleteSpectralMatchWithEquivalences", + String.valueOf(isCompleteSpectralMatchWithEquivalences)); + dataSet.addAttachment("spectralMatchAssignment", spectralMatchAssignment); + + + // store as MOL file + final MDLV3000Writer mdlv3000Writer = new MDLV3000Writer(); + final ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(); + mdlv3000Writer.setWriter(byteArrayOutputStream); + mdlv3000Writer.write(dataSet.getStructure() + .toAtomContainer()); + dataSet.addMetaInfo("molfile", byteArrayOutputStream.toString()); + + Double[] deviations = Similarity.getDeviations(spectrum, querySpectrum, 0, 0, spectralMatchAssignment); + if (allowIncompleteMatch) { + deviations = Arrays.stream(deviations) + .filter(Objects::nonNull) + .toArray(Double[]::new); + } + final Double averageDeviation = Statistics.calculateAverageDeviation(deviations); + if (averageDeviation + != null + && averageDeviation + <= maxAverageDeviation) { + dataSet.addMetaInfo("averageDeviation", 
String.valueOf(averageDeviation)); + final Double rmsd = Statistics.calculateRMSD(deviations); + dataSet.addMetaInfo("rmsd", String.valueOf(rmsd)); + + final BitSetFingerprint bitSetFingerprintDataSet = Similarity.getBitSetFingerprint(spectrum, 0, + multiplicitySectionsBuilder); + final Double tanimotoCoefficient = Similarity.calculateTanimotoCoefficient( + bitSetFingerprintQuerySpectrum, bitSetFingerprintDataSet); + dataSet.addMetaInfo("tanimoto", String.valueOf(tanimotoCoefficient)); + + return dataSet; + } + } catch (final CDKException e) { + e.printStackTrace(); + } + + return null; + } + + public static List rank(final List dataSetList) { + dataSetList.sort((dataSet1, dataSet2) -> { + final int avgDevComparison = compareNumericDataSetMetaKey(dataSet1, dataSet2, "averageDeviation"); + if (avgDevComparison + != 0) { + return avgDevComparison; + } + + final boolean isCompleteSpectralMatchDataSet1 = Boolean.parseBoolean(dataSet1.getMeta() + .get("isCompleteSpectralMatch")); + final boolean isCompleteSpectralMatchDataSet2 = Boolean.parseBoolean(dataSet2.getMeta() + .get("isCompleteSpectralMatch")); + if (isCompleteSpectralMatchDataSet1 + && !isCompleteSpectralMatchDataSet2) { + return -1; + } else if (!isCompleteSpectralMatchDataSet1 + && isCompleteSpectralMatchDataSet2) { + return 1; + } + final int setAssignmentsCountComparison = compareNumericDataSetMetaKey(dataSet1, dataSet2, + "setAssignmentsCount"); + if (setAssignmentsCountComparison + != 0) { + return -1 + * setAssignmentsCountComparison; + } + + return 0; + }); + + return dataSetList; + } + + private static int compareNumericDataSetMetaKey(final DataSet dataSet1, final DataSet dataSet2, + final String metaKey) { + Double valueDataSet1 = null; + Double valueDataSet2 = null; + try { + valueDataSet1 = Double.parseDouble(dataSet1.getMeta() + .get(metaKey)); + } catch (final NullPointerException | NumberFormatException e) { + // e.printStackTrace(); + } + try { + valueDataSet2 = 
Double.parseDouble(dataSet2.getMeta() + .get(metaKey)); + } catch (final NullPointerException | NumberFormatException e) { + // e.printStackTrace(); + } + + if (valueDataSet1 + != null + && valueDataSet2 + != null) { + if (valueDataSet1 + < valueDataSet2) { + return -1; + } else if (valueDataSet1 + > valueDataSet2) { + return 1; + } + return 0; + } + if (valueDataSet1 + != null) { + return -1; + } else if (valueDataSet2 + != null) { + return 1; + } + + return 0; + } +} diff --git a/src/casekit/nmr/prediction/Prediction.java b/src/casekit/nmr/prediction/Prediction.java index bb1dab1..ea5cf1e 100644 --- a/src/casekit/nmr/prediction/Prediction.java +++ b/src/casekit/nmr/prediction/Prediction.java @@ -24,25 +24,33 @@ package casekit.nmr.prediction; +import casekit.nmr.analysis.MultiplicitySectionsBuilder; +import casekit.nmr.filterandrank.FilterAndRank; import casekit.nmr.fragments.model.ConnectionTree; import casekit.nmr.fragments.model.ConnectionTreeNode; import casekit.nmr.hose.HOSECodeBuilder; -import casekit.nmr.model.Assignment; -import casekit.nmr.model.DataSet; -import casekit.nmr.model.Signal; -import casekit.nmr.model.Spectrum; +import casekit.nmr.model.*; +import casekit.nmr.utils.Statistics; import casekit.nmr.utils.Utils; +import casekit.threading.MultiThreading; import org.openscience.cdk.exception.CDKException; import org.openscience.cdk.interfaces.IAtom; import org.openscience.cdk.interfaces.IAtomContainer; +import org.openscience.cdk.layout.StructureDiagramGenerator; import org.openscience.cdk.silent.SilentChemObjectBuilder; +import org.openscience.cdk.smiles.SmilesGenerator; import org.openscience.cdk.tools.CDKHydrogenAdder; import org.openscience.cdk.tools.manipulator.AtomContainerManipulator; +import org.openscience.nmrshiftdb.util.AtomUtils; +import org.openscience.nmrshiftdb.util.ExtendedHOSECodeGenerator; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.concurrent.Callable; 
+import java.util.concurrent.ConcurrentLinkedQueue; +import java.util.function.Consumer; /** * @author Michael Wenk [https://github.com/michaelwenk] @@ -267,7 +275,7 @@ public static DataSet predictHSQCEdited(final IAtomContainer structure, final Sp final String atomTypeDim2 = Utils.getAtomTypeFromSpectrum(spectrumDim2, 0); IAtom atom; - Integer explicitHydrogensCount; + int explicitHydrogensCount; for (int i = 0; i < spectrum.getSignalCount(); i++) { atom = structure.getAtom(dataSet.getAssignment() @@ -292,4 +300,167 @@ public static DataSet predictHSQCEdited(final IAtomContainer structure, final Sp return dataSet; } + + public static List predict1DByStereoHOSECodeAndFilter(final Spectrum querySpectrum, + final double shiftTolerance, + final double maximumAverageDeviation, + final int maxSphere, + final List structureList, + final Map> hoseCodeDBEntriesMap, + final Map multiplicitySectionsSettings) { + final MultiplicitySectionsBuilder multiplicitySectionsBuilder = new MultiplicitySectionsBuilder(); + multiplicitySectionsBuilder.setMinLimit(multiplicitySectionsSettings.get(querySpectrum.getNuclei()[0])[0]); + multiplicitySectionsBuilder.setMaxLimit(multiplicitySectionsSettings.get(querySpectrum.getNuclei()[0])[1]); + multiplicitySectionsBuilder.setStepSize(multiplicitySectionsSettings.get(querySpectrum.getNuclei()[0])[2]); + + List dataSetList = new ArrayList<>(); + try { + final ConcurrentLinkedQueue dataSetConcurrentLinkedQueue = new ConcurrentLinkedQueue<>(); + final List> callables = new ArrayList<>(); + for (final IAtomContainer structure : structureList) { + callables.add( + () -> predict1DByStereoHOSECodeAndFilter(structure, querySpectrum, maxSphere, shiftTolerance, + maximumAverageDeviation, true, true, + hoseCodeDBEntriesMap, multiplicitySectionsBuilder)); + } + final Consumer consumer = (dataSet) -> { + if (dataSet + != null) { + dataSetConcurrentLinkedQueue.add(dataSet); + } + }; + MultiThreading.processTasks(callables, consumer, 2, 5); + dataSetList = 
new ArrayList<>(dataSetConcurrentLinkedQueue); + } catch (final Exception e) { + e.printStackTrace(); + } + + return dataSetList; + } + + private static DataSet predict1DByStereoHOSECodeAndFilter(final IAtomContainer structure, + final Spectrum querySpectrum, final int maxSphere, + final double shiftTolerance, + final double maxAverageDeviation, + final boolean checkMultiplicity, + final boolean checkEquivalencesCount, + final Map> hoseCodeDBEntriesMap, + final MultiplicitySectionsBuilder multiplicitySectionsBuilder) { + final String nucleus = querySpectrum.getNuclei()[0]; + final String atomType = Utils.getAtomTypeFromNucleus(nucleus); + final StructureDiagramGenerator structureDiagramGenerator = new StructureDiagramGenerator(); + final ExtendedHOSECodeGenerator extendedHOSECodeGenerator = new ExtendedHOSECodeGenerator(); + + final Assignment assignment; + Signal signal; + Map hoseCodeObjectValues; + double predictedShift; + String hoseCode; + Double[] statistics; + int signalIndex, sphere; + List medians; + + try { + // set 2D coordinates + structureDiagramGenerator.setMolecule(structure); + structureDiagramGenerator.generateCoordinates(structure); + /* !!! No explicit H in mol !!! 
*/ + Utils.convertExplicitToImplicitHydrogens(structure); + /* add explicit H atoms */ + AtomUtils.addAndPlaceHydrogens(structure); + /* detect aromaticity */ + Utils.setAromaticityAndKekulize(structure); + + final DataSet dataSet = Utils.atomContainerToDataSet(structure, false); + + final Spectrum predictedSpectrum = new Spectrum(); + predictedSpectrum.setNuclei(querySpectrum.getNuclei()); + predictedSpectrum.setSignals(new ArrayList<>()); + + final Map> assignmentMap = new HashMap<>(); + for (int i = 0; i + < structure.getAtomCount(); i++) { + if (!structure.getAtom(i) + .getSymbol() + .equals(atomType)) { + continue; + } + medians = new ArrayList<>(); + sphere = maxSphere; + while (sphere + >= 1) { + try { + hoseCode = extendedHOSECodeGenerator.getHOSECode(structure, structure.getAtom(i), sphere); + hoseCodeObjectValues = hoseCodeDBEntriesMap.get(hoseCode); + if (hoseCodeObjectValues + != null) { + for (final Map.Entry solventEntry : hoseCodeObjectValues.entrySet()) { + statistics = hoseCodeObjectValues.get(solventEntry.getKey()); + medians.add(statistics[3]); + } + break; + } + } catch (final Exception ignored) { + } + sphere--; + } + if (medians.isEmpty()) { + continue; + } + predictedShift = Statistics.getMean(medians); + signal = new Signal(); + signal.setNuclei(querySpectrum.getNuclei()); + signal.setShifts(new Double[]{predictedShift}); + signal.setMultiplicity(Utils.getMultiplicityFromProtonsCount( + AtomUtils.getHcount(structure, structure.getAtom(i)))); // counts explicit H + signal.setEquivalencesCount(1); + + signalIndex = predictedSpectrum.addSignal(signal); + + assignmentMap.putIfAbsent(signalIndex, new ArrayList<>()); + assignmentMap.get(signalIndex) + .add(i); + } + + // if no spectrum could be built or the number of signals in spectrum is different than the atom number in molecule + try { + if (Utils.getDifferenceSpectrumSizeAndMolecularFormulaCount(predictedSpectrum, + Utils.getMolecularFormulaFromString( + dataSet.getMeta() + .get("mf")), 0) + 
!= 0) { + return null; + } + } catch (final CDKException e) { + e.printStackTrace(); + return null; + } + + + Utils.convertExplicitToImplicitHydrogens(structure); + dataSet.setStructure(new StructureCompact(structure)); + dataSet.addMetaInfo("smiles", SmilesGenerator.generic() + .create(structure)); + + dataSet.setSpectrum(new SpectrumCompact(predictedSpectrum)); + assignment = new Assignment(); + assignment.setNuclei(predictedSpectrum.getNuclei()); + assignment.initAssignments(predictedSpectrum.getSignalCount()); + + for (final Map.Entry> entry : assignmentMap.entrySet()) { + for (final int atomIndex : assignmentMap.get(entry.getKey())) { + assignment.addAssignmentEquivalence(0, entry.getKey(), atomIndex); + } + } + dataSet.setAssignment(assignment); + + return FilterAndRank.checkDataSet(dataSet, querySpectrum, shiftTolerance, maxAverageDeviation, + checkMultiplicity, checkEquivalencesCount, multiplicitySectionsBuilder, + true); + } catch (final Exception e) { + e.printStackTrace(); + } + + return null; + } } From e47c3f49d6cdf319ac035221aa5c7a58333195d8 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Wed, 2 Mar 2022 18:28:19 +0100 Subject: [PATCH 371/405] chore: do not store molfiles during filtering and ranking & store similarity values in DataSet attachment and not meta --- .../nmr/filterandrank/FilterAndRank.java | 146 ++++++++---------- 1 file changed, 63 insertions(+), 83 deletions(-) diff --git a/src/casekit/nmr/filterandrank/FilterAndRank.java b/src/casekit/nmr/filterandrank/FilterAndRank.java index b4d40db..923f03f 100644 --- a/src/casekit/nmr/filterandrank/FilterAndRank.java +++ b/src/casekit/nmr/filterandrank/FilterAndRank.java @@ -6,11 +6,8 @@ import casekit.nmr.model.Spectrum; import casekit.nmr.similarity.Similarity; import casekit.nmr.utils.Statistics; -import org.openscience.cdk.exception.CDKException; import org.openscience.cdk.fingerprint.BitSetFingerprint; -import org.openscience.cdk.io.MDLV3000Writer; -import java.io.ByteArrayOutputStream; 
import java.util.Arrays; import java.util.List; import java.util.Objects; @@ -27,7 +24,7 @@ public static List filterAndRank(final List dataSetList, final checkEquivalencesCount, multiplicitySectionsBuilder, allowIncompleteMatch)); } - public static List filter(List dataSetList, final Spectrum querySpectrum, + public static List filter(final List dataSetList, final Spectrum querySpectrum, final double shiftTolerance, final double maxAverageDeviation, final boolean checkMultiplicity, final boolean checkEquivalencesCount, final MultiplicitySectionsBuilder multiplicitySectionsBuilder, @@ -35,15 +32,13 @@ public static List filter(List dataSetList, final Spectrum que if (querySpectrum.getNDim() == 1 && querySpectrum.getNuclei()[0].equals("13C")) { - // @TODO get shift tolerance as arguments - - dataSetList = dataSetList.stream() - .filter(dataSet -> checkDataSet(dataSet, querySpectrum, shiftTolerance, - maxAverageDeviation, checkMultiplicity, - checkEquivalencesCount, - multiplicitySectionsBuilder, allowIncompleteMatch) - != null) - .collect(Collectors.toList()); + return dataSetList.stream() + .filter(dataSet -> checkDataSet(dataSet, querySpectrum, shiftTolerance, + maxAverageDeviation, checkMultiplicity, + checkEquivalencesCount, multiplicitySectionsBuilder, + allowIncompleteMatch) + != null) + .collect(Collectors.toList()); } return dataSetList; @@ -54,67 +49,52 @@ public static DataSet checkDataSet(final DataSet dataSet, final Spectrum querySp final boolean checkEquivalencesCount, final MultiplicitySectionsBuilder multiplicitySectionsBuilder, final boolean allowIncompleteMatch) { - try { - final BitSetFingerprint bitSetFingerprintQuerySpectrum = Similarity.getBitSetFingerprint(querySpectrum, 0, - multiplicitySectionsBuilder); - final Spectrum spectrum = dataSet.getSpectrum() - .toSpectrum(); - final Assignment spectralMatchAssignment = Similarity.matchSpectra(spectrum, querySpectrum, 0, 0, - shiftTolerance, checkMultiplicity, - checkEquivalencesCount, false); - - 
dataSet.addMetaInfo("querySpectrumSignalCount", String.valueOf(querySpectrum.getSignalCount())); - dataSet.addMetaInfo("querySpectrumSignalCountWithEquivalences", - String.valueOf(querySpectrum.getSignalCountWithEquivalences())); - dataSet.addMetaInfo("setAssignmentsCountWithEquivalences", - String.valueOf(spectralMatchAssignment.getSetAssignmentsCountWithEquivalences(0))); - final boolean isCompleteSpectralMatch = querySpectrum.getSignalCount() - == spectralMatchAssignment.getSetAssignmentsCount(0); - final boolean isCompleteSpectralMatchWithEquivalences = querySpectrum.getSignalCountWithEquivalences() - == spectralMatchAssignment.getSetAssignmentsCountWithEquivalences(0); - dataSet.addMetaInfo("setAssignmentsCount", - String.valueOf(spectralMatchAssignment.getSetAssignmentsCount(0))); - dataSet.addMetaInfo("setAssignmentsCountWithEquivalences", - String.valueOf(spectralMatchAssignment.getSetAssignmentsCountWithEquivalences(0))); - dataSet.addMetaInfo("isCompleteSpectralMatch", String.valueOf(isCompleteSpectralMatch)); - dataSet.addMetaInfo("isCompleteSpectralMatchWithEquivalences", - String.valueOf(isCompleteSpectralMatchWithEquivalences)); - dataSet.addAttachment("spectralMatchAssignment", spectralMatchAssignment); - - - // store as MOL file - final MDLV3000Writer mdlv3000Writer = new MDLV3000Writer(); - final ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(); - mdlv3000Writer.setWriter(byteArrayOutputStream); - mdlv3000Writer.write(dataSet.getStructure() - .toAtomContainer()); - dataSet.addMetaInfo("molfile", byteArrayOutputStream.toString()); - - Double[] deviations = Similarity.getDeviations(spectrum, querySpectrum, 0, 0, spectralMatchAssignment); - if (allowIncompleteMatch) { - deviations = Arrays.stream(deviations) - .filter(Objects::nonNull) - .toArray(Double[]::new); - } - final Double averageDeviation = Statistics.calculateAverageDeviation(deviations); - if (averageDeviation - != null - && averageDeviation - <= 
maxAverageDeviation) { - dataSet.addMetaInfo("averageDeviation", String.valueOf(averageDeviation)); - final Double rmsd = Statistics.calculateRMSD(deviations); - dataSet.addMetaInfo("rmsd", String.valueOf(rmsd)); - - final BitSetFingerprint bitSetFingerprintDataSet = Similarity.getBitSetFingerprint(spectrum, 0, - multiplicitySectionsBuilder); - final Double tanimotoCoefficient = Similarity.calculateTanimotoCoefficient( - bitSetFingerprintQuerySpectrum, bitSetFingerprintDataSet); - dataSet.addMetaInfo("tanimoto", String.valueOf(tanimotoCoefficient)); - - return dataSet; - } - } catch (final CDKException e) { - e.printStackTrace(); + final BitSetFingerprint bitSetFingerprintQuerySpectrum = Similarity.getBitSetFingerprint(querySpectrum, 0, + multiplicitySectionsBuilder); + final Spectrum spectrum = dataSet.getSpectrum() + .toSpectrum(); + final Assignment spectralMatchAssignment = Similarity.matchSpectra(spectrum, querySpectrum, 0, 0, + shiftTolerance, checkMultiplicity, + checkEquivalencesCount, false); + + dataSet.addAttachment("querySpectrumSignalCount", querySpectrum.getSignalCount()); + dataSet.addAttachment("querySpectrumSignalCountWithEquivalences", + querySpectrum.getSignalCountWithEquivalences()); + dataSet.addAttachment("setAssignmentsCountWithEquivalences", + spectralMatchAssignment.getSetAssignmentsCountWithEquivalences(0)); + final boolean isCompleteSpectralMatch = querySpectrum.getSignalCount() + == spectralMatchAssignment.getSetAssignmentsCount(0); + final boolean isCompleteSpectralMatchWithEquivalences = querySpectrum.getSignalCountWithEquivalences() + == spectralMatchAssignment.getSetAssignmentsCountWithEquivalences(0); + dataSet.addAttachment("setAssignmentsCount", spectralMatchAssignment.getSetAssignmentsCount(0)); + dataSet.addAttachment("setAssignmentsCountWithEquivalences", + spectralMatchAssignment.getSetAssignmentsCountWithEquivalences(0)); + dataSet.addAttachment("isCompleteSpectralMatch", isCompleteSpectralMatch); + 
dataSet.addAttachment("isCompleteSpectralMatchWithEquivalences", isCompleteSpectralMatchWithEquivalences); + dataSet.addAttachment("spectralMatchAssignment", spectralMatchAssignment); + + Double[] deviations = Similarity.getDeviations(spectrum, querySpectrum, 0, 0, spectralMatchAssignment); + if (allowIncompleteMatch) { + deviations = Arrays.stream(deviations) + .filter(Objects::nonNull) + .toArray(Double[]::new); + } + final Double averageDeviation = Statistics.calculateAverageDeviation(deviations); + if (averageDeviation + != null + && averageDeviation + <= maxAverageDeviation) { + dataSet.addAttachment("averageDeviation", averageDeviation); + final Double rmsd = Statistics.calculateRMSD(deviations); + dataSet.addAttachment("rmsd", rmsd); + + final BitSetFingerprint bitSetFingerprintDataSet = Similarity.getBitSetFingerprint(spectrum, 0, + multiplicitySectionsBuilder); + final Double tanimotoCoefficient = Similarity.calculateTanimotoCoefficient(bitSetFingerprintQuerySpectrum, + bitSetFingerprintDataSet); + dataSet.addAttachment("tanimoto", tanimotoCoefficient); + + return dataSet; } return null; @@ -122,7 +102,7 @@ public static DataSet checkDataSet(final DataSet dataSet, final Spectrum querySp public static List rank(final List dataSetList) { dataSetList.sort((dataSet1, dataSet2) -> { - final int avgDevComparison = compareNumericDataSetMetaKey(dataSet1, dataSet2, "averageDeviation"); + final int avgDevComparison = compareNumericDataSetAttachmentKey(dataSet1, dataSet2, "averageDeviation"); if (avgDevComparison != 0) { return avgDevComparison; @@ -139,8 +119,8 @@ public static List rank(final List dataSetList) { && isCompleteSpectralMatchDataSet2) { return 1; } - final int setAssignmentsCountComparison = compareNumericDataSetMetaKey(dataSet1, dataSet2, - "setAssignmentsCount"); + final int setAssignmentsCountComparison = compareNumericDataSetAttachmentKey(dataSet1, dataSet2, + "setAssignmentsCount"); if (setAssignmentsCountComparison != 0) { return -1 @@ -153,19 
+133,19 @@ public static List rank(final List dataSetList) { return dataSetList; } - private static int compareNumericDataSetMetaKey(final DataSet dataSet1, final DataSet dataSet2, - final String metaKey) { + private static int compareNumericDataSetAttachmentKey(final DataSet dataSet1, final DataSet dataSet2, + final String attachmentKey) { Double valueDataSet1 = null; Double valueDataSet2 = null; try { - valueDataSet1 = Double.parseDouble(dataSet1.getMeta() - .get(metaKey)); + valueDataSet1 = (Double) dataSet1.getAttachment() + .get(attachmentKey); } catch (final NullPointerException | NumberFormatException e) { // e.printStackTrace(); } try { - valueDataSet2 = Double.parseDouble(dataSet2.getMeta() - .get(metaKey)); + valueDataSet2 = (Double) dataSet2.getAttachment() + .get(attachmentKey); } catch (final NullPointerException | NumberFormatException e) { // e.printStackTrace(); } From a9eb4bbf0b0808ec23279d0d931ba5510caedd67 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Wed, 2 Mar 2022 18:43:42 +0100 Subject: [PATCH 372/405] fix: avoid exceptions when parsing doubles --- src/casekit/nmr/filterandrank/FilterAndRank.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/casekit/nmr/filterandrank/FilterAndRank.java b/src/casekit/nmr/filterandrank/FilterAndRank.java index 923f03f..8863a35 100644 --- a/src/casekit/nmr/filterandrank/FilterAndRank.java +++ b/src/casekit/nmr/filterandrank/FilterAndRank.java @@ -138,14 +138,14 @@ private static int compareNumericDataSetAttachmentKey(final DataSet dataSet1, fi Double valueDataSet1 = null; Double valueDataSet2 = null; try { - valueDataSet1 = (Double) dataSet1.getAttachment() - .get(attachmentKey); + valueDataSet1 = Double.parseDouble(String.valueOf(dataSet1.getAttachment() + .get(attachmentKey))); } catch (final NullPointerException | NumberFormatException e) { // e.printStackTrace(); } try { - valueDataSet2 = (Double) dataSet2.getAttachment() - .get(attachmentKey); + valueDataSet2 = 
Double.parseDouble(String.valueOf(dataSet2.getAttachment() + .get(attachmentKey))); } catch (final NullPointerException | NumberFormatException e) { // e.printStackTrace(); } From 0c9a594524d4dc3180c0120e264c8a709d1d7345 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Wed, 2 Mar 2022 19:56:37 +0100 Subject: [PATCH 373/405] chore: allow to set the number of threads to use for prediction --- src/casekit/nmr/prediction/Prediction.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/casekit/nmr/prediction/Prediction.java b/src/casekit/nmr/prediction/Prediction.java index ea5cf1e..f37969a 100644 --- a/src/casekit/nmr/prediction/Prediction.java +++ b/src/casekit/nmr/prediction/Prediction.java @@ -307,7 +307,8 @@ public static List predict1DByStereoHOSECodeAndFilter(final Spectrum qu final int maxSphere, final List structureList, final Map> hoseCodeDBEntriesMap, - final Map multiplicitySectionsSettings) { + final Map multiplicitySectionsSettings, + final int nThreads) { final MultiplicitySectionsBuilder multiplicitySectionsBuilder = new MultiplicitySectionsBuilder(); multiplicitySectionsBuilder.setMinLimit(multiplicitySectionsSettings.get(querySpectrum.getNuclei()[0])[0]); multiplicitySectionsBuilder.setMaxLimit(multiplicitySectionsSettings.get(querySpectrum.getNuclei()[0])[1]); @@ -329,7 +330,7 @@ public static List predict1DByStereoHOSECodeAndFilter(final Spectrum qu dataSetConcurrentLinkedQueue.add(dataSet); } }; - MultiThreading.processTasks(callables, consumer, 2, 5); + MultiThreading.processTasks(callables, consumer, nThreads, 5); dataSetList = new ArrayList<>(dataSetConcurrentLinkedQueue); } catch (final Exception e) { e.printStackTrace(); From 3ea00d15b49b394d36be1db7467c6090ca47f547 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Wed, 2 Mar 2022 22:12:42 +0100 Subject: [PATCH 374/405] chore: rank results by number of hits at first and then by average deviation --- .../nmr/filterandrank/FilterAndRank.java | 21 ++----------------- 1 
file changed, 2 insertions(+), 19 deletions(-) diff --git a/src/casekit/nmr/filterandrank/FilterAndRank.java b/src/casekit/nmr/filterandrank/FilterAndRank.java index 8863a35..b197096 100644 --- a/src/casekit/nmr/filterandrank/FilterAndRank.java +++ b/src/casekit/nmr/filterandrank/FilterAndRank.java @@ -102,23 +102,6 @@ public static DataSet checkDataSet(final DataSet dataSet, final Spectrum querySp public static List rank(final List dataSetList) { dataSetList.sort((dataSet1, dataSet2) -> { - final int avgDevComparison = compareNumericDataSetAttachmentKey(dataSet1, dataSet2, "averageDeviation"); - if (avgDevComparison - != 0) { - return avgDevComparison; - } - - final boolean isCompleteSpectralMatchDataSet1 = Boolean.parseBoolean(dataSet1.getMeta() - .get("isCompleteSpectralMatch")); - final boolean isCompleteSpectralMatchDataSet2 = Boolean.parseBoolean(dataSet2.getMeta() - .get("isCompleteSpectralMatch")); - if (isCompleteSpectralMatchDataSet1 - && !isCompleteSpectralMatchDataSet2) { - return -1; - } else if (!isCompleteSpectralMatchDataSet1 - && isCompleteSpectralMatchDataSet2) { - return 1; - } final int setAssignmentsCountComparison = compareNumericDataSetAttachmentKey(dataSet1, dataSet2, "setAssignmentsCount"); if (setAssignmentsCountComparison @@ -126,8 +109,8 @@ public static List rank(final List dataSetList) { return -1 * setAssignmentsCountComparison; } - - return 0; + final int avgDevComparison = compareNumericDataSetAttachmentKey(dataSet1, dataSet2, "averageDeviation"); + return avgDevComparison; }); return dataSetList; From bd9e7cad010b7a2e8fecada4d2226283afbc6f5f Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Wed, 9 Mar 2022 21:06:38 +0100 Subject: [PATCH 375/405] chore: return value directly --- src/casekit/nmr/filterandrank/FilterAndRank.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/casekit/nmr/filterandrank/FilterAndRank.java b/src/casekit/nmr/filterandrank/FilterAndRank.java index b197096..3ab9ccf 100644 --- 
a/src/casekit/nmr/filterandrank/FilterAndRank.java +++ b/src/casekit/nmr/filterandrank/FilterAndRank.java @@ -109,8 +109,8 @@ public static List rank(final List dataSetList) { return -1 * setAssignmentsCountComparison; } - final int avgDevComparison = compareNumericDataSetAttachmentKey(dataSet1, dataSet2, "averageDeviation"); - return avgDevComparison; + + return compareNumericDataSetAttachmentKey(dataSet1, dataSet2, "averageDeviation"); }); return dataSetList; From 4aa4c67cfd45782e47ac8245834f81a3cb9dbd54 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Fri, 11 Mar 2022 03:20:24 +0100 Subject: [PATCH 376/405] chore: adoptions to new J and PathLength class (NMRium) --- src/casekit/nmr/elucidation/Utilities.java | 60 +++++++++---------- .../lsd/PyLSDInputFileBuilder.java | 2 + .../nmr/elucidation/lsd/Utilities.java | 2 +- src/casekit/nmr/model/PathLength.java | 5 +- src/casekit/nmr/model/Signal.java | 19 ++++-- src/casekit/nmr/model/nmrium/J.java | 16 +++++ src/casekit/nmr/model/nmrium/Signal2D.java | 4 +- src/casekit/nmr/model/nmrium/Spectrum.java | 2 +- src/casekit/nmr/utils/Utils.java | 33 +++++----- 9 files changed, 85 insertions(+), 58 deletions(-) create mode 100644 src/casekit/nmr/model/nmrium/J.java diff --git a/src/casekit/nmr/elucidation/Utilities.java b/src/casekit/nmr/elucidation/Utilities.java index 26bfc20..9b0b7b0 100644 --- a/src/casekit/nmr/elucidation/Utilities.java +++ b/src/casekit/nmr/elucidation/Utilities.java @@ -3,7 +3,6 @@ import casekit.nmr.elucidation.model.Detections; import casekit.nmr.elucidation.model.Grouping; import casekit.nmr.elucidation.model.MolecularConnectivity; -import casekit.nmr.model.PathLength; import casekit.nmr.model.Signal; import casekit.nmr.model.nmrium.Correlation; import casekit.nmr.model.nmrium.Link; @@ -235,8 +234,8 @@ public static Map> buildFixedNeighborsByINADEQUATE(final L private static boolean hasMatch(final Correlation correlation1, final Correlation correlation2, final double tolerance) { - final Signal 
signal1 = Utils.extractSignalFromCorrelation(correlation1); - final Signal signal2 = Utils.extractSignalFromCorrelation(correlation2); + final Signal signal1 = Utils.extractFirstSignalFromCorrelation(correlation1); + final Signal signal2 = Utils.extractFirstSignalFromCorrelation(correlation2); if (signal1 == null || signal2 @@ -511,7 +510,6 @@ public static Map> buildMolecularConnectivi MolecularConnectivity newMolecularConnectivity; int index, correlationIndex, protonCorrelationIndex, protonIndex; - PathLength pathLength; for (final MolecularConnectivity molecularConnectivity : molecularConnectivityList) { correlationIndex = molecularConnectivity.getIndex(); // skip in case of non-linked proton which has no index in indices map @@ -548,11 +546,9 @@ public static Map> buildMolecularConnectivi .equals("H") && molecularConnectivity.getHmbc() != null) { - pathLength = molecularConnectivity.getSignal() - .getPathLength(); - for (final Map.Entry entry : molecularConnectivity.getHmbc() - .entrySet()) { - protonCorrelationIndex = entry.getKey(); + for (final Map.Entry entryHMBC : molecularConnectivity.getHmbc() + .entrySet()) { + protonCorrelationIndex = entryHMBC.getKey(); if (indicesMap.containsKey(protonCorrelationIndex)) { for (int l = 0; l < indicesMap.get(protonCorrelationIndex).length; l++) { @@ -562,11 +558,7 @@ public static Map> buildMolecularConnectivi newMolecularConnectivity.setHmbc(new HashMap<>()); } newMolecularConnectivity.getHmbc() - .put(protonIndex, pathLength - == null - ? 
defaultBondDistances.get("hmbc") - : new Integer[]{pathLength.getMin(), - pathLength.getMax()}); + .put(protonIndex, entryHMBC.getValue()); } } } @@ -575,11 +567,9 @@ public static Map> buildMolecularConnectivi .equals("H") && molecularConnectivity.getCosy() != null) { - pathLength = molecularConnectivity.getSignal() - .getPathLength(); - for (final Map.Entry entry : molecularConnectivity.getCosy() - .entrySet()) { - protonCorrelationIndex = entry.getKey(); + for (final Map.Entry entryCOSY : molecularConnectivity.getCosy() + .entrySet()) { + protonCorrelationIndex = entryCOSY.getKey(); if (indicesMap.containsKey(protonCorrelationIndex) && k < indicesMap.get(protonCorrelationIndex).length) { @@ -589,11 +579,7 @@ public static Map> buildMolecularConnectivi newMolecularConnectivity.setCosy(new HashMap<>()); } newMolecularConnectivity.getCosy() - .put(protonIndex, pathLength - == null - ? defaultBondDistances.get("cosy") - : new Integer[]{pathLength.getMin(), - pathLength.getMax()}); + .put(protonIndex, entryCOSY.getValue()); } } } @@ -680,6 +666,7 @@ public static List buildMolecularConnectivityList(final L Correlation correlation; int groupIndex; Map signal2DMap; + Map jMap; Map pathLengthMap; MolecularConnectivity molecularConnectivity; for (int correlationIndex = 0; correlationIndex @@ -688,7 +675,7 @@ public static List buildMolecularConnectivityList(final L molecularConnectivity = new MolecularConnectivity(); molecularConnectivity.setIndex(correlationIndex); molecularConnectivity.setAtomType(correlation.getAtomType()); - molecularConnectivity.setSignal(Utils.extractSignalFromCorrelation(correlation)); + molecularConnectivity.setSignal(Utils.extractFirstSignalFromCorrelation(correlation)); molecularConnectivity.setEquivalence(correlation.getEquivalence()); molecularConnectivity.setPseudo(correlation.isPseudo()); @@ -754,11 +741,19 @@ public static List buildMolecularConnectivityList(final L signal2DMap = (Map) link.getSignal(); if (signal2DMap != null - && 
signal2DMap.containsKey("pathLength")) { - pathLengthMap = (Map) signal2DMap.get("pathLength"); + && signal2DMap.containsKey("j")) { + jMap = (Map) signal2DMap.get("j"); + } else { + jMap = null; + } + if (jMap + != null + && jMap.containsKey("pathLength")) { + pathLengthMap = (Map) jMap.get("pathLength"); } else { pathLengthMap = null; } + for (final int matchIndex : link.getMatch()) { // ignore linked H atoms without any attachment to a heavy atom if (correlationList.get(matchIndex) @@ -780,8 +775,8 @@ public static List buildMolecularConnectivityList(final L .put(matchIndex, pathLengthMap == null ? defaultBondDistances.get("hmbc") - : new Integer[]{(int) pathLengthMap.get("min"), - (int) pathLengthMap.get("max")}); + : new Integer[]{(int) pathLengthMap.get("from"), + (int) pathLengthMap.get("to")}); } else { if (molecularConnectivity.getCosy() == null) { @@ -791,8 +786,8 @@ public static List buildMolecularConnectivityList(final L .put(matchIndex, pathLengthMap == null ? defaultBondDistances.get("cosy") - : new Integer[]{(int) pathLengthMap.get("min"), - (int) pathLengthMap.get("max")}); + : new Integer[]{(int) pathLengthMap.get("from"), + (int) pathLengthMap.get("to")}); } } } @@ -1101,10 +1096,13 @@ public static List>> buildMolecularConn final List correlationList, final Detections detections, final Grouping grouping, final Map defaultBondDistances) { final List>> molecularConnectivityMapCombinationList = new ArrayList<>(); + // build original molecular connectivity list which comes from correlation data directly final List initialMolecularConnectivityList = buildMolecularConnectivityList( correlationList, detections, grouping, defaultBondDistances); + // build combinations out pf original molecular connectivity list by using grouping information final List> molecularConnectivityListList = buildCombinations( initialMolecularConnectivityList, grouping); + // for each combination build a molecular connectivity map which is used for PyLSD input file creation for 
(final List molecularConnectivityList : molecularConnectivityListList) { molecularConnectivityMapCombinationList.add( buildMolecularConnectivityMap(molecularConnectivityList, detections, grouping, diff --git a/src/casekit/nmr/elucidation/lsd/PyLSDInputFileBuilder.java b/src/casekit/nmr/elucidation/lsd/PyLSDInputFileBuilder.java index 1709270..c23dc57 100644 --- a/src/casekit/nmr/elucidation/lsd/PyLSDInputFileBuilder.java +++ b/src/casekit/nmr/elucidation/lsd/PyLSDInputFileBuilder.java @@ -537,8 +537,10 @@ public static List buildPyLSDInputFileContentList(final Correlations cor return new ArrayList<>(); } final List inputFilesContentList = new ArrayList<>(); + // build different combinations final List>> molecularConnectivityMapCombinationList = casekit.nmr.elucidation.Utilities.buildMolecularConnectivityMapCombinationList( correlations.getValues(), detections, grouping, defaultBondDistances); + // for each combination insert an input file for PyLSD for (final Map> molecularConnectivityMap : molecularConnectivityMapCombinationList) { inputFilesContentList.add(buildPyLSDInputFileContent(molecularConnectivityMap, mf, elucidationOptions)); } diff --git a/src/casekit/nmr/elucidation/lsd/Utilities.java b/src/casekit/nmr/elucidation/lsd/Utilities.java index da13aa0..475ffec 100644 --- a/src/casekit/nmr/elucidation/lsd/Utilities.java +++ b/src/casekit/nmr/elucidation/lsd/Utilities.java @@ -84,7 +84,7 @@ public static boolean writeNeighborsFile(final String pathToNeighborsFile, final < correlationList.size(); i++) { if (neighbors.containsKey(i)) { correlation = correlationList.get(i); - signal = Utils.extractSignalFromCorrelation(correlation); + signal = Utils.extractFirstSignalFromCorrelation(correlation); atomType = correlation.getAtomType(); neighborsTemp = neighbors.get(i); diff --git a/src/casekit/nmr/model/PathLength.java b/src/casekit/nmr/model/PathLength.java index 1515164..bffc529 100644 --- a/src/casekit/nmr/model/PathLength.java +++ 
b/src/casekit/nmr/model/PathLength.java @@ -9,7 +9,6 @@ @ToString public class PathLength { - private int min; - private int max; - private String source; + private int from; + private int to; } diff --git a/src/casekit/nmr/model/Signal.java b/src/casekit/nmr/model/Signal.java index fbbe132..db59f19 100644 --- a/src/casekit/nmr/model/Signal.java +++ b/src/casekit/nmr/model/Signal.java @@ -27,6 +27,7 @@ */ package casekit.nmr.model; +import casekit.nmr.model.nmrium.J; import lombok.AllArgsConstructor; import lombok.Getter; import lombok.NoArgsConstructor; @@ -50,7 +51,7 @@ public class Signal { private Double intensity; private int equivalencesCount; private Integer phase; - private PathLength pathLength; + private J j; public int getNDim() { @@ -85,11 +86,21 @@ public Double getShift(final int dim) { } public Signal buildClone() { + final J clonedJ = this.j + != null + ? new J() + : null; + if (this.j + != null + && this.j.getPathLength() + != null) { + clonedJ.setPathLength(new PathLength(this.j.getPathLength() + .getFrom(), this.j.getPathLength() + .getTo())); + } return new Signal(this.getNuclei() .clone(), this.shifts.clone(), this.multiplicity, this.kind, this.intensity, - this.equivalencesCount, this.phase, - new PathLength(this.pathLength.getMin(), this.pathLength.getMax(), - this.pathLength.getSource())); + this.equivalencesCount, this.phase, clonedJ); } @Override diff --git a/src/casekit/nmr/model/nmrium/J.java b/src/casekit/nmr/model/nmrium/J.java new file mode 100644 index 0000000..507230c --- /dev/null +++ b/src/casekit/nmr/model/nmrium/J.java @@ -0,0 +1,16 @@ +package casekit.nmr.model.nmrium; + +import casekit.nmr.model.PathLength; +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import lombok.*; + +@NoArgsConstructor +@AllArgsConstructor +@Getter +@Setter +@ToString +@JsonIgnoreProperties(ignoreUnknown = true) +public class J { + + private PathLength pathLength; +} diff --git a/src/casekit/nmr/model/nmrium/Signal2D.java 
b/src/casekit/nmr/model/nmrium/Signal2D.java index 914eb12..21f0743 100644 --- a/src/casekit/nmr/model/nmrium/Signal2D.java +++ b/src/casekit/nmr/model/nmrium/Signal2D.java @@ -24,7 +24,6 @@ package casekit.nmr.model.nmrium; -import casekit.nmr.model.PathLength; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import lombok.*; @@ -39,9 +38,10 @@ public class Signal2D extends Signal { + private Map x; private Map y; - private PathLength pathLength; + private J j; public Signal2D(final Signal signal) { super(signal.getId(), signal.getKind(), signal.getMultiplicity(), signal.getSign()); diff --git a/src/casekit/nmr/model/nmrium/Spectrum.java b/src/casekit/nmr/model/nmrium/Spectrum.java index 0e74389..aa51152 100644 --- a/src/casekit/nmr/model/nmrium/Spectrum.java +++ b/src/casekit/nmr/model/nmrium/Spectrum.java @@ -93,7 +93,7 @@ public casekit.nmr.model.Spectrum toSpectrum(final boolean considerSignalKind) { .get("delta")}, signal2D.getMultiplicity(), signal2D.getKind(), null, 0, 0, - signal2D.getPathLength())); + signal2D.getJ())); } })); spectrum.addMetaInfo("solvent", (String) this.info.get("solvent")); diff --git a/src/casekit/nmr/utils/Utils.java b/src/casekit/nmr/utils/Utils.java index 99b7093..aa76f29 100644 --- a/src/casekit/nmr/utils/Utils.java +++ b/src/casekit/nmr/utils/Utils.java @@ -1,11 +1,10 @@ package casekit.nmr.utils; import casekit.nmr.elucidation.Constants; +import casekit.nmr.model.Signal; +import casekit.nmr.model.Spectrum; import casekit.nmr.model.*; -import casekit.nmr.model.nmrium.Correlation; -import casekit.nmr.model.nmrium.Link; -import casekit.nmr.model.nmrium.Signal1D; -import casekit.nmr.model.nmrium.Signal2D; +import casekit.nmr.model.nmrium.*; import com.google.gson.Gson; import org.openscience.cdk.aromaticity.Aromaticity; import org.openscience.cdk.aromaticity.ElectronDonation; @@ -572,19 +571,19 @@ public static DataSet atomContainerToDataSet(final IAtomContainer structure) thr } /** - * @param structure molecule to 
build the DataSet from - * @param manipulate if set to true then - * 1) all atom types and configuration will be perceived, - * 2) explicit hydrogens will be converted to implicit ones, - * 3) setting of aromaticity and Kekulization + * @param structure molecule to build the DataSet from + * @param configure if set to true then + * 1) all atom types and configuration will be perceived, + * 2) explicit hydrogens will be converted to implicit ones, + * 3) setting of aromaticity and Kekulization * * @return * * @throws CDKException */ public static DataSet atomContainerToDataSet(final IAtomContainer structure, - final boolean manipulate) throws CDKException { - if (manipulate) { + final boolean configure) throws CDKException { + if (configure) { final CDKHydrogenAdder hydrogenAdder = CDKHydrogenAdder.getInstance(SilentChemObjectBuilder.getInstance()); AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(structure); if (Utils.containsExplicitHydrogens(structure)) { @@ -638,7 +637,7 @@ public static DataSet atomContainerToDataSet(final IAtomContainer structure, return dataSet; } - public static Signal extractSignalFromCorrelation(final Correlation correlation) { + public static Signal extractFirstSignalFromCorrelation(final Correlation correlation) { if (correlation.isPseudo()) { return null; } @@ -675,8 +674,10 @@ public static Signal extractSignalFromCorrelation(final Correlation correlation) final Signal2D signal2D = new Signal2D(signal); signal2D.setX((Map) signalMap.get("x")); signal2D.setY((Map) signalMap.get("y")); - if (signalMap.containsKey("pathLength")) { - signal2D.setPathLength((PathLength) signalMap.get("pathLength")); + if (signalMap.containsKey("j")) { + final Map jMap = (Map) signalMap.get("j"); + final Map pathLengthMap = (Map) jMap.get("pathLength"); + signal2D.setJ(new J(new PathLength((int) pathLengthMap.get("from"), (int) pathLengthMap.get("to")))); } final double shift = link.getAxis() .equals("x") @@ -687,7 +688,7 @@ public static Signal 
extractSignalFromCorrelation(final Correlation correlation) return new Signal(new String[]{Constants.nucleiMap.get(correlation.getAtomType())}, new Double[]{shift}, signal2D.getMultiplicity(), signal2D.getKind(), null, correlation.getEquivalence(), - signal2D.getSign(), signal2D.getPathLength()); + signal2D.getSign(), signal2D.getJ()); } return null; @@ -706,7 +707,7 @@ public static Spectrum correlationListToSpectrum1D(final List corre Signal signal; for (final Correlation correlation : correlationListAtomType) { - signal = extractSignalFromCorrelation(correlation); + signal = extractFirstSignalFromCorrelation(correlation); if (signal != null) { spectrum.addSignalWithoutEquivalenceSearch(signal); From f03d60cf7e29f519ac0c3bd065acc9413ddab99b Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Sat, 12 Mar 2022 01:11:02 +0100 Subject: [PATCH 377/405] feat: added methods to build heavy atom statistics --- .../nmr/analysis/ConnectivityStatistics.java | 511 ++++++++++-------- .../nmr/fragments/model/ConnectionTree.java | 3 +- 2 files changed, 298 insertions(+), 216 deletions(-) diff --git a/src/casekit/nmr/analysis/ConnectivityStatistics.java b/src/casekit/nmr/analysis/ConnectivityStatistics.java index 03d759b..d5498fb 100644 --- a/src/casekit/nmr/analysis/ConnectivityStatistics.java +++ b/src/casekit/nmr/analysis/ConnectivityStatistics.java @@ -11,127 +11,19 @@ public class ConnectivityStatistics { - /** - * @param dataSetList - * @param nucleus - * @param connectivityStatistics multiplicity -> hybridization -> shift (int) -> connected atom symbol -> connected atom hybridization -> connected atom protons count -> occurrence - */ - public static void buildConnectivityStatistics(final List dataSetList, final String nucleus, - final Map>>>>> connectivityStatistics) { - final String atomType = Utils.getAtomTypeFromNucleus(nucleus); - for (final DataSet dataSet : dataSetList) { - if (!dataSet.getSpectrum() - .getNuclei()[0].equals(nucleus)) { - continue; - } - 
buildConnectivityStatistics(dataSet, atomType, connectivityStatistics); - } - } - - /** - * @param dataSet - * @param atomType - * @param connectivityStatistics multiplicity -> hybridization -> shift (int) -> connected atom symbol -> connected atom hybridization -> connected atom protons count -> occurrence - */ - public static void buildConnectivityStatistics(final DataSet dataSet, final String atomType, - final Map>>>>> connectivityStatistics) { - final IAtomContainer structure = dataSet.getStructure() - .toAtomContainer(); - final Spectrum spectrum = dataSet.getSpectrum() - .toSpectrum(); - int shift, atomIndex; - IAtom atom; - String multiplicity, hybridization, connectedAtomType, connectedAtomHybridization; - for (int signalIndex = 0; signalIndex - < spectrum.getSignalCount(); signalIndex++) { - shift = spectrum.getShift(signalIndex, 0) - .intValue(); - for (int equivalenceIndex = 0; equivalenceIndex - < dataSet.getAssignment() - .getAssignment(0, signalIndex).length; equivalenceIndex++) { - atomIndex = dataSet.getAssignment() - .getAssignment(0, signalIndex, equivalenceIndex); - atom = structure.getAtom(atomIndex); - if (atom.getSymbol() - .equals(atomType)) { - multiplicity = Utils.getMultiplicityFromProtonsCount(atom.getImplicitHydrogenCount()); - if (multiplicity - == null) { - continue; - } - multiplicity = multiplicity.toLowerCase(); - hybridization = atom.getHybridization() - .name(); - connectivityStatistics.putIfAbsent(multiplicity, new ConcurrentHashMap<>()); - connectivityStatistics.get(multiplicity) - .putIfAbsent(hybridization, new ConcurrentHashMap<>()); - // check for connected hetero atoms - for (final IAtom connectedAtom : structure.getConnectedAtomsList(atom)) { - if (connectedAtom.getSymbol() - .equals("H")) { - continue; - } - connectedAtomType = connectedAtom.getSymbol(); - if (connectedAtom.getHybridization() - == null) { - continue; - } - connectedAtomHybridization = connectedAtom.getHybridization() - .name(); - 
connectivityStatistics.get(multiplicity) - .get(hybridization) - .putIfAbsent(shift, new ConcurrentHashMap<>()); - connectivityStatistics.get(multiplicity) - .get(hybridization) - .get(shift) - .putIfAbsent(connectedAtomType, new ConcurrentHashMap<>()); - connectivityStatistics.get(multiplicity) - .get(hybridization) - .get(shift) - .get(connectedAtomType) - .putIfAbsent(connectedAtomHybridization, new ConcurrentHashMap<>()); - connectivityStatistics.get(multiplicity) - .get(hybridization) - .get(shift) - .get(connectedAtomType) - .get(connectedAtomHybridization) - .putIfAbsent(connectedAtom.getImplicitHydrogenCount(), 0); - connectivityStatistics.get(multiplicity) - .get(hybridization) - .get(shift) - .get(connectedAtomType) - .get(connectedAtomHybridization) - .put(connectedAtom.getImplicitHydrogenCount(), connectivityStatistics.get( - multiplicity) - .get(hybridization) - .get(shift) - .get(connectedAtomType) - .get(connectedAtomHybridization) - .get(connectedAtom.getImplicitHydrogenCount()) - + 1); - } - } - } - } - } - /** * @param dataSet * @param atomType - * @param occurrenceStatistics multiplicity -> hybridization -> shift (int) -> "elemental composition" -> connected atom symbol -> [#found, #notFound] + * @param occurrenceStatistics multiplicity -> hybridization -> shift (int) -> elemental composition (mf) -> connected atom symbol -> [#found, #notFound] */ public static void buildOccurrenceStatistics(final DataSet dataSet, final String atomType, final Map>>>> occurrenceStatistics) { - final IAtomContainer structure = dataSet.getStructure() - .toAtomContainer(); final Spectrum spectrum = dataSet.getSpectrum() .toSpectrum(); - final List elements = new ArrayList<>(Utils.getMolecularFormulaElementCounts(dataSet.getMeta() - .get("mf")) - .keySet()); - elements.remove("H"); - Collections.sort(elements); - final String elementsString = String.join(",", elements); + final IAtomContainer structure = dataSet.getStructure() + .toAtomContainer(); + final List 
elements = buildElements(structure); + final String elementsString = buildElementsString(elements); int shift, atomIndex; IAtom atom; @@ -218,123 +110,314 @@ public static void buildOccurrenceStatistics(final DataSet dataSet, final String } /** - * @param connectivityStatistics multiplicity -> hybridization -> shift (int) -> connected atom symbol -> connected atom hybridization -> connected atom protons count -> occurrence - * @param multiplicity - * @param hybridization - * @param shift - * @param molecularFormulaElements - * - * @return + * @param structure structure to build and add the statistics from + * @param heavyAtomsStatistics elemental composition (mf) -> connected atom pair -> #found */ - public static Map>> extractConnectivities( - final Map>>>>> connectivityStatistics, - final String multiplicity, final String hybridization, final int shift, - final Set molecularFormulaElements) { - final Map>> extractedConnectivities = new HashMap<>(); - if (connectivityStatistics.containsKey(multiplicity) - && connectivityStatistics.get(multiplicity) - .containsKey(hybridization) - && connectivityStatistics.get(multiplicity) - .get(hybridization) - .containsKey(shift)) { - for (final Map.Entry>> entry : connectivityStatistics.get( - multiplicity) - .get(hybridization) - .get(shift) - .entrySet()) { - if (molecularFormulaElements.contains(entry.getKey())) { - extractedConnectivities.put(entry.getKey(), entry.getValue()); + public static void buildHeavyAtomsStatistics(final IAtomContainer structure, + final Map> heavyAtomsStatistics) { + final List elements = buildElements(structure); + final String elementsString = buildElementsString(elements); + heavyAtomsStatistics.putIfAbsent(elementsString, new HashMap<>()); + + for (final String combination : buildCombinations(elements)) { + heavyAtomsStatistics.get(elementsString) + .putIfAbsent(combination, 0); + } + + IAtom atom; + String atomPairKey; + final Map found = new HashMap<>(); + for (int i = 0; i + < 
structure.getAtomCount(); i++) { + atom = structure.getAtom(i); + // check for connected hetero atoms + for (final IAtom connectedAtom : structure.getConnectedAtomsList(atom)) { + if (connectedAtom.getSymbol() + .equals("H")) { + continue; } + atomPairKey = buildAtomPairString(atom.getSymbol(), connectedAtom.getSymbol()); + found.putIfAbsent(atomPairKey, 0); + found.put(atomPairKey, found.get(atomPairKey) + + 1); } } + for (final String connectedAtomPair : found.keySet()) { + heavyAtomsStatistics.get(elementsString) + .put(connectedAtomPair, heavyAtomsStatistics.get(elementsString) + .get(connectedAtomPair) + + found.get(connectedAtomPair) + / 2); // divided by two since we count for both bond partners in the previous loop + } + } - return extractedConnectivities; + public static List buildElements(final IAtomContainer structure) { + final String mf = Utils.molecularFormularToString(Utils.getMolecularFormulaFromAtomContainer(structure)); + return buildElements(mf); } - public static Map>> filterExtractedConnectivitiesByHybridizations( - final Map>> extractedConnectivities, - final Set knownCarbonHybridizations) { - // remove hybridization of carbons which we do not expect - for (final String atomType : extractedConnectivities.keySet()) { - if (atomType.equals("C")) { - for (final int hybridization : new HashSet<>(extractedConnectivities.get(atomType) - .keySet())) { - if (!knownCarbonHybridizations.contains(hybridization)) { - extractedConnectivities.get(atomType) - .remove(hybridization); - } - } - } - } + public static List buildElements(final String mf) { + final List elements = new ArrayList<>(Utils.getMolecularFormulaElementCounts(mf) + .keySet()); + elements.remove("H"); + Collections.sort(elements); - return extractedConnectivities; + return elements; } - public static Map>> filterExtractedConnectivitiesByCount( - final Map>> extractedConnectivities, - final double thresholdElementCount, final boolean onAtomTypeLevel) { - final Map totalCounts = 
getTotalCounts(extractedConnectivities); - final int totalCountsSum = getSum(new HashSet<>(totalCounts.values())); - final Map>> filteredExtractedConnectivities = new HashMap<>(); - extractedConnectivities.keySet() - .forEach(neighborAtomType -> { - int sum = 0; - for (final Map.Entry> entryPerHybridization : extractedConnectivities.get( - neighborAtomType) - .entrySet()) { - for (final Map.Entry entryProtonsCount : extractedConnectivities.get( - neighborAtomType) - .get(entryPerHybridization.getKey()) - .entrySet()) { - if (onAtomTypeLevel) { - sum += entryProtonsCount.getValue(); - } else if (entryProtonsCount.getValue() - / (double) totalCountsSum - >= thresholdElementCount) { - filteredExtractedConnectivities.putIfAbsent(neighborAtomType, - new HashMap<>()); - filteredExtractedConnectivities.get(neighborAtomType) - .putIfAbsent( - entryPerHybridization.getKey(), - new HashSet<>()); - filteredExtractedConnectivities.get(neighborAtomType) - .get(entryPerHybridization.getKey()) - .add(entryProtonsCount.getKey()); - } - } - } - if (onAtomTypeLevel - && sum - / (double) totalCountsSum - >= thresholdElementCount) { - filteredExtractedConnectivities.putIfAbsent(neighborAtomType, new HashMap<>()); - } - }); + public static String buildElementsString(final List elements) { + return String.join(",", elements); + } + + public static String buildAtomPairString(final String atomType1, final String atomType2) { + final List atomPairList = new ArrayList<>(); + atomPairList.add(atomType1); + atomPairList.add(atomType2); + Collections.sort(atomPairList); - return filteredExtractedConnectivities; + return String.join("_", atomPairList); } - private static Map getTotalCounts( - final Map>> extractedConnectivities) { - final Map totalCounts = new HashMap<>(); - for (final String key1 : extractedConnectivities.keySet()) { - totalCounts.putIfAbsent(key1, 0); - for (final int key2 : extractedConnectivities.get(key1) - .keySet()) { - for (final Map.Entry countsEntry : 
extractedConnectivities.get(key1) - .get(key2) - .entrySet()) { - totalCounts.put(key1, totalCounts.get(key1) - + countsEntry.getValue()); - } + public static Set buildCombinations(final List elements) { + final Set combinations = new HashSet<>(); + for (final String element1 : elements) { + for (final String element2 : elements) { + combinations.add(buildAtomPairString(element1, element2)); } } - return totalCounts; + return combinations; } - private static int getSum(final Set values) { - return values.stream() - .reduce(0, (total, current) -> total += current); - } + // /** + // * @param dataSetList + // * @param nucleus + // * @param connectivityStatistics multiplicity -> hybridization -> shift (int) -> connected atom symbol -> connected atom hybridization -> connected atom protons count -> occurrence + // */ + // @Deprecated + // public static void buildConnectivityStatistics(final List dataSetList, final String nucleus, + // final Map>>>>> connectivityStatistics) { + // final String atomType = Utils.getAtomTypeFromNucleus(nucleus); + // for (final DataSet dataSet : dataSetList) { + // if (!dataSet.getSpectrum() + // .getNuclei()[0].equals(nucleus)) { + // continue; + // } + // buildConnectivityStatistics(dataSet, atomType, connectivityStatistics); + // } + // } + // + // /** + // * @param dataSet + // * @param atomType + // * @param connectivityStatistics multiplicity -> hybridization -> shift (int) -> connected atom symbol -> connected atom hybridization -> connected atom protons count -> occurrence + // */ + // @Deprecated + // public static void buildConnectivityStatistics(final DataSet dataSet, final String atomType, + // final Map>>>>> connectivityStatistics) { + // final IAtomContainer structure = dataSet.getStructure() + // .toAtomContainer(); + // final Spectrum spectrum = dataSet.getSpectrum() + // .toSpectrum(); + // int shift, atomIndex; + // IAtom atom; + // String multiplicity, hybridization, connectedAtomType, connectedAtomHybridization; + // 
for (int signalIndex = 0; signalIndex + // < spectrum.getSignalCount(); signalIndex++) { + // shift = spectrum.getShift(signalIndex, 0) + // .intValue(); + // for (int equivalenceIndex = 0; equivalenceIndex + // < dataSet.getAssignment() + // .getAssignment(0, signalIndex).length; equivalenceIndex++) { + // atomIndex = dataSet.getAssignment() + // .getAssignment(0, signalIndex, equivalenceIndex); + // atom = structure.getAtom(atomIndex); + // if (atom.getSymbol() + // .equals(atomType)) { + // multiplicity = Utils.getMultiplicityFromProtonsCount(atom.getImplicitHydrogenCount()); + // if (multiplicity + // == null) { + // continue; + // } + // multiplicity = multiplicity.toLowerCase(); + // hybridization = atom.getHybridization() + // .name(); + // connectivityStatistics.putIfAbsent(multiplicity, new ConcurrentHashMap<>()); + // connectivityStatistics.get(multiplicity) + // .putIfAbsent(hybridization, new ConcurrentHashMap<>()); + // // check for connected hetero atoms + // for (final IAtom connectedAtom : structure.getConnectedAtomsList(atom)) { + // if (connectedAtom.getSymbol() + // .equals("H")) { + // continue; + // } + // connectedAtomType = connectedAtom.getSymbol(); + // if (connectedAtom.getHybridization() + // == null) { + // continue; + // } + // connectedAtomHybridization = connectedAtom.getHybridization() + // .name(); + // connectivityStatistics.get(multiplicity) + // .get(hybridization) + // .putIfAbsent(shift, new ConcurrentHashMap<>()); + // connectivityStatistics.get(multiplicity) + // .get(hybridization) + // .get(shift) + // .putIfAbsent(connectedAtomType, new ConcurrentHashMap<>()); + // connectivityStatistics.get(multiplicity) + // .get(hybridization) + // .get(shift) + // .get(connectedAtomType) + // .putIfAbsent(connectedAtomHybridization, new ConcurrentHashMap<>()); + // connectivityStatistics.get(multiplicity) + // .get(hybridization) + // .get(shift) + // .get(connectedAtomType) + // .get(connectedAtomHybridization) + // 
.putIfAbsent(connectedAtom.getImplicitHydrogenCount(), 0); + // connectivityStatistics.get(multiplicity) + // .get(hybridization) + // .get(shift) + // .get(connectedAtomType) + // .get(connectedAtomHybridization) + // .put(connectedAtom.getImplicitHydrogenCount(), connectivityStatistics.get( + // multiplicity) + // .get(hybridization) + // .get(shift) + // .get(connectedAtomType) + // .get(connectedAtomHybridization) + // .get(connectedAtom.getImplicitHydrogenCount()) + // + 1); + // } + // } + // } + // } + // } + // + // /** + // * @param connectivityStatistics multiplicity -> hybridization -> shift (int) -> connected atom symbol -> connected atom hybridization -> connected atom protons count -> occurrence + // * @param multiplicity + // * @param hybridization + // * @param shift + // * @param molecularFormulaElements + // * + // * @return + // */ + // @Deprecated + // public static Map>> extractConnectivities( + // final Map>>>>> connectivityStatistics, + // final String multiplicity, final String hybridization, final int shift, + // final Set molecularFormulaElements) { + // final Map>> extractedConnectivities = new HashMap<>(); + // if (connectivityStatistics.containsKey(multiplicity) + // && connectivityStatistics.get(multiplicity) + // .containsKey(hybridization) + // && connectivityStatistics.get(multiplicity) + // .get(hybridization) + // .containsKey(shift)) { + // for (final Map.Entry>> entry : connectivityStatistics.get( + // multiplicity) + // .get(hybridization) + // .get(shift) + // .entrySet()) { + // if (molecularFormulaElements.contains(entry.getKey())) { + // extractedConnectivities.put(entry.getKey(), entry.getValue()); + // } + // } + // } + // + // return extractedConnectivities; + // } + // + // @Deprecated + // public static Map>> filterExtractedConnectivitiesByHybridizations( + // final Map>> extractedConnectivities, + // final Set knownCarbonHybridizations) { + // // remove hybridization of carbons which we do not expect + // for (final 
String atomType : extractedConnectivities.keySet()) { + // if (atomType.equals("C")) { + // for (final int hybridization : new HashSet<>(extractedConnectivities.get(atomType) + // .keySet())) { + // if (!knownCarbonHybridizations.contains(hybridization)) { + // extractedConnectivities.get(atomType) + // .remove(hybridization); + // } + // } + // } + // } + // + // return extractedConnectivities; + // } + // + // @Deprecated + // public static Map>> filterExtractedConnectivitiesByCount( + // final Map>> extractedConnectivities, + // final double thresholdElementCount, final boolean onAtomTypeLevel) { + // final Map totalCounts = getTotalCounts(extractedConnectivities); + // final int totalCountsSum = getSum(new HashSet<>(totalCounts.values())); + // final Map>> filteredExtractedConnectivities = new HashMap<>(); + // extractedConnectivities.keySet() + // .forEach(neighborAtomType -> { + // int sum = 0; + // for (final Map.Entry> entryPerHybridization : extractedConnectivities.get( + // neighborAtomType) + // .entrySet()) { + // for (final Map.Entry entryProtonsCount : extractedConnectivities.get( + // neighborAtomType) + // .get(entryPerHybridization.getKey()) + // .entrySet()) { + // if (onAtomTypeLevel) { + // sum += entryProtonsCount.getValue(); + // } else if (entryProtonsCount.getValue() + // / (double) totalCountsSum + // >= thresholdElementCount) { + // filteredExtractedConnectivities.putIfAbsent(neighborAtomType, + // new HashMap<>()); + // filteredExtractedConnectivities.get(neighborAtomType) + // .putIfAbsent( + // entryPerHybridization.getKey(), + // new HashSet<>()); + // filteredExtractedConnectivities.get(neighborAtomType) + // .get(entryPerHybridization.getKey()) + // .add(entryProtonsCount.getKey()); + // } + // } + // } + // if (onAtomTypeLevel + // && sum + // / (double) totalCountsSum + // >= thresholdElementCount) { + // filteredExtractedConnectivities.putIfAbsent(neighborAtomType, new HashMap<>()); + // } + // }); + // + // return 
filteredExtractedConnectivities; + // } + // + // @Deprecated + // private static Map getTotalCounts( + // final Map>> extractedConnectivities) { + // final Map totalCounts = new HashMap<>(); + // for (final String key1 : extractedConnectivities.keySet()) { + // totalCounts.putIfAbsent(key1, 0); + // for (final int key2 : extractedConnectivities.get(key1) + // .keySet()) { + // for (final Map.Entry countsEntry : extractedConnectivities.get(key1) + // .get(key2) + // .entrySet()) { + // totalCounts.put(key1, totalCounts.get(key1) + // + countsEntry.getValue()); + // } + // } + // } + // + // return totalCounts; + // } + // + // @Deprecated + // private static int getSum(final Set values) { + // return values.stream() + // .reduce(0, (total, current) -> total += current); + // } } diff --git a/src/casekit/nmr/fragments/model/ConnectionTree.java b/src/casekit/nmr/fragments/model/ConnectionTree.java index 83adf85..6731acc 100644 --- a/src/casekit/nmr/fragments/model/ConnectionTree.java +++ b/src/casekit/nmr/fragments/model/ConnectionTree.java @@ -439,9 +439,8 @@ public void removeNode(final int key) { * @param childKey2 * * @return - * - * @deprecated */ + @Deprecated public boolean swapChildNodes(final int parentKey, final int childKey1, final int childKey2) { if (!this.containsKey(parentKey) || !this.containsKey(childKey1) From 9e877b88c91871161c0a9cfc922f14585642a74c Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Tue, 15 Mar 2022 12:34:40 +0100 Subject: [PATCH 378/405] chore: allow to pass the checkMultiplicity and checkEquivalencesCount --- src/casekit/nmr/prediction/Prediction.java | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/casekit/nmr/prediction/Prediction.java b/src/casekit/nmr/prediction/Prediction.java index f37969a..302b746 100644 --- a/src/casekit/nmr/prediction/Prediction.java +++ b/src/casekit/nmr/prediction/Prediction.java @@ -304,6 +304,8 @@ public static DataSet predictHSQCEdited(final IAtomContainer structure, final 
Sp public static List predict1DByStereoHOSECodeAndFilter(final Spectrum querySpectrum, final double shiftTolerance, final double maximumAverageDeviation, + final boolean checkMultiplicity, + final boolean checkEquivalencesCount, final int maxSphere, final List structureList, final Map> hoseCodeDBEntriesMap, @@ -321,8 +323,9 @@ public static List predict1DByStereoHOSECodeAndFilter(final Spectrum qu for (final IAtomContainer structure : structureList) { callables.add( () -> predict1DByStereoHOSECodeAndFilter(structure, querySpectrum, maxSphere, shiftTolerance, - maximumAverageDeviation, true, true, - hoseCodeDBEntriesMap, multiplicitySectionsBuilder)); + maximumAverageDeviation, checkMultiplicity, + checkEquivalencesCount, hoseCodeDBEntriesMap, + multiplicitySectionsBuilder)); } final Consumer consumer = (dataSet) -> { if (dataSet From 64539ccc73b6ef300600c93c8bbd5194735e99fd Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Tue, 22 Mar 2022 11:45:30 +0100 Subject: [PATCH 379/405] chore: renaming in Assignment class --- src/casekit/nmr/model/Assignment.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/casekit/nmr/model/Assignment.java b/src/casekit/nmr/model/Assignment.java index 2e1a7df..fd289c9 100644 --- a/src/casekit/nmr/model/Assignment.java +++ b/src/casekit/nmr/model/Assignment.java @@ -123,7 +123,7 @@ public int getAssignment(final int dim, final int index, final int equivalenceIn return this.assignments[dim][index][equivalenceIndex]; } - public void addAssignmentEquivalence(final int dim, final int index, final int equivalenceIndex) { + public void addAssignmentEquivalence(final int dim, final int index, final int assignment) { final int[] temp = this.getAssignment(dim, index); final int[] equivalenceIndices = new int[temp.length + 1]; @@ -132,7 +132,7 @@ public void addAssignmentEquivalence(final int dim, final int index, final int e equivalenceIndices[j] = temp[j]; } equivalenceIndices[equivalenceIndices.length - - 1] = 
equivalenceIndex; + - 1] = assignment; this.setAssignment(dim, index, equivalenceIndices); } From d572c68225e954a39949fc045360e7707c7bd896 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Tue, 22 Mar 2022 16:04:29 +0100 Subject: [PATCH 380/405] feat: switch position of query and dataset spectrum in spectral assignment && added prediction method for SMILES strings --- .../nmr/filterandrank/FilterAndRank.java | 29 ++++++++------- src/casekit/nmr/prediction/Prediction.java | 35 +++++++++++++------ 2 files changed, 41 insertions(+), 23 deletions(-) diff --git a/src/casekit/nmr/filterandrank/FilterAndRank.java b/src/casekit/nmr/filterandrank/FilterAndRank.java index 3ab9ccf..8433619 100644 --- a/src/casekit/nmr/filterandrank/FilterAndRank.java +++ b/src/casekit/nmr/filterandrank/FilterAndRank.java @@ -18,15 +18,18 @@ public class FilterAndRank { public static List filterAndRank(final List dataSetList, final Spectrum querySpectrum, final double shiftTolerance, final double maxAverageDeviation, final boolean checkMultiplicity, final boolean checkEquivalencesCount, + final boolean allowLowerEquivalencesCount, final MultiplicitySectionsBuilder multiplicitySectionsBuilder, final boolean allowIncompleteMatch) { return rank(filter(dataSetList, querySpectrum, shiftTolerance, maxAverageDeviation, checkMultiplicity, - checkEquivalencesCount, multiplicitySectionsBuilder, allowIncompleteMatch)); + checkEquivalencesCount, allowLowerEquivalencesCount, multiplicitySectionsBuilder, + allowIncompleteMatch)); } public static List filter(final List dataSetList, final Spectrum querySpectrum, final double shiftTolerance, final double maxAverageDeviation, final boolean checkMultiplicity, final boolean checkEquivalencesCount, + final boolean allowLowerEquivalencesCount, final MultiplicitySectionsBuilder multiplicitySectionsBuilder, final boolean allowIncompleteMatch) { if (querySpectrum.getNDim() @@ -35,8 +38,8 @@ public static List filter(final List dataSetList, final Spectr return 
dataSetList.stream() .filter(dataSet -> checkDataSet(dataSet, querySpectrum, shiftTolerance, maxAverageDeviation, checkMultiplicity, - checkEquivalencesCount, multiplicitySectionsBuilder, - allowIncompleteMatch) + checkEquivalencesCount, allowLowerEquivalencesCount, + multiplicitySectionsBuilder, allowIncompleteMatch) != null) .collect(Collectors.toList()); } @@ -46,25 +49,23 @@ public static List filter(final List dataSetList, final Spectr public static DataSet checkDataSet(final DataSet dataSet, final Spectrum querySpectrum, final double shiftTolerance, final double maxAverageDeviation, final boolean checkMultiplicity, - final boolean checkEquivalencesCount, + final boolean checkEquivalencesCount, final boolean allowLowerEquivalencesCount, final MultiplicitySectionsBuilder multiplicitySectionsBuilder, final boolean allowIncompleteMatch) { - final BitSetFingerprint bitSetFingerprintQuerySpectrum = Similarity.getBitSetFingerprint(querySpectrum, 0, - multiplicitySectionsBuilder); final Spectrum spectrum = dataSet.getSpectrum() .toSpectrum(); - final Assignment spectralMatchAssignment = Similarity.matchSpectra(spectrum, querySpectrum, 0, 0, + final Assignment spectralMatchAssignment = Similarity.matchSpectra(querySpectrum, spectrum, 0, 0, shiftTolerance, checkMultiplicity, - checkEquivalencesCount, false); - + checkEquivalencesCount, + allowLowerEquivalencesCount); dataSet.addAttachment("querySpectrumSignalCount", querySpectrum.getSignalCount()); dataSet.addAttachment("querySpectrumSignalCountWithEquivalences", querySpectrum.getSignalCountWithEquivalences()); dataSet.addAttachment("setAssignmentsCountWithEquivalences", spectralMatchAssignment.getSetAssignmentsCountWithEquivalences(0)); - final boolean isCompleteSpectralMatch = querySpectrum.getSignalCount() + final boolean isCompleteSpectralMatch = spectrum.getSignalCount() == spectralMatchAssignment.getSetAssignmentsCount(0); - final boolean isCompleteSpectralMatchWithEquivalences = 
querySpectrum.getSignalCountWithEquivalences() + final boolean isCompleteSpectralMatchWithEquivalences = spectrum.getSignalCountWithEquivalences() == spectralMatchAssignment.getSetAssignmentsCountWithEquivalences(0); dataSet.addAttachment("setAssignmentsCount", spectralMatchAssignment.getSetAssignmentsCount(0)); dataSet.addAttachment("setAssignmentsCountWithEquivalences", @@ -73,7 +74,7 @@ public static DataSet checkDataSet(final DataSet dataSet, final Spectrum querySp dataSet.addAttachment("isCompleteSpectralMatchWithEquivalences", isCompleteSpectralMatchWithEquivalences); dataSet.addAttachment("spectralMatchAssignment", spectralMatchAssignment); - Double[] deviations = Similarity.getDeviations(spectrum, querySpectrum, 0, 0, spectralMatchAssignment); + Double[] deviations = Similarity.getDeviations(querySpectrum, spectrum, 0, 0, spectralMatchAssignment); if (allowIncompleteMatch) { deviations = Arrays.stream(deviations) .filter(Objects::nonNull) @@ -88,6 +89,8 @@ public static DataSet checkDataSet(final DataSet dataSet, final Spectrum querySp final Double rmsd = Statistics.calculateRMSD(deviations); dataSet.addAttachment("rmsd", rmsd); + final BitSetFingerprint bitSetFingerprintQuerySpectrum = Similarity.getBitSetFingerprint(querySpectrum, 0, + multiplicitySectionsBuilder); final BitSetFingerprint bitSetFingerprintDataSet = Similarity.getBitSetFingerprint(spectrum, 0, multiplicitySectionsBuilder); final Double tanimotoCoefficient = Similarity.calculateTanimotoCoefficient(bitSetFingerprintQuerySpectrum, @@ -109,7 +112,7 @@ public static List rank(final List dataSetList) { return -1 * setAssignmentsCountComparison; } - + return compareNumericDataSetAttachmentKey(dataSet1, dataSet2, "averageDeviation"); }); diff --git a/src/casekit/nmr/prediction/Prediction.java b/src/casekit/nmr/prediction/Prediction.java index 302b746..854e790 100644 --- a/src/casekit/nmr/prediction/Prediction.java +++ b/src/casekit/nmr/prediction/Prediction.java @@ -306,9 +306,10 @@ public static 
List predict1DByStereoHOSECodeAndFilter(final Spectrum qu final double maximumAverageDeviation, final boolean checkMultiplicity, final boolean checkEquivalencesCount, + final boolean allowLowerEquivalencesCount, final int maxSphere, final List structureList, - final Map> hoseCodeDBEntriesMap, + final Map> hoseCodeShiftStatistics, final Map multiplicitySectionsSettings, final int nThreads) { final MultiplicitySectionsBuilder multiplicitySectionsBuilder = new MultiplicitySectionsBuilder(); @@ -324,8 +325,8 @@ public static List predict1DByStereoHOSECodeAndFilter(final Spectrum qu callables.add( () -> predict1DByStereoHOSECodeAndFilter(structure, querySpectrum, maxSphere, shiftTolerance, maximumAverageDeviation, checkMultiplicity, - checkEquivalencesCount, hoseCodeDBEntriesMap, - multiplicitySectionsBuilder)); + checkEquivalencesCount, allowLowerEquivalencesCount, + hoseCodeShiftStatistics, multiplicitySectionsBuilder)); } final Consumer consumer = (dataSet) -> { if (dataSet @@ -348,9 +349,25 @@ private static DataSet predict1DByStereoHOSECodeAndFilter(final IAtomContainer s final double maxAverageDeviation, final boolean checkMultiplicity, final boolean checkEquivalencesCount, - final Map> hoseCodeDBEntriesMap, + final boolean allowLowerEquivalencesCount, + final Map> hoseCodeShiftStatistics, final MultiplicitySectionsBuilder multiplicitySectionsBuilder) { final String nucleus = querySpectrum.getNuclei()[0]; + final DataSet dataSet = predict1DByStereoHOSECode(structure, nucleus, maxSphere, hoseCodeShiftStatistics); + if (dataSet + != null) { + return FilterAndRank.checkDataSet(dataSet, querySpectrum, shiftTolerance, maxAverageDeviation, + checkMultiplicity, checkEquivalencesCount, allowLowerEquivalencesCount, + multiplicitySectionsBuilder, true); + } + + return null; + } + + public static DataSet predict1DByStereoHOSECode(final IAtomContainer structure, final String nucleus, + final int maxSphere, + final Map> hoseCodeShiftStatistics) { + final String atomType = 
Utils.getAtomTypeFromNucleus(nucleus); final StructureDiagramGenerator structureDiagramGenerator = new StructureDiagramGenerator(); final ExtendedHOSECodeGenerator extendedHOSECodeGenerator = new ExtendedHOSECodeGenerator(); @@ -378,7 +395,7 @@ private static DataSet predict1DByStereoHOSECodeAndFilter(final IAtomContainer s final DataSet dataSet = Utils.atomContainerToDataSet(structure, false); final Spectrum predictedSpectrum = new Spectrum(); - predictedSpectrum.setNuclei(querySpectrum.getNuclei()); + predictedSpectrum.setNuclei(new String[]{nucleus}); predictedSpectrum.setSignals(new ArrayList<>()); final Map> assignmentMap = new HashMap<>(); @@ -395,7 +412,7 @@ private static DataSet predict1DByStereoHOSECodeAndFilter(final IAtomContainer s >= 1) { try { hoseCode = extendedHOSECodeGenerator.getHOSECode(structure, structure.getAtom(i), sphere); - hoseCodeObjectValues = hoseCodeDBEntriesMap.get(hoseCode); + hoseCodeObjectValues = hoseCodeShiftStatistics.get(hoseCode); if (hoseCodeObjectValues != null) { for (final Map.Entry solventEntry : hoseCodeObjectValues.entrySet()) { @@ -413,7 +430,7 @@ private static DataSet predict1DByStereoHOSECodeAndFilter(final IAtomContainer s } predictedShift = Statistics.getMean(medians); signal = new Signal(); - signal.setNuclei(querySpectrum.getNuclei()); + signal.setNuclei(new String[]{nucleus}); signal.setShifts(new Double[]{predictedShift}); signal.setMultiplicity(Utils.getMultiplicityFromProtonsCount( AtomUtils.getHcount(structure, structure.getAtom(i)))); // counts explicit H @@ -458,9 +475,7 @@ private static DataSet predict1DByStereoHOSECodeAndFilter(final IAtomContainer s } dataSet.setAssignment(assignment); - return FilterAndRank.checkDataSet(dataSet, querySpectrum, shiftTolerance, maxAverageDeviation, - checkMultiplicity, checkEquivalencesCount, multiplicitySectionsBuilder, - true); + return dataSet; } catch (final Exception e) { e.printStackTrace(); } From 7ef8981500702665ce5b68eca60520cab8a3a9aa Mon Sep 17 00:00:00 
2001 From: michaelwenk Date: Tue, 29 Mar 2022 04:44:50 +0200 Subject: [PATCH 381/405] feat: added prediction meta data to Prediction --- src/casekit/nmr/prediction/Prediction.java | 24 ++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/src/casekit/nmr/prediction/Prediction.java b/src/casekit/nmr/prediction/Prediction.java index 854e790..d561651 100644 --- a/src/casekit/nmr/prediction/Prediction.java +++ b/src/casekit/nmr/prediction/Prediction.java @@ -378,7 +378,8 @@ public static DataSet predict1DByStereoHOSECode(final IAtomContainer structure, double predictedShift; String hoseCode; Double[] statistics; - int signalIndex, sphere; + int signalIndex, sphere, count; + Double min, max; List medians; try { @@ -399,6 +400,7 @@ public static DataSet predict1DByStereoHOSECode(final IAtomContainer structure, predictedSpectrum.setSignals(new ArrayList<>()); final Map> assignmentMap = new HashMap<>(); + final Map predictionMeta = new HashMap<>(); for (int i = 0; i < structure.getAtomCount(); i++) { if (!structure.getAtom(i) @@ -408,6 +410,9 @@ public static DataSet predict1DByStereoHOSECode(final IAtomContainer structure, } medians = new ArrayList<>(); sphere = maxSphere; + count = 0; + min = null; + max = null; while (sphere >= 1) { try { @@ -418,6 +423,15 @@ public static DataSet predict1DByStereoHOSECode(final IAtomContainer structure, for (final Map.Entry solventEntry : hoseCodeObjectValues.entrySet()) { statistics = hoseCodeObjectValues.get(solventEntry.getKey()); medians.add(statistics[3]); + count += statistics[0].intValue(); + min = min + == null + ? statistics[1] + : Double.min(min, statistics[1]); + max = max + == null + ? 
statistics[4] + : Double.max(max, statistics[4]); } break; } @@ -441,6 +455,10 @@ public static DataSet predict1DByStereoHOSECode(final IAtomContainer structure, assignmentMap.putIfAbsent(signalIndex, new ArrayList<>()); assignmentMap.get(signalIndex) .add(i); + + if (!predictionMeta.containsKey(signalIndex)) { + predictionMeta.put(signalIndex, new Double[]{(double) sphere, (double) count, min, max}); + } } // if no spectrum could be built or the number of signals in spectrum is different than the atom number in molecule @@ -469,12 +487,14 @@ public static DataSet predict1DByStereoHOSECode(final IAtomContainer structure, assignment.initAssignments(predictedSpectrum.getSignalCount()); for (final Map.Entry> entry : assignmentMap.entrySet()) { - for (final int atomIndex : assignmentMap.get(entry.getKey())) { + for (final int atomIndex : entry.getValue()) { assignment.addAssignmentEquivalence(0, entry.getKey(), atomIndex); } } dataSet.setAssignment(assignment); + dataSet.addAttachment("predictionMeta", predictionMeta); + return dataSet; } catch (final Exception e) { e.printStackTrace(); From 6d9f4572b0c55960ef9753eda4c741b619481a7d Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Thu, 7 Apr 2022 01:21:46 +0200 Subject: [PATCH 382/405] feat: extended spectral matching methods to consider structural constraints if given --- .../nmr/filterandrank/FilterAndRank.java | 58 +++++++++-- src/casekit/nmr/prediction/Prediction.java | 41 ++++---- src/casekit/nmr/similarity/Similarity.java | 95 ++++++++++++++----- src/casekit/nmr/similarity/Utilities.java | 86 ++++++++++++++++- 4 files changed, 228 insertions(+), 52 deletions(-) diff --git a/src/casekit/nmr/filterandrank/FilterAndRank.java b/src/casekit/nmr/filterandrank/FilterAndRank.java index 8433619..b88af9f 100644 --- a/src/casekit/nmr/filterandrank/FilterAndRank.java +++ b/src/casekit/nmr/filterandrank/FilterAndRank.java @@ -1,6 +1,7 @@ package casekit.nmr.filterandrank; import 
casekit.nmr.analysis.MultiplicitySectionsBuilder; +import casekit.nmr.elucidation.model.Detections; import casekit.nmr.model.Assignment; import casekit.nmr.model.DataSet; import casekit.nmr.model.Spectrum; @@ -21,9 +22,20 @@ public static List filterAndRank(final List dataSetList, final final boolean allowLowerEquivalencesCount, final MultiplicitySectionsBuilder multiplicitySectionsBuilder, final boolean allowIncompleteMatch) { + return filterAndRank(dataSetList, querySpectrum, shiftTolerance, maxAverageDeviation, checkMultiplicity, + checkEquivalencesCount, allowLowerEquivalencesCount, multiplicitySectionsBuilder, + allowIncompleteMatch, null); + } + + public static List filterAndRank(final List dataSetList, final Spectrum querySpectrum, + final double shiftTolerance, final double maxAverageDeviation, + final boolean checkMultiplicity, final boolean checkEquivalencesCount, + final boolean allowLowerEquivalencesCount, + final MultiplicitySectionsBuilder multiplicitySectionsBuilder, + final boolean allowIncompleteMatch, final Detections detections) { return rank(filter(dataSetList, querySpectrum, shiftTolerance, maxAverageDeviation, checkMultiplicity, checkEquivalencesCount, allowLowerEquivalencesCount, multiplicitySectionsBuilder, - allowIncompleteMatch)); + allowIncompleteMatch, detections)); } public static List filter(final List dataSetList, final Spectrum querySpectrum, @@ -32,6 +44,18 @@ public static List filter(final List dataSetList, final Spectr final boolean allowLowerEquivalencesCount, final MultiplicitySectionsBuilder multiplicitySectionsBuilder, final boolean allowIncompleteMatch) { + + return filter(dataSetList, querySpectrum, shiftTolerance, maxAverageDeviation, checkMultiplicity, + checkEquivalencesCount, allowLowerEquivalencesCount, multiplicitySectionsBuilder, + allowIncompleteMatch, null); + } + + public static List filter(final List dataSetList, final Spectrum querySpectrum, + final double shiftTolerance, final double maxAverageDeviation, + 
final boolean checkMultiplicity, final boolean checkEquivalencesCount, + final boolean allowLowerEquivalencesCount, + final MultiplicitySectionsBuilder multiplicitySectionsBuilder, + final boolean allowIncompleteMatch, final Detections detections) { if (querySpectrum.getNDim() == 1 && querySpectrum.getNuclei()[0].equals("13C")) { @@ -39,7 +63,8 @@ public static List filter(final List dataSetList, final Spectr .filter(dataSet -> checkDataSet(dataSet, querySpectrum, shiftTolerance, maxAverageDeviation, checkMultiplicity, checkEquivalencesCount, allowLowerEquivalencesCount, - multiplicitySectionsBuilder, allowIncompleteMatch) + multiplicitySectionsBuilder, allowIncompleteMatch, + detections) != null) .collect(Collectors.toList()); } @@ -52,12 +77,33 @@ public static DataSet checkDataSet(final DataSet dataSet, final Spectrum querySp final boolean checkEquivalencesCount, final boolean allowLowerEquivalencesCount, final MultiplicitySectionsBuilder multiplicitySectionsBuilder, final boolean allowIncompleteMatch) { + return checkDataSet(dataSet, querySpectrum, shiftTolerance, maxAverageDeviation, checkMultiplicity, + checkEquivalencesCount, allowLowerEquivalencesCount, multiplicitySectionsBuilder, + allowIncompleteMatch, null); + } + + public static DataSet checkDataSet(final DataSet dataSet, final Spectrum querySpectrum, final double shiftTolerance, + final double maxAverageDeviation, final boolean checkMultiplicity, + final boolean checkEquivalencesCount, final boolean allowLowerEquivalencesCount, + final MultiplicitySectionsBuilder multiplicitySectionsBuilder, + final boolean allowIncompleteMatch, final Detections detections) { final Spectrum spectrum = dataSet.getSpectrum() .toSpectrum(); - final Assignment spectralMatchAssignment = Similarity.matchSpectra(querySpectrum, spectrum, 0, 0, - shiftTolerance, checkMultiplicity, - checkEquivalencesCount, - allowLowerEquivalencesCount); + + + final Assignment spectralMatchAssignment = detections + != null + ? 
Similarity.matchSpectra(querySpectrum, spectrum, 0, 0, + shiftTolerance, checkMultiplicity, + checkEquivalencesCount, + allowLowerEquivalencesCount, + dataSet.getStructure() + .toAtomContainer(), + dataSet.getAssignment(), detections) + : Similarity.matchSpectra(querySpectrum, spectrum, 0, 0, + shiftTolerance, checkMultiplicity, + checkEquivalencesCount, + allowLowerEquivalencesCount); dataSet.addAttachment("querySpectrumSignalCount", querySpectrum.getSignalCount()); dataSet.addAttachment("querySpectrumSignalCountWithEquivalences", querySpectrum.getSignalCountWithEquivalences()); diff --git a/src/casekit/nmr/prediction/Prediction.java b/src/casekit/nmr/prediction/Prediction.java index d561651..3549e01 100644 --- a/src/casekit/nmr/prediction/Prediction.java +++ b/src/casekit/nmr/prediction/Prediction.java @@ -25,6 +25,7 @@ import casekit.nmr.analysis.MultiplicitySectionsBuilder; +import casekit.nmr.elucidation.model.Detections; import casekit.nmr.filterandrank.FilterAndRank; import casekit.nmr.fragments.model.ConnectionTree; import casekit.nmr.fragments.model.ConnectionTreeNode; @@ -312,6 +313,25 @@ public static List predict1DByStereoHOSECodeAndFilter(final Spectrum qu final Map> hoseCodeShiftStatistics, final Map multiplicitySectionsSettings, final int nThreads) { + + + return predict1DByStereoHOSECodeAndFilter(querySpectrum, shiftTolerance, maximumAverageDeviation, + checkMultiplicity, checkEquivalencesCount, + allowLowerEquivalencesCount, null, maxSphere, structureList, + hoseCodeShiftStatistics, multiplicitySectionsSettings, nThreads); + } + + public static List predict1DByStereoHOSECodeAndFilter(final Spectrum querySpectrum, + final double shiftTolerance, + final double maximumAverageDeviation, + final boolean checkMultiplicity, + final boolean checkEquivalencesCount, + final boolean allowLowerEquivalencesCount, + final Detections detections, final int maxSphere, + final List structureList, + final Map> hoseCodeShiftStatistics, + final Map 
multiplicitySectionsSettings, + final int nThreads) { final MultiplicitySectionsBuilder multiplicitySectionsBuilder = new MultiplicitySectionsBuilder(); multiplicitySectionsBuilder.setMinLimit(multiplicitySectionsSettings.get(querySpectrum.getNuclei()[0])[0]); multiplicitySectionsBuilder.setMaxLimit(multiplicitySectionsSettings.get(querySpectrum.getNuclei()[0])[1]); @@ -326,7 +346,8 @@ public static List predict1DByStereoHOSECodeAndFilter(final Spectrum qu () -> predict1DByStereoHOSECodeAndFilter(structure, querySpectrum, maxSphere, shiftTolerance, maximumAverageDeviation, checkMultiplicity, checkEquivalencesCount, allowLowerEquivalencesCount, - hoseCodeShiftStatistics, multiplicitySectionsBuilder)); + detections, hoseCodeShiftStatistics, + multiplicitySectionsBuilder)); } final Consumer consumer = (dataSet) -> { if (dataSet @@ -350,6 +371,7 @@ private static DataSet predict1DByStereoHOSECodeAndFilter(final IAtomContainer s final boolean checkMultiplicity, final boolean checkEquivalencesCount, final boolean allowLowerEquivalencesCount, + final Detections detections, final Map> hoseCodeShiftStatistics, final MultiplicitySectionsBuilder multiplicitySectionsBuilder) { final String nucleus = querySpectrum.getNuclei()[0]; @@ -358,7 +380,7 @@ private static DataSet predict1DByStereoHOSECodeAndFilter(final IAtomContainer s != null) { return FilterAndRank.checkDataSet(dataSet, querySpectrum, shiftTolerance, maxAverageDeviation, checkMultiplicity, checkEquivalencesCount, allowLowerEquivalencesCount, - multiplicitySectionsBuilder, true); + multiplicitySectionsBuilder, true, detections); } return null; @@ -461,21 +483,6 @@ public static DataSet predict1DByStereoHOSECode(final IAtomContainer structure, } } - // if no spectrum could be built or the number of signals in spectrum is different than the atom number in molecule - try { - if (Utils.getDifferenceSpectrumSizeAndMolecularFormulaCount(predictedSpectrum, - Utils.getMolecularFormulaFromString( - dataSet.getMeta() - 
.get("mf")), 0) - != 0) { - return null; - } - } catch (final CDKException e) { - e.printStackTrace(); - return null; - } - - Utils.convertExplicitToImplicitHydrogens(structure); dataSet.setStructure(new StructureCompact(structure)); dataSet.addMetaInfo("smiles", SmilesGenerator.generic() diff --git a/src/casekit/nmr/similarity/Similarity.java b/src/casekit/nmr/similarity/Similarity.java index 97a9b3b..785145b 100644 --- a/src/casekit/nmr/similarity/Similarity.java +++ b/src/casekit/nmr/similarity/Similarity.java @@ -13,12 +13,14 @@ package casekit.nmr.similarity; import casekit.nmr.analysis.MultiplicitySectionsBuilder; +import casekit.nmr.elucidation.model.Detections; import casekit.nmr.model.Assignment; import casekit.nmr.model.Signal; import casekit.nmr.model.Spectrum; import casekit.nmr.similarity.model.Distance; import casekit.nmr.utils.Statistics; import org.openscience.cdk.fingerprint.BitSetFingerprint; +import org.openscience.cdk.interfaces.IAtomContainer; import org.openscience.cdk.similarity.Tanimoto; import java.util.HashSet; @@ -274,32 +276,8 @@ public static Assignment matchSpectra(final Spectrum spectrum1, final Spectrum s final int dim2, final double shiftTolerance, final boolean checkMultiplicity, final boolean checkEquivalencesCount, final boolean allowLowerEquivalencesCount) { - if (!Similarity.checkDimensions(spectrum1, spectrum2, dim1, dim2)) { - return null; - } - final List distanceList = Utilities.buildDistanceList(spectrum1, spectrum2, dim1, dim2, - shiftTolerance, checkMultiplicity, - checkEquivalencesCount, - allowLowerEquivalencesCount); - final Assignment matchAssignment = new Assignment(); - matchAssignment.setNuclei(spectrum1.getNuclei()); - matchAssignment.initAssignments(spectrum1.getSignalCount()); - final Set assignedSpectrum1 = new HashSet<>(); - final Set assignedSpectrum2 = new HashSet<>(); - for (final Distance distance : distanceList) { - if (!assignedSpectrum1.contains(distance.getSignalIndexSpectrum1()) - && 
!assignedSpectrum2.contains(distance.getSignalIndexSpectrum2())) { - for (int equiv = 0; equiv - < spectrum2.getEquivalencesCount(distance.getSignalIndexSpectrum2()); equiv++) { - matchAssignment.addAssignmentEquivalence(0, distance.getSignalIndexSpectrum1(), - distance.getSignalIndexSpectrum2()); - } - assignedSpectrum1.add(distance.getSignalIndexSpectrum1()); - assignedSpectrum2.add(distance.getSignalIndexSpectrum2()); - } - } - - return matchAssignment; + return matchSpectra(spectrum1, spectrum2, dim1, dim2, shiftTolerance, checkMultiplicity, checkEquivalencesCount, + allowLowerEquivalencesCount, null, null, null); } @@ -346,4 +324,69 @@ public static Assignment matchSpectra(final Spectrum spectrum1, final Spectrum s return matchAssignment; } + /** + * Returns the closest shift matches between two spectra in selected dimensions + * as an Assignment object with one set dimension only.
+ * + * @param spectrum1 first spectrum (possible subspectrum) + * @param spectrum2 second spectrum + * @param dim1 dimension in first spectrum to take the shifts from + * @param dim2 dimension in second spectrum to take the shifts from + * @param shiftTolerance Tolerance value [ppm] used during spectra shift + * comparison + * @param checkMultiplicity indicates whether to compare the multiplicity of matched signals + * @param checkEquivalencesCount indicates whether to compare the equivalences counts of matched signals + * @param allowLowerEquivalencesCount indicates to allow a lower equivalences counts spectrum 2 + * @param structure structure belonging to second spectrum + * @param assignment assignments between structure and second spectrum + * @param detections detections object which contains structural constraints + * + * @return Assignments with signal indices of spectrum and matched indices + * in query spectrum; null if one of the spectra does not + * contain the selected dimension + */ + public static Assignment matchSpectra(final Spectrum spectrum1, final Spectrum spectrum2, final int dim1, + final int dim2, final double shiftTolerance, final boolean checkMultiplicity, + final boolean checkEquivalencesCount, + final boolean allowLowerEquivalencesCount, final IAtomContainer structure, + final Assignment assignment, final Detections detections) { + if (!Similarity.checkDimensions(spectrum1, spectrum2, dim1, dim2)) { + return null; + } + final List distanceList = detections + != null + && structure + != null + && assignment + != null + ? 
Utilities.buildDistanceList(spectrum1, spectrum2, dim1, dim2, + shiftTolerance, checkMultiplicity, + checkEquivalencesCount, + allowLowerEquivalencesCount, structure, + assignment, detections) + : Utilities.buildDistanceList(spectrum1, spectrum2, dim1, dim2, + shiftTolerance, checkMultiplicity, + checkEquivalencesCount, + allowLowerEquivalencesCount); + + final Assignment matchAssignment = new Assignment(); + matchAssignment.setNuclei(spectrum1.getNuclei()); + matchAssignment.initAssignments(spectrum1.getSignalCount()); + final Set assignedSpectrum1 = new HashSet<>(); + final Set assignedSpectrum2 = new HashSet<>(); + for (final Distance distance : distanceList) { + if (!assignedSpectrum1.contains(distance.getSignalIndexSpectrum1()) + && !assignedSpectrum2.contains(distance.getSignalIndexSpectrum2())) { + for (int equiv = 0; equiv + < spectrum2.getEquivalencesCount(distance.getSignalIndexSpectrum2()); equiv++) { + matchAssignment.addAssignmentEquivalence(0, distance.getSignalIndexSpectrum1(), + distance.getSignalIndexSpectrum2()); + } + assignedSpectrum1.add(distance.getSignalIndexSpectrum1()); + assignedSpectrum2.add(distance.getSignalIndexSpectrum2()); + } + } + + return matchAssignment; + } } diff --git a/src/casekit/nmr/similarity/Utilities.java b/src/casekit/nmr/similarity/Utilities.java index e160813..5a3dc17 100644 --- a/src/casekit/nmr/similarity/Utilities.java +++ b/src/casekit/nmr/similarity/Utilities.java @@ -1,12 +1,15 @@ package casekit.nmr.similarity; +import casekit.nmr.elucidation.Constants; +import casekit.nmr.elucidation.model.Detections; +import casekit.nmr.model.Assignment; import casekit.nmr.model.Signal; import casekit.nmr.model.Spectrum; import casekit.nmr.similarity.model.Distance; +import org.openscience.cdk.interfaces.IAtom; +import org.openscience.cdk.interfaces.IAtomContainer; -import java.util.ArrayList; -import java.util.Comparator; -import java.util.List; +import java.util.*; public class Utilities { @@ -71,4 +74,81 @@ public static 
Double getDistanceValue(final Signal signal1, final Signal signal2 ? null : distanceValue; } + + public static List buildDistanceList(final Spectrum spectrum1, final Spectrum spectrum2, final int dim1, + final int dim2, final double shiftTolerance, + final boolean checkMultiplicity, + final boolean checkEquivalencesCount, + final boolean allowLowerEquivalencesCount, + final IAtomContainer structure, final Assignment assignment, + final Detections detections) { + final List distanceList = new ArrayList<>(); + Double distanceValue; + List hybridizations; + Set forbiddenNeighbors, setNeighbors, setNeighborsTemp; + IAtom atom; + boolean skip; + for (int i = 0; i + < spectrum1.getSignalCount(); i++) { + forbiddenNeighbors = detections.getForbiddenNeighbors() + .get(i) + .keySet(); + setNeighbors = detections.getSetNeighbors() + .get(i) + .keySet(); + hybridizations = detections.getDetectedHybridizations() + .get(i); + + for (int j = 0; j + < spectrum2.getSignalCount(); j++) { + // check spectral constraints + distanceValue = getDistanceValue(spectrum1.getSignal(i), spectrum2.getSignal(j), dim1, dim2, + checkMultiplicity, checkEquivalencesCount, allowLowerEquivalencesCount, + shiftTolerance); + if (distanceValue + == null) { + continue; + } + skip = false; + // check structural constraints + for (int k = 0; k + < assignment.getAssignment(0, j).length; k++) { + atom = structure.getAtom(assignment.getAssignment(0, j, k)); + // if certain hybridizations are given and the atom's hybridization is known + if (!hybridizations.isEmpty() + && Constants.hybridizationConversionMap.containsKey(atom.getHybridization() + .name())) { + skip = !hybridizations.contains(Constants.hybridizationConversionMap.get(atom.getHybridization() + .name())); + if (skip) { + break; + } + } + setNeighborsTemp = new HashSet<>(setNeighbors); + for (final IAtom neighborAtom : structure.getConnectedAtomsList(atom)) { + skip = forbiddenNeighbors.contains(neighborAtom.getSymbol()); + if (skip) { + break; + } 
+ setNeighborsTemp.remove(neighborAtom.getSymbol()); + } + if (!setNeighborsTemp.isEmpty()) { + skip = true; + } + if (skip) { + break; + } + } + if (skip) { + continue; + } + + // if passed all check then add to distance list + distanceList.add(new Distance(i, j, distanceValue)); + } + } + distanceList.sort(Comparator.comparingDouble(Distance::getValue)); + + return distanceList; + } } From 1e65f576894393b5aa36ae2cacd23c0eafdb2fc9 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Mon, 11 Apr 2022 10:52:15 +0200 Subject: [PATCH 383/405] feat: added ID property to Signal --- src/casekit/nmr/dbservice/COCONUT.java | 2 +- src/casekit/nmr/dbservice/NMRShiftDB.java | 2 +- src/casekit/nmr/model/Signal.java | 3 ++- src/casekit/nmr/model/SignalCompact.java | 11 ++++++++++- src/casekit/nmr/model/nmrium/Spectrum.java | 5 +++-- src/casekit/nmr/utils/Utils.java | 5 +++-- 6 files changed, 20 insertions(+), 8 deletions(-) diff --git a/src/casekit/nmr/dbservice/COCONUT.java b/src/casekit/nmr/dbservice/COCONUT.java index 779f551..577678e 100644 --- a/src/casekit/nmr/dbservice/COCONUT.java +++ b/src/casekit/nmr/dbservice/COCONUT.java @@ -78,7 +78,7 @@ public static List getDataSetsWithShiftPredictionFromCOCONUT(final Stri // add signal spectrum.addSignal( new Signal(new String[]{nucleus}, new Double[]{calcShift}, multiplicity, "signal", null, 1, - 0, null)); + 0, null, null)); } // if no spectrum could be built or the number of signals in spectrum is different than the atom number in molecule diff --git a/src/casekit/nmr/dbservice/NMRShiftDB.java b/src/casekit/nmr/dbservice/NMRShiftDB.java index 75baef8..5c2ae4a 100644 --- a/src/casekit/nmr/dbservice/NMRShiftDB.java +++ b/src/casekit/nmr/dbservice/NMRShiftDB.java @@ -353,7 +353,7 @@ public static Spectrum NMRShiftDBSpectrumToSpectrum(final String NMRShiftDBSpect .toLowerCase(); spectrum.addSignal( new Signal(new String[]{nucleus}, new Double[]{shift}, multiplicity, "signal", intensity, 1, 0, - null)); + null, null)); } } catch 
(final Exception e) { return null; diff --git a/src/casekit/nmr/model/Signal.java b/src/casekit/nmr/model/Signal.java index db59f19..09ab610 100644 --- a/src/casekit/nmr/model/Signal.java +++ b/src/casekit/nmr/model/Signal.java @@ -52,6 +52,7 @@ public class Signal { private int equivalencesCount; private Integer phase; private J j; + private String id; public int getNDim() { @@ -100,7 +101,7 @@ public Signal buildClone() { } return new Signal(this.getNuclei() .clone(), this.shifts.clone(), this.multiplicity, this.kind, this.intensity, - this.equivalencesCount, this.phase, clonedJ); + this.equivalencesCount, this.phase, clonedJ, this.id); } @Override diff --git a/src/casekit/nmr/model/SignalCompact.java b/src/casekit/nmr/model/SignalCompact.java index 17e0611..9fd74c8 100644 --- a/src/casekit/nmr/model/SignalCompact.java +++ b/src/casekit/nmr/model/SignalCompact.java @@ -22,7 +22,7 @@ public class SignalCompact { public SignalCompact(final Signal signal) { this.strings = new String[signal.getNDim() - + 2]; + + 3]; this.doubles = new Double[signal.getNDim() + 1]; for (int dim = 0; dim @@ -33,6 +33,8 @@ public SignalCompact(final Signal signal) { this.strings[signal.getNDim()] = signal.getMultiplicity(); this.strings[signal.getNDim() + 1] = signal.getKind(); + this.strings[signal.getNDim() + + 2] = signal.getId(); this.doubles[signal.getNDim()] = signal.getIntensity(); this.integers = new Integer[]{signal.getNDim(), signal.getEquivalencesCount(), signal.getPhase()}; } @@ -57,6 +59,13 @@ public Signal toSignal() { signal.setMultiplicity(this.strings[this.dimensions()]); signal.setKind(this.strings[this.dimensions() + 1]); + // @TODO remove following condition if not needed anymore (sherlock dataset storage is currently without signal ID) + signal.setId(this.strings.length + - this.dimensions() + >= 3 + ? 
this.strings[this.dimensions() + + 2] + : null); signal.setShifts(new Double[this.dimensions()]); for (int dim = 0; dim < this.dimensions(); dim++) { diff --git a/src/casekit/nmr/model/nmrium/Spectrum.java b/src/casekit/nmr/model/nmrium/Spectrum.java index aa51152..b639c00 100644 --- a/src/casekit/nmr/model/nmrium/Spectrum.java +++ b/src/casekit/nmr/model/nmrium/Spectrum.java @@ -67,7 +67,7 @@ public casekit.nmr.model.Spectrum toSpectrum(final boolean considerSignalKind) { signal1D.getDelta()}, signal1D.getMultiplicity(), signal1D.getKind(), null, 0, 0, - null)); + null, signal1D.getId())); } })); spectrum.addMetaInfo("solvent", (String) this.info.get("solvent")); @@ -93,7 +93,8 @@ public casekit.nmr.model.Spectrum toSpectrum(final boolean considerSignalKind) { .get("delta")}, signal2D.getMultiplicity(), signal2D.getKind(), null, 0, 0, - signal2D.getJ())); + signal2D.getJ(), + signal2D.getId())); } })); spectrum.addMetaInfo("solvent", (String) this.info.get("solvent")); diff --git a/src/casekit/nmr/utils/Utils.java b/src/casekit/nmr/utils/Utils.java index aa76f29..b2a50ef 100644 --- a/src/casekit/nmr/utils/Utils.java +++ b/src/casekit/nmr/utils/Utils.java @@ -668,7 +668,7 @@ public static Signal extractFirstSignalFromCorrelation(final Correlation correla return new Signal(new String[]{Constants.nucleiMap.get(correlation.getAtomType())}, new Double[]{signal1D.getDelta()}, signal1D.getMultiplicity(), signal1D.getKind(), null, - correlation.getEquivalence(), signal1D.getSign(), null); + correlation.getEquivalence(), signal1D.getSign(), null, signal1D.getId()); } else if (signalMap.containsKey("x")) { // 2D signal final Signal2D signal2D = new Signal2D(signal); @@ -688,7 +688,7 @@ public static Signal extractFirstSignalFromCorrelation(final Correlation correla return new Signal(new String[]{Constants.nucleiMap.get(correlation.getAtomType())}, new Double[]{shift}, signal2D.getMultiplicity(), signal2D.getKind(), null, correlation.getEquivalence(), - signal2D.getSign(), 
signal2D.getJ()); + signal2D.getSign(), signal2D.getJ(), signal2D.getId()); } return null; @@ -710,6 +710,7 @@ public static Spectrum correlationListToSpectrum1D(final List corre signal = extractFirstSignalFromCorrelation(correlation); if (signal != null) { + signal.setId(correlation.getId()); spectrum.addSignalWithoutEquivalenceSearch(signal); } } From ffe21725d673627c0e83813c5e11a4818caeabb2 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Mon, 25 Apr 2022 21:01:00 +0200 Subject: [PATCH 384/405] chore: renamed Data class to NMRiumData --- src/casekit/nmr/model/nmrium/{Data.java => NMRiumData.java} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename src/casekit/nmr/model/nmrium/{Data.java => NMRiumData.java} (98%) diff --git a/src/casekit/nmr/model/nmrium/Data.java b/src/casekit/nmr/model/nmrium/NMRiumData.java similarity index 98% rename from src/casekit/nmr/model/nmrium/Data.java rename to src/casekit/nmr/model/nmrium/NMRiumData.java index 937090e..1db6a33 100644 --- a/src/casekit/nmr/model/nmrium/Data.java +++ b/src/casekit/nmr/model/nmrium/NMRiumData.java @@ -38,7 +38,7 @@ @ToString @JsonIgnoreProperties(ignoreUnknown = true) -public class Data { +public class NMRiumData { private List spectra; private Correlations correlations; From a2f3553df422433db9972d000a9dd17eb29ea070 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Tue, 3 May 2022 13:40:25 +0200 Subject: [PATCH 385/405] chore: adoptions to current spectra format in NMRium --- src/casekit/nmr/model/nmrium/Range.java | 9 ++++++++- src/casekit/nmr/model/nmrium/Signal1D.java | 8 +++++++- src/casekit/nmr/model/nmrium/Signal2D.java | 2 ++ src/casekit/nmr/model/nmrium/Spectrum.java | 20 +++++++++++++++++--- src/casekit/nmr/model/nmrium/Zone.java | 5 ++++- 5 files changed, 38 insertions(+), 6 deletions(-) diff --git a/src/casekit/nmr/model/nmrium/Range.java b/src/casekit/nmr/model/nmrium/Range.java index 90335e8..9618438 100644 --- a/src/casekit/nmr/model/nmrium/Range.java +++ 
b/src/casekit/nmr/model/nmrium/Range.java @@ -39,5 +39,12 @@ public class Range { private String id; private String kind; - private List signal; + private List signals; + private Double absolute; + private Double from; + private Double to; + private Double integration; + private Double originFrom; + private Double originTo; + } diff --git a/src/casekit/nmr/model/nmrium/Signal1D.java b/src/casekit/nmr/model/nmrium/Signal1D.java index 7141dac..cb2b441 100644 --- a/src/casekit/nmr/model/nmrium/Signal1D.java +++ b/src/casekit/nmr/model/nmrium/Signal1D.java @@ -27,6 +27,9 @@ import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import lombok.*; +import java.util.List; +import java.util.Map; + @NoArgsConstructor @AllArgsConstructor @Getter @@ -37,9 +40,12 @@ public class Signal1D extends Signal { private double delta; + private Double integration; + private List> js; + private Double originDelta; + private List> peaks; public Signal1D(final Signal signal) { super(signal.getId(), signal.getKind(), signal.getMultiplicity(), signal.getSign()); } - } diff --git a/src/casekit/nmr/model/nmrium/Signal2D.java b/src/casekit/nmr/model/nmrium/Signal2D.java index 21f0743..3ea26da 100644 --- a/src/casekit/nmr/model/nmrium/Signal2D.java +++ b/src/casekit/nmr/model/nmrium/Signal2D.java @@ -27,6 +27,7 @@ import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import lombok.*; +import java.util.List; import java.util.Map; @NoArgsConstructor @@ -42,6 +43,7 @@ public class Signal2D private Map x; private Map y; private J j; + private List> peaks; public Signal2D(final Signal signal) { super(signal.getId(), signal.getKind(), signal.getMultiplicity(), signal.getSign()); diff --git a/src/casekit/nmr/model/nmrium/Spectrum.java b/src/casekit/nmr/model/nmrium/Spectrum.java index b639c00..08aac44 100644 --- a/src/casekit/nmr/model/nmrium/Spectrum.java +++ b/src/casekit/nmr/model/nmrium/Spectrum.java @@ -32,6 +32,7 @@ import lombok.ToString; import java.util.ArrayList; +import 
java.util.List; import java.util.Map; @NoArgsConstructor @@ -42,9 +43,22 @@ public class Spectrum { private String id; + private Map info; + private Map data; + private Map display; + private List filters; + private Map meta; + private Map originalData; + private Map originalInfo; + private Map source; + // depending on spectrum type + // 1D private Default ranges; + private Map integrals; + private Map peaks; + // 2D + private Map processingController; private Default zones; - private Map info; public casekit.nmr.model.Spectrum toSpectrum(final boolean considerSignalKind) { final int dimension = (int) this.info.get("dimension"); @@ -57,7 +71,7 @@ public casekit.nmr.model.Spectrum toSpectrum(final boolean considerSignalKind) { final casekit.nmr.model.Spectrum spectrum = new casekit.nmr.model.Spectrum(); spectrum.setNuclei(new String[]{nucleus}); this.ranges.getValues() - .forEach(range -> range.getSignal() + .forEach(range -> range.getSignals() .forEach(signal1D -> { if (considerSignalKind && signal1D.getKind() @@ -82,7 +96,7 @@ public casekit.nmr.model.Spectrum toSpectrum(final boolean considerSignalKind) { spectrum.setNuclei(nuclei); this.zones.getValues() - .forEach(zone -> zone.getSignal() + .forEach(zone -> zone.getSignals() .forEach(signal2D -> { if (considerSignalKind && signal2D.getKind() diff --git a/src/casekit/nmr/model/nmrium/Zone.java b/src/casekit/nmr/model/nmrium/Zone.java index fca899e..279df4b 100644 --- a/src/casekit/nmr/model/nmrium/Zone.java +++ b/src/casekit/nmr/model/nmrium/Zone.java @@ -28,6 +28,7 @@ import lombok.*; import java.util.List; +import java.util.Map; @NoArgsConstructor @AllArgsConstructor @@ -39,5 +40,7 @@ public class Zone { private String id; private String kind; - private List signal; + private List signals; + private Map x; + private Map y; } From 8f0daa3806c4bab81eb6fcbb19f356774f343097 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Tue, 10 May 2022 22:02:36 +0200 Subject: [PATCH 386/405] fix: use manually set and detected 
hybridization in pyLSD input file --- src/casekit/nmr/elucidation/Utilities.java | 46 +++++++++++----------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/src/casekit/nmr/elucidation/Utilities.java b/src/casekit/nmr/elucidation/Utilities.java index 9b0b7b0..2528d03 100644 --- a/src/casekit/nmr/elucidation/Utilities.java +++ b/src/casekit/nmr/elucidation/Utilities.java @@ -370,30 +370,30 @@ private static List getProtonCounts(final List correlation private static List getHybridizations(final List correlationList, final int index, final Map> detectedHybridizations) { final Correlation correlation = correlationList.get(index); - List hybridizations = new ArrayList<>(); - if (correlation.getHybridization() - != null - && !correlation.getHybridization() - .isEmpty()) { - // if hybridization is already given - return correlation.getHybridization(); - } else { - // if hybridization is not given then use the detected ones - if (detectedHybridizations.containsKey(index)) { - hybridizations = new ArrayList<>(detectedHybridizations.get(index)); - } - if (hybridizations.isEmpty() - && correlation.getAtomType() - .equals("C") - && correlation.getProtonsCount() - .size() - == 1 - && correlation.getProtonsCount() - .get(0) - >= 2) { - // a carbon with at least two protons can only be SP2 or SP3 + final Set hybridizations = new HashSet<>(correlation.getHybridization() + != null + ? 
correlation.getHybridization() + : new ArrayList<>()); + if (detectedHybridizations.containsKey(index)) { + hybridizations.addAll(detectedHybridizations.get(index)); + } + if (hybridizations.isEmpty() + && correlation.getAtomType() + .equals("C") + && correlation.getProtonsCount() + .size() + == 1) { + if (correlation.getProtonsCount() + .get(0) + == 2) { + // a carbon with two protons can only be SP2 or SP3 hybridizations.add(2); hybridizations.add(3); + } else if (correlation.getProtonsCount() + .get(0) + == 3) { + // a carbon with three protons can only be SP3 + hybridizations.add(3); } } if (hybridizations.isEmpty()) { @@ -403,7 +403,7 @@ private static List getHybridizations(final List correlati } } - return hybridizations; + return new ArrayList<>(hybridizations); } public static Map buildIndicesMap(final List molecularConnectivityList) { From 7b6a3be7934a0943bcff5bbdfa263789c521c1dc Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Thu, 12 May 2022 15:33:00 +0200 Subject: [PATCH 387/405] chore: use capitalized AND for FEXP command --- src/casekit/nmr/elucidation/lsd/PyLSDInputFileBuilder.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/casekit/nmr/elucidation/lsd/PyLSDInputFileBuilder.java b/src/casekit/nmr/elucidation/lsd/PyLSDInputFileBuilder.java index c23dc57..b0cab06 100644 --- a/src/casekit/nmr/elucidation/lsd/PyLSDInputFileBuilder.java +++ b/src/casekit/nmr/elucidation/lsd/PyLSDInputFileBuilder.java @@ -479,7 +479,7 @@ private static String buildFEXP(final Map fexpMap) { < fexpMap.keySet() .size() - 1) { - stringBuilder.append(" and "); + stringBuilder.append(" AND "); } counter++; } From 35afc0775cc3e034c44c491b9d99868bffe9e46c Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Fri, 13 May 2022 13:59:10 +0200 Subject: [PATCH 388/405] chore: no use of concurrency classes in HOSECodeShiftStatistics --- .../nmr/analysis/HOSECodeShiftStatistics.java | 35 ++++++++++--------- 1 file changed, 18 insertions(+), 17 deletions(-) diff 
--git a/src/casekit/nmr/analysis/HOSECodeShiftStatistics.java b/src/casekit/nmr/analysis/HOSECodeShiftStatistics.java index a993b6e..a8c6a1a 100644 --- a/src/casekit/nmr/analysis/HOSECodeShiftStatistics.java +++ b/src/casekit/nmr/analysis/HOSECodeShiftStatistics.java @@ -23,18 +23,17 @@ import java.io.*; import java.util.*; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ConcurrentLinkedQueue; public class HOSECodeShiftStatistics { private final static Gson GSON = new GsonBuilder().setLenient() .create(); - public static Map>> collectHOSECodeShifts( - final List dataSetList, final Integer maxSphere, final boolean use3D, - final boolean withExplicitH) { - return collectHOSECodeShifts(dataSetList, maxSphere, use3D, withExplicitH, new ConcurrentHashMap<>()); + public static Map>> collectHOSECodeShifts(final List dataSetList, + final Integer maxSphere, + final boolean use3D, + final boolean withExplicitH) { + return collectHOSECodeShifts(dataSetList, maxSphere, use3D, withExplicitH, new HashMap<>()); } /** @@ -46,9 +45,11 @@ public static Map>> collectHOS * * @return */ - public static Map>> collectHOSECodeShifts( - final List dataSetList, final Integer maxSphere, final boolean use3D, final boolean withExplicitH, - final Map>> hoseCodeShifts) { + public static Map>> collectHOSECodeShifts(final List dataSetList, + final Integer maxSphere, + final boolean use3D, + final boolean withExplicitH, + final Map>> hoseCodeShifts) { for (final DataSet dataSet : dataSetList) { insert(dataSet, maxSphere, use3D, withExplicitH, hoseCodeShifts); } @@ -58,7 +59,7 @@ public static Map>> collectHOS public static boolean insert(final DataSet dataSet, final Integer maxSphere, final boolean use3D, final boolean withExplicitH, - final Map>> hoseCodeShifts) { + final Map>> hoseCodeShifts) { final StructureDiagramGenerator structureDiagramGenerator = new StructureDiagramGenerator(); final ExtendedHOSECodeGenerator extendedHOSECodeGenerator = new 
ExtendedHOSECodeGenerator(); final IAtomContainer structure; @@ -174,9 +175,9 @@ public static boolean insert(final DataSet dataSet, final Integer maxSphere, fin } else { hoseCode = HOSECodeBuilder.buildHOSECode(structure, i, sphere, false); } - hoseCodeShifts.putIfAbsent(hoseCode, new ConcurrentHashMap<>()); + hoseCodeShifts.putIfAbsent(hoseCode, new HashMap<>()); hoseCodeShifts.get(hoseCode) - .putIfAbsent(solvent, new ConcurrentLinkedQueue<>()); + .putIfAbsent(solvent, new ArrayList<>()); hoseCodeShifts.get(hoseCode) .get(solvent) .add(signal.getShift(0)); @@ -192,14 +193,14 @@ public static boolean insert(final DataSet dataSet, final Integer maxSphere, fin } public static Map> buildHOSECodeShiftStatistics( - final Map>> hoseCodeShifts) { + final Map>> hoseCodeShifts) { final Map> hoseCodeShiftStatistics = new HashMap<>(); List values; - for (final Map.Entry>> hoseCodes : hoseCodeShifts.entrySet()) { + for (final Map.Entry>> hoseCodes : hoseCodeShifts.entrySet()) { hoseCodeShiftStatistics.put(hoseCodes.getKey(), new HashMap<>()); - for (final Map.Entry> solvents : hoseCodes.getValue() - .entrySet()) { + for (final Map.Entry> solvents : hoseCodes.getValue() + .entrySet()) { values = new ArrayList<>(solvents.getValue()); Statistics.removeOutliers(values, 1.5); hoseCodeShiftStatistics.get(hoseCodes.getKey()) @@ -220,7 +221,7 @@ public static Map> buildHOSECodeShiftStatistics(fi final boolean use3D, final boolean withExplicitH) { try { - final Map>> hoseCodeShifts = new HashMap<>(); + final Map>> hoseCodeShifts = new HashMap<>(); for (final String pathsToNMRShiftDB : pathsToNMRShiftDBs) { HOSECodeShiftStatistics.collectHOSECodeShifts( NMRShiftDB.getDataSetsFromNMRShiftDB(pathsToNMRShiftDB, nuclei), maxSphere, use3D, From 4be90031fa5ebce36685301e3f2a12cb70345fc5 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Wed, 18 May 2022 01:52:40 +0200 Subject: [PATCH 389/405] feat: allow usage of Ertl functional groups (or fragments in general) in PyLSD --- 
.../lsd/PyLSDInputFileBuilder.java | 23 ++++++-- .../nmr/elucidation/lsd/Utilities.java | 55 +++++++++++++++++++ .../nmr/elucidation/model/Detections.java | 2 + .../elucidation/model/ElucidationOptions.java | 1 + .../nmr/fragments/FragmentUtilities.java | 21 +++---- 5 files changed, 87 insertions(+), 15 deletions(-) diff --git a/src/casekit/nmr/elucidation/lsd/PyLSDInputFileBuilder.java b/src/casekit/nmr/elucidation/lsd/PyLSDInputFileBuilder.java index b0cab06..2110bdf 100644 --- a/src/casekit/nmr/elucidation/lsd/PyLSDInputFileBuilder.java +++ b/src/casekit/nmr/elucidation/lsd/PyLSDInputFileBuilder.java @@ -489,7 +489,7 @@ private static String buildFEXP(final Map fexpMap) { return stringBuilder.toString(); } - private static String buildDEFFsAndFEXP(final ElucidationOptions elucidationOptions) { + private static String buildDEFFsAndFEXP(final ElucidationOptions elucidationOptions, final Detections detections) { final StringBuilder stringBuilder = new StringBuilder(); final Map fexpMap = new HashMap<>(); for (int i = 0; i @@ -514,10 +514,22 @@ private static String buildDEFFsAndFEXP(final ElucidationOptions elucidationOpti // + 1), true); // pathsToNeighborsFilesToUse.add(elucidationOptions.getPathsToNeighborsFiles()[1]); // } + + // build and write fragments files + final List pathsToFragmentFilesToUse = new ArrayList<>(); + if (Utilities.writeFragmentsFile(elucidationOptions.getPathToFragmentFiles()[0], + detections.getFunctionalGroups())) { + fexpMap.put("F" + + (fexpMap.size() + + 1), true); + pathsToFragmentFilesToUse.add(elucidationOptions.getPathToFragmentFiles()[0]); + } + // build DEFFs stringBuilder.append( // buildDEFFs(elucidationOptions.getFilterPaths(), pathsToNeighborsFilesToUse.toArray(String[]::new))) - buildDEFFs(elucidationOptions.getFilterPaths(), new String[]{})) + // buildDEFFs(elucidationOptions.getFilterPaths(), new String[]{}) + buildDEFFs(elucidationOptions.getFilterPaths(), pathsToFragmentFilesToUse.toArray(String[]::new))) .append("\n"); 
// build FEXP stringBuilder.append(buildFEXP(fexpMap)) @@ -542,7 +554,8 @@ public static List buildPyLSDInputFileContentList(final Correlations cor correlations.getValues(), detections, grouping, defaultBondDistances); // for each combination insert an input file for PyLSD for (final Map> molecularConnectivityMap : molecularConnectivityMapCombinationList) { - inputFilesContentList.add(buildPyLSDInputFileContent(molecularConnectivityMap, mf, elucidationOptions)); + inputFilesContentList.add( + buildPyLSDInputFileContent(molecularConnectivityMap, mf, elucidationOptions, detections)); } return inputFilesContentList; @@ -550,7 +563,7 @@ public static List buildPyLSDInputFileContentList(final Correlations cor public static String buildPyLSDInputFileContent( final Map> molecularConnectivityMap, final String mf, - final ElucidationOptions elucidationOptions) { + final ElucidationOptions elucidationOptions, final Detections detections) { final Map elementCounts = new LinkedHashMap<>(Utils.getMolecularFormulaElementCounts(mf)); final StringBuilder stringBuilder = new StringBuilder(); @@ -598,7 +611,7 @@ public static String buildPyLSDInputFileContent( elucidationOptions.isAllowHeteroHeteroBonds())) .append("\n"); // DEFF and FEXP as filters (good/bad lists) - stringBuilder.append(buildDEFFsAndFEXP(elucidationOptions)) + stringBuilder.append(buildDEFFsAndFEXP(elucidationOptions, detections)) .append("\n"); return stringBuilder.toString(); diff --git a/src/casekit/nmr/elucidation/lsd/Utilities.java b/src/casekit/nmr/elucidation/lsd/Utilities.java index 475ffec..3fd6aaa 100644 --- a/src/casekit/nmr/elucidation/lsd/Utilities.java +++ b/src/casekit/nmr/elucidation/lsd/Utilities.java @@ -2,10 +2,14 @@ import casekit.io.FileSystem; import casekit.nmr.elucidation.Constants; +import casekit.nmr.model.DataSet; import casekit.nmr.model.Signal; import casekit.nmr.model.nmrium.Correlation; import casekit.nmr.utils.Statistics; import casekit.nmr.utils.Utils; +import 
org.openscience.cdk.interfaces.IAtom; +import org.openscience.cdk.interfaces.IAtomContainer; +import org.openscience.cdk.interfaces.IBond; import java.util.*; import java.util.stream.Collectors; @@ -146,4 +150,55 @@ public static boolean writeNeighborsFile(final String pathToNeighborsFile, final .isEmpty() && FileSystem.writeFile(pathToNeighborsFile, stringBuilder.toString()); } + + public static boolean writeFragmentsFile(final String pathToFragmentsFile, final List fragments) { + final StringBuilder stringBuilder = new StringBuilder(); + + IAtomContainer fragment; + IAtom atom; + IBond bond; + final Map sstrMap = new HashMap<>(); + for (final DataSet fragmentDataSet : fragments) { + if (fragmentDataSet.getAttachment() + == null + || !((boolean) fragmentDataSet.getAttachment() + .get("include"))) { + continue; + } + fragment = fragmentDataSet.getStructure() + .toAtomContainer(); + for (int i = 0; i + < fragment.getAtomCount(); i++) { + atom = fragment.getAtom(i); + sstrMap.put(atom, sstrMap.size() + + 1); + stringBuilder.append("SSTR S") + .append(sstrMap.size()) + .append(" ") + .append(atom.getSymbol()) + .append(" ") + .append(Constants.hybridizationConversionMap.get(atom.getHybridization() + .name())) + .append(" ") + .append(atom.getImplicitHydrogenCount()) + .append("\n"); + } + for (int i = 0; i + < fragment.getBondCount(); i++) { + bond = fragment.getBond(i); + stringBuilder.append("LINK S") + .append(sstrMap.get(bond.getBegin())) + .append(" S") + .append(sstrMap.get(bond.getEnd())) + .append("\n"); + } + stringBuilder.append("\n\n"); + } + System.out.println(stringBuilder); + + + return !stringBuilder.toString() + .isEmpty() + && FileSystem.writeFile(pathToFragmentsFile, stringBuilder.toString()); + } } diff --git a/src/casekit/nmr/elucidation/model/Detections.java b/src/casekit/nmr/elucidation/model/Detections.java index 1b1c680..eee968c 100644 --- a/src/casekit/nmr/elucidation/model/Detections.java +++ 
b/src/casekit/nmr/elucidation/model/Detections.java @@ -1,5 +1,6 @@ package casekit.nmr.elucidation.model; +import casekit.nmr.model.DataSet; import lombok.*; import java.util.List; @@ -18,4 +19,5 @@ public class Detections { private Map>>> forbiddenNeighbors; private Map>>> setNeighbors; private Map> fixedNeighbors; + private List functionalGroups; } diff --git a/src/casekit/nmr/elucidation/model/ElucidationOptions.java b/src/casekit/nmr/elucidation/model/ElucidationOptions.java index e18996e..8402b8b 100644 --- a/src/casekit/nmr/elucidation/model/ElucidationOptions.java +++ b/src/casekit/nmr/elucidation/model/ElucidationOptions.java @@ -14,6 +14,7 @@ public class ElucidationOptions { // PyLSD options private String[] filterPaths; private String[] pathsToNeighborsFiles; + private String[] pathToFragmentFiles; private boolean allowHeteroHeteroBonds; private boolean useElim; private int elimP1; diff --git a/src/casekit/nmr/fragments/FragmentUtilities.java b/src/casekit/nmr/fragments/FragmentUtilities.java index 9d32336..c3b8693 100644 --- a/src/casekit/nmr/fragments/FragmentUtilities.java +++ b/src/casekit/nmr/fragments/FragmentUtilities.java @@ -1,5 +1,6 @@ package casekit.nmr.fragments; +import casekit.nmr.elucidation.Constants; import casekit.nmr.model.Assignment; import casekit.nmr.model.DataSet; import casekit.nmr.model.Spectrum; @@ -82,7 +83,7 @@ public static Map> getGoodlistAndBadlist(final List> queryHybridizationList) { + final List> queryHybridizationList) { final List matches = new ArrayList<>(); final List nonMatches = new ArrayList<>(); Assignment matchAssignment; @@ -103,9 +104,9 @@ public static Map> getGoodlistAndBadlist(final List> queryHybridizationList) { + public static boolean isMatch(final DataSet dataSet, final Spectrum querySpectrum, final String mf, + final Assignment matchAssignment, final double maxAverageDeviation, + final List> queryHybridizationList) { // check for nuclei if (!dataSet.getSpectrum() 
.getNuclei()[0].equals(querySpectrum.getNuclei()[0])) { @@ -139,8 +140,8 @@ private static boolean isMatch(final DataSet dataSet, final Spectrum querySpectr return true; } - private static boolean isNonMatch(final DataSet dataSet, final Spectrum querySpectrum, final String mf, - final Assignment matchAssigment) { + public static boolean isNonMatch(final DataSet dataSet, final Spectrum querySpectrum, final String mf, + final Assignment matchAssigment) { if (!isStructuralMatch(dataSet, mf)) { return false; } @@ -169,7 +170,7 @@ private static boolean isStructuralMatch(final DataSet dataSet, final String mf) } private static boolean checkHybridizations(final DataSet dataSet, final Assignment matchAssignment, - final List> queryHybridizationList) { + final List> queryHybridizationList) { if (queryHybridizationList.isEmpty()) { return true; } @@ -189,9 +190,9 @@ private static boolean checkHybridizations(final DataSet dataSet, final Assignme .get(0); signalIndexInQuerySpectrum = matchAssignment.getAssignment(0, signalIndexInDataSetSpectrum, 0); if (!queryHybridizationList.get(signalIndexInQuerySpectrum) - .contains(fragment.getAtom(i) - .getHybridization() - .name())) { + .contains(Constants.hybridizationConversionMap.get(fragment.getAtom(i) + .getHybridization() + .name()))) { return false; } } From e626ede9623923f43d0f6f40bfcc813e533d66c4 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Wed, 18 May 2022 14:02:34 +0200 Subject: [PATCH 390/405] chore: renamed FunctionalGroup to Fragment --- src/casekit/nmr/elucidation/lsd/PyLSDInputFileBuilder.java | 3 +-- src/casekit/nmr/elucidation/model/Detections.java | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/casekit/nmr/elucidation/lsd/PyLSDInputFileBuilder.java b/src/casekit/nmr/elucidation/lsd/PyLSDInputFileBuilder.java index 2110bdf..f251ac6 100644 --- a/src/casekit/nmr/elucidation/lsd/PyLSDInputFileBuilder.java +++ b/src/casekit/nmr/elucidation/lsd/PyLSDInputFileBuilder.java @@ -517,8 +517,7 @@ 
private static String buildDEFFsAndFEXP(final ElucidationOptions elucidationOpti // build and write fragments files final List pathsToFragmentFilesToUse = new ArrayList<>(); - if (Utilities.writeFragmentsFile(elucidationOptions.getPathToFragmentFiles()[0], - detections.getFunctionalGroups())) { + if (Utilities.writeFragmentsFile(elucidationOptions.getPathToFragmentFiles()[0], detections.getFragments())) { fexpMap.put("F" + (fexpMap.size() + 1), true); diff --git a/src/casekit/nmr/elucidation/model/Detections.java b/src/casekit/nmr/elucidation/model/Detections.java index eee968c..cf65104 100644 --- a/src/casekit/nmr/elucidation/model/Detections.java +++ b/src/casekit/nmr/elucidation/model/Detections.java @@ -19,5 +19,5 @@ public class Detections { private Map>>> forbiddenNeighbors; private Map>>> setNeighbors; private Map> fixedNeighbors; - private List functionalGroups; + private List fragments; } From b3d7b23f0bcea2aa52be2959e825f8a200c39484 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Sun, 22 May 2022 00:34:30 +0200 Subject: [PATCH 391/405] fix: use different DEFF files for fragment definition in LSD --- .../lsd/PyLSDInputFileBuilder.java | 31 +++++++++--- .../elucidation/model/ElucidationOptions.java | 4 +- .../nmr/filterandrank/FilterAndRank.java | 9 ---- .../nmr/fragments/FragmentUtilities.java | 9 ++-- .../fragmentation/Fragmentation.java | 1 + .../fragmentation/FragmentationUtilities.java | 47 ++++++++++++++++--- 6 files changed, 71 insertions(+), 30 deletions(-) diff --git a/src/casekit/nmr/elucidation/lsd/PyLSDInputFileBuilder.java b/src/casekit/nmr/elucidation/lsd/PyLSDInputFileBuilder.java index f251ac6..d3a6447 100644 --- a/src/casekit/nmr/elucidation/lsd/PyLSDInputFileBuilder.java +++ b/src/casekit/nmr/elucidation/lsd/PyLSDInputFileBuilder.java @@ -5,6 +5,7 @@ import casekit.nmr.elucidation.model.ElucidationOptions; import casekit.nmr.elucidation.model.Grouping; import casekit.nmr.elucidation.model.MolecularConnectivity; +import 
casekit.nmr.model.DataSet; import casekit.nmr.model.nmrium.Correlations; import casekit.nmr.utils.Statistics; import casekit.nmr.utils.Utils; @@ -517,17 +518,33 @@ private static String buildDEFFsAndFEXP(final ElucidationOptions elucidationOpti // build and write fragments files final List pathsToFragmentFilesToUse = new ArrayList<>(); - if (Utilities.writeFragmentsFile(elucidationOptions.getPathToFragmentFiles()[0], detections.getFragments())) { - fexpMap.put("F" - + (fexpMap.size() - + 1), true); - pathsToFragmentFilesToUse.add(elucidationOptions.getPathToFragmentFiles()[0]); + String pathToFragmentFile; + DataSet fragmentDataSet; + for (int i = 0; i + < detections.getFragments() + .size(); i++) { + fragmentDataSet = detections.getFragments() + .get(i); + if (fragmentDataSet.getAttachment() + == null + || !((boolean) fragmentDataSet.getAttachment() + .get("include"))) { + continue; + } + pathToFragmentFile = elucidationOptions.getPathToFragmentFiles() + + "_" + + i + + ".deff"; + if (Utilities.writeFragmentFile(pathToFragmentFile, fragmentDataSet)) { + fexpMap.put("F" + + (fexpMap.size() + + 1), true); + pathsToFragmentFilesToUse.add(pathToFragmentFile); + } } // build DEFFs stringBuilder.append( - // buildDEFFs(elucidationOptions.getFilterPaths(), pathsToNeighborsFilesToUse.toArray(String[]::new))) - // buildDEFFs(elucidationOptions.getFilterPaths(), new String[]{}) buildDEFFs(elucidationOptions.getFilterPaths(), pathsToFragmentFilesToUse.toArray(String[]::new))) .append("\n"); // build FEXP diff --git a/src/casekit/nmr/elucidation/model/ElucidationOptions.java b/src/casekit/nmr/elucidation/model/ElucidationOptions.java index 8402b8b..cba1d1f 100644 --- a/src/casekit/nmr/elucidation/model/ElucidationOptions.java +++ b/src/casekit/nmr/elucidation/model/ElucidationOptions.java @@ -13,8 +13,8 @@ public class ElucidationOptions { // PyLSD options private String[] filterPaths; - private String[] pathsToNeighborsFiles; - private String[] pathToFragmentFiles; + private 
String pathToNeighborsFiles; + private String pathToFragmentFiles; private boolean allowHeteroHeteroBonds; private boolean useElim; private int elimP1; diff --git a/src/casekit/nmr/filterandrank/FilterAndRank.java b/src/casekit/nmr/filterandrank/FilterAndRank.java index b88af9f..fcf844b 100644 --- a/src/casekit/nmr/filterandrank/FilterAndRank.java +++ b/src/casekit/nmr/filterandrank/FilterAndRank.java @@ -105,19 +105,10 @@ public static DataSet checkDataSet(final DataSet dataSet, final Spectrum querySp checkEquivalencesCount, allowLowerEquivalencesCount); dataSet.addAttachment("querySpectrumSignalCount", querySpectrum.getSignalCount()); - dataSet.addAttachment("querySpectrumSignalCountWithEquivalences", - querySpectrum.getSignalCountWithEquivalences()); - dataSet.addAttachment("setAssignmentsCountWithEquivalences", - spectralMatchAssignment.getSetAssignmentsCountWithEquivalences(0)); final boolean isCompleteSpectralMatch = spectrum.getSignalCount() == spectralMatchAssignment.getSetAssignmentsCount(0); - final boolean isCompleteSpectralMatchWithEquivalences = spectrum.getSignalCountWithEquivalences() - == spectralMatchAssignment.getSetAssignmentsCountWithEquivalences(0); dataSet.addAttachment("setAssignmentsCount", spectralMatchAssignment.getSetAssignmentsCount(0)); - dataSet.addAttachment("setAssignmentsCountWithEquivalences", - spectralMatchAssignment.getSetAssignmentsCountWithEquivalences(0)); dataSet.addAttachment("isCompleteSpectralMatch", isCompleteSpectralMatch); - dataSet.addAttachment("isCompleteSpectralMatchWithEquivalences", isCompleteSpectralMatchWithEquivalences); dataSet.addAttachment("spectralMatchAssignment", spectralMatchAssignment); Double[] deviations = Similarity.getDeviations(querySpectrum, spectrum, 0, 0, spectralMatchAssignment); diff --git a/src/casekit/nmr/fragments/FragmentUtilities.java b/src/casekit/nmr/fragments/FragmentUtilities.java index c3b8693..c818aab 100644 --- a/src/casekit/nmr/fragments/FragmentUtilities.java +++ 
b/src/casekit/nmr/fragments/FragmentUtilities.java @@ -131,10 +131,10 @@ public static boolean isMatch(final DataSet dataSet, final Spectrum querySpectru if (!checkHybridizations(dataSet, matchAssignment, queryHybridizationList)) { return false; } - dataSet.addMetaInfo("matchAssignment", gson.toJson(matchAssignment, Assignment.class)); + dataSet.addAttachment("spectralMatchAssignment", gson.toJson(matchAssignment, Assignment.class)); final Double rmsd = Similarity.calculateRMSD(spectrum, querySpectrum, 0, 0, matchAssignment); - dataSet.addMetaInfo("averageDeviation", Double.toString(averageDeviation)); - dataSet.addMetaInfo("rmsd", Double.toString(rmsd)); + dataSet.addAttachment("averageDeviation", averageDeviation); + dataSet.addAttachment("rmsd", rmsd); return true; @@ -165,8 +165,7 @@ private static boolean isStructuralMatch(final DataSet dataSet, final String mf) // check molecular formula with atom types in group // do not allow unsaturated fragments return Utils.compareWithMolecularFormulaLessOrEqual(fragment, mf) - && !Utils.getUnsaturatedAtomIndices(fragment) - .isEmpty(); + && !Utils.isSaturated(fragment); } private static boolean checkHybridizations(final DataSet dataSet, final Assignment matchAssignment, diff --git a/src/casekit/nmr/fragments/fragmentation/Fragmentation.java b/src/casekit/nmr/fragments/fragmentation/Fragmentation.java index 80f87f8..8647a96 100644 --- a/src/casekit/nmr/fragments/fragmentation/Fragmentation.java +++ b/src/casekit/nmr/fragments/fragmentation/Fragmentation.java @@ -23,6 +23,7 @@ public class Fragmentation { * @param dataSet dataset with structure to build the fragments from * @param maxSphere maximum spherical limit for single atom fragment creation * @param maxSphereRing maximum spherical limit for ring atom fragment creation + * @param maxRingSize maximum ring size when detecting rings * @param withPseudoAtoms whether to place pseudo atoms in "outer" sphere * * @return diff --git 
a/src/casekit/nmr/fragments/fragmentation/FragmentationUtilities.java b/src/casekit/nmr/fragments/fragmentation/FragmentationUtilities.java index c45f6ab..893a5c9 100644 --- a/src/casekit/nmr/fragments/fragmentation/FragmentationUtilities.java +++ b/src/casekit/nmr/fragments/fragmentation/FragmentationUtilities.java @@ -195,20 +195,53 @@ public static void addToAtomContainer(final ConnectionTree connectionTree, final } public static void attachPseudoAtoms(final ConnectionTree connectionTree, final IAtomContainer structure) { - int atomIndexInStructure; + int connectedAtomIndexInStructure; + // first check for atoms which we would add as pseudo atom but are then ring closures actually + // means we check for possible duplicated insertion + final Set connectedAtoms = new HashSet<>(); + final List ringClosureAtomToAdd = new ArrayList<>(); for (final ConnectionTreeNode node : connectionTree.getNodes(false)) { for (final IAtom connectedAtom : structure.getConnectedAtomsList(node.getAtom())) { - atomIndexInStructure = structure.indexOf(connectedAtom); - if (connectionTree.getBond(node.getKey(), atomIndexInStructure) + connectedAtomIndexInStructure = structure.indexOf(connectedAtom); + if (connectionTree.getBond(node.getKey(), connectedAtomIndexInStructure) == null - && connectionTree.getBond(atomIndexInStructure, node.getKey()) + && connectionTree.getBond(connectedAtomIndexInStructure, node.getKey()) == null) { - addPseudoNode(connectionTree, structure.getAtomCount() - + connectionTree.getNodesCount(false), node.getKey(), - structure.getBond(node.getAtom(), connectedAtom)); + if (connectedAtoms.contains(connectedAtom)) { + ringClosureAtomToAdd.add(connectedAtom); + } + connectedAtoms.add(connectedAtom); + } + } + } + ConnectionTreeNode connectedNode; + for (final ConnectionTreeNode node : connectionTree.getNodes(false)) { + for (final IAtom connectedAtom : structure.getConnectedAtomsList(node.getAtom())) { + connectedAtomIndexInStructure = 
structure.indexOf(connectedAtom); + if (connectionTree.getBond(node.getKey(), connectedAtomIndexInStructure) + == null + && connectionTree.getBond(connectedAtomIndexInStructure, node.getKey()) + == null) { + if (!ringClosureAtomToAdd.contains(connectedAtom)) { + addPseudoNode(connectionTree, structure.getAtomCount() + + connectionTree.getNodesCount(false), node.getKey(), + structure.getBond(node.getAtom(), connectedAtom)); + } else { + // add missing node for ring closure + if (!connectionTree.containsKey(connectedAtom.getIndex())) { + connectionTree.addNode(connectedAtom, connectedAtomIndexInStructure, node.getKey(), + structure.getBond(node.getAtom(), connectedAtom)); + } else { + // set the ring closure + connectedNode = connectionTree.getNode(connectedAtomIndexInStructure); + node.setRingClosureParent(connectedNode); + connectedNode.setRingClosureParent(node); + } + } } } } + closeRings(connectionTree, structure); } private static boolean addPseudoNode(final ConnectionTree connectionTree, final int pseudoNodeKey, From ec06241608dc071ed8b844765888eaf1b2ec87b7 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Sun, 22 May 2022 00:35:03 +0200 Subject: [PATCH 392/405] fix: use different DEFF files for fragment definition in LSD (2) --- .../nmr/elucidation/lsd/Utilities.java | 60 ++++++++-------- src/casekit/nmr/utils/Utils.java | 72 +++++++++++-------- 2 files changed, 72 insertions(+), 60 deletions(-) diff --git a/src/casekit/nmr/elucidation/lsd/Utilities.java b/src/casekit/nmr/elucidation/lsd/Utilities.java index 3fd6aaa..5a1b841 100644 --- a/src/casekit/nmr/elucidation/lsd/Utilities.java +++ b/src/casekit/nmr/elucidation/lsd/Utilities.java @@ -151,52 +151,52 @@ public static boolean writeNeighborsFile(final String pathToNeighborsFile, final && FileSystem.writeFile(pathToNeighborsFile, stringBuilder.toString()); } - public static boolean writeFragmentsFile(final String pathToFragmentsFile, final List fragments) { - final StringBuilder stringBuilder = new 
StringBuilder(); - IAtomContainer fragment; + public static boolean writeFragmentFile(final String pathToFragmentsFile, final DataSet fragmentDataSet) { + final StringBuilder stringBuilder = new StringBuilder(); + final IAtomContainer fragment = fragmentDataSet.getStructure() + .toAtomContainer(); IAtom atom; IBond bond; final Map sstrMap = new HashMap<>(); - for (final DataSet fragmentDataSet : fragments) { - if (fragmentDataSet.getAttachment() - == null - || !((boolean) fragmentDataSet.getAttachment() - .get("include"))) { - continue; - } - fragment = fragmentDataSet.getStructure() - .toAtomContainer(); - for (int i = 0; i - < fragment.getAtomCount(); i++) { - atom = fragment.getAtom(i); - sstrMap.put(atom, sstrMap.size() - + 1); + for (int i = 0; i + < fragment.getAtomCount(); i++) { + atom = fragment.getAtom(i); + sstrMap.put(atom, sstrMap.size() + + 1); + if (atom.getSymbol() + .equals("R")) { + stringBuilder.append("SSTR S") + .append(sstrMap.size()) + .append(" A (1 2 3) (0 1 2 3) ") + .append("\n"); + } else { stringBuilder.append("SSTR S") .append(sstrMap.size()) .append(" ") .append(atom.getSymbol()) .append(" ") - .append(Constants.hybridizationConversionMap.get(atom.getHybridization() - .name())) + .append(atom.getSymbol() + .equals("C") + ? 
Constants.hybridizationConversionMap.get(atom.getHybridization() + .name()) + : "(1 2 3)") .append(" ") .append(atom.getImplicitHydrogenCount()) .append("\n"); } - for (int i = 0; i - < fragment.getBondCount(); i++) { - bond = fragment.getBond(i); - stringBuilder.append("LINK S") - .append(sstrMap.get(bond.getBegin())) - .append(" S") - .append(sstrMap.get(bond.getEnd())) - .append("\n"); - } - stringBuilder.append("\n\n"); + } + for (int i = 0; i + < fragment.getBondCount(); i++) { + bond = fragment.getBond(i); + stringBuilder.append("LINK S") + .append(sstrMap.get(bond.getBegin())) + .append(" S") + .append(sstrMap.get(bond.getEnd())) + .append("\n"); } System.out.println(stringBuilder); - return !stringBuilder.toString() .isEmpty() && FileSystem.writeFile(pathToFragmentsFile, stringBuilder.toString()); diff --git a/src/casekit/nmr/utils/Utils.java b/src/casekit/nmr/utils/Utils.java index b2a50ef..67ccc76 100644 --- a/src/casekit/nmr/utils/Utils.java +++ b/src/casekit/nmr/utils/Utils.java @@ -137,38 +137,39 @@ public static int getAtomTypeCount(final String mf, final String atomType) { public static boolean compareWithMolecularFormulaLessOrEqual(final IAtomContainer structure, final String mf) { if (mf - != null - && !mf.trim() - .isEmpty()) { - for (final String atomType : getAtomTypesInAtomContainer(structure)) { - if (getAtomTypeCount(structure, atomType) - > getAtomTypeCount(mf, atomType)) { - return false; - } + == null + || mf.trim() + .isEmpty()) { + return false; + } + for (final String atomType : getAtomTypesInAtomContainer(structure)) { + if (!atomType.equals("R") + && getAtomTypeCount(structure, atomType) + > getAtomTypeCount(mf, atomType)) { + return false; } - return AtomContainerManipulator.getImplicitHydrogenCount(structure) - <= getAtomTypeCount(mf, "H"); } + return AtomContainerManipulator.getImplicitHydrogenCount(structure) + <= getAtomTypeCount(mf, "H"); + - return true; } public static boolean compareWithMolecularFormulaEqual(final 
IAtomContainer structure, final String mf) { if (mf - != null - && !mf.trim() - .isEmpty()) { - for (final String atomType : getAtomTypesInAtomContainer(structure)) { - if (getAtomTypeCount(structure, atomType) - != getAtomTypeCount(mf, atomType)) { - return false; - } + == null + || mf.trim() + .isEmpty()) { + return false; + } + for (final String atomType : getAtomTypesInAtomContainer(structure)) { + if (getAtomTypeCount(structure, atomType) + != getAtomTypeCount(mf, atomType)) { + return false; } - return AtomContainerManipulator.getImplicitHydrogenCount(structure) - == Utils.getAtomTypeCount(mf, "H"); } - - return true; + return AtomContainerManipulator.getImplicitHydrogenCount(structure) + == Utils.getAtomTypeCount(mf, "H"); } /** @@ -317,16 +318,27 @@ public static boolean isValidBondAddition(final IAtomContainer ac, final int ato <= atom.getValency(); } - public static Boolean isSaturated(final IAtomContainer ac, final int atomIndex) { - if (!checkIndexInAtomContainer(ac, atomIndex)) { - return null; + public static boolean isSaturated(final IAtomContainer ac, final int atomIndex) { + final IAtom atom = ac.getAtom(atomIndex); + if (atom.getSymbol() + .equals("R")) { + return false; } - return ac.getAtom(atomIndex) - .getValency() + return atom.getValency() != null && getBondOrderSum(ac, atomIndex, true).intValue() - >= ac.getAtom(atomIndex) - .getValency(); + >= atom.getValency(); + } + + public static boolean isSaturated(final IAtomContainer ac) { + for (int i = 0; i + < ac.getAtomCount(); i++) { + if (!isSaturated(ac, i)) { + return false; + } + } + + return true; } public static List getUnsaturatedAtomIndices(final IAtomContainer ac) { From 608866a4eb5f7d306421e5bdf720995e0a4ba10d Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Sun, 22 May 2022 12:07:17 +0200 Subject: [PATCH 393/405] fix: do not store stringified spectral match assignment --- src/casekit/nmr/fragments/FragmentUtilities.java | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff 
--git a/src/casekit/nmr/fragments/FragmentUtilities.java b/src/casekit/nmr/fragments/FragmentUtilities.java index c818aab..440c146 100644 --- a/src/casekit/nmr/fragments/FragmentUtilities.java +++ b/src/casekit/nmr/fragments/FragmentUtilities.java @@ -6,7 +6,6 @@ import casekit.nmr.model.Spectrum; import casekit.nmr.similarity.Similarity; import casekit.nmr.utils.Utils; -import com.google.gson.Gson; import org.openscience.cdk.interfaces.IAtomContainer; import java.util.*; @@ -14,8 +13,6 @@ public class FragmentUtilities { - private final static Gson gson = new Gson(); - public static LinkedHashMap> sortByFrequencies( final Map> functionalGroupDataSetsMap) { final LinkedHashMap> sortedCollection = new LinkedHashMap<>(); @@ -131,7 +128,7 @@ public static boolean isMatch(final DataSet dataSet, final Spectrum querySpectru if (!checkHybridizations(dataSet, matchAssignment, queryHybridizationList)) { return false; } - dataSet.addAttachment("spectralMatchAssignment", gson.toJson(matchAssignment, Assignment.class)); + dataSet.addAttachment("spectralMatchAssignment", matchAssignment); final Double rmsd = Similarity.calculateRMSD(spectrum, querySpectrum, 0, 0, matchAssignment); dataSet.addAttachment("averageDeviation", averageDeviation); dataSet.addAttachment("rmsd", rmsd); From eafefc240d6cccce7a86c29d8fd9ad7441e7f9a5 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Mon, 23 May 2022 22:58:56 +0200 Subject: [PATCH 394/405] fix: swapped query spectrum in first place with (sub)spectrum when doing spectral matching --- .../nmr/filterandrank/FilterAndRank.java | 6 +-- src/casekit/nmr/similarity/Utilities.java | 53 ++++++++++++++----- 2 files changed, 43 insertions(+), 16 deletions(-) diff --git a/src/casekit/nmr/filterandrank/FilterAndRank.java b/src/casekit/nmr/filterandrank/FilterAndRank.java index fcf844b..c8e5f0c 100644 --- a/src/casekit/nmr/filterandrank/FilterAndRank.java +++ b/src/casekit/nmr/filterandrank/FilterAndRank.java @@ -93,14 +93,14 @@ public static DataSet 
checkDataSet(final DataSet dataSet, final Spectrum querySp final Assignment spectralMatchAssignment = detections != null - ? Similarity.matchSpectra(querySpectrum, spectrum, 0, 0, + ? Similarity.matchSpectra(spectrum, querySpectrum, 0, 0, shiftTolerance, checkMultiplicity, checkEquivalencesCount, allowLowerEquivalencesCount, dataSet.getStructure() .toAtomContainer(), dataSet.getAssignment(), detections) - : Similarity.matchSpectra(querySpectrum, spectrum, 0, 0, + : Similarity.matchSpectra(spectrum, querySpectrum, 0, 0, shiftTolerance, checkMultiplicity, checkEquivalencesCount, allowLowerEquivalencesCount); @@ -111,7 +111,7 @@ public static DataSet checkDataSet(final DataSet dataSet, final Spectrum querySp dataSet.addAttachment("isCompleteSpectralMatch", isCompleteSpectralMatch); dataSet.addAttachment("spectralMatchAssignment", spectralMatchAssignment); - Double[] deviations = Similarity.getDeviations(querySpectrum, spectrum, 0, 0, spectralMatchAssignment); + Double[] deviations = Similarity.getDeviations(spectrum, querySpectrum, 0, 0, spectralMatchAssignment); if (allowIncompleteMatch) { deviations = Arrays.stream(deviations) .filter(Objects::nonNull) diff --git a/src/casekit/nmr/similarity/Utilities.java b/src/casekit/nmr/similarity/Utilities.java index 5a3dc17..23fe1aa 100644 --- a/src/casekit/nmr/similarity/Utilities.java +++ b/src/casekit/nmr/similarity/Utilities.java @@ -13,6 +13,19 @@ public class Utilities { + + /** + * @param spectrum1 first spectrum (possible subspectrum) + * @param spectrum2 second spectrum + * @param dim1 dim in first spectrum + * @param dim2 dim in second spectrum + * @param shiftTolerance shift tolerance + * @param checkMultiplicity whether to check multiplicity + * @param checkEquivalencesCount whether to check equivalences + * @param allowLowerEquivalencesCount whether to allow lower equivalences + * + * @return + */ public static List buildDistanceList(final Spectrum spectrum1, final Spectrum spectrum2, final int dim1, final int 
dim2, final double shiftTolerance, final boolean checkMultiplicity, @@ -74,7 +87,22 @@ public static Double getDistanceValue(final Signal signal1, final Signal signal2 ? null : distanceValue; } - + + /** + * @param spectrum1 first spectrum (possible subspectrum) + * @param spectrum2 second spectrum + * @param dim1 dim in first spectrum + * @param dim2 dim in second spectrum + * @param shiftTolerance shift tolerance + * @param checkMultiplicity whether to check multiplicity + * @param checkEquivalencesCount whether to check equivalences + * @param allowLowerEquivalencesCount whether to allow lower equivalences + * @param structure structure belonging to first spectrum + * @param assignment assignments between structure and first spectrum + * @param detections detections to use as structural filter within given structure + * + * @return + */ public static List buildDistanceList(final Spectrum spectrum1, final Spectrum spectrum2, final int dim1, final int dim2, final double shiftTolerance, final boolean checkMultiplicity, @@ -90,15 +118,6 @@ public static List buildDistanceList(final Spectrum spectrum1, final S boolean skip; for (int i = 0; i < spectrum1.getSignalCount(); i++) { - forbiddenNeighbors = detections.getForbiddenNeighbors() - .get(i) - .keySet(); - setNeighbors = detections.getSetNeighbors() - .get(i) - .keySet(); - hybridizations = detections.getDetectedHybridizations() - .get(i); - for (int j = 0; j < spectrum2.getSignalCount(); j++) { // check spectral constraints @@ -111,9 +130,17 @@ public static List buildDistanceList(final Spectrum spectrum1, final S } skip = false; // check structural constraints - for (int k = 0; k - < assignment.getAssignment(0, j).length; k++) { - atom = structure.getAtom(assignment.getAssignment(0, j, k)); + forbiddenNeighbors = detections.getForbiddenNeighbors() + .get(j) + .keySet(); + setNeighbors = detections.getSetNeighbors() + .get(j) + .keySet(); + hybridizations = detections.getDetectedHybridizations() + .get(j); + for 
(int equiv = 0; equiv + < assignment.getAssignment(0, i).length; equiv++) { + atom = structure.getAtom(assignment.getAssignment(0, i, equiv)); // if certain hybridizations are given and the atom's hybridization is known if (!hybridizations.isEmpty() && Constants.hybridizationConversionMap.containsKey(atom.getHybridization() From 99e2911ba68cd1c1036ddc6cb34c3eb73fb1f12f Mon Sep 17 00:00:00 2001 From: Michael Wenk Date: Mon, 29 Aug 2022 00:09:40 +0200 Subject: [PATCH 395/405] chore: updated README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 5b33ee5..407e7ee 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ -# - +[![DOI](https://zenodo.org/badge/124278536.svg)](https://zenodo.org/badge/latestdoi/124278536) [![GitHub contributors](https://img.shields.io/github/contributors/michaelwenk/casekit.svg)](https://github.com/michaelwenk/casekit/graphs/contributors/) [![GitHub issues](https://img.shields.io/github/issues/michaelwenk/casekit.svg)](https://github.com/michaelwenk/casekit/issues/) [![GitHub release](https://img.shields.io/github/release/michaelwenk/casekit.svg)](https://github.com/michaelwenk/casekit/releases/) + # The Computer-Assisted-Structure-Elucidation Kit (CASEkit) ## Introduction From f78392ecf9eba7fd16f6861f978c455421f128c6 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Tue, 27 Sep 2022 11:43:29 +0200 Subject: [PATCH 396/405] fix: avoid bidirectional BOND commands in PyLSD input file; close #4 --- .../lsd/PyLSDInputFileBuilder.java | 36 ++++++++++++------- 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/src/casekit/nmr/elucidation/lsd/PyLSDInputFileBuilder.java b/src/casekit/nmr/elucidation/lsd/PyLSDInputFileBuilder.java index d3a6447..8adad69 100644 --- a/src/casekit/nmr/elucidation/lsd/PyLSDInputFileBuilder.java +++ b/src/casekit/nmr/elucidation/lsd/PyLSDInputFileBuilder.java @@ -91,6 +91,7 @@ private static Map buildStringBuilderMap( int counter, 
firstOfEquivalenceIndexPyLSD; Set groupMembers; // use as a Set to remove the actual value and not at a list index MolecularConnectivity molecularConnectivityGroupMember, molecularConnectivityHeavyAtom; + final Map> addedBONDPairs = new HashMap<>(); final Set addedKeysSHIH = new HashSet<>(); for (final int correlationIndex : molecularConnectivityMap.keySet()) { firstOfEquivalenceIndexPyLSD = -1; @@ -234,18 +235,29 @@ private static Map buildStringBuilderMap( != null) { stringList = stringListMap.get("BOND"); for (final int bondedIndexInPyLSD : molecularConnectivity.getFixedNeighbors()) { - stringBuilder = new StringBuilder(); - stringBuilder.append("BOND ") - .append(molecularConnectivity.getIndex()) - .append(" ") - .append(bondedIndexInPyLSD) - .append(buildShiftsComment(molecularConnectivityMap, molecularConnectivity, - casekit.nmr.elucidation.Utilities.findMolecularConnectivityByIndex( - molecularConnectivityMap, "H", true, - bondedIndexInPyLSD))) - .append("\n"); - if (!stringList.contains(stringBuilder.toString())) { - stringList.add(stringBuilder.toString()); + if (!addedBONDPairs.containsKey(molecularConnectivity.getIndex()) + || (addedBONDPairs.containsKey(molecularConnectivity.getIndex()) + && !addedBONDPairs.get(molecularConnectivity.getIndex()) + .contains(bondedIndexInPyLSD))) { + stringBuilder = new StringBuilder(); + stringBuilder.append("BOND ") + .append(molecularConnectivity.getIndex()) + .append(" ") + .append(bondedIndexInPyLSD) + .append(buildShiftsComment(molecularConnectivityMap, molecularConnectivity, + casekit.nmr.elucidation.Utilities.findMolecularConnectivityByIndex( + molecularConnectivityMap, "H", true, + bondedIndexInPyLSD))) + .append("\n"); + if (!stringList.contains(stringBuilder.toString())) { + stringList.add(stringBuilder.toString()); + } + addedBONDPairs.putIfAbsent(molecularConnectivity.getIndex(), new HashSet<>()); + addedBONDPairs.get(molecularConnectivity.getIndex()) + .add(bondedIndexInPyLSD); + 
addedBONDPairs.putIfAbsent(bondedIndexInPyLSD, new HashSet<>()); + addedBONDPairs.get(bondedIndexInPyLSD) + .add(molecularConnectivity.getIndex()); } } } From b680e235f91baa179f40845a5748b680ddcb255d Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Tue, 11 Apr 2023 17:42:14 +0200 Subject: [PATCH 397/405] fix: consider bond stereo configuration in StructureCompact class and during stereo HOSE code prediction --- .../nmr/analysis/HOSECodeShiftStatistics.java | 24 +++++++++++++++++-- src/casekit/nmr/model/StructureCompact.java | 6 +++-- src/casekit/nmr/utils/Utils.java | 2 +- 3 files changed, 27 insertions(+), 5 deletions(-) diff --git a/src/casekit/nmr/analysis/HOSECodeShiftStatistics.java b/src/casekit/nmr/analysis/HOSECodeShiftStatistics.java index a8c6a1a..e1adfe6 100644 --- a/src/casekit/nmr/analysis/HOSECodeShiftStatistics.java +++ b/src/casekit/nmr/analysis/HOSECodeShiftStatistics.java @@ -17,6 +17,7 @@ import org.bson.Document; import org.openscience.cdk.exception.CDKException; import org.openscience.cdk.interfaces.IAtomContainer; +import org.openscience.cdk.interfaces.IBond; import org.openscience.cdk.layout.StructureDiagramGenerator; import org.openscience.nmrshiftdb.util.AtomUtils; import org.openscience.nmrshiftdb.util.ExtendedHOSECodeGenerator; @@ -28,6 +29,8 @@ public class HOSECodeShiftStatistics { private final static Gson GSON = new GsonBuilder().setLenient() .create(); + private final static StructureDiagramGenerator structureDiagramGenerator = new StructureDiagramGenerator(); + private final static ExtendedHOSECodeGenerator extendedHOSECodeGenerator = new ExtendedHOSECodeGenerator(); public static Map>> collectHOSECodeShifts(final List dataSetList, final Integer maxSphere, @@ -60,8 +63,6 @@ public static Map>> collectHOSECodeShifts(final public static boolean insert(final DataSet dataSet, final Integer maxSphere, final boolean use3D, final boolean withExplicitH, final Map>> hoseCodeShifts) { - final StructureDiagramGenerator 
structureDiagramGenerator = new StructureDiagramGenerator(); - final ExtendedHOSECodeGenerator extendedHOSECodeGenerator = new ExtendedHOSECodeGenerator(); final IAtomContainer structure; Signal signal; String hoseCode; @@ -101,6 +102,14 @@ public static boolean insert(final DataSet dataSet, final Integer maxSphere, fin if (use3D) { try { + // store wedge bond information + final int[] ordinals = new int[structure.getBondCount()]; + int i = 0; + for (final IBond bond : structure.bonds()) { + ordinals[i] = bond.getStereo() + .ordinal(); + i++; + } // set 2D coordinates structureDiagramGenerator.setMolecule(structure); structureDiagramGenerator.generateCoordinates(structure); @@ -108,6 +117,17 @@ public static boolean insert(final DataSet dataSet, final Integer maxSphere, fin Utils.convertExplicitToImplicitHydrogens(structure); /* add explicit H atoms */ AtomUtils.addAndPlaceHydrogens(structure); + // restore wedge bond information + i = 0; + for (final IBond bond : structure.bonds()) { + bond.setStereo(IBond.Stereo.values()[ordinals[i]]); + + i++; + if (i + >= ordinals.length) { + break; + } + } } catch (final CDKException | IOException | ClassNotFoundException e) { e.printStackTrace(); } diff --git a/src/casekit/nmr/model/StructureCompact.java b/src/casekit/nmr/model/StructureCompact.java index 18421d2..01f6fac 100644 --- a/src/casekit/nmr/model/StructureCompact.java +++ b/src/casekit/nmr/model/StructureCompact.java @@ -39,7 +39,7 @@ @Setter public class StructureCompact { - private int[][][] bondProperties; // connected atom index, bond order, bond is in ring, bond is aromatic + private int[][][] bondProperties; // connected atom index, bond order, bond is in ring, bond is aromatic, bond stereo configuration private Integer[][] atomProperties; // element symbol, hybridization, implicitHydrogenCount, valency, formalCharge, isInRingAtom, isAromaticAtom public StructureCompact(final IAtomContainer ac) { @@ -61,7 +61,8 @@ public StructureCompact(final IAtomContainer 
ac) { ? 1 : 0, bond.isAromatic() ? 1 - : 0}); + : 0, + bond.getStereo().ordinal()}); } } temp = new int[connectedAtomsList.size()][]; @@ -147,6 +148,7 @@ public IAtomContainer toAtomContainer() { == 1); bond.setIsAromatic(this.bondProperties[i][k][3] == 1); + bond.setStereo(IBond.Stereo.values()[this.bondProperties[i][k][4]]); ac.addBond(bond); } } diff --git a/src/casekit/nmr/utils/Utils.java b/src/casekit/nmr/utils/Utils.java index 67ccc76..ee98352 100644 --- a/src/casekit/nmr/utils/Utils.java +++ b/src/casekit/nmr/utils/Utils.java @@ -579,7 +579,7 @@ public static Float getBondOrderSum(final IAtomContainer ac, final int atomIndex * @throws CDKException */ public static DataSet atomContainerToDataSet(final IAtomContainer structure) throws CDKException { - return atomContainerToDataSet(structure, true); + return atomContainerToDataSet(structure, false); } /** From aabccdedf1e94cf30d69db35f062548c665a984b Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Tue, 11 Apr 2023 17:44:50 +0200 Subject: [PATCH 398/405] fix: close #6 --- .../nmr/analysis/HOSECodeShiftStatistics.java | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/casekit/nmr/analysis/HOSECodeShiftStatistics.java b/src/casekit/nmr/analysis/HOSECodeShiftStatistics.java index e1adfe6..6b7e144 100644 --- a/src/casekit/nmr/analysis/HOSECodeShiftStatistics.java +++ b/src/casekit/nmr/analysis/HOSECodeShiftStatistics.java @@ -104,11 +104,11 @@ public static boolean insert(final DataSet dataSet, final Integer maxSphere, fin try { // store wedge bond information final int[] ordinals = new int[structure.getBondCount()]; - int i = 0; + int k = 0; for (final IBond bond : structure.bonds()) { - ordinals[i] = bond.getStereo() + ordinals[k] = bond.getStereo() .ordinal(); - i++; + k++; } // set 2D coordinates structureDiagramGenerator.setMolecule(structure); @@ -118,12 +118,12 @@ public static boolean insert(final DataSet dataSet, final Integer maxSphere, fin /* add explicit H atoms */ 
AtomUtils.addAndPlaceHydrogens(structure); // restore wedge bond information - i = 0; + k = 0; for (final IBond bond : structure.bonds()) { - bond.setStereo(IBond.Stereo.values()[ordinals[i]]); + bond.setStereo(IBond.Stereo.values()[ordinals[k]]); - i++; - if (i + k++; + if (k >= ordinals.length) { break; } From 35721e31b9267fe9ad0239393701045a99972921 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Wed, 12 Apr 2023 11:50:39 +0200 Subject: [PATCH 399/405] chore: use common method placeExplicitHydrogens --- .../nmr/analysis/HOSECodeShiftStatistics.java | 32 ++--------------- src/casekit/nmr/prediction/Prediction.java | 14 ++------ src/casekit/nmr/utils/Utils.java | 35 +++++++++++++++++++ 3 files changed, 40 insertions(+), 41 deletions(-) diff --git a/src/casekit/nmr/analysis/HOSECodeShiftStatistics.java b/src/casekit/nmr/analysis/HOSECodeShiftStatistics.java index 6b7e144..31c544d 100644 --- a/src/casekit/nmr/analysis/HOSECodeShiftStatistics.java +++ b/src/casekit/nmr/analysis/HOSECodeShiftStatistics.java @@ -17,9 +17,6 @@ import org.bson.Document; import org.openscience.cdk.exception.CDKException; import org.openscience.cdk.interfaces.IAtomContainer; -import org.openscience.cdk.interfaces.IBond; -import org.openscience.cdk.layout.StructureDiagramGenerator; -import org.openscience.nmrshiftdb.util.AtomUtils; import org.openscience.nmrshiftdb.util.ExtendedHOSECodeGenerator; import java.io.*; @@ -29,7 +26,6 @@ public class HOSECodeShiftStatistics { private final static Gson GSON = new GsonBuilder().setLenient() .create(); - private final static StructureDiagramGenerator structureDiagramGenerator = new StructureDiagramGenerator(); private final static ExtendedHOSECodeGenerator extendedHOSECodeGenerator = new ExtendedHOSECodeGenerator(); public static Map>> collectHOSECodeShifts(final List dataSetList, @@ -102,32 +98,8 @@ public static boolean insert(final DataSet dataSet, final Integer maxSphere, fin if (use3D) { try { - // store wedge bond information - final int[] 
ordinals = new int[structure.getBondCount()]; - int k = 0; - for (final IBond bond : structure.bonds()) { - ordinals[k] = bond.getStereo() - .ordinal(); - k++; - } - // set 2D coordinates - structureDiagramGenerator.setMolecule(structure); - structureDiagramGenerator.generateCoordinates(structure); - /* !!! No explicit H in mol !!! */ - Utils.convertExplicitToImplicitHydrogens(structure); - /* add explicit H atoms */ - AtomUtils.addAndPlaceHydrogens(structure); - // restore wedge bond information - k = 0; - for (final IBond bond : structure.bonds()) { - bond.setStereo(IBond.Stereo.values()[ordinals[k]]); - - k++; - if (k - >= ordinals.length) { - break; - } - } + Utils.placeExplicitHydrogens(structure); + Utils.setAromaticityAndKekulize(structure); } catch (final CDKException | IOException | ClassNotFoundException e) { e.printStackTrace(); } diff --git a/src/casekit/nmr/prediction/Prediction.java b/src/casekit/nmr/prediction/Prediction.java index 3549e01..63042d5 100644 --- a/src/casekit/nmr/prediction/Prediction.java +++ b/src/casekit/nmr/prediction/Prediction.java @@ -37,7 +37,6 @@ import org.openscience.cdk.exception.CDKException; import org.openscience.cdk.interfaces.IAtom; import org.openscience.cdk.interfaces.IAtomContainer; -import org.openscience.cdk.layout.StructureDiagramGenerator; import org.openscience.cdk.silent.SilentChemObjectBuilder; import org.openscience.cdk.smiles.SmilesGenerator; import org.openscience.cdk.tools.CDKHydrogenAdder; @@ -58,6 +57,8 @@ */ public class Prediction { + private final static ExtendedHOSECodeGenerator extendedHOSECodeGenerator = new ExtendedHOSECodeGenerator(); + /** * Diastereotopic distinctions are not provided yet. 
* @@ -391,8 +392,6 @@ public static DataSet predict1DByStereoHOSECode(final IAtomContainer structure, final Map> hoseCodeShiftStatistics) { final String atomType = Utils.getAtomTypeFromNucleus(nucleus); - final StructureDiagramGenerator structureDiagramGenerator = new StructureDiagramGenerator(); - final ExtendedHOSECodeGenerator extendedHOSECodeGenerator = new ExtendedHOSECodeGenerator(); final Assignment assignment; Signal signal; @@ -405,14 +404,7 @@ public static DataSet predict1DByStereoHOSECode(final IAtomContainer structure, List medians; try { - // set 2D coordinates - structureDiagramGenerator.setMolecule(structure); - structureDiagramGenerator.generateCoordinates(structure); - /* !!! No explicit H in mol !!! */ - Utils.convertExplicitToImplicitHydrogens(structure); - /* add explicit H atoms */ - AtomUtils.addAndPlaceHydrogens(structure); - /* detect aromaticity */ + Utils.placeExplicitHydrogens(structure); Utils.setAromaticityAndKekulize(structure); final DataSet dataSet = Utils.atomContainerToDataSet(structure, false); diff --git a/src/casekit/nmr/utils/Utils.java b/src/casekit/nmr/utils/Utils.java index ee98352..849b4cd 100644 --- a/src/casekit/nmr/utils/Utils.java +++ b/src/casekit/nmr/utils/Utils.java @@ -14,6 +14,7 @@ import org.openscience.cdk.graph.CycleFinder; import org.openscience.cdk.graph.Cycles; import org.openscience.cdk.interfaces.*; +import org.openscience.cdk.layout.StructureDiagramGenerator; import org.openscience.cdk.silent.SilentChemObjectBuilder; import org.openscience.cdk.smiles.SmiFlavor; import org.openscience.cdk.smiles.SmilesGenerator; @@ -21,7 +22,9 @@ import org.openscience.cdk.tools.manipulator.AtomContainerManipulator; import org.openscience.cdk.tools.manipulator.AtomTypeManipulator; import org.openscience.cdk.tools.manipulator.MolecularFormulaManipulator; +import org.openscience.nmrshiftdb.util.AtomUtils; +import java.io.IOException; import java.util.*; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -29,6 
+32,8 @@ public class Utils { + private static final StructureDiagramGenerator structureDiagramGenerator = new StructureDiagramGenerator(); + /** * Specified for carbons only -> not generic!!! * @@ -735,4 +740,34 @@ public static T cloneObject(final T object, final Class clazz) { final String jsonString = gson.toJson(object, clazz); return gson.fromJson(jsonString, clazz); } + + public static void placeExplicitHydrogens( + final IAtomContainer structure) throws CDKException, IOException, ClassNotFoundException { + // store bond stereo information + final int[] ordinals = new int[structure.getBondCount()]; + int k = 0; + for (final IBond bond : structure.bonds()) { + ordinals[k] = bond.getStereo() + .ordinal(); + k++; + } + // set 2D coordinates + structureDiagramGenerator.setMolecule(structure); + structureDiagramGenerator.generateCoordinates(structure); + /* !!! No explicit H in mol !!! */ + Utils.convertExplicitToImplicitHydrogens(structure); + /* add explicit H atoms */ + AtomUtils.addAndPlaceHydrogens(structure); + // restore bond stereo information + k = 0; + for (final IBond bond : structure.bonds()) { + bond.setStereo(IBond.Stereo.values()[ordinals[k]]); + + k++; + if (k + >= ordinals.length) { + break; + } + } + } } From 6da415bbf19a120b8776fdd57fdbc53df0ac2d74 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Thu, 13 Apr 2023 11:10:47 +0200 Subject: [PATCH 400/405] fix: configure structure atomContainerToDataSet method --- src/casekit/nmr/utils/Utils.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/casekit/nmr/utils/Utils.java b/src/casekit/nmr/utils/Utils.java index 849b4cd..f0516cf 100644 --- a/src/casekit/nmr/utils/Utils.java +++ b/src/casekit/nmr/utils/Utils.java @@ -584,7 +584,7 @@ public static Float getBondOrderSum(final IAtomContainer ac, final int atomIndex * @throws CDKException */ public static DataSet atomContainerToDataSet(final IAtomContainer structure) throws CDKException { - return atomContainerToDataSet(structure, 
false); + return atomContainerToDataSet(structure, true); } /** From c81b2229951fd9677ae08e5b920999f5fcdd942f Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Thu, 13 Apr 2023 15:19:49 +0200 Subject: [PATCH 401/405] fix: remove unnecessary command --- src/casekit/nmr/analysis/HOSECodeShiftStatistics.java | 1 - 1 file changed, 1 deletion(-) diff --git a/src/casekit/nmr/analysis/HOSECodeShiftStatistics.java b/src/casekit/nmr/analysis/HOSECodeShiftStatistics.java index 31c544d..274345b 100644 --- a/src/casekit/nmr/analysis/HOSECodeShiftStatistics.java +++ b/src/casekit/nmr/analysis/HOSECodeShiftStatistics.java @@ -99,7 +99,6 @@ public static boolean insert(final DataSet dataSet, final Integer maxSphere, fin if (use3D) { try { Utils.placeExplicitHydrogens(structure); - Utils.setAromaticityAndKekulize(structure); } catch (final CDKException | IOException | ClassNotFoundException e) { e.printStackTrace(); } From 842557397ffe90969c3bc1fa9f5fb37013b552e5 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Fri, 14 Apr 2023 11:31:17 +0200 Subject: [PATCH 402/405] fix: do not place explicit hydrogens when no bonds exist --- src/casekit/nmr/utils/Utils.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/casekit/nmr/utils/Utils.java b/src/casekit/nmr/utils/Utils.java index f0516cf..d7529ba 100644 --- a/src/casekit/nmr/utils/Utils.java +++ b/src/casekit/nmr/utils/Utils.java @@ -743,6 +743,10 @@ public static T cloneObject(final T object, final Class clazz) { public static void placeExplicitHydrogens( final IAtomContainer structure) throws CDKException, IOException, ClassNotFoundException { + if (structure.getBondCount() + == 0) { + return; + } // store bond stereo information final int[] ordinals = new int[structure.getBondCount()]; int k = 0; From 83c4fce311670bf00e3f59896814df8aebccef92 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Tue, 18 Apr 2023 13:38:09 +0200 Subject: [PATCH 403/405] chore: inserted separate buildAlphabeticMF method --- 
src/casekit/nmr/utils/Utils.java | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/src/casekit/nmr/utils/Utils.java b/src/casekit/nmr/utils/Utils.java index d7529ba..519b156 100644 --- a/src/casekit/nmr/utils/Utils.java +++ b/src/casekit/nmr/utils/Utils.java @@ -627,19 +627,10 @@ public static DataSet atomContainerToDataSet(final IAtomContainer structure, : structure.getProperty("SMILES_ID", String.class) .split("\\.")[0]); } - final IMolecularFormula mf = casekit.nmr.utils.Utils.getMolecularFormulaFromAtomContainer(structure); - meta.put("mfOriginal", casekit.nmr.utils.Utils.molecularFormularToString(mf)); - final StringBuilder mfAlphabetic = new StringBuilder(); - final Map mfAlphabeticMap = new TreeMap<>( - Utils.getMolecularFormulaElementCounts(Utils.molecularFormularToString(mf))); - for (final Map.Entry entry : mfAlphabeticMap.entrySet()) { - mfAlphabetic.append(entry.getKey()); - if (entry.getValue() - > 1) { - mfAlphabetic.append(entry.getValue()); - } - } - meta.put("mf", mfAlphabetic.toString()); + final String mf = molecularFormularToString( + casekit.nmr.utils.Utils.getMolecularFormulaFromAtomContainer(structure)); + meta.put("mfOriginal", mf); + meta.put("mf", buildAlphabeticMF(mf)); try { final String smiles = getSmilesFromAtomContainer(structure); meta.put("smiles", smiles); @@ -654,6 +645,20 @@ public static DataSet atomContainerToDataSet(final IAtomContainer structure, return dataSet; } + public static String buildAlphabeticMF(final String mf) { + final StringBuilder mfAlphabetic = new StringBuilder(); + final Map mfAlphabeticMap = new TreeMap<>(Utils.getMolecularFormulaElementCounts(mf)); + for (final Map.Entry entry : mfAlphabeticMap.entrySet()) { + mfAlphabetic.append(entry.getKey()); + if (entry.getValue() + > 1) { + mfAlphabetic.append(entry.getValue()); + } + } + + return mfAlphabetic.toString(); + } + public static Signal extractFirstSignalFromCorrelation(final Correlation correlation) { if 
(correlation.isPseudo()) { return null; From 7f44e3db16aa3d1643801affa64b3a7def1ea7b7 Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Thu, 15 Jun 2023 12:13:15 +0200 Subject: [PATCH 404/405] fix: enable no stereo bond information in StructureCompact class --- src/casekit/nmr/model/StructureCompact.java | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/casekit/nmr/model/StructureCompact.java b/src/casekit/nmr/model/StructureCompact.java index 01f6fac..74c36f7 100644 --- a/src/casekit/nmr/model/StructureCompact.java +++ b/src/casekit/nmr/model/StructureCompact.java @@ -148,7 +148,14 @@ public IAtomContainer toAtomContainer() { == 1); bond.setIsAromatic(this.bondProperties[i][k][3] == 1); - bond.setStereo(IBond.Stereo.values()[this.bondProperties[i][k][4]]); + if (this.bondProperties[i][k].length + == 5) { + // with stereo information + bond.setStereo(IBond.Stereo.values()[this.bondProperties[i][k][4]]); + } else { + // without stereo information + bond.setStereo(IBond.Stereo.NONE); + } ac.addBond(bond); } } From b2a5de56355dcdef5fb6f234125bcc07e65901be Mon Sep 17 00:00:00 2001 From: michaelwenk Date: Tue, 20 Jun 2023 09:14:31 +0200 Subject: [PATCH 405/405] chore: optimised getOutliers method --- .../nmr/analysis/HOSECodeShiftStatistics.java | 2 +- src/casekit/nmr/utils/Statistics.java | 63 ++++++++++++++----- 2 files changed, 47 insertions(+), 18 deletions(-) diff --git a/src/casekit/nmr/analysis/HOSECodeShiftStatistics.java b/src/casekit/nmr/analysis/HOSECodeShiftStatistics.java index 274345b..e0fecad 100644 --- a/src/casekit/nmr/analysis/HOSECodeShiftStatistics.java +++ b/src/casekit/nmr/analysis/HOSECodeShiftStatistics.java @@ -193,7 +193,7 @@ public static Map> buildHOSECodeShiftStatistics( for (final Map.Entry> solvents : hoseCodes.getValue() .entrySet()) { values = new ArrayList<>(solvents.getValue()); - Statistics.removeOutliers(values, 1.5); + values = Statistics.removeOutliers(values, 1.5); 
hoseCodeShiftStatistics.get(hoseCodes.getKey()) .put(solvents.getKey(), new Double[]{(double) values.size(), Collections.min(values), diff --git a/src/casekit/nmr/utils/Statistics.java b/src/casekit/nmr/utils/Statistics.java index 75c1227..94cab89 100644 --- a/src/casekit/nmr/utils/Statistics.java +++ b/src/casekit/nmr/utils/Statistics.java @@ -17,15 +17,38 @@ public class Statistics { * * @return new array list without values outside the generated boundaries */ - public static void removeOutliers(final List input, final double multiplierIQR) { - input.removeAll(getOutliers(input, multiplierIQR)); + public static List removeOutliers(final List input, final double multiplierIQR) { + final List values = new ArrayList<>(); + if (input.size() + <= 1) { + return values; + } + final double[] boundaries = getLowerAndUpperBoundaries(input, multiplierIQR); + final double lowerBound = boundaries[0]; + final double upperBound = boundaries[1]; + + for (final Double value : input) { + if (value + >= lowerBound + && value + <= upperBound) { + values.add(value); + } + } + + return values; } /** - * @param input - * @param multiplierIQR + * Detects outliers in given array list of input values and returns them.
+ * Here, outliers are those which are outside of a calculated lower and upper bound (whisker). + * The interquartile range (IQR) of the input values is therefore multiplied with a given value + * for whisker creation. * - * @return + * @param input list of values to process + * @param multiplierIQR multiplier for IQR to use for lower and upper bound creation + * + * @return new array list with values outside the generated boundaries */ public static List getOutliers(final List input, final double multiplierIQR) { final List outliers = new ArrayList<>(); @@ -33,6 +56,22 @@ public static List getOutliers(final List input, final double mu <= 1) { return outliers; } + final double[] boundaries = getLowerAndUpperBoundaries(input, multiplierIQR); + final double lowerBound = boundaries[0]; + final double upperBound = boundaries[1]; + for (final Double value : input) { + if (value + < lowerBound + || value + > upperBound) { + outliers.add(value); + } + } + + return outliers; + } + + public static double[] getLowerAndUpperBoundaries(final List input, final double multiplierIQR) { Collections.sort(input); final List data1 = input.subList(0, input.size() / 2); @@ -57,18 +96,8 @@ public static List getOutliers(final List input, final double mu final double upperBound = q3 + multiplierIQR * iqr; - for (int i = 0; i - < input.size(); i++) { - if ((input.get(i) - < lowerBound) - || (input.get(i) - > upperBound)) { - outliers.add(input.get(i)); - } - } - // System.out.println("input size: " + input.size()); - // System.out.println("output size: " + outliers.size()); - return outliers; + + return new double[]{lowerBound, upperBound}; } /**