/**
 * Validates the modification list reported for a protein row.
 *
 * For proteins and peptides modifications SHOULD be reported using either UNIMOD or PSI-MOD
 * accessions; CHEMMODs exist because those ontologies do not apply to small molecules.
 * Ambiguity of modification position MUST NOT be reported at the protein level.
 *
 * Parsing is delegated to checkModifications(_section, column, target). Each parsed
 * modification is then inspected in this order:
 *   1. more than one entry in its position map records LogicalErrorType.AmbiguityMod;
 *   2. a CHEMMOD type records LogicalErrorType.CHEMMODS;
 *   3. a SUBST type whose accession makes parseSubstitutionIdentifier return a non-null
 *      value records LogicalErrorType.SubstituteIdentifier and stops the check.
 *
 * @param column column whose header is attached to every recorded error
 * @param target cell content holding the modification list
 * @return the parsed modification list, or null once a SubstituteIdentifier error was recorded
 */
protected SplitList<Modification> checkModifications(MZTabColumn column, string target) {
    SplitList<Modification> parsed = checkModifications(_section, column, target);

    foreach (Modification modification in parsed) {
        // Reported as a warning: a protein-level modification with several candidate positions.
        bool ambiguousPosition = modification.PositionMap.Count > 1;
        if (ambiguousPosition) {
            _errorList.Add(new MZTabError(LogicalErrorType.AmbiguityMod, _lineNumber, column.Header, modification.ToString()));
        }

        // Reported as a warning: CHEMMOD used in a protein row.
        if (modification.Type == Modification.ModificationType.CHEMMOD) {
            _errorList.Add(new MZTabError(LogicalErrorType.CHEMMODS, _lineNumber, column.Header, modification.ToString()));
        }

        // The only condition that aborts: the whole list is discarded.
        if (modification.Type == Modification.ModificationType.SUBST) {
            if (parseSubstitutionIdentifier(modification.Accession) != null) {
                _errorList.Add(new MZTabError(LogicalErrorType.SubstituteIdentifier, _lineNumber, column.Header, modification.ToString()));
                return null;
            }
        }
    }

    return parsed;
}
/**
 * Checks an accession cell and enforces uniqueness.
 *
 * The value is first passed through checkData(column, accession, false). A surviving value
 * is then added to accessionSet; when Add reports that it could not be added, a
 * {@link LogicalErrorType#DuplicationAccession} error is recorded.
 *
 * @param column column whose header is attached to a recorded error
 * @param accession cell content to check
 * @return the checked accession, or null when checkData returned null or the accession
 *         was already present in accessionSet
 */
protected string checkAccession(MZTabColumn column, string accession) {
    string checkedAccession = checkData(column, accession, false);
    if (checkedAccession == null) {
        return null;
    }

    bool firstOccurrence = accessionSet.Add(checkedAccession);
    if (firstOccurrence) {
        return checkedAccession;
    }

    _errorList.Add(new MZTabError(LogicalErrorType.DuplicationAccession, _lineNumber, column.Header, checkedAccession));
    return null;
}
/**
 * Defines the unit used for a column in the peptide/protein/small_molecule section.
 * The format of the value has to be {column name}={Parameter defining the unit}.
 *
 * The arguments are validated in this order before anything is stored:
 * a null column, then a null value, then a column that is an AbundanceColumn
 * (units must not be attached to quantification columns).
 *
 * @param column column the unit applies to; must not be null and must not be an AbundanceColumn
 * @param value parameter describing the unit; must not be null
 * @throws NullReferenceException when column or value is null
 * @throws Exception when column is a PluginMzTab.Lib.Model.AbundanceColumn
 */
internal ColUnit(MZTabColumn column, Param value) {
    if (column == null) {
        throw new NullReferenceException("MZTabColumn can not set null");
    }

    if (value == null) {
        throw new NullReferenceException("Param can not set null");
    }

    bool isQuantificationColumn = column is PluginMzTab.Lib.Model.AbundanceColumn;
    if (isQuantificationColumn) {
        throw new Exception("Colunit MUST NOT be used to define a unit for quantification columns.");
    }

    this.column = column;
    this.value = value;
}
/**
 * Validates the modification list of a row against the sequence it belongs to.
 *
 * For proteins and peptides modifications SHOULD be reported using either UNIMOD or PSI-MOD
 * accessions; CHEMMODs exist because those ontologies do not apply to small molecules.
 *
 * Parsing is delegated to checkModifications(_section, column, target). Every position key
 * of every parsed modification must lie between 0 and sequence.Length + 1 inclusive;
 * the first position outside that range records LogicalErrorType.ModificationPosition and
 * stops the check. A CHEMMOD type additionally records LogicalErrorType.CHEMMODS without
 * stopping.
 *
 * @param column column whose header is attached to every recorded error
 * @param sequence sequence the positions refer to; its Length is read, so it must not be null
 * @param target cell content holding the modification list
 * @return the parsed modification list, or null once a ModificationPosition error was recorded
 */
protected SplitList<Modification> checkModifications(MZTabColumn column, string sequence, string target) {
    SplitList<Modification> parsed = checkModifications(_section, column, target);

    // Highest position accepted: one past the length of the sequence.
    int upperBound = sequence.Length + 1;

    foreach (Modification modification in parsed) {
        foreach (int site in modification.PositionMap.Keys) {
            bool withinBounds = site >= 0 && site <= upperBound;
            if (withinBounds) {
                continue;
            }

            _errorList.Add(new MZTabError(LogicalErrorType.ModificationPosition, _lineNumber, column.Header, modification.ToString(), sequence));
            return null;
        }

        if (modification.Type == Modification.ModificationType.CHEMMOD) {
            _errorList.Add(new MZTabError(LogicalErrorType.CHEMMODS, _lineNumber, column.Header, modification.ToString()));
        }
    }

    return parsed;
}
/**
 * Validates the modification list reported for a small molecule row.
 *
 * As UNIMOD and PSI-MOD are not applicable to small molecules, so-called CHEMMODs can be defined.
 * CHEMMODs MUST NOT be used if the modification can be reported using a PSI-MOD or UNIMOD accession.
 * Mass deltas MUST NOT be used for CHEMMODs if the delta can be expressed through a known
 * chemical formula.
 *
 * Parsing is delegated to base.checkModifications(_section, column, target). Only CHEMMOD
 * entries are inspected further:
 *   - if the cell text contains "-MOD:" or "-UNIMOD:", LogicalErrorType.CHEMMODS is recorded;
 *   - if parseChemmodAccession returns null for the accession,
 *     FormatErrorType.CHEMMODSAccession is recorded and the check stops.
 *
 * @param column column whose header is attached to every recorded error
 * @param target cell content holding the modification list
 * @return the parsed modification list, or null once a CHEMMODSAccession error was recorded
 */
protected SplitList<Modification> checkModifications(MZTabColumn column, string target) {
    SplitList<Modification> parsed = base.checkModifications(_section, column, target);

    foreach (Modification modification in parsed) {
        if (modification.Type != Modification.ModificationType.CHEMMOD) {
            continue;
        }

        // The test looks at the whole cell text, not at this single modification.
        bool mentionsOntology = target.Contains("-MOD:") || target.Contains("-UNIMOD:");
        if (mentionsOntology) {
            _errorList.Add(new MZTabError(LogicalErrorType.CHEMMODS, _lineNumber, column.Header, modification.ToString()));
        }

        bool accessionUnparsable = parseChemmodAccession(modification.Accession) == null;
        if (accessionUnparsable) {
            _errorList.Add(new MZTabError(FormatErrorType.CHEMMODSAccession, _lineNumber, column.Header, modification.ToString()));
            return null;
        }
    }

    return parsed;
}
/**
 * Checks a smiles cell and splits it into a string list on the '|' separator
 * (MZTabConstants.BAR).
 *
 * All work is done by checkStringList, which reports {@link FormatErrorType#StringList}
 * when nothing could be parsed. Normally smiles may be "null", but in a "Complete" file
 * "null" values SHOULD in general not be given.
 *
 * @param column SHOULD NOT be null.
 * @param smiles SHOULD NOT be empty.
 * @return the list produced by checkStringList
 */
protected SplitList<string> checkSmiles(MZTabColumn column, string smiles) {
    SplitList<string> smilesList = checkStringList(column, smiles, MZTabConstants.BAR);
    return smilesList;
}
/**
 * Checks a peptide sequence cell and returns it in upper case.
 *
 * 'O' and 'U' are encoded by codons that are usually interpreted as stop codons and
 * therefore should not appear in a sequence; if either letter is present a
 * {@link FormatErrorType#Sequence} error is recorded. The upper-cased sequence is
 * returned even when that error was recorded.
 *
 * Upper-casing uses the invariant culture: residue letters are fixed ASCII codes, and a
 * culture-sensitive conversion (for example Turkish, where 'i' becomes U+0130) would
 * produce a letter that is not a residue code at all.
 *
 * @param column SHOULD NOT be null; its header is attached to a recorded error.
 * @param sequence SHOULD NOT be empty.
 * @return the upper-cased checked sequence, or null when checkData returned null
 */
protected string checkSequence(MZTabColumn column, string sequence) {
    string result = checkData(column, sequence, true);
    if (result == null) {
        return null;
    }

    result = result.ToUpperInvariant();

    // Plain character searches: no pattern object has to be built for a two-letter test.
    bool containsStopCodonResidue = result.IndexOf('O') >= 0 || result.IndexOf('U') >= 0;
    if (containsStopCodonResidue) {
        // The error carries the cell text as supplied by the caller, not the upper-cased form.
        _errorList.Add(new MZTabError(FormatErrorType.Sequence, _lineNumber, column.Header, sequence));
    }

    return result;
}
/**
 * Checks a cell and translates it into an {@link MZBoolean}. Only "0" and "1" may be used
 * to express a Boolean (0/1).
 *
 * The "null" marker is matched with an ordinal, case-insensitive comparison: it is a fixed
 * file-format token, so the match must not depend on the culture of the current thread.
 *
 * @param column SHOULD NOT be null; its header is attached to a recorded error.
 * @param target SHOULD NOT be empty.
 * @return the value found by MZBoolean.FindBoolean; null when checkData returned null, when
 *         the cell holds the "null" marker, or when nothing matched (in that last case a
 *         {@link FormatErrorType#MZBoolean} error is recorded as well)
 */
protected MZBoolean checkMZBoolean(MZTabColumn column, string target) {
    string cell = checkData(column, target, true);
    if (cell == null || cell.Equals("NULL", StringComparison.OrdinalIgnoreCase)) {
        return null;
    }

    MZBoolean parsed = MZBoolean.FindBoolean(cell);
    if (parsed == null) {
        _errorList.Add(new MZTabError(FormatErrorType.MZBoolean, _lineNumber, column.Header, cell));
    }

    return parsed;
}
/**
 * Checks an accession cell by delegating to checkData with true as its third argument.
 * NOTE(review): going by the documentation of the sibling helpers, that argument is the
 * switch under which a "null" cell is tolerated - confirm against checkData.
 *
 * @param column column the cell belongs to
 * @param target cell content to check
 * @return whatever checkData returns for the cell
 */
private string checkAccession(MZTabColumn column, string target) {
    string checkedAccession = checkData(column, target, true);
    return checkedAccession;
}
/**
 * Checks the unique cell and translates it into an {@link MZBoolean}. Only "0" and "1" may
 * be used to express a Boolean (0/1).
 *
 * All work, including reporting {@link FormatErrorType#MZBoolean}, is done by checkMZBoolean.
 *
 * @param column SHOULD NOT be null.
 * @param unique SHOULD NOT be empty.
 * @return the value produced by checkMZBoolean
 */
protected MZBoolean checkUnique(MZTabColumn column, string unique) {
    MZBoolean uniqueFlag = checkMZBoolean(column, unique);
    return uniqueFlag;
}
/**
 * Checks a cell and splits it into a string list on the given separator character.
 * If nothing can be parsed, a {@link FormatErrorType#StringList} error is recorded.
 *
 * The "null" marker is matched with an ordinal, case-insensitive comparison: it is a fixed
 * file-format token, so the match must not depend on the culture of the current thread.
 *
 * @param column SHOULD NOT be null; its header is attached to a recorded error.
 * @param target SHOULD NOT be empty.
 * @param splitChar separator between the list items
 * @return a new empty list when checkData returned null or the cell holds the "null" marker;
 *         otherwise the list returned by MZTabUtils.ParseStringList (which is empty when the
 *         StringList error was recorded)
 */
protected SplitList<string> checkStringList(MZTabColumn column, string target, char splitChar) {
    string cell = checkData(column, target, true);
    if (cell == null || cell.Equals("NULL", StringComparison.OrdinalIgnoreCase)) {
        return new SplitList<string>(splitChar);
    }

    SplitList<string> items = MZTabUtils.ParseStringList(splitChar, cell);
    if (items.Count == 0) {
        _errorList.Add(new MZTabError(FormatErrorType.StringList, _lineNumber, column.Header, cell, "" + splitChar));
    }

    return items;
}
/**
 * Checks the start cell by delegating to checkData with true as its third argument.
 * Normally start may be "null", but in a "Complete" file "null" values SHOULD in general
 * not be given.
 *
 * @param column SHOULD NOT be null.
 * @param start SHOULD NOT be empty.
 * @return whatever checkData returns for the cell
 */
protected string checkStart(MZTabColumn column, string start) {
    string checkedStart = checkData(column, start, true);
    return checkedStart;
}
/**
 * Checks the protein_coverage cell and translates it into a double.
 *
 * Parsing is delegated to checkDouble. A result equal to double.MinValue is passed straight
 * through (presumably checkDouble's "no usable value" marker - confirm against checkDouble).
 * Any other result that is below 0 or above 1 records a
 * {@link LogicalErrorType#ProteinCoverage} error; both 0 and 1 themselves are accepted.
 * Because the range test is written as "less than 0 or greater than 1", a NaN result
 * fails neither comparison and is returned unchanged.
 *
 * NOTICE: If ratios are included and the denominator is zero, the "INF" value MUST be used.
 * If the result leads to calculation errors (for example 0/0), this MUST be reported as
 * "not a number" ("NaN").
 *
 * @param column SHOULD NOT be null; its header is attached to a recorded error.
 * @param protein_coverage SHOULD NOT be empty.
 * @return the parsed coverage, or double.MinValue when checkDouble returned double.MinValue
 *         or the value was out of range
 */
protected double checkProteinCoverage(MZTabColumn column, string protein_coverage) {
    double coverage = checkDouble(column, protein_coverage);

    bool nothingParsed = coverage.Equals(double.MinValue);
    if (nothingParsed) {
        return double.MinValue;
    }

    // Keep this exact comparison form: rewriting it as an "inside the range" test
    // would change how NaN is treated.
    bool outOfRange = coverage < 0 || coverage > 1;
    if (!outOfRange) {
        return coverage;
    }

    _errorList.Add(new MZTabError(LogicalErrorType.ProteinCoverage, _lineNumber, column.Header, MZTabUtils.PrintDouble(coverage)));
    return double.MinValue;
}
/**
 * Checks the pre cell by delegating to checkData with true as its third argument.
 * Normally pre may be "null", but in a "Complete" file "null" values SHOULD in general
 * not be given.
 *
 * @param column SHOULD NOT be null.
 * @param pre SHOULD NOT be empty.
 * @return whatever checkData returns for the cell
 */
protected string checkPre(MZTabColumn column, string pre) {
    string checkedPre = checkData(column, pre, true);
    return checkedPre;
}
/**
 * Checks the post cell by delegating to checkData with true as its third argument.
 * Normally post may be "null", but in a "Complete" file "null" values SHOULD in general
 * not be given.
 *
 * @param column SHOULD NOT be null.
 * @param post SHOULD NOT be empty.
 * @return whatever checkData returns for the cell
 */
protected string checkPost(MZTabColumn column, string post) {
    string checkedPost = checkData(column, post, true);
    return checkedPost;
}
/**
 * Checks a cell and translates it into a parameter list split on the '|' character.
 * If nothing can be parsed, a {@link FormatErrorType#ParamList} error is recorded.
 *
 * The "null" marker is matched with an ordinal, case-insensitive comparison: it is a fixed
 * file-format token, so the match must not depend on the culture of the current thread.
 *
 * @param column SHOULD NOT be null; its header is attached to a recorded error.
 * @param target SHOULD NOT be empty.
 * @return a new empty list (separator MZTabConstants.BAR) when checkData returned null or
 *         the cell holds the "null" marker; otherwise the list returned by
 *         MZTabUtils.ParseParamList (which is empty when the ParamList error was recorded)
 */
protected SplitList<Param> checkParamList(MZTabColumn column, string target) {
    string cell = checkData(column, target, true);
    if (cell == null || cell.Equals("NULL", StringComparison.OrdinalIgnoreCase)) {
        return new SplitList<Param>(MZTabConstants.BAR);
    }

    SplitList<Param> parameters = MZTabUtils.ParseParamList(cell);
    if (parameters.Count == 0) {
        // The error carries the cell text as supplied by the caller.
        _errorList.Add(new MZTabError(FormatErrorType.ParamList, _lineNumber, "Column " + column.Header, target));
    }

    return parameters;
}
/**
 * Checks the numPSMs cell and translates it into an integer.
 *
 * All work, including the reporting of parse errors, is done by checkInteger.
 * Normally numPSMs may be "null", but in a "Complete" file "null" values SHOULD in general
 * not be given.
 *
 * @param column SHOULD NOT be null.
 * @param numPSMs SHOULD NOT be empty.
 * @return the value produced by checkInteger
 */
protected Integer checkNumPSMs(MZTabColumn column, string numPSMs) {
    Integer psmCount = checkInteger(column, numPSMs);
    return psmCount;
}
/**
 * Checks the numPeptidesUnique cell and translates it into an integer.
 *
 * All work, including the reporting of parse errors, is done by checkInteger.
 * Normally numPeptidesUnique may be "null", but in a "Complete" file "null" values SHOULD
 * in general not be given.
 *
 * @param column SHOULD NOT be null.
 * @param numPeptidesUnique SHOULD NOT be empty.
 * @return the value produced by checkInteger
 */
protected Integer checkNumPeptidesUnique(MZTabColumn column, string numPeptidesUnique) {
    Integer uniquePeptideCount = checkInteger(column, numPeptidesUnique);
    return uniquePeptideCount;
}
/**
 * Checks the numPeptidesDistinct cell and translates it into an integer.
 *
 * All work, including the reporting of parse errors, is done by checkInteger.
 * Normally numPeptidesDistinct may be "null", but in a "Complete" file "null" values SHOULD
 * in general not be given.
 *
 * @param column SHOULD NOT be null.
 * @param numPeptidesDistinct SHOULD NOT be empty.
 * @return the value produced by checkInteger
 */
protected Integer checkNumPeptidesDistinct(MZTabColumn column, string numPeptidesDistinct) {
    Integer distinctPeptideCount = checkInteger(column, numPeptidesDistinct);
    return distinctPeptideCount;
}
/**
 * Checks the species cell by delegating to checkData with true as its third argument.
 * Normally species may be "null", but in a "Complete" file "null" values SHOULD in general
 * not be given.
 *
 * @param column SHOULD NOT be null.
 * @param species SHOULD NOT be empty.
 * @return whatever checkData returns for the cell
 */
protected string checkSpecies(MZTabColumn column, string species) {
    string checkedSpecies = checkData(column, species, true);
    return checkedSpecies;
}
/**
 * Checks the spectraRef cell and translates it into a list of {@link SpectraRef}.
 *
 * If nothing can be parsed, a {@link FormatErrorType#SpectraRef} error is recorded. If a
 * parsed reference points at an ms_run whose Location is null, a
 * {@link LogicalErrorType#SpectraRef} error naming that run is recorded, the list is
 * emptied and the remaining references are not inspected.
 * Normally spectraRef may be "null", but in a "Complete" file "null" values SHOULD in
 * general not be given.
 *
 * The "null" marker is matched with an ordinal, case-insensitive comparison: it is a fixed
 * file-format token, so the match must not depend on the culture of the current thread.
 *
 * @param column SHOULD NOT be null; its header is attached to a recorded error.
 * @param spectraRef SHOULD NOT be empty.
 * @return the parsed references; an empty list when checkData returned null, the cell holds
 *         the "null" marker, or either error above was recorded
 */
protected List<SpectraRef> checkSpectraRef(MZTabColumn column, string spectraRef) {
    string cell = checkData(column, spectraRef, true);
    if (cell == null || cell.Equals("NULL", StringComparison.OrdinalIgnoreCase)) {
        return new List<SpectraRef>();
    }

    List<SpectraRef> references = MZTabUtils.ParseSpectraRefList(metadata, cell);
    if (references.Count == 0) {
        _errorList.Add(new MZTabError(FormatErrorType.SpectraRef, _lineNumber, column.Header, cell));
        return references;
    }

    foreach (SpectraRef reference in references) {
        MsRun run = reference.MsRun;
        if (run.Location != null) {
            continue;
        }

        _errorList.Add(new MZTabError(LogicalErrorType.SpectraRef, _lineNumber, column.Header, cell, "ms_run[" + run.Id + "]-location"));
        // Leave the loop immediately after emptying the list that is being enumerated.
        references.Clear();
        break;
    }

    return references;
}
/**
 * Checks the psm_id cell and translates it into an integer.
 *
 * All work, including the reporting of parse errors, is done by checkInteger.
 * Normally psm_id may be "null", but in a "Complete" file "null" values SHOULD in general
 * not be given.
 *
 * @param column SHOULD NOT be null.
 * @param psm_id SHOULD NOT be empty.
 * @return the value produced by checkInteger
 */
protected Integer checkPSMID(MZTabColumn column, string psm_id) {
    Integer psmId = checkInteger(column, psm_id);
    return psmId;
}
/**
 * Checks a plain string cell by delegating to checkData with true as its third argument.
 *
 * In the table-based sections (protein, peptide, and small molecule) there MUST NOT be any
 * empty cells. Some fields do not allow a "null" value, for example unit_id and accession.
 * In a "Complete" file "null" values SHOULD in general not be given.
 *
 * @param column SHOULD NOT be null.
 * @param target SHOULD NOT be empty.
 * @return whatever checkData returns for the cell
 */
protected string checkString(MZTabColumn column, string target) {
    string checkedText = checkData(column, target, true);
    return checkedText;
}
/**
 * Checks the reliability cell and translates it into a {@link Reliability}.
 *
 * Currently only "1", "2", "3" and "null" are correct values; anything else records a
 * {@link FormatErrorType#Reliability} error. In a "Complete" file "null" values SHOULD in
 * general not be given.
 *
 * The "null" marker is matched with an ordinal, case-insensitive comparison: it is a fixed
 * file-format token, so the match must not depend on the culture of the current thread.
 *
 * @param column SHOULD NOT be null; its header is attached to a recorded error.
 * @param reliability SHOULD NOT be empty.
 * @return the value found by Reliability.findReliability; null when checkData returned null,
 *         when the cell holds the "null" marker, or when nothing matched (in that last case
 *         the Reliability error is recorded as well)
 */
protected Reliability checkReliability(MZTabColumn column, string reliability) {
    string cell = checkData(column, reliability, true);
    if (cell == null || cell.Equals("NULL", StringComparison.OrdinalIgnoreCase)) {
        return null;
    }

    Reliability parsed = Reliability.findReliability(cell);
    if (parsed == null) {
        _errorList.Add(new MZTabError(FormatErrorType.Reliability, _lineNumber, column.Header, cell));
    }

    return parsed;
}
/**
 * Checks the taxid cell and translates it into an integer.
 *
 * All work, including the reporting of parse errors, is done by checkInteger.
 * Normally taxid may be "null", but in a "Complete" file "null" values SHOULD in general
 * not be given.
 *
 * @param column SHOULD NOT be null.
 * @param taxid SHOULD NOT be empty.
 * @return the value produced by checkInteger
 */
protected Integer checkTaxid(MZTabColumn column, string taxid) {
    Integer taxonomyId = checkInteger(column, taxid);
    return taxonomyId;
}
/**
 * Checks the searchEngine cell and translates it into a parameter list split on the '|'
 * character.
 *
 * All work, including reporting {@link FormatErrorType#ParamList}, is done by checkParamList.
 * Normally searchEngine may be "null", but in a "Complete" file "null" values SHOULD in
 * general not be given.
 *
 * @param column SHOULD NOT be null.
 * @param searchEngine SHOULD NOT be empty.
 * @return the list produced by checkParamList
 */
protected SplitList<Param> checkSearchEngine(MZTabColumn column, string searchEngine) {
    SplitList<Param> engines = checkParamList(column, searchEngine);
    return engines;
}
/**
 * Checks a cell and translates it into a Uri.
 * If MZTabUtils.ParseURI returns null, a {@link FormatErrorType#URI} error is recorded.
 *
 * The "null" marker is matched with an ordinal, case-insensitive comparison: it is a fixed
 * file-format token, so the match must not depend on the culture of the current thread.
 *
 * @param column column the cell belongs to; its header is attached to a recorded error
 * @param uri cell content to check
 * @return the parsed Uri; null when checkData returned null, when the cell holds the "null"
 *         marker, or when parsing failed
 */
protected Uri checkURI(MZTabColumn column, string uri) {
    string cell = checkData(column, uri, true);
    if (cell == null || cell.Equals("NULL", StringComparison.OrdinalIgnoreCase)) {
        return null;
    }

    Uri parsed = MZTabUtils.ParseURI(cell);
    if (parsed == null) {
        _errorList.Add(new MZTabError(FormatErrorType.URI, _lineNumber, "Column " + column.Header, cell));
    }

    return parsed;
}
/**
 * Checks the searchEngineScore cell and translates it into a parameter list split on the
 * '|' character.
 *
 * Parsing, including reporting {@link FormatErrorType#ParamList}, is done by checkParamList.
 * Afterwards every parameter must be a {@link CVParam} with a non-empty value (a score has
 * to be provided); each parameter that is not records one
 * {@link FormatErrorType#SearchEngineScore} error. Offending parameters stay in the list.
 * Normally searchEngineScore may be "null", but in a "Complete" file "null" values SHOULD
 * in general not be given.
 *
 * @param column SHOULD NOT be null; its header is attached to a recorded error.
 * @param searchEngineScore SHOULD NOT be empty.
 * @return the list produced by checkParamList
 */
protected SplitList<Param> checkSearchEngineScore(MZTabColumn column, string searchEngineScore) {
    SplitList<Param> scores = checkParamList(column, searchEngineScore);

    foreach (Param score in scores) {
        bool usableScore = score is CVParam && !string.IsNullOrEmpty(score.Value);
        if (usableScore) {
            continue;
        }

        _errorList.Add(new MZTabError(FormatErrorType.SearchEngineScore, _lineNumber, column.Header, searchEngineScore));
    }

    return scores;
}
/**
 * Checks the retention_time_window cell and translates it into a list of doubles split on
 * the '|' character.
 *
 * If nothing can be parsed, a {@link FormatErrorType#DoubleList} error is recorded.
 * Normally retention_time_window may be "null", but in a "Complete" file "null" values
 * SHOULD in general not be given.
 *
 * The "null" marker is matched with an ordinal, case-insensitive comparison: it is a fixed
 * file-format token, so the match must not depend on the culture of the current thread.
 *
 * @param column SHOULD NOT be null; its header is attached to a recorded error.
 * @param retention_time_window SHOULD NOT be empty.
 * @return a new empty list (separator MZTabConstants.BAR) when checkData returned null or
 *         the cell holds the "null" marker; otherwise the list returned by
 *         MZTabUtils.ParseDoubleList (which is empty when the DoubleList error was recorded)
 */
protected SplitList<double> checkRetentionTimeWindow(MZTabColumn column, string retention_time_window) {
    string cell = checkData(column, retention_time_window, true);
    if (cell == null || cell.Equals("NULL", StringComparison.OrdinalIgnoreCase)) {
        return new SplitList<double>(MZTabConstants.BAR);
    }

    SplitList<double> window = MZTabUtils.ParseDoubleList(cell);
    if (window.Count == 0) {
        _errorList.Add(new MZTabError(FormatErrorType.DoubleList, _lineNumber, column.Header, cell, "" + MZTabConstants.BAR));
    }

    return window;
}
/**
 * Checks a modifications cell and translates it into a {@link Modification} list split on
 * the ',' character.
 *
 * If nothing can be parsed, a {@link FormatErrorType#ModificationList} error is recorded.
 * If software cannot determine protein-level modifications, "null" MUST be used.
 * If the software has determined that there are no modifications to a given protein,
 * "0" MUST be used. In a "Complete" file "null" values SHOULD in general not be given.
 *
 * The "null" marker is matched with an ordinal, case-insensitive comparison: it is a fixed
 * file-format token, so the match must not depend on the culture of the current thread.
 *
 * @param section SHOULD NOT be null; passed through to MZTabUtils.ParseModificationList.
 * @param column SHOULD NOT be null; its header is attached to a recorded error.
 * @param modificationsLabel SHOULD NOT be empty.
 * @return a new empty list (separator MZTabConstants.COMMA) when checkData returned null or
 *         the cell holds "null" or "0"; otherwise the list returned by
 *         MZTabUtils.ParseModificationList (which is empty when the ModificationList error
 *         was recorded)
 */
protected SplitList<Modification> checkModifications(Section section, MZTabColumn column, string modificationsLabel) {
    string cell = checkData(column, modificationsLabel, true);

    bool nothingToParse = cell == null
                          || cell.Equals("NULL", StringComparison.OrdinalIgnoreCase)
                          || cell.Equals("0");
    if (nothingToParse) {
        return new SplitList<Modification>(MZTabConstants.COMMA);
    }

    // NOTE(review): the parser receives the cell text as supplied by the caller, whereas the
    // sibling check methods parse the value returned by checkData. Kept as is because
    // checkData is not visible here - confirm whether the checked value should be parsed.
    SplitList<Modification> modifications = MZTabUtils.ParseModificationList(section, modificationsLabel);
    if (modifications.Count == 0) {
        _errorList.Add(new MZTabError(FormatErrorType.ModificationList, _lineNumber, column.Header, cell));
    }

    return modifications;
}