/**
         * For proteins and peptides modifications SHOULD be reported using either UNIMOD or PSI-MOD accessions.
         * As these two ontologies are not applicable to small molecules, so-called CHEMMODs can also be defined.
         *
         * Ambiguity of modification position MUST NOT be reported at the Protein level.
         */
        protected SplitList<Modification> checkModifications(MZTabColumn column, string target)
        {
            // Parsing is delegated to the section-aware overload; only the extra rules are applied here.
            SplitList<Modification> parsed = checkModifications(_section, column, target);

            foreach (Modification entry in parsed){
                // More than one entry in the position map is reported as AmbiguityMod
                // (marked as a warning by the original author); the check continues.
                bool hasSeveralPositions = entry.PositionMap.Count > 1;
                if (hasSeveralPositions){
                    _errorList.Add(new MZTabError(LogicalErrorType.AmbiguityMod, _lineNumber, column.Header,
                                                  entry.ToString()));
                }

                // CHEMMOD entries are reported too (also marked as a warning); the check continues.
                bool isChemmod = entry.Type == Modification.ModificationType.CHEMMOD;
                if (isChemmod){
                    _errorList.Add(new MZTabError(LogicalErrorType.CHEMMODS, _lineNumber, column.Header,
                                                  entry.ToString()));
                }

                if (entry.Type == Modification.ModificationType.SUBST){
                    // A SUBST entry for which parseSubstitutionIdentifier yields a non-null result is
                    // reported and aborts the whole check with a null result.
                    if (parseSubstitutionIdentifier(entry.Accession) != null){
                        _errorList.Add(new MZTabError(LogicalErrorType.SubstituteIdentifier, _lineNumber,
                                                      column.Header, entry.ToString()));
                        return null;
                    }
                }
            }

            return parsed;
        }
        /**
         * Validates an accession cell through checkData and enforces that every accession is seen only once.
         *
         * @param column column the cell belongs to; its Header is used in the error report
         * @param accession raw cell text
         * @return the value produced by checkData, or null when checkData returned null or when the
         *         accession could not be added to accessionSet (a DuplicationAccession error is
         *         recorded in that case)
         */
        protected string checkAccession(MZTabColumn column, string accession)
        {
            string checkedAccession = checkData(column, accession, false);
            if (checkedAccession == null){
                return null;
            }

            // accessionSet.Add is expected to return false for a value that is already present
            // (set semantics — confirm against the field's declaration).
            bool firstOccurrence = accessionSet.Add(checkedAccession);
            if (firstOccurrence){
                return checkedAccession;
            }

            _errorList.Add(new MZTabError(LogicalErrorType.DuplicationAccession, _lineNumber, column.Header,
                                          checkedAccession));
            return null;
        }
Beispiel #3
0
        /**
         * Defines the used unit for a column in the peptide/protein/small_molecule section.
         * The format of the value has to be {column name}={Parameter defining the unit}
         *
         * @param column column the unit applies to; must not be null and must not be an AbundanceColumn
         * @param value parameter describing the unit; must not be null
         * @throws NullReferenceException when column or value is null
         * @throws ArgumentException when column is an AbundanceColumn
         */
        internal ColUnit(MZTabColumn column, Param value)
        {
            // NOTE(review): NullReferenceException is kept on purpose so that callers which already
            // catch it keep working; ArgumentNullException would be the conventional type.
            if (column == null){
                throw new NullReferenceException("MZTabColumn can not set null");
            }

            if (value == null){
                throw new NullReferenceException("Param can not set null");
            }

            if (column is PluginMzTab.Lib.Model.AbundanceColumn){
                // ArgumentException instead of the bare Exception: it names the failure precisely and,
                // being derived from Exception, is still caught by existing catch (Exception) handlers.
                // The single-argument constructor keeps the Message text unchanged.
                throw new ArgumentException("Colunit MUST NOT be used to define a unit for quantification columns.");
            }

            this.column = column;
            this.value = value;
        }
        /**
         * Parses a modification cell through the section-aware overload and verifies every reported
         * position against the given sequence.
         * Positions from 0 up to sequence.Length + 1 (inclusive) are accepted; any other position records
         * a ModificationPosition error and makes the method return null. CHEMMOD entries are reported
         * with LogicalErrorType.CHEMMODS without stopping the check.
         *
         * @param column column the cell belongs to; its Header is used in error reports
         * @param sequence sequence the positions refer to; must not be null because its Length is read
         * @param target raw cell text
         * @return the parsed modification list, or null when a position lies outside the accepted range
         */
        protected SplitList<Modification> checkModifications(MZTabColumn column, string sequence, string target)
        {
            SplitList<Modification> parsed = checkModifications(_section, column, target);

            // Highest accepted position: one past the last residue.
            int maxPosition = sequence.Length + 1;

            foreach (Modification entry in parsed){
                foreach (int site in entry.PositionMap.Keys){
                    bool insideSequence = site >= 0 && site <= maxPosition;
                    if (insideSequence){
                        continue;
                    }

                    _errorList.Add(new MZTabError(LogicalErrorType.ModificationPosition, _lineNumber, column.Header,
                                                  entry.ToString(), sequence));
                    return null;
                }

                bool isChemmod = entry.Type == Modification.ModificationType.CHEMMOD;
                if (isChemmod){
                    _errorList.Add(new MZTabError(LogicalErrorType.CHEMMODS, _lineNumber, column.Header,
                                                  entry.ToString()));
                }
            }

            return parsed;
        }
        /**
         * Modification check that applies the CHEMMOD rules on top of the base class parsing.
         *
         * As UNIMOD and PSI-MOD are not applicable to small molecules, so-called CHEMMODs can also be defined.
         * CHEMMODs MUST NOT be used if the modification can be reported using a PSI-MOD or UNIMOD accession.
         * Mass deltas MUST NOT be used for CHEMMODs if the delta can be expressed through a known chemical formula.
         *
         * For every CHEMMOD entry: a CHEMMODS error is recorded when the raw cell text contains "-MOD:" or
         * "-UNIMOD:", and a CHEMMODSAccession error is recorded (ending the check) when
         * parseChemmodAccession returns null for the entry's accession.
         *
         * @param column column the cell belongs to; its Header is used in error reports
         * @param target raw cell text
         * @return the parsed modification list, or null when a CHEMMOD accession is rejected
         */
        protected SplitList<Modification> checkModifications(MZTabColumn column, string target)
        {
            SplitList<Modification> parsed = base.checkModifications(_section, column, target);

            foreach (Modification entry in parsed){
                bool isChemmod = entry.Type == Modification.ModificationType.CHEMMOD;
                if (!isChemmod){
                    continue;
                }

                // The whole cell text, not just this entry, is searched for the ontology prefixes.
                bool mentionsOntology = target.Contains("-MOD:") || target.Contains("-UNIMOD:");
                if (mentionsOntology){
                    _errorList.Add(new MZTabError(LogicalErrorType.CHEMMODS, _lineNumber, column.Header,
                                                  entry.ToString()));
                }

                if (parseChemmodAccession(entry.Accession) != null){
                    continue;
                }

                _errorList.Add(new MZTabError(FormatErrorType.CHEMMODSAccession, _lineNumber, column.Header,
                                              entry.ToString()));
                return null;
            }

            return parsed;
        }
 /**
  * Checks a smiles cell by delegating to checkStringList with MZTabConstants.BAR as the separator
  * (documented as the '|' character). Parsing and error reporting happen in checkStringList.
  *
  * @param column column the cell belongs to
  * @param smiles raw cell text
  * @return the list produced by checkStringList
  */
 protected SplitList<string> checkSmiles(MZTabColumn column, string smiles)
 {
     SplitList<string> smilesList = checkStringList(column, smiles, MZTabConstants.BAR);
     return smilesList;
 }
        /**
         * Check and translate peptide sequence. 'O' and 'U' are encoded by codons that are usually
         * interpreted as stop codons, which can not be displayed in the sequence. If either letter is
         * found, a FormatErrorType.Sequence error is recorded.
         *
         * @param column column the cell belongs to; its Header is used in the error report
         * @param sequence raw cell text; reported verbatim when the check fails
         * @return the upper-cased value produced by checkData (returned even when the error above was
         *         recorded), or null when checkData returned null
         */
        protected string checkSequence(MZTabColumn column, string sequence)
        {
            string result = checkData(column, sequence, true);

            if (result == null){
                return null;
            }

            // Residue letters are data, not linguistic text: culture-sensitive ToUpper() would map
            // 'i' to U+0130 under Turkish cultures and corrupt the sequence.
            result = result.ToUpperInvariant();

            // Plain character search; avoids building a Regex on every call.
            bool hasForbiddenResidue = result.IndexOf('O') >= 0 || result.IndexOf('U') >= 0;
            if (hasForbiddenResidue){
                _errorList.Add(new MZTabError(FormatErrorType.Sequence, _lineNumber, column.Header, sequence));
            }

            return result;
        }
        /**
         * Check and translate target to MZBoolean through MZBoolean.FindBoolean.
         * According to the format only "0" and "1" are allowed to express a Boolean (0/1).
         * A value FindBoolean cannot resolve is recorded as a FormatErrorType.MZBoolean error.
         *
         * @param column column the cell belongs to; its Header is used in the error report
         * @param target raw cell text
         * @return the resolved MZBoolean; null when checkData returned null, when the cell is the
         *         "null" keyword in any casing, or when FindBoolean returned null
         */
        protected MZBoolean checkMZBoolean(MZTabColumn column, string target)
        {
            string result = checkData(column, target, true);

            // "null" is a file-format keyword, so it is matched ordinally instead of with the
            // rules of whatever culture the current thread happens to run under.
            if (result == null || result.Equals("NULL", StringComparison.OrdinalIgnoreCase)){
                return null;
            }

            MZBoolean value = MZBoolean.FindBoolean(result);
            if (value == null){
                _errorList.Add(new MZTabError(FormatErrorType.MZBoolean, _lineNumber, column.Header, result));
            }

            return value;
        }
 /**
  * Checks an accession cell by delegating to checkData with true as the third argument.
  *
  * @param column column the cell belongs to
  * @param target raw cell text
  * @return whatever checkData returns for the cell
  */
 private string checkAccession(MZTabColumn column, string target)
 {
     string checkedAccession = checkData(column, target, true);
     return checkedAccession;
 }
 /**
  * Checks a unique cell by delegating to checkMZBoolean, which performs the parsing and the
  * error reporting.
  *
  * @param column column the cell belongs to
  * @param unique raw cell text
  * @return the MZBoolean produced by checkMZBoolean
  */
 protected MZBoolean checkUnique(MZTabColumn column, string unique)
 {
     MZBoolean uniqueFlag = checkMZBoolean(column, unique);
     return uniqueFlag;
 }
        /**
         * Check and translate target string into a string list which is split by the splitChar character.
         * If MZTabUtils.ParseStringList yields no items, a FormatErrorType.StringList error is recorded.
         *
         * @param column column the cell belongs to; its Header is used in the error report
         * @param target raw cell text
         * @param splitChar separator between list items
         * @return the parsed list; an empty list bound to splitChar when checkData returned null or
         *         the cell is the "null" keyword in any casing
         */
        protected SplitList<string> checkStringList(MZTabColumn column, string target, char splitChar)
        {
            string result = checkData(column, target, true);

            // "null" is a file-format keyword, so it is matched ordinally instead of with the
            // rules of whatever culture the current thread happens to run under.
            if (result == null || result.Equals("NULL", StringComparison.OrdinalIgnoreCase)){
                return new SplitList<string>(splitChar);
            }

            SplitList<string> stringList = MZTabUtils.ParseStringList(splitChar, result);
            if (stringList.Count == 0){
                _errorList.Add(new MZTabError(FormatErrorType.StringList, _lineNumber, column.Header, result,
                                              splitChar.ToString()));
            }

            return stringList;
        }
 /**
  * Checks a start cell by delegating to checkData with true as the third argument.
  * Normally start can be "null", but in a "Complete" file "null" values SHOULD in general not be given.
  *
  * @param column column the cell belongs to
  * @param start raw cell text
  * @return whatever checkData returns for the cell
  */
 protected string checkStart(MZTabColumn column, string start)
 {
     string checkedStart = checkData(column, start, true);
     return checkedStart;
 }
        /**
         * Check and translate protein_coverage string into a double through checkDouble.
         * The value must lie in the closed range [0, 1]; anything below 0 or above 1 records a
         * LogicalErrorType.ProteinCoverage error.
         *
         * NOTICE: If ratios are included and the denominator is zero, the "INF" value MUST be used. If the
         * result leads to calculation errors (for example 0/0), this MUST be reported as "not a number" ("NaN").
         *
         * @param column column the cell belongs to; its Header is used in the error report
         * @param protein_coverage raw cell text
         * @return the parsed coverage, or double.MinValue when checkDouble returned double.MinValue
         *         or the value is out of range
         */
        protected double checkProteinCoverage(MZTabColumn column, string protein_coverage)
        {
            double coverage = checkDouble(column, protein_coverage);

            // double.MinValue is passed through untouched (presumably checkDouble's
            // "no usable value" marker — confirm against checkDouble).
            if (coverage.Equals(double.MinValue)){
                return double.MinValue;
            }

            // Written as two "outside" comparisons on purpose: both are false for NaN,
            // so a NaN value is returned unchanged rather than reported.
            bool outOfRange = coverage < 0 || coverage > 1;
            if (!outOfRange){
                return coverage;
            }

            _errorList.Add(new MZTabError(LogicalErrorType.ProteinCoverage, _lineNumber, column.Header,
                                          MZTabUtils.PrintDouble(coverage)));
            return double.MinValue;
        }
 /**
  * Checks a pre cell by delegating to checkData with true as the third argument.
  * Normally pre can be "null", but in a "Complete" file "null" values SHOULD in general not be given.
  *
  * @param column column the cell belongs to
  * @param pre raw cell text
  * @return whatever checkData returns for the cell
  */
 protected string checkPre(MZTabColumn column, string pre)
 {
     string checkedPre = checkData(column, pre, true);
     return checkedPre;
 }
 /**
  * Checks a post cell by delegating to checkData with true as the third argument.
  * Normally post can be "null", but in a "Complete" file "null" values SHOULD in general not be given.
  *
  * @param column column the cell belongs to
  * @param post raw cell text
  * @return whatever checkData returns for the cell
  */
 protected string checkPost(MZTabColumn column, string post)
 {
     string checkedPost = checkData(column, post, true);
     return checkedPost;
 }
        /**
         * Check and translate target string into a parameter list through MZTabUtils.ParseParamList
         * (items are documented as being separated by the '|' character).
         * If parsing yields no items, a FormatErrorType.ParamList error is recorded.
         *
         * @param column column the cell belongs to; its Header is used in the error report
         * @param target raw cell text; reported verbatim when parsing fails
         * @return the parsed list; an empty list bound to MZTabConstants.BAR when checkData returned
         *         null or the cell is the "null" keyword in any casing
         */
        protected SplitList<Param> checkParamList(MZTabColumn column, string target)
        {
            string result = checkData(column, target, true);

            // "null" is a file-format keyword, so it is matched ordinally instead of with the
            // rules of whatever culture the current thread happens to run under.
            if (result == null || result.Equals("NULL", StringComparison.OrdinalIgnoreCase)){
                return new SplitList<Param>(MZTabConstants.BAR);
            }

            SplitList<Param> paramList = MZTabUtils.ParseParamList(result);
            if (paramList.Count == 0){
                _errorList.Add(new MZTabError(FormatErrorType.ParamList, _lineNumber, "Column " + column.Header, target));
            }

            return paramList;
        }
 /**
  * Checks a numPSMs cell by delegating to checkInteger, which performs the parsing and the
  * error reporting.
  * Normally numPSMs can be "null", but in a "Complete" file "null" values SHOULD in general not be given.
  *
  * @param column column the cell belongs to
  * @param numPSMs raw cell text
  * @return whatever checkInteger returns for the cell
  */
 protected Integer checkNumPSMs(MZTabColumn column, string numPSMs)
 {
     Integer psmCount = checkInteger(column, numPSMs);
     return psmCount;
 }
 /**
  * Checks a numPeptidesUnique cell by delegating to checkInteger, which performs the parsing and
  * the error reporting.
  * Normally numPeptidesUnique can be "null", but in a "Complete" file "null" values SHOULD in general
  * not be given.
  *
  * @param column column the cell belongs to
  * @param numPeptidesUnique raw cell text
  * @return whatever checkInteger returns for the cell
  */
 protected Integer checkNumPeptidesUnique(MZTabColumn column, string numPeptidesUnique)
 {
     Integer uniquePeptideCount = checkInteger(column, numPeptidesUnique);
     return uniquePeptideCount;
 }
 /**
  * Checks a numPeptidesDistinct cell by delegating to checkInteger, which performs the parsing and
  * the error reporting.
  * Normally numPeptidesDistinct can be "null", but in a "Complete" file "null" values SHOULD in general
  * not be given.
  *
  * @param column column the cell belongs to
  * @param numPeptidesDistinct raw cell text
  * @return whatever checkInteger returns for the cell
  */
 protected Integer checkNumPeptidesDistinct(MZTabColumn column, string numPeptidesDistinct)
 {
     Integer distinctPeptideCount = checkInteger(column, numPeptidesDistinct);
     return distinctPeptideCount;
 }
 /**
  * Checks a species cell by delegating to checkData with true as the third argument.
  * Normally species can be "null", but in a "Complete" file "null" values SHOULD in general not be given.
  *
  * @param column column the cell belongs to
  * @param species raw cell text
  * @return whatever checkData returns for the cell
  */
 protected string checkSpecies(MZTabColumn column, string species)
 {
     string checkedSpecies = checkData(column, species, true);
     return checkedSpecies;
 }
        /**
         * Check and translate spectraRef string into a SpectraRef list through
         * MZTabUtils.ParseSpectraRefList.
         * An empty parse result records a FormatErrorType.SpectraRef error. If any parsed reference
         * points to an ms_run whose Location is null, a LogicalErrorType.SpectraRef error is recorded
         * and the list is emptied.
         * Normally spectraRef can be "null", but in a "Complete" file "null" values SHOULD in general
         * not be given.
         *
         * @param column column the cell belongs to; its Header is used in error reports
         * @param spectraRef raw cell text
         * @return the parsed references; an empty list when checkData returned null, the cell is the
         *         "null" keyword in any casing, parsing produced nothing, or an ms_run has no location
         */
        protected List<SpectraRef> checkSpectraRef(MZTabColumn column, string spectraRef)
        {
            string result_spectraRef = checkData(column, spectraRef, true);

            // "null" is a file-format keyword, so it is matched ordinally instead of with the
            // rules of whatever culture the current thread happens to run under.
            if (result_spectraRef == null || result_spectraRef.Equals("NULL", StringComparison.OrdinalIgnoreCase)){
                return new List<SpectraRef>();
            }

            List<SpectraRef> refList = MZTabUtils.ParseSpectraRefList(metadata, result_spectraRef);
            if (refList.Count == 0){
                _errorList.Add(new MZTabError(FormatErrorType.SpectraRef, _lineNumber, column.Header, result_spectraRef));
                return refList;
            }

            foreach (SpectraRef reference in refList){
                MsRun run = reference.MsRun;
                if (run.Location != null){
                    continue;
                }

                _errorList.Add(new MZTabError(LogicalErrorType.SpectraRef, _lineNumber, column.Header,
                                              result_spectraRef, "ms_run[" + run.Id + "]-location"));
                // Clearing while enumerating is safe only because the loop is left immediately.
                refList.Clear();
                break;
            }

            return refList;
        }
 /**
  * Checks a psm_id cell by delegating to checkInteger, which performs the parsing and the
  * error reporting.
  * Normally psm_id can be "null", but in a "Complete" file "null" values SHOULD in general not be given.
  *
  * @param column column the cell belongs to
  * @param psm_id raw cell text
  * @return whatever checkInteger returns for the cell
  */
 protected Integer checkPSMID(MZTabColumn column, string psm_id)
 {
     Integer psmIdentifier = checkInteger(column, psm_id);
     return psmIdentifier;
 }
 /**
  * Checks a plain text cell by delegating to checkData with true as the third argument.
  * In the table-based sections (protein, peptide, and small molecule) there MUST NOT be any empty cells.
  * Some fields do not allow the "null" value, for example unit_id and accession. In a "Complete" file
  * "null" values SHOULD in general not be given.
  *
  * @param column column the cell belongs to
  * @param target raw cell text
  * @return whatever checkData returns for the cell
  */
 protected string checkString(MZTabColumn column, string target)
 {
     string checkedText = checkData(column, target, true);
     return checkedText;
 }
        /**
         * Check and translate reliability string into a Reliability through Reliability.findReliability.
         * According to the format only "1", "2", "3" and "null" are correct values; a value that
         * findReliability cannot resolve records a FormatErrorType.Reliability error.
         * In a "Complete" file "null" values SHOULD in general not be given.
         *
         * @param column column the cell belongs to; its Header is used in the error report
         * @param reliability raw cell text
         * @return the resolved Reliability; null when checkData returned null, when the cell is the
         *         "null" keyword in any casing, or when findReliability returned null
         */
        protected Reliability checkReliability(MZTabColumn column, string reliability)
        {
            string result_reliability = checkData(column, reliability, true);

            // "null" is a file-format keyword, so it is matched ordinally instead of with the
            // rules of whatever culture the current thread happens to run under.
            if (result_reliability == null || result_reliability.Equals("NULL", StringComparison.OrdinalIgnoreCase)){
                return null;
            }

            Reliability result = Reliability.findReliability(result_reliability);
            if (result == null){
                _errorList.Add(new MZTabError(FormatErrorType.Reliability, _lineNumber, column.Header, result_reliability));
            }

            return result;
        }
 /**
  * Checks a taxid cell by delegating to checkInteger, which performs the parsing and the
  * error reporting.
  * Normally taxid can be "null", but in a "Complete" file "null" values SHOULD in general not be given.
  *
  * @param column column the cell belongs to
  * @param taxid raw cell text
  * @return whatever checkInteger returns for the cell
  */
 protected Integer checkTaxid(MZTabColumn column, string taxid)
 {
     Integer taxonomyId = checkInteger(column, taxid);
     return taxonomyId;
 }
 /**
  * Checks a searchEngine cell by delegating to checkParamList, which performs the parsing and the
  * error reporting.
  * Normally searchEngine can be "null", but in a "Complete" file "null" values SHOULD in general
  * not be given.
  *
  * @param column column the cell belongs to
  * @param searchEngine raw cell text
  * @return the parameter list produced by checkParamList
  */
 protected SplitList<Param> checkSearchEngine(MZTabColumn column, string searchEngine)
 {
     SplitList<Param> engines = checkParamList(column, searchEngine);
     return engines;
 }
        /**
         * Check and translate uri string into a Uri through MZTabUtils.ParseURI.
         * A value ParseURI cannot resolve records a FormatErrorType.URI error.
         *
         * @param column column the cell belongs to; its Header is used in the error report
         * @param uri raw cell text
         * @return the parsed Uri; null when checkData returned null, when the cell is the "null"
         *         keyword in any casing, or when ParseURI returned null
         */
        protected Uri checkURI(MZTabColumn column, string uri)
        {
            string result_uri = checkData(column, uri, true);

            // "null" is a file-format keyword, so it is matched ordinally instead of with the
            // rules of whatever culture the current thread happens to run under.
            if (result_uri == null || result_uri.Equals("NULL", StringComparison.OrdinalIgnoreCase)){
                return null;
            }

            Uri result = MZTabUtils.ParseURI(result_uri);
            if (result == null){
                _errorList.Add(new MZTabError(FormatErrorType.URI, _lineNumber, "Column " + column.Header, result_uri));
            }

            return result;
        }
        /**
         * Check and translate searchEngineScore string into a parameter list through checkParamList.
         * Every parameter that is not a CVParam, or whose Value is null or empty (a score should be
         * provided), records a FormatErrorType.SearchEngineScore error; the parameter stays in the list.
         * Normally searchEngineScore can be "null", but in a "Complete" file "null" values SHOULD in
         * general not be given.
         *
         * @param column column the cell belongs to; its Header is used in the error report
         * @param searchEngineScore raw cell text; reported verbatim for each offending parameter
         * @return the parameter list produced by checkParamList
         */
        protected SplitList<Param> checkSearchEngineScore(MZTabColumn column, string searchEngineScore)
        {
            SplitList<Param> scores = checkParamList(column, searchEngineScore);

            foreach (Param score in scores){
                // Value is only read once the entry is known to be a CVParam.
                bool isScoredCvParam = score is CVParam && !string.IsNullOrEmpty(score.Value);
                if (isScoredCvParam){
                    continue;
                }

                _errorList.Add(new MZTabError(FormatErrorType.SearchEngineScore, _lineNumber, column.Header,
                                              searchEngineScore));
            }

            return scores;
        }
        /**
         * Check and translate retention_time_window string into a double list through
         * MZTabUtils.ParseDoubleList (items are documented as being separated by the '|' character).
         * If parsing yields no items, a FormatErrorType.DoubleList error is recorded.
         * Normally retention_time_window can be "null", but in a "Complete" file "null" values SHOULD in
         * general not be given.
         *
         * @param column column the cell belongs to; its Header is used in the error report
         * @param retention_time_window raw cell text
         * @return the parsed list; an empty list bound to MZTabConstants.BAR when checkData returned
         *         null or the cell is the "null" keyword in any casing
         */
        protected SplitList<double> checkRetentionTimeWindow(MZTabColumn column, string retention_time_window)
        {
            string result = checkData(column, retention_time_window, true);

            // "null" is a file-format keyword, so it is matched ordinally instead of with the
            // rules of whatever culture the current thread happens to run under.
            if (result == null || result.Equals("NULL", StringComparison.OrdinalIgnoreCase)){
                return new SplitList<double>(MZTabConstants.BAR);
            }

            SplitList<double> valueList = MZTabUtils.ParseDoubleList(result);
            if (valueList.Count == 0){
                _errorList.Add(new MZTabError(FormatErrorType.DoubleList, _lineNumber, column.Header, result,
                                              "" + MZTabConstants.BAR));
            }

            return valueList;
        }
        /**
         * Check and translate a modification cell into a Modification list through
         * MZTabUtils.ParseModificationList (items are documented as being separated by the ',' character).
         * If parsing yields no items, a FormatErrorType.ModificationList error is recorded.
         * Normally the cell can be "null", but in a "Complete" file "null" values SHOULD in general
         * not be given.
         *
         * If software cannot determine protein-level modifications, "null" MUST be used.
         * If the software has determined that there are no modifications to a given protein "0" MUST be used.
         *
         * @param section section the cell belongs to; forwarded to the parser
         * @param column column the cell belongs to; its Header is used in the error report
         * @param modificationsLabel raw cell text
         * @return the parsed list; an empty list bound to MZTabConstants.COMMA when checkData returned
         *         null or the cell is "0" or the "null" keyword in any casing
         */
        protected SplitList<Modification> checkModifications(Section section, MZTabColumn column,
                                                             string modificationsLabel)
        {
            string result_modifications = checkData(column, modificationsLabel, true);

            // "null" is a file-format keyword, so it is matched ordinally instead of with the
            // rules of whatever culture the current thread happens to run under.
            if (result_modifications == null ||
                result_modifications.Equals("NULL", StringComparison.OrdinalIgnoreCase) ||
                result_modifications.Equals("0")){
                return new SplitList<Modification>(MZTabConstants.COMMA);
            }

            // Parse the value checkData handed back (as the other check methods do) so that the text
            // being parsed is the same text that was just validated and is reported on failure.
            SplitList<Modification> modificationList = MZTabUtils.ParseModificationList(section, result_modifications);
            if (modificationList.Count == 0){
                _errorList.Add(new MZTabError(FormatErrorType.ModificationList, _lineNumber, column.Header,
                                              result_modifications));
            }

            return modificationList;
        }