/**
 * Create an mzTab data line parser. It holds the state shared by the methods that turn a
 * data line into an MZTabRecord structure.
 *
 * NOTICE: MZTabColumnFactory maintains a set of MZTabColumn objects which have an internal
 * logical position and order. In a physical mzTab file the user is allowed to ignore this
 * logical organisation and provide the data in their own column order. To distinguish the
 * two, a physical position (a positive integer) records the column location in the mzTab
 * file, and a PositionMapping structure maintains the mapping between them.
 *
 * All required arguments are validated before any field is assigned. NullReferenceException
 * is kept as the thrown type (instead of ArgumentNullException) so that existing catch
 * blocks written against this constructor keep working.
 *
 * @param factory SHOULD NOT be null; source of the offset-to-column map.
 * @param positionMapping SHOULD NOT be null; its exchange() result is stored alongside it.
 * @param metadata SHOULD NOT be null.
 * @param errorList error collector; when null a new, empty MZTabErrorList is used instead.
 * @throws NullReferenceException if factory, positionMapping or metadata is null.
 */
protected MZTabDataLineParser(MZTabColumnFactory factory, PositionMapping positionMapping, Metadata metadata,
    MZTabErrorList errorList) {
    if (factory == null) {
        throw new NullReferenceException("Column header factory should be create first.");
    }
    // Previously a null mapping failed on the dereference below with no explanatory message.
    if (positionMapping == null) {
        throw new NullReferenceException("Position mapping should be created first.");
    }
    if (metadata == null) {
        throw new NullReferenceException("Metadata should be parser first.");
    }

    this.factory = factory;
    this.positionMapping = positionMapping;
    this.metadata = metadata;

    // Derived lookups, computed once from the validated arguments.
    exchangeMapping = positionMapping.exchange();
    mapping = factory.GetOffsetColumnsMap();

    _errorList = errorList ?? new MZTabErrorList();
}
/**
 * Create a PSM data line parser. This constructor adds no behaviour of its own: every
 * argument is handed, unchanged and in the same order, to the base-class constructor.
 *
 * @param factory column factory forwarded to the base constructor.
 * @param positionMapping position mapping forwarded to the base constructor.
 * @param metadata metadata forwarded to the base constructor.
 * @param errorList error list forwarded to the base constructor.
 */
public PSMLineParser(
    MZTabColumnFactory factory,
    PositionMapping positionMapping,
    Metadata metadata,
    MZTabErrorList errorList)
    : base(factory, positionMapping, metadata, errorList)
{
}
/**
 * Read an mzTab document line by line, validate it, and (when no error has been recorded in
 * errorList) build the resulting MZTabFile into _mzTabFile.
 *
 * Processing rules, in order, for every line:
 *  - empty lines and lines starting with "MTH" or "#" are skipped;
 *  - lines starting with the comment section prefix are parsed as comments;
 *  - every other line is mapped to a Section; its level selects the parser:
 *    1 metadata, 2/3 protein header/data, 4/5 peptide header/data,
 *    6/7 PSM header/data, 8/9 small molecule header/data.
 * Sections must not appear in decreasing level order, each header may appear only once and
 * data lines require a preceding header. Scanning stops at the first metadata line that is
 * reached while errorList already contains errors.
 *
 * The reader is always closed when this method exits, including when an exception aborts
 * the scan.
 *
 * @param reader source of the mzTab text; closed by this method.
 * @throws MZTabException on an unknown line prefix, a section that is out of order, a
 *         repeated header line, or a data line without its header line.
 * NOTE(review): the original documentation also lists an I/O exception from reading and an
 * MZTabErrorOverflowException tied to the mztab.max_error_count setting; neither is raised
 * directly by the code in this method - presumably they come from the reader and the error
 * list. Confirm against those types.
 */
public void check(StreamReader reader) {
    // File-format tokens must be matched byte-wise, not with culture-sensitive rules.
    // Fully qualified because the file's using directives are not visible from this block.
    const System.StringComparison ordinal = System.StringComparison.Ordinal;

    COMLineParser comParser = new COMLineParser();
    MTDLineParser mtdParser = new MTDLineParser();
    PRHLineParser prhParser = null;
    PRTLineParser prtParser = null;
    PEHLineParser pehParser = null;
    PEPLineParser pepParser = null;
    PSHLineParser pshParser = null;
    PSMLineParser psmParser = null;
    SMHLineParser smhParser = null;
    SMLLineParser smlParser = null;

    // Records are keyed by their line number so they are replayed in file order.
    SortedDictionary<int, Comment> commentMap = new SortedDictionary<int, Comment>();
    SortedDictionary<int, Protein> proteinMap = new SortedDictionary<int, Protein>();
    SortedDictionary<int, Peptide> peptideMap = new SortedDictionary<int, Peptide>();
    SortedDictionary<int, PSM> psmMap = new SortedDictionary<int, PSM>();
    SortedDictionary<int, SmallMolecule> smallMoleculeMap = new SortedDictionary<int, SmallMolecule>();

    PositionMapping prtPositionMapping = null;
    PositionMapping pepPositionMapping = null;
    PositionMapping psmPositionMapping = null;
    PositionMapping smlPositionMapping = null;

    // 'using' closes the reader on every exit path; previously any MZTabException thrown
    // inside the loop skipped the trailing Close() call and leaked the reader.
    using (reader) {
        string line;
        int highWaterMark = 1;
        int lineNumber = 0;
        while ((line = reader.ReadLine()) != null) {
            lineNumber++;

            if (string.IsNullOrEmpty(line) || line.StartsWith("MTH", ordinal) || line.StartsWith("#", ordinal)) {
                continue;
            }

            if (line.StartsWith(Section.Comment.Prefix, ordinal)) {
                comParser.Parse(lineNumber, line, errorList);
                commentMap.Add(lineNumber, comParser.getComment());
                continue;
            }

            Section section = getSection(line);
            MZTabError error;
            if (section == null) {
                error = new MZTabError(FormatErrorType.LinePrefix, lineNumber, subString(line));
                throw new MZTabException(error);
            }
            if (section.Level < highWaterMark) {
                // A section of a lower level appeared after a higher one: wrong line order.
                Section currentSection = Section.FindSection(highWaterMark);
                error = new MZTabError(LogicalErrorType.LineOrder, lineNumber, currentSection.Name, section.Name);
                throw new MZTabException(error);
            }

            highWaterMark = section.Level;
            // There exists errors during checking metadata section.
            if (highWaterMark == 1 && !errorList.IsNullOrEmpty()) {
                break;
            }

            switch (highWaterMark) {
                case 1:
                    // metadata section.
                    mtdParser.Parse(lineNumber, line, errorList);
                    break;
                case 2:
                    if (prhParser != null) {
                        // header line only display once!
                        error = new MZTabError(LogicalErrorType.HeaderLine, lineNumber, subString(line));
                        throw new MZTabException(error);
                    }
                    // protein header section
                    prhParser = new PRHLineParser(mtdParser.Metadata);
                    prhParser.Parse(lineNumber, line, errorList);
                    prtPositionMapping = new PositionMapping(prhParser.getFactory(), line);
                    // tell system to continue check protein data line.
                    highWaterMark = 3;
                    break;
                case 3:
                    if (prhParser == null) {
                        // header line should be check first.
                        error = new MZTabError(LogicalErrorType.NoHeaderLine, lineNumber, subString(line));
                        throw new MZTabException(error);
                    }
                    if (prtParser == null) {
                        prtParser = new PRTLineParser(prhParser.getFactory(), prtPositionMapping, mtdParser.Metadata,
                            errorList);
                    }
                    prtParser.Parse(lineNumber, line, errorList);
                    proteinMap.Add(lineNumber, prtParser.getRecord(line));
                    break;
                case 4:
                    if (pehParser != null) {
                        // header line only display once!
                        error = new MZTabError(LogicalErrorType.HeaderLine, lineNumber, subString(line));
                        throw new MZTabException(error);
                    }
                    // A peptide section in an Identification file is recorded as an error,
                    // but parsing continues.
                    if (mtdParser.Metadata.MzTabType == MzTabType.Identification) {
                        errorList.Add(new MZTabError(LogicalErrorType.PeptideSection, lineNumber, subString(line)));
                    }
                    // peptide header section
                    pehParser = new PEHLineParser(mtdParser.Metadata);
                    pehParser.Parse(lineNumber, line, errorList);
                    pepPositionMapping = new PositionMapping(pehParser.getFactory(), line);
                    // tell system to continue check peptide data line.
                    highWaterMark = 5;
                    break;
                case 5:
                    if (pehParser == null) {
                        // header line should be check first.
                        error = new MZTabError(LogicalErrorType.NoHeaderLine, lineNumber, subString(line));
                        throw new MZTabException(error);
                    }
                    if (pepParser == null) {
                        pepParser = new PEPLineParser(pehParser.getFactory(), pepPositionMapping, mtdParser.Metadata,
                            errorList);
                    }
                    pepParser.Parse(lineNumber, line, errorList);
                    peptideMap.Add(lineNumber, pepParser.getRecord(line));
                    break;
                case 6:
                    if (pshParser != null) {
                        // header line only display once!
                        error = new MZTabError(LogicalErrorType.HeaderLine, lineNumber, subString(line));
                        throw new MZTabException(error);
                    }
                    // psm header section
                    pshParser = new PSHLineParser(mtdParser.Metadata);
                    pshParser.Parse(lineNumber, line, errorList);
                    psmPositionMapping = new PositionMapping(pshParser.getFactory(), line);
                    // tell system to continue check PSM data line.
                    highWaterMark = 7;
                    break;
                case 7:
                    if (pshParser == null) {
                        // header line should be check first.
                        error = new MZTabError(LogicalErrorType.NoHeaderLine, lineNumber, subString(line));
                        throw new MZTabException(error);
                    }
                    if (psmParser == null) {
                        psmParser = new PSMLineParser(pshParser.getFactory(), psmPositionMapping, mtdParser.Metadata,
                            errorList);
                    }
                    psmParser.Parse(lineNumber, line, errorList);
                    psmMap.Add(lineNumber, psmParser.getRecord(line));
                    break;
                case 8:
                    if (smhParser != null) {
                        // header line only display once!
                        error = new MZTabError(LogicalErrorType.HeaderLine, lineNumber, subString(line));
                        throw new MZTabException(error);
                    }
                    // small molecule header section
                    smhParser = new SMHLineParser(mtdParser.Metadata);
                    smhParser.Parse(lineNumber, line, errorList);
                    smlPositionMapping = new PositionMapping(smhParser.getFactory(), line);
                    // tell system to continue check small molecule data line.
                    highWaterMark = 9;
                    break;
                case 9:
                    if (smhParser == null) {
                        // header line should be check first.
                        error = new MZTabError(LogicalErrorType.NoHeaderLine, lineNumber, subString(line));
                        throw new MZTabException(error);
                    }
                    if (smlParser == null) {
                        smlParser = new SMLLineParser(smhParser.getFactory(), smlPositionMapping, mtdParser.Metadata,
                            errorList);
                    }
                    smlParser.Parse(lineNumber, line, errorList);
                    smallMoleculeMap.Add(lineNumber, smlParser.getRecord(line));
                    break;
            }
        }
    }

    // The result file is only assembled when the whole scan recorded no error.
    if (!errorList.IsNullOrEmpty()) {
        return;
    }

    _mzTabFile = new MZTabFile(mtdParser.Metadata);
    foreach (KeyValuePair<int, Comment> entry in commentMap) {
        _mzTabFile.addComment(entry.Key, entry.Value);
    }

    // A section is emitted only if its header line was seen.
    if (prhParser != null) {
        _mzTabFile.setProteinColumnFactory(prhParser.getFactory());
        foreach (KeyValuePair<int, Protein> entry in proteinMap) {
            _mzTabFile.addProtein(entry.Key, entry.Value);
        }
    }

    if (pehParser != null) {
        _mzTabFile.setPeptideColumnFactory(pehParser.getFactory());
        foreach (KeyValuePair<int, Peptide> entry in peptideMap) {
            _mzTabFile.addPeptide(entry.Key, entry.Value);
        }
    }

    if (pshParser != null) {
        _mzTabFile.setPSMColumnFactory(pshParser.getFactory());
        foreach (KeyValuePair<int, PSM> entry in psmMap) {
            _mzTabFile.addPSM(entry.Key, entry.Value);
        }
    }

    if (smhParser != null) {
        _mzTabFile.setSmallMoleculeColumnFactory(smhParser.getFactory());
        foreach (KeyValuePair<int, SmallMolecule> entry in smallMoleculeMap) {
            _mzTabFile.addSmallMolecule(entry.Key, entry.Value);
        }
    }
}