/**
 * Validates an mzTab document read from a stream and writes the resulting report to a writer.
 *
 * A fresh error list at Level.Info is created, the document is run through validate(), and the
 * collected errors are printed to outstream. When no error was collected a short confirmation
 * line is written instead. The writer is always closed before this method returns, including
 * when validation or reporting fails.
 *
 * This method does not let exceptions escape: any failure is shown to the user in a message box
 * (exception message as text, stack trace as second argument).
 *
 * @param stream    source of the mzTab text; it is handed to validate(), which wraps it in a
 *                  StreamReader and closes that reader when done
 * @param outstream destination of the validation report; closed by this method
 * @param progress  callback forwarded to validate() (percentage of the stream consumed)
 * @param status    callback forwarded to validate() (human readable status text)
 */
public void ValidateFile(Stream stream, TextWriter outstream, Action<int> progress, Action<string> status)
{
    try
    {
        try
        {
            MZTabErrorList errorList = new MZTabErrorList(Level.Info);

            try
            {
                validate(stream, outstream, errorList, progress, status);
            }
            catch (MZTabException e)
            {
                // validate() handles failures raised while reading lines itself, so this only
                // sees MZTabExceptions raised outside of that loop.
                outstream.Write(MZTabProperties.MZTabExceptionMessage);
                errorList.Add(e.Error);
            }
            catch (MZTabErrorOverflowException)
            {
                outstream.Write(MZTabProperties.MZTabErrorOverflowExceptionMessage);
            }

            errorList.print(outstream);
            if (errorList.IsNullOrEmpty())
            {
                outstream.Write("No errors in this section!" + MZTabConstants.NEW_LINE);
            }
        }
        finally
        {
            // Close the report writer on every path; previously it stayed open whenever
            // an exception skipped the tail of the method.
            if (outstream != null)
            {
                outstream.Close();
            }
        }
    }
    catch (Exception e)
    {
        // Also covers a failing Close(), so callers keep the "never throws" behaviour.
        MessageBox.Show(e.Message, e.StackTrace);
    }
}
/**
 * Parses one metadata (MTD) line.
 *
 * The line is first split by the base parser and must consist of exactly three items:
 * prefix, define label and value. The define label is trimmed and lower-cased with the
 * invariant culture, so the result does not depend on the current thread culture.
 *
 * Lines whose label contains "colunit", for example
 *     MTD  colunit-protein  retention_time=[UO, UO:000031, minute, ]
 * are not interpreted here. After the label has been checked against the four supported
 * colunit labels, label and value are only stored in _colUnitMap; according to the original
 * notes they have to be refined manually once the table section columns exist.
 * Every other line is handed to parseNormalMetadata().
 *
 * @param lineNumber line number used in error reports
 * @param mtdLine    raw metadata line
 * @param errorList  error list forwarded to the base parser
 * @throws MZTabException when the line does not have three items (FormatErrorType.MTDLine)
 *                        or a colunit label is not supported (FormatErrorType.MTDDefineLabel)
 */
public new void Parse(int lineNumber, string mtdLine, MZTabErrorList errorList)
{
    base.Parse(lineNumber, mtdLine, errorList);

    if (_items.Length != 3)
    {
        MZTabError error = new MZTabError(FormatErrorType.MTDLine, lineNumber, mtdLine);
        throw new MZTabException(error);
    }

    // Labels are identifiers, not user text: lower-case them culture-independently.
    string defineLabel = _items[1].Trim().ToLowerInvariant();
    string valueLabel = _items[2].Trim();

    if (!defineLabel.Contains("colunit"))
    {
        parseNormalMetadata(defineLabel, valueLabel);
        return;
    }

    bool supportedColUnit = defineLabel.Equals("colunit-protein")
                            || defineLabel.Equals("colunit-peptide")
                            || defineLabel.Equals("colunit-psm")
                            || defineLabel.Equals("colunit-small_molecule");
    if (!supportedColUnit)
    {
        // Reject before storing, so an unsupported label never ends up in the map.
        MZTabError error = new MZTabError(FormatErrorType.MTDDefineLabel, lineNumber, defineLabel);
        throw new MZTabException(error);
    }

    // NOTE(review): if _colUnitMap is a Dictionary, Add throws for a label that was already
    // stored by an earlier line - confirm whether repeated colunit lines must be supported.
    _colUnitMap.Add(defineLabel, valueLabel);
}
/**
 * Parses a table header line.
 *
 * Work is done in three steps after the base parser has split the line:
 *   1. parseStableOrderColumns() handles the leading columns and returns the index of the
 *      first item it did not consume.
 *   2. If items remain past that index, parseOptionalColumns() handles them.
 *   3. refine() is called once both steps are finished.
 *
 * @param lineNumber line number used in error reports
 * @param line       raw header line
 * @param errorList  error list forwarded to the base parser
 */
protected new void Parse(int lineNumber, string line, MZTabErrorList errorList)
{
    base.Parse(lineNumber, line, errorList);

    int firstUnparsedIndex = parseStableOrderColumns();
    bool hasRemainingItems = firstUnparsedIndex < _items.Length;
    if (hasRemainingItems)
    {
        parseOptionalColumns(firstUnparsedIndex);
    }

    refine();
}
/**
 * Splits a raw line into its tab separated items and resolves the section it belongs to.
 *
 * Callers are expected to pass a non-empty line that starts with a section prefix; this
 * method does not check that itself. The first and the last item are trimmed, the items in
 * between are kept untouched. When the first item does not map to a known section a
 * FormatErrorType.LinePrefix error is added to the error list (no exception is thrown).
 *
 * @param lineNumber line number used in error reports
 * @param line       raw line
 * @param errorList  error list to report to; a new list is created when this is null
 */
protected void Parse(int lineNumber, string line, MZTabErrorList errorList)
{
    _lineNumber = lineNumber;
    _line = line;
    _errorList = errorList ?? new MZTabErrorList();

    string[] fields = line.Split(MZTabConstants.TAB);
    int lastIndex = fields.Length - 1;
    fields[0] = fields[0].Trim();
    fields[lastIndex] = fields[lastIndex].Trim();
    _items = fields;

    _section = Section.findSection(_items[0]);
    if (_section == null)
    {
        _errorList.Add(new MZTabError(FormatErrorType.LinePrefix, lineNumber, _items[0]));
    }
}
/**
 * Creates a data line parser that works against an already parsed header and metadata section.
 *
 * Per the original notes, the column factory keeps columns in a logical order while a
 * physical mzTab file may list them in a different order; the position mapping records the
 * relation between both. This constructor stores the mapping, its exchanged form
 * (positionMapping.exchange()) and the factory's offset-to-column map
 * (factory.GetOffsetColumnsMap()).
 *
 * All three required arguments are checked up front, before any of them is used.
 *
 * @param factory         column factory of the header line; must not be null
 * @param positionMapping mapping between logical and physical column positions; must not be null
 * @param metadata        parsed metadata section; must not be null
 * @param errorList       error list to report to; a new list is created when this is null
 * @throws NullReferenceException when factory, positionMapping or metadata is null
 */
protected MZTabDataLineParser(MZTabColumnFactory factory, PositionMapping positionMapping, Metadata metadata, MZTabErrorList errorList)
{
    // The exception type is kept as NullReferenceException on purpose: it is what this
    // constructor has always raised for missing arguments, so existing handlers keep working.
    if (factory == null)
    {
        throw new NullReferenceException("Column header factory should be create first.");
    }
    if (positionMapping == null)
    {
        // Previously this surfaced as a message-less failure on positionMapping.exchange().
        throw new NullReferenceException("Position mapping should be created first.");
    }
    if (metadata == null)
    {
        throw new NullReferenceException("Metadata should be parser first.");
    }

    this.factory = factory;
    this.positionMapping = positionMapping;
    this.metadata = metadata;

    exchangeMapping = positionMapping.exchange();
    mapping = factory.GetOffsetColumnsMap();

    _errorList = errorList ?? new MZTabErrorList();
}
/**
 * Parses and checks an mzTab file and writes the report to the given writer.
 *
 * init() is called with the file name first. Afterwards an error list with the requested
 * level is created and check() and refine() are run. An MZTabException raised by those steps
 * is reported with a fixed message and its error is added to the list; an
 * MZTabErrorOverflowException is reported with a fixed message only. Finally the error list
 * is printed, followed by a confirmation line when it contains no entries.
 *
 * @param tabFile   file name passed to init() and used in the confirmation line
 * @param outstream destination of the report; it is not closed here
 * @param level     level handed to the error list
 */
public MZTabFileParser(string tabFile, TextWriter outstream, Level level)
{
    init(tabFile);

    try
    {
        errorList = new MZTabErrorList(level);
        check();
        refine();
    }
    catch (MZTabException parseFailure)
    {
        outstream.Write(MZTabProperties.MZTabExceptionMessage);
        errorList.Add(parseFailure.Error);
    }
    catch (MZTabErrorOverflowException)
    {
        outstream.Write(MZTabProperties.MZTabErrorOverflowExceptionMessage);
    }

    errorList.print(outstream);

    if (!errorList.IsNullOrEmpty())
    {
        return;
    }

    string confirmation = "not errors in " + tabFile + " file!" + MZTabConstants.NEW_LINE;
    outstream.Write(confirmation);
}
/**
 * Creates a PSM data line parser. All arguments are passed unchanged to the base
 * constructor; no additional state is set up here.
 *
 * @param factory         column factory of the PSM header line
 * @param positionMapping mapping between logical and physical column positions
 * @param metadata        parsed metadata section
 * @param errorList       error list to report to
 */
public PSMLineParser(
    MZTabColumnFactory factory,
    PositionMapping positionMapping,
    Metadata metadata,
    MZTabErrorList errorList)
    : base(factory, positionMapping, metadata, errorList)
{
}
/**
 * Validates one data line.
 *
 * After the base parser has split the line, three checks run in a fixed order:
 * checkCount(), checkStableData() and checkOptionalData(). According to the original notes
 * this step only validates; converting the line into a record is done separately by
 * getRecord().
 *
 * @param lineNumber line number used in error reports
 * @param line       raw data line
 * @param errorList  error list forwarded to the base parser
 */
public new void Parse(int lineNumber, string line, MZTabErrorList errorList)
{
    // Split the line and resolve its section first; the checks below rely on that state.
    base.Parse(lineNumber, line, errorList);

    checkCount();
    checkStableData();
    checkOptionalData();
}
/**
 * Reads an mzTab document line by line, validates every line with the parser of its section
 * and collects the parsed records.
 *
 * Processing rules, as implemented below:
 *   - Empty lines and lines starting with "MTH" or "#" are skipped.
 *   - Comment lines are parsed and stored by line number.
 *   - Every other line must map to a section whose level is not lower than the level reached
 *     so far (highWaterMark); levels are 1 metadata, 2/3 protein header/data,
 *     4/5 peptide header/data, 6/7 PSM header/data, 8/9 small molecule header/data.
 *   - A header line may appear only once per section and must precede its data lines.
 *   - Reading stops as soon as the metadata section has produced errors.
 *
 * Any exception raised while reading lines is written to outstream as "Line n: message" and
 * added to the error list as a ParserError; it does not leave this method.
 * NOTE(review): that catch-all also covers MZTabException and MZTabErrorOverflowException,
 * so handlers for those types in callers do not see failures from the read loop.
 *
 * The reader created on the stream is always disposed when reading ends.
 *
 * @param stream    source of the mzTab text
 * @param outstream receives one line per exception caught while reading
 * @param errorList list that collects all validation errors
 * @param progress  optional callback receiving the consumed percentage of the stream; only
 *                  invoked for seekable, non-empty streams
 * @param status    optional callback receiving "Validate line n" for the line being processed
 */
private void validate(Stream stream, TextWriter outstream, MZTabErrorList errorList, Action<int> progress, Action<string> status)
{
    COMLineParser comParser = new COMLineParser();
    MTDLineParser mtdParser = new MTDLineParser();

    PRHLineParser prhParser = null;
    PRTLineParser prtParser = null;
    PEHLineParser pehParser = null;
    PEPLineParser pepParser = null;
    PSHLineParser pshParser = null;
    PSMLineParser psmParser = null;
    SMHLineParser smhParser = null;
    SMLLineParser smlParser = null;

    SortedDictionary<int, Comment> commentMap = new SortedDictionary<int, Comment>();
    SortedDictionary<int, Protein> proteinMap = new SortedDictionary<int, Protein>();
    SortedDictionary<int, Peptide> peptideMap = new SortedDictionary<int, Peptide>();
    SortedDictionary<int, PSM> psmMap = new SortedDictionary<int, PSM>();
    SortedDictionary<int, SmallMolecule> smallMoleculeMap = new SortedDictionary<int, SmallMolecule>();

    PositionMapping prtPositionMapping = null;
    PositionMapping pepPositionMapping = null;
    PositionMapping psmPositionMapping = null;
    PositionMapping smlPositionMapping = null;

    int highWaterMark = 1;
    int lineNumber = 0;

    // "using" guarantees the reader is released even if the error reporting below throws.
    using (StreamReader reader = new StreamReader(stream))
    {
        try
        {
            string line;
            while ((line = reader.ReadLine()) != null)
            {
                // Count first, so status text and error reports agree on the line number.
                lineNumber++;

                // Position/Length are only available on seekable streams; skip progress otherwise
                // instead of aborting the whole validation.
                if (progress != null && stream.CanSeek && stream.Length > 0)
                {
                    progress((int)(stream.Position * 100 / stream.Length));
                }
                if (status != null)
                {
                    status("Validate line " + lineNumber);
                }

                if (String.IsNullOrEmpty(line) || line.StartsWith("MTH") || line.StartsWith("#"))
                {
                    continue;
                }

                if (line.StartsWith(Section.Comment.Prefix))
                {
                    comParser.Parse(lineNumber, line, errorList);
                    commentMap.Add(lineNumber, comParser.getComment());
                    continue;
                }

                Section section = MZTabFileParser.getSection(line);
                MZTabError error;
                if (section == null)
                {
                    error = new MZTabError(FormatErrorType.LinePrefix, lineNumber, MZTabFileParser.subString(line));
                    throw new MZTabException(error);
                }
                if (section.Level < highWaterMark)
                {
                    // A line of an earlier section showed up after a later section was reached.
                    Section currentSection = Section.FindSection(highWaterMark);
                    error = new MZTabError(LogicalErrorType.LineOrder, lineNumber, currentSection.Name, section.Name);
                    throw new MZTabException(error);
                }

                highWaterMark = section.Level;

                // There exists errors during checking metadata section.
                if (highWaterMark == 1 && !errorList.IsNullOrEmpty())
                {
                    break;
                }

                switch (highWaterMark)
                {
                    case 1:
                        // metadata section.
                        mtdParser.Parse(lineNumber, line, errorList);
                        break;

                    case 2:
                        if (prhParser != null)
                        {
                            // header line only display once!
                            error = new MZTabError(LogicalErrorType.HeaderLine, lineNumber, MZTabFileParser.subString(line));
                            throw new MZTabException(error);
                        }

                        // protein header section
                        prhParser = new PRHLineParser(mtdParser.Metadata);
                        prhParser.Parse(lineNumber, line, errorList);
                        prtPositionMapping = new PositionMapping(prhParser.getFactory(), line);

                        // tell system to continue check protein data line.
                        highWaterMark = 3;
                        break;

                    case 3:
                        if (prhParser == null)
                        {
                            // header line should be check first.
                            error = new MZTabError(LogicalErrorType.NoHeaderLine, lineNumber, MZTabFileParser.subString(line));
                            throw new MZTabException(error);
                        }

                        if (prtParser == null)
                        {
                            prtParser = new PRTLineParser(prhParser.getFactory(), prtPositionMapping, mtdParser.Metadata, errorList);
                        }
                        prtParser.Parse(lineNumber, line, errorList);
                        proteinMap.Add(lineNumber, prtParser.getRecord(line));
                        break;

                    case 4:
                        if (pehParser != null)
                        {
                            // header line only display once!
                            error = new MZTabError(LogicalErrorType.HeaderLine, lineNumber, MZTabFileParser.subString(line));
                            throw new MZTabException(error);
                        }

                        // A peptide section in an identification file is recorded as an error,
                        // but parsing continues.
                        if (mtdParser.Metadata.MzTabType == MzTabType.Identification)
                        {
                            errorList.Add(new MZTabError(LogicalErrorType.PeptideSection, lineNumber, MZTabFileParser.subString(line)));
                        }

                        // peptide header section
                        pehParser = new PEHLineParser(mtdParser.Metadata);
                        pehParser.Parse(lineNumber, line, errorList);
                        pepPositionMapping = new PositionMapping(pehParser.getFactory(), line);

                        // tell system to continue check peptide data line.
                        highWaterMark = 5;
                        break;

                    case 5:
                        if (pehParser == null)
                        {
                            // header line should be check first.
                            error = new MZTabError(LogicalErrorType.NoHeaderLine, lineNumber, MZTabFileParser.subString(line));
                            throw new MZTabException(error);
                        }

                        if (pepParser == null)
                        {
                            pepParser = new PEPLineParser(pehParser.getFactory(), pepPositionMapping, mtdParser.Metadata, errorList);
                        }
                        pepParser.Parse(lineNumber, line, errorList);
                        peptideMap.Add(lineNumber, pepParser.getRecord(line));
                        break;

                    case 6:
                        if (pshParser != null)
                        {
                            // header line only display once!
                            error = new MZTabError(LogicalErrorType.HeaderLine, lineNumber, MZTabFileParser.subString(line));
                            throw new MZTabException(error);
                        }

                        // psm header section
                        pshParser = new PSHLineParser(mtdParser.Metadata);
                        pshParser.Parse(lineNumber, line, errorList);
                        psmPositionMapping = new PositionMapping(pshParser.getFactory(), line);

                        // tell system to continue check psm data line.
                        highWaterMark = 7;
                        break;

                    case 7:
                        if (pshParser == null)
                        {
                            // header line should be check first.
                            error = new MZTabError(LogicalErrorType.NoHeaderLine, lineNumber, MZTabFileParser.subString(line));
                            throw new MZTabException(error);
                        }

                        if (psmParser == null)
                        {
                            psmParser = new PSMLineParser(pshParser.getFactory(), psmPositionMapping, mtdParser.Metadata, errorList);
                        }
                        psmParser.Parse(lineNumber, line, errorList);
                        psmMap.Add(lineNumber, psmParser.getRecord(line));
                        break;

                    case 8:
                        if (smhParser != null)
                        {
                            // header line only display once!
                            error = new MZTabError(LogicalErrorType.HeaderLine, lineNumber, MZTabFileParser.subString(line));
                            throw new MZTabException(error);
                        }

                        // small molecule header section
                        smhParser = new SMHLineParser(mtdParser.Metadata);
                        smhParser.Parse(lineNumber, line, errorList);
                        smlPositionMapping = new PositionMapping(smhParser.getFactory(), line);

                        // tell system to continue check small molecule data line.
                        highWaterMark = 9;
                        break;

                    case 9:
                        if (smhParser == null)
                        {
                            // header line should be check first.
                            error = new MZTabError(LogicalErrorType.NoHeaderLine, lineNumber, MZTabFileParser.subString(line));
                            throw new MZTabException(error);
                        }

                        if (smlParser == null)
                        {
                            smlParser = new SMLLineParser(smhParser.getFactory(), smlPositionMapping, mtdParser.Metadata, errorList);
                        }
                        smlParser.Parse(lineNumber, line, errorList);
                        smallMoleculeMap.Add(lineNumber, smlParser.getRecord(line));
                        break;
                }
            }
        }
        catch (Exception e)
        {
            outstream.WriteLine("Line {0}: {1}", lineNumber, e.Message);
            errorList.Add(new ParserError(lineNumber, e.Message));
        }
    }

    if (errorList.IsNullOrEmpty())
    {
        // NOTE(review): the assembled file is neither returned nor stored, so it only serves
        // whatever checks the add/set methods perform - confirm whether it should be kept.
        MZTabFile mzTabFile = new MZTabFile(mtdParser.Metadata);

        foreach (KeyValuePair<int, Comment> entry in commentMap)
        {
            mzTabFile.addComment(entry.Key, entry.Value);
        }

        if (prhParser != null)
        {
            MZTabColumnFactory proteinColumnFactory = prhParser.getFactory();
            mzTabFile.setProteinColumnFactory(proteinColumnFactory);
            foreach (KeyValuePair<int, Protein> entry in proteinMap)
            {
                mzTabFile.addProtein(entry.Key, entry.Value);
            }
        }

        if (pehParser != null)
        {
            MZTabColumnFactory peptideColumnFactory = pehParser.getFactory();
            mzTabFile.setPeptideColumnFactory(peptideColumnFactory);
            foreach (KeyValuePair<int, Peptide> entry in peptideMap)
            {
                mzTabFile.addPeptide(entry.Key, entry.Value);
            }
        }

        if (pshParser != null)
        {
            MZTabColumnFactory psmColumnFactory = pshParser.getFactory();
            mzTabFile.setPSMColumnFactory(psmColumnFactory);
            foreach (KeyValuePair<int, PSM> entry in psmMap)
            {
                mzTabFile.addPSM(entry.Key, entry.Value);
            }
        }

        if (smhParser != null)
        {
            MZTabColumnFactory smallMoleculeColumnFactory = smhParser.getFactory();
            mzTabFile.setSmallMoleculeColumnFactory(smallMoleculeColumnFactory);
            foreach (KeyValuePair<int, SmallMolecule> entry in smallMoleculeMap)
            {
                mzTabFile.addSmallMolecule(entry.Key, entry.Value);
            }
        }
    }
}
/**
 * Parses one line by delegating to the base parser; no additional checks are done here.
 * The method hides the base implementation to make it publicly callable.
 *
 * @param lineNumber line number used in error reports
 * @param line       raw line
 * @param errorList  error list forwarded to the base parser
 */
public new void Parse(int lineNumber, string line, MZTabErrorList errorList)
{
    base.Parse(lineNumber, line, errorList);
}