Ejemplo n.º 1
0
        /**
         * Validates the mzTab content read from <paramref name="stream"/> and writes the
         * resulting report to <paramref name="outstream"/>.
         *
         * Errors are collected in an MZTabErrorList created with Level.Info. An MZTabException
         * or MZTabErrorOverflowException escaping validate() is turned into a message on
         * outstream; the collected errors are then printed, followed by a "no errors" line when
         * the list is empty. Any other exception is shown to the user in a message box instead
         * of being propagated to the caller.
         *
         * outstream is always closed by this method, including when producing the report fails.
         *
         * @param stream    source of the mzTab text; handed to validate(), which closes it
         *                  through the reader it wraps around the stream
         * @param outstream destination of the validation report; closed before returning
         * @param progress  callback forwarded to validate()
         * @param status    callback forwarded to validate()
         */
        public void ValidateFile(Stream stream, TextWriter outstream, Action<int> progress, Action<string> status)
        {
            try {
                try {
                    MZTabErrorList errorList = new MZTabErrorList(Level.Info);

                    try {
                        validate(stream, outstream, errorList, progress, status);
                        //refine();
                    } catch (MZTabException e) {
                        outstream.Write(MZTabProperties.MZTabExceptionMessage);
                        errorList.Add(e.Error);
                    } catch (MZTabErrorOverflowException) {
                        outstream.Write(MZTabProperties.MZTabErrorOverflowExceptionMessage);
                    }

                    errorList.print(outstream);
                    if (errorList.IsNullOrEmpty()) {
                        outstream.Write("No errors in this section!" + MZTabConstants.NEW_LINE);
                    }
                } finally {
                    // Close the writer on every path: previously a failure while printing the
                    // report skipped the Close() call and left the writer open.
                    if (outstream != null) {
                        outstream.Close();
                    }
                }
            } catch (Exception e) {
                // NOTE(review): MessageBox.Show(text, caption) receives the stack trace as the
                // caption here; kept as-is to avoid changing what the user sees — confirm intent.
                MessageBox.Show(e.Message, e.StackTrace);
            }
        }
Ejemplo n.º 2
0
        /**
         * Parses one metadata line made of exactly three tab-separated items:
         * prefix, define label and value, for example:
         * MTD  colunit-protein retention_time=[UO, UO:000031, minute, ]
         *
         * Column-unit ("colunit") definitions are not interpreted here. After the label has been
         * checked against the four supported colunit labels, the raw value is stored in
         * _colUnitMap so it can be resolved later, once the table columns exist. Every other
         * label is handed to parseNormalMetadata.
         *
         * @param lineNumber line number used when reporting errors
         * @param mtdLine    the raw metadata line
         * @param errorList  error list forwarded to the base parser
         * @throws MZTabException when the line does not have three items, or when a colunit
         *                        label is not one of the supported ones
         */
        public new void Parse(int lineNumber, string mtdLine, MZTabErrorList errorList)
        {
            base.Parse(lineNumber, mtdLine, errorList);

            if (_items.Length != 3){
                MZTabError error = new MZTabError(FormatErrorType.MTDLine, lineNumber, mtdLine);
                throw new MZTabException(error);
            }

            // Lower-case with the invariant culture: the labels are compared ordinally against
            // ASCII literals below, and a culture-sensitive ToLower() maps 'I' to a dotless i
            // under Turkish cultures, which would make "COLUNIT-..." labels unrecognisable.
            string defineLabel = _items[1].Trim().ToLowerInvariant();
            string valueLabel = _items[2].Trim();

            if (defineLabel.Contains("colunit")){
                // Reject unsupported labels before storing anything, so an invalid entry never
                // ends up in the map.
                if (! defineLabel.Equals("colunit-protein") &&
                    ! defineLabel.Equals("colunit-peptide") &&
                    ! defineLabel.Equals("colunit-psm") &&
                    ! defineLabel.Equals("colunit-small_molecule")){
                    MZTabError error = new MZTabError(FormatErrorType.MTDDefineLabel, lineNumber, defineLabel);
                    throw new MZTabException(error);
                }

                // Colunit values are only stored at this stage; they are resolved manually
                // after the table section columns have been created.
                _colUnitMap.Add(defineLabel, valueLabel);
            }
            else{
                parseNormalMetadata(defineLabel, valueLabel);
            }
        }
        /**
         * Parses a header line in two steps after the base parser has split it into items:
         * Step 1: parseStableOrderColumns() handles the stable columns and the optional
         * columns that have a stable order.
         * Step 2: parseOptionalColumns(int) handles the remaining columns and is only called
         * when step 1 returned an index that is still inside the item array.
         * refine() is always called last.
         *
         * @param lineNumber line number used when reporting errors
         * @param line       the raw header line
         * @param errorList  error list forwarded to the base parser
         */
        protected new void Parse(int lineNumber, string line, MZTabErrorList errorList)
        {
            base.Parse(lineNumber, line, errorList);

            // Presumably the index of the first item that step 1 did not consume.
            int firstOptionalIndex = parseStableOrderColumns();
            bool hasRemainingItems = firstOptionalIndex < _items.Length;
            if (hasRemainingItems){
                parseOptionalColumns(firstOptionalIndex);
            }

            refine();
        }
Ejemplo n.º 4
0
        /**
         * Splits a raw line into tab-separated items and resolves its section from the first
         * item. The first and last items are trimmed. When no section matches the prefix, a
         * LinePrefix error is added to the error list; no exception is thrown.
         *
         * The caller is expected to have checked already that the line is not empty and
         * starts with a section prefix.
         *
         * @param lineNumber line number remembered for error reporting
         * @param line       the raw line
         * @param errorList  error list to report into; a new list is created when null
         */
        protected void Parse(int lineNumber, string line, MZTabErrorList errorList)
        {
            _lineNumber = lineNumber;
            _line = line;
            _errorList = errorList != null ? errorList : new MZTabErrorList();

            _items = line.Split(MZTabConstants.TAB);

            // Only the outer items are trimmed; the items in between are kept as read.
            int lastIndex = _items.Length - 1;
            _items[0] = _items[0].Trim();
            _items[lastIndex] = _items[lastIndex].Trim();

            string prefix = _items[0];
            _section = Section.findSection(prefix);
            if (_section != null){
                return;
            }

            _errorList.Add(new MZTabError(FormatErrorType.LinePrefix, lineNumber, prefix));
        }
        /**
         * Creates a data line parser holding the common state used to parse a data line.
         *
         * NOTICE: the column factory keeps its columns in a logical position and order, but a
         * physical mzTab file may present the columns in a different order. The position
         * mapping records the relation between the two; the mapping obtained from its
         * exchange() method is stored alongside it.
         *
         * All three mandatory arguments are checked before any field is assigned, so a failed
         * construction never leaves a partially initialised parser behind.
         *
         * @param factory         MUST NOT be null
         * @param positionMapping MUST NOT be null
         * @param metadata        MUST NOT be null
         * @param errorList       error list to report into; a new list is created when null
         * @throws NullReferenceException when factory, positionMapping or metadata is null.
         *         ArgumentNullException would be the idiomatic type, but the type is kept so
         *         existing catch blocks keep working.
         */
        protected MZTabDataLineParser(MZTabColumnFactory factory, PositionMapping positionMapping,
                                      Metadata metadata, MZTabErrorList errorList)
        {
            if (factory == null){
                throw new NullReferenceException("Column header factory should be create first.");
            }
            if (positionMapping == null){
                // Previously this surfaced as a message-less NullReferenceException from the
                // exchange() call below.
                throw new NullReferenceException("Position mapping should be created first.");
            }
            if (metadata == null){
                throw new NullReferenceException("Metadata should be parser first.");
            }

            this.factory = factory;

            this.positionMapping = positionMapping;
            exchangeMapping = positionMapping.exchange();

            mapping = factory.GetOffsetColumnsMap();

            this.metadata = metadata;
            _errorList = errorList ?? new MZTabErrorList();
        }
Ejemplo n.º 6
0
        /**
         * Initialises the parser for tabFile, runs check() and refine(), and writes the
         * outcome to outstream.
         *
         * An MZTabException or MZTabErrorOverflowException raised while checking is reported
         * on outstream instead of being propagated. The collected errors are printed
         * afterwards, followed by a confirmation line when the error list is empty.
         *
         * @param tabFile   the mzTab file, passed to init() and named in the confirmation line
         * @param outstream writer receiving messages and the error report; not closed here
         * @param level     level used to create the error list
         */
        public MZTabFileParser(string tabFile, TextWriter outstream, Level level)
        {
            init(tabFile);

            try{
                errorList = new MZTabErrorList(level);
                check();
                refine();
            }
            catch (MZTabException fatal){
                // Report the failure and keep its error so it is part of the printed list.
                outstream.Write(MZTabProperties.MZTabExceptionMessage);
                errorList.Add(fatal.Error);
            }
            catch (MZTabErrorOverflowException){
                outstream.Write(MZTabProperties.MZTabErrorOverflowExceptionMessage);
            }

            errorList.print(outstream);

            if (!errorList.IsNullOrEmpty()){
                return;
            }

            outstream.Write("not errors in " + tabFile + " file!" + MZTabConstants.NEW_LINE);
        }
Ejemplo n.º 7
0
 /**
  * Creates a PSM data line parser. All arguments are forwarded unchanged to the base
  * constructor; this constructor adds no state of its own.
  *
  * @param factory         column factory, forwarded to the base constructor
  * @param positionMapping position mapping, forwarded to the base constructor
  * @param metadata        metadata, forwarded to the base constructor
  * @param errorList       error list, forwarded to the base constructor
  */
 public PSMLineParser(MZTabColumnFactory factory, PositionMapping positionMapping, Metadata metadata,
                      MZTabErrorList errorList)
     : base(factory, positionMapping, metadata, errorList)
 {
 }
 /**
  * Validates a data line: runs the base parser on the line, then checkCount(),
  * checkStableData() and checkOptionalData(), in that order.
  *
  * NOTICE: according to the original documentation this step only validates and does not
  * convert the line into a record; conversion is done by a separate getRecord method,
  * which is not visible here.
  *
  * @param lineNumber line number forwarded to the base parser
  * @param line       the raw data line
  * @param errorList  error list forwarded to the base parser
  */
 public new void Parse(int lineNumber, string line, MZTabErrorList errorList)
 {
     base.Parse(lineNumber, line, errorList);
     checkCount();
     checkStableData();
     checkOptionalData();
 }
Ejemplo n.º 9
0
        /**
         * Reads the mzTab content of stream line by line and validates it, collecting errors
         * in errorList.
         *
         * Each non-comment line is assigned to a section whose level drives a small state
         * machine (highWaterMark): 1 metadata, 2/3 protein header/data, 4/5 peptide
         * header/data, 6/7 PSM header/data, 8/9 small molecule header/data. A line whose
         * section level is lower than the current mark, a repeated header line, or a data
         * line without its header raises an MZTabException.
         *
         * Every exception raised while reading, including MZTabException, is caught here,
         * written to outstream as "Line N: message" and added to errorList as a ParserError.
         * The reader, and with it the underlying stream, is closed on every path.
         *
         * @param stream    source of the mzTab text; closed when this method returns
         * @param outstream receives one line for an exception caught during reading
         * @param errorList collects the errors found
         * @param progress  receives the percentage of the stream consumed; only called for
         *                  seekable streams
         * @param status    receives a "Validate line N" message for every line read, where N
         *                  is the 1-based line number
         */
        private void validate(Stream stream, TextWriter outstream, MZTabErrorList errorList,
                          Action<int> progress, Action<string> status)
        {
            COMLineParser comParser = new COMLineParser();
            MTDLineParser mtdParser = new MTDLineParser();
            PRHLineParser prhParser = null;
            PRTLineParser prtParser = null;
            PEHLineParser pehParser = null;
            PEPLineParser pepParser = null;
            PSHLineParser pshParser = null;
            PSMLineParser psmParser = null;
            SMHLineParser smhParser = null;
            SMLLineParser smlParser = null;

            SortedDictionary<int, Comment> commentMap = new SortedDictionary<int, Comment>();
            SortedDictionary<int, Protein> proteinMap = new SortedDictionary<int, Protein>();
            SortedDictionary<int, Peptide> peptideMap = new SortedDictionary<int, Peptide>();
            SortedDictionary<int, PSM> psmMap = new SortedDictionary<int, PSM>();
            SortedDictionary<int, SmallMolecule> smallMoleculeMap = new SortedDictionary<int, SmallMolecule>();

            PositionMapping prtPositionMapping = null;
            PositionMapping pepPositionMapping = null;
            PositionMapping psmPositionMapping = null;
            PositionMapping smlPositionMapping = null;

            StreamReader reader = new StreamReader(stream);
            // Position and Length throw on non-seekable streams; progress is skipped for those
            // instead of failing the whole validation.
            bool canReportProgress = stream.CanSeek;
            int highWaterMark = 1;
            int lineNumber = 0;
            try {
                string line;
                while ((line = reader.ReadLine()) != null) {
                    // Count the line first so the status message uses the same 1-based number
                    // as the errors reported for this line.
                    lineNumber++;
                    if (canReportProgress) {
                        progress((int)(stream.Position * 100 / stream.Length));
                    }
                    status("Validate line " + lineNumber);

                    if (String.IsNullOrEmpty(line) || line.StartsWith("MTH") || line.StartsWith("#")) {
                        continue;
                    }

                    if (line.StartsWith(Section.Comment.Prefix)) {
                        comParser.Parse(lineNumber, line, errorList);
                        commentMap.Add(lineNumber, comParser.getComment());
                        continue;
                    }

                    Section section = MZTabFileParser.getSection(line);
                    MZTabError error;
                    if (section == null) {
                        error = new MZTabError(FormatErrorType.LinePrefix, lineNumber, MZTabFileParser.subString(line));
                        throw new MZTabException(error);
                    }
                    if (section.Level < highWaterMark) {
                        Section currentSection = Section.FindSection(highWaterMark);
                        error = new MZTabError(LogicalErrorType.LineOrder, lineNumber, currentSection.Name, section.Name);
                        throw new MZTabException(error);
                    }

                    highWaterMark = section.Level;
                    // There exists errors during checking metadata section.
                    if (highWaterMark == 1 && !errorList.IsNullOrEmpty()) {
                        break;
                    }

                    switch (highWaterMark) {
                        case 1:
                            // metadata section.
                            mtdParser.Parse(lineNumber, line, errorList);
                            break;
                        case 2:
                            if (prhParser != null) {
                                // header line only display once!
                                error = new MZTabError(LogicalErrorType.HeaderLine, lineNumber,
                                                       MZTabFileParser.subString(line));
                                throw new MZTabException(error);
                            }

                            // protein header section
                            prhParser = new PRHLineParser(mtdParser.Metadata);
                            prhParser.Parse(lineNumber, line, errorList);
                            prtPositionMapping = new PositionMapping(prhParser.getFactory(), line);

                            // tell system to continue check protein data line.
                            highWaterMark = 3;
                            break;
                        case 3:
                            if (prhParser == null) {
                                // header line should be check first.
                                error = new MZTabError(LogicalErrorType.NoHeaderLine, lineNumber,
                                                       MZTabFileParser.subString(line));
                                throw new MZTabException(error);
                            }

                            // The data line parser is created lazily on the first data line.
                            if (prtParser == null) {
                                prtParser = new PRTLineParser(prhParser.getFactory(), prtPositionMapping,
                                                              mtdParser.Metadata,
                                                              errorList);
                            }
                            prtParser.Parse(lineNumber, line, errorList);
                            proteinMap.Add(lineNumber, prtParser.getRecord(line));

                            break;
                        case 4:
                            if (pehParser != null) {
                                // header line only display once!
                                error = new MZTabError(LogicalErrorType.HeaderLine, lineNumber,
                                                       MZTabFileParser.subString(line));
                                throw new MZTabException(error);
                            }

                            // A peptide section in an Identification file is recorded as an
                            // error, but parsing continues.
                            if (mtdParser.Metadata.MzTabType == MzTabType.Identification) {
                                errorList.Add(new MZTabError(LogicalErrorType.PeptideSection, lineNumber,
                                                             MZTabFileParser.subString(line)));
                            }

                            // peptide header section
                            pehParser = new PEHLineParser(mtdParser.Metadata);
                            pehParser.Parse(lineNumber, line, errorList);
                            pepPositionMapping = new PositionMapping(pehParser.getFactory(), line);

                            // tell system to continue check peptide data line.
                            highWaterMark = 5;
                            break;
                        case 5:
                            if (pehParser == null) {
                                // header line should be check first.
                                error = new MZTabError(LogicalErrorType.NoHeaderLine, lineNumber,
                                                       MZTabFileParser.subString(line));
                                throw new MZTabException(error);
                            }

                            if (pepParser == null) {
                                pepParser = new PEPLineParser(pehParser.getFactory(), pepPositionMapping,
                                                              mtdParser.Metadata,
                                                              errorList);
                            }
                            pepParser.Parse(lineNumber, line, errorList);
                            peptideMap.Add(lineNumber, pepParser.getRecord(line));

                            break;
                        case 6:
                            if (pshParser != null) {
                                // header line only display once!
                                error = new MZTabError(LogicalErrorType.HeaderLine, lineNumber,
                                                       MZTabFileParser.subString(line));
                                throw new MZTabException(error);
                            }

                            // psm header section
                            pshParser = new PSHLineParser(mtdParser.Metadata);
                            pshParser.Parse(lineNumber, line, errorList);
                            psmPositionMapping = new PositionMapping(pshParser.getFactory(), line);

                            // tell system to continue check psm data line.
                            highWaterMark = 7;
                            break;
                        case 7:
                            if (pshParser == null) {
                                // header line should be check first.
                                error = new MZTabError(LogicalErrorType.NoHeaderLine, lineNumber,
                                                       MZTabFileParser.subString(line));
                                throw new MZTabException(error);
                            }

                            if (psmParser == null) {
                                psmParser = new PSMLineParser(pshParser.getFactory(), psmPositionMapping,
                                                              mtdParser.Metadata,
                                                              errorList);
                            }
                            psmParser.Parse(lineNumber, line, errorList);
                            psmMap.Add(lineNumber, psmParser.getRecord(line));

                            break;
                        case 8:
                            if (smhParser != null) {
                                // header line only display once!
                                error = new MZTabError(LogicalErrorType.HeaderLine, lineNumber,
                                                       MZTabFileParser.subString(line));
                                throw new MZTabException(error);
                            }

                            // small molecule header section
                            smhParser = new SMHLineParser(mtdParser.Metadata);
                            smhParser.Parse(lineNumber, line, errorList);
                            smlPositionMapping = new PositionMapping(smhParser.getFactory(), line);

                            // tell system to continue check small molecule data line.
                            highWaterMark = 9;
                            break;
                        case 9:
                            if (smhParser == null) {
                                // header line should be check first.
                                error = new MZTabError(LogicalErrorType.NoHeaderLine, lineNumber,
                                                       MZTabFileParser.subString(line));
                                throw new MZTabException(error);
                            }

                            if (smlParser == null) {
                                smlParser = new SMLLineParser(smhParser.getFactory(), smlPositionMapping,
                                                              mtdParser.Metadata,
                                                              errorList);
                            }
                            smlParser.Parse(lineNumber, line, errorList);
                            smallMoleculeMap.Add(lineNumber, smlParser.getRecord(line));

                            break;
                    }
                }
            } catch (Exception e) {
                // NOTE(review): this also catches the MZTabException instances thrown above, so
                // their structured MZTabError is replaced by a plain ParserError — confirm that
                // this is intended.
                outstream.WriteLine("Line {0}: {1}", lineNumber, e.Message);
                errorList.Add(new ParserError(lineNumber, e.Message));
            } finally {
                // Closing the reader also closes the underlying stream. Doing it in a finally
                // block releases both even when the handler above throws.
                reader.Close();
            }

            if (errorList.IsNullOrEmpty()) {
                // NOTE(review): the assembled MZTabFile is neither returned nor stored; only
                // the side effects of building it remain — confirm whether it should be exposed.
                MZTabFile mzTabFile = new MZTabFile(mtdParser.Metadata);
                foreach (int id in commentMap.Keys) {
                    mzTabFile.addComment(id, commentMap[id]);
                }

                if (prhParser != null) {
                    MZTabColumnFactory proteinColumnFactory = prhParser.getFactory();
                    mzTabFile.setProteinColumnFactory(proteinColumnFactory);
                    foreach (int id in proteinMap.Keys) {
                        mzTabFile.addProtein(id, proteinMap[id]);
                    }
                }

                if (pehParser != null) {
                    MZTabColumnFactory peptideColumnFactory = pehParser.getFactory();
                    mzTabFile.setPeptideColumnFactory(peptideColumnFactory);
                    foreach (int id in peptideMap.Keys) {
                        mzTabFile.addPeptide(id, peptideMap[id]);
                    }
                }

                if (pshParser != null) {
                    MZTabColumnFactory psmColumnFactory = pshParser.getFactory();
                    mzTabFile.setPSMColumnFactory(psmColumnFactory);
                    foreach (int id in psmMap.Keys) {
                        mzTabFile.addPSM(id, psmMap[id]);
                    }
                }

                if (smhParser != null) {
                    MZTabColumnFactory smallMoleculeColumnFactory = smhParser.getFactory();
                    mzTabFile.setSmallMoleculeColumnFactory(smallMoleculeColumnFactory);
                    foreach (int id in smallMoleculeMap.Keys) {
                        mzTabFile.addSmallMolecule(id, smallMoleculeMap[id]);
                    }
                }
            }
        }
Ejemplo n.º 10
0
 /**
  * Parses a line by delegating directly to the base parser; no additional checks are made.
  * Declared with "new", so it hides the base member rather than overriding it.
  *
  * @param lineNumber line number forwarded to the base parser
  * @param line       the raw line
  * @param errorList  error list forwarded to the base parser
  */
 public new void Parse(int lineNumber, String line, MZTabErrorList errorList)
 {
     base.Parse(lineNumber, line, errorList);
 }