/**
         * Creates a mzTab data line parser. It holds the shared state used to parse a data line into a
         * {@link MZTabRecord} structure.
         *
         * NOTICE: {@link MZTabColumnFactory} maintains a set of {@link MZTabColumn} objects which have an internal
         * logical position and order. In a physical mzTab file, users are allowed to ignore this logical ordering
         * and provide their data in their own order. To distinguish the two, the physical position (a positive
         * integer) records the column location in the mzTab file, and the {@link PositionMapping} structure
         * maintains the mapping between physical and logical positions.
         *
         * @param factory SHOULD NOT be null
         * @param positionMapping SHOULD NOT be null
         * @param metadata SHOULD NOT be null
         * @param errorList list that collects parse errors; when null, a new empty {@link MZTabErrorList} is used
         * @throws NullReferenceException if factory, positionMapping or metadata is null
         */
        protected MZTabDataLineParser(MZTabColumnFactory factory, PositionMapping positionMapping,
                                      Metadata metadata, MZTabErrorList errorList)
        {
            // Validate all required arguments up front so that no field is assigned when construction fails.
            // NullReferenceException is kept (rather than ArgumentNullException) so that callers which
            // already catch it keep working.
            if (factory == null){
                throw new NullReferenceException("Column header factory should be create first.");
            }
            if (positionMapping == null){
                // Previously this case failed implicitly on positionMapping.exchange() with the same
                // exception type but no message.
                throw new NullReferenceException("Position mapping should be created first.");
            }
            if (metadata == null){
                throw new NullReferenceException("Metadata should be parser first.");
            }

            this.factory = factory;
            this.positionMapping = positionMapping;
            this.metadata = metadata;

            // Derived lookups, computed once per parser instance.
            exchangeMapping = positionMapping.exchange();
            mapping = factory.GetOffsetColumnsMap();

            // Fall back to a private error list when the caller does not supply one.
            _errorList = errorList ?? new MZTabErrorList();
        }
Ejemplo n.º 2
0
 /**
  * Creates a PSM line parser. All four arguments are forwarded unchanged to the base-class constructor;
  * this constructor adds no state of its own.
  *
  * @param factory column factory passed to the base constructor
  * @param positionMapping position mapping passed to the base constructor
  * @param metadata metadata passed to the base constructor
  * @param errorList error list passed to the base constructor
  */
 public PSMLineParser(
     MZTabColumnFactory factory,
     PositionMapping positionMapping,
     Metadata metadata,
     MZTabErrorList errorList)
     : base(factory, positionMapping, metadata, errorList)
 {
     // Intentionally empty: initialization is done entirely by the base constructor.
 }
Ejemplo n.º 3
0
        /**
         * Reads a mzTab document line by line from {@code reader}, dispatches every line to the parser of
         * its section, and collects non-fatal problems in {@code errorList}. When no error has been recorded
         * after reading, {@code _mzTabFile} is populated with the metadata, comments and records that were
         * parsed; otherwise {@code _mzTabFile} is not assigned by this method.
         *
         * Section levels handled here: 1 metadata, 2/3 protein header/data, 4/5 peptide header/data,
         * 6/7 PSM header/data, 8/9 small molecule header/data. Sections must appear in non-decreasing
         * level order.
         *
         * The reader is always closed by this method, including when an exception is thrown.
         *
         * @param reader source of the mzTab text; closed before this method returns or throws
         * @throws MZTabException when a line has an unknown prefix, sections appear out of order, a header
         *         line is repeated, or a data line appears before its header line
         * NOTE(review): the original documentation also lists an IO exception and MZTabErrorOverflowException
         * (max error count exceeded); these would come from the reader or the line parsers and are not
         * raised directly in this method.
         */
        public void check(StreamReader reader)
        {
            COMLineParser comParser = new COMLineParser();
            MTDLineParser mtdParser = new MTDLineParser();
            PRHLineParser prhParser = null;
            PRTLineParser prtParser = null;
            PEHLineParser pehParser = null;
            PEPLineParser pepParser = null;
            PSHLineParser pshParser = null;
            PSMLineParser psmParser = null;
            SMHLineParser smhParser = null;
            SMLLineParser smlParser = null;

            // Parsed items keyed by their line number, kept sorted so they are added in file order.
            SortedDictionary<int, Comment> commentMap = new SortedDictionary<int, Comment>();
            SortedDictionary<int, Protein> proteinMap = new SortedDictionary<int, Protein>();
            SortedDictionary<int, Peptide> peptideMap = new SortedDictionary<int, Peptide>();
            SortedDictionary<int, PSM> psmMap = new SortedDictionary<int, PSM>();
            SortedDictionary<int, SmallMolecule> smallMoleculeMap = new SortedDictionary<int, SmallMolecule>();

            // Physical-to-logical column mappings, created when the matching header line is parsed.
            PositionMapping prtPositionMapping = null;
            PositionMapping pepPositionMapping = null;
            PositionMapping psmPositionMapping = null;
            PositionMapping smlPositionMapping = null;

            // The using statement guarantees the reader is released even when an MZTabException
            // aborts the loop; previously the Close() call after the loop was skipped in that case.
            using (reader){
                string line;
                // Highest section level seen so far; a line from a lower level is an ordering error.
                int highWaterMark = 1;
                int lineNumber = 0;
                while ((line = reader.ReadLine()) != null){
                    lineNumber++;

                    // Blank lines, "MTH" lines and "#" lines are ignored.
                    if (string.IsNullOrEmpty(line) || line.StartsWith("MTH") || line.StartsWith("#")){
                        continue;
                    }

                    // Comment lines are allowed anywhere and do not affect section ordering.
                    if (line.StartsWith(Section.Comment.Prefix)){
                        comParser.Parse(lineNumber, line, errorList);
                        commentMap.Add(lineNumber, comParser.getComment());
                        continue;
                    }

                    Section section = getSection(line);
                    MZTabError error;
                    if (section == null){
                        error = new MZTabError(FormatErrorType.LinePrefix, lineNumber, subString(line));
                        throw new MZTabException(error);
                    }
                    if (section.Level < highWaterMark){
                        Section currentSection = Section.FindSection(highWaterMark);
                        error = new MZTabError(LogicalErrorType.LineOrder, lineNumber, currentSection.Name, section.Name);
                        throw new MZTabException(error);
                    }

                    highWaterMark = section.Level;
                    // There exists errors during checking metadata section.
                    if (highWaterMark == 1 && ! errorList.IsNullOrEmpty()){
                        break;
                    }

                    switch (highWaterMark){
                        case 1:
                            // metadata section.
                            mtdParser.Parse(lineNumber, line, errorList);
                            break;
                        case 2:
                            // NOTE(review): this duplicate-header branch (and its counterparts in cases
                            // 4, 6 and 8) appears unreachable: after the first header highWaterMark is
                            // raised past the header level, so a repeated header is rejected by the
                            // LineOrder check above first. Confirm before relying on HeaderLine errors.
                            if (prhParser != null){
                                // header line only display once!
                                error = new MZTabError(LogicalErrorType.HeaderLine, lineNumber, subString(line));
                                throw new MZTabException(error);
                            }

                            // protein header section
                            prhParser = new PRHLineParser(mtdParser.Metadata);
                            prhParser.Parse(lineNumber, line, errorList);
                            prtPositionMapping = new PositionMapping(prhParser.getFactory(), line);

                            // tell system to continue check protein data line.
                            highWaterMark = 3;
                            break;
                        case 3:
                            if (prhParser == null){
                                // header line should be check first.
                                error = new MZTabError(LogicalErrorType.NoHeaderLine, lineNumber, subString(line));
                                throw new MZTabException(error);
                            }

                            // The data parser is created lazily on the first protein data line.
                            if (prtParser == null){
                                prtParser = new PRTLineParser(prhParser.getFactory(), prtPositionMapping, mtdParser.Metadata,
                                                              errorList);
                            }
                            prtParser.Parse(lineNumber, line, errorList);
                            proteinMap.Add(lineNumber, prtParser.getRecord(line));

                            break;
                        case 4:
                            if (pehParser != null){
                                // header line only display once!
                                error = new MZTabError(LogicalErrorType.HeaderLine, lineNumber, subString(line));
                                throw new MZTabException(error);
                            }

                            // A peptide section in an Identification-type file is recorded as an error,
                            // but parsing continues.
                            if (mtdParser.Metadata.MzTabType == MzTabType.Identification){
                                errorList.Add(new MZTabError(LogicalErrorType.PeptideSection, lineNumber, subString(line)));
                            }

                            // peptide header section
                            pehParser = new PEHLineParser(mtdParser.Metadata);
                            pehParser.Parse(lineNumber, line, errorList);
                            pepPositionMapping = new PositionMapping(pehParser.getFactory(), line);

                            // tell system to continue check peptide data line.
                            highWaterMark = 5;
                            break;
                        case 5:
                            if (pehParser == null){
                                // header line should be check first.
                                error = new MZTabError(LogicalErrorType.NoHeaderLine, lineNumber, subString(line));
                                throw new MZTabException(error);
                            }

                            if (pepParser == null){
                                pepParser = new PEPLineParser(pehParser.getFactory(), pepPositionMapping, mtdParser.Metadata,
                                                              errorList);
                            }
                            pepParser.Parse(lineNumber, line, errorList);
                            peptideMap.Add(lineNumber, pepParser.getRecord(line));

                            break;
                        case 6:
                            if (pshParser != null){
                                // header line only display once!
                                error = new MZTabError(LogicalErrorType.HeaderLine, lineNumber, subString(line));
                                throw new MZTabException(error);
                            }

                            // psm header section
                            pshParser = new PSHLineParser(mtdParser.Metadata);
                            pshParser.Parse(lineNumber, line, errorList);
                            psmPositionMapping = new PositionMapping(pshParser.getFactory(), line);

                            // tell system to continue check psm data line.
                            highWaterMark = 7;
                            break;
                        case 7:
                            if (pshParser == null){
                                // header line should be check first.
                                error = new MZTabError(LogicalErrorType.NoHeaderLine, lineNumber, subString(line));
                                throw new MZTabException(error);
                            }

                            if (psmParser == null){
                                psmParser = new PSMLineParser(pshParser.getFactory(), psmPositionMapping, mtdParser.Metadata,
                                                              errorList);
                            }
                            psmParser.Parse(lineNumber, line, errorList);
                            psmMap.Add(lineNumber, psmParser.getRecord(line));

                            break;
                        case 8:
                            if (smhParser != null){
                                // header line only display once!
                                error = new MZTabError(LogicalErrorType.HeaderLine, lineNumber, subString(line));
                                throw new MZTabException(error);
                            }

                            // small molecule header section
                            smhParser = new SMHLineParser(mtdParser.Metadata);
                            smhParser.Parse(lineNumber, line, errorList);
                            smlPositionMapping = new PositionMapping(smhParser.getFactory(), line);

                            // tell system to continue check small molecule data line.
                            highWaterMark = 9;
                            break;
                        case 9:
                            if (smhParser == null){
                                // header line should be check first.
                                error = new MZTabError(LogicalErrorType.NoHeaderLine, lineNumber, subString(line));
                                throw new MZTabException(error);
                            }

                            if (smlParser == null){
                                smlParser = new SMLLineParser(smhParser.getFactory(), smlPositionMapping, mtdParser.Metadata,
                                                              errorList);
                            }
                            smlParser.Parse(lineNumber, line, errorList);
                            smallMoleculeMap.Add(lineNumber, smlParser.getRecord(line));

                            break;
                    }
                }
            }

            // Only build the in-memory file when parsing finished without any recorded error.
            if (errorList.IsNullOrEmpty()){
                _mzTabFile = new MZTabFile(mtdParser.Metadata);
                // Enumerating the sorted dictionaries directly yields entries in ascending line-number
                // order, the same order as iterating Keys, without a second lookup per entry.
                foreach (KeyValuePair<int, Comment> entry in commentMap){
                    _mzTabFile.addComment(entry.Key, entry.Value);
                }

                if (prhParser != null){
                    MZTabColumnFactory proteinColumnFactory = prhParser.getFactory();
                    _mzTabFile.setProteinColumnFactory(proteinColumnFactory);
                    foreach (KeyValuePair<int, Protein> entry in proteinMap){
                        _mzTabFile.addProtein(entry.Key, entry.Value);
                    }
                }

                if (pehParser != null){
                    MZTabColumnFactory peptideColumnFactory = pehParser.getFactory();
                    _mzTabFile.setPeptideColumnFactory(peptideColumnFactory);
                    foreach (KeyValuePair<int, Peptide> entry in peptideMap){
                        _mzTabFile.addPeptide(entry.Key, entry.Value);
                    }
                }

                if (pshParser != null){
                    MZTabColumnFactory psmColumnFactory = pshParser.getFactory();
                    _mzTabFile.setPSMColumnFactory(psmColumnFactory);
                    foreach (KeyValuePair<int, PSM> entry in psmMap){
                        _mzTabFile.addPSM(entry.Key, entry.Value);
                    }
                }

                if (smhParser != null){
                    MZTabColumnFactory smallMoleculeColumnFactory = smhParser.getFactory();
                    _mzTabFile.setSmallMoleculeColumnFactory(smallMoleculeColumnFactory);
                    foreach (KeyValuePair<int, SmallMolecule> entry in smallMoleculeMap){
                        _mzTabFile.addSmallMolecule(entry.Key, entry.Value);
                    }
                }
            }
        }