Beispiel #1
0
        /// <summary>
        /// Reads every line from <c>Inputs</c>, chooses a row reader (from explicit
        /// <paramref name="indices"/> or by inspecting the first rows), then converts each row
        /// into peptide group nodes.
        /// </summary>
        /// <param name="progressMonitor">Optional monitor polled for cancellation; may be null.</param>
        /// <param name="indices">Known column indices, or null to detect the columns from the data.</param>
        /// <param name="dictNameSeq">Passed through to <c>AddRow</c> for each parsed row.</param>
        /// <param name="irtPeptides">Receives a new list that is handed to <c>AddRow</c>/<c>AddPeptideGroup</c>; emptied on cancel.</param>
        /// <param name="librarySpectra">Receives a new list that is handed to <c>AddRow</c>/<c>AddPeptideGroup</c>; emptied on cancel.</param>
        /// <param name="errorList">Receives one entry per row rejected by the row reader; emptied on cancel.</param>
        /// <returns>The merged peptide groups, or an empty array when the import is canceled.</returns>
        /// <exception cref="InvalidDataException">The input is empty or its first data row has fewer than three fields.</exception>
        /// <exception cref="LineColNumberedIoException">No peptide column could be located.</exception>
        public IEnumerable<PeptideGroupDocNode> Import(IProgressMonitor progressMonitor,
            ColumnIndices indices,
            IDictionary<string, FastaSequence> dictNameSeq,
            out List<MeasuredRetentionTime> irtPeptides,
            out List<SpectrumMzInfo> librarySpectra,
            out List<TransitionImportErrorInfo> errorList)
        {
            irtPeptides = new List<MeasuredRetentionTime>();
            librarySpectra = new List<SpectrumMzInfo>();
            errorList = new List<TransitionImportErrorInfo>();
            var status = new ProgressStatus(string.Empty);
            // NOTE(review): status is updated throughout this method but is never handed to
            // progressMonitor in the visible code - confirm whether a progress callback is missing.
            if (progressMonitor != null)
            {
                if (progressMonitor.IsCanceled)
                    return new PeptideGroupDocNode[0];
                status = status.ChangeMessage(Resources.MassListImporter_Import_Reading_transition_list);
            }

            // Materialize all lines up front: they are used both to guess the columns and to build the row reader
            var lines = new List<string>(Inputs.ReadLines());

            if (progressMonitor != null)
            {
                if (progressMonitor.IsCanceled)
                    return new PeptideGroupDocNode[0];
                status = status.ChangeMessage(Resources.MassListImporter_Import_Inspecting_peptide_sequence_information);
            }
            if (indices != null)
            {
                _rowReader = new GeneralRowReader(FormatProvider, Separator, indices, Settings, lines);
            }
            else
            {
                // Check first line for validity
                var line = lines.FirstOrDefault();
                // Empty input: report it as an invalid transition list instead of failing on a null line
                if (line == null)
                    throw new InvalidDataException(Resources.MassListImporter_Import_Invalid_transition_list_Transition_lists_must_contain_at_least_precursor_m_z_product_m_z_and_peptide_sequence);
                string[] fields = line.ParseDsvFields(Separator);
                // A first row without any numeric field is treated as a header row
                string[] headers = fields.All(field => GetColumnType(field.Trim(), FormatProvider) != typeof (double))
                                       ? fields
                                       : null;
                int decoyColumn = -1;
                int irtColumn = -1;
                int libraryColumn = -1;
                var irtNames = new[] { "tr_recalibrated", "irt" }; // Not L10N
                var libraryNames = new[] { "libraryintensity", "relativeintensity", "relative_intensity", "relativefragmentintensity", "library_intensity" }; // Not L10N
                var decoyNames = new[] { "decoy" }; // Not L10N
                if (headers != null)
                {
                    lines.RemoveAt(0);
                    decoyColumn = headers.IndexOf(col => decoyNames.Contains(col.ToLowerInvariant()));
                    irtColumn = headers.IndexOf(col => irtNames.Contains(col.ToLowerInvariant()));
                    libraryColumn = headers.IndexOf(col => libraryNames.Contains(col.ToLowerInvariant()));
                    line = lines.FirstOrDefault();
                    fields = line != null ? line.ParseDsvFields(Separator) : new string[0];
                }
                if (fields.Length < 3)
                    throw new InvalidDataException(Resources.MassListImporter_Import_Invalid_transition_list_Transition_lists_must_contain_at_least_precursor_m_z_product_m_z_and_peptide_sequence);

                // Try the ExPeptide format first, then fall back to general column detection
                _rowReader = ExPeptideRowReader.Create(lines, decoyColumn, FormatProvider, Separator, Settings, irtColumn, libraryColumn);
                if (_rowReader == null)
                {
                    _rowReader = GeneralRowReader.Create(lines, headers, decoyColumn, FormatProvider, Separator, Settings, irtColumn, libraryColumn);
                    // Only retry with the first line as a header when at least one data line
                    // would remain; GeneralRowReader.Create requires a non-empty list.
                    if (_rowReader == null && headers == null && lines.Count > 1)
                    {
                        // Check for a possible header row
                        headers = lines[0].Split(Separator);
                        lines.RemoveAt(0);
                        _rowReader = GeneralRowReader.Create(lines, headers, decoyColumn, FormatProvider, Separator, Settings, irtColumn, libraryColumn);
                    }
                    if (_rowReader == null)
                        throw new LineColNumberedIoException(Resources.MassListImporter_Import_Failed_to_find_peptide_column, 1, -1);
                }
            }

            // Set starting values for limit counters
            _countPeptides = Document.PeptideCount;
            _countIons = Document.PeptideTransitionCount;

            List<PeptideGroupDocNode> peptideGroupsNew = new List<PeptideGroupDocNode>();
            PeptideGroupBuilder seqBuilder = null;

            // Process lines
            long lineIndex = 0;
            foreach (string row in lines)
            {
                lineIndex++;

                // Poll for cancellation before parsing so that a run of rejected rows
                // cannot keep the import from being canceled.
                if (progressMonitor != null && progressMonitor.IsCanceled)
                {
                    irtPeptides.Clear();
                    librarySpectra.Clear();
                    errorList.Clear();
                    return new PeptideGroupDocNode[0];
                }

                var errorInfo = _rowReader.NextRow(row, lineIndex);
                if (errorInfo != null)
                {
                    errorList.Add(errorInfo);
                    continue;
                }

                if (progressMonitor != null)
                {
                    int percentComplete = (int)(lineIndex * 100 / lines.Count);

                    if (status.PercentComplete != percentComplete)
                    {
                        string message = string.Format(Resources.MassListImporter_Import_Importing__0__,
                            _rowReader.TransitionInfo.ProteinName ?? _rowReader.TransitionInfo.PeptideSequence);
                        status = status.ChangePercentComplete(percentComplete).ChangeMessage(message);
                    }
                }

                seqBuilder = AddRow(seqBuilder, _rowReader, dictNameSeq, peptideGroupsNew, lineIndex, irtPeptides, librarySpectra, errorList);
            }

            // Add last sequence.
            if (seqBuilder != null)
                AddPeptideGroup(peptideGroupsNew, seqBuilder, irtPeptides, librarySpectra, errorList);

            return MergeEqualGroups(peptideGroupsNew);
        }
Beispiel #2
0
            // Format template for a regular expression that must match a whole field made of
            // dot-separated parts: group 1 is a run of characters other than '.' and space,
            // group 2 is a run of upper-case letters, digits and the characters _ + - [ ],
            // then an unconstrained part, and finally group 3 is either "light" or whatever
            // text is substituted for the {0} format item.
            private const string REGEX_PEPTIDE_FORMAT = @"^([^. ]+)\.([A-Z0-9_+\-\[\]]+)\..+\.(light|{0})$"; // Not L10N

            #endregion Fields

            #region Constructors

            /// <summary>
            /// Builds a row reader for the ExPeptide format. The base class receives the
            /// sequences produced by <c>GetSequencesFromLines</c> for the given lines, and the
            /// regular expression is stored in <c>ExPeptideRegex</c>.
            /// </summary>
            /// <param name="provider">Format provider forwarded to the base class.</param>
            /// <param name="separator">Field separator character for the lines.</param>
            /// <param name="indices">Column indices forwarded to the base class.</param>
            /// <param name="exPeptideRegex">Expression applied to the peptide column of each line.</param>
            /// <param name="settings">Settings forwarded to the base class.</param>
            /// <param name="lines">Raw text lines of the transition list.</param>
            private ExPeptideRowReader(
                IFormatProvider provider, char separator, ColumnIndices indices,
                Regex exPeptideRegex, SrmSettings settings, IEnumerable<string> lines)
                : base(provider, separator, indices, settings,
                       GetSequencesFromLines(lines, separator, indices, exPeptideRegex))
            {
                ExPeptideRegex = exPeptideRegex;
            }
Beispiel #3
0
 /// <summary>
 /// Stores the parsing configuration, creates the modification matcher from the
 /// supplied sequences, and starts with an empty node dictionary.
 /// </summary>
 /// <param name="provider">Stored in <c>FormatProvider</c>.</param>
 /// <param name="separator">Stored in <c>Separator</c>.</param>
 /// <param name="indices">Stored in <c>Indices</c>.</param>
 /// <param name="settings">Stored in <c>Settings</c> and passed to <c>CreateModificationMatcher</c>.</param>
 /// <param name="sequences">Sequences passed to <c>CreateModificationMatcher</c>.</param>
 protected MassListRowReader(
     IFormatProvider provider, char separator, ColumnIndices indices,
     SrmSettings settings, IEnumerable<string> sequences)
 {
     // Plain configuration captured for later row parsing
     FormatProvider = provider;
     Separator = separator;
     Indices = indices;
     Settings = settings;

     // Derived state
     ModMatcher = CreateModificationMatcher(settings, sequences);
     NodeDictionary = new Dictionary<string, PeptideDocNode>();
 }
Beispiel #4
0
 /// <summary>
 /// Convenience overload that forwards to the full <c>Import</c> overload and
 /// discards the three lists it hands back through its out parameters.
 /// </summary>
 /// <param name="progressMonitor">Forwarded unchanged.</param>
 /// <param name="indices">Forwarded unchanged.</param>
 /// <param name="dictNameSeq">Forwarded unchanged.</param>
 /// <returns>Whatever the full overload returns.</returns>
 public IEnumerable<PeptideGroupDocNode> Import(IProgressMonitor progressMonitor,
     ColumnIndices indices,
     IDictionary<string, FastaSequence> dictNameSeq)
 {
     // Required by the full overload's signature; the values are not used here.
     List<MeasuredRetentionTime> unusedIrtPeptides;
     List<SpectrumMzInfo> unusedLibrarySpectra;
     List<TransitionImportErrorInfo> unusedErrors;

     var peptideGroups = Import(progressMonitor, indices, dictNameSeq,
         out unusedIrtPeptides, out unusedLibrarySpectra, out unusedErrors);
     return peptideGroups;
 }
Beispiel #5
0
 /// <summary>
 /// Lazily projects each line to the value of its peptide column after it has been
 /// passed through <c>RemoveModifiedSequenceNotes</c>.
 /// </summary>
 /// <param name="lines">Raw text lines to split.</param>
 /// <param name="separator">Field separator used to split each line.</param>
 /// <param name="indices">Supplies the peptide column index.</param>
 /// <returns>A deferred sequence with one entry per input line.</returns>
 private static IEnumerable<string> GetSequencesFromLines(IEnumerable<string> lines, char separator, ColumnIndices indices)
 {
     return from textLine in lines
            let columns = textLine.ParseDsvFields(separator)
            select RemoveModifiedSequenceNotes(columns[indices.PeptideColumn]);
 }
Beispiel #6
0
            /// <summary>
            /// Attempts to work out which columns hold the peptide sequence, precursor m/z,
            /// product m/z, label type and protein, and builds a <see cref="GeneralRowReader"/>
            /// from those column indices.
            /// </summary>
            /// <param name="lines">Data lines to inspect; must contain at least one line.</param>
            /// <param name="headers">Header fields passed on to <c>FindProtein</c>.</param>
            /// <param name="iDecoy">Decoy column index, passed through to the precursor search and the resulting indices.</param>
            /// <param name="provider">Format provider passed to the column searches and the reader.</param>
            /// <param name="separator">Field separator used to split each line.</param>
            /// <param name="settings">Supplies the m/z match tolerance and the heavy modifications.</param>
            /// <param name="iirt">iRT column index, passed through to the resulting indices.</param>
            /// <param name="iLibrary">Library intensity column index, passed through to the resulting indices.</param>
            /// <returns>A new reader, or null when the first row has no sequence candidate column.</returns>
            /// <exception cref="MzMatchException">No precursor m/z or no product m/z column could be matched.</exception>
            public static GeneralRowReader Create(IList<string> lines, IList<string> headers, int iDecoy,
                IFormatProvider provider, char separator, SrmSettings settings, int iirt, int iLibrary)
            {
                // At least one line is required for column detection.
                Assume.IsTrue(lines.Count > 0);
                // Look for sequence column
                string[] fieldsFirstRow = null;
                PrecursorCandidate[] sequenceCandidates = null;
                int bestCandidateIndex = -1;
                int iLabelType = -1;

                double tolerance = settings.TransitionSettings.Instrument.MzMatchTolerance;

                foreach (var line in lines)
                {
                    string[] fields = line.ParseDsvFields(separator);
                    // Remember the first row; it is used below for the product and protein searches
                    if (fieldsFirstRow == null)
                        fieldsFirstRow = fields;

                    // Choose precursor field candidates from the first row
                    // (this block only runs on the first iteration, while sequenceCandidates is still null)
                    if (sequenceCandidates == null)
                    {
                        iLabelType = FindLabelType(fields, lines, separator);

                        // If no sequence column found, return null.  After this, all errors throw.
                        var newSeqCandidates = FindSequenceCandidates(fields);
                        if (newSeqCandidates.Length == 0)
                            return null;

                        var listNewCandidates = new List<PrecursorCandidate>();
                        foreach (var candidateIndex in newSeqCandidates)
                        {
                            string sequence = RemoveSequenceNotes(fields[candidateIndex]);
                            string modifiedSequence = RemoveModifiedSequenceNotes(fields[candidateIndex]);
                            // Default to light unless a label type column was found
                            IsotopeLabelType labelType = IsotopeLabelType.light;
                            if (iLabelType != -1)
                                labelType = GetLabelType(fields[iLabelType]);
                            IList<TransitionExp> transitionExps;
                            int candidateMzIndex = FindPrecursor(fields, sequence, modifiedSequence, labelType, candidateIndex, iDecoy,
                                                       tolerance, provider, settings, out transitionExps);
                            // If no match, and no specific label type, then try heavy.
                            if (settings.PeptideSettings.Modifications.HasHeavyModifications &&
                                    candidateMzIndex == -1 && iLabelType == -1)
                            {
                                var peptideMods = settings.PeptideSettings.Modifications;
                                foreach (var typeMods in peptideMods.GetHeavyModifications())
                                {
                                    // Skip label types for which the settings return no precursor calculator
                                    if (settings.TryGetPrecursorCalc(typeMods.LabelType, null) != null)
                                    {
                                        candidateMzIndex = FindPrecursor(fields, sequence, modifiedSequence, typeMods.LabelType, candidateIndex, iDecoy,
                                                                   tolerance, provider, settings, out transitionExps);
                                        // Stop at the first heavy label type that yields a precursor column
                                        if (candidateMzIndex != -1)
                                            break;
                                    }
                                }
                            }

                            // Keep only sequence candidates that have a matching precursor m/z column
                            if (candidateMzIndex != -1)
                                listNewCandidates.Add(new PrecursorCandidate(candidateIndex, candidateMzIndex, sequence, transitionExps));
                        }

                        if (listNewCandidates.Count == 0)
                            throw new MzMatchException(Resources.GeneralRowReader_Create_No_valid_precursor_m_z_column_found, 1, -1);
                        sequenceCandidates = listNewCandidates.ToArray();
                    }

                    // Evaluate the first-row candidates against the current row's fields
                    bestCandidateIndex = FindBestCandidate(sequenceCandidates, fields);
                    // Break if a best candidate was found
                    if (bestCandidateIndex != -1)
                        break;
                }
                // Only reachable with a null candidate array when the loop body never ran
                if (sequenceCandidates == null)
                    return null;

                // No row singled out a candidate: fall back to the first one
                if (bestCandidateIndex == -1)
                    bestCandidateIndex = 0;

                var prec = sequenceCandidates[bestCandidateIndex];
                int iSequence = prec.SequenceIndex;
                int iPrecursor = prec.PrecursorMzIdex;
                int iProduct = FindProduct(fieldsFirstRow, prec.Sequence, prec.TransitionExps, prec.SequenceIndex, prec.PrecursorMzIdex,
                    tolerance, provider, settings);
                if (iProduct == -1)
                    throw new MzMatchException(Resources.GeneralRowReader_Create_No_valid_product_m_z_column_found, 1, -1);

                int iProtein = FindProtein(fieldsFirstRow, iSequence, lines, headers, provider, separator);

                var indices = new ColumnIndices(iProtein, iSequence, iPrecursor, iProduct, iLabelType, iDecoy, iirt, iLibrary);

                return new GeneralRowReader(provider, separator, indices, settings, lines);
            }
Beispiel #7
0
            // Fixed list of lower-case tokens: "true", "false", "heavy", "light", "unit".
            // NOTE(review): presumably field values that rule a column out as the protein
            // column; the code that reads this list is not visible here - confirm.
            private static readonly string[] EXCLUDE_PROTEIN_VALUES = { "true", "false", "heavy", "light", "unit" }; // Not L10N

            #endregion Fields

            #region Constructors

            /// <summary>
            /// Builds a general row reader. The base class receives the sequences produced by
            /// <c>GetSequencesFromLines</c> for the given lines; no further state is set here.
            /// </summary>
            /// <param name="provider">Format provider forwarded to the base class.</param>
            /// <param name="separator">Field separator character for the lines.</param>
            /// <param name="indices">Column indices forwarded to the base class.</param>
            /// <param name="settings">Settings forwarded to the base class.</param>
            /// <param name="lines">Raw text lines of the transition list.</param>
            public GeneralRowReader(
                IFormatProvider provider, char separator, ColumnIndices indices,
                SrmSettings settings, IEnumerable<string> lines)
                : base(provider, separator, indices, settings,
                       GetSequencesFromLines(lines, separator, indices))
            {
            }
Beispiel #8
0
 /// <summary>
 /// Lazily projects each line to the result of <c>GetModifiedSequence</c> applied to
 /// the regular-expression match of that line's peptide column.
 /// </summary>
 /// <param name="lines">Raw text lines to split.</param>
 /// <param name="separator">Field separator used to split each line.</param>
 /// <param name="indices">Supplies the peptide column index.</param>
 /// <param name="exPeptideRegex">Expression matched against the peptide column value.</param>
 /// <returns>A deferred sequence with one entry per input line.</returns>
 private static IEnumerable<string> GetSequencesFromLines(IEnumerable<string> lines, char separator, ColumnIndices indices, Regex exPeptideRegex)
 {
     return from textLine in lines
            let columns = textLine.ParseDsvFields(separator)
            let exPeptideMatch = exPeptideRegex.Match(columns[indices.PeptideColumn])
            select GetModifiedSequence(exPeptideMatch);
 }
Beispiel #9
0
            /// <summary>
            /// Attempts to recognise the ExPeptide format in the first line and, if found,
            /// locates the precursor and product m/z columns and builds an
            /// <see cref="ExPeptideRowReader"/>.
            /// </summary>
            /// <param name="lines">Data lines to inspect; the first line is used for column detection.</param>
            /// <param name="iDecoy">Decoy column index, passed through to the precursor search and the resulting indices.</param>
            /// <param name="provider">Format provider passed to the column searches and the reader.</param>
            /// <param name="separator">Field separator used to split the first line.</param>
            /// <param name="settings">Supplies the heavy modifications and the m/z match tolerance.</param>
            /// <param name="iirt">iRT column index, passed through to the resulting indices.</param>
            /// <param name="iLibrary">Library intensity column index, passed through to the resulting indices.</param>
            /// <returns>A new reader, or null when no ExPeptide column is found in the first line.</returns>
            /// <exception cref="LineColNumberedIoException">A non-light entry was found but the settings have no heavy implicit modifications.</exception>
            /// <exception cref="MzMatchException">No precursor m/z or no product m/z column could be matched.</exception>
            public static ExPeptideRowReader Create(IList<string> lines, int iDecoy,
                IFormatProvider provider, char separator, SrmSettings settings, int iirt, int iLibrary)
            {
                // Split the first line into fields.
                Debug.Assert(lines.Count > 0);
                string[] fields = lines[0].ParseDsvFields(separator);

                // Create the ExPeptide regular expression.  Label type names are escaped so that
                // a name containing regex metacharacters is matched literally instead of
                // changing (or breaking) the pattern.
                var modSettings = settings.PeptideSettings.Modifications;
                var heavyTypeNames = from typedMods in modSettings.GetHeavyModifications()
                                     select Regex.Escape(typedMods.LabelType.Name ?? string.Empty);
                string exPeptideFormat = string.Format(REGEX_PEPTIDE_FORMAT, string.Join("|", heavyTypeNames.ToArray())); // Not L10N
                var exPeptideRegex = new Regex(exPeptideFormat);

                // Look for sequence column
                string sequence;
                string modifiedSequence;
                IsotopeLabelType labelType;
                int iExPeptide = FindExPeptide(fields, exPeptideRegex, settings,
                    out sequence, out modifiedSequence, out labelType);
                // If no sequence column found, return null.  After this,
                // all errors throw.
                if (iExPeptide == -1)
                    return null;

                if (!labelType.IsLight && !modSettings.HasHeavyImplicitModifications)
                {
                    var message = TextUtil.LineSeparate(Resources.ExPeptideRowReader_Create_Isotope_labeled_entry_found_without_matching_settings,
                                                        Resources.ExPeptideRowReaderCreateCheck_the_Modifications_tab_in_Transition_Settings);
                    throw new LineColNumberedIoException(message, 1, iExPeptide);
                }

                double tolerance = settings.TransitionSettings.Instrument.MzMatchTolerance;
                IList<TransitionExp> transitionExps;
                int iPrecursor = FindPrecursor(fields, sequence, modifiedSequence, labelType, iExPeptide, iDecoy,
                                               tolerance, provider, settings, out transitionExps);
                if (iPrecursor == -1)
                    throw new MzMatchException(Resources.GeneralRowReader_Create_No_valid_precursor_m_z_column_found, 1, -1);

                int iProduct = FindProduct(fields, sequence, transitionExps, iExPeptide, iPrecursor,
                    tolerance, provider, settings);
                if (iProduct == -1)
                    throw new MzMatchException(Resources.GeneralRowReader_Create_No_valid_product_m_z_column_found, 1, -1);

                // The single ExPeptide column serves as the protein, sequence and label type column
                var indices = new ColumnIndices(iExPeptide, iExPeptide, iPrecursor, iProduct, iExPeptide, iDecoy, iirt, iLibrary);
                return new ExPeptideRowReader(provider, separator, indices, exPeptideRegex, settings, lines);
            }