示例#1
0
        public SrmDocument Import(TextReader reader, IProgressMonitor progressMonitor, long lineCount, bool isMinutes, bool removeMissing = false, bool changePeaks = true)
        {
            IProgressStatus status = new ProgressStatus(Resources.PeakBoundaryImporter_Import_Importing_Peak_Boundaries);
            double          timeConversionFactor = isMinutes ? 1.0 : 60.0;
            int             linesRead            = 0;
            int             progressPercent      = 0;
            var             docNew                = (SrmDocument)Document.ChangeIgnoreChangingChildren(true);
            var             docReference          = docNew;
            var             sequenceToNode        = MakeSequenceDictionary(Document);
            var             fileNameToFileMatch   = new Dictionary <string, ChromSetFileMatch>();
            var             trackAdjustedResults  = new HashSet <ResultsKey>();
            var             modMatcher            = new ModificationMatcher();
            var             canonicalSequenceDict = new Dictionary <string, string>();

            // Add annotations as possible columns
            var allFieldNames = new List <string[]>(FIELD_NAMES);

            allFieldNames.AddRange(from def in Document.Settings.DataSettings.AnnotationDefs
                                   where def.AnnotationTargets.Contains(AnnotationDef.AnnotationTarget.precursor_result)
                                   select new[] { def.Name });

            string line = reader.ReadLine();

            linesRead++;
            int[] fieldIndices;
            int   fieldsTotal;
            // If we aren't changing peaks, allow start and end time to be missing
            var  requiredFields   = changePeaks ? REQUIRED_FIELDS : REQUIRED_NO_CHROM;
            char correctSeparator = ReadFirstLine(line, allFieldNames, requiredFields, out fieldIndices, out fieldsTotal);

            while ((line = reader.ReadLine()) != null)
            {
                linesRead++;
                if (progressMonitor != null)
                {
                    if (progressMonitor.IsCanceled)
                    {
                        return(Document);
                    }
                    int progressNew = (int)(linesRead * 100 / lineCount);
                    if (progressPercent != progressNew)
                    {
                        progressMonitor.UpdateProgress(status = status.ChangePercentComplete(progressNew));
                        progressPercent = progressNew;
                    }
                }
                var dataFields = new DataFields(fieldIndices, line.ParseDsvFields(correctSeparator), allFieldNames);
                if (dataFields.Length != fieldsTotal)
                {
                    throw new IOException(string.Format(Resources.PeakBoundaryImporter_Import_Line__0__field_count__1__differs_from_the_first_line__which_has__2_,
                                                        linesRead, dataFields.Length, fieldsTotal));
                }
                string modifiedPeptideString = dataFields.GetField(Field.modified_peptide);
                string fileName = dataFields.GetField(Field.filename);
                bool   isDecoy  = dataFields.IsDecoy(linesRead);
                IList <IdentityPath> pepPaths;

                if (!sequenceToNode.TryGetValue(Tuple.Create(modifiedPeptideString, isDecoy), out pepPaths))
                {
                    string canonicalSequence;
                    if (!canonicalSequenceDict.TryGetValue(modifiedPeptideString, out canonicalSequence))
                    {
                        if (modifiedPeptideString.Any(c => c < 'A' || c > 'Z'))
                        {
                            modMatcher.CreateMatches(Document.Settings,
                                                     new List <string> {
                                modifiedPeptideString
                            },
                                                     Settings.Default.StaticModList,
                                                     Settings.Default.HeavyModList);
                            var nodeForModPep = modMatcher.GetModifiedNode(modifiedPeptideString);
                            if (nodeForModPep == null)
                            {
                                throw new IOException(string.Format(Resources.PeakBoundaryImporter_Import_Peptide_has_unrecognized_modifications__0__at_line__1_, modifiedPeptideString, linesRead));
                            }
                            nodeForModPep = nodeForModPep.ChangeSettings(Document.Settings, SrmSettingsDiff.ALL);
                            // Convert the modified peptide string into a standardized form that
                            // converts unimod, names, etc, into masses, eg [+57.0]
                            canonicalSequence = nodeForModPep.ModifiedTarget.Sequence;
                            canonicalSequenceDict.Add(modifiedPeptideString, canonicalSequence);
                        }
                    }
                    if (null != canonicalSequence)
                    {
                        sequenceToNode.TryGetValue(Tuple.Create(canonicalSequence, isDecoy), out pepPaths);
                    }
                }
                if (null == pepPaths)
                {
                    UnrecognizedPeptides.Add(modifiedPeptideString);
                    continue;
                }
                Adduct charge;
                bool   chargeSpecified = dataFields.TryGetCharge(linesRead, out charge);
                string sampleName      = dataFields.GetField(Field.sample_name);

                double?apexTime = dataFields.GetTime(Field.apex_time, timeConversionFactor,
                                                     Resources.PeakBoundaryImporter_Import_The_value___0___on_line__1__is_not_a_valid_time_, linesRead);
                double?startTime = dataFields.GetTime(Field.start_time, timeConversionFactor,
                                                      Resources.PeakBoundaryImporter_Import_The_value___0___on_line__1__is_not_a_valid_start_time_, linesRead);
                double?endTime = dataFields.GetTime(Field.end_time, timeConversionFactor,
                                                    Resources.PeakBoundaryImporter_Import_The_value___0___on_line__1__is_not_a_valid_end_time_, linesRead);

                // Error if only one of startTime and endTime is null
                if (startTime == null && endTime != null)
                {
                    if (changePeaks)
                    {
                        throw new IOException(string.Format(Resources.PeakBoundaryImporter_Import_Missing_start_time_on_line__0_, linesRead));
                    }
                    endTime = null;
                }
                if (startTime != null && endTime == null)
                {
                    if (changePeaks)
                    {
                        throw new IOException(string.Format(Resources.PeakBoundaryImporter_Import_Missing_end_time_on_line__0_, linesRead));
                    }
                    startTime = null;
                }
                // Add filename to second dictionary if not yet encountered
                ChromSetFileMatch fileMatch;
                if (!fileNameToFileMatch.TryGetValue(fileName, out fileMatch))
                {
                    fileMatch = Document.Settings.MeasuredResults.FindMatchingMSDataFile(MsDataFileUri.Parse(fileName));
                    fileNameToFileMatch.Add(fileName, fileMatch);
                }
                if (fileMatch == null)
                {
                    UnrecognizedFiles.Add(fileName);
                    continue;
                }
                var               chromSet = fileMatch.Chromatograms;
                string            nameSet  = chromSet.Name;
                ChromFileInfoId[] fileIds;
                if (sampleName == null)
                {
                    fileIds = chromSet.MSDataFileInfos.Select(x => x.FileId).ToArray();
                }
                else
                {
                    var sampleFile = chromSet.MSDataFileInfos.FirstOrDefault(info => Equals(sampleName, info.FilePath.GetSampleName()));
                    if (sampleFile == null)
                    {
                        throw new IOException(string.Format(Resources.PeakBoundaryImporter_Import_Sample__0__on_line__1__does_not_match_the_file__2__, sampleName, linesRead, fileName));
                    }
                    fileIds = new[] { sampleFile.FileId };
                }
                // Define the annotations to be added
                var annotations = dataFields.GetAnnotations();
                if (!changePeaks)
                {
                    if (apexTime.HasValue)
                    {
                        annotations.Add(ComparePeakBoundaries.APEX_ANNOTATION, dataFields.GetField(Field.apex_time));
                    }
                    if (startTime.HasValue && endTime.HasValue)
                    {
                        annotations.Add(ComparePeakBoundaries.START_TIME_ANNOTATION, dataFields.GetField(Field.start_time));
                        annotations.Add(ComparePeakBoundaries.END_TIME_ANNOTATION, dataFields.GetField(Field.end_time));
                    }
                }
                AnnotationsAdded = annotations.Keys.ToList();

                // Loop over all the transition groups in that peptide to find matching charge,
                // or use all transition groups if charge not specified
                bool foundSample = false;
                foreach (var pepPath in pepPaths)
                {
                    var nodePep = (PeptideDocNode)docNew.FindNode(pepPath);

                    foreach (TransitionGroupDocNode groupNode in nodePep.Children)
                    {
                        if (chargeSpecified && charge != groupNode.TransitionGroup.PrecursorAdduct)
                        {
                            continue;
                        }

                        // Loop over the files in this groupNode to find the correct sample
                        // Change peak boundaries for the transition group
                        foreach (var fileId in GetApplicableFiles(fileIds, groupNode))
                        {
                            var groupPath = new IdentityPath(pepPath, groupNode.Id);
                            // Attach annotations
                            if (annotations.Any())
                            {
                                docNew = docNew.AddPrecursorResultsAnnotations(groupPath, fileId, annotations);
                            }
                            // Change peak
                            var filePath = chromSet.GetFileInfo(fileId).FilePath;
                            if (changePeaks)
                            {
                                docNew = docNew.ChangePeak(groupPath, nameSet, filePath,
                                                           null, startTime, endTime, UserSet.IMPORTED, null, false);
                            }
                            // For removing peaks that are not in the file, if removeMissing = true
                            trackAdjustedResults.Add(new ResultsKey(fileId.GlobalIndex, groupNode.Id));
                            foundSample = true;
                        }
                    }
                }
                if (!foundSample)
                {
                    UnrecognizedChargeStates.Add(new UnrecognizedChargeState(charge, fileName, modifiedPeptideString));
                }
            }
            // Remove peaks from the document that weren't in the file.
            if (removeMissing)
            {
                docNew = RemoveMissing(docNew, trackAdjustedResults, changePeaks);
            }
            // If nothing has changed, return the old Document before ChangeIgnoreChangingChildren was turned off
            if (!ReferenceEquals(docNew, docReference))
            {
                Document = (SrmDocument)Document.ChangeIgnoreChangingChildren(false).ChangeChildrenChecked(docNew.Children);
            }
            return(Document);
        }
        public SrmDocument Import(TextReader reader, IProgressMonitor progressMonitor, long lineCount, bool isMinutes, bool removeMissing = false, bool changePeaks = true)
        {
            var status = new ProgressStatus(Resources.PeakBoundaryImporter_Import_Importing_Peak_Boundaries);
            double timeConversionFactor = isMinutes ? 1.0 : 60.0;
            int linesRead = 0;
            int progressPercent = 0;
            var docNew = (SrmDocument) Document.ChangeIgnoreChangingChildren(true);
            var docReference = docNew;
            var sequenceToNode = new Dictionary<Tuple<string, bool>, IList<IdentityPath>>();
            var fileNameToFileMatch = new Dictionary<string, ChromSetFileMatch>();
            var trackAdjustedResults = new HashSet<ResultsKey>();
            var modMatcher = new ModificationMatcher();
            // Make the dictionary of modified peptide strings to doc nodes and paths
            for (int i = 0; i < Document.MoleculeCount; ++i)
            {
                IdentityPath peptidePath = Document.GetPathTo((int) SrmDocument.Level.Molecules, i);
                PeptideDocNode peptideNode = (PeptideDocNode) Document.FindNode(peptidePath);
                var peptidePair = new Tuple<string, bool>(peptideNode.RawTextId, peptideNode.IsDecoy);
                IList<IdentityPath> idPathList;
                // Each (sequence, isDecoy) pair can be associated with more than one peptide,
                // to handle the case of duplicate peptides in the doucment.
                if (sequenceToNode.TryGetValue(peptidePair, out idPathList))
                {
                    idPathList.Add(peptidePath);
                    sequenceToNode[peptidePair] = idPathList;
                }
                else
                {
                    idPathList = new List<IdentityPath> { peptidePath };
                    sequenceToNode.Add(peptidePair, idPathList);
                }
            }

            // Add annotations as possible columns
            var allFieldNames = new List<string[]>(FIELD_NAMES);
            allFieldNames.AddRange(from def in Document.Settings.DataSettings.AnnotationDefs
                                   where def.AnnotationTargets.Contains(AnnotationDef.AnnotationTarget.precursor_result)
                                   select new[] { def.Name });

            string line = reader.ReadLine();
            linesRead++;
            int[] fieldIndices;
            int fieldsTotal;
            // If we aren't changing peaks, allow start and end time to be missing
            var requiredFields = changePeaks ? REQUIRED_FIELDS : REQUIRED_NO_CHROM;
            char correctSeparator = ReadFirstLine(line, allFieldNames, requiredFields, out fieldIndices, out fieldsTotal);

            while ((line = reader.ReadLine()) != null)
            {
                linesRead++;
                if (progressMonitor != null)
                {
                    if (progressMonitor.IsCanceled)
                        return Document;
                    int progressNew = (int) (linesRead*100/lineCount);
                    if (progressPercent != progressNew)
                    {
                        progressMonitor.UpdateProgress(status = status.ChangePercentComplete(progressNew));
                        progressPercent = progressNew;
                    }
                }
                var dataFields = new DataFields(fieldIndices, line.ParseDsvFields(correctSeparator), allFieldNames);
                if (dataFields.Length != fieldsTotal)
                {
                    throw new IOException(string.Format(Resources.PeakBoundaryImporter_Import_Line__0__field_count__1__differs_from_the_first_line__which_has__2_,
                        linesRead, dataFields.Length, fieldsTotal));
                }
                string modifiedPeptideString = dataFields.GetField(Field.modified_peptide);
                modMatcher.CreateMatches(Document.Settings,
                                        new List<string> {modifiedPeptideString},
                                        Settings.Default.StaticModList,
                                        Settings.Default.HeavyModList);
                // Convert the modified peptide string into a standardized form that
                // converts unimod, names, etc, into masses, eg [+57.0]
                var nodeForModPep = modMatcher.GetModifiedNode(modifiedPeptideString);
                if (nodeForModPep == null)
                {
                    throw new IOException(string.Format(Resources.PeakBoundaryImporter_Import_Peptide_has_unrecognized_modifications__0__at_line__1_, modifiedPeptideString, linesRead));
                }
                nodeForModPep = nodeForModPep.ChangeSettings(Document.Settings, SrmSettingsDiff.ALL);
                modifiedPeptideString = nodeForModPep.RawTextId; // Modified sequence, or custom ion name
                string fileName = dataFields.GetField(Field.filename);
                bool isDecoy = dataFields.IsDecoy(linesRead);
                var peptideIdentifier = new Tuple<string, bool>(modifiedPeptideString, isDecoy);
                int charge;
                bool chargeSpecified = dataFields.TryGetCharge(linesRead, out charge);
                string sampleName = dataFields.GetField(Field.sample_name);

                double? startTime = null;
                double? endTime = null;
                if (changePeaks)
                {
                    startTime = dataFields.GetTime(Field.start_time, Resources.PeakBoundaryImporter_Import_The_value___0___on_line__1__is_not_a_valid_start_time_, linesRead);
                    if (startTime.HasValue)
                        startTime = startTime / timeConversionFactor;
                    endTime = dataFields.GetTime(Field.end_time, Resources.PeakBoundaryImporter_Import_The_value___0___on_line__1__is_not_a_valid_end_time_, linesRead);
                    if (endTime.HasValue)
                        endTime = endTime / timeConversionFactor;
                }

                // Error if only one of startTime and endTime is null
                if (startTime == null && endTime != null)
                    throw new IOException(string.Format(Resources.PeakBoundaryImporter_Import_Missing_start_time_on_line__0_, linesRead));
                if (startTime != null && endTime == null)
                    throw new IOException(string.Format(Resources.PeakBoundaryImporter_Import_Missing_end_time_on_line__0_, linesRead));
                // Add filename to second dictionary if not yet encountered
                ChromSetFileMatch fileMatch;
                if (!fileNameToFileMatch.TryGetValue(fileName, out fileMatch))
                {
                    fileMatch = Document.Settings.MeasuredResults.FindMatchingMSDataFile(MsDataFileUri.Parse(fileName));
                    fileNameToFileMatch.Add(fileName, fileMatch);
                }
                if (fileMatch == null)
                {
                    UnrecognizedFiles.Add(fileName);
                    continue;
                }
                var chromSet = fileMatch.Chromatograms;
                string nameSet = chromSet.Name;
                ChromFileInfoId[] fileIds;
                if (sampleName == null)
                {
                    fileIds = chromSet.MSDataFileInfos.Select(x => x.FileId).ToArray();
                }
                else
                {
                    var sampleFile = chromSet.MSDataFileInfos.FirstOrDefault(info => Equals(sampleName, info.FilePath.GetSampleName()));
                    if (sampleFile == null)
                    {
                        throw new IOException(string.Format(Resources.PeakBoundaryImporter_Import_Sample__0__on_line__1__does_not_match_the_file__2__, sampleName, linesRead, fileName));
                    }
                    fileIds = new[] {sampleFile.FileId};
                }
                // Look up the IdentityPath of peptide in first dictionary
                IList<IdentityPath> pepPaths;
                if (!sequenceToNode.TryGetValue(peptideIdentifier, out pepPaths))
                {
                    UnrecognizedPeptides.Add(modifiedPeptideString);
                    continue;
                }

                // Define the annotations to be added
                var annotations = dataFields.GetAnnotations();
                AnnotationsAdded = annotations.Keys.ToList();

                // Loop over all the transition groups in that peptide to find matching charge,
                // or use all transition groups if charge not specified
                bool foundSample = false;
                foreach (var pepPath in pepPaths)
                {
                    var nodePep = (PeptideDocNode)docNew.FindNode(pepPath);
                    for(int i = 0; i < nodePep.Children.Count; ++i)
                    {
                        var groupRelPath = nodePep.GetPathTo(i);
                        var groupNode = (TransitionGroupDocNode) nodePep.FindNode(groupRelPath);
                        if (!chargeSpecified || charge == groupNode.TransitionGroup.PrecursorCharge)
                        {
                            var groupFileIndices =
                                new HashSet<int>(groupNode.ChromInfos.Select(x => x.FileId.GlobalIndex));
                            // Loop over the files in this groupNode to find the correct sample
                            // Change peak boundaries for the transition group
                            foreach (var fileId in fileIds)
                            {
                                if (groupFileIndices.Contains(fileId.GlobalIndex))
                                {
                                    var groupPath = new IdentityPath(pepPath, groupNode.Id);
                                    // Attach annotations
                                    docNew = docNew.AddPrecursorResultsAnnotations(groupPath, fileId, annotations);
                                    // Change peak
                                    var filePath = chromSet.GetFileInfo(fileId).FilePath;
                                    if (changePeaks)
                                    {
                                        docNew = docNew.ChangePeak(groupPath, nameSet, filePath,
                                            null, startTime, endTime, UserSet.IMPORTED, null, false);
                                    }
                                    // For removing peaks that are not in the file, if removeMissing = true
                                    trackAdjustedResults.Add(new ResultsKey(fileId.GlobalIndex, groupNode.Id));
                                    foundSample = true;
                                }
                            }
                        }
                    }
                }
                if (!foundSample)
                {
                    UnrecognizedChargeStates.Add(new UnrecognizedChargeState(charge, fileName, modifiedPeptideString));
                }
            }
            // Remove peaks from the document that weren't in the file.
            if (removeMissing)
                docNew = RemoveMissing(docNew, trackAdjustedResults, changePeaks);
            // If nothing has changed, return the old Document before ChangeIgnoreChangingChildren was turned off
            if (!ReferenceEquals(docNew, docReference))
                Document = (SrmDocument) Document.ChangeIgnoreChangingChildren(false).ChangeChildrenChecked(docNew.Children);
            return Document;
        }