/// <summary>
/// Reads delimiter-separated peak boundary records from <paramref name="reader"/> and applies them
/// to a working copy of <see cref="Document"/>. The first line is passed to ReadFirstLine to
/// determine the separator and column layout; every following line is handled as one record.
/// Records naming an unknown peptide, file, or charge state are collected in
/// UnrecognizedPeptides, UnrecognizedFiles and UnrecognizedChargeStates rather than failing the import.
/// </summary>
/// <param name="reader">Source of the text to import. The first line read is treated as the header.</param>
/// <param name="progressMonitor">Optional monitor; when non-null it is polled for cancellation and
/// receives percent-complete updates once per line.</param>
/// <param name="lineCount">Total number of lines expected, used only as the denominator for the
/// progress percentage. Non-positive values disable percentage updates.</param>
/// <param name="isMinutes">Selects the time conversion factor handed to the time parser:
/// 1.0 when true, 60.0 when false.</param>
/// <param name="removeMissing">When true, RemoveMissing is called with the set of results touched
/// by this import after all lines are processed.</param>
/// <param name="changePeaks">When true, start and end times are required and peaks are changed.
/// When false, times are optional and are stored as annotations instead.</param>
/// <returns>The <see cref="Document"/> property: updated with the imported changes, or unchanged if
/// nothing was modified or the import was canceled.</returns>
/// <exception cref="IOException">Thrown when a line's field count differs from the first line's,
/// a peptide has unrecognized modifications, exactly one of start/end time is present while
/// <paramref name="changePeaks"/> is true, or a sample name does not match any file in the matched
/// chromatogram set.</exception>
public SrmDocument Import(TextReader reader, IProgressMonitor progressMonitor, long lineCount, bool isMinutes, bool removeMissing = false, bool changePeaks = true)
{
    IProgressStatus status = new ProgressStatus(Resources.PeakBoundaryImporter_Import_Importing_Peak_Boundaries);
    double timeConversionFactor = isMinutes ? 1.0 : 60.0;
    int linesRead = 0;
    int progressPercent = 0;
    var docNew = (SrmDocument)Document.ChangeIgnoreChangingChildren(true);
    // Kept so that "nothing changed" can be detected by reference at the end
    var docReference = docNew;
    var sequenceToNode = MakeSequenceDictionary(Document);
    var fileNameToFileMatch = new Dictionary<string, ChromSetFileMatch>();
    var trackAdjustedResults = new HashSet<ResultsKey>();
    var modMatcher = new ModificationMatcher();
    // Caches input peptide string -> canonical sequence so modification matching runs once per distinct string
    var canonicalSequenceDict = new Dictionary<string, string>();

    // Add annotations as possible columns
    var allFieldNames = new List<string[]>(FIELD_NAMES);
    allFieldNames.AddRange(from def in Document.Settings.DataSettings.AnnotationDefs
                           where def.AnnotationTargets.Contains(AnnotationDef.AnnotationTarget.precursor_result)
                           select new[] { def.Name });

    // NOTE(review): an empty reader yields a null first line here - confirm ReadFirstLine handles null
    string line = reader.ReadLine();
    linesRead++;
    int[] fieldIndices;
    int fieldsTotal;
    // If we aren't changing peaks, allow start and end time to be missing
    var requiredFields = changePeaks ? REQUIRED_FIELDS : REQUIRED_NO_CHROM;
    char correctSeparator = ReadFirstLine(line, allFieldNames, requiredFields, out fieldIndices, out fieldsTotal);

    while ((line = reader.ReadLine()) != null)
    {
        linesRead++;
        if (progressMonitor != null)
        {
            if (progressMonitor.IsCanceled)
            {
                return(Document);
            }
            // Multiply as long so very large files cannot overflow int, and
            // skip the division entirely when no usable line count was supplied
            int progressNew = lineCount > 0 ? (int)(linesRead * 100L / lineCount) : 0;
            if (progressPercent != progressNew)
            {
                progressMonitor.UpdateProgress(status = status.ChangePercentComplete(progressNew));
                progressPercent = progressNew;
            }
        }

        var dataFields = new DataFields(fieldIndices, line.ParseDsvFields(correctSeparator), allFieldNames);
        if (dataFields.Length != fieldsTotal)
        {
            throw new IOException(string.Format(Resources.PeakBoundaryImporter_Import_Line__0__field_count__1__differs_from_the_first_line__which_has__2_, linesRead, dataFields.Length, fieldsTotal));
        }

        string modifiedPeptideString = dataFields.GetField(Field.modified_peptide);
        string fileName = dataFields.GetField(Field.filename);
        bool isDecoy = dataFields.IsDecoy(linesRead);

        // Try the peptide string exactly as written first; fall back to a canonical form only on a miss
        IList<IdentityPath> pepPaths;
        if (!sequenceToNode.TryGetValue(Tuple.Create(modifiedPeptideString, isDecoy), out pepPaths))
        {
            string canonicalSequence;
            if (!canonicalSequenceDict.TryGetValue(modifiedPeptideString, out canonicalSequence))
            {
                // Only strings containing something other than 'A'-'Z' are run through the modification matcher
                if (modifiedPeptideString.Any(c => c < 'A' || c > 'Z'))
                {
                    modMatcher.CreateMatches(Document.Settings, new List<string> { modifiedPeptideString }, Settings.Default.StaticModList, Settings.Default.HeavyModList);
                    var nodeForModPep = modMatcher.GetModifiedNode(modifiedPeptideString);
                    if (nodeForModPep == null)
                    {
                        throw new IOException(string.Format(Resources.PeakBoundaryImporter_Import_Peptide_has_unrecognized_modifications__0__at_line__1_, modifiedPeptideString, linesRead));
                    }
                    nodeForModPep = nodeForModPep.ChangeSettings(Document.Settings, SrmSettingsDiff.ALL);
                    // Convert the modified peptide string into a standardized form that
                    // converts unimod, names, etc, into masses, eg [+57.0]
                    canonicalSequence = nodeForModPep.ModifiedTarget.Sequence;
                    canonicalSequenceDict.Add(modifiedPeptideString, canonicalSequence);
                }
            }
            if (null != canonicalSequence)
            {
                sequenceToNode.TryGetValue(Tuple.Create(canonicalSequence, isDecoy), out pepPaths);
            }
        }
        if (null == pepPaths)
        {
            UnrecognizedPeptides.Add(modifiedPeptideString);
            continue;
        }

        Adduct charge;
        bool chargeSpecified = dataFields.TryGetCharge(linesRead, out charge);
        string sampleName = dataFields.GetField(Field.sample_name);

        double? apexTime = dataFields.GetTime(Field.apex_time, timeConversionFactor, Resources.PeakBoundaryImporter_Import_The_value___0___on_line__1__is_not_a_valid_time_, linesRead);
        double? startTime = dataFields.GetTime(Field.start_time, timeConversionFactor, Resources.PeakBoundaryImporter_Import_The_value___0___on_line__1__is_not_a_valid_start_time_, linesRead);
        double? endTime = dataFields.GetTime(Field.end_time, timeConversionFactor, Resources.PeakBoundaryImporter_Import_The_value___0___on_line__1__is_not_a_valid_end_time_, linesRead);

        // Error if only one of startTime and endTime is null.
        // When peaks are not being changed, a lone time is discarded instead.
        if (startTime == null && endTime != null)
        {
            if (changePeaks)
            {
                throw new IOException(string.Format(Resources.PeakBoundaryImporter_Import_Missing_start_time_on_line__0_, linesRead));
            }
            endTime = null;
        }
        if (startTime != null && endTime == null)
        {
            if (changePeaks)
            {
                throw new IOException(string.Format(Resources.PeakBoundaryImporter_Import_Missing_end_time_on_line__0_, linesRead));
            }
            startTime = null;
        }

        // Add filename to second dictionary if not yet encountered
        // (failed matches are cached as null so each file name is only searched once)
        ChromSetFileMatch fileMatch;
        if (!fileNameToFileMatch.TryGetValue(fileName, out fileMatch))
        {
            fileMatch = Document.Settings.MeasuredResults.FindMatchingMSDataFile(MsDataFileUri.Parse(fileName));
            fileNameToFileMatch.Add(fileName, fileMatch);
        }
        if (fileMatch == null)
        {
            UnrecognizedFiles.Add(fileName);
            continue;
        }

        var chromSet = fileMatch.Chromatograms;
        string nameSet = chromSet.Name;
        ChromFileInfoId[] fileIds;
        if (sampleName == null)
        {
            // No sample given: apply to every file in the chromatogram set
            fileIds = chromSet.MSDataFileInfos.Select(x => x.FileId).ToArray();
        }
        else
        {
            var sampleFile = chromSet.MSDataFileInfos.FirstOrDefault(info => Equals(sampleName, info.FilePath.GetSampleName()));
            if (sampleFile == null)
            {
                throw new IOException(string.Format(Resources.PeakBoundaryImporter_Import_Sample__0__on_line__1__does_not_match_the_file__2__, sampleName, linesRead, fileName));
            }
            fileIds = new[] { sampleFile.FileId };
        }

        // Define the annotations to be added
        var annotations = dataFields.GetAnnotations();
        if (!changePeaks)
        {
            // Record the imported times as annotations, using the raw field text
            if (apexTime.HasValue)
            {
                annotations.Add(ComparePeakBoundaries.APEX_ANNOTATION, dataFields.GetField(Field.apex_time));
            }
            if (startTime.HasValue && endTime.HasValue)
            {
                annotations.Add(ComparePeakBoundaries.START_TIME_ANNOTATION, dataFields.GetField(Field.start_time));
                annotations.Add(ComparePeakBoundaries.END_TIME_ANNOTATION, dataFields.GetField(Field.end_time));
            }
        }
        AnnotationsAdded = annotations.Keys.ToList();

        // Loop over all the transition groups in that peptide to find matching charge,
        // or use all transition groups if charge not specified
        bool foundSample = false;
        foreach (var pepPath in pepPaths)
        {
            var nodePep = (PeptideDocNode)docNew.FindNode(pepPath);
            foreach (TransitionGroupDocNode groupNode in nodePep.Children)
            {
                if (chargeSpecified && charge != groupNode.TransitionGroup.PrecursorAdduct)
                {
                    continue;
                }

                // Loop over the files in this groupNode to find the correct sample
                // Change peak boundaries for the transition group
                foreach (var fileId in GetApplicableFiles(fileIds, groupNode))
                {
                    var groupPath = new IdentityPath(pepPath, groupNode.Id);
                    // Attach annotations
                    if (annotations.Any())
                    {
                        docNew = docNew.AddPrecursorResultsAnnotations(groupPath, fileId, annotations);
                    }
                    // Change peak
                    var filePath = chromSet.GetFileInfo(fileId).FilePath;
                    if (changePeaks)
                    {
                        docNew = docNew.ChangePeak(groupPath, nameSet, filePath, null, startTime, endTime, UserSet.IMPORTED, null, false);
                    }
                    // For removing peaks that are not in the file, if removeMissing = true
                    trackAdjustedResults.Add(new ResultsKey(fileId.GlobalIndex, groupNode.Id));
                    foundSample = true;
                }
            }
        }
        if (!foundSample)
        {
            UnrecognizedChargeStates.Add(new UnrecognizedChargeState(charge, fileName, modifiedPeptideString));
        }
    }

    // Remove peaks from the document that weren't in the file.
    if (removeMissing)
    {
        docNew = RemoveMissing(docNew, trackAdjustedResults, changePeaks);
    }
    // If nothing has changed, return the old Document before ChangeIgnoreChangingChildren was turned off
    if (!ReferenceEquals(docNew, docReference))
    {
        Document = (SrmDocument)Document.ChangeIgnoreChangingChildren(false).ChangeChildrenChecked(docNew.Children);
    }
    return(Document);
}
/// <summary>
/// Reads delimiter-separated peak boundary records from <paramref name="reader"/> and applies them
/// to a working copy of <see cref="Document"/>. The first line is passed to ReadFirstLine to
/// determine the separator and column layout; every following line is handled as one record.
/// Records naming an unknown file, peptide, or charge state are collected in
/// UnrecognizedFiles, UnrecognizedPeptides and UnrecognizedChargeStates rather than failing the import.
/// </summary>
/// <param name="reader">Source of the text to import. The first line read is treated as the header.</param>
/// <param name="progressMonitor">Optional monitor; when non-null it is polled for cancellation and
/// receives percent-complete updates once per line.</param>
/// <param name="lineCount">Total number of lines expected, used only as the denominator for the
/// progress percentage. Non-positive values disable percentage updates.</param>
/// <param name="isMinutes">When true, parsed start and end times are used as-is (divided by 1.0);
/// when false they are divided by 60.</param>
/// <param name="removeMissing">When true, RemoveMissing is called with the set of results touched
/// by this import after all lines are processed.</param>
/// <param name="changePeaks">When true, start and end times are read and peaks are changed.
/// When false, times are not read and only annotations are attached.</param>
/// <returns>The <see cref="Document"/> property: updated with the imported changes, or unchanged if
/// nothing was modified or the import was canceled.</returns>
/// <exception cref="IOException">Thrown when a line's field count differs from the first line's,
/// a peptide has unrecognized modifications, exactly one of start/end time is present, or a
/// sample name does not match any file in the matched chromatogram set.</exception>
public SrmDocument Import(TextReader reader, IProgressMonitor progressMonitor, long lineCount, bool isMinutes, bool removeMissing = false, bool changePeaks = true)
{
    var status = new ProgressStatus(Resources.PeakBoundaryImporter_Import_Importing_Peak_Boundaries);
    double timeConversionFactor = isMinutes ? 1.0 : 60.0;
    int linesRead = 0;
    int progressPercent = 0;
    var docNew = (SrmDocument) Document.ChangeIgnoreChangingChildren(true);
    // Kept so that "nothing changed" can be detected by reference at the end
    var docReference = docNew;
    var sequenceToNode = new Dictionary<Tuple<string, bool>, IList<IdentityPath>>();
    var fileNameToFileMatch = new Dictionary<string, ChromSetFileMatch>();
    var trackAdjustedResults = new HashSet<ResultsKey>();
    var modMatcher = new ModificationMatcher();

    // Make the dictionary of modified peptide strings to doc nodes and paths
    for (int i = 0; i < Document.MoleculeCount; ++i)
    {
        IdentityPath peptidePath = Document.GetPathTo((int) SrmDocument.Level.Molecules, i);
        PeptideDocNode peptideNode = (PeptideDocNode) Document.FindNode(peptidePath);
        var peptidePair = new Tuple<string, bool>(peptideNode.RawTextId, peptideNode.IsDecoy);
        IList<IdentityPath> idPathList;
        // Each (sequence, isDecoy) pair can be associated with more than one peptide,
        // to handle the case of duplicate peptides in the document.
        if (sequenceToNode.TryGetValue(peptidePair, out idPathList))
        {
            // The list is stored by reference, so adding to it updates the dictionary entry
            idPathList.Add(peptidePath);
        }
        else
        {
            sequenceToNode.Add(peptidePair, new List<IdentityPath> { peptidePath });
        }
    }

    // Add annotations as possible columns
    var allFieldNames = new List<string[]>(FIELD_NAMES);
    allFieldNames.AddRange(from def in Document.Settings.DataSettings.AnnotationDefs
                           where def.AnnotationTargets.Contains(AnnotationDef.AnnotationTarget.precursor_result)
                           select new[] { def.Name });

    // NOTE(review): an empty reader yields a null first line here - confirm ReadFirstLine handles null
    string line = reader.ReadLine();
    linesRead++;
    int[] fieldIndices;
    int fieldsTotal;
    // If we aren't changing peaks, allow start and end time to be missing
    var requiredFields = changePeaks ? REQUIRED_FIELDS : REQUIRED_NO_CHROM;
    char correctSeparator = ReadFirstLine(line, allFieldNames, requiredFields, out fieldIndices, out fieldsTotal);

    while ((line = reader.ReadLine()) != null)
    {
        linesRead++;
        if (progressMonitor != null)
        {
            if (progressMonitor.IsCanceled)
                return Document;
            // Multiply as long so very large files cannot overflow int, and
            // skip the division entirely when no usable line count was supplied
            int progressNew = lineCount > 0 ? (int) (linesRead*100L/lineCount) : 0;
            if (progressPercent != progressNew)
            {
                progressMonitor.UpdateProgress(status = status.ChangePercentComplete(progressNew));
                progressPercent = progressNew;
            }
        }

        var dataFields = new DataFields(fieldIndices, line.ParseDsvFields(correctSeparator), allFieldNames);
        if (dataFields.Length != fieldsTotal)
        {
            throw new IOException(string.Format(Resources.PeakBoundaryImporter_Import_Line__0__field_count__1__differs_from_the_first_line__which_has__2_, linesRead, dataFields.Length, fieldsTotal));
        }

        string modifiedPeptideString = dataFields.GetField(Field.modified_peptide);
        modMatcher.CreateMatches(Document.Settings,
            new List<string> {modifiedPeptideString},
            Settings.Default.StaticModList,
            Settings.Default.HeavyModList);
        // Convert the modified peptide string into a standardized form that
        // converts unimod, names, etc, into masses, eg [+57.0]
        var nodeForModPep = modMatcher.GetModifiedNode(modifiedPeptideString);
        if (nodeForModPep == null)
        {
            throw new IOException(string.Format(Resources.PeakBoundaryImporter_Import_Peptide_has_unrecognized_modifications__0__at_line__1_, modifiedPeptideString, linesRead));
        }
        nodeForModPep = nodeForModPep.ChangeSettings(Document.Settings, SrmSettingsDiff.ALL);
        modifiedPeptideString = nodeForModPep.RawTextId; // Modified sequence, or custom ion name

        string fileName = dataFields.GetField(Field.filename);
        bool isDecoy = dataFields.IsDecoy(linesRead);
        var peptideIdentifier = new Tuple<string, bool>(modifiedPeptideString, isDecoy);
        int charge;
        bool chargeSpecified = dataFields.TryGetCharge(linesRead, out charge);
        string sampleName = dataFields.GetField(Field.sample_name);

        // Times are only read when peaks are being changed; otherwise both stay null
        double? startTime = null;
        double? endTime = null;
        if (changePeaks)
        {
            startTime = dataFields.GetTime(Field.start_time, Resources.PeakBoundaryImporter_Import_The_value___0___on_line__1__is_not_a_valid_start_time_, linesRead);
            if (startTime.HasValue)
                startTime = startTime / timeConversionFactor;
            endTime = dataFields.GetTime(Field.end_time, Resources.PeakBoundaryImporter_Import_The_value___0___on_line__1__is_not_a_valid_end_time_, linesRead);
            if (endTime.HasValue)
                endTime = endTime / timeConversionFactor;
        }

        // Error if only one of startTime and endTime is null
        if (startTime == null && endTime != null)
            throw new IOException(string.Format(Resources.PeakBoundaryImporter_Import_Missing_start_time_on_line__0_, linesRead));
        if (startTime != null && endTime == null)
            throw new IOException(string.Format(Resources.PeakBoundaryImporter_Import_Missing_end_time_on_line__0_, linesRead));

        // Add filename to second dictionary if not yet encountered
        // (failed matches are cached as null so each file name is only searched once)
        ChromSetFileMatch fileMatch;
        if (!fileNameToFileMatch.TryGetValue(fileName, out fileMatch))
        {
            fileMatch = Document.Settings.MeasuredResults.FindMatchingMSDataFile(MsDataFileUri.Parse(fileName));
            fileNameToFileMatch.Add(fileName, fileMatch);
        }
        if (fileMatch == null)
        {
            UnrecognizedFiles.Add(fileName);
            continue;
        }

        var chromSet = fileMatch.Chromatograms;
        string nameSet = chromSet.Name;
        ChromFileInfoId[] fileIds;
        if (sampleName == null)
        {
            // No sample given: apply to every file in the chromatogram set
            fileIds = chromSet.MSDataFileInfos.Select(x => x.FileId).ToArray();
        }
        else
        {
            var sampleFile = chromSet.MSDataFileInfos.FirstOrDefault(info => Equals(sampleName, info.FilePath.GetSampleName()));
            if (sampleFile == null)
            {
                throw new IOException(string.Format(Resources.PeakBoundaryImporter_Import_Sample__0__on_line__1__does_not_match_the_file__2__, sampleName, linesRead, fileName));
            }
            fileIds = new[] {sampleFile.FileId};
        }

        // Look up the IdentityPath of peptide in first dictionary
        IList<IdentityPath> pepPaths;
        if (!sequenceToNode.TryGetValue(peptideIdentifier, out pepPaths))
        {
            UnrecognizedPeptides.Add(modifiedPeptideString);
            continue;
        }

        // Define the annotations to be added
        var annotations = dataFields.GetAnnotations();
        AnnotationsAdded = annotations.Keys.ToList();

        // Loop over all the transition groups in that peptide to find matching charge,
        // or use all transition groups if charge not specified
        bool foundSample = false;
        foreach (var pepPath in pepPaths)
        {
            var nodePep = (PeptideDocNode)docNew.FindNode(pepPath);
            for (int i = 0; i < nodePep.Children.Count; ++i)
            {
                var groupRelPath = nodePep.GetPathTo(i);
                var groupNode = (TransitionGroupDocNode) nodePep.FindNode(groupRelPath);
                if (!chargeSpecified || charge == groupNode.TransitionGroup.PrecursorCharge)
                {
                    // Files for which this transition group has chromatogram info
                    var groupFileIndices = new HashSet<int>(groupNode.ChromInfos.Select(x => x.FileId.GlobalIndex));

                    // Loop over the files in this groupNode to find the correct sample
                    // Change peak boundaries for the transition group
                    foreach (var fileId in fileIds)
                    {
                        if (groupFileIndices.Contains(fileId.GlobalIndex))
                        {
                            var groupPath = new IdentityPath(pepPath, groupNode.Id);
                            // Attach annotations
                            docNew = docNew.AddPrecursorResultsAnnotations(groupPath, fileId, annotations);
                            // Change peak
                            var filePath = chromSet.GetFileInfo(fileId).FilePath;
                            if (changePeaks)
                            {
                                docNew = docNew.ChangePeak(groupPath, nameSet, filePath, null, startTime, endTime, UserSet.IMPORTED, null, false);
                            }
                            // For removing peaks that are not in the file, if removeMissing = true
                            trackAdjustedResults.Add(new ResultsKey(fileId.GlobalIndex, groupNode.Id));
                            foundSample = true;
                        }
                    }
                }
            }
        }
        if (!foundSample)
        {
            UnrecognizedChargeStates.Add(new UnrecognizedChargeState(charge, fileName, modifiedPeptideString));
        }
    }

    // Remove peaks from the document that weren't in the file.
    if (removeMissing)
        docNew = RemoveMissing(docNew, trackAdjustedResults, changePeaks);
    // If nothing has changed, return the old Document before ChangeIgnoreChangingChildren was turned off
    if (!ReferenceEquals(docNew, docReference))
        Document = (SrmDocument) Document.ChangeIgnoreChangingChildren(false).ChangeChildrenChecked(docNew.Children);
    return Document;
}