/// <summary>
/// Filters LC-MS features by elution length and abundance.
/// </summary>
/// <remarks>
/// When <paramref name="scanSummaryProvider"/> is null or <c>options.FilterOnMinutes</c> is false,
/// the length of a feature is the absolute difference between its start and end scan numbers and
/// is compared against <c>options.FeatureLengthRangeScans</c>.
/// Otherwise the length is computed from the <c>Time</c> values of the scan summaries, compared
/// against <c>options.FeatureLengthRangeMinutes</c>, and the feature must additionally satisfy
/// <c>Features.Count &gt;= options.MinimumDataPoints</c>.
/// In both modes, features whose abundance is not greater than zero are dropped.
/// </remarks>
/// <typeparam name="T">Feature type; must derive from UMCLight.</typeparam>
/// <param name="features">Features to filter. The list itself is not modified.</param>
/// <param name="options">Length ranges, minimum data point count and the scans/minutes switch.</param>
/// <param name="scanSummaryProvider">Optional source of scan summaries, used for time-based filtering.</param>
/// <returns>A new list holding the features that passed all filters.</returns>
/// <exception cref="IndexOutOfRangeException">
/// Thrown when the elution time of a feature cannot be determined in the time-based mode.
/// The underlying failure is available through <see cref="Exception.InnerException"/>.
/// </exception>
public static List<T> FilterFeatures<T>(List<T> features, LcmsFeatureFilteringOptions options, IScanSummaryProvider scanSummaryProvider = null)
    where T : UMCLight
{
    IEnumerable<T> newFeatures;

    if (scanSummaryProvider == null || !options.FilterOnMinutes)
    {
        var minimumSize = options.FeatureLengthRangeScans.Minimum;
        var maximumSize = options.FeatureLengthRangeScans.Maximum;

        // Length measured in scans
        newFeatures = features.Where(x =>
        {
            var size = Math.Abs(x.ScanStart - x.ScanEnd);
            return size >= minimumSize && size <= maximumSize;
        });
    }
    else
    {
        var minimumSize = options.FeatureLengthRangeMinutes.Minimum;
        var maximumSize = options.FeatureLengthRangeMinutes.Maximum;
        var minimumPoints = options.MinimumDataPoints;

        // Length measured in scan time
        newFeatures = features.Where(x =>
        {
            try
            {
                double size;
                if (x.ScanStart == 0)
                {
                    // Scan 0 has no scan time entry, so the feature length is just the time of the last feature scan.
                    size = scanSummaryProvider.GetScanSummary(x.ScanEnd).Time;
                }
                else
                {
                    size = Math.Abs(scanSummaryProvider.GetScanSummary(x.ScanEnd).Time - scanSummaryProvider.GetScanSummary(x.ScanStart).Time);
                }

                return size >= minimumSize && size <= maximumSize && x.Features.Count >= minimumPoints;
            }
            catch (Exception ex)
            {
                // Keep the original exception attached so its type and stack trace are not lost.
                throw new IndexOutOfRangeException(
                    String.Format("Exception determining the elution time for scans {0} and {1}: {2}", x.ScanStart, x.ScanEnd, ex.Message),
                    ex);
            }
        });
    }

    // The queries above are lazy; ToList() evaluates them here, so any exception surfaces from this method.
    return newFeatures.Where(x => x.Abundance > 0).ToList();
}
/// <summary>
/// Filters the list of MS Features that may be from MS/MS deisotoped data, keeping only the
/// features whose scan is reported with MS level 1 by the scan summary provider.
/// </summary>
/// <param name="msFeatures">MS features to filter.</param>
/// <param name="provider">
/// Source of scan summaries. When null, a warning status is raised and no scan can be
/// confirmed as MS level 1, so the returned list is empty.
/// </param>
/// <param name="dataset">
/// Dataset whose raw file path is inspected and whose <c>ScanTimes</c> are updated with the
/// time of every scan for which a summary was found.
/// </param>
/// <returns>
/// The input list itself when the dataset has no raw file path; otherwise a new list with the
/// features whose scan has MS level 1.
/// </returns>
public List<MSFeatureLight> Filter(List<MSFeatureLight> msFeatures, IScanSummaryProvider provider, ref DatasetInformation dataset)
{
    var rawPath = dataset.RawFile.Path;
    if (string.IsNullOrWhiteSpace(rawPath))
    {
        // Without a raw file there is nothing to check the scans against.
        return msFeatures;
    }

    // Collect every distinct scan number referenced by the features.
    var uniqueScans = new HashSet<int>();
    foreach (var msFeature in msFeatures)
    {
        uniqueScans.Add(msFeature.Scan);
    }

    // Scans confirmed as MS level 1 by the provider.
    var ms1Scans = new HashSet<int>();
    var scanTimes = dataset.ScanTimes;
    var rawFileName = System.IO.Path.GetFileName(rawPath);

    if (provider == null)
    {
        UpdateStatus(string.Format("Warning: Raw file not found ({0}); scan times are not available!", rawFileName));
    }
    else
    {
        UpdateStatus(string.Format("Reading scan info from {0}", rawFileName));

        foreach (var scanNumber in uniqueScans)
        {
            var summary = provider.GetScanSummary(scanNumber);
            if (summary == null)
            {
                continue;
            }

            if (summary.MsLevel == 1)
            {
                ms1Scans.Add(scanNumber);
            }

            // Record (or refresh) the time of this scan.
            if (!scanTimes.ContainsKey(scanNumber))
            {
                scanTimes.Add(scanNumber, summary.Time);
            }
            else
            {
                scanTimes[scanNumber] = summary.Time;
            }
        }

        dataset.ScanTimes = scanTimes;
    }

    return msFeatures.Where(x => ms1Scans.Contains(x.Scan)).ToList();
}
/// <summary>
/// Finds LC-MS features by converting the given MS features with <c>MsToLcmsFeatures</c>, then
/// assigns each resulting feature a NET value, statistics, a sequential ID and scan bounds
/// narrowed to the width of its peak.
/// </summary>
/// <param name="msFeatures">MS features to group into LC-MS features.</param>
/// <param name="options">Feature finding options handed to the converter.</param>
/// <param name="provider">Source of scan summaries used to look up scan times; must not be null.</param>
/// <param name="progress">Optional progress reporter passed through to the converter.</param>
/// <returns>
/// The LC-MS features that contain at least one MS feature. The list is empty when
/// <paramref name="msFeatures"/> is empty.
/// </returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="provider"/> is null.</exception>
public List<UMCLight> FindFeatures(List<MSFeatureLight> msFeatures, LcmsFeatureFindingOptions options, IScanSummaryProvider provider, IProgress<ProgressData> progress = null)
{
    if (provider == null)
    {
        throw new ArgumentNullException(nameof(provider));
    }

    var clusterer = new MsToLcmsFeatures(provider, options);
    var features = clusterer.Convert(msFeatures, progress);

    var newFeatures = new List<UMCLight>();
    if (msFeatures.Count == 0)
    {
        // There is no scan range to normalize against; looking up the int.MaxValue / int.MinValue
        // sentinels in the provider would be meaningless.
        return newFeatures;
    }

    // Determine the scan range covered by the input so elution times can be normalized.
    var minScan = int.MaxValue;
    var maxScan = int.MinValue;
    foreach (var msFeature in msFeatures)
    {
        minScan = Math.Min(msFeature.Scan, minScan);
        maxScan = Math.Max(msFeature.Scan, maxScan);
    }

    var minScanTime = provider.GetScanSummary(minScan).Time;
    var maxScanTime = provider.GetScanSummary(maxScan).Time;
    var scanTimeRange = maxScanTime - minScanTime;

    var id = 0;
    foreach (var feature in features)
    {
        if (feature.MsFeatures.Count < 1)
        {
            continue;
        }

        // NET = position of the feature's scan time within the observed scan time range.
        // When every MS feature shares the same scan time the range is zero; use 0 rather than
        // dividing by zero and storing NaN/Infinity.
        feature.Net = scanTimeRange > 0
            ? (provider.GetScanSummary(feature.Scan).Time - minScanTime) / scanTimeRange
            : 0;

        feature.CalculateStatistics();
        feature.Id = id++;
        newFeatures.Add(feature);

        TrimFeatureToPeakWidth(feature);
    }

    // Only features that actually contain MS features were given a NET, statistics and an ID.
    return newFeatures;
}

/// <summary>
/// Sets the width of the feature to be the width of the peak, not the width of the tails:
/// walking outwards from the most abundant MS feature, the first MS feature on each side whose
/// abundance is at most 5% of the maximum defines the new ScanStart / ScanEnd.
/// </summary>
/// <param name="feature">Feature whose ScanStart and ScanEnd are adjusted in place.</param>
private static void TrimFeatureToPeakWidth(UMCLight feature)
{
    const double tailAbundanceFraction = 0.05;
    var members = feature.MsFeatures;

    // Locate the apex. Every member is inspected, including the last one.
    var maxAbundance = double.MinValue;
    var maxAbundanceIndex = 0;
    for (var index = 0; index < members.Count; index++)
    {
        if (members[index].Abundance > maxAbundance)
        {
            maxAbundance = members[index].Abundance;
            maxAbundanceIndex = index;
        }
    }

    // Walk towards the front. The first member is not tested; when the walk reaches it
    // the existing ScanStart is kept (presumably already the first scan - confirm against CalculateStatistics).
    for (var index = maxAbundanceIndex; index > 0; index--)
    {
        if (members[index].Abundance / maxAbundance <= tailAbundanceFraction)
        {
            feature.ScanStart = members[index].Scan;
            break;
        }
    }

    // Walk towards the back. The last member is not tested; when the walk reaches it
    // the existing ScanEnd is kept.
    for (var index = maxAbundanceIndex; index < members.Count - 1; index++)
    {
        if (members[index].Abundance / maxAbundance <= tailAbundanceFraction)
        {
            feature.ScanEnd = members[index].Scan;
            break;
        }
    }
}
/// <summary>
/// Reads LC-MS features from a tab-delimited text file whose first non-blank line names the columns.
/// </summary>
/// <remarks>
/// Required columns: Scan_Start, Scan_End, Feature_Index, Class_Rep_Charge, Monoisotopic_Mass,
/// Class_Rep_MZ, Abundance, IMS_Scan_Start, IMS_Scan_End, Drift_Time, Conformation_Fit_Score,
/// Average_Isotopic_Fit and Saturated_Member_Count.
/// Numbers are parsed with the invariant culture so the result does not depend on the regional
/// settings of the machine. Blank lines are skipped.
/// When a scan summary provider is available, the NET values of the start and end scans are
/// taken from it; otherwise they are 0.
/// </remarks>
/// <param name="fileLocation">Path of the file to read.</param>
/// <returns>
/// One feature per data line. Each feature gets two child MS features: one at its start scan
/// and one at its end scan.
/// </returns>
public IEnumerable<UMCLight> ReadFile(string fileLocation)
{
    var invariant = System.Globalization.CultureInfo.InvariantCulture;
    var headers = new Dictionary<string, int>();
    var umcs = new List<UMCLight>();
    var msFeatureId = 0;
    var headerParsed = false;

    // Column readers: look up the column index by header name and parse culture-independently.
    Func<string[], string, int> readInt =
        (fields, column) => Convert.ToInt32(fields[headers[column]], invariant);
    Func<string[], string, double> readDouble =
        (fields, column) => Convert.ToDouble(fields[headers[column]], invariant);

    foreach (var line in File.ReadLines(fileLocation))
    {
        if (string.IsNullOrWhiteSpace(line))
        {
            // A blank line carries no columns and cannot be parsed as a feature.
            continue;
        }

        var parts = line.Split('\t');

        if (!headerParsed)
        {
            // Get the headers if this is the first line
            for (var i = 0; i < parts.Length; i++)
            {
                headers.Add(parts[i], i);
            }

            headerParsed = true;
            continue;
        }

        var minLcScan = readInt(parts, "Scan_Start");
        var maxLcScan = readInt(parts, "Scan_End");

        var minNet = 0.0;
        var maxNet = 0.0;
        if (this.provider != null)
        {
            minNet = provider.GetScanSummary(minLcScan).Net;
            maxNet = provider.GetScanSummary(maxLcScan).Net;
        }

        var featureIndex = readInt(parts, "Feature_Index");
        var chargeState = readInt(parts, "Class_Rep_Charge");
        var monoisotopicMass = readDouble(parts, "Monoisotopic_Mass");

        var umc = new UMCLight
        {
            GroupId = this.datasetId,
            Id = featureIndex,
            ChargeState = chargeState,
            MassMonoisotopic = monoisotopicMass,
            MassMonoisotopicAligned = monoisotopicMass,
            Mz = readDouble(parts, "Class_Rep_MZ"),
            Abundance = readDouble(parts, "Abundance"),
            ScanStart = minLcScan,
            NetStart = minNet,
            ScanEnd = maxLcScan,
            NetEnd = maxNet,
            Net = (minNet + maxNet) / 2,
            NetAligned = (minNet + maxNet) / 2,
            // Integer division: the midpoint scan is truncated.
            ScanAligned = (minLcScan + maxLcScan) / 2,
            ImsScanStart = readInt(parts, "IMS_Scan_Start"),
            ImsScanEnd = readInt(parts, "IMS_Scan_End"),
            DriftTime = readDouble(parts, "Drift_Time"),
            ConformationFitScore = readDouble(parts, "Conformation_Fit_Score"),
            AverageDeconFitScore = readDouble(parts, "Average_Isotopic_Fit"),
            SaturatedMemberCount = readInt(parts, "Saturated_Member_Count"),
        };

        // min feature
        umc.AddChildFeature(new MSFeatureLight
        {
            GroupId = datasetId,
            Id = msFeatureId++,
            ChargeState = umc.ChargeState,
            MassMonoisotopic = umc.MassMonoisotopic,
            MassMonoisotopicAligned = umc.MassMonoisotopicAligned,
            Scan = umc.ScanStart,
            Net = umc.NetStart,
            NetAligned = umc.NetStart,
            ImsScanStart = umc.ImsScanStart,
            ImsScanEnd = umc.ImsScanEnd,
            DriftTime = umc.DriftTime,
        });

        // max feature
        umc.AddChildFeature(new MSFeatureLight
        {
            GroupId = datasetId,
            Id = msFeatureId++,
            ChargeState = umc.ChargeState,
            MassMonoisotopic = umc.MassMonoisotopic,
            MassMonoisotopicAligned = umc.MassMonoisotopicAligned,
            Scan = umc.ScanEnd,
            Net = umc.NetEnd,
            NetAligned = umc.NetEnd,
            ImsScanStart = umc.ImsScanStart,
            ImsScanEnd = umc.ImsScanEnd,
            DriftTime = umc.DriftTime,
        });

        umcs.Add(umc);
    }

    return umcs;
}