Ejemplo n.º 1
0
        /// <summary>
        ///     Filters LC-MS features by elution length and removes features whose abundance is not positive.
        /// </summary>
        /// <remarks>
        ///     When <paramref name="scanSummaryProvider"/> is null or <c>options.FilterOnMinutes</c> is false, the
        ///     feature length is the absolute difference between its start and end scan numbers and is compared
        ///     against <c>options.FeatureLengthRangeScans</c>. Otherwise the length is the difference between the
        ///     scan summary <c>Time</c> values of the end and start scans, compared against
        ///     <c>options.FeatureLengthRangeMinutes</c>, and the feature must also contain at least
        ///     <c>options.MinimumDataPoints</c> child features.
        /// </remarks>
        /// <typeparam name="T">Feature type; must derive from UMCLight.</typeparam>
        /// <param name="features">Features to filter. The list itself is not modified.</param>
        /// <param name="options">Length ranges and flags that control the filter.</param>
        /// <param name="scanSummaryProvider">Optional source of scan times; required for time-based filtering.</param>
        /// <returns>A new list holding the features that pass the length filter and have Abundance &gt; 0.</returns>
        /// <exception cref="IndexOutOfRangeException">
        ///     Thrown when the elution time of a feature cannot be determined; the original failure is available
        ///     through <see cref="Exception.InnerException"/>.
        /// </exception>
        public static List<T> FilterFeatures<T>(List<T> features, LcmsFeatureFilteringOptions options, IScanSummaryProvider scanSummaryProvider = null)
            where T : UMCLight
        {
            IEnumerable<T> newFeatures;

            if (scanSummaryProvider == null || !options.FilterOnMinutes)
            {
                var minimumSize = options.FeatureLengthRangeScans.Minimum;
                var maximumSize = options.FeatureLengthRangeScans.Maximum;

                // Length measured in scans
                newFeatures = features.Where(x =>
                {
                    var size = Math.Abs(x.ScanStart - x.ScanEnd);
                    return size >= minimumSize && size <= maximumSize;
                });
            }
            else
            {
                var minimumSize   = options.FeatureLengthRangeMinutes.Minimum;
                var maximumSize   = options.FeatureLengthRangeMinutes.Maximum;
                var minimumPoints = options.MinimumDataPoints;

                // Length measured as elution time
                newFeatures = features.Where(x =>
                {
                    try
                    {
                        double size;
                        if (x.ScanStart == 0)
                        {
                            // Scan 0 is not looked up (per the original author it has no scan time entry),
                            // so the feature length is just the time of the last feature scan.
                            size = scanSummaryProvider.GetScanSummary(x.ScanEnd).Time;
                        }
                        else
                        {
                            size = Math.Abs(scanSummaryProvider.GetScanSummary(x.ScanEnd).Time - scanSummaryProvider.GetScanSummary(x.ScanStart).Time);
                        }

                        return size >= minimumSize && size <= maximumSize && x.Features.Count >= minimumPoints;
                    }
                    catch (Exception ex)
                    {
                        // Keep the exception type and message callers already see, but attach the original
                        // exception so its type and stack trace are not lost.
                        throw new IndexOutOfRangeException(
                            String.Format("Exception determining the elution time for scans {0} and {1}: {2}", x.ScanStart, x.ScanEnd, ex.Message),
                            ex);
                    }
                });
            }

            // The queries above are deferred; ToList() evaluates them here, so any exception surfaces from this call.
            return newFeatures.Where(x => x.Abundance > 0).ToList();
        }
Ejemplo n.º 2
0
        /// <summary>
        ///     Keeps only the MS features whose scan is reported as MS level 1 by the scan summary provider, and
        ///     records the time of every scan that could be looked up in the dataset's scan time table.
        /// </summary>
        /// <param name="msFeatures">MS features to filter.</param>
        /// <param name="provider">Source of scan summaries; may be null.</param>
        /// <param name="dataset">Dataset whose raw file path is checked and whose ScanTimes are updated.</param>
        /// <returns>
        ///     The input list itself when the dataset has no raw file path; otherwise a new list with the features
        ///     found in MS level 1 scans. NOTE(review): when <paramref name="provider"/> is null no scan can be
        ///     confirmed, so the result is an empty list - confirm this is intended.
        /// </returns>
        public List<MSFeatureLight> Filter(List<MSFeatureLight> msFeatures, IScanSummaryProvider provider, ref DatasetInformation dataset)
        {
            var rawPath = dataset.RawFile.Path;

            // IsNullOrWhiteSpace already covers the null case.
            if (string.IsNullOrWhiteSpace(rawPath))
            {
                return msFeatures;
            }

            // Collect every distinct scan number referenced by the features.
            var observedScans = new HashSet<int>();
            foreach (var msFeature in msFeatures)
            {
                observedScans.Add(msFeature.Scan);
            }

            // Scans confirmed as MS level 1 by the provider.
            var parentScans = new HashSet<int>();
            var scanTimes   = dataset.ScanTimes;

            if (provider != null)
            {
                UpdateStatus(string.Format("Reading scan info from {0}", System.IO.Path.GetFileName(rawPath)));

                foreach (var scanNumber in observedScans)
                {
                    var scanInfo = provider.GetScanSummary(scanNumber);
                    if (scanInfo == null)
                    {
                        continue;
                    }

                    if (scanInfo.MsLevel == 1)
                    {
                        parentScans.Add(scanNumber);
                    }

                    // Insert or overwrite the time stored for this scan.
                    scanTimes[scanNumber] = scanInfo.Time;
                }

                dataset.ScanTimes = scanTimes;
            }
            else
            {
                UpdateStatus(string.Format("Warning: Raw file not found ({0}); scan times are not available!", System.IO.Path.GetFileName(rawPath)));
            }

            return msFeatures.Where(msFeature => parentScans.Contains(msFeature.Scan)).ToList();
        }
Ejemplo n.º 3
0
        /// <summary>
        ///     Finds LC-MS features by clustering MS features, then assigns each resulting feature an id and a
        ///     NET value and narrows its scan range around its most abundant MS feature.
        /// </summary>
        /// <param name="msFeatures">MS features to cluster.</param>
        /// <param name="options">Feature finding options; handed to the clusterer.</param>
        /// <param name="provider">Source of scan summaries (scan times). Must not be null.</param>
        /// <param name="progress">Optional progress sink, forwarded to the clusterer.</param>
        /// <returns>
        ///     The clustered features that contain at least one MS feature, with ids assigned sequentially from 0.
        ///     An empty list when <paramref name="msFeatures"/> is empty.
        /// </returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="provider"/> is null.</exception>
        public List<UMCLight> FindFeatures(List<MSFeatureLight> msFeatures,
                                           LcmsFeatureFindingOptions options, IScanSummaryProvider provider,
                                           IProgress<ProgressData> progress = null)
        {
            if (provider == null)
            {
                throw new ArgumentNullException(nameof(provider));
            }

            // An MS feature counts as part of a peak tail when its abundance is at most this fraction of the apex.
            const double tailAbundanceFraction = 0.05;

            var clusterer   = new MsToLcmsFeatures(provider, options);
            var features    = clusterer.Convert(msFeatures, progress);
            var newFeatures = new List<UMCLight>();

            // Without any MS feature there is no scan range to normalize against; previously this fell through
            // to a scan summary lookup for scan int.MaxValue.
            if (msFeatures.Count == 0)
            {
                return newFeatures;
            }

            var minScan = int.MaxValue;
            var maxScan = int.MinValue;

            foreach (var feature in msFeatures)
            {
                minScan = Math.Min(feature.Scan, minScan);
                maxScan = Math.Max(feature.Scan, maxScan);
            }

            var minScanTime   = provider.GetScanSummary(minScan).Time;
            var maxScanTime   = provider.GetScanSummary(maxScan).Time;
            var scanTimeRange = maxScanTime - minScanTime;
            var id            = 0;

            foreach (var feature in features)
            {
                if (feature.MsFeatures.Count < 1)
                {
                    continue;
                }

                // NET is the scan time rescaled by the observed time range. When all MS features share one scan
                // time the range is zero; use 0 rather than producing NaN/Infinity.
                feature.Net = scanTimeRange != 0
                    ? (provider.GetScanSummary(feature.Scan).Time - minScanTime) / scanTimeRange
                    : 0;
                feature.CalculateStatistics();
                feature.Id = id++;
                newFeatures.Add(feature);

                // Set the width of the feature to be the width of the peak, not the width of the tails.
                // Every MS feature, including the last one, is a candidate for the apex.
                var maxAbundance      = double.MinValue;
                var maxAbundanceIndex = 0;
                for (var msFeatureIndex = 0; msFeatureIndex < feature.MsFeatures.Count; msFeatureIndex++)
                {
                    var msFeature = feature.MsFeatures[msFeatureIndex];
                    if (msFeature.Abundance > maxAbundance)
                    {
                        maxAbundance      = msFeature.Abundance;
                        maxAbundanceIndex = msFeatureIndex;
                    }
                }

                // Walk from the apex towards the front until the first tail point; that scan becomes the start.
                for (var msFeatureIndex = maxAbundanceIndex; msFeatureIndex > 0; msFeatureIndex--)
                {
                    if (feature.MsFeatures[msFeatureIndex].Abundance / maxAbundance <= tailAbundanceFraction)
                    {
                        feature.ScanStart = feature.MsFeatures[msFeatureIndex].Scan;
                        break;
                    }
                }

                // Walk from the apex towards the back until the first tail point; that scan becomes the end.
                for (var msFeatureIndex = maxAbundanceIndex; msFeatureIndex < feature.MsFeatures.Count - 1; msFeatureIndex++)
                {
                    if (feature.MsFeatures[msFeatureIndex].Abundance / maxAbundance <= tailAbundanceFraction)
                    {
                        feature.ScanEnd = feature.MsFeatures[msFeatureIndex].Scan;
                        break;
                    }
                }
            }

            // Return only the features that were actually processed above; features without MS features never
            // receive an id, NET or statistics.
            return newFeatures;
        }
Ejemplo n.º 4
0
        /// <summary>
        ///     Reads LC-MS features from a tab-delimited text file whose first non-blank line is a header row.
        /// </summary>
        /// <remarks>
        ///     Every data row becomes one UMCLight with two child MSFeatureLight entries, one at the start scan
        ///     and one at the end scan. NET values are taken from the scan summaries of the instance's provider
        ///     when one is set; otherwise they stay 0. Numeric columns are parsed with the invariant culture so
        ///     the result does not depend on the regional settings of the machine. Blank lines are skipped.
        /// </remarks>
        /// <param name="fileLocation">Path of the file to read.</param>
        /// <returns>The features read from the file, in file order.</returns>
        /// <exception cref="KeyNotFoundException">Thrown when a required column is missing from the header.</exception>
        /// <exception cref="FormatException">Thrown when a numeric column cannot be parsed.</exception>
        public IEnumerable<UMCLight> ReadFile(string fileLocation)
        {
            // Fully qualified so no additional using directive is required.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            var headers = new Dictionary<string, int>();
            var umcs    = new List<UMCLight>();

            // Columns of the row currently being processed; captured by the readers below.
            string[] parts = null;
            Func<string, int>    readInt    = column => Convert.ToInt32(parts[headers[column]], invariant);
            Func<string, double> readDouble = column => Convert.ToDouble(parts[headers[column]], invariant);

            var msFeatureId = 0;
            var headerRead  = false;

            foreach (var line in File.ReadLines(fileLocation))
            {
                // A blank line (typically trailing) has no columns and would otherwise fail on the first lookup.
                if (string.IsNullOrWhiteSpace(line))
                {
                    continue;
                }

                parts = line.Split('\t');
                if (!headerRead)
                {
                    // Map each column name to its index.
                    for (var i = 0; i < parts.Length; i++)
                    {
                        headers.Add(parts[i], i);
                    }

                    headerRead = true;
                    continue;
                }

                var minLcScan = readInt("Scan_Start");
                var maxLcScan = readInt("Scan_End");
                var minNet    = 0.0;
                var maxNet    = 0.0;
                if (this.provider != null)
                {
                    minNet = this.provider.GetScanSummary(minLcScan).Net;
                    maxNet = this.provider.GetScanSummary(maxLcScan).Net;
                }

                var umc = new UMCLight
                {
                    GroupId                 = this.datasetId,
                    Id                      = readInt("Feature_Index"),
                    ChargeState             = readInt("Class_Rep_Charge"),
                    MassMonoisotopic        = readDouble("Monoisotopic_Mass"),
                    MassMonoisotopicAligned = readDouble("Monoisotopic_Mass"),
                    Mz                      = readDouble("Class_Rep_MZ"),
                    Abundance               = readDouble("Abundance"),
                    ScanStart               = minLcScan,
                    NetStart                = minNet,
                    ScanEnd                 = maxLcScan,
                    NetEnd                  = maxNet,
                    Net                     = (minNet + maxNet) / 2,
                    NetAligned              = (minNet + maxNet) / 2,
                    ScanAligned             = (minLcScan + maxLcScan) / 2,
                    ImsScanStart            = readInt("IMS_Scan_Start"),
                    ImsScanEnd              = readInt("IMS_Scan_End"),
                    DriftTime               = readDouble("Drift_Time"),
                    ConformationFitScore    = readDouble("Conformation_Fit_Score"),
                    AverageDeconFitScore    = readDouble("Average_Isotopic_Fit"),
                    SaturatedMemberCount    = readInt("Saturated_Member_Count"),
                };

                // Child feature at the start scan
                umc.AddChildFeature(new MSFeatureLight
                {
                    GroupId                 = this.datasetId,
                    Id                      = msFeatureId++,
                    ChargeState             = umc.ChargeState,
                    MassMonoisotopic        = umc.MassMonoisotopic,
                    MassMonoisotopicAligned = umc.MassMonoisotopicAligned,
                    Scan                    = umc.ScanStart,
                    Net                     = umc.NetStart,
                    NetAligned              = umc.NetStart,
                    ImsScanStart            = umc.ImsScanStart,
                    ImsScanEnd              = umc.ImsScanEnd,
                    DriftTime               = umc.DriftTime,
                });

                // Child feature at the end scan
                umc.AddChildFeature(new MSFeatureLight
                {
                    GroupId                 = this.datasetId,
                    Id                      = msFeatureId++,
                    ChargeState             = umc.ChargeState,
                    MassMonoisotopic        = umc.MassMonoisotopic,
                    MassMonoisotopicAligned = umc.MassMonoisotopicAligned,
                    Scan                    = umc.ScanEnd,
                    Net                     = umc.NetEnd,
                    NetAligned              = umc.NetEnd,
                    ImsScanStart            = umc.ImsScanStart,
                    ImsScanEnd              = umc.ImsScanEnd,
                    DriftTime               = umc.DriftTime,
                });

                umcs.Add(umc);
            }

            return umcs;
        }