/// <summary>
/// Aligns match sets across datasets. Every non-null list in <paramref name="prsmGroup"/> is pooled
/// into one node set, the pool is partitioned with <c>ConnnectedComponents</c> using
/// <paramref name="prsmGroupComparer"/>, and each component becomes one row of the result.
/// </summary>
/// <param name="prsmGroup">One list of match sets per dataset; null entries are skipped.</param>
/// <param name="prsmGroupComparer">Comparer passed to <c>ConnnectedComponents</c>.</param>
/// <returns>
/// A jagged array indexed as [component][DataId]. A cell is null when the component holds no set
/// with that <c>DataId</c>; when several sets share a <c>DataId</c>, the later ones are merged into
/// the first via <c>Merge</c>.
/// </returns>
public ProteinSpectrumMatchSet[][] GroupAcrossRuns(List<ProteinSpectrumMatchSet>[] prsmGroup, INodeComparer<ProteinSpectrumMatchSet> prsmGroupComparer)
        {
            var datasetCount = prsmGroup.Length;
            var pooled = new NodeSet<ProteinSpectrumMatchSet>();

            // Pool the match sets of every dataset that actually has a list.
            foreach (var runSets in prsmGroup)
            {
                if (runSets != null)
                {
                    pooled.AddRange(runSets);
                }
            }

            var components = pooled.ConnnectedComponents(prsmGroupComparer);
            var componentCount = components.Count;
            var table = new ProteinSpectrumMatchSet[componentCount][];

            for (var row = 0; row < componentCount; row++)
            {
                // One cell per dataset, addressed by the set's DataId.
                var cells = new ProteinSpectrumMatchSet[datasetCount];
                table[row] = cells;

                foreach (var member in components[row])
                {
                    var occupant = cells[member.DataId];
                    if (occupant == null)
                    {
                        cells[member.DataId] = member;
                    }
                    else
                    {
                        // The cell is already taken for this dataset: fold the newcomer into it.
                        occupant.Merge(member);
                    }
                }
            }

            return table;
        }
Esempio n. 2
0
        /// <summary>
        /// Builds a feature from its representative values together with its charge, scan,
        /// elution-time and NET ranges. The attached <c>ProteinSpectrumMatches</c> set is created
        /// fresh with argument 0.
        /// </summary>
        /// <param name="repMass">Stored as <c>RepresentativeMass</c>.</param>
        /// <param name="repCharge">Stored as <c>RepresentativeCharge</c>; also the fallback for non-positive charge bounds.</param>
        /// <param name="repMz">Stored as <c>RepresentativeMz</c>.</param>
        /// <param name="repScanNum">Stored as <c>RepresentativeScanNum</c>; also the fallback for non-positive scan bounds.</param>
        /// <param name="abundance">Stored as <c>Abundance</c>.</param>
        /// <param name="minCharge">Lower charge bound; replaced by <paramref name="repCharge"/> when not positive.</param>
        /// <param name="maxCharge">Upper charge bound; replaced by <paramref name="repCharge"/> when not positive.</param>
        /// <param name="minScan">Lower scan bound; replaced by <paramref name="repScanNum"/> when not positive.</param>
        /// <param name="maxScan">Upper scan bound; replaced by <paramref name="repScanNum"/> when not positive.</param>
        /// <param name="minElution">Stored as <c>MinElutionTime</c>.</param>
        /// <param name="maxElution">Stored as <c>MaxElutionTime</c>.</param>
        /// <param name="minNet">Stored as <c>MinNet</c>.</param>
        /// <param name="maxNet">Stored as <c>MaxNet</c>.</param>
        public LcMsFeature(double repMass, int repCharge, double repMz, int repScanNum, double abundance,
                           int minCharge, int maxCharge, int minScan, int maxScan,
                           double minElution, double maxElution, double minNet = 0, double maxNet = 0)
        {
            // Resolve the range bounds up front: a non-positive bound falls back to the representative value.
            var lowCharge  = minCharge > 0 ? minCharge : repCharge;
            var highCharge = maxCharge > 0 ? maxCharge : repCharge;
            var firstScan  = minScan > 0 ? minScan : repScanNum;
            var lastScan   = maxScan > 0 ? maxScan : repScanNum;

            Abundance             = abundance;
            RepresentativeMass    = repMass;
            RepresentativeCharge  = repCharge;
            RepresentativeMz      = repMz;
            RepresentativeScanNum = repScanNum;

            MinCharge  = lowCharge;
            MaxCharge  = highCharge;
            MinScanNum = firstScan;
            MaxScanNum = lastScan;

            MinElutionTime = minElution;
            MaxElutionTime = maxElution;

            MaxNet = maxNet;
            MinNet = minNet;

            ProteinSpectrumMatches = new ProteinSpectrumMatchSet(0);
        }
Esempio n. 3
0
        /// <summary>
        /// Initializes the feature's representative values, its charge and scan ranges, its
        /// elution-time and NET bounds, and an empty-argument (0) <c>ProteinSpectrumMatches</c> set.
        /// Charge and scan bounds that are not positive are replaced by the representative
        /// charge and representative scan number respectively.
        /// </summary>
        public LcMsFeature(double repMass, int repCharge, double repMz, int repScanNum, double abundance,
            int minCharge, int maxCharge, int minScan, int maxScan,
            double minElution, double maxElution, double minNet = 0, double maxNet = 0)
        {
            Abundance = abundance;
            RepresentativeMass = repMass;
            RepresentativeCharge = repCharge;
            RepresentativeMz = repMz;
            RepresentativeScanNum = repScanNum;

            // Charge range: use the supplied bound only when it is positive.
            if (minCharge > 0)
            {
                MinCharge = minCharge;
            }
            else
            {
                MinCharge = repCharge;
            }

            if (maxCharge > 0)
            {
                MaxCharge = maxCharge;
            }
            else
            {
                MaxCharge = repCharge;
            }

            // Scan range: same rule, falling back to the representative scan number.
            if (minScan > 0)
            {
                MinScanNum = minScan;
            }
            else
            {
                MinScanNum = repScanNum;
            }

            if (maxScan > 0)
            {
                MaxScanNum = maxScan;
            }
            else
            {
                MaxScanNum = repScanNum;
            }

            MinElutionTime = minElution;
            MaxElutionTime = maxElution;

            MaxNet = maxNet;
            MinNet = minNet;

            ProteinSpectrumMatches = new ProteinSpectrumMatchSet(0);
        }
Esempio n. 4
0
        /// <summary>
        /// For each of 32 named datasets, loads the raw run and its MSAlign result table, groups the
        /// identified matches into LC-MS features, merges features that agree on mass, NET co-elution
        /// and protein ID, and writes the surviving features to a <c>.ms1ft</c> file in the ProMex
        /// output folder.
        /// </summary>
        /// <remarks>
        /// The test is ignored (via <c>Assert.Ignore</c>) when the raw-data folder does not exist or when
        /// no dataset had both its raw file and its result file available. Datasets with a missing file
        /// are skipped with a console warning.
        /// </remarks>
        public void TestQuantifyIdedProteoforms()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            Utils.ShowStarting(methodName);

            const string rawFolder           = @"\\proto-11\MSXML_Cache\PBF_Gen_1_193\2015_2";
            const string promexOutFolder     = @"D:\MassSpecFiles\UTEX\MSAlign";
            const string msAlignResultFolder = @"D:\MassSpecFiles\UTEX\MSAlign";

            if (!Directory.Exists(rawFolder))
            {
                Assert.Ignore(@"Skipping test {0} since folder not found: {1}", methodName, rawFolder);
            }

            const int nDataset = 32;
            var dataset = new string[nDataset];

            for (var i = 0; i < nDataset; i++)
            {
                dataset[i] = string.Format("Syn_utex2973_Top_{0,2:D2}_TopDown_7May15_Bane_14-09-01RZ", i + 1);
            }

            var prsmReader = new ProteinSpectrumMatchReader(0.01);

            var filesProcessed = 0;

            var tolerance = new Tolerance(10);

            for (var i = 0; i < dataset.Length; i++)
            {
                var rawFile = string.Format(@"{0}\{1}.pbf", rawFolder, dataset[i]);
                if (!File.Exists(rawFile))
                {
                    Console.WriteLine(@"Warning: Skipping file not found: {0}", rawFile);
                    continue;
                }

                // Check the identification file before loading the run so that a dataset
                // without results does not pay for opening the raw file.
                var path = string.Format(@"{0}\{1}_MSAlign_ResultTable.txt", msAlignResultFolder, dataset[i]);
                if (!File.Exists(path))
                {
                    Console.WriteLine(@"Warning: Skipping file not found: {0}", path);
                    continue;
                }

                var run = PbfLcMsRun.GetLcMsRun(rawFile);

                var prsmList = prsmReader.LoadIdentificationResult(path, ProteinSpectrumMatch.SearchTool.MsAlign);

                filesProcessed++;

                // Derive the protein ID from the 5 characters that follow the name prefix.
                for (var j = 0; j < prsmList.Count; j++)
                {
                    var match = prsmList[j];
                    var prefixStart = match.ProteinName.IndexOf(ProteinNamePrefix, StringComparison.Ordinal);
                    match.ProteinId = match.ProteinName.Substring(prefixStart + ProteinNamePrefix.Length, 5);
                }

                // PrSM To Feature: -1 marks a match that has not been assigned to a feature yet.
                var prsmToFeatureIdMap = new int[prsmList.Count];
                for (var k = 0; k < prsmToFeatureIdMap.Length; k++)
                {
                    prsmToFeatureIdMap[k] = -1;
                }

                // Feature To PrSM: featureToPrsm[n] holds the matches attached to featureList[n].
                var featureToPrsm = new List <ProteinSpectrumMatchSet>();

                var featureFinder = new LcMsPeakMatrix(run, new LcMsFeatureLikelihood());
                var featureList   = new List <LcMsPeakCluster>();
                var featureId     = 0;

                // Lower limit of the elution-time tolerance; it depends only on the run.
                var runEtTol = run.GetElutionTime(run.MaxLcScan) * 0.005;

                for (var j = 0; j < prsmList.Count; j++)
                {
                    if (prsmToFeatureIdMap[j] >= 0)
                    {
                        continue;
                    }

                    var match      = prsmList[j];
                    var minScanNum = match.ScanNum;
                    var maxScanNum = match.ScanNum;
                    var mass       = match.Mass;
                    var charge     = match.Charge;
                    var massTh     = tolerance.GetToleranceAsMz(mass);
                    var id1        = match.ProteinId;

                    var feature = featureFinder.GetLcMsPeakCluster(mass, charge, minScanNum, maxScanNum);
                    var prsmSet = new ProteinSpectrumMatchSet(i)
                    {
                        match
                    };

                    // The seeding match always belongs to the feature being created.
                    prsmToFeatureIdMap[j] = featureId;

                    if (feature == null)
                    {
                        // No cluster found: fall back to a feature built from noise peaks at the match's own charge.
                        feature = featureFinder.GetLcMsPeaksFromNoisePeaks(mass, charge, minScanNum, maxScanNum, charge, charge);
                    }
                    else
                    {
                        var etTol = Math.Max(runEtTol, feature.ElutionLength * 0.2);

                        // Attach later matches with the same protein ID whose elution time lies inside the
                        // feature's elution range widened by etTol on both sides and whose mass is within tolerance.
                        for (var k = j + 1; k < prsmList.Count; k++)
                        {
                            var otherMatch = prsmList[k];
                            var id2        = otherMatch.ProteinId;
                            var et2        = run.GetElutionTime(otherMatch.ScanNum);

                            if (id1.Equals(id2) &&
                                feature.MinElutionTime - etTol < et2 && et2 < feature.MaxElutionTime + etTol &&
                                Math.Abs(otherMatch.Mass - mass) < massTh)
                            {
                                prsmToFeatureIdMap[k] = featureId;
                                prsmSet.Add(otherMatch);
                            }
                        }
                    }
                    featureId++;

                    // Flag == 1 marks a live feature; merged-away features are reset to 0 below.
                    feature.Flag = 1;
                    featureList.Add(feature);
                    featureToPrsm.Add(prsmSet);
                }

                // Merge pairs of live features that agree on mass, co-elute by NET and share a protein ID.
                for (var j = 0; j < featureList.Count; j++)
                {
                    var f1 = featureList[j];
                    if (f1.Flag < 1)
                    {
                        continue;
                    }
                    var prsm1 = featureToPrsm[j];
                    var f1MassTol = tolerance.GetToleranceAsMz(f1.Mass);

                    for (var k = j + 1; k < featureList.Count; k++)
                    {
                        var f2 = featureList[k];
                        if (f2.Flag < 1)
                        {
                            continue;
                        }

                        var prsm2 = featureToPrsm[k];
                        if (Math.Abs(f1.Mass - f2.Mass) > f1MassTol)
                        {
                            continue;
                        }
                        if (!f1.CoElutedByNet(f2, 0.005))
                        {
                            continue;
                        }
                        if (!prsm1.ShareProteinId(prsm2))
                        {
                            continue;
                        }

                        // Keep the feature with the longer scan range and move the other one's matches into it.
                        if (f1.ScanLength > f2.ScanLength)
                        {
                            prsm1.AddRange(prsm2);
                            prsm2.Clear();
                            f2.Flag = 0;
                        }
                        else
                        {
                            prsm2.AddRange(prsm1);
                            prsm1.Clear();
                            f1.Flag = 0;

                            // f1 is no longer live and its match set is empty, so there is
                            // nothing left to compare against the remaining features.
                            break;
                        }
                    }
                }

                // Write every live feature; the using block closes the file even if writing throws.
                var ms1ftFilePath = string.Format(@"{0}\{1}.ms1ft", promexOutFolder, dataset[i]);
                using (var writer = new StreamWriter(ms1ftFilePath))
                {
                    writer.WriteLine(LcMsFeatureFinderLauncher.GetHeaderString());

                    for (var j = 0; j < featureList.Count; j++)
                    {
                        var f1 = featureList[j];
                        if (f1.Flag < 1)
                        {
                            continue;
                        }
                        var prsm1 = featureToPrsm[j];

                        // Stretch the feature to the scans neighbouring its first and last attached match.
                        var minScanNum = run.GetPrevScanNum(prsm1.MinScanNum, 1);
                        var maxScanNum = run.GetNextScanNum(prsm1.MaxScanNum, 1);
                        f1.ExpandScanRange(minScanNum, maxScanNum);

                        writer.Write("{0}\t", j + 1);
                        writer.WriteLine(LcMsFeatureFinderLauncher.GetString(f1));
                    }
                }

                Console.WriteLine(ms1ftFilePath);
            }

            if (filesProcessed == 0)
            {
                Assert.Ignore("Skipped since data files not found");
            }
        }