/// <summary>
/// Pools the PrSM sets of every dataset, partitions the pool into connected components
/// using <paramref name="prsmGroupComparer"/>, and lays the components out as a table.
/// </summary>
/// <param name="prsmGroup">One list of PrSM sets per dataset; null entries are skipped.</param>
/// <param name="prsmGroupComparer">Comparer handed to the connected-component search.</param>
/// <returns>
/// A jagged array indexed as [component][dataset], where the dataset column is taken from
/// each set's DataId. Cells with no set for that dataset stay null; when a component holds
/// several sets with the same DataId, the later ones are merged into the first.
/// </returns>
public ProteinSpectrumMatchSet[][] GroupAcrossRuns(List<ProteinSpectrumMatchSet>[] prsmGroup, INodeComparer<ProteinSpectrumMatchSet> prsmGroupComparer)
{
    var datasetCount = prsmGroup.Length;

    // Gather every dataset's sets into a single node collection.
    var allSets = new NodeSet<ProteinSpectrumMatchSet>();
    foreach (var datasetSets in prsmGroup)
    {
        if (datasetSets != null)
        {
            allSets.AddRange(datasetSets);
        }
    }

    var components = allSets.ConnnectedComponents(prsmGroupComparer);
    var table = new ProteinSpectrumMatchSet[components.Count][];

    for (var c = 0; c < components.Count; c++)
    {
        var row = new ProteinSpectrumMatchSet[datasetCount];
        table[c] = row;

        foreach (var member in components[c])
        {
            var occupant = row[member.DataId];
            if (occupant != null)
            {
                // This dataset already has a set in the component: fold the new one into it.
                occupant.Merge(member);
            }
            else
            {
                row[member.DataId] = member;
            }
        }
    }

    return table;
}
/// <summary>
/// Builds a feature from its representative values, abundance, and charge, scan,
/// elution-time and NET ranges, and attaches a new PrSM set constructed with id 0.
/// </summary>
/// <param name="repMass">Representative mass.</param>
/// <param name="repCharge">Representative charge; also the fallback for a non-positive charge bound.</param>
/// <param name="repMz">Representative m/z.</param>
/// <param name="repScanNum">Representative scan number; also the fallback for a non-positive scan bound.</param>
/// <param name="abundance">Feature abundance.</param>
/// <param name="minCharge">Lower charge bound; values &lt;= 0 are replaced by <paramref name="repCharge"/>.</param>
/// <param name="maxCharge">Upper charge bound; values &lt;= 0 are replaced by <paramref name="repCharge"/>.</param>
/// <param name="minScan">Lower scan bound; values &lt;= 0 are replaced by <paramref name="repScanNum"/>.</param>
/// <param name="maxScan">Upper scan bound; values &lt;= 0 are replaced by <paramref name="repScanNum"/>.</param>
/// <param name="minElution">Minimum elution time.</param>
/// <param name="maxElution">Maximum elution time.</param>
/// <param name="minNet">Minimum NET value (defaults to 0).</param>
/// <param name="maxNet">Maximum NET value (defaults to 0).</param>
public LcMsFeature(
    double repMass, int repCharge, double repMz, int repScanNum, double abundance,
    int minCharge, int maxCharge, int minScan, int maxScan,
    double minElution, double maxElution, double minNet = 0, double maxNet = 0)
{
    Abundance = abundance;

    RepresentativeMass = repMass;
    RepresentativeCharge = repCharge;
    RepresentativeMz = repMz;
    RepresentativeScanNum = repScanNum;

    // A non-positive bound means "not supplied": fall back to the representative value.
    if (minCharge > 0)
    {
        MinCharge = minCharge;
    }
    else
    {
        MinCharge = repCharge;
    }

    if (maxCharge > 0)
    {
        MaxCharge = maxCharge;
    }
    else
    {
        MaxCharge = repCharge;
    }

    if (minScan > 0)
    {
        MinScanNum = minScan;
    }
    else
    {
        MinScanNum = repScanNum;
    }

    if (maxScan > 0)
    {
        MaxScanNum = maxScan;
    }
    else
    {
        MaxScanNum = repScanNum;
    }

    MinElutionTime = minElution;
    MaxElutionTime = maxElution;

    MaxNet = maxNet;
    MinNet = minNet;

    ProteinSpectrumMatches = new ProteinSpectrumMatchSet(0);
}
/// <summary>
/// For each of 32 named datasets, loads the MSAlign result table, groups the PrSMs into
/// LC-MS features found in the matching .pbf run, merges overlapping features that share
/// a protein id, and writes the surviving features to a per-dataset .ms1ft file.
/// The test is ignored when the raw-data folder is missing or when no dataset had both
/// its raw file and its result table available.
/// </summary>
public void TestQuantifyIdedProteoforms()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    Utils.ShowStarting(methodName);

    const string rawFolder = @"\\proto-11\MSXML_Cache\PBF_Gen_1_193\2015_2";
    const string promexOutFolder = @"D:\MassSpecFiles\UTEX\MSAlign";
    const string msAlignResultFolder = @"D:\MassSpecFiles\UTEX\MSAlign";

    if (!Directory.Exists(rawFolder))
    {
        Assert.Ignore(@"Skipping test {0} since folder not found: {1}", methodName, rawFolder);
    }

    const int nDataset = 32;
    var dataset = new string[nDataset];
    for (var i = 0; i < nDataset; i++)
    {
        dataset[i] = string.Format("Syn_utex2973_Top_{0,2:D2}_TopDown_7May15_Bane_14-09-01RZ", i + 1);
    }

    var prsmReader = new ProteinSpectrumMatchReader(0.01);
    var filesProcessed = 0;
    var tolerance = new Tolerance(10);

    for (var i = 0; i < dataset.Length; i++)
    {
        var rawFile = string.Format(@"{0}\{1}.pbf", rawFolder, dataset[i]);
        if (!File.Exists(rawFile))
        {
            Console.WriteLine(@"Warning: Skipping file not found: {0}", rawFile);
            continue;
        }

        // Check for the identification table before opening the run, so a dataset
        // without results is skipped without paying for loading the raw file.
        var path = string.Format(@"{0}\{1}_MSAlign_ResultTable.txt", msAlignResultFolder, dataset[i]);
        if (!File.Exists(path))
        {
            Console.WriteLine(@"Warning: Skipping file not found: {0}", path);
            continue;
        }

        var run = PbfLcMsRun.GetLcMsRun(rawFile);
        var prsmList = prsmReader.LoadIdentificationResult(path, ProteinSpectrumMatch.SearchTool.MsAlign);
        filesProcessed++;

        // Derive the protein id from the 5 characters that follow the name prefix.
        for (var j = 0; j < prsmList.Count; j++)
        {
            var match = prsmList[j];
            match.ProteinId = match.ProteinName.Substring(match.ProteinName.IndexOf(ProteinNamePrefix) + ProteinNamePrefix.Length, 5);
        }

        // PrSM -> feature id (-1 means "not assigned to a feature yet").
        var prsmToFeatureIdMap = new int[prsmList.Count];
        for (var k = 0; k < prsmToFeatureIdMap.Length; k++)
        {
            prsmToFeatureIdMap[k] = -1;
        }

        // Feature -> PrSMs; featureToPrsm[n] belongs to featureList[n].
        var featureToPrsm = new List<ProteinSpectrumMatchSet>();
        var featureFinder = new LcMsPeakMatrix(run, new LcMsFeatureLikelihood());
        var featureList = new List<LcMsPeakCluster>();
        var featureId = 0;

        for (var j = 0; j < prsmList.Count; j++)
        {
            if (prsmToFeatureIdMap[j] >= 0)
            {
                continue;
            }

            var match = prsmList[j];
            var minScanNum = match.ScanNum;
            var maxScanNum = match.ScanNum;
            var mass = match.Mass;
            var charge = match.Charge;
            var massTh = tolerance.GetToleranceAsMz(mass);
            var id1 = match.ProteinId;

            var feature = featureFinder.GetLcMsPeakCluster(mass, charge, minScanNum, maxScanNum);
            var prsmSet = new ProteinSpectrumMatchSet(i) { match };
            prsmToFeatureIdMap[j] = featureId;

            if (feature == null)
            {
                // No cluster found: fall back to a feature built from noise peaks at this charge only.
                feature = featureFinder.GetLcMsPeaksFromNoisePeaks(mass, charge, minScanNum, maxScanNum, charge, charge);
            }
            else
            {
                // Elution-time slack: the larger of 0.5% of the elution time at the last
                // scan and 20% of the feature's own elution length.
                var etTol = Math.Max(run.GetElutionTime(run.MaxLcScan) * 0.005, feature.ElutionLength * 0.2);

                for (var k = j + 1; k < prsmList.Count; k++)
                {
                    var otherMatch = prsmList[k];
                    var id2 = otherMatch.ProteinId;
                    var et2 = run.GetElutionTime(otherMatch.ScanNum);

                    // Same protein, mass within tolerance, and elution time inside the
                    // feature's range widened by etTol on BOTH sides. The upper bound was
                    // previously "MaxElutionTime - etTol", which narrowed the window at the
                    // top instead of widening it.
                    if (id1.Equals(id2)
                        && feature.MinElutionTime - etTol < et2
                        && et2 < feature.MaxElutionTime + etTol
                        && Math.Abs(otherMatch.Mass - mass) < massTh)
                    {
                        prsmToFeatureIdMap[k] = featureId;
                        prsmSet.Add(otherMatch);
                    }
                }
            }

            featureId++;

            feature.Flag = 1;
            featureList.Add(feature);
            featureToPrsm.Add(prsmSet);
        }

        // Merge overlapping features: close in mass, co-eluting by NET, and sharing a protein id.
        // Flag == 0 marks a feature that has been absorbed into another one.
        for (var j = 0; j < featureList.Count; j++)
        {
            var f1 = featureList[j];
            if (f1.Flag < 1)
            {
                continue;
            }

            var prsm1 = featureToPrsm[j];

            for (var k = j + 1; k < featureList.Count; k++)
            {
                var f2 = featureList[k];
                if (f2.Flag < 1)
                {
                    continue;
                }

                var prsm2 = featureToPrsm[k];

                if (Math.Abs(f1.Mass - f2.Mass) > tolerance.GetToleranceAsMz(f1.Mass))
                {
                    continue;
                }

                if (!f1.CoElutedByNet(f2, 0.005))
                {
                    continue;
                }

                if (!prsm1.ShareProteinId(prsm2))
                {
                    continue;
                }

                // Keep the feature spanning more scans; move the other one's PrSMs into it.
                if (f1.ScanLength > f2.ScanLength)
                {
                    prsm1.AddRange(prsm2);
                    prsm2.Clear();
                    f2.Flag = 0;
                }
                else
                {
                    prsm2.AddRange(prsm1);
                    prsm1.Clear();
                    f1.Flag = 0;
                }
            }
        }

        // Write the surviving features. The using block guarantees the file handle is
        // released even if formatting or scan-range expansion throws.
        var ms1ftFilePath = string.Format(@"{0}\{1}.ms1ft", promexOutFolder, dataset[i]);
        using (var writer = new StreamWriter(ms1ftFilePath))
        {
            writer.WriteLine(LcMsFeatureFinderLauncher.GetHeaderString());

            for (var j = 0; j < featureList.Count; j++)
            {
                var f1 = featureList[j];
                if (f1.Flag < 1)
                {
                    continue;
                }

                var prsm1 = featureToPrsm[j];

                // Stretch the feature so it covers the scans adjacent to its first and last PrSM.
                var minScanNum = run.GetPrevScanNum(prsm1.MinScanNum, 1);
                var maxScanNum = run.GetNextScanNum(prsm1.MaxScanNum, 1);
                f1.ExpandScanRange(minScanNum, maxScanNum);

                writer.Write("{0}\t", j + 1);
                writer.WriteLine(LcMsFeatureFinderLauncher.GetString(f1));
            }
        }

        Console.WriteLine(ms1ftFilePath);
    }

    if (filesProcessed == 0)
    {
        Assert.Ignore("Skipped since data files not found");
    }
}