// Reads the Standard_Protein_FIT_060222.noredundant fixture with SequestResultTextFormat
// and checks the overall counts plus the details of the first protein and its first peptide.
public void TestReadFromFile()
{
    IIdentifiedResult result = new SequestResultTextFormat().ReadFromFile(@"../../../data/Standard_Protein_FIT_060222.noredundant");

    // Overall size of the parsed result.
    Assert.AreEqual(19, result.Count);

    List<IIdentifiedProtein> proteins = result.GetProteins();
    Assert.AreEqual(43, proteins.Count);

    List<IIdentifiedSpectrum> spectra = result.GetSpectra();
    Assert.AreEqual(287, spectra.Count);

    // First protein of the first group.
    IIdentifiedProtein firstProtein = result[0][0];
    Assert.AreEqual(1, firstProtein.GroupIndex);
    Assert.AreEqual(
        "sp|P00489|PHS2_RABIT Glycogen phosphorylase, muscle form (EC 2.4.1.1) (Myophosphorylase) - Oryctolagus cuniculus (Rabbit).",
        firstProtein.Reference.Trim());
    Assert.AreEqual(97, firstProtein.Peptides.Count);

    // Spectrum attached to the first peptide of that protein.
    IIdentifiedSpectrum firstSpectrum = firstProtein.Peptides[0].Spectrum;
    Assert.AreEqual(1689.886, firstSpectrum.TheoreticalMH, 0.001);
    Assert.AreEqual(3.8133, firstSpectrum.Score, 0.01);
    Assert.AreEqual(0.65, firstSpectrum.DeltaScore, 0.01);
    Assert.AreEqual(1602.8, firstSpectrum.SpScore, 0.1);
    Assert.AreEqual(1, firstSpectrum.SpRank);
    Assert.AreEqual(27, firstSpectrum.MatchedIonCount);
    Assert.AreEqual(52, firstSpectrum.TheoreticalIonCount);

    // Peptide sequence and the short "file,scan" name of its spectrum.
    Assert.AreEqual("K.ARPEFTLPVHFYGR.V", firstProtein.Peptides[0].Sequence);
    Assert.AreEqual(
        "Standard_Protein_FIT_060222,7066",
        firstProtein.Peptides[0].Spectrum.Query.FileScan.ShortFileName);
}
/// <summary>
/// Reads a Sequest result file, collects the spectra of the first protein of every
/// protein group accepted by <c>acceptFunc</c>, and writes them, sorted, to a
/// ".peptides" file next to the input.
/// </summary>
/// <param name="filename">Path of the Sequest result file to read.</param>
/// <returns>A single-element array containing the name of the peptide file written.</returns>
public override IEnumerable<string> Process(string filename)
{
    var reader = new SequestResultTextFormat();

    Progress.SetMessage("Reading from " + filename + "...");
    IIdentifiedResult identified = reader.ReadFromFile(filename);

    // A set is used so that a spectrum shared by several accepted groups is kept once.
    var accepted = new HashSet<IIdentifiedSpectrum>();
    foreach (IIdentifiedProteinGroup proteinGroup in identified)
    {
        if (!acceptFunc(proteinGroup))
        {
            continue;
        }

        accepted.UnionWith(proteinGroup[0].GetSpectra());
    }

    var ordered = new List<IIdentifiedSpectrum>(accepted);
    ordered.Sort();

    string outputName = MyConvert.Format("{0}.{1}.peptides", filename, uniqueStr);
    Progress.SetMessage("Writing to " + outputName + "...");

    // The peptide writer reuses the header the reader saw in the input file.
    var writer = new SequestPeptideTextFormat(reader.PeptideFormat.GetHeader());
    writer.WriteToFile(outputName, ordered);

    Progress.SetMessage("Finished");
    return new[] { outputName };
}
/// <summary>
/// Reads a result file with <c>MascotResultTextFormat</c> and rewrites it to
/// "<paramref name="fileName"/>.tmp" using a <c>SequestResultTextFormat</c> built
/// from fixed protein and peptide header lines.
/// </summary>
/// <param name="fileName">Path of the result file to convert.</param>
/// <returns>A single-element array containing the name of the ".tmp" file written.</returns>
public override IEnumerable<string> Process(string fileName)
{
    // Column headers of the target layout: protein line first, peptide line second.
    const string proteinHeader = "\tReference\tPepCount\tUniquePepCount\tCoverPercent\tMW\tPI";
    const string peptideHeader = "\t\"File, Scan(s)\"\tSequence\tMH+\tDiff(MH+)\tCharge\tRank\tXC\tDeltaCn\tSp\tRSp\tIons\tReference\tDIFF_MODIFIED_CANDIDATE\tPI\tGroupCount\tProteinCount";

    var identified = new MascotResultTextFormat().ReadFromFile(fileName);

    var writer = new SequestResultTextFormat(proteinHeader, peptideHeader);
    string outputName = fileName + ".tmp";
    writer.WriteToFile(outputName, identified);

    return new[] { outputName };
}
/// <summary>
/// Builds the SILAC protein quantification processor from the values currently held
/// by the option fields of this object.
/// </summary>
/// <returns>
/// An <c>ExtendSilacQuantificationProteinFileProcessor</c> configured with the selected
/// result format, the quantification options and both classification sets.
/// </returns>
/// <exception cref="Exception">
/// The selected search engine is neither MASCOT nor SEQUEST.
/// </exception>
protected override IFileProcessor GetFileProcessor()
{
    double tolerance = precursorPPMTolerance.Value;

    // With no raw directory given, fall back to the directory of the origin file.
    string rawFolder = rawDir.FullName == ""
        ? new FileInfo(GetOriginFile()).DirectoryName
        : rawDir.FullName;

    // Pick the reader matching the search engine that produced the result file.
    IIdentifiedResultTextFormat resultFormat;
    switch (searchEngine.SelectedItem)
    {
        case SearchEngineType.MASCOT:
            resultFormat = new MascotResultTextFormat();
            break;

        case SearchEngineType.SEQUEST:
            resultFormat = new SequestResultTextFormat();
            break;

        default:
            throw new Exception(MyConvert.Format("Unsupported search engine {0}, contact with author.", searchEngine.SelectedItem));
    }

    var quantOption = new SilacQuantificationOption()
    {
        RawFormat = rawFormats.SelectedItem,
        RawDir = rawFolder,
        SilacParamFile = silacFile.FullName,
        PPMTolerance = tolerance,
        IgnoreModifications = ignoreModifications.Text,
        ProfileLength = _profileLength.Value
    };

    var processor = new ExtendSilacQuantificationProteinFileProcessor(
        quantOption,
        resultFormat,
        datasetClassification.GetClassificationSet(),
        rawPairClassification.GetClassificationSet());
    processor.MinPeptideRegressionCorrelation = minCorrelation.Value;

    return processor;
}
/// <summary>
/// For every filter condition, writes one protein group file per fraction
/// (classified name) into the "individual" sub-directory of the result directory.
/// </summary>
protected override void ExportIndividualFractionFile()
{
    var outputDir = new DirectoryInfo(resultDir.FullName + "\\individual");
    var source = new FileInfo(option.SourceFileName);
    SequestResultTextFormat formatter = GetWriteFormat();

    for (int minCount = option.FilterFrom; minCount <= option.FilterTo; minCount += option.FilterStep)
    {
        List<CalculationItem> candidates = GetFilteredItems(minCount);

        // Created lazily, so nothing is created when the filter range is empty.
        if (!outputDir.Exists)
        {
            outputDir.Create();
        }

        foreach (string classifiedName in option.GetClassifiedNames())
        {
            // <dir>\<source name without extension>.<condition>.<classified name><source extension>
            string outputPath = MyConvert.Format(
                @"{0}\{1}.{2}.{3}{4}",
                outputDir.FullName,
                FileUtils.ChangeExtension(source.Name, ""),
                GetOptionCondition(minCount),
                classifiedName,
                source.Extension);

            // Keep the groups that reach the minimum count in this fraction, each reduced
            // by GetGroupContainClassifiedPeptideHitOnly for the current classified name.
            var keptGroups = new List<IIdentifiedProteinGroup>();
            foreach (var candidate in candidates)
            {
                if (candidate.GetClassifiedCount(classifiedName) >= minCount)
                {
                    var sourceGroup = (IIdentifiedProteinGroup)candidate.Key;
                    keptGroups.Add(GetGroupContainClassifiedPeptideHitOnly(classifiedName, sourceGroup));
                }
            }

            var fractionResult = new IdentifiedResult();
            fractionResult.AddRange(keptGroups);
            fractionResult.Sort();

            formatter.WriteToFile(outputPath, fractionResult);
        }
    }
}
// Classifies the distinct peptides of the first protein of groups 0 and 1 into a single
// class "G1" and checks the peptide / unique peptide counts recorded for that class.
public void TestClassifyPeptideHit2()
{
    const string className = "G1";

    var identified = new SequestResultTextFormat().ReadFromFile(@"../../../data/TestDistributionOption.noredundant");

    // First group.
    var calc = new CalculationItem();
    calc.Peptides = identified[0][0].GetDistinctPeptides();
    calc.ClassifyPeptideHit(hit => className);

    Assert.AreEqual(1360, calc.Classifications[className].PeptideCount);
    Assert.AreEqual(24, calc.Classifications[className].UniquePeptideCount);

    // Second group, re-using the same item so the classification is recomputed.
    calc.Peptides = identified[1][0].GetDistinctPeptides();
    calc.ClassifyPeptideHit(hit => className);

    Assert.AreEqual(5, calc.Classifications[className].PeptideCount);
    Assert.AreEqual(1, calc.Classifications[className].UniquePeptideCount);
}
// Same scenario as classifying everything into "G1", but the fixture path is resolved
// from TestContext.CurrentContext.TestDirectory instead of the working directory.
public void TestClassifyPeptideHit2()
{
    const string className = "G1";

    string fixturePath = TestContext.CurrentContext.TestDirectory + "/../../../data/TestDistributionOption.noredundant";
    var identified = new SequestResultTextFormat().ReadFromFile(fixturePath);

    // First group.
    var calc = new CalculationItem();
    calc.Peptides = identified[0][0].GetDistinctPeptides();
    calc.ClassifyPeptideHit(hit => className);

    Assert.AreEqual(1360, calc.Classifications[className].PeptideCount);
    Assert.AreEqual(24, calc.Classifications[className].UniquePeptideCount);

    // Second group, re-using the same item so the classification is recomputed.
    calc.Peptides = identified[1][0].GetDistinctPeptides();
    calc.ClassifyPeptideHit(hit => className);

    Assert.AreEqual(5, calc.Classifications[className].PeptideCount);
    Assert.AreEqual(1, calc.Classifications[className].UniquePeptideCount);
}
/// <summary>
/// Reads a Sequest result file, reduces each protein group to the peptides returned by
/// <c>UniquePeptideDistiller.KeepMaxScorePeptideOnly</c>, and saves both the reduced
/// protein result (".unique") and its spectra (".unique.peptides").
/// </summary>
/// <param name="fileName">Path of the Sequest result file to read.</param>
/// <returns>The names of the protein file and the peptide file that were written.</returns>
public override IEnumerable<string> Process(string fileName)
{
    var format = new SequestResultTextFormat { Progress = this.Progress };

    Progress.SetMessage("Reading identified result from " + fileName + " ...");
    IIdentifiedResult identified = format.ReadFromFile(fileName);

    Progress.SetMessage("Removing duplicated peptide ...");
    Progress.SetRange(0, identified.Count);
    for (int groupIndex = 0; groupIndex < identified.Count; groupIndex++)
    {
        Progress.SetPosition(groupIndex);

        IIdentifiedProteinGroup proteinGroup = identified[groupIndex];
        List<IIdentifiedSpectrum> retained = UniquePeptideDistiller.KeepMaxScorePeptideOnly(proteinGroup.GetPeptides());

        // Drop, from every protein of the group, the peptides whose spectrum was not retained.
        foreach (var member in proteinGroup)
        {
            member.Peptides.RemoveAll(peptide => !retained.Contains(peptide.Spectrum));
        }
    }

    string proteinFile = fileName + ".unique";
    string peptideFile = fileName + ".unique.peptides";

    Progress.SetMessage("Saving proteins to " + proteinFile + " ...");
    format.WriteToFile(proteinFile, identified);

    List<IIdentifiedSpectrum> remainingSpectra = identified.GetSpectra();
    var peptideWriter = new SequestPeptideTextFormat(format.PeptideFormat.GetHeader());

    Progress.SetMessage("Saving peptides to " + peptideFile + " ...");
    peptideWriter.WriteToFile(peptideFile, remainingSpectra);

    Progress.SetMessage("Finished.");
    return new[] { proteinFile, peptideFile };
}
/// <summary>
/// Compares, for every identified peptide, the MS level of its first scan in the raw file
/// with what its sequence matches (<c>ms2seqPattern</c> / <c>ms3seqPattern</c>), writes the
/// inconsistent peptides to "&lt;fileName&gt;.incorrect.peptides.txt", and removes them
/// from the in-memory result.
/// </summary>
/// <param name="fileName">Path of the Sequest result file to read.</param>
/// <returns>
/// A single-element array containing the ".correct.txt" file name derived from
/// <paramref name="fileName"/>.
/// </returns>
public override IEnumerable<string> Process(string fileName)
{
    SequestResultTextFormat format = new SequestResultTextFormat();
    IIdentifiedResult sr = format.ReadFromFile(fileName);

    Dictionary<string, HashSet<IIdentifiedSpectrum>> peptideMap = sr.GetExperimentalPeptideMap();

    // confused: sequence matches both patterns (value = MS level read from the raw file).
    // wrongMs2 / wrongMs3: sequence matches one pattern but the scan has a different MS level.
    Dictionary<IIdentifiedSpectrum, int> confused = new Dictionary<IIdentifiedSpectrum, int>();
    List<IIdentifiedSpectrum> wrongMs2 = new List<IIdentifiedSpectrum>();
    List<IIdentifiedSpectrum> wrongMs3 = new List<IIdentifiedSpectrum>();
    int pepcount = 0;

    List<string> exps = new List<string>(peptideMap.Keys);
    exps.Sort();

    foreach (string exp in exps)
    {
        Console.Out.WriteLine(exp);

        HashSet<IIdentifiedSpectrum> peps = peptideMap[exp];
        pepcount += peps.Count;

        // One raw file per experiment, looked up through expRawMap.
        using (IRawFile rawFile = new RawFileImpl(expRawMap[exp]))
        {
            foreach (IIdentifiedSpectrum pep in peps)
            {
                int msLevel = rawFile.GetMsLevel(pep.Query.FileScan.FirstScan);
                bool bMs2 = ms2seqPattern.Match(pep.Sequence).Success;
                bool bMs3 = ms3seqPattern.Match(pep.Sequence).Success;

                if (bMs2 && bMs3)
                {
                    confused[pep] = msLevel;
                }
                else if (bMs3)
                {
                    if (msLevel != 3)
                    {
                        wrongMs3.Add(pep);
                    }
                }
                else if (bMs2)
                {
                    if (msLevel != 2)
                    {
                        wrongMs2.Add(pep);
                    }
                }
            }
        }
    }

    string incorrectFilename = FileUtils.ChangeExtension(fileName, ".incorrect.peptides.txt");
    using (StreamWriter sw = new StreamWriter(incorrectFilename))
    {
        sw.WriteLine("Type\tFilename\tSequence\tScore\tDeltaScore\tmsLevel");

        foreach (KeyValuePair<IIdentifiedSpectrum, int> entry in confused)
        {
            IIdentifiedSpectrum pep = entry.Key;
            sw.WriteLine(MyConvert.Format("Confused\t{0}\t{1}\t{2:0.00}\t{3:0.00}\t{4}", pep.Query.FileScan.LongFileName, pep.Sequence, pep.Score, pep.DeltaScore, entry.Value));
        }

        // NOTE(review): the msLevel column of the two sections below is a fixed literal
        // ("3" for WrongMS2, "2" for WrongMS3), not the level read from the raw file.
        // This presumably assumes only MS2 and MS3 scans occur; confirm.
        foreach (IIdentifiedSpectrum pep in wrongMs2)
        {
            sw.WriteLine(MyConvert.Format("WrongMS2\t{0}\t{1}\t{2:0.00}\t{3:0.00}\t3", pep.Query.FileScan.LongFileName, pep.Sequence, pep.Score, pep.DeltaScore));
        }

        foreach (IIdentifiedSpectrum pep in wrongMs3)
        {
            sw.WriteLine(MyConvert.Format("WrongMS3\t{0}\t{1}\t{2:0.00}\t{3:0.00}\t2", pep.Query.FileScan.LongFileName, pep.Sequence, pep.Score, pep.DeltaScore));
        }

        sw.WriteLine();
        sw.WriteLine("Total\t" + pepcount);
        sw.WriteLine("Confused\t" + confused.Count);
        sw.WriteLine("WrongMs2\t" + wrongMs2.Count);
        sw.WriteLine("WrongMs3\t" + wrongMs3.Count);
    }

    // Long file names of every spectrum reported above; a set gives constant-time lookup
    // in the removal loop below.
    HashSet<string> incorrectDtafilenames = new HashSet<string>();
    foreach (IIdentifiedSpectrum pep in confused.Keys)
    {
        incorrectDtafilenames.Add(pep.Query.FileScan.LongFileName);
    }
    foreach (IIdentifiedSpectrum pep in wrongMs2)
    {
        incorrectDtafilenames.Add(pep.Query.FileScan.LongFileName);
    }
    foreach (IIdentifiedSpectrum pep in wrongMs3)
    {
        incorrectDtafilenames.Add(pep.Query.FileScan.LongFileName);
    }

    // Iterate backwards so that removing an entry does not shift the ones still to visit.
    for (int i = sr.Count - 1; i >= 0; i--)
    {
        foreach (IIdentifiedProtein sp in sr[i])
        {
            for (int j = sp.Peptides.Count - 1; j >= 0; j--)
            {
                // The set holds LongFileName values, so the lookup has to use the same
                // property (the previous code looked up FileScan.Experimental instead).
                if (incorrectDtafilenames.Contains(sp.Peptides[j].Spectrum.Query.FileScan.LongFileName))
                {
                    sp.Peptides.RemoveAt(j);
                }
            }
        }

        // A group whose first protein lost all of its peptides is dropped entirely.
        if (sr[i][0].Peptides.Count == 0)
        {
            sr.RemoveAt(i);
        }
    }

    string correctFilename = FileUtils.ChangeExtension(fileName, ".correct.txt");

    // NOTE(review): the filtered result is never written; the call below is disabled and
    // refers to a variable (cr) that does not exist in this method, so the returned file
    // name points to a file this method does not create.
    //BuildSummaryResultUtils.Write(correctFilename, cr);
    return new[] { correctFilename };
}