/// <summary>
/// Writes the paths of the CEL files found under <paramref name="directory"/> to
/// "celfiles.tsv" in that directory, runs the "getceltypes.r" template on that list
/// and reads the template's output file back as a map.
/// </summary>
/// <param name="rExecute">Value assigned to the <c>RExecute</c> option of the R template processor.</param>
/// <param name="directory">Directory searched for CEL files; the list file is created inside it.</param>
/// <param name="includingSubDirectory">
/// When <c>true</c>, the directories directly below <paramref name="directory"/> are searched as well.
/// </param>
/// <param name="outputFile">Output file of the R template, read back with <c>MapReader(0, 1)</c>.</param>
/// <returns>
/// The map read from <paramref name="outputFile"/>, or an empty map when no CEL file was found
/// (in which case neither the list file nor the R template is touched).
/// </returns>
public static Dictionary<string, string> GetChipTypes(string rExecute, string directory, bool includingSubDirectory, string outputFile)
{
    var cels = GetCelFiles(directory);

    // Honor the caller's choice: sub directories are only scanned on request.
    if (includingSubDirectory)
    {
        foreach (var dir in Directory.GetDirectories(directory))
        {
            cels.AddRange(GetCelFiles(dir));
        }
    }

    if (cels.Count == 0)
    {
        return new Dictionary<string, string>();
    }

    // One CEL file path per line, converted by FileUtils.ToLinuxFormat.
    var inputfile = Path.Combine(directory, "celfiles.tsv");
    using (var sw = new StreamWriter(inputfile))
    {
        foreach (var cel in cels)
        {
            sw.WriteLine(FileUtils.ToLinuxFormat(cel));
        }
    }

    var roptions = new RTemplateProcessorOptions();
    roptions.RExecute = rExecute;
    roptions.InputFile = inputfile;
    roptions.OutputFile = outputFile;
    roptions.RTemplate = FileUtils.GetTemplateDir() + "/getceltypes.r";
    new RTemplateProcessor(roptions).Process();

    return new MapReader(0, 1).ReadFromFile(roptions.OutputFile);
}
/// <summary>
/// Writes a protein/peptide table for the identified proteins and runs the
/// "ProteinQuantification.r" template on the quantified peptide file.
/// </summary>
/// <returns>An array whose single element is the output file of the R template.</returns>
public override IEnumerable<string> Process()
{
    var design = new IsobaricLabelingExperimentalDesign();
    design.LoadFromFile(options.ExpermentalDesignFile);

    string resultFileName = GetResultFilePrefix(options.ProteinFileName, design.GetReferenceNames(""));

    // Keep a copy of the options next to the results.
    options.SaveToFile(Path.ChangeExtension(resultFileName, ".param"));

    Progress.SetMessage("Reading proteins...");
    IIdentifiedResult identified = new MascotResultTextFormat().ReadFromFile(options.ProteinFileName);

    // One row per distinct peptide sequence of every protein group.
    var proteinPeptideFile = resultFileName + ".pro_pep.tsv";
    using (var writer = new StreamWriter(proteinPeptideFile))
    {
        writer.WriteLine("Index\tPeptide\tProteins\tDescription\tPepCount\tUniquePepCount");

        foreach (var proteinGroup in identified)
        {
            var sequences = proteinGroup.GetPeptides()
                .Select(p => p.Peptide.PureSequence)
                .Distinct()
                .OrderBy(m => m)
                .ToArray();
            var names = proteinGroup.Select(p => p.Name).Merge(" ! ");
            var descriptions = proteinGroup.Select(p => p.Description).Merge(" ! ");

            foreach (var sequence in sequences)
            {
                writer.WriteLine(
                    "{0}\t{1}\t{2}\t{3}\t{4}\t{5}",
                    proteinGroup.Index,
                    sequence,
                    names,
                    descriptions,
                    proteinGroup[0].PeptideCount,
                    proteinGroup[0].UniquePeptideCount);
            }
        }
    }

    Progress.SetMessage("Quantifing proteins...");

    var quanOptions = new RTemplateProcessorOptions
    {
        InputFile = options.QuanPeptideFileName,
        OutputFile = resultFileName + ".quan." + options.PeptideToProteinMethod + ".tsv",
        RTemplate = FileUtils.GetTemplateDir() + "/ProteinQuantification.r"
    };

    // Values handed to the R script as assignments.
    quanOptions.Parameters.Add("proteinfile<-\"" + proteinPeptideFile.Replace("\\", "/") + "\"");
    quanOptions.Parameters.Add(string.Format("method<-\"{0}\"", options.PeptideToProteinMethod));
    quanOptions.Parameters.Add("pvalue<-0.01");
    quanOptions.Parameters.Add("minFinalCount<-3");

    new RTemplateProcessor(quanOptions).Process();

    Progress.SetMessage("Finished.");

    return new[] { quanOptions.OutputFile };
}
/// <summary>
/// Runs <c>DoCalculate</c> (deferred mode) on every protein group. The waiting entries it
/// returns are listed in "rlm_file.csv", processed by one run of the
/// "MultiplePairQuantification.r" template, and the parsed results are saved to the
/// proteins of the matching groups.
/// </summary>
/// <param name="mr">Identified result whose protein groups are processed.</param>
/// <param name="validFunc">Spectrum filter forwarded to <c>DoCalculate</c>.</param>
public void Calculate(IIdentifiedResult mr, Func<IIdentifiedSpectrum, bool> validFunc)
{
    var pending = new List<WaitingEntry>();
    foreach (var candidate in mr)
    {
        var entry = DoCalculate(candidate, validFunc, false);
        if (entry != null)
        {
            pending.Add(entry);
        }
    }

    // Nothing was deferred, so there is nothing for R to do.
    if (pending.Count == 0)
    {
        return;
    }

    // Index file: first protein name of the group and its intensity file.
    var listFile = (this.DetailDirectory + "/rlm_file.csv").Replace("\\", "/");
    using (var writer = new StreamWriter(listFile))
    {
        writer.WriteLine("Protein,IntensityFile");
        foreach (var entry in pending)
        {
            writer.WriteLine("\"{0}\",\"{1}\"", entry.Group[0].Name, entry.IntensityFile);
        }
    }

    var linearFile = new FileInfo(this.DetailDirectory + "/rlm.linear").FullName.Replace("\\", "/");

    var rOptions = new RTemplateProcessorOptions
    {
        InputFile = listFile,
        OutputFile = linearFile,
        RTemplate = FileUtils.GetTemplateDir() + "/MultiplePairQuantification.r"
    };
    new RTemplateProcessor(rOptions).Process();

    // Skip the header line; the protein name is the text between the first pair of quotes
    // in column 0, the regression values are parsed starting at column 2.
    var ratioByProtein = File.ReadAllLines(linearFile)
        .Skip(1)
        .Select(line => line.Split('\t'))
        .ToDictionary(
            parts => parts[0].StringAfter("\"").StringBefore("\""),
            parts => ParseLinearRegressionRatioResult(parts, 2));

    foreach (var pg in mr)
    {
        var leadName = pg[0].Name;
        if (!ratioByProtein.ContainsKey(leadName))
        {
            continue;
        }

        var ratio = ratioByProtein[leadName];
        foreach (IIdentifiedProtein protein in pg)
        {
            this.intensityFunc.SaveToAnnotation(protein, ratio);
        }
    }
}
/// <summary>
/// Writes the cel files found under <paramref name="root"/> to "celfiles.tsv", runs the
/// "frma.r" template on that list and then calls <c>CelFile.GetChipTypes</c> for the same
/// directory (sub directories included).
/// </summary>
/// <param name="root">Directory searched for cel files; "celfiles.tsv" is created inside it.</param>
/// <param name="outputFile">File passed to <c>CelFile.GetChipTypes</c> as its output file.</param>
/// <returns>
/// <paramref name="outputFile"/>, or an empty string when no cel file was found under
/// <paramref name="root"/>.
/// </returns>
public string Normalization(string root, string outputFile)
{
    var cels = CelFile.GetCelFiles(root);
    if (cels.Count == 0)
    {
        // Report the directory that was actually searched.
        Progress.SetMessage("No cel file found in directory " + root);
        return string.Empty;
    }

    var inputFile = Path.Combine(root, "celfiles.tsv");
    using (var sw = new StreamWriter(inputFile))
    {
        foreach (var cel in cels)
        {
            sw.WriteLine(FileUtils.ToLinuxFormat(cel));
        }
    }

    var roptions = new RTemplateProcessorOptions();
    roptions.RExecute = rExecute;
    roptions.InputFile = inputFile;
    // NoResultFile is set, so OutputFile simply repeats the input file.
    roptions.OutputFile = inputFile;
    roptions.NoResultFile = true;
    // Forward slash, like every other template path built from GetTemplateDir().
    roptions.RTemplate = FileUtils.GetTemplateDir() + "/frma.r";
    roptions.CreateNoWindow = true;
    new RTemplateProcessor(roptions) { Progress = this.Progress }.Process();

    // The returned map is not needed here; the call is made for the file it writes.
    CelFile.GetChipTypes(this.rExecute, root, true, outputFile);

    return outputFile;
}
/// <summary>
/// Produces the boundary file and the deuterium file when they are missing (or
/// <c>options.Overwrite</c> is set), copies the deuterium values onto the peptides whose
/// chromatograph file appears in the deuterium file, writes those peptides, and finally
/// writes the median "DeuteriumEnrichmentPercent" of every peptide sequence per time point.
/// </summary>
/// <returns>An array whose single element is <c>options.OutputFile</c>.</returns>
public override IEnumerable<string> Process()
{
    // Step 1: chromatograph profiles / boundaries.
    var profileOptions = new ChromatographProfileBuilderOptions();
    options.CopyProperties(profileOptions);
    profileOptions.InputFile = options.InputFile;
    profileOptions.OutputFile = options.BoundaryOutputFile;
    profileOptions.DrawImage = false;

    var profileBuilder = new ChromatographProfileBuilder(profileOptions);
    if (!File.Exists(options.BoundaryOutputFile) || options.Overwrite)
    {
        Progress.SetMessage("Finding envelope ...");
        profileBuilder.Progress = this.Progress;
        profileBuilder.Process();
    }

    // Step 2: deuterium calculation in R.
    if (!File.Exists(options.DeuteriumOutputFile) || options.Overwrite)
    {
        Progress.SetMessage("Calculating deuterium ...");

        var rOptions = new RTemplateProcessorOptions();
        rOptions.InputFile = options.BoundaryOutputFile;
        rOptions.OutputFile = options.DeuteriumOutputFile;
        rOptions.RTemplate = DeuteriumR;
        rOptions.RExecute = SystemUtils.GetRExecuteLocation();
        rOptions.CreateNoWindow = true;
        rOptions.Parameters.Add("outputImage<-" + (options.DrawImage ? "1" : "0"));
        rOptions.Parameters.Add("excludeIsotopic0<-" + (options.ExcludeIsotopic0 ? "1" : "0"));

        var rProcessor = new RTemplateProcessor(rOptions);
        rProcessor.Progress = this.Progress;
        rProcessor.Process();
    }

    // R results keyed by their "ChroFile" annotation.
    var deuteriumMap = new AnnotationFormat()
        .ReadFromFile(options.DeuteriumOutputFile)
        .ToDictionary(m => m.Annotations["ChroFile"].ToString());

    // Step 3: reload the peptides and drop annotations left over from an earlier run.
    var format = new MascotPeptideTextFormat();
    var spectra = format.ReadFromFile(options.InputFile);

    var staleKeys = new[]
    {
        "RetentionTime",
        "TheoreticalDeuterium",
        "ObservedDeuterium",
        "NumDeuteriumIncorporated",
        "NumExchangableHydrogen",
        "DeuteriumEnrichmentPercent"
    };
    foreach (var spectrum in spectra)
    {
        foreach (var staleKey in staleKeys)
        {
            spectrum.Annotations.Remove(staleKey);
        }
    }

    // Step 4: annotate every peptide whose chromatograph file has an R result.
    var calcSpectra = new List<IIdentifiedSpectrum>();
    var aminoacids = new Aminoacids();
    foreach (var pep in spectra)
    {
        var chroName = Path.GetFileNameWithoutExtension(profileBuilder.GetTargetFile(pep));
        if (!deuteriumMap.ContainsKey(chroName))
        {
            continue;
        }

        var exchangeable = aminoacids.ExchangableHAtom(pep.Peptide.PureSequence);
        var incorporated = double.Parse(deuteriumMap[chroName].Annotations["NumDeuteriumIncorporated"] as string);

        var source = deuteriumMap[chroName].Annotations;
        pep.Annotations["PeakRetentionTime"] = source["RetentionTime"];
        pep.Annotations["TheoreticalDeuterium"] = source["TheoreticalDeuterium"];
        pep.Annotations["ObservedDeuterium"] = source["ObservedDeuterium"];
        pep.Annotations["NumDeuteriumIncorporated"] = source["NumDeuteriumIncorporated"];
        pep.Annotations["NumExchangableHydrogen"] = exchangeable;
        pep.Annotations["DeuteriumEnrichmentPercent"] = incorporated / exchangeable;

        calcSpectra.Add(pep);
    }

    format.PeptideFormat.Headers += "\tPeakRetentionTime\tTheoreticalDeuterium\tObservedDeuterium\tNumDeuteriumIncorporated\tNumExchangableHydrogen\tDeuteriumEnrichmentPercent";
    format.NotExportSummary = true;
    format.WriteToFile(GetPeptideDeteriumFile(), calcSpectra);

    // Step 5: peptide x time table of median enrichment.
    var bySequence = calcSpectra
        .GroupBy(m => m.Peptide.PureSequence)
        .OrderBy(l => l.Key)
        .ToList();
    var timePoints = options.ExperimentalTimeMap.Values.Distinct().OrderBy(m => m).ToArray();

    using (var writer = new StreamWriter(options.OutputFile))
    {
        writer.WriteLine("Peptide\t{0}", timePoints.Select(t => t.ToString()).Merge("\t"));

        foreach (var sequenceGroup in bySequence)
        {
            var spectraByTime = sequenceGroup
                .GroupBy(m => options.ExperimentalTimeMap[m.Query.FileScan.Experimental])
                .ToDictionary(l => l.Key, l => l.ToArray());

            // Optionally keep only peptides observed at every time point.
            if (options.PeptideInAllTimePointOnly && !timePoints.All(tp => spectraByTime.ContainsKey(tp)))
            {
                continue;
            }

            writer.Write(sequenceGroup.Key);
            foreach (var timePoint in timePoints)
            {
                if (!spectraByTime.ContainsKey(timePoint))
                {
                    writer.Write("\tNA");
                    continue;
                }

                var enrichments = spectraByTime[timePoint]
                    .Select(s => double.Parse(s.Annotations["DeuteriumEnrichmentPercent"].ToString()))
                    .ToArray();
                var median = Statistics.Median(enrichments);
                writer.Write("\t{0:0.######}", median);
            }
            writer.WriteLine();
        }
    }

    Progress.SetMessage("Peptide deuterium enrichment calculation finished ...");

    return new string[] { options.OutputFile };
}
/// <summary>
/// Writes the unambiguous spectra of the input proteins to a peptide file, runs
/// <c>PeptideDeuteriumCalculator</c> on it, copies the resulting annotations back to the
/// original spectra, removes peptides without "DeuteriumEnrichmentPercent", writes the
/// remaining proteins and a protein x time table of median enrichment, and runs the
/// <c>RatioR</c> template on that table.
/// </summary>
/// <returns>An array whose single element is <c>options.OutputFile</c>.</returns>
public override IEnumerable<string> Process()
{
    // --- Unique peptide file -------------------------------------------------
    var proteinFormat = new MascotResultTextFormat();
    var proteins = proteinFormat.ReadFromFile(options.InputFile);
    proteins.RemoveAmbiguousSpectra();

    var spectra = proteins.GetSpectra();
    var staleKeys = new[]
    {
        "TheoreticalDeuterium",
        "ObservedDeuterium",
        "NumDeuteriumIncorporated",
        "NumExchangableHydrogen",
        "DeuteriumEnrichmentPercent"
    };
    foreach (var spectrum in spectra)
    {
        foreach (var staleKey in staleKeys)
        {
            spectrum.Annotations.Remove(staleKey);
        }
    }

    var peptideFile = Path.ChangeExtension(options.InputFile, ".unique.peptides");
    var peptideFormat = new MascotPeptideTextFormat(proteinFormat.PeptideFormat.Headers);
    peptideFormat.WriteToFile(peptideFile, spectra);

    // --- Peptide level calculation -------------------------------------------
    var peptideOptions = new DeuteriumCalculatorOptions();
    options.CopyProperties(peptideOptions);
    peptideOptions.InputFile = peptideFile;
    peptideOptions.OutputFile = peptideFile + ".tsv";

    var peptideCalculator = new PeptideDeuteriumCalculator(peptideOptions) { Progress = this.Progress };
    peptideCalculator.Process();

    // --- Copy calculated annotations back, matched by long file name ---------
    var calculated = peptideFormat.ReadFromFile(peptideCalculator.GetPeptideDeteriumFile());
    var originalByScan = spectra.ToDictionary(m => m.Query.FileScan.LongFileName);
    foreach (var source in calculated)
    {
        var target = originalByScan[source.Query.FileScan.LongFileName];
        foreach (var annotation in source.Annotations)
        {
            target.Annotations[annotation.Key] = annotation.Value;
        }
    }

    // --- Drop peptides (and then empty groups) without a result --------------
    // Walk backwards so RemoveAt does not shift the groups still to be visited.
    for (int index = proteins.Count - 1; index >= 0; index--)
    {
        var members = proteins[index];
        foreach (var member in members)
        {
            member.Peptides.RemoveAll(pep => !pep.Spectrum.Annotations.ContainsKey("DeuteriumEnrichmentPercent"));
        }

        if (members[0].Peptides.Count == 0)
        {
            proteins.RemoveAt(index);
        }
    }

    proteinFormat.PeptideFormat = peptideFormat.PeptideFormat;
    var individualFile = Path.ChangeExtension(options.OutputFile, ".individual.tsv");
    proteinFormat.WriteToFile(individualFile, proteins);

    // --- Protein x time table ------------------------------------------------
    var timePoints = options.ExperimentalTimeMap.Values.Distinct().OrderBy(m => m).ToArray();
    var timeFile = Path.ChangeExtension(options.OutputFile, ".times.tsv");
    using (var writer = new StreamWriter(timeFile))
    {
        writer.WriteLine("Protein\t{0}", timePoints.Select(t => t.ToString()).Merge("\t"));

        foreach (var proteinGroup in proteins)
        {
            var groupSpectra = proteinGroup[0].GetSpectra();

            if (options.PeptideInAllTimePointOnly)
            {
                // Rebuild the list from the sequences observed at every time point.
                var bySequence = groupSpectra.ToGroupDictionary(l => l.Peptide.PureSequence);
                groupSpectra.Clear();
                foreach (var sameSequence in bySequence.Values)
                {
                    var byTime = sameSequence.ToGroupDictionary(m => options.ExperimentalTimeMap[m.Query.FileScan.Experimental]);
                    if (timePoints.Any(tp => !byTime.ContainsKey(tp)))
                    {
                        continue;
                    }
                    groupSpectra.AddRange(sameSequence);
                }
            }

            if (groupSpectra.Count == 0)
            {
                continue;
            }

            writer.Write(proteinGroup.Select(p => p.Name).Merge("/"));

            var spectraByTime = groupSpectra.ToGroupDictionary(m => options.ExperimentalTimeMap[m.Query.FileScan.Experimental]);
            foreach (var timePoint in timePoints)
            {
                if (!spectraByTime.ContainsKey(timePoint))
                {
                    writer.Write("\tNA");
                    continue;
                }

                var enrichments = spectraByTime[timePoint]
                    .Select(s => double.Parse(s.Annotations["DeuteriumEnrichmentPercent"].ToString()))
                    .ToArray();
                var median = Statistics.Median(enrichments);
                writer.Write("\t{0:0.######}", median);
            }
            writer.WriteLine();
        }
    }

    // --- R step on the time table --------------------------------------------
    Progress.SetMessage("Calculating ratio consistant ...");

    var rOptions = new RTemplateProcessorOptions();
    rOptions.InputFile = timeFile;
    rOptions.OutputFile = options.OutputFile;
    rOptions.RTemplate = RatioR;
    rOptions.RExecute = SystemUtils.GetRExecuteLocation();
    rOptions.CreateNoWindow = true;

    var rProcessor = new RTemplateProcessor(rOptions);
    rProcessor.Progress = this.Progress;
    rProcessor.Process();

    Progress.SetMessage("Finished ...");

    return new string[] { options.OutputFile };
}
/// <summary>
/// For every key in <paramref name="keys"/>: merges the peak entries of that key, writes
/// per-ion statistics (count, frequency, mean, SD, median intensity) to a key specific
/// file and runs the "DetectSignificantIon.r" template on that file.
/// </summary>
/// <param name="scanCounts">Scan total per key; used as the denominator of the frequency column.</param>
/// <param name="keys">Keys to export, in the order they are processed.</param>
/// <param name="filename">Prefix of the generated file names.</param>
/// <param name="curMaps">Peak entries per key, grouped by an inner integer key.</param>
/// <param name="exportIndividualIon">Not read by this method.</param>
private void WriteMap(Dictionary<int, int> scanCounts, List<int> keys, string filename, Dictionary<int, Dictionary<int, List<PeakEntry>>> curMaps, bool exportIndividualIon)
{
    foreach (var key in keys)
    {
        var scanTotal = scanCounts[key];

        // File suffix depends on the key: full MS, unknown (0) or the key itself.
        string ionFile = key == FULLMS_CHARGE
            ? filename + ".fullms"
            : key == 0
                ? filename + ".unknown"
                : filename + ".ms2charge" + key.ToString();

        // First merge inside the existing groups ...
        var grouped = curMaps[key];
        foreach (var bucket in grouped.Values)
        {
            MergeIons(bucket);
        }
        var entries = grouped.Values.SelectMany(bucket => bucket).ToList();

        // ... then regroup by round(mz + 0.5) and merge again.
        var regrouped = entries
            .GroupBy(m => (int)Math.Round(m.Ion.Mz + 0.5))
            .ToDictionary(m => m.Key, m => m.ToList());
        foreach (var bucket in regrouped.Values)
        {
            MergeIons(bucket);
        }
        entries = regrouped.Values.SelectMany(bucket => bucket).ToList();

        // Keep one intensity per scan: the highest one.
        foreach (var entry in entries)
        {
            entry.Intensities = entry.Intensities
                .GroupBy(m => m.Scan)
                .Select(sameScan => sameScan.OrderByDescending(x => x.Intensity).First())
                .ToList();
        }

        using (var writer = new StreamWriter(ionFile))
        {
            writer.WriteLine("Ion\tCount\tFrequency\tMeanIntensity\tSD\tMedianIntensity");

            // Most frequently observed ions first.
            var ordered = entries.OrderByDescending(en => en.Intensities.Count).ToList();
            foreach (var entry in ordered)
            {
                var values = entry.Intensities.Select(x => x.Intensity).ToArray();
                var mean = Statistics.Mean(values);
                var sd = Statistics.StandardDeviation(values);
                var median = Statistics.Median(values);
                writer.WriteLine(
                    "{0:0.0000}\t{1}\t{2:0.0000}\t{3:0.000}\t{4:0.000}\t{5:0.000}",
                    entry.Ion.Mz,
                    entry.Intensities.Count,
                    entry.Intensities.Count * 1.0 / scanTotal,
                    mean,
                    sd,
                    median);
            }

            writer.WriteLine();
        }

        var rOptions = new RTemplateProcessorOptions();
        rOptions.InputFile = ionFile;
        rOptions.OutputFile = ionFile + ".sig.tsv";
        rOptions.RExecute = ExternalProgramConfig.GetExternalProgram("R");
        rOptions.RTemplate = FileUtils.GetTemplateDir() + "/DetectSignificantIon.r";
        rOptions.Parameters.Add("minfreq<-0.01");
        rOptions.Parameters.Add("probability<-0.95");
        rOptions.Parameters.Add("minMedianIntensity<-0.05");

        var rProcessor = new RTemplateProcessor(rOptions);
        rProcessor.Progress = this.Progress;
        rProcessor.Process();
    }
}
/// <summary>
/// Builds chromatograph profiles for the peptides in <c>options.InputFile</c> from the
/// "*.raw" files found (recursively) under <c>options.RawDirectory</c>, writes every kept
/// profile as text and as xml, and finally runs the <c>BoundaryR</c> template.
/// </summary>
/// <returns>An array whose single element is <c>options.OutputFile</c>.</returns>
/// <exception cref="Exception">
/// An experiment named by the peptides has no raw file with the same (lower-cased) name.
/// </exception>
/// <exception cref="UserTerminatedException">
/// Thrown inside the parallel body when <c>Progress.IsCancellationPending()</c> is true.
/// </exception>
public override IEnumerable <string> Process()
{
    var format = new MascotPeptideTextFormat();
    var spectra = format.ReadFromFile(options.InputFile);

    // Peptides and raw files are both keyed by the lower-cased experiment / file name.
    var peptideMap = spectra.ToGroupDictionary(m => m.Query.FileScan.Experimental.ToLower());
    var rawfiles = Directory.GetFiles(options.RawDirectory, "*.raw", SearchOption.AllDirectories).ToDictionary(m => Path.GetFileNameWithoutExtension(m).ToLower());
    var rententionWindow = options.RetentionTimeWindow;

    // Every experiment must have a raw file before any work starts.
    var missed = peptideMap.Keys.Except(rawfiles.Keys).ToArray();
    if (missed.Length > 0)
    {
        throw new Exception(string.Format("Cannot find raw file of {0} in directory {1}", missed.Merge("/"), options.RawDirectory));
    }

    var option = new ParallelOptions()
    {
        //MaxDegreeOfParallelism = Math.Min(1, peptideMap.Count),
        MaxDegreeOfParallelism = Math.Min(Environment.ProcessorCount, peptideMap.Count),
    };

    // One parallel iteration per raw file.
    Parallel.ForEach(peptideMap, option, raw =>
    {
        //foreach (var raw in peptideMap)
        //{
        var peptides = raw.Value;
        Progress.SetMessage("Preparing isotopic for " + raw.Key + " ...");

        // One profile per identified peptide of this raw file.
        var waitingPeaks = new List <ChromatographProfile>();
        foreach (var peptide in peptides)
        {
            string file = GetTargetFile(peptide);
            var chro = new ChromatographProfile() { Experimental = peptide.Query.FileScan.Experimental, IdentifiedScan = peptide.Query.FileScan.FirstScan, ObservedMz = peptide.GetPrecursorMz(), TheoreticalMz = peptide.GetTheoreticalMz(), Charge = peptide.Query.Charge, Sequence = peptide.Peptide.PureSequence, FileName = Path.GetFileName(file) };
            chro.InitializeIsotopicIons(options.MzTolerancePPM);
            waitingPeaks.Add(chro);
        }

        if (waitingPeaks.Count == 0)
        {
            //continue;
            return;
        }

        // NOTE(review): resultMap is never read in this method.
        Dictionary <string, List <ChromatographProfile> > resultMap = new Dictionary <string, List <ChromatographProfile> >();
        List <FullMS> fullMSList = new List <FullMS>();
        Progress.SetMessage("Reading full ms list from " + rawfiles[raw.Key] + "...");
        using (var rawReader = new CacheRawFile(RawFileFactory.GetRawFileReader(rawfiles[raw.Key])))
        {
            // Collect scan number and retention time of every MS level 1 scan.
            var firstScan = rawReader.GetFirstSpectrumNumber();
            var lastScan = rawReader.GetLastSpectrumNumber();
            for (int scan = firstScan; scan <= lastScan; scan++)
            {
                var mslevel = rawReader.GetMsLevel(scan);
                if (mslevel == 1)
                {
                    fullMSList.Add(new FullMS() { Scan = scan, RetentionTime = rawReader.ScanToRetentionTime(scan), Peaks = null });
                }
            }

            // Profiles with the same sequence and theoretical m/z (4 decimals) are handled together.
            var chroGroups = waitingPeaks.GroupBy(chro => string.Format("{0}_{1:0.0000}", chro.Sequence, chro.TheoreticalMz));
            foreach (var chroGroup in chroGroups)
            {
                List <ChromatographProfile> profileChros = new List <ChromatographProfile>();
                foreach (var chro in chroGroup.OrderBy(m => m.IdentifiedScan))
                {
                    // Master scan: the last full MS entry whose scan number is not greater than
                    // the identified scan; stays at index 0 when there is none.
                    // NOTE(review): fullMSList[masterScanIndex] below throws when the raw file
                    // contains no MS level 1 scan at all - confirm that cannot happen.
                    var masterScanIndex = 0;
                    for (int i = 1; i < fullMSList.Count; i++)
                    {
                        if (chro.IdentifiedScan < fullMSList[i].Scan)
                        {
                            break;
                        }
                        masterScanIndex = i;
                    }

                    // NOTE(review): masterScan is never read in this method.
                    var masterScan = fullMSList[masterScanIndex].Scan;
                    var masterRetentionTime = fullMSList[masterScanIndex].RetentionTime;

                    // If a profile already built for this group contains the master scan, only
                    // flag that scan as identified and skip this peptide.
                    bool bExist = false;
                    foreach (var profileChro in profileChros)
                    {
                        foreach (var pkl in profileChro.Profiles)
                        {
                            if (pkl.Scan == fullMSList[masterScanIndex].Scan)
                            {
                                pkl.Identified = true;
                                bExist = true;
                                break;
                            }
                        }
                        if (bExist)
                        {
                            break;
                        }
                    }
                    if (bExist)
                    {
                        continue;
                    }

                    Progress.SetMessage("Processing {0} : {1:0.#####} : {2} : {3}", chro.Sequence, chro.ObservedMz, chro.IdentifiedScan, Path.GetFileName(chro.FileName));

                    // Walk backwards from the master scan until the retention time window is
                    // exceeded or AddEnvelope returns false.
                    for (int scanIndex = masterScanIndex; scanIndex >= 0; scanIndex--)
                    {
                        if (Progress.IsCancellationPending())
                        {
                            throw new UserTerminatedException();
                        }
                        var curRetentionTime = fullMSList[scanIndex].RetentionTime;
                        if (masterRetentionTime - curRetentionTime > rententionWindow)
                        {
                            break;
                        }
                        if (!AddEnvelope(chro, rawReader, fullMSList, scanIndex))
                        {
                            break;
                        }
                        if (scanIndex == masterScanIndex)
                        {
                            // Presumably AddEnvelope appended the entry for this scan to
                            // chro.Profiles - TODO confirm.
                            chro.Profiles.Last().Identified = true;
                        }
                    }

                    // The backward walk visited the scans in descending order; reverse before
                    // the forward walk continues after the master scan.
                    chro.Profiles.Reverse();

                    // Walk forwards with the same two stop conditions.
                    for (int scanIndex = masterScanIndex + 1; scanIndex < fullMSList.Count; scanIndex++)
                    {
                        if (Progress.IsCancellationPending())
                        {
                            throw new UserTerminatedException();
                        }
                        var curRetentionTime = fullMSList[scanIndex].RetentionTime;
                        if (curRetentionTime - masterRetentionTime > rententionWindow)
                        {
                            break;
                        }
                        if (!AddEnvelope(chro, rawReader, fullMSList, scanIndex))
                        {
                            break;
                        }
                    }

                    profileChros.Add(chro);
                }

                // Drop short profiles, then order by profile length, longest first.
                profileChros.RemoveAll(l => l.Profiles.Count < options.MinimumScanCount);
                profileChros.Sort((m1, m2) => m2.Profiles.Count.CompareTo(m1.Profiles.Count));

                // The longest profile goes to the target directory; the others go to the sub
                // directory with ".sub" inserted before the original extension.
                bool bMain = true;
                foreach (var chro in profileChros)
                {
                    string filename;
                    if (bMain)
                    {
                        filename = Path.Combine(GetTargetDirectory(chro.Experimental), chro.FileName);
                    }
                    else
                    {
                        filename = Path.Combine(GetTargetSubDirectory(chro.Experimental), Path.ChangeExtension(chro.FileName, ".sub" + Path.GetExtension(chro.FileName)));
                    }
                    bMain = false;
                    new ChromatographProfileTextWriter().WriteToFile(filename, chro);
                    new ChromatographProfileXmlFormat().WriteToFile(filename + ".xml", chro);
                }
            }
        }
    }
    );

    // The R template receives targetDir as its input.
    Progress.SetMessage("Finding boundaries ...");
    var boundaryOptions = new RTemplateProcessorOptions() { InputFile = targetDir, OutputFile = options.OutputFile, RTemplate = BoundaryR, RExecute = SystemUtils.GetRExecuteLocation(), CreateNoWindow = true };
    new RTemplateProcessor(boundaryOptions) { Progress = this.Progress }.Process();

    return(new string[] { options.OutputFile });
}
/// <summary>
/// Calculates the ratio of one protein group from the spectra of its first protein that pass
/// <paramref name="validFunc"/>, are enabled and have a peptide ratio, and saves the result
/// to every protein of the group.
/// </summary>
/// <param name="proteinGroup">Group to quantify.</param>
/// <param name="validFunc">Additional spectrum filter.</param>
/// <param name="runRImmediately">
/// When <c>false</c> and a regression is needed, only the intensity file is prepared and a
/// <c>WaitingEntry</c> describing it is returned; nothing is saved to the proteins yet.
/// </param>
/// <returns>A <c>WaitingEntry</c> when the regression was deferred; otherwise <c>null</c>.</returns>
private WaitingEntry DoCalculate(IIdentifiedProteinGroup proteinGroup, Func<IIdentifiedSpectrum, bool> validFunc, bool runRImmediately)
{
    List<IIdentifiedSpectrum> spectra = (from s in proteinGroup[0].GetSpectra()
                                         where validFunc(s) && s.IsEnabled(true) && HasPeptideRatio(s)
                                         select s).ToList();

    if (spectra.Count == 1)
    {
        // A single spectrum: its peptide ratio is used directly, no regression.
        // (The ratio used to be computed a second time into an unused local.)
        var lrrr = new LinearRegressionRatioResult(CalculatePeptideRatio(spectra[0]), 0.0)
        {
            PointCount = 1,
            TValue = 0,
            PValue = 1,
            ReferenceIntensity = this.intensityFunc.GetReferenceIntensity(spectra[0]),
            SampleIntensity = this.intensityFunc.GetSampleIntensity(spectra[0])
        };

        foreach (var protein in proteinGroup)
        {
            this.intensityFunc.SaveToAnnotation(protein, lrrr);
        }

        return null;
    }
    else if (spectra.Count > 1)
    {
        var intensities = this.intensityFunc.ConvertToArray(spectra);

        // Despite the names these are the maxima of the two intensity series.
        double sumSam = intensities[0].Max();
        double sumRef = intensities[1].Max();

        // NOTE(review): PointCount below is the number of elements of `intensities` itself,
        // not the length of one series - confirm this is intended.
        LinearRegressionRatioResult lrrr;
        if (sumSam == 0.0)
        {
            // No sample signal: fixed ratio of 20, sample intensity derived from the reference.
            lrrr = new LinearRegressionRatioResult(20, 0.0)
            {
                PointCount = intensities.Count(),
                TValue = 0,
                PValue = 0,
                ReferenceIntensity = sumRef,
            };
            lrrr.SampleIntensity = sumRef / lrrr.Ratio;
        }
        else if (sumRef == 0.0)
        {
            // No reference signal: fixed ratio of 0.05, reference intensity derived from the sample.
            lrrr = new LinearRegressionRatioResult(0.05, 0.0)
            {
                PointCount = intensities.Count(),
                TValue = 0,
                PValue = 0,
                SampleIntensity = sumSam
            };
            lrrr.ReferenceIntensity = sumSam * lrrr.Ratio;
        }
        else
        {
            // Both series have signal: regression in R on a per-group intensity file.
            var filename = (this.DetailDirectory + "/" + proteinGroup[0].Name.Replace("|", "_") + ".csv").Replace("\\", "/");
            PrepareIntensityFile(spectra, filename);

            if (!runRImmediately)
            {
                return new WaitingEntry()
                {
                    Group = proteinGroup,
                    IntensityFile = filename
                };
            }

            var linearfile = filename + ".linear";
            var roptions = new RTemplateProcessorOptions();
            roptions.InputFile = filename;
            roptions.OutputFile = linearfile;
            roptions.RTemplate = FileUtils.GetTemplateDir() + "/PairQuantification.r";
            new RTemplateProcessor(roptions).Process();

            // The result is on the first line after the header.
            var parts = File.ReadAllLines(linearfile).Skip(1).First().Split('\t');
            lrrr = ParseLinearRegressionRatioResult(parts, 0);
        }

        foreach (IIdentifiedProtein protein in proteinGroup)
        {
            this.intensityFunc.SaveToAnnotation(protein, lrrr);
        }
    }
    else
    {
        // No usable spectrum: remove the annotation from every protein of the group.
        foreach (IIdentifiedProtein protein in proteinGroup)
        {
            this.intensityFunc.RemoveFromAnnotation(protein);
        }
    }

    return null;
}
/// <summary>
/// Loads the isobaric information for the peptides in <c>options.PeptideFile</c>, optionally
/// replaces the channel intensities with the output of the "CyclicLoessNormalization.r"
/// template (one run per experiment), writes one row per spectrum with the reference and
/// sample values, and runs the "PeptideQuantification.r" template on that file.
/// </summary>
/// <returns>An array whose single element is the output file of the quantification template.</returns>
/// <exception cref="Exception">
/// No spectrum has an isobaric item, or a row of a normalization result does not match any
/// spectrum of the experiment.
/// </exception>
/// <exception cref="UserTerminatedException">
/// <c>Progress.IsCancellationPending()</c> was true at the start of a normalization round.
/// </exception>
public override IEnumerable <string> Process()
{
    var design = new IsobaricLabelingExperimentalDesign();
    design.LoadFromFile(options.DesignFile);

    string resultFileName = GetResultFilePrefix(design);

    // Keep a copy of the options next to the results.
    string paramFileName = Path.ChangeExtension(resultFileName, ".param");
    options.SaveToFile(paramFileName);

    Progress.SetMessage("Reading peptides...");
    List <IIdentifiedSpectrum> spectra = new MascotPeptideTextFormat().ReadFromFile(options.PeptideFile);
    IsobaricScanUtils.Load(spectra, design.IsobaricFile, false, this.Progress);

    // Only spectra with an isobaric item take part in the quantification.
    var isoSpectra = (from s in spectra where s.FindIsobaricItem() != null select s).ToList();
    if (isoSpectra.Count == 0)
    {
        throw new Exception(string.Format("No isobaric labelling information between {0} and {1}", options.PeptideFile, options.DesignFile));
    }

    if (options.PerformNormalizition)
    {
        var msg = "Normalizing channels using loess algorithm";
        var detailsDir = resultFileName + ".details";
        if (!Directory.Exists(detailsDir))
        {
            Directory.CreateDirectory(detailsDir);
        }

        // One normalization run per experiment.
        var isoGroup = isoSpectra.GroupBy(m => m.Query.FileScan.Experimental).ToList();
        Progress.SetRange(0, isoGroup.Count);
        Progress.SetPosition(0);
        var fileIndex = 0;
        foreach (var isoFile in isoGroup)
        {
            if (Progress.IsCancellationPending())
            {
                throw new UserTerminatedException();
            }
            fileIndex++;
            Progress.SetMessage("{0} {1}/{2} ...", msg, fileIndex, isoGroup.Count);

            var datafile = string.Format("{0}\\{1}.{2}.tsv", detailsDir, Path.GetFileNameWithoutExtension(resultFileName), isoFile.Key);
            var rresultfile = Path.ChangeExtension(datafile, ".norm.tsv");

            // The existence check is commented out, so the data file is rewritten and R is run
            // on every call; the braces are kept as a plain scope.
            //if (!File.Exists(rresultfile))
            {
                // Raw intensities: one row per spectrum, one column per channel.
                using (var sw = new StreamWriter(datafile))
                {
                    sw.WriteLine("FileScan\t{0}", (from cha in design.PlexType.Channels select cha.Name).Merge("\t"));
                    foreach (var isoSpec in isoFile)
                    {
                        sw.Write("{0}", isoSpec.Query.FileScan.LongFileName);
                        var item = isoSpec.FindIsobaricItem();
                        for (int i = 0; i < design.PlexType.Channels.Count; i++)
                        {
                            sw.Write("\t{0:0.0}", item[i].Intensity);
                        }
                        sw.WriteLine();
                    }
                }

                var roptions = new RTemplateProcessorOptions();
                roptions.InputFile = datafile;
                roptions.OutputFile = rresultfile;
                roptions.Parameters.Add(string.Format("missingvalue<-{0}", IsobaricConsts.NULL_INTENSITY));
                roptions.RTemplate = FileUtils.GetTemplateDir() + "/CyclicLoessNormalization.r";
                new RTemplateProcessor(roptions).Process();
                Progress.SetPosition(fileIndex);
            }

            var specMap = isoFile.ToDictionary(m => m.Query.FileScan.LongFileName);

            // Read the R result and replace the intensity of each spectrum.
            using (var sr = new StreamReader(rresultfile))
            {
                // Ignore the header line.
                string line = sr.ReadLine();
                IIdentifiedSpectrum spec;
                while ((line = sr.ReadLine()) != null)
                {
                    // A blank line ends the data.
                    if (string.IsNullOrWhiteSpace(line))
                    {
                        break;
                    }
                    var parts = line.Split('\t');
                    if (!specMap.TryGetValue(parts[0], out spec))
                    {
                        throw new Exception(string.Format("{0} can not be found! The first column of normalization result file {1} must be FileScan!", parts[0], rresultfile));
                    }

                    // Column i of the result feeds channel i - 1 (column 0 is the file scan).
                    var item = spec.FindIsobaricItem();
                    for (int i = 1; i < parts.Length; i++)
                    {
                        item[i - 1].Intensity = double.Parse(parts[i]);
                    }
                }
            }
        }
    }

    Progress.SetMessage("Quantifying peptide with outlier detection ...");
    FilterSpectraByQuantifyMode(isoSpectra);

    var refFuncs = design.References;
    var samFuncs = design.GetSamples();
    var pepfile = resultFileName + ".tsv";
    using (var sw = new StreamWriter(pepfile))
    {
        sw.WriteLine("Subject\tDataset\tFileScan\tSequence\tREF\t{0}", samFuncs.ConvertAll(m => m.Name).Merge("\t"));

        // Spectra are grouped by match sequence in modification-site mode, otherwise by pure sequence.
        Func <IIdentifiedSpectrum, string> keyFunc;
        if (options.Mode == QuantifyMode.qmModificationSite)
        {
            keyFunc = m => m.GetMatchSequence();
        }
        else
        {
            keyFunc = m => m.Peptide.PureSequence;
        }

        var peptides = isoSpectra.ToGroupDictionary(m => keyFunc(m)).OrderBy(m => m.Key).ToList();
        foreach (var pep in peptides)
        {
            foreach (var dsName in design.DatasetMap.Keys)
            {
                // Spectra of this peptide whose experiment belongs to the dataset.
                var dsSet = new HashSet <string>(design.DatasetMap[dsName]);
                var dsSpectra = (from s in pep.Value where dsSet.Contains(s.Query.FileScan.Experimental) orderby s.Peptide.Sequence select s).ToList();
                foreach (var spec in dsSpectra)
                {
                    // REF column: sum of all reference values; then one column per sample.
                    var isoitem = spec.FindIsobaricItem();
                    sw.WriteLine("{0}\t{1}\t{2}\t{3}\t{4:0.0}\t{5}", pep.Key, dsName, spec.Query.FileScan.ShortFileName, spec.Peptide.Sequence, refFuncs.ConvertAll(m => m.GetValue(isoitem)).Sum(), samFuncs.ConvertAll(m => string.Format("{0:0.0}", m.GetValue(isoitem))).Merge("\t"));
                }
            }
        }
    }

    var qoptions = new RTemplateProcessorOptions();
    qoptions.InputFile = pepfile;
    qoptions.OutputFile = resultFileName + ".quan.tsv";
    qoptions.RTemplate = string.Format("{0}/PeptideQuantification.r", FileUtils.GetTemplateDir());
    qoptions.Parameters.Add(string.Format("missingvalue<-{0}", IsobaricConsts.NULL_INTENSITY));
    qoptions.Parameters.Add("pvalue<-0.01");
    qoptions.Parameters.Add("minFinalCount<-3");
    new RTemplateProcessor(qoptions).Process();

    Progress.SetMessage("Finished.");

    return(new[] { qoptions.OutputFile });
}
/// <summary>
/// Builds chromatograph profiles for the peptides in <c>options.InputFile</c> from the raw
/// files in <c>options.RawFiles</c>, writes an index of the main profiles to a ".chros.tsv"
/// file next to <c>options.OutputFile</c>, and runs the <c>BoundaryR</c> template on that
/// index. Each of the two stages is skipped when its output already exists and
/// <c>options.Overwrite</c> is false.
/// </summary>
/// <returns>An array whose single element is <c>options.OutputFile</c>.</returns>
/// <exception cref="Exception">
/// An experiment named by the peptides has no raw file, or no profile was kept at all.
/// </exception>
/// <exception cref="UserTerminatedException">
/// Thrown inside the parallel body when <c>Progress.IsCancellationPending()</c> is true.
/// </exception>
public override IEnumerable <string> Process()
{
    var boundaryInput = Path.ChangeExtension(options.OutputFile, ".chros.tsv");
    if (!File.Exists(boundaryInput) || options.Overwrite)
    {
        var format = GetPeptideReader();
        var spectra = format.ReadFromFile(options.InputFile);

        // Peptides and raw files are both keyed by the lower-cased experiment name.
        var peptideMap = spectra.ToGroupDictionary(m => m.Query.FileScan.Experimental.ToLower());
        var rawfiles = options.RawFiles.ToDictionary(m => RawFileFactory.GetExperimental(m).ToLower());
        var rententionWindow = options.MaximumRetentionTimeWindow;

        var missed = peptideMap.Keys.Except(rawfiles.Keys).ToArray();
        if (missed.Length > 0)
        {
            throw new Exception(string.Format("Cannot find raw file of {0} in file list", missed.Merge("/")));
        }

        // A thread count of 0 means "use all processors".
        var optionThreadCount = options.ThreadCount == 0 ? Environment.ProcessorCount : options.ThreadCount;
        var option = new ParallelOptions()
        {
            MaxDegreeOfParallelism = Math.Min(optionThreadCount, peptideMap.Count),
        };

        // Prepare the profiles of every raw file sequentially, before the parallel part.
        var chroMap = new List <Tuple <string, List <ChromatographProfile> > >();
        foreach (var raw in peptideMap)
        {
            var peptides = raw.Value;
            var waitingPeaks = new List <ChromatographProfile>();
            foreach (var peptide in peptides)
            {
                var chro = new ChromatographProfile() { Experimental = peptide.Query.FileScan.Experimental, IdentifiedScan = peptide.Query.FileScan.FirstScan, IdentifiedRetentionTime = peptide.Query.FileScan.RetentionTime, ObservedMz = peptide.GetPrecursorMz(), TheoreticalMz = peptide.GetTheoreticalMz(), Charge = peptide.Query.Charge, Sequence = peptide.Peptide.PureSequence, FileName = GetTargetFile(peptide), SubFileName = GetTargetSubFile(peptide) };
                chro.InitializeIsotopicIons(options.MzTolerancePPM, options.MinimumIsotopicPercentage);
                waitingPeaks.Add(chro);
            }
            chroMap.Add(new Tuple <string, List <ChromatographProfile> >(raw.Key, waitingPeaks));
        }

        // Main profiles found by the parallel iterations.
        ConcurrentBag <ChromatographProfile> detected = new ConcurrentBag <ChromatographProfile>();

        // One parallel iteration per raw file.
        Parallel.ForEach(chroMap, option, raw =>
        {
            var rawFileName = raw.Item1;
            var waitingPeaks = raw.Item2;

            // NOTE(review): resultMap is never read in this method.
            Dictionary <string, List <ChromatographProfile> > resultMap = new Dictionary <string, List <ChromatographProfile> >();
            List <FullMS> fullMSList = new List <FullMS>();
            Progress.SetMessage("Reading full ms list from " + rawfiles[rawFileName] + "...");
            using (var rawReader = new CacheRawFile(RawFileFactory.GetRawFileReader(rawfiles[rawFileName])))
            {
                // Collect scan number and retention time of every MS level 1 scan.
                var firstScan = rawReader.GetFirstSpectrumNumber();
                var lastScan = rawReader.GetLastSpectrumNumber();
                for (int scan = firstScan; scan <= lastScan; scan++)
                {
                    var mslevel = rawReader.GetMsLevel(scan);
                    if (mslevel == 1)
                    {
                        fullMSList.Add(new FullMS() { Scan = scan, RetentionTime = rawReader.ScanToRetentionTime(scan), Peaks = null });
                    }
                }

                // Peptides with a retention time but no scan number get a scan derived from the
                // last full MS entry (index 1 or later) whose retention time is not greater
                // than the identified one; the stored value is that scan number plus one.
                foreach (var chro in waitingPeaks)
                {
                    if (chro.IdentifiedScan == 0 && chro.IdentifiedRetentionTime > 0)
                    {
                        for (int i = 1; i < fullMSList.Count; i++)
                        {
                            if (chro.IdentifiedRetentionTime < fullMSList[i].RetentionTime)
                            {
                                break;
                            }
                            chro.IdentifiedScan = fullMSList[i].Scan + 1;
                        }
                    }
                }

                // Profiles with the same peptide id are handled together.
                var chroGroups = waitingPeaks.GroupBy(chro => chro.GetPeptideId());
                foreach (var chroGroup in chroGroups)
                {
                    List <ChromatographProfile> profileChros = new List <ChromatographProfile>();
                    foreach (var chro in chroGroup.OrderBy(m => m.IdentifiedScan))
                    {
                        // Master scan: the last full MS entry whose scan number is not greater
                        // than the identified scan; stays at index 0 when there is none.
                        // NOTE(review): fullMSList[masterScanIndex] below throws when the raw
                        // file contains no MS level 1 scan at all - confirm that cannot happen.
                        var masterScanIndex = 0;
                        for (int i = 1; i < fullMSList.Count; i++)
                        {
                            if (chro.IdentifiedScan < fullMSList[i].Scan)
                            {
                                break;
                            }
                            masterScanIndex = i;
                        }

                        // NOTE(review): masterScan is never read in this method.
                        var masterScan = fullMSList[masterScanIndex].Scan;
                        var masterRetentionTime = fullMSList[masterScanIndex].RetentionTime;

                        // If a profile already built for this group contains the master scan,
                        // only flag that scan as identified and skip this peptide.
                        bool bExist = false;
                        foreach (var profileChro in profileChros)
                        {
                            foreach (var pkl in profileChro.Profiles)
                            {
                                if (pkl.Scan == fullMSList[masterScanIndex].Scan)
                                {
                                    pkl.Identified = true;
                                    bExist = true;
                                    break;
                                }
                            }
                            if (bExist)
                            {
                                break;
                            }
                        }
                        if (bExist)
                        {
                            continue;
                        }

                        //Progress.SetMessage("Processing {0} : {1:0.#####} : {2} : {3}", chro.Sequence, chro.ObservedMz, chro.IdentifiedScan, Path.GetFileName(chro.FileName));

                        // Allow one missed scan: the walk stops at the second scan for which
                        // AddEnvelope returns false (counted over the whole walk).
                        int naCount = 2;

                        // Walk backwards from the master scan while inside the retention window.
                        for (int scanIndex = masterScanIndex; scanIndex >= 0; scanIndex--)
                        {
                            if (Progress.IsCancellationPending())
                            {
                                throw new UserTerminatedException();
                            }
                            var curRetentionTime = fullMSList[scanIndex].RetentionTime;
                            if (masterRetentionTime - curRetentionTime > rententionWindow)
                            {
                                break;
                            }
                            if (!AddEnvelope(chro, rawReader, fullMSList, scanIndex))
                            {
                                naCount--;
                                if (naCount == 0)
                                {
                                    break;
                                }
                                else
                                {
                                    continue;
                                }
                            }
                            if (scanIndex == masterScanIndex)
                            {
                                // Presumably AddEnvelope appended the entry for this scan to
                                // chro.Profiles - TODO confirm.
                                chro.Profiles.Last().Identified = true;
                            }
                        }

                        // The backward walk visited the scans in descending order; reverse
                        // before the forward walk continues after the master scan.
                        chro.Profiles.Reverse();

                        // Walk forwards with a fresh missed-scan allowance.
                        naCount = 2;
                        for (int scanIndex = masterScanIndex + 1; scanIndex < fullMSList.Count; scanIndex++)
                        {
                            if (Progress.IsCancellationPending())
                            {
                                throw new UserTerminatedException();
                            }
                            var curRetentionTime = fullMSList[scanIndex].RetentionTime;
                            if (curRetentionTime - masterRetentionTime > rententionWindow)
                            {
                                break;
                            }
                            if (!AddEnvelope(chro, rawReader, fullMSList, scanIndex))
                            {
                                naCount--;
                                if (naCount == 0)
                                {
                                    break;
                                }
                                else
                                {
                                    continue;
                                }
                            }
                        }

                        profileChros.Add(chro);
                    }

                    // Drop short profiles, then order by profile length, longest first.
                    profileChros.RemoveAll(l => l.Profiles.Count < options.MinimumScanCount);
                    profileChros.Sort((m1, m2) => m2.Profiles.Count.CompareTo(m1.Profiles.Count));

                    // The longest profile is the main one: it is written to FileName and
                    // recorded in `detected`; the others are written to SubFileName.
                    bool bMain = true;
                    foreach (var chro in profileChros)
                    {
                        var filename = bMain ? chro.FileName : chro.SubFileName;
                        if (bMain)
                        {
                            detected.Add(chro);
                        }
                        bMain = false;
                        new ChromatographProfileTextWriter().WriteToFile(filename, chro);
                        new ChromatographProfileXmlFormat().WriteToFile(filename + ".xml", chro);
                    }
                }
            }
        }
        );

        // Sort by file name before writing the index of main profiles.
        var chroList = new List <ChromatographProfile>(detected);
        chroList.Sort((m1, m2) => m1.FileName.CompareTo(m2.FileName));
        if (chroList.Count == 0)
        {
            throw new Exception("Cannot find chromotograph!");
        }

        // Index of main profiles: the input of the boundary R template.
        using (var sw = new StreamWriter(boundaryInput))
        {
            sw.WriteLine("ChroDirectory\tChroFile\tSample\tPeptideId\tTheoreticalMz\tCharge\tIdentifiedScan");
            foreach (var chro in chroList)
            {
                sw.WriteLine("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}", Path.GetDirectoryName(chro.FileName).Replace("\\", "/"), Path.GetFileNameWithoutExtension(chro.FileName), chro.Experimental, chro.GetPeptideId(), chro.TheoreticalMz, chro.Charge, chro.IdentifiedScan);
            }
        }
    }

    if (!File.Exists(options.OutputFile) || options.Overwrite)
    {
        Progress.SetMessage("Finding boundaries ...");
        var boundaryOptions = new RTemplateProcessorOptions() { InputFile = boundaryInput, OutputFile = options.OutputFile, RTemplate = BoundaryR, RExecute = ExternalProgramConfig.GetExternalProgram("R"), CreateNoWindow = true };
        boundaryOptions.Parameters.Add("outputImage<-" + (options.DrawImage ? "1" : "0"));
        boundaryOptions.Parameters.Add("maximumProfileDistance<-" + options.MaximumProfileDistance.ToString());
        new RTemplateProcessor(boundaryOptions) { Progress = this.Progress }.Process();
    }

    // Disabled: separate image drawing step.
    //if (options.DrawImage)
    //{
    //  Progress.SetMessage("Drawing images ...");
    //  var imageOptions = new RTemplateProcessorOptions()
    //  {
    //    InputFile = options.OutputFile,
    //    OutputFile = Path.ChangeExtension(options.OutputFile, ".image"),
    //    RTemplate = ImageR,
    //    RExecute = SystemUtils.GetRExecuteLocation(),
    //    CreateNoWindow = true,
    //    NoResultFile = true
    //  };
    //  new RTemplateProcessor(imageOptions) { Progress = this.Progress }.Process();
    //}

    return(new string[] { options.OutputFile });
}