Example #1
0
        /// <summary>
        /// Lists the cel files under <paramref name="directory"/>, writes that list to "celfiles.tsv"
        /// inside the directory, runs the "getceltypes.r" template on it and loads the result.
        /// </summary>
        /// <param name="rExecute">Value passed to the R processor as its RExecute option.</param>
        /// <param name="directory">Directory searched for cel files; "celfiles.tsv" is written here.</param>
        /// <param name="includingSubDirectory">When true, each direct sub directory of <paramref name="directory"/> is searched too.</param>
        /// <param name="outputFile">File given to the R processor as output and read back afterwards.</param>
        /// <returns>The map read from <paramref name="outputFile"/> by MapReader(0, 1); an empty map when no cel file is found.</returns>
        public static Dictionary <string, string> GetChipTypes(string rExecute, string directory, bool includingSubDirectory, string outputFile)
        {
            var cels = GetCelFiles(directory);

            // Honour the caller's flag: the sub directories used to be scanned unconditionally.
            if (includingSubDirectory)
            {
                foreach (var dir in Directory.GetDirectories(directory))
                {
                    cels.AddRange(GetCelFiles(dir));
                }
            }

            if (cels.Count == 0)
            {
                return(new Dictionary <string, string>());
            }

            var inputfile = Path.Combine(directory, "celfiles.tsv");

            // One cel file path per line, converted by FileUtils.ToLinuxFormat.
            using (var sw = new StreamWriter(inputfile))
            {
                foreach (var cel in cels)
                {
                    sw.WriteLine(FileUtils.ToLinuxFormat(cel));
                }
            }

            var roptions = new RTemplateProcessorOptions();

            roptions.RExecute   = rExecute;
            roptions.InputFile  = inputfile;
            roptions.OutputFile = outputFile;
            roptions.RTemplate  = FileUtils.GetTemplateDir() + "/getceltypes.r";
            new RTemplateProcessor(roptions).Process();
            return(new MapReader(0, 1).ReadFromFile(roptions.OutputFile));
        }
        /// <summary>
        /// Writes a protein/peptide lookup table for the identified proteins and then runs the
        /// "ProteinQuantification.r" template to produce the protein quantification file.
        /// </summary>
        /// <returns>A one-element array holding the path of the quantification output file.</returns>
        public override IEnumerable <string> Process()
        {
            var experimentalDesign = new IsobaricLabelingExperimentalDesign();

            experimentalDesign.LoadFromFile(options.ExpermentalDesignFile);

            string resultPrefix = GetResultFilePrefix(options.ProteinFileName, experimentalDesign.GetReferenceNames(""));

            // Store the options of this run in a ".param" file named after the result prefix.
            options.SaveToFile(Path.ChangeExtension(resultPrefix, ".param"));

            Progress.SetMessage("Reading proteins...");

            IIdentifiedResult identified = new MascotResultTextFormat().ReadFromFile(options.ProteinFileName);

            var lookupFile = string.Format("{0}.pro_pep.tsv", resultPrefix);

            using (var writer = new StreamWriter(lookupFile))
            {
                writer.WriteLine("Index\tPeptide\tProteins\tDescription\tPepCount\tUniquePepCount");
                foreach (var proteinGroup in identified)
                {
                    // One row per distinct peptide sequence of the group, in sorted order.
                    var sequences = proteinGroup.GetPeptides()
                                    .Select(p => p.Peptide.PureSequence)
                                    .Distinct()
                                    .OrderBy(s => s)
                                    .ToArray();
                    var names        = proteinGroup.Select(p => p.Name).Merge(" ! ");
                    var descriptions = proteinGroup.Select(p => p.Description).Merge(" ! ");

                    foreach (var sequence in sequences)
                    {
                        // The counts are taken from the first protein of the group.
                        writer.WriteLine("{0}\t{1}\t{2}\t{3}\t{4}\t{5}",
                                         proteinGroup.Index,
                                         sequence,
                                         names,
                                         descriptions,
                                         proteinGroup[0].PeptideCount,
                                         proteinGroup[0].UniquePeptideCount);
                    }
                }
            }

            Progress.SetMessage("Quantifing proteins...");

            var quanOptions = new RTemplateProcessorOptions();

            quanOptions.InputFile  = options.QuanPeptideFileName;
            quanOptions.OutputFile = resultPrefix + ".quan." + options.PeptideToProteinMethod + ".tsv";
            quanOptions.RTemplate  = FileUtils.GetTemplateDir() + "/ProteinQuantification.r";

            // Values handed to the R template as assignment statements.
            var forwardSlashLookup = lookupFile.Replace("\\", "/");
            quanOptions.Parameters.Add(string.Format("proteinfile<-\"{0}\"", forwardSlashLookup));
            quanOptions.Parameters.Add(string.Format("method<-\"{0}\"", options.PeptideToProteinMethod));
            quanOptions.Parameters.Add("pvalue<-0.01");
            quanOptions.Parameters.Add("minFinalCount<-3");

            new RTemplateProcessor(quanOptions).Process();

            Progress.SetMessage("Finished.");

            return(new[] { quanOptions.OutputFile });
        }
        /// <summary>
        /// Runs DoCalculate for every protein group and, for the groups that returned a waiting
        /// entry, runs the "MultiplePairQuantification.r" template in one batch and stores the
        /// parsed regression result on every protein of the matching groups.
        /// </summary>
        /// <param name="mr">Identified result whose protein groups are processed.</param>
        /// <param name="validFunc">Spectrum filter forwarded to DoCalculate.</param>
        public void Calculate(IIdentifiedResult mr, Func <IIdentifiedSpectrum, bool> validFunc)
        {
            var pending = new List <WaitingEntry>();

            foreach (var proteinGroup in mr)
            {
                var entry = DoCalculate(proteinGroup, validFunc, false);
                if (entry != null)
                {
                    pending.Add(entry);
                }
            }

            // Nothing is waiting for the batch R run.
            if (pending.Count == 0)
            {
                return;
            }

            // List file: one quoted "protein name, intensity file" row per waiting entry.
            var listfile = (this.DetailDirectory + "/rlm_file.csv").Replace("\\", "/");
            using (var writer = new StreamWriter(listfile))
            {
                writer.WriteLine("Protein,IntensityFile");
                foreach (var entry in pending)
                {
                    writer.WriteLine("\"{0}\",\"{1}\"", entry.Group[0].Name, entry.IntensityFile);
                }
            }

            var linearfile = new FileInfo(this.DetailDirectory + "/rlm.linear").FullName.Replace("\\", "/");

            var roptions = new RTemplateProcessorOptions();

            roptions.InputFile  = listfile;
            roptions.OutputFile = linearfile;
            roptions.RTemplate  = FileUtils.GetTemplateDir() + "/MultiplePairQuantification.r";

            new RTemplateProcessor(roptions).Process();

            // Skip the header line; key each row by the text between the first pair of quotes in column 0.
            var regressionByProtein = File.ReadAllLines(linearfile)
                                      .Skip(1)
                                      .Select(line => line.Split('\t'))
                                      .ToDictionary(parts => parts[0].StringAfter("\"").StringBefore("\""),
                                                    parts => ParseLinearRegressionRatioResult(parts, 2));

            foreach (var proteinGroup in mr)
            {
                var proteinName = proteinGroup[0].Name;
                if (!regressionByProtein.ContainsKey(proteinName))
                {
                    continue;
                }

                var lrrr = regressionByProtein[proteinName];
                foreach (IIdentifiedProtein protein in proteinGroup)
                {
                    this.intensityFunc.SaveToAnnotation(protein, lrrr);
                }
            }
        }
        /// <summary>
        /// Runs the "frma.r" template over the cel files found in <paramref name="root"/> and then
        /// calls <c>CelFile.GetChipTypes</c> for the same directory with <paramref name="outputFile"/>.
        /// </summary>
        /// <param name="root">Directory searched for cel files; a "celfiles.tsv" list is written into it.</param>
        /// <param name="outputFile">File passed to <c>CelFile.GetChipTypes</c> as its output file.</param>
        /// <returns><paramref name="outputFile"/>, or an empty string when no cel file is found.</returns>
        public string Normalization(string root, string outputFile)
        {
            var cels = CelFile.GetCelFiles(root);

            if (cels.Count == 0)
            {
                // Report the directory that was actually searched.
                Progress.SetMessage("No cel file found in directory " + root);
                return(string.Empty);
            }

            var inputFile = Path.Combine(root, "celfiles.tsv");

            // One cel file path per line, converted by FileUtils.ToLinuxFormat.
            using (var sw = new StreamWriter(inputFile))
            {
                foreach (var cel in cels)
                {
                    sw.WriteLine(FileUtils.ToLinuxFormat(cel));
                }
            }

            var roptions = new RTemplateProcessorOptions();

            roptions.RExecute       = rExecute;
            roptions.InputFile      = inputFile;
            // NoResultFile is set, so the output file option simply repeats the input file.
            roptions.OutputFile     = inputFile;
            roptions.NoResultFile   = true;
            // Forward slash, as used for the other R template paths, instead of a Windows-only separator.
            roptions.RTemplate      = FileUtils.GetTemplateDir() + "/frma.r";
            roptions.CreateNoWindow = true;
            new RTemplateProcessor(roptions)
            {
                Progress = this.Progress
            }.Process();

            CelFile.GetChipTypes(this.rExecute, root, true, outputFile);
            return(outputFile);
        }
Example #5
0
        /// <summary>
        /// Calculates deuterium enrichment per peptide: builds the chromatograph boundary file and
        /// the deuterium file when they are missing (or Overwrite is set), copies the deuterium
        /// values onto the matching spectra, writes the annotated peptide file and finally writes a
        /// peptide-by-time table of median enrichment to options.OutputFile.
        /// </summary>
        /// <returns>A one-element array holding options.OutputFile.</returns>
        public override IEnumerable <string> Process()
        {
            //Extract chromatograph information
            var chroOptions = new ChromatographProfileBuilderOptions();

            options.CopyProperties(chroOptions);
            chroOptions.InputFile  = options.InputFile;
            chroOptions.OutputFile = options.BoundaryOutputFile;
            chroOptions.DrawImage  = false;
            // The builder is always created because GetTargetFile is used below; it only runs when needed.
            var builder = new ChromatographProfileBuilder(chroOptions);

            if (!File.Exists(options.BoundaryOutputFile) || options.Overwrite)
            {
                Progress.SetMessage("Finding envelope ...");
                builder.Progress = this.Progress;
                builder.Process();
            }

            //Calculate deuterium enrichment for peptide
            if (!File.Exists(options.DeuteriumOutputFile) || options.Overwrite)
            {
                Progress.SetMessage("Calculating deuterium ...");
                var deuteriumOptions = new RTemplateProcessorOptions()
                {
                    InputFile      = options.BoundaryOutputFile,
                    OutputFile     = options.DeuteriumOutputFile,
                    RTemplate      = DeuteriumR,
                    RExecute       = SystemUtils.GetRExecuteLocation(),
                    CreateNoWindow = true
                };

                deuteriumOptions.Parameters.Add("outputImage<-" + (options.DrawImage ? "1" : "0"));
                deuteriumOptions.Parameters.Add("excludeIsotopic0<-" + (options.ExcludeIsotopic0 ? "1" : "0"));

                new RTemplateProcessor(deuteriumOptions)
                {
                    Progress = this.Progress
                }.Process();
            }

            // Deuterium results keyed by their "ChroFile" annotation.
            var deuteriumMap = new AnnotationFormat().ReadFromFile(options.DeuteriumOutputFile).ToDictionary(m => m.Annotations["ChroFile"].ToString());

            //Read old spectra information
            var format  = new MascotPeptideTextFormat();
            var spectra = format.ReadFromFile(options.InputFile);

            // Drop annotations that are about to be recomputed.
            foreach (var spec in spectra)
            {
                spec.Annotations.Remove("RetentionTime");
                spec.Annotations.Remove("TheoreticalDeuterium");
                spec.Annotations.Remove("ObservedDeuterium");
                spec.Annotations.Remove("NumDeuteriumIncorporated");
                spec.Annotations.Remove("NumExchangableHydrogen");
                spec.Annotations.Remove("DeuteriumEnrichmentPercent");
            }

            var calcSpectra = new List <IIdentifiedSpectrum>();
            var aas         = new Aminoacids();

            foreach (var pep in spectra)
            {
                // Spectra are matched to deuterium results by the extension-less name of their target file.
                var filename = Path.GetFileNameWithoutExtension(builder.GetTargetFile(pep));
                if (deuteriumMap.ContainsKey(filename))
                {
                    var deuterium = deuteriumMap[filename];

                    var numExchangeableHydrogens = aas.ExchangableHAtom(pep.Peptide.PureSequence);
                    // ToString() instead of "as string": a non-string annotation value would otherwise
                    // reach double.Parse as null. This matches how the value is parsed further down.
                    var numDeuteriumIncorporated = double.Parse(deuterium.Annotations["NumDeuteriumIncorporated"].ToString());

                    pep.Annotations["PeakRetentionTime"]          = deuterium.Annotations["RetentionTime"];
                    pep.Annotations["TheoreticalDeuterium"]       = deuterium.Annotations["TheoreticalDeuterium"];
                    pep.Annotations["ObservedDeuterium"]          = deuterium.Annotations["ObservedDeuterium"];
                    pep.Annotations["NumDeuteriumIncorporated"]   = deuterium.Annotations["NumDeuteriumIncorporated"];
                    pep.Annotations["NumExchangableHydrogen"]     = numExchangeableHydrogens;
                    pep.Annotations["DeuteriumEnrichmentPercent"] = numDeuteriumIncorporated / numExchangeableHydrogens;

                    calcSpectra.Add(pep);
                }
            }
            format.PeptideFormat.Headers = format.PeptideFormat.Headers + "\tPeakRetentionTime\tTheoreticalDeuterium\tObservedDeuterium\tNumDeuteriumIncorporated\tNumExchangableHydrogen\tDeuteriumEnrichmentPercent";
            format.NotExportSummary      = true;
            format.WriteToFile(GetPeptideDeteriumFile(), calcSpectra);

            var specGroup = calcSpectra.GroupBy(m => m.Peptide.PureSequence).OrderBy(l => l.Key).ToList();

            var times = options.ExperimentalTimeMap.Values.Distinct().OrderBy(m => m).ToArray();

            using (var sw = new StreamWriter(options.OutputFile))
            {
                sw.WriteLine("Peptide\t{0}", (from t in times select t.ToString()).Merge("\t"));

                foreach (var peptide in specGroup)
                {
                    // Spectra of this peptide grouped by the time point of their experimental.
                    var curSpectra = peptide.GroupBy(m => options.ExperimentalTimeMap[m.Query.FileScan.Experimental]).ToDictionary(l => l.Key, l => l.ToArray());
                    if (options.PeptideInAllTimePointOnly && times.Any(l => !curSpectra.ContainsKey(l)))
                    {
                        continue;
                    }

                    sw.Write(peptide.Key);

                    // One column per time point: the median enrichment, or NA when the time point has no spectrum.
                    foreach (var time in times)
                    {
                        if (curSpectra.ContainsKey(time))
                        {
                            var deps      = (from spec in curSpectra[time] select double.Parse(spec.Annotations["DeuteriumEnrichmentPercent"].ToString())).ToArray();
                            var depMedian = Statistics.Median(deps);
                            sw.Write("\t{0:0.######}", depMedian);
                        }
                        else
                        {
                            sw.Write("\tNA");
                        }
                    }
                    sw.WriteLine();
                }
            }

            Progress.SetMessage("Peptide deuterium enrichment calculation finished ...");

            return(new string[] { options.OutputFile });
        }
Example #6
0
        /// <summary>
        /// Calculates deuterium enrichment per protein: writes the unambiguous spectra to a peptide
        /// file, runs PeptideDeuteriumCalculator on it, copies the resulting annotations back,
        /// drops peptides and groups without a result, writes the per-protein time table and runs
        /// the RatioR template on that table.
        /// </summary>
        /// <returns>A one-element array holding options.OutputFile.</returns>
        public override IEnumerable <string> Process()
        {
            //Prepare unique peptide file
            var proteinFormat = new MascotResultTextFormat();
            var groups        = proteinFormat.ReadFromFile(options.InputFile);

            groups.RemoveAmbiguousSpectra();

            var allSpectra = groups.GetSpectra();

            // Annotations left by an earlier run are removed before the spectra are written out.
            var staleKeys = new[]
            {
                "TheoreticalDeuterium",
                "ObservedDeuterium",
                "NumDeuteriumIncorporated",
                "NumExchangableHydrogen",
                "DeuteriumEnrichmentPercent"
            };

            foreach (var spectrum in allSpectra)
            {
                foreach (var staleKey in staleKeys)
                {
                    spectrum.Annotations.Remove(staleKey);
                }
            }

            var peptideFile   = Path.ChangeExtension(options.InputFile, ".unique.peptides");
            var peptideFormat = new MascotPeptideTextFormat(proteinFormat.PeptideFormat.Headers);

            peptideFormat.WriteToFile(peptideFile, allSpectra);

            //Calculate deuterium enrichment at peptide level
            var peptideOptions = new DeuteriumCalculatorOptions();

            options.CopyProperties(peptideOptions);
            peptideOptions.InputFile  = peptideFile;
            peptideOptions.OutputFile = peptideFile + ".tsv";

            var peptideCalculator = new PeptideDeuteriumCalculator(peptideOptions);

            peptideCalculator.Progress = this.Progress;
            peptideCalculator.Process();

            //Copy annotation from calculated peptide to original peptide
            var calculated     = peptideFormat.ReadFromFile(peptideCalculator.GetPeptideDeteriumFile());
            var originalByFile = allSpectra.ToDictionary(m => m.Query.FileScan.LongFileName);

            foreach (var source in calculated)
            {
                var target = originalByFile[source.Query.FileScan.LongFileName];
                foreach (var annotation in source.Annotations)
                {
                    target.Annotations[annotation.Key] = annotation.Value;
                }
            }

            //Remove the peptide not contain calculation result
            // Walk backwards so that RemoveAt does not shift the indices still to be visited.
            for (int index = groups.Count - 1; index >= 0; index--)
            {
                foreach (var member in groups[index])
                {
                    member.Peptides.RemoveAll(l => !l.Spectrum.Annotations.ContainsKey("DeuteriumEnrichmentPercent"));
                }

                if (groups[index][0].Peptides.Count == 0)
                {
                    groups.RemoveAt(index);
                }
            }

            proteinFormat.PeptideFormat = peptideFormat.PeptideFormat;

            var individualFile = Path.ChangeExtension(options.OutputFile, ".individual.tsv");

            proteinFormat.WriteToFile(individualFile, groups);

            var times    = options.ExperimentalTimeMap.Values.Distinct().OrderBy(m => m).ToArray();
            var timeFile = Path.ChangeExtension(options.OutputFile, ".times.tsv");

            using (var writer = new StreamWriter(timeFile))
            {
                writer.WriteLine("Protein\t{0}", times.Select(t => t.ToString()).Merge("\t"));

                foreach (var proteinGroup in groups)
                {
                    var groupSpectra = proteinGroup[0].GetSpectra();
                    if (options.PeptideInAllTimePointOnly)
                    {
                        // Keep only the spectra of peptide sequences observed at every time point.
                        var bySequence = groupSpectra.ToGroupDictionary(l => l.Peptide.PureSequence);
                        groupSpectra.Clear();
                        foreach (var sameSequence in bySequence.Values)
                        {
                            var byTime = sameSequence.ToGroupDictionary(m => options.ExperimentalTimeMap[m.Query.FileScan.Experimental]);
                            if (!times.Any(time => !byTime.ContainsKey(time)))
                            {
                                groupSpectra.AddRange(sameSequence);
                            }
                        }
                    }

                    if (groupSpectra.Count == 0)
                    {
                        continue;
                    }

                    writer.Write(proteinGroup.Select(p => p.Name).Merge("/"));
                    var spectraByTime = groupSpectra.ToGroupDictionary(m => options.ExperimentalTimeMap[m.Query.FileScan.Experimental]);

                    // One column per time point: the median enrichment, or NA when nothing was observed.
                    foreach (var time in times)
                    {
                        if (!spectraByTime.ContainsKey(time))
                        {
                            writer.Write("\tNA");
                            continue;
                        }

                        var enrichments = spectraByTime[time]
                                          .Select(spec => double.Parse(spec.Annotations["DeuteriumEnrichmentPercent"].ToString()))
                                          .ToArray();
                        var median = Statistics.Median(enrichments);
                        writer.Write("\t{0:0.######}", median);
                    }
                    writer.WriteLine();
                }
            }

            Progress.SetMessage("Calculating ratio consistant ...");

            var ratioOptions = new RTemplateProcessorOptions();
            ratioOptions.InputFile      = timeFile;
            ratioOptions.OutputFile     = options.OutputFile;
            ratioOptions.RTemplate      = RatioR;
            ratioOptions.RExecute       = SystemUtils.GetRExecuteLocation();
            ratioOptions.CreateNoWindow = true;

            var ratioProcessor = new RTemplateProcessor(ratioOptions);
            ratioProcessor.Progress = this.Progress;
            ratioProcessor.Process();

            Progress.SetMessage("Finished ...");

            return(new string[] { options.OutputFile });
        }
        /// <summary>
        /// For every key in <paramref name="keys"/>, merges the ions of the matching map, writes
        /// per-ion statistics to a file named after <paramref name="filename"/> and runs the
        /// "DetectSignificantIon.r" template on that file.
        /// </summary>
        /// <param name="scanCounts">Total count per key, used as the frequency denominator.</param>
        /// <param name="keys">Keys to process, in order.</param>
        /// <param name="filename">Prefix of the files written for each key.</param>
        /// <param name="curMaps">Peak entries per key.</param>
        /// <param name="exportIndividualIon">Not read by this method.</param>
        private void WriteMap(Dictionary <int, int> scanCounts, List <int> keys, string filename, Dictionary <int, Dictionary <int, List <PeakEntry> > > curMaps, bool exportIndividualIon)
        {
            foreach (var charge in keys)
            {
                var scanTotal = scanCounts[charge];

                // The file suffix depends on the key: full MS, unknown (0) or an MS2 charge.
                string suffix;
                if (charge == FULLMS_CHARGE)
                {
                    suffix = ".fullms";
                }
                else if (charge == 0)
                {
                    suffix = ".unknown";
                }
                else
                {
                    suffix = ".ms2charge" + charge.ToString();
                }
                string subfile = filename + suffix;

                var binned = curMaps[charge];

                foreach (var bin in binned.Values)
                {
                    MergeIons(bin);
                }

                // Regroup all entries by the rounded value of (m/z + 0.5) and merge again within each group.
                var entries   = binned.Values.SelectMany(bin => bin).ToList();
                var regrouped = entries.GroupBy(m => (int)Math.Round(m.Ion.Mz + 0.5)).ToDictionary(m => m.Key, m => m.ToList());
                foreach (var bin in regrouped.Values)
                {
                    MergeIons(bin);
                }

                entries = regrouped.Values.SelectMany(bin => bin).ToList();

                //remove the duplication
                // Per scan, only the most intense observation of an entry is kept.
                foreach (var entry in entries)
                {
                    entry.Intensities = entry.Intensities
                                        .GroupBy(m => m.Scan)
                                        .Select(sameScan => sameScan.OrderByDescending(obs => obs.Intensity).First())
                                        .ToList();
                }

                using (var writer = new StreamWriter(subfile))
                {
                    writer.WriteLine("Ion\tCount\tFrequency\tMeanIntensity\tSD\tMedianIntensity");

                    // Entries with the most observations are written first.
                    var ranked = entries.OrderByDescending(en => en.Intensities.Count).ToList();

                    foreach (var entry in ranked)
                    {
                        var intensities = entry.Intensities.Select(obs => obs.Intensity).ToArray();
                        var mean        = Statistics.Mean(intensities);
                        var sd          = Statistics.StandardDeviation(intensities);
                        var median      = Statistics.Median(intensities);

                        writer.WriteLine("{0:0.0000}\t{1}\t{2:0.0000}\t{3:0.000}\t{4:0.000}\t{5:0.000}", entry.Ion.Mz, entry.Intensities.Count, entry.Intensities.Count * 1.0 / scanTotal, mean, sd, median);
                    }
                    writer.WriteLine();
                }

                var sigOptions = new RTemplateProcessorOptions()
                {
                    InputFile  = subfile,
                    OutputFile = subfile + ".sig.tsv",
                    RExecute   = ExternalProgramConfig.GetExternalProgram("R"),
                    RTemplate  = FileUtils.GetTemplateDir() + "/DetectSignificantIon.r"
                };

                sigOptions.Parameters.Add("minfreq<-0.01");
                sigOptions.Parameters.Add("probability<-0.95");
                sigOptions.Parameters.Add("minMedianIntensity<-0.05");

                var processor = new RTemplateProcessor(sigOptions);
                processor.Progress = this.Progress;
                processor.Process();
            }
        }
        /// <summary>
        /// Builds chromatograph profiles for the identified peptides of every raw file (one raw file
        /// per parallel task), writes them as text and xml files, and then runs the BoundaryR
        /// template with the target directory as input.
        /// </summary>
        /// <returns>A one-element array holding options.OutputFile.</returns>
        /// <exception cref="Exception">An experimental name has no matching raw file in options.RawDirectory.</exception>
        /// <exception cref="UserTerminatedException">Progress reports a pending cancellation.</exception>
        public override IEnumerable <string> Process()
        {
            var format           = new MascotPeptideTextFormat();
            var spectra          = format.ReadFromFile(options.InputFile);
            // Spectra grouped by lower-cased experimental name; raw files keyed by lower-cased file name without extension.
            var peptideMap       = spectra.ToGroupDictionary(m => m.Query.FileScan.Experimental.ToLower());
            var rawfiles         = Directory.GetFiles(options.RawDirectory, "*.raw", SearchOption.AllDirectories).ToDictionary(m => Path.GetFileNameWithoutExtension(m).ToLower());
            var rententionWindow = options.RetentionTimeWindow;

            // Every experimental must have a raw file before any work starts.
            var missed = peptideMap.Keys.Except(rawfiles.Keys).ToArray();

            if (missed.Length > 0)
            {
                throw new Exception(string.Format("Cannot find raw file of {0} in directory {1}", missed.Merge("/"), options.RawDirectory));
            }

            var option = new ParallelOptions()
            {
                //MaxDegreeOfParallelism = Math.Min(1, peptideMap.Count),
                MaxDegreeOfParallelism = Math.Min(Environment.ProcessorCount, peptideMap.Count),
            };

            Parallel.ForEach(peptideMap, option, raw =>
            {
                //foreach (var raw in peptideMap)
                //{
                var peptides = raw.Value;

                Progress.SetMessage("Preparing isotopic for " + raw.Key + " ...");
                // One profile per peptide, with its isotopic ions initialised.
                var waitingPeaks = new List <ChromatographProfile>();
                foreach (var peptide in peptides)
                {
                    string file = GetTargetFile(peptide);
                    var chro    = new ChromatographProfile()
                    {
                        Experimental   = peptide.Query.FileScan.Experimental,
                        IdentifiedScan = peptide.Query.FileScan.FirstScan,
                        ObservedMz     = peptide.GetPrecursorMz(),
                        TheoreticalMz  = peptide.GetTheoreticalMz(),
                        Charge         = peptide.Query.Charge,
                        Sequence       = peptide.Peptide.PureSequence,
                        FileName       = Path.GetFileName(file)
                    };
                    chro.InitializeIsotopicIons(options.MzTolerancePPM);
                    waitingPeaks.Add(chro);
                }

                if (waitingPeaks.Count == 0)
                {
                    //continue;
                    return;
                }

                // NOTE(review): resultMap is not used anywhere else in this method.
                Dictionary <string, List <ChromatographProfile> > resultMap = new Dictionary <string, List <ChromatographProfile> >();

                List <FullMS> fullMSList = new List <FullMS>();
                Progress.SetMessage("Reading full ms list from " + rawfiles[raw.Key] + "...");
                using (var rawReader = new CacheRawFile(RawFileFactory.GetRawFileReader(rawfiles[raw.Key])))
                {
                    // Collect scan number and retention time of every MS level 1 scan; Peaks starts out null.
                    var firstScan = rawReader.GetFirstSpectrumNumber();
                    var lastScan  = rawReader.GetLastSpectrumNumber();
                    for (int scan = firstScan; scan <= lastScan; scan++)
                    {
                        var mslevel = rawReader.GetMsLevel(scan);
                        if (mslevel == 1)
                        {
                            fullMSList.Add(new FullMS()
                            {
                                Scan          = scan,
                                RetentionTime = rawReader.ScanToRetentionTime(scan),
                                Peaks         = null
                            });
                        }
                    }

                    // Profiles sharing sequence and theoretical m/z (formatted to 4 decimals) are handled together.
                    var chroGroups = waitingPeaks.GroupBy(chro => string.Format("{0}_{1:0.0000}", chro.Sequence, chro.TheoreticalMz));
                    foreach (var chroGroup in chroGroups)
                    {
                        List <ChromatographProfile> profileChros = new List <ChromatographProfile>();
                        foreach (var chro in chroGroup.OrderBy(m => m.IdentifiedScan))
                        {
                            // Master scan: the last full MS entry whose scan number is not above the
                            // identified scan; index 0 when no entry after the first qualifies.
                            var masterScanIndex = 0;
                            for (int i = 1; i < fullMSList.Count; i++)
                            {
                                if (chro.IdentifiedScan < fullMSList[i].Scan)
                                {
                                    break;
                                }
                                masterScanIndex = i;
                            }
                            // NOTE(review): masterScan is not read afterwards; fullMSList[masterScanIndex].Scan is used directly.
                            var masterScan          = fullMSList[masterScanIndex].Scan;
                            var masterRetentionTime = fullMSList[masterScanIndex].RetentionTime;

                            // If a profile already built for this group contains the master scan, flag that
                            // scan as identified and skip building another profile.
                            bool bExist = false;
                            foreach (var profileChro in profileChros)
                            {
                                foreach (var pkl in profileChro.Profiles)
                                {
                                    if (pkl.Scan == fullMSList[masterScanIndex].Scan)
                                    {
                                        pkl.Identified = true;
                                        bExist         = true;
                                        break;
                                    }
                                }

                                if (bExist)
                                {
                                    break;
                                }
                            }

                            if (bExist)
                            {
                                continue;
                            }

                            Progress.SetMessage("Processing {0} : {1:0.#####} : {2} : {3}", chro.Sequence, chro.ObservedMz, chro.IdentifiedScan, Path.GetFileName(chro.FileName));

                            // Walk backwards from the master scan until the retention window is left
                            // or AddEnvelope returns false.
                            for (int scanIndex = masterScanIndex; scanIndex >= 0; scanIndex--)
                            {
                                if (Progress.IsCancellationPending())
                                {
                                    throw new UserTerminatedException();
                                }

                                var curRetentionTime = fullMSList[scanIndex].RetentionTime;
                                if (masterRetentionTime - curRetentionTime > rententionWindow)
                                {
                                    break;
                                }

                                if (!AddEnvelope(chro, rawReader, fullMSList, scanIndex))
                                {
                                    break;
                                }

                                if (scanIndex == masterScanIndex)
                                {
                                    chro.Profiles.Last().Identified = true;
                                }
                            }
                            // The backward walk is reversed before the forward walk continues the profile list.
                            chro.Profiles.Reverse();

                            // Walk forwards from the scan after the master scan under the same stop conditions.
                            for (int scanIndex = masterScanIndex + 1; scanIndex < fullMSList.Count; scanIndex++)
                            {
                                if (Progress.IsCancellationPending())
                                {
                                    throw new UserTerminatedException();
                                }

                                var curRetentionTime = fullMSList[scanIndex].RetentionTime;
                                if (curRetentionTime - masterRetentionTime > rententionWindow)
                                {
                                    break;
                                }

                                if (!AddEnvelope(chro, rawReader, fullMSList, scanIndex))
                                {
                                    break;
                                }
                            }

                            profileChros.Add(chro);
                        }

                        // Drop short profiles, then order the rest by profile count, largest first.
                        profileChros.RemoveAll(l => l.Profiles.Count < options.MinimumScanCount);
                        profileChros.Sort((m1, m2) => m2.Profiles.Count.CompareTo(m1.Profiles.Count));

                        // The first profile goes to the target directory; the others go to the sub
                        // directory with ".sub" inserted before the file extension.
                        bool bMain = true;
                        foreach (var chro in profileChros)
                        {
                            string filename;
                            if (bMain)
                            {
                                filename = Path.Combine(GetTargetDirectory(chro.Experimental), chro.FileName);
                            }
                            else
                            {
                                filename = Path.Combine(GetTargetSubDirectory(chro.Experimental), Path.ChangeExtension(chro.FileName, ".sub" + Path.GetExtension(chro.FileName)));
                            }
                            bMain = false;

                            new ChromatographProfileTextWriter().WriteToFile(filename, chro);
                            new ChromatographProfileXmlFormat().WriteToFile(filename + ".xml", chro);
                        }
                    }
                }
            }
                             );

            Progress.SetMessage("Finding boundaries ...");
            // The R template receives targetDir as its input rather than a single file.
            var boundaryOptions = new RTemplateProcessorOptions()
            {
                InputFile      = targetDir,
                OutputFile     = options.OutputFile,
                RTemplate      = BoundaryR,
                RExecute       = SystemUtils.GetRExecuteLocation(),
                CreateNoWindow = true
            };

            new RTemplateProcessor(boundaryOptions)
            {
                Progress = this.Progress
            }.Process();

            return(new string[] { options.OutputFile });
        }
        /// <summary>
        /// Computes the ratio result for a protein group from the usable spectra of its first
        /// protein and stores it on every protein of the group.
        /// </summary>
        /// <param name="proteinGroup">Group to quantify; spectra are taken from its first protein.</param>
        /// <param name="validFunc">Extra predicate a spectrum must satisfy to be used.</param>
        /// <param name="runRImmediately">
        /// When false and a regression is needed, only the intensity file is prepared and a
        /// <see cref="WaitingEntry"/> is returned so the caller can run R later.
        /// </param>
        /// <returns>
        /// A <see cref="WaitingEntry"/> for a deferred regression, or null when the group has been
        /// fully handled (annotation saved or removed).
        /// </returns>
        private WaitingEntry DoCalculate(IIdentifiedProteinGroup proteinGroup, Func<IIdentifiedSpectrum, bool> validFunc, bool runRImmediately)
        {
            List<IIdentifiedSpectrum> spectra = (from s in proteinGroup[0].GetSpectra()
                                                 where validFunc(s) && s.IsEnabled(true) && HasPeptideRatio(s)
                                                 select s).ToList();

            if (spectra.Count == 0)
            {
                // Nothing usable: clear the annotation on every protein of the group.
                foreach (IIdentifiedProtein protein in proteinGroup)
                {
                    this.intensityFunc.RemoveFromAnnotation(protein);
                }
                return null;
            }

            LinearRegressionRatioResult lrrr;

            if (spectra.Count == 1)
            {
                // A single spectrum cannot be regressed; use its peptide ratio directly.
                // (The ratio is computed once; the previous second call stored an unused value.)
                lrrr = new LinearRegressionRatioResult(CalculatePeptideRatio(spectra[0]), 0.0)
                {
                    PointCount         = 1,
                    TValue             = 0,
                    PValue             = 1,
                    ReferenceIntensity = this.intensityFunc.GetReferenceIntensity(spectra[0]),
                    SampleIntensity    = this.intensityFunc.GetSampleIntensity(spectra[0])
                };
            }
            else
            {
                var intensities = this.intensityFunc.ConvertToArray(spectra);

                // Maxima of the first (sample) and second (reference) intensity series.
                double maxSam = intensities[0].Max();
                double maxRef = intensities[1].Max();

                // NOTE(review): PointCount below is intensities.Count(), i.e. the element count of
                // the outer collection - confirm this is the intended number of points.
                if (maxSam == 0.0)
                {
                    // No sample signal at all: use the fixed ratio 20 and derive the sample intensity.
                    lrrr = new LinearRegressionRatioResult(20, 0.0)
                    {
                        PointCount         = intensities.Count(),
                        TValue             = 0,
                        PValue             = 0,
                        ReferenceIntensity = maxRef,
                    };
                    lrrr.SampleIntensity = maxRef / lrrr.Ratio;
                }
                else if (maxRef == 0.0)
                {
                    // No reference signal at all: use the fixed ratio 0.05 and derive the reference intensity.
                    lrrr = new LinearRegressionRatioResult(0.05, 0.0)
                    {
                        PointCount      = intensities.Count(),
                        TValue          = 0,
                        PValue          = 0,
                        SampleIntensity = maxSam
                    };
                    lrrr.ReferenceIntensity = maxSam * lrrr.Ratio;
                }
                else
                {
                    var filename = (this.DetailDirectory + "/" + proteinGroup[0].Name.Replace("|", "_") + ".csv").Replace("\\", "/");

                    PrepareIntensityFile(spectra, filename);

                    if (!runRImmediately)
                    {
                        // Defer the regression; the caller receives what it needs to run it later.
                        return new WaitingEntry()
                        {
                            Group         = proteinGroup,
                            IntensityFile = filename
                        };
                    }

                    var linearfile = filename + ".linear";

                    var roptions = new RTemplateProcessorOptions();
                    roptions.InputFile  = filename;
                    roptions.OutputFile = linearfile;
                    roptions.RTemplate  = FileUtils.GetTemplateDir() + "/PairQuantification.r";

                    new RTemplateProcessor(roptions).Process();

                    // The result is read from the second line of the R output (first line is skipped).
                    var parts = File.ReadAllLines(linearfile).Skip(1).First().Split('\t');

                    lrrr = ParseLinearRegressionRatioResult(parts, 0);
                }
            }

            foreach (IIdentifiedProtein protein in proteinGroup)
            {
                this.intensityFunc.SaveToAnnotation(protein, lrrr);
            }

            return null;
        }
        /// <summary>
        /// Quantifies isobaric-labelled peptides: loads the experimental design and peptides,
        /// optionally normalizes the channel intensities per raw file with the cyclic loess R
        /// template, writes the per-spectrum reference/sample table and runs the peptide
        /// quantification R template on it.
        /// </summary>
        /// <returns>A single-element sequence holding the quantification result file name.</returns>
        /// <exception cref="Exception">
        /// No spectrum carries isobaric information, or a row of the normalization result cannot
        /// be matched to a spectrum.
        /// </exception>
        /// <exception cref="UserTerminatedException">Cancellation was requested during normalization.</exception>
        public override IEnumerable<string> Process()
        {
            // Every number exchanged with R must use '.' as decimal separator regardless of the
            // machine locale, so all formatting/parsing of such values uses the invariant culture.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            var design = new IsobaricLabelingExperimentalDesign();
            design.LoadFromFile(options.DesignFile);

            string resultFileName = GetResultFilePrefix(design);
            string paramFileName  = Path.ChangeExtension(resultFileName, ".param");
            options.SaveToFile(paramFileName);

            Progress.SetMessage("Reading peptides...");

            List<IIdentifiedSpectrum> spectra = new MascotPeptideTextFormat().ReadFromFile(options.PeptideFile);

            IsobaricScanUtils.Load(spectra, design.IsobaricFile, false, this.Progress);

            var isoSpectra = (from s in spectra
                              where s.FindIsobaricItem() != null
                              select s).ToList();

            if (isoSpectra.Count == 0)
            {
                throw new Exception(string.Format("No isobaric labelling information between {0} and {1}", options.PeptideFile, options.DesignFile));
            }

            if (options.PerformNormalizition)
            {
                var msg = "Normalizing channels using loess algorithm";

                var detailsDir = resultFileName + ".details";
                if (!Directory.Exists(detailsDir))
                {
                    Directory.CreateDirectory(detailsDir);
                }

                // One normalization run per raw file (grouped by experimental name).
                var isoGroup = isoSpectra.GroupBy(m => m.Query.FileScan.Experimental).ToList();
                Progress.SetRange(0, isoGroup.Count);
                Progress.SetPosition(0);
                var fileIndex = 0;
                foreach (var isoFile in isoGroup)
                {
                    if (Progress.IsCancellationPending())
                    {
                        throw new UserTerminatedException();
                    }
                    fileIndex++;

                    Progress.SetMessage("{0} {1}/{2} ...", msg, fileIndex, isoGroup.Count);

                    var datafile = Path.Combine(detailsDir,
                                                string.Format("{0}.{1}.tsv", Path.GetFileNameWithoutExtension(resultFileName), isoFile.Key));
                    var rresultfile = Path.ChangeExtension(datafile, ".norm.tsv");

                    // Write the raw channel intensities; the normalization is always re-run.
                    using (var sw = new StreamWriter(datafile))
                    {
                        sw.WriteLine("FileScan\t{0}", (from cha in design.PlexType.Channels select cha.Name).Merge("\t"));

                        foreach (var isoSpec in isoFile)
                        {
                            sw.Write("{0}", isoSpec.Query.FileScan.LongFileName);
                            var item = isoSpec.FindIsobaricItem();
                            for (int i = 0; i < design.PlexType.Channels.Count; i++)
                            {
                                sw.Write(string.Format(invariant, "\t{0:0.0}", item[i].Intensity));
                            }
                            sw.WriteLine();
                        }
                    }

                    var roptions = new RTemplateProcessorOptions();
                    roptions.InputFile  = datafile;
                    roptions.OutputFile = rresultfile;
                    roptions.Parameters.Add(string.Format(invariant, "missingvalue<-{0}", IsobaricConsts.NULL_INTENSITY));
                    roptions.RTemplate = FileUtils.GetTemplateDir() + "/CyclicLoessNormalization.r";

                    new RTemplateProcessor(roptions).Process();

                    Progress.SetPosition(fileIndex);

                    var specMap = isoFile.ToDictionary(m => m.Query.FileScan.LongFileName);

                    // Read the R result and overwrite the intensity of each spectrum.
                    using (var sr = new StreamReader(rresultfile))
                    {
                        // Skip the header line.
                        string line = sr.ReadLine();
                        IIdentifiedSpectrum spec;
                        while ((line = sr.ReadLine()) != null)
                        {
                            // A blank line ends the data section.
                            if (string.IsNullOrWhiteSpace(line))
                            {
                                break;
                            }

                            var parts = line.Split('\t');
                            if (!specMap.TryGetValue(parts[0], out spec))
                            {
                                throw new Exception(string.Format("{0} can not be found! The first column of normalization result file {1} must be FileScan!", parts[0], rresultfile));
                            }

                            // Column i of the result maps to channel i - 1 (column 0 is the FileScan key).
                            var item = spec.FindIsobaricItem();
                            for (int i = 1; i < parts.Length; i++)
                            {
                                item[i - 1].Intensity = double.Parse(parts[i], invariant);
                            }
                        }
                    }
                }
            }

            Progress.SetMessage("Quantifying peptide with outlier detection ...");
            FilterSpectraByQuantifyMode(isoSpectra);

            var refFuncs = design.References;
            var samFuncs = design.GetSamples();

            var pepfile = resultFileName + ".tsv";

            using (var sw = new StreamWriter(pepfile))
            {
                sw.WriteLine("Subject\tDataset\tFileScan\tSequence\tREF\t{0}",
                             samFuncs.ConvertAll(m => m.Name).Merge("\t"));

                // Spectra are grouped by matched sequence in modification-site mode,
                // otherwise by pure peptide sequence.
                Func<IIdentifiedSpectrum, string> keyFunc;
                if (options.Mode == QuantifyMode.qmModificationSite)
                {
                    keyFunc = m => m.GetMatchSequence();
                }
                else
                {
                    keyFunc = m => m.Peptide.PureSequence;
                }

                // The experimental-name set of each dataset does not depend on the peptide,
                // so build it once instead of once per peptide.
                var datasets = design.DatasetMap.Keys
                               .Select(dsKey => new { Name = dsKey, Files = new HashSet<string>(design.DatasetMap[dsKey]) })
                               .ToList();

                var peptides = isoSpectra.ToGroupDictionary(m => keyFunc(m)).OrderBy(m => m.Key).ToList();
                foreach (var pep in peptides)
                {
                    foreach (var dataset in datasets)
                    {
                        var dsSpectra = (from s in pep.Value
                                         where dataset.Files.Contains(s.Query.FileScan.Experimental)
                                         orderby s.Peptide.Sequence
                                         select s).ToList();

                        foreach (var spec in dsSpectra)
                        {
                            var isoitem = spec.FindIsobaricItem();
                            sw.WriteLine(string.Format(invariant,
                                                       "{0}\t{1}\t{2}\t{3}\t{4:0.0}\t{5}",
                                                       pep.Key,
                                                       dataset.Name,
                                                       spec.Query.FileScan.ShortFileName,
                                                       spec.Peptide.Sequence,
                                                       refFuncs.ConvertAll(m => m.GetValue(isoitem)).Sum(),
                                                       samFuncs.ConvertAll(m => string.Format(invariant, "{0:0.0}", m.GetValue(isoitem))).Merge("\t")));
                        }
                    }
                }
            }

            var qoptions = new RTemplateProcessorOptions();

            qoptions.InputFile  = pepfile;
            qoptions.OutputFile = resultFileName + ".quan.tsv";

            qoptions.RTemplate = string.Format("{0}/PeptideQuantification.r", FileUtils.GetTemplateDir());
            qoptions.Parameters.Add(string.Format(invariant, "missingvalue<-{0}", IsobaricConsts.NULL_INTENSITY));
            qoptions.Parameters.Add("pvalue<-0.01");
            qoptions.Parameters.Add("minFinalCount<-3");

            new RTemplateProcessor(qoptions).Process();

            Progress.SetMessage("Finished.");

            return new[] { qoptions.OutputFile };
        }
        /// <summary>
        /// Extracts chromatograph profiles for the identified peptides from their raw files,
        /// writes each profile to disk, lists the main profiles in a ".chros.tsv" file and runs
        /// the boundary R template on that list.
        /// </summary>
        /// <remarks>
        /// Both stages are skipped when their output already exists and
        /// <c>options.Overwrite</c> is false.
        /// </remarks>
        /// <returns>A single-element array holding <c>options.OutputFile</c>.</returns>
        /// <exception cref="Exception">
        /// A peptide refers to a raw file missing from <c>options.RawFiles</c>, or no profile
        /// survived the minimum scan count filter.
        /// </exception>
        /// <exception cref="UserTerminatedException">Cancellation was requested while scanning.</exception>
        public override IEnumerable<string> Process()
        {
            // Values handed to R must use '.' as decimal separator regardless of the machine locale.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            var boundaryInput = Path.ChangeExtension(options.OutputFile, ".chros.tsv");

            if (!File.Exists(boundaryInput) || options.Overwrite)
            {
                var format          = GetPeptideReader();
                var spectra         = format.ReadFromFile(options.InputFile);
                var peptideMap      = spectra.ToGroupDictionary(m => m.Query.FileScan.Experimental.ToLower());
                var rawfiles        = options.RawFiles.ToDictionary(m => RawFileFactory.GetExperimental(m).ToLower());
                var retentionWindow = options.MaximumRetentionTimeWindow;

                var missed = peptideMap.Keys.Except(rawfiles.Keys).ToArray();
                if (missed.Length > 0)
                {
                    throw new Exception(string.Format("Cannot find raw file of {0} in file list", missed.Merge("/")));
                }

                var optionThreadCount = options.ThreadCount == 0 ? Environment.ProcessorCount : options.ThreadCount;
                var option            = new ParallelOptions()
                {
                    // ParallelOptions rejects a degree of 0, which the raw-file count would yield
                    // when no spectrum was read; clamp the count so that case reaches the
                    // "Cannot find chromotograph!" error below instead.
                    MaxDegreeOfParallelism = Math.Min(optionThreadCount, Math.Max(1, peptideMap.Count)),
                };

                // Build one empty profile per identified peptide, grouped by raw file.
                var chroMap = new List<Tuple<string, List<ChromatographProfile>>>();
                foreach (var rawEntry in peptideMap)
                {
                    var pendingProfiles = new List<ChromatographProfile>();
                    foreach (var peptide in rawEntry.Value)
                    {
                        var newChro = new ChromatographProfile()
                        {
                            Experimental            = peptide.Query.FileScan.Experimental,
                            IdentifiedScan          = peptide.Query.FileScan.FirstScan,
                            IdentifiedRetentionTime = peptide.Query.FileScan.RetentionTime,
                            ObservedMz    = peptide.GetPrecursorMz(),
                            TheoreticalMz = peptide.GetTheoreticalMz(),
                            Charge        = peptide.Query.Charge,
                            Sequence      = peptide.Peptide.PureSequence,
                            FileName      = GetTargetFile(peptide),
                            SubFileName   = GetTargetSubFile(peptide)
                        };
                        newChro.InitializeIsotopicIons(options.MzTolerancePPM, options.MinimumIsotopicPercentage);
                        pendingProfiles.Add(newChro);
                    }

                    chroMap.Add(new Tuple<string, List<ChromatographProfile>>(rawEntry.Key, pendingProfiles));
                }

                // Main profiles found by the worker threads.
                ConcurrentBag<ChromatographProfile> detected = new ConcurrentBag<ChromatographProfile>();

                Parallel.ForEach(chroMap, option, raw =>
                {
                    var rawFileName  = raw.Item1;
                    var waitingPeaks = raw.Item2;

                    List<FullMS> fullMSList = new List<FullMS>();
                    Progress.SetMessage("Reading full ms list from " + rawfiles[rawFileName] + "...");
                    using (var rawReader = new CacheRawFile(RawFileFactory.GetRawFileReader(rawfiles[rawFileName])))
                    {
                        // Index every MS1 scan with its retention time; peaks are left unset here.
                        var firstScan = rawReader.GetFirstSpectrumNumber();
                        var lastScan  = rawReader.GetLastSpectrumNumber();
                        for (int scan = firstScan; scan <= lastScan; scan++)
                        {
                            var mslevel = rawReader.GetMsLevel(scan);
                            if (mslevel == 1)
                            {
                                fullMSList.Add(new FullMS()
                                {
                                    Scan          = scan,
                                    RetentionTime = rawReader.ScanToRetentionTime(scan),
                                    Peaks         = null
                                });
                            }
                        }

                        // Profiles without a scan number but with a retention time get their scan
                        // derived from the MS1 list: one past the last MS1 scan (from index 1 on)
                        // whose retention time is not later than the identified retention time.
                        foreach (var chro in waitingPeaks)
                        {
                            if (chro.IdentifiedScan == 0 && chro.IdentifiedRetentionTime > 0)
                            {
                                for (int i = 1; i < fullMSList.Count; i++)
                                {
                                    if (chro.IdentifiedRetentionTime < fullMSList[i].RetentionTime)
                                    {
                                        break;
                                    }
                                    chro.IdentifiedScan = fullMSList[i].Scan + 1;
                                }
                            }
                        }

                        var chroGroups = waitingPeaks.GroupBy(c => c.GetPeptideId());
                        foreach (var chroGroup in chroGroups)
                        {
                            List<ChromatographProfile> profileChros = new List<ChromatographProfile>();
                            foreach (var chro in chroGroup.OrderBy(m => m.IdentifiedScan))
                            {
                                // Locate the last MS1 scan that is not after the identified scan.
                                var masterScanIndex = 0;
                                for (int i = 1; i < fullMSList.Count; i++)
                                {
                                    if (chro.IdentifiedScan < fullMSList[i].Scan)
                                    {
                                        break;
                                    }
                                    masterScanIndex = i;
                                }
                                var masterScan          = fullMSList[masterScanIndex].Scan;
                                var masterRetentionTime = fullMSList[masterScanIndex].RetentionTime;

                                // If an earlier profile of this peptide already covers the master
                                // scan, mark that point as identified and skip this profile.
                                bool bExist = false;
                                foreach (var profileChro in profileChros)
                                {
                                    foreach (var pkl in profileChro.Profiles)
                                    {
                                        if (pkl.Scan == masterScan)
                                        {
                                            pkl.Identified = true;
                                            bExist         = true;
                                            break;
                                        }
                                    }

                                    if (bExist)
                                    {
                                        break;
                                    }
                                }

                                if (bExist)
                                {
                                    continue;
                                }

                                // Walk backwards from the master scan. One scan without an envelope
                                // is tolerated; the second one ends the walk.
                                int naCount = 2;
                                for (int scanIndex = masterScanIndex; scanIndex >= 0; scanIndex--)
                                {
                                    if (Progress.IsCancellationPending())
                                    {
                                        throw new UserTerminatedException();
                                    }

                                    var curRetentionTime = fullMSList[scanIndex].RetentionTime;
                                    if (masterRetentionTime - curRetentionTime > retentionWindow)
                                    {
                                        break;
                                    }

                                    if (!AddEnvelope(chro, rawReader, fullMSList, scanIndex))
                                    {
                                        naCount--;
                                        if (naCount == 0)
                                        {
                                            break;
                                        }
                                        else
                                        {
                                            continue;
                                        }
                                    }

                                    if (scanIndex == masterScanIndex)
                                    {
                                        chro.Profiles.Last().Identified = true;
                                    }
                                }

                                // Points were collected from the master scan backwards; restore scan order.
                                chro.Profiles.Reverse();

                                // Walk forwards from the scan after the master scan, same tolerance.
                                naCount = 2;
                                for (int scanIndex = masterScanIndex + 1; scanIndex < fullMSList.Count; scanIndex++)
                                {
                                    if (Progress.IsCancellationPending())
                                    {
                                        throw new UserTerminatedException();
                                    }

                                    var curRetentionTime = fullMSList[scanIndex].RetentionTime;
                                    if (curRetentionTime - masterRetentionTime > retentionWindow)
                                    {
                                        break;
                                    }

                                    if (!AddEnvelope(chro, rawReader, fullMSList, scanIndex))
                                    {
                                        naCount--;
                                        if (naCount == 0)
                                        {
                                            break;
                                        }
                                        else
                                        {
                                            continue;
                                        }
                                    }
                                }

                                profileChros.Add(chro);
                            }

                            // Drop short profiles; the longest remaining one becomes the main profile.
                            profileChros.RemoveAll(l => l.Profiles.Count < options.MinimumScanCount);
                            profileChros.Sort((m1, m2) => m2.Profiles.Count.CompareTo(m1.Profiles.Count));

                            bool bMain = true;
                            foreach (var chro in profileChros)
                            {
                                // The main profile goes to FileName, every other one to SubFileName.
                                var filename = bMain ? chro.FileName : chro.SubFileName;
                                if (bMain)
                                {
                                    detected.Add(chro);
                                }

                                bMain = false;

                                new ChromatographProfileTextWriter().WriteToFile(filename, chro);
                                new ChromatographProfileXmlFormat().WriteToFile(filename + ".xml", chro);
                            }
                        }
                    }
                }
                                 );

                var chroList = new List<ChromatographProfile>(detected);
                chroList.Sort((m1, m2) => m1.FileName.CompareTo(m2.FileName));

                if (chroList.Count == 0)
                {
                    throw new Exception("Cannot find chromotograph!");
                }

                // List the main profiles as input for the boundary R template.
                using (var sw = new StreamWriter(boundaryInput))
                {
                    sw.WriteLine("ChroDirectory\tChroFile\tSample\tPeptideId\tTheoreticalMz\tCharge\tIdentifiedScan");
                    foreach (var chro in chroList)
                    {
                        sw.WriteLine(string.Format(invariant,
                                                   "{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}",
                                                   Path.GetDirectoryName(chro.FileName).Replace("\\", "/"),
                                                   Path.GetFileNameWithoutExtension(chro.FileName),
                                                   chro.Experimental,
                                                   chro.GetPeptideId(),
                                                   chro.TheoreticalMz,
                                                   chro.Charge,
                                                   chro.IdentifiedScan));
                    }
                }
            }

            if (!File.Exists(options.OutputFile) || options.Overwrite)
            {
                Progress.SetMessage("Finding boundaries ...");
                var boundaryOptions = new RTemplateProcessorOptions()
                {
                    InputFile      = boundaryInput,
                    OutputFile     = options.OutputFile,
                    RTemplate      = BoundaryR,
                    RExecute       = ExternalProgramConfig.GetExternalProgram("R"),
                    CreateNoWindow = true
                };
                boundaryOptions.Parameters.Add("outputImage<-" + (options.DrawImage ? "1" : "0"));
                boundaryOptions.Parameters.Add(string.Format(invariant, "maximumProfileDistance<-{0}", options.MaximumProfileDistance));
                new RTemplateProcessor(boundaryOptions)
                {
                    Progress = this.Progress
                }.Process();
            }

            //if (options.DrawImage)
            //{
            //  Progress.SetMessage("Drawing images ...");

            //  var imageOptions = new RTemplateProcessorOptions()
            //  {
            //    InputFile = options.OutputFile,
            //    OutputFile = Path.ChangeExtension(options.OutputFile, ".image"),
            //    RTemplate = ImageR,
            //    RExecute = SystemUtils.GetRExecuteLocation(),
            //    CreateNoWindow = true,
            //    NoResultFile = true
            //  };
            //  new RTemplateProcessor(imageOptions) { Progress = this.Progress }.Process();
            //}

            return new string[] { options.OutputFile };
        }