예제 #1
0
        /// <summary>
        /// Reads an annotation file and converts every record into a <see cref="FilterItem"/>.
        /// </summary>
        /// <param name="fileName">Path of the annotation file handed to <c>AnnotationFormat.ReadFromFile</c>.</param>
        /// <returns>One <see cref="FilterItem"/> per record, in the order the reader returned them.</returns>
        /// <exception cref="System.FormatException">A count or p-value column does not hold a valid number.</exception>
        public List <FilterItem> ReadFromFile(string fileName)
        {
            // The file is machine-written data, so numbers are parsed with the invariant culture.
            // Parsing with the current culture would reject or misread "0.05" on locales that
            // use ',' as the decimal separator.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            var anns   = new AnnotationFormat().ReadFromFile(fileName);
            var result = new List <FilterItem>();

            foreach (var ann in anns)
            {
                var values = ann.Annotations;
                var item   = new FilterItem();

                // Text columns are copied as-is; "as string" yields null for a non-string value.
                item.Chr              = values["chr"] as string;
                item.Start            = values["start"] as string;
                item.End              = values["end"] as string;
                item.MajorAllele      = values["major_allele"] as string;
                item.MinorAllele      = values["minor_allele"] as string;
                item.ReferenceAllele  = values["ref_allele"] as string;
                item.NormalMajorCount = int.Parse(values["normal_major_count"] as string, invariant);
                item.NormalMinorCount = int.Parse(values["normal_minor_count"] as string, invariant);
                item.TumorMajorCount  = int.Parse(values["tumor_major_count"] as string, invariant);
                item.TumorMinorCount  = int.Parse(values["tumor_minor_count"] as string, invariant);
                item.FisherGroup      = double.Parse(values["fisher_group"] as string, invariant);
                item.FisherNormal     = values["fisher_normal"] as string;
                item.BrglmConverged   = values["brglm_converged"] as string;
                item.BrglmGroup       = double.Parse(values["brglm_group"] as string, invariant);
                item.BrglmScore       = values["brglm_score"] as string;
                item.BrglmStrand      = values["brglm_strand"] as string;
                item.BrglmPosition    = values["brglm_position"] as string;
                item.BrglmGroupFdr    = double.Parse(values["brglm_group_fdr"] as string, invariant);
                item.Filter           = values["filter"] as string;
                item.Identity         = values["Identity"] as string;
                result.Add(item);
            }
            return(result);
        }
예제 #2
0
    /// <summary>
    /// Loads the shipment-portion annotation file and fills <c>maps</c> with
    /// lower-cased "bcr_shipment_portion_uuid" values as keys and the matching
    /// "bcr_sample_barcode" values.
    /// </summary>
    /// <param name="clinicShipmentPortionFile">Path of the annotation file to read.</param>
    public FindParticipantProtein(string clinicShipmentPortionFile)
    {
      var portions = new AnnotationFormat().ReadFromFile(clinicShipmentPortionFile);

      // ToDictionary throws if two records share the same lower-cased UUID.
      maps = portions.ToDictionary(
        portion => portion.Annotations["bcr_shipment_portion_uuid"].ToString().ToLower(),
        portion => portion.Annotations["bcr_sample_barcode"].ToString());
    }
예제 #3
0
        /// <summary>
        /// Reads the given annotation file and stores in <c>maps</c> a lookup from the
        /// lower-cased shipment portion UUID column to the sample barcode column.
        /// </summary>
        /// <param name="clinicShipmentPortionFile">Annotation file containing both columns.</param>
        public FindParticipantProtein(string clinicShipmentPortionFile)
        {
            var reader  = new AnnotationFormat();
            var records = reader.ReadFromFile(clinicShipmentPortionFile);

            // Key: UUID in lower case; value: sample barcode. Duplicate keys make ToDictionary throw.
            maps = records.ToDictionary(
                record => record.Annotations["bcr_shipment_portion_uuid"].ToString().ToLower(),
                record => record.Annotations["bcr_sample_barcode"].ToString());
        }
예제 #4
0
 /// <summary>
 /// Formats every value of <paramref name="nags"/> into <paramref name="sb"/> by calling the
 /// single-value <c>Format</c> overload, then appends one space to the builder it returns.
 /// </summary>
 /// <param name="sb">Builder that receives the output.</param>
 /// <param name="nags">Enum values to format, in enumeration order.</param>
 /// <param name="annotationFormat">Format passed through to the single-value overload.</param>
 private static void Format <T>(StringBuilder sb, IEnumerable <T> nags, AnnotationFormat annotationFormat)
     where T : struct, Enum
 {
     foreach (var current in nags)
     {
         var target = Format(sb, current, annotationFormat);
         target.Append(" ");
     }
 }
예제 #5
0
        /// <summary>
        /// Renders the move, positional, time-trouble and non-standard NAG members, in that
        /// order, into one string using the requested annotation format.
        /// </summary>
        /// <param name="format">Annotation format handed to each <c>Format</c> call.</param>
        /// <returns>The concatenated text with leading and trailing whitespace trimmed.</returns>
        public string ToString(AnnotationFormat format)
        {
            var builder = new StringBuilder();

            Format(builder, MoveNAG, format);
            Format(builder, PositionalNAGs, format);
            Format(builder, TimeTroubleNAG, format);
            Format(builder, NonStandardNAGs, format);

            var text = builder.ToString();
            return text.Trim();
        }
예제 #6
0
 /// <summary>
 /// Reads the mouse UniProt example with key column 0 and selected columns 1 and 21, then
 /// checks the first entry of the second selected column for four identifiers.
 /// </summary>
 public void TestReadingAnnotationsForOneId()
 {
     var exampleBytes = Encoding.UTF8.GetBytes(Examples.MouseUniprotAnnotations);

     using (var reader = new StreamReader(new MemoryStream(exampleBytes)))
     {
         var selectedColumns = new [] { 1, 21 };
         var mappings        = AnnotationFormat.ReadMappings(reader, 0, selectedColumns);
         var byFromId        = mappings.ToDictionary(entry => entry.fromId, entry => entry.toIds);

         Assert.AreEqual("Htr2c", byFromId["p34968"][1][0]);
         Assert.AreEqual("Htr2c", byFromId["b1atn5"][1][0]);
         Assert.AreEqual("Abca7", byFromId["q91v24"][1][0]);
         Assert.AreEqual("Abhd10", byFromId["q8k188"][1][0]);
     }
 }
        /// <summary>
        /// Parses an ANNOVAR-annotated, tab-delimited file into <see cref="SomaticItem"/> records.
        /// </summary>
        /// <param name="fileName">File to parse; the text before the first '.' of its name becomes the sample name.</param>
        /// <param name="reg">Regex applied to the last two columns; group 1 is read as the major count and group 2 as the minor count.</param>
        /// <param name="scorePrefix">Marker inside the INFO column; the text after it is parsed as the score.</param>
        /// <param name="acceptChromosome">Predicate deciding whether a record on the given chromosome is kept.</param>
        /// <returns>One item per accepted record.</returns>
        /// <exception cref="System.FormatException">A count, position or score cannot be parsed as a number.</exception>
        public static List <SomaticItem> ParseAnnovarFile(string fileName, Regex reg, string scorePrefix, Func <string, bool> acceptChromosome)
        {
            // Machine-written data: parse numbers independently of the current culture.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            var result = new List <SomaticItem>();

            var bar   = Path.GetFileName(fileName).StringBefore(".");
            var annos = new AnnotationFormat("^#").ReadFromFile(fileName);

            // Only the first non-comment line (the header) is needed, so stream the file lazily
            // instead of loading every line; the '#' test is ordinal rather than culture-sensitive.
            var headers = File.ReadLines(fileName)
                          .First(l => !l.StartsWith("#", StringComparison.Ordinal))
                          .Split('\t');

            foreach (var ann in annos)
            {
                // Expected columns: Chr, Start, End, Ref, Alt, Func.refGene, Gene.refGene, GeneDetail.refGene,
                // ExonicFunc.refGene, AAChange.refGene, snp138, cosmic70, FILTER, INFO, FORMAT, then two sample columns.

                var chr = ann.Annotations["Chr"].ToString();
                if (!acceptChromosome(chr))
                {
                    continue;
                }

                // The last two header columns name the two samples.
                var m1      = reg.Match(ann.Annotations[headers[headers.Length - 2]].ToString());
                var m1Major = int.Parse(m1.Groups[1].Value, invariant);
                var m1Minor = int.Parse(m1.Groups[2].Value, invariant);
                var m2      = reg.Match(ann.Annotations[headers[headers.Length - 1]].ToString());
                var m2Major = int.Parse(m2.Groups[1].Value, invariant);
                var m2Minor = int.Parse(m2.Groups[2].Value, invariant);

                // The sample with the smaller minor-allele fraction is treated as the normal sample.
                var isNormalFirst = (((double)m1Minor) / (m1Major + m1Minor)) < (((double)m2Minor) / (m2Major + m2Minor));

                var info = ann.Annotations["INFO"].ToString();

                var item = new SomaticItem()
                {
                    Sample            = bar,
                    Chrom             = chr,
                    StartPosition     = int.Parse(ann.Annotations["Start"].ToString(), invariant),
                    RefAllele         = ann.Annotations["Ref"].ToString(),
                    AltAllele         = ann.Annotations["Alt"].ToString(),
                    NormalMajorCount  = isNormalFirst ? m1Major : m2Major,
                    NormalMinorCount  = isNormalFirst ? m1Minor : m2Minor,
                    TumorMajorCount   = isNormalFirst ? m2Major : m1Major,
                    TumorMinorCount   = isNormalFirst ? m2Minor : m1Minor,
                    Score             = double.Parse(info.StringAfter(scorePrefix), invariant),
                    RefGeneFunc       = GetDictionaryValue(ann.Annotations, "Func.refGene", string.Empty),
                    RefGeneName       = GetDictionaryValue(ann.Annotations, "Gene.refGene", string.Empty),
                    RefGeneExonicFunc = GetDictionaryValue(ann.Annotations, "ExonicFunc.refGene", string.Empty)
                };

                result.Add(item);
            }
            return(result);
        }
예제 #8
0
    /// <summary>
    /// Parses an ANNOVAR-annotated, tab-delimited file into <see cref="SomaticItem"/> records.
    /// </summary>
    /// <param name="fileName">File to parse; the text before the first '.' of its name becomes the sample name.</param>
    /// <param name="reg">Regex applied to the last two columns; group 1 is read as the major count and group 2 as the minor count.</param>
    /// <param name="scorePrefix">Marker inside the INFO column; the text after it is parsed as the score.</param>
    /// <param name="acceptChromosome">Predicate deciding whether a record on the given chromosome is kept.</param>
    /// <returns>One item per accepted record.</returns>
    /// <exception cref="System.FormatException">A count, position or score cannot be parsed as a number.</exception>
    public static List<SomaticItem> ParseAnnovarFile(string fileName, Regex reg, string scorePrefix, Func<string, bool> acceptChromosome)
    {
      // Machine-written data: parse numbers independently of the current culture.
      var invariant = System.Globalization.CultureInfo.InvariantCulture;

      var result = new List<SomaticItem>();

      var bar = Path.GetFileName(fileName).StringBefore(".");
      var annos = new AnnotationFormat("^#").ReadFromFile(fileName);

      // Only the header (first non-comment line) is needed: stream the file lazily instead of
      // loading it completely, and test the '#' prefix ordinally.
      var headers = File.ReadLines(fileName)
        .First(l => !l.StartsWith("#", StringComparison.Ordinal))
        .Split('\t');

      foreach (var ann in annos)
      {
        // Expected columns: Chr, Start, End, Ref, Alt, Func.refGene, Gene.refGene, GeneDetail.refGene,
        // ExonicFunc.refGene, AAChange.refGene, snp138, cosmic70, FILTER, INFO, FORMAT, then two sample columns.

        var chr = ann.Annotations["Chr"].ToString();
        if (!acceptChromosome(chr))
        {
          continue;
        }

        // The last two header columns name the two samples.
        var m1 = reg.Match(ann.Annotations[headers[headers.Length - 2]].ToString());
        var m1Major = int.Parse(m1.Groups[1].Value, invariant);
        var m1Minor = int.Parse(m1.Groups[2].Value, invariant);
        var m2 = reg.Match(ann.Annotations[headers[headers.Length - 1]].ToString());
        var m2Major = int.Parse(m2.Groups[1].Value, invariant);
        var m2Minor = int.Parse(m2.Groups[2].Value, invariant);

        // The sample with the smaller minor-allele fraction is treated as the normal sample.
        var isNormalFirst = (((double)m1Minor) / (m1Major + m1Minor)) < (((double)m2Minor) / (m2Major + m2Minor));

        var info = ann.Annotations["INFO"].ToString();

        var item = new SomaticItem()
        {
          Sample = bar,
          Chrom = chr,
          StartPosition = int.Parse(ann.Annotations["Start"].ToString(), invariant),
          RefAllele = ann.Annotations["Ref"].ToString(),
          AltAllele = ann.Annotations["Alt"].ToString(),
          NormalMajorCount = isNormalFirst ? m1Major : m2Major,
          NormalMinorCount = isNormalFirst ? m1Minor : m2Minor,
          TumorMajorCount = isNormalFirst ? m2Major : m1Major,
          TumorMinorCount = isNormalFirst ? m2Minor : m1Minor,
          Score = double.Parse(info.StringAfter(scorePrefix), invariant),
          RefGeneFunc = GetDictionaryValue(ann.Annotations, "Func.refGene", string.Empty),
          RefGeneName = GetDictionaryValue(ann.Annotations, "Gene.refGene", string.Empty),
          RefGeneExonicFunc = GetDictionaryValue(ann.Annotations, "ExonicFunc.refGene", string.Empty)
        };

        result.Add(item);
      }
      return result;
    }
        /// <summary>
        /// Writes the MS/MS entries referenced by the site file to <paramref name="peptideFile"/>
        /// as a tab-delimited peptide table, then reads that table back with
        /// <c>MascotPeptideTextFormat</c>.
        /// </summary>
        /// <param name="shortModMap">Modification map forwarded to <c>ParseModification</c>.</param>
        /// <param name="peptideFile">Path of the peptide table to create and re-read.</param>
        /// <returns>The spectra read back from <paramref name="peptideFile"/>.</returns>
        public List <IIdentifiedSpectrum> ExtractPeptides(Dictionary <string, MaxQuantModificationItem> shortModMap, string peptideFile)
        {
            var modificationList = MaxQuantModificationList.ReadFromFile(_option.MaxQuantModificationXml);

            // Collect every MS/MS id listed (semicolon-separated) in the site file.
            var siteEntries = new AnnotationFormat().ReadFromFile(_option.MaxQuantSiteFile);
            var wantedIds   = new HashSet <string>(
                siteEntries.SelectMany(site => (site.Annotations["MS/MS IDs"] as string).Split(';')));

            var msmsFormat  = new AnnotationFormat();
            var msmsEntries = msmsFormat.ReadFromFile(_option.MaxQuantMSMSFile);

            // Keep only the MS/MS entries whose id is referenced by a site.
            msmsEntries.RemoveAll(entry => !wantedIds.Contains(entry.Annotations["id"].ToString()));

            const string rowFormat = "{0},{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}\t{8}\t{9}";

            using (var writer = new StreamWriter(peptideFile))
            {
                writer.WriteLine("FileScan\tSequence\tMH+\tDiff(MH+)\tCharge\tScore\tReference\tModification\tRetentionTime");

                foreach (var entry in msmsEntries)
                {
                    string modification;
                    string modifiedSequence;
                    ParseModification(entry, shortModMap, modificationList, out modification, out modifiedSequence);

                    // MH+ is the reported mass plus one hydrogen mono mass.
                    var mh      = double.Parse(entry.Annotations["Mass"].ToString()) + Atom.H.MonoMass;
                    var ppmText = entry.Annotations["Mass Error [ppm]"].ToString();

                    // A "NaN" mass error is written as a zero difference.
                    var diffmh = ppmText.Equals("NaN") ? 0 : PrecursorUtils.ppm2mz(mh, double.Parse(ppmText));

                    writer.WriteLine(rowFormat,
                                     entry.Annotations["Raw file"],
                                     entry.Annotations["Scan number"],
                                     modifiedSequence,
                                     mh,
                                     diffmh,
                                     entry.Annotations["Charge"],
                                     entry.Annotations["Score"],
                                     entry.Annotations["Proteins"].ToString().Replace(";", "/"),
                                     modification,
                                     entry.Annotations["Retention time"]);
                }
            }

            var peptideReader = new MascotPeptideTextFormat();
            return peptideReader.ReadFromFile(peptideFile);
        }
예제 #10
0
        /// <summary>
        /// Appends the textual form of a single NAG value, followed by a space, to
        /// <paramref name="sb"/>. Nothing is appended when the underlying value is 0 or the
        /// resulting text is null, empty or whitespace.
        /// </summary>
        /// <param name="sb">Builder that receives the output.</param>
        /// <param name="nag">Enum value to format.</param>
        /// <param name="annotationFormat">
        /// <c>Symbolic</c> uses <c>nag.AsString(_symbolFormat)</c>; any other value writes "$"
        /// followed by the underlying value.
        /// </param>
        /// <returns>The same builder instance that was passed in.</returns>
        private static StringBuilder Format <T>(StringBuilder sb, T nag, AnnotationFormat annotationFormat)
            where T : struct, Enum
        {
            var underlying = (int)EnumsNET.Enums.GetUnderlyingValue(nag);

            if (underlying == 0)
            {
                return sb;
            }

            string text;
            if (annotationFormat == AnnotationFormat.Symbolic)
            {
                text = nag.AsString(_symbolFormat);
            }
            else
            {
                text = "$" + EnumsNET.Enums.GetUnderlyingValue(nag);
            }

            if (string.IsNullOrWhiteSpace(text))
            {
                return sb;
            }

            sb.Append(text).Append(" ");
            return sb;
        }
예제 #11
0
 /// <summary>
 /// Reads the mouse UniProt example with key column 0 and the supplied column selection.
 /// Checks that exactly the expected identifiers are returned, that each identifier is
 /// lower case, and that each mapping has one entry per selected column.
 /// </summary>
 /// <param name="selection">Column indexes passed to <c>ReadMappings</c>.</param>
 public void TestReadingAnnotationsWithDifferentSelections([ValueSource(nameof(_selections))] int[] selection)
 {
     var exampleBytes = Encoding.UTF8.GetBytes(Examples.MouseUniprotAnnotations);

     using (var reader = new StreamReader(new MemoryStream(exampleBytes)))
     {
         var expectedIds = new[]
         {
             "p34968", "b1atn5", "q5wru6", "q91v24", "q9jl36", "q9dbm0", "q6pe15", "q3tmb4", "q8c3s8", "q8c724", "q8k188"
         };

         var mappings = AnnotationFormat.ReadMappings(reader, 0, selection).ToArray();

         Assert.AreEqual(expectedIds.Length, mappings.Length);

         var actualIds = mappings.Select(mapping => mapping.fromId);
         CollectionAssert.AreEquivalent(expectedIds, actualIds);

         foreach (var(fromId, toIds) in mappings)
         {
             Assert.AreEqual(fromId, fromId.ToLower());
             Assert.AreEqual(toIds.Length, selection.Length);
         }
     }
 }
예제 #12
0
        /// <summary>
        /// Joins clinical annotations to the sample barcodes found in the first line of the data
        /// file and writes one annotation row per barcode to the output file.
        /// </summary>
        /// <returns>A single-element array holding the output file path.</returns>
        /// <exception cref="InvalidDataException">The data file has no header line.</exception>
        /// <exception cref="Exception">
        /// A patient has no clinical record and <c>_options.ThrowException</c> is set.
        /// </exception>
        public override IEnumerable <string> Process()
        {
            var format = new AnnotationFormat();

            var items = format.ReadFromFile(_options.ClinicalFile);

            // The output gets an extra leading column holding the sample barcode.
            format.Format.Headers = sampleBarcodeKey + "\t" + format.Format.Headers;

            var itemMap = items.ToDictionary(m => m.BarCode());

            using (StreamReader sr = new StreamReader(_options.DataFile))
            {
                // ReadLine returns null for an empty file; fail with a clear message instead of
                // a NullReferenceException on the Split call.
                var header = sr.ReadLine();
                if (header == null)
                {
                    throw new InvalidDataException("Data file is empty, no header line found: " + _options.DataFile);
                }

                // Barcodes are identifiers, so the prefix test is ordinal rather than culture-sensitive.
                var barcodes            = header.Split('\t').Where(m => m.StartsWith("TCGA", StringComparison.Ordinal)).ToList();
                List <Annotation> found = new List <Annotation>();
                foreach (var barcode in barcodes)
                {
                    // The patient key is the first 12 characters of the sample barcode.
                    var patient = barcode.Substring(0, 12);

                    Annotation ann;
                    if (!itemMap.TryGetValue(patient, out ann))
                    {
                        if (_options.ThrowException)
                        {
                            throw new Exception("Cannot find patient information for " + patient);
                        }

                        // Best effort: report the gap and emit a row with only the barcode columns.
                        Console.Error.WriteLine("Cannot find patient information for " + patient);
                        ann = new Annotation();
                    }

                    var curann = new Annotation();
                    curann.Annotations[sampleBarcodeKey] = barcode;
                    curann.Annotations[TCGAClinicalInformation.BcrPatientBarcode] = patient;

                    // Clinical values are copied last, so they overwrite the two keys above
                    // when the clinical record contains the same keys.
                    foreach (var e in ann.Annotations)
                    {
                        curann.Annotations[e.Key] = e.Value;
                    }
                    found.Add(curann);
                }
                format.WriteToFile(_options.OutputFile, found);
            }

            return(new[] { _options.OutputFile });
        }
    /// <summary>
    /// Joins clinical annotations to the sample barcodes found in the first line of the data
    /// file and writes one annotation row per barcode to the output file.
    /// </summary>
    /// <returns>A single-element array holding the output file path.</returns>
    /// <exception cref="InvalidDataException">The data file has no header line.</exception>
    /// <exception cref="Exception">
    /// A patient has no clinical record and <c>_options.ThrowException</c> is set.
    /// </exception>
    public override IEnumerable<string> Process()
    {
      var format = new AnnotationFormat();

      var items = format.ReadFromFile(_options.ClinicalFile);

      // The output gets an extra leading column holding the sample barcode.
      format.Format.Headers = sampleBarcodeKey + "\t" + format.Format.Headers;

      var itemMap = items.ToDictionary(m => m.BarCode());

      using (StreamReader sr = new StreamReader(_options.DataFile))
      {
        // ReadLine returns null for an empty file; fail with a clear message instead of
        // a NullReferenceException on the Split call.
        var header = sr.ReadLine();
        if (header == null)
        {
          throw new InvalidDataException("Data file is empty, no header line found: " + _options.DataFile);
        }

        // Barcodes are identifiers, so the prefix test is ordinal rather than culture-sensitive.
        var barcodes = header.Split('\t').Where(m => m.StartsWith("TCGA", StringComparison.Ordinal)).ToList();
        List<Annotation> found = new List<Annotation>();
        foreach (var barcode in barcodes)
        {
          // The patient key is the first 12 characters of the sample barcode.
          var patient = barcode.Substring(0, 12);

          Annotation ann;
          if (!itemMap.TryGetValue(patient, out ann))
          {
            if (_options.ThrowException)
            {
              throw new Exception("Cannot find patient information for " + patient);
            }

            // Best effort: report the gap and emit a row with only the barcode columns.
            Console.Error.WriteLine("Cannot find patient information for " + patient);
            ann = new Annotation();
          }

          var curann = new Annotation();
          curann.Annotations[sampleBarcodeKey] = barcode;
          curann.Annotations[TCGAClinicalInformation.BcrPatientBarcode] = patient;

          // Clinical values are copied last, so they overwrite the two keys above
          // when the clinical record contains the same keys.
          foreach (var e in ann.Annotations)
          {
            curann.Annotations[e.Key] = e.Value;
          }
          found.Add(curann);
        }
        format.WriteToFile(_options.OutputFile, found);
      }

      return new[] { _options.OutputFile };
    }
        /// <summary>
        /// Parses a GLMVC result file into <see cref="SomaticItem"/> records.
        /// </summary>
        /// <param name="fileName">File to parse; the text before the first '.' of its name becomes the sample name.</param>
        /// <param name="acceptChromosome">Predicate deciding whether a record on the given chromosome is kept.</param>
        /// <returns>One item per accepted record; <c>Score</c> is -ln of the "brglm_group_fdr" column.</returns>
        /// <exception cref="System.FormatException">A position, count or FDR column is not a valid number.</exception>
        public static List <SomaticItem> ParseGlmvcFile(string fileName, Func <string, bool> acceptChromosome)
        {
            // Machine-written data: parse numbers independently of the current culture so that
            // values such as "0.05" are read correctly on locales using ',' as decimal separator.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            var result = new List <SomaticItem>();

            var bar   = Path.GetFileName(fileName).StringBefore(".");
            var annos = new AnnotationFormat().ReadFromFile(fileName);

            foreach (var ann in annos)
            {
                var chr = ann.Annotations["chr"].ToString();
                if (!acceptChromosome(chr))
                {
                    continue;
                }

                // The FDR is kept both as raw text (LogisticGroupFdr) and as a numeric score.
                var fdr  = ann.Annotations["brglm_group_fdr"].ToString();
                var item = new SomaticItem()
                {
                    Sample            = bar,
                    Chrom             = chr,
                    StartPosition     = int.Parse(ann.Annotations["start"].ToString(), invariant),
                    RefAllele         = ann.Annotations["major_allele"].ToString(),
                    AltAllele         = ann.Annotations["minor_allele"].ToString(),
                    NormalMajorCount  = int.Parse(ann.Annotations["normal_major_count"].ToString(), invariant),
                    NormalMinorCount  = int.Parse(ann.Annotations["normal_minor_count"].ToString(), invariant),
                    TumorMajorCount   = int.Parse(ann.Annotations["tumor_major_count"].ToString(), invariant),
                    TumorMinorCount   = int.Parse(ann.Annotations["tumor_minor_count"].ToString(), invariant),
                    LogisticScore     = ann.Annotations["brglm_score"].ToString(),
                    LogisticStrand    = ann.Annotations["brglm_strand"].ToString(),
                    LogisticPosition  = ann.Annotations["brglm_position"].ToString(),
                    LogisticGroupFdr  = fdr,
                    Score             = -Math.Log(double.Parse(fdr, invariant)),
                    RefGeneFunc       = GetDictionaryValue(ann.Annotations, "annovar_Func.refGene", string.Empty),
                    RefGeneName       = GetDictionaryValue(ann.Annotations, "annovar_Gene.refGene", string.Empty),
                    RefGeneExonicFunc = GetDictionaryValue(ann.Annotations, "annovar_ExonicFunc.refGene", string.Empty),
                    RefGeneAAChange   = GetDictionaryValue(ann.Annotations, "annovar_AAChange.refGene", string.Empty),
                };

                result.Add(item);
            }
            return(result);
        }
예제 #15
0
        /// <summary>
        /// Reads the first sdrf file found for <paramref name="dir"/> and returns, per data file
        /// name (without extension), a map from column name to a single-element value list.
        /// </summary>
        /// <param name="dir">Directory passed to <c>GetSdrfFiles</c>.</param>
        /// <returns>Outer key: file name without extension; inner key: column name.</returns>
        /// <exception cref="ArgumentException">No sdrf file was found.</exception>
        /// <exception cref="Exception">A row has neither of the two known file-name columns.</exception>
        public Dictionary <string, Dictionary <string, List <string> > > ReadDescriptionFromDirectory(string dir)
        {
            var sdrfFile = GetSdrfFiles(dir);

            if (sdrfFile.Length == 0)
            {
                throw new ArgumentException("Cannot find sdrf file in directory " + dir);
            }

            var rows   = new AnnotationFormat("^#").ReadFromFile(sdrfFile[0]);
            var result = new Dictionary <string, Dictionary <string, List <string> > >();

            foreach (var row in rows)
            {
                // Pick the column that carries the file name; "Array Data File" wins when both exist.
                string nameColumn;
                if (row.Annotations.ContainsKey("Array Data File"))
                {
                    nameColumn = "Array Data File";
                }
                else if (row.Annotations.ContainsKey("GEO asscession number"))
                {
                    nameColumn = "GEO asscession number";
                }
                else
                {
                    throw new Exception("I don't know how to get filename from " + sdrfFile[0]);
                }

                var filename = Path.GetFileNameWithoutExtension(row.Annotations[nameColumn] as string);

                // Every column value is wrapped in its own one-element list.
                var columns = new Dictionary <string, List <string> >();
                foreach (var column in row.Annotations)
                {
                    columns[column.Key] = new List <string> { column.Value as string };
                }

                // A later row with the same file name replaces the earlier one.
                result[filename] = columns;
            }

            return(result);
        }
예제 #16
0
    /// <summary>
    /// Reads the first sdrf file found for <paramref name="dir"/> and returns, per data file
    /// name (without extension), a map from column name to a single-element value list.
    /// </summary>
    /// <param name="dir">Directory passed to <c>GetSdrfFiles</c>.</param>
    /// <returns>Outer key: file name without extension; inner key: column name.</returns>
    /// <exception cref="ArgumentException">No sdrf file was found.</exception>
    /// <exception cref="Exception">A row has neither of the two known file-name columns.</exception>
    public Dictionary<string, Dictionary<string, List<string>>> ReadDescriptionFromDirectory(string dir)
    {
      var sdrfFile = GetSdrfFiles(dir);
      if (sdrfFile.Length == 0)
      {
        throw new ArgumentException("Cannot find sdrf file in directory " + dir);
      }

      var records = new AnnotationFormat("^#").ReadFromFile(sdrfFile[0]);
      var result = new Dictionary<string, Dictionary<string, List<string>>>();

      foreach (var record in records)
      {
        // "Array Data File" is preferred; the GEO column is only consulted when it is absent.
        var hasArrayFile = record.Annotations.ContainsKey("Array Data File");
        if (!hasArrayFile && !record.Annotations.ContainsKey("GEO asscession number"))
        {
          throw new Exception("I don't know how to get filename from " + sdrfFile[0]);
        }

        var rawName = (hasArrayFile
          ? record.Annotations["Array Data File"]
          : record.Annotations["GEO asscession number"]) as string;
        var filename = Path.GetFileNameWithoutExtension(rawName);

        // Every column value is wrapped in its own one-element list.
        var perColumn = new Dictionary<string, List<string>>();
        foreach (var entry in record.Annotations)
        {
          perColumn[entry.Key] = new List<string> { entry.Value as string };
        }

        // A later record with the same file name replaces the earlier one.
        result[filename] = perColumn;
      }

      return result;
    }
예제 #17
0
        /// <summary>
        /// Builds one <see cref="BreastCancerSampleItem"/> for every sdrf row whose sample file
        /// name (without extension) matches a CEL file found in the dataset directory.
        /// </summary>
        /// <param name="datasetDirectory">Directory holding the CEL files and a "*.sdrf.txt" file.</param>
        /// <returns>The matching sample items, in sdrf row order.</returns>
        /// <exception cref="ArgumentException">No sdrf file exists in the directory.</exception>
        public List <BreastCancerSampleItem> ParseDataset(string datasetDirectory)
        {
            var celNames = new HashSet <string>(
                CelFile.GetCelFiles(datasetDirectory, false).Select(f => Path.GetFileNameWithoutExtension(f)));

            var sdrfFile = Directory.GetFiles(datasetDirectory, "*.sdrf.txt");

            if (sdrfFile.Length == 0)
            {
                throw new ArgumentException("Cannot find sdrf file in directory " + datasetDirectory);
            }

            var rows    = new AnnotationFormat("^#").ReadFromFile(sdrfFile[0]);
            var samples = new List <BreastCancerSampleItem>();

            foreach (var a in rows)
            {
                var filename = Path.GetFileNameWithoutExtension(FindValue(a, ColumnName.Sample));

                // Rows without a matching CEL file are ignored.
                if (!celNames.Contains(filename))
                {
                    continue;
                }

                var sample = new BreastCancerSampleItem()
                {
                    Dataset = Path.GetFileName(datasetDirectory),
                    Sample = filename,
                    Age = FindValue(a, ColumnName.Age),
                    ER = new StatusValue(FindValue(a, ColumnName.ER)).Value,
                    PR = new StatusValue(FindValue(a, ColumnName.PR)).Value,
                    HER2 = new StatusValue(FindValue(a, ColumnName.HER2)).Value,
                    Stage = FindValue(a, ColumnName.Stage),
                    TumorStatus = FindValue(a, ColumnName.TumorStage),
                    Grade = FindValue(a, ColumnName.Grade),
                    NodalStatus = FindValue(a, ColumnName.NodalStatus),
                    PCR = FindValue(a, ColumnName.PCR),
                    DFS = FindValue(a, ColumnName.DFS),
                    DFSTime = FindValue(a, ColumnName.DFSTime),
                    RFS = FindValue(a, ColumnName.RFS),
                    RFSTime = FindValue(a, ColumnName.RFSTime),
                    DMFS = FindValue(a, ColumnName.DMFS),
                    DMFSTime = FindValue(a, ColumnName.DMFSTime),
                    OverallSurvival = FindValue(a, ColumnName.OverallServive),
                    DeadOfDisease = FindValue(a, ColumnName.DeadOfDisease)
                };

                samples.Add(sample);
            }

            return samples;
        }
예제 #18
0
    /// <summary>
    /// Builds one <see cref="BreastCancerSampleItem"/> for every sdrf row whose sample file
    /// name (without extension) matches a CEL file found in the dataset directory.
    /// </summary>
    /// <param name="datasetDirectory">Directory holding the CEL files and a "*.sdrf.txt" file.</param>
    /// <returns>The matching sample items, in sdrf row order.</returns>
    /// <exception cref="ArgumentException">No sdrf file exists in the directory.</exception>
    public List<BreastCancerSampleItem> ParseDataset(string datasetDirectory)
    {
      var celFiles = CelFile.GetCelFiles(datasetDirectory, false);
      var files = new HashSet<string>(celFiles.Select(f => Path.GetFileNameWithoutExtension(f)));

      var sdrfFile = Directory.GetFiles(datasetDirectory, "*.sdrf.txt");
      if (sdrfFile.Length == 0)
      {
        throw new ArgumentException("Cannot find sdrf file in directory " + datasetDirectory);
      }

      var ann = new AnnotationFormat("^#").ReadFromFile(sdrfFile[0]);

      // Pair each row with its sample file name, keep rows backed by a CEL file,
      // then project the kept rows into sample items.
      return ann
        .Select(row => new { Row = row, Name = Path.GetFileNameWithoutExtension(FindValue(row, ColumnName.Sample)) })
        .Where(pair => files.Contains(pair.Name))
        .Select(pair => new BreastCancerSampleItem()
        {
          Dataset = Path.GetFileName(datasetDirectory),
          Sample = pair.Name,
          Age = FindValue(pair.Row, ColumnName.Age),
          ER = new StatusValue(FindValue(pair.Row, ColumnName.ER)).Value,
          PR = new StatusValue(FindValue(pair.Row, ColumnName.PR)).Value,
          HER2 = new StatusValue(FindValue(pair.Row, ColumnName.HER2)).Value,
          Stage = FindValue(pair.Row, ColumnName.Stage),
          TumorStatus = FindValue(pair.Row, ColumnName.TumorStage),
          Grade = FindValue(pair.Row, ColumnName.Grade),
          NodalStatus = FindValue(pair.Row, ColumnName.NodalStatus),
          PCR = FindValue(pair.Row, ColumnName.PCR),
          DFS = FindValue(pair.Row, ColumnName.DFS),
          DFSTime = FindValue(pair.Row, ColumnName.DFSTime),
          RFS = FindValue(pair.Row, ColumnName.RFS),
          RFSTime = FindValue(pair.Row, ColumnName.RFSTime),
          DMFS = FindValue(pair.Row, ColumnName.DMFS),
          DMFSTime = FindValue(pair.Row, ColumnName.DMFSTime),
          OverallSurvival = FindValue(pair.Row, ColumnName.OverallServive),
          DeadOfDisease = FindValue(pair.Row, ColumnName.DeadOfDisease)
        })
        .ToList();
    }
예제 #19
0
        /// <summary>
        /// Reads an annotation file and creates one identified spectrum per record.
        /// </summary>
        /// <param name="fileName">Path of the annotation file to read.</param>
        /// <returns>
        /// Spectra carrying the sample name, predicted retention time, charge and theoretical MH
        /// taken from each record.
        /// </returns>
        /// <exception cref="System.FormatException">A retention time, charge or m/z column is not a valid number.</exception>
        public List <IIdentifiedSpectrum> ReadFromFile(string fileName)
        {
            // Machine-written data: parse numbers independently of the current culture so that
            // decimal values are read correctly on locales using ',' as decimal separator.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            var result = new List <IIdentifiedSpectrum>();
            var anns   = new AnnotationFormat().ReadFromFile(fileName);

            foreach (var ann in anns)
            {
                // The peptide sequence is the part of "PeptideId" before the first underscore.
                var peptideId = ann.Annotations["PeptideId"] as string;
                var sequence  = peptideId.StringBefore("_");
                var spec      = new IdentifiedSpectrum();

                // The peptide is constructed with its owning spectrum; only the sequence is set here.
                var pep       = new IdentifiedPeptide(spec);
                pep.Sequence = sequence;

                spec.Query.FileScan.Experimental  = ann.Annotations["Sample"] as string;
                spec.Query.FileScan.RetentionTime = double.Parse(ann.Annotations["PredictionRetentionTime"] as string, invariant);
                spec.Query.FileScan.Charge        = int.Parse(ann.Annotations["Charge"] as string, invariant);
                spec.IsPrecursorMonoisotopic      = true;
                spec.TheoreticalMH = PrecursorUtils.MzToMH(double.Parse(ann.Annotations["TheoreticalMz"] as string, invariant), spec.Query.FileScan.Charge, true);

                result.Add(spec);
            }

            return(result);
        }
예제 #20
0
        /// <summary>
        /// Merges clinical columns from the dataset's sdrf file into <paramref name="sampleMap"/>.
        /// Only rows whose sample file name matches a CEL file in the directory are used; an
        /// entry is created on first sight and a field is overwritten only when
        /// <c>FindValue</c> reports a value for its column.
        /// </summary>
        /// <param name="datasetDirectory">Directory holding the CEL files and a "*.sdrf.txt" file.</param>
        /// <param name="sampleMap">Map from sample name to item, updated in place.</param>
        /// <exception cref="ArgumentException">No sdrf file exists in the directory.</exception>
        public void ParseDataset(string datasetDirectory, Dictionary <string, BreastCancerSampleItem> sampleMap)
        {
            var celNames = new HashSet <string>(
                CelFile.GetCelFiles(datasetDirectory, false).Select(f => Path.GetFileNameWithoutExtension(f)));

            var sdrfFile = Directory.GetFiles(datasetDirectory, "*.sdrf.txt");

            if (sdrfFile.Length == 0)
            {
                throw new ArgumentException("Cannot find sdrf file in directory " + datasetDirectory);
            }

            var rows    = new AnnotationFormat("^#").ReadFromFile(sdrfFile[0]);
            var dataset = Path.GetFileName(datasetDirectory);

            foreach (var a in rows)
            {
                var filename = Path.GetFileNameWithoutExtension(FindValue(a, ColumnName.Sample));

                // Rows without a matching CEL file are ignored.
                if (!celNames.Contains(filename))
                {
                    continue;
                }

                // Reuse the existing entry, or register a new one for this sample.
                BreastCancerSampleItem item;
                if (!sampleMap.TryGetValue(filename, out item))
                {
                    item = new BreastCancerSampleItem();
                    sampleMap[filename] = item;
                    item.Dataset = dataset;
                    item.Sample  = filename;
                }

                string value;

                if (FindValue(a, ColumnName.Age, out value))
                {
                    item.Age = value;
                }

                // Receptor statuses go through StatusValue.TransferStatus before being stored.
                if (FindValue(a, ColumnName.ER, out value))
                {
                    item.ER = StatusValue.TransferStatus(value);
                }

                if (FindValue(a, ColumnName.PR, out value))
                {
                    item.PR = StatusValue.TransferStatus(value);
                }

                if (FindValue(a, ColumnName.HER2, out value))
                {
                    item.HER2 = StatusValue.TransferStatus(value);
                }

                if (FindValue(a, ColumnName.Stage, out value))
                {
                    item.Stage = value;
                }

                if (FindValue(a, ColumnName.TumorStage, out value))
                {
                    item.TumorStatus = value;
                }

                if (FindValue(a, ColumnName.Grade, out value))
                {
                    item.Grade = value;
                }

                if (FindValue(a, ColumnName.NodalStatus, out value))
                {
                    item.NodalStatus = value;
                }

                if (FindValue(a, ColumnName.PCR, out value))
                {
                    item.PCR = value;
                }

                if (FindValue(a, ColumnName.DFS, out value))
                {
                    item.DFS = value;
                }

                if (FindValue(a, ColumnName.DFSTime, out value))
                {
                    item.DFSTime = value;
                }

                if (FindValue(a, ColumnName.RFS, out value))
                {
                    item.RFS = value;
                }

                if (FindValue(a, ColumnName.RFSTime, out value))
                {
                    item.RFSTime = value;
                }

                if (FindValue(a, ColumnName.DMFS, out value))
                {
                    item.DMFS = value;
                }

                if (FindValue(a, ColumnName.DMFSTime, out value))
                {
                    item.DMFSTime = value;
                }

                if (FindValue(a, ColumnName.OverallServive, out value))
                {
                    item.OverallSurvival = value;
                }

                if (FindValue(a, ColumnName.DeadOfDisease, out value))
                {
                    item.DeadOfDisease = value;
                }
            }
        }
예제 #21
0
        /// <summary>
        /// Runs the peptide deuterium pipeline: runs the chromatograph profile builder (output
        /// <c>options.BoundaryOutputFile</c>), runs the deuterium R template (output
        /// <c>options.DeuteriumOutputFile</c>), copies the R results onto the identified spectra,
        /// writes the annotated spectra, and finally writes a peptide-by-time table holding the
        /// median "DeuteriumEnrichmentPercent" per time point to <c>options.OutputFile</c>.
        /// </summary>
        /// <returns>A single-element array containing <c>options.OutputFile</c>.</returns>
        public override IEnumerable <string> Process()
        {
            // Extract chromatograph information.
            var chroOptions = new ChromatographProfileBuilderOptions();

            options.CopyProperties(chroOptions);
            chroOptions.InputFile  = options.InputFile;
            chroOptions.OutputFile = options.BoundaryOutputFile;
            chroOptions.DrawImage  = false;
            var builder = new ChromatographProfileBuilder(chroOptions);

            // An existing boundary file is reused unless an overwrite was requested.
            if (!File.Exists(options.BoundaryOutputFile) || options.Overwrite)
            {
                Progress.SetMessage("Finding envelope ...");
                builder.Progress = this.Progress;
                builder.Process();
            }

            // Calculate deuterium enrichment for each peptide through the R template;
            // an existing result file is reused unless an overwrite was requested.
            if (!File.Exists(options.DeuteriumOutputFile) || options.Overwrite)
            {
                Progress.SetMessage("Calculating deuterium ...");
                var deuteriumOptions = new RTemplateProcessorOptions()
                {
                    InputFile      = options.BoundaryOutputFile,
                    OutputFile     = options.DeuteriumOutputFile,
                    RTemplate      = DeuteriumR,
                    RExecute       = SystemUtils.GetRExecuteLocation(),
                    CreateNoWindow = true
                };

                deuteriumOptions.Parameters.Add("outputImage<-" + (options.DrawImage ? "1" : "0"));
                deuteriumOptions.Parameters.Add("excludeIsotopic0<-" + (options.ExcludeIsotopic0 ? "1" : "0"));

                new RTemplateProcessor(deuteriumOptions)
                {
                    Progress = this.Progress
                }.Process();
            }

            // Index the rows of the R output by their "ChroFile" annotation.
            var deuteriumMap = new AnnotationFormat().ReadFromFile(options.DeuteriumOutputFile).ToDictionary(m => m.Annotations["ChroFile"].ToString());

            // Read old spectra information and drop annotations that a previous run may have left behind.
            var format  = new MascotPeptideTextFormat();
            var spectra = format.ReadFromFile(options.InputFile);

            foreach (var spec in spectra)
            {
                spec.Annotations.Remove("RetentionTime");
                spec.Annotations.Remove("TheoreticalDeuterium");
                spec.Annotations.Remove("ObservedDeuterium");
                spec.Annotations.Remove("NumDeuteriumIncorporated");
                spec.Annotations.Remove("NumExchangableHydrogen");
                spec.Annotations.Remove("DeuteriumEnrichmentPercent");
            }

            var calcSpectra = new List <IIdentifiedSpectrum>();
            var aas         = new Aminoacids();

            foreach (var pep in spectra)
            {
                var filename = Path.GetFileNameWithoutExtension(builder.GetTargetFile(pep));
                if (!deuteriumMap.ContainsKey(filename))
                {
                    // The R output has no row for this spectrum's chromatograph file.
                    continue;
                }

                // Fetch the row once instead of repeating the dictionary lookup for every annotation.
                var deuterium = deuteriumMap[filename].Annotations;

                var numExchangeableHydrogens = aas.ExchangableHAtom(pep.Peptide.PureSequence);

                // The value is produced by R, so it is parsed with the invariant culture; the
                // current culture would misread "1.5" on machines using a decimal comma.
                var numDeuteriumIncorporated = double.Parse(deuterium["NumDeuteriumIncorporated"] as string, System.Globalization.CultureInfo.InvariantCulture);

                pep.Annotations["PeakRetentionTime"]          = deuterium["RetentionTime"];
                pep.Annotations["TheoreticalDeuterium"]       = deuterium["TheoreticalDeuterium"];
                pep.Annotations["ObservedDeuterium"]          = deuterium["ObservedDeuterium"];
                pep.Annotations["NumDeuteriumIncorporated"]   = deuterium["NumDeuteriumIncorporated"];
                pep.Annotations["NumExchangableHydrogen"]     = numExchangeableHydrogens;
                // NOTE(review): no guard for a peptide with zero exchangeable hydrogens - confirm whether that can occur.
                pep.Annotations["DeuteriumEnrichmentPercent"] = numDeuteriumIncorporated / numExchangeableHydrogens;

                calcSpectra.Add(pep);
            }

            format.PeptideFormat.Headers = format.PeptideFormat.Headers + "\tPeakRetentionTime\tTheoreticalDeuterium\tObservedDeuterium\tNumDeuteriumIncorporated\tNumExchangableHydrogen\tDeuteriumEnrichmentPercent";
            format.NotExportSummary      = true;
            format.WriteToFile(GetPeptideDeteriumFile(), calcSpectra);

            // One group per peptide sequence, ordered by sequence.
            var specGroup = calcSpectra.GroupBy(m => m.Peptide.PureSequence).OrderBy(l => l.Key).ToList();

            // Distinct time points, ascending; they become the columns of the output table.
            var times = options.ExperimentalTimeMap.Values.Distinct().OrderBy(m => m).ToArray();

            using (var sw = new StreamWriter(options.OutputFile))
            {
                sw.WriteLine("Peptide\t{0}", (from t in times select t.ToString()).Merge("\t"));

                foreach (var peptide in specGroup)
                {
                    // Spectra of this peptide keyed by the time point their experiment maps to.
                    var curSpectra = peptide.GroupBy(m => options.ExperimentalTimeMap[m.Query.FileScan.Experimental]).ToDictionary(l => l.Key, l => l.ToArray());
                    if (options.PeptideInAllTimePointOnly && times.Any(l => !curSpectra.ContainsKey(l)))
                    {
                        continue;
                    }

                    sw.Write(peptide.Key);

                    foreach (var time in times)
                    {
                        IIdentifiedSpectrum[] spectraAtTime;
                        if (curSpectra.TryGetValue(time, out spectraAtTime))
                        {
                            var deps      = (from spec in spectraAtTime select double.Parse(spec.Annotations["DeuteriumEnrichmentPercent"].ToString())).ToArray();
                            var depMedian = Statistics.Median(deps);
                            sw.Write("\t{0:0.######}", depMedian);
                        }
                        else
                        {
                            // The peptide was not observed at this time point.
                            sw.Write("\tNA");
                        }
                    }
                    sw.WriteLine();
                }
            }

            Progress.SetMessage("Peptide deuterium enrichment calculation finished ...");

            return(new string[] { options.OutputFile });
        }
예제 #22
0
        /// <summary>
        /// Walks every row of the boundary file, loads the matching chromatograph, and derives
        /// per-isotope intensity arrays plus a profile from <c>EmassProfileBuilder</c>.
        /// NOTE(review): the method looks unfinished - it contains an empty loop, the computed
        /// <c>observedIons</c> and <c>isotopicIons</c> are never used, and it always returns null.
        /// </summary>
        /// <returns>Always <c>null</c>.</returns>
        public override IEnumerable <string> Process()
        {
            var annos      = new AnnotationFormat().ReadFromFile(boundaryFile);
            var aas        = new Aminoacids();
            var pbuilder   = new EmassProfileBuilder();
            var chroFormat = new ChromatographProfileTextWriter();

            foreach (var ann in annos)
            {
                // Chromatograph file path is "<ChroDirectory>/<ChroFile>.tsv"; the peptide is the
                // part of "PeptideId" returned by StringBefore("_").
                var chroFile = Path.Combine(ann.Annotations["ChroDirectory"] as string, ann.Annotations["ChroFile"] as string) + ".tsv";
                var peptide  = (ann.Annotations["PeptideId"] as string).StringBefore("_");
                var start    = double.Parse(ann.Annotations["ChroLeft"] as string);
                var end      = double.Parse(ann.Annotations["ChroRight"] as string);

                // Keep only the profiles whose retention time lies inside [start, end].
                var chro         = chroFormat.ReadFromFile(chroFile);
                var chroFiltered = chro.Profiles.Where(l => l.RetentionTime >= start && l.RetentionTime <= end).ToArray();

                // All observed isotopic indices; they start from 1 and are shifted to zero-based here,
                // then de-duplicated and sorted ascending.
                var allIsotopics = (from f in chroFiltered
                                    from s in f
                                    select s.Isotopic - 1).Distinct().OrderBy(l => l).ToArray();

                // Minimum intensity of each isotopic index, taken over the profiles that are long
                // enough to contain it. The result is ordered like allIsotopics.
                var minIntensities = (from iso in allIsotopics
                                      select(from f in chroFiltered
                                             where f.Count > iso
                                             select f[iso].Intensity).Min()).ToArray();

                // How many isotopic peaks should be trusted? Each isotope trace from index 2 on is
                // correlated (Pearson) against the first-peak trace; maxIso becomes the first index
                // whose correlation falls below minPearsonCorrelation.
                // NOTE(review): if no correlation falls below the threshold, maxIso stays at 1 - confirm that is intended.
                var iso0   = (from f in chroFiltered select f.First().Intensity).ToArray();
                int maxIso = 1;
                for (int i = 2; i < allIsotopics.Length; i++)
                {
                    // Profiles too short to contain peak i fall back to minIntensities[i].
                    var isoi = (from f in chroFiltered select f.Count > i ? f[i].Intensity : minIntensities[i]).ToArray();
                    var corr = MathNet.Numerics.Statistics.Correlation.Pearson(iso0, isoi);
                    if (corr < minPearsonCorrelation)
                    {
                        maxIso = i;
                        break;
                    }
                }
                var maxIsoArray = allIsotopics.Where(l => l <= maxIso).ToArray();

                // Observed intensities per profile for the retained isotopic indices.
                // NOTE(review): minIntensities is ordered by position in allIsotopics but is indexed
                // here by isotopic value; the two agree only when allIsotopics is 0, 1, 2, ... without gaps - confirm.
                var observedIons = (from f in chroFiltered
                                    select(from iso in maxIsoArray select f.Count > iso ? f[iso].Intensity : minIntensities[iso]).ToArray()).ToArray();

                var atomComposition = aas.GetPeptideAtomComposition(peptide);
                var hatom           = (int)(Math.Round(aas.ExchangableHAtom(peptide)));
                for (int h2 = 0; h2 < hatom; h2++)
                {
                    // Intentionally empty in the current code; presumably a placeholder - TODO confirm.
                }


                // Profile for the peptide's atom composition, copied into IsotopicIon objects.
                // The result is not used afterwards.
                var profiles     = pbuilder.GetProfile(atomComposition, 1, 3);
                var isotopicIons = (from peak in profiles
                                    select new IsotopicIon()
                {
                    Mz = peak.Mz,
                    Intensity = peak.Intensity,
                }).ToArray();
            }

            // No output files are reported.
            return(null);
        }
예제 #23
0
    /// <summary>
    /// Reads an annotation file and converts every row whose chromosome is accepted into a
    /// <see cref="SomaticItem"/>.
    /// </summary>
    /// <param name="fileName">
    /// Path of the annotation file. The sample name is taken from its file name via
    /// <c>StringBefore(".")</c>.
    /// </param>
    /// <param name="acceptChromosome">Predicate applied to the "chr" value; rows it rejects are skipped.</param>
    /// <returns>The converted items, in file order.</returns>
    public static List<SomaticItem> ParseGlmvcFile(string fileName, Func<string, bool> acceptChromosome)
    {
      var items = new List<SomaticItem>();

      var sampleName = Path.GetFileName(fileName).StringBefore(".");
      foreach (var row in new AnnotationFormat().ReadFromFile(fileName))
      {
        var fields = row.Annotations;

        // Small accessors for the repeated "look up, stringify, parse" pattern.
        Func<string, string> text = key => fields[key].ToString();
        Func<string, int> count = key => int.Parse(text(key));

        var chromosome = text("chr");
        if (acceptChromosome(chromosome))
        {
          var groupFdr = text("brglm_group_fdr");

          items.Add(new SomaticItem()
          {
            Sample = sampleName,
            Chrom = chromosome,
            StartPosition = count("start"),
            RefAllele = text("major_allele"),
            AltAllele = text("minor_allele"),
            NormalMajorCount = count("normal_major_count"),
            NormalMinorCount = count("normal_minor_count"),
            TumorMajorCount = count("tumor_major_count"),
            TumorMinorCount = count("tumor_minor_count"),
            LogisticScore = text("brglm_score"),
            LogisticStrand = text("brglm_strand"),
            LogisticPosition = text("brglm_position"),
            LogisticGroupFdr = groupFdr,
            // Score is the negative natural logarithm of the group FDR.
            Score = -Math.Log(double.Parse(groupFdr)),
            // The annovar columns fall back to an empty string.
            RefGeneFunc = GetDictionaryValue(fields, "annovar_Func.refGene", string.Empty),
            RefGeneName = GetDictionaryValue(fields, "annovar_Gene.refGene", string.Empty),
            RefGeneExonicFunc = GetDictionaryValue(fields, "annovar_ExonicFunc.refGene", string.Empty),
            RefGeneAAChange = GetDictionaryValue(fields, "annovar_AAChange.refGene", string.Empty),
          });
        }
      }

      return items;
    }
예제 #24
0
    /// <summary>
    /// Reads the first "*.sdrf.txt" file found in <paramref name="datasetDirectory"/> and copies
    /// the clinical columns of every row whose sample matches a CEL file in that directory into
    /// <paramref name="sampleMap"/>.
    /// </summary>
    /// <param name="datasetDirectory">Directory holding the CEL files and the sdrf file; its last path segment is used as the dataset name.</param>
    /// <param name="sampleMap">Samples keyed by CEL file name without extension; missing entries are created, existing ones are updated in place.</param>
    /// <exception cref="ArgumentException">No "*.sdrf.txt" file exists in the directory.</exception>
    public void ParseDataset(string datasetDirectory, Dictionary<string, BreastCancerSampleItem> sampleMap)
    {
      // Names (without extension) of the CEL files present in the dataset directory.
      var celNames = new HashSet<string>(
        CelFile.GetCelFiles(datasetDirectory, false).Select(f => Path.GetFileNameWithoutExtension(f)));

      var sdrfFiles = Directory.GetFiles(datasetDirectory, "*.sdrf.txt");
      if (sdrfFiles.Length == 0)
      {
        throw new ArgumentException("Cannot find sdrf file in directory " + datasetDirectory);
      }

      var records = new AnnotationFormat("^#").ReadFromFile(sdrfFiles[0]);
      var datasetName = Path.GetFileName(datasetDirectory);
      foreach (var record in records)
      {
        var sampleName = Path.GetFileNameWithoutExtension(FindValue(record, ColumnName.Sample));
        if (!celNames.Contains(sampleName))
        {
          // Row does not correspond to any CEL file of this dataset.
          continue;
        }

        BreastCancerSampleItem item;
        if (!sampleMap.TryGetValue(sampleName, out item))
        {
          item = new BreastCancerSampleItem();
          sampleMap[sampleName] = item;
          item.Dataset = datasetName;
          item.Sample = sampleName;
        }

        // Each property is overwritten only when FindValue reports the column as found.
        string text;
        if (FindValue(record, ColumnName.Age, out text))
        {
          item.Age = text;
        }

        // Receptor statuses go through StatusValue.TransferStatus.
        if (FindValue(record, ColumnName.ER, out text))
        {
          item.ER = StatusValue.TransferStatus(text);
        }

        if (FindValue(record, ColumnName.PR, out text))
        {
          item.PR = StatusValue.TransferStatus(text);
        }

        if (FindValue(record, ColumnName.HER2, out text))
        {
          item.HER2 = StatusValue.TransferStatus(text);
        }

        if (FindValue(record, ColumnName.Stage, out text))
        {
          item.Stage = text;
        }

        // The TumorStage column is stored in the TumorStatus property.
        if (FindValue(record, ColumnName.TumorStage, out text))
        {
          item.TumorStatus = text;
        }

        if (FindValue(record, ColumnName.Grade, out text))
        {
          item.Grade = text;
        }

        if (FindValue(record, ColumnName.NodalStatus, out text))
        {
          item.NodalStatus = text;
        }

        if (FindValue(record, ColumnName.PCR, out text))
        {
          item.PCR = text;
        }

        // Survival end points and their times.
        if (FindValue(record, ColumnName.DFS, out text))
        {
          item.DFS = text;
        }

        if (FindValue(record, ColumnName.DFSTime, out text))
        {
          item.DFSTime = text;
        }

        if (FindValue(record, ColumnName.RFS, out text))
        {
          item.RFS = text;
        }

        if (FindValue(record, ColumnName.RFSTime, out text))
        {
          item.RFSTime = text;
        }

        if (FindValue(record, ColumnName.DMFS, out text))
        {
          item.DMFS = text;
        }

        if (FindValue(record, ColumnName.DMFSTime, out text))
        {
          item.DMFSTime = text;
        }

        if (FindValue(record, ColumnName.OverallServive, out text))
        {
          item.OverallSurvival = text;
        }

        if (FindValue(record, ColumnName.DeadOfDisease, out text))
        {
          item.DeadOfDisease = text;
        }
      }
    }