/// <summary>
/// Reads an annotation file and maps each record onto a <see cref="FilterItem"/>.
/// </summary>
/// <param name="fileName">Path passed to <c>AnnotationFormat.ReadFromFile</c>.</param>
/// <returns>One <see cref="FilterItem"/> per record, in reading order.</returns>
public List<FilterItem> ReadFromFile(string fileName)
{
    var records = new AnnotationFormat().ReadFromFile(fileName);
    var items = new List<FilterItem>();

    foreach (var record in records)
    {
        var fields = record.Annotations;

        // Columns wrapped in int.Parse / double.Parse must hold numeric text;
        // every other column is copied through with an "as string" cast.
        items.Add(new FilterItem
        {
            Chr = fields["chr"] as string,
            Start = fields["start"] as string,
            End = fields["end"] as string,
            MajorAllele = fields["major_allele"] as string,
            MinorAllele = fields["minor_allele"] as string,
            ReferenceAllele = fields["ref_allele"] as string,
            NormalMajorCount = int.Parse(fields["normal_major_count"] as string),
            NormalMinorCount = int.Parse(fields["normal_minor_count"] as string),
            TumorMajorCount = int.Parse(fields["tumor_major_count"] as string),
            TumorMinorCount = int.Parse(fields["tumor_minor_count"] as string),
            FisherGroup = double.Parse(fields["fisher_group"] as string),
            FisherNormal = fields["fisher_normal"] as string,
            BrglmConverged = fields["brglm_converged"] as string,
            BrglmGroup = double.Parse(fields["brglm_group"] as string),
            BrglmScore = fields["brglm_score"] as string,
            BrglmStrand = fields["brglm_strand"] as string,
            BrglmPosition = fields["brglm_position"] as string,
            BrglmGroupFdr = double.Parse(fields["brglm_group_fdr"] as string),
            Filter = fields["filter"] as string,
            Identity = fields["Identity"] as string
        });
    }

    return items;
}
/// <summary>
/// Loads the given annotation file and fills <c>maps</c> with one entry per record:
/// lower-cased "bcr_shipment_portion_uuid" as key, "bcr_sample_barcode" as value.
/// </summary>
/// <param name="clinicShipmentPortionFile">Annotation file that provides both columns.</param>
public FindParticipantProtein(string clinicShipmentPortionFile)
{
    var records = new AnnotationFormat().ReadFromFile(clinicShipmentPortionFile);

    maps = records.ToDictionary(
        record =>
        {
            var uuid = record.Annotations["bcr_shipment_portion_uuid"].ToString();
            return uuid.ToLower();
        },
        record => record.Annotations["bcr_sample_barcode"].ToString());
}
/// <summary>
/// Formats every NAG of a sequence through the single-value overload and appends
/// one space to the builder after each element.
/// </summary>
/// <param name="sb">Builder that receives the output.</param>
/// <param name="nags">Values to format, in order.</param>
/// <param name="annotationFormat">Format forwarded unchanged to the single-value overload.</param>
private static void Format<T>(StringBuilder sb, IEnumerable<T> nags, AnnotationFormat annotationFormat) where T : struct, Enum
{
    foreach (T current in nags)
    {
        // NOTE(review): the space is appended for every element, whether or not the
        // single-value overload wrote anything for it — confirm this is intended.
        StringBuilder target = Format(sb, current, annotationFormat);
        target.Append(" ");
    }
}
/// <summary>
/// Renders MoveNAG, PositionalNAGs, TimeTroubleNAG and NonStandardNAGs (in that order)
/// into a single string using the requested format.
/// </summary>
/// <param name="format">Format handed to each <c>Format</c> call.</param>
/// <returns>The concatenated text with leading and trailing whitespace trimmed.</returns>
public string ToString(AnnotationFormat format)
{
    var text = new StringBuilder();

    Format(text, MoveNAG, format);
    Format(text, PositionalNAGs, format);
    Format(text, TimeTroubleNAG, format);
    Format(text, NonStandardNAGs, format);

    string joined = text.ToString();
    return joined.Trim();
}
/// <summary>
/// Reads the mouse UniProt example with source column 0 and target columns 1 and 21,
/// then checks the first entry of target index 1 for four known ids.
/// </summary>
public void TestReadingAnnotationsForOneId()
{
    var rawBytes = Encoding.UTF8.GetBytes(Examples.MouseUniprotAnnotations);

    using (var reader = new StreamReader(new MemoryStream(rawBytes)))
    {
        var byFromId = AnnotationFormat
            .ReadMappings(reader, 0, new[] { 1, 21 })
            .ToDictionary(annot => annot.fromId, annot => annot.toIds);

        // Each pair is { fromId, expected value at toIds[1][0] }.
        var expectations = new[]
        {
            new[] { "p34968", "Htr2c" },
            new[] { "b1atn5", "Htr2c" },
            new[] { "q91v24", "Abca7" },
            new[] { "q8k188", "Abhd10" }
        };

        foreach (var pair in expectations)
        {
            Assert.AreEqual(pair[1], byFromId[pair[0]][1][0]);
        }
    }
}
/// <summary>
/// Parses a tab-delimited annotation table into <see cref="SomaticItem"/> records.
/// </summary>
/// <remarks>
/// The last two header columns are treated as the two sample columns. Each value is matched
/// against <paramref name="reg"/>, whose group 1 is parsed as the major count and group 2 as
/// the minor count. The sample with the strictly smaller minor fraction is taken as the normal
/// sample; when the comparison is false (including when a fraction is NaN because both counts
/// are zero) the second sample column is taken as normal.
/// </remarks>
/// <param name="fileName">Input file; lines starting with "#" are skipped when locating the header.</param>
/// <param name="reg">Pattern that captures major (group 1) and minor (group 2) counts.</param>
/// <param name="scorePrefix">Text in the INFO column after which the score is parsed as a double.</param>
/// <param name="acceptChromosome">Predicate on the "Chr" value; rejected records are skipped.</param>
/// <returns>One item per accepted record.</returns>
/// <exception cref="FormatException">A sample column does not match <paramref name="reg"/> or holds a non-numeric count.</exception>
public static List<SomaticItem> ParseAnnovarFile(string fileName, Regex reg, string scorePrefix, Func<string, bool> acceptChromosome)
{
    var result = new List<SomaticItem>();
    var bar = Path.GetFileName(fileName).StringBefore(".");
    var annos = new AnnotationFormat("^#").ReadFromFile(fileName);

    // Only the header row is needed here: stream the file and stop at the first
    // non-comment line instead of loading every line into memory a second time.
    var headers = File.ReadLines(fileName).First(l => !l.StartsWith("#")).Split('\t');

    foreach (var ann in annos)
    {
        // Example header:
        // Chr Start End Ref Alt Func.refGene Gene.refGene GeneDetail.refGene ExonicFunc.refGene AAChange.refGene snp138 cosmic70 FILTER INFO FORMAT H_LS-A7-A0D9-01A-31W-A071-09-1 H_LS-A7-A0D9-10A-01W-A071-09-1
        var chr = ann.Annotations["Chr"].ToString();
        if (!acceptChromosome(chr))
        {
            continue;
        }

        var firstColumn = headers[headers.Length - 2];
        int m1Major, m1Minor;
        ParseAlleleCounts(reg, firstColumn, ann.Annotations[firstColumn].ToString(), out m1Major, out m1Minor);

        var secondColumn = headers[headers.Length - 1];
        int m2Major, m2Minor;
        ParseAlleleCounts(reg, secondColumn, ann.Annotations[secondColumn].ToString(), out m2Major, out m2Minor);

        var isNormalFirst = (((double)m1Minor) / (m1Major + m1Minor)) < (((double)m2Minor) / (m2Major + m2Minor));

        var info = ann.Annotations["INFO"].ToString();

        var item = new SomaticItem()
        {
            Sample = bar,
            Chrom = chr,
            StartPosition = int.Parse(ann.Annotations["Start"].ToString()),
            RefAllele = ann.Annotations["Ref"].ToString(),
            AltAllele = ann.Annotations["Alt"].ToString(),
            NormalMajorCount = isNormalFirst ? m1Major : m2Major,
            NormalMinorCount = isNormalFirst ? m1Minor : m2Minor,
            TumorMajorCount = isNormalFirst ? m2Major : m1Major,
            TumorMinorCount = isNormalFirst ? m2Minor : m1Minor,
            Score = double.Parse(info.StringAfter(scorePrefix)),
            RefGeneFunc = GetDictionaryValue(ann.Annotations, "Func.refGene", string.Empty),
            RefGeneName = GetDictionaryValue(ann.Annotations, "Gene.refGene", string.Empty),
            RefGeneExonicFunc = GetDictionaryValue(ann.Annotations, "ExonicFunc.refGene", string.Empty)
        };

        result.Add(item);
    }

    return result;
}

/// <summary>
/// Extracts the major (group 1) and minor (group 2) counts from one sample column value,
/// failing with a message that names the column when the pattern does not match.
/// </summary>
private static void ParseAlleleCounts(Regex reg, string columnName, string value, out int major, out int minor)
{
    var match = reg.Match(value);
    if (!match.Success)
    {
        // Without this check the failure would surface as int.Parse("") with no context.
        throw new FormatException(string.Format("Cannot extract allele counts from column '{0}': value '{1}' does not match pattern '{2}'", columnName, value, reg));
    }

    major = int.Parse(match.Groups[1].Value);
    minor = int.Parse(match.Groups[2].Value);
}
/// <summary>
/// Converts the records of a tab-delimited annotation table into <see cref="SomaticItem"/> objects.
/// The last two header columns are the sample columns; <paramref name="reg"/> captures the
/// major count in group 1 and the minor count in group 2 of each sample value.
/// </summary>
/// <param name="fileName">Input file; lines starting with "#" are ignored when locating the header.</param>
/// <param name="reg">Pattern applied to both sample columns.</param>
/// <param name="scorePrefix">Text in the INFO column after which the score is parsed.</param>
/// <param name="acceptChromosome">Filter on the "Chr" value; records it rejects are skipped.</param>
/// <returns>One item per accepted record.</returns>
public static List<SomaticItem> ParseAnnovarFile(string fileName, Regex reg, string scorePrefix, Func<string, bool> acceptChromosome)
{
    var sampleName = Path.GetFileName(fileName).StringBefore(".");
    var records = new AnnotationFormat("^#").ReadFromFile(fileName);
    var columnNames = File.ReadAllLines(fileName).First(line => !line.StartsWith("#")).Split('\t');

    var items = new List<SomaticItem>();
    foreach (var record in records)
    {
        var fields = record.Annotations;

        // Example header:
        // Chr Start End Ref Alt Func.refGene Gene.refGene GeneDetail.refGene ExonicFunc.refGene AAChange.refGene snp138 cosmic70 FILTER INFO FORMAT H_LS-A7-A0D9-01A-31W-A071-09-1 H_LS-A7-A0D9-10A-01W-A071-09-1
        var chromosome = fields["Chr"].ToString();
        if (!acceptChromosome(chromosome))
        {
            continue;
        }

        var firstMatch = reg.Match(fields[columnNames[columnNames.Length - 2]].ToString());
        var firstMajor = int.Parse(firstMatch.Groups[1].Value);
        var firstMinor = int.Parse(firstMatch.Groups[2].Value);

        var secondMatch = reg.Match(fields[columnNames[columnNames.Length - 1]].ToString());
        var secondMajor = int.Parse(secondMatch.Groups[1].Value);
        var secondMinor = int.Parse(secondMatch.Groups[2].Value);

        // The sample with the strictly smaller minor fraction is treated as normal.
        double firstMinorFraction = ((double)firstMinor) / (firstMajor + firstMinor);
        double secondMinorFraction = ((double)secondMinor) / (secondMajor + secondMinor);

        int normalMajor, normalMinor, tumorMajor, tumorMinor;
        if (firstMinorFraction < secondMinorFraction)
        {
            normalMajor = firstMajor;
            normalMinor = firstMinor;
            tumorMajor = secondMajor;
            tumorMinor = secondMinor;
        }
        else
        {
            normalMajor = secondMajor;
            normalMinor = secondMinor;
            tumorMajor = firstMajor;
            tumorMinor = firstMinor;
        }

        var info = fields["INFO"].ToString();

        items.Add(new SomaticItem
        {
            Sample = sampleName,
            Chrom = chromosome,
            StartPosition = int.Parse(fields["Start"].ToString()),
            RefAllele = fields["Ref"].ToString(),
            AltAllele = fields["Alt"].ToString(),
            NormalMajorCount = normalMajor,
            NormalMinorCount = normalMinor,
            TumorMajorCount = tumorMajor,
            TumorMinorCount = tumorMinor,
            Score = double.Parse(info.StringAfter(scorePrefix)),
            RefGeneFunc = GetDictionaryValue(fields, "Func.refGene", string.Empty),
            RefGeneName = GetDictionaryValue(fields, "Gene.refGene", string.Empty),
            RefGeneExonicFunc = GetDictionaryValue(fields, "ExonicFunc.refGene", string.Empty)
        });
    }

    return items;
}
/// <summary>
/// Writes the MS/MS entries referenced by the configured site file to a tab-delimited
/// peptide file and reads that file back as identified spectra.
/// </summary>
/// <param name="shortModMap">Modification lookup forwarded to <c>ParseModification</c>.</param>
/// <param name="peptideFile">Output path; it is written and then re-read with <c>MascotPeptideTextFormat</c>.</param>
/// <returns>The spectra read back from <paramref name="peptideFile"/>.</returns>
public List<IIdentifiedSpectrum> ExtractPeptides(Dictionary<string, MaxQuantModificationItem> shortModMap, string peptideFile)
{
    var modifications = MaxQuantModificationList.ReadFromFile(_option.MaxQuantModificationXml);
    var siteRecords = new AnnotationFormat().ReadFromFile(_option.MaxQuantSiteFile);

    // Each site row carries its MS/MS ids as one ';'-separated string.
    var wantedIds = new HashSet<string>(
        siteRecords.SelectMany(site => (site.Annotations["MS/MS IDs"] as string).Split(';')));

    var msmsFormat = new AnnotationFormat();
    var msmsRecords = msmsFormat.ReadFromFile(_option.MaxQuantMSMSFile);
    msmsRecords.RemoveAll(row => !wantedIds.Contains(row.Annotations["id"].ToString()));

    using (var writer = new StreamWriter(peptideFile))
    {
        writer.WriteLine("FileScan\tSequence\tMH+\tDiff(MH+)\tCharge\tScore\tReference\tModification\tRetentionTime");

        foreach (var row in msmsRecords)
        {
            string modification;
            string modifiedSequence;
            ParseModification(row, shortModMap, modifications, out modification, out modifiedSequence);

            var fields = row.Annotations;
            var mh = double.Parse(fields["Mass"].ToString()) + Atom.H.MonoMass;

            // A mass error of "NaN" is written as 0 instead of being converted.
            var ppmText = fields["Mass Error [ppm]"].ToString();
            var diffmh = ppmText.Equals("NaN") ? 0 : PrecursorUtils.ppm2mz(mh, double.Parse(ppmText));

            var rawFile = fields["Raw file"];
            var scanNumber = fields["Scan number"];
            var charge = fields["Charge"];
            var score = fields["Score"];
            var proteins = fields["Proteins"].ToString().Replace(";", "/");
            var retentionTime = fields["Retention time"];

            writer.WriteLine("{0},{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}\t{8}\t{9}",
                rawFile,
                scanNumber,
                modifiedSequence,
                mh,
                diffmh,
                charge,
                score,
                proteins,
                modification,
                retentionTime);
        }
    }

    return new MascotPeptideTextFormat().ReadFromFile(peptideFile);
}
/// <summary>
/// Appends the textual form of one NAG, followed by a space, to the builder.
/// Nothing is appended when the underlying value is 0 or the text is null or whitespace.
/// </summary>
/// <param name="sb">Builder that receives the output.</param>
/// <param name="nag">Enum value to format; its underlying value is cast to <c>int</c>.</param>
/// <param name="annotationFormat">
/// <c>AnnotationFormat.Symbolic</c> uses <c>nag.AsString(_symbolFormat)</c>; any other
/// value yields "$" followed by the underlying value.
/// </param>
/// <returns>The same builder instance, to allow chaining.</returns>
private static StringBuilder Format<T>(StringBuilder sb, T nag, AnnotationFormat annotationFormat) where T : struct, Enum
{
    var numeric = (int)EnumsNET.Enums.GetUnderlyingValue(nag);
    if (numeric == 0)
    {
        return sb;
    }

    string token;
    if (annotationFormat == AnnotationFormat.Symbolic)
    {
        token = nag.AsString(_symbolFormat);
    }
    else
    {
        token = "$" + EnumsNET.Enums.GetUnderlyingValue(nag);
    }

    if (string.IsNullOrWhiteSpace(token))
    {
        return sb;
    }

    sb.Append(token).Append(" ");
    return sb;
}
/// <summary>
/// For each column selection, checks that reading the mouse UniProt example yields exactly
/// the expected source ids, that each id is already lower-case, and that every mapping has
/// one target entry per selected column.
/// </summary>
/// <param name="selection">Target column indices handed to <c>ReadMappings</c>.</param>
public void TestReadingAnnotationsWithDifferentSelections([ValueSource(nameof(_selections))] int[] selection)
{
    var rawBytes = Encoding.UTF8.GetBytes(Examples.MouseUniprotAnnotations);

    using (var reader = new StreamReader(new MemoryStream(rawBytes)))
    {
        var mappings = AnnotationFormat.ReadMappings(reader, 0, selection).ToArray();
        var expectedIds = new[]
        {
            "p34968", "b1atn5", "q5wru6", "q91v24", "q9jl36", "q9dbm0",
            "q6pe15", "q3tmb4", "q8c3s8", "q8c724", "q8k188"
        };

        Assert.AreEqual(expectedIds.Length, mappings.Length);
        CollectionAssert.AreEquivalent(expectedIds, mappings.Select(annot => annot.fromId));

        foreach (var mapping in mappings)
        {
            Assert.AreEqual(mapping.fromId, mapping.fromId.ToLower());
            Assert.AreEqual(mapping.toIds.Length, selection.Length);
        }
    }
}
/// <summary>
/// Joins clinical annotations onto the sample barcodes found in the header of the data file
/// and writes one merged record per sample to the output file.
/// </summary>
/// <remarks>
/// Header cells starting with "TCGA" are taken as sample barcodes; the first 12 characters
/// of a barcode are used as the patient key into the clinical records. When a patient is
/// missing, the method either throws or logs to stderr and emits a record with only the two
/// barcode fields, depending on <c>_options.ThrowException</c>.
/// </remarks>
/// <returns>A single-element sequence holding the output file path.</returns>
/// <exception cref="InvalidDataException">The data file has no header line.</exception>
/// <exception cref="Exception">A patient is missing and <c>_options.ThrowException</c> is set.</exception>
public override IEnumerable<string> Process()
{
    var format = new AnnotationFormat();
    var items = format.ReadFromFile(_options.ClinicalFile);
    format.Format.Headers = sampleBarcodeKey + "\t" + format.Format.Headers;

    var itemMap = items.ToDictionary(m => m.BarCode());

    // Only the first line of the data file is needed, so read it and release the file
    // before doing any further work.
    string headerLine;
    using (StreamReader sr = new StreamReader(_options.DataFile))
    {
        headerLine = sr.ReadLine();
    }

    // ReadLine returns null for an empty file; fail with a clear message instead of a
    // NullReferenceException on the Split call below.
    if (headerLine == null)
    {
        throw new InvalidDataException("Data file is empty: " + _options.DataFile);
    }

    var barcodes = headerLine.Split('\t').Where(m => m.StartsWith("TCGA")).ToList();

    List<Annotation> found = new List<Annotation>();
    foreach (var barcode in barcodes)
    {
        // A cell shorter than 12 characters is used as-is so it reaches the regular
        // "patient not found" handling rather than crashing in Substring.
        var patient = barcode.Length > 12 ? barcode.Substring(0, 12) : barcode;

        Annotation ann;
        if (!itemMap.TryGetValue(patient, out ann))
        {
            if (_options.ThrowException)
            {
                throw new Exception("Cannot find patient information for " + patient);
            }

            Console.Error.WriteLine("Cannot find patient information for " + patient);
            ann = new Annotation();
        }

        var curann = new Annotation();
        curann.Annotations[sampleBarcodeKey] = barcode;
        curann.Annotations[TCGAClinicalInformation.BcrPatientBarcode] = patient;
        foreach (var e in ann.Annotations)
        {
            curann.Annotations[e.Key] = e.Value;
        }

        found.Add(curann);
    }

    format.WriteToFile(_options.OutputFile, found);

    return new[] { _options.OutputFile };
}
/// <summary>
/// Merges clinical annotations with the "TCGA"-prefixed sample barcodes listed in the first
/// line of the data file and writes the merged records to the output file.
/// </summary>
/// <returns>A single-element array holding the output file path.</returns>
public override IEnumerable<string> Process()
{
    var clinicalFormat = new AnnotationFormat();
    var clinicalItems = clinicalFormat.ReadFromFile(_options.ClinicalFile);
    clinicalFormat.Format.Headers = sampleBarcodeKey + "\t" + clinicalFormat.Format.Headers;

    var byPatient = clinicalItems.ToDictionary(m => m.BarCode());

    using (var dataReader = new StreamReader(_options.DataFile))
    {
        var headerCells = dataReader.ReadLine().Split('\t');
        var merged = new List<Annotation>();

        foreach (var sampleBarcode in headerCells.Where(cell => cell.StartsWith("TCGA")))
        {
            // The first 12 characters of the sample barcode are the patient lookup key.
            var patientId = sampleBarcode.Substring(0, 12);

            Annotation clinical;
            var known = byPatient.TryGetValue(patientId, out clinical);
            if (!known)
            {
                if (_options.ThrowException)
                {
                    throw new Exception("Cannot find patient information for " + patientId);
                }

                Console.Error.WriteLine("Cannot find patient information for " + patientId);
                clinical = new Annotation();
            }

            var row = new Annotation();
            row.Annotations[sampleBarcodeKey] = sampleBarcode;
            row.Annotations[TCGAClinicalInformation.BcrPatientBarcode] = patientId;
            foreach (var entry in clinical.Annotations)
            {
                row.Annotations[entry.Key] = entry.Value;
            }

            merged.Add(row);
        }

        clinicalFormat.WriteToFile(_options.OutputFile, merged);
    }

    return new[] { _options.OutputFile };
}
/// <summary>
/// Reads an annotation file and converts every record whose "chr" value is accepted
/// into a <see cref="SomaticItem"/>.
/// </summary>
/// <param name="fileName">Input file; the part of its name returned by <c>StringBefore(".")</c> becomes the sample name.</param>
/// <param name="acceptChromosome">Filter on the "chr" value; records it rejects are skipped.</param>
/// <returns>One item per accepted record; <c>Score</c> is the negated natural log of "brglm_group_fdr".</returns>
public static List<SomaticItem> ParseGlmvcFile(string fileName, Func<string, bool> acceptChromosome)
{
    var items = new List<SomaticItem>();
    var sampleName = Path.GetFileName(fileName).StringBefore(".");
    var records = new AnnotationFormat().ReadFromFile(fileName);

    foreach (var record in records)
    {
        var fields = record.Annotations;

        var chromosome = fields["chr"].ToString();
        if (!acceptChromosome(chromosome))
        {
            continue;
        }

        var groupFdr = fields["brglm_group_fdr"].ToString();

        items.Add(new SomaticItem
        {
            Sample = sampleName,
            Chrom = chromosome,
            StartPosition = int.Parse(fields["start"].ToString()),
            RefAllele = fields["major_allele"].ToString(),
            AltAllele = fields["minor_allele"].ToString(),
            NormalMajorCount = int.Parse(fields["normal_major_count"].ToString()),
            NormalMinorCount = int.Parse(fields["normal_minor_count"].ToString()),
            TumorMajorCount = int.Parse(fields["tumor_major_count"].ToString()),
            TumorMinorCount = int.Parse(fields["tumor_minor_count"].ToString()),
            LogisticScore = fields["brglm_score"].ToString(),
            LogisticStrand = fields["brglm_strand"].ToString(),
            LogisticPosition = fields["brglm_position"].ToString(),
            LogisticGroupFdr = groupFdr,
            Score = -Math.Log(double.Parse(groupFdr)),
            RefGeneFunc = GetDictionaryValue(fields, "annovar_Func.refGene", string.Empty),
            RefGeneName = GetDictionaryValue(fields, "annovar_Gene.refGene", string.Empty),
            RefGeneExonicFunc = GetDictionaryValue(fields, "annovar_ExonicFunc.refGene", string.Empty),
            RefGeneAAChange = GetDictionaryValue(fields, "annovar_AAChange.refGene", string.Empty)
        });
    }

    return items;
}
/// <summary>
/// Reads the first sdrf file found for a directory and returns, per sample file name
/// (without extension), a map from column name to a single-element list with its value.
/// </summary>
/// <param name="dir">Directory passed to <c>GetSdrfFiles</c>.</param>
/// <returns>Outer key: sample name; inner key: column name; inner value: one-element list.</returns>
/// <exception cref="ArgumentException">No sdrf file was found.</exception>
/// <exception cref="Exception">A record has neither of the two name columns.</exception>
public Dictionary<string, Dictionary<string, List<string>>> ReadDescriptionFromDirectory(string dir)
{
    var sdrfFiles = GetSdrfFiles(dir);
    if (sdrfFiles.Length == 0)
    {
        throw new ArgumentException("Cannot find sdrf file in directory " + dir);
    }

    var descriptions = new Dictionary<string, Dictionary<string, List<string>>>();

    foreach (var record in new AnnotationFormat("^#").ReadFromFile(sdrfFiles[0]))
    {
        var fields = record.Annotations;

        // "Array Data File" takes precedence over the GEO column when both exist.
        string nameColumn;
        if (fields.ContainsKey("Array Data File"))
        {
            nameColumn = "Array Data File";
        }
        else if (fields.ContainsKey("GEO asscession number"))
        {
            nameColumn = "GEO asscession number";
        }
        else
        {
            throw new Exception("I don't know how to get filename from " + sdrfFiles[0]);
        }

        var sampleName = Path.GetFileNameWithoutExtension(fields[nameColumn] as string);

        var perColumn = new Dictionary<string, List<string>>();
        foreach (var field in fields)
        {
            perColumn[field.Key] = new List<string> { field.Value as string };
        }

        descriptions[sampleName] = perColumn;
    }

    return descriptions;
}
/// <summary>
/// Loads the first sdrf file of a directory and indexes its records by sample file name
/// (extension removed). Each record becomes a map from column name to a one-element list.
/// </summary>
/// <param name="dir">Directory passed to <c>GetSdrfFiles</c>.</param>
/// <returns>Per-sample column/value maps.</returns>
/// <exception cref="ArgumentException">No sdrf file was found.</exception>
/// <exception cref="Exception">A record has neither "Array Data File" nor "GEO asscession number".</exception>
public Dictionary<string, Dictionary<string, List<string>>> ReadDescriptionFromDirectory(string dir)
{
    var candidates = GetSdrfFiles(dir);
    if (candidates.Length == 0)
    {
        throw new ArgumentException("Cannot find sdrf file in directory " + dir);
    }

    var sdrfPath = candidates[0];
    var rows = new AnnotationFormat("^#").ReadFromFile(sdrfPath);
    var bySample = new Dictionary<string, Dictionary<string, List<string>>>();

    foreach (var row in rows)
    {
        var hasArrayFile = row.Annotations.ContainsKey("Array Data File");
        if (!hasArrayFile && !row.Annotations.ContainsKey("GEO asscession number"))
        {
            throw new Exception("I don't know how to get filename from " + sdrfPath);
        }

        // The array data file column wins when both columns are present.
        var sourceColumn = hasArrayFile ? "Array Data File" : "GEO asscession number";
        var rawName = row.Annotations[sourceColumn] as string;
        var key = Path.GetFileNameWithoutExtension(rawName);

        bySample[key] = row.Annotations.ToDictionary(
            kv => kv.Key,
            kv => new string[] { kv.Value as string }.ToList());
    }

    return bySample;
}
/// <summary>
/// Builds one <see cref="BreastCancerSampleItem"/> for every record of the dataset's first
/// "*.sdrf.txt" file whose sample name matches a CEL file in the same directory.
/// </summary>
/// <param name="datasetDirectory">Directory holding the CEL files and the sdrf file; its last path segment becomes <c>Dataset</c>.</param>
/// <returns>The matching sample items, in record order.</returns>
/// <exception cref="ArgumentException">The directory contains no "*.sdrf.txt" file.</exception>
public List<BreastCancerSampleItem> ParseDataset(string datasetDirectory)
{
    var celNames = new HashSet<string>(
        CelFile.GetCelFiles(datasetDirectory, false).Select(f => Path.GetFileNameWithoutExtension(f)));

    var sdrfFiles = Directory.GetFiles(datasetDirectory, "*.sdrf.txt");
    if (sdrfFiles.Length == 0)
    {
        throw new ArgumentException("Cannot find sdrf file in directory " + datasetDirectory);
    }

    var records = new AnnotationFormat("^#").ReadFromFile(sdrfFiles[0]);
    var samples = new List<BreastCancerSampleItem>();

    foreach (var record in records)
    {
        var sampleName = Path.GetFileNameWithoutExtension(FindValue(record, ColumnName.Sample));
        if (!celNames.Contains(sampleName))
        {
            // Records without a matching CEL file are ignored.
            continue;
        }

        samples.Add(new BreastCancerSampleItem
        {
            Dataset = Path.GetFileName(datasetDirectory),
            Sample = sampleName,
            Age = FindValue(record, ColumnName.Age),
            ER = new StatusValue(FindValue(record, ColumnName.ER)).Value,
            PR = new StatusValue(FindValue(record, ColumnName.PR)).Value,
            HER2 = new StatusValue(FindValue(record, ColumnName.HER2)).Value,
            Stage = FindValue(record, ColumnName.Stage),
            TumorStatus = FindValue(record, ColumnName.TumorStage),
            Grade = FindValue(record, ColumnName.Grade),
            NodalStatus = FindValue(record, ColumnName.NodalStatus),
            PCR = FindValue(record, ColumnName.PCR),
            DFS = FindValue(record, ColumnName.DFS),
            DFSTime = FindValue(record, ColumnName.DFSTime),
            RFS = FindValue(record, ColumnName.RFS),
            RFSTime = FindValue(record, ColumnName.RFSTime),
            DMFS = FindValue(record, ColumnName.DMFS),
            DMFSTime = FindValue(record, ColumnName.DMFSTime),
            OverallSurvival = FindValue(record, ColumnName.OverallServive),
            DeadOfDisease = FindValue(record, ColumnName.DeadOfDisease)
        });
    }

    return samples;
}
/// <summary>
/// Reads the first "*.sdrf.txt" file of a dataset directory and returns a sample item for
/// each record whose sample name (extension removed) matches one of the directory's CEL files.
/// </summary>
/// <param name="datasetDirectory">Dataset directory; its last path segment is stored as <c>Dataset</c>.</param>
/// <returns>The list of matching sample items.</returns>
/// <exception cref="ArgumentException">No "*.sdrf.txt" file exists in the directory.</exception>
public List<BreastCancerSampleItem> ParseDataset(string datasetDirectory)
{
    var knownFiles = new HashSet<string>(
        CelFile.GetCelFiles(datasetDirectory, false).Select(f => Path.GetFileNameWithoutExtension(f)));

    var sdrfCandidates = Directory.GetFiles(datasetDirectory, "*.sdrf.txt");
    if (sdrfCandidates.Length == 0)
    {
        throw new ArgumentException("Cannot find sdrf file in directory " + datasetDirectory);
    }

    var rows = new AnnotationFormat("^#").ReadFromFile(sdrfCandidates[0]);

    return rows
        .Select(row => new { Row = row, Name = Path.GetFileNameWithoutExtension(FindValue(row, ColumnName.Sample)) })
        .Where(entry => knownFiles.Contains(entry.Name))
        .Select(entry => new BreastCancerSampleItem()
        {
            Dataset = Path.GetFileName(datasetDirectory),
            Sample = entry.Name,
            Age = FindValue(entry.Row, ColumnName.Age),
            ER = new StatusValue(FindValue(entry.Row, ColumnName.ER)).Value,
            PR = new StatusValue(FindValue(entry.Row, ColumnName.PR)).Value,
            HER2 = new StatusValue(FindValue(entry.Row, ColumnName.HER2)).Value,
            Stage = FindValue(entry.Row, ColumnName.Stage),
            TumorStatus = FindValue(entry.Row, ColumnName.TumorStage),
            Grade = FindValue(entry.Row, ColumnName.Grade),
            NodalStatus = FindValue(entry.Row, ColumnName.NodalStatus),
            PCR = FindValue(entry.Row, ColumnName.PCR),
            DFS = FindValue(entry.Row, ColumnName.DFS),
            DFSTime = FindValue(entry.Row, ColumnName.DFSTime),
            RFS = FindValue(entry.Row, ColumnName.RFS),
            RFSTime = FindValue(entry.Row, ColumnName.RFSTime),
            DMFS = FindValue(entry.Row, ColumnName.DMFS),
            DMFSTime = FindValue(entry.Row, ColumnName.DMFSTime),
            OverallSurvival = FindValue(entry.Row, ColumnName.OverallServive),
            DeadOfDisease = FindValue(entry.Row, ColumnName.DeadOfDisease)
        })
        .ToList();
}
/// <summary>
/// Reads an annotation file and creates one identified spectrum per record.
/// </summary>
/// <remarks>
/// The peptide sequence is the part of "PeptideId" returned by <c>StringBefore("_")</c>.
/// "TheoreticalMz" is converted to MH through <c>PrecursorUtils.MzToMH</c> using the parsed charge.
/// </remarks>
/// <param name="fileName">Path passed to <c>AnnotationFormat.ReadFromFile</c>.</param>
/// <returns>The spectra, in record order.</returns>
public List<IIdentifiedSpectrum> ReadFromFile(string fileName)
{
    var spectra = new List<IIdentifiedSpectrum>();

    foreach (var record in new AnnotationFormat().ReadFromFile(fileName))
    {
        var fields = record.Annotations;

        var peptideId = fields["PeptideId"] as string;
        var sequence = peptideId.StringBefore("_");

        var spectrum = new IdentifiedSpectrum();

        // The peptide is linked to the spectrum through its constructor argument.
        var peptide = new IdentifiedPeptide(spectrum);
        peptide.Sequence = sequence;

        spectrum.Query.FileScan.Experimental = fields["Sample"] as string;
        spectrum.Query.FileScan.RetentionTime = double.Parse(fields["PredictionRetentionTime"] as string);
        spectrum.Query.FileScan.Charge = int.Parse(fields["Charge"] as string);
        spectrum.IsPrecursorMonoisotopic = true;

        var theoreticalMz = double.Parse(fields["TheoreticalMz"] as string);
        spectrum.TheoreticalMH = PrecursorUtils.MzToMH(theoreticalMz, spectrum.Query.FileScan.Charge, true);

        spectra.Add(spectrum);
    }

    return spectra;
}
/// <summary>
/// Reads the first "*.sdrf.txt" file of a dataset directory and merges its values into
/// <paramref name="sampleMap"/>, creating an entry for each sample that has a matching CEL file.
/// </summary>
/// <remarks>
/// A field of an existing entry is only overwritten when <c>FindValue</c> reports that the
/// record provides a value for that column.
/// </remarks>
/// <param name="datasetDirectory">Dataset directory; its last path segment is stored as <c>Dataset</c> on new entries.</param>
/// <param name="sampleMap">Sample name to item map that is updated in place.</param>
/// <exception cref="ArgumentException">No "*.sdrf.txt" file exists in the directory.</exception>
public void ParseDataset(string datasetDirectory, Dictionary<string, BreastCancerSampleItem> sampleMap)
{
    var celNames = new HashSet<string>(
        CelFile.GetCelFiles(datasetDirectory, false).Select(f => Path.GetFileNameWithoutExtension(f)));

    var sdrfFiles = Directory.GetFiles(datasetDirectory, "*.sdrf.txt");
    if (sdrfFiles.Length == 0)
    {
        throw new ArgumentException("Cannot find sdrf file in directory " + datasetDirectory);
    }

    var records = new AnnotationFormat("^#").ReadFromFile(sdrfFiles[0]);
    var datasetName = Path.GetFileName(datasetDirectory);

    foreach (var record in records)
    {
        var sampleName = Path.GetFileNameWithoutExtension(FindValue(record, ColumnName.Sample));
        if (!celNames.Contains(sampleName))
        {
            continue;
        }

        BreastCancerSampleItem item;
        if (!sampleMap.TryGetValue(sampleName, out item))
        {
            item = new BreastCancerSampleItem();
            sampleMap[sampleName] = item;
            item.Dataset = datasetName;
            item.Sample = sampleName;
        }

        string text;

        if (FindValue(record, ColumnName.Age, out text))
        {
            item.Age = text;
        }

        // The three receptor columns are converted with StatusValue.TransferStatus.
        if (FindValue(record, ColumnName.ER, out text))
        {
            item.ER = StatusValue.TransferStatus(text);
        }

        if (FindValue(record, ColumnName.PR, out text))
        {
            item.PR = StatusValue.TransferStatus(text);
        }

        if (FindValue(record, ColumnName.HER2, out text))
        {
            item.HER2 = StatusValue.TransferStatus(text);
        }

        if (FindValue(record, ColumnName.Stage, out text))
        {
            item.Stage = text;
        }

        if (FindValue(record, ColumnName.TumorStage, out text))
        {
            item.TumorStatus = text;
        }

        if (FindValue(record, ColumnName.Grade, out text))
        {
            item.Grade = text;
        }

        if (FindValue(record, ColumnName.NodalStatus, out text))
        {
            item.NodalStatus = text;
        }

        if (FindValue(record, ColumnName.PCR, out text))
        {
            item.PCR = text;
        }

        if (FindValue(record, ColumnName.DFS, out text))
        {
            item.DFS = text;
        }

        if (FindValue(record, ColumnName.DFSTime, out text))
        {
            item.DFSTime = text;
        }

        if (FindValue(record, ColumnName.RFS, out text))
        {
            item.RFS = text;
        }

        if (FindValue(record, ColumnName.RFSTime, out text))
        {
            item.RFSTime = text;
        }

        if (FindValue(record, ColumnName.DMFS, out text))
        {
            item.DMFS = text;
        }

        if (FindValue(record, ColumnName.DMFSTime, out text))
        {
            item.DMFSTime = text;
        }

        if (FindValue(record, ColumnName.OverallServive, out text))
        {
            item.OverallSurvival = text;
        }

        if (FindValue(record, ColumnName.DeadOfDisease, out text))
        {
            item.DeadOfDisease = text;
        }
    }
}
/// <summary>
/// Runs the deuterium pipeline: builds chromatograph boundaries, runs the R deuterium
/// template, annotates the input spectra with the results, and writes a per-peptide table
/// of median enrichment for each experimental time point.
/// </summary>
/// <remarks>
/// The boundary and deuterium steps are skipped when their output file already exists and
/// <c>options.Overwrite</c> is false. Only spectra whose target file name appears in the
/// deuterium output are kept.
/// </remarks>
/// <returns>A single-element array holding <c>options.OutputFile</c>.</returns>
public override IEnumerable<string> Process()
{
    // Step 1: extract chromatograph information.
    var profileOptions = new ChromatographProfileBuilderOptions();
    options.CopyProperties(profileOptions);
    profileOptions.InputFile = options.InputFile;
    profileOptions.OutputFile = options.BoundaryOutputFile;
    profileOptions.DrawImage = false;

    var builder = new ChromatographProfileBuilder(profileOptions);
    if (!File.Exists(options.BoundaryOutputFile) || options.Overwrite)
    {
        Progress.SetMessage("Finding envelope ...");
        builder.Progress = this.Progress;
        builder.Process();
    }

    // Step 2: calculate deuterium enrichment for each peptide through the R template.
    if (!File.Exists(options.DeuteriumOutputFile) || options.Overwrite)
    {
        Progress.SetMessage("Calculating deuterium ...");

        var rOptions = new RTemplateProcessorOptions()
        {
            InputFile = options.BoundaryOutputFile,
            OutputFile = options.DeuteriumOutputFile,
            RTemplate = DeuteriumR,
            RExecute = SystemUtils.GetRExecuteLocation(),
            CreateNoWindow = true
        };
        rOptions.Parameters.Add("outputImage<-" + (options.DrawImage ? "1" : "0"));
        rOptions.Parameters.Add("excludeIsotopic0<-" + (options.ExcludeIsotopic0 ? "1" : "0"));

        var rProcessor = new RTemplateProcessor(rOptions) { Progress = this.Progress };
        rProcessor.Process();
    }

    var deuteriumByChroFile = new AnnotationFormat()
        .ReadFromFile(options.DeuteriumOutputFile)
        .ToDictionary(m => m.Annotations["ChroFile"].ToString());

    // Step 3: read the original spectra and drop annotations that are recalculated below.
    var format = new MascotPeptideTextFormat();
    var spectra = format.ReadFromFile(options.InputFile);

    var staleColumns = new[]
    {
        "RetentionTime",
        "TheoreticalDeuterium",
        "ObservedDeuterium",
        "NumDeuteriumIncorporated",
        "NumExchangableHydrogen",
        "DeuteriumEnrichmentPercent"
    };
    foreach (var spec in spectra)
    {
        foreach (var column in staleColumns)
        {
            spec.Annotations.Remove(column);
        }
    }

    var calcSpectra = new List<IIdentifiedSpectrum>();
    var aas = new Aminoacids();
    foreach (var pep in spectra)
    {
        var filename = Path.GetFileNameWithoutExtension(builder.GetTargetFile(pep));
        if (!deuteriumByChroFile.ContainsKey(filename))
        {
            continue;
        }

        var measured = deuteriumByChroFile[filename].Annotations;
        var numExchangeableHydrogens = aas.ExchangableHAtom(pep.Peptide.PureSequence);
        var numDeuteriumIncorporated = double.Parse(measured["NumDeuteriumIncorporated"] as string);

        pep.Annotations["PeakRetentionTime"] = measured["RetentionTime"];
        pep.Annotations["TheoreticalDeuterium"] = measured["TheoreticalDeuterium"];
        pep.Annotations["ObservedDeuterium"] = measured["ObservedDeuterium"];
        pep.Annotations["NumDeuteriumIncorporated"] = measured["NumDeuteriumIncorporated"];
        pep.Annotations["NumExchangableHydrogen"] = numExchangeableHydrogens;
        pep.Annotations["DeuteriumEnrichmentPercent"] = numDeuteriumIncorporated / numExchangeableHydrogens;

        calcSpectra.Add(pep);
    }

    format.PeptideFormat.Headers += "\tPeakRetentionTime\tTheoreticalDeuterium\tObservedDeuterium\tNumDeuteriumIncorporated\tNumExchangableHydrogen\tDeuteriumEnrichmentPercent";
    format.NotExportSummary = true;
    format.WriteToFile(GetPeptideDeteriumFile(), calcSpectra);

    // Step 4: one output row per peptide sequence, one column per distinct time point.
    var peptideGroups = calcSpectra.GroupBy(m => m.Peptide.PureSequence).OrderBy(l => l.Key).ToList();
    var times = options.ExperimentalTimeMap.Values.Distinct().OrderBy(m => m).ToArray();

    using (var sw = new StreamWriter(options.OutputFile))
    {
        sw.WriteLine("Peptide\t{0}", times.Select(t => t.ToString()).Merge("\t"));

        foreach (var peptideGroup in peptideGroups)
        {
            var spectraByTime = peptideGroup
                .GroupBy(m => options.ExperimentalTimeMap[m.Query.FileScan.Experimental])
                .ToDictionary(l => l.Key, l => l.ToArray());

            // Optionally skip peptides that were not observed at every time point.
            if (options.PeptideInAllTimePointOnly && !times.All(l => spectraByTime.ContainsKey(l)))
            {
                continue;
            }

            sw.Write(peptideGroup.Key);
            foreach (var time in times)
            {
                if (!spectraByTime.ContainsKey(time))
                {
                    sw.Write("\tNA");
                    continue;
                }

                var enrichments = spectraByTime[time]
                    .Select(spec => double.Parse(spec.Annotations["DeuteriumEnrichmentPercent"].ToString()))
                    .ToArray();
                var median = Statistics.Median(enrichments);
                sw.Write("\t{0:0.######}", median);
            }

            sw.WriteLine();
        }
    }

    Progress.SetMessage("Peptide deuterium enrichment calculation finished ...");

    return new string[] { options.OutputFile };
}
/// <summary>
/// Walks the records of <c>boundaryFile</c> and, for each one, derives observed and
/// theoretical isotopic profiles for the peptide. The computed values are not stored or
/// written anywhere and the method returns <c>null</c>.
/// </summary>
/// <remarks>
/// Per record:
/// 1. The chromatograph file path is "ChroDirectory" combined with "ChroFile" plus ".tsv".
///    The peptide is the part of "PeptideId" returned by StringBefore("_").
/// 2. Profiles are kept when their retention time lies within ["ChroLeft", "ChroRight"], inclusive.
/// 3. The distinct isotopic numbers are collected and shifted by -1 to make them zero-based.
///    For each one, the minimum intensity over the scans that contain it is recorded.
/// 4. Starting at index 2, intensities at each index are correlated (Pearson) with the
///    first-entry intensities. Scans lacking that index contribute the recorded minimum instead.
///    The loop stops at the first index whose correlation is below minPearsonCorrelation and
///    stores it in maxIso. If no index falls below the threshold, maxIso stays at 1.
/// 5. Observed intensities are gathered for all isotopes up to and including maxIso.
/// 6. The peptide atom composition and rounded exchangeable-hydrogen count are computed.
///    The loop over h2 has an empty body.
/// 7. A profile is requested via pbuilder.GetProfile(atomComposition, 1, 3) and copied
///    into IsotopicIon objects.
/// NOTE(review): observedIons and isotopicIons are never used and the method returns null,
/// so this looks like unfinished work — confirm before relying on it.
/// </remarks>
/// <returns>Always <c>null</c>.</returns>
public override IEnumerable <string> Process() { var annos = new AnnotationFormat().ReadFromFile(boundaryFile); var aas = new Aminoacids(); var pbuilder = new EmassProfileBuilder(); var chroFormat = new ChromatographProfileTextWriter(); foreach (var ann in annos) { var chroFile = Path.Combine(ann.Annotations["ChroDirectory"] as string, ann.Annotations["ChroFile"] as string) + ".tsv"; var peptide = (ann.Annotations["PeptideId"] as string).StringBefore("_"); var start = double.Parse(ann.Annotations["ChroLeft"] as string); var end = double.Parse(ann.Annotations["ChroRight"] as string); var chro = chroFormat.ReadFromFile(chroFile); var chroFiltered = chro.Profiles.Where(l => l.RetentionTime >= start && l.RetentionTime <= end).ToArray(); //all observed isotopimers, start from 1, transfer to zero-based var allIsotopics = (from f in chroFiltered from s in f select s.Isotopic - 1).Distinct().OrderBy(l => l).ToArray(); //get minimum intensity of each isotopimer var minIntensities = (from iso in allIsotopics select(from f in chroFiltered where f.Count > iso select f[iso].Intensity).Min()).ToArray(); //how many isotopic should I trust? var iso0 = (from f in chroFiltered select f.First().Intensity).ToArray(); int maxIso = 1; for (int i = 2; i < allIsotopics.Length; i++) { var isoi = (from f in chroFiltered select f.Count > i ? f[i].Intensity : minIntensities[i]).ToArray(); var corr = MathNet.Numerics.Statistics.Correlation.Pearson(iso0, isoi); if (corr < minPearsonCorrelation) { maxIso = i; break; } } var maxIsoArray = allIsotopics.Where(l => l <= maxIso).ToArray(); //get observed profile in each scan var observedIons = (from f in chroFiltered select(from iso in maxIsoArray select f.Count > iso ? 
f[iso].Intensity : minIntensities[iso]).ToArray()).ToArray(); var atomComposition = aas.GetPeptideAtomComposition(peptide); var hatom = (int)(Math.Round(aas.ExchangableHAtom(peptide))); for (int h2 = 0; h2 < hatom; h2++) { } var profiles = pbuilder.GetProfile(atomComposition, 1, 3); var isotopicIons = (from peak in profiles select new IsotopicIon() { Mz = peak.Mz, Intensity = peak.Intensity, }).ToArray(); } return(null); }
/// <summary>
/// Loads an annotation file and produces a <see cref="SomaticItem"/> for each record whose
/// "chr" value passes <paramref name="acceptChromosome"/>.
/// </summary>
/// <param name="fileName">Input file; the sample name is derived from its file name via <c>StringBefore(".")</c>.</param>
/// <param name="acceptChromosome">Predicate applied to the "chr" value of each record.</param>
/// <returns>The converted items; <c>Score</c> is <c>-Math.Log</c> of the parsed "brglm_group_fdr".</returns>
public static List<SomaticItem> ParseGlmvcFile(string fileName, Func<string, bool> acceptChromosome)
{
    var converted = new List<SomaticItem>();
    var sample = Path.GetFileName(fileName).StringBefore(".");
    var rows = new AnnotationFormat().ReadFromFile(fileName);

    foreach (var row in rows)
    {
        var chrom = row.Annotations["chr"].ToString();
        if (acceptChromosome(chrom))
        {
            var fdrText = row.Annotations["brglm_group_fdr"].ToString();

            var item = new SomaticItem();
            item.Sample = sample;
            item.Chrom = chrom;
            item.StartPosition = int.Parse(row.Annotations["start"].ToString());
            item.RefAllele = row.Annotations["major_allele"].ToString();
            item.AltAllele = row.Annotations["minor_allele"].ToString();
            item.NormalMajorCount = int.Parse(row.Annotations["normal_major_count"].ToString());
            item.NormalMinorCount = int.Parse(row.Annotations["normal_minor_count"].ToString());
            item.TumorMajorCount = int.Parse(row.Annotations["tumor_major_count"].ToString());
            item.TumorMinorCount = int.Parse(row.Annotations["tumor_minor_count"].ToString());
            item.LogisticScore = row.Annotations["brglm_score"].ToString();
            item.LogisticStrand = row.Annotations["brglm_strand"].ToString();
            item.LogisticPosition = row.Annotations["brglm_position"].ToString();
            item.LogisticGroupFdr = fdrText;
            item.Score = -Math.Log(double.Parse(fdrText));

            // The annovar_* columns fall back to an empty string through GetDictionaryValue.
            item.RefGeneFunc = GetDictionaryValue(row.Annotations, "annovar_Func.refGene", string.Empty);
            item.RefGeneName = GetDictionaryValue(row.Annotations, "annovar_Gene.refGene", string.Empty);
            item.RefGeneExonicFunc = GetDictionaryValue(row.Annotations, "annovar_ExonicFunc.refGene", string.Empty);
            item.RefGeneAAChange = GetDictionaryValue(row.Annotations, "annovar_AAChange.refGene", string.Empty);

            converted.Add(item);
        }
    }

    return converted;
}
/// <summary>
/// Merges the values of a dataset's first "*.sdrf.txt" file into <paramref name="sampleMap"/>.
/// Only records whose sample name matches a CEL file in the directory are considered, and an
/// entry is created on first sight of a sample.
/// </summary>
/// <remarks>
/// Each field is assigned only when <c>FindValue</c> returns true for its column, so values
/// already present on an entry survive records that lack that column.
/// </remarks>
/// <param name="datasetDirectory">Dataset directory; its last path segment is stored as <c>Dataset</c> on new entries.</param>
/// <param name="sampleMap">Map from sample name to item, updated in place.</param>
/// <exception cref="ArgumentException">No "*.sdrf.txt" file exists in the directory.</exception>
public void ParseDataset(string datasetDirectory, Dictionary<string, BreastCancerSampleItem> sampleMap)
{
    var knownFiles = new HashSet<string>(
        CelFile.GetCelFiles(datasetDirectory, false).Select(f => Path.GetFileNameWithoutExtension(f)));

    var sdrfCandidates = Directory.GetFiles(datasetDirectory, "*.sdrf.txt");
    if (sdrfCandidates.Length == 0)
    {
        throw new ArgumentException("Cannot find sdrf file in directory " + datasetDirectory);
    }

    var rows = new AnnotationFormat("^#").ReadFromFile(sdrfCandidates[0]);
    var dataset = Path.GetFileName(datasetDirectory);

    foreach (var row in rows)
    {
        var name = Path.GetFileNameWithoutExtension(FindValue(row, ColumnName.Sample));
        if (!knownFiles.Contains(name))
        {
            continue;
        }

        if (!sampleMap.ContainsKey(name))
        {
            sampleMap[name] = new BreastCancerSampleItem { Dataset = dataset, Sample = name };
        }

        var target = sampleMap[name];
        string found;

        if (FindValue(row, ColumnName.Age, out found)) { target.Age = found; }

        // ER, PR and HER2 go through StatusValue.TransferStatus; the rest are stored as read.
        if (FindValue(row, ColumnName.ER, out found)) { target.ER = StatusValue.TransferStatus(found); }
        if (FindValue(row, ColumnName.PR, out found)) { target.PR = StatusValue.TransferStatus(found); }
        if (FindValue(row, ColumnName.HER2, out found)) { target.HER2 = StatusValue.TransferStatus(found); }

        if (FindValue(row, ColumnName.Stage, out found)) { target.Stage = found; }
        if (FindValue(row, ColumnName.TumorStage, out found)) { target.TumorStatus = found; }
        if (FindValue(row, ColumnName.Grade, out found)) { target.Grade = found; }
        if (FindValue(row, ColumnName.NodalStatus, out found)) { target.NodalStatus = found; }
        if (FindValue(row, ColumnName.PCR, out found)) { target.PCR = found; }
        if (FindValue(row, ColumnName.DFS, out found)) { target.DFS = found; }
        if (FindValue(row, ColumnName.DFSTime, out found)) { target.DFSTime = found; }
        if (FindValue(row, ColumnName.RFS, out found)) { target.RFS = found; }
        if (FindValue(row, ColumnName.RFSTime, out found)) { target.RFSTime = found; }
        if (FindValue(row, ColumnName.DMFS, out found)) { target.DMFS = found; }
        if (FindValue(row, ColumnName.DMFSTime, out found)) { target.DMFSTime = found; }
        if (FindValue(row, ColumnName.OverallServive, out found)) { target.OverallSurvival = found; }
        if (FindValue(row, ColumnName.DeadOfDisease, out found)) { target.DeadOfDisease = found; }
    }
}