/// <summary>
/// Echoes the data source version to the console and initializes the two
/// underlying writers: a <see cref="SaTsvWriter"/> (key <c>InterimSaCommon.OneKgenTag</c>,
/// vcf key "AF1000G") and a <see cref="SaMiscTsvWriter"/> (key <c>InterimSaCommon.RefMinorTag</c>).
/// </summary>
/// <param name="version">Data source version; printed and handed to both writers.</param>
/// <param name="outputDirectory">Forwarded unchanged to both writers.</param>
/// <param name="genomeAssembly">Genome assembly; its string form is handed to both writers.</param>
/// <param name="sequenceProvider">Forwarded unchanged to both writers.</param>
public OnekgTsvWriter(DataSourceVersion version, string outputDirectory, GenomeAssembly genomeAssembly, ISequenceProvider sequenceProvider)
{
    Console.WriteLine(version.ToString());

    // Both writers receive the same textual assembly name.
    string assemblyName = genomeAssembly.ToString();

    _onekgWriter = new SaTsvWriter(
        outputDirectory,
        version,
        assemblyName,
        SaTsvCommon.OneKgenSchemaVersion,
        InterimSaCommon.OneKgenTag,
        "AF1000G",
        true,
        sequenceProvider);

    _refMinorWriter = new SaMiscTsvWriter(
        outputDirectory,
        version,
        assemblyName,
        InterimSaCommon.RefMinorTag,
        sequenceProvider);
}
/// <summary>
/// Echoes the data source version to the console and creates the underlying
/// <see cref="SaTsvWriter"/>, keyed by the <c>_jsonKeyDictionary</c> entry for
/// <paramref name="sequencingDataType"/>.
/// </summary>
/// <param name="version">Data source version; printed and handed to the writer.</param>
/// <param name="outputDirectory">Forwarded unchanged to the writer.</param>
/// <param name="genomeAssembly">Genome assembly; its string form is handed to the writer.</param>
/// <param name="sequenceProvider">Forwarded unchanged to the writer.</param>
/// <param name="sequencingDataType">Lookup key into <c>_jsonKeyDictionary</c>.</param>
public GnomadTsvWriter(DataSourceVersion version, string outputDirectory, GenomeAssembly genomeAssembly, ISequenceProvider sequenceProvider, string sequencingDataType)
{
    Console.WriteLine(version.ToString());

    // Resolve the JSON key up front; the indexer is what fails for an unregistered data type.
    var jsonKey = _jsonKeyDictionary[sequencingDataType];

    _writer = new SaTsvWriter(
        outputDirectory,
        version,
        genomeAssembly.ToString(),
        SaTsvCommon.SchemaVersion,
        jsonKey,
        null,
        true,
        sequenceProvider);
}
/// <summary>
/// Echoes the data source version to the console and creates the underlying
/// <see cref="SaTsvWriter"/> using the COSMIC schema version, JSON tag and VCF tag constants.
/// </summary>
/// <param name="version">Data source version; printed and handed to the writer.</param>
/// <param name="outputDirectory">Forwarded unchanged to the writer.</param>
/// <param name="genomeAssembly">Genome assembly; its string form is handed to the writer.</param>
/// <param name="sequenceProvider">Forwarded unchanged to the writer.</param>
public CosmicTsvWriter(DataSourceVersion version, string outputDirectory, GenomeAssembly genomeAssembly, ISequenceProvider sequenceProvider)
{
    Console.WriteLine(version.ToString());

    string assemblyName = genomeAssembly.ToString();

    _writer = new SaTsvWriter(
        outputDirectory,
        version,
        assemblyName,
        SaTsvCommon.CosmicSchemaVersion,
        InterimSaCommon.CosmicTag,
        InterimSaCommon.CosmicVcfTag,
        false,
        sequenceProvider,
        true);
}
/// <summary>
/// Echoes the data source version to the console and creates the underlying
/// <see cref="SaTsvWriter"/> keyed by <c>InterimSaCommon.TopMedTag</c>.
/// </summary>
/// <param name="version">Data source version; printed and handed to the writer.</param>
/// <param name="outputFileName">Forwarded unchanged as the writer's first argument.</param>
/// <param name="genomeAssembly">Genome assembly; its string form is handed to the writer.</param>
/// <param name="sequenceProvider">Forwarded unchanged to the writer.</param>
public TopMedTsvWriter(DataSourceVersion version, string outputFileName, GenomeAssembly genomeAssembly, ISequenceProvider sequenceProvider)
{
    Console.WriteLine(version.ToString());

    string assemblyName = genomeAssembly.ToString();

    _writer = new SaTsvWriter(
        outputFileName,
        version,
        assemblyName,
        SaTsvCommon.SchemaVersion,
        InterimSaCommon.TopMedTag,
        null,
        true,
        sequenceProvider);
}
/// <summary>
/// Echoes the data source version to the console and creates the underlying
/// <see cref="SaTsvWriter"/>, keyed by the data source's own name (<c>version.Name</c>).
/// </summary>
/// <param name="version">Data source version; printed, handed to the writer, and its Name is used as the key.</param>
/// <param name="outputDirectory">Forwarded unchanged to the writer.</param>
/// <param name="genomeAssembly">Genome assembly; its string form is handed to the writer.</param>
/// <param name="isPositional">The negation of this flag is handed to the writer.</param>
/// <param name="sequenceProvider">Forwarded unchanged to the writer.</param>
public CustomAnnoTsvWriter(DataSourceVersion version, string outputDirectory, GenomeAssembly genomeAssembly, bool isPositional, ISequenceProvider sequenceProvider)
{
    Console.WriteLine(version.ToString());

    var jsonKey = version.Name;
    bool notPositional = !isPositional;

    _writer = new SaTsvWriter(
        outputDirectory,
        version,
        genomeAssembly.ToString(),
        SaTsvCommon.CustomItemSchemaVersion,
        jsonKey,
        null,
        notPositional,
        sequenceProvider,
        true);
}
/// <summary>
/// Echoes the data source version to the console and creates the underlying
/// <see cref="SaTsvWriter"/> keyed by <c>InterimSaCommon.ExacTag</c>.
/// </summary>
/// <param name="version">Data source version; printed and handed to the writer.</param>
/// <param name="outputDirectory">Forwarded unchanged to the writer.</param>
/// <param name="genomeAssembly">Genome assembly; its string form is handed to the writer.</param>
/// <param name="sequenceProvider">Forwarded unchanged to the writer.</param>
public ExacTsvWriter(DataSourceVersion version, string outputDirectory, GenomeAssembly genomeAssembly, ISequenceProvider sequenceProvider)
{
    Console.WriteLine(version.ToString());

    string assemblyName = genomeAssembly.ToString();

    _writer = new SaTsvWriter(
        outputDirectory,
        version,
        assemblyName,
        SaTsvCommon.OneKgenSchemaVersion,
        InterimSaCommon.ExacTag,
        null,
        true,
        sequenceProvider);
}
/// <summary>
/// Builds a <see cref="ClinvarVariant"/> from the attributes of a sequence-location XML element.
/// </summary>
/// <param name="xElement">Element carrying the assembly, chromosome, start, stop and allele attributes; may be null.</param>
/// <param name="genomeAssembly">
/// Assembly to keep. Elements whose assembly attribute differs are rejected unless this is
/// <c>GenomeAssembly.Unknown</c>.
/// </param>
/// <param name="refChromDict">Maps chromosome names to chromosome objects.</param>
/// <param name="variantId">Variant id forwarded to the created variant.</param>
/// <returns>
/// The variant, or null when <paramref name="xElement"/> is null, the assembly does not match,
/// or <c>stop - start + 1</c> exceeds <c>MaxVariantLength</c>. The variant's chromosome is null
/// when the chromosome attribute is missing or not present in <paramref name="refChromDict"/>.
/// </returns>
private static ClinvarVariant GetClinvarVariant(XElement xElement, GenomeAssembly genomeAssembly, IDictionary<string, IChromosome> refChromDict, int? variantId)
{
    if (xElement == null)
    {
        return null;
    }

    // Example element:
    // <SequenceLocation Assembly="GRCh38" Chr="17" Accession="NC_000017.11" start="43082402" stop="43082402" variantLength="1" referenceAllele="A" alternateAllele="C" />
    if (genomeAssembly.ToString() != xElement.Attribute(AssemblyTag)?.Value && genomeAssembly != GenomeAssembly.Unknown)
    {
        return null;
    }

    // A missing chromosome attribute gives a null name, and dictionary lookups throw on a
    // null key. Treat that case like an unknown chromosome instead of crashing, and use a
    // single TryGetValue rather than ContainsKey followed by the indexer.
    string chromosomeName = xElement.Attribute(ChrTag)?.Value;
    IChromosome chromosome = null;
    if (chromosomeName != null)
    {
        refChromDict.TryGetValue(chromosomeName, out chromosome);
    }

    // Convert.ToInt32 maps a null string (missing attribute) to 0.
    int start = Convert.ToInt32(xElement.Attribute(StartTag)?.Value);
    int stop = Convert.ToInt32(xElement.Attribute(StopTag)?.Value);
    string referenceAllele = xElement.Attribute(RefAlleleTag)?.Value;
    string altAllele = xElement.Attribute(AltAlleleTag)?.Value;

    if (stop - start + 1 > MaxVariantLength)
    {
        return null;
    }

    // start and both alleles are passed by reference, so AdjustVariant may rewrite them.
    AdjustVariant(ref start, ref referenceAllele, ref altAllele);

    return new ClinvarVariant(chromosome, start, stop, variantId, referenceAllele, altAllele);
}
/// <summary>
/// Appends a <see cref="RemoteFile"/> entry for <paramref name="path"/> to <paramref name="files"/>.
/// The local path is <c>saDirectory/genomeAssembly/filename</c>, where the file name is
/// taken from <paramref name="path"/>.
/// </summary>
/// <param name="files">Collection that receives the new entry.</param>
/// <param name="genomeAssembly">Genome assembly; used as a sub-directory name and in the description.</param>
/// <param name="saDirectory">Root of the local path.</param>
/// <param name="path">Remote path of the file; used unchanged as the entry's remote path.</param>
private static void AddFile(this ICollection<RemoteFile> files, GenomeAssembly genomeAssembly, string saDirectory, string path)
{
    string name = Path.GetFileName(path);

    var entry = new RemoteFile(
        path,
        Path.Combine(saDirectory, genomeAssembly.ToString(), name),
        $"{name} ({genomeAssembly})");

    files.Add(entry);
}
/// <summary>
/// Appends a <see cref="RemoteFile"/> entry for the cache file <c>Both.{type}.ndb</c> to
/// <paramref name="files"/>. The remote path is
/// <c>remoteCacheDirectory/CacheConstants.DataVersion/genomeAssembly/filename</c> and the
/// local path is <c>cacheDirectory/genomeAssembly/filename</c>.
/// </summary>
/// <param name="files">Collection that receives the new entry.</param>
/// <param name="genomeAssembly">Genome assembly; used as a path segment and in the description.</param>
/// <param name="remoteCacheDirectory">Root of the remote path.</param>
/// <param name="cacheDirectory">Root of the local path.</param>
/// <param name="type">Inserted into the cache file name.</param>
private static void AddCache(this ICollection<RemoteFile> files, GenomeAssembly genomeAssembly, string remoteCacheDirectory, string cacheDirectory, string type)
{
    string name = $"Both.{type}.ndb";

    var entry = new RemoteFile(
        $"{remoteCacheDirectory}/{CacheConstants.DataVersion}/{genomeAssembly}/{name}",
        Path.Combine(cacheDirectory, genomeAssembly.ToString(), name),
        $"{name} ({genomeAssembly})");

    files.Add(entry);
}
/// <summary>
/// Creates an interval TSV for one structural-variant data source and reports completion.
/// </summary>
/// <remarks>
/// Does nothing when <paramref name="fileName"/> is null or empty. The source version is read
/// with <c>DataSourceVersionReader.GetSourceVersion</c> before the source name is examined.
/// An unrecognised <paramref name="sourceName"/> prints "invalid source name" and returns
/// without a completion report, because nothing was written.
/// </remarks>
/// <param name="sourceName">
/// One of <c>InterimSaCommon.DgvTag</c>, <c>InterimSaCommon.ClinGenTag</c> or
/// <c>InterimSaCommon.OnekSvTag</c>; selects the reader, schema version and writer tag.
/// </param>
/// <param name="fileName">Path of the input file handed to the reader.</param>
private void CreateSvTsv(string sourceName, string fileName)
{
    if (string.IsNullOrEmpty(fileName))
    {
        return;
    }

    var benchMark = new Benchmark();
    var version = DataSourceVersionReader.GetSourceVersion(fileName);
    string dataSource;

    switch (sourceName)
    {
        case InterimSaCommon.DgvTag:
            dataSource = "DGV";
            using (var writer = new IntervalTsvWriter(_outputDirectory, version, _genomeAssembly.ToString(),
                SaTsvCommon.DgvSchemaVersion, InterimSaCommon.DgvTag, ReportFor.StructuralVariants))
            {
                var reader = new DgvReader(new FileInfo(fileName), _refNamesDictionary);
                CreateSvTsv(reader.GetDgvItems(), writer);
            }
            break;

        case InterimSaCommon.ClinGenTag:
            dataSource = "ClinGen";
            using (var writer = new IntervalTsvWriter(_outputDirectory, version, _genomeAssembly.ToString(),
                SaTsvCommon.ClinGenSchemaVersion, InterimSaCommon.ClinGenTag, ReportFor.StructuralVariants))
            {
                var reader = new ClinGenReader(new FileInfo(fileName), _refNamesDictionary);
                CreateSvTsv(reader.GetClinGenItems(), writer);
            }
            break;

        case InterimSaCommon.OnekSvTag:
            dataSource = "OnekSv";
            using (var writer = new IntervalTsvWriter(_outputDirectory, version, _genomeAssembly.ToString(),
                SaTsvCommon.OneKgenSchemaVersion, InterimSaCommon.OnekSvTag, ReportFor.StructuralVariants))
            {
                var reader = new OneKGenSvReader(new FileInfo(fileName), _refNamesDictionary);
                CreateSvTsv(reader.GetOneKGenSvItems(), writer);
            }
            break;

        default:
            // Nothing was written, so skip the completion report rather than emit one
            // with an empty data source name.
            Console.WriteLine("invalid source name");
            return;
    }

    var timeSpan = Benchmark.ToHumanReadable(benchMark.GetElapsedTime());
    TsvWriterUtilities.WriteCompleteInfo(dataSource, version.Version, timeSpan);
}
/// <summary>
/// Builds a <see cref="ClinvarVariant"/> from the attributes of a sequence-location element.
/// </summary>
/// <param name="xmlElement">Element whose attributes are scanned.</param>
/// <param name="genomeAssembly">
/// Assembly to keep. An "Assembly" attribute with a different value rejects the element
/// unless this is <c>GenomeAssembly.Unknown</c>.
/// </param>
/// <returns>
/// The variant, or null when <c>xmlElement.Children</c> is null or the assembly does not match.
/// Attributes that are absent leave the corresponding value at null (strings) or 0 (positions).
/// </returns>
private static ClinvarVariant GetClinvarVariant(LiteXmlElement xmlElement, GenomeAssembly genomeAssembly)
{
    if (xmlElement.Children == null)
    {
        return null;
    }

    // Example element:
    // <SequenceLocation Assembly="GRCh38" Chr="17" Accession="NC_000017.11" start="43082402" stop="43082402" variantLength="1" referenceAllele="A" alternateAllele="C" />
    string chrom = null;
    string refAllele = null;
    string alt = null;
    int begin = 0;
    int end = 0;

    foreach (var attr in xmlElement.Attributes)
    {
        switch (attr.Key)
        {
            case "Assembly":
                bool assemblyAccepted = attr.Value == genomeAssembly.ToString()
                                        || genomeAssembly == GenomeAssembly.Unknown;
                if (!assemblyAccepted)
                {
                    return null;
                }
                break;

            case "Chr":
                chrom = attr.Value;
                break;

            case "display_start":
                begin = Convert.ToInt32(attr.Value);
                break;

            case "display_stop":
                end = Convert.ToInt32(attr.Value);
                break;

            case "referenceAllele":
                refAllele = attr.Value;
                break;

            case "alternateAllele":
                alt = attr.Value;
                break;
        }
    }

    // Positions and alleles are passed by reference, so AdjustVariant may rewrite them.
    AdjustVariant(ref begin, ref end, ref refAllele, ref alt);

    return new ClinvarVariant(chrom, begin, end, refAllele, alt);
}
/// <summary>
/// Writes every entry read from the CNV stream to an interval TSV and then reports
/// completion for the "COSMIC" data source. When <c>_cnvStream</c> is null nothing is
/// written, but the completion report is still emitted.
/// </summary>
public void CreateTsv()
{
    var benchMark = new Benchmark();
    const string dataSource = "COSMIC";

    if (_cnvStream != null)
    {
        using (var writer = new IntervalTsvWriter(
            _outputDirectory,
            _version,
            _genomeAssembly.ToString(),
            SaTsvCommon.CosmicSvSchemaVersion,
            InterimSaCommon.CosmicCnvTag,
            ReportFor.StructuralVariants))
        {
            using (var cnvReader = new CosmicCnvReader(_cnvStream, _refNameToChorm, _genomeAssembly))
            {
                foreach (var entry in cnvReader.GetEntries())
                {
                    var chromosomeName = entry.Chromosome.EnsemblName;
                    var begin = entry.Start;
                    var end = entry.End;
                    var json = entry.GetJsonString();

                    writer.AddEntry(chromosomeName, begin, end, json);
                }
            }
        }
    }

    var elapsed = Benchmark.ToHumanReadable(benchMark.GetElapsedTime());
    TsvWriterUtilities.WriteCompleteInfo(dataSource, _version.Version, elapsed);
}
/// <summary>
/// Builds the cache path prefix by combining, in order, the cache folder returned by
/// <c>LambdaUrlHelper.GetCacheFolder</c>, the genome assembly name, and
/// <c>LambdaUrlHelper.DefaultCacheSource</c>.
/// </summary>
/// <param name="genomeAssembly">Genome assembly; its string form becomes a path segment.</param>
/// <param name="baseUrl">Forwarded to <c>LambdaUrlHelper.GetCacheFolder</c>; defaults to null.</param>
/// <returns>The combined path prefix.</returns>
public static string GetCachePathPrefix(GenomeAssembly genomeAssembly, string baseUrl = null)
{
    var cacheFolder = LambdaUrlHelper.GetCacheFolder(baseUrl);
    var assemblyFolder = cacheFolder.UrlCombine(genomeAssembly.ToString());

    return assemblyFolder.UrlCombine(LambdaUrlHelper.DefaultCacheSource);
}
/// <summary>
/// Creates a <see cref="SaTsvWriter"/> using the ClinVar schema version, JSON tag and VCF tag
/// constants, hands it to the writer-taking constructor, then echoes the data source
/// version to the console.
/// </summary>
/// <param name="version">Data source version; handed to the writer and printed.</param>
/// <param name="outputDirectory">Forwarded unchanged to the writer.</param>
/// <param name="genomeAssembly">Genome assembly; its string form is handed to the writer.</param>
/// <param name="sequenceProvider">Forwarded unchanged to the writer.</param>
public ClinvarTsvWriter(DataSourceVersion version, string outputDirectory, GenomeAssembly genomeAssembly, ISequenceProvider sequenceProvider)
    : this(new SaTsvWriter(
        outputDirectory,
        version,
        genomeAssembly.ToString(),
        SaTsvCommon.ClinvarSchemaVersion,
        InterimSaCommon.ClinvarTag,
        InterimSaCommon.ClinvarVcfTag,
        false,
        sequenceProvider,
        true))
{
    // Runs after the chained constructor, so the writer already exists at this point.
    Console.WriteLine(version.ToString());
}
/// <summary>
/// Creates a <see cref="SaTsvWriter"/> using <c>SaTsvCommon.OneKgenSchemaVersion</c> and the
/// EVS JSON and VCF tag constants, hands it to the writer-taking constructor, then echoes
/// the data source version to the console.
/// </summary>
/// <param name="version">Data source version; handed to the writer and printed.</param>
/// <param name="outputDirectory">Forwarded unchanged to the writer.</param>
/// <param name="genomeAssembly">Genome assembly; its string form is handed to the writer.</param>
/// <param name="sequenceProvider">Forwarded unchanged to the writer.</param>
public EvsTsvWriter(DataSourceVersion version, string outputDirectory, GenomeAssembly genomeAssembly, ISequenceProvider sequenceProvider)
    : this(new SaTsvWriter(
        outputDirectory,
        version,
        genomeAssembly.ToString(),
        SaTsvCommon.OneKgenSchemaVersion,
        InterimSaCommon.EvsTag,
        InterimSaCommon.EvsVcfTag,
        true,
        sequenceProvider))
{
    // Runs after the chained constructor, so the writer already exists at this point.
    Console.WriteLine(version.ToString());
}