Пример #1
0
        /// <summary>
        /// Prints the data source version to the console and creates the two underlying writers:
        /// an SaTsvWriter for the main annotations and an SaMiscTsvWriter for the ref-minor data.
        /// </summary>
        /// <param name="version">Data source version; printed and forwarded to both writers.</param>
        /// <param name="outputDirectory">Output location forwarded to both writers.</param>
        /// <param name="genomeAssembly">Genome assembly; forwarded to the writers in its string form.</param>
        /// <param name="sequenceProvider">Sequence provider forwarded to both writers.</param>
        public OnekgTsvWriter(DataSourceVersion version, string outputDirectory, GenomeAssembly genomeAssembly, ISequenceProvider sequenceProvider)
        {
            Console.WriteLine(version.ToString());

            // Both writers receive the same textual assembly name.
            string assemblyName = genomeAssembly.ToString();

            _onekgWriter = new SaTsvWriter(
                outputDirectory,
                version,
                assemblyName,
                SaTsvCommon.OneKgenSchemaVersion,
                InterimSaCommon.OneKgenTag,
                "AF1000G",
                true,
                sequenceProvider);

            _refMinorWriter = new SaMiscTsvWriter(
                outputDirectory,
                version,
                assemblyName,
                InterimSaCommon.RefMinorTag,
                sequenceProvider);
        }
Пример #2
0
        /// <summary>
        /// Prints the data source version to the console and creates the underlying SaTsvWriter.
        /// </summary>
        /// <param name="version">Data source version; printed and forwarded to the writer.</param>
        /// <param name="outputDirectory">Output location forwarded to the writer.</param>
        /// <param name="genomeAssembly">Genome assembly; forwarded to the writer in its string form.</param>
        /// <param name="sequenceProvider">Sequence provider forwarded to the writer.</param>
        /// <param name="sequencingDataType">Key looked up in _jsonKeyDictionary; the mapped value is forwarded to the writer.</param>
        public GnomadTsvWriter(DataSourceVersion version, string outputDirectory, GenomeAssembly genomeAssembly, ISequenceProvider sequenceProvider, string sequencingDataType)
        {
            Console.WriteLine(version.ToString());

            string assemblyName = genomeAssembly.ToString();

            _writer = new SaTsvWriter(
                outputDirectory,
                version,
                assemblyName,
                SaTsvCommon.SchemaVersion,
                _jsonKeyDictionary[sequencingDataType],
                null,
                true,
                sequenceProvider);
        }
Пример #3
0
        /// <summary>
        /// Prints the data source version to the console and creates the underlying SaTsvWriter
        /// configured with the COSMIC schema version and tags.
        /// </summary>
        /// <param name="version">Data source version; printed and forwarded to the writer.</param>
        /// <param name="outputDirectory">Output location forwarded to the writer.</param>
        /// <param name="genomeAssembly">Genome assembly; forwarded to the writer in its string form.</param>
        /// <param name="sequenceProvider">Sequence provider forwarded to the writer.</param>
        public CosmicTsvWriter(DataSourceVersion version, string outputDirectory, GenomeAssembly genomeAssembly, ISequenceProvider sequenceProvider)
        {
            Console.WriteLine(version.ToString());

            string assemblyName = genomeAssembly.ToString();

            _writer = new SaTsvWriter(
                outputDirectory,
                version,
                assemblyName,
                SaTsvCommon.CosmicSchemaVersion,
                InterimSaCommon.CosmicTag,
                InterimSaCommon.CosmicVcfTag,
                false,
                sequenceProvider,
                true);
        }
Пример #4
0
        /// <summary>
        /// Prints the data source version to the console and creates the underlying SaTsvWriter
        /// configured with the TOPMed tag.
        /// </summary>
        /// <param name="version">Data source version; printed and forwarded to the writer.</param>
        /// <param name="outputFileName">Forwarded unchanged as the first argument of the SaTsvWriter constructor.</param>
        /// <param name="genomeAssembly">Genome assembly; forwarded to the writer in its string form.</param>
        /// <param name="sequenceProvider">Sequence provider forwarded to the writer.</param>
        public TopMedTsvWriter(DataSourceVersion version, string outputFileName, GenomeAssembly genomeAssembly, ISequenceProvider sequenceProvider)
        {
            Console.WriteLine(version.ToString());

            string assemblyName = genomeAssembly.ToString();

            _writer = new SaTsvWriter(
                outputFileName,
                version,
                assemblyName,
                SaTsvCommon.SchemaVersion,
                InterimSaCommon.TopMedTag,
                null,
                true,
                sequenceProvider);
        }
Пример #5
0
        /// <summary>
        /// Prints the data source version to the console and creates the underlying SaTsvWriter,
        /// using the version's name as the writer's tag argument.
        /// </summary>
        /// <param name="version">Data source version; printed, forwarded, and its Name is used as the tag.</param>
        /// <param name="outputDirectory">Output location forwarded to the writer.</param>
        /// <param name="genomeAssembly">Genome assembly; forwarded to the writer in its string form.</param>
        /// <param name="isPositional">The negation of this flag is forwarded to the writer.</param>
        /// <param name="sequenceProvider">Sequence provider forwarded to the writer.</param>
        public CustomAnnoTsvWriter(DataSourceVersion version, string outputDirectory, GenomeAssembly genomeAssembly, bool isPositional, ISequenceProvider sequenceProvider)
        {
            Console.WriteLine(version.ToString());

            string assemblyName = genomeAssembly.ToString();

            _writer = new SaTsvWriter(
                outputDirectory,
                version,
                assemblyName,
                SaTsvCommon.CustomItemSchemaVersion,
                version.Name,
                null,
                !isPositional,
                sequenceProvider,
                true);
        }
Пример #6
0
        /// <summary>
        /// Prints the data source version to the console and creates the underlying SaTsvWriter
        /// configured with the ExAC tag.
        /// </summary>
        /// <param name="version">Data source version; printed and forwarded to the writer.</param>
        /// <param name="outputDirectory">Output location forwarded to the writer.</param>
        /// <param name="genomeAssembly">Genome assembly; forwarded to the writer in its string form.</param>
        /// <param name="sequenceProvider">Sequence provider forwarded to the writer.</param>
        public ExacTsvWriter(DataSourceVersion version, string outputDirectory, GenomeAssembly genomeAssembly, ISequenceProvider sequenceProvider)
        {
            Console.WriteLine(version.ToString());

            string assemblyName = genomeAssembly.ToString();

            _writer = new SaTsvWriter(
                outputDirectory,
                version,
                assemblyName,
                SaTsvCommon.OneKgenSchemaVersion,
                InterimSaCommon.ExacTag,
                null,
                true,
                sequenceProvider);
        }
Пример #7
0
        /// <summary>
        /// Builds a ClinvarVariant from a sequence-location XML element.
        /// </summary>
        /// <param name="xElement">Element carrying the assembly, chromosome, start, stop and allele attributes; may be null.</param>
        /// <param name="genomeAssembly">Assembly to accept; GenomeAssembly.Unknown accepts any assembly.</param>
        /// <param name="refChromDict">Maps the chromosome attribute value to its IChromosome.</param>
        /// <param name="variantId">Variant id forwarded to the ClinvarVariant constructor.</param>
        /// <returns>
        /// null when the element is null, when its assembly attribute differs from
        /// <paramref name="genomeAssembly"/> (and that is not Unknown), or when stop - start + 1
        /// exceeds MaxVariantLength; otherwise the new variant. The variant's chromosome is null
        /// when the chromosome attribute is absent or not present in <paramref name="refChromDict"/>.
        /// </returns>
        private static ClinvarVariant GetClinvarVariant(XElement xElement, GenomeAssembly genomeAssembly, IDictionary<string, IChromosome> refChromDict, int? variantId)
        {
            if (xElement == null)
            {
                return null;
            }
            //<SequenceLocation Assembly="GRCh38" Chr="17" Accession="NC_000017.11" start="43082402" stop="43082402" variantLength="1" referenceAllele="A" alternateAllele="C" />

            if (genomeAssembly.ToString() != xElement.Attribute(AssemblyTag)?.Value &&
                genomeAssembly != GenomeAssembly.Unknown)
            {
                return null;
            }

            // A missing chromosome attribute yields a null name. Dictionary lookups reject null keys
            // with ArgumentNullException, so guard first and treat it like an unknown chromosome.
            string      chromosomeName = xElement.Attribute(ChrTag)?.Value;
            IChromosome chromosome     = null;
            if (chromosomeName != null)
            {
                // Single lookup; leaves chromosome null when the name is not in the dictionary.
                refChromDict.TryGetValue(chromosomeName, out chromosome);
            }

            // Convert.ToInt32(string) returns 0 for a null string, i.e. for an absent attribute.
            int    start           = Convert.ToInt32(xElement.Attribute(StartTag)?.Value);
            int    stop            = Convert.ToInt32(xElement.Attribute(StopTag)?.Value);
            string referenceAllele = xElement.Attribute(RefAlleleTag)?.Value;
            string altAllele       = xElement.Attribute(AltAlleleTag)?.Value;

            if (stop - start + 1 > MaxVariantLength)
            {
                return null;
            }

            AdjustVariant(ref start, ref referenceAllele, ref altAllele);

            return new ClinvarVariant(chromosome, start, stop, variantId, referenceAllele, altAllele);
        }
        /// <summary>
        /// Appends a RemoteFile entry for <paramref name="path"/> to <paramref name="files"/>.
        /// The local path is saDirectory / genomeAssembly / file name, and the description is
        /// the file name followed by the assembly in parentheses.
        /// </summary>
        /// <param name="files">Collection that receives the new entry.</param>
        /// <param name="genomeAssembly">Assembly used for the local sub-directory and the description.</param>
        /// <param name="saDirectory">Root directory of the local path.</param>
        /// <param name="path">Remote path; its file-name part is reused locally.</param>
        private static void AddFile(this ICollection<RemoteFile> files, GenomeAssembly genomeAssembly, string saDirectory, string path)
        {
            string name        = Path.GetFileName(path);
            string destination = Path.Combine(saDirectory, genomeAssembly.ToString(), name);
            string label       = $"{name} ({genomeAssembly})";

            var entry = new RemoteFile(path, destination, label);
            files.Add(entry);
        }
Пример #9
0
        /// <summary>
        /// Appends a RemoteFile entry for the cache file "Both.{type}.ndb" to <paramref name="files"/>.
        /// The remote path is remoteCacheDirectory/DataVersion/genomeAssembly/file name, and the
        /// local path is cacheDirectory / genomeAssembly / file name.
        /// </summary>
        /// <param name="files">Collection that receives the new entry.</param>
        /// <param name="genomeAssembly">Assembly used in both paths and in the description.</param>
        /// <param name="remoteCacheDirectory">Root of the remote path.</param>
        /// <param name="cacheDirectory">Root of the local path.</param>
        /// <param name="type">Middle segment of the cache file name.</param>
        private static void AddCache(this ICollection<RemoteFile> files, GenomeAssembly genomeAssembly,
                                     string remoteCacheDirectory, string cacheDirectory, string type)
        {
            string name        = $"Both.{type}.ndb";
            string source      = $"{remoteCacheDirectory}/{CacheConstants.DataVersion}/{genomeAssembly}/{name}";
            string destination = Path.Combine(cacheDirectory, genomeAssembly.ToString(), name);
            string label       = $"{name} ({genomeAssembly})";

            var entry = new RemoteFile(source, destination, label);
            files.Add(entry);
        }
Пример #10
0
        /// <summary>
        /// Creates an interval TSV for one structural-variant data source and reports the
        /// elapsed time via TsvWriterUtilities.WriteCompleteInfo.
        /// </summary>
        /// <param name="sourceName">
        /// One of InterimSaCommon.DgvTag, ClinGenTag or OnekSvTag. Any other value prints
        /// "invalid source name"; the completion info is still written, with an empty source name.
        /// </param>
        /// <param name="fileName">Input file; a null or empty name makes the method return immediately.</param>
        private void CreateSvTsv(string sourceName, string fileName)
        {
            if (string.IsNullOrEmpty(fileName))
            {
                return;
            }

            var    stopwatch     = new Benchmark();
            string sourceLabel   = "";
            var    sourceVersion = DataSourceVersionReader.GetSourceVersion(fileName);

            switch (sourceName)
            {
                case InterimSaCommon.DgvTag:
                {
                    sourceLabel = "DGV";
                    using (var tsvWriter = new IntervalTsvWriter(_outputDirectory, sourceVersion, _genomeAssembly.ToString(),
                                                                 SaTsvCommon.DgvSchemaVersion, InterimSaCommon.DgvTag,
                                                                 ReportFor.StructuralVariants))
                    {
                        var dgvItems = new DgvReader(new FileInfo(fileName), _refNamesDictionary).GetDgvItems();
                        CreateSvTsv(dgvItems, tsvWriter);
                    }
                    break;
                }

                case InterimSaCommon.ClinGenTag:
                {
                    sourceLabel = "ClinGen";
                    using (var tsvWriter = new IntervalTsvWriter(_outputDirectory, sourceVersion, _genomeAssembly.ToString(),
                                                                 SaTsvCommon.ClinGenSchemaVersion, InterimSaCommon.ClinGenTag,
                                                                 ReportFor.StructuralVariants))
                    {
                        var clinGenItems = new ClinGenReader(new FileInfo(fileName), _refNamesDictionary).GetClinGenItems();
                        CreateSvTsv(clinGenItems, tsvWriter);
                    }
                    break;
                }

                case InterimSaCommon.OnekSvTag:
                {
                    sourceLabel = "OnekSv";
                    using (var tsvWriter = new IntervalTsvWriter(_outputDirectory, sourceVersion, _genomeAssembly.ToString(),
                                                                 SaTsvCommon.OneKgenSchemaVersion, InterimSaCommon.OnekSvTag,
                                                                 ReportFor.StructuralVariants))
                    {
                        var onekSvItems = new OneKGenSvReader(new FileInfo(fileName), _refNamesDictionary).GetOneKGenSvItems();
                        CreateSvTsv(onekSvItems, tsvWriter);
                    }
                    break;
                }

                default:
                {
                    Console.WriteLine("invalid source name");
                    break;
                }
            }

            var elapsed = Benchmark.ToHumanReadable(stopwatch.GetElapsedTime());
            TsvWriterUtilities.WriteCompleteInfo(sourceLabel, sourceVersion.Version, elapsed);
        }
Пример #11
0
        /// <summary>
        /// Builds a ClinvarVariant from the attributes of a sequence-location element, e.g.
        /// &lt;SequenceLocation Assembly="GRCh38" Chr="17" Accession="NC_000017.11" start="43082402" stop="43082402" variantLength="1" referenceAllele="A" alternateAllele="C" /&gt;
        /// </summary>
        /// <param name="xmlElement">Element whose attributes are scanned.</param>
        /// <param name="genomeAssembly">Assembly to accept; GenomeAssembly.Unknown accepts any assembly.</param>
        /// <returns>
        /// null when the element's Children is null or when an "Assembly" attribute differs from
        /// <paramref name="genomeAssembly"/> (and that is not Unknown); otherwise the new variant.
        /// Attributes that are not present leave their value at null (strings) or 0 (positions).
        /// </returns>
        private static ClinvarVariant GetClinvarVariant(LiteXmlElement xmlElement, GenomeAssembly genomeAssembly)
        {
            if (xmlElement.Children == null)
            {
                return null;
            }

            int    begin     = 0;
            int    end       = 0;
            string chrName   = null;
            string refAllele = null;
            string alt       = null;

            foreach (var pair in xmlElement.Attributes)
            {
                switch (pair.Key)
                {
                    case "Assembly":
                    {
                        // A location on another assembly is rejected as soon as it is seen.
                        bool acceptsAny = genomeAssembly == GenomeAssembly.Unknown;
                        if (!acceptsAny && pair.Value != genomeAssembly.ToString())
                        {
                            return null;
                        }
                        break;
                    }

                    case "Chr":
                        chrName = pair.Value;
                        break;

                    case "display_start":
                        begin = Convert.ToInt32(pair.Value);
                        break;

                    case "display_stop":
                        end = Convert.ToInt32(pair.Value);
                        break;

                    case "referenceAllele":
                        refAllele = pair.Value;
                        break;

                    case "alternateAllele":
                        alt = pair.Value;
                        break;
                }
            }

            AdjustVariant(ref begin, ref end, ref refAllele, ref alt);

            return new ClinvarVariant(chrName, begin, end, refAllele, alt);
        }
Пример #12
0
        /// <summary>
        /// Writes every entry read from the CNV stream (when one is set) to an interval TSV, then
        /// reports the elapsed time via TsvWriterUtilities.WriteCompleteInfo under the source name "COSMIC".
        /// The completion info is written even when no CNV stream is set.
        /// </summary>
        public void CreateTsv()
        {
            const string dataSource = "COSMIC";
            var          stopwatch  = new Benchmark();

            if (_cnvStream != null)
            {
                using (var tsvWriter = new IntervalTsvWriter(_outputDirectory, _version, _genomeAssembly.ToString(),
                                                             SaTsvCommon.CosmicSvSchemaVersion, InterimSaCommon.CosmicCnvTag,
                                                             ReportFor.StructuralVariants))
                using (var reader = new CosmicCnvReader(_cnvStream, _refNameToChorm, _genomeAssembly))
                {
                    foreach (var entry in reader.GetEntries())
                    {
                        var chromosomeName = entry.Chromosome.EnsemblName;
                        tsvWriter.AddEntry(chromosomeName, entry.Start, entry.End, entry.GetJsonString());
                    }
                }
            }

            var elapsed = Benchmark.ToHumanReadable(stopwatch.GetElapsedTime());
            TsvWriterUtilities.WriteCompleteInfo(dataSource, _version.Version, elapsed);
        }
Пример #13
0
 /// <summary>
 /// Builds the cache path prefix: the cache folder for <paramref name="baseUrl"/>, combined with
 /// the assembly name and then with LambdaUrlHelper.DefaultCacheSource.
 /// </summary>
 /// <param name="genomeAssembly">Assembly whose string form becomes the second path segment.</param>
 /// <param name="baseUrl">Passed unchanged to LambdaUrlHelper.GetCacheFolder; defaults to null.</param>
 /// <returns>The combined URL prefix.</returns>
 public static string GetCachePathPrefix(GenomeAssembly genomeAssembly, string baseUrl = null)
 {
     var cacheFolder    = LambdaUrlHelper.GetCacheFolder(baseUrl);
     var assemblyFolder = cacheFolder.UrlCombine(genomeAssembly.ToString());

     return assemblyFolder.UrlCombine(LambdaUrlHelper.DefaultCacheSource);
 }
Пример #14
0
 /// <summary>
 /// Creates an SaTsvWriter configured with the ClinVar schema version and tags, passes it to
 /// another constructor of this class, then prints the data source version to the console.
 /// </summary>
 /// <param name="version">Data source version; forwarded to the writer and printed.</param>
 /// <param name="outputDirectory">Output location forwarded to the writer.</param>
 /// <param name="genomeAssembly">Genome assembly; forwarded to the writer in its string form.</param>
 /// <param name="sequenceProvider">Sequence provider forwarded to the writer.</param>
 public ClinvarTsvWriter(DataSourceVersion version, string outputDirectory, GenomeAssembly genomeAssembly, ISequenceProvider sequenceProvider)
     : this(new SaTsvWriter(
                outputDirectory,
                version,
                genomeAssembly.ToString(),
                SaTsvCommon.ClinvarSchemaVersion,
                InterimSaCommon.ClinvarTag,
                InterimSaCommon.ClinvarVcfTag,
                false,
                sequenceProvider,
                true))
 {
     Console.WriteLine(version.ToString());
 }
Пример #15
0
 /// <summary>
 /// Creates an SaTsvWriter configured with the EVS tags, passes it to another constructor of
 /// this class, then prints the data source version to the console.
 /// </summary>
 /// <param name="version">Data source version; forwarded to the writer and printed.</param>
 /// <param name="outputDirectory">Output location forwarded to the writer.</param>
 /// <param name="genomeAssembly">Genome assembly; forwarded to the writer in its string form.</param>
 /// <param name="sequenceProvider">Sequence provider forwarded to the writer.</param>
 public EvsTsvWriter(DataSourceVersion version, string outputDirectory, GenomeAssembly genomeAssembly, ISequenceProvider sequenceProvider)
     : this(new SaTsvWriter(
                outputDirectory,
                version,
                genomeAssembly.ToString(),
                SaTsvCommon.OneKgenSchemaVersion,
                InterimSaCommon.EvsTag,
                InterimSaCommon.EvsVcfTag,
                true,
                sequenceProvider))
 {
     Console.WriteLine(version.ToString());
 }