/// <summary>
/// Reads the items from <c>GetDupItemsStream()</c> with a <c>TopMedReader</c>, passes them through
/// <c>SuppDataUtilities.RemoveConflictingAlleles</c>, and asserts that exactly one item is returned.
/// </summary>
public void RemoveConflictingAlleles_does_not_remove_duplicates()
{
    var seqProvider = ParserTestUtils.GetSequenceProvider(70220313, "TGCC", 'A', _chromDict);

    // TopMedReader is disposable, so scope it with a using statement instead of leaking it.
    using (var topMedReader = new TopMedReader(new StreamReader(GetDupItemsStream()), seqProvider))
    {
        // Materialize the items while the reader is still open.
        var items = topMedReader.GetItems().ToList();

        var saItems = new List<ISupplementaryDataItem>(items);
        saItems = SuppDataUtilities.RemoveConflictingAlleles(saItems, false);

        Assert.Single(saItems);
    }
}
/// <summary>
/// Parses the stream returned by <c>GetStream()</c> with a <c>TopMedReader</c> against a synthetic
/// GRCh37 sequence, then checks that three items are produced and that the first item serializes
/// to the expected JSON fragment.
/// </summary>
public void GetItems_test()
{
    // Synthetic reference: 'T' padding with a few non-'T' bases planted at offsets derived from
    // 10128, 10146 and 10177 (presumably the positions of the test variants - confirm against GetStream()).
    var sequence = new SimpleSequence(
        new string('T', VariantUtils.MaxUpstreamLength)
        + "A"
        + new string('T', 10146 - 10128)
        + "AC"
        + new string('T', 10177 - 10146 - 1)
        + "A",
        10128 - 1 - VariantUtils.MaxUpstreamLength);

    var seqProvider = new SimpleSequenceProvider(GenomeAssembly.GRCh37, sequence, _chromDict);

    // TopMedReader is disposable, so scope it with a using statement instead of leaking it.
    using (var topMedReader = new TopMedReader(new StreamReader(GetStream()), seqProvider))
    {
        // Materialize the items while the reader is still open.
        var items = topMedReader.GetItems().ToList();

        Assert.Equal(3, items.Count);
        Assert.Equal(
            "\"allAf\":0.00266,\"allAn\":125568,\"allAc\":334,\"allHc\":0,\"failedFilter\":true",
            items[0].GetJsonString());
    }
}
/// <summary>
/// Reads the items from the input file with a <c>TopMedReader</c> and writes them through an
/// <c>NsaWriter</c> into a data file and an index file created in the output directory.
/// </summary>
/// <remarks>
/// The output base name is "{Name}_{Version}", taken from the source version read from
/// <c>_inputFile + ".version"</c>. The data file appends <c>SaCommon.SaFileSuffix</c> to it, and the
/// index file additionally appends <c>SaCommon.IndexSufix</c>.
/// </remarks>
/// <returns><c>ExitCodes.Success</c> after the items have been written.</returns>
private static ExitCodes ProgramExecution()
{
    var refProvider   = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference));
    var sourceVersion = DataSourceVersionReader.GetSourceVersion(_inputFile + ".version");

    // Compose the output file names up front; both share the same versioned base name.
    string baseName      = $"{sourceVersion.Name}_{sourceVersion.Version}";
    string dataFileName  = baseName + SaCommon.SaFileSuffix;
    string indexFileName = dataFileName + SaCommon.IndexSufix;

    using (var reader = new TopMedReader(GZipUtilities.GetAppropriateStreamReader(_inputFile), refProvider))
    using (var dataStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, dataFileName)))
    using (var indexStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, indexFileName)))
    using (var writer = new NsaWriter(dataStream, indexStream, sourceVersion, refProvider,
        SaCommon.TopMedTag, true, false, SaCommon.SchemaVersion, false))
    {
        var items = reader.GetItems();
        writer.Write(items);
    }

    return ExitCodes.Success;
}