/// <summary>
/// Processes the <c>Countries(1000, 37)</c> test set through a "Test" table that nests a
/// "Pages" child table (which in turn nests an "Events" child table) plus a second
/// "Pages2" child table fed with only the first page of each visit, then checks the
/// row counts and "Value" sums of the four resulting tables.
/// </summary>
public void NestedChildTables()
{
    var testSet = TestSets.Countries(1000, 37);

    // The whole definition is built inside the factory lambda so that every
    // invocation of the factory gets its own mapper instance.
    var tables = testSet.Process(
        () => new SimpleTableMapper(
            new TableDefinition("Test")
                .Key("VisitId", scope => scope.Current<IVisitAggregationContext>().Visit.InteractionId)
                .Fact("Value", scope => scope.Current<IVisitAggregationContext>().Visit.Value)
                .Map(
                    scope => scope.Current<IVisitAggregationContext>().Visit.Pages,
                    new TableDefinition("Pages")
                        .Key("Id", scope => scope.Current<PageData>().Item.Id)
                        .Fact("Value", scope => scope.Current<PageData>().PageEvents.Sum(pageEvent => pageEvent.Value))
                        .Map(
                            scope => scope.Current<PageData>().PageEvents,
                            new TableDefinition("Events")
                                .Dimension("Event", scope => scope.Current<PageEventData>().PageEventDefinitionId)
                                .Fact("Value", scope => scope.Current<PageEventData>().Value)))
                .Map(
                    scope => new[] { scope.Current<IVisitAggregationContext>().Visit.Pages.First() },
                    new TableDefinition("Pages2")
                        .Key("Id", scope => scope.Current<PageData>().Item.Id))));

    // Pick the resulting tables out by schema name.
    var visits = tables.FirstOrDefault(table => table.Schema.Name == "Test");
    var pages = tables.FirstOrDefault(table => table.Schema.Name == "Pages");
    var pages2 = tables.FirstOrDefault(table => table.Schema.Name == "Pages2");
    var events = tables.FirstOrDefault(table => table.Schema.Name == "Events");

    Assert.AreEqual(1000, visits.Rows.Count());

    // The "Value" total is expected to be the same at every nesting level.
    Assert.AreEqual(14000, visits.Fields<int>("Value").Sum());
    Assert.AreEqual(14000, pages.Fields<int>("Value").Sum());
    Assert.AreEqual(14000, events.Fields<int>("Value").Sum());

    Assert.AreEqual(3000, pages.Rows.Count());
    Assert.AreEqual(3000, events.Rows.Count());
    Assert.AreEqual(1000, pages2.Rows.Count());
}
/// <summary>
/// Maps a "Test" table whose "Country" field set (created with <c>false</c> as its
/// second argument) itself contains a nested "Country2" field set, and checks that
/// separate "Country" and "Country2" tables are produced and referenced through
/// "CountryId" / "Country2Id" fields.
/// </summary>
public void LookupInLookup()
{
    var testSet = TestSets.Countries(1000, 9);

    var tables = testSet.Process(
        () => new TableDefinition("Test")
            .Dimension("VisitId", s => s.Current<IVisitAggregationContext>().Visit.InteractionId)
            .Map(
                new FieldMapperSet(
                    "Country",
                    false,
                    new IFieldMapper[]
                    {
                        new SimpleFieldMapper(
                            "Country",
                            s => s.Current<IVisitAggregationContext>().Visit.GeoData.Country,
                            typeof(string),
                            FieldType.Dimension),
                        new FieldMapperSet(
                            "Country2",
                            false,
                            new[]
                            {
                                new SimpleFieldMapper(
                                    "Country",
                                    s => s.Current<IVisitAggregationContext>().Visit.GeoData.Country,
                                    typeof(string),
                                    FieldType.Dimension),
                                new SimpleFieldMapper("Count", s => 1, typeof(int), FieldType.Fact),
                            }),
                    })));

    // Pick the resulting tables out by schema name.
    var visits = tables.FirstOrDefault(table => table.Schema.Name == "Test");
    var countries = tables.FirstOrDefault(table => table.Schema.Name == "Country");
    var countries2 = tables.FirstOrDefault(table => table.Schema.Name == "Country2");

    Assert.AreEqual(1000, visits.Rows.Count());
    Assert.AreEqual(9, countries.Rows.Count());
    Assert.AreEqual(9, countries2.Rows.Count());

    // Each level refers to the next one through a long-typed "<Name>Id" field.
    Assert.AreEqual(9, visits.Fields<long>("CountryId").Distinct().Count());
    Assert.AreEqual(9, countries.Fields<long>("Country2Id").Distinct().Count());

    Assert.AreEqual(1000, countries2.Fields<int>("Count").Sum());
}
/// <summary>
/// Maps a "Test" table that only has a <c>Count()</c> field and checks that the
/// "Count" value of its first row is 1000 for the <c>Countries(1000, 37)</c> test set.
/// </summary>
public void Count()
{
    var testSet = TestSets.Countries(1000, 37);

    var tables = testSet.Process(
        () => new SimpleTableMapper(
            new TableDefinition("Test")
                .Count()));
    var table = tables.FirstOrDefault();

    var firstRow = table.Rows.First();
    Assert.AreEqual(1000, table.Field<int>("Count", firstRow));
}
/// <summary>
/// Maps a "Test" table with a single "VisitId" dimension taken from the visit's
/// interaction id and checks that 1000 rows are produced.
/// </summary>
public void RowPerVisitHashKey()
{
    var testSet = TestSets.Countries(1000, 37);

    var tables = testSet.Process(
        () => new SimpleTableMapper(
            new TableDefinition("Test")
                .Dimension("VisitId", scope => scope.Current<IVisitAggregationContext>().Visit.InteractionId)));
    var table = tables.FirstOrDefault();

    Assert.AreEqual(1000, table.Rows.Count());
}
/// <summary>
/// Maps a "Test" table keyed by "VisitId" with a "Value" fact taken from the visit,
/// then checks the row count (1000) and the total of the "Value" field (14000).
/// </summary>
public void FactAggregation()
{
    var testSet = TestSets.Countries(1000, 37);

    var tables = testSet.Process(
        () => new SimpleTableMapper(
            new TableDefinition("Test")
                .Key("VisitId", scope => scope.Current<IVisitAggregationContext>().Visit.InteractionId)
                .Fact("Value", scope => scope.Current<IVisitAggregationContext>().Visit.Value)));
    var table = tables.FirstOrDefault();

    Assert.AreEqual(1000, table.Rows.Count());
    Assert.AreEqual(14000, table.Fields<int>("Value").Sum());
}
/// <summary>
/// Maps a "Test" table keyed by the visit's country with a <c>Count()</c> field and
/// checks that the <c>Countries(1000, 5)</c> test set yields 5 rows whose "Count"
/// values add up to 1000.
/// </summary>
public void CountSum()
{
    var testSet = TestSets.Countries(1000, 5);

    var tables = testSet.Process(
        () => new SimpleTableMapper(
            new TableDefinition("Test")
                .Key("Country", scope => scope.Current<IVisitAggregationContext>().Visit.GeoData.Country)
                .Count()));
    var table = tables.FirstOrDefault();

    // Materialize the rows once before counting them.
    var rows = table.Rows.ToList();

    Assert.AreEqual(5, rows.Count);
    Assert.AreEqual(1000, table.Fields<int>("Count").Sum());
}
/// <summary>
/// Maps a "Test" table keyed by "VisitId" whose "Value" fact is produced through a
/// <c>SplittingFieldMapper</c> driven by a page-condition splitter (page with
/// <c>VisitPageIndex == 2</c>), and checks the sums of the resulting
/// "ValueBefore", "ValueAfter" and "ValueTotal" fields.
/// </summary>
public void Splitter()
{
    var splitter = new PageConditionSplitter((scope, page) => page.VisitPageIndex == 2);
    var testSet = TestSets.Countries(1000, 37);

    var tables = testSet.Process(
        () => new SimpleTableMapper(
            new TableDefinition("Test")
                .Key("VisitId", scope => scope.Current<IVisitAggregationContext>().Visit.InteractionId)
                .Map(
                    new SplittingFieldMapper(
                        splitter,
                        // The factory receives a name that is appended to "Value" to form the field name.
                        (name) => new[]
                        {
                            new SimpleFieldMapper(
                                "Value" + name,
                                scope => scope.Current<IVisitAggregationContext>().Visit.Value,
                                typeof(int),
                                FieldType.Fact)
                        }))));
    var table = tables.FirstOrDefault();

    Assert.AreEqual(1000, table.Rows.Count());
    Assert.AreEqual(5000, table.Fields<int>("ValueBefore").Sum());
    Assert.AreEqual(9000, table.Fields<int>("ValueAfter").Sum());
    Assert.AreEqual(14000, table.Fields<int>("ValueTotal").Sum());
}
/// <summary>
/// Maps a "Test" table with a "Country" field set (created with <c>true</c> as its
/// second argument) holding "Country" and "Region" dimensions, and checks that only
/// one table is produced and that it carries both fields directly
/// (9 distinct countries, 27 distinct regions).
/// </summary>
public void TwoFieldInlineLookup()
{
    var testSet = TestSets.Countries(1000, 9, regionsPerCountry: 3);

    var tables = testSet.Process(
        () => new TableDefinition("Test")
            .Dimension("VisitId", s => s.Current<IVisitAggregationContext>().Visit.InteractionId)
            .Map(
                new FieldMapperSet(
                    "Country",
                    true,
                    new[]
                    {
                        new SimpleFieldMapper(
                            "Country",
                            s => s.Current<IVisitAggregationContext>().Visit.GeoData.Country,
                            typeof(string),
                            FieldType.Dimension),
                        new SimpleFieldMapper(
                            "Region",
                            s => s.Current<IVisitAggregationContext>().Visit.GeoData.Region,
                            typeof(string),
                            FieldType.Dimension),
                    })));

    // No separate lookup table is expected here.
    Assert.AreEqual(1, tables.Count());

    var visits = tables.FirstOrDefault(table => table.Schema.Name == "Test");

    Assert.AreEqual(1000, visits.Rows.Count());
    Assert.AreEqual(9, visits.Fields<string>("Country").Distinct().Count());
    Assert.AreEqual(27, visits.Fields<string>("Region").Distinct().Count());
}
/// <summary>
/// Runs the nested child-table mapping ("Test" -> "Pages" -> "Events", plus "Pages2")
/// with a <c>TableDataBatchWriter</c> backed by a <c>CsvExporter</c> and a batch size
/// chosen so that two batches end up as partition directories under the temporary
/// CSV directory while the remainder stays in memory. It checks the partition and row
/// counts before merging, merges through <c>csv.Export</c>, verifies that disposing the
/// batch writer removes the partition directories, and finally checks the merged tables.
/// </summary>
public void NestedChildTablesWithBatchingInCsv()
{
    var csvDir = Path.Combine(Directory.GetCurrentDirectory(), "~tmp");

    // Start from a clean directory in case an earlier run left files behind.
    if (Directory.Exists(csvDir))
    {
        Directory.Delete(csvDir, true);
    }

    var csv = new CsvExporter(csvDir, binaryPartitions: true);
    var batchWriter = new TableDataBatchWriter(csv);

    // Tracks the explicit Dispose() below so the finally block never disposes twice.
    var batchWriterDisposed = false;

    try
    {
        // 1000 visits.
        // 1 row in Test per visit
        // 3 rows in Events per visit
        // 3 rows in Pages per visit
        // 1 row in Pages2 per visit
        // --------------------------
        // 8 rows per visit = 8000 in total
        // Batch size 2672 (ceiling(1000/3) = 334 visits * 8 rows) gives two disk partitions
        // with 2672 rows each and one with the remaining 2656 rows in memory
        var visitCount = 1000;
        var rowsPerVisit = 8;
        var visitPerBatch = (int)Math.Ceiling(visitCount / 3d);
        var rowsPerFilePartition = visitPerBatch * rowsPerVisit;
        var eventRowsPerVisit = 3;
        var expectedFilePartitions = 2;

        var tables = TestSets.Countries(visitCount, 37).Process(
            () => new SimpleTableMapper(
                new TableDefinition("Test")
                    .Key("VisitId", s => s.Current<IVisitAggregationContext>().Visit.InteractionId)
                    .Fact("Value", s => s.Current<IVisitAggregationContext>().Visit.Value)
                    .Map(
                        s => s.Current<IVisitAggregationContext>().Visit.Pages,
                        new TableDefinition("Pages")
                            .Key("PageId", s => s.Current<PageData>().Item.Id)
                            .Fact("Value", s => s.Current<PageData>().PageEvents.Sum(pe => pe.Value))
                            .Map(
                                s => s.Current<PageData>().PageEvents,
                                new TableDefinition("Events")
                                    .Dimension("Event", s => s.Current<PageEventData>().PageEventDefinitionId)
                                    .Fact("Value", s => s.Current<PageEventData>().Value)))
                    .Map(
                        s => new[] { s.Current<IVisitAggregationContext>().Visit.Pages.First() },
                        new TableDefinition("Pages2")
                            .Key("Id", s => s.Current<PageData>().Item.Id))),
            initializer: p =>
            {
                p.BatchWriter = batchWriter;
                p.BatchSize = rowsPerFilePartition;
            });

        var partitions = new DirectoryInfo(csvDir).GetDirectories().Length;
        Assert.AreEqual(
            expectedFilePartitions,
            partitions,
            string.Format("{0:N0} rows should create 2 file partitions and one in memory", visitCount * rowsPerVisit));

        Assert.AreEqual(
            expectedFilePartitions * eventRowsPerVisit * visitPerBatch,
            batchWriter.Tables.FirstOrDefault(t => t.Schema.Name == "Events").Rows.Count(),
            string.Format("{0:N0} rows in event tables in file partitions", expectedFilePartitions * eventRowsPerVisit * visitPerBatch));

        Assert.AreEqual(
            expectedFilePartitions * rowsPerVisit * visitPerBatch,
            batchWriter.Tables.Sum(t => t.Rows.Count()),
            string.Format("{0:N0} total rows in file partitions", expectedFilePartitions * rowsPerVisit * visitPerBatch));

        Assert.AreEqual(
            visitCount * rowsPerVisit,
            tables.Sum(t => t.Rows.Count()),
            string.Format("{0:N0} rows in file + memory partitions", visitCount * rowsPerVisit));

        // Merge partitions
        tables = csv.Export(tables);

        // Delete partitions
        batchWriter.Dispose();
        batchWriterDisposed = true;

        partitions = new DirectoryInfo(csvDir).GetDirectories().Length;
        Assert.AreEqual(0, partitions, "Temporary partition directories are deleted");

        var visits = tables.FirstOrDefault(t => t.Schema.Name == "Test");
        var pages = tables.FirstOrDefault(t => t.Schema.Name == "Pages");
        var pages2 = tables.FirstOrDefault(t => t.Schema.Name == "Pages2");
        var events = tables.FirstOrDefault(t => t.Schema.Name == "Events");

        Assert.AreEqual(1000, visits.Rows.Count());
        Assert.AreEqual(14000, visits.Fields<int>("Value").Sum());
        Assert.AreEqual(14000, pages.Fields<int>("Value").Sum());
        Assert.AreEqual(14000, events.Fields<int>("Value").Sum());
        Assert.AreEqual(3000, pages.Rows.Count());
        Assert.AreEqual(3000, events.Rows.Count());
        Assert.AreEqual(1000, pages2.Rows.Count());
    }
    finally
    {
        // Runs on failure too, so a failed assertion neither leaves the writer
        // undisposed nor leaves the temporary directory on disk.
        if (!batchWriterDisposed)
        {
            batchWriter.Dispose();
        }

        if (Directory.Exists(csvDir))
        {
            Directory.Delete(csvDir, true);
        }
    }
}