// Maps the visits produced by TestSets.Countries(1000, 37) into a "Test" table with nested
// child tables ("Pages" -> "Events", plus "Pages2" built from only the first page of each
// visit) and checks the row counts and "Value" sums of every resulting table.
public void NestedChildTables()
        {
            var tables = TestSets.Countries(1000, 37).Process(
                () => new SimpleTableMapper(new TableDefinition("Test")
                                            .Key("VisitId", s => s.Current<IVisitAggregationContext>().Visit.InteractionId)
                                            .Fact("Value", s => s.Current<IVisitAggregationContext>().Visit.Value)
                                            // One "Pages" row per page of the visit, each with its own "Events" child rows.
                                            .Map(s => s.Current<IVisitAggregationContext>().Visit.Pages,
                                                 new TableDefinition("Pages")
                                                 .Key("Id", s => s.Current<PageData>().Item.Id)
                                                 .Fact("Value", s => s.Current<PageData>().PageEvents.Sum(pe => pe.Value))
                                                 .Map(s => s.Current<PageData>().PageEvents,
                                                      new TableDefinition("Events")
                                                      .Dimension("Event", s => s.Current<PageEventData>().PageEventDefinitionId)
                                                      .Fact("Value", s => s.Current<PageEventData>().Value)))
                                            // A second child table fed with a single-element array: the visit's first page.
                                            .Map(s => new[] { s.Current<IVisitAggregationContext>().Visit.Pages.First() },
                                                 new TableDefinition("Pages2")
                                                 .Key("Id", s => s.Current<PageData>().Item.Id))));

            // First (not FirstOrDefault): a missing table fails at the lookup instead of
            // surfacing as a NullReferenceException inside one of the assertions below.
            var visits = tables.First(t => t.Schema.Name == "Test");
            var pages  = tables.First(t => t.Schema.Name == "Pages");
            var pages2 = tables.First(t => t.Schema.Name == "Pages2");
            var events = tables.First(t => t.Schema.Name == "Events");

            // The same total value is expected at every nesting level.
            Assert.AreEqual(1000, visits.Rows.Count());
            Assert.AreEqual(14000, visits.Fields<int>("Value").Sum());
            Assert.AreEqual(14000, pages.Fields<int>("Value").Sum());
            Assert.AreEqual(14000, events.Fields<int>("Value").Sum());

            Assert.AreEqual(3000, pages.Rows.Count());
            Assert.AreEqual(3000, events.Rows.Count());
            Assert.AreEqual(1000, pages2.Rows.Count());
        }
Пример #2
0
        // Maps visits into a "Test" table that references a "Country" field-mapper set, which
        // in turn contains a nested "Country2" set, and checks that three tables come out
        // linked through "CountryId" / "Country2Id" fields.
        // NOTE(review): the `false` passed to FieldMapperSet appears to mean "not inline"
        // (compare TwoFieldInlineLookup, which passes `true` and gets one table) - confirm.
        public void LookupInLookup()
        {
            var tables = TestSets.Countries(1000, 9).Process(() =>
                new TableDefinition("Test")
                    .Dimension("VisitId", scope => scope.Current<IVisitAggregationContext>().Visit.InteractionId)
                    .Map(new FieldMapperSet("Country", false, new IFieldMapper[]
                    {
                        new SimpleFieldMapper("Country",
                                              scope => scope.Current<IVisitAggregationContext>().Visit.GeoData.Country, typeof(string),
                                              FieldType.Dimension),
                        new FieldMapperSet("Country2", false, new []
                        {
                            new SimpleFieldMapper("Country",
                                                  scope => scope.Current<IVisitAggregationContext>().Visit.GeoData.Country, typeof(string),
                                                  FieldType.Dimension),
                            new SimpleFieldMapper("Count", s => 1, typeof(int), FieldType.Fact),
                        }),
                    })));

            // First (not FirstOrDefault): a missing table fails at the lookup instead of
            // surfacing as a NullReferenceException inside one of the assertions below.
            var visits     = tables.First(t => t.Schema.Name == "Test");
            var countries  = tables.First(t => t.Schema.Name == "Country");
            var countries2 = tables.First(t => t.Schema.Name == "Country2");

            Assert.AreEqual(1000, visits.Rows.Count());
            Assert.AreEqual(9, countries.Rows.Count());
            Assert.AreEqual(9, countries2.Rows.Count());
            Assert.AreEqual(9, visits.Fields<long>("CountryId").Distinct().Count());
            Assert.AreEqual(9, countries.Fields<long>("Country2Id").Distinct().Count());
            // The constant-1 "Count" fact sums to the number of visits.
            Assert.AreEqual(1000, countries2.Fields<int>("Count").Sum());
        }
        // A table definition consisting only of Count() is expected to yield a row whose
        // "Count" field equals the number of processed visits (1000).
        public void Count()
        {
            // First (not FirstOrDefault): an empty result fails here with a clear
            // "sequence contains no elements" error rather than a NullReferenceException below.
            var table = TestSets.Countries(1000, 37).Process(
                () => new SimpleTableMapper(new TableDefinition("Test")
                                            .Count())).First();

            Assert.AreEqual(1000, table.Field<int>("Count", table.Rows.First()));
        }
        // Using the visit's InteractionId as the only dimension is expected to produce
        // exactly one row per visit (1000 rows).
        public void RowPerVisitHashKey()
        {
            // First (not FirstOrDefault): an empty result fails here with a clear
            // "sequence contains no elements" error rather than a NullReferenceException below.
            var table = TestSets.Countries(1000, 37).Process(
                () => new SimpleTableMapper(new TableDefinition("Test")
                                            .Dimension("VisitId", s => s.Current<IVisitAggregationContext>().Visit.InteractionId))).First();

            Assert.AreEqual(1000, table.Rows.Count());
        }
        // Keys the table on the visit's InteractionId with the visit's Value as a fact, and
        // checks both the row count (1000) and the summed "Value" (14000).
        public void FactAggregation()
        {
            // First (not FirstOrDefault): an empty result fails here with a clear
            // "sequence contains no elements" error rather than a NullReferenceException below.
            var table = TestSets.Countries(1000, 37).Process(
                () => new SimpleTableMapper(new TableDefinition("Test")
                                            .Key("VisitId", s => s.Current<IVisitAggregationContext>().Visit.InteractionId)
                                            .Fact("Value", s => s.Current<IVisitAggregationContext>().Visit.Value))).First();

            Assert.AreEqual(1000, table.Rows.Count());
            Assert.AreEqual(14000, table.Fields<int>("Value").Sum());
        }
        // Keys the table on the visit's country and adds Count(): 5 rows are expected, and
        // their "Count" values are expected to add up to the 1000 processed visits.
        public void CountSum()
        {
            // First (not FirstOrDefault): an empty result fails here with a clear
            // "sequence contains no elements" error rather than a NullReferenceException below.
            var table = TestSets.Countries(1000, 5).Process(
                () => new SimpleTableMapper(new TableDefinition("Test")
                                            .Key("Country", s => s.Current<IVisitAggregationContext>().Visit.GeoData.Country)
                                            .Count())).First();

            var rows = table.Rows.ToList();

            Assert.AreEqual(5, rows.Count);
            Assert.AreEqual(1000, table.Fields<int>("Count").Sum());
        }
Пример #7
0
        // Wraps a "Value" fact in a SplittingFieldMapper driven by a PageConditionSplitter
        // whose condition is VisitPageIndex == 2. The mapper factory is called with a name
        // suffix, and the assertions expect "ValueBefore", "ValueAfter" and "ValueTotal"
        // fields summing to 5000, 9000 and 14000.
        public void Splitter()
        {
            var splitter = new PageConditionSplitter((scope, page) => page.VisitPageIndex == 2);

            // First (not FirstOrDefault): an empty result fails here with a clear
            // "sequence contains no elements" error rather than a NullReferenceException below.
            var table = TestSets.Countries(1000, 37).Process(
                () => new SimpleTableMapper(new TableDefinition("Test")
                                            .Key("VisitId", s => s.Current<IVisitAggregationContext>().Visit.InteractionId)
                                            .Map(new SplittingFieldMapper(splitter, (name) =>
                                                new[]
                                                {
                                                    new SimpleFieldMapper("Value" + name,
                                                                          s => s.Current<IVisitAggregationContext>().Visit.Value,
                                                                          typeof(int), FieldType.Fact)
                                                })))).First();

            Assert.AreEqual(1000, table.Rows.Count());
            Assert.AreEqual(5000, table.Fields<int>("ValueBefore").Sum());
            Assert.AreEqual(9000, table.Fields<int>("ValueAfter").Sum());
            Assert.AreEqual(14000, table.Fields<int>("ValueTotal").Sum());
        }
Пример #8
0
        // Maps a two-field ("Country", "Region") FieldMapperSet created with `true` as its
        // second argument and checks that only a single table is produced, with both fields
        // available directly on the "Test" table (9 distinct countries, 27 distinct regions).
        public void TwoFieldInlineLookup()
        {
            var tables = TestSets.Countries(1000, 9, regionsPerCountry: 3).Process(() =>
                new TableDefinition("Test")
                    .Dimension("VisitId", scope => scope.Current<IVisitAggregationContext>().Visit.InteractionId)
                    .Map(new FieldMapperSet("Country", true, new[]
                    {
                        new SimpleFieldMapper("Country",
                                              scope => scope.Current<IVisitAggregationContext>().Visit.GeoData.Country, typeof(string),
                                              FieldType.Dimension),
                        new SimpleFieldMapper("Region",
                                              scope => scope.Current<IVisitAggregationContext>().Visit.GeoData.Region, typeof(string),
                                              FieldType.Dimension),
                    })));

            // No separate lookup table is expected.
            Assert.AreEqual(1, tables.Count());

            // First (not FirstOrDefault): if the single table is not named "Test" the lookup
            // fails here instead of causing a NullReferenceException in the assertions below.
            var visits = tables.First(t => t.Schema.Name == "Test");

            Assert.AreEqual(1000, visits.Rows.Count());
            Assert.AreEqual(9, visits.Fields<string>("Country").Distinct().Count());
            Assert.AreEqual(27, visits.Fields<string>("Region").Distinct().Count());
        }
        // Runs the nested child-table mapping (Test -> Pages -> Events, plus Pages2) with a
        // TableDataBatchWriter backed by a CsvExporter writing to "~tmp" under the current
        // directory. Checks how many partition directories are written, how many rows end up
        // in the batch writer versus the overall result, that Dispose removes the partition
        // directories, and that the exported (merged) tables have the expected counts and sums.
        public void NestedChildTablesWithBatchingInCsv()
        {
            var csvDir = Path.Combine(Directory.GetCurrentDirectory(), "~tmp");

            // Start from a clean directory so partitions left by an earlier run are not counted.
            if (Directory.Exists(csvDir))
            {
                Directory.Delete(csvDir, true);
            }

            var csv = new CsvExporter(csvDir, binaryPartitions: true);

            var batchWriter    = new TableDataBatchWriter(csv);
            var writerDisposed = false;

            try
            {
                //1000 visits.
                //1 row in Test per visit
                //3 rows in Events per visit
                //3 rows in Pages per visit
                //1 row in Pages2 per visit
                //--------------------------
                //8 rows per visit = 8000 in total

                //Batch size 2672 (ceiling(1000/3) = 334 visits) gives two disk partitions with 2672 rows and one with 2656 rows in memory

                var visitCount             = 1000;
                var rowsPerVisit           = 8;
                var visitPerBatch          = (int)Math.Ceiling(visitCount / 3d);
                var rowsPerFilePartition   = visitPerBatch * rowsPerVisit;
                var eventRowsPerVisit      = 3;
                var expectedFilePartitions = 2;

                var tables = TestSets.Countries(visitCount, 37).Process(
                    () => new SimpleTableMapper(new TableDefinition("Test")
                                                .Key("VisitId", s => s.Current<IVisitAggregationContext>().Visit.InteractionId)
                                                .Fact("Value", s => s.Current<IVisitAggregationContext>().Visit.Value)
                                                .Map(s => s.Current<IVisitAggregationContext>().Visit.Pages,
                                                     new TableDefinition("Pages")
                                                     .Key("PageId", s => s.Current<PageData>().Item.Id)
                                                     .Fact("Value", s => s.Current<PageData>().PageEvents.Sum(pe => pe.Value))
                                                     .Map(s => s.Current<PageData>().PageEvents,
                                                          new TableDefinition("Events")
                                                          .Dimension("Event", s => s.Current<PageEventData>().PageEventDefinitionId)
                                                          .Fact("Value", s => s.Current<PageEventData>().Value)))
                                                .Map(s => new[] { s.Current<IVisitAggregationContext>().Visit.Pages.First() },
                                                     new TableDefinition("Pages2")
                                                     .Key("Id", s => s.Current<PageData>().Item.Id))),
                    initializer: p =>
                    {
                        p.BatchWriter = batchWriter;
                        p.BatchSize   = rowsPerFilePartition;
                    });

                var partitions = new DirectoryInfo(csvDir).GetDirectories().Length;

                Assert.AreEqual(expectedFilePartitions, partitions, string.Format("{0:N0} rows should create 2 file partitions and one in memory", visitCount * rowsPerVisit));

                // First (not FirstOrDefault): a missing "Events" table fails at the lookup
                // rather than with a NullReferenceException.
                Assert.AreEqual(expectedFilePartitions * eventRowsPerVisit * visitPerBatch, batchWriter.Tables.First(t => t.Schema.Name == "Events").Rows.Count(),
                                string.Format("{0:N0} rows in event tables in file partitions", expectedFilePartitions * eventRowsPerVisit * visitPerBatch));

                Assert.AreEqual(expectedFilePartitions * rowsPerVisit * visitPerBatch, batchWriter.Tables.Sum(t => t.Rows.Count()),
                                string.Format("{0:N0} total rows in file partitions", expectedFilePartitions * rowsPerVisit * visitPerBatch));

                Assert.AreEqual(visitCount * rowsPerVisit, tables.Sum(t => t.Rows.Count()),
                                string.Format("{0:N0} rows in file + memory partitions", visitCount * rowsPerVisit));

                //Merge partitions
                tables = csv.Export(tables);

                //Delete partitions
                // The flag is set before the call so the finally block never disposes a second time.
                writerDisposed = true;
                batchWriter.Dispose();
                partitions = new DirectoryInfo(csvDir).GetDirectories().Length;
                Assert.AreEqual(0, partitions, "Temporary partition directories are deleted");

                var visits = tables.First(t => t.Schema.Name == "Test");
                var pages  = tables.First(t => t.Schema.Name == "Pages");
                var pages2 = tables.First(t => t.Schema.Name == "Pages2");
                var events = tables.First(t => t.Schema.Name == "Events");

                Assert.AreEqual(1000, visits.Rows.Count());
                Assert.AreEqual(14000, visits.Fields<int>("Value").Sum());
                Assert.AreEqual(14000, pages.Fields<int>("Value").Sum());
                Assert.AreEqual(14000, events.Fields<int>("Value").Sum());

                Assert.AreEqual(3000, pages.Rows.Count());
                Assert.AreEqual(3000, events.Rows.Count());
                Assert.AreEqual(1000, pages2.Rows.Count());
            }
            finally
            {
                // Release the writer and remove the temp directory even when an assertion
                // above failed, so a failing run does not leave partitions behind.
                if (!writerDisposed)
                {
                    batchWriter.Dispose();
                }

                if (Directory.Exists(csvDir))
                {
                    Directory.Delete(csvDir, true);
                }
            }
        }