Beispiel #1
0
        /// <summary>
        /// Performance comparison: codes each of three input files twice, first with
        /// <c>InputData.UseMatchedNamesCache</c> set to true and then with it set to false,
        /// and writes both timings to the debug output. The test makes no assertions.
        /// </summary>
        /// <remarks>
        /// The cache switch is set through the <c>InputData</c> name rather than through the
        /// coder instance, so it appears to be shared state. Its value on entry is captured
        /// and restored in a <c>finally</c> block so that this test does not leave the switch
        /// changed for whatever runs afterwards, even when coding throws.
        /// </remarks>
        public void GeoCoder_PerfsTestsUsingMatchedNamesCache()
        {
            const string dbLocation1 = @"TestGeoLocationTool.sdf";

            connection = DBHelper.GetDbConnection(dbLocation1);
            GeoCoder geoCoder = new GeoCoder(connection);

            geoCoder.LoadGazetteerFile(@"TestGaz1.csv");
            geoCoder.SetGazetteerColumns(
                new GazetteerColumnHeaders
                {
                    Level1Code = "ID_1",
                    Level2Code = "ID_2",
                    Level3Code = "ID_3",
                    Level1Name = "NAME_1",
                    Level2Name = "NAME_2",
                    Level3Name = "NAME_3"
                },
                false);

            Stopwatch watch = new Stopwatch();

            // remember the switch so it can be put back once the measurements are done
            var originalUseMatchedNamesCache = InputData.UseMatchedNamesCache;

            try
            {
                foreach (
                    string inputFile in
                    new[]
                    { @"TestInput1000.csv", @"TestInput10000.csv", @"TestInput50000.csv" })
                {
                    // use cache
                    geoCoder.LoadInputFileCsv(inputFile);
                    geoCoder.SetInputColumns(geoCoder.DefaultInputColumnHeaders());
                    watch.Restart();
                    InputData.UseMatchedNamesCache = true;
                    geoCoder.AddAllLocationCodes();
                    var elapsed = watch.Elapsed.TotalSeconds;

                    // don't use cache: reload the same file so both runs start from fresh input
                    geoCoder.LoadInputFileCsv(inputFile);
                    geoCoder.SetInputColumns(geoCoder.DefaultInputColumnHeaders());
                    watch.Restart();
                    InputData.UseMatchedNamesCache = false;
                    geoCoder.AddAllLocationCodes();
                    Debug.WriteLine(
                        "input file: " + inputFile + " cached: " + elapsed +
                        " vs " + "non cached: " + watch.Elapsed.TotalSeconds);
                }
            }
            finally
            {
                // undo the change to the shared switch regardless of how the loop ended
                InputData.UseMatchedNamesCache = originalUseMatchedNamesCache;
            }

            // Example results
            // input file: TestInput1000.csv cached: 0.0089728 vs non cached: 0.3835499
            // input file: TestInput10000.csv cached: 0.073879 vs non cached: 2.8980328
            // input file: TestInput50000.csv cached: 0.3852506 vs non cached: 14.464649
        }
Beispiel #2
0
        /// <summary>
        /// Performance check against the PHL_adm3.csv gazetteer: reports the time taken by
        /// <c>SetGazetteerColumns</c>, then for 500, 1000 and 2000 generated input lines
        /// codes the input twice and writes both timings to the debug output. After the
        /// second run every input row must have non-DBNull values in columns 5, 6 and 7.
        /// </summary>
        public void GeoCoder_PerfsTestsUsingDictionaries()
        {
            connection = DBHelper.GetDbConnection(dbLocation);
            connection.InitializeDB();
            GeoCoder coder = new GeoCoder(connection);

            // this gazetteer file has to be copied manually before the test can run
            coder.LoadGazetteerFile(@"PHL_adm3.csv");

            Stopwatch timer = Stopwatch.StartNew();
            GazetteerColumnHeaders gazetteerHeaders = new GazetteerColumnHeaders
            {
                Level1Code = "ID_1",
                Level2Code = "ID_2",
                Level3Code = "ID_3",
                Level1Name = "NAME_1",
                Level2Name = "NAME_2",
                Level3Name = "NAME_3"
            };
            coder.SetGazetteerColumns(gazetteerHeaders, false);

            Debug.WriteLine("Time to create dictionaries: " + timer.Elapsed.TotalSeconds);

            // loads a freshly generated input file, then times only the coding step
            Func<int, double> timeCodingRun = lineTotal =>
            {
                coder.LoadInputFileCsv(GenerateInputFile(lineTotal));
                coder.SetInputColumns(coder.DefaultInputColumnHeaders());
                timer.Restart();
                coder.AddAllLocationCodes();
                return timer.Elapsed.TotalSeconds;
            };

            int[] lineTotals = { 500, 1000, 2000 };
            int[] checkedColumns = { 5, 6, 7 };

            foreach (int linesCount in lineTotals)
            {
                // No setting is changed between the two runs (a LocationCodes.useDictionaries
                // toggle was left commented out here), so both timings measure the same
                // configuration.
                double firstRunSeconds = timeCodingRun(linesCount);
                double secondRunSeconds = timeCodingRun(linesCount);

                Debug.WriteLine(
                    linesCount + " input lines: " + firstRunSeconds + " vs " +
                    secondRunSeconds);

                // only the result of the second run is still loaded and gets checked
                foreach (var row in coder.InputData.AsEnumerable())
                {
                    object[] cells = row.ItemArray;

                    foreach (int columnIndex in checkedColumns)
                    {
                        Assert.IsFalse(cells[columnIndex] is DBNull);
                    }
                }
            }
        }
Beispiel #3
0
        /// <summary>
        /// Codes a single input line whose names are the saved matched names written in a
        /// different casing, and expects the row to receive the codes of the gazetteer
        /// entry those saved matches point to, at all three levels.
        /// </summary>
        public void CodeAll_InputContainMissSpellingsWithDifferentCasingToSavedMatches_AllCodesAdded()
        {
            // arrange
            // NOTE(review): a stub generated for the IDbConnection interface is presumably
            // not a DbConnection, so this cast likely yields null - confirm GeoCoder is
            // meant to be constructed without a connection here.
            DbConnection stubConnection =
                MockRepository.GenerateStub <IDbConnection>() as DbConnection;
            GeoCoder coder = new GeoCoder(stubConnection);
            InputColumnHeaders inputHeaders = InputColumnNames();
            GazetteerColumnHeaders gazetteerHeaders = GazetteerColumnNames();

            // the one gazetteer entry and its codes
            string[] gazetteerNames = { "P1", "T1", "V1" };
            string[] gazetteerCodes = { "1", "10", "100" };

            // misspellings stored as saved matches
            string[] savedMatchNames = { "P1x", "T1x", "V1x" };

            // the input line: same misspellings, different casing
            string[] inputNames = { "p1x", "t1x", "v1x" };

            // input: one line
            InputTestData inputRows = new InputTestData();
            inputRows.AddLine(inputNames);
            coder.SetInputData(inputRows.Data(inputHeaders));
            coder.SetInputColumns(inputHeaders);

            // gazetteer: one line
            GazetteerRecords gazetteer = new GazetteerRecords();
            gazetteer.AddLine(gazetteerNames, gazetteerCodes);
            coder.SetGazetteerData(gazetteer.Data(gazetteerHeaders));

            // saved matches: each misspelt name maps to the gazetteer name, per level
            MatchProviderTestData savedMatches = new MatchProviderTestData();
            savedMatches.AddLevel1(savedMatchNames, gazetteerNames);
            savedMatches.AddLevel2(savedMatchNames, gazetteerNames);
            savedMatches.AddLevel3(savedMatchNames, gazetteerNames);
            MatchProviderStub providerStub = new MatchProviderStub(savedMatches);
            coder.SetMatchProvider(providerStub.MatchProvider());

            coder.SetGazetteerColumns(gazetteerHeaders, false);

            // act
            coder.AddAllLocationCodes();

            // assert - the only input row carries the gazetteer codes at every level
            var codeColumns = coder.LocationCodeColumnHeaders();
            DataRow codedRow = coder.InputData.Rows[0];
            var levelColumns = new[] { codeColumns.Level1, codeColumns.Level2, codeColumns.Level3 };

            for (int level = 0; level < levelColumns.Length; level++)
            {
                Assert.AreEqual(gazetteerCodes[level], codedRow[levelColumns[level]]);
            }
        }
Beispiel #4
0
        /// <summary>
        /// Performance check: loads TestGaz1.csv (names, alternative names and codes for
        /// three levels) and TestInput1.csv, then writes the input row count and the time
        /// taken by <c>AddAllLocationCodes</c> to the debug output. Makes no assertions.
        /// </summary>
        public void GeoCoderCodeAll_PerfsTests_TimeToCodeAll()
        {
            const string dbLocation1 = @"TestGeoLocationTool.sdf";

            connection = DBHelper.GetDbConnection(dbLocation1);
            GeoCoder coder = new GeoCoder(connection);

            // gazetteer, including the alternative-name columns
            coder.LoadGazetteerFile(@"TestGaz1.csv");
            GazetteerColumnHeaders gazetteerHeaders = new GazetteerColumnHeaders
            {
                Level1Code = "ID_1",
                Level2Code = "ID_2",
                Level3Code = "ID_3",
                Level1Name = "NAME_1",
                Level2Name = "NAME_2",
                Level3Name = "NAME_3",
                Level1AltName = "VARNAME_1",
                Level2AltName = "VARNAME_2",
                Level3AltName = "VARNAME_3"
            };
            coder.SetGazetteerColumns(gazetteerHeaders, false);

            // input
            coder.LoadInputFileCsv("TestInput1.csv");
            coder.SetInputColumns(coder.DefaultInputColumnHeaders());

            // only the coding step is timed
            Stopwatch timer = Stopwatch.StartNew();
            coder.AddAllLocationCodes();
            double codingSeconds = timer.Elapsed.TotalSeconds;

            Debug.WriteLine(
                coder.InputData.Rows.Count + " input lines: " + codingSeconds);

            // Example results
            // 25 input lines: 0.072016
        }
Beispiel #5
0
        // Codes three input lines against a two-entry gazetteer: two lines use the exact
        // gazetteer names, the third uses misspelt names for which saved matches to the
        // first gazetteer entry are supplied. Every line is expected to end up with the
        // codes of the gazetteer entry it corresponds to, at all three levels.
        // NOTE(review): the declaration below has no modifiers or return type in this
        // listing - presumably lost when the snippet was extracted; confirm against the
        // original file.
        CodeAll_CorrectAndMissSpeltInputWithSavedMatchesForMissSpellings_AllCodesAdded
            ()
        {
            // arrange
            // NOTE(review): a stub generated for the IDbConnection interface is presumably
            // not a DbConnection, so the cast below likely yields null - confirm GeoCoder
            // is meant to be constructed without a connection here.
            GeoCoder geoCoder =
                new GeoCoder(MockRepository.GenerateStub <IDbConnection>() as DbConnection);
            InputColumnHeaders     inputColumnHeaders     = InputColumnNames();
            GazetteerColumnHeaders gazetteerColumnHeaders = GazetteerColumnNames();

            // create input test data with
            // line 1, all names correct (names1)
            // line 2, all names correct (names2)
            // line 3, all names misspelt (names3, the names1 values with an "x" appended)
            string[] names1 = { "P1", "T1", "V1" };
            string[] names2 = { "P2", "T2", "V2" };
            string[] names3 = { "P1x", "T1x", "V1x" };
            // codes of the two gazetteer entries added below
            string[] codes1 = { "1", "10", "100" };
            string[] codes2 = { "2", "20", "200" };

            InputTestData inputTestData = new InputTestData();

            inputTestData.AddLine(names1);
            inputTestData.AddLine(names2);
            inputTestData.AddLine(names3);
            geoCoder.SetInputData(inputTestData.Data(inputColumnHeaders));
            geoCoder.SetInputColumns(inputColumnHeaders);

            // create gazetteer data: names1 -> codes1, names2 -> codes2
            GazetteerRecords gazetteerRecords = new GazetteerRecords();

            gazetteerRecords.AddLine(names1, codes1);
            gazetteerRecords.AddLine(names2, codes2);
            geoCoder.SetGazetteerData(gazetteerRecords.Data(gazetteerColumnHeaders));

            // add saved-match records mapping the input line 3 names to gazetteer names 1,
            // one record set per level
            MatchProviderTestData matchProviderTestData = new MatchProviderTestData();

            matchProviderTestData.AddLevel1(names3, names1);
            matchProviderTestData.AddLevel2(names3, names1);
            matchProviderTestData.AddLevel3(names3, names1);

            MatchProviderStub matchProviderStub = new MatchProviderStub(matchProviderTestData);

            geoCoder.SetMatchProvider(matchProviderStub.MatchProvider());

            geoCoder.SetGazetteerColumns(gazetteerColumnHeaders, false);

            // act
            geoCoder.AddAllLocationCodes();

            // assert
            // the code columns are looked up by the header names the coder reports
            var columns = geoCoder.LocationCodeColumnHeaders();

            // line 1 - should contain codes 1
            DataRow line1 = geoCoder.InputData.Rows[0];

            Assert.AreEqual(codes1[0], line1[columns.Level1]);
            Assert.AreEqual(codes1[1], line1[columns.Level2]);
            Assert.AreEqual(codes1[2], line1[columns.Level3]);

            // line 2 - should contain codes 2
            DataRow line2 = geoCoder.InputData.Rows[1];

            Assert.AreEqual(codes2[0], line2[columns.Level1]);
            Assert.AreEqual(codes2[1], line2[columns.Level2]);
            Assert.AreEqual(codes2[2], line2[columns.Level3]);

            // line 3 - misspelt input, should contain codes 1 via the saved matches
            DataRow line3 = geoCoder.InputData.Rows[2];

            Assert.AreEqual(codes1[0], line3[columns.Level1]);
            Assert.AreEqual(codes1[1], line3[columns.Level2]);
            Assert.AreEqual(codes1[2], line3[columns.Level3]);
        }