// Performance probe: loads the test gazetteer and a small input file, then writes the
// time taken by AddAllLocationCodes to the debug output. Contains no assertions.
public void GeoCoderCodeAll_PerfsTests_TimeToCodeAll()
{
    const string dbLocation1 = @"TestGeoLocationTool.sdf";
    connection = DBHelper.GetDbConnection(dbLocation1);

    var geoCoder = new GeoCoder(connection);
    geoCoder.LoadGazetteerFile(@"TestGaz1.csv");

    // gazetteer columns, including the alternate name columns
    var gazetteerHeaders = new GazetteerColumnHeaders
    {
        Level1Code = "ID_1",
        Level2Code = "ID_2",
        Level3Code = "ID_3",
        Level1Name = "NAME_1",
        Level2Name = "NAME_2",
        Level3Name = "NAME_3",
        Level1AltName = "VARNAME_1",
        Level2AltName = "VARNAME_2",
        Level3AltName = "VARNAME_3"
    };
    geoCoder.SetGazetteerColumns(gazetteerHeaders, false);

    geoCoder.LoadInputFileCsv("TestInput1.csv");
    geoCoder.SetInputColumns(geoCoder.DefaultInputColumnHeaders());

    // only the coding step is timed
    Stopwatch timer = Stopwatch.StartNew();
    geoCoder.AddAllLocationCodes();

    int inputLineCount = geoCoder.InputData.Rows.Count;
    double elapsedSeconds = timer.Elapsed.TotalSeconds;
    Debug.WriteLine(inputLineCount + " input lines: " + elapsedSeconds);

    // Example results
    // 25 input lines: 0.072016
}
// Performance probe: for each input file, codes all rows once with the matched names
// cache enabled and once with it disabled, and writes both timings to the debug output.
// Contains no assertions.
public void GeoCoder_PerfsTestsUsingMatchedNamesCache()
{
    const string dbLocation1 = @"TestGeoLocationTool.sdf";
    connection = DBHelper.GetDbConnection(dbLocation1);
    GeoCoder geoCoder = new GeoCoder(connection);
    geoCoder.LoadGazetteerFile(@"TestGaz1.csv");
    geoCoder.SetGazetteerColumns(
        new GazetteerColumnHeaders
        {
            Level1Code = "ID_1",
            Level2Code = "ID_2",
            Level3Code = "ID_3",
            Level1Name = "NAME_1",
            Level2Name = "NAME_2",
            Level3Name = "NAME_3"
        },
        false);

    Stopwatch watch = new Stopwatch();

    // UseMatchedNamesCache is static state shared with every other test, so remember
    // its value and put it back afterwards, even if one of the runs below throws.
    var originalUseMatchedNamesCache = InputData.UseMatchedNamesCache;
    try
    {
        foreach (
            string inputFile in
                new[] {@"TestInput1000.csv", @"TestInput10000.csv", @"TestInput50000.csv"})
        {
            // use cache
            geoCoder.LoadInputFileCsv(inputFile);
            geoCoder.SetInputColumns(geoCoder.DefaultInputColumnHeaders());
            InputData.UseMatchedNamesCache = true;
            watch.Restart();
            geoCoder.AddAllLocationCodes();
            var elapsed = watch.Elapsed.TotalSeconds;

            // don't use cache (input is reloaded so both runs start from the same data)
            geoCoder.LoadInputFileCsv(inputFile);
            geoCoder.SetInputColumns(geoCoder.DefaultInputColumnHeaders());
            InputData.UseMatchedNamesCache = false;
            watch.Restart();
            geoCoder.AddAllLocationCodes();

            Debug.WriteLine(
                "input file: " + inputFile + " cached: " + elapsed + " vs " + "non cached: " +
                watch.Elapsed.TotalSeconds);
        }
    }
    finally
    {
        InputData.UseMatchedNamesCache = originalUseMatchedNamesCache;
    }

    // Example results
    // input file: TestInput1000.csv cached: 0.0089728 vs non cached: 0.3835499
    // input file: TestInput10000.csv cached: 0.073879 vs non cached: 2.8980328
    // input file: TestInput50000.csv cached: 0.3852506 vs non cached: 14.464649
}
// Codes three input rows: two whose names equal gazetteer names and one whose names are
// miss-spelt but registered with the match provider stub as matches for the first
// gazetteer entry. Every row is expected to receive codes at all three levels.
public void CodeAll_CorrectAndMissSpeltInputWithSavedMatchesForMissSpellings_AllCodesAdded()
{
    // arrange
    var geoCoder = new GeoCoder(MockRepository.GenerateStub<IDbConnection>() as DbConnection);
    InputColumnHeaders inputHeaders = InputColumnNames();
    GazetteerColumnHeaders gazetteerHeaders = GazetteerColumnNames();

    // gazetteer entries: names with their codes
    string[] firstNames = {"P1", "T1", "V1"};
    string[] firstCodes = {"1", "10", "100"};
    string[] secondNames = {"P2", "T2", "V2"};
    string[] secondCodes = {"2", "20", "200"};

    // miss-spelt variant of the first entry's names
    string[] missSpeltNames = {"P1x", "T1x", "V1x"};

    // input rows: rows 1 and 2 spelt correctly, row 3 miss-spelt
    var input = new InputTestData();
    input.AddLine(firstNames);
    input.AddLine(secondNames);
    input.AddLine(missSpeltNames);
    geoCoder.SetInputData(input.Data(inputHeaders));
    geoCoder.SetInputColumns(inputHeaders);

    // gazetteer holds only the correctly spelt entries
    var gazetteer = new GazetteerRecords();
    gazetteer.AddLine(firstNames, firstCodes);
    gazetteer.AddLine(secondNames, secondCodes);
    geoCoder.SetGazetteerData(gazetteer.Data(gazetteerHeaders));

    // saved matches: the miss-spelt names map to the first entry at every level
    var savedMatches = new MatchProviderTestData();
    savedMatches.AddLevel1(missSpeltNames, firstNames);
    savedMatches.AddLevel2(missSpeltNames, firstNames);
    savedMatches.AddLevel3(missSpeltNames, firstNames);
    var providerStub = new MatchProviderStub(savedMatches);
    geoCoder.SetMatchProvider(providerStub.MatchProvider());

    geoCoder.SetGazetteerColumns(gazetteerHeaders, false);

    // act
    geoCoder.AddAllLocationCodes();

    // assert
    var codeColumns = geoCoder.LocationCodeColumnHeaders();

    // expected codes per input row: row 3 resolves to the first entry's codes
    string[][] expectedCodesByRow = {firstCodes, secondCodes, firstCodes};
    for (int rowIndex = 0; rowIndex < expectedCodesByRow.Length; rowIndex++)
    {
        DataRow row = geoCoder.InputData.Rows[rowIndex];
        string[] expectedCodes = expectedCodesByRow[rowIndex];

        Assert.AreEqual(expectedCodes[0], row[codeColumns.Level1]);
        Assert.AreEqual(expectedCodes[1], row[codeColumns.Level2]);
        Assert.AreEqual(expectedCodes[2], row[codeColumns.Level3]);
    }
}
// Performance probe against the PHL_adm3.csv gazetteer: times SetGazetteerColumns, then
// for several generated input sizes times AddAllLocationCodes twice and checks that
// columns 5, 6 and 7 of every coded row are not DBNull.
public void GeoCoder_PerfsTestsUsingDictionaries()
{
    connection = DBHelper.GetDbConnection(dbLocation);
    connection.InitializeDB();

    var geoCoder = new GeoCoder(connection);
    geoCoder.LoadGazetteerFile(@"PHL_adm3.csv"); // this file has to be copied in manually

    Stopwatch timer = Stopwatch.StartNew();
    geoCoder.SetGazetteerColumns(
        new GazetteerColumnHeaders
        {
            Level1Code = "ID_1",
            Level2Code = "ID_2",
            Level3Code = "ID_3",
            Level1Name = "NAME_1",
            Level2Name = "NAME_2",
            Level3Name = "NAME_3"
        },
        false);
    Debug.WriteLine("Time to create dictionaries: " + timer.Elapsed.TotalSeconds);

    // positions checked in each coded row
    // NOTE(review): presumably the added location code columns - confirm
    int[] checkedColumnIndexes = {5, 6, 7};
    int[] lineCounts = {500, 1000, 2000};

    foreach (int linesCount in lineCounts)
    {
        // first timed run
        var firstInput = GenerateInputFile(linesCount);
        geoCoder.LoadInputFileCsv(firstInput);
        geoCoder.SetInputColumns(geoCoder.DefaultInputColumnHeaders());
        timer.Restart();
        geoCoder.AddAllLocationCodes();
        var firstRunSeconds = timer.Elapsed.TotalSeconds;

        // second timed run; the LocationCodes.useDictionaries toggle that used to sit
        // around this run is disabled, so both runs go through the same settings
        var secondInput = GenerateInputFile(linesCount);
        geoCoder.LoadInputFileCsv(secondInput);
        geoCoder.SetInputColumns(geoCoder.DefaultInputColumnHeaders());
        timer.Restart();
        geoCoder.AddAllLocationCodes();
        Debug.WriteLine(
            linesCount + " input lines: " + firstRunSeconds + " vs " + timer.Elapsed.TotalSeconds);

        // every row of the last coded input must have a value in each checked column
        foreach (var row in geoCoder.InputData.AsEnumerable())
        {
            var values = row.ItemArray;
            foreach (int columnIndex in checkedColumnIndexes)
            {
                Assert.IsFalse(values[columnIndex] is DBNull);
            }
        }
    }
}
// Codes a single input row whose names are miss-spelt and differ in casing from the
// saved matched names registered with the match provider stub. The row is expected to
// receive the codes of the gazetteer entry the saved matches point to.
public void CodeAll_InputContainMissSpellingsWithDifferentCasingToSavedMatches_AllCodesAdded()
{
    // arrange
    var geoCoder = new GeoCoder(MockRepository.GenerateStub<IDbConnection>() as DbConnection);
    InputColumnHeaders inputHeaders = InputColumnNames();
    GazetteerColumnHeaders gazetteerHeaders = GazetteerColumnNames();

    // the only gazetteer entry
    string[] gazetteerNames = { "P1", "T1", "V1" };
    string[] gazetteerCodes = { "1", "10", "100" };

    // miss-spelt names as stored in the saved matches
    string[] savedMatchNames = { "P1x", "T1x", "V1x" };

    // the same miss-spellings as they appear in the input, in lower case
    string[] inputNames = {"p1x", "t1x", "v1x"};

    var input = new InputTestData();
    input.AddLine(inputNames);
    geoCoder.SetInputData(input.Data(inputHeaders));
    geoCoder.SetInputColumns(inputHeaders);

    var gazetteer = new GazetteerRecords();
    gazetteer.AddLine(gazetteerNames, gazetteerCodes);
    geoCoder.SetGazetteerData(gazetteer.Data(gazetteerHeaders));

    // saved matches link the stored miss-spellings to the gazetteer names at every level
    var savedMatches = new MatchProviderTestData();
    savedMatches.AddLevel1(savedMatchNames, gazetteerNames);
    savedMatches.AddLevel2(savedMatchNames, gazetteerNames);
    savedMatches.AddLevel3(savedMatchNames, gazetteerNames);
    var providerStub = new MatchProviderStub(savedMatches);
    geoCoder.SetMatchProvider(providerStub.MatchProvider());

    geoCoder.SetGazetteerColumns(gazetteerHeaders, false);

    // act
    geoCoder.AddAllLocationCodes();

    // assert - the single input row carries the gazetteer entry's codes
    var codeColumns = geoCoder.LocationCodeColumnHeaders();
    DataRow codedRow = geoCoder.InputData.Rows[0];
    Assert.AreEqual(gazetteerCodes[0], codedRow[codeColumns.Level1]);
    Assert.AreEqual(gazetteerCodes[1], codedRow[codeColumns.Level2]);
    Assert.AreEqual(gazetteerCodes[2], codedRow[codeColumns.Level3]);
}