/// <summary>
/// Writes entries of <c>deduplication.IsCoreWordProbability</c> to a text file,
/// one line per place, in the format:
/// <c>place id|place name|list of words in the place name|</c>
/// where the words are the keys of the place's probability table, separated by single spaces.
/// </summary>
/// <param name="deduplication">Source of the per-place word probability tables.</param>
/// <param name="outputFileName">Path of the file to create (an existing file is overwritten).</param>
/// <param name="maxOutputSize">
/// Maximum number of lines to write. A value of zero or less writes no lines
/// (the file is still created, empty).
/// </param>
public static void WriteToFile(Deduplication deduplication, string outputFileName = "GroundTruth.txt", int maxOutputSize = 100)
{
    // The using block guarantees the writer is flushed and closed even if
    // enumerating or formatting an entry throws part-way through.
    using (System.IO.StreamWriter file = new System.IO.StreamWriter(outputFileName))
    {
        int count = 0;
        foreach (var entry in deduplication.IsCoreWordProbability)
        {
            // Check the limit before writing so that a non-positive limit emits nothing.
            if (count >= maxOutputSize)
            {
                break;
            }

            Place place = entry.Key;
            var probabilities = entry.Value;

            System.Text.StringBuilder line = new System.Text.StringBuilder();
            line.Append(Convert.ToString(place.id));
            line.Append('|');
            line.Append(place.title);
            line.Append('|');

            // Space-separate the words: a separator goes before every word except the first.
            bool isFirstWord = true;
            foreach (var word in probabilities.Keys)
            {
                if (!isFirstWord)
                {
                    line.Append(' ');
                }

                line.Append(word);
                isFirstWord = false;
            }

            // The format ends every record with a trailing '|'.
            line.Append('|');

            file.WriteLine(line.ToString());
            ++count;
        }
    }
}
/// <summary>
/// Entry point: loads the data files whose path contains "Paris", builds tiles by city,
/// runs expectation maximization with the name model, then evaluates the resulting
/// core-word probabilities against the ground truth in "GroundTruth.txt" and writes
/// the precision values to "precision SpatialContext.csv".
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    Deduplication deduplication = new Deduplication();
    var database = deduplication.Database;

    // Load every file in the data directory whose path mentions "Paris".
    string[] filePaths = Directory.GetFiles("../../../Data/");
    foreach (string filePath in filePaths)
    {
        if (filePath.Contains("Paris"))
        {
            database.Load(filePath);
        }
    }

    //database.GenerateTiles(0.05);
    database.GenerateTilesByCity();

    //database.AddPlace("Starbucks Coffee");
    //database.AddPlace("Peets Coffee");
    //database.AddPlace("Starbucks");

    deduplication.Setup();

    // Run the expectation maximization algorithm
    // NOTE(review): 100 and 1E-3 are presumably the iteration cap and the convergence tolerance - confirm.
    deduplication.ExpectationMaximization(100, 1E-3, Deduplication.Model.Name);

    //var cp = deduplication.IsCoreWordProbability[database.GetByName("Metro-Station Mouton Duvernet (Linie 4)")[0]];

    Test test = new Test();
    test.LoadGroundTruthFromFile("GroundTruth.txt");

    // The using block flushes and closes the CSV even if GetPrecision throws.
    using (System.IO.StreamWriter file = new System.IO.StreamWriter("precision SpatialContext.csv"))
    {
        // Evaluate precision for the second GetPrecision argument taking the values 10, 20, ..., 140.
        for (int i = 1; i < 15; ++i)
        {
            int size = i * 10;
            double precision = test.GetPrecision(deduplication.IsCoreWordProbability, size);

            // Format with the invariant culture: on locales whose decimal separator is ','
            // the default formatting would add an extra comma and corrupt the CSV row.
            file.WriteLine(size + "," + precision.ToString(System.Globalization.CultureInfo.InvariantCulture));
        }
    }
}