Exemplo n.º 1
0
        /// <summary>
        /// Writes one line per place to <paramref name="outputFileName"/> using the format
        /// <c>place id|place name|list of words in the place name|</c>, where the word list
        /// is the space-separated keys of that place's entry in
        /// <c>deduplication.IsCoreWordProbability</c>.
        /// </summary>
        /// <param name="deduplication">Object whose <c>IsCoreWordProbability</c> map supplies the places and their words.</param>
        /// <param name="outputFileName">Path of the output file; an existing file is overwritten.</param>
        /// <param name="maxOutputSize">Maximum number of lines to write. Zero or a negative value produces an empty file.</param>
        /// <exception cref="ArgumentNullException"><paramref name="deduplication"/> is null.</exception>
        public static void WriteToFile(Deduplication deduplication, string outputFileName = "GroundTruth.txt", int maxOutputSize = 100)
        {
            if (deduplication == null)
            {
                throw new ArgumentNullException("deduplication");
            }

            // The using block guarantees the file is flushed and closed even if
            // enumeration or a write throws part-way through.
            using (System.IO.StreamWriter file = new System.IO.StreamWriter(outputFileName))
            {
                int count = 0;
                foreach (var entry in deduplication.IsCoreWordProbability)
                {
                    // Check the limit before writing so that a limit of zero really writes nothing.
                    if (count >= maxOutputSize)
                    {
                        break;
                    }

                    Place  place = entry.Key;
                    string words = string.Join(" ", entry.Value.Keys);

                    file.WriteLine(Convert.ToString(place.id) + "|" + place.title + "|" + words + "|");
                    ++count;
                }
            }
        }
Exemplo n.º 2
0
        /// <summary>
        /// Program entry point. Loads every data file under <c>../../../Data/</c> whose path
        /// contains "Paris", builds the tiles, runs expectation maximization, and then writes
        /// the precision measured against <c>GroundTruth.txt</c> to
        /// <c>precision SpatialContext.csv</c> as <c>size,precision</c> rows.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            Deduplication deduplication = new Deduplication();
            var           database      = deduplication.Database;

            foreach (string filePath in Directory.GetFiles("../../../Data/"))
            {
                if (filePath.Contains("Paris"))
                {
                    database.Load(filePath);
                }
            }

            // Alternative tiling strategy kept for reference: database.GenerateTiles(0.05);
            database.GenerateTilesByCity();

            // Manual test places kept for reference:
            //database.AddPlace("Starbucks Coffee");
            //database.AddPlace("Peets Coffee");
            //database.AddPlace("Starbucks");

            deduplication.Setup();

            // Run the expectation maximization algorithm.
            // NOTE(review): 100 and 1E-3 are presumably the iteration cap and the convergence
            // tolerance - confirm against Deduplication.ExpectationMaximization.
            deduplication.ExpectationMaximization(100, 1E-3, Deduplication.Model.Name);
            //var cp = deduplication.IsCoreWordProbability[database.GetByName("Metro-Station Mouton Duvernet (Linie 4)")[0]];

            Test test = new Test();
            test.LoadGroundTruthFromFile("GroundTruth.txt");

            // CSV output must not depend on the machine's culture: with a comma decimal
            // separator a row would become "10,0,85" and no longer parse as two columns.
            System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

            // The using block guarantees the file is flushed and closed even if
            // GetPrecision or a write throws.
            using (System.IO.StreamWriter file = new System.IO.StreamWriter("precision SpatialContext.csv"))
            {
                // Evaluate at sizes 10, 20, ..., 140.
                for (int i = 1; i < 15; ++i)
                {
                    int    size      = i * 10;
                    double precision = test.GetPrecision(deduplication.IsCoreWordProbability, size);
                    file.WriteLine(size.ToString(invariant) + "," + precision.ToString(invariant));
                }
            }
        }