Beispiel #1
0
        /// <summary>
        /// Runs every resolver over a batch of documents and appends the elapsed
        /// time of each run to a report file.
        /// </summary>
        /// <param name="useWordStemmer">Passed through unchanged to every <c>Resolve</c> call.</param>
        /// <param name="outputfile">Path of the report file; all output is appended to it.</param>
        /// <param name="dictionaryfile">File handed to <c>LoadData</c> to obtain the dictionary shared by all resolvers.</param>
        /// <param name="inputfile">File handed to <c>LoadData</c> to obtain the documents to resolve.</param>
        private static void ResolveList(bool useWordStemmer, string outputfile, string dictionaryfile, string inputfile)
        {
            // Initialize instances of all of the resolvers that will be tested
            DocumentResolver          documentResolver    = new DocumentResolver();
            BayesResolverEngine       bayesResolver       = new BayesResolverEngine();
            LevenshteinResolverEngine levenshteinResolver = new LevenshteinResolverEngine();
            TFIDFResolverEngine       tfidfResolver       = new TFIDFResolverEngine();

            // Load the dictionary into all of the resolvers
            Dictionary<string, string> dictionary = LoadData(dictionaryfile);

            documentResolver.SetDictionary(dictionary);
            bayesResolver.Dictionary       = dictionary;
            levenshteinResolver.Dictionary = dictionary;
            tfidfResolver.Dictionary       = dictionary;

            // Load the data to be resolved
            Dictionary<string, string> documents = LoadData(inputfile);

            // Process every loaded document with each resolver, recording the
            // time each resolver takes to complete the task.
            System.IO.File.AppendAllText(outputfile,
                                         string.Format("Processing {0} documents against a dictionary with {1} entries.\r\n\r\n",
                                                       documents.Count,
                                                       dictionary.Count));

            double elapsedSeconds;

            // Engine selection happens before the timer starts so that only the
            // Resolve calls themselves are measured.
            documentResolver.SetEngine(DocumentResolver.EngineType.BayesTFIDF);
            elapsedSeconds = TimeResolverRun(documents, text => documentResolver.Resolve(text, useWordStemmer));
            System.IO.File.AppendAllText(outputfile, string.Format("Bayes/TFIDF processing complete.  {0} seconds elapsed.\r\n", elapsedSeconds.ToString()));

            documentResolver.SetEngine(DocumentResolver.EngineType.BayesLevenshtein);
            elapsedSeconds = TimeResolverRun(documents, text => documentResolver.Resolve(text, useWordStemmer));
            System.IO.File.AppendAllText(outputfile, string.Format("Bayes/Levenshtein processing complete.  {0} seconds elapsed.\r\n", elapsedSeconds.ToString()));

            elapsedSeconds = TimeResolverRun(documents, text => bayesResolver.Resolve(text, useWordStemmer));
            System.IO.File.AppendAllText(outputfile, string.Format("Bayes processing complete.  {0} seconds elapsed.\r\n", elapsedSeconds.ToString()));

            elapsedSeconds = TimeResolverRun(documents, text => levenshteinResolver.Resolve(text, useWordStemmer));
            System.IO.File.AppendAllText(outputfile, string.Format("Levenshtein processing complete.  {0} seconds elapsed.\r\n", elapsedSeconds.ToString()));

            elapsedSeconds = TimeResolverRun(documents, text => tfidfResolver.Resolve(text, useWordStemmer));
            System.IO.File.AppendAllText(outputfile, string.Format("TFIDF processing complete.  {0} seconds elapsed.\r\n\r\n\r\n", elapsedSeconds.ToString()));
        }

        /// <summary>
        /// Invokes <paramref name="resolve"/> once for the value of every entry in
        /// <paramref name="documents"/> and measures how long the whole pass takes.
        /// </summary>
        /// <param name="documents">Documents to process; only the values are passed to the callback.</param>
        /// <param name="resolve">Callback that resolves a single document text.</param>
        /// <returns>Elapsed time of the full pass, in seconds.</returns>
        private static double TimeResolverRun(Dictionary<string, string> documents, Action<string> resolve)
        {
            // Stopwatch is used instead of DateTime.Now differences: DateTime.Now has
            // coarse resolution and shifts with system clock or DST adjustments.
            System.Diagnostics.Stopwatch stopwatch = System.Diagnostics.Stopwatch.StartNew();

            foreach (KeyValuePair<string, string> document in documents)
            {
                resolve(document.Value);
            }

            stopwatch.Stop();
            return stopwatch.Elapsed.TotalSeconds;
        }
Beispiel #2
0
        /// <summary>
        /// Resolves a single document with every resolver and appends each
        /// resolver's results (score, key and document) to a report file.
        /// </summary>
        /// <param name="useWordStemmer">Passed through unchanged to every <c>Resolve</c> call.</param>
        /// <param name="outputfile">Path of the report file; all output is appended to it.</param>
        /// <param name="dictionaryfile">File handed to <c>LoadData</c> to obtain the dictionary shared by all resolvers.</param>
        /// <param name="document">The document text to resolve.</param>
        private static void ResolveDocument(bool useWordStemmer, string outputfile, string dictionaryfile, string document)
        {
            // Initialize instances of all of the resolvers that will be tested
            DocumentResolver          documentResolver    = new DocumentResolver();
            BayesResolverEngine       bayesResolver       = new BayesResolverEngine();
            LevenshteinResolverEngine levenshteinResolver = new LevenshteinResolverEngine();
            TFIDFResolverEngine       tfidfResolver       = new TFIDFResolverEngine();

            // Load the dictionary into all of the resolvers
            Dictionary<string, string> dictionary = LoadData(dictionaryfile);

            documentResolver.SetDictionary(dictionary);
            bayesResolver.Dictionary       = dictionary;
            levenshteinResolver.Dictionary = dictionary;
            tfidfResolver.Dictionary       = dictionary;

            // Resolve the single document with each resolver, writing every
            // resolver's results under its own heading.
            System.IO.File.AppendAllText(outputfile,
                                         string.Format("Processing '{0}' against a dictionary with {1} entries.\r\n",
                                                       document,
                                                       dictionary.Count));

            // Each heading is written before its Resolve call so that the report
            // shows which resolver was running if that call fails.
            System.IO.File.AppendAllText(outputfile, "\r\nBayes/TFIDF\r\n");
            documentResolver.SetEngine(DocumentResolver.EngineType.BayesTFIDF);
            AppendResolutionResults(outputfile, documentResolver.Resolve(document, useWordStemmer));

            System.IO.File.AppendAllText(outputfile, "\r\nBayes/Levenshtein\r\n");
            documentResolver.SetEngine(DocumentResolver.EngineType.BayesLevenshtein);
            AppendResolutionResults(outputfile, documentResolver.Resolve(document, useWordStemmer));

            System.IO.File.AppendAllText(outputfile, "\r\nBayes\r\n");
            AppendResolutionResults(outputfile, bayesResolver.Resolve(document, useWordStemmer));

            System.IO.File.AppendAllText(outputfile, "\r\nLevenshtein\r\n");
            AppendResolutionResults(outputfile, levenshteinResolver.Resolve(document, useWordStemmer));

            System.IO.File.AppendAllText(outputfile, "\r\nTFIDF\r\n");
            AppendResolutionResults(outputfile, tfidfResolver.Resolve(document, useWordStemmer));

            System.IO.File.AppendAllText(outputfile, "\r\n\r\n");
        }

        /// <summary>
        /// Appends one "score key document" line per result to the report file.
        /// </summary>
        /// <param name="outputfile">Path of the report file; text is appended to it.</param>
        /// <param name="resolutionResults">Results to write, in the order they should appear.</param>
        private static void AppendResolutionResults(string outputfile, List<ResolutionResult> resolutionResults)
        {
            // Build all lines in memory and append once, rather than re-opening
            // the file for every individual result.
            System.Text.StringBuilder lines = new System.Text.StringBuilder();

            foreach (ResolutionResult resolutionResult in resolutionResults)
            {
                lines.AppendFormat("{0} {1} {2}\r\n", resolutionResult.Score.ToString(), resolutionResult.Key, resolutionResult.Document);
            }

            // Nothing to write for an empty result list; leave the file untouched.
            if (lines.Length > 0)
            {
                System.IO.File.AppendAllText(outputfile, lines.ToString());
            }
        }