Exemplo n.º 1
0
 /// <summary>
 /// Applies the engine selection made through the radio buttons to a resolver.
 /// </summary>
 /// <param name="resolver">Resolver whose engine is set.</param>
 private void SetEngine(DocumentResolver resolver)
 {
     // The two radio buttons are inspected independently, in this order;
     // when neither is checked the resolver is left untouched.
     bool tfidfSelected = rdoTFIDF.Checked;
     if (tfidfSelected)
     {
         resolver.SetEngine(DocumentResolver.EngineType.BayesTFIDF);
     }

     bool levenshteinSelected = rdoLevenshtein.Checked;
     if (levenshteinSelected)
     {
         resolver.SetEngine(DocumentResolver.EngineType.BayesLevenshtein);
     }
 }
Exemplo n.º 2
0
        /// <summary>
        /// Get a resolver object from the cache or create (and cache) a new resolver object.
        /// </summary>
        /// <param name="authors">Author text; only whether it differs from <c>string.Empty</c> is used, as the first flag passed to the dictionary loader.</param>
        /// <param name="year">Year text; only whether it differs from <c>string.Empty</c> is used, as the second flag passed to the dictionary loader.</param>
        /// <param name="type">"title" loads the title dictionary; "part" or any other value loads the segment dictionary.</param>
        /// <param name="cacheKey">Key under which the resolver is looked up and stored in <c>MemoryCache.Default</c>.</param>
        /// <returns>The cached resolver if one exists under <paramref name="cacheKey"/>; otherwise a newly configured resolver.</returns>
        private DocumentResolver GetResolver(string authors, string year, string type, String cacheKey)
        {
            // Read the cache exactly once. Testing the entry and then reading it a second
            // time leaves a window in which the entry can expire, so the second read
            // could yield null even though the test succeeded.
            object cached = MemoryCache.Default[cacheKey];

            if (cached != null)
            {
                // Use cached version
                return((DocumentResolver)cached);
            }

            // Set up resolver
            DocumentResolver resolver = new DocumentResolver();

            resolver.SetEngine(DocumentResolver.EngineType.BayesTFIDF);
            switch (type)
            {
            case "title":
                resolver.SetDictionary(GetTitleDictionary(authors != string.Empty, year != string.Empty));
                break;

            case "part":
            default:
                resolver.SetDictionary(GetSegmentDictionary(authors != string.Empty, year != string.Empty));
                break;
            }

            // The "ResolverCacheTime" app setting is the cache lifetime in minutes.
            double cacheMinutes = Convert.ToDouble(ConfigurationManager.AppSettings["ResolverCacheTime"]);

            MemoryCache.Default.Add(cacheKey, resolver, DateTime.Now.AddMinutes(cacheMinutes));

            return(resolver);
        }
Exemplo n.º 3
0
        /// <summary>
        /// Resolves every document loaded from an input file with each resolver
        /// configuration and appends the elapsed time of each run to the output file.
        /// </summary>
        /// <param name="useWordStemmer">Passed through to every <c>Resolve</c> call.</param>
        /// <param name="outputfile">Path of the file the report is appended to.</param>
        /// <param name="dictionaryfile">Path handed to <c>LoadData</c> to obtain the dictionary.</param>
        /// <param name="inputfile">Path handed to <c>LoadData</c> to obtain the documents to resolve.</param>
        private static void ResolveList(bool useWordStemmer, string outputfile, string dictionaryfile, string inputfile)
        {
            // Initialize instances of all of the resolvers that will be tested
            DocumentResolver          documentResolver    = new DocumentResolver();
            BayesResolverEngine       bayesResolver       = new BayesResolverEngine();
            LevenshteinResolverEngine levenshteinResolver = new LevenshteinResolverEngine();
            TFIDFResolverEngine       tfidfResolver       = new TFIDFResolverEngine();

            // Load the dictionary into all of the resolvers
            Dictionary <string, string> dictionary = LoadData(dictionaryfile);

            documentResolver.SetDictionary(dictionary);
            bayesResolver.Dictionary       = dictionary;
            levenshteinResolver.Dictionary = dictionary;
            tfidfResolver.Dictionary       = dictionary;

            // Load the data to be resolved
            Dictionary <string, string> documents = LoadData(inputfile);

            // Process every loaded document with each resolver, recording the time
            // each resolver takes to complete the task.
            System.IO.File.AppendAllText(outputfile,
                                         string.Format("Processing {0} documents against a dictionary with {1} entries.\r\n\r\n",
                                                       documents.Count,
                                                       dictionary.Count));

            // Engine selection happens outside the timed region.
            documentResolver.SetEngine(DocumentResolver.EngineType.BayesTFIDF);
            TimeResolverRun(outputfile, "Bayes/TFIDF processing complete.  {0} seconds elapsed.\r\n", documents,
                            text => documentResolver.Resolve(text, useWordStemmer));

            documentResolver.SetEngine(DocumentResolver.EngineType.BayesLevenshtein);
            TimeResolverRun(outputfile, "Bayes/Levenshtein processing complete.  {0} seconds elapsed.\r\n", documents,
                            text => documentResolver.Resolve(text, useWordStemmer));

            TimeResolverRun(outputfile, "Bayes processing complete.  {0} seconds elapsed.\r\n", documents,
                            text => bayesResolver.Resolve(text, useWordStemmer));

            TimeResolverRun(outputfile, "Levenshtein processing complete.  {0} seconds elapsed.\r\n", documents,
                            text => levenshteinResolver.Resolve(text, useWordStemmer));

            TimeResolverRun(outputfile, "TFIDF processing complete.  {0} seconds elapsed.\r\n\r\n\r\n", documents,
                            text => tfidfResolver.Resolve(text, useWordStemmer));
        }

        /// <summary>
        /// Invokes <paramref name="resolve"/> on the value of every document, times the
        /// whole pass, and appends the formatted elapsed seconds to the output file.
        /// </summary>
        /// <param name="outputfile">Path of the file the timing line is appended to.</param>
        /// <param name="messageFormat">Format string whose <c>{0}</c> receives the elapsed seconds.</param>
        /// <param name="documents">Documents to resolve; only the values are used.</param>
        /// <param name="resolve">Action that resolves a single document text.</param>
        private static void TimeResolverRun(string outputfile, string messageFormat, Dictionary <string, string> documents, System.Action <string> resolve)
        {
            // Stopwatch is used instead of DateTime.Now differences: it is intended for
            // measuring elapsed time and is not affected by wall-clock adjustments.
            System.Diagnostics.Stopwatch stopwatch = System.Diagnostics.Stopwatch.StartNew();

            foreach (KeyValuePair <string, string> document in documents)
            {
                resolve(document.Value);
            }
            stopwatch.Stop();

            System.IO.File.AppendAllText(outputfile, string.Format(messageFormat, stopwatch.Elapsed.TotalSeconds.ToString()));
        }
Exemplo n.º 4
0
        /// <summary>
        /// Resolves a single document with each resolver configuration and appends
        /// every result (score, key, document) to the output file, one section per resolver.
        /// </summary>
        /// <param name="useWordStemmer">Passed through to every <c>Resolve</c> call.</param>
        /// <param name="outputfile">Path of the file the report is appended to.</param>
        /// <param name="dictionaryfile">Path handed to <c>LoadData</c> to obtain the dictionary.</param>
        /// <param name="document">Text of the document to resolve.</param>
        private static void ResolveDocument(bool useWordStemmer, string outputfile, string dictionaryfile, string document)
        {
            // Initialize instances of all of the resolvers that will be tested
            DocumentResolver          documentResolver    = new DocumentResolver();
            BayesResolverEngine       bayesResolver       = new BayesResolverEngine();
            LevenshteinResolverEngine levenshteinResolver = new LevenshteinResolverEngine();
            TFIDFResolverEngine       tfidfResolver       = new TFIDFResolverEngine();

            // Load the dictionary into all of the resolvers
            Dictionary <string, string> dictionary = LoadData(dictionaryfile);

            documentResolver.SetDictionary(dictionary);
            bayesResolver.Dictionary       = dictionary;
            levenshteinResolver.Dictionary = dictionary;
            tfidfResolver.Dictionary       = dictionary;

            // Resolve the document with each resolver and write out the results
            // each resolver produces.
            System.IO.File.AppendAllText(outputfile,
                                         string.Format("Processing '{0}' against a dictionary with {1} entries.\r\n",
                                                       document,
                                                       dictionary.Count));

            AppendResolutionSection(outputfile, "\r\nBayes/TFIDF\r\n", () =>
            {
                documentResolver.SetEngine(DocumentResolver.EngineType.BayesTFIDF);
                return documentResolver.Resolve(document, useWordStemmer);
            });

            AppendResolutionSection(outputfile, "\r\nBayes/Levenshtein\r\n", () =>
            {
                documentResolver.SetEngine(DocumentResolver.EngineType.BayesLevenshtein);
                return documentResolver.Resolve(document, useWordStemmer);
            });

            AppendResolutionSection(outputfile, "\r\nBayes\r\n",
                                    () => bayesResolver.Resolve(document, useWordStemmer));

            AppendResolutionSection(outputfile, "\r\nLevenshtein\r\n",
                                    () => levenshteinResolver.Resolve(document, useWordStemmer));

            AppendResolutionSection(outputfile, "\r\nTFIDF\r\n",
                                    () => tfidfResolver.Resolve(document, useWordStemmer));

            System.IO.File.AppendAllText(outputfile, "\r\n\r\n");
        }

        /// <summary>
        /// Appends a section heading to the output file, runs the resolver, and then
        /// appends one "score key document" line per result.
        /// </summary>
        /// <param name="outputfile">Path of the file the section is appended to.</param>
        /// <param name="heading">Heading text written before the resolver is run.</param>
        /// <param name="resolve">Function that performs the resolution and returns its results.</param>
        private static void AppendResolutionSection(string outputfile, string heading, System.Func <List <ResolutionResult> > resolve)
        {
            // The heading is written before resolving so it is already in the file
            // if the resolver throws.
            System.IO.File.AppendAllText(outputfile, heading);

            List <ResolutionResult> resolutionResults = resolve();

            // Buffer the section so the file is opened once per section rather than
            // once per result line.
            System.Text.StringBuilder lines = new System.Text.StringBuilder();

            foreach (ResolutionResult resolutionResult in resolutionResults)
            {
                lines.AppendFormat("{0} {1} {2}\r\n", resolutionResult.Score.ToString(), resolutionResult.Key, resolutionResult.Document);
            }

            if (lines.Length > 0)
            {
                System.IO.File.AppendAllText(outputfile, lines.ToString());
            }
        }