예제 #1
0
 /// <summary>
 /// Guard for stop-list arguments: returns normally when <paramref name="stopList"/>
 /// is present and throws when it is missing.
 /// </summary>
 /// <param name="stopList">The stop list instance to validate.</param>
 /// <exception cref="ArgumentNullException">
 /// Thrown when <paramref name="stopList"/> is <c>null</c>.
 /// </exception>
 private void IsStopListValid(StopList stopList)
 {
     if (stopList != null)
     {
         return;
     }

     throw new ArgumentNullException(nameof(stopList));
 }
예제 #2
0
        /// <summary>
        /// Builds the document's stop-word-filtered word list, stores it in
        /// <c>_documentStpWords</c>, and writes it (one word per line) to the path held in
        /// <c>_stpFile</c>.
        /// </summary>
        /// <remarks>
        /// A document word is dropped only when it matches a stop word exactly (case-sensitive);
        /// the surviving words are lower-cased afterwards.
        /// NOTE(review): if the document words are not already lower-case, capitalised stop words
        /// will survive the filter - confirm how <c>_documentWords</c> is populated.
        /// </remarks>
        /// <param name="stopList">Existing stop list that supplies the stop words to remove.</param>
        /// <returns>A task that completes once the filtered words have been written to disk.</returns>
        /// <exception cref="ArgumentNullException">
        /// Reported through the returned task when <paramref name="stopList"/> is <c>null</c>.
        /// </exception>
        public async Task GenerateStpFileAsync(StopList stopList)
        {
            IsStopListValid(stopList);

            // Hash the stop words once so each document word costs a single lookup instead of a
            // full scan of the stop list. The default string comparer is ordinal, which matches
            // the original "s == x" comparison.
            var stopWords = new System.Collections.Generic.HashSet<string>(stopList.StopWords);

            _documentStpWords = _documentWords
                .Where(word => !stopWords.Contains(word))
                .Select(word => word.ToLower())
                .ToList();

            await File.WriteAllLinesAsync($"{_stpFile}", _documentStpWords.ToArray());
        }
예제 #3
0
        /// <summary>
        /// Processes every query file found in <c>AppConstant.QueriesDirectory</c>: each file is
        /// wrapped in a <c>Document</c>, run through stop-word removal and then stemming, and
        /// finally submitted via <c>SubmitQuery</c> with <c>testCollection: true</c>.
        /// </summary>
        /// <remarks>
        /// This method is synchronous; it blocks until each asynchronous phase has finished for
        /// all queries before starting the next phase. Progress (query count and each processed
        /// file name) is written to the console.
        /// </remarks>
        /// <param name="stopList">Stop list handed to each query's stop-word removal step.</param>
        public void RunAllQueries(StopList stopList)
        {
            string[] files = Utilities.GetFilesFromDirectory(AppConstant.QueriesDirectory, new List<string> {
                AppConstant.QueryExtension
            });

            List<Document> documents = files.Select(f => new Document(f, _terms)).ToList();

            // Phase 1: start stop-word removal for every query, then wait for all of them.
            Task[] stopWordTasks = documents.Select(x => x.GenerateStpFileAsync(stopList)).ToArray();
            Task.WaitAll(stopWordTasks);

            // Phase 2: same pattern for stemming, started only after phase 1 has completed.
            Task[] stemmingTasks = documents.Select(x => x.GenerateStemmedFileAsync()).ToArray();
            Task.WaitAll(stemmingTasks);

            Console.WriteLine($"Query count: {documents.Count}");

            // Phase 3: queries are submitted one at a time, in file order.
            foreach (Document document in documents)
            {
                // Path.Combine uses the platform's directory separator; a hard-coded backslash
                // would become part of the file name on non-Windows systems.
                string fileName = System.IO.Path.Combine(
                    AppConstant.QueryCosDirectory,
                    $"{document.FileNameWithoutExtension}-cos.csv");

                SubmitQuery(document, document.Query, document.Id, fileName, testCollection: true);
                Console.WriteLine(document.FileNameWithoutExtension);
            }
        }
예제 #4
0
        /// <summary>
        /// Console entry point. Preprocesses every document in <c>AppConstant.DocumentDirectory</c>
        /// (stop-word removal, then stemming), generates the Boolean and TFIDF inverted files, and
        /// then shows an interactive menu until the user chooses to exit or standard input ends.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        private static void Main(string[] args)
        {
            Console.WriteLine($"init.. {DateTime.Now}");

            DocumentTerms terms = new DocumentTerms();

            StopList stopList = new StopList(AppConstant.StopListPath);

            string[] files = Utilities.GetFilesFromDirectory(AppConstant.DocumentDirectory, new List<string> {
                AppConstant.DocumentExtension
            });

            List<Document> documents = files.Select(f => new Document(f, terms)).ToList();

            // Phase 1: start stop-word removal for every document, then block until all finish.
            Task[] stopWordTasks = documents.Select(x => x.GenerateStpFileAsync(stopList)).ToArray();
            Task.WaitAll(stopWordTasks);

            Console.WriteLine($"Done Phase 1 - StopWords removal, please check generated files. ({DateTime.Now})");

            // Phase 2: stemming, started only after phase 1 has completed for all documents.
            Task[] stemmingTasks = documents.Select(x => x.GenerateStemmedFileAsync()).ToArray();
            Task.WaitAll(stemmingTasks);

            Console.WriteLine($"Done Phase 2 - Suffix removal, please check generated files. ({DateTime.Now})");

            // Phase 3: build both inverted files from the preprocessed documents.
            InvertedModel model = new InvertedModel(terms, documents);

            Console.WriteLine($"\tStart Generating Boolean Inverted File. ({DateTime.Now})");
            model.GenerateInvertedFile();
            Console.WriteLine($"\tDone Generating Boolean Inverted File. ({DateTime.Now})");
            Console.WriteLine();
            Console.WriteLine($"\tStart Generating TFIDF Inverted File. ({DateTime.Now})");
            model.GenerateTFIDFValuesFile();
            Console.WriteLine($"\tDone Generating TFIDF Inverted File. ({DateTime.Now})");
            Console.WriteLine();
            Console.WriteLine($"Done Phase 3 - Generate Inverted File (Boolean & TFIDF), please check generated files. ({DateTime.Now})");

            // Interactive menu: redisplayed after every completed action and after invalid input.
            while (true)
            {
                Console.WriteLine("=====================");
                Console.WriteLine("Choose one of the following options:");
                Console.WriteLine("1. Run MEDIAN Test Collection (will generate COS & Precision and Recall");
                Console.WriteLine("2. Run custom query search (will generate COS)");
                Console.WriteLine("3. Exit");

                string optionStr = Console.ReadLine();

                // ReadLine returns null once standard input is exhausted (closed or redirected
                // input). Without this check the menu would be reprinted forever.
                if (optionStr == null)
                {
                    return;
                }

                // A failed parse leaves option at 0, which matches no case below and therefore
                // just shows the menu again.
                int.TryParse(optionStr, out int option);

                switch (option)
                {
                    case 1:
                    {
                        Console.WriteLine($"Generating MEDIAN. ({DateTime.Now})");
                        model.RunAllQueries(stopList);
                        Console.WriteLine($"Done generating MEDIAN, check generated files. ({DateTime.Now})");
                        break;
                    }

                    case 2:
                    {
                        // Query numbering restarts at 1 each time this option is chosen.
                        int queryNumber = 1;
                        while (true)
                        {
                            Console.Write("Enter Query (EXIT to exit): ");
                            string query = Console.ReadLine();

                            // Ordinal, case-insensitive match so "exit" is recognised regardless
                            // of the current culture's casing rules. A null query (end of input)
                            // also lands here; the menu's own null check then ends the program.
                            if (string.IsNullOrWhiteSpace(query) ||
                                string.Equals(query, "EXIT", StringComparison.OrdinalIgnoreCase))
                            {
                                break;
                            }

                            Console.WriteLine($"Working on it... ({DateTime.Now})");
                            Document queryDocument = new Document(terms, query);
                            Task.WaitAll(queryDocument.GenerateStpFileAsync(stopList));
                            Task.WaitAll(queryDocument.GenerateStemmedFileAsync());

                            model.SubmitQuery(queryDocument, query, queryNumber, string.Format(AppConstant.QueryCosFile, queryNumber));

                            Console.WriteLine($"Done, check generated file. ({DateTime.Now})");
                            queryNumber++;
                        }

                        break;
                    }

                    case 3:
                        return;
                }
            }
        }