/// <summary>
/// Guards against a missing stop list.
/// </summary>
/// <param name="stopList">The stop list instance to validate.</param>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="stopList"/> is null.
/// </exception>
private void IsStopListValid(StopList stopList)
{
    // Happy path first: a non-null stop list needs no further checks.
    if (stopList != null)
    {
        return;
    }

    throw new ArgumentNullException(nameof(stopList));
}
/// <summary>
/// Builds the document's stop-word-free word list and writes it, one word per line,
/// to the path held in <c>_stpFile</c>.
/// </summary>
/// <remarks>
/// Every word of <c>_documentWords</c> that appears in <c>stopList.StopWords</c> is dropped;
/// the remaining words are lower-cased and stored in <c>_documentStpWords</c> before being written.
/// Stop words are matched ignoring case, so a capitalised occurrence such as "The" is removed
/// when the stop list contains "the" (previously it slipped through and was emitted as "the").
/// </remarks>
/// <param name="stopList">Instance of an existing stop list that provides the stop words.</param>
/// <returns>A task that completes once the file has been written.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="stopList"/> is null. Because this method is <c>async</c>, the exception
/// is surfaced through the returned task.
/// </exception>
public async Task GenerateStpFileAsync(StopList stopList)
{
    IsStopListValid(stopList);

    // Build the lookup once: O(1) membership per word instead of rescanning the stop list
    // for every document word, and case-insensitive so the filter agrees with the
    // lower-casing applied to the surviving words.
    var stopWords = new System.Collections.Generic.HashSet<string>(
        stopList.StopWords,
        StringComparer.OrdinalIgnoreCase);

    _documentStpWords = _documentWords
        .Where(word => !stopWords.Contains(word))
        .Select(word => word.ToLower())
        .ToList();

    await File.WriteAllLinesAsync($"{_stpFile}", _documentStpWords.ToArray());
}
/// <summary>
/// Runs every query file found in <c>AppConstant.QueriesDirectory</c> (files with
/// <c>AppConstant.QueryExtension</c>) through the same pipeline: stop-word removal,
/// stemming, then <c>SubmitQuery</c> with <c>testCollection: true</c>.
/// </summary>
/// <remarks>
/// The two preprocessing phases are each started for all queries and waited on as a group
/// (this method blocks until they finish). Queries are then submitted sequentially, and each
/// result file is named <c>{FileNameWithoutExtension}-cos.csv</c> inside
/// <c>AppConstant.QueryCosDirectory</c>.
/// </remarks>
/// <param name="stopList">Stop list passed to each query document's stop-word removal.</param>
public void RunAllQueries(StopList stopList)
{
    string[] files = Utilities.GetFilesFromDirectory(
        AppConstant.QueriesDirectory,
        new List<string> { AppConstant.QueryExtension });
    List<Document> documents = files.Select(f => new Document(f, _terms)).ToList();

    // Phase 1: stop-word removal for all queries; wait for every file to be produced.
    Task[] stopWordTasks = documents.Select(d => d.GenerateStpFileAsync(stopList)).ToArray();
    Task.WaitAll(stopWordTasks);

    // Phase 2: stemming is started only after phase 1 has completed for every query.
    Task[] stemmingTasks = documents.Select(d => d.GenerateStemmedFileAsync()).ToArray();
    Task.WaitAll(stemmingTasks);

    Console.WriteLine($"Query count: {documents.Count}");

    foreach (Document document in documents)
    {
        // Path.Combine uses the platform's directory separator; a hard-coded '\' only
        // produces a path inside the directory on Windows.
        string fileName = System.IO.Path.Combine(
            AppConstant.QueryCosDirectory,
            $"{document.FileNameWithoutExtension}-cos.csv");

        SubmitQuery(document, document.Query, document.Id, fileName, testCollection: true);
        Console.WriteLine(document.FileNameWithoutExtension);
    }
}
/// <summary>
/// Console entry point. Preprocesses every document in <c>AppConstant.DocumentDirectory</c>
/// (stop-word removal, then stemming), builds an <c>InvertedModel</c> and asks it to generate
/// its inverted file and TFIDF values file, then shows an interactive menu until the user exits.
/// </summary>
/// <remarks>
/// Menu options: 1 runs all test-collection queries via <c>model.RunAllQueries</c>;
/// 2 prompts for free-text queries until an empty line or "EXIT" is entered; 3 exits.
/// Any other input redisplays the menu. If standard input is closed (end of input), the
/// program exits instead of redisplaying the menu forever.
/// </remarks>
/// <param name="args">Command-line arguments (not used).</param>
private static void Main(string[] args)
{
    Console.WriteLine($"init.. {DateTime.Now}");

    DocumentTerms terms = new DocumentTerms();
    StopList stopList = new StopList(AppConstant.StopListPath);
    string[] files = Utilities.GetFilesFromDirectory(
        AppConstant.DocumentDirectory,
        new List<string> { AppConstant.DocumentExtension });
    List<Document> documents = files.Select(f => new Document(f, terms)).ToList();

    // Phase 1: start stop-word removal for every document, then block until all are done.
    Task[] stopWordTasks = documents.Select(d => d.GenerateStpFileAsync(stopList)).ToArray();
    Task.WaitAll(stopWordTasks);
    Console.WriteLine($"Done Phase 1 - StopWords removal, please check generated files. ({DateTime.Now})");

    // Phase 2: stemming starts only after phase 1 has completed for every document.
    Task[] stemmingTasks = documents.Select(d => d.GenerateStemmedFileAsync()).ToArray();
    Task.WaitAll(stemmingTasks);
    Console.WriteLine($"Done Phase 2 - Suffix removal, please check generated files. ({DateTime.Now})");

    // Phase 3: build the model and generate both inverted files.
    InvertedModel model = new InvertedModel(terms, documents);

    Console.WriteLine($"\tStart Generating Boolean Inverted File. ({DateTime.Now})");
    model.GenerateInvertedFile();
    Console.WriteLine($"\tDone Generating Boolean Inverted File. ({DateTime.Now})");
    Console.WriteLine();

    Console.WriteLine($"\tStart Generating TFIDF Inverted File. ({DateTime.Now})");
    model.GenerateTFIDFValuesFile();
    Console.WriteLine($"\tDone Generating TFIDF Inverted File. ({DateTime.Now})");
    Console.WriteLine();

    Console.WriteLine($"Done Phase 3 - Generate Inverted File (Boolean & TFIDF), please check generated files. ({DateTime.Now})");

    while (true)
    {
        Console.WriteLine("=====================");
        Console.WriteLine("Choose one of the following options:");
        Console.WriteLine("1. Run MEDIAN Test Collection (will generate COS & Precision and Recall");
        Console.WriteLine("2. Run custom query search (will generate COS)");
        Console.WriteLine("3. Exit");

        string optionStr = Console.ReadLine();
        if (optionStr == null)
        {
            // ReadLine returns null once input is exhausted (closed or redirected stdin).
            // Without this check the menu would be reprinted in an endless loop.
            return;
        }

        // A failed parse leaves option at 0, which matches no case and redisplays the menu.
        int.TryParse(optionStr, out int option);

        switch (option)
        {
            case 1:
                Console.WriteLine($"Generating MEDIAN. ({DateTime.Now})");
                model.RunAllQueries(stopList);
                Console.WriteLine($"Done generating MEDIAN, check generated files. ({DateTime.Now})");
                break;

            case 2:
                RunCustomQueries();
                break;

            case 3:
                return;
        }
    }

    // Prompts for queries until an empty line, end of input, or "EXIT" (any casing) is entered.
    // Each query is preprocessed like a document and submitted to the model; results go to the
    // file named by AppConstant.QueryCosFile formatted with a running query number (from 1).
    void RunCustomQueries()
    {
        int queryNumber = 1;

        while (true)
        {
            Console.Write("Enter Query (EXIT to exit): ");
            string query = Console.ReadLine();

            // Ordinal, case-insensitive comparison: culture-sensitive upper-casing does not
            // map "exit" to "EXIT" under every culture (e.g. Turkish dotted/dotless i).
            if (string.IsNullOrWhiteSpace(query)
                || string.Equals(query, "EXIT", StringComparison.OrdinalIgnoreCase))
            {
                return;
            }

            Console.WriteLine($"Working on it... ({DateTime.Now})");

            Document queryDocument = new Document(terms, query);
            Task.WaitAll(queryDocument.GenerateStpFileAsync(stopList));
            Task.WaitAll(queryDocument.GenerateStemmedFileAsync());

            model.SubmitQuery(queryDocument, query, queryNumber, string.Format(AppConstant.QueryCosFile, queryNumber));
            Console.WriteLine($"Done, check generated file. ({DateTime.Now})");

            queryNumber++;
        }
    }
}