Ejemplo n.º 1
0
 /// <summary>
 /// Builds a <see cref="Document"/> that represents a query typed by the user
 /// rather than a file on disk.
 /// </summary>
 /// <param name="terms">Term collection this document keeps a reference to.</param>
 /// <param name="query">Query text submitted by the user; exposed through <c>Query</c>.</param>
 public Document(DocumentTerms terms, string query)
 {
     _terms = terms;
     Query = query;

     // A query has no backing file, so a fixed placeholder name is used.
     FileNameWithoutExtension = "UserQuery";

     // Runs last so that it sees the state assigned above.
     PrepareQueryDocument();
 }
Ejemplo n.º 2
0
 /// <summary>
 /// Builds a <see cref="Document"/> backed by a file on disk. The resulting
 /// instance is the one the later project steps are applied to.
 /// </summary>
 /// <param name="path">Location of the document file on disk.</param>
 /// <param name="terms">Term collection this document keeps a reference to.</param>
 public Document(string path, DocumentTerms terms)
 {
     _documentPath = path;
     _terms = terms;

     // Keep only the bare file name (no directory, no extension).
     FileNameWithoutExtension = Path.GetFileNameWithoutExtension(path);

     // Runs last so that it sees the state assigned above.
     PrepareDocument();
 }
Ejemplo n.º 3
0
 /// <summary>
 /// Creates an inverted model over the given terms and documents, seeding one
 /// <c>TermDocumentFrequency</c> entry per term.
 /// </summary>
 /// <param name="terms">Term collection whose <c>Terms</c> are used to seed the frequency entries.</param>
 /// <param name="documents">Documents the model keeps a reference to.</param>
 public InvertedModel(DocumentTerms terms, List<Document> documents)
 {
     _terms = terms;
     _documents = documents;

     // One entry per term, in the same order the terms are enumerated.
     var frequencies = new List<TermDocumentFrequency>();
     foreach (var term in _terms.Terms)
     {
         var entry = new TermDocumentFrequency();
         entry.Term = term;
         frequencies.Add(entry);
     }

     _termDocumentFrequencies = frequencies;
 }
Ejemplo n.º 4
0
 /// <summary>
 /// Creates a vector model that keeps references to the supplied terms and documents.
 /// </summary>
 /// <param name="terms">Term collection stored by the model.</param>
 /// <param name="documents">Documents stored by the model.</param>
 public VectorModel(DocumentTerms terms, List<Document> documents)
 {
     _documents = documents;
     _terms = terms;
 }
Ejemplo n.º 5
0
        /// <summary>
        /// Console entry point. Loads the documents, runs the stop-word and stemming
        /// phases on all of them, generates the Boolean and TFIDF inverted files, and
        /// then shows an interactive menu until the user exits or input ends.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        private static void Main(string[] args)
        {
            Console.WriteLine($"init.. {DateTime.Now}");

            DocumentTerms terms = new DocumentTerms();
            StopList stopList = new StopList(AppConstant.StopListPath);

            string[] files = Utilities.GetFilesFromDirectory(
                AppConstant.DocumentDirectory,
                new List<string> { AppConstant.DocumentExtension });

            List<Document> documents = files.Select(f => new Document(f, terms)).ToList();

            // Phase 1: start the stop-word step for every document, then wait for all of them.
            Task[] stopWordTasks = documents.Select(x => x.GenerateStpFileAsync(stopList)).ToArray();
            Task.WaitAll(stopWordTasks);

            Console.WriteLine($"Done Phase 1 - StopWords removal, please check generated files. ({DateTime.Now})");

            // Phase 2: same pattern for the stemming step.
            Task[] stemmingTasks = documents.Select(x => x.GenerateStemmedFileAsync()).ToArray();
            Task.WaitAll(stemmingTasks);

            Console.WriteLine($"Done Phase 2 - Suffix removal, please check generated files. ({DateTime.Now})");

            // Phase 3: inverted files.
            InvertedModel model = new InvertedModel(terms, documents);

            Console.WriteLine($"\tStart Generating Boolean Inverted File. ({DateTime.Now})");
            model.GenerateInvertedFile();
            Console.WriteLine($"\tDone Generating Boolean Inverted File. ({DateTime.Now})");
            Console.WriteLine();
            Console.WriteLine($"\tStart Generating TFIDF Inverted File. ({DateTime.Now})");
            model.GenerateTFIDFValuesFile();
            Console.WriteLine($"\tDone Generating TFIDF Inverted File. ({DateTime.Now})");
            Console.WriteLine();
            Console.WriteLine($"Done Phase 3 - Generate Inverted File (Boolean & TFIDF), please check generated files. ({DateTime.Now})");

            while (true)
            {
                Console.WriteLine("=====================");
                Console.WriteLine("Choose one of the following options:");
                Console.WriteLine("1. Run MEDIAN Test Collection (will generate COS & Precision and Recall");
                Console.WriteLine("2. Run custom query search (will generate COS)");
                Console.WriteLine("3. Exit");

                string optionStr = Console.ReadLine();
                if (optionStr == null)
                {
                    // ReadLine returns null once standard input is closed or exhausted.
                    // Without this exit the menu would be re-printed forever.
                    return;
                }

                if (!int.TryParse(optionStr, out int option))
                {
                    // Not a number: show the menu again.
                    continue;
                }

                switch (option)
                {
                    case 1:
                        Console.WriteLine($"Generating MEDIAN. ({DateTime.Now})");
                        model.RunAllQueries(stopList);
                        Console.WriteLine($"Done generating MEDIAN, check generated files. ({DateTime.Now})");
                        break;

                    case 2:
                        RunCustomQueryLoop(model, terms, stopList);
                        break;

                    case 3:
                        return;

                    default:
                        // Unknown option: show the menu again.
                        break;
                }
            }
        }

        /// <summary>
        /// Prompts for queries one at a time and submits each to <paramref name="model"/>.
        /// Returns when the user enters a blank line or "EXIT" in any letter case,
        /// or when input ends.
        /// </summary>
        /// <param name="model">Model the queries are submitted to.</param>
        /// <param name="terms">Term collection passed to each query document.</param>
        /// <param name="stopList">Stop list passed to the stop-word step of each query.</param>
        private static void RunCustomQueryLoop(InvertedModel model, DocumentTerms terms, StopList stopList)
        {
            // Numbering restarts at 1 every time this mode is entered from the menu.
            int queryNumber = 1;

            while (true)
            {
                Console.Write("Enter Query (EXIT to exit): ");
                string query = Console.ReadLine();

                // Ordinal comparison keeps "exit" recognised under every culture;
                // ToUpper() is culture-sensitive (e.g. Turkish dotted/dotless i).
                if (string.IsNullOrWhiteSpace(query) ||
                    string.Equals(query, "EXIT", StringComparison.OrdinalIgnoreCase))
                {
                    return;
                }

                Console.WriteLine($"Working on it... ({DateTime.Now})");

                Document queryDocument = new Document(terms, query);

                // The stop-word step must finish before the stemming step starts.
                Task.WaitAll(queryDocument.GenerateStpFileAsync(stopList));
                Task.WaitAll(queryDocument.GenerateStemmedFileAsync());

                model.SubmitQuery(queryDocument, query, queryNumber, string.Format(AppConstant.QueryCosFile, queryNumber));

                Console.WriteLine($"Done, check generated file. ({DateTime.Now})");
                queryNumber++;
            }
        }