/// <summary>
/// Initializes a new <see cref="GroupEngine"/> and stores the given search engine
/// in the <c>searchEngine</c> field.
/// </summary>
/// <param name="searchEngine">Search engine instance kept by this object.</param>
public GroupEngine(SearchEngine searchEngine)
{
    this.searchEngine = searchEngine;
}
/// <summary>
/// Console entry point. Asks for the working mode (search engine or k-means grouping),
/// loads the keywords and documents files, fills the per-document TF-IDF vectors, and then
/// runs the interactive action menu until the user quits or standard input ends.
/// </summary>
/// <param name="args">Command-line arguments; not read by this method.</param>
public static void Main(string[] args)
{
    const int SEARCH_ENGINE = 1;
    const int GROUPING_ENGINE = 2;

    SearchEngine searchEngine = new SearchEngine();

    // The two mode constants are adjacent, so a closed range check accepts exactly them.
    int mode;
    if (!tryReadInt("Choose mode: 1 - search engine, 2 - k-means grouping",
                    SEARCH_ENGINE, GROUPING_ENGINE, "Invalid mode. Try again.", out mode))
    {
        return;
    }
    bool searchMode = mode == SEARCH_ENGINE;

    if (!tryLoadKeywords(searchEngine) || !tryLoadDocuments(searchEngine, searchMode))
    {
        return;
    }

    GroupEngine groupEngine = new GroupEngine(searchEngine);
    string menu = "Choose action: 1 - " + (searchMode ? "search" : "classify") + ", 2 - display document, "
                + "3 - display tokens, 4 - display stemmed tokens, 5 - quit";

    bool closing = false;
    while (!closing)
    {
        Console.WriteLine(menu);
        string action = Console.ReadLine();
        if (action == null)
        {
            // Console.ReadLine returns null once input is exhausted; without this check
            // the default branch below would repeat forever.
            break;
        }

        switch (action)
        {
            case "1":
                if (searchMode)
                {
                    runSearchAction(searchEngine);
                }
                else
                {
                    runGroupingAction(groupEngine);
                }
                break;
            case "2":
                searchEngine.documents[searchEngine.readDocumentNumber()].displayDocument();
                break;
            case "3":
                searchEngine.documents[searchEngine.readDocumentNumber()].displayTokens();
                break;
            case "4":
                searchEngine.documents[searchEngine.readDocumentNumber()].displayStemmedTokens();
                break;
            case "5":
                Console.WriteLine("Closing.");
                closing = true;
                break;
            default:
                Console.WriteLine("Incorrect option. Try again.");
                break;
        }
    }
}

/// <summary>
/// Prints <paramref name="prompt"/> once, then reads lines until one parses as an integer
/// within [<paramref name="min"/>, <paramref name="max"/>].
/// </summary>
/// <param name="prompt">Text shown before the first read.</param>
/// <param name="min">Smallest accepted value (inclusive).</param>
/// <param name="max">Largest accepted value (inclusive).</param>
/// <param name="outOfRangeMessage">Message printed when the number parses but is outside the range.</param>
/// <param name="value">Receives the accepted number; 0 when the method returns false.</param>
/// <returns>True when a valid number was read; false when standard input ended first.</returns>
private static bool tryReadInt(string prompt, int min, int max, string outOfRangeMessage, out int value)
{
    Console.WriteLine(prompt);
    while (true)
    {
        string line = Console.ReadLine();
        if (line == null)
        {
            value = 0;
            return false;
        }

        if (!Int32.TryParse(line, out value))
        {
            Console.WriteLine("Invalid input. Try again.");
        }
        else if (value < min || value > max)
        {
            Console.WriteLine(outOfRangeMessage);
        }
        else
        {
            return true;
        }
    }
}

/// <summary>
/// Asks for the keywords file name until <c>readKeywordsFile</c> accepts it, then fills
/// <c>stemmedKeywords</c> with the de-duplicated stems of the loaded keywords.
/// </summary>
/// <param name="searchEngine">Engine whose keyword lists are populated.</param>
/// <returns>True when the keywords were loaded; false when standard input ended first.</returns>
private static bool tryLoadKeywords(SearchEngine searchEngine)
{
    Console.WriteLine("Type keywords file name.");
    while (true)
    {
        try
        {
            string fileName = Console.ReadLine();
            if (fileName == null)
            {
                return false;
            }

            searchEngine.readKeywordsFile(fileName);
            foreach (string keyword in searchEngine.keywords)
            {
                searchEngine.stemmedKeywords.Add(Utils.stemToken(keyword, searchEngine.stemmer));
            }
            searchEngine.stemmedKeywords = searchEngine.stemmedKeywords.Distinct().ToList();
            return true;
        }
        catch (FileNotFoundException)
        {
            Console.WriteLine("File not found. Try again.");
        }
        catch (ArgumentException)
        {
            Console.WriteLine("Invalid file name. Try again.");
        }
    }
}

/// <summary>
/// Asks for the documents file name until <c>readDocumentsFile</c> accepts it, processes every
/// document, computes the IDF vector and assigns each document its TF-IDF vector.
/// </summary>
/// <param name="searchEngine">Engine whose documents and IDF vector are populated.</param>
/// <param name="searchMode">
/// True in search-engine mode, false in grouping mode; forwarded as the last argument of
/// <c>processDocument</c>.
/// </param>
/// <returns>True when the documents were loaded; false when standard input ended first.</returns>
private static bool tryLoadDocuments(SearchEngine searchEngine, bool searchMode)
{
    Console.WriteLine("Type documents file name.");
    while (true)
    {
        try
        {
            string fileName = Console.ReadLine();
            if (fileName == null)
            {
                return false;
            }

            searchEngine.readDocumentsFile(fileName);
            foreach (Document doc in searchEngine.documents)
            {
                doc.processDocument(searchEngine.stemmedKeywords, searchEngine.stemmer, searchMode);
            }

            searchEngine.IDFVector = searchEngine.createIDFVector();
            foreach (Document doc in searchEngine.documents)
            {
                doc.TFIDFVector = Utils.multiplyVectorsCoords(doc.TFVector, searchEngine.IDFVector);
            }
            return true;
        }
        catch (FileNotFoundException)
        {
            Console.WriteLine("File not found. Try again.");
        }
        catch (ArgumentException)
        {
            Console.WriteLine("Invalid file name. Try again.");
        }
    }
}

/// <summary>
/// Runs one search: reads a query, prints the documents ranked by cosine against the query's
/// TF-IDF vector, and optionally performs a relevance-feedback round followed by a second query.
/// Returns early, without printing anything further, if standard input ends at any prompt.
/// </summary>
/// <param name="searchEngine">Engine holding the documents, keywords, stemmer and IDF vector.</param>
private static void runSearchAction(SearchEngine searchEngine)
{
    Console.WriteLine("Type query.");
    string query = Console.ReadLine();
    if (query == null)
    {
        return;
    }

    List<double> queryTFIDFVector = buildQueryVector(searchEngine, query);
    List<KeyValuePair<Document, double>> ranking = rankDocuments(searchEngine, queryTFIDFVector);
    printRanking(ranking);

    Console.WriteLine("Do you want to choose relevent documents. 1 - yes, 2 - no");
    if (Console.ReadLine() != "1")
    {
        // "2" and any other answer (including end of input) skip the feedback round.
        return;
    }

    Console.WriteLine("Choose numbers of relevant documents. Format: 'number of document' 'number od document ...");
    string selection = Console.ReadLine();
    if (selection == null)
    {
        return;
    }

    // Each Utils call receives its own copy of the ranking.
    List<int> intDocumentsIndexes = Utils.takeIndexesOfDocuments(selection);
    var relevant = Utils.getRelevantDocuments(intDocumentsIndexes, ranking.ToList());
    var unRelevant = Utils.getUnRelevantDocuments(intDocumentsIndexes, ranking.ToList());
    var newQuery = Utils.calculateNewQueryTFIDFQuery(relevant, unRelevant, queryTFIDFVector);

    // Pair every stemmed keyword with its weight in the adjusted query vector.
    Dictionary<string, double> keyWords = new Dictionary<string, double>();
    int termCount = newQuery.Count();
    for (int i = 0; i < termCount; i++)
    {
        keyWords.Add(searchEngine.stemmedKeywords[i], newQuery[i]);
    }

    // Show the non-zero weights, highest first.
    foreach (var weighted in keyWords.OrderByDescending(entry => entry.Value))
    {
        if (weighted.Value != 0)
        {
            Console.WriteLine($"{weighted.Key} {weighted.Value}");
        }
    }

    Console.WriteLine("Type new query");
    string refinedQuery = Console.ReadLine();
    if (refinedQuery == null)
    {
        return;
    }

    printRanking(rankDocuments(searchEngine, buildQueryVector(searchEngine, refinedQuery)));
}

/// <summary>
/// Converts a raw query string into a TF-IDF vector: tokens are extracted and stemmed, a TF
/// vector is built against the stemmed keywords and multiplied coordinate-wise by the IDF vector.
/// </summary>
/// <param name="searchEngine">Engine supplying the stemmer, stemmed keywords and IDF vector.</param>
/// <param name="query">Query text typed by the user.</param>
/// <returns>The query's TF-IDF vector.</returns>
private static List<double> buildQueryVector(SearchEngine searchEngine, string query)
{
    List<string> stemmedQueryTokens = new List<string>();
    foreach (string token in Utils.extractTokens(query))
    {
        stemmedQueryTokens.Add(Utils.stemToken(token, searchEngine.stemmer));
    }

    List<double> queryTFVector = Utils.createTFVector(searchEngine.stemmedKeywords, stemmedQueryTokens);
    return Utils.multiplyVectorsCoords(queryTFVector, searchEngine.IDFVector);
}

/// <summary>
/// Scores every document with <c>Utils.calculateVectorsCosinus</c> against the query vector and
/// returns the (document, score) pairs ordered by descending score.
/// </summary>
/// <param name="searchEngine">Engine holding the documents to score.</param>
/// <param name="queryTFIDFVector">TF-IDF vector of the query.</param>
/// <returns>Document/score pairs, best match first; ties keep the documents' list order.</returns>
private static List<KeyValuePair<Document, double>> rankDocuments(SearchEngine searchEngine, List<double> queryTFIDFVector)
{
    // A plain list is used instead of a dictionary keyed by document, so two documents that
    // compare equal cannot trigger a duplicate-key exception.
    List<KeyValuePair<Document, double>> scored = new List<KeyValuePair<Document, double>>();
    foreach (Document doc in searchEngine.documents)
    {
        double cosinus = Utils.calculateVectorsCosinus(queryTFIDFVector, doc.TFIDFVector);
        scored.Add(new KeyValuePair<Document, double>(doc, cosinus));
    }

    return scored.OrderByDescending(pair => pair.Value).ToList();
}

/// <summary>
/// Prints one line per ranked document in the form "index. score title", with a zero-based index.
/// </summary>
/// <param name="ranking">Document/score pairs in the order they should be listed.</param>
private static void printRanking(List<KeyValuePair<Document, double>> ranking)
{
    for (int index = 0; index < ranking.Count; index++)
    {
        Console.WriteLine($"{index}. {ranking[index].Value} {ranking[index].Key.title}");
    }
}

/// <summary>
/// Reads the number of groups (2 to 20) and the number of k-means iterations (at least 1),
/// then calls <c>classify</c> on the grouping engine. Does nothing further if standard input
/// ends while reading either number.
/// </summary>
/// <param name="groupEngine">Grouping engine that performs the classification.</param>
private static void runGroupingAction(GroupEngine groupEngine)
{
    // TODO: define maximal number of groups
    int groupsNumber;
    if (!tryReadInt("Type number of groups to classify documents.",
                    2, 20, "Invalid groups number. Try again.", out groupsNumber))
    {
        return;
    }

    int iterationsNubmer;
    if (!tryReadInt("Type number of iterations of k-means algorithm.",
                    1, Int32.MaxValue, "Invalid iterations number. Try again.", out iterationsNubmer))
    {
        return;
    }

    groupEngine.classify(groupsNumber, iterationsNubmer);
}
/// <summary>
/// Entry point: applies the command-line options to the program's settings, creates the
/// search engine, loads initial data when a source path was given, and then runs the
/// interactive prompt loop.
/// </summary>
/// <param name="args">
/// Command-line options. Each option other than <c>--help</c> takes its value from the next
/// argument; the option is skipped when that argument itself starts with "-".
/// </param>
/// <returns>
/// <c>-2</c> when processing the arguments throws (for example a trailing option with no
/// value, or a non-numeric memory limit); <c>0</c> when standard input ends.
/// </returns>
static int Main(string[] args)
{
    // Fetches the argument after position flagIndex and reports whether it is a value rather
    // than another option. Indexing past the end throws on purpose: the caller's catch turns
    // that into the "Wrong arguments" exit path.
    bool TryReadValue(int flagIndex, out string candidate)
    {
        candidate = args[flagIndex + 1];
        return !candidate.StartsWith("-", StringComparison.Ordinal);
    }

    try
    {
        for (int i = 0; i < args.Length; i++)
        {
            string value;
            switch (args[i])
            {
                case "--help":
                case "-h":
                    // NOTE(review): --orderFixed/-of and --numberOfPermutation/-nop are accepted
                    // below but not listed here — confirm whether they should be documented.
                    Console.WriteLine("--debug -d Show additional information");
                    Console.WriteLine("--help -h Show help");
                    Console.WriteLine("--memory-limit -m Max memory size. 0 for disable");
                    Console.WriteLine("--normalize -n Pre-process every word before insert");
                    Console.WriteLine("--pattern -p Pattern for removing unwanted characters, used for each word before insert");
                    Console.WriteLine("--source -s Load data from specific path at start");
                    Console.WriteLine("--extension -e Set extension for loading data at start");
                    break;

                case "--debug":
                case "-d":
                    if (TryReadValue(i, out value))
                    {
                        debug = value == "true";
                    }
                    break;

                case "--memory-limit":
                case "-m":
                    if (TryReadValue(i, out value))
                    {
                        memoryLimit = Convert.ToInt32(value);
                    }
                    break;

                case "--normalize":
                case "-n":
                    if (TryReadValue(i, out value))
                    {
                        normalize = value == "true";
                    }
                    break;

                case "--orderFixed":
                case "-of":
                    if (TryReadValue(i, out value))
                    {
                        orderFixed = value == "true";
                    }
                    break;

                case "--numberOfPermutation":
                case "-nop":
                    if (TryReadValue(i, out value))
                    {
                        // Falls back to 2 when the value is not a valid integer.
                        numberOfPermutation = int.TryParse(value, out var nop) ? nop : 2;
                    }
                    break;

                case "--pattern":
                case "-p":
                    if (TryReadValue(i, out value))
                    {
                        pattern = value;
                    }
                    break;

                case "--source":
                case "-s":
                    if (TryReadValue(i, out value))
                    {
                        initialSource = value;
                    }
                    break;

                case "--extension":
                case "-e":
                    if (TryReadValue(i, out value))
                    {
                        initialExtension = value;
                    }
                    break;
            }
        }
    }
    catch (Exception ex)
    {
        Console.WriteLine($"Wrong arguments {Environment.NewLine} {ex.ToString()}");
        return -2;
    }

    // sample data http://mlg.ucd.ie/datasets/bbc.html
    _SearchEngine = new SearchEngine(debug, normalize, orderFixed, numberOfPermutation, pattern, memoryLimit);

    if (initialSource != "")
    {
        LoadFromSource(initialSource, initialExtension);
    }

    while (true)
    {
        Console.Write(_prompt);
        string userInput = Console.ReadLine();
        if (userInput == null)
        {
            // Console.ReadLine returns null when no more input is available; stop instead of
            // re-prompting forever and handing null to ParseInput.
            return 0;
        }

        ParseInput(userInput);
    }
}