/// <summary>
/// Groups every indexed file by how well it matches the current query words.
/// </summary>
/// <remarks>
/// For each file index this method:
/// 1. collects the query words that appear in that file's word-count table
///    (<c>Hashmap[fileIndex]</c>) and sums their occurrence counts;
/// 2. records the file index in <c>Batches[distinctMatches][occurrenceTotal]</c>,
///    creating the inner dictionary and the list on first use;
/// 3. stores the matched words in <c>WordOccurenceInFiles</c> under the key
///    (file index, occurrence total).
/// The <c>Add</c> call on <c>WordOccurenceInFiles</c> is expected to throw if the
/// same key is already present (presumably a dictionary — confirm against its
/// declaration), so the map must not already hold entries for these files.
/// </remarks>
public static void BatchesFiller()
{
    for (int fileIndex = 0; fileIndex < File.Length; fileIndex++)
    {
        Listt<string> matchedWords = new Listt<string>();

        // Start from "every query word matched" and subtract the misses.
        int distinctMatches = QueryWords.Count();
        int occurrenceTotal = 0;

        foreach (string queryWord in QueryWords)
        {
            if (Hashmap[fileIndex].ContainsKey(queryWord))
            {
                // The word occurs in this file: accumulate its count and remember it.
                occurrenceTotal += Hashmap[fileIndex][queryWord];
                matchedWords.add(queryWord);
                continue;
            }

            distinctMatches--;
        }

        // A C# dictionary indexer throws on a missing key when read, so each
        // nesting level is created explicitly before the file index is appended.
        if (!Batches.ContainsKey(distinctMatches))
        {
            Batches.Add(distinctMatches, new SortedDictionary<int, Listt<int>>());
        }

        var filesByOccurrence = Batches[distinctMatches];
        if (!filesByOccurrence.ContainsKey(occurrenceTotal))
        {
            filesByOccurrence.Add(occurrenceTotal, new Listt<int>());
        }

        filesByOccurrence[occurrenceTotal].add(fileIndex);

        // Key: (file index, total occurrences) -> the query words found in that file.
        WordOccurenceInFiles.Add(new Tuple<int, int>(fileIndex, occurrenceTotal), matchedWords);
    }
}
/// <summary>
/// Builds the per-file word-count tables in <c>Hashmap</c> and stores a short
/// preview (up to the first three lines) of every file in <c>FirstLines</c>.
/// </summary>
/// <remarks>
/// Words listed one per line in <c>DelimitersWords.txt</c> (under <c>dir</c>) are
/// left out of the index. Every other token — obtained by passing each line
/// through <c>RemoveSpecialCharacters</c>, splitting on single spaces and
/// lower-casing — is counted in <c>Hashmap[fileIndex]</c>.
/// Exactly one preview is appended to <c>FirstLines</c> per file, in file order,
/// even when the file has fewer than three lines, so previews stay aligned with
/// the file list.
/// Exceptions raised while opening or reading a file (for example a missing
/// file) are not caught here and propagate to the caller.
/// </remarks>
public static void InverseIndexing()
{
    string FileName = dir + @"\DelimitersWords.txt";

    // A hash set gives constant-time "is this a delimiter word?" checks for
    // every token read below; string equality is ordinal, as with ==.
    HashSet<string> delimiterWords = new HashSet<string>();
    using (FileStream delimiterStream = new FileStream(FileName, FileMode.Open, FileAccess.Read, FileShare.Read))
    using (StreamReader reader = new StreamReader(delimiterStream, Encoding.UTF8, true))
    {
        string delimiter;
        while ((delimiter = reader.ReadLine()) != null)
        {
            delimiterWords.Add(delimiter);
        }
    }

    for (int fileIndex = 0; fileIndex < File.Length; fileIndex++)
    {
        // Fresh word-count table for this file.
        Dictionary<string, int> wordCounts = new Dictionary<string, int>();
        Hashmap[fileIndex] = wordCounts;

        // Full path of the file: directory + "\Files\" + file name.
        FileName = dir + @"\Files\" + File[fileIndex].ToString();

        string firstThreeLines = "";
        int lineNumber = 0;

        using (FileStream fileStream = new FileStream(FileName, FileMode.Open, FileAccess.Read, FileShare.Read))
        using (StreamReader reader = new StreamReader(fileStream, Encoding.UTF8, true))
        {
            string line;
            while ((line = reader.ReadLine()) != null)
            {
                lineNumber++;
                if (lineNumber <= 3)
                {
                    firstThreeLines += line + Environment.NewLine;
                }

                foreach (string word in line.RemoveSpecialCharacters().Split(' '))
                {
                    string token = word.ToLower();
                    if (delimiterWords.Contains(token))
                    {
                        continue; // delimiter words are not indexed
                    }

                    // TryGetValue leaves 'occurrences' at 0 for an unseen word,
                    // so a single lookup covers both "first time" and "seen before".
                    int occurrences;
                    wordCounts.TryGetValue(token, out occurrences);
                    wordCounts[token] = occurrences + 1;
                }
            }
        }

        // Added once per file after reading, so files with fewer than three
        // lines still get an entry and later previews do not shift position.
        FirstLines.add(firstThreeLines);
    }
}