Beispiel #1
0
        /// <summary>
        /// Classifies every file by how well it matches the current query.
        /// For each index of <c>File</c> it counts how many of the <c>QueryWords</c>
        /// occur in that file's word-count map (<c>Hashmap[index]</c>) and sums their
        /// occurrence counts. The file index is then appended to
        /// <c>Batches[matchedWordCount][totalOccurrences]</c>, and the matched words are
        /// stored in <c>WordOccurenceInFiles</c> under the key (file index, total occurrences).
        /// </summary>
        /// <remarks>
        /// Reads <c>Hashmap</c>, so the index must have been built beforehand.
        /// <c>WordOccurenceInFiles.Add</c> throws if the same key is inserted twice,
        /// exactly as the previous implementation did.
        /// </remarks>
        public static void BatchesFiller()
        {
            for (int fileIndex = 0; fileIndex < File.Length; fileIndex++)
            {
                var wordCounts = Hashmap[fileIndex];
                Listt<string> matchedWords = new Listt<string>();
                int uniqueWordsInFile = 0;      // number of query words present in this file
                int totalNumberOfOccurences = 0; // sum of the counts of those words

                foreach (string queryWord in QueryWords)
                {
                    // Single lookup: fetches the count only when the word is indexed for this file.
                    int occurrences;
                    if (wordCounts.TryGetValue(queryWord, out occurrences))
                    {
                        uniqueWordsInFile++;
                        totalNumberOfOccurences += occurrences;
                        matchedWords.add(queryWord);
                    }
                }

                // Dictionaries do not create missing keys on access, so make sure both
                // nesting levels exist before appending. Entries are created fully
                // initialised instead of being inserted as null and overwritten.
                if (!Batches.ContainsKey(uniqueWordsInFile))
                {
                    Batches.Add(uniqueWordsInFile, new SortedDictionary<int, Listt<int>>());
                }

                var filesByTotal = Batches[uniqueWordsInFile];
                if (!filesByTotal.ContainsKey(totalNumberOfOccurences))
                {
                    filesByTotal.Add(totalNumberOfOccurences, new Listt<int>());
                }

                filesByTotal[totalNumberOfOccurences].add(fileIndex);

                // Add (not the indexer) is kept on purpose so a duplicate key still throws.
                WordOccurenceInFiles.Add(new Tuple<int, int>(fileIndex, totalNumberOfOccurences), matchedWords);
            }
        }
Beispiel #2
0
        /// <summary>
        /// Builds the per-file word-count index.
        /// First loads the delimiter (stop) words from <c>DelimitersWords.txt</c> under
        /// <c>dir</c>, one word per line. Then, for every entry of <c>File</c>, reads
        /// <c>dir\Files\&lt;File[i]&gt;</c> line by line, lower-cases each space-separated
        /// token and counts every token that is not a delimiter word into
        /// <c>Hashmap[i]</c>. The first (up to) three lines of each file are appended
        /// to <c>FirstLines</c>.
        /// </summary>
        /// <remarks>
        /// Exactly one <c>FirstLines</c> entry is added per file, including files with
        /// fewer than three lines, so its entries stay in the same order as <c>File</c>.
        /// Empty tokens produced by consecutive spaces are not indexed.
        /// I/O exceptions from opening or reading the files are not caught here.
        /// </remarks>
        public static void InverseIndexing()
        {
            // Delimiter words ("and", "or", "which", ...) are looked up once per token,
            // so they are kept in a hash set. The default string comparer is ordinal,
            // which matches the exact equality check used previously.
            string delimiterPath = dir + @"\DelimitersWords.txt";
            HashSet<string> delimiterWords = new HashSet<string>();

            using (FileStream delimiterStream = new FileStream(delimiterPath, FileMode.Open, FileAccess.Read, FileShare.Read))
            using (StreamReader reader = new StreamReader(delimiterStream, Encoding.UTF8, true))
            {
                string delimiter;
                while ((delimiter = reader.ReadLine()) != null)
                {
                    delimiterWords.Add(delimiter);
                }
            }

            for (int fileIndex = 0; fileIndex < File.Length; fileIndex++)
            {
                // Each slot of the index array needs its own dictionary instance.
                var wordCounts = new Dictionary<string, int>();
                Hashmap[fileIndex] = wordCounts;

                // Full path of the file: directory + "\Files\" + the name given by File[i].
                string filePath = dir + @"\Files\" + File[fileIndex].ToString();
                string firstThreeLines = "";
                int lineNumber = 0;

                using (FileStream fileStream = new FileStream(filePath, FileMode.Open, FileAccess.Read, FileShare.Read))
                using (StreamReader reader = new StreamReader(fileStream, Encoding.UTF8, true))
                {
                    string line;
                    while ((line = reader.ReadLine()) != null)
                    {
                        lineNumber++;
                        if (lineNumber <= 3)
                        {
                            firstThreeLines += line + Environment.NewLine;
                        }

                        // RemoveSpecialCharacters is defined elsewhere; presumably it strips
                        // punctuation before tokenising - confirm against its definition.
                        line = line.RemoveSpecialCharacters();

                        // Runs of spaces would otherwise yield empty tokens that get indexed as "".
                        string[] tokens = line.Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
                        foreach (string token in tokens)
                        {
                            // ToLower is kept as-is so keys keep matching whatever
                            // normalisation the query side applies.
                            string word = token.ToLower();
                            if (delimiterWords.Contains(word))
                            {
                                continue;
                            }

                            // TryGetValue leaves count at 0 for an unseen word, so one
                            // lookup plus one store handles both the new and existing case.
                            int count;
                            wordCounts.TryGetValue(word, out count);
                            wordCounts[word] = count + 1;
                        }
                    }
                }

                // Added after the whole file is read so that short files (fewer than
                // three lines) still get their entry.
                FirstLines.add(firstThreeLines);
            }
        }