Пример #1
0
        /// <summary>
        /// Computes a BM25-style relevance score for every document in <paramref name="dic"/>.
        /// </summary>
        /// <param name="dic">
        /// Maps a document name to the list of query terms found in that document.
        /// The lists are only read; they are not modified.
        /// </param>
        /// <returns>A dictionary mapping each document name to its accumulated score.</returns>
        /// <remarks>
        /// Reads <c>m_doc_dic</c>, <c>m_terms_dictionary</c> and <c>m_words_in_query</c>; a term or
        /// document missing from those dictionaries surfaces as a <c>KeyNotFoundException</c>.
        /// </remarks>
        private Dictionary <string, double> calculatingDocumentsBM25(Dictionary <string, LinkedList <WordsConnected> > dic)
        {
            // Tuning constants of the formula.
            const double k1 = 1.1;
            const double b  = 0;
            const double k2 = 1000;

            Dictionary <string, double> array_doc = new Dictionary <string, double>();
            int numberOfDoc = m_doc_dic.Count;

            foreach (KeyValuePair <string, LinkedList <WordsConnected> > entry in dic)
            {
                string doc_name = entry.Key;

                // With b = 0 the document-length term contributes nothing and K equals k1.
                // NOTE(review): the expression multiplies b by the raw DOC_LENGTH rather than by
                // length / average length; revisit this before ever setting b to a non-zero value.
                double K         = k1 * ((1 - b) + b * (m_doc_dic[doc_name].DOC_LENGTH));
                double doc_final = 0;

                // Plain enumeration: the previous RemoveFirst/AddLast rotation mutated the caller's
                // list and left it one element short if any lookup below threw.
                foreach (WordsConnected term in entry.Value)
                {
                    double df = m_terms_dictionary[term.NAME].DF;

                    // presumably LOCATION carries the term's occurrence count in the document
                    // (getTheDocument passes the count to the constructor) - TODO confirm
                    double tf = term.LOCATION;
                    double qf = m_words_in_query[term.NAME];

                    // Inverse-document-frequency factor (no relevance information, hence 0.5 / 0.5).
                    double idfPart   = Math.Log((0.5 / 0.5) / ((df + 0.5) / (numberOfDoc - df + 0.5)));
                    // Saturating weight of the term inside the document.
                    double docPart   = ((k1 + 1) * tf) / (K + tf);
                    // Saturating weight of the term inside the query.
                    double queryPart = ((k2 + 1) * qf) / (k2 + qf);

                    doc_final = doc_final + (idfPart * docPart * queryPart);
                }

                array_doc.Add(doc_name, doc_final);
            }

            return(array_doc);
        }
Пример #2
0
        /// <summary>
        /// Computes a cosine-similarity style score for every document in <c>m_query_in_RelevantDoc</c>.
        /// </summary>
        /// <returns>A dictionary mapping each document name to its score.</returns>
        /// <remarks>
        /// Each query term known to <c>m_terms_dictionary</c> is weighted by
        /// (number of documents / document frequency). A document's score is the sum of the weights
        /// of its listed terms divided by (number of distinct query words * document length).
        /// The linked lists in <c>m_query_in_RelevantDoc</c> are only read.
        /// </remarks>
        private Dictionary <string, double> calculatingDocumentsCosSim()
        {
            Dictionary <string, double> array_doc = new Dictionary <string, double>();
            Dictionary <string, double> idf_words = new Dictionary <string, double>();
            int number_doc = m_doc_dic.Count;

            foreach (string word in m_words_in_query.Keys)
            {
                if (m_terms_dictionary.ContainsKey(word))
                {
                    // Cast first so the ratio is a real-valued division; with two integer operands
                    // the quotient was truncated before being stored as a double.
                    // A DF of 0 therefore yields Infinity/NaN instead of a division error.
                    idf_words[word] = (double)number_doc / m_terms_dictionary[word].DF;
                }
            }

            foreach (string doc in m_query_in_RelevantDoc.Keys)
            {
                double cosSimD = 0;

                // Plain enumeration instead of rotating the list through RemoveFirst/AddLast,
                // so the shared list is never mutated.
                foreach (WordsConnected term in m_query_in_RelevantDoc[doc])
                {
                    cosSimD = cosSimD + idf_words[term.NAME];
                }

                // Normalisation by the plain product; an alternative using the square roots of both
                // factors was left disabled by the original author.
                cosSimD = cosSimD / ((m_words_in_query.Count) * (m_doc_dic[doc].DOC_LENGTH));
                array_doc.Add(doc, cosSimD);
            }

            return(array_doc);
        }
Пример #3
0
        /// <summary>
        /// Builds, for every document that contains at least one query word, the list of
        /// (word, occurrence count) entries read from the posting file.
        /// </summary>
        /// <param name="query_list">Maps a query number to the words of that query. Each word array is sorted in place.</param>
        /// <param name="Dostemming">Selects the stemmed dictionary/posting files when true.</param>
        /// <param name="path_posting">Directory that contains the dictionary and posting files.</param>
        /// <returns>Maps a document identifier to the query words found in it.</returns>
        /// <remarks>
        /// Side effect: <c>m_words_in_query</c> is set to the word counts accumulated over all queries
        /// processed so far. Every distinct word is looked up in the files exactly once, even when it
        /// occurs in several queries, so its postings are not appended more than once.
        /// </remarks>
        public Dictionary <string, LinkedList <WordsConnected> > getTheDocument(Dictionary <int, string[]> query_list, bool Dostemming, string path_posting)
        {
            Dictionary <string, int> words_in_query = new Dictionary <string, int>();
            Dictionary <string, LinkedList <WordsConnected> > doc_to_calculate = new Dictionary <string, LinkedList <WordsConnected> >();
            // Words whose postings were already collected (the counts dictionary spans all queries).
            HashSet <string> scanned_words = new HashSet <string>();

            string dictionary_path;
            string posting_path;

            if (Dostemming == true)
            {
                dictionary_path = path_posting + "/DictionaryStemming";
                posting_path    = path_posting + "/PostingFileStemming";
            }
            else
            {
                dictionary_path = path_posting + "/Dictionary";
                posting_path    = path_posting + "/PostingFile";
            }

            foreach (int query_number in query_list.Keys)
            {
                string[] wordsQuery = query_list[query_number];
                Array.Sort(wordsQuery); // kept: callers may observe the sorted array

                // count the times of each word in the query
                foreach (string query_word in wordsQuery)
                {
                    int seen;
                    words_in_query.TryGetValue(query_word, out seen); // seen stays 0 for a new word
                    words_in_query[query_word] = seen + 1;
                }

                m_words_in_query = words_in_query;

                foreach (string word in words_in_query.Keys)
                {
                    // Skip words handled for an earlier query; rescanning them appended duplicates.
                    if (!scanned_words.Add(word))
                    {
                        continue;
                    }

                    collectPostingsOfWord(word, dictionary_path, posting_path, doc_to_calculate);
                }
            }

            return(doc_to_calculate);
        }

        // Reads the dictionary and posting files in lock-step and, at the first dictionary line whose
        // term (text before the first '/') equals the word, adds that word's postings to the result.
        private void collectPostingsOfWord(string word, string dictionary_path, string posting_path, Dictionary <string, LinkedList <WordsConnected> > doc_to_calculate)
        {
            // using-blocks close all four handles even when parsing or a lookup throws.
            using (FileStream fs1 = new FileStream(dictionary_path, FileMode.Open, FileAccess.Read))
            using (FileStream fs2 = new FileStream(posting_path, FileMode.Open, FileAccess.Read))
            using (StreamReader sr1 = new StreamReader(fs1)) //read dictionary
            using (StreamReader sr2 = new StreamReader(fs2)) //read posting
            {
                string line1 = sr1.ReadLine();
                string line2 = sr2.ReadLine();

                while (line1 != null && line2 != null)
                {
                    int slash = line1.IndexOf('/');

                    // A dictionary line without '/' is malformed; skip it instead of crashing.
                    if (slash >= 0 && line1.Substring(0, slash) == word)
                    {
                        addPostingEntries(word, line2, doc_to_calculate);
                        return; // only the first matching line is used
                    }

                    line1 = sr1.ReadLine();
                    line2 = sr2.ReadLine();
                }
            }
        }

        // Parses one posting line made of "document,count;" records and adds an entry for the word
        // to each listed document, subject to the selected-language filter for new documents.
        private void addPostingEntries(string word, string posting, Dictionary <string, LinkedList <WordsConnected> > doc_to_calculate)
        {
            int start = 0;

            while (true)
            {
                int comma = posting.IndexOf(',', start);
                if (comma < 0)
                {
                    break; // no further complete record
                }

                int semicolon = posting.IndexOf(';', comma + 1);
                if (semicolon < 0)
                {
                    break; // trailing record is not terminated - ignore it
                }

                string doc_number    = posting.Substring(start, comma - start);
                string appear_number = posting.Substring(comma + 1, semicolon - comma - 1);
                start = semicolon + 1;

                int appear;
                Int32.TryParse(appear_number, out appear); // appear is 0 when the count is not a number

                WordsConnected to_add = new WordsConnected(word, appear);
                LinkedList <WordsConnected> words_of_doc;

                if (doc_to_calculate.TryGetValue(doc_number, out words_of_doc))
                {
                    words_of_doc.AddFirst(to_add);
                }
                else if (m_list_languages_pressed.Contains(m_doc_dic[doc_number].LANGUAGE.ToLower()) || m_list_languages_pressed.Count == 0)
                {
                    // New document: accepted when its language is selected or no language is selected.
                    words_of_doc = new LinkedList <WordsConnected>();
                    words_of_doc.AddFirst(to_add);
                    doc_to_calculate.Add(doc_number, words_of_doc);
                }
            }
        }
Пример #4
0
        // parse the terms
        public void parse(Dictionary <int, string[]> d, string path_stopWords, string path_newFile, string path_docs, bool Dostemming, bool Doindexer, string path_wordsConnected, int total_lenght_doc, Dictionary <string, Document> doc_dic, Dictionary <string, Term> terms_dictionary, ArrayList list_languages_pressed, string path_queries_ranked)
        {
            Dictionary <int, string[]> dicOfTerms = new Dictionary <int, string[]>();
            Dictionary <string, ArrayConnectedwords> final_connectWords = new Dictionary <string, ArrayConnectedwords>();
            LinkedList <WordsConnected> list_wordsCOnnected             = new LinkedList <WordsConnected>();
            ArrayList documents = new ArrayList(); // create a list of documnets

            char[]           delimiters = { ' ' };
            HashSet <string> stopWords  = new HashSet <string>();

            stopWords = StopWords(path_stopWords);

            //create month dictionary
            Dictionary <string, string> MonthDic = new Dictionary <string, string>();

            MonthDic = CreateDic(MonthDic);

            //int how = 1;
            foreach (int NumOfFile in d.Keys)
            {
                string[] words = d[NumOfFile][1].Split(delimiters);

                string[] AfterParse = new String[words.Length * 2];


                //delete from the end the ; . , : ) *
                for (int i = 0; i < words.Length; i++)
                {
                    if (((words[i] != "") && (words[i].ToLower() != "u.s.")) && ((words[i].Substring(words[i].Length - 1, 1) == "-" || words[i].Substring(words[i].Length - 1, 1) == "!" || words[i].Substring(words[i].Length - 1, 1) == "?" || words[i].Substring(words[i].Length - 1, 1) == "\"" || words[i].Substring(words[i].Length - 1, 1) == ";" || words[i].Substring(words[i].Length - 1, 1) == "." || words[i].Substring(words[i].Length - 1, 1) == "," || words[i].Substring(words[i].Length - 1, 1) == ":" || words[i].Substring(words[i].Length - 1, 1) == ")" || words[i].Substring(words[i].Length - 1, 1) == "*")))
                    {
                        words[i] = words[i].Substring(0, words[i].Length - 1);
                    }
                }

                int    counterWords      = 0;
                int    counterAfterParse = 0;
                double i1;
                double i2;
                bool   CheckIFEnter = false;

                while (counterWords < words.Length)
                {
                    //delete the , and the .  , ; * ) : from the end of the string
                    while ((words[counterWords] != "" && (words[counterWords].ToLower() != "u.s.")) && (words[counterWords].Substring(words[counterWords].Length - 1, 1) == "]" || words[counterWords].Substring(words[counterWords].Length - 1, 1) == "{" || words[counterWords].Substring(words[counterWords].Length - 1, 1) == "(" || words[counterWords].Substring(words[counterWords].Length - 1, 1) == "," || words[counterWords].Substring(words[counterWords].Length - 1, 1) == "'" || words[counterWords].Substring(words[counterWords].Length - 1, 1) == "|" || words[counterWords].Substring(words[counterWords].Length - 1, 1) == "`" || words[counterWords].Substring(words[counterWords].Length - 1, 1) == "\"" || words[counterWords].Substring(words[counterWords].Length - 1, 1) == "." || words[counterWords].Substring(words[counterWords].Length - 1, 1) == ";" || words[counterWords].Substring(words[counterWords].Length - 1, 1) == "*" || words[counterWords].Substring(words[counterWords].Length - 1, 1) == "?" || words[counterWords].Substring(words[counterWords].Length - 1, 1) == ")" || words[counterWords].Substring(words[counterWords].Length - 1, 1) == ":"))
                    {
                        words[counterWords] = words[counterWords].Substring(0, words[counterWords].Length - 1);
                    }

                    //delete * " ( from the beginning
                    while ((words[counterWords] != "") && ((words[counterWords].Substring(0, 1) == "(") || words[counterWords].Substring(0, 1) == "\\" || words[counterWords].Substring(0, 1) == "}" || words[counterWords].Substring(0, 1) == ")" || words[counterWords].Substring(0, 1) == "@" || words[counterWords].Substring(0, 1) == ";" || words[counterWords].Substring(0, 1) == ":" || words[counterWords].Substring(0, 1) == "!" || words[counterWords].Substring(0, 1) == "%" || words[counterWords].Substring(0, 1) == "=" || words[counterWords].Substring(0, 1) == "]" || words[counterWords].Substring(0, 1) == ")" || words[counterWords].Substring(0, 1) == "+" || words[counterWords].Substring(0, 1) == "|" || words[counterWords].Substring(0, 1) == "'" || (words[counterWords].Substring(0, 1) == ".") || words[counterWords].Substring(0, 1) == "`" || (words[counterWords].Substring(0, 1) == ",") || (words[counterWords].Substring(0, 1) == "?") || (words[counterWords].Substring(0, 1) == "&") || (words[counterWords].Substring(0, 1) == "[") || words[counterWords].Substring(0, 1) == "/" || words[counterWords].Substring(0, 1) == "_" || (words[counterWords].Substring(0, 1) == "\"") || (words[counterWords].Substring(0, 1) == "-") || (words[counterWords].Substring(0, 1) == "*")))
                    {
                        words[counterWords] = words[counterWords].Substring(1, words[counterWords].Length - 1);
                    }


                    // delete foe the next word .  , ; * ) : from the end of the string
                    if ((counterWords + 1 < words.Length))
                    {
                        counterWords++;
                        //delete the , and the .  , ; * ) : from the end of the string
                        while ((words[counterWords] != "" && (words[counterWords].ToLower() != "u.s.")) && (words[counterWords].Substring(words[counterWords].Length - 1, 1) == "]" || words[counterWords].Substring(words[counterWords].Length - 1, 1) == "," || words[counterWords].Substring(words[counterWords].Length - 1, 1) == "'" || words[counterWords].Substring(words[counterWords].Length - 1, 1) == "|" || words[counterWords].Substring(words[counterWords].Length - 1, 1) == "`" || words[counterWords].Substring(words[counterWords].Length - 1, 1) == "\"" || words[counterWords].Substring(words[counterWords].Length - 1, 1) == "." || words[counterWords].Substring(words[counterWords].Length - 1, 1) == ";" || words[counterWords].Substring(words[counterWords].Length - 1, 1) == "*" || words[counterWords].Substring(words[counterWords].Length - 1, 1) == "?" || words[counterWords].Substring(words[counterWords].Length - 1, 1) == ")" || words[counterWords].Substring(words[counterWords].Length - 1, 1) == ":"))
                        {
                            words[counterWords] = words[counterWords].Substring(0, words[counterWords].Length - 1);
                        }
                        counterWords--;
                    }

                    if (Dostemming == true)
                    {
                        Stemmer s = new Stemmer();
                        words[counterWords] = s.stemTerm(words[counterWords]);
                    }

                    // check if it is  date - [16TH]
                    if ((words[counterWords] != "") && (words[counterWords].Length == 4 && words[counterWords].Substring(2).ToLower() == "th") && (Double.TryParse(words[counterWords].Substring(0, 2), out i1)))
                    {
                        words[counterWords] = words[counterWords].Substring(0, 2);
                    }
                    // check if it is  date - [9TH]
                    if ((words[counterWords] != "") && (words[counterWords].Length == 3 && words[counterWords].Substring(1).ToLower() == "th") && (Double.TryParse(words[counterWords].Substring(0, 1), out i1)))
                    {
                        words[counterWords] = words[counterWords].Substring(0, 2);
                    }
                    //check if the term is number in the end
                    if ((words[counterWords] != "") && (Double.TryParse(words[counterWords], out i2)) && (words.Length == counterWords + 1))
                    {
                        AfterParse[counterAfterParse] = words[counterWords];
                        CheckIFEnter = true;
                        counterAfterParse++;
                    }  //check if it is NUMBER
                    else if ((words[counterWords] != "") && (Double.TryParse(words[counterWords], out i2) && !MonthDic.ContainsKey(words[counterWords + 1])))
                    {
                        double number = Double.Parse(words[counterWords]);


                        // if the string is up to miliion
                        if (number > 999999)
                        {
                            //the last word in the array
                            if ((counterWords + 1) == words.Length)
                            {
                                AfterParse[counterAfterParse] = IfNumber(Double.Parse(words[counterWords]), "");
                                CheckIFEnter = true;
                            }
                            else
                            {
                                // word in the middle
                                AfterParse[counterAfterParse] = IfNumber(Double.Parse(words[counterWords]), words[counterWords + 1]);
                                counterAfterParse++;
                                CheckIFEnter = true;
                                if (Dostemming == true)
                                {
                                    Stemmer s = new Stemmer();
                                    words[counterWords + 1] = s.stemTerm(words[counterWords + 1]);
                                }
                                if (words[counterWords + 1].ToLower() == "million" || words[counterWords + 1].ToLower() == "billion" || words[counterWords + 1].ToLower() == "trillion")
                                {
                                    counterWords++;
                                }
                            }
                        }

                        else // the string is less than million [ num < MILLION]
                        {
                            //the last word in the array
                            if ((counterWords + 1) == words.Length)
                            {
                                AfterParse[counterAfterParse] = words[counterWords];
                                CheckIFEnter = true;
                            }
                            //if the string is SHEVER [ LIKE 3/4 ]
                            else if (words[counterWords + 1].Contains("/"))
                            {
                                int    i = words[counterWords + 1].IndexOf("/");
                                double i3, i4;
                                if (Double.TryParse(words[counterWords + 1].Substring(0, i), out i3) && Double.TryParse(words[counterWords + 1].Substring(i + 1, words[counterWords + 1].Length - i - 1), out i4))
                                {
                                    double new_num = i3 / i4;
                                    i2 = i2 + new_num;
                                    AfterParse[counterAfterParse] = i2.ToString();
                                    counterWords++;
                                    counterAfterParse++;
                                    CheckIFEnter = true;
                                }
                                //AfterParse[counterAfterParse] = words[counterWords] + " " + words[counterWords + 1];
                            }
                            else // the string is in the middle
                            {
                                //check if there is enough words to end
                                if (counterWords + 1 < words.Length)
                                {
                                    //this month
                                    if (MonthDic.ContainsKey(words[counterWords + 1].ToLower()))
                                    {
                                        // check if the string is not in the end
                                        if (counterWords + 2 < words.Length)
                                        {
                                            // this is NUMBER MONTH NUMBER [ LIKE 14 MAY 1991]
                                            if (Double.TryParse(words[counterWords + 2], out i1))
                                            {
                                                // this is 2 DIGIT NUMBER [LIKE 91]
                                                if (words[counterWords + 2].Length == 2)
                                                {
                                                    if (i2 < 10)
                                                    {
                                                        words[counterWords] = "0" + words[counterWords];
                                                    }
                                                    AfterParse[counterAfterParse] = "19" + words[counterWords + 2] + "-" + MonthDic[words[counterWords + 1].ToLower()] + "-" + words[counterWords];
                                                    CheckIFEnter = true;
                                                    counterAfterParse++;
                                                    counterWords = counterWords + 2;
                                                }

                                                // this is 4 DIGITS NUMBER [LIKE 1991]
                                                else if (words[counterWords + 2].Length == 4)
                                                {
                                                    if (i2 < 10)
                                                    {
                                                        words[counterWords] = "0" + words[counterWords];
                                                    }
                                                    AfterParse[counterAfterParse] = words[counterWords + 2] + "-" + MonthDic[words[counterWords + 1].ToLower()] + "-" + words[counterWords];
                                                    CheckIFEnter = true;
                                                    counterAfterParse++;
                                                    counterWords = counterWords + 2;
                                                }
                                            }
                                            // this is NUMBER MONTH [ 14 MAY ]
                                            else
                                            {
                                                double ii;
                                                // check if the number is small than 10. than change to 03
                                                if (Double.TryParse(words[counterWords], out ii))
                                                {
                                                    // check if the number is small than 10. than change to 03
                                                    if (ii < 10)
                                                    {
                                                        words[counterWords] = "0" + words[counterWords];
                                                    }
                                                }
                                                AfterParse[counterAfterParse] = MonthDic[words[counterWords + 1].ToLower()] + "-" + words[counterWords];
                                                CheckIFEnter = true;
                                                counterAfterParse++;
                                                counterWords++;
                                            }
                                        }
                                        // this is only NUMBER MONTH in the end of the string! [14 MAY]
                                        else
                                        {
                                            double ii;
                                            // check if the number is small than 10. than change to 03
                                            if (Double.TryParse(words[counterWords], out ii))
                                            {
                                                // check if the number is small than 10. than change to 03
                                                if (ii < 10)
                                                {
                                                    words[counterWords] = "0" + words[counterWords];
                                                }
                                            }
                                            AfterParse[counterAfterParse] = MonthDic[words[counterWords + 1].ToLower()] + "-" + words[counterWords];
                                            CheckIFEnter = true;
                                            counterAfterParse++;
                                            counterWords++;
                                        }
                                    }
                                    else //check if this is a Million/Trillion/billion
                                    {
                                        if (Dostemming == true)
                                        {
                                            Stemmer s = new Stemmer();
                                            words[counterWords + 1] = s.stemTerm(words[counterWords + 1]);
                                        }
                                        if (words[counterWords + 1].ToLower() == "million" || words[counterWords + 1].ToLower() == "billion" || words[counterWords + 1].ToLower() == "trillion")
                                        {
                                            AfterParse[counterAfterParse] = IfNumber(i2, words[counterWords + 1]);
                                            counterAfterParse++;
                                            counterWords++;
                                        }
                                    }
                                }
                                else // the number is at the end of the array
                                {
                                    AfterParse[counterAfterParse] = words[counterWords];
                                    CheckIFEnter = true;
                                    counterAfterParse++;
                                }
                            }
                        }
                    }

                    //check if in the phrase is BETWEEN
                    if ((words[counterWords] != "") && words[counterWords].ToLower() == "between")
                    {
                        if (counterWords + 3 < words.Length)
                        {
                            //check if it BETWEEN NUMBER and NUMBER

                            if (Double.TryParse(words[counterWords + 1], out i1) || double.TryParse(words[counterWords + 3], out i1))
                            {
                                AfterParse[counterAfterParse] = words[counterWords] + " " + words[counterWords + 1] + " " + words[counterWords + 2] + " " + words[counterWords + 3];
                                CheckIFEnter = true;
                                counterAfterParse++;
                                counterWords = counterWords + 3;
                            }
                            else // not contain a numberic variable
                            {
                                AfterParse[counterAfterParse] = words[counterWords];
                                CheckIFEnter = true;
                                counterAfterParse++;
                            }
                        }
                    }

                    //check if it is percenct and add %
                    if ((words[counterWords] != "") && (words[counterWords].ToLower() == "percent" || words[counterWords].ToLower() == "percentage"))
                    {
                        if (counterWords != 0)
                        {
                            // this in number in front of him [ like 9 % ]
                            if ((words[counterWords - 1] != "") && (Double.TryParse((AfterParse[counterAfterParse - 1]), out i1)))
                            {
                                AfterParse[counterAfterParse - 1] = AfterParse[counterAfterParse - 1] + "%";

                                CheckIFEnter = true;
                            }
                        }
                    }

                    //check if it is dollar and add DOLLARS WITH STEMMING
                    if (Dostemming == true)
                    {
                        if (words[counterWords] != "" && (words[counterWords].ToLower() == "dollar"))
                        {
                            if (Double.TryParse(words[counterWords - 1], out i2) || words[counterWords - 1].Contains("/"))
                            {
                                AfterParse[counterAfterParse - 1] = AfterParse[counterAfterParse - 1] + " Dollars";
                                CheckIFEnter = true;
                            }
                        }
                    }

                    //check if it is dollar and add DOLLARS
                    if (Dostemming == false)
                    {
                        if (words[counterWords] != "" && (words[counterWords].ToLower() == "dollars"))
                        {
                            if (Double.TryParse(words[counterWords - 1], out i2) || words[counterWords - 1].Contains("/"))
                            {
                                AfterParse[counterAfterParse - 1] = AfterParse[counterAfterParse - 1] + " Dollars";
                                CheckIFEnter = true;
                            }
                        }
                    }

                    //check if it is contain $ and add DOLLARS
                    if (words[counterWords] != "" && words[counterWords].Substring(0, 1) == ("$"))
                    {
                        words[counterWords] = words[counterWords].Substring(1);
                        //check that it is a NUMBER
                        if (Double.TryParse(words[counterWords], out i1))
                        {
                            // more than 999999
                            if (i1 > 999999)
                            {
                                AfterParse[counterAfterParse] = IfNumber(i1, "") + " Dollars";
                                CheckIFEnter = true;
                                counterAfterParse++;
                            }
                            else // less than 999999 ( i1 < 999999 )
                            {
                                if (Dostemming == true)
                                {
                                    Stemmer s = new Stemmer();
                                    words[counterWords + 1] = s.stemTerm(words[counterWords + 1]);
                                }
                                // the next word is million or billion or trillion
                                if (words[counterWords + 1].ToLower() == "million" || words[counterWords + 1].ToLower() == "trillion" || words[counterWords + 1].ToLower() == "billion")

                                {
                                    AfterParse[counterAfterParse] = IfNumber(i1, words[counterWords + 1]) + " Dollars";
                                    CheckIFEnter = true;
                                    counterWords++;
                                    counterAfterParse++;
                                }
                                else // NO next word
                                {
                                    AfterParse[counterAfterParse] = words[counterWords] + " Dollars";
                                    CheckIFEnter = true;
                                    counterAfterParse++;
                                }
                            }
                        }
                    }

                    // check for a MONTH name
                    if (words[counterWords] != "" && MonthDic.ContainsKey(words[counterWords].ToLower()))
                    {
                        if (counterWords + 2 < words.Length)
                        {
                            // LIKE APRIL 28, 1990
                            if (Double.TryParse(words[counterWords + 2], out i2) && (Double.TryParse(words[counterWords + 1], out i1)))
                            {
                                // ADD 0 if the number is under 10
                                if (i1 < 10)
                                {
                                    words[counterWords + 1] = "0" + words[counterWords + 1];
                                }

                                AfterParse[counterAfterParse] = words[counterWords + 2] + "-" + MonthDic[words[counterWords].ToLower()] + "-" + words[counterWords + 1].Substring(0, words[counterWords + 1].Length - 1);
                                CheckIFEnter = true;
                                counterAfterParse++;
                                counterWords = counterWords + 2;
                            }

                            // LIKE APRIL 28
                            else
                            {
                                if (Double.TryParse(words[counterWords + 1], out i2))
                                {
                                    // MONTH DD [ APRIL 4 ]
                                    if (i2 < 32)
                                    {
                                        // ADD 0 if the number is under 10
                                        if (i2 < 10)
                                        {
                                            words[counterWords + 1] = "0" + words[counterWords + 1];
                                        }
                                        AfterParse[counterAfterParse] = MonthDic[words[counterWords].ToLower()] + "-" + words[counterWords + 1];
                                        CheckIFEnter = true;
                                        counterAfterParse++;
                                        counterWords = counterWords++;
                                    }
                                    // if it is MONTH YEAR [ APRIL 1991]
                                    else
                                    {
                                        // only emit YEAR-MONTH when the next word is not empty
                                        if (words[counterWords + 1] != "")
                                        {
                                            AfterParse[counterAfterParse] = words[counterWords + 1] + "-" + MonthDic[words[counterWords].ToLower()];
                                            CheckIFEnter = true;
                                            counterAfterParse++;
                                            counterWords++;
                                        }
                                    }
                                }
                            }
                        }
                        // the string is in the end! [LIKE APRIL 28]
                        else if (counterWords + 1 < words.Length)
                        {
                            if (Double.TryParse(words[counterWords + 1], out i2))
                            {
                                // MONTH DD [ APRIL 4 ]
                                if (i2 < 32)
                                {
                                    // ADD 0 if the number is under 10
                                    if (i2 < 10)
                                    {
                                        words[counterWords + 1] = "0" + words[counterWords + 1];
                                    }
                                    AfterParse[counterAfterParse] = MonthDic[words[counterWords].ToLower()] + "-" + words[counterWords + 1];
                                    CheckIFEnter = true;
                                    counterAfterParse++;
                                    counterWords = counterWords++;
                                }
                                // if it is MONTH YEAR [ APRIL 1991]
                                else
                                {
                                    AfterParse[counterAfterParse] = words[counterWords + 1] + "-" + MonthDic[words[counterWords].ToLower()];
                                    CheckIFEnter = true;
                                    counterAfterParse++;
                                    counterWords++;
                                }
                            }
                        }

                        else // the MONTH is in the end of the string
                        {
                            AfterParse[counterAfterParse] = words[counterWords];
                            CheckIFEnter = true;
                            counterAfterParse++;
                        }
                    }

                    // if the case is 100bn DOLLARS
                    if ((words[counterWords] != "" && words[counterWords].Length > 1))
                    {
                        if (words[counterWords].Substring(words[counterWords].Length - 2, 2).ToLower() == "bn")
                        {
                            if (Double.TryParse(words[counterWords].Substring(0, words[counterWords].Length - 2), out i1))
                            {
                                double num = i1 * 1000;
                                if (Dostemming == true)
                                {
                                    Stemmer s = new Stemmer();
                                    words[counterWords + 1] = s.stemTerm(words[counterWords + 1]);
                                    if (words[counterWords + 1].ToLower() == "dollar")
                                    {
                                        AfterParse[counterAfterParse] = num.ToString() + " M Dollars";
                                        CheckIFEnter = true;
                                    }
                                }

                                else if (words[counterWords + 1].ToLower() == "dollars")
                                {
                                    AfterParse[counterAfterParse] = num.ToString() + " M Dollars";
                                    CheckIFEnter = true;
                                }
                            }
                        }
                    }

                    // for case 100 billion U.S dollars
                    if ((words[counterWords] != "" && counterWords + 2 < words.Length))
                    {
                        if (Dostemming == true)
                        {
                            Stemmer s = new Stemmer();
                            words[counterWords + 1] = s.stemTerm(words[counterWords + 1]);
                            if (words[counterWords].ToLower() == "u.s." && words[counterWords + 1].ToLower() == "dollar")
                            {
                                AfterParse[counterAfterParse - 1] = AfterParse[counterAfterParse - 1] + " Dollars";
                                CheckIFEnter = true;
                            }
                        }
                        else if (words[counterWords].ToLower() == "u.s." && words[counterWords + 1].ToLower() == "dollars")
                        {
                            AfterParse[counterAfterParse - 1] = AfterParse[counterAfterParse - 1] + " Dollars";
                            CheckIFEnter = true;
                        }
                    }

                    // case of Price UP to Million = 20.6m Dollars
                    if ((words[counterWords] != "" && counterWords + 1 < words.Length))
                    {
                        if (Dostemming == true)
                        {
                            Stemmer s = new Stemmer();
                            words[counterWords + 1] = s.stemTerm(words[counterWords + 1]);
                            if (words[counterWords].Substring(words[counterWords].Length - 1, 1).ToLower() == "m" && Double.TryParse(words[counterWords].Substring(0, words[counterWords].Length - 1), out i1) && words[counterWords + 1].ToLower() == "dollar")
                            {
                                AfterParse[counterAfterParse] = i1.ToString() + " M Dollars";
                                CheckIFEnter = true;
                                counterAfterParse++;
                                counterWords++;
                            }
                        }
                        else if (words[counterWords].Substring(words[counterWords].Length - 1, 1).ToLower() == "m" && Double.TryParse(words[counterWords].Substring(0, words[counterWords].Length - 1), out i1) && words[counterWords + 1].ToLower() == "dollars")
                        {
                            AfterParse[counterAfterParse] = i1.ToString() + " M Dollars";
                            CheckIFEnter = true;
                            counterAfterParse++;
                            counterWords++;
                        }
                    }

                    // if it is just a REGULAR STRING
                    if (words[counterWords] != "" && CheckIFEnter == false)
                    {
                        if (words[counterWords].Contains("/"))
                        {
                            int index = words[counterWords].IndexOf("/");
                            if (!Double.TryParse(words[counterWords].Substring(0, index), out i1))
                            {
                                AfterParse[counterAfterParse] = words[counterWords].Substring(0, index);
                                counterAfterParse++;

                                //contain more than 1 /
                                string temp_string = words[counterWords].Substring(index + 1, words[counterWords].Length - index - 1);
                                //delete * " ( from the beginning
                                while ((temp_string != "") && ((temp_string.Substring(0, 1) == "(") || temp_string.Substring(0, 1) == "}" || temp_string.Substring(0, 1) == ")" || temp_string.Substring(0, 1) == "[" || temp_string.Substring(0, 1) == "/" || temp_string.Substring(0, 1) == "|" || temp_string.Substring(0, 1) == "]" || temp_string.Substring(0, 1) == ";" || temp_string.Substring(0, 1) == ":" || temp_string.Substring(0, 1) == "_" || temp_string.Substring(0, 1) == "@" || temp_string.Substring(0, 1) == "=" || temp_string.Substring(0, 1) == "+" || temp_string.Substring(0, 1) == "!" || temp_string.Substring(0, 1) == "%" || temp_string.Substring(0, 1) == "|" || temp_string.Substring(0, 1) == "'" || (temp_string.Substring(0, 1) == ".") || temp_string.Substring(0, 1) == "`" || (temp_string.Substring(0, 1) == ",") || (temp_string.Substring(0, 1) == "?") || (temp_string.Substring(0, 1) == "&") || (temp_string.Substring(0, 1) == "[") || (temp_string.Substring(0, 1) == "\"") || (temp_string.Substring(0, 1) == "-") || (temp_string.Substring(0, 1) == "*")))
                                {
                                    temp_string = temp_string.Substring(1, temp_string.Length - 1);
                                }
                                while (temp_string.Contains("/"))
                                {
                                    index = temp_string.IndexOf("/");
                                    AfterParse[counterAfterParse] = temp_string.Substring(0, index);
                                    counterAfterParse++;
                                    temp_string = temp_string.Substring(index + 1, temp_string.Length - index - 1);
                                }

                                AfterParse[counterAfterParse] = temp_string;
                                counterAfterParse++;
                            }
                        }
                        else
                        {
                            // check if it is not contain the stop words
                            if (!stopWords.Contains(words[counterWords].ToLower()))
                            {
                                if (words[counterWords].Substring(0, 1) != "-")
                                {
                                    AfterParse[counterAfterParse] = words[counterWords];
                                    counterAfterParse++;
                                    CheckIFEnter = false;
                                    // add the list the words connected
                                    list_wordsCOnnected.AddLast(new WordsConnected(words[counterWords], counterAfterParse - 1));
                                }
                            }
                        }
                    }

                    counterWords++;
                    CheckIFEnter = false;
                }
                if (Doindexer == true)
                {
                    Document doc = new Document(d[NumOfFile][2], AfterParse, counterAfterParse - 1, d[NumOfFile][0], d[NumOfFile][3]);
                    documents.Add(doc);
                    while (list_wordsCOnnected.Count >= 2)
                    {
                        WordsConnected obj1 = ((WordsConnected)list_wordsCOnnected.First());
                        list_wordsCOnnected.RemoveFirst();
                        WordsConnected obj2 = ((WordsConnected)list_wordsCOnnected.First());
                        list_wordsCOnnected.RemoveFirst();
                        // check if the string is connected - one string is after the other
                        if (obj1.LOCATION + 1 == obj2.LOCATION)
                        {
                            string key = obj1.NAME + " " + obj2.NAME;
                            // check if it is in the dictionary
                            if (final_connectWords.ContainsKey(key))
                            {
                                if (final_connectWords[key].DICTIONARY.ContainsKey(d[NumOfFile][2])) // if contains name of file
                                {
                                    final_connectWords[key].DICTIONARY[d[NumOfFile][2]]++;           // add the num of apperance
                                }
                            }
                            //not in the dictionary
                            else
                            {
                                final_connectWords.Add(key, new ArrayConnectedwords(d[NumOfFile][2]));
                                final_connectWords[key].DICTIONARY.Add(d[NumOfFile][2], 1);
                            }
                        }
                        list_wordsCOnnected.AddFirst(obj2);
                    }
                }
                else // enter the string after the parsing
                {
                    d[NumOfFile] = AfterParse;
                    ArrayList arrayList = new ArrayList();
                    for (int i = 0; i < AfterParse.Length; i++)
                    {
                        if (AfterParse[i] != null)
                        {
                            arrayList.Add(AfterParse[i]);
                        }
                    }
                    string[] final_after_parsing = new string[arrayList.Count];
                    int      j = 0;
                    foreach (string val in arrayList)
                    {
                        final_after_parsing[j] = val;
                        j++;
                    }
                    // create the final dictionary for queries
                    Dictionary <int, string[]> final_dic_query = new Dictionary <int, string[]>();
                    // get the num of the query
                    foreach (int num_query in d.Keys)
                    {
                        final_dic_query.Add(num_query, final_after_parsing);
                    }
                    // send to the Ranker!
                    Ranker    r        = new Ranker(terms_dictionary, doc_dic, final_dic_query, total_lenght_doc, Dostemming, path_newFile, list_languages_pressed, path_queries_ranked);
                    ArrayList list_rel = new ArrayList();
                    m_rel_doc = new ArrayList();
                    Dictionary <string, double> temp_dic = new Dictionary <string, double>();
                    temp_dic = r.TOP50DOCS;
                    foreach (string doc in temp_dic.Keys)
                    {
                        list_rel.Add(doc);
                    }
                    m_rel_doc   = list_rel;
                    m_query_num = r.QUERY_NUM; ////////////////////////////////
                    return;
                }
            }

            //for the documents
            if (Doindexer == true)
            {
                // string chaining = "";
                Dictionary <string, ArrayConnectedwords> Dic_connect_new = new Dictionary <string, ArrayConnectedwords>();

                foreach (var name in final_connectWords.OrderBy(i => i.Key))
                {
                    Dic_connect_new[name.Key] = name.Value;
                }

                FileStream   fs = new FileStream(path_wordsConnected, FileMode.Create);
                StreamWriter sw = new StreamWriter(fs);
                foreach (string term in Dic_connect_new.Keys)
                {
                    sw.Write(term + "/");
                    foreach (string nameDoc in Dic_connect_new[term].DICTIONARY.Keys)
                    {
                        sw.Write(nameDoc + "," + Dic_connect_new[term].DICTIONARY[nameDoc] + ";");
                    }
                    sw.WriteLine();
                }
                sw.Close();
                fs.Close();
                Indexer indexer = new Indexer(documents, path_newFile, path_docs);
            }

            // for the queries
            else
            {
            }
        }