Example #1
0
        /// <summary>
        /// Reads every (Term, docid, positions) row from <c>[IRdb].[dbo].[newterm]</c>, folds the rows
        /// into <c>InvertedIndex_Dic</c>, and then hands one serialized row per term to
        /// <c>insert_invertedIndex</c>.
        /// </summary>
        /// <remarks>
        /// Serialized layout (unchanged from the previous implementation, because the stored rows
        /// are produced in this shape):
        /// <list type="bullet">
        /// <item>doc ids / frequencies: the bare value when there is exactly one entry, otherwise every
        /// entry followed by <c>,</c> (so the string ends with a comma);</item>
        /// <item>positions: per document, the bare value when that document has exactly one position,
        /// otherwise every position followed by <c>,</c>; when the term occurs in more than one
        /// document each group is additionally followed by <c>@</c>.</item>
        /// </list>
        /// <c>InvertedIndex_Dic</c> is not cleared here, so entries already present are extended.
        /// NOTE(review): confirm whether the dictionary should be reset before rebuilding.
        /// </remarks>
        /// <param name="sender">Event source; not used.</param>
        /// <param name="e">Event data; not used.</param>
        protected void Inverted_Index(object sender, EventArgs e)
        {
            try
            {
                // `using` guarantees the connection, command and reader are released even when a
                // non-SQL exception (e.g. a failed cast on a row value) escapes the loop.
                using (SqlConnection conn = new SqlConnection(connstring))
                using (SqlCommand cmd = new SqlCommand("select Term,docid , positions from [IRdb].[dbo].[newterm]  ", conn))
                {
                    conn.Open();

                    using (SqlDataReader reader = cmd.ExecuteReader())
                    {
                        while (reader.Read())
                        {
                            string stemmedTerm = (string)reader["Term"];      // term as stored after stemming
                            string documentId  = (string)reader["docid"];
                            string position    = (string)reader["positions"];

                            AddPosting(stemmedTerm, documentId, position);
                        }
                    }
                }

                // The read connection is closed before the rows are written back, as before.
                foreach (string term in InvertedIndex_Dic.Keys)
                {
                    Inverted_index posting = InvertedIndex_Dic[term];

                    string docString = posting.DOCID.Count == 1
                        ? posting.DOCID[0]
                        : ConcatCommaTerminated(posting.DOCID);

                    string freqString = posting.FREQ.Count == 1
                        ? posting.FREQ[0].ToString()
                        : ConcatCommaTerminated(posting.FREQ);

                    System.Text.StringBuilder positions = new System.Text.StringBuilder();
                    foreach (List<string> docPositions in posting.POS)
                    {
                        if (docPositions.Count == 1)
                        {
                            positions.Append(docPositions[0]);
                        }
                        else
                        {
                            foreach (string docPosition in docPositions)
                            {
                                positions.Append(docPosition).Append(',');
                            }
                        }

                        // Groups are only '@'-terminated when the term spans several documents.
                        if (posting.POS.Count != 1)
                        {
                            positions.Append('@');
                        }
                    }

                    insert_invertedIndex(term, docString, freqString, positions.ToString());
                }
            }
            catch (SqlException ex)
            {
                // Still best-effort (the handler does not rethrow), but the failure is no longer invisible.
                System.Diagnostics.Trace.TraceError("Inverted_Index: database error while building the index: " + ex);
            }
        }

        /// <summary>
        /// Records one occurrence of <paramref name="term"/> at <paramref name="position"/> in
        /// document <paramref name="documentId"/> inside <c>InvertedIndex_Dic</c>.
        /// </summary>
        private void AddPosting(string term, string documentId, string position)
        {
            Inverted_index posting;
            if (!InvertedIndex_Dic.TryGetValue(term, out posting))
            {
                // First time this term is seen: create its (initially empty) parallel lists.
                posting = new Inverted_index();
                posting.DOCID = new List<string>();
                posting.FREQ  = new List<int>();
                posting.POS   = new List<List<string>>();
                InvertedIndex_Dic.Add(term, posting);
            }

            int docIndex = posting.DOCID.IndexOf(documentId);
            if (docIndex >= 0)
            {
                // Known document: bump its frequency and remember the extra position.
                posting.FREQ[docIndex]++;
                posting.POS[docIndex].Add(position);
            }
            else
            {
                // New document for this term: append to all three parallel lists.
                posting.DOCID.Add(documentId);
                posting.FREQ.Add(1);
                posting.POS.Add(new List<string> { position });
            }
        }

        /// <summary>
        /// Concatenates <paramref name="items"/>, writing a comma after every element
        /// (including the last one).
        /// </summary>
        private static string ConcatCommaTerminated<T>(IEnumerable<T> items)
        {
            System.Text.StringBuilder builder = new System.Text.StringBuilder();
            foreach (T item in items)
            {
                builder.Append(item).Append(',');
            }
            return builder.ToString();
        }
Example #2
0
        /// <summary>
        /// Handles a search request: loads the stored index rows, validates the query text and runs
        /// either an exact-phrase search (query wrapped in double quotes) or a plain word search
        /// against <c>InvertedIndex_Dic</c>.
        /// </summary>
        /// <param name="sender">Event source; not used.</param>
        /// <param name="e">Event data; not used.</param>
        protected void Search(object sender, EventArgs e)
        {
            // NOTE(review): the rows read below are collected into iiObj (all terms merged into one
            // object) and nothing in this method reads iiObj afterwards; the searches use
            // InvertedIndex_Dic. Confirm whether this load was meant to populate InvertedIndex_Dic.
            var iiObj = new Inverted_index();
            iiObj.DOCID = new List<string>();
            iiObj.FREQ  = new List<int>();
            iiObj.POS   = new List<List<string>>();

            char[] comma     = new[] { ',' };
            char[] groupMark = new[] { '@' };

            try
            {
                // Frequency is now part of the SELECT: the loop reads reader["Frequency"], which
                // previously threw IndexOutOfRangeException because the column was never selected.
                using (SqlConnection conn = new SqlConnection(connstring))
                using (SqlCommand cmd = new SqlCommand("select Term, docid, Frequency, positions from IRTable.dbo.Updated_InvertedIndex ", conn))
                {
                    conn.Open();

                    using (SqlDataReader reader = cmd.ExecuteReader())
                    {
                        while (reader.Read())
                        {
                            string document_id = (string)reader["docid"];
                            string freq        = (string)reader["Frequency"];
                            string position    = (string)reader["positions"];

                            // Empty entries are dropped so a value that ends with a separator
                            // (e.g. "1,2,") does not yield an empty id or an unparsable frequency.
                            iiObj.DOCID.AddRange(document_id.Split(comma, StringSplitOptions.RemoveEmptyEntries));

                            foreach (string frequencyText in freq.Split(comma, StringSplitOptions.RemoveEmptyEntries))
                            {
                                iiObj.FREQ.Add(int.Parse(frequencyText));
                            }

                            // '@' separates per-document groups, ',' separates positions in a group.
                            foreach (string positionGroup in position.Split(groupMark, StringSplitOptions.RemoveEmptyEntries))
                            {
                                iiObj.POS.Add(new List<string>(positionGroup.Split(comma, StringSplitOptions.RemoveEmptyEntries)));
                            }
                        }
                    }
                }
            }
            catch (SqlException ex)
            {
                // Still best-effort (the search continues), but the failure is no longer invisible.
                System.Diagnostics.Trace.TraceError("Search: database error while loading the stored index: " + ex);
            }

            if (txt_searchQuery.Text == "")
            {
                ClientScript.RegisterStartupScript(this.GetType(), "alert", "alert('Enter Your Search Query Please!')", true);
                return;
            }

            // NOTE(review): RadioButton1 and RadioButton2 had empty handlers. They were written as two
            // independent `if`s, so only RadioButton2 suppresses the normal search; a checked
            // RadioButton1 still falls through to it. That behavior is kept; confirm whether an
            // `else if` was intended.
            if (RadioButton2.Checked == true)
            {
                return;
            }

            string rawQuery = txt_searchQuery.Text;
            bool isExactSearch = rawQuery[0] == '"' && rawQuery[rawQuery.Length - 1] == '"';
            string[] terms = TokenizeQuery(rawQuery);

            if (isExactSearch)
            {
                RunExactSearch(terms);
            }
            else
            {
                RunWordSearch(terms);
            }
        }

        /// <summary>Characters treated as word separators in a search query.</summary>
        private static readonly char[] QueryDelimiters = new char[] { '#', '=', '\'', '\"', ':', '×', ';', ',', '÷', '.', '\\', '?', ' ', '/', '<', '>',
                                                                      '&', '!', '(', ')', '~', '@', '$', '%', '^', '*', ']', '[', '+', '_', '-', '|', '}', '{' };

        /// <summary>Words that are ignored when they appear in a search query.</summary>
        private static readonly HashSet<string> QueryStopWords = new HashSet<string>(
            "a about above across after afterwards again against all almost alone along already also although always am among amongst amoungst amount an and another any anyhow anyone anything anyway anywhere are around as at back be became because become becomes becoming been before beforehand behind being below beside besides between beyond bill both bottom but by call can cannot cant co computer con could couldnt cry de describe detail do done down due during each eg eight either eleven else elsewhere empty enough etc even ever every everyone everything everywhere except few fifteen fify fill find fire first five for former formerly forty found four from front full further get give go had has hasnt have he hence her here hereafter hereby herein hereupon hers herself him himself his how however hundred i ie if in inc indeed interest into is it its it's i'm itself keep last latter latterly least less ltd made many may me meanwhile might mill mine more moreover most mostly move much must my mysel name namely neither never nevertheless next nine no nobody none noon nor not nothing now nowhere of off often on once one only onto or other others otherwis our ours ourselves out over own part per perhaps please put rather re same see seem seemed seemin seems serious severa she should show side since sincere six sixty so some somehow someone something sometime sometimes somewhere still such syste take ten than that the their them themselves then thence there thereafter thereby therefore therein thereupon these they thick thin this third this those though three through throughout thru thus to together too top toward towards twelve twenty two un under until up upon us very via was we well were what whatever when whence whenever where whereafter whereas whereby wherein whereupon wherever whether which while whither who whoever whole whom whose why will with within without would yet you your yours yourself yourselves"
                .Split(' '));

        /// <summary>
        /// Lower-cases <paramref name="query"/> and splits it on <see cref="QueryDelimiters"/>.
        /// Empty tokens are discarded: the previous Split(' ') kept them, so any quoted or
        /// punctuated query produced "" terms that were then stemmed and looked up in the index.
        /// </summary>
        private static string[] TokenizeQuery(string query)
        {
            return query.ToLower().Split(QueryDelimiters, StringSplitOptions.RemoveEmptyEntries);
        }

        /// <summary>
        /// Exact-phrase search. For every adjacent pair of non-stop-word terms, counts per shared
        /// document how many positions of the first term are immediately followed by the second,
        /// then lists documents by their smallest per-pair count, highest first.
        /// </summary>
        private void RunExactSearch(string[] terms)
        {
            // doc id -> adjacency count for each term pair the document matched.
            // Sorted keys give the ascending-doc-id tie-break used when ranking.
            SortedDictionary<int, List<int>> hitsByDoc = new SortedDictionary<int, List<int>>();

            for (int i = 0; i < terms.Length - 1; i++)
            {
                string firstTerm  = terms[i];
                string secondTerm = terms[i + 1];

                // A pair is skipped as soon as either side is a stop word.
                if (QueryStopWords.Contains(firstTerm) || QueryStopWords.Contains(secondTerm))
                {
                    continue;
                }

                Porter2 stemmer = new Porter2();
                firstTerm  = stemmer.stem(firstTerm);
                secondTerm = stemmer.stem(secondTerm);

                // A term that is not indexed shares no documents with anything; the previous
                // indexer lookup threw KeyNotFoundException here instead.
                Inverted_index firstPosting;
                Inverted_index secondPosting;
                if (!InvertedIndex_Dic.TryGetValue(firstTerm, out firstPosting) ||
                    !InvertedIndex_Dic.TryGetValue(secondTerm, out secondPosting))
                {
                    continue;
                }

                foreach (string docId in firstPosting.DOCID.Intersect(secondPosting.DOCID))
                {
                    // Positions live at the same index as the doc id in each term's lists.
                    int firstIndex  = firstPosting.DOCID.IndexOf(docId);
                    int secondIndex = secondPosting.DOCID.IndexOf(docId);

                    int adjacentHits = 0;
                    foreach (string firstPos in firstPosting.POS[firstIndex])
                    {
                        foreach (string secondPos in secondPosting.POS[secondIndex])
                        {
                            if (int.Parse(secondPos) - int.Parse(firstPos) == 1)
                            {
                                adjacentHits++;
                                break;
                            }
                        }
                    }

                    int docKey = int.Parse(docId);
                    List<int> pairCounts;
                    if (!hitsByDoc.TryGetValue(docKey, out pairCounts))
                    {
                        pairCounts = new List<int>();
                        hitsByDoc.Add(docKey, pairCounts);
                    }
                    pairCounts.Add(adjacentHits);
                }
            }

            UrlList.Clear();

            // Score = weakest pair. Score and doc id now travel together; previously the score list
            // followed sorted-id order while the id was fetched from the dictionary's key order,
            // so a score could be attributed to the wrong document.
            List<KeyValuePair<int, int>> scoredDocs = new List<KeyValuePair<int, int>>();
            foreach (KeyValuePair<int, List<int>> doc in hitsByDoc)
            {
                scoredDocs.Add(new KeyValuePair<int, int>(doc.Key, doc.Value.Min()));
            }

            // OrderByDescending is stable, so equal scores stay in ascending doc-id order.
            foreach (KeyValuePair<int, int> doc in scoredDocs.Where(d => d.Value > 0).OrderByDescending(d => d.Value))
            {
                Get_URLs(doc.Key);
            }

            // NOTE(review): as before, each document whose weakest pair has no adjacent hit results
            // in a Get_URLs(0) call rather than a call with its own id. Confirm what id 0 means.
            int zeroHitDocs = scoredDocs.Count(d => d.Value == 0);
            for (int i = 0; i < zeroHitDocs; i++)
            {
                Get_URLs(0);
            }

            add_links_to_text(UrlList);
            UrlList.Clear();
        }

        /// <summary>
        /// Non-quoted search. A single term lists every document that contains it; several terms
        /// collect pairwise position distances per shared document.
        /// </summary>
        private void RunWordSearch(string[] terms)
        {
            if (terms.Length > 1)
            {
                // NOTE(review): this branch only gathers proximity data. Nothing consumes it and no
                // URLs are rendered for multi-word queries, exactly as before. Ranking is a TODO.
                SortedSet<int> matchedDocIds = new SortedSet<int>();
                Dictionary<string, List<int>> distancesByDoc = new Dictionary<string, List<int>>();
                List<List<int>> minDistanceLists = new List<List<int>>();

                for (int i = 0; i < terms.Length - 1; i++)
                {
                    string firstTerm  = terms[i];
                    string secondTerm = terms[i + 1];

                    if (QueryStopWords.Contains(firstTerm) || QueryStopWords.Contains(secondTerm))
                    {
                        continue;
                    }

                    Porter2 stemmer = new Porter2();
                    firstTerm  = stemmer.stem(firstTerm);
                    secondTerm = stemmer.stem(secondTerm);

                    // Unindexed terms share no documents (previously a KeyNotFoundException).
                    Inverted_index firstPosting;
                    Inverted_index secondPosting;
                    if (!InvertedIndex_Dic.TryGetValue(firstTerm, out firstPosting) ||
                        !InvertedIndex_Dic.TryGetValue(secondTerm, out secondPosting))
                    {
                        continue;
                    }

                    int docOrdinal = 0;   // running index of the shared document within this pair
                    foreach (string docId in firstPosting.DOCID.Intersect(secondPosting.DOCID))
                    {
                        matchedDocIds.Add(int.Parse(docId));

                        int firstIndex  = firstPosting.DOCID.IndexOf(docId);
                        int secondIndex = secondPosting.DOCID.IndexOf(docId);

                        // Absolute distance between every position of the two terms in this document.
                        List<int> distances = new List<int>();
                        foreach (string firstPos in firstPosting.POS[firstIndex])
                        {
                            foreach (string secondPos in secondPosting.POS[secondIndex])
                            {
                                distances.Add(Math.Abs(int.Parse(secondPos) - int.Parse(firstPos)));
                            }
                        }

                        if (distances.Count > 0)
                        {
                            int closest = distances.Min();

                            // Bounds-checked: the old `Count > 1` test followed by ElementAt(c)
                            // went out of range once a pair shared three or more documents.
                            if (docOrdinal < minDistanceLists.Count)
                            {
                                minDistanceLists[docOrdinal].Add(closest);
                            }
                            else
                            {
                                minDistanceLists.Add(new List<int> { closest });
                            }
                        }

                        List<int> knownDistances;
                        if (distancesByDoc.TryGetValue(docId, out knownDistances))
                        {
                            knownDistances.AddRange(distances);
                        }
                        else
                        {
                            distancesByDoc.Add(docId, distances);
                        }

                        docOrdinal++;
                    }
                }

                return;
            }

            // Zero or one term. A query made only of delimiters leaves no term at all; it is
            // treated like a query with no matches instead of indexing terms[0].
            if (terms.Length == 1 && !QueryStopWords.Contains(terms[0]))
            {
                string stemmed = new Porter2().stem(terms[0]);

                Inverted_index posting;
                if (InvertedIndex_Dic.TryGetValue(stemmed, out posting))
                {
                    foreach (string docId in posting.DOCID)
                    {
                        Get_URLs(int.Parse(docId));
                    }
                }
            }

            add_links_to_text(UrlList);
            UrlList.Clear();
        }