/// <summary>
/// Looks up the stemmed, non-stop-word terms of <paramref name="searchterms"/> in the
/// Inverted_index table, ranks the documents that contain them, and hands the resulting
/// URL list to the view.
/// </summary>
/// <param name="searchterms">
/// Raw query text. When it is wrapped in double quotes the exact-search ranking
/// (check_distance) is used; otherwise the multi-word ranking (calculate_distance) is used.
/// </param>
/// <param name="type">
/// "spell" records a term with no index rows and asks spellchecker_words for suggestions;
/// "soundex" asks Soundex_words for suggestions based on the first query term.
/// Both suggestion paths only run for non-exact searches.
/// </param>
/// <returns>
/// The default view, with ViewBag.Urls, ViewBag.query and ViewBag.type set, and
/// ViewBag.nearest_words set when suggestions were requested and produced.
/// </returns>
public ActionResult geturls(string searchterms, string type)
{
    // A null or empty query cannot be tokenised or inspected for quotes;
    // render an empty result list instead of throwing.
    if (string.IsNullOrEmpty(searchterms))
    {
        ViewBag.Urls = new List<string>();
        ViewBag.query = searchterms;
        ViewBag.type = type;
        return View();
    }

    searchquery s = new searchquery();
    s.query = searchterms;
    string[] searchquery_terms = s.Query_to_words();

    // (term, document id) -> (frequency, positions of the term in that document)
    var dict = new Dictionary<Tuple<string, int>, Tuple<int, List<int>>>();
    // stemmed term -> number of index rows read for it
    var Ndocs = new Dictionary<string, int>();
    // Document ids in first-seen order; the set gives O(1) duplicate detection.
    var Doc_No = new List<int>();
    var seenDocs = new HashSet<int>();

    // Length >= 2 so that a lone '"' is not treated as an (empty) quoted phrase,
    // which would make Substring throw.
    bool exact_search = false;
    if (searchterms.Length >= 2 && searchterms[0] == '"' && searchterms[searchterms.Length - 1] == '"')
    {
        // NOTE(review): the terms were already produced above from the quoted text, so this
        // assignment does not influence them inside this method — confirm the intended order.
        s.query = searchterms.Substring(1, searchterms.Length - 2);
        exact_search = true;
    }

    var searchquery_terms_stemmed = new List<string>();

    // NOTE(review): machine-specific connection string; consider moving it to configuration.
    // The using blocks guarantee the connection, command and reader are released even on error.
    using (SqlConnection con = new SqlConnection(@"Data Source=HOSSAM\MOHAMEDHOSSAM;Initial Catalog=web_crawler;Integrated Security=True"))
    {
        con.Open();
        foreach (string rawTerm in searchquery_terms)
        {
            if (Remove_stopwords(rawTerm))
            {
                continue;
            }

            Porter stemer = new Porter();
            string output = stemer.stem(rawTerm);
            searchquery_terms_stemmed.Add(output);

            using (SqlCommand cmd = new SqlCommand("select * from Inverted_index where Term=@term", con))
            {
                cmd.Parameters.Add("@term", SqlDbType.VarChar).Value = output;
                using (SqlDataReader reader = cmd.ExecuteReader())
                {
                    if (!reader.HasRows && type == "spell")
                    {
                        // No index rows for this term: remember the raw word for suggestions.
                        misSpelledword = rawTerm;
                    }
                    else
                    {
                        int repeation = 0;
                        while (reader.Read())
                        {
                            string word = reader[0].ToString();
                            int doc_id = (int)reader[1];
                            if (seenDocs.Add(doc_id))
                            {
                                Doc_No.Add(doc_id);
                            }

                            int frequency = (int)reader[2];
                            // Column 3 is read as a comma-separated list of integers.
                            string positions = reader[3].ToString();
                            var term_positions = new List<int>(Array.ConvertAll(positions.Split(','), int.Parse));

                            // Indexer assignment instead of Add: two query words can stem to the
                            // same term, and Add would throw on the repeated key.
                            dict[Tuple.Create(word, doc_id)] = Tuple.Create(frequency, term_positions);
                            repeation++;
                        }
                        Ndocs[output] = repeation;
                    }
                }
            }
        }
    }

    // For every candidate document, collect the query terms it contains
    // together with their frequency and positions.
    var matchesByDoc = new List<Tuple<int, List<Tuple<string, int, List<int>>>>>();
    foreach (int docId in Doc_No)
    {
        var words = new List<Tuple<string, int, List<int>>>();
        foreach (string term in searchquery_terms_stemmed)
        {
            Tuple<int, List<int>> posting;
            if (dict.TryGetValue(Tuple.Create(term, docId), out posting))
            {
                words.Add(Tuple.Create(term, posting.Item1, posting.Item2));
            }
        }
        matchesByDoc.Add(Tuple.Create(docId, words));
    }

    var Exact_Docs = new List<Tuple<int, float, List<Tuple<string, int, List<int>>>>>();
    var Inexact_Docs = new List<Tuple<int, float, List<Tuple<string, int, List<int>>>>>();
    List<string> Urls;

    if (exact_search)
    {
        // Only documents containing every query term are candidates for an exact match.
        var docs = new List<Tuple<int, List<Tuple<string, int, List<int>>>>>();
        foreach (var match in matchesByDoc)
        {
            if (match.Item2.Count == searchquery_terms_stemmed.Count)
            {
                docs.Add(match);
            }
        }

        var Docs_distances = check_distance(docs, searchquery_terms_stemmed.Count, Ndocs);
        // Highest score (Item2) first.
        Exact_Docs.AddRange(Docs_distances.OrderByDescending(entry => entry.Item2));
        Urls = Read_URls_from_database(Exact_Docs);
    }
    else
    {
        // number of matched query terms -> documents matching that many terms
        var num_of_occurence = new Dictionary<int, List<Tuple<int, List<Tuple<string, int, List<int>>>>>>();
        foreach (var match in matchesByDoc)
        {
            int count = match.Item2.Count;
            List<Tuple<int, List<Tuple<string, int, List<int>>>>> bucket;
            if (!num_of_occurence.TryGetValue(count, out bucket))
            {
                bucket = new List<Tuple<int, List<Tuple<string, int, List<int>>>>>();
                num_of_occurence.Add(count, bucket);
            }
            bucket.Add(match);
        }

        // Documents matching more terms come first; within a bucket, smaller Item2 first.
        foreach (var bucketEntry in num_of_occurence.OrderByDescending(entry => entry.Key))
        {
            var docs_distances = calculate_distance(bucketEntry.Value, bucketEntry.Key);
            Inexact_Docs.AddRange(docs_distances.OrderBy(entry => entry.Item2));
        }
        Urls = Read_URls_from_database(Inexact_Docs);

        if (misSpelledword != "" && type == "spell")
        {
            List<string> nearest_words = spellchecker_words(misSpelledword);
            ViewBag.nearest_words = nearest_words;
        }
        else if (type == "soundex" && searchquery_terms.Length > 0)
        {
            // Guarded: tokenisation may yield no terms, and indexing [0] would throw.
            misSpelledword = searchquery_terms[0];
            List<string> soundex_words = Soundex_words(misSpelledword);
            ViewBag.nearest_words = soundex_words;
        }
    }

    ViewBag.Urls = Urls;
    ViewBag.query = searchterms;
    ViewBag.type = type;
    return View();
}
/// <summary>
/// Runs a search for <paramref name="searchterms"/>: tokenises the query, drops stop words,
/// stems the remaining terms, reads their rows from the Inverted_index table, ranks the
/// matching documents, and hands the resulting URL list to the view.
/// </summary>
/// <param name="searchterms">
/// Raw query text. When it is wrapped in double quotes the exact-search ranking
/// (check_distance) is used; otherwise the multi-word ranking (calculate_distance) is used.
/// </param>
/// <param name="type">
/// "spell" records a term with no index rows and asks spellchecker_words for suggestions;
/// "soundex" asks Soundex_words for suggestions based on the first query term.
/// Both suggestion paths only run for non-exact searches.
/// </param>
/// <returns>
/// The default view, with ViewBag.type, ViewBag.Urls and ViewBag.query set, and
/// ViewBag.nearest_words set when suggestions were requested and produced.
/// </returns>
public ActionResult searchquery(string searchterms, string type)
{
    // A null or empty query cannot be tokenised or inspected for quotes;
    // render an empty result list instead of throwing.
    if (string.IsNullOrEmpty(searchterms))
    {
        ViewBag.type = type;
        ViewBag.Urls = new List<string>();
        ViewBag.query = searchterms;
        return View();
    }

    // 1- tokenization and case folding
    searchquery s = new searchquery();
    s.query = searchterms;
    string[] searchquery_terms = s.Query_to_words();

    // (term, document id) -> (frequency, positions of the term in that document)
    var dict = new Dictionary<Tuple<string, int>, Tuple<int, List<int>>>();
    // stemmed term -> number of index rows read for it
    var Ndocs = new Dictionary<string, int>();
    // Document ids in first-seen order; the set gives O(1) duplicate detection.
    var Doc_No = new List<int>();
    var seenDocs = new HashSet<int>();

    // A query wrapped in double quotes requests an exact search. Length >= 2 so that a
    // lone '"' is not treated as an (empty) quoted phrase, which would make Substring throw.
    bool exact_search = false;
    if (searchterms.Length >= 2 && searchterms[0] == '"' && searchterms[searchterms.Length - 1] == '"')
    {
        // NOTE(review): the terms were already produced above from the quoted text, so this
        // assignment does not influence them inside this method — confirm the intended order.
        s.query = searchterms.Substring(1, searchterms.Length - 2);
        exact_search = true;
    }

    // 2- stop-word removal and Porter stemming, then one index lookup per remaining term
    var searchquery_terms_stemmed = new List<string>();

    // NOTE(review): machine-specific connection string; consider moving it to configuration.
    // The using blocks guarantee the connection, command and reader are released even on error.
    using (SqlConnection con = new SqlConnection(@"Data Source=DESKTOP-KMG2RBB\SQLEXPRESS;Initial Catalog=web_crawler;Integrated Security=True"))
    {
        con.Open();
        foreach (string rawTerm in searchquery_terms)
        {
            if (Remove_stopwords(rawTerm))
            {
                continue;
            }

            Porter stemer = new Porter();
            string output = stemer.stem(rawTerm);
            searchquery_terms_stemmed.Add(output);

            using (SqlCommand cmd = new SqlCommand("select * from Inverted_index where Term=@term", con))
            {
                cmd.Parameters.Add("@term", SqlDbType.VarChar).Value = output;
                using (SqlDataReader reader = cmd.ExecuteReader())
                {
                    if (!reader.HasRows && type == "spell")
                    {
                        // No index rows for this term and spelling correction was requested:
                        // remember the raw word so suggestions can be offered.
                        misSpelledword = rawTerm;
                    }
                    else
                    {
                        int repeation = 0;
                        while (reader.Read())
                        {
                            string word = reader[0].ToString();
                            int doc_id = (int)reader[1];
                            if (seenDocs.Add(doc_id))
                            {
                                Doc_No.Add(doc_id);
                            }

                            int frequency = (int)reader[2];
                            // Column 3 is read as a comma-separated list of integers.
                            string positions = reader[3].ToString();
                            var term_positions = new List<int>(Array.ConvertAll(positions.Split(','), int.Parse));

                            // Indexer assignment instead of Add: two query words can stem to the
                            // same term, and Add would throw on the repeated key.
                            dict[Tuple.Create(word, doc_id)] = Tuple.Create(frequency, term_positions);
                            repeation++;
                        }
                        Ndocs[output] = repeation;
                    }
                }
            }
        }
    }

    // For every candidate document, collect the query terms it contains
    // together with their frequency and positions.
    var matchesByDoc = new List<Tuple<int, List<Tuple<string, int, List<int>>>>>();
    foreach (int docId in Doc_No)
    {
        var words = new List<Tuple<string, int, List<int>>>();
        foreach (string term in searchquery_terms_stemmed)
        {
            Tuple<int, List<int>> posting;
            if (dict.TryGetValue(Tuple.Create(term, docId), out posting))
            {
                words.Add(Tuple.Create(term, posting.Item1, posting.Item2));
            }
        }
        matchesByDoc.Add(Tuple.Create(docId, words));
    }

    var Exact_Docs = new List<Tuple<int, float, List<Tuple<string, int, List<int>>>>>();
    var Inexact_Docs = new List<Tuple<int, float, List<Tuple<string, int, List<int>>>>>();
    List<string> Urls;

    if (exact_search)
    {
        // Exact search: only documents containing every query term are passed to check_distance.
        var docs = new List<Tuple<int, List<Tuple<string, int, List<int>>>>>();
        foreach (var match in matchesByDoc)
        {
            if (match.Item2.Count == searchquery_terms_stemmed.Count)
            {
                docs.Add(match);
            }
        }

        var Docs_distances = check_distance(docs, searchquery_terms_stemmed.Count, Ndocs);
        // Highest score (Item2) first.
        Exact_Docs.AddRange(Docs_distances.OrderByDescending(entry => entry.Item2));
        Urls = Read_URls_from_database(Exact_Docs);
    }
    else
    {
        // Multi-word search.
        // number of matched query terms -> documents matching that many terms
        var num_of_occurence = new Dictionary<int, List<Tuple<int, List<Tuple<string, int, List<int>>>>>>();
        foreach (var match in matchesByDoc)
        {
            int count = match.Item2.Count;
            List<Tuple<int, List<Tuple<string, int, List<int>>>>> bucket;
            if (!num_of_occurence.TryGetValue(count, out bucket))
            {
                bucket = new List<Tuple<int, List<Tuple<string, int, List<int>>>>>();
                num_of_occurence.Add(count, bucket);
            }
            bucket.Add(match);
        }

        // Documents matching more terms come first; within a bucket, smaller Item2 first.
        foreach (var bucketEntry in num_of_occurence.OrderByDescending(entry => entry.Key))
        {
            var docs_distances = calculate_distance(bucketEntry.Value, bucketEntry.Key);
            Inexact_Docs.AddRange(docs_distances.OrderBy(entry => entry.Item2));
        }
        Urls = Read_URls_from_database(Inexact_Docs);

        if (misSpelledword != "" && type == "spell")
        {
            // A term had no index rows and spelling correction was requested.
            List<string> nearest_words = spellchecker_words(misSpelledword);
            ViewBag.nearest_words = nearest_words;
        }
        else if (type == "soundex" && searchquery_terms.Length > 0)
        {
            // Phonetic correction was requested. Guarded: tokenisation may yield no terms,
            // and indexing [0] would throw.
            misSpelledword = searchquery_terms[0];
            List<string> soundex_words = Soundex_words(misSpelledword);
            ViewBag.nearest_words = soundex_words;
        }
    }

    ViewBag.type = type;
    ViewBag.Urls = Urls;
    ViewBag.query = searchterms;
    return View();
}