/// -----------------------------------------------------------------------------
/// <summary>
/// AddIndexWords adds the Index Words to the Data Store
/// </summary>
/// <remarks>
/// The indexed text is the cleaned item content followed by the item title and the
/// name, title, description and keywords of the tab that hosts the module. The text
/// is lower-cased and split on single spaces. Every token accepted by CanIndexWord
/// is HTML-encoded and stored with its occurrence count and its 1-based positions
/// among the accepted tokens (rejected tokens do not advance the position counter).
/// </remarks>
/// <param name="indexId">The Id of the SearchItem</param>
/// <param name="searchItem">The SearchItem</param>
/// <param name="language">The Language of the current Item</param>
/// <history>
///		[cnurse]	11/15/2004	documented
///     [cnurse]    11/16/2004  replaced calls to separate content clean-up
///                             functions with new call to HtmlUtils.Clean().
///                             replaced logic to determine whether word should
///                             be indexed by call to CanIndexWord()
///     [vnguyen]   09/03/2010  added searchitem title to the content and
///                             also tab title, description, keywords where the
///                             content resides for indexed searching
/// </history>
/// -----------------------------------------------------------------------------
private void AddIndexWords(int indexId, SearchItemInfo searchItem, string language)
{
    //Get the Search Settings that apply to the item's module
    var settings = new SearchConfig(SearchDataStoreController.GetSearchSettings(searchItem.ModuleId));

    string content = GetSearchContent(searchItem);
    string title = HtmlUtils.StripPunctuation(searchItem.Title, true);

    // Tab and Module Metadata
    // Retrieve module and page names
    // NOTE(review): objModule and objTab are dereferenced without a null check. If
    // GetModule/GetTab can return null (e.g. for a removed module or page) this throws
    // a NullReferenceException - confirm and decide how such items should be handled.
    ModuleInfo objModule = new ModuleController().GetModule(searchItem.ModuleId);
    TabInfo objTab = new TabController().GetTab(objModule.TabID, objModule.PortalID, false);
    string tabName = HtmlUtils.StripPunctuation(objTab.TabName, true);
    string tabTitle = HtmlUtils.StripPunctuation(objTab.Title, true);
    string tabDescription = HtmlUtils.StripPunctuation(objTab.Description, true);
    string tabKeywords = HtmlUtils.StripPunctuation(objTab.KeyWords, true);
    string tagfilter = PortalController.GetPortalSetting("SearchIncludedTagInfoFilter", objModule.PortalID, Host.SearchIncludedTagInfoFilter);

    // clean content
    content = HtmlUtils.CleanWithTagInfo(content, tagfilter, true);

    // append tab and module metadata
    // The explicit separator after the content guarantees that the last content word
    // and the first title word stay separate tokens, whether or not the cleaner
    // leaves trailing whitespace.
    content = content.ToLower() + " " + title.ToLower() + " " + tabName.ToLower() + " " + tabTitle.ToLower() + " " + tabDescription.ToLower() + " " + tabKeywords.ToLower();

    // Positions of every indexable word, keyed by the HTML-encoded word. The number
    // of occurrences of a word is the length of its position list, so no separate
    // counter is kept.
    var wordPositions = new Dictionary<string, List<int>>();

    // split content into words and process each word
    int wordIndex = 0;
    foreach (string rawWord in content.Split(' '))
    {
        if (!CanIndexWord(rawWord, language, settings))
        {
            continue;
        }

        string encodedWord = HttpUtility.HtmlEncode(rawWord);
        wordIndex++;

        List<int> positions;
        if (!wordPositions.TryGetValue(encodedWord, out positions))
        {
            positions = new List<int>();
            wordPositions.Add(encodedWord, positions);
        }

        //track positions of word in content
        positions.Add(wordIndex);
    }

    //get list of words ( non-common )
    Hashtable words = GetSearchWords(); //this could be cached

    //iterate through each indexed word
    foreach (KeyValuePair<string, List<int>> entry in wordPositions)
    {
        string word = entry.Key;
        List<int> positions = entry.Value;

        int wordId;
        if (words.ContainsKey(word))
        {
            //word is in the DataStore
            wordId = Convert.ToInt32(words[word]);
        }
        else
        {
            //add the word to the DataStore and remember its id in the word table
            wordId = DataProvider.Instance().AddSearchWord(word);
            words.Add(word, wordId);
        }

        //add the indexword together with its number of occurrences
        int searchItemWordId = DataProvider.Instance().AddSearchItemWord(indexId, wordId, positions.Count);

        //positions are stored as a comma-terminated list, e.g. "3,17,42,"
        var positionList = new System.Text.StringBuilder(Null.NullString);
        foreach (int wordPosition in positions)
        {
            positionList.Append(wordPosition).Append(',');
        }
        DataProvider.Instance().AddSearchItemWordPosition(searchItemWordId, positionList.ToString());
    }
}
/// -----------------------------------------------------------------------------
/// <summary>
/// AddIndexWords adds the Index Words to the Data Store
/// </summary>
/// <remarks>
/// The indexed text is the cleaned item content followed by the item title and the
/// name, title, description and keywords of the tab that hosts the module. The text
/// is lower-cased and split on single spaces. Every token accepted by CanIndexWord
/// is HTML-encoded and stored with its occurrence count and its 1-based positions
/// among the accepted tokens (rejected tokens do not advance the position counter).
/// </remarks>
/// <param name="indexId">The Id of the SearchItem</param>
/// <param name="searchItem">The SearchItem</param>
/// <param name="language">The Language of the current Item</param>
/// <history>
///		[cnurse]	11/15/2004	documented
///     [cnurse]    11/16/2004  replaced calls to separate content clean-up
///                             functions with new call to HtmlUtils.Clean().
///                             replaced logic to determine whether word should
///                             be indexed by call to CanIndexWord()
///     [vnguyen]   09/03/2010  added searchitem title to the content and
///                             also tab title, description, keywords where the
///                             content resides for indexed searching
/// </history>
/// -----------------------------------------------------------------------------
private void AddIndexWords(int indexId, SearchItemInfo searchItem, string language)
{
    //Get the Search Settings that apply to the item's module
    var settings = new SearchConfig(SearchDataStoreController.GetSearchSettings(searchItem.ModuleId));

    string content = GetSearchContent(searchItem);
    string title = HtmlUtils.StripPunctuation(searchItem.Title, true);

    // Tab and Module Metadata
    // Retrieve module and page names
    // NOTE(review): objModule and objTab are dereferenced without a null check. If
    // GetModule/GetTab can return null (e.g. for a removed module or page) this throws
    // a NullReferenceException - confirm and decide how such items should be handled.
    ModuleInfo objModule = new ModuleController().GetModule(searchItem.ModuleId);
    TabInfo objTab = new TabController().GetTab(objModule.TabID, objModule.PortalID, false);
    string tabName = HtmlUtils.StripPunctuation(objTab.TabName, true);
    string tabTitle = HtmlUtils.StripPunctuation(objTab.Title, true);
    string tabDescription = HtmlUtils.StripPunctuation(objTab.Description, true);
    string tabKeywords = HtmlUtils.StripPunctuation(objTab.KeyWords, true);
    string tagfilter = PortalController.GetPortalSetting("SearchIncludedTagInfoFilter", objModule.PortalID, Host.SearchIncludedTagInfoFilter);

    // clean content
    content = HtmlUtils.CleanWithTagInfo(content, tagfilter, true);

    // append tab and module metadata
    // The explicit separator after the content guarantees that the last content word
    // and the first title word stay separate tokens, whether or not the cleaner
    // leaves trailing whitespace.
    content = content.ToLower() + " " + title.ToLower() + " " + tabName.ToLower() + " " + tabTitle.ToLower() + " " + tabDescription.ToLower() + " " + tabKeywords.ToLower();

    // Positions of every indexable word, keyed by the HTML-encoded word. The number
    // of occurrences of a word is the length of its position list, so no separate
    // counter is kept.
    var wordPositions = new Dictionary<string, List<int>>();

    // split content into words and process each word
    int wordIndex = 0;
    foreach (string rawWord in content.Split(' '))
    {
        if (!CanIndexWord(rawWord, language, settings))
        {
            continue;
        }

        string encodedWord = HttpUtility.HtmlEncode(rawWord);
        wordIndex++;

        List<int> positions;
        if (!wordPositions.TryGetValue(encodedWord, out positions))
        {
            positions = new List<int>();
            wordPositions.Add(encodedWord, positions);
        }

        //track positions of word in content
        positions.Add(wordIndex);
    }

    //get list of words ( non-common )
    Hashtable words = GetSearchWords(); //this could be cached

    //iterate through each indexed word
    foreach (KeyValuePair<string, List<int>> entry in wordPositions)
    {
        string word = entry.Key;
        List<int> positions = entry.Value;

        int wordId;
        if (words.ContainsKey(word))
        {
            //word is in the DataStore
            wordId = Convert.ToInt32(words[word]);
        }
        else
        {
            //add the word to the DataStore and remember its id in the word table
            wordId = DataProvider.Instance().AddSearchWord(word);
            words.Add(word, wordId);
        }

        //add the indexword together with its number of occurrences
        int searchItemWordId = DataProvider.Instance().AddSearchItemWord(indexId, wordId, positions.Count);

        //positions are stored as a comma-terminated list, e.g. "3,17,42,"
        var positionList = new System.Text.StringBuilder(Null.NullString);
        foreach (int wordPosition in positions)
        {
            positionList.Append(wordPosition).Append(',');
        }
        DataProvider.Instance().AddSearchItemWordPosition(searchItemWordId, positionList.ToString());
    }
}
/// -----------------------------------------------------------------------------
/// <summary>
/// CanIndexWord determines whether the Word should be indexed
/// </summary>
/// <remarks>
/// A word matching ^\d+$ is treated as numeric and is indexed only when
/// settings.SearchIncludeNumeric is set; the length limits are not applied to it.
/// Any other word must have a length within settings.SearchMinWordlLength and
/// settings.SearchMaxWordlLength and, unless settings.SearchIncludeCommon is set,
/// must not be a key of the table returned by GetCommonWords for the Locale.
/// </remarks>
/// <param name="strWord">The Word to validate</param>
/// <param name="Locale">The locale passed to GetCommonWords to obtain the common words</param>
/// <param name="settings">The search settings supplying the numeric, length and common-word options</param>
/// <returns>True if indexable, otherwise false</returns>
/// <history>
///		[cnurse]	11/16/2004	created
/// </history>
/// -----------------------------------------------------------------------------
private bool CanIndexWord(string strWord, string Locale, SearchConfig settings)
{
    //Determine if Word is actually a number
    if (Regex.IsMatch(strWord, "^\\d+$"))
    {
        //Word is Numeric: indexable only when numeric words are included
        return settings.SearchIncludeNumeric;
    }

    //Word is Non-Numeric
    //Determine if Word satisfies Minimum/Maximum length
    if (strWord.Length < settings.SearchMinWordlLength || strWord.Length > settings.SearchMaxWordlLength)
    {
        return false;
    }

    //Common words are allowed, so there is nothing left to check
    if (settings.SearchIncludeCommon)
    {
        return true;
    }

    //get common words for exclusion; fetched only on this path so that words
    //already accepted or rejected above do not trigger the lookup
    Hashtable commonWords = GetCommonWords(Locale);

    //Determine if Word is a Common Word (and should be excluded)
    return !commonWords.ContainsKey(strWord);
}