Exemplo n.º 1
0
        /// -----------------------------------------------------------------------------
        /// <summary>
        /// AddIndexWords adds the Index Words to the Data Store
        /// </summary>
        /// <remarks>
        /// The cleaned content of the item, its title, and the name, title, description
        /// and keywords of the tab that hosts the module are lower-cased, joined with
        /// single spaces and split on spaces. Every word accepted by CanIndexWord is
        /// HTML-encoded and recorded with its positions. Positions are 1-based and only
        /// count words that were accepted for indexing. For each distinct word the
        /// occurrence count and a comma-terminated position list (e.g. "1,4,9,") are
        /// written through the DataProvider.
        /// </remarks>
        /// <param name="indexId">The Id of the SearchItem</param>
        /// <param name="searchItem">The SearchItem</param>
        /// <param name="language">The Language of the current Item</param>
        /// <history>
        ///     [cnurse]    11/15/2004  documented
        ///     [cnurse]    11/16/2004  replaced calls to separate content clean-up
        ///                             functions with new call to HtmlUtils.Clean().
        ///                             replaced logic to determine whether word should
        ///                             be indexed by call to CanIndexWord()
        ///     [vnguyen]   09/03/2010  added searchitem title to the content and
        ///                             also tab title, description, keywords where the
        ///                             content resides for indexed searching
        /// </history>
        /// -----------------------------------------------------------------------------
        private void AddIndexWords(int indexId, SearchItemInfo searchItem, string language)
        {
            //Get the Search Settings for this Portal
            var settings = new SearchConfig(SearchDataStoreController.GetSearchSettings(searchItem.ModuleId));
            string content = GetSearchContent(searchItem);
            string title = HtmlUtils.StripPunctuation(searchItem.Title, true);

            // Tab and Module Metadata
            // Retrieve module and page names
            ModuleInfo objModule = new ModuleController().GetModule(searchItem.ModuleId);
            TabInfo objTab = new TabController().GetTab(objModule.TabID, objModule.PortalID, false);
            string tabName = HtmlUtils.StripPunctuation(objTab.TabName, true);
            string tabTitle = HtmlUtils.StripPunctuation(objTab.Title, true);
            string tabDescription = HtmlUtils.StripPunctuation(objTab.Description, true);
            string tabKeywords = HtmlUtils.StripPunctuation(objTab.KeyWords, true);
            string tagfilter = PortalController.GetPortalSetting("SearchIncludedTagInfoFilter", objModule.PortalID, Host.SearchIncludedTagInfoFilter);

            // clean content
            content = HtmlUtils.CleanWithTagInfo(content, tagfilter, true);

            // append tab and module metadata; every part is separated by a space so the
            // last word of the content cannot fuse with the first word of the title
            content = string.Join(" ", new[]
            {
                content.ToLower(),
                title.ToLower(),
                tabName.ToLower(),
                tabTitle.ToLower(),
                tabDescription.ToLower(),
                tabKeywords.ToLower()
            });

            // split content into words; consecutive spaces do not produce empty words
            string[] contentWords = content.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);

            //process each word: the list stored per word holds its positions, and the
            //length of that list is the number of occurrences of the word
            var indexPositions = new Dictionary<string, List<int>>();
            int wordPosition = 0;
            foreach (string contentWord in contentWords)
            {
                if (!CanIndexWord(contentWord, language, settings))
                {
                    continue;
                }

                string encodedWord = HttpUtility.HtmlEncode(contentWord);
                wordPosition++;

                List<int> positions;
                if (!indexPositions.TryGetValue(encodedWord, out positions))
                {
                    positions = new List<int>();
                    indexPositions.Add(encodedWord, positions);
                }
                //track positions of word in content
                positions.Add(wordPosition);
            }

            //get list of words ( non-common )
            Hashtable words = GetSearchWords(); //this could be cached
            var dataProvider = DataProvider.Instance();

            //iterate through each indexed word
            foreach (KeyValuePair<string, List<int>> indexedWord in indexPositions)
            {
                string word = indexedWord.Key;
                List<int> positions = indexedWord.Value;

                int wordId;
                if (words.ContainsKey(word))
                {
                    //word is in the DataStore
                    wordId = Convert.ToInt32(words[word]);
                }
                else
                {
                    //add the word to the DataStore
                    wordId = dataProvider.AddSearchWord(word);
                    words.Add(word, wordId);
                }

                //add the indexword together with its number of occurrences
                int searchItemWordId = dataProvider.AddSearchItemWord(indexId, wordId, positions.Count);

                //build the position list; each position is followed by a comma,
                //including the last one, exactly as the list was written before
                var positionList = new System.Text.StringBuilder(Null.NullString);
                foreach (int position in positions)
                {
                    positionList.Append(position).Append(',');
                }
                dataProvider.AddSearchItemWordPosition(searchItemWordId, positionList.ToString());
            }
        }
Exemplo n.º 2
0
        /// -----------------------------------------------------------------------------
        /// <summary>
        /// AddIndexWords adds the Index Words to the Data Store
        /// </summary>
        /// <remarks>
        /// The cleaned content of the item, its title, and the name, title, description
        /// and keywords of the tab that hosts the module are lower-cased, joined with
        /// single spaces and split on spaces. Every word accepted by CanIndexWord is
        /// HTML-encoded and recorded with its positions. Positions are 1-based and only
        /// count words that were accepted for indexing. For each distinct word the
        /// occurrence count and a comma-terminated position list (e.g. "1,4,9,") are
        /// written through the DataProvider.
        /// </remarks>
        /// <param name="indexId">The Id of the SearchItem</param>
        /// <param name="searchItem">The SearchItem</param>
        /// <param name="language">The Language of the current Item</param>
        /// <history>
        ///     [cnurse]    11/15/2004  documented
        ///     [cnurse]    11/16/2004  replaced calls to separate content clean-up
        ///                             functions with new call to HtmlUtils.Clean().
        ///                             replaced logic to determine whether word should
        ///                             be indexed by call to CanIndexWord()
        ///     [vnguyen]   09/03/2010  added searchitem title to the content and
        ///                             also tab title, description, keywords where the
        ///                             content resides for indexed searching
        /// </history>
        /// -----------------------------------------------------------------------------
        private void AddIndexWords(int indexId, SearchItemInfo searchItem, string language)
        {
            //Get the Search Settings for this Portal
            var    settings = new SearchConfig(SearchDataStoreController.GetSearchSettings(searchItem.ModuleId));
            string content  = GetSearchContent(searchItem);
            string title    = HtmlUtils.StripPunctuation(searchItem.Title, true);

            // Tab and Module Metadata
            // Retrieve module and page names
            ModuleInfo objModule      = new ModuleController().GetModule(searchItem.ModuleId);
            TabInfo    objTab         = new TabController().GetTab(objModule.TabID, objModule.PortalID, false);
            string     tabName        = HtmlUtils.StripPunctuation(objTab.TabName, true);
            string     tabTitle       = HtmlUtils.StripPunctuation(objTab.Title, true);
            string     tabDescription = HtmlUtils.StripPunctuation(objTab.Description, true);
            string     tabKeywords    = HtmlUtils.StripPunctuation(objTab.KeyWords, true);
            string     tagfilter      = PortalController.GetPortalSetting("SearchIncludedTagInfoFilter", objModule.PortalID, Host.SearchIncludedTagInfoFilter);

            // clean content
            content = HtmlUtils.CleanWithTagInfo(content, tagfilter, true);

            // append tab and module metadata; every part is separated by a space so the
            // last word of the content cannot fuse with the first word of the title
            content = string.Join(" ", new[]
            {
                content.ToLower(),
                title.ToLower(),
                tabName.ToLower(),
                tabTitle.ToLower(),
                tabDescription.ToLower(),
                tabKeywords.ToLower()
            });

            // split content into words; consecutive spaces do not produce empty words
            string[] contentWords = content.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);

            //process each word: the list stored per word holds its positions, and the
            //length of that list is the number of occurrences of the word
            var indexPositions = new Dictionary<string, List<int>>();
            int wordPosition   = 0;

            foreach (string contentWord in contentWords)
            {
                if (!CanIndexWord(contentWord, language, settings))
                {
                    continue;
                }

                string encodedWord = HttpUtility.HtmlEncode(contentWord);
                wordPosition++;

                List<int> positions;
                if (!indexPositions.TryGetValue(encodedWord, out positions))
                {
                    positions = new List<int>();
                    indexPositions.Add(encodedWord, positions);
                }
                //track positions of word in content
                positions.Add(wordPosition);
            }

            //get list of words ( non-common )
            Hashtable words        = GetSearchWords(); //this could be cached
            var       dataProvider = DataProvider.Instance();

            //iterate through each indexed word
            foreach (KeyValuePair<string, List<int>> indexedWord in indexPositions)
            {
                string    word      = indexedWord.Key;
                List<int> positions = indexedWord.Value;

                int wordId;
                if (words.ContainsKey(word))
                {
                    //word is in the DataStore
                    wordId = Convert.ToInt32(words[word]);
                }
                else
                {
                    //add the word to the DataStore
                    wordId = dataProvider.AddSearchWord(word);
                    words.Add(word, wordId);
                }

                //add the indexword together with its number of occurrences
                int searchItemWordId = dataProvider.AddSearchItemWord(indexId, wordId, positions.Count);

                //build the position list; each position is followed by a comma,
                //including the last one, exactly as the list was written before
                var positionList = new System.Text.StringBuilder(Null.NullString);
                foreach (int position in positions)
                {
                    positionList.Append(position).Append(',');
                }
                dataProvider.AddSearchItemWordPosition(searchItemWordId, positionList.ToString());
            }
        }
Exemplo n.º 3
0
        /// -----------------------------------------------------------------------------
        /// <summary>
        /// CanIndexWord determines whether the Word should be indexed
        /// </summary>
        /// <remarks>
        /// A word made up only of digits is indexed when SearchIncludeNumeric is set;
        /// the length limits and the common-word check are not applied to it.
        /// Any other word must have a length between SearchMinWordlLength and
        /// SearchMaxWordlLength (inclusive) and, unless SearchIncludeCommon is set,
        /// must not be a key of the common-word list for the locale.
        /// </remarks>
        /// <param name="strWord">The Word to validate</param>
        /// <param name="Locale">The locale passed to GetCommonWords to obtain the common-word list</param>
        /// <param name="settings">The search settings supplying SearchIncludeNumeric,
        /// SearchMinWordlLength, SearchMaxWordlLength and SearchIncludeCommon</param>
        /// <returns>True if indexable, otherwise false</returns>
        /// <history>
        ///     [cnurse]    11/16/2004  created
        /// </history>
        /// -----------------------------------------------------------------------------
        private bool CanIndexWord(string strWord, string Locale, SearchConfig settings)
        {
            //Determine if Word is actually a number
            if (Regex.IsMatch(strWord, "^\\d+$"))
            {
                //Word is Numeric: only the numeric setting decides
                return settings.SearchIncludeNumeric;
            }

            //Word is Non-Numeric
            //Determine if Word satisfies Minimum/Maximum length
            if (strWord.Length < settings.SearchMinWordlLength || strWord.Length > settings.SearchMaxWordlLength)
            {
                return false;
            }

            //Common words are allowed, so the common-word list does not need to be consulted
            if (settings.SearchIncludeCommon)
            {
                return true;
            }

            //Determine if Word is a Common Word (and should be excluded);
            //the list is only fetched here, where its content can change the result
            Hashtable commonWords = GetCommonWords(Locale);
            return !commonWords.ContainsKey(strWord);
        }