/// <summary>
 /// Initializes a resource: builds its <c>ResourceHeader</c> from the identifying and
 /// descriptive arguments, then stores the information-entropy array and the data object.
 /// </summary>
 /// <param name="__resourceID">Forwarded to the <c>ResourceHeader</c> constructor.</param>
 /// <param name="__type">Forwarded to the <c>ResourceHeader</c> constructor.</param>
 /// <param name="__tokens">Forwarded to the <c>ResourceHeader</c> constructor.</param>
 /// <param name="__size">Forwarded to the <c>ResourceHeader</c> constructor.</param>
 /// <param name="__description">Forwarded to the <c>ResourceHeader</c> constructor.</param>
 /// <param name="__name">Forwarded to the <c>ResourceHeader</c> constructor.</param>
 /// <param name="__location">Forwarded to the <c>ResourceHeader</c> constructor.</param>
 /// <param name="__IE">Information-entropy entries stored as-is (the array is not copied).</param>
 /// <param name="__data">Payload object stored as-is.</param>
 public Resource(
     ulong __resourceID,
     Constants.ResourceType __type,
     uint __tokens,
     long __size,
     string __description,
     string __name,
     string __location,
     InformationEntropy[] __IE,
     object __data)
 {
     // The header is built first, exactly as before, so a throwing header constructor
     // leaves the remaining members untouched.
     this.header = new ResourceHeader(
         __resourceID, __type, __tokens, __size, __description, __name, __location);

     this.IE = __IE;
     this.data = __data;
 }
 /// <summary>
 /// Concatenates the keywords of <paramref name="IEList"/> into one string, writing
 /// <paramref name="delimeter"/> in front of every keyword.
 /// </summary>
 /// <param name="IEList">Entries whose <c>keyword</c> members are concatenated, in array order.</param>
 /// <param name="delimeter">Character emitted before each keyword (including the first one).</param>
 /// <returns>
 /// The joined string. It starts with the delimiter when the list is non-empty and is
 /// empty when the list has no entries.
 /// </returns>
 public static string Detokenize(InformationEntropy[] IEList, char delimeter)
 {
     // A StringBuilder avoids re-copying the whole accumulated string on every
     // iteration, which made the previous "+=" loop quadratic in the output length.
     System.Text.StringBuilder joined = new System.Text.StringBuilder();

     foreach (InformationEntropy IE in IEList)
     {
         // The delimiter intentionally precedes every keyword so the produced text is
         // identical to the previous implementation (leading delimiter included).
         joined.Append(delimeter).Append(IE.keyword);
     }

     return joined.ToString();
 }
        /// <summary>
        /// Projects an array of information-entropy entries onto an array of their keywords.
        /// </summary>
        /// <param name="IEList">Entries to read; the result has the same length and order.</param>
        /// <returns>A new array where element <c>i</c> is <c>IEList[i].keyword</c>.</returns>
        public static string[] InformationEntropyToStringArray(InformationEntropy[] IEList)
        {
            int count = IEList.Length;
            string[] keywords = new string[count];

            for (int i = 0; i < count; i++)
            {
                keywords[i] = IEList[i].keyword;
            }

            return keywords;
        }
        /// <summary>
        /// Scores a query against a set of information-entropy entries by summing
        /// <c>queryProcessor.calculateSimilarity</c> over every (query keyword, entry keyword)
        /// pair and dividing the sum by the number of query keywords.
        /// </summary>
        /// <param name="query">Query keywords.</param>
        /// <param name="IEs">Entries whose <c>keyword</c> members are compared with the query.</param>
        /// <returns>
        /// The summed similarity normalized to the query length, or <c>0</c> when the query
        /// contains no keywords.
        /// </returns>
        public float calculateScore(string[] query, InformationEntropy[] IEs)
        {
            // An empty query used to evaluate 0.0f / 0, i.e. NaN, which compares false
            // against every other score. Nothing matched, so report a zero score instead.
            if (query.Length == 0)
            {
                return 0.0f;
            }

            float score = 0.0f;

            foreach (string keyword in query)
            {
                foreach (InformationEntropy IE in IEs)
                {
                    score += queryProcessor.calculateSimilarity(keyword, IE.keyword);
                }
            }

            //normalize to the query length
            return score / query.Length;
        }
Exemplo n.º 5
0
 /// <summary>
 /// Initializes a peer from its node, its public user information and its
 /// information-entropy entries. All three arguments are stored as-is.
 /// </summary>
 /// <param name="__node">Value assigned to <c>node</c>.</param>
 /// <param name="__userInfo">Value assigned to <c>userInfo</c>.</param>
 /// <param name="__IE">Value assigned to <c>IE</c> (the array is not copied).</param>
 public Peer(
     Node __node,
     PublicUserInfo __userInfo,
     InformationEntropy[] __IE)
 {
     this.node = __node;
     this.userInfo = __userInfo;
     this.IE = __IE;
 }
        /// <summary>
        /// Completes the weights stored in <c>resourcesIndex</c> and rebuilds <c>calculatedIE</c>.
        /// For every keyword in the index the inverse document frequency
        /// <c>log10(N / dfk)</c> is computed, where <c>N</c> is <c>resourcesIndex.Count</c> and
        /// <c>dfk</c> is the length of the keyword's inverted list. Each descriptor weight in
        /// that list is multiplied by the IDF, and the keyword is handed to
        /// <c>Accumulate</c> together with the IDF.
        /// </summary>
        private void finalizeResourcesIndex()
        {
            IDictionaryEnumerator enumerator = resourcesIndex.getEnumerator();

            // N does not change while the index is being enumerated, so read it once.
            int N = resourcesIndex.Count;

            calculatedIE = new InformationEntropy[N > Constants.MAX_INFORMATION_ENTROPY_LENGTH ?
                Constants.MAX_INFORMATION_ENTROPY_LENGTH : N];

            int IECounter = 0;

            while (enumerator.MoveNext())
            {
                RRLib.SortedList invertedList = (RRLib.SortedList)enumerator.Value;
                int dfk = invertedList.Count;

                // A keyword without any descriptor has no weights to finish and no
                // meaningful IDF (the ratio would divide by zero), so it is skipped.
                if (dfk == 0)
                {
                    continue;
                }

                // Divide in floating point: the previous int / int division truncated the
                // ratio before the logarithm was taken (and produced log(0) when dfk > N).
                float IDFk = (float)Math.Log((double)N / dfk, 10);

                foreach (ResourceDescriptor rd in invertedList)
                {
                    rd.weight *= IDFk;   //finish the weight calculation.
                }

                //Calculate our Information Entropy (top keywords)
                Accumulate(calculatedIE, IECounter++, IDFk, (string)enumerator.Key);
            }
        }
        /// <summary>
        /// Accumulates the top keywords to establish as our ring's IE.
        /// Keeps <paramref name="IEList"/> ordered from highest to lowest ranked entry
        /// (as decided by <c>InformationEntropy.CompareTo</c>) using binary insertion.
        /// Once the array is full, a new entry pushes out the lowest ranked one, or is
        /// dropped when it ranks below everything already stored.
        /// </summary>
        /// <param name="IEList">Destination array; slots <c>0 .. min(position, Length) - 1</c>
        /// are expected to hold the entries accumulated by earlier calls.</param>
        /// <param name="position">Number of entries accumulated so far (0 for the first call).</param>
        /// <param name="weight">Weight given to the new entry.</param>
        /// <param name="keyword">Keyword given to the new entry.</param>
        private void Accumulate(InformationEntropy [] IEList, int position, float weight, string keyword)
        {
            int capacity = IEList.Length;
            if (capacity == 0)
            {
                // Nothing can be stored in a zero-length list.
                return;
            }

            InformationEntropy candidate = new InformationEntropy(keyword, weight);

            // Number of slots that are already occupied.
            int count = position < capacity ? position : capacity;

            // Binary search for the first slot whose entry ranks below the candidate.
            // NOTE(review): assumes CompareTo returns a positive value when the receiver
            // ranks higher than its argument, as the original in-code comments implied.
            // Confirm against InformationEntropy.CompareTo.
            int low = 0;
            int high = count;
            while (low < high)
            {
                int mid = low + (high - low) / 2;
                if (candidate.CompareTo(IEList[mid]) > 0)
                {
                    high = mid;
                }
                else
                {
                    // Entries that compare equal stay in front of the newcomer.
                    low = mid + 1;
                }
            }

            if (low >= capacity)
            {
                // The list is full and the candidate ranks below every stored entry.
                return;
            }

            // Shift the tail one slot to the right. When the list is full the last entry
            // falls off the end, which is the eviction of the lowest ranked keyword.
            int lastSlot = count < capacity ? count : capacity - 1;
            int shiftCount = lastSlot - low;
            if (shiftCount > 0)
            {
                Array.Copy(IEList, low, IEList, low + 1, shiftCount);
            }

            IEList[low] = candidate;
        }
        /// <summary>
        /// Splits <paramref name="text"/> on <c>Constants.TOKENIZER_DELIMETERS</c>, lower-cases
        /// every token with <c>ToLower()</c> and counts how often each distinct non-empty
        /// token occurs.
        /// </summary>
        /// <param name="text">Text to tokenize.</param>
        /// <returns>
        /// One <c>InformationEntropy</c> per distinct token, constructed from the token and its
        /// occurrence count converted to <c>float</c>. Entries appear in the enumeration order
        /// of the underlying <c>Hashtable</c>.
        /// </returns>
        private static InformationEntropy[] extractInformationEntropy_internal(string text)
        {
            // token -> boxed int occurrence count
            Hashtable occurrences = new Hashtable();

            //REVISIT: reduce the # copies
            //TODO: add stopword check
            foreach (string rawToken in text.Split(Constants.TOKENIZER_DELIMETERS))
            {
                string term = rawToken.ToLower();
                if (term.Length == 0)
                {
                    // Adjacent delimiters produce empty tokens; they are not keywords.
                    continue;
                }

                object seen = occurrences[term];
                occurrences[term] = (seen == null) ? 1 : (int)seen + 1;
            }

            InformationEntropy[] result = new InformationEntropy[occurrences.Count];
            int next = 0;

            foreach (DictionaryEntry entry in occurrences)
            {
                result[next++] = new InformationEntropy((string)entry.Key, (float)((int)entry.Value));
            }

            return result;
        }