예제 #1
0
        /// <summary>
        /// Runs every token for which at least one of the two collections returns a
        /// non-zero value from get() through CalcProbability, writes one trace line per
        /// such token to the console, and returns the result of Probability.Combine().
        /// Tokens for which both get() calls return 0 are skipped.
        /// </summary>
        /// <param name="pTokens">The tokens to evaluate.</param>
        /// <param name="pFirst">Collection supplying the first count and sum.</param>
        /// <param name="pSecond">Collection supplying the second count and sum.</param>
        /// <returns>The value returned by Combine() on the Probability instance.</returns>
        private static float getPrediction(IEnumerable<string> pTokens, TokenCollection pFirst, TokenCollection pSecond)
        {
            Probability combined = new Probability();

            foreach (string word in pTokens)
            {
                int hitsInFirst = pFirst.get(word);
                int hitsInSecond = pSecond.get(word);

                // Only tokens known to at least one collection are processed.
                if (hitsInFirst != 0 || hitsInSecond != 0)
                {
                    float wordProbability = CalcProbability(combined, hitsInFirst, pFirst.Sum, hitsInSecond, pSecond.Sum);

                    // Trace output: token, probability, then (count-sum) for each collection.
                    Console.WriteLine(
                        @"{0}: [{1}] ({2}-{3}), ({4}-{5})",
                        word, wordProbability, hitsInFirst, pFirst.Sum, hitsInSecond, pSecond.Sum);
                }
            }

            return combined.Combine();
        }
예제 #2
0
 /// <summary>
 /// Scores how strongly pTokens match the pGood collection relative to the pBad collection.
 /// The raw prediction is lowered by 0.5, floored at 0 and then doubled, so a prediction
 /// of 0.5 or less yields 0 and a prediction of 1 yields 1. A NaN prediction yields 0.
 /// </summary>
 public static float Score(IEnumerable<string> pTokens, TokenCollection pGood, TokenCollection pBad)
 {
     float raw = getPrediction(pTokens, pGood, pBad);

     // NaN means no usable prediction; report it as "does not belong".
     return float.IsNaN(raw)
         ? 0.0f
         : Math.Max(0.0f, raw - 0.5f) * 2;
 }
예제 #3
0
        /// <summary>
        /// Builds a new collection from pDest's data with pSrc's counts added on top.
        /// Tokens of pSrc that are not keys of pDest are filtered out first, so no new
        /// tokens are introduced.
        /// </summary>
        public static TokenCollection Add(TokenCollection pDest, TokenCollection pSrc)
        {
            // Keep only the source tokens that the destination already contains.
            TokenCollection shared = Exclude(pDest._data.Tokens.Keys, pSrc);

            TokenData result = new TokenData(pDest._data);
            foreach (KeyValuePair<string, int> entry in shared._data.Tokens)
            {
                result.Tokens[entry.Key] = result.Tokens[entry.Key] + entry.Value;
            }

            return new TokenCollection(result);
        }
예제 #4
0
        /// <summary>
        /// Test helper: builds a TokenCollection by adding each entry of pTokens in order,
        /// then asserts that Sum, Count and the per-token lookups agree with the input.
        /// </summary>
        protected static TokenCollection Create(string[] pTokens)
        {
            TokenCollection built = new TokenCollection();
            for (int i = 0; i < pTokens.Length; i++)
            {
                built.Add(pTokens[i]);
            }

            // Sum must match the number of added tokens, Count the number of distinct ones.
            int expectedDistinct = pTokens.Distinct().Count();
            Assert.AreEqual(pTokens.Length, built.Sum);
            Assert.AreEqual(expectedDistinct, built.Count);

            // Every token that was added must be retrievable with a non-zero value.
            for (int i = 0; i < pTokens.Length; i++)
            {
                Assert.AreNotEqual(0, built.get(pTokens[i]));
            }

            return built;
        }
예제 #5
0
        /// <summary>
        /// Builds a new collection from pDest's data with pSrc's counts taken away.
        /// Source tokens that are not keys of pDest are ignored, and any token whose
        /// count drops to zero or below is removed from the result.
        /// </summary>
        /// <exception cref="ArgumentException">
        /// pSrc.Sum is greater than pDest.Sum. The check uses pSrc's full sum, before
        /// tokens missing from pDest are filtered out.
        /// </exception>
        public static TokenCollection Subtract(TokenCollection pDest, TokenCollection pSrc)
        {
            bool sourceIsLarger = pDest.Sum < pSrc.Sum;
            if (sourceIsLarger)
            {
                throw new ArgumentException("Can not subtract a larger collection");
            }

            // Keep only the source tokens that the destination already contains.
            TokenCollection shared = Exclude(pDest._data.Tokens.Keys, pSrc);
            TokenData result = new TokenData(pDest._data);

            foreach (string key in shared._data.Tokens.Keys)
            {
                int remaining = result.Tokens[key] - shared._data.Tokens[key];
                if (remaining > 0)
                {
                    result.Tokens[key] = remaining;
                }
                else
                {
                    // Nothing left of this token: drop the entry altogether.
                    result.Tokens.Remove(key);
                }
            }

            return new TokenCollection(result);
        }
예제 #6
0
 /// <summary>
 /// Returns a TokenCollection created from pSrc's data with every token removed that
 /// does not appear in pExclude. Despite its name, pExclude lists the tokens to keep.
 /// </summary>
 private static TokenCollection Exclude(IEnumerable<string> pExclude, TokenCollection pSrc)
 {
     TokenCollection filtered = new TokenCollection(pSrc._data);

     // Materialise the keys to drop up front, before anything is removed.
     IEnumerable<string> sourceKeys = pSrc._data.Tokens.Keys;
     string[] unwanted = sourceKeys.Except(pExclude).ToArray();
     for (int i = 0; i < unwanted.Length; i++)
     {
         filtered.Remove(unwanted[i]);
     }

     return filtered;
 }
예제 #7
0
        /// <summary>
        /// Produces a new collection equal to pDest's data minus the counts in pSrc.
        /// Only tokens that are keys of pDest are affected; a token whose count reaches
        /// zero or less is removed from the result.
        /// </summary>
        /// <exception cref="ArgumentException">
        /// pSrc.Sum exceeds pDest.Sum (compared before pSrc is filtered against pDest).
        /// </exception>
        public static TokenCollection Subtract(TokenCollection pDest, TokenCollection pSrc)
        {
            if (pDest.Sum < pSrc.Sum)
            {
                throw new ArgumentException("Can not subtract a larger collection");
            }

            // Source tokens unknown to the destination are filtered out here.
            TokenCollection overlap = Exclude(pDest._data.Tokens.Keys, pSrc);

            TokenData difference = new TokenData(pDest._data);
            foreach (KeyValuePair<string, int> entry in overlap._data.Tokens)
            {
                int left = difference.Tokens[entry.Key] - entry.Value;
                if (left <= 0)
                {
                    difference.Tokens.Remove(entry.Key);
                    continue;
                }

                difference.Tokens[entry.Key] = left;
            }

            return new TokenCollection(difference);
        }
예제 #8
0
 /// <summary>
 /// Serializes the token dictionary held by a TokenCollection to a JSON string
 /// using JavaScriptSerializer.
 /// </summary>
 public static string Serialize(TokenCollection pCollection)
 {
     return new JavaScriptSerializer().Serialize(pCollection._data.Tokens);
 }
예제 #9
0
        /// <summary>
        /// Combines two collections into a new one: tokens present in both have their
        /// counts summed, and tokens found only in pSrc are added with pSrc's count.
        /// </summary>
        public static TokenCollection Merge(TokenCollection pDest, TokenCollection pSrc)
        {
            TokenData merged = new TokenData(pDest._data);

            foreach (KeyValuePair<string, int> entry in pSrc._data.Tokens)
            {
                if (merged.Tokens.ContainsKey(entry.Key))
                {
                    merged.Tokens[entry.Key] += entry.Value;
                }
                else
                {
                    // Token is new to the destination: start it at the source's count.
                    merged.Tokens.Add(entry.Key, entry.Value);
                }
            }

            return new TokenCollection(merged);
        }
예제 #10
0
        public void Test_Subtract_Rule()
        {
            // Larger collection: "mouse" added three times.
            TokenCollection larger = new TokenCollection();
            for (int i = 0; i < 3; i++)
            {
                larger.Add("mouse");
            }

            // Smaller collection: "house" added twice.
            TokenCollection smaller = new TokenCollection();
            for (int i = 0; i < 2; i++)
            {
                smaller.Add("house");
            }

            // Subtracting the larger collection from the smaller one must be rejected.
            try
            {
                TokenCollection.Subtract(smaller, larger);
                Assert.Fail();
            }
            catch (ArgumentException)
            {
                // Expected: the source sum exceeds the destination sum.
            }
        }
예제 #11
0
        public void Test_To_JSON()
        {
            // NOTE(review): _JSON is defined outside this view; presumably it lists the
            // tokens in this insertion order - confirm before reordering the Add calls.
            Dictionary<string, int> counts = new Dictionary<string, int>();
            counts.Add("hello", 3);
            counts.Add("chicken", 1);

            string actual = TokenCollection.Serialize(new TokenCollection(counts));

            Assert.AreEqual(_JSON, actual);
        }
예제 #12
0
        /// <summary>
        /// Rates how well pTokens fit the pGood collection when compared to the pBad
        /// collection. The prediction is reduced by 0.5, clamped below at 0 and doubled:
        /// predictions up to 0.5 give 0, a prediction of 1 gives 1. NaN gives 0.
        /// </summary>
        public static float Score(IEnumerable <string> pTokens, TokenCollection pGood, TokenCollection pBad)
        {
            float raw = getPrediction(pTokens, pGood, pBad);

            // Default covers the NaN case, where no usable prediction exists.
            float score = 0.0f;
            if (!float.IsNaN(raw))
            {
                score = Math.Max(0.0f, raw - 0.5f) * 2;
            }

            return score;
        }
예제 #13
0
        /// <summary>
        /// Passes each token through CalcProbability together with its value from both
        /// collections and their sums, prints a trace line per processed token, and
        /// returns Probability.Combine(). A token is skipped when get() returns 0 for it
        /// in both collections.
        /// </summary>
        /// <param name="pTokens">The tokens to evaluate.</param>
        /// <param name="pFirst">Collection supplying the first count and sum.</param>
        /// <param name="pSecond">Collection supplying the second count and sum.</param>
        /// <returns>The value returned by Combine() on the Probability instance.</returns>
        private static float getPrediction(IEnumerable <string> pTokens, TokenCollection pFirst, TokenCollection pSecond)
        {
            Probability accumulator = new Probability();

            foreach (string word in pTokens)
            {
                int inFirst = pFirst.get(word);
                int inSecond = pSecond.get(word);

                bool unseen = inFirst == 0 && inSecond == 0;
                if (unseen)
                {
                    continue;
                }

                float p = CalcProbability(accumulator, inFirst, pFirst.Sum, inSecond, pSecond.Sum);

                // Trace output: token, probability, then (count-sum) for each collection.
                object[] traceArgs = { word, p, inFirst, pFirst.Sum, inSecond, pSecond.Sum };
                Console.WriteLine(@"{0}: [{1}] ({2}-{3}), ({4}-{5})", traceArgs);
            }

            return accumulator.Combine();
        }
예제 #14
0
        /// <summary>
        /// Produces the JSON representation of a TokenCollection by passing its token
        /// dictionary to JavaScriptSerializer.
        /// </summary>
        public static string Serialize(TokenCollection pCollection)
        {
            JavaScriptSerializer jsonWriter = new JavaScriptSerializer();
            string json = jsonWriter.Serialize(pCollection._data.Tokens);

            return json;
        }