/// <summary>
/// Runs each token through CalcProbability against the two collections and
/// combines the individual results into a single prediction.
/// Tokens unknown to both collections are skipped; a per-token trace line is
/// written to the console.
/// </summary>
private static float getPrediction(IEnumerable<string> pTokens, TokenCollection pFirst, TokenCollection pSecond)
{
    Probability prob = new Probability();
    foreach (string token in pTokens)
    {
        int countFirst = pFirst.get(token);
        int countSecond = pSecond.get(token);

        // Neither collection has seen this token; it contributes nothing.
        if (countFirst == 0 && countSecond == 0)
        {
            continue;
        }

        float tokenProbability = CalcProbability(prob, countFirst, pFirst.Sum, countSecond, pSecond.Sum);
        Console.WriteLine(@"{0}: [{1}] ({2}-{3}), ({4}-{5})", token, tokenProbability, countFirst, pFirst.Sum, countSecond, pSecond.Sum);
    }
    return prob.Combine();
}
/// <summary>
/// Calculates the probability that the pTokens belong to the pGood collection, when
/// compared to the pBad collection.
/// The return value will be from 0 to 1, where 1 is 100% belongs to the pGood and 0 is
/// it does not belong.
/// </summary>
public static float Score(IEnumerable<string> pTokens, TokenCollection pGood, TokenCollection pBad)
{
    float prediction = getPrediction(pTokens, pGood, pBad);

    // NaN means no token produced a usable probability; treat as "does not belong".
    if (float.IsNaN(prediction))
    {
        return 0.0f;
    }

    // Map the 0.5..1.0 half of the prediction onto 0..1; at or below 0.5 scores zero.
    float centered = prediction - 0.5f;
    return centered <= 0.0f ? 0.0f : centered * 2;
}
/// <summary>
/// Adds the counts from a collection.
/// Only tokens that exist in the destination will be added.
/// </summary>
/// <param name="pDest">Collection whose counts are increased; not modified in place.</param>
/// <param name="pSrc">Collection supplying the counts to add.</param>
/// <returns>A new TokenCollection holding the combined counts.</returns>
public static TokenCollection Add(TokenCollection pDest, TokenCollection pSrc)
{
    // Drop any source tokens the destination doesn't already track.
    pSrc = Exclude(pDest._data.Tokens.Keys, pSrc);
    TokenData copy = new TokenData(pDest._data);
    foreach (KeyValuePair<string, int> pair in pSrc._data.Tokens)
    {
        // Use pair.Value directly; the old code re-indexed the dictionary
        // (pSrc._data.Tokens[pair.Key]) for a value the enumerator already yields.
        copy.Tokens[pair.Key] += pair.Value;
    }
    return new TokenCollection(copy);
}
/// <summary>
/// Builds a TokenCollection from the given tokens and sanity-checks the
/// resulting sum, distinct count, and per-token lookups before returning it.
/// </summary>
protected static TokenCollection Create(string[] pTokens)
{
    TokenCollection collection = new TokenCollection();
    foreach (string token in pTokens)
    {
        collection.Add(token);
    }

    // Total count must match the input length; Count must match the unique tokens.
    Assert.AreEqual(pTokens.Length, collection.Sum);
    Assert.AreEqual(pTokens.Distinct().Count(), collection.Count);

    // Every added token must be retrievable with a non-zero count.
    foreach (string token in pTokens)
    {
        Assert.AreNotEqual(0, collection.get(token));
    }
    return collection;
}
/// <summary>
/// Subtracts the counts from a collection.
/// Only tokens that exist in the destination will be subtracted; tokens whose
/// count drops to zero or below are removed entirely.
/// </summary>
/// <param name="pDest">Collection to subtract from; not modified in place.</param>
/// <param name="pSrc">Collection supplying the counts to remove.</param>
/// <returns>A new TokenCollection with the reduced counts.</returns>
/// <exception cref="ArgumentException">pSrc has a larger total count than pDest.</exception>
public static TokenCollection Subtract(TokenCollection pDest, TokenCollection pSrc)
{
    if (pDest.Sum < pSrc.Sum)
    {
        throw new ArgumentException("Can not subtract a larger collection");
    }

    // Ignore any source tokens the destination does not contain.
    pSrc = Exclude(pDest._data.Tokens.Keys, pSrc);
    TokenData copy = new TokenData(pDest._data);
    foreach (KeyValuePair<string, int> pair in pSrc._data.Tokens)
    {
        // Exclude() above guarantees pair.Key exists in copy.Tokens; compute the
        // remainder once instead of the old code's three indexer lookups per token.
        int remaining = copy.Tokens[pair.Key] - pair.Value;
        if (remaining <= 0)
        {
            copy.Tokens.Remove(pair.Key);
        }
        else
        {
            copy.Tokens[pair.Key] = remaining;
        }
    }
    return new TokenCollection(copy);
}
/// <summary>
/// Returns a copy of pSrc with every token that is not in pExclude removed.
/// </summary>
private static TokenCollection Exclude(IEnumerable<string> pExclude, TokenCollection pSrc)
{
    // remove tokens from the source that aren't in the destination.
    TokenCollection result = new TokenCollection(pSrc._data);

    // ToArray materializes the key list before Remove() mutates the collection.
    foreach (string token in pSrc._data.Tokens.Keys.Except(pExclude).ToArray())
    {
        result.Remove(token);
    }
    return result;
}
/// <summary>
/// Subtracts the counts from a collection.
/// Only tokens that exist in the destination will be subtracted.
/// </summary>
public static TokenCollection Subtract(TokenCollection pDest, TokenCollection pSrc)
{
    bool sourceIsLarger = pDest.Sum < pSrc.Sum;
    if (sourceIsLarger)
    {
        throw new ArgumentException("Can not subtract a larger collection");
    }

    // Ignore any source tokens the destination does not contain.
    pSrc = Exclude(pDest._data.Tokens.Keys, pSrc);

    TokenData result = new TokenData(pDest._data);
    foreach (string key in pSrc._data.Tokens.Keys)
    {
        result.Tokens[key] -= pSrc._data.Tokens[key];

        // Drop tokens whose count has fallen to zero or below.
        if (result.Tokens[key] <= 0)
        {
            result.Tokens.Remove(key);
        }
    }
    return new TokenCollection(result);
}
/// <summary>
/// Converts a TokenCollection object into JSON
/// </summary>
public static string Serialize(TokenCollection pCollection)
{
    // Serializes only the token/count dictionary, not the wrapper object.
    return new JavaScriptSerializer().Serialize(pCollection._data.Tokens);
}
/// <summary>
/// Merges all the tokens from both collections. Adding their counts together.
/// </summary>
/// <param name="pDest">Base collection; not modified in place.</param>
/// <param name="pSrc">Collection whose tokens and counts are merged in.</param>
/// <returns>A new TokenCollection with the summed counts of both inputs.</returns>
public static TokenCollection Merge(TokenCollection pDest, TokenCollection pSrc)
{
    TokenData copy = new TokenData(pDest._data);
    foreach (KeyValuePair<string, int> pair in pSrc._data.Tokens)
    {
        // TryGetValue leaves 'existing' at 0 for unseen tokens, replacing the old
        // ContainsKey + Add + indexer pattern (three lookups) with two.
        int existing;
        copy.Tokens.TryGetValue(pair.Key, out existing);
        copy.Tokens[pair.Key] = existing + pair.Value;
    }
    return new TokenCollection(copy);
}
/// <summary>
/// Subtracting a collection with a larger total count must throw ArgumentException.
/// </summary>
public void Test_Subtract_Rule()
{
    TokenCollection larger = new TokenCollection();
    larger.Add("mouse");
    larger.Add("mouse");
    larger.Add("mouse");

    TokenCollection smaller = new TokenCollection();
    smaller.Add("house");
    smaller.Add("house");

    try
    {
        TokenCollection.Subtract(smaller, larger);
        Assert.Fail();
    }
    catch (ArgumentException)
    {
        // Expected: cannot subtract a collection with a larger sum.
    }
}
/// <summary>
/// Serializing a collection must produce the expected JSON string (_JSON fixture).
/// </summary>
public void Test_To_JSON()
{
    Dictionary<string, int> tokens = new Dictionary<string, int>
    {
        {"hello", 3},
        {"chicken", 1}
    };
    TokenCollection collection = new TokenCollection(tokens);

    Assert.AreEqual(_JSON, TokenCollection.Serialize(collection));
}
/// <summary>
/// Calculates the probability that the pTokens belong to the pGood collection, when
/// compared to the pBad collection.
/// The return value will be from 0 to 1, where 1 is 100% belongs to the pGood and 0 is
/// it does not belong.
/// </summary>
public static float Score(IEnumerable<string> pTokens, TokenCollection pGood, TokenCollection pBad)
{
    float raw = getPrediction(pTokens, pGood, pBad);

    if (float.IsNaN(raw))
    {
        // No usable prediction could be made; report "does not belong".
        return 0.0f;
    }

    // Rescale the 0.5..1.0 range onto 0..1, clamping the lower half to zero.
    return Math.Max(0.0f, raw - 0.5f) * 2;
}
/// <summary>
/// Feeds every token known to at least one collection through CalcProbability
/// and returns the combined result. A per-token diagnostic line is written to
/// the console.
/// </summary>
private static float getPrediction(IEnumerable<string> pTokens, TokenCollection pFirst, TokenCollection pSecond)
{
    Probability prob = new Probability();
    foreach (string token in pTokens)
    {
        int inFirst = pFirst.get(token);
        int inSecond = pSecond.get(token);

        // Only tokens seen by at least one collection contribute.
        if (inFirst != 0 || inSecond != 0)
        {
            float result = CalcProbability(prob, inFirst, pFirst.Sum, inSecond, pSecond.Sum);
            Console.WriteLine(@"{0}: [{1}] ({2}-{3}), ({4}-{5})", token, result, inFirst, pFirst.Sum, inSecond, pSecond.Sum);
        }
    }
    return prob.Combine();
}
/// <summary>
/// Converts a TokenCollection object into JSON
/// </summary>
public static string Serialize(TokenCollection pCollection)
{
    JavaScriptSerializer json = new JavaScriptSerializer();

    // Only the underlying token/count dictionary is serialized.
    string serialized = json.Serialize(pCollection._data.Tokens);
    return serialized;
}