/// <summary>
/// Expands an abbreviation: when the token's value is a key in
/// <c>Abbreviations</c>, a new token wrapping the mapped term is returned;
/// otherwise the token passed in is returned as-is.
/// </summary>
/// <param name="token">Token whose <c>Value</c> is looked up in <c>Abbreviations</c>.</param>
/// <returns>
/// A new <c>Token</c> built from the mapped term when a mapping exists,
/// or the same <paramref name="token"/> instance when none does.
/// </returns>
public Token Transform(Token token)
{
    string expandedTerm;
    if (Abbreviations.TryGetValue(token.Value, out expandedTerm))
    {
        return new Token(expandedTerm);
    }

    // No abbreviation registered for this value: hand back the original instance.
    return token;
}
/// <summary>
/// Looks up the frequency recorded for a token (per the original notes, the
/// number of times it was seen in added documents).
/// </summary>
/// <param name="token">Token used as the lookup key.</param>
/// <returns>
/// The stored frequency when the token is present in the lookup,
/// or <c>null</c> when it is not.
/// </returns>
public int? GetDocumentFrequency(Token token)
{
    int count;
    if (!_dict.TryGetValue(token, out count))
    {
        // Token has no recorded entry.
        return null;
    }

    return count;
}
/// <summary>
/// Scores how similar two tokens are by comparing their values with the
/// configured distance metric. The result is intended to range from
/// 0.0 (entirely different) to 1.0 (exactly the same); the actual range
/// is determined by the metric's <c>GetSimilarity</c> implementation.
/// </summary>
/// <param name="source">First token; its <c>Value</c> is the first argument to the metric.</param>
/// <param name="target">Second token; its <c>Value</c> is the second argument to the metric.</param>
/// <returns>The similarity reported by the distance metric.</returns>
public double Score(Token source, Token target)
{
    double similarity = _distance.GetSimilarity(source.Value, target.Value);
    return similarity;
}