Example #1
 /// <summary>
 /// Adds the token to the result list only if its text is at least ngramsLength characters long.
 /// </summary>
 private static void AddToList(List<TokenMatchInfo> result, TokenMatchInfo token, int ngramsLength)
 {
     if (token.TokenText.Length >= ngramsLength)
     {
         result.Add(token);
     }
 }
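Tokens shorter than ngramsLength are dropped here, presumably because text that short cannot fill even a single character n-gram during matching.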
Example #2
        /// <summary>
        /// Creates all the multi-word tokens, up to tokens containing maximumWordCount words
        /// </summary>
        public static List<TokenMatchInfo> GetAllPossibleTokens(List<TokenMatchInfo> tokenList, int maximumWordCount, int ngramsLength)
        {
            var tokenListResult = new List<TokenMatchInfo>();

            // add the single-word tokens
            foreach (var token in tokenList)
            {
                AddToList(tokenListResult, token, ngramsLength);
            }

            // add the multi-word tokens,
            // looping over the first word of each candidate token
            for (int firstWordIndex = 0; firstWordIndex < tokenList.Count; firstWordIndex++)
            {
                // the string that is extended as each additional word is appended
                var currentWord = tokenList[firstWordIndex].TokenText;
                var startIndex  = tokenList[firstWordIndex].StartIndex;

                // loop until we have either included maximumWordCount words or reached the last word of the sentence
                for (int tokenSize = 1; tokenSize < maximumWordCount && tokenSize + firstWordIndex < tokenList.Count; tokenSize++)
                {
                    // tokenSize is how many words forward we include this time

                    // append the next word to the running string
                    currentWord += " " + tokenList[firstWordIndex + tokenSize].TokenText;

                    //create new token object
                    var newToken = new TokenMatchInfo
                    {
                        TokenText  = currentWord,
                        StartIndex = startIndex,
                        EndIndex   = tokenList[firstWordIndex + tokenSize].EndIndex
                    };

                    AddToList(tokenListResult, newToken, ngramsLength);
                }
            }
            return tokenListResult;
        }
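To illustrate what GetAllPossibleTokens returns, here is a minimal usage sketch; the sample sentence, index values, and parameter choices are hypothetical:

 // hypothetical token list for the sentence "new york city"
 var tokens = new List<TokenMatchInfo>
 {
     new TokenMatchInfo { TokenText = "new", StartIndex = 0, EndIndex = 2 },
     new TokenMatchInfo { TokenText = "york", StartIndex = 4, EndIndex = 7 },
     new TokenMatchInfo { TokenText = "city", StartIndex = 9, EndIndex = 12 }
 };

 // with maximumWordCount = 2 and ngramsLength = 3 the result is:
 // "new", "york", "city", "new york", "york city"
 var allTokens = GetAllPossibleTokens(tokens, 2, 3);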
 public bool Equals(TokenMatchInfo obj)
 {
     // value equality; a null argument compares unequal rather than throwing
     return obj != null &&
            TokenText == obj.TokenText &&
            StartIndex == obj.StartIndex &&
            EndIndex == obj.EndIndex;
 }
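Since Equals defines value equality over TokenText, StartIndex, and EndIndex, the class should also override GetHashCode so that equal instances hash equally in hash-based collections. A minimal sketch, assuming a runtime where System.HashCode is available:

 public override int GetHashCode()
 {
     // must agree with Equals: equal instances produce equal hashes
     return HashCode.Combine(TokenText, StartIndex, EndIndex);
 }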
 public TokenMatchInfo(TokenMatchInfo token)
 {
     TokenText  = token.TokenText;
     StartIndex = token.StartIndex;
     EndIndex   = token.EndIndex;
 }
        public static List<MatchResult> FilterByThreshold(float[] similarityValues, List<string> dataset, float threshold, TokenMatchInfo token = default)
        {
            var matchResult = new List<MatchResult>();

            for (int i = 0; i < similarityValues.Length; i++)
            {
                if (similarityValues[i] >= threshold)
                {
                    matchResult.Add(new MatchResult
                    {
                        SimilarityScore   = similarityValues[i],
                        TokenMatchInfo    = token == default ? null : new TokenMatchInfo(token),
                        DatabaseMatchInfo = new DatabaseMatchInfo()
                        {
                            MatchText  = dataset[i],
                            MatchIndex = i
                        }
                    });
                }
            }

            return matchResult;
        }
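A usage sketch for FilterByThreshold; the scores, dataset entries, and threshold below are made up for the example:

 // hypothetical similarity scores against a three-entry dataset
 float[] scores = { 0.92f, 0.41f, 0.87f };
 var dataset = new List<string> { "new york", "boston", "newark" };

 // keeps entries 0 and 2 (scores >= 0.8f); each MatchResult carries
 // the score, the matched text, and its index in the dataset
 var matches = FilterByThreshold(scores, dataset, 0.8f);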