        public Tuple<float, Tuple<DocLabel, DocsGroup>> Classify(string messageText)
        {
            var tokenized = _tokenizer.GetTokens(messageText);

            var tuples = _groups
                         .Select(x => new Tuple<float, Tuple<DocLabel, DocsGroup>>(Score(tokenized, x.Item2), x));

            // return the highest-scoring (score, group) pair
            return tuples.OrderByDescending(x => x.Item1).FirstOrDefault();
        }
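A minimal usage sketch for Classify above; the classifier instance and message are hypothetical, not from the source:

        // Hypothetical usage: score a message against every group and take the best match.
        var best = classifier.Classify("limited offer, click now");
        if (best != null) // FirstOrDefault returns null when no groups are configured
        {
            Console.WriteLine($"score: {best.Item1}, label: {best.Item2.Item1}");
        }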
Example #2
        /// <summary>
        ///     Evaluates the specified infix expression
        /// </summary>
        /// <param name="expression">The infix expression to evaluate</param>
        /// <returns>The result of the expression</returns>
        /// <exception cref="EvaluationException">If the expression is invalid</exception>
        public int Evaluate(string expression)
        {
            var tokens        = _tokenizer.GetTokens(expression);
            var postfixTokens = _converter.Convert(tokens);
            var operands      = new Stack<Token>();

            foreach (var token in postfixTokens)
            {
                if (token is NumericToken)
                {
                    operands.Push(token);
                }
                else if (token is OperatorToken)
                {
                    operands.Push(Evaluate((OperatorToken)token, operands));
                }
            }

            if (!operands.Any())
            {
                throw new EvaluationException("No expression to evaluate");
            }

            if (operands.Count > 1)
            {
                throw new EvaluationException($"Multiple operands in a row near {operands.First().Index}");
            }

            // NumericToken presumably converts to int in the source codebase (e.g. via an implicit operator).
            return (NumericToken)operands.Pop();
        }
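A minimal usage sketch for Evaluate above; the evaluator type name and its construction are assumptions, not from the source:

        // Hypothetical usage: evaluate an infix expression with standard precedence.
        var evaluator = new InfixEvaluator(); // type name assumed
        int result = evaluator.Evaluate("2 + 3 * 4"); // 14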
Example #3
        /// <summary>
        /// Evaluates the specified math expression.
        /// </summary>
        /// <param name="expression">The expression to evaluate.</param>
        /// <param name="tokenizer">The tokenizer for tokenize the expression.</param>
        /// <returns>The result of the evaluation.</returns>
        public static double Evaluate(string expression, ITokenizer tokenizer)
        {
            IMathContext context = tokenizer.Context;

            Token[] tokens = tokenizer.GetTokens(expression);
            Token[] rpn    = InfixToRPN(tokens, context);
            return Evaluate(rpn, context);
        }
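A minimal usage sketch; the containing class name and the tokenizer implementation are assumptions:

        // Hypothetical usage: evaluate an expression with a caller-supplied tokenizer.
        ITokenizer tokenizer = new DefaultTokenizer(); // implementation assumed
        double value = MathEvaluator.Evaluate("(1 + 2) * 3.5", tokenizer); // 10.5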
Example #4
        public Bitmap GenerateTagsCloud(string text, TagsCloudSettings settings) =>
            tokenizer
                .GetTokens(text)
                .Where(token => token.WordType != WordType.None)
                .Where(token => token.Word.Length > 3)
                .Select(token => token.Word)
                .Where(word => !settings.StopWords.Contains(word))
                .SortByFrequency()
                .Visualize(settings, visualizer);
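The method is a single LINQ pipeline: tokenize, drop unrecognized and short words, filter stop words, rank by frequency, and render. A minimal usage sketch (the generator instance and settings are hypothetical):

        // Hypothetical usage: build and save a tag cloud for a text file.
        Bitmap cloud = generator.GenerateTagsCloud(File.ReadAllText("input.txt"), settings);
        cloud.Save("cloud.png");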
Example #5
        private void InitializeTokenizer()
        {
            var (createdTokenizer, capturedErrors) = CreateTokenizerWithCapturedErrors();
            tokenizer     = createdTokenizer;
            result.Tokens = tokenizer.GetTokens().Tokens;
            result.TokenizerErrors.AddRange(capturedErrors);

            result.CommentTokens   = ParseCommentTokens();
            result.NonKeywordWords = NonKeywordWordsFromTokens(result.Tokens);
        }
Example #6
        private SparseVector<double> ProcessDocument(string document)
        {
            Set<string>          docWords = new Set<string>();
            Dictionary<int, int> tfVec    = new Dictionary<int, int>();
            ArrayList<WordStem>  nGrams   = new ArrayList<WordStem>(mMaxNGramLen);

            foreach (string token in mTokenizer.GetTokens(document))
            {
                string word = token.Trim().ToLower();
                if (mStopWords == null || !mStopWords.Contains(word))
                {
                    string stem = mStemmer == null ? word : mStemmer.GetStem(word).Trim().ToLower();
                    if (nGrams.Count < mMaxNGramLen)
                    {
                        // still filling the n-gram buffer
                        WordStem wordStem = new WordStem();
                        wordStem.mWord = word;
                        wordStem.mStem = stem;
                        nGrams.Add(wordStem);
                        if (nGrams.Count < mMaxNGramLen)
                        {
                            continue; // buffer not full yet; nothing to emit
                        }
                    }
                    else
                    {
                        // buffer full: shift the window left by one and append the new word
                        WordStem wordStem = nGrams[0];
                        wordStem.mWord = word;
                        wordStem.mStem = stem;
                        for (int i = 0; i < mMaxNGramLen - 1; i++)
                        {
                            nGrams[i] = nGrams[i + 1];
                        }
                        nGrams[mMaxNGramLen - 1] = wordStem;
                    }
                    ProcessDocumentNGrams(nGrams, 0, tfVec, docWords);
                }
            }
            // flush the n-grams that start after position 0 in the final window
            int startIdx = nGrams.Count == mMaxNGramLen ? 1 : 0;

            for (int i = startIdx; i < nGrams.Count; i++)
            {
                ProcessDocumentNGrams(nGrams, i, tfVec, docWords);
            }
            SparseVector<double> docVec = new SparseVector<double>(tfVec.Count);

            // copy term frequencies into the sparse vector (index = word id, value = count)
            foreach (KeyValuePair<int, int> tfItem in tfVec)
            {
                docVec.InnerIdx.Add(tfItem.Key);
                docVec.InnerDat.Add(tfItem.Value);
            }
            docVec.Sort();
            return docVec;
        }
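The subtle part above is the n-gram buffer: it fills until it holds mMaxNGramLen word/stem pairs, then each new word shifts the window left by one before n-grams are emitted from position 0, and the trailing loop flushes the shorter n-grams remaining at the end. A standalone sketch of the same windowing idea (names illustrative, not from the source):

        // Illustrative sliding window: emit every window of up to n consecutive words.
        static IEnumerable<string[]> SlidingWindows(IEnumerable<string> words, int n)
        {
            var window = new List<string>(n);
            foreach (string word in words)
            {
                if (window.Count == n)
                {
                    window.RemoveAt(0); // full buffer: shift left by one
                }
                window.Add(word);
                yield return window.ToArray();
            }
        }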
Example #7
        public Dictionary<string, HashSet<string>> Analysis(string query)
        {
            HashSet<string> queryTokens = tokenizer.GetTokens(query);
            Dictionary<string, HashSet<string>> result = new Dictionary<string, HashSet<string>>();

            foreach (string token in queryTokens)
            {
                result[token] = new HashSet<string>();
                HashSet<string> sames = new SimilarBuilder(token).ProduceSames(number).sames;
                foreach (string sameToken in sames)
                {
                    result[token].UnionWith(index.LookUp(sameToken));
                }
            }
            return result;
        }
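A minimal usage sketch for Analysis above; the analyzer instance and query are hypothetical:

        // Hypothetical usage: for each query token, collect index hits for all of its variants.
        Dictionary<string, HashSet<string>> hits = analyzer.Analysis("helo world");
        foreach (var pair in hits)
        {
            Console.WriteLine($"{pair.Key}: {string.Join(", ", pair.Value)}");
        }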
Example #8
        private void Process(string filePath)
        {
            string[] lines = File.ReadAllLines(filePath);
            foreach (string line in lines)
            {
                HashSet<string> newTokens = tokenizer.GetTokens(line);
                foreach (string newToken in newTokens)
                {
                    if (!listOfWordAddresses.ContainsKey(newToken))
                    {
                        listOfWordAddresses[newToken] = new HashSet<string>();
                    }
                    listOfWordAddresses[newToken].Add(filePath);
                }
            }
        }
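Process builds an inverted index: each token maps to the set of file paths it appears in. A lookup is then a single dictionary access (a sketch, assuming the listOfWordAddresses field shown above is a Dictionary<string, HashSet<string>>):

        // Hypothetical lookup: which indexed files contain the word "tokenizer"?
        if (listOfWordAddresses.TryGetValue("tokenizer", out HashSet<string> files))
        {
            Console.WriteLine(string.Join(Environment.NewLine, files));
        }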
Example #9
        private List<ConceptSyntaxNode> ExtractConcepts(MultiDictionary<string, IConceptParser> conceptParsers)
        {
            var stopwatch = Stopwatch.StartNew();

            var tokenizerResult = _tokenizer.GetTokens();

            if (tokenizerResult.SyntaxError != null)
            {
                ExceptionsUtility.Rethrow(tokenizerResult.SyntaxError);
            }
            var tokenReader = new TokenReader(tokenizerResult.Tokens, 0);

            var newConcepts = new List<ConceptSyntaxNode>();
            var context     = new Stack<ConceptSyntaxNode>();
            var warnings    = new List<string>();

            tokenReader.SkipEndOfFile();
            while (!tokenReader.EndOfInput)
            {
                var parsed = ParseNextConcept(tokenReader, context, conceptParsers);
                newConcepts.Add(parsed.ConceptInfo);

                if (parsed.Warnings != null)
                {
                    warnings.AddRange(parsed.Warnings);
                }

                UpdateContextForNextConcept(tokenReader, context, parsed.ConceptInfo);
                OnKeyword?.Invoke(tokenReader, null);

                if (context.Count == 0)
                {
                    tokenReader.SkipEndOfFile();
                }
            }

            _performanceLogger.Write(stopwatch, "ExtractConcepts (" + newConcepts.Count + " concepts).");

            if (context.Count > 0)
            {
                var (dslScript, position) = tokenReader.GetPositionInScript();
                throw new DslSyntaxException($"Expected \"}}\" to close concept \"{context.Peek()}\".",
                                             "RH0002", dslScript, position, 0, ReportPreviousConcept(context.Peek()));
            }

            foreach (string warning in warnings)
            {
                if (_syntax.Value.ExcessDotInKey == ExcessDotInKey.Ignore)
                {
                    _logger.Trace(warning);
                }
                else
                {
                    _logger.Warning(warning);
                }
            }
            if (_syntax.Value.ExcessDotInKey == ExcessDotInKey.Error && warnings.Any())
            {
                throw new DslSyntaxException(warnings.First());
            }

            return newConcepts;
        }
Example #10
        private IEnumerable<Token> GetTokens(string text)
        {
            return _namesTokenizer.GetTokens(text);
        }