public Tuple<float, Tuple<DocLabel, DocsGroup>> Classify(string messageText)
{
    var tokenized = _tokenizer.GetTokens(messageText);
    var tuples = _groups
        .Select(x => new Tuple<float, Tuple<DocLabel, DocsGroup>>(Score(tokenized, x.Item2), x));

    // Get the group with the maximum score.
    return tuples.OrderByDescending(x => x.Item1).FirstOrDefault();
}
/// <summary>
/// Evaluates the specified infix expression.
/// </summary>
/// <param name="expression">The infix expression to evaluate.</param>
/// <returns>The result of the expression.</returns>
/// <exception cref="EvaluationException">If the expression is invalid.</exception>
public int Evaluate(string expression)
{
    var tokens = _tokenizer.GetTokens(expression);
    var postfixTokens = _converter.Convert(tokens);
    var operands = new Stack<Token>();

    foreach (var token in postfixTokens)
    {
        if (token is NumericToken)
        {
            operands.Push(token);
        }
        else if (token is OperatorToken)
        {
            operands.Push(Evaluate((OperatorToken)token, operands));
        }
    }

    if (!operands.Any())
    {
        throw new EvaluationException("No expression to evaluate");
    }

    if (operands.Count > 1)
    {
        throw new EvaluationException($"Multiple operands in a row near {operands.First().Index}");
    }

    return (NumericToken)operands.Pop();
}
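// A minimal, self-contained sketch of the same stack-based postfix evaluation that the method
// above relies on. It is illustrative only: it assumes the expression has already been converted
// to RPN, and it uses plain strings and ints instead of the snippet's Token/NumericToken/
// OperatorToken types, which are not shown here.
using System;
using System.Collections.Generic;
using System.Globalization;

static class PostfixSketch
{
    // Evaluates space-separated RPN such as "3 4 2 * +" (result: 11).
    public static int EvaluateRpn(string rpn)
    {
        var operands = new Stack<int>();
        foreach (string token in rpn.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries))
        {
            if (int.TryParse(token, NumberStyles.Integer, CultureInfo.InvariantCulture, out int number))
            {
                operands.Push(number);
            }
            else
            {
                // An operator consumes the two most recent operands and pushes the result.
                int right = operands.Pop();
                int left = operands.Pop();
                operands.Push(token switch
                {
                    "+" => left + right,
                    "-" => left - right,
                    "*" => left * right,
                    "/" => left / right,
                    _ => throw new InvalidOperationException($"Unknown operator: {token}")
                });
            }
        }

        if (operands.Count != 1)
        {
            throw new InvalidOperationException("Malformed expression");
        }
        return operands.Pop();
    }
}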
/// <summary>
/// Evaluates the specified math expression.
/// </summary>
/// <param name="expression">The expression to evaluate.</param>
/// <param name="tokenizer">The tokenizer used to tokenize the expression.</param>
/// <returns>The result of the evaluation.</returns>
public static double Evaluate(string expression, ITokenizer tokenizer)
{
    IMathContext context = tokenizer.Context;
    Token[] tokens = tokenizer.GetTokens(expression);
    Token[] rpn = InfixToRPN(tokens, context);
    return Evaluate(rpn, context);
}
public Bitmap GenerateTagsCloud(string text, TagsCloudSettings settings) => tokenizer
    .GetTokens(text)
    .Where(token => token.WordType != WordType.None)
    .Where(token => token.Word.Length > 3)
    .Select(token => token.Word)
    .Where(word => !settings.StopWords.Contains(word))
    .SortByFrequency()
    .Visualize(settings, visualizer);
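// SortByFrequency and Visualize are project-specific extension methods that are not shown in the
// snippet above. A plausible (assumed, not the project's actual) implementation of the
// frequency-ordering step would count occurrences of each word and order the distinct words by
// descending count:
using System.Collections.Generic;
using System.Linq;

static class FrequencyExtensionsSketch
{
    public static IEnumerable<KeyValuePair<string, int>> SortByFrequency(this IEnumerable<string> words) =>
        words.GroupBy(word => word)
             .Select(group => new KeyValuePair<string, int>(group.Key, group.Count()))
             .OrderByDescending(pair => pair.Value);
}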
private void InitializeTokenizer()
{
    var (createdTokenizer, capturedErrors) = CreateTokenizerWithCapturedErrors();
    tokenizer = createdTokenizer;

    result.Tokens = tokenizer.GetTokens().Tokens;
    result.TokenizerErrors.AddRange(capturedErrors);
    result.CommentTokens = ParseCommentTokens();
    result.NonKeywordWords = NonKeywordWordsFromTokens(result.Tokens);
}
private SparseVector<double> ProcessDocument(string document)
{
    Set<string> docWords = new Set<string>();
    Dictionary<int, int> tfVec = new Dictionary<int, int>();
    ArrayList<WordStem> nGrams = new ArrayList<WordStem>(mMaxNGramLen);

    foreach (string token in mTokenizer.GetTokens(document))
    {
        string word = token.Trim().ToLower();
        if (mStopWords == null || !mStopWords.Contains(word))
        {
            string stem = mStemmer == null ? word : mStemmer.GetStem(word).Trim().ToLower();
            if (nGrams.Count < mMaxNGramLen)
            {
                // Still filling the sliding n-gram window.
                WordStem wordStem = new WordStem();
                wordStem.mWord = word;
                wordStem.mStem = stem;
                nGrams.Add(wordStem);
                if (nGrams.Count < mMaxNGramLen)
                {
                    continue;
                }
            }
            else
            {
                // Window is full: shift it left by one and reuse the oldest slot for the new word.
                WordStem wordStem = nGrams[0];
                wordStem.mWord = word;
                wordStem.mStem = stem;
                for (int i = 0; i < mMaxNGramLen - 1; i++)
                {
                    nGrams[i] = nGrams[i + 1];
                }
                nGrams[mMaxNGramLen - 1] = wordStem;
            }
            ProcessDocumentNGrams(nGrams, 0, tfVec, docWords);
        }
    }

    // Process the n-grams that remain in the window at the end of the document.
    int startIdx = nGrams.Count == mMaxNGramLen ? 1 : 0;
    for (int i = startIdx; i < nGrams.Count; i++)
    {
        ProcessDocumentNGrams(nGrams, i, tfVec, docWords);
    }

    SparseVector<double> docVec = new SparseVector<double>(tfVec.Count);
    foreach (KeyValuePair<int, int> tfItem in tfVec)
    {
        docVec.InnerIdx.Add(tfItem.Key);
        docVec.InnerDat.Add(tfItem.Value);
    }
    docVec.Sort();
    return docVec;
}
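// A rough, self-contained illustration of the sliding-window n-gram counting idea used in
// ProcessDocument above, without the library-specific types (Set, ArrayList, SparseVector,
// WordStem) or the stemming and stop-word handling. It simply counts every n-gram of length
// 1..maxNGramLen in a whitespace-tokenized document.
using System;
using System.Collections.Generic;

static class NGramSketch
{
    public static Dictionary<string, int> CountNGrams(string document, int maxNGramLen)
    {
        var counts = new Dictionary<string, int>();
        string[] tokens = document.ToLowerInvariant()
            .Split(new[] { ' ', '\t', '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);

        for (int start = 0; start < tokens.Length; start++)
        {
            // Emit the n-grams of every length that start at this token.
            string nGram = null;
            for (int len = 1; len <= maxNGramLen && start + len <= tokens.Length; len++)
            {
                nGram = nGram == null ? tokens[start] : nGram + " " + tokens[start + len - 1];
                counts.TryGetValue(nGram, out int count);
                counts[nGram] = count + 1;
            }
        }
        return counts;
    }
}

// Example: CountNGrams("the quick brown fox", 2) counts "the", "quick", "brown", "fox",
// "the quick", "quick brown", and "brown fox" once each.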
public Dictionary<string, HashSet<string>> Analysis(string query)
{
    HashSet<string> queryTokens = tokenizer.GetTokens(query);
    Dictionary<string, HashSet<string>> result = new Dictionary<string, HashSet<string>>();

    foreach (string token in queryTokens)
    {
        result[token] = new HashSet<string>();

        // Collect everything the index returns for tokens similar to the query token.
        HashSet<string> sames = new SimilarBuilder(token).ProduceSames(number).sames;
        foreach (string sameToken in sames)
        {
            result[token].UnionWith(index.LookUp(sameToken));
        }
    }
    return result;
}
private void Process(string filePath)
{
    string[] lines = File.ReadAllLines(filePath);
    foreach (string line in lines)
    {
        HashSet<string> newTokens = tokenizer.GetTokens(line);
        foreach (string newToken in newTokens)
        {
            if (!listOfWordAddresses.ContainsKey(newToken))
            {
                listOfWordAddresses[newToken] = new HashSet<string>();
            }
            listOfWordAddresses[newToken].Add(filePath);
        }
    }
}
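// A minimal sketch of the inverted index that Process builds above: each token maps to the set
// of files it occurs in. The tokenizer here is a stand-in (a simple whitespace split); the
// original snippet's tokenizer and listOfWordAddresses field are project-specific.
using System;
using System.Collections.Generic;
using System.IO;

class InvertedIndexSketch
{
    private readonly Dictionary<string, HashSet<string>> _wordToFiles =
        new Dictionary<string, HashSet<string>>();

    public void IndexFile(string filePath)
    {
        foreach (string line in File.ReadAllLines(filePath))
        {
            foreach (string token in line.Split(new[] { ' ', '\t' }, StringSplitOptions.RemoveEmptyEntries))
            {
                if (!_wordToFiles.TryGetValue(token, out HashSet<string> files))
                {
                    files = new HashSet<string>();
                    _wordToFiles[token] = files;
                }
                files.Add(filePath);
            }
        }
    }

    // Returns the files containing the given token, or an empty set if it was never seen.
    public IReadOnlyCollection<string> Lookup(string token) =>
        _wordToFiles.TryGetValue(token, out HashSet<string> files) ? files : new HashSet<string>();
}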
private List<ConceptSyntaxNode> ExtractConcepts(MultiDictionary<string, IConceptParser> conceptParsers)
{
    var stopwatch = Stopwatch.StartNew();

    var tokenizerResult = _tokenizer.GetTokens();
    if (tokenizerResult.SyntaxError != null)
    {
        ExceptionsUtility.Rethrow(tokenizerResult.SyntaxError);
    }

    var tokenReader = new TokenReader(tokenizerResult.Tokens, 0);
    var newConcepts = new List<ConceptSyntaxNode>();
    var context = new Stack<ConceptSyntaxNode>();
    var warnings = new List<string>();

    tokenReader.SkipEndOfFile();
    while (!tokenReader.EndOfInput)
    {
        var parsed = ParseNextConcept(tokenReader, context, conceptParsers);
        newConcepts.Add(parsed.ConceptInfo);
        if (parsed.Warnings != null)
        {
            warnings.AddRange(parsed.Warnings);
        }

        UpdateContextForNextConcept(tokenReader, context, parsed.ConceptInfo);
        OnKeyword?.Invoke(tokenReader, null);

        if (context.Count == 0)
        {
            tokenReader.SkipEndOfFile();
        }
    }

    _performanceLogger.Write(stopwatch, "ExtractConcepts (" + newConcepts.Count + " concepts).");

    if (context.Count > 0)
    {
        var (dslScript, position) = tokenReader.GetPositionInScript();
        throw new DslSyntaxException($"Expected \"}}\" to close concept \"{context.Peek()}\".",
            "RH0002", dslScript, position, 0, ReportPreviousConcept(context.Peek()));
    }

    foreach (string warning in warnings)
    {
        if (_syntax.Value.ExcessDotInKey == ExcessDotInKey.Ignore)
        {
            _logger.Trace(warning);
        }
        else
        {
            _logger.Warning(warning);
        }
    }

    if (_syntax.Value.ExcessDotInKey == ExcessDotInKey.Error && warnings.Any())
    {
        throw new DslSyntaxException(warnings.First());
    }

    return newConcepts;
}
private IEnumerable<Token> GetTokens(string text)
{
    return _namesTokenizer.GetTokens(text);
}