// Methods
/// <summary>
/// Checks the three arguments with the <c>Validator</c> helpers and then tokenizes
/// <paramref name="text"/> by delegating to <c>TokenizeText</c>.
/// </summary>
/// <param name="text">Text to tokenize; passed to <c>Validator.ValidateStringNullOrWhiteSpace</c>.</param>
/// <param name="strategy">Tokenization strategy; passed to <c>Validator.ValidateObject</c>.</param>
/// <param name="ruleSet">Rule set forwarded to <c>TokenizeText</c>; passed to <c>Validator.ValidateObject</c>.</param>
/// <returns>The n-gram list produced by <c>TokenizeText</c>.</returns>
/// <remarks>
/// NOTE(review): the <c>Validator</c> helpers presumably throw on invalid input - confirm against their definition.
/// </remarks>
public List<INGram> Do(string text, ITokenizationStrategy strategy, INGramTokenizerRuleSet ruleSet)
{
    Validator.ValidateStringNullOrWhiteSpace(text, nameof(text));
    Validator.ValidateObject(strategy, nameof(strategy));
    Validator.ValidateObject(ruleSet, nameof(ruleSet));

    List<INGram> tokenized = TokenizeText(text, strategy, ruleSet);

    return tokenized;
}
// Methods (public)
/// <summary>
/// Builds a <c>LabeledExample</c> from the given id, label and text, tokenizing the text
/// with <c>_tokenizer</c> so that the example carries its n-grams.
/// </summary>
/// <param name="id">Identifier passed through to the <c>LabeledExample</c> constructor.</param>
/// <param name="label">Label of the example; passed to <c>Validator.ValidateStringNullOrWhiteSpace</c>.</param>
/// <param name="text">Text of the example; passed to <c>Validator.ValidateStringNullOrWhiteSpace</c>.</param>
/// <param name="strategy">Tokenization strategy; passed to <c>Validator.ValidateObject</c>.</param>
/// <param name="ruleSet">N-gram rule set; passed to <c>Validator.ValidateObject</c>.</param>
/// <returns>A new <c>LabeledExample</c> holding the id, label, text and the tokenized n-grams.</returns>
/// <remarks>
/// NOTE(review): the <c>Validator</c> helpers presumably throw on invalid input - confirm against their definition.
/// </remarks>
public LabeledExample Create(
    ulong id,
    string label,
    string text,
    ITokenizationStrategy strategy,
    INGramTokenizerRuleSet ruleSet)
{
    Validator.ValidateStringNullOrWhiteSpace(label, nameof(label));
    Validator.ValidateStringNullOrWhiteSpace(text, nameof(text));
    Validator.ValidateObject(strategy, nameof(strategy));
    Validator.ValidateObject(ruleSet, nameof(ruleSet));

    List<INGram> tokens = _tokenizer.Do(text, strategy, ruleSet);

    return new LabeledExample(id, label, text, tokens);
}
// Methods
/// <summary>
/// Predicts a label for <paramref name="text"/> by tokenizing it, comparing the resulting
/// n-grams against <paramref name="labeledExamples"/>, averaging the similarity indexes and
/// deriving a label from those averages. Each stage is reported through
/// <c>_components.LoggingAction</c>.
/// </summary>
/// <param name="text">Text to classify; passed to <c>Validator.ValidateStringNullOrWhiteSpace</c>.</param>
/// <param name="strategy">Tokenization strategy; passed to <c>Validator.ValidateObject</c>.</param>
/// <param name="ruleSet">N-gram rule set; passed to <c>Validator.ValidateObject</c>.</param>
/// <param name="labeledExamples">Examples to compare against; passed to <c>Validator.ValidateList</c>.</param>
/// <returns>
/// A <c>TextClassifierResult</c> built from the predicted label, the similarity indexes and
/// their averages. The label may be <c>null</c>; in that case a failure message is logged
/// and the result is still returned.
/// </returns>
/// <remarks>
/// NOTE(review): the <c>Validator</c> helpers presumably throw on invalid input - confirm against their definition.
/// </remarks>
public TextClassifierResult PredictLabel(
    string text,
    ITokenizationStrategy strategy,
    INGramTokenizerRuleSet ruleSet,
    List<LabeledExample> labeledExamples)
{
    // Argument checks.
    Validator.ValidateStringNullOrWhiteSpace(text, nameof(text));
    Validator.ValidateObject(strategy, nameof(strategy));
    Validator.ValidateObject(ruleSet, nameof(ruleSet));
    Validator.ValidateList(labeledExamples, nameof(labeledExamples));

    // Report the inputs. The text is shortened first so the log message stays bounded.
    _components.LoggingAction.Invoke(MessageCollection.TextClassifier_AttemptingToPredictLabel);
    string shortenedText
        = _components.TextTruncatingFunction.Invoke(text, _settings.TruncateTextInLogMessagesAfter);
    _components.LoggingAction.Invoke(
        MessageCollection.TextClassifier_FollowingTextHasBeenProvided.Invoke(shortenedText));
    _components.LoggingAction.Invoke(
        MessageCollection.TextClassifier_FollowingTokenizationStrategyWillBeUsed.Invoke(strategy));
    _components.LoggingAction.Invoke(
        MessageCollection.TextClassifier_FollowingNGramsTokenizerRuleSetWillBeUsed.Invoke(ruleSet));
    _components.LoggingAction.Invoke(
        MessageCollection.TextClassifier_XLabeledExamplesHaveBeenProvided.Invoke(labeledExamples));

    // Tokenize the input text.
    List<INGram> tokens = _components.NGramsTokenizer.Do(text, strategy, ruleSet);
    _components.LoggingAction.Invoke(
        MessageCollection.TextClassifier_ProvidedTextHasBeenTokenizedIntoXNGrams.Invoke(tokens));

    // Compare the tokens against every labeled example.
    List<SimilarityIndex> similarityIndexes = GetSimilarityIndexes(tokens, labeledExamples);
    _components.LoggingAction.Invoke(
        MessageCollection.TextClassifier_TokenizedTextHasBeenComparedAgainstTheProvidedLabeledExamples);
    _components.LoggingAction.Invoke(
        MessageCollection.TextClassifier_XSimilarityIndexObjectsHaveBeenComputed.Invoke(similarityIndexes));

    // Aggregate the indexes into averages.
    List<SimilarityIndexAverage> averages = GetSimilarityIndexAverages(similarityIndexes);
    _components.LoggingAction.Invoke(
        MessageCollection.TextClassifier_XSimilarityIndexAverageObjectsHaveBeenComputed(averages));

    // Derive the label. It is logged before the null check, so a null label is logged as well.
    string predictedLabel = PredictLabel(averages);
    _components.LoggingAction.Invoke(
        MessageCollection.TextClassifier_PredictedLabelIs.Invoke(predictedLabel));

    if (predictedLabel != null)
    {
        _components.LoggingAction.Invoke(MessageCollection.TextClassifier_PredictionHasBeenSuccessful);
    }
    else
    {
        _components.LoggingAction.Invoke(
            MessageCollection.TextClassifier_PredictionHasFailedTryIncreasingTheAmountOfProvidedLabeledExamples);
    }

    return new TextClassifierResult(predictedLabel, similarityIndexes, averages);
}
// Methods (private)
/// <summary>
/// Tokenizes <paramref name="text"/> into n-grams, running <c>DoFor</c> once for every
/// n-gram size that <paramref name="ruleSet"/> enables (monograms through fivegrams, in that order)
/// and concatenating the results.
/// </summary>
/// <param name="text">Text to tokenize.</param>
/// <param name="strategy">Tokenization strategy forwarded to each <c>DoFor</c> call.</param>
/// <param name="ruleSet">Flags selecting which n-gram sizes are produced.</param>
/// <returns>All produced n-grams in a single list; empty when no rule is enabled.</returns>
/// <exception cref="Exception">
/// Thrown when any step inside the tokenization fails. The message comes from
/// <c>MessageCollection.NGramTokenizer_TheRuleCantBeAppliedTo</c> and the original failure
/// is attached as <see cref="Exception.InnerException"/>.
/// </exception>
private List<INGram> TokenizeText(string text, ITokenizationStrategy strategy, INGramTokenizerRuleSet ruleSet)
{
    List<INGram> nGrams = new List<INGram>();

    // Name of the rule most recently entered, so the wrapping exception can report it.
    // Stays null if the failure happens before any rule has been entered.
    string ruleName = null;

    try
    {
        if (ruleSet.DoForMonograms)
        {
            ruleName = nameof(ruleSet.DoForMonograms);
            nGrams.AddRange(DoFor<Monogram>(text, strategy));
        }

        if (ruleSet.DoForBigrams)
        {
            ruleName = nameof(ruleSet.DoForBigrams);
            nGrams.AddRange(DoFor<Bigram>(text, strategy));
        }

        if (ruleSet.DoForTrigrams)
        {
            ruleName = nameof(ruleSet.DoForTrigrams);
            nGrams.AddRange(DoFor<Trigram>(text, strategy));
        }

        if (ruleSet.DoForFourgrams)
        {
            ruleName = nameof(ruleSet.DoForFourgrams);
            nGrams.AddRange(DoFor<Fourgram>(text, strategy));
        }

        if (ruleSet.DoForFivegrams)
        {
            ruleName = nameof(ruleSet.DoForFivegrams);
            nGrams.AddRange(DoFor<Fivegram>(text, strategy));
        }

        return nGrams;
    }
    catch (Exception exception)
    {
        // Same exception type and message as before, but the root cause is kept as the
        // inner exception instead of being discarded.
        throw new Exception(
            MessageCollection.NGramTokenizer_TheRuleCantBeAppliedTo.Invoke(ruleName, text),
            exception);
    }
}
/// <summary>
/// Convenience overload: tokenizes <paramref name="text"/> using a newly constructed
/// <c>TokenizationStrategy</c> and forwards to the three-argument <c>Do</c>.
/// </summary>
/// <param name="text">Text to tokenize.</param>
/// <param name="ruleSet">N-gram rule set forwarded unchanged.</param>
/// <returns>Whatever the three-argument <c>Do</c> returns.</returns>
public List<INGram> Do(string text, INGramTokenizerRuleSet ruleSet)
{
    return Do(text, new TokenizationStrategy(), ruleSet);
}
/// <summary>
/// Convenience overload: predicts a label for <paramref name="text"/> using a newly constructed
/// <c>TokenizationStrategy</c> and forwards to the four-argument <c>PredictLabel</c>.
/// </summary>
/// <param name="text">Text to classify.</param>
/// <param name="ruleSet">N-gram rule set forwarded unchanged.</param>
/// <param name="labeledExamples">Labeled examples forwarded unchanged.</param>
/// <returns>Whatever the four-argument <c>PredictLabel</c> returns.</returns>
public TextClassifierResult PredictLabel(
    string text,
    INGramTokenizerRuleSet ruleSet,
    List<LabeledExample> labeledExamples)
{
    return PredictLabel(text, new TokenizationStrategy(), ruleSet, labeledExamples);
}