// Methods
        /// <summary>
        /// Validates the arguments and then tokenizes <paramref name="text"/> into n-grams.
        /// </summary>
        /// <param name="text">Text to tokenize; checked by <c>Validator.ValidateStringNullOrWhiteSpace</c>.</param>
        /// <param name="strategy">Tokenization strategy; checked by <c>Validator.ValidateObject</c>.</param>
        /// <param name="ruleSet">Rule set selecting which n-gram sizes to produce; checked by <c>Validator.ValidateObject</c>.</param>
        /// <returns>The list returned by <c>TokenizeText</c> for the validated arguments.</returns>
        public List<INGram> Do(
            string text,
            ITokenizationStrategy strategy,
            INGramTokenizerRuleSet ruleSet)
        {
            // Argument checks run in declaration order, before any tokenization work starts.
            Validator.ValidateStringNullOrWhiteSpace(text, nameof(text));
            Validator.ValidateObject(strategy, nameof(strategy));
            Validator.ValidateObject(ruleSet, nameof(ruleSet));

            List<INGram> tokenized = TokenizeText(text, strategy, ruleSet);

            return tokenized;
        }
示例#2
0
        // Methods (public)
        /// <summary>
        /// Builds a <c>LabeledExample</c> from raw text by tokenizing it with <c>_tokenizer</c>.
        /// </summary>
        /// <param name="id">Identifier passed through to the <c>LabeledExample</c> constructor.</param>
        /// <param name="label">Label of the example; checked by <c>Validator.ValidateStringNullOrWhiteSpace</c>.</param>
        /// <param name="text">Text of the example; checked by <c>Validator.ValidateStringNullOrWhiteSpace</c>.</param>
        /// <param name="strategy">Tokenization strategy; checked by <c>Validator.ValidateObject</c>.</param>
        /// <param name="ruleSet">N-gram rule set; checked by <c>Validator.ValidateObject</c>.</param>
        /// <returns>A new <c>LabeledExample</c> carrying the id, label, text and the n-grams produced from the text.</returns>
        public LabeledExample Create(
            ulong id,
            string label,
            string text,
            ITokenizationStrategy strategy,
            INGramTokenizerRuleSet ruleSet)
        {
            // Validation order is label, text, strategy, ruleSet.
            Validator.ValidateStringNullOrWhiteSpace(label, nameof(label));
            Validator.ValidateStringNullOrWhiteSpace(text, nameof(text));
            Validator.ValidateObject(strategy, nameof(strategy));
            Validator.ValidateObject(ruleSet, nameof(ruleSet));

            // The tokenizer call is evaluated as a constructor argument, i.e. before the example is constructed.
            return new LabeledExample(id, label, text, _tokenizer.Do(text, strategy, ruleSet));
        }
示例#3
0
        // Methods
        /// <summary>
        /// Predicts a label for <paramref name="text"/> by tokenizing it, comparing the n-grams against
        /// <paramref name="labeledExamples"/>, averaging the similarity indexes and picking a label from the averages.
        /// Every step is reported through <c>_components.LoggingAction</c>.
        /// </summary>
        /// <param name="text">Text to classify; checked by <c>Validator.ValidateStringNullOrWhiteSpace</c>.</param>
        /// <param name="strategy">Tokenization strategy; checked by <c>Validator.ValidateObject</c>.</param>
        /// <param name="ruleSet">N-gram rule set; checked by <c>Validator.ValidateObject</c>.</param>
        /// <param name="labeledExamples">Examples to compare against; checked by <c>Validator.ValidateList</c>.</param>
        /// <returns>
        /// A <c>TextClassifierResult</c> built from the predicted label, the similarity indexes and their averages.
        /// The label may be <c>null</c>; that case is logged as a failed prediction.
        /// </returns>
        public TextClassifierResult PredictLabel(
            string text,
            ITokenizationStrategy strategy,
            INGramTokenizerRuleSet ruleSet,
            List<LabeledExample> labeledExamples)
        {
            Validator.ValidateStringNullOrWhiteSpace(text, nameof(text));
            Validator.ValidateObject(strategy, nameof(strategy));
            Validator.ValidateObject(ruleSet, nameof(ruleSet));
            Validator.ValidateList(labeledExamples, nameof(labeledExamples));

            // Short alias for the logging delegate used at every step below.
            var log = _components.LoggingAction;

            // Announce the attempt and echo the inputs; the text is truncated before it is logged.
            log.Invoke(MessageCollection.TextClassifier_AttemptingToPredictLabel);
            string shortenedText
                = _components.TextTruncatingFunction.Invoke(text, _settings.TruncateTextInLogMessagesAfter);
            log.Invoke(MessageCollection.TextClassifier_FollowingTextHasBeenProvided.Invoke(shortenedText));
            log.Invoke(MessageCollection.TextClassifier_FollowingTokenizationStrategyWillBeUsed.Invoke(strategy));
            log.Invoke(MessageCollection.TextClassifier_FollowingNGramsTokenizerRuleSetWillBeUsed.Invoke(ruleSet));
            log.Invoke(MessageCollection.TextClassifier_XLabeledExamplesHaveBeenProvided.Invoke(labeledExamples));

            // Step 1: tokenize the full (untruncated) text.
            List<INGram> tokens = _components.NGramsTokenizer.Do(text, strategy, ruleSet);
            log.Invoke(MessageCollection.TextClassifier_ProvidedTextHasBeenTokenizedIntoXNGrams.Invoke(tokens));

            // Step 2: compare the tokens against the labeled examples.
            List<SimilarityIndex> similarityIndexes = GetSimilarityIndexes(tokens, labeledExamples);
            log.Invoke(MessageCollection.TextClassifier_TokenizedTextHasBeenComparedAgainstTheProvidedLabeledExamples);
            log.Invoke(MessageCollection.TextClassifier_XSimilarityIndexObjectsHaveBeenComputed.Invoke(similarityIndexes));

            // Step 3: average the similarity indexes.
            List<SimilarityIndexAverage> averages = GetSimilarityIndexAverages(similarityIndexes);
            log.Invoke(MessageCollection.TextClassifier_XSimilarityIndexAverageObjectsHaveBeenComputed(averages));

            // Step 4: derive the label from the averages; the label is logged before success/failure is reported.
            string predictedLabel = PredictLabel(averages);
            log.Invoke(MessageCollection.TextClassifier_PredictedLabelIs.Invoke(predictedLabel));

            if (predictedLabel != null)
            {
                log.Invoke(MessageCollection.TextClassifier_PredictionHasBeenSuccessful);
            }
            else
            {
                log.Invoke(MessageCollection.TextClassifier_PredictionHasFailedTryIncreasingTheAmountOfProvidedLabeledExamples);
            }

            return new TextClassifierResult(predictedLabel, similarityIndexes, averages);
        }
        // Methods (private)
        /// <summary>
        /// Tokenizes <paramref name="text"/> once for every n-gram size enabled in <paramref name="ruleSet"/>
        /// and returns all results in a single list, ordered monograms, bigrams, trigrams, fourgrams, fivegrams.
        /// </summary>
        /// <param name="text">Text to tokenize.</param>
        /// <param name="strategy">Tokenization strategy forwarded to each <c>DoFor</c> call.</param>
        /// <param name="ruleSet">Flags selecting which n-gram sizes are produced.</param>
        /// <returns>The concatenated n-grams; empty when no rule in <paramref name="ruleSet"/> is enabled.</returns>
        /// <exception cref="Exception">
        /// Thrown when any step fails. The message is built by
        /// <c>MessageCollection.NGramTokenizer_TheRuleCantBeAppliedTo</c> from the name of the rule being applied
        /// (<c>null</c> if no rule had started yet) and the text; the original failure is attached as
        /// <see cref="Exception.InnerException"/>.
        /// </exception>
        private List <INGram> TokenizeText
            (string text, ITokenizationStrategy strategy, INGramTokenizerRuleSet ruleSet)
        {
            List <INGram> nGrams   = new List <INGram>();
            string        ruleName = null; // name of the rule currently being applied, used for error reporting

            try
            {
                if (ruleSet.DoForMonograms)
                {
                    ruleName = nameof(ruleSet.DoForMonograms);
                    nGrams.AddRange(DoFor <Monogram>(text, strategy));
                }

                if (ruleSet.DoForBigrams)
                {
                    ruleName = nameof(ruleSet.DoForBigrams);
                    nGrams.AddRange(DoFor <Bigram>(text, strategy));
                }

                if (ruleSet.DoForTrigrams)
                {
                    ruleName = nameof(ruleSet.DoForTrigrams);
                    nGrams.AddRange(DoFor <Trigram>(text, strategy));
                }

                if (ruleSet.DoForFourgrams)
                {
                    ruleName = nameof(ruleSet.DoForFourgrams);
                    nGrams.AddRange(DoFor <Fourgram>(text, strategy));
                }

                if (ruleSet.DoForFivegrams)
                {
                    ruleName = nameof(ruleSet.DoForFivegrams);
                    nGrams.AddRange(DoFor <Fivegram>(text, strategy));
                }

                return(nGrams);
            }
            catch (Exception innerException)
            {
                // Keep the same exception type and message as before, but attach the root cause
                // so its type and stack trace are not lost.
                throw new Exception(
                    MessageCollection.NGramTokenizer_TheRuleCantBeAppliedTo.Invoke(ruleName, text),
                    innerException);
            }
        }
 /// <summary>
 /// Tokenizes <paramref name="text"/> using a default-constructed <c>TokenizationStrategy</c>.
 /// </summary>
 /// <param name="text">Text to tokenize.</param>
 /// <param name="ruleSet">Rule set forwarded to the three-argument overload.</param>
 /// <returns>The result of the three-argument <c>Do</c> overload.</returns>
 public List<INGram> Do(string text, INGramTokenizerRuleSet ruleSet)
 {
     return Do(text, new TokenizationStrategy(), ruleSet);
 }
示例#6
0
 /// <summary>
 /// Predicts a label for <paramref name="text"/> using a default-constructed <c>TokenizationStrategy</c>.
 /// </summary>
 /// <param name="text">Text to classify.</param>
 /// <param name="ruleSet">Rule set forwarded to the four-argument overload.</param>
 /// <param name="labeledExamples">Labeled examples forwarded to the four-argument overload.</param>
 /// <returns>The result of the four-argument <c>PredictLabel</c> overload.</returns>
 public TextClassifierResult PredictLabel(
     string text,
     INGramTokenizerRuleSet ruleSet,
     List<LabeledExample> labeledExamples)
 {
     return PredictLabel(text, new TokenizationStrategy(), ruleSet, labeledExamples);
 }