コード例 #1
0
        /// <summary>
        /// Trains the keyword clusterer from a set of example sentences.
        /// Each sentence is tokenized, tagged, and reduced to its predicted
        /// keywords, which are collected into one <c>KeywordExample</c> per sentence.
        /// </summary>
        /// <param name="manipulator">Passed through to <c>KeywordClusterer.Save</c> when the model is saved.</param>
        /// <param name="companyId">Passed through to <c>KeywordClusterer.Save</c> when the model is saved.</param>
        /// <param name="examplesIn">The sentences to build training examples from.</param>
        /// <param name="training">When true, the trained model is not saved and the method returns true.</param>
        /// <returns>
        /// True when <paramref name="training"/> is set; otherwise the result of
        /// <c>KeywordClusterer.Save</c>.
        /// </returns>
        public bool TrainClusteringModels(MySqlDataManipulator manipulator, int companyId, List<string> examplesIn, bool training = false)
        {
            List<KeywordExample> clusterExamples = new List<KeywordExample>();

            foreach (string exampleSentence in examplesIn)
            {
                // Sentence -> tokens -> tagged tokens -> predicted keywords.
                List<string> words = SentenceTokenizer.TokenizeSentence(exampleSentence);
                List<List<string>> taggedWords = KeywordTagger.Tag(words);
                List<string> predictedKeywords = KeywordPredictor.PredictKeywords(taggedWords);

                KeywordExample sentenceExample = new KeywordExample();
                predictedKeywords.ForEach(word => sentenceExample.AddKeyword(word));
                clusterExamples.Add(sentenceExample);
            }

            KeywordClusterer.Train(clusterExamples);

            // In training mode the freshly trained model is kept in memory only.
            if (training)
            {
                return true;
            }

            return KeywordClusterer.Save(manipulator, companyId);
        }
コード例 #2
0
        /// <summary>
        /// Produces a list of predicted problems for the supplied query.
        /// The complaint text is tokenized, tagged, and reduced to keywords; the
        /// keywords are mapped to their most similar complaint groups; and the
        /// make, model, and group ids together form the data point handed to
        /// <c>ProblemPredictor.PredictTopN</c>.
        /// </summary>
        /// <param name="queryIn">The query whose <c>Complaint</c>, <c>Make</c>, and <c>Model</c> are used.</param>
        /// <returns>The predictor's results, each cast to <see cref="string"/>, in the order returned.</returns>
        public List<string> ProcessQuery(MechanicQuery queryIn)
        {
            // Complaint text -> tokens -> tagged tokens -> keywords.
            var complaintWords = SentenceTokenizer.TokenizeSentence(queryIn.Complaint);
            List<List<string>> taggedComplaint = PartOfSpeechTagger.Tag(complaintWords);
            List<string> predictedKeywords = KeywordPredictor.PredictKeywords(taggedComplaint);

            KeywordExample complaintExample = new KeywordExample();
            predictedKeywords.ForEach(keyword => complaintExample.AddKeyword(keyword));

            List<int> similarGroups = KeywordClusterer.PredictTopNSimilarGroups(complaintExample, NUMBER_COMPLAINT_GROUPS);

            // Data point layout: make, model, then each similar complaint group id.
            List<object> dataPoint = new List<object>();
            dataPoint.Add(queryIn.Make);
            dataPoint.Add(queryIn.Model);
            similarGroups.ForEach(groupId => dataPoint.Add(groupId));

            List<object> topProblems = ProblemPredictor.PredictTopN(dataPoint, CalculateDistance, NUMBER_QUERIES_OUT);

            // Every predicted entry is cast to string, exactly as returned by the predictor.
            return topProblems.ConvertAll(problem => (string)problem);
        }
コード例 #3
0
        /// <summary>
        /// Scores how much of <paramref name="other"/>'s predicted keywords also
        /// appear in <paramref name="query"/>'s predicted keywords.
        /// Complaint keywords are compared with complaint keywords and problem
        /// keywords with problem keywords.
        /// </summary>
        /// <param name="query">The entry whose complaint and problem keywords are matched against.</param>
        /// <param name="other">The entry whose complaint and problem keywords are looked up and counted.</param>
        /// <returns>
        /// The number of <paramref name="other"/>'s keywords found in the matching
        /// keyword set of <paramref name="query"/>, divided by the total number of
        /// <paramref name="other"/>'s keywords. Returns 0 when
        /// <paramref name="other"/> yields no keywords at all (previously this
        /// case produced <see cref="float.NaN"/> from a 0/0 division).
        /// </returns>
        private static float CalcSimilarity(RepairJobEntry query, RepairJobEntry other)
        {
            IKeywordPredictor keyPred = NaiveBayesKeywordPredictor.GetGlobalModel();
            AveragedPerceptronTagger tagger = AveragedPerceptronTagger.GetTagger();

            // Keyword extraction order (query complaint, query problem, other
            // complaint, other problem) is kept the same as before.
            // The query keywords are only used for membership checks, so sets suffice.
            HashSet<string> inputComplaintKeywords = new HashSet<string>(PredictKeywordsForText(keyPred, tagger, query.Complaint));
            HashSet<string> inputProblemKeywords = new HashSet<string>(PredictKeywordsForText(keyPred, tagger, query.Problem));

            // These stay as lists: repeated keywords count once per occurrence,
            // both in the score and in the denominator.
            List<string> jobComplaintKeywords = PredictKeywordsForText(keyPred, tagger, other.Complaint);
            List<string> jobProblemKeywords = PredictKeywordsForText(keyPred, tagger, other.Problem);

            int totalJobKeywords = jobComplaintKeywords.Count + jobProblemKeywords.Count;
            if (totalJobKeywords == 0)
            {
                // Nothing to compare; avoid the 0/0 division that would return NaN.
                return 0f;
            }

            float score = 0;
            foreach (string keyword in jobComplaintKeywords)
            {
                if (inputComplaintKeywords.Contains(keyword))
                {
                    score++;
                }
            }
            foreach (string keyword in jobProblemKeywords)
            {
                if (inputProblemKeywords.Contains(keyword))
                {
                    score++;
                }
            }

            return score / totalJobKeywords;
        }

        /// <summary>
        /// Tokenizes <paramref name="text"/>, tags the tokens with <paramref name="tagger"/>,
        /// and returns the keywords <paramref name="predictor"/> predicts from the tagged tokens.
        /// </summary>
        private static List<string> PredictKeywordsForText(IKeywordPredictor predictor, AveragedPerceptronTagger tagger, string text)
        {
            List<string> tokens = SentenceTokenizer.TokenizeSentence(text);
            List<List<string>> taggedTokens = tagger.Tag(tokens);
            return predictor.PredictKeywords(taggedTokens);
        }