/// <summary>
/// Builds one <c>KeywordExample</c> per input sentence (tokenize, tag, predict keywords)
/// and trains <c>KeywordClusterer</c> on the resulting examples.
/// </summary>
/// <param name="manipulator">Passed through to <c>KeywordClusterer.Save</c> when the model is saved.</param>
/// <param name="companyId">Passed through to <c>KeywordClusterer.Save</c> when the model is saved.</param>
/// <param name="examplesIn">Sentences from which the training examples are built.</param>
/// <param name="training">
/// When <c>true</c>, the trained model is not saved and the method returns <c>true</c>.
/// </param>
/// <returns>
/// <c>true</c> when <paramref name="training"/> is set; otherwise the result of
/// <c>KeywordClusterer.Save</c>.
/// </returns>
public bool TrainClusteringModels(MySqlDataManipulator manipulator, int companyId, List<string> examplesIn, bool training = false) {
    List<KeywordExample> examples = new List<KeywordExample>();

    foreach (string text in examplesIn) {
        List<string> words = SentenceTokenizer.TokenizeSentence(text);
        List<List<string>> taggedWords = KeywordTagger.Tag(words);
        List<string> predicted = KeywordPredictor.PredictKeywords(taggedWords);

        // Every predicted keyword of the sentence goes into a single example.
        KeywordExample current = new KeywordExample();
        for (int i = 0; i < predicted.Count; i++) {
            current.AddKeyword(predicted[i]);
        }
        examples.Add(current);
    }

    KeywordClusterer.Train(examples);

    // Skip the save step entirely when the caller asked for a training-only run.
    if (training) {
        return true;
    }
    return KeywordClusterer.Save(manipulator, companyId);
}
/// <summary>
/// Turns a mechanic query into a list of predicted problems.
/// The complaint text is tokenized, tagged and reduced to keywords; the keywords are
/// mapped to the top <c>NUMBER_COMPLAINT_GROUPS</c> similar groups; and the data point
/// (make, model, followed by the group ids) is passed to <c>ProblemPredictor.PredictTopN</c>.
/// </summary>
/// <param name="queryIn">Query supplying the complaint text, make and model.</param>
/// <returns>
/// The values returned by <c>ProblemPredictor.PredictTopN</c>, each cast to <c>string</c>.
/// </returns>
public List<string> ProcessQuery(MechanicQuery queryIn) {
    // Complaint text -> tokens -> tagged tokens -> keywords.
    List<string> tokens = SentenceTokenizer.TokenizeSentence(queryIn.Complaint);
    List<List<string>> tagged = PartOfSpeechTagger.Tag(tokens);
    List<string> predicted = KeywordPredictor.PredictKeywords(tagged);

    KeywordExample complaintExample = new KeywordExample();
    for (int i = 0; i < predicted.Count; i++) {
        complaintExample.AddKeyword(predicted[i]);
    }

    List<int> groups = KeywordClusterer.PredictTopNSimilarGroups(complaintExample, NUMBER_COMPLAINT_GROUPS);

    // Data point layout: make, model, then each predicted complaint group id.
    List<object> dataPoint = new List<object>();
    dataPoint.Add(queryIn.Make);
    dataPoint.Add(queryIn.Model);
    for (int i = 0; i < groups.Count; i++) {
        dataPoint.Add(groups[i]);
    }

    List<object> topProblems = ProblemPredictor.PredictTopN(dataPoint, CalculateDistance, NUMBER_QUERIES_OUT);

    // Each prediction is cast to string, as an explicit (string) cast would do.
    return topProblems.ConvertAll(problem => (string)problem);
}
/// <summary>
/// Computes the fraction of <paramref name="other"/>'s predicted keywords that also occur
/// in <paramref name="query"/>'s predicted keywords. Complaint keywords are compared with
/// complaint keywords and problem keywords with problem keywords.
/// </summary>
/// <param name="query">Entry whose complaint and problem keywords form the reference sets.</param>
/// <param name="other">Entry whose keywords are checked against the reference sets.</param>
/// <returns>
/// The number of matching keywords divided by the total number of keywords predicted for
/// <paramref name="other"/>, or 0 when no keywords were predicted for <paramref name="other"/>
/// (the unguarded division would yield NaN in that case).
/// </returns>
private static float CalcSimilarity(RepairJobEntry query, RepairJobEntry other) {
    IKeywordPredictor keyPred = NaiveBayesKeywordPredictor.GetGlobalModel();
    AveragedPerceptronTagger tagger = AveragedPerceptronTagger.GetTagger();

    // Sets give constant-time membership checks in the matching loops below.
    HashSet<string> inputComplaintKeywords = new HashSet<string>(ExtractSimilarityKeywords(query.Complaint, tagger, keyPred));
    HashSet<string> inputProblemKeywords = new HashSet<string>(ExtractSimilarityKeywords(query.Problem, tagger, keyPred));

    List<string> jobComplaintKeywords = ExtractSimilarityKeywords(other.Complaint, tagger, keyPred);
    List<string> jobProblemKeywords = ExtractSimilarityKeywords(other.Problem, tagger, keyPred);

    int totalJobKeywords = jobComplaintKeywords.Count + jobProblemKeywords.Count;
    if (totalJobKeywords == 0) {
        // Nothing to compare against: report zero similarity instead of 0f / 0 == NaN.
        return 0f;
    }

    float score = 0;
    foreach (string keyword in jobComplaintKeywords) {
        if (inputComplaintKeywords.Contains(keyword)) {
            score++;
        }
    }
    foreach (string keyword in jobProblemKeywords) {
        if (inputProblemKeywords.Contains(keyword)) {
            score++;
        }
    }

    return score / totalJobKeywords;
}

/// <summary>
/// Tokenizes <paramref name="text"/>, tags the tokens and returns the predicted keywords.
/// </summary>
private static List<string> ExtractSimilarityKeywords(string text, AveragedPerceptronTagger tagger, IKeywordPredictor keyPred) {
    List<string> tokens = SentenceTokenizer.TokenizeSentence(text);
    List<List<string>> tagged = tagger.Tag(tokens);
    return keyPred.PredictKeywords(tagged);
}