/// <summary>
/// Replaces <c>Exs1</c> with a new list holding five keyword examples,
/// one per keyword set in the table below, in table order.
/// </summary>
private void GenerateExamples1()
{
    // Each row becomes one KeywordExample; keywords are added left to right.
    string[][] keywordSets =
    {
        new[] { "oil", "pan", "leak" },
        new[] { "oil", "pan" },
        new[] { "oil", "leak", "head", "gasket" },
        new[] { "fuel", "leak", "line" },
        new[] { "transmission", "fluid", "leak" },
    };

    Exs1 = new List<KeywordExample>();
    foreach (string[] keywordSet in keywordSets)
    {
        KeywordExample example = new KeywordExample();
        foreach (string keyword in keywordSet)
        {
            example.AddKeyword(keyword);
        }
        Exs1.Add(example);
    }
}
/// <summary>
/// Appends five <c>ClaimableKeywordExample</c> items to <c>Exs2</c>, one per
/// keyword set in the table below, in table order. <c>Exs2</c> must already
/// reference a list; this method does not create it.
/// </summary>
private void GenerateExamples2()
{
    // Each row becomes one KeywordExample that is then wrapped before being stored.
    string[][] keywordSets =
    {
        new[] { "icm", "malfunction" },
        new[] { "starter", "engaging" },
        new[] { "plugged", "dpf" },
        new[] { "fuel", "leak", "line" },
        new[] { "transmission", "seized" },
    };

    foreach (string[] keywordSet in keywordSets)
    {
        KeywordExample example = new KeywordExample();
        foreach (string keyword in keywordSet)
        {
            example.AddKeyword(keyword);
        }
        Exs2.Add(new ClaimableKeywordExample(example));
    }
}
/// <summary>
/// Appends five <c>ClaimableKeywordExample</c> items to <c>Exs1</c>, one per
/// keyword set in the table below, in table order. <c>Exs1</c> must already
/// reference a list; this method does not create it.
/// </summary>
private void GenerateExamples1()
{
    // Each row becomes one KeywordExample that is then wrapped before being stored.
    string[][] keywordSets =
    {
        new[] { "oil", "pan", "leak" },
        new[] { "oil", "pan" },
        new[] { "oil", "leak", "head", "gasket" },
        new[] { "fuel", "leak", "line" },
        new[] { "transmission", "fluid", "leak" },
    };

    foreach (string[] keywordSet in keywordSets)
    {
        KeywordExample example = new KeywordExample();
        foreach (string keyword in keywordSet)
        {
            example.AddKeyword(keyword);
        }
        Exs1.Add(new ClaimableKeywordExample(example));
    }
}
/// <summary>
/// Creates the four shared fixtures <c>Ex1</c>..<c>Ex4</c> and seeds the first three
/// with keywords. <c>Ex4</c> is created but receives no keywords in this method.
/// </summary>
public void InitTests()
{
    Ex1 = new KeywordExample();
    Ex2 = new KeywordExample();
    Ex3 = new KeywordExample();
    Ex4 = new KeywordExample();

    // Ex1 uses lower-case letters only.
    foreach (string keyword in new[] { "a", "b", "c" })
    {
        Ex1.AddKeyword(keyword);
    }

    // Ex2 holds the same letters in a different order and with mixed casing.
    foreach (string keyword in new[] { "c", "A", "B" })
    {
        Ex2.AddKeyword(keyword);
    }

    Ex3.AddKeyword("B");
}
/// <summary>
/// Resets <c>Exs1</c> and <c>Exs2</c> to new empty lists, invokes
/// <c>GenerateExamples1</c> and <c>GenerateExamples2</c>, and then rebuilds the
/// single examples <c>Ex1</c> and <c>Ex2</c>.
/// </summary>
private void GenerateKeywordExamples()
{
    // The lists must exist before the generator methods run.
    Exs1 = new List<ClaimableKeywordExample>();
    Exs2 = new List<ClaimableKeywordExample>();
    GenerateExamples1();
    GenerateExamples2();

    Ex1 = new KeywordExample();
    foreach (string keyword in new[] { "transmission", "fluid", "leak" })
    {
        Ex1.AddKeyword(keyword);
    }

    Ex2 = new KeywordExample();
    foreach (string keyword in new[] { "head", "gasket", "blown" })
    {
        Ex2.AddKeyword(keyword);
    }
}
/// <summary>
/// Retrains <c>KeywordClusterer</c> from every query returned by
/// <c>DataSource.LoadMechanicQueries()</c> and writes the trained model to
/// <c>DefaultModelFileLocations.KEYWORD_SIMILARITY_CLUSTERER_FILE</c>.
/// </summary>
/// <remarks>
/// Each complaint is lower-cased, tokenized, tagged and reduced to predicted keywords;
/// one <c>KeywordExample</c> is built per query. Training runs outside the
/// <c>try</c>, so only failures while writing the file are turned into a <c>false</c> result.
/// </remarks>
/// <returns>
/// <c>true</c> when the model was written; <c>false</c> when an <see cref="IOException"/>
/// was raised while opening, writing or closing the output.
/// </returns>
private bool RestoreKeywordClusterer()
{
    List<MechanicQuery> mechanicQueries = DataSource.LoadMechanicQueries();
    List<KeywordExample> trainingExamples = new List<KeywordExample>();
    foreach (MechanicQuery query in mechanicQueries)
    {
        List<List<string>> complaintTags = PartOfSpeechTagger.Tag(
            SentenceTokenizer.TokenizeSentence(query.Complaint.ToLower())
        );
        List<string> keywords = KeywordPredictor.PredictKeywords(complaintTags);
        KeywordExample example = new KeywordExample();
        foreach (string s in keywords)
        {
            example.AddKeyword(s);
        }
        trainingExamples.Add(example);
    }
    KeywordClusterer.Train(trainingExamples);

    try
    {
        // The file handle is owned by the using block so it is released even when the
        // encoder constructor or Save throws; previously it leaked on any exception.
        using (FileStream fileOut = new FileStream(
            DefaultModelFileLocations.KEYWORD_SIMILARITY_CLUSTERER_FILE, FileMode.Create, FileAccess.Write))
        {
            AnsEncoderStream streamOut = new AnsEncoderStream(fileOut, 1048576, 4096);
            try
            {
                KeywordClusterer.Save(streamOut);
            }
            finally
            {
                // Close the encoder before the underlying file stream is disposed.
                streamOut.Close();
            }
        }
    }
    catch (IOException)
    {
        return false;
    }
    return true;
}
/// <summary>
/// Predicts problems for a mechanic query: the complaint is tokenized, tagged and
/// reduced to keywords, the keywords are mapped to similar complaint groups, and the
/// make, model and group ids are handed to <c>ProblemPredictor.PredictTopN</c>.
/// </summary>
/// <param name="queryIn">Query whose <c>Make</c>, <c>Model</c> and <c>Complaint</c> are read.</param>
/// <returns>The predictor's results, each cast to <c>string</c>, in the order returned.</returns>
public List<string> ProcessQuery(MechanicQuery queryIn)
{
    // NOTE(review): the complaint is not lower-cased here, unlike the training
    // methods that call ToLower() first - confirm callers normalise the text.
    var complaintWords = SentenceTokenizer.TokenizeSentence(queryIn.Complaint);
    List<List<string>> taggedComplaint = PartOfSpeechTagger.Tag(complaintWords);
    List<string> predictedKeywords = KeywordPredictor.PredictKeywords(taggedComplaint);

    KeywordExample complaintExample = new KeywordExample();
    predictedKeywords.ForEach(keyword => complaintExample.AddKeyword(keyword));

    List<int> similarGroups = KeywordClusterer.PredictTopNSimilarGroups(complaintExample, NUMBER_COMPLAINT_GROUPS);

    // Feature vector layout: make, model, then one entry per predicted group id.
    List<object> features = new List<object>();
    features.Add(queryIn.Make);
    features.Add(queryIn.Model);
    for (int index = 0; index < similarGroups.Count; index++)
    {
        features.Add(similarGroups[index]);
    }

    List<object> topProblems = ProblemPredictor.PredictTopN(features, CalculateDistance, NUMBER_QUERIES_OUT);
    return topProblems.ConvertAll(problem => (string)problem);
}
/// <summary>
/// Trains <c>KeywordClusterer</c> on the keywords predicted from each input sentence and,
/// unless <paramref name="training"/> is set, saves the result through the manipulator.
/// </summary>
/// <param name="manipulator">Passed to <c>KeywordClusterer.Save</c> when saving.</param>
/// <param name="companyId">Passed to <c>KeywordClusterer.Save</c> when saving.</param>
/// <param name="examplesIn">Sentences to tokenize, tag and convert into keyword examples.</param>
/// <param name="training">When <c>true</c>, the save step is skipped and <c>true</c> is returned.</param>
/// <returns><c>true</c> if <paramref name="training"/> is set; otherwise the result of <c>KeywordClusterer.Save</c>.</returns>
public bool TrainClusteringModels(MySqlDataManipulator manipulator, int companyId, List<string> examplesIn, bool training = false)
{
    List<KeywordExample> examples = new List<KeywordExample>();
    foreach (string sentence in examplesIn)
    {
        List<string> sentenceKeywords = KeywordPredictor.PredictKeywords(
            KeywordTagger.Tag(SentenceTokenizer.TokenizeSentence(sentence)));

        KeywordExample sentenceExample = new KeywordExample();
        sentenceKeywords.ForEach(keyword => sentenceExample.AddKeyword(keyword));
        examples.Add(sentenceExample);
    }

    KeywordClusterer.Train(examples);

    // Short-circuit: Save is only invoked when this is not a training-only run.
    return training || KeywordClusterer.Save(manipulator, companyId);
}
/// <summary>
/// After running <c>TestTrain</c>, checks that the single most similar group predicted
/// for an example containing only the keyword "leak" is group 1.
/// </summary>
public void TestCalculateTopNGroupSimiliarity()
{
    TestTrain();

    KeywordExample test = new KeywordExample();
    test.AddKeyword("leak");

    var list = Clusterer.PredictTopNSimilarGroups(test, 1);

    // Expected value goes first so a failure message reports expected/actual correctly.
    Assert.AreEqual(1, list[0]);
}
/// <summary>
/// Predicts the complaint groups most similar to an entry's complaint and returns them,
/// together with their ids, as a string built by <c>JsonListStringConstructor</c>.
/// </summary>
/// <param name="entryIn">The query whose <c>Complaint</c> text is analysed.</param>
/// <param name="manipulator">The object used to load the clusterer and the company's complaint groups.</param>
/// <param name="companyId">The id of the company the request is being made for.</param>
/// <param name="numGroupsRequested">Number of groups requested from <c>KeywordClusterer.PredictTopNSimilarGroups</c> (default 3).</param>
/// <returns>
/// String form of a list with one dictionary per selected group, each carrying the
/// mappings "GroupDefinition" and "Id".
/// </returns>
/// <exception cref="NullReferenceException">
/// Thrown when <c>manipulator.GetCompanyComplaintGroups</c> returns <c>null</c>.
/// </exception>
public string ProcessQueryForComplaintGroups(RepairJobEntry entryIn, MySqlDataManipulator manipulator, int companyId, int numGroupsRequested = 3)
{
    List<string> complaintTokens = SentenceTokenizer.TokenizeSentence(entryIn.Complaint);
    List<string> complaintKeywords = KeywordPredictor.PredictKeywords(KeywordTagger.Tag(complaintTokens));

    KeywordExample complaintExample = new KeywordExample();
    foreach (string keyword in complaintKeywords)
    {
        complaintExample.AddKeyword(keyword);
    }

    KeywordClusterer.Load(manipulator, companyId);
    List<int> predictedGroupIds = KeywordClusterer.PredictTopNSimilarGroups(complaintExample, numGroupsRequested);

    List<KeywordGroupEntry> knownGroups = manipulator.GetCompanyComplaintGroups(companyId);
    if (companyComplaintGroupsMissing(knownGroups))
    {
        throw new NullReferenceException("Company " + companyId + " complaint groups were not available in database");
    }

    List<KeywordGroupEntry> selected = new List<KeywordGroupEntry>();
    bool uncategorizedAdded = false;
    foreach (int groupId in predictedGroupIds)
    {
        if (groupId != 0)
        {
            // Group ids are 1-based; id N lives at index N - 1 of the company's group list.
            KeywordGroupEntry match = knownGroups[groupId - 1];
            match.Id = groupId;
            selected.Add(match);
            continue;
        }

        // Group 0 is reported as "Uncategorized" at most once, however often it is predicted.
        if (!uncategorizedAdded)
        {
            selected.Add(new KeywordGroupEntry("Uncategorized") { Id = 0 });
            uncategorizedAdded = true;
        }
    }

    JsonListStringConstructor jsonList = new JsonListStringConstructor();
    foreach (KeywordGroupEntry group in selected)
    {
        JsonDictionaryStringConstructor jsonGroup = new JsonDictionaryStringConstructor();
        jsonGroup.SetMapping("GroupDefinition", group.GroupDefinition);
        jsonGroup.SetMapping("Id", group.Id);
        jsonList.AddElement(jsonGroup);
    }
    return jsonList.ToString();

    // True when the database lookup produced no group list at all.
    bool companyComplaintGroupsMissing(List<KeywordGroupEntry> groupsFromDatabase)
    {
        return groupsFromDatabase == null;
    }
}
/// <summary>
/// Runs <c>TestTrain</c> and <c>TestSave</c>, loads the saved model from
/// <c>TempFileLoc</c> into a second clusterer, and checks that both clusterers give the
/// same first similarity value for an example containing only the keyword "leak".
/// </summary>
public void TestLoad()
{
    TestTrain();
    TestSave();

    KeywordSimilarityClusterer clusterer2 = new KeywordSimilarityClusterer();

    // The using block releases the file even when Load throws, so a failing load
    // cannot leave TempFileLoc open.
    using (var reader = new System.IO.StreamReader(TempFileLoc))
    {
        clusterer2.Load(reader.BaseStream);
    }

    KeywordExample test = new KeywordExample();
    test.AddKeyword("leak");

    var loadedSimilarity = clusterer2.PredictGroupSimilarity(test)[0];
    var expectedSimilarity = Clusterer.PredictGroupSimilarity(test)[0];

    // The freshly trained clusterer is the reference; the reloaded one is under test.
    Assert.AreEqual(expectedSimilarity, loadedSimilarity);
}
/// <summary>
/// Predicts the five most similar keyword groups for a repair job entry, using the
/// keywords from <c>PredictKeywordsInJobData(entry, true)</c> and the clusterer loaded
/// for the given company.
/// </summary>
/// <param name="entry">Job entry whose keywords are predicted.</param>
/// <param name="companyId">Company whose clusterer is loaded.</param>
/// <param name="manipulator">Passed to <c>KeywordClusterer.Load</c>.</param>
/// <returns>The list returned by <c>KeywordClusterer.PredictTopNSimilarGroups</c> for a request of 5 groups.</returns>
public List<int> PredictGroupsInJobData(RepairJobEntry entry, int companyId, MySqlDataManipulator manipulator)
{
    List<string> jobKeywords = PredictKeywordsInJobData(entry, true);

    KeywordExample jobExample = new KeywordExample();
    jobKeywords.ForEach(keyword => jobExample.AddKeyword(keyword));

    // The clusterer is reloaded per call before it is asked for predictions.
    KeywordClusterer.Load(manipulator, companyId);
    return KeywordClusterer.PredictTopNSimilarGroups(jobExample, 5);
}
/// <summary>
/// Retrains <c>ProblemPredictor</c> from every query returned by
/// <c>DataSource.LoadMechanicQueries()</c> and writes the trained model to
/// <c>DefaultModelFileLocations.KNN_QUERY_PROBLEM_PREDICTOR_FILE</c>.
/// </summary>
/// <remarks>
/// Each training row is: make, model, then the group ids that <c>KeywordClusterer</c>
/// predicts for the lower-cased complaint. The target is the lower-cased problem text.
/// Training runs outside the <c>try</c>, so only failures while writing the file are
/// turned into a <c>false</c> result.
/// </remarks>
/// <returns>
/// <c>true</c> when the model was written; <c>false</c> when an <see cref="IOException"/>
/// was raised while opening, writing or closing the output.
/// </returns>
private bool RestoreProblemPredictor()
{
    List<MechanicQuery> mechanicQueries = DataSource.LoadMechanicQueries();
    List<List<object>> trainingExamples = new List<List<object>>();
    List<object> targetExamples = new List<object>();
    foreach (MechanicQuery query in mechanicQueries)
    {
        List<object> currExample = new List<object>();
        currExample.Add(query.Make);
        currExample.Add(query.Model);

        List<List<string>> complaintTags = PartOfSpeechTagger.Tag(
            SentenceTokenizer.TokenizeSentence(query.Complaint.ToLower())
        );
        List<string> keywords = KeywordPredictor.PredictKeywords(complaintTags);
        KeywordExample example = new KeywordExample();
        foreach (string s in keywords)
        {
            example.AddKeyword(s);
        }

        List<int> groupsOut = KeywordClusterer.PredictTopNSimilarGroups(example, NUMBER_COMPLAINT_GROUPS);
        foreach (int i in groupsOut)
        {
            currExample.Add(i);
        }

        trainingExamples.Add(currExample);
        targetExamples.Add(query.Problem.ToLower());
    }
    ProblemPredictor.Train(trainingExamples, targetExamples);

    try
    {
        // The file handle is owned by the using block so it is released even when the
        // encoder constructor or Save throws; previously it leaked on any exception.
        using (FileStream fileOut = new FileStream(
            DefaultModelFileLocations.KNN_QUERY_PROBLEM_PREDICTOR_FILE, FileMode.Create, FileAccess.Write))
        {
            AnsEncoderStream saveStream = new AnsEncoderStream(fileOut, 1048576, 4096);
            try
            {
                ProblemPredictor.Save(saveStream);
                saveStream.Flush();
            }
            finally
            {
                // Close the encoder before the underlying file stream is disposed.
                saveStream.Close();
            }
        }
    }
    catch (IOException)
    {
        return false;
    }
    return true;
}
/// <summary>
/// Finds archived repair entries similar to <paramref name="entryIn"/> within one problem
/// group and returns them as a string built by <c>JsonListStringConstructor</c>.
/// </summary>
/// <remarks>
/// Side effect: <c>entryIn.ComplaintGroups</c> is overwritten with the bracketed,
/// comma-separated ids of the three groups predicted from the entry's keywords.
/// </remarks>
/// <param name="entryIn">Entry whose <c>Problem</c> text is analysed; its <c>ComplaintGroups</c> is modified.</param>
/// <param name="manipulator">Used to load the clusterer and fetch candidate entries.</param>
/// <param name="companyId">Company whose clusterer and data are used.</param>
/// <param name="problemGroupId">Problem group from which candidate entries are fetched.</param>
/// <param name="numRequested">Forwarded to <c>ProblemPredictor.GetQueryResults</c>.</param>
/// <param name="offset">Forwarded to <c>ProblemPredictor.GetQueryResults</c> (default 0).</param>
/// <returns>
/// String form of a list with one dictionary per result carrying "Make", "Model",
/// "Complaint", "Problem", "Year", "Id" and "Difference".
/// </returns>
public string ProcessQueryForSimilarQueriesArchive(RepairJobEntry entryIn, MySqlDataManipulator manipulator, int companyId, int problemGroupId, int numRequested, int offset = 0)
{
    // NOTE(review): keywords are taken from Problem but the predicted groups are stored
    // in ComplaintGroups - confirm this is intended for the archive lookup.
    List<string> problemTokens = SentenceTokenizer.TokenizeSentence(entryIn.Problem);
    List<string> problemKeywords = KeywordPredictor.PredictKeywords(KeywordTagger.Tag(problemTokens));

    KeywordExample problemExample = new KeywordExample();
    problemKeywords.ForEach(keyword => problemExample.AddKeyword(keyword));

    KeywordClusterer.Load(manipulator, companyId);
    List<int> predictedGroups = KeywordClusterer.PredictTopNSimilarGroups(problemExample, 3);
    entryIn.ComplaintGroups = "[" + string.Join(',', predictedGroups) + "]";

    List<RepairJobEntry> candidates = manipulator.GetDataEntriesByProblemGroup(companyId, problemGroupId);
    List<EntrySimilarity> matches = ProblemPredictor.GetQueryResults(entryIn, candidates, numRequested, offset);

    JsonListStringConstructor jsonList = new JsonListStringConstructor();
    foreach (EntrySimilarity match in matches)
    {
        jsonList.AddElement(ToJson(match));
    }
    return jsonList.ToString();

    // Serialises one similarity result; a Year of -1 is reported as the text "Unknown".
    JsonDictionaryStringConstructor ToJson(EntrySimilarity similarity)
    {
        JsonDictionaryStringConstructor json = new JsonDictionaryStringConstructor();
        json.SetMapping("Make", similarity.Entry.Make);
        json.SetMapping("Model", similarity.Entry.Model);
        json.SetMapping("Complaint", similarity.Entry.Complaint);
        json.SetMapping("Problem", similarity.Entry.Problem);
        if (similarity.Entry.Year != -1)
        {
            json.SetMapping("Year", similarity.Entry.Year);
        }
        else
        {
            json.SetMapping("Year", "Unknown");
        }
        json.SetMapping("Id", similarity.Entry.Id);
        json.SetMapping("Difference", similarity.Difference);
        return json;
    }
}