Exemplo n.º 1
0
        /// <summary>
        /// Predicts the most likely problems for a mechanic query.
        /// </summary>
        /// <remarks>
        /// The complaint is tokenized, tagged and reduced to keywords; the keywords are mapped to
        /// complaint groups by <c>KeywordClusterer</c>; the data point
        /// [make, model, group ids...] is then handed to <c>ProblemPredictor.PredictTopN</c>.
        /// </remarks>
        /// <param name="queryIn">The query whose Make, Model and Complaint are read</param>
        /// <returns>The predicted problems, each cast to string, in the order the predictor returned them</returns>
        public List<string> ProcessQuery(MechanicQuery queryIn)
        {
            // RestoreKeywordClusterer and RestoreProblemPredictor both lower-case the complaint
            // before tokenizing it, so the query has to be preprocessed the same way; otherwise
            // differently-cased tokens are tagged and matched against models that only ever saw
            // lower-case text. A null complaint is passed through untouched, as before.
            string complaint = queryIn.Complaint;
            if (complaint != null)
            {
                complaint = complaint.ToLower();
            }

            List<List<string>> complaintTokens = PartOfSpeechTagger.Tag(
                SentenceTokenizer.TokenizeSentence(complaint)
                );
            List<string> keywords = KeywordPredictor.PredictKeywords(complaintTokens);
            KeywordExample ex = new KeywordExample();

            foreach (string s in keywords)
            {
                ex.AddKeyword(s);
            }
            List<int> complaintGroups = KeywordClusterer.PredictTopNSimilarGroups(ex, NUMBER_COMPLAINT_GROUPS);

            // Data point layout: make, model, then one entry per predicted complaint group.
            List<object> queryDataPoint = new List<object> {
                queryIn.Make, queryIn.Model
            };

            foreach (int x in complaintGroups)
            {
                queryDataPoint.Add(x);
            }
            List<object> predictedProblems = ProblemPredictor.PredictTopN(queryDataPoint, CalculateDistance, NUMBER_QUERIES_OUT);
            List<string> returnProblems = new List<string>(predictedProblems.Count);

            foreach (object o in predictedProblems)
            {
                returnProblems.Add((string)o);
            }
            return(returnProblems);
        }
        /// <summary>
        /// Replaces <c>Exs1</c> with a new list holding five keyword examples, one per keyword
        /// set listed below, in the order listed.
        /// </summary>
        private void GenerateExamples1()
        {
            string[][] keywordSets =
            {
                new[] { "oil", "pan", "leak" },
                new[] { "oil", "pan" },
                new[] { "oil", "leak", "head", "gasket" },
                new[] { "fuel", "leak", "line" },
                new[] { "transmission", "fluid", "leak" }
            };

            Exs1 = new List<KeywordExample>();
            foreach (string[] keywordSet in keywordSets)
            {
                KeywordExample example = new KeywordExample();
                foreach (string keyword in keywordSet)
                {
                    example.AddKeyword(keyword);
                }
                Exs1.Add(example);
            }
        }
Exemplo n.º 3
0
        /// <summary>
        /// Retrains <c>KeywordClusterer</c> from the mechanic queries supplied by <c>DataSource</c>
        /// and writes the trained model to
        /// <c>DefaultModelFileLocations.KEYWORD_SIMILARITY_CLUSTERER_FILE</c>.
        /// </summary>
        /// <returns>
        /// <c>true</c> when the model was trained and written; <c>false</c> when an
        /// <see cref="IOException"/> occurred while writing the model file
        /// </returns>
        private bool RestoreKeywordClusterer()
        {
            List<MechanicQuery> mechanicQueries = DataSource.LoadMechanicQueries();
            List<KeywordExample> trainingExamples = new List<KeywordExample>();

            foreach (MechanicQuery query in mechanicQueries)
            {
                // Complaints are lower-cased before tokenizing.
                List<List<string>> complaintTags = PartOfSpeechTagger.Tag(
                    SentenceTokenizer.TokenizeSentence(query.Complaint.ToLower())
                    );
                List<string> keywords = KeywordPredictor.PredictKeywords(complaintTags);
                KeywordExample example = new KeywordExample();
                foreach (string s in keywords)
                {
                    example.AddKeyword(s);
                }
                trainingExamples.Add(example);
            }
            KeywordClusterer.Train(trainingExamples);
            try
            {
                // The using block guarantees the file handle is released even when constructing
                // the encoder or saving the model throws; previously the FileStream stayed open
                // on those paths. Disposing a FileStream that was already closed is harmless.
                using (FileStream fileOut = new FileStream(
                           DefaultModelFileLocations.KEYWORD_SIMILARITY_CLUSTERER_FILE,
                           FileMode.Create,
                           FileAccess.Write))
                {
                    AnsEncoderStream streamOut = new AnsEncoderStream(fileOut, 1048576, 4096);
                    KeywordClusterer.Save(streamOut);
                    streamOut.Close();
                }
            }
            catch (IOException)
            {
                return(false);
            }
            return(true);
        }
Exemplo n.º 4
0
        /// <summary>
        /// Trains <c>KeywordClusterer</c> on keyword examples extracted from the given sentences
        /// and, unless <paramref name="training"/> is set, saves the trained model.
        /// </summary>
        /// <param name="manipulator">Passed to <c>KeywordClusterer.Save</c> when the model is saved</param>
        /// <param name="companyId">Passed to <c>KeywordClusterer.Save</c> when the model is saved</param>
        /// <param name="examplesIn">Sentences to tokenize, tag and reduce to keywords</param>
        /// <param name="training">When <c>true</c>, the model is trained but not saved</param>
        /// <returns>
        /// <c>true</c> when <paramref name="training"/> is <c>true</c>; otherwise the result of
        /// <c>KeywordClusterer.Save</c>
        /// </returns>
        public bool TrainClusteringModels(MySqlDataManipulator manipulator, int companyId, List<string> examplesIn, bool training = false)
        {
            List<KeywordExample> examples = new List<KeywordExample>();

            foreach (string text in examplesIn)
            {
                List<List<string>> tagged = KeywordTagger.Tag(SentenceTokenizer.TokenizeSentence(text));
                KeywordExample current = new KeywordExample();

                foreach (string word in KeywordPredictor.PredictKeywords(tagged))
                {
                    current.AddKeyword(word);
                }
                examples.Add(current);
            }
            KeywordClusterer.Train(examples);

            // Short-circuits: Save is only invoked when training is false.
            return training || KeywordClusterer.Save(manipulator, companyId);
        }
Exemplo n.º 5
0
        /// <summary>
        /// Appends five claimable keyword examples to <c>Exs2</c>, one per keyword set listed
        /// below, in the order listed. <c>Exs2</c> must already be initialized.
        /// </summary>
        private void GenerateExamples2()
        {
            string[][] keywordSets =
            {
                new[] { "icm", "malfunction" },
                new[] { "starter", "engaging" },
                new[] { "plugged", "dpf" },
                new[] { "fuel", "leak", "line" },
                new[] { "transmission", "seized" }
            };

            foreach (string[] keywordSet in keywordSets)
            {
                KeywordExample example = new KeywordExample();
                foreach (string keyword in keywordSet)
                {
                    example.AddKeyword(keyword);
                }
                Exs2.Add(new ClaimableKeywordExample(example));
            }
        }
Exemplo n.º 6
0
        /// <summary>
        /// Appends five claimable keyword examples to <c>Exs1</c>, one per keyword set listed
        /// below, in the order listed. <c>Exs1</c> must already be initialized.
        /// </summary>
        private void GenerateExamples1()
        {
            string[][] keywordSets =
            {
                new[] { "oil", "pan", "leak" },
                new[] { "oil", "pan" },
                new[] { "oil", "leak", "head", "gasket" },
                new[] { "fuel", "leak", "line" },
                new[] { "transmission", "fluid", "leak" }
            };

            foreach (string[] keywordSet in keywordSets)
            {
                KeywordExample example = new KeywordExample();
                foreach (string keyword in keywordSet)
                {
                    example.AddKeyword(keyword);
                }
                Exs1.Add(new ClaimableKeywordExample(example));
            }
        }
        /// <summary>
        /// After training, asks <c>Clusterer</c> for the single most similar group of an example
        /// containing only the keyword "leak" and checks that the group returned is 1.
        /// </summary>
        public void TestCalculateTopNGroupSimiliarity()
        {
            TestTrain();
            KeywordExample test = new KeywordExample();

            test.AddKeyword("leak");
            var list = Clusterer.PredictTopNSimilarGroups(test, 1);

            // Assert.AreEqual takes (expected, actual); with the arguments reversed a failure
            // reports the predicted value as the expected one.
            Assert.AreEqual(1, list[0]);
        }
Exemplo n.º 8
0
        /// <summary>
        /// Predicts the complaint groups most similar to the complaint of the given entry and
        /// returns them as a JSON list string.
        /// </summary>
        /// <remarks>
        /// A predicted group id of 0 is reported as a synthetic "Uncategorized" entry (added at most
        /// once). Any other id <c>i</c> selects the entry at index <c>i - 1</c> of the company's
        /// complaint groups, and that entry's <c>Id</c> is overwritten with <c>i</c>.
        /// </remarks>
        /// <param name="entryIn">The query to predict the most similar complaint groups of; its Complaint text is used</param>
        /// <param name="manipulator">The object to use to access the database</param>
        /// <param name="companyId">The id of the company the request is being made for</param>
        /// <param name="numGroupsRequested">How many groups to request from the clusterer (defaults to 3)</param>
        /// <returns>Json formatted list string with one element per selected group, each carrying "GroupDefinition" and "Id"</returns>
        /// <exception cref="NullReferenceException">The company's complaint groups could not be retrieved (the lookup returned null)</exception>
        public string ProcessQueryForComplaintGroups(RepairJobEntry entryIn, MySqlDataManipulator manipulator, int companyId, int numGroupsRequested = 3)
        {
            List<List<string>> tagged = KeywordTagger.Tag(SentenceTokenizer.TokenizeSentence(entryIn.Complaint));
            List<string> complaintKeywords = KeywordPredictor.PredictKeywords(tagged);
            KeywordExample complaintExample = new KeywordExample();

            foreach (string word in complaintKeywords)
            {
                complaintExample.AddKeyword(word);
            }

            KeywordClusterer.Load(manipulator, companyId);
            List<int> predictedIds = KeywordClusterer.PredictTopNSimilarGroups(complaintExample, numGroupsRequested);
            List<KeywordGroupEntry> knownGroups = manipulator.GetCompanyComplaintGroups(companyId);

            if (knownGroups == null)
            {
                throw new NullReferenceException("Company " + companyId + " complaint groups were not available in database");
            }

            List<KeywordGroupEntry> selected = new List<KeywordGroupEntry>();
            bool hasUncategorized = false;

            foreach (int groupId in predictedIds)
            {
                if (groupId != 0)
                {
                    // Group ids are 1-based relative to the list returned by the database.
                    KeywordGroupEntry match = knownGroups[groupId - 1];
                    match.Id = groupId;
                    selected.Add(match);
                }
                else if (!hasUncategorized)
                {
                    KeywordGroupEntry uncategorized = new KeywordGroupEntry("Uncategorized")
                    {
                        Id = 0
                    };
                    selected.Add(uncategorized);
                    hasUncategorized = true;
                }
            }

            JsonListStringConstructor json = new JsonListStringConstructor();

            foreach (KeywordGroupEntry group in selected)
            {
                json.AddElement(ToJson(group));
            }
            return json.ToString();

            // Maps one group entry onto a JSON dictionary with its definition and id.
            JsonDictionaryStringConstructor ToJson(KeywordGroupEntry e)
            {
                JsonDictionaryStringConstructor dict = new JsonDictionaryStringConstructor();

                dict.SetMapping("GroupDefinition", e.GroupDefinition);
                dict.SetMapping("Id", e.Id);
                return dict;
            }
        }
        /// <summary>
        /// Trains and saves <c>Clusterer</c>, loads the saved model from <c>TempFileLoc</c> into a
        /// fresh clusterer, and checks that both produce the same first group-similarity value for
        /// an example containing only the keyword "leak".
        /// </summary>
        public void TestLoad()
        {
            TestTrain();
            TestSave();
            KeywordSimilarityClusterer clusterer2 = new KeywordSimilarityClusterer();

            // The using block releases the file handle even when Load throws, so a failing load
            // cannot leave the temp file locked for later tests.
            using (var reader = new System.IO.StreamReader(TempFileLoc))
            {
                clusterer2.Load(reader.BaseStream);
            }
            KeywordExample test = new KeywordExample();

            test.AddKeyword("leak");

            // The loaded clusterer is the one under test; the trained one supplies the expected
            // value. Assert.AreEqual takes (expected, actual).
            var expected = Clusterer.PredictGroupSimilarity(test)[0];
            var actual = clusterer2.PredictGroupSimilarity(test)[0];

            Assert.AreEqual(expected, actual);
        }
Exemplo n.º 10
0
        /// <summary>
        /// Predicts the five keyword groups most similar to the keywords found in a repair job entry.
        /// </summary>
        /// <param name="entry">The entry whose keywords are predicted via <c>PredictKeywordsInJobData</c></param>
        /// <param name="companyId">Passed to <c>KeywordClusterer.Load</c></param>
        /// <param name="manipulator">Passed to <c>KeywordClusterer.Load</c></param>
        /// <returns>The group ids returned by <c>KeywordClusterer.PredictTopNSimilarGroups</c> for N = 5</returns>
        public List<int> PredictGroupsInJobData(RepairJobEntry entry, int companyId, MySqlDataManipulator manipulator)
        {
            KeywordExample jobExample = new KeywordExample();

            foreach (string word in PredictKeywordsInJobData(entry, true))
            {
                jobExample.AddKeyword(word);
            }

            // The clusterer is loaded before every prediction.
            KeywordClusterer.Load(manipulator, companyId);
            return KeywordClusterer.PredictTopNSimilarGroups(jobExample, 5);
        }
 /// <summary>
 /// Creates the four keyword examples used by the tests: Ex1 = {a, b, c}, Ex2 = {c, A, B},
 /// Ex3 = {B}; Ex4 receives no keywords.
 /// </summary>
 public void InitTests()
 {
     Ex1 = new KeywordExample();
     foreach (string keyword in new[] { "a", "b", "c" })
     {
         Ex1.AddKeyword(keyword);
     }

     // Same letters as Ex1, but in a different order and with two of them upper-cased.
     Ex2 = new KeywordExample();
     foreach (string keyword in new[] { "c", "A", "B" })
     {
         Ex2.AddKeyword(keyword);
     }

     Ex3 = new KeywordExample();
     Ex3.AddKeyword("B");

     Ex4 = new KeywordExample();
 }
Exemplo n.º 12
0
 /// <summary>
 /// Resets both claimable example lists, fills them via <c>GenerateExamples1</c> and
 /// <c>GenerateExamples2</c>, and builds the two standalone examples Ex1 and Ex2.
 /// </summary>
 private void GenerateKeywordExamples()
 {
     // The lists must exist before the generators run, since those only append.
     Exs1 = new List<ClaimableKeywordExample>();
     Exs2 = new List<ClaimableKeywordExample>();
     GenerateExamples1();
     GenerateExamples2();

     Ex1 = new KeywordExample();
     foreach (string keyword in new[] { "transmission", "fluid", "leak" })
     {
         Ex1.AddKeyword(keyword);
     }

     Ex2 = new KeywordExample();
     foreach (string keyword in new[] { "head", "gasket", "blown" })
     {
         Ex2.AddKeyword(keyword);
     }
 }
Exemplo n.º 13
0
        /// <summary>
        /// Retrains <c>ProblemPredictor</c> from the mechanic queries supplied by <c>DataSource</c>
        /// and writes the trained model to
        /// <c>DefaultModelFileLocations.KNN_QUERY_PROBLEM_PREDICTOR_FILE</c>.
        /// </summary>
        /// <remarks>
        /// Each training example is [make, model, complaint group ids...], with the group ids
        /// taken from <c>KeywordClusterer</c>; the target is the lower-cased problem text.
        /// </remarks>
        /// <returns>
        /// <c>true</c> when the model was trained and written; <c>false</c> when an
        /// <see cref="IOException"/> occurred while writing the model file
        /// </returns>
        private bool RestoreProblemPredictor()
        {
            List<MechanicQuery> mechanicQueries = DataSource.LoadMechanicQueries();
            List<List<object>> trainingExamples = new List<List<object>>();
            List<object> targetExamples = new List<object>();

            foreach (MechanicQuery query in mechanicQueries)
            {
                List<object> currExample = new List<object>();
                currExample.Add(query.Make);
                currExample.Add(query.Model);
                List<List<string>> complaintTags = PartOfSpeechTagger.Tag(
                    SentenceTokenizer.TokenizeSentence(query.Complaint.ToLower())
                    );
                List<string> keywords = KeywordPredictor.PredictKeywords(complaintTags);
                KeywordExample example = new KeywordExample();
                foreach (string s in keywords)
                {
                    example.AddKeyword(s);
                }
                List<int> groupsOut = KeywordClusterer.PredictTopNSimilarGroups(example, NUMBER_COMPLAINT_GROUPS);
                foreach (int i in groupsOut)
                {
                    currExample.Add(i);
                }
                trainingExamples.Add(currExample);
                targetExamples.Add(query.Problem.ToLower());
            }
            ProblemPredictor.Train(trainingExamples, targetExamples);
            try
            {
                // The using block guarantees the file handle is released even when constructing
                // the encoder or saving the model throws; previously the FileStream stayed open
                // on those paths. Disposing a FileStream that was already closed is harmless.
                using (FileStream fileOut = new FileStream(
                           DefaultModelFileLocations.KNN_QUERY_PROBLEM_PREDICTOR_FILE,
                           FileMode.Create,
                           FileAccess.Write))
                {
                    AnsEncoderStream saveStream = new AnsEncoderStream(fileOut, 1048576, 4096);
                    ProblemPredictor.Save(saveStream);
                    saveStream.Flush();
                    saveStream.Close();
                }
            }
            catch (IOException)
            {
                return(false);
            }
            return(true);
        }
Exemplo n.º 14
0
        /// <summary>
        /// Finds the stored repair job entries of one problem group that are most similar to the
        /// given entry and returns them as a JSON list string.
        /// </summary>
        /// <param name="entryIn">The query entry; its Problem text is used for keyword extraction and its ComplaintGroups is overwritten</param>
        /// <param name="manipulator">The object to use to access the database</param>
        /// <param name="companyId">The id of the company the request is being made for</param>
        /// <param name="problemGroupId">Selects which stored entries are considered, via <c>GetDataEntriesByProblemGroup</c></param>
        /// <param name="numRequested">Passed to <c>ProblemPredictor.GetQueryResults</c></param>
        /// <param name="offset">Passed to <c>ProblemPredictor.GetQueryResults</c></param>
        /// <returns>Json formatted list string; each element carries Make, Model, Complaint, Problem, Year, Id and Difference</returns>
        public string ProcessQueryForSimilarQueriesArchive(RepairJobEntry entryIn, MySqlDataManipulator manipulator, int companyId, int problemGroupId, int numRequested, int offset = 0)
        {
            // NOTE(review): the groups stored in ComplaintGroups below are predicted from
            // entryIn.Problem, not entryIn.Complaint - confirm this is intended.
            List<List<string>> tagged = KeywordTagger.Tag(SentenceTokenizer.TokenizeSentence(entryIn.Problem));
            List<string> problemKeywords = KeywordPredictor.PredictKeywords(tagged);
            KeywordExample problemExample = new KeywordExample();

            foreach (string word in problemKeywords)
            {
                problemExample.AddKeyword(word);
            }

            KeywordClusterer.Load(manipulator, companyId);
            List<int> topGroups = KeywordClusterer.PredictTopNSimilarGroups(problemExample, 3);
            string joinedGroups = string.Join(',', topGroups);

            entryIn.ComplaintGroups = "[" + joinedGroups + "]";

            List<RepairJobEntry> candidates = manipulator.GetDataEntriesByProblemGroup(companyId, problemGroupId);
            List<EntrySimilarity> matches = ProblemPredictor.GetQueryResults(entryIn, candidates, numRequested, offset);
            JsonListStringConstructor json = new JsonListStringConstructor();

            foreach (EntrySimilarity match in matches)
            {
                json.AddElement(ToJson(match));
            }
            return json.ToString();

            // Maps one similarity result onto a JSON dictionary.
            JsonDictionaryStringConstructor ToJson(EntrySimilarity e)
            {
                JsonDictionaryStringConstructor dict = new JsonDictionaryStringConstructor();

                dict.SetMapping("Make", e.Entry.Make);
                dict.SetMapping("Model", e.Entry.Model);
                dict.SetMapping("Complaint", e.Entry.Complaint);
                dict.SetMapping("Problem", e.Entry.Problem);

                // A year of -1 is reported as the text "Unknown" instead of a number.
                if (e.Entry.Year != -1)
                {
                    dict.SetMapping("Year", e.Entry.Year);
                }
                else
                {
                    dict.SetMapping("Year", "Unknown");
                }
                dict.SetMapping("Id", e.Entry.Id);
                dict.SetMapping("Difference", e.Difference);
                return dict;
            }
        }
Exemplo n.º 15
0
 /// <summary>
 /// Creates a wrapper around an existing keyword example.
 /// </summary>
 /// <param name="exampleIn">The example stored in <c>ContainedExample</c>; it is not copied</param>
 public ClaimableKeywordExample(KeywordExample exampleIn) => ContainedExample = exampleIn;