/// <summary>
/// Trains the clustering models for the company identified by <c>CompanyId</c> from its
/// validated repair job entries, then stores the predicted complaint groups on each of
/// those entries.
/// </summary>
/// <remarks>
/// Training only runs when <c>Flag</c> equals "complaint", compared case-insensitively and
/// independent of the current culture. For any other flag (including <c>null</c>) the method
/// returns without training and without reporting success.
/// </remarks>
/// <param name="manipulator">Database accessor used to read the company settings and validated entries and to write the predicted groups back.</param>
/// <exception cref="NullReferenceException">Thrown when one or more global keyword predictor models fail to load or train.</exception>
public override void PerformFunction(MySqlDataManipulator manipulator) {
    //Ensure that all KeywordPredictor models are loaded
    //If one is not, then a company requesting that model through its settings will cause an error
    if (!GlobalModelHelper.LoadOrTrainGlobalModels(ReflectionHelper.GetAllKeywordPredictors())) {
        // NOTE(review): the exception type is kept as-is because callers may already catch it;
        // InvalidOperationException would be the conventional choice for this condition.
        throw new NullReferenceException("One or more global models failed to load. Server cannot start.");
    }

    DatabaseQueryProcessor processor = new DatabaseQueryProcessor(DatabaseQueryProcessorSettings.RetrieveCompanySettings(manipulator, CompanyId));
    List<RepairJobEntry> validatedData = manipulator.GetDataEntriesWhere(CompanyId, "id > 0", validated: true);

    // Ordinal, case-insensitive comparison: ToLower() depends on the current culture
    // (e.g. the Turkish dotless i) and throws when Flag is null.
    if (!string.Equals(Flag, "complaint", StringComparison.OrdinalIgnoreCase)) {
        // Nothing was trained, so do not print the success message below.
        return;
    }

    //train model
    List<string> sentences = validatedData.Select(entry => entry.Complaint).ToList();
    if (!processor.TrainClusteringModels(manipulator, CompanyId, sentences, false)) {
        Console.WriteLine("Failed to train problem prediction models for company " + CompanyId);
        return;
    }

    //register the complaint groups that the clusterer predicts with the repair job entry in the database
    foreach (RepairJobEntry entry in validatedData) {
        string groups = JsonDataObjectUtil<List<int>>.ConvertObject(processor.PredictGroupsInJobData(entry, CompanyId, manipulator));
        entry.ComplaintGroups = groups;
        manipulator.UpdateDataEntryGroups(CompanyId, entry, complaint: true);
    }

    Console.WriteLine("Trained clustering models for company " + CompanyId);
}
/// <summary>
/// Decides which non-validated repair job entries should have their validation status
/// updated, by measuring how the clustering models perform when those entries are added
/// to the validated training data.
/// </summary>
/// <remarks>
/// For each of <paramref name="numShuffleTests"/> rounds the non-validated entries are shuffled
/// and split into <paramref name="numGroups"/> groups. For every group the models are trained
/// (in training mode) on the validated entries plus that group, the resulting accuracy is
/// computed as <c>100 - PerformAutomatedTestingWithData(...)</c>, and the change relative to the
/// company's current accuracy is added to the vote of every entry in the group. After all rounds,
/// each entry whose accumulated vote exceeds 0.01 is passed to
/// <c>UpdateValidationStatus</c>; if at least one update succeeds the clustering models are
/// retrained in non-training mode.
/// </remarks>
/// <param name="manipulator">Database accessor used to read entries and company accuracy and to update validation status.</param>
/// <param name="companyId">Id of the company whose data is being processed.</param>
/// <param name="processor">Query processor whose clustering models are trained and tested.</param>
/// <param name="numShuffleTests">Number of shuffle-and-split rounds to run.</param>
/// <param name="numGroups">Number of groups the non-validated entries are split into per round.</param>
public static void PerformDataValidation(MySqlDataManipulator manipulator, int companyId, DatabaseQueryProcessor processor, int numShuffleTests = 5, int numGroups = 3) {
    List<RepairJobEntry> validatedData = manipulator.GetDataEntriesWhere(companyId, "id>0", validated: true);
    List<RepairJobEntry> nonValidatedData = manipulator.GetDataEntriesWhere(companyId, "id>0", validated: false);
    List<NonValidatedMapping> mappings = nonValidatedData.Select(entry => new NonValidatedMapping() { Entry = entry, Vote = 0 }).ToList();
    double currCompanyAccuracy = manipulator.GetCompanyAccuracy(companyId);

    // A baseline of exactly 0 would make the relative change below a division by zero,
    // yielding Infinity or NaN; a single NaN permanently poisons a mapping's vote.
    // In that case fall back to the change as a fraction of the full scale
    // (accuracy is presumably on a 0-100 scale, given "100 - ..." below).
    double voteScale = currCompanyAccuracy != 0 ? currCompanyAccuracy : 100.0;

    for (int i = 0; i < numShuffleTests; i++) {
        mappings.Shuffle();
        List<List<NonValidatedMapping>> nonValidatedTestingGroups = mappings.Split(numGroups);
        foreach (List<NonValidatedMapping> currentTestGroup in nonValidatedTestingGroups) {
            // Train on the validated data plus this group's candidate entries.
            List<RepairJobEntry> testGroup = new List<RepairJobEntry>(validatedData);
            testGroup.AddRange(currentTestGroup.Select(mapping => mapping.Entry));
            processor.TrainClusteringModels(manipulator, companyId, testGroup.Select(entry => entry.Complaint).ToList(), training: true);

            double accuracy = 100 - PerformAutomatedTestingWithData(manipulator, companyId, processor, testGroup);
            double vote = (accuracy - currCompanyAccuracy) / voteScale;

            // Every entry in the group shares the credit (or blame) for this result.
            foreach (NonValidatedMapping mapping in currentTestGroup) {
                mapping.Vote += vote;
            }
        }
    }

    bool changed = false;
    foreach (NonValidatedMapping mapping in mappings) {
        if (mapping.Vote > 0.01) {
            if (!manipulator.UpdateValidationStatus(companyId, mapping.Entry, wasValidated: false)) {
                Console.WriteLine("Failed to update validation status of Repair Job Entry: " + mapping.Entry.Serialize(TableNameStorage.CompanyNonValidatedRepairJobTable.Replace("(n)", companyId.ToString())));
                continue;
            }
            changed = true;
        }
    }

    // Only retrain the real (non-training) models when at least one status update succeeded.
    if (changed) {
        TrainClusteringModel(manipulator, processor, companyId, false);
    }
}
/// <summary>
/// Trains the clustering models of <paramref name="processor"/> on the complaints of the
/// company's validated repair job entries.
/// </summary>
/// <remarks>
/// When <paramref name="training"/> is <c>false</c> and training succeeds, the complaint groups
/// predicted for each validated entry are serialized, assigned to the entry, and written back
/// through <paramref name="manipulator"/>. If training fails, a message is written to the console
/// and nothing else happens.
/// </remarks>
/// <param name="manipulator">Database accessor used to read the validated entries and to store their predicted groups.</param>
/// <param name="processor">Query processor whose clustering models are trained and then used for prediction.</param>
/// <param name="companyId">Id of the company whose data is used.</param>
/// <param name="training">Passed through to the model training call; when <c>true</c> the predicted groups are not written back.</param>
public static void TrainClusteringModel(MySqlDataManipulator manipulator, DatabaseQueryProcessor processor, int companyId, bool training = false) {
    List<RepairJobEntry> validatedEntries = manipulator.GetDataEntriesWhere(companyId, "id > 0", validated: true);
    List<string> complaints = validatedEntries.Select(job => job.Complaint).ToList();

    bool trained = processor.TrainClusteringModels(manipulator, companyId, complaints, training);
    if (!trained) {
        Console.WriteLine("Failed to train problem prediction models for company " + companyId);
        return;
    }

    // In training mode the predicted groups are not persisted.
    if (training) {
        return;
    }

    foreach (RepairJobEntry job in validatedEntries) {
        var predictedGroups = processor.PredictGroupsInJobData(job, companyId, manipulator);
        string serializedGroups = JsonDataObjectUtil<List<int>>.ConvertObject(predictedGroups);
        job.ComplaintGroups = serializedGroups;
        manipulator.UpdateDataEntryGroups(companyId, job, complaint: true);
    }
}