/// <summary>
/// Applies the trained machine to classify a single case: loads the persisted
/// classifier state for the given case set and writes the decided class index
/// into the target's data slot for this classifier.
/// </summary>
/// <param name="target">The case to classify.</param>
/// <param name="caseSet">The collection set used to resolve the stored state.</param>
/// <param name="logger">The logger (not referenced by this implementation).</param>
public override void DoSelect(DocumentSetCase target, DocumentSetCaseCollectionSet caseSet, ILogBuilder logger)
{
    var classifierState = states.GetState(caseSet, GetExperimentSufix());

    // Flatten the case's feature vectors into the input array expected by the machine.
    var input = target.data.featureVectors.GetValues().ToArray();

    Int32 decidedClass = classifierState.machine.Decide(input);

    target.data[this].SetValues(decidedClass);
}
/// <summary>
/// Retrieves the classifier state registered under the key composed of
/// <c>prefix</c> plus the validation-case name, loading its persisted
/// content before returning it.
/// </summary>
/// <param name="caseSet">Case set whose validation-case name forms part of the lookup key.</param>
/// <param name="prefix">Key prefix (typically the experiment suffix).</param>
/// <returns>The loaded classifier state.</returns>
public WebPostClassifierState<T> GetState(DocumentSetCaseCollectionSet caseSet, String prefix)
{
    var state = this[prefix + caseSet.validationCase.name];
    state.LoadState(true);
    return state;
}
/// <summary>
/// Trains a k-nearest-neighbours classifier over the training set and
/// persists the resulting machine in a freshly created state entry.
/// </summary>
/// <param name="trainingSet">Set providing the training inputs and outputs.</param>
/// <param name="tools">Classifier tooling (not referenced by this implementation).</param>
/// <param name="logger">The logger (not referenced by this implementation).</param>
public override void DoTraining(DocumentSetCaseCollectionSet trainingSet, classifierTools tools, ILogBuilder logger)
{
    var state = states.SetState(trainingSet, GetExperimentSufix());

    // Square Euclidean distance is the metric configured for this kNN setup.
    _distance = new SquareEuclidean();

    var knn = new KNearestNeighbors<Double[]>(k: setup.kNN_k, distance: _distance);
    knn.Learn(state.data.inputs, state.data.outputs);

    state.machine = knn;
    state.SaveState();
}
/// <summary>
/// Creates a new classifier state for the given training set, attaches the
/// AI training data extracted from the set, persists it, and registers it
/// in this collection under its name.
/// </summary>
/// <param name="trainingSet">The training set.</param>
/// <param name="prefix">Key prefix prepended to the validation-case name.</param>
/// <param name="_machine">Optional machine instance to seed the state with.</param>
/// <returns>The newly created and saved state.</returns>
public WebPostClassifierState<T> SetState(DocumentSetCaseCollectionSet trainingSet, String prefix, T _machine = null)
{
    String stateName = prefix + trainingSet.validationCase.name;

    var state = new WebPostClassifierState<T>(trainingSet.validationCase.folder, stateName, _machine);
    state.data = trainingSet.GetAITrainingData();
    state.SaveState();

    TryAdd(state.name, state);
    return state;
}
/// <summary>
/// Trains a Naive Bayes classifier over the training set and persists the
/// resulting machine. Depending on <c>isMultinominal</c> the component
/// distributions are GeneralizedBeta or Normal.
/// </summary>
/// <param name="trainingSet">Set providing the training inputs and outputs.</param>
/// <param name="tools">Classifier tooling (not referenced by this implementation).</param>
/// <param name="logger">The logger (not referenced by this implementation).</param>
public override void DoTraining(DocumentSetCaseCollectionSet trainingSet, classifierTools tools, ILogBuilder logger)
{
    var state = states.SetState(trainingSet, GetExperimentSufix());

    if (isMultinominal)
    {
        NaiveBayesLearning<GeneralizedBetaDistribution> teacher = new NaiveBayesLearning<GeneralizedBetaDistribution>();

        // Set options for the component distributions
        teacher.Options.InnerOption = new NormalOptions
        {
            Regularization = 1e-5 // to avoid zero variances
        };

        // Single-threaded to ensure reproducible results; raise to enable full parallelism.
        teacher.ParallelOptions.MaxDegreeOfParallelism = 1;

        _teacher = teacher;

        // BUG FIX: this branch previously never trained the machine (the Learn
        // call was commented out), so the state was saved with a null machine
        // and any later Decide call would fail. Training is restored here.
        state.machine = teacher.Learn(state.data.inputs, state.data.outputs);
    }
    else
    {
        NaiveBayesLearning<NormalDistribution> teacher = new NaiveBayesLearning<NormalDistribution>();

        // Set options for the component distributions
        teacher.Options.InnerOption = new NormalOptions
        {
            Regularization = 1e-5 // to avoid zero variances
        };

        // Single-threaded to ensure reproducible results; raise to enable full parallelism.
        teacher.ParallelOptions.MaxDegreeOfParallelism = 1;

        _teacher = teacher;

        // Learn a machine
        state.machine = teacher.Learn(state.data.inputs, state.data.outputs);
    }

    state.SaveState();
}
/// <summary>
/// Selects (classifies) the given case within the context of the supplied case set.
/// Implementations are expected to write their decision into <paramref name="target"/>.
/// </summary>
/// <param name="target">The case to classify.</param>
/// <param name="caseSet">The case collection set providing context for the decision.</param>
/// <param name="logger">The logger.</param>
public abstract void DoSelect(DocumentSetCase target, DocumentSetCaseCollectionSet caseSet, ILogBuilder logger);
/// <summary>
/// Trains the classifier over the supplied training set.
/// </summary>
/// <param name="trainingSet">The training set.</param>
/// <param name="tools">Classifier tools available to the training procedure.</param>
/// <param name="logger">The logger.</param>
public abstract void DoTraining(DocumentSetCaseCollectionSet trainingSet, classifierTools tools, ILogBuilder logger);
/// <summary>
/// Computes TF-IDF based feature values for the given case against every class
/// in the case set. When the SVM similarity measure is active, the cosine
/// similarity of the overlapping lemma tables is accumulated into the case's
/// feature vector for each class.
/// </summary>
/// <param name="target">The case to evaluate; when null, a message is logged and null is returned.</param>
/// <param name="caseSet">Class collections to compare the case against.</param>
/// <param name="logger">The logger.</param>
/// <returns>The case's feature vectors, or null when <paramref name="target"/> is null.</returns>
public override WebSiteClassifierResult GetClassification(DocumentSetCase target, DocumentSetCaseCollectionSet caseSet, ILogBuilder logger)
{
    if (target == null)
    {
        logger.log("-- target is null -- [GetClassification]");
        return null;
    }

    tfidfFVExtractorKnowledge caseKnowledge = target.caseKnowledge as tfidfFVExtractorKnowledge;

    foreach (DocumentSetCaseCollection classCollection in caseSet.Values)
    {
        tfidfFVExtractorKnowledge classKnowledge = classCollection.classKnowledge as tfidfFVExtractorKnowledge;

        if (SVMSimilarity.isActive)
        {
            // Overlap between the class lemma table and the case lemma table.
            webLemmaTermPairCollection lemmaOverlap = classKnowledge.WLTableOfIndustryClass.GetMatchingTerms(caseKnowledge.WLTableOfIndustryClass);
            target.data.featureVectors[classCollection.setClass.classID][SVMSimilarity] += lemmaOverlap.GetCosineSimilarity(logger);
        }
    }

    //target.result.selected = target.result.GetClassWithHighestScore();
    return target.data.featureVectors;
}
/// <summary>
/// Runs the complete k-fold experiment for one feature-vector-extractor model:
/// trains and classifies each fold (in parallel), aggregates per-class results,
/// produces per-category and model-level metric reports, and registers the best
/// performer in the main report. On an exception the whole model run is retried
/// up to the configured crash-retry limit.
/// </summary>
/// <param name="context">Experiment execution context (tools, logger, reports, validation collections).</param>
/// <param name="model">The feature vector extractor model to execute.</param>
protected void runModel(experimentExecutionContext context, IWebFVExtractor model)
{
    // Route console output into this model's log section.
    imbSCI.Core.screenOutputControl.logToConsoleControl.setAsOutput(context.logger, model.name);

    Int32 crashRetries = context.tools.operation.doRebootFVEOnCrashRetryLimit;

    // Per-class case collections, accumulated across all folds.
    aceDictionarySet<IDocumentSetClass, DocumentSetCaseCollection> casesByClasses = new aceDictionarySet<IDocumentSetClass, DocumentSetCaseCollection>();

    DSCCReportSet kFoldReport = new DSCCReportSet(model);

    var valCol = context.validationCollections[model.name];

    List<DocumentSetCaseCollectionSet> modelCaseResults = new List<DocumentSetCaseCollectionSet>();

    // NOTE(review): duplicate of the initialization above — appears redundant.
    crashRetries = context.tools.operation.doRebootFVEOnCrashRetryLimit;

    while (crashRetries > 0)
    {
        try
        {
            // --- fold-level notes describing the experiment settings ---
            experimentNotes modelNotes = new experimentNotes(valCol.folder, "Fold-level experiment settings description notes");
            modelNotes.AppendLine("# Notes on Feature Vector Extractor: " + model.name);

            var nts = model.DescribeSelf();
            nts.ForEach(x => modelNotes.AppendLine(x));

            context.logger.log("Executing k-fold cases with model [" + model.name + "]");

            valCol.DescribeSampleDistribution(modelNotes);

            context.mainReport.valColVsModelVsSampleHash.Add("[" + model.name + "]".toWidthExact(20) + " [sample distribution hash: " + valCol.SampleDistributionHash + "]");

            modelNotes.SaveNote();

            ParallelOptions ops = new ParallelOptions();
            ops.MaxDegreeOfParallelism = context.tools.operation.ParallelThreads;

            // --- run every k-fold validation case in parallel ---
            Parallel.ForEach<kFoldValidationCase>(valCol.GetCases(), ops, valCase =>
            {
                model.DoFVEAndTraining(valCase, context.tools, context.logger); // <--------------------------------------------------------------------------------------- BUILDING FVE

                DocumentSetCaseCollectionSet results = model.DoClassification(valCase, context.tools, context.logger);

                if (!results.Any())
                {
                    throw new aceScienceException("DoClassification for [" + model.name + "] returned no results!", null, model, "DoClassification " + model.name + " failed!", context);
                }

                // Group this fold's per-class collections for the cross-fold category report.
                foreach (var pair in results)
                {
                    DocumentSetCaseCollection cls = pair.Value;
                    casesByClasses.Add(cls.setClass, cls);
                }

                valCase.evaluationResults = results;

                if (context.tools.DoResultReporting)
                {
                    context.logger.log("producing reports on k-Fold case [" + valCase.name + "]");

                    DSCCReports r = results.GetReports();

                    var sumMeans = r.GetAverageTable(context); //.GetReportAndSave(valCase.folder, appManager.AppInfo, "CrossValidation_" + valCase.name);
                    sumMeans.SetDescription("FVE report, aggregated for all categories - for fold [" + valCase.name + "]");
                    sumMeans.GetReportAndSave(valCase.folder, appManager.AppInfo, "CrossValidation_" + valCase.name, true, context.tools.operation.doReportsInParalell);

                    var fveAndCase = r.GetFullValidationTable(context);
                    fveAndCase.SetDescription("Per-category aggregate statistics, for each classifier, within fold [" + valCase.name + "], used for macro-averaging");
                    fveAndCase.GetReportAndSave(valCase.folder, appManager.AppInfo, "CrossValidation_extrainfo_" + valCase.name, true, context.tools.operation.doReportsInParalell);

                    var fullCaseReport = results.GetReportOnAllCases();
                    fullCaseReport.GetReportAndSave(valCase.folder, appManager.AppInfo, "FullReport_" + valCase.name, true, context.tools.operation.doReportsInParalell);

                    kFoldReport.Add(valCase, r);
                }

                context.logger.log("k-Fold case [" + valCase.name + "] completed");
                context.notes.log("- - Experiment sequence for [" + valCase.name + "] fold completed");

                if (context.tools.operation.doSaveKnowledgeForClasses)
                {
                    valCase.knowledgeLibrary.SaveKnowledgeInstancesForClasses(valCase, context.logger);
                }
            });

            // Collect the per-fold evaluation results sequentially.
            foreach (var fold in valCol.GetCases()) //  Parallel.ForEach<kFoldValidationCase>(valCol.GetCases(), ops, valCase =>
            {
                modelCaseResults.Add(fold.evaluationResults);
            }

            crashRetries = 0; // success — leave the retry loop
        }
        catch (Exception ex)
        {
            crashRetries--;
            context.errorNotes.LogException("FVE Model crashed -- retries left [" + crashRetries + "] --- ", ex, model.name);
            context.logger.log(":::: REPEATING the model [" + model.name + "] ::: CRASHED [" + ex.Message + "] ::: RETRIES [" + crashRetries + "]");
            imbACE.Services.terminal.aceTerminalInput.doBeepViaConsole(1200, 1000, 1);
            imbACE.Services.terminal.aceTerminalInput.doBeepViaConsole(2400, 1000, 1);
            imbSCI.Core.screenOutputControl.logToConsoleControl.setAsOutput(context.logger, "RETRIES[" + crashRetries + "]");
        }
    }

    imbSCI.Core.screenOutputControl.logToConsoleControl.setAsOutput(context.logger, "Reporting");

    valCol.knowledgeLibrary.SaveCaseKnowledgeInstances(context.logger);

    // DocumentSetCaseCollection second = null;

    if (modelCaseResults.Any())
    {
        featureExtractionMetrics modelMetrics = new featureExtractionMetrics(model.name, "All");

        DataTableTypeExtended<featureExtractionMetrics> modelVsCategoryMetrics = new DataTableTypeExtended<featureExtractionMetrics>(model.name, "Model metrics per category");

        // <-------------------------------------- CATEGORIES REPORT ----------------------------------------------

        // Clone the schema of the first fold's report table as the template for the aggregate table.
        DataTable allTable = modelCaseResults.First()[0].GetReportTable(false, false).GetClonedShema<DataTable>();; //valCol.GetCases().First().evaluationResults[0].GetReportTable(false, false);

        rangeFinderForDataTable ranger = new rangeFinderForDataTable(allTable, "name");
        ranger.columnsToSignIn.Add("Case");

        foreach (KeyValuePair<IDocumentSetClass, aceConcurrentBag<DocumentSetCaseCollection>> pair in casesByClasses)
        {
            // NOTE(review): `first` and `repTable` are declared but never assigned below.
            DocumentSetCaseCollection first = null;
            DataTable repTable = null;

            ranger.prepareForNextAggregationBlock(allTable, "name");

            // Append every non-null case row from every fold for this class.
            foreach (DocumentSetCaseCollection cn in pair.Value)
            {
                foreach (var cni in cn)
                {
                    if (cni != null)
                    {
                        cn.BuildRow(cni, allTable, false);
                    }
                }
            }

            // Add avg/stdev aggregate rows over the block just appended.
            ranger.AddRangeRows(pair.Key.name, allTable, true, imbSCI.Core.math.aggregation.dataPointAggregationType.avg | imbSCI.Core.math.aggregation.dataPointAggregationType.stdev);

            var categoryMetrics = new featureExtractionMetrics(model.name, pair.Key.name);
            categoryMetrics.SetValues(ranger);

            modelVsCategoryMetrics.AddRow(categoryMetrics);
            modelMetrics.AddValues(categoryMetrics);

            categoryMetrics.saveObjectToXML(valCol.folder.pathFor(model.name + "_" + categoryMetrics.Name + ".xml", imbSCI.Data.enums.getWritableFileMode.overwrite, "FV and Category sample metrics, serialized object"));

            //context.notes.log("- - Creating report for category [" + pair.Key.name + "] completed");
            //repTable.GetReportAndSave(valCol.folder, appManager.AppInfo, model.name + "_category_" + pair.Key.name);
        }

        // Macro-average the accumulated per-category metrics into the model-level metrics.
        modelMetrics.DivideValues(casesByClasses.Count);
        modelMetrics.saveObjectToXML(valCol.folder.pathFor(model.name + "_metrics.xml", imbSCI.Data.enums.getWritableFileMode.overwrite, "Cross-categories macroaveraged metrics of the FVE model [" + model.name + "]"));

        modelVsCategoryMetrics.AddRow(modelMetrics);
        modelVsCategoryMetrics.GetRowMetaSet().SetStyleForRowsWithValue<String>(DataRowInReportTypeEnum.dataHighlightA, "Name", modelMetrics.Name);
        modelVsCategoryMetrics.GetReportAndSave(valCol.folder, appManager.AppInfo, model.name + "_metrics", true, true);

        context.mainReport.AddModelMetrics(modelMetrics);

        context.notes.log("- Creating report for all categories [" + model.name + "] ");

        allTable.GetReportAndSave(valCol.folder, appManager.AppInfo, model.name + "_categories", true, context.tools.operation.doReportsInParalell);
    }

    kFoldReport.MakeReports(context, valCol.folder);

    context.mainReport.AddBestPerformer(kFoldReport.GetTopClassifierReport(), kFoldReport.meanClassifierReport, model); // <---------------- creation of complete report

    context.notes.log("- Experiment sequence with Feature Vector Extractor [" + model.name + "] completed");
    context.notes.SaveNote();

    // <------------- END OF THE MODEL -------------------------------------------------------------------------------------------------
}
/// <summary>
/// Builds and trains an Activation Neural Network over the training set using
/// Levenberg-Marquardt learning, then persists the trained state. Training
/// stops early when the error drops below the configured lower limit or shows
/// no significant change over a sustained run of consecutive iterations.
/// </summary>
/// <param name="trainingSet">Set providing the training inputs and outputs.</param>
/// <param name="tools">Classifier tooling (not referenced by this implementation).</param>
/// <param name="logger">Logger receiving training progress messages.</param>
public override void DoTraining(DocumentSetCaseCollectionSet trainingSet, classifierTools tools, ILogBuilder logger)
{
    var state = states.SetState(trainingSet, GetExperimentSufix());

    if (activationFunction == null)
    {
        activationFunction = new BipolarSigmoidFunction(setup.neuralnetwork.alpha);
    }

    // Layer sizes: the configured hidden layers followed by the output layer.
    // FIX: the original built the network through an eight-case switch that
    // re-created the activation function in every case (ignoring the
    // activationFunction field initialized above) and rejected more than seven
    // hidden layers; the params-array constructor handles any layer count.
    var neurons = setup.neuralnetwork.HiddenLayersNeuronCounts.ToList();
    neurons.Add(state.data.NumberOfClasses);

    ActivationNetwork machine = new ActivationNetwork(activationFunction, state.data.NumberOfInputs, neurons.ToArray());

    new NguyenWidrow(machine).Randomize();
    state.machine = machine;

    // BackPropagationLearning teacher = new BackPropagationLearning(machine);
    LevenbergMarquardtLearning teacher = new LevenbergMarquardtLearning(machine);
    teacher.LearningRate = setup.neuralnetwork.learningRate;

    // Expand class indices into bipolar (-1 / +1) target vectors.
    var outputs = Accord.Statistics.Tools.Expand(state.data.outputs, state.data.NumberOfClasses, -1, 1);

    //teacher.Momentum = momentum;

    Int32 itOfSameError = 0;
    Int32 itOfSameErrorLimit = setup.neuralnetwork.learningIterationsMax / 10;
    Double errorSignificantSpan = setup.neuralnetwork.errorLowerLimit * setup.neuralnetwork.errorLowerLimit;

    for (int i = 0; i < setup.neuralnetwork.learningIterationsMax; i++)
    {
        double error = teacher.RunEpoch(state.data.inputs, outputs);

        if (Math.Abs(error - state.errorRate) < errorSignificantSpan)
        {
            itOfSameError++;
        }
        else
        {
            // FIX: the counter was never reset, so the stop condition counted
            // cumulative (non-consecutive) stalls while the log message claims
            // "in last N iterations"; resetting makes the count consecutive.
            itOfSameError = 0;
        }

        if (itOfSameError > itOfSameErrorLimit)
        {
            logger.log("Stoping training in [" + i.ToString("D3") + "] because error rate had no significant change [" + errorSignificantSpan.ToString("F8") + "] in last [" + itOfSameError + "] iterations [" + error.ToString("F8") + "]");
            break;
        }

        if (i % 10 == 0)
        {
            logger.log("Learning Neural Network [" + i.ToString("D3") + "] Error rate: " + error.ToString("F5"));
        }

        // Track the best (lowest) error reached so far.
        if (error < state.errorRate)
        {
            state.errorRate = error;
        }

        if (error < setup.neuralnetwork.errorLowerLimit)
        {
            break;
        }
    }

    if (teacherRef == null)
    {
        teacherRef = teacher;
    }

    state.SaveState();
}
/// <summary>
/// Computes semantic feature values for the given case against every class in
/// the case set. Depending on which similarity measures are active, it
/// accumulates SSRM semantic similarity (with term-cloud expansion), cosine
/// similarity over expanded semantic clouds, TF-IDF (SVM) cosine similarity,
/// and chunk-based similarity into the case's feature vectors. Optionally
/// emits detailed DGML / debug reports for (a random sample of) cases.
/// </summary>
/// <param name="target">The target case; when null, a message is logged and null is returned.</param>
/// <param name="caseSet">The case set providing the class collections to compare against.</param>
/// <param name="logger">The logger.</param>
/// <returns>The case's feature vectors, or null when <paramref name="target"/> is null.</returns>
public override WebSiteClassifierResult GetClassification(DocumentSetCase target, DocumentSetCaseCollectionSet caseSet, ILogBuilder logger)
{
    if (target == null)
    {
        logger.log("-- target is null -- [GetClassification]");
        return (null);
    }

    semanticFVExtractorKnowledge caseKnowledge = target.caseKnowledge as semanticFVExtractorKnowledge;

    List<webLemmaTerm> caseTerms = caseKnowledge.WLTableOfIndustryClass.GetList();

    // StringBuilder sb = new StringBuilder();

    foreach (DocumentSetCaseCollection caseColl in caseSet.Values)
    {
        // Detailed reporting may be globally enabled, and optionally thinned
        // down to a random subsample of cases (In100RandomCaseGraphReport is
        // the per-hundred threshold).
        Boolean doReportInDetail = caseSet.validationCase.context.tools.operation.doMakeClassificationReportForCases;

        if (caseSet.validationCase.context.tools.operation.DoRandomCaseGraphReportMode && doReportInDetail)
        {
            Int32 r = rnd.Next(100);
            if (r <= caseSet.validationCase.context.tools.operation.In100RandomCaseGraphReport)
            {
                doReportInDetail = true;
            }
            else
            {
                doReportInDetail = false;
            }
        }
        else
        {
        }

        semanticFVExtractorKnowledge classKnowledge = caseColl.classKnowledge as semanticFVExtractorKnowledge;

        webLemmaTermPairCollection lemmaOverlap = null;

        if (semanticSimilarity.isActive)
        {
            // Expand the case terms through the class's filtered semantic cloud.
            var expandedCloud = classKnowledge.semanticCloudFiltered.ExpandTermsToCloud(caseTerms, settings.caseTermExpansionSteps, true, settings.caseTermExpansionOptions);

            //expandedCloud.InverseWeights(true, true);
            //expandedCloud.normalizeNodeWeights();
            // expandedCloud.normalizeLinkWeights();

            lemmaOverlap = classKnowledge.semanticCloudFiltered.GetMatchingTerms(caseTerms, true);

            // Debug accumulator is only allocated when detailed reporting is on.
            SSRMComputation debug = null;
            if (doReportInDetail)
            {
                debug = new SSRMComputation(classKnowledge.name, caseKnowledge.name);
            }

            Double Similarity = expandedCloud.GetSSRM(lemmaOverlap, logger, debug);

            target.data.featureVectors[caseColl.setClass.classID][semanticSimilarity] += Similarity;
            target.data.featureVectors[caseColl.setClass.classID].termMatched += lemmaOverlap.Count;

            if (doReportInDetail)
            {
                // var dt = lemmaOverlap.GetDataTable();
                // dt.GetReportAndSave(caseColl.setClass.folder, appManager.AppInfo, "cosine_similarity_" + caseKnowledge.name + "_" + classKnowledge.name);

                // Export the expanded cloud (plus SSRM fraction nodes when
                // debug data is available) as a DGML graph.
                freeGraphToDMGL converter = new freeGraphToDMGL();
                String dgmlOutput = "expandedCloud_" + caseKnowledge.name + "_" + classKnowledge.name + ".dgml";
                var dgml = converter.ConvertToDMGL(expandedCloud);

                if (debug != null)
                {
                    var simNode = dgml.Nodes.AddNode("sim", "Sim(d,t) = " + debug.similarity.ToString("F5"));
                    var simUp = dgml.Nodes.AddNode("up", debug.upper.ToString("F5"));
                    var simLow = dgml.Nodes.AddNode("low", debug.lower.ToString("F5"));
                    dgml.Links.AddLink(simNode, simUp, "Above fraction");
                    dgml.Links.AddLink(simNode, simLow, "Below fraction");
                }

                dgml.Save(caseSet.validationCase.caseSampleFolder.pathFor(dgmlOutput, getWritableFileMode.autoRenameThis, "DGML export of expanded terms for [" + caseKnowledge.name + "] against class cloud [" + classKnowledge.name + "]"));
                logger.log("DGML Saved [" + dgmlOutput + "]");
            }

            if (debug != null)
            {
                File.WriteAllText(caseSet.validationCase.caseSampleFolder.pathFor(debug.GetFilename(), imbSCI.Data.enums.getWritableFileMode.overwrite), debug.sb.ToString());
            }
        }

        if (cosineSemanticSimilarity.isActive)
        {
            // NOTE(review): caseLemmaDictionary is computed but not referenced below — verify whether it is still needed.
            var caseLemmaDictionary = lemmaSemanticCloud.GetWebLemmaDictionary(caseKnowledge.semanticCloud.nodes);

            List<webLemmaTerm> expandedTerms = classKnowledge.semanticCloudFiltered.ExpandTerms(caseTerms, settings.caseTermExpansionSteps, settings.caseTermExpansionOptions);

            var cloudOverlap = classKnowledge.semanticCloudFiltered.GetMatchingTerms(expandedTerms);

            if (doReportInDetail)
            {
                var dt = cloudOverlap.GetDataTable();
                dt.GetReportAndSave(caseSet.validationCase.caseSampleFolder, appManager.AppInfo, "cosine_similarity_" + caseKnowledge.name + "_" + classKnowledge.name, true, caseSet.validationCase.context.tools.operation.doReportsInParalell);
            }

            target.data.featureVectors[caseColl.setClass.classID][cosineSemanticSimilarity] += cloudOverlap.GetCosineSimilarity(logger);
        }

        if (SVMSimilarity.isActive)
        {
            // Cosine similarity over the overlap of class and case lemma tables.
            lemmaOverlap = classKnowledge.WLTableOfIndustryClass.GetMatchingTerms(caseKnowledge.WLTableOfIndustryClass);
            target.data.featureVectors[caseColl.setClass.classID][SVMSimilarity] += lemmaOverlap.GetCosineSimilarity(logger);
        }

        if (SVMChunkSimilarity.isActive)
        {
            // lemmaOverlap = classKnowledge.WLChunkTableOfIndustryClass.GetMatchingTerms(caseKnowledge.WLChunkTableOfIndustryClass);

            // Chunk similarity: each primary chunk of the class cloud found in
            // the case's chunk table contributes its document frequency
            // normalized by the table's max DF; secondary chunks contribute a
            // quarter of that.
            Double similarity = 0;
            foreach (var primChunk in classKnowledge.semanticCloudFiltered.primaryChunks)
            {
                if (caseKnowledge.WLChunkTableOfIndustryClass.ContainsKey(primChunk))
                {
                    similarity += caseKnowledge.WLChunkTableOfIndustryClass[primChunk].documentFrequency.GetRatio(caseKnowledge.WLChunkTableOfIndustryClass.meta.maxDF);
                }
            }
            foreach (var primChunk in classKnowledge.semanticCloudFiltered.secondaryChunks)
            {
                if (caseKnowledge.WLChunkTableOfIndustryClass.ContainsKey(primChunk))
                {
                    similarity += (caseKnowledge.WLChunkTableOfIndustryClass[primChunk].documentFrequency.GetRatio(caseKnowledge.WLChunkTableOfIndustryClass.meta.maxDF)) * 0.25;
                }
            }

            target.data.featureVectors[caseColl.setClass.classID][SVMChunkSimilarity] += similarity;
        }
    }

    //target.result.selected = target.result.GetClassWithHighestScore();

    // <---------------------------------- the call to the classifier should happen here

    // sb.AppendLine("kNN used - class selected is: " + c.ToString() + " [" + target.result.selected.name + "]");

    //String path = caseKnowledge.folder.pathFor(caseKnowledge.name + "_log.txt");
    //File.WriteAllText(path, sb.ToString());

    return (target.data.featureVectors);
}