/// <summary>
/// Obtains a classifier from <paramref name="classifierFactory"/>, classifies the vectors loaded
/// from <paramref name="vectorFile"/>, and writes a classification report to
/// <paramref name="sys_output"/> under a heading equal to the vector file's name.
/// </summary>
/// <param name="vectorFile">The feature-vector file; loaded through <c>LoadFromSVMLight</c> with <c>FeatureType.Binary</c>.</param>
/// <param name="sys_output">Report destination handed to <c>ProgramOutput.GenerateSysOutput</c> (presumably a file path), opened with <c>FileCreationMode.CreateNew</c>.</param>
/// <param name="classifierFactory">Builds the classifier; receives the class mapper and then the feature mapper.</param>
/// <param name="getDetailsFunc">Produces the detail strings for the report; receives the classifier, the loaded vectors, the class mapper and the feature mapper.</param>
/// <returns>The value returned by <c>ProgramOutput.GenerateSysOutput</c>, treated by this method as the accuracy.</returns>
internal static double ReportOnModel(
    FeatureVectorFile vectorFile,
    string sys_output,
    Func<TextIdMapper, TextIdMapper, Classifier> classifierFactory,
    Func<Classifier, List<FeatureVector>, TextIdMapper, TextIdMapper, string[]> getDetailsFunc)
{
    // Index of the header column that holds the gold-standard class.
    int goldColumn = 0;

    var featureIds = new TextIdMapper();
    var classIds = new TextIdMapper();
    var headerMappers = new[] { classIds };

    // The factory runs before the vectors are loaded, so it sees (and may fill) the mappers first.
    // NOTE(review): presumably the factory registers the model's classes/features here - confirm.
    Classifier model = classifierFactory(classIds, featureIds);

    var loadedVectors = vectorFile.LoadFromSVMLight(featureIds, headerMappers, FeatureType.Binary);

    // Headers are read only after the load call above.
    var expectedClasses = vectorFile.Headers[goldColumn];
    var predictedClasses = model.Classify(loadedVectors);
    string[] reportDetails = getDetailsFunc(model, loadedVectors, classIds, featureIds);

    return ProgramOutput.GenerateSysOutput(
        sys_output,
        FileCreationMode.CreateNew,
        loadedVectors,
        classIds,
        expectedClasses,
        predictedClasses,
        reportDetails,
        heading: Path.GetFileName(vectorFile.Path));
}
// Public Methods

/// <summary>
/// Builds a kNN classifier from the training file, classifies both the training and the test
/// vectors, and writes both reports to <c>sys_output</c> (training first, test appended).
/// </summary>
/// <returns>The value returned by <c>ProgramOutput.GenerateSysOutput</c> for the test data.</returns>
public override double ExecuteCommand()
{
    var trainFile = new FeatureVectorFile(path: training_data_file, noOfHeaderColumns: 1, featureDelimiter: ' ', isSortRequired: true);
    var testFile = new FeatureVectorFile(path: test_data_file, noOfHeaderColumns: 1, featureDelimiter: ' ', isSortRequired: true);

    // Index of the header column that holds the gold-standard class.
    int goldColumn = 0;

    // Both files share the same mappers, so training data is loaded first and test data second.
    var featureIds = new TextIdMapper();
    var classIds = new TextIdMapper();
    var headerMappers = new[] { classIds };

    var trainVectors = trainFile.LoadFromSVMLight(featureIds, headerMappers, FeatureType.Binary);
    var trainGold = trainFile.Headers[goldColumn];

    var testVectors = testFile.LoadFromSVMLight(featureIds, headerMappers, FeatureType.Binary);
    var testGold = testFile.Headers[goldColumn];

    // The class count is read after both files have been loaded.
    var knn = new kNNClassifier(k_val, (SimilarityFunction)similarity_func, trainVectors, classIds.Count, goldColumn);

    var trainPredicted = knn.Classify(trainVectors);
    var testPredicted = knn.Classify(testVectors);

    var trainDetails = ProgramOutput.GetDistributionDetails(knn, trainVectors, classIds);
    var testDetails = ProgramOutput.GetDistributionDetails(knn, testVectors, classIds);

    // The training report creates the output; the test report is appended to it.
    ProgramOutput.GenerateSysOutput(
        sys_output,
        FileCreationMode.CreateNew,
        trainVectors,
        classIds,
        trainGold,
        trainPredicted,
        trainDetails,
        "training data");

    return ProgramOutput.GenerateSysOutput(
        sys_output,
        FileCreationMode.Append,
        testVectors,
        classIds,
        testGold,
        testPredicted,
        testDetails,
        "test data");
}
/// <summary>
/// Trains the classifier provided by <paramref name="classifierFactory"/> on the training vectors,
/// classifies both the training and the test vectors with it, and writes both reports to
/// <paramref name="output_file"/> (training first, test appended).
/// </summary>
/// <param name="vectorFile_train">The training feature-vector file; loaded through <c>LoadFromSVMLight</c> with <c>FeatureType.Binary</c>.</param>
/// <param name="vectorFile_test">The test feature-vector file; loaded after the training file with the same mappers.</param>
/// <param name="output_file">Report destination handed to <c>ProgramOutput.GenerateSysOutput</c> (presumably a file path).</param>
/// <param name="classifierFactory">Builds the classifier; receives the training vectors, the number of known classes and the gold header index.</param>
/// <param name="getDetailsFunc">
/// Produces the detail strings for each report; receives the classifier, the vectors being reported
/// and the class mapper. When <c>null</c>, <c>ProgramOutput.GetDistributionDetails</c> is used.
/// </param>
internal static void ReportOnTrainingAndTesting(
    FeatureVectorFile vectorFile_train,
    FeatureVectorFile vectorFile_test,
    string output_file,
    Func<List<FeatureVector>, int, int, Classifier> classifierFactory,
    Func<Classifier, List<FeatureVector>, TextIdMapper, string[]> getDetailsFunc)
{
    // Index of the header column that holds the gold-standard class.
    int gold_i = 0;

    // Both files share the same mappers, so training data is loaded first and test data second.
    TextIdMapper featureToFeatureId = new TextIdMapper();
    TextIdMapper classToClassId = new TextIdMapper();
    TextIdMapper[] headerToHeaderIds = new TextIdMapper[] { classToClassId };

    var trainingVectors = vectorFile_train.LoadFromSVMLight(featureToFeatureId, headerToHeaderIds, FeatureType.Binary);
    var goldClasses_train = vectorFile_train.Headers[gold_i];

    var testVectors = vectorFile_test.LoadFromSVMLight(featureToFeatureId, headerToHeaderIds, FeatureType.Binary);
    var goldClasses_test = vectorFile_test.Headers[gold_i];

    // The class count is read after both files have been loaded.
    Classifier classifier = classifierFactory(trainingVectors, classToClassId.Count, gold_i);

    var systemClasses_train = classifier.Classify(trainingVectors);
    var systemClasses_test = classifier.Classify(testVectors);

    // Honour the caller-supplied details function (it used to be ignored); fall back to the
    // distribution details that were previously hard-coded so a null argument still works.
    Func<Classifier, List<FeatureVector>, TextIdMapper, string[]> describe =
        getDetailsFunc ?? ((c, v, m) => ProgramOutput.GetDistributionDetails(c, v, m));

    var details_train = describe(classifier, trainingVectors, classToClassId);
    var details_test = describe(classifier, testVectors, classToClassId);

    // The training report creates the output; the test report is appended to it.
    ProgramOutput.GenerateSysOutput(
        output_file,
        FileCreationMode.CreateNew,
        trainingVectors,
        classToClassId,
        goldClasses_train,
        systemClasses_train,
        details_train,
        "training data");

    ProgramOutput.GenerateSysOutput(
        output_file,
        FileCreationMode.Append,
        testVectors,
        classToClassId,
        goldClasses_test,
        systemClasses_test,
        details_test,
        "test data");
}