示例#1
0
        /// <summary>
        /// Obtains a classifier from <paramref name="classifierFactory"/>, loads the binary feature vectors of
        /// <paramref name="vectorFile"/>, classifies them, and hands the results to
        /// <c>ProgramOutput.GenerateSysOutput</c> to produce the report at <paramref name="sys_output"/>.
        /// </summary>
        /// <param name="vectorFile">The feature vector file to load (via <c>LoadFromSVMLight</c>) and classify.</param>
        /// <param name="sys_output">Report target passed to <c>ProgramOutput.GenerateSysOutput</c> (presumably a file path).</param>
        /// <param name="classifierFactory">
        /// Builds the classifier. It is invoked with the class mapper followed by the feature mapper,
        /// before any vectors have been loaded.
        /// </param>
        /// <param name="getDetailsFunc">
        /// Produces the detail strings forwarded to the report generator. It is invoked after classification with
        /// the classifier, the loaded vectors, the class mapper and the feature mapper (in that order).
        /// </param>
        /// <returns>The value returned by <c>ProgramOutput.GenerateSysOutput</c> (treated as the accuracy).</returns>
        internal static double ReportOnModel(
            FeatureVectorFile vectorFile
            , string sys_output
            , Func <TextIdMapper, TextIdMapper, Classifier> classifierFactory
            , Func <Classifier, List <FeatureVector>, TextIdMapper, TextIdMapper, string[]> getDetailsFunc
            )
        {
            // Index of the header column that holds the gold-standard class.
            int goldColumn = 0;

            var featureIds = new TextIdMapper();
            var classIds = new TextIdMapper();

            // The factory runs before the vectors are loaded, and both steps share the same mapper instances.
            var model = classifierFactory(classIds, featureIds);

            var loadedVectors = vectorFile.LoadFromSVMLight(featureIds, new TextIdMapper[] { classIds }, FeatureType.Binary);
            var expectedClasses = vectorFile.Headers[goldColumn];

            // Classification happens before the details are requested, exactly as the report expects.
            var predictedClasses = model.Classify(loadedVectors);
            var reportDetails = getDetailsFunc(model, loadedVectors, classIds, featureIds);

            var reportHeading = Path.GetFileName(vectorFile.Path);

            return ProgramOutput.GenerateSysOutput(
                sys_output,
                FileCreationMode.CreateNew,
                loadedVectors,
                classIds,
                expectedClasses,
                predictedClasses,
                reportDetails,
                heading: reportHeading);
        }
示例#2
0
        // Public Methods

        /// <summary>
        /// Loads the training and test vector files, builds a <c>kNNClassifier</c> over the training vectors,
        /// classifies both sets, and writes both reports to <c>sys_output</c>: the training report into a newly
        /// created output, the test report appended after it.
        /// </summary>
        /// <returns>The value returned by <c>ProgramOutput.GenerateSysOutput</c> for the test data.</returns>
        public override double ExecuteCommand()
        {
            var trainFile = new FeatureVectorFile(path: training_data_file, noOfHeaderColumns: 1, featureDelimiter: ' ', isSortRequired: true);
            var testFile = new FeatureVectorFile(path: test_data_file, noOfHeaderColumns: 1, featureDelimiter: ' ', isSortRequired: true);

            // Index of the header column that holds the gold-standard class.
            int goldColumn = 0;

            var featureIds = new TextIdMapper();
            var classIds = new TextIdMapper();
            var headerMappers = new TextIdMapper[] { classIds };

            // Both files are loaded through the same mapper instances.
            var trainVectors = trainFile.LoadFromSVMLight(featureIds, headerMappers, FeatureType.Binary);
            var trainGold = trainFile.Headers[goldColumn];

            var evalVectors = testFile.LoadFromSVMLight(featureIds, headerMappers, FeatureType.Binary);
            var evalGold = testFile.Headers[goldColumn];

            var knn = new kNNClassifier(k_val, (SimilarityFunction)similarity_func, trainVectors, classIds.Count, goldColumn);

            var trainPredicted = knn.Classify(trainVectors);
            var evalPredicted = knn.Classify(evalVectors);

            var trainDetails = ProgramOutput.GetDistributionDetails(knn, trainVectors, classIds);
            var evalDetails = ProgramOutput.GetDistributionDetails(knn, evalVectors, classIds);

            // The training report creates the output; the test report is appended to it.
            ProgramOutput.GenerateSysOutput(sys_output, FileCreationMode.CreateNew, trainVectors, classIds, trainGold, trainPredicted, trainDetails, "training data");

            return ProgramOutput.GenerateSysOutput(sys_output, FileCreationMode.Append, evalVectors, classIds, evalGold, evalPredicted, evalDetails, "test data");
        }
示例#3
0
        /// <summary>
        /// Builds the classifier provided by <paramref name="classifierFactory"/> from the vectors of
        /// <paramref name="vectorFile_train"/>, then classifies both the training vectors and the vectors of
        /// <paramref name="vectorFile_test"/>.
        /// A report for each set is written to <paramref name="output_file"/>: the training report into a newly
        /// created output, the test report appended after it.
        /// </summary>
        /// <param name="vectorFile_train">The feature vector file used to build the classifier; it is also classified and reported on.</param>
        /// <param name="vectorFile_test">The feature vector file that is classified and reported on as "test data".</param>
        /// <param name="output_file">Report target passed to <c>ProgramOutput.GenerateSysOutput</c>.</param>
        /// <param name="classifierFactory">
        /// Provides the classifier. It is invoked with the training vectors, the number of classes known after both
        /// files have been loaded, and the index of the gold-standard header column.
        /// </param>
        /// <param name="getDetailsFunc">
        /// Produces the detail strings for the report. It is invoked once per data set with the classifier, that
        /// set's vectors and the class mapper. When <c>null</c>, <c>ProgramOutput.GetDistributionDetails</c> is used.
        /// </param>
        internal static void ReportOnTrainingAndTesting(
            FeatureVectorFile vectorFile_train
            , FeatureVectorFile vectorFile_test
            , string output_file
            , Func <List <FeatureVector>, int, int, Classifier> classifierFactory
            , Func <Classifier, List <FeatureVector>, TextIdMapper, string[]> getDetailsFunc
            )
        {
            // Index of the header column that holds the gold-standard class.
            int          gold_i             = 0;
            TextIdMapper featureToFeatureId = new TextIdMapper();
            TextIdMapper classToClassId     = new TextIdMapper();

            TextIdMapper[] headerToHeaderIds = new TextIdMapper[] { classToClassId };

            // Both files are loaded through the same mapper instances.
            var trainingVectors   = vectorFile_train.LoadFromSVMLight(featureToFeatureId, headerToHeaderIds, FeatureType.Binary);
            var goldClasses_train = vectorFile_train.Headers[gold_i];

            var testVectors      = vectorFile_test.LoadFromSVMLight(featureToFeatureId, headerToHeaderIds, FeatureType.Binary);
            var goldClasses_test = vectorFile_test.Headers[gold_i];

            Classifier classifier = classifierFactory(trainingVectors, classToClassId.Count, gold_i);

            var systemClasses_train = classifier.Classify(trainingVectors);
            var systemClasses_test  = classifier.Classify(testVectors);

            // Honour the caller-supplied details function. It used to be ignored in favour of the hard-coded
            // ProgramOutput.GetDistributionDetails, which is kept as the fallback so callers passing null still work.
            string[] details_train = getDetailsFunc != null
                ? getDetailsFunc(classifier, trainingVectors, classToClassId)
                : ProgramOutput.GetDistributionDetails(classifier, trainingVectors, classToClassId);
            string[] details_test = getDetailsFunc != null
                ? getDetailsFunc(classifier, testVectors, classToClassId)
                : ProgramOutput.GetDistributionDetails(classifier, testVectors, classToClassId);

            // The training report creates the output; the test report is appended to it.
            ProgramOutput.GenerateSysOutput(output_file, FileCreationMode.CreateNew, trainingVectors, classToClassId, goldClasses_train, systemClasses_train, details_train, "training data");
            ProgramOutput.GenerateSysOutput(output_file, FileCreationMode.Append, testVectors, classToClassId, goldClasses_test, systemClasses_test, details_test, "test data");
        }