// Public Methods

/// <summary>
/// Trains a kNN classifier on the training data, then classifies both the
/// training and test vectors and writes a classification report for each to
/// <c>sys_output</c> (training report first, test report appended).
/// </summary>
/// <returns>The classification accuracy on the test data.</returns>
public override double ExecuteCommand()
{
    var vectorFile_train = new FeatureVectorFile(path: training_data_file, noOfHeaderColumns: 1, featureDelimiter: ' ', isSortRequired: true);
    var vectorFile_test = new FeatureVectorFile(path: test_data_file, noOfHeaderColumns: 1, featureDelimiter: ' ', isSortRequired: true);

    // The gold class is the single header column.
    int gold_i = 0;
    var featureToFeatureId = new TextIdMapper();
    var classToClassId = new TextIdMapper();
    var headerToHeaderIds = new TextIdMapper[] { classToClassId };

    // Load training first so the test vectors reuse the same feature/class ids.
    var trainingVectors = vectorFile_train.LoadFromSVMLight(featureToFeatureId, headerToHeaderIds, FeatureType.Binary);
    var goldClasses_train = vectorFile_train.Headers[gold_i];
    var testVectors = vectorFile_test.LoadFromSVMLight(featureToFeatureId, headerToHeaderIds, FeatureType.Binary);
    var goldClasses_test = vectorFile_test.Headers[gold_i];

    var classifier = new kNNClassifier(k_val, (SimilarityFunction)similarity_func, trainingVectors, classToClassId.Count, gold_i);

    var systemClasses_train = classifier.Classify(trainingVectors);
    var systemClasses_test = classifier.Classify(testVectors);

    var details_train = ProgramOutput.GetDistributionDetails(classifier, trainingVectors, classToClassId);
    var details_test = ProgramOutput.GetDistributionDetails(classifier, testVectors, classToClassId);

    ProgramOutput.GenerateSysOutput(sys_output, FileCreationMode.CreateNew, trainingVectors, classToClassId, goldClasses_train, systemClasses_train, details_train, "training data");
    var testAccuracy = ProgramOutput.GenerateSysOutput(sys_output, FileCreationMode.Append, testVectors, classToClassId, goldClasses_test, systemClasses_test, details_test, "test data");
    return testAccuracy;
}
// Methods

/// <summary>
/// Loads a MaxEnt POS classifier from <c>model_file</c>, decodes the vectors in
/// <c>vector_file</c> (sentence boundaries taken from <c>boundary_file</c>), and
/// writes the per-instance classification report to <c>sys_output</c>.
/// </summary>
/// <returns>The overall accuracy computed from the confusion matrix.</returns>
public override double ExecuteCommand()
{
    // Header layout: column 0 = instance name, column 1 = gold class.
    int instanceName_i = 0;
    int gold_i = 1;

    featureToFeatureId = new TextIdMapper();
    classToClassId = new TextIdMapper();
    var instanceNameToId = new TextIdMapper();
    var headerToHeaderIds = new TextIdMapper[] { instanceNameToId, classToClassId };

    var vectorFile = new FeatureVectorFile(path: vector_file, noOfHeaderColumns: 2, featureDelimiter: ' ', isSortRequired: false);

    // Read the boundaries (one sentence length per sequence):
    int[] sentenceLengths = ReadBoundaryFile(boundary_file);

    // Read the classifier model before the vectors, so the mappers carry the
    // model's ids when the vectors are loaded:
    classifier = MaxEntPOSClassifier.LoadModel(model_file, classToClassId, featureToFeatureId);

    // Read the vectors:
    var testVectors = vectorFile.LoadFromSVMLight(featureToFeatureId, headerToHeaderIds, FeatureType.Continuous);

    // Get the output ready for display.
    int[] goldClassIds = vectorFile.Headers[gold_i];
    int[] instanceNameIds = vectorFile.Headers[instanceName_i];
    string[] instanceNames = instanceNameToId.GetValues(instanceNameIds);

    // Generate sys_output:
    ConfusionMatrix confusionMatrix;
    File.WriteAllText(sys_output, GenerateSysOutput(instanceNames, testVectors, sentenceLengths, out confusionMatrix, gold_i));
    return confusionMatrix.CalculateAccuracy();
}
/// <summary>
/// Trains the classifier provided by the <c>classifierFactory</c> on the <c>training_data</c>.
/// Then, the classifier is used to evaluate the accuracy of both the <c>training_data</c> and <c>test_data</c>.
/// A report on the classification details is printed to the <c>output_file</c>.
/// </summary>
/// <param name="output_file">A report on the classification details.</param>
/// <param name="classifierFactory">Provides the necessary classifier; invoked as
/// (trainingVectors, classToClassId.Count, gold_i).</param>
/// <param name="getDetailsFunc">Produces the per-vector detail strings for the report.</param>
internal static void ReportOnTrainingAndTesting(
    FeatureVectorFile vectorFile_train
    , FeatureVectorFile vectorFile_test
    , string output_file
    , Func<List<FeatureVector>, int, int, Classifier> classifierFactory
    , Func<Classifier, List<FeatureVector>, TextIdMapper, string[]> getDetailsFunc)
{
    // The gold class is the single header column.
    int gold_i = 0;
    TextIdMapper featureToFeatureId = new TextIdMapper();
    TextIdMapper classToClassId = new TextIdMapper();
    TextIdMapper[] headerToHeaderIds = new TextIdMapper[] { classToClassId };

    // Load training first so the test vectors reuse the same feature/class ids.
    var trainingVectors = vectorFile_train.LoadFromSVMLight(featureToFeatureId, headerToHeaderIds, FeatureType.Binary);
    var goldClasses_train = vectorFile_train.Headers[gold_i];
    var testVectors = vectorFile_test.LoadFromSVMLight(featureToFeatureId, headerToHeaderIds, FeatureType.Binary);
    var goldClasses_test = vectorFile_test.Headers[gold_i];

    Classifier classifier = classifierFactory(trainingVectors, classToClassId.Count, gold_i);

    var systemClasses_train = classifier.Classify(trainingVectors);
    var systemClasses_test = classifier.Classify(testVectors);

    // FIX: use the supplied getDetailsFunc. The original ignored this parameter
    // and called ProgramOutput.GetDistributionDetails directly, silently
    // discarding whatever the caller passed. (All callers visible in this file
    // pass a lambda that wraps GetDistributionDetails, so their behavior is
    // unchanged.)
    var details_train = getDetailsFunc(classifier, trainingVectors, classToClassId);
    var details_test = getDetailsFunc(classifier, testVectors, classToClassId);

    ProgramOutput.GenerateSysOutput(output_file, FileCreationMode.CreateNew, trainingVectors, classToClassId, goldClasses_train, systemClasses_train, details_train, "training data");
    ProgramOutput.GenerateSysOutput(output_file, FileCreationMode.Append, testVectors, classToClassId, goldClasses_test, systemClasses_test, details_test, "test data");
}
/// <summary>
/// Loads the classifier provided by the <c>classifierFactory</c> which is modelled using the specified <c>model_file</c>.
/// Then, the classifier is used to evaluate the accuracy of the <c>vector_data</c>.
/// A report on the classification details is printed to the <c>output_file</c>.
/// </summary>
/// <param name="sys_output">A report on the classification details.</param>
/// <param name="classifierFactory">Provides the necessary classifier.</param>
/// <param name="getDetailsFunc">Produces the per-vector detail strings for the report.</param>
/// <returns>The classification accuracy on the loaded vectors.</returns>
internal static double ReportOnModel(
    FeatureVectorFile vectorFile
    , string sys_output
    , Func<TextIdMapper, TextIdMapper, Classifier> classifierFactory
    , Func<Classifier, List<FeatureVector>, TextIdMapper, TextIdMapper, string[]> getDetailsFunc)
{
    // The gold class is the single header column.
    int gold_i = 0;
    var featureToFeatureId = new TextIdMapper();
    var classToClassId = new TextIdMapper();
    var headerToHeaderIds = new TextIdMapper[] { classToClassId };

    // The factory runs before the vectors are read; a factory that loads a model
    // is handed the (still empty) mappers, and the vectors then reuse whatever
    // ids the factory registered.
    Classifier classifier = classifierFactory(classToClassId, featureToFeatureId);

    var vectors = vectorFile.LoadFromSVMLight(featureToFeatureId, headerToHeaderIds, FeatureType.Binary);
    var goldClasses = vectorFile.Headers[gold_i];

    var systemClasses = classifier.Classify(vectors);
    string[] details = getDetailsFunc(classifier, vectors, classToClassId, featureToFeatureId);

    double accuracy = ProgramOutput.GenerateSysOutput(sys_output, FileCreationMode.CreateNew, vectors, classToClassId, goldClasses, systemClasses, details, heading: Path.GetFileName(vectorFile.Path));
    return accuracy;
}
// Methods

/// <summary>
/// Reads SVMLight-formatted vectors from STDIN, builds one (class x feature-value)
/// contingency table per feature, computes each feature's chi-square statistic,
/// and reports the results.
/// </summary>
/// <returns>Always true.</returns>
public override bool ExecuteCommand()
{
    // Initialize the text-to-Id mappers:
    int gold_i = 0;
    featureToFeatureId = new TextIdMapper();
    classToClassId = new TextIdMapper();
    var headerToHeaderIds = new TextIdMapper[] { classToClassId };

    // Workaround: Read everything from STDIN to a file. (Files are used as the text source throughout this application.)
    string svmLight_data = Console.In.ReadToEnd();
    Console.Error.WriteLine("{0} characters of input received.", svmLight_data.Length);

    string tempFile = Path.GetTempFileName();
    int[] goldClasses;
    List<FeatureVector> vectors;
    try
    {
        File.WriteAllText(tempFile, svmLight_data);
        var vectorFile = new FeatureVectorFile(path: tempFile, noOfHeaderColumns: 1, featureDelimiter: ' ', isSortRequired: false);
        vectors = vectorFile.LoadFromSVMLight(featureToFeatureId, headerToHeaderIds, FeatureType.Binary);
        goldClasses = vectorFile.Headers[gold_i];
    }
    finally
    {
        // Always remove the temp file, even if loading throws.
        File.Delete(tempFile);
    }
    Debug.Assert(vectors.Count > 0);

    var chiSquare = new IdValuePair<double>[featureToFeatureId.Count];

    //TODO: Make the implementation less binary dependent (i.e. the hardcoded 2 below).
    var contingencyTable_f = new double[featureToFeatureId.Count][,];
    for (int f_i = 0; f_i < featureToFeatureId.Count; f_i++)
    {
        // Create a contingency table for this feature:
        // rows = gold class id, columns = feature value (binary, hence 2).
        contingencyTable_f[f_i] = new double[classToClassId.Count, 2];
        for (int v_i = 0; v_i < vectors.Count; v_i++)
        {
            FeatureVector v = vectors[v_i];
            contingencyTable_f[f_i][v.Headers[gold_i], (int)v.Features[f_i]]++;
        }
        chiSquare[f_i] = new IdValuePair<double>(f_i, StatisticsHelper.CalculateChiSquare(contingencyTable_f[f_i]));
    }

    ReportChiSquareResults(contingencyTable_f, chiSquare);
    return true;
}
// Methods

/// <summary>
/// Trains a TBL classifier on <c>vector_data</c> and saves the model to
/// <c>model_file</c> via <c>Program.TrainModel</c>.
/// </summary>
/// <returns>The number of transformations the trained classifier learned.</returns>
public override int ExecuteCommand()
{
    var vectorFile = new FeatureVectorFile(path: vector_data, noOfHeaderColumns: 1, featureDelimiter: ':', isSortRequired: false);
    int gold_i = 0;

    // The factory captures the classifier so its transformation count can be
    // reported after training completes.
    TBLClassifier classifier = null;
    Program.TrainModel(vectorFile, model_file,
        classifierFactory: (vectors, classToClassId, featureToFeatureId) =>
        {
            classifier = new TBLClassifier(vectors, classToClassId.Count, min_gain, gold_i);
            return classifier;
        });
    return classifier.Transformations.Count;
}
// Methods

/// <summary>
/// Loads a TBL model from <c>model_file</c>, classifies <c>vector_file</c>, and
/// writes the classification report to <c>sys_output</c>.
/// </summary>
/// <returns>The resulting classification accuracy.</returns>
public override double ExecuteCommand()
{
    int gold_i = 0;
    var vectorFile = new FeatureVectorFile(path: vector_file, noOfHeaderColumns: 1, featureDelimiter: ':', isSortRequired: false);

    double accuracy = Program.ReportOnModel(vectorFile, sys_output
        , classifierFactory: (classToClassId, featureToFeatureId) =>
            TBLClassifier.LoadModel(model_file, classToClassId, featureToFeatureId, N, gold_i)
        , getDetailsFunc: GetDetails);
    return accuracy;
}
// Methods

/// <summary>
/// Loads a MaxEnt model from <c>model_file</c>, classifies <c>vector_file</c>,
/// and writes the classification report to <c>sys_output</c>.
/// </summary>
/// <returns>The resulting classification accuracy.</returns>
public override double ExecuteCommand()
{
    var vectorFile_train = new FeatureVectorFile(path: vector_file, noOfHeaderColumns: 1, featureDelimiter: ':', isSortRequired: false);

    return Program.ReportOnModel(vectorFile_train, sys_output
        , classifierFactory: (classToClassId, featureToFeatureId) =>
            MaxEntClassifier.LoadModel(model_file, classToClassId, featureToFeatureId)
        , getDetailsFunc: (classifier, vectors, classToClassId, featureToFeatureId) =>
            ProgramOutput.GetDistributionDetails(classifier, vectors, classToClassId));
}
// Methods

/// <summary>
/// Trains a decision-tree classifier and reports accuracy on both the training
/// and test data via <c>Program.ReportOnTrainingAndTesting</c>.
/// </summary>
/// <returns>Always true.</returns>
public override bool ExecuteCommand()
{
    var vectorFile_train = new FeatureVectorFile(path: training_data_file, noOfHeaderColumns: 1, featureDelimiter: ' ', isSortRequired: false);
    var vectorFile_test = new FeatureVectorFile(path: test_data_file, noOfHeaderColumns: 1, featureDelimiter: ' ', isSortRequired: false);

    // NOTE(review): ReportOnTrainingAndTesting invokes this factory as
    // (trainingVectors, classToClassId.Count, gold_i). The parameters below are
    // named to match those actual values; the original named them
    // (gold_i, noOfClasses) — i.e. swapped — while wiring the same positions
    // into the constructor. Positional wiring is preserved here; verify that
    // DecisionTreeClassifier's constructor expects
    // (vectors, noOfClasses, gold_i, maxDepth, minGain).
    Program.ReportOnTrainingAndTesting(vectorFile_train, vectorFile_test, sys_output
        , classifierFactory: (trainingVectors, noOfClasses, gold_i) =>
            new DecisionTreeClassifier(trainingVectors, noOfClasses, gold_i, max_depth, min_gain)
        , getDetailsFunc: (classifier, vectors, classToClassId) =>
            ProgramOutput.GetDistributionDetails(classifier, vectors, classToClassId));
    return true;
}
// Methods

/// <summary>
/// Loads a MaxEnt POS classifier from <c>model_file</c>, decodes the vectors in
/// <c>vector_file</c> (sentence boundaries taken from <c>boundary_file</c>), and
/// writes the per-instance classification report to <c>sys_output</c>.
/// </summary>
/// <returns>The overall accuracy computed from the confusion matrix.</returns>
public override double ExecuteCommand()
{
    // FIX: the vectors carry TWO header columns (instance name + gold class) —
    // this method reads Headers[0] and Headers[1] below, and the sibling
    // implementation of this command constructs its FeatureVectorFile with
    // noOfHeaderColumns: 2. The original passed 1, which cannot supply
    // Headers[gold_i = 1].
    FeatureVectorFile vectorFile = new FeatureVectorFile(path: vector_file, noOfHeaderColumns: 2, featureDelimiter: ':', isSortRequired: false);

    // Initialize the text-to-Id mappers:
    featureToFeatureId = new TextIdMapper();
    int instanceName_i = 0;
    int gold_i = 1;
    classToClassId = new TextIdMapper();
    var instanceNameToInstanceNameId = new TextIdMapper();
    TextIdMapper[] headerToHeaderIds = new TextIdMapper[] { instanceNameToInstanceNameId, classToClassId };

    // Read the boundaries:
    int[] sentenceLengths = ReadBoundaryFile(boundary_file);

    // Read the classifier model (seeds the class/feature mappers before the
    // vectors are loaded):
    classifier = MaxEntPOSClassifier.LoadModel(model_file, classToClassId, featureToFeatureId);

    // Read the vectors:
    var testVectors = vectorFile.LoadFromSVMLight(featureToFeatureId, headerToHeaderIds, FeatureType.Continuous);
    int[] instanceNameIds = vectorFile.Headers[instanceName_i];
    int[] goldClasses = vectorFile.Headers[gold_i];

    // FIX: map each instance-name id back to its text. The original loop looked
    // up instanceNameId for each row and then never used it, indexing the mapper
    // with the row index i instead (headerToHeaderIds[instanceName_i][i]), which
    // is wrong whenever instance names repeat or ids diverge from row order.
    // GetValues(int[]) is the pattern the sibling implementation uses.
    string[] instanceNames = instanceNameToInstanceNameId.GetValues(instanceNameIds);

    // Generate sys_output:
    var confusionMatrix = GenerateSysOutput(sys_output, instanceNames, testVectors, sentenceLengths, gold_i);
    return confusionMatrix.CalculateAccuracy();
}
// Methods

/// <summary>
/// Loads the training vectors and reports the empirical (observed) counts and
/// expectations used for MaxEnt training.
/// </summary>
/// <returns>Always true.</returns>
public override bool ExecuteCommand()
{
    var vectorFile = new FeatureVectorFile(path: vector_file, noOfHeaderColumns: 1, featureDelimiter: ' ', isSortRequired: false);

    // Load the training file; the gold class is the single header column.
    int gold_i = 0;
    var featureToFeatureId = new TextIdMapper();
    var classToClassId = new TextIdMapper();
    var headerToHeaderIds = new TextIdMapper[] { classToClassId };
    var trainingVectors = vectorFile.LoadFromSVMLight(featureToFeatureId, headerToHeaderIds, FeatureType.Binary);
    var goldClasses = vectorFile.Headers[gold_i];

    double[,] observation, expectation;
    CalculateObservationAndEmpiricalExpectation(trainingVectors, out observation, out expectation);
    OutputEmpiricalCount(observation, expectation);
    return true;
}
/// <summary>
/// Loads an SVM model from <c>model_file</c>, classifies <c>vector_file</c>, and
/// writes the classification report to <c>sys_output</c>.
/// </summary>
/// <returns>The resulting classification accuracy.</returns>
public override double ExecuteCommand()
{
    var vectorFile = new FeatureVectorFile(path: vector_file, noOfHeaderColumns: 1, featureDelimiter: ':', isSortRequired: true);
    var modelFile = new FeatureVectorFile(path: model_file, noOfHeaderColumns: 1, featureDelimiter: ':', isSortRequired: true);

    // Only the alpha column of the model file needs a mapper; the other slots
    // stay null.
    int alphaColumn_i = 0;
    var headerToHeaderIds_model = new TextIdMapper[modelFile.NoOfHeaderColumns];
    headerToHeaderIds_model[alphaColumn_i] = new TextIdMapper();

    double accuracy = Program.ReportOnModel(vectorFile, sys_output
        , classifierFactory: (classToClassId, featureToFeatureId) =>
            SVMClassifier.LoadModel(modelFile, classToClassId, featureToFeatureId, alphaColumn_i, headerToHeaderIds_model)
        , getDetailsFunc: GetDetails);
    return accuracy;
}
/// <summary>
/// Trains a classifier on the specified <c>train_data</c>.
/// Output the model to the specified <c>model_file</c>.
/// </summary>
/// <param name="model_file">A file containing a serialization of the classifier model.</param>
/// <param name="classifierFactory">Provides the necessary classifier, which must implement ISaveModel.</param>
internal static void TrainModel<T>(
    FeatureVectorFile vector_file
    , string model_file
    , Func<List<FeatureVector>, TextIdMapper, TextIdMapper, T> classifierFactory)
    where T : Classifier, ISaveModel
{
    var featureToFeatureId = new TextIdMapper();
    var classToClassId = new TextIdMapper();
    var headerToHeaderIds = new TextIdMapper[] { classToClassId };

    var vectors = vector_file.LoadFromSVMLight(featureToFeatureId, headerToHeaderIds, FeatureType.Binary);

    // Build the classifier, then persist it along with its id mappings.
    T classifier = classifierFactory(vectors, classToClassId, featureToFeatureId);
    classifier.SaveModel(model_file, classToClassId, featureToFeatureId);
}
// Methods

/// <summary>
/// Computes and outputs the model expectation over the training data. When
/// <c>model_file</c> is supplied, per-class probabilities come from the loaded
/// MaxEnt classifier; otherwise a uniform 1/|C| is used.
/// </summary>
/// <returns>Always true.</returns>
public override bool ExecuteCommand()
{
    var vectorFile_train = new FeatureVectorFile(path: training_data_file, noOfHeaderColumns: 1, featureDelimiter: ' ', isSortRequired: false);

    int gold_i = 0;
    TextIdMapper featureToFeatureId = new TextIdMapper();
    TextIdMapper classToClassId = new TextIdMapper();
    TextIdMapper[] headerToHeaderIds = new TextIdMapper[] { classToClassId };
    var trainingVectors = vectorFile_train.LoadFromSVMLight(featureToFeatureId, headerToHeaderIds, FeatureType.Binary);
    var goldClasses_train = vectorFile_train.Headers[gold_i];

    // model_file is optional. The delegate is Func<int, FeatureVector, double>:
    // first argument is the class id, second is the vector.
    Func<int, FeatureVector, double> calculate_Prob_c_v;
    // If it is not given, a uniform 1/|C| is used, where |C| is the number of class labels.
    if (string.IsNullOrWhiteSpace(model_file))
    {
        double kProbability = 1D / classToClassId.Count;
        // FIX: parameters renamed to (c_i, v) to match the delegate's
        // (classId, vector) order; the original named them (v, c_i), i.e.
        // swapped. Behavior is unchanged — the lambda ignores both arguments
        // and returns a constant — but the names now agree with the else branch.
        calculate_Prob_c_v = (c_i, v) => { return kProbability; };
    }
    // If it is given, the loaded classifier supplies the class distribution for each vector.
    else
    {
        MaxEntClassifier classifier = MaxEntClassifier.LoadModel(model_file, classToClassId, featureToFeatureId);
        calculate_Prob_c_v = (c_i, v) =>
        {
            double[] details;
            int sysClass = classifier.Classify(v, out details);
            return details[c_i];
        };
    }

    double[,] expectation = CalculateModelExpectation(trainingVectors, calculate_Prob_c_v);
    OutputEmpiricalCount(expectation, trainingVectors.Count, requiresSort: true);
    return true;
}
// Methods

/// <summary>
/// Trains a multinomial Naive Bayes classifier and reports accuracy on both the
/// training and test data via <c>Program.ReportOnTrainingAndTesting</c>.
/// </summary>
/// <returns>Always true.</returns>
public override bool ExecuteCommand()
{
    var vectorFile_train = new FeatureVectorFile(path: training_data_file, noOfHeaderColumns: 1, featureDelimiter: ' ', isSortRequired: false);
    var vectorFile_test = new FeatureVectorFile(path: test_data_file, noOfHeaderColumns: 1, featureDelimiter: ' ', isSortRequired: false);

    // NOTE(review): ReportOnTrainingAndTesting invokes this factory as
    // (trainingVectors, classToClassId.Count, gold_i). The parameters below are
    // named to match those actual values; the original named them
    // (gold_i, noOfClasses), i.e. swapped. The original positional wiring is
    // preserved exactly, which means the constructor's 4th argument receives
    // gold_i (the header index) and the 5th receives the class count — the
    // OPPOSITE of the (noOfClasses, gold_i) order the sibling kNN/TBL commands
    // use. Verify against NaiveBayesClassifier_Multinomial's constructor; if it
    // expects (noOfClasses, gold_i), the original code was passing them swapped.
    Program.ReportOnTrainingAndTesting(vectorFile_train, vectorFile_test, sys_output
        , classifierFactory: (trainingVectors, noOfClasses, gold_i) =>
            new NaiveBayesClassifier_Multinomial(
                class_prior_delta
                , cond_prob_delta
                , trainingVectors
                , gold_i
                , noOfClasses)
        , getDetailsFunc: (classifier, vectors, classToClassId) =>
            ProgramOutput.GetDistributionDetails(classifier, vectors, classToClassId));
    return true;
}