Beispiel #1
0
        /// <summary>
        /// Runs the svm_learn executable on a training file, waiting at most one minute for it to
        /// finish before killing it.
        /// </summary>
        /// <remarks>
        /// When <paramref name="logFile"/> is supplied, the global <see cref="Trace"/> listeners are
        /// replaced: an existing log file is deleted (best effort) and a new file listener is
        /// installed, plus a console listener unless <paramref name="silent"/> is set. When no log
        /// file is supplied the trace listeners are left untouched and nothing is traced.
        /// If the executable or the training file cannot be found, a message is written to the
        /// console and the whole process is terminated through <c>Environment.Exit(0)</c>.
        /// </remarks>
        /// <param name="svmLearnPath">Path to the svm_learn executable. MUST INCLUDE FULL PATH
        /// INCLUDING THE .EXE EXTENSION</param>
        /// <param name="trainingFile">The training dataset</param>
        /// <param name="modelFile">Model file to be created. Must have write permissions at this location
        /// </param>
        /// <param name="trainArgument">Passed through unchanged to <c>CreateLearnerProcess</c>; may be
        /// null (the default).</param>
        /// <param name="logFile">Log file to be created. When null or empty no logging is set up.</param>
        /// <param name="silent">Whether no output should be displayed on the console. Set this to true
        /// if you want silent. Only consulted when a log file is given.</param>
        public void ExecuteLearner(String svmLearnPath, String trainingFile, String modelFile, string trainArgument = null, String logFile = null, bool silent = true)
        {
            // Upper bound on how long svm_learn may run before it is killed (one minute).
            const int learnerTimeoutMilliseconds = 1000 * 60 * 1;

            bool loggingEnabled = !string.IsNullOrEmpty(logFile);

            this.svmLearnOutput.Clear();

            if (loggingEnabled)
            {
                Trace.Listeners.Clear();
                if (!silent)
                {
                    Trace.Listeners.Add(new TextWriterTraceListener(Console.Out));
                }

                if (File.Exists(logFile))
                {
                    Console.WriteLine("\n\nLog file " + logFile + " already exists. Deleting");
                    try
                    {
                        File.Delete(logFile);
                    }
                    catch (Exception)
                    {
                        // Best effort: the run continues even if the old log cannot be removed.
                        Console.WriteLine("Failed to delete file");
                    }
                }

                Trace.Listeners.Add(new TextWriterTraceListener(logFile));
                Trace.AutoFlush = true;
            }

            if (!Utilities.ExistsInPath(svmLearnPath) || !Utilities.ExistsInPath(trainingFile))
            {
                Console.WriteLine("Invalid file path");
                Environment.Exit(0);
            }

            Process svmLearnProcess = CreateLearnerProcess(svmLearnPath, trainingFile, modelFile, trainArgument);

            try
            {
                if (loggingEnabled)
                {
                    Trace.WriteLine("\n\nLearning model using svmlight...");
                    Trace.WriteLine("svm_learn " + svmLearnProcess.StartInfo.Arguments);
                }

                svmLearnProcess.Start();
                svmLearnProcess.BeginErrorReadLine();
                svmLearnProcess.BeginOutputReadLine();

                if (svmLearnProcess.WaitForExit(learnerTimeoutMilliseconds))
                {
                    // The timed overload may return before the asynchronous output/error
                    // handlers have processed everything; the parameterless overload blocks
                    // until they are done, so the captured output is complete when logged.
                    svmLearnProcess.WaitForExit();
                }
                else
                {
                    try
                    {
                        svmLearnProcess.Kill();
                    }
                    catch (InvalidOperationException)
                    {
                        // The process exited on its own between the timeout and the Kill call.
                    }
                }
            }
            finally
            {
                // Release the process handle and redirected streams even when starting fails.
                svmLearnProcess.Close();
            }

            if (loggingEnabled)
            {
                Trace.WriteLine(this.svmLearnOutput.ToString());
            }
        }
Beispiel #2
0
        /// <summary>
        /// Runs svm_classify on a test file, logs its output, and then reports the incorrectly
        /// classified instances through <c>FindIncorrectlyClassifiedInstances</c>.
        /// </summary>
        /// <remarks>
        /// The global <see cref="Trace"/> listeners are replaced by a listener on
        /// <paramref name="logFile"/> (an existing file is deleted first, best effort) and, unless
        /// <paramref name="silent"/> is set, a console listener. If the executable or the test file
        /// cannot be found, the process is terminated through <c>Environment.Exit(0)</c>.
        /// The log for the incorrect-instance pass is written next to <paramref name="logFile"/> as
        /// "&lt;log name&gt;-incorrect-log.txt".
        /// </remarks>
        /// <param name="svmClassifyPath">Enter full path, including .exe extension</param>
        /// <param name="testFile">The test dataset</param>
        /// <param name="modelFile">The model generated by svm_learn</param>
        /// <param name="outputFile">The output to be generated by svm_classify</param>
        /// <param name="logFile">Name of log file to be generated. May be a bare file name, in which
        /// case it is resolved against the current directory.</param>
        /// <param name="incorrectFile">File that receives the incorrectly classified instances; passed
        /// to <c>FindIncorrectlyClassifiedInstances</c>.</param>
        /// <param name="silent">Whether to be silent: i.e. no output on the console. Log file is still
        /// generated</param>
        public void ExecuteClassifier(String svmClassifyPath, String testFile, String modelFile, String outputFile,
                                      String logFile, String incorrectFile, bool silent)
        {
            Trace.Listeners.Clear();

            if (!silent)
            {
                Trace.Listeners.Add(new TextWriterTraceListener(Console.Out));
            }

            if (File.Exists(logFile))
            {
                Console.WriteLine("\n\nLog file " + logFile + " already exists. Deleting");
                try
                {
                    File.Delete(logFile);
                }
                catch (Exception)
                {
                    // Deliberately ignored: failing to delete the old log must not stop the run.
                }
            }

            Trace.Listeners.Add(new TextWriterTraceListener(logFile));
            Trace.AutoFlush = true;

            if (!Utilities.ExistsInPath(svmClassifyPath) || !Utilities.ExistsInPath(testFile))
            {
                Trace.WriteLine("Invalid file path");
                Environment.Exit(0);
            }

            Trace.WriteLine("Classifying test instances....");

            // NOTE(review): unlike svmLearnOutput in ExecuteLearner, svmClassifyOutput is not
            // cleared here, so repeated calls may log accumulated output - confirm intent.
            using (Process svmClassifyProcess = new Process())
            {
                svmClassifyProcess.StartInfo.FileName = svmClassifyPath;

                // Every path is quoted so that paths containing spaces survive as one argument.
                svmClassifyProcess.StartInfo.Arguments = "\"" + testFile + "\"" + " \"" + modelFile + "\" " +
                                                         "\"" + outputFile + "\"";

                svmClassifyProcess.StartInfo.UseShellExecute        = false;
                svmClassifyProcess.StartInfo.RedirectStandardError  = true;
                svmClassifyProcess.StartInfo.RedirectStandardOutput = true;
                svmClassifyProcess.OutputDataReceived += new DataReceivedEventHandler(svmClassifyProcess_OutputDataReceived);
                svmClassifyProcess.ErrorDataReceived  += new DataReceivedEventHandler(svmClassifyProcess_ErrorDataReceived);

                svmClassifyProcess.Start();
                svmClassifyProcess.BeginErrorReadLine();
                svmClassifyProcess.BeginOutputReadLine();

                svmClassifyProcess.WaitForExit();
            }

            Trace.WriteLine(this.svmClassifyOutput.ToString());

            // Resolve the full path BEFORE taking the directory: for a bare file name such as
            // "run.log" GetDirectoryName returns "", and GetFullPath("") throws.
            String logDirectory     = Path.GetDirectoryName(Path.GetFullPath(logFile));
            String logFileName      = Path.GetFileNameWithoutExtension(logFile);
            String incorrectLogFile = Path.Combine(logDirectory, logFileName + "-incorrect-log.txt");

            this.FindIncorrectlyClassifiedInstances(testFile, outputFile, incorrectFile, incorrectLogFile);
        }
Beispiel #3
0
        /// <summary>
        /// Finds all those instances which were incorrectly classified by svmlight
        /// </summary>
        /// <remarks>
        /// The test file and the classifier output are read in lock step, one line each per
        /// instance. The expected label is -1 when the test line starts with '-', otherwise +1; the
        /// predicted label is +1 when the output value is greater than zero, otherwise -1. Each
        /// mismatching instance is written as "&lt;test line&gt;#&lt;output line&gt;", followed by two
        /// summary lines with the false positive / false negative counts.
        /// The global <see cref="Trace"/> listeners are replaced by a console listener and a listener
        /// on <paramref name="logFile"/>. If either input file cannot be found the process is
        /// terminated through <c>Environment.Exit(0)</c>.
        /// </remarks>
        /// <param name="testFile">The test dataset</param>
        /// <param name="outputFile">The output file generated by svm_classify</param>
        /// <param name="incorrect">The file to be generated by this function: contains all the
        /// features vectors that were incorrectly classified by svm_classify</param>
        /// <param name="logFile">File that receives the trace messages written by this method</param>
        /// <exception cref="Exception">The two files do not contain the same number of lines.</exception>
        public void FindIncorrectlyClassifiedInstances(String testFile, String outputFile, String incorrect,
                                                       String logFile)
        {
            Trace.Listeners.Clear();
            Trace.Listeners.Add(new TextWriterTraceListener(Console.Out));
            Trace.Listeners.Add(new TextWriterTraceListener(logFile));

            if (!Utilities.ExistsInPath(testFile) || !Utilities.ExistsInPath(outputFile))
            {
                Trace.WriteLine("Invalid file path");
                Trace.Flush();
                Environment.Exit(0);
            }

            int incorrectClassificationCount = 0;
            int falsePositive = 0, falseNegative = 0;

            try
            {
                using (StreamReader testFileReader = new StreamReader(testFile))
                using (StreamReader outputFileReader = new StreamReader(outputFile))
                {
                    Trace.WriteLine("Writing incorrectly classified instances to disk....");

                    // The using block guarantees the report is flushed and closed even when a
                    // malformed line makes parsing throw half way through.
                    using (StreamWriter incorrectClassificationWriter = new StreamWriter(incorrect, false))
                    {
                        // Lines are read only inside the loop. A read ahead of the loop would be
                        // overwritten by the first iteration and silently drop the first instance.
                        while (!testFileReader.EndOfStream && !outputFileReader.EndOfStream)
                        {
                            String testFileLine   = testFileReader.ReadLine();
                            String outputFileLine = outputFileReader.ReadLine();

                            int testLabel = testFileLine[0].Equals('-') ? -1 : 1;

                            // The output is machine generated, so parse it independently of the
                            // user's regional settings.
                            double prediction = Double.Parse(outputFileLine,
                                                             System.Globalization.CultureInfo.InvariantCulture);
                            int outputLabel = prediction > 0 ? 1 : -1;

                            if (testLabel != outputLabel)
                            {
                                incorrectClassificationCount++;

                                if (testLabel == 1 && outputLabel == -1)
                                {
                                    falseNegative++;
                                }
                                else if (testLabel == -1 && outputLabel == 1)
                                {
                                    falsePositive++;
                                }

                                incorrectClassificationWriter.WriteLine(testFileLine + "#"
                                                                        + outputFileLine);
                            }
                        }

                        // Sanity check: every mismatch must be either a false positive or a false negative.
                        if (falsePositive + falseNegative != incorrectClassificationCount)
                        {
                            throw new Exception("problem with counting incorrect classifications");
                        }

                        Trace.WriteLine(incorrectClassificationCount + " wrong instances instances...done");
                        Trace.WriteLine("False positives: " + falsePositive);
                        Trace.WriteLine("False negative: " + falseNegative);

                        incorrectClassificationWriter.WriteLine("#False positives: " + falsePositive);
                        incorrectClassificationWriter.WriteLine("#False negative: " + falseNegative);
                    }

                    // One reader still having data means the two files are not aligned.
                    if (!testFileReader.EndOfStream || !outputFileReader.EndOfStream)
                    {
                        throw new Exception("Both files have different number of instances");
                    }
                }
            }
            finally
            {
                // Trace.AutoFlush is not set by this method, so push buffered messages to the log.
                Trace.Flush();
            }
        }
Beispiel #4
0
        /// <summary>
        /// Take the input file and construct N folds out of it
        /// </summary>
        /// <remarks>
        /// Creates <paramref name="outputFolder"/> with one sub-directory "fold&lt;i&gt;" per fold,
        /// each holding a file "&lt;i&gt;.data". Every line of the input is written to exactly one
        /// fold; the fold is chosen by <c>GetFairFold</c>, separately for positive and negative
        /// lines (that helper is not visible here - presumably it keeps the label proportions of
        /// each fold close to those of the whole file). Per-fold counts and ratios are printed to
        /// the console and verified against the totals.
        /// </remarks>
        /// <param name="exampleFile">The file that needs to be split. Will ensure that each fold
        /// contains roughly the same proportion of positive and negative labels as the original</param>
        /// <param name="numberOfFolds">Number of folds in the cross validation. Must be at least 1 and
        /// no larger than the number of instances.</param>
        /// <param name="outputFolder">Will be created to store the cross validation results. Must not
        /// exist yet.</param>
        /// <param name="isNegativeExample">Predicate to determine whether a feature vector has a
        /// negative label</param>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="numberOfFolds"/> is less
        /// than 1.</exception>
        /// <exception cref="Exception">The input file is missing, the output folder already exists,
        /// there are more folds than instances, or the written counts do not match the totals.</exception>
        public void ConstructNFolds(String exampleFile, int numberOfFolds, String outputFolder,
                                    Predicate <String> isNegativeExample)
        {
            if (numberOfFolds < 1)
            {
                throw new ArgumentOutOfRangeException("numberOfFolds", "Number of folds must be at least 1.");
            }

            // Check the input before anything tries to read it, so a missing file is reported
            // with this message rather than by whatever the counting helper does.
            if (!Utilities.ExistsInPath(exampleFile))
            {
                throw new Exception("Could not find input file");
            }

            int numPositiveExamples, numNegativeExamples;

            Utilities.GetNumberOfTrainingExamples(exampleFile, out numPositiveExamples, out numNegativeExamples,
                                                  isNegativeExample);

            if (numberOfFolds > numPositiveExamples + numNegativeExamples)
            {
                throw new Exception("Number of folds must be less than the number of instances. Try using"
                                    + " the leave one out cross validation option in SVMLearn.");
            }

            if (Directory.Exists(outputFolder))
            {
                throw new Exception("Cannot create directory " + outputFolder + ". It already exists");
            }

            StreamWriter[] files             = new StreamWriter[numberOfFolds];
            int[]          positiveInstances = new int[numberOfFolds];
            int[]          negativeInstances = new int[numberOfFolds];

            using (StreamReader reader = new StreamReader(exampleFile))
            {
                try
                {
                    Directory.CreateDirectory(outputFolder);

                    // One directory and one data file per fold.
                    for (int counter = 0; counter < numberOfFolds; counter++)
                    {
                        String foldDirectory = Path.Combine(outputFolder, "fold" + counter);
                        Directory.CreateDirectory(foldDirectory);
                        files[counter] = new StreamWriter(Path.Combine(foldDirectory, counter + ".data"));
                    }

                    while (!reader.EndOfStream)
                    {
                        String line = reader.ReadLine();
                        if (isNegativeExample(line))
                        {
                            int fold = this.GetFairFold(numberOfFolds, numNegativeExamples, negativeInstances, false);

                            files[fold].WriteLine(line);
                            negativeInstances[fold]++;
                        }
                        else
                        {
                            int fold = this.GetFairFold(numberOfFolds, numPositiveExamples, positiveInstances, true);

                            files[fold].WriteLine(line);
                            positiveInstances[fold]++;
                        }
                    }
                }
                finally
                {
                    // Flush and close every writer that was opened, including when creating a
                    // later fold or distributing a line failed.
                    foreach (StreamWriter writer in files)
                    {
                        if (writer != null)
                        {
                            writer.Dispose();
                        }
                    }
                }
            }

            Console.WriteLine("Verifying counts...");

            int positiveRunning = 0, negativeRunning = 0;
            for (int counter = 0; counter < numberOfFolds; counter++)
            {
                // Floating point division: a fold without positives prints Infinity/NaN
                // instead of throwing.
                Console.WriteLine("Fold " + counter + ": [" + positiveInstances[counter] +
                                  "+, " + negativeInstances[counter] + "-]" + " Ratio: " +
                                  ((double)(negativeInstances[counter]) / (double)(positiveInstances[counter])));
                positiveRunning += positiveInstances[counter];
                negativeRunning += negativeInstances[counter];
            }

            Console.WriteLine("Overall ratio: " + ((double)(negativeRunning) / (double)(positiveRunning)));

            if (positiveRunning != numPositiveExamples || negativeRunning != numNegativeExamples)
            {
                throw new Exception("Problem with counts");
            }

            Console.WriteLine("OK");
        }