/// <summary>
/// Runs the svm_learn executable on a training file, giving it at most one minute to finish.
/// </summary>
/// <remarks>
/// When a log file is supplied, the process-wide <see cref="Trace"/> listeners are replaced.
/// If either the executable or the training file cannot be found, a message is written to the
/// console and the whole process is terminated via <see cref="Environment.Exit"/> (exit code 0).
/// </remarks>
/// <param name="svmLearnPath">Path to the svm_learn executable. MUST INCLUDE FULL PATH
/// INCLUDING THE .EXE EXTENSION</param>
/// <param name="trainingFile">The training dataset</param>
/// <param name="modelFile">Model file to be created. Must have write permissions at this location
/// </param>
/// <param name="trainArgument">Optional extra argument string; passed through unchanged to
/// CreateLearnerProcess.</param>
/// <param name="logFile">Log file to be created. An existing file of the same name is deleted
/// first. When null or empty, no trace listeners are configured and nothing is logged.</param>
/// <param name="silent">Only consulted when <paramref name="logFile"/> is supplied: when false,
/// trace output is echoed to the console as well as written to the log file.</param>
public void ExecuteLearner(String svmLearnPath, String trainingFile, String modelFile, string trainArgument = null, String logFile = null, bool silent = true)
{
    const int learnerTimeoutMilliseconds = 1000 * 60 * 1;

    this.svmLearnOutput.Clear();
    bool loggingEnabled = !string.IsNullOrEmpty(logFile);

    if (loggingEnabled)
    {
        Trace.Listeners.Clear();
        if (!silent)
        {
            Trace.Listeners.Add(new TextWriterTraceListener(Console.Out));
        }

        if (File.Exists(logFile))
        {
            Console.WriteLine("\n\nLog file " + logFile + " already exists. Deleting");
            try
            {
                File.Delete(logFile);
            }
            catch (Exception)
            {
                // Best effort: a stale log is not fatal.
                Console.WriteLine("Failed to delete file");
            }
        }

        Trace.Listeners.Add(new TextWriterTraceListener(logFile));
        Trace.AutoFlush = true;
    }

    if (!Utilities.ExistsInPath(svmLearnPath) || !Utilities.ExistsInPath(trainingFile))
    {
        Console.WriteLine("Invalid file path");
        Environment.Exit(0);
    }

    Process svmLearnProcess = CreateLearnerProcess(svmLearnPath, trainingFile, modelFile, trainArgument);
    try
    {
        if (loggingEnabled)
        {
            Trace.WriteLine("\n\nLearning model using svmlight...");
            Trace.WriteLine("svm_learn " + svmLearnProcess.StartInfo.Arguments);
        }

        svmLearnProcess.Start();
        svmLearnProcess.BeginErrorReadLine();
        svmLearnProcess.BeginOutputReadLine();

        if (!svmLearnProcess.WaitForExit(learnerTimeoutMilliseconds))
        {
            try
            {
                svmLearnProcess.Kill();
            }
            catch (InvalidOperationException)
            {
                // The process exited on its own between the timeout and the kill request.
            }
        }

        // The timed overload can return before the asynchronous stdout/stderr readers have
        // delivered their last lines; the parameterless overload waits for them (and for a
        // killed process to actually terminate), so the captured output is complete.
        svmLearnProcess.WaitForExit();
    }
    finally
    {
        // Release the process handle even when starting or reading fails.
        svmLearnProcess.Close();
    }

    if (loggingEnabled)
    {
        Trace.WriteLine(this.svmLearnOutput.ToString());
    }
}
/// <summary>
/// Runs svm_classify on a test file, logs its output, and then records the misclassified
/// instances via <c>FindIncorrectlyClassifiedInstances</c>.
/// </summary>
/// <remarks>
/// Replaces the process-wide <see cref="Trace"/> listeners. If either the executable or the test
/// file cannot be found, a message is traced and the whole process is terminated via
/// <see cref="Environment.Exit"/> (exit code 0). The misclassification log is written next to
/// the log file as "&lt;log file name without extension&gt;-incorrect-log.txt".
/// </remarks>
/// <param name="svmClassifyPath">Enter full path, including .exe extension</param>
/// <param name="testFile">The test dataset</param>
/// <param name="modelFile">The model generated by svm_learn</param>
/// <param name="outputFile">The output to be generated by svm_classify</param>
/// <param name="logFile">Name of log file to be generated. An existing file of the same name is
/// deleted first. May be a bare file name; it is then resolved against the current directory.
/// </param>
/// <param name="incorrectFile">File that receives the incorrectly classified instances.</param>
/// <param name="silent">Whether to be silent: i.e. no output on the console. Log file is still
/// generated</param>
public void ExecuteClassifier(String svmClassifyPath, String testFile, String modelFile, String outputFile, String logFile, String incorrectFile, bool silent)
{
    // Start from an empty buffer, as ExecuteLearner does for its own output, so a second run
    // does not re-log the previous run's output.
    this.svmClassifyOutput.Clear();

    Trace.Listeners.Clear();
    if (!silent)
    {
        Trace.Listeners.Add(new TextWriterTraceListener(Console.Out));
    }

    if (File.Exists(logFile))
    {
        Console.WriteLine("\n\nLog file " + logFile + " already exists. Deleting");
        try
        {
            File.Delete(logFile);
        }
        catch (Exception)
        {
            // Deliberately ignored: failing to delete an old log is not fatal.
        }
    }

    Trace.Listeners.Add(new TextWriterTraceListener(logFile));
    Trace.AutoFlush = true;

    if (!Utilities.ExistsInPath(svmClassifyPath) || !Utilities.ExistsInPath(testFile))
    {
        Trace.WriteLine("Invalid file path");
        Environment.Exit(0);
    }

    Trace.WriteLine("Classifying test instances....");

    // 'using' releases the process handle even if Start or the read calls throw.
    using (Process svmClassifyProcess = new Process())
    {
        ProcessStartInfo startInfo = svmClassifyProcess.StartInfo;
        startInfo.FileName = svmClassifyPath;
        // Each path is quoted so that paths containing spaces stay single arguments.
        startInfo.Arguments = "\"" + testFile + "\" \"" + modelFile + "\" \"" + outputFile + "\"";
        startInfo.UseShellExecute = false;
        startInfo.RedirectStandardError = true;
        startInfo.RedirectStandardOutput = true;

        svmClassifyProcess.OutputDataReceived += new DataReceivedEventHandler(svmClassifyProcess_OutputDataReceived);
        svmClassifyProcess.ErrorDataReceived += new DataReceivedEventHandler(svmClassifyProcess_ErrorDataReceived);

        svmClassifyProcess.Start();
        svmClassifyProcess.BeginErrorReadLine();
        svmClassifyProcess.BeginOutputReadLine();
        svmClassifyProcess.WaitForExit();
    }

    Trace.WriteLine(this.svmClassifyOutput.ToString());

    // Resolve the full path BEFORE taking the directory: for a bare file name such as
    // "run.log" Path.GetDirectoryName returns "", and Path.GetFullPath("") throws.
    String fullLogPath = Path.GetFullPath(logFile);
    String incorrectLogFile = Path.Combine(
        Path.GetDirectoryName(fullLogPath),
        Path.GetFileNameWithoutExtension(fullLogPath) + "-incorrect-log.txt");

    this.FindIncorrectlyClassifiedInstances(testFile, outputFile, incorrectFile, incorrectLogFile);
}
/// <summary>
/// Finds all those instances which were incorrectly classified by svmlight.
/// </summary>
/// <remarks>
/// The test file and the output file are read in lock-step, one line each per instance. An
/// instance is treated as negative when its test-file line starts with '-', and as predicted
/// positive when the number on the matching output line is greater than zero. Each mismatch is
/// written to <paramref name="incorrect"/> as "&lt;test line&gt;#&lt;output line&gt;", followed
/// by two summary lines with the false positive / false negative counts.
/// Replaces the process-wide <see cref="Trace"/> listeners with a console listener and a file
/// listener. If either input file cannot be found, a message is traced and the whole process is
/// terminated via <see cref="Environment.Exit"/> (exit code 0).
/// </remarks>
/// <param name="testFile">The test dataset</param>
/// <param name="outputFile">The output file generated by svm_classify</param>
/// <param name="incorrect">The file to be generated by this function: contains all the
/// feature vectors that were incorrectly classified by svm_classify. Overwritten if it exists.
/// </param>
/// <param name="logFile">File that receives the trace output of this method.</param>
/// <exception cref="InvalidDataException">The two files have a different number of lines, or the
/// test file contains an empty line.</exception>
/// <exception cref="FormatException">A line of the output file is not a number.</exception>
public void FindIncorrectlyClassifiedInstances(String testFile, String outputFile, String incorrect, String logFile)
{
    Trace.Listeners.Clear();
    Trace.Listeners.Add(new TextWriterTraceListener(Console.Out));
    Trace.Listeners.Add(new TextWriterTraceListener(logFile));

    if (!Utilities.ExistsInPath(testFile) || !Utilities.ExistsInPath(outputFile))
    {
        Trace.WriteLine("Invalid file path");
        Environment.Exit(0);
    }

    int falsePositive = 0;
    int falseNegative = 0;

    try
    {
        using (StreamReader testFileReader = new StreamReader(testFile))
        using (StreamReader outputFileReader = new StreamReader(outputFile))
        {
            Trace.WriteLine("Writing incorrectly classified instances to disk....");

            // 'using' closes the writer even when a malformed line makes the loop throw.
            using (StreamWriter incorrectClassificationWriter = new StreamWriter(incorrect, false))
            {
                // Read one pair, process it, then read the next pair, so the first
                // instance is compared like every other one.
                String testFileLine = testFileReader.ReadLine();
                String outputFileLine = outputFileReader.ReadLine();

                while (testFileLine != null && outputFileLine != null)
                {
                    if (testFileLine.Length == 0)
                    {
                        throw new InvalidDataException("Empty line in test file " + testFile);
                    }

                    bool expectedPositive = testFileLine[0] != '-';

                    // Invariant culture: '.' is the decimal separator regardless of the
                    // regional settings of the machine running this code.
                    bool predictedPositive =
                        Double.Parse(outputFileLine, System.Globalization.CultureInfo.InvariantCulture) > 0;

                    if (expectedPositive != predictedPositive)
                    {
                        if (expectedPositive)
                        {
                            falseNegative++;
                        }
                        else
                        {
                            falsePositive++;
                        }

                        incorrectClassificationWriter.WriteLine(testFileLine + "#" + outputFileLine);
                    }

                    testFileLine = testFileReader.ReadLine();
                    outputFileLine = outputFileReader.ReadLine();
                }

                // Every mismatch is exactly one of the two kinds, so the total is their sum.
                int incorrectClassificationCount = falsePositive + falseNegative;

                Trace.WriteLine(incorrectClassificationCount + " wrong instances instances...done");
                Trace.WriteLine("False positives: " + falsePositive);
                Trace.WriteLine("False negative: " + falseNegative);

                incorrectClassificationWriter.WriteLine("#False positives: " + falsePositive);
                incorrectClassificationWriter.WriteLine("#False negative: " + falseNegative);
                incorrectClassificationWriter.Flush();

                // The loop stops as soon as either file runs out; a leftover line in the other
                // file means the two files do not describe the same set of instances.
                if (testFileLine != null || outputFileLine != null)
                {
                    throw new InvalidDataException("Both files have different number of instances");
                }
            }
        }
    }
    finally
    {
        // Trace.AutoFlush is not set here, so push buffered trace output to the log file.
        Trace.Flush();
    }
}
/// <summary>
/// Take the input file and construct N folds out of it.
/// </summary>
/// <remarks>
/// Creates <paramref name="outputFolder"/> with one sub-folder per fold ("fold0", "fold1", ...),
/// each containing a single "&lt;fold index&gt;.data" file. Every line of the input file is
/// written to exactly one fold; the fold is chosen by <c>GetFairFold</c>, separately for
/// positive and negative examples. Per-fold counts and negative/positive ratios are printed to
/// the console, and the totals are verified against the counts reported by
/// <c>Utilities.GetNumberOfTrainingExamples</c>.
/// </remarks>
/// <param name="exampleFile">The file that needs to be split. Will ensure that each fold
/// contains roughly the same proportion of positive and negative labels as the original</param>
/// <param name="numberOfFolds">Number of folds in the cross validation. Must be positive and
/// must not exceed the number of instances.</param>
/// <param name="outputFolder">Will be created to store the cross validation results. Must not
/// exist yet.</param>
/// <param name="isNegativeExample">Predicate to determine whether a feature vector has a
/// negative label</param>
/// <exception cref="ArgumentNullException"><paramref name="isNegativeExample"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="numberOfFolds"/> is not positive.</exception>
/// <exception cref="ArgumentException">There are more folds than instances.</exception>
/// <exception cref="FileNotFoundException">The input file could not be found.</exception>
/// <exception cref="IOException">The output folder already exists.</exception>
/// <exception cref="InvalidOperationException">The per-fold counts do not add up to the totals.</exception>
public void ConstructNFolds(String exampleFile, int numberOfFolds, String outputFolder, Predicate<String> isNegativeExample)
{
    if (isNegativeExample == null)
    {
        throw new ArgumentNullException("isNegativeExample");
    }

    if (numberOfFolds <= 0)
    {
        throw new ArgumentOutOfRangeException("numberOfFolds", "Number of folds must be positive");
    }

    // Verify the input exists before it is read for counting, so that a missing file is
    // reported with this message rather than by whatever the counting helper throws.
    if (!Utilities.ExistsInPath(exampleFile))
    {
        throw new FileNotFoundException("Could not find input file");
    }

    int numPositiveExamples, numNegativeExamples;
    Utilities.GetNumberOfTrainingExamples(exampleFile, out numPositiveExamples, out numNegativeExamples, isNegativeExample);

    if (numberOfFolds > numPositiveExamples + numNegativeExamples)
    {
        throw new ArgumentException("Number of folds must be less than the number of instances. Try using" + " the leave one out cross validation option in SVMLearn.");
    }

    if (Directory.Exists(outputFolder))
    {
        throw new IOException("Cannot create directory " + outputFolder + ". It already exists");
    }

    StreamWriter[] files = new StreamWriter[numberOfFolds];
    int[] positiveInstances = new int[numberOfFolds];
    int[] negativeInstances = new int[numberOfFolds];

    try
    {
        using (StreamReader reader = new StreamReader(exampleFile))
        {
            Directory.CreateDirectory(outputFolder);

            // Create one directory and one data file per fold.
            for (int counter = 0; counter < numberOfFolds; counter++)
            {
                String foldFolder = Path.Combine(outputFolder, "fold" + counter);
                Directory.CreateDirectory(foldFolder);
                files[counter] = new StreamWriter(Path.Combine(foldFolder, counter + ".data"));
            }

            String line;
            while ((line = reader.ReadLine()) != null)
            {
                // Positive and negative examples are distributed independently, each against
                // its own total and its own per-fold tally.
                bool isNegative = isNegativeExample(line);
                int[] tally = isNegative ? negativeInstances : positiveInstances;
                int classTotal = isNegative ? numNegativeExamples : numPositiveExamples;

                int fold = this.GetFairFold(numberOfFolds, classTotal, tally, !isNegative);
                files[fold].WriteLine(line);
                tally[fold]++;
            }
        }
    }
    finally
    {
        // Flush and close every writer that was opened, even if the split failed part-way.
        foreach (StreamWriter writer in files)
        {
            if (writer != null)
            {
                writer.Dispose();
            }
        }
    }

    Console.WriteLine("Verifying counts...");
    int positiveRunning = 0, negativeRunning = 0;
    for (int counter = 0; counter < numberOfFolds; counter++)
    {
        // Floating-point division: a fold without positives prints Infinity/NaN, it does not throw.
        Console.WriteLine("Fold " + counter + ": [" + positiveInstances[counter] + "+, " + negativeInstances[counter] + "-]" + " Ratio: " + ((double)(negativeInstances[counter]) / (double)(positiveInstances[counter])));
        positiveRunning += positiveInstances[counter];
        negativeRunning += negativeInstances[counter];
    }

    Console.WriteLine("Overall ratio: " + ((double)(negativeRunning) / (double)(positiveRunning)));

    if (positiveRunning != numPositiveExamples || negativeRunning != numNegativeExamples)
    {
        throw new InvalidOperationException("Problem with counts");
    }

    Console.WriteLine("OK");
}