/// <summary>
/// Trains a tokenizer model from input files that are well formatted for
/// a token event reader.
/// </summary>
/// <param name="inputFiles">The collection of training input file paths</param>
/// <param name="iterations">The number of iterations to run when training the model</param>
/// <param name="cut">The minimum number of occurrences for statistical relevancy in the trained model</param>
/// <param name="splitMarker">The character indicating a split in the files</param>
/// <returns>The freshly trained GisModel</returns>
public static GisModel Train(IEnumerable<string> inputFiles, int iterations, int cut, char splitMarker = '|')
{
    // NOTE(review): 0.1 is passed straight to the GisTrainer constructor; presumably a
    // smoothing parameter - confirm against GisTrainer.
    var trainer = new GisTrainer(0.1);

    foreach (var inputFile in inputFiles)
    {
        // Dispose the reader once this file has been processed so that file handles
        // are released deterministically, even if training throws.
        using (var dataReader = new StreamReader(inputFile))
        {
            var eventReader = new TokenEventReader(dataReader, splitMarker);

            // NOTE(review): TrainModel is invoked once per file on the same trainer; whether
            // successive calls accumulate or replace earlier training is not visible here - confirm.
            trainer.TrainModel(iterations, new TwoPassDataIndexer(eventReader, cut));
        }
    }

    return new GisModel(trainer);
}
/// <summary>
/// Opens the given input file, wraps it in a <c>TokenEventReader</c> and forwards the
/// resulting event reader, together with <paramref name="output"/>, to the
/// <c>Train(ITrainingEventReader, string)</c> overload.
/// </summary>
/// <param name="input">Path of the training input file; it is resolved to a full path before being opened</param>
/// <param name="output">Value passed unchanged to the event-reader overload (presumably the destination of the trained model - confirm against that overload)</param>
public static void Train(string input, string output)
{
    // The using statement guarantees the file handle is released once training has
    // finished, including when the delegated Train call throws.
    using (System.IO.StreamReader dataReader = new System.IO.StreamReader(new System.IO.FileInfo(input).FullName))
    {
        // Keep the interface-typed local so the same overload is selected as before.
        SharpEntropy.ITrainingEventReader eventReader = new TokenEventReader(dataReader);
        Train(eventReader, output);
    }
}
/// <summary>
/// Trains a tokenizer model from input files that are well formatted for
/// a token event reader.
/// </summary>
/// <param name="inputFiles">The collection of training input file paths</param>
/// <param name="iterations">The number of iterations to run when training the model</param>
/// <param name="cut">The minimum number of occurrences for statistical relevancy in the trained model</param>
/// <param name="splitMarker">The character indicating a split in the files</param>
/// <returns>The freshly trained GisModel</returns>
public static GisModel Train(IEnumerable<string> inputFiles, int iterations, int cut, char splitMarker = '|')
{
    // NOTE(review): 0.1 is passed straight to the GisTrainer constructor; presumably a
    // smoothing parameter - confirm against GisTrainer.
    var trainer = new GisTrainer(0.1);

    foreach (var inputFile in inputFiles)
    {
        // Each file gets its own reader, disposed as soon as the file has been consumed,
        // so handles do not pile up across iterations or leak when training throws.
        using (var dataReader = new StreamReader(inputFile))
        {
            var eventReader = new TokenEventReader(dataReader, splitMarker);
            var indexer = new TwoPassDataIndexer(eventReader, cut);

            // NOTE(review): TrainModel is invoked once per file on the same trainer; whether
            // successive calls accumulate or replace earlier training is not visible here - confirm.
            trainer.TrainModel(iterations, indexer);
        }
    }

    return new GisModel(trainer);
}