/// <summary>
/// Trains a tokenizer model from input files well formatted for
/// a token event reader.
/// </summary>
/// <param name="inputFilePaths">The collection of training input files</param>
/// <param name="iterations">The number of iterations to run when training the model</param>
/// <param name="cut">The minimum number of occurrences for statistical relevancy in the trained model</param>
/// <param name="splitMarker">The character indicating a split in the files</param>
/// <param name="includeAllCapsExamples">
/// Forwarded unchanged to the <c>MultipleFileTokenEventReader</c>; presumably controls
/// whether all-caps variants of the examples are included (confirm against the reader).
/// </param>
/// <returns>The freshly trained GisModel</returns>
/// <remarks>
/// Every file opened here is closed before the method returns, whether training
/// succeeds or throws.
/// </remarks>
public static GisModel Train(IEnumerable<string> inputFilePaths, int iterations, int cut, char splitMarker = '|', bool includeAllCapsExamples = false)
{
    var trainer = new GisTrainer(0.1);
    var dataReaders = new List<StreamReader>();

    try
    {
        // Open the readers inside the try block so that a failure on a later path
        // (e.g. a missing file) still releases the ones that were already opened.
        foreach (var path in inputFilePaths)
        {
            dataReaders.Add(new StreamReader(path));
        }

        // train the model
        var eventReader = new MultipleFileTokenEventReader(dataReaders, splitMarker, includeAllCapsExamples);
        trainer.TrainModel(iterations, new TwoPassDataIndexer(eventReader, cut));

        // Build the model while the readers are still open; they are released in finally.
        return new GisModel(trainer);
    }
    finally
    {
        foreach (var reader in dataReaders)
        {
            reader.Dispose();
        }
    }
}
/// <summary>
/// Trains a tokenizer model from input files well formatted for
/// a token event reader.
/// </summary>
/// <param name="inputFilePaths">The collection of training input files</param>
/// <param name="iterations">The number of iterations to run when training the model</param>
/// <param name="cut">The minimum number of occurrences for statistical relevancy in the trained model</param>
/// <param name="splitMarker">The character indicating a split in the files</param>
/// <param name="includeAllCapsExamples">
/// Forwarded unchanged to the <c>MultipleFileTokenEventReader</c>; presumably controls
/// whether all-caps variants of the examples are included (confirm against the reader).
/// </param>
/// <returns>The freshly trained GisModel</returns>
/// <remarks>
/// Every file opened here is closed before the method returns, whether training
/// succeeds or throws.
/// </remarks>
public static GisModel Train(IEnumerable<string> inputFilePaths, int iterations, int cut, char splitMarker = '|', bool includeAllCapsExamples = false)
{
    var trainer = new GisTrainer(0.1);
    var dataReaders = new List<StreamReader>();

    try
    {
        // Open the readers inside the try block so that a failure on a later path
        // (e.g. a missing file) still releases the ones that were already opened.
        foreach (var path in inputFilePaths)
        {
            var dataReader = new StreamReader(path);
            dataReaders.Add(dataReader);
        }

        // train the model
        var eventReader = new MultipleFileTokenEventReader(dataReaders, splitMarker, includeAllCapsExamples);
        var indexer = new TwoPassDataIndexer(eventReader, cut);
        trainer.TrainModel(iterations, indexer);

        // Build the model while the readers are still open; they are released in finally.
        return new GisModel(trainer);
    }
    finally
    {
        foreach (var dataReader in dataReaders)
        {
            dataReader.Dispose();
        }
    }
}