Esempio n. 1
0
        /// <summary>
        /// Trains a tokenizer model from input files that are well formatted for
        /// a token event reader.
        /// </summary>
        /// <remarks>
        /// Each input file is opened, indexed and passed to the trainer in turn; the
        /// underlying reader is disposed as soon as that file has been processed, even
        /// if training throws.
        /// NOTE(review): TrainModel is invoked once per file on the same trainer —
        /// confirm whether successive calls accumulate or replace the trained parameters.
        /// </remarks>
        /// <param name="inputFiles">The collection of training input file paths</param>
        /// <param name="iterations">The number of iterations to run when training the model</param>
        /// <param name="cut">The minimum number of occurrences for statistical relevancy in the trained model</param>
        /// <param name="splitMarker">The character indicating a split in the files</param>
        /// <returns>The freshly trained GisModel</returns>
        public static GisModel Train(IEnumerable<string> inputFiles, int iterations, int cut, char splitMarker = '|')
        {
            var trainer = new GisTrainer(0.1);

            foreach (var inputFile in inputFiles)
            {
                // Dispose the reader deterministically so the file handle is released
                // once this file's events have been consumed.
                using (var dataReader = new StreamReader(inputFile))
                {
                    var eventReader = new TokenEventReader(dataReader, splitMarker);

                    trainer.TrainModel(iterations, new TwoPassDataIndexer(eventReader, cut));
                }
            }

            return new GisModel(trainer);
        }
Esempio n. 2
0
 /// <summary>
 /// Opens the training file at <paramref name="input"/>, wraps it in a
 /// <c>TokenEventReader</c> and delegates to the <c>Train</c> overload that
 /// accepts an event reader.
 /// </summary>
 /// <param name="input">Path of the training data file; resolved to a full path before opening.</param>
 /// <param name="output">Forwarded unchanged to the event-reader overload (presumably the destination of the trained model — confirm against that overload).</param>
 public static void Train(string input, string output)
 {
     // Dispose the reader once training has finished (or failed) so the file handle is not leaked.
     using (System.IO.StreamReader dataReader = new System.IO.StreamReader(new System.IO.FileInfo(input).FullName))
     {
         SharpEntropy.ITrainingEventReader eventReader = new TokenEventReader(dataReader);
         Train(eventReader, output);
     }
 }
 /// <summary>
 /// Opens the training file at <paramref name="input"/>, wraps it in a
 /// <c>TokenEventReader</c> and delegates to the <c>Train</c> overload that
 /// accepts an event reader.
 /// </summary>
 /// <param name="input">Path of the training data file; resolved to a full path before opening.</param>
 /// <param name="output">Forwarded unchanged to the event-reader overload (presumably the destination of the trained model — confirm against that overload).</param>
 public static void Train(string input, string output)
 {
     // Dispose the reader once training has finished (or failed) so the file handle is not leaked.
     using (System.IO.StreamReader dataReader = new System.IO.StreamReader(new System.IO.FileInfo(input).FullName))
     {
         SharpEntropy.ITrainingEventReader eventReader = new TokenEventReader(dataReader);
         Train(eventReader, output);
     }
 }
        /// <summary>
        /// Trains a tokenizer model from input files that are well formatted for
        /// a token event reader.
        /// </summary>
        /// <remarks>
        /// Each input file is opened, indexed and passed to the trainer in turn; the
        /// underlying reader is disposed as soon as that file has been processed, even
        /// if training throws.
        /// NOTE(review): TrainModel is invoked once per file on the same trainer —
        /// confirm whether successive calls accumulate or replace the trained parameters.
        /// </remarks>
        /// <param name="inputFiles">The collection of training input file paths</param>
        /// <param name="iterations">The number of iterations to run when training the model</param>
        /// <param name="cut">The minimum number of occurrences for statistical relevancy in the trained model</param>
        /// <param name="splitMarker">The character indicating a split in the files</param>
        /// <returns>The freshly trained GisModel</returns>
        public static GisModel Train(IEnumerable<string> inputFiles, int iterations, int cut, char splitMarker = '|')
        {
            var trainer = new GisTrainer(0.1);
            foreach (var inputFile in inputFiles)
            {
                // Dispose the reader deterministically so the file handle is released
                // once this file's events have been consumed.
                using (var dataReader = new StreamReader(inputFile))
                {
                    var eventReader = new TokenEventReader(dataReader, splitMarker);

                    trainer.TrainModel(iterations, new TwoPassDataIndexer(eventReader, cut));
                }
            }
            return new GisModel(trainer);
        }