Example #1
0
 /// <summary>
 /// Initializes a new instance of the <see cref="SentenceDetectorME" /> using the given sentence model.
 /// </summary>
 /// <param name="sentenceModel">
 /// The model supplying the maxent model, the factory used to obtain the context
 /// generator and end-of-sentence scanner, and the <c>UseTokenEnd</c> flag.
 /// </param>
 /// <exception cref="System.ArgumentNullException">
 /// The <paramref name="sentenceModel"/> is <c>null</c>.
 /// </exception>
 public SentenceDetectorME(SentenceModel sentenceModel)
 {
     // Fail fast with a descriptive exception instead of a NullReferenceException
     // on the first property access below.
     if (sentenceModel == null)
     {
         throw new System.ArgumentNullException(nameof(sentenceModel));
     }

     model       = sentenceModel.MaxentModel;
     cgen        = sentenceModel.Factory.GetContextGenerator();
     scanner     = sentenceModel.Factory.GetEndOfSentenceScanner();
     useTokenEnd = sentenceModel.UseTokenEnd;
 }
Example #2
0
        /// <summary>
        /// Initializes a new instance of the <see cref="SentenceDetectorME" /> using the given sentence model.
        /// </summary>
        /// <param name="sentenceModel">The sentence model.</param>
        /// <remarks>
        /// When the model carries an abbreviation dictionary, every token of every
        /// abbreviation is recorded together with its length, and the string comparison
        /// mode is chosen from the dictionary's case-sensitivity flag.
        /// </remarks>
        /// <exception cref="System.ArgumentNullException">
        /// The <paramref name="sentenceModel"/> is <c>null</c>.
        /// </exception>
        public SentenceDetectorME(SentenceModel sentenceModel)
        {
            if (sentenceModel == null)
            {
                throw new ArgumentNullException(nameof(sentenceModel));
            }

            model       = sentenceModel.MaxentModel;
            cgen        = sentenceModel.Factory.GetContextGenerator();
            scanner     = sentenceModel.Factory.GetEndOfSentenceScanner();
            useTokenEnd = sentenceModel.UseTokenEnd;

            // No abbreviation dictionary: leave the abbreviation fields untouched.
            if (sentenceModel.Abbreviations == null)
            {
                return;
            }

            stringComparison = sentenceModel.Abbreviations.IsCaseSensitive
                ? StringComparison.Ordinal
                : StringComparison.OrdinalIgnoreCase;

            abbreviationTokens = new Dictionary <string, int>();

            foreach (var abbreviation in sentenceModel.Abbreviations)
            {
                foreach (var token in abbreviation.Tokens)
                {
                    // The same token may occur in more than one abbreviation entry.
                    // Dictionary.Add would throw ArgumentException on the repeat, so
                    // assign through the indexer; equal keys always map to the same length.
                    abbreviationTokens[token] = token.Length;
                }
            }
        }
Example #3
0
        // Utilities ----------------------------

        /// <summary>
        /// Trains a model from a single file by wrapping <paramref name="filePath"/>
        /// in a one-element list and delegating to the list-based overload.
        /// Use this training method if you wish to supply an end of
        /// sentence scanner which provides a different set of ending chars
        /// other than the default ones. They are "\\.|!|\\?|\\\"|\\)".
        /// </summary>
        /// <param name="filePath">The single training file to use.</param>
        /// <param name="iterations">Forwarded unchanged to the list-based overload.</param>
        /// <param name="cut">Forwarded unchanged to the list-based overload.</param>
        /// <param name="scanner">Forwarded unchanged to the list-based overload.</param>
        /// <returns>Whatever the list-based overload returns.</returns>
        public static GisModel TrainModel(string filePath, int iterations, int cut, IEndOfSentenceScanner scanner)
        {
            var singleFile = new List <string>();
            singleFile.Add(filePath);

            return TrainModel(singleFile, iterations, cut, scanner);
        }
 /// <summary>
 /// Stores the reader, scanner and context generator, then primes the event
 /// reader: if the data reader has a token it is consumed, the following token
 /// (when present) is kept in <c>mNext</c>, and events are added for the first token.
 /// </summary>
 /// <param name="dataReader">Source of training text tokens.</param>
 /// <param name="scanner">End-of-sentence scanner kept for later use.</param>
 /// <param name="contextGenerator">Context generator kept for later use.</param>
 public SentenceDetectionEventReader(SharpEntropy.ITrainingDataReader <string> dataReader, IEndOfSentenceScanner scanner, SharpEntropy.IContextGenerator <Util.Pair <System.Text.StringBuilder, int> > contextGenerator)
 {
     mDataReader       = dataReader;
     mScanner          = scanner;
     mContextGenerator = contextGenerator;

     // Nothing to prime when the reader is already exhausted.
     if (!mDataReader.HasNext())
     {
         return;
     }

     string firstToken = mDataReader.NextToken();

     // Look one token ahead before generating events for the first one.
     if (mDataReader.HasNext())
     {
         mNext = mDataReader.NextToken();
     }

     AddNewEvents(firstToken);
 }
 /// <summary>
 /// Keeps the supplied collaborators and reads ahead: the first token from the
 /// data reader is consumed, the one after it (if any) is stored in <c>mNext</c>,
 /// and <c>AddNewEvents</c> is invoked with the first token.
 /// </summary>
 /// <param name="dataReader">Source of training text tokens.</param>
 /// <param name="scanner">End-of-sentence scanner kept for later use.</param>
 /// <param name="contextGenerator">Context generator kept for later use.</param>
 public SentenceDetectionEventReader(SharpEntropy.ITrainingDataReader<string> dataReader, IEndOfSentenceScanner scanner, SharpEntropy.IContextGenerator<Util.Pair<System.Text.StringBuilder, int>> contextGenerator)
 {
     mDataReader = dataReader;
     mScanner = scanner;
     mContextGenerator = contextGenerator;

     // An empty reader leaves the instance without any pending events.
     if (!mDataReader.HasNext())
     {
         return;
     }

     string head = mDataReader.NextToken();

     if (mDataReader.HasNext())
     {
         mNext = mDataReader.NextToken();
     }

     AddNewEvents(head);
 }
        /// <summary>
        /// Stores the reader, scanner and context generator and, when the reader
        /// is not empty, consumes its first token, buffers the next one in
        /// <c>_next</c> if available, and adds events for the first token.
        /// </summary>
        /// <param name="dataReader">Source of training text tokens.</param>
        /// <param name="scanner">End-of-sentence scanner kept for later use.</param>
        /// <param name="contextGenerator">Context generator kept for later use.</param>
        public SentenceDetectionEventReader(
            ITrainingDataReader<string> dataReader,
            IEndOfSentenceScanner scanner,
            IContextGenerator<Tuple<StringBuilder, int>> contextGenerator)
        {
            _dataReader = dataReader;
            _scanner = scanner;
            _contextGenerator = contextGenerator;

            // Nothing to prime when the reader is already exhausted.
            if (!_dataReader.HasNext())
            {
                return;
            }

            string firstToken = _dataReader.NextToken();

            // Look one token ahead before generating events for the first one.
            if (_dataReader.HasNext())
            {
                _next = _dataReader.NextToken();
            }

            AddNewEvents(firstToken);
        }
 /// <summary>
 /// Keeps the supplied collaborators and reads ahead: the first token from the
 /// data reader is consumed, the one after it (if any) is stored in <c>_next</c>,
 /// and <c>AddNewEvents</c> is invoked with the first token.
 /// </summary>
 /// <param name="dataReader">Source of training text tokens.</param>
 /// <param name="scanner">End-of-sentence scanner kept for later use.</param>
 /// <param name="contextGenerator">Context generator kept for later use.</param>
 public SentenceDetectionEventReader(
     ITrainingDataReader <string> dataReader,
     IEndOfSentenceScanner scanner,
     IContextGenerator <Tuple <StringBuilder, int> > contextGenerator)
 {
     _dataReader       = dataReader;
     _scanner          = scanner;
     _contextGenerator = contextGenerator;

     // An empty reader leaves the instance without any pending events.
     if (!_dataReader.HasNext())
     {
         return;
     }

     string head = _dataReader.NextToken();

     if (_dataReader.HasNext())
     {
         _next = _dataReader.NextToken();
     }

     AddNewEvents(head);
 }
        /// <summary>
        /// Initializes a new instance of the <see cref="SentenceDetectorME" /> using the given sentence model.
        /// </summary>
        /// <param name="sentenceModel">The sentence model.</param>
        /// <remarks>
        /// When the model carries an abbreviation dictionary, every token of every
        /// abbreviation is recorded together with its length, and the string comparison
        /// mode is chosen from the dictionary's case-sensitivity flag.
        /// </remarks>
        /// <exception cref="System.ArgumentNullException">
        /// The <paramref name="sentenceModel"/> is <c>null</c>.
        /// </exception>
        public SentenceDetectorME(SentenceModel sentenceModel) {
            if (sentenceModel == null) {
                throw new ArgumentNullException("sentenceModel");
            }

            model = sentenceModel.MaxentModel;
            cgen = sentenceModel.Factory.GetContextGenerator();
            scanner = sentenceModel.Factory.GetEndOfSentenceScanner();
            useTokenEnd = sentenceModel.UseTokenEnd;

            // No abbreviation dictionary: leave the abbreviation fields untouched.
            if (sentenceModel.Abbreviations == null) {
                return;
            }

            stringComparison = sentenceModel.Abbreviations.IsCaseSensitive
                ? StringComparison.Ordinal
                : StringComparison.OrdinalIgnoreCase;

            abbreviationTokens = new Dictionary<string, int>();

            foreach (var abbreviation in sentenceModel.Abbreviations) {
                foreach (var token in abbreviation.Tokens) {
                    // The same token may occur in more than one abbreviation entry.
                    // Dictionary.Add would throw ArgumentException on the repeat, so
                    // assign through the indexer; equal keys always map to the same length.
                    abbreviationTokens[token] = token.Length;
                }
            }
        }
 /// <summary>
 /// Creates a detector by delegating to the three-argument constructor with a
 /// <c>SentenceDetectionContextGenerator</c> built from the characters returned by
 /// <c>scanner.GetPotentialEndOfSentenceCharacters()</c>.
 /// </summary>
 /// <param name="model">The model forwarded to the three-argument constructor.</param>
 /// <param name="scanner">
 /// The scanner forwarded to the three-argument constructor; it is dereferenced
 /// here, so it must not be <c>null</c>.
 /// </param>
 public MaximumEntropySentenceDetector(IMaximumEntropyModel model, IEndOfSentenceScanner scanner)
     : this(
         model,
         new SentenceDetectionContextGenerator(scanner.GetPotentialEndOfSentenceCharacters().ToArray()),
         scanner)
 {
 }
        /// <summary>
        /// Trains a GIS model from the lines of a single text file.
        /// Use this training method if you wish to supply an end of
        /// sentence scanner which provides a different set of ending chars
        /// other than the default ones.  They are "\\.|!|\\?|\\\"|\\)".
        /// </summary>
        /// <param name="inFile">Path of the training file to read.</param>
        /// <param name="iterations">Forwarded to <c>GisTrainer.TrainModel</c>.</param>
        /// <param name="cut">Forwarded to <c>GisTrainer.TrainModel</c>.</param>
        /// <param name="scanner">End-of-sentence scanner handed to the event reader.</param>
        /// <returns>A <c>GisModel</c> constructed from the trainer after training.</returns>
        public static SharpEntropy.GisModel TrainModel(string inFile, int iterations, int cut, IEndOfSentenceScanner scanner)
        {
            // NOTE(review): the file is decoded as UTF-7, which newer .NET versions mark
            // obsolete - confirm that the training files really require this encoding.
            using (var streamReader = new System.IO.StreamReader(inFile, System.Text.Encoding.UTF7))
            {
                SharpEntropy.ITrainingDataReader<string> dataReader = new SharpEntropy.PlainTextByLineDataReader(streamReader);
                SharpEntropy.ITrainingEventReader eventReader = new SentenceDetectionEventReader(dataReader, scanner);

                var trainer = new SharpEntropy.GisTrainer();
                trainer.TrainModel(eventReader, iterations, cut);

                // The model is built while the stream is still open, as before.
                var trainedModel = new SharpEntropy.GisModel(trainer);
                return trainedModel;
            }
        }
 /// <summary>
 /// Creates a new <code>MaximumEntropySentenceDetector</code> instance from its
 /// three collaborators, which are stored as given.
 /// </summary>
 /// <param name="model">
 /// The maximum entropy model this detector will use to evaluate
 /// end-of-sentence decisions.
 /// </param>
 /// <param name="contextGenerator">
 /// The context generator this detector will use to turn strings into
 /// contexts for the model to evaluate.
 /// </param>
 /// <param name="scanner">
 /// The end-of-sentence scanner this detector will use to locate
 /// end of sentence indexes.
 /// </param>
 public MaximumEntropySentenceDetector(SharpEntropy.IMaximumEntropyModel model, SharpEntropy.IContextGenerator<Util.Pair<System.Text.StringBuilder, int>> contextGenerator, IEndOfSentenceScanner scanner)
 {
     // Plain field assignments; no validation is performed here.
     mScanner = scanner;
     mContextGenerator = contextGenerator;
     mModel = model;
 }
Example #12
0
 /// <summary>
 /// Initializes a new instance of the <see cref="SentenceEventStream"/> class,
 /// passing the samples to the base class and keeping the context generator
 /// and scanner for later use.
 /// </summary>
 /// <param name="samples">The sample stream handed to the base constructor.</param>
 /// <param name="cg">The sentence context generator.</param>
 /// <param name="scanner">The end of sentence scanner.</param>
 public SentenceEventStream(
     IObjectStream <SentenceSample> samples,
     ISentenceContextGenerator cg,
     IEndOfSentenceScanner scanner)
     : base(samples)
 {
     this.scanner = scanner;
     this.cg      = cg;
 }
 /// <summary>
 /// Class constructor which uses the supplied end-of-sentence scanner and
 /// delegates to the three-argument constructor.
 /// </summary>
 /// <remarks>
 /// The context generator is built from
 /// <c>DefaultEndOfSentenceScanner.GetEndOfSentenceCharacters()</c>, not from
 /// <paramref name="scanner"/>.
 /// </remarks>
 /// <param name="dataReader">Source of training text, forwarded unchanged.</param>
 /// <param name="scanner">End-of-sentence scanner, forwarded unchanged.</param>
 public SentenceDetectionEventReader(
     SharpEntropy.ITrainingDataReader<string> dataReader,
     IEndOfSentenceScanner scanner)
     : this(
         dataReader,
         scanner,
         new SentenceDetectionContextGenerator(DefaultEndOfSentenceScanner.GetEndOfSentenceCharacters()))
 {
 }
		/// <summary>
		/// Class constructor which uses the supplied end-of-sentence scanner and
		/// delegates to the three-argument constructor, building the context
		/// generator from <c>scanner.GetPotentialEndOfSentenceCharacters()</c>.
		/// </summary>
		/// <param name="dataReader">Source of training text, forwarded unchanged.</param>
		/// <param name="scanner">
		/// End-of-sentence scanner; it is dereferenced here, so it must not be <c>null</c>.
		/// </param>
		public SentenceDetectionEventReader(ITrainingDataReader<string> dataReader, IEndOfSentenceScanner scanner)
			: this(
				dataReader,
				scanner,
				new SentenceDetectionContextGenerator(scanner.GetPotentialEndOfSentenceCharacters().ToArray()))
		{
		}
 /// <summary>
 /// Class constructor which uses the supplied end-of-sentence scanner and
 /// delegates to the three-argument constructor.
 /// </summary>
 /// <remarks>
 /// The context generator is built from
 /// <c>DefaultEndOfSentenceScanner.GetEndOfSentenceCharacters()</c>, not from
 /// <paramref name="scanner"/>.
 /// </remarks>
 /// <param name="dataReader">Source of training text, forwarded unchanged.</param>
 /// <param name="scanner">End-of-sentence scanner, forwarded unchanged.</param>
 public SentenceDetectionEventReader(
     SharpEntropy.ITrainingDataReader <string> dataReader,
     IEndOfSentenceScanner scanner)
     : this(
         dataReader,
         scanner,
         new SentenceDetectionContextGenerator(DefaultEndOfSentenceScanner.GetEndOfSentenceCharacters()))
 {
 }
 /// <summary>
 /// Creates the detector by loading a <c>GisModel</c> through a
 /// <c>BinaryGisModelReader</c> constructed from <paramref name="name"/> and
 /// passing it, together with <paramref name="scanner"/>, to the base constructor.
 /// </summary>
 /// <param name="name">
 /// Value given to <c>BinaryGisModelReader</c>; presumably the model file path - confirm against the reader.
 /// </param>
 /// <param name="scanner">End-of-sentence scanner forwarded to the base constructor.</param>
 public EnglishMaximumEntropySentenceDetector(string name, IEndOfSentenceScanner scanner)
     : base(
         new SharpEntropy.GisModel(new SharpEntropy.IO.BinaryGisModelReader(name)),
         scanner)
 {
 }
 /// <summary>
 /// Creates the detector by loading a <c>GisModel</c> through a
 /// <c>BinaryGisModelReader</c> constructed from <paramref name="name"/> and
 /// passing it, together with <paramref name="scanner"/>, to the base constructor.
 /// </summary>
 /// <param name="name">
 /// Value given to <c>BinaryGisModelReader</c>; presumably the model file path - confirm against the reader.
 /// </param>
 /// <param name="scanner">End-of-sentence scanner forwarded to the base constructor.</param>
 public EnglishMaximumEntropySentenceDetector(string name, IEndOfSentenceScanner scanner)
     : base(
         new SharpEntropy.GisModel(
             new SharpEntropy.IO.BinaryGisModelReader(name)),
         scanner)
 {
 }
        /// <summary>
        /// Trains a sentence-detection model from the text of several files and
        /// closes every file it opened before returning.
        /// </summary>
        /// <param name="filePaths">Paths of the training files; each is opened with a <see cref="StreamReader"/>.</param>
        /// <param name="iterations">Forwarded to <c>GisTrainer.TrainModel</c>.</param>
        /// <param name="cut">Forwarded to <c>GisTrainer.TrainModel</c>.</param>
        /// <param name="scanner">End-of-sentence scanner handed to the event reader.</param>
        /// <returns>A <c>GisModel</c> constructed from the trainer after training.</returns>
        /// <exception cref="System.ArgumentNullException"><paramref name="filePaths"/> is <c>null</c>.</exception>
        public static GisModel TrainModel(IEnumerable<string> filePaths, int iterations, int cut, IEndOfSentenceScanner scanner)
        {
            if (filePaths == null)
            {
                throw new System.ArgumentNullException("filePaths");
            }

            var trainer = new GisTrainer();
            var readers = new List<StreamReader>();

            try
            {
                // Open the files one at a time so that a failure part-way through
                // still leaves every already-opened reader registered for disposal.
                foreach (var path in filePaths)
                {
                    readers.Add(new StreamReader(path));
                }

                // train the model
                ITrainingDataReader<string> dataReader = new MultipleFilesPlainTextByLineDataReader(readers);
                ITrainingEventReader eventReader = new SentenceDetectionEventReader(dataReader, scanner);

                trainer.TrainModel(eventReader, iterations, cut);

                return new GisModel(trainer);
            }
            finally
            {
                // Previously the readers were never closed, leaking one file handle
                // per training file. Disposing a StreamReader twice is harmless.
                foreach (var reader in readers)
                {
                    reader.Dispose();
                }
            }
        }
Example #19
0
		/// <summary>
		/// Trains a GIS model from the lines of a single text file.
		/// Use this training method if you wish to supply an end of
		/// sentence scanner which provides a different set of ending chars
		/// other than the default ones.  They are "\\.|!|\\?|\\\"|\\)".
		/// </summary>
		/// <param name="inFile">Path of the training file to read.</param>
		/// <param name="iterations">Forwarded to <c>GisTrainer.TrainModel</c>.</param>
		/// <param name="cut">Forwarded to <c>GisTrainer.TrainModel</c>.</param>
		/// <param name="scanner">End-of-sentence scanner handed to the event reader.</param>
		/// <returns>A <c>GisModel</c> constructed from the trainer after training.</returns>
		public static SharpEntropy.GisModel TrainModel(string inFile, int iterations, int cut, IEndOfSentenceScanner scanner)
		{
			// NOTE(review): the file is decoded as UTF-7, which newer .NET versions mark
			// obsolete - confirm that the training files really require this encoding.
			using (var streamReader = new System.IO.StreamReader(inFile, System.Text.Encoding.UTF7))
			{
				SharpEntropy.ITrainingDataReader<string> dataReader = new SharpEntropy.PlainTextByLineDataReader(streamReader);
				SharpEntropy.ITrainingEventReader eventReader = new SentenceDetectionEventReader(dataReader, scanner);

				var trainer = new SharpEntropy.GisTrainer();
				trainer.TrainModel(eventReader, iterations, cut);

				// The model is built while the stream is still open, as before.
				var trainedModel = new SharpEntropy.GisModel(trainer);
				return trainedModel;
			}
		}
Example #20
0
		/// <summary>
		/// Creates a new <code>MaximumEntropySentenceDetector</code> instance from its
		/// three collaborators, which are stored as given.
		/// </summary>
		/// <param name="model">
		/// The maximum entropy model this detector will use to evaluate
		/// end-of-sentence decisions.
		/// </param>
		/// <param name="contextGenerator">
		/// The context generator this detector will use to turn strings into
		/// contexts for the model to evaluate.
		/// </param>
		/// <param name="scanner">
		/// The end-of-sentence scanner this detector will use to locate
		/// end of sentence indexes.
		/// </param>
		public MaximumEntropySentenceDetector(SharpEntropy.IMaximumEntropyModel model, SharpEntropy.IContextGenerator<Util.Pair<System.Text.StringBuilder, int>> contextGenerator, IEndOfSentenceScanner scanner)
		{
			// Plain field assignments; no validation is performed here.
			mScanner = scanner;
			mContextGenerator = contextGenerator;
			mModel = model;
		}
		/// <summary>
		/// Creates a detector by delegating to the three-argument constructor with a
		/// <c>SentenceDetectionContextGenerator</c> built from the characters returned by
		/// <c>scanner.GetPotentialEndOfSentenceCharacters()</c>.
		/// </summary>
		/// <param name="model">The model forwarded to the three-argument constructor.</param>
		/// <param name="scanner">
		/// The scanner forwarded to the three-argument constructor; it is dereferenced
		/// here, so it must not be <c>null</c>.
		/// </param>
		public MaximumEntropySentenceDetector(IMaximumEntropyModel model, IEndOfSentenceScanner scanner)
			: this(
				model,
				new SentenceDetectionContextGenerator(scanner.GetPotentialEndOfSentenceCharacters().ToArray()),
				scanner)
		{
		}
 /// <summary>
 /// Creates a new <code>MaximumEntropySentenceDetector</code> instance from its
 /// three collaborators, which are stored as given.
 /// </summary>
 /// <param name="model">
 /// The maximum entropy model this detector will use to evaluate
 /// end-of-sentence decisions.
 /// </param>
 /// <param name="contextGenerator">
 /// The context generator this detector will use to turn strings into
 /// contexts for the model to evaluate.
 /// </param>
 /// <param name="scanner">
 /// The end-of-sentence scanner this detector will use to locate
 /// end of sentence indexes.
 /// </param>
 public MaximumEntropySentenceDetector(
     IMaximumEntropyModel model,
     IContextGenerator <Tuple <StringBuilder, int> > contextGenerator,
     IEndOfSentenceScanner scanner)
 {
     // Plain field assignments; no validation is performed here.
     _scanner          = scanner;
     _contextGenerator = contextGenerator;
     _model            = model;
 }
        // Utilities ----------------------------
		
		/// <summary>
		/// Trains a model from a single file by wrapping <paramref name="filePath"/>
		/// in a one-element list and delegating to the list-based overload.
		/// Use this training method if you wish to supply an end of
		/// sentence scanner which provides a different set of ending chars
		/// other than the default ones. They are "\\.|!|\\?|\\\"|\\)".
		/// </summary>
		/// <param name="filePath">The single training file to use.</param>
		/// <param name="iterations">Forwarded unchanged to the list-based overload.</param>
		/// <param name="cut">Forwarded unchanged to the list-based overload.</param>
		/// <param name="scanner">Forwarded unchanged to the list-based overload.</param>
		/// <returns>Whatever the list-based overload returns.</returns>
		public static GisModel TrainModel(string filePath, int iterations, int cut, IEndOfSentenceScanner scanner)
		{
			var singleFile = new List<string>();
			singleFile.Add(filePath);

			return TrainModel(singleFile, iterations, cut, scanner);
		}
Example #24
0
        /// <summary>
        /// Trains a sentence-detection model from the text of several files and
        /// closes every file it opened before returning.
        /// </summary>
        /// <param name="filePaths">Paths of the existing training files to read.</param>
        /// <param name="iterations">Forwarded to <c>GisTrainer.TrainModel</c>.</param>
        /// <param name="cut">Forwarded to <c>GisTrainer.TrainModel</c>.</param>
        /// <param name="scanner">End-of-sentence scanner handed to the event reader.</param>
        /// <returns>A <c>GisModel</c> constructed from the trainer after training.</returns>
        /// <exception cref="System.ArgumentNullException"><paramref name="filePaths"/> is <c>null</c>.</exception>
        public static GisModel TrainModel(IEnumerable <string> filePaths, int iterations, int cut, IEndOfSentenceScanner scanner)
        {
            if (filePaths == null)
            {
                throw new System.ArgumentNullException("filePaths");
            }

            var trainer = new GisTrainer();
            var readers = new List <StreamReader>();

            try
            {
                // Open the files one at a time so that a failure part-way through
                // still leaves every already-opened reader registered for disposal.
                foreach (var path in filePaths)
                {
#if DNF
                    readers.Add(new StreamReader(path));
#else
                    // Training only reads its input. FileMode.OpenOrCreate would silently
                    // create an empty file for a mistyped path and demand write access,
                    // so open existing files read-only, matching the DNF branch.
                    readers.Add(new StreamReader(new FileStream(path, FileMode.Open, FileAccess.Read)));
#endif
                }

                // train the model
                ITrainingDataReader <string> dataReader  = new MultipleFilesPlainTextByLineDataReader(readers);
                ITrainingEventReader         eventReader = new SentenceDetectionEventReader(dataReader, scanner);

                trainer.TrainModel(eventReader, iterations, cut);

                return new GisModel(trainer);
            }
            finally
            {
                // Previously the readers were never closed, leaking one file handle
                // per training file. Disposing a StreamReader twice is harmless.
                foreach (var reader in readers)
                {
                    reader.Dispose();
                }
            }
        }
		/// <summary>
		/// Creates a new <code>MaximumEntropySentenceDetector</code> instance from its
		/// three collaborators, which are stored as given.
		/// </summary>
		/// <param name="model">
		/// The maximum entropy model this detector will use to evaluate
		/// end-of-sentence decisions.
		/// </param>
		/// <param name="contextGenerator">
		/// The context generator this detector will use to turn strings into
		/// contexts for the model to evaluate.
		/// </param>
		/// <param name="scanner">
		/// The end-of-sentence scanner this detector will use to locate
		/// end of sentence indexes.
		/// </param>
		public MaximumEntropySentenceDetector(
			IMaximumEntropyModel model,
			IContextGenerator<Tuple<StringBuilder, int>> contextGenerator,
			IEndOfSentenceScanner scanner)
		{
			// Plain field assignments; no validation is performed here.
			_scanner = scanner;
			_contextGenerator = contextGenerator;
			_model = model;
		}
 /// <summary>
 /// Class constructor which uses the supplied end-of-sentence scanner and
 /// delegates to the three-argument constructor, building the context
 /// generator from <c>scanner.GetPotentialEndOfSentenceCharacters()</c>.
 /// </summary>
 /// <param name="dataReader">Source of training text, forwarded unchanged.</param>
 /// <param name="scanner">
 /// End-of-sentence scanner; it is dereferenced here, so it must not be <c>null</c>.
 /// </param>
 public SentenceDetectionEventReader(ITrainingDataReader <string> dataReader, IEndOfSentenceScanner scanner)
     : this(
         dataReader,
         scanner,
         new SentenceDetectionContextGenerator(scanner.GetPotentialEndOfSentenceCharacters().ToArray()))
 {
 }
        /// <summary>
        /// Runs the trainer once per file in <paramref name="files"/> and returns a
        /// model built from the trainer afterwards.
        /// </summary>
        /// <param name="files">Paths of the training files, processed in enumeration order.</param>
        /// <param name="iterations">Forwarded to <c>GisTrainer.TrainModel</c> for every file.</param>
        /// <param name="cut">Forwarded to <c>GisTrainer.TrainModel</c> for every file.</param>
        /// <param name="scanner">End-of-sentence scanner handed to each event reader.</param>
        /// <returns>A <c>GisModel</c> constructed from the trainer.</returns>
        public static GisModel TrainModel(IEnumerable<string> files, int iterations, int cut, IEndOfSentenceScanner scanner)
        {
            var gisTrainer = new GisTrainer();

            // NOTE(review): the same trainer is trained separately on each file; confirm
            // that successive TrainModel calls accumulate rather than start over.
            foreach (var trainingFile in files)
            {
                using (var fileReader = new StreamReader(trainingFile))
                {
                    ITrainingDataReader<string> lineReader = new PlainTextByLineDataReader(fileReader);
                    ITrainingEventReader events = new SentenceDetectionEventReader(lineReader, scanner);

                    gisTrainer.TrainModel(events, iterations, cut);
                }
            }

            var trainedModel = new GisModel(gisTrainer);
            return trainedModel;
        }
Example #28
0
        /// <summary>
        /// Runs the trainer once per file in <paramref name="files"/> and returns a
        /// model built from the trainer afterwards.
        /// </summary>
        /// <param name="files">Paths of the training files, processed in enumeration order.</param>
        /// <param name="iterations">Forwarded to <c>GisTrainer.TrainModel</c> for every file.</param>
        /// <param name="cut">Forwarded to <c>GisTrainer.TrainModel</c> for every file.</param>
        /// <param name="scanner">End-of-sentence scanner handed to each event reader.</param>
        /// <returns>A <c>GisModel</c> constructed from the trainer.</returns>
        public static GisModel TrainModel(IEnumerable <string> files, int iterations, int cut, IEndOfSentenceScanner scanner)
        {
            var gisTrainer = new GisTrainer();

            // NOTE(review): the same trainer is trained separately on each file; confirm
            // that successive TrainModel calls accumulate rather than start over.
            foreach (var trainingFile in files)
            {
                using (var fileReader = new StreamReader(trainingFile))
                {
                    ITrainingDataReader <string> lineReader = new PlainTextByLineDataReader(fileReader);
                    ITrainingEventReader         events     = new SentenceDetectionEventReader(lineReader, scanner);

                    gisTrainer.TrainModel(events, iterations, cut);
                }
            }

            var trainedModel = new GisModel(gisTrainer);
            return trainedModel;
        }