/// <summary>
/// Initializes a new instance of the <see cref="SentenceDetectorME" /> using the given sentence model.
/// </summary>
/// <param name="sentenceModel">The sentence model supplying the maxent model, factory and token-end flag.</param>
/// <exception cref="System.ArgumentNullException">
/// The <paramref name="sentenceModel"/> is <c>null</c>.
/// </exception>
public SentenceDetectorME(SentenceModel sentenceModel)
{
    // Fail fast with a descriptive exception instead of a NullReferenceException below.
    if (sentenceModel == null)
    {
        throw new System.ArgumentNullException("sentenceModel");
    }

    model = sentenceModel.MaxentModel;
    cgen = sentenceModel.Factory.GetContextGenerator();
    scanner = sentenceModel.Factory.GetEndOfSentenceScanner();
    useTokenEnd = sentenceModel.UseTokenEnd;
}
/// <summary>
/// Initializes a new instance of the <see cref="SentenceDetectorME" /> using the given sentence model.
/// </summary>
/// <param name="sentenceModel">The sentence model.</param>
/// <exception cref="System.ArgumentNullException">
/// The <paramref name="sentenceModel"/> is <c>null</c>.
/// </exception>
public SentenceDetectorME(SentenceModel sentenceModel)
{
    if (sentenceModel == null)
    {
        throw new ArgumentNullException(nameof(sentenceModel));
    }

    model = sentenceModel.MaxentModel;
    cgen = sentenceModel.Factory.GetContextGenerator();
    scanner = sentenceModel.Factory.GetEndOfSentenceScanner();
    useTokenEnd = sentenceModel.UseTokenEnd;

    // Without an abbreviation dictionary the abbreviation fields keep their defaults.
    if (sentenceModel.Abbreviations == null)
    {
        return;
    }

    stringComparison = sentenceModel.Abbreviations.IsCaseSensitive
        ? StringComparison.Ordinal
        : StringComparison.OrdinalIgnoreCase;

    abbreviationTokens = new Dictionary<string, int>();
    foreach (var abbreviation in sentenceModel.Abbreviations)
    {
        foreach (var token in abbreviation.Tokens)
        {
            // Several abbreviations may share a token; the indexer overwrites the
            // (identical) length instead of throwing like Dictionary.Add would.
            abbreviationTokens[token] = token.Length;
        }
    }
}
// Utilities ----------------------------

/// <summary>
/// Trains a model from a single file, using the supplied end of sentence
/// scanner. Use this overload when the scanner should provide a set of ending
/// chars other than the default ones. They are "\\.|!|\\?|\\\"|\\)".
/// </summary>
/// <param name="filePath">The single training file; it is wrapped in a one-element list.</param>
/// <param name="iterations">Forwarded unchanged to the multi-file overload.</param>
/// <param name="cut">Forwarded unchanged to the multi-file overload.</param>
/// <param name="scanner">Forwarded unchanged to the multi-file overload.</param>
/// <returns>The model returned by the multi-file overload.</returns>
public static GisModel TrainModel(string filePath, int iterations, int cut, IEndOfSentenceScanner scanner)
{
    List<string> singleFile = new List<string>();
    singleFile.Add(filePath);

    return TrainModel(singleFile, iterations, cut, scanner);
}
/// <summary>
/// Creates an event reader over the given training data and, when data is
/// available, generates the events for its first token.
/// </summary>
/// <param name="dataReader">Source of training tokens.</param>
/// <param name="scanner">End of sentence scanner stored on this reader.</param>
/// <param name="contextGenerator">Context generator stored on this reader.</param>
public SentenceDetectionEventReader(SharpEntropy.ITrainingDataReader<string> dataReader, IEndOfSentenceScanner scanner, SharpEntropy.IContextGenerator<Util.Pair<System.Text.StringBuilder, int>> contextGenerator)
{
    mDataReader = dataReader;
    mScanner = scanner;
    mContextGenerator = contextGenerator;

    // An empty reader leaves nothing to prime.
    if (!mDataReader.HasNext())
    {
        return;
    }

    string firstToken = mDataReader.NextToken();

    // Fetch the look-ahead token (if any) before events are built for the first one.
    if (mDataReader.HasNext())
    {
        mNext = mDataReader.NextToken();
    }

    AddNewEvents(firstToken);
}
/// <summary>
/// Builds an event reader from a training data reader, a scanner and a context
/// generator, then generates events for the first token if one exists.
/// </summary>
/// <param name="dataReader">Source of training tokens.</param>
/// <param name="scanner">End of sentence scanner kept by this reader.</param>
/// <param name="contextGenerator">Context generator kept by this reader.</param>
public SentenceDetectionEventReader(SharpEntropy.ITrainingDataReader<string> dataReader, IEndOfSentenceScanner scanner, SharpEntropy.IContextGenerator<Util.Pair<System.Text.StringBuilder, int>> contextGenerator)
{
    mDataReader = dataReader;
    mScanner = scanner;
    mContextGenerator = contextGenerator;

    bool hasFirstToken = mDataReader.HasNext();
    if (hasFirstToken)
    {
        string head = mDataReader.NextToken();

        // Remember the following token, when present, before processing the head.
        bool hasSecondToken = mDataReader.HasNext();
        if (hasSecondToken)
        {
            mNext = mDataReader.NextToken();
        }

        AddNewEvents(head);
    }
}
/// <summary>
/// Creates an event reader over the given training data and, when data is
/// available, generates the events for its first token.
/// </summary>
/// <param name="dataReader">Source of training tokens.</param>
/// <param name="scanner">End of sentence scanner stored on this reader.</param>
/// <param name="contextGenerator">Context generator stored on this reader.</param>
public SentenceDetectionEventReader(ITrainingDataReader<string> dataReader, IEndOfSentenceScanner scanner, IContextGenerator<Tuple<StringBuilder, int>> contextGenerator)
{
    _dataReader = dataReader;
    _scanner = scanner;
    _contextGenerator = contextGenerator;

    // An empty reader leaves nothing to prime.
    if (!_dataReader.HasNext())
    {
        return;
    }

    string firstToken = _dataReader.NextToken();

    // Fetch the look-ahead token (if any) before events are built for the first one.
    if (_dataReader.HasNext())
    {
        _next = _dataReader.NextToken();
    }

    AddNewEvents(firstToken);
}
/// <summary>
/// Builds an event reader from a training data reader, a scanner and a context
/// generator, then generates events for the first token if one exists.
/// </summary>
/// <param name="dataReader">Source of training tokens.</param>
/// <param name="scanner">End of sentence scanner kept by this reader.</param>
/// <param name="contextGenerator">Context generator kept by this reader.</param>
public SentenceDetectionEventReader(ITrainingDataReader<string> dataReader, IEndOfSentenceScanner scanner, IContextGenerator<Tuple<StringBuilder, int>> contextGenerator)
{
    _dataReader = dataReader;
    _scanner = scanner;
    _contextGenerator = contextGenerator;

    bool hasFirstToken = _dataReader.HasNext();
    if (hasFirstToken)
    {
        string head = _dataReader.NextToken();

        // Remember the following token, when present, before processing the head.
        bool hasSecondToken = _dataReader.HasNext();
        if (hasSecondToken)
        {
            _next = _dataReader.NextToken();
        }

        AddNewEvents(head);
    }
}
/// <summary>
/// Initializes a new instance of the <see cref="SentenceDetectorME" /> using the given sentence model.
/// </summary>
/// <param name="sentenceModel">The sentence model.</param>
/// <exception cref="System.ArgumentNullException">
/// The <paramref name="sentenceModel"/> is <c>null</c>.
/// </exception>
public SentenceDetectorME(SentenceModel sentenceModel)
{
    if (sentenceModel == null)
    {
        throw new ArgumentNullException("sentenceModel");
    }

    model = sentenceModel.MaxentModel;
    cgen = sentenceModel.Factory.GetContextGenerator();
    scanner = sentenceModel.Factory.GetEndOfSentenceScanner();
    useTokenEnd = sentenceModel.UseTokenEnd;

    // Without an abbreviation dictionary the abbreviation fields keep their defaults.
    if (sentenceModel.Abbreviations == null)
    {
        return;
    }

    stringComparison = sentenceModel.Abbreviations.IsCaseSensitive
        ? StringComparison.Ordinal
        : StringComparison.OrdinalIgnoreCase;

    abbreviationTokens = new Dictionary<string, int>();
    foreach (var abbreviation in sentenceModel.Abbreviations)
    {
        foreach (var token in abbreviation.Tokens)
        {
            // Several abbreviations may share a token; the indexer overwrites the
            // (identical) length instead of throwing like Dictionary.Add would.
            abbreviationTokens[token] = token.Length;
        }
    }
}
/// <summary>
/// Creates a detector from a model and a scanner. The context generator is a
/// <c>SentenceDetectionContextGenerator</c> built from the scanner's potential
/// end of sentence characters.
/// </summary>
/// <param name="model">Model forwarded to the three-argument constructor.</param>
/// <param name="scanner">Scanner that supplies the characters and is forwarded as well.</param>
public MaximumEntropySentenceDetector(IMaximumEntropyModel model, IEndOfSentenceScanner scanner)
    : this(
        model,
        new SentenceDetectionContextGenerator(scanner.GetPotentialEndOfSentenceCharacters().ToArray()),
        scanner)
{
}
/// <summary>
/// Trains a model from a single text file, using the supplied end of sentence
/// scanner. Use this training method if you wish to supply a scanner which
/// provides a different set of ending chars other than the default ones.
/// They are "\\.|!|\\?|\\\"|\\)".
/// </summary>
/// <param name="inFile">Path of the training file, read line by line.</param>
/// <param name="iterations">Passed to the trainer.</param>
/// <param name="cut">Passed to the trainer.</param>
/// <param name="scanner">Scanner handed to the event reader.</param>
/// <returns>A model created from the trainer after training.</returns>
public static SharpEntropy.GisModel TrainModel(string inFile, int iterations, int cut, IEndOfSentenceScanner scanner)
{
    // NOTE(review): the file is decoded as UTF-7, which .NET marks obsolete
    // (SYSLIB0001); kept as-is because changing it alters how existing
    // training files are read -- confirm the intended encoding.
    using (System.IO.StreamReader trainingText = new System.IO.StreamReader(inFile, System.Text.Encoding.UTF7))
    {
        SharpEntropy.ITrainingDataReader<string> lineReader = new SharpEntropy.PlainTextByLineDataReader(trainingText);
        SharpEntropy.ITrainingEventReader events = new SentenceDetectionEventReader(lineReader, scanner);

        SharpEntropy.GisTrainer gisTrainer = new SharpEntropy.GisTrainer();
        gisTrainer.TrainModel(events, iterations, cut);

        return new SharpEntropy.GisModel(gisTrainer);
    }
}
/// <summary>
/// Creates a new <code>MaximumEntropySentenceDetector</code> from its three collaborators.
/// </summary>
/// <param name="model">
/// The IMaximumEntropyModel this detector uses to evaluate end-of-sentence decisions.
/// </param>
/// <param name="contextGenerator">
/// The IContextGenerator this detector uses to turn strings into contexts
/// for the model to evaluate.
/// </param>
/// <param name="scanner">
/// The EndOfSentenceScanner this detector uses to locate end of sentence indexes.
/// </param>
public MaximumEntropySentenceDetector(SharpEntropy.IMaximumEntropyModel model, SharpEntropy.IContextGenerator<Util.Pair<System.Text.StringBuilder, int>> contextGenerator, IEndOfSentenceScanner scanner)
{
    // Plain field capture; no validation is performed here.
    mScanner = scanner;
    mContextGenerator = contextGenerator;
    mModel = model;
}
/// <summary>
/// Initializes a new instance of the <see cref="SentenceEventStream"/> class.
/// </summary>
/// <param name="samples">The sentence samples, passed to the base class.</param>
/// <param name="cg">The sentence context generator stored on this stream.</param>
/// <param name="scanner">The end of sentence scanner stored on this stream.</param>
public SentenceEventStream(IObjectStream<SentenceSample> samples, ISentenceContextGenerator cg, IEndOfSentenceScanner scanner)
    : base(samples)
{
    // Plain field capture; the base constructor has already received the samples.
    this.scanner = scanner;
    this.cg = cg;
}
/// <summary>
/// Class constructor which uses the EndOfSentenceScanner to locate
/// sentence endings and a default-configured context generator.
/// </summary>
/// <remarks>
/// The context generator is built from
/// <c>DefaultEndOfSentenceScanner.GetEndOfSentenceCharacters()</c>, not from
/// <paramref name="scanner"/>.
/// NOTE(review): a custom scanner with different characters may therefore not
/// match the generator -- confirm whether that is intended.
/// </remarks>
/// <param name="dataReader">Source of training tokens.</param>
/// <param name="scanner">Scanner forwarded to the three-argument constructor.</param>
public SentenceDetectionEventReader(SharpEntropy.ITrainingDataReader<string> dataReader, IEndOfSentenceScanner scanner)
    : this(
        dataReader,
        scanner,
        new SentenceDetectionContextGenerator(DefaultEndOfSentenceScanner.GetEndOfSentenceCharacters()))
{
}
/// <summary>
/// Class constructor which uses the EndOfSentenceScanner to locate
/// sentence endings. The context generator is a
/// <c>SentenceDetectionContextGenerator</c> built from the scanner's potential
/// end of sentence characters.
/// </summary>
/// <param name="dataReader">Source of training tokens.</param>
/// <param name="scanner">Scanner that supplies the characters and is forwarded as well.</param>
public SentenceDetectionEventReader(ITrainingDataReader<string> dataReader, IEndOfSentenceScanner scanner)
    : this(
        dataReader,
        scanner,
        new SentenceDetectionContextGenerator(scanner.GetPotentialEndOfSentenceCharacters().ToArray()))
{
}
/// <summary>
/// Class constructor which uses the EndOfSentenceScanner to locate
/// sentence endings, pairing it with a context generator created from the
/// default end of sentence characters.
/// </summary>
/// <remarks>
/// The characters come from
/// <c>DefaultEndOfSentenceScanner.GetEndOfSentenceCharacters()</c> rather than
/// from <paramref name="scanner"/>.
/// NOTE(review): verify this is intended when a custom scanner is supplied.
/// </remarks>
/// <param name="dataReader">Source of training tokens.</param>
/// <param name="scanner">Scanner forwarded to the three-argument constructor.</param>
public SentenceDetectionEventReader(SharpEntropy.ITrainingDataReader<string> dataReader, IEndOfSentenceScanner scanner)
    : this(
        dataReader,
        scanner,
        new SentenceDetectionContextGenerator(DefaultEndOfSentenceScanner.GetEndOfSentenceCharacters()))
{
}
/// <summary>
/// Creates a detector whose model is a <c>GisModel</c> read through a
/// <c>BinaryGisModelReader</c> constructed from <paramref name="name"/>.
/// </summary>
/// <param name="name">
/// Value handed to the binary model reader (presumably the model file path -- confirm).
/// </param>
/// <param name="scanner">End of sentence scanner forwarded to the base constructor.</param>
public EnglishMaximumEntropySentenceDetector(string name, IEndOfSentenceScanner scanner)
    : base(
        new SharpEntropy.GisModel(new SharpEntropy.IO.BinaryGisModelReader(name)),
        scanner)
{
}
/// <summary>
/// Builds the detector by loading a <c>GisModel</c> via a
/// <c>BinaryGisModelReader</c> and passing it, with the scanner, to the base class.
/// </summary>
/// <param name="name">
/// Argument for the binary model reader (presumably the model file name -- confirm).
/// </param>
/// <param name="scanner">End of sentence scanner forwarded to the base constructor.</param>
public EnglishMaximumEntropySentenceDetector(string name, IEndOfSentenceScanner scanner)
    : base(
        new SharpEntropy.GisModel(
            new SharpEntropy.IO.BinaryGisModelReader(name)),
        scanner)
{
}
/// <summary>
/// Trains a model from several text files, using the supplied end of sentence scanner.
/// </summary>
/// <param name="filePaths">Paths of the training files; each is opened for reading.</param>
/// <param name="iterations">Passed to the trainer.</param>
/// <param name="cut">Passed to the trainer.</param>
/// <param name="scanner">Scanner handed to the event reader.</param>
/// <returns>A model created from the trainer after training.</returns>
/// <exception cref="System.ArgumentNullException">
/// The <paramref name="filePaths"/> is <c>null</c>.
/// </exception>
public static GisModel TrainModel(IEnumerable<string> filePaths, int iterations, int cut, IEndOfSentenceScanner scanner)
{
    if (filePaths == null)
    {
        throw new System.ArgumentNullException("filePaths");
    }

    var trainer = new GisTrainer();
    var readers = new List<StreamReader>();

    try
    {
        // Open inside the try block so already-opened readers are released
        // even when a later file cannot be opened.
        foreach (var path in filePaths)
        {
            readers.Add(new StreamReader(path));
        }

        // train the model
        ITrainingDataReader<string> dataReader = new MultipleFilesPlainTextByLineDataReader(readers);
        ITrainingEventReader eventReader = new SentenceDetectionEventReader(dataReader, scanner);

        trainer.TrainModel(eventReader, iterations, cut);

        return new GisModel(trainer);
    }
    finally
    {
        // The readers were previously never closed, leaking file handles.
        foreach (var reader in readers)
        {
            reader.Dispose();
        }
    }
}
/// <summary>
/// Convenience constructor: derives a <c>SentenceDetectionContextGenerator</c>
/// from the scanner's potential end of sentence characters and delegates to
/// the three-argument constructor.
/// </summary>
/// <param name="model">Model forwarded to the three-argument constructor.</param>
/// <param name="scanner">Scanner that supplies the characters and is forwarded as well.</param>
public MaximumEntropySentenceDetector(IMaximumEntropyModel model, IEndOfSentenceScanner scanner)
    : this(
        model,
        new SentenceDetectionContextGenerator(
            scanner.GetPotentialEndOfSentenceCharacters().ToArray()),
        scanner)
{
}
/// <summary>
/// Creates a new <code>MaximumEntropySentenceDetector</code> from its three collaborators.
/// </summary>
/// <param name="model">
/// The IMaximumEntropyModel this detector uses to evaluate end-of-sentence decisions.
/// </param>
/// <param name="contextGenerator">
/// The IContextGenerator this detector uses to turn strings into contexts
/// for the model to evaluate.
/// </param>
/// <param name="scanner">
/// The EndOfSentenceScanner this detector uses to locate end of sentence indexes.
/// </param>
public MaximumEntropySentenceDetector(IMaximumEntropyModel model, IContextGenerator<Tuple<StringBuilder, int>> contextGenerator, IEndOfSentenceScanner scanner)
{
    // Plain field capture; no validation is performed here.
    _scanner = scanner;
    _contextGenerator = contextGenerator;
    _model = model;
}
// Utilities ----------------------------

/// <summary>
/// Single-file convenience overload. Use this training method if you wish to
/// supply an end of sentence scanner which provides a different set of ending
/// chars other than the default ones. They are "\\.|!|\\?|\\\"|\\)".
/// </summary>
/// <param name="filePath">The single training file; it is wrapped in a one-element list.</param>
/// <param name="iterations">Forwarded unchanged to the multi-file overload.</param>
/// <param name="cut">Forwarded unchanged to the multi-file overload.</param>
/// <param name="scanner">Forwarded unchanged to the multi-file overload.</param>
/// <returns>The model returned by the multi-file overload.</returns>
public static GisModel TrainModel(string filePath, int iterations, int cut, IEndOfSentenceScanner scanner)
{
    List<string> onlyFile = new List<string>();
    onlyFile.Add(filePath);

    GisModel trained = TrainModel(onlyFile, iterations, cut, scanner);
    return trained;
}
/// <summary>
/// Trains a model from several text files, using the supplied end of sentence scanner.
/// </summary>
/// <param name="filePaths">Paths of the training files; each is opened for reading.</param>
/// <param name="iterations">Passed to the trainer.</param>
/// <param name="cut">Passed to the trainer.</param>
/// <param name="scanner">Scanner handed to the event reader.</param>
/// <returns>A model created from the trainer after training.</returns>
/// <exception cref="System.ArgumentNullException">
/// The <paramref name="filePaths"/> is <c>null</c>.
/// </exception>
public static GisModel TrainModel(IEnumerable<string> filePaths, int iterations, int cut, IEndOfSentenceScanner scanner)
{
    if (filePaths == null)
    {
        throw new System.ArgumentNullException("filePaths");
    }

    var trainer = new GisTrainer();
    var readers = new List<StreamReader>();

    try
    {
        // Open inside the try block so already-opened readers are released
        // even when a later file cannot be opened.
        foreach (var path in filePaths)
        {
#if DNF
            readers.Add(new StreamReader(path));
#else
            // Open read-only and require the file to exist: OpenOrCreate would
            // silently create an empty training file and demand write access.
            readers.Add(new StreamReader(new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read)));
#endif
        }

        // train the model
        ITrainingDataReader<string> dataReader = new MultipleFilesPlainTextByLineDataReader(readers);
        ITrainingEventReader eventReader = new SentenceDetectionEventReader(dataReader, scanner);

        trainer.TrainModel(eventReader, iterations, cut);

        return new GisModel(trainer);
    }
    finally
    {
        // The readers were previously never closed, leaking file handles.
        foreach (var reader in readers)
        {
            reader.Dispose();
        }
    }
}
/// <summary>
/// Creates a new <code>MaximumEntropySentenceDetector</code> instance from a
/// model, a context generator and a scanner.
/// </summary>
/// <param name="model">
/// The IMaximumEntropyModel which this detector will use to evaluate
/// end-of-sentence decisions.
/// </param>
/// <param name="contextGenerator">
/// The IContextGenerator object which this detector will use to turn strings
/// into contexts for the model to evaluate.
/// </param>
/// <param name="scanner">
/// The EndOfSentenceScanner which this detector will use to locate end of
/// sentence indexes.
/// </param>
public MaximumEntropySentenceDetector(IMaximumEntropyModel model, IContextGenerator<Tuple<StringBuilder, int>> contextGenerator, IEndOfSentenceScanner scanner)
{
    // The arguments are stored as given; nothing else happens at construction.
    _contextGenerator = contextGenerator;
    _scanner = scanner;
    _model = model;
}
/// <summary>
/// Class constructor which uses the EndOfSentenceScanner to locate
/// sentence endings, and derives the context generator from that scanner's
/// potential end of sentence characters.
/// </summary>
/// <param name="dataReader">Source of training tokens.</param>
/// <param name="scanner">Scanner that supplies the characters and is forwarded as well.</param>
public SentenceDetectionEventReader(ITrainingDataReader<string> dataReader, IEndOfSentenceScanner scanner)
    : this(
        dataReader,
        scanner,
        new SentenceDetectionContextGenerator(
            scanner.GetPotentialEndOfSentenceCharacters().ToArray()))
{
}
/// <summary>
/// Runs the trainer over each of the given files in turn and returns a model
/// built from the trainer.
/// </summary>
/// <param name="files">Paths of the training files, processed in enumeration order.</param>
/// <param name="iterations">Passed to the trainer for every file.</param>
/// <param name="cut">Passed to the trainer for every file.</param>
/// <param name="scanner">Scanner handed to each file's event reader.</param>
/// <returns>A model created from the trainer after the last file.</returns>
public static GisModel TrainModel(IEnumerable<string> files, int iterations, int cut, IEndOfSentenceScanner scanner)
{
    GisTrainer gisTrainer = new GisTrainer();

    // NOTE(review): the trainer is invoked once per file; whether successive
    // calls accumulate or replace earlier training depends on GisTrainer -- confirm.
    foreach (string path in files)
    {
        using (StreamReader fileText = new StreamReader(path))
        {
            ITrainingDataReader<string> lineReader = new PlainTextByLineDataReader(fileText);
            ITrainingEventReader events = new SentenceDetectionEventReader(lineReader, scanner);

            gisTrainer.TrainModel(events, iterations, cut);
        }
    }

    return new GisModel(gisTrainer);
}
/// <summary>
/// Feeds every listed file to a single trainer, one file at a time, and
/// returns a model built from that trainer.
/// </summary>
/// <param name="files">Paths of the training files, processed in enumeration order.</param>
/// <param name="iterations">Passed to the trainer for every file.</param>
/// <param name="cut">Passed to the trainer for every file.</param>
/// <param name="scanner">Scanner handed to each file's event reader.</param>
/// <returns>A model created from the trainer after the last file.</returns>
public static GisModel TrainModel(IEnumerable<string> files, int iterations, int cut, IEndOfSentenceScanner scanner)
{
    GisTrainer sharedTrainer = new GisTrainer();

    // NOTE(review): one TrainModel call is made per file; whether later calls
    // build on or discard earlier ones is decided by GisTrainer -- confirm.
    foreach (string currentFile in files)
    {
        using (StreamReader input = new StreamReader(currentFile))
        {
            ITrainingDataReader<string> tokens = new PlainTextByLineDataReader(input);
            ITrainingEventReader trainingEvents = new SentenceDetectionEventReader(tokens, scanner);

            sharedTrainer.TrainModel(trainingEvents, iterations, cut);
        }
    }

    GisModel result = new GisModel(sharedTrainer);
    return result;
}