/// <summary>
/// Initializes a new instance of the <see cref="ParserEventStream"/> class and creates the
/// build, attach and check context generators.
/// </summary>
/// <param name="d">The parse stream forwarded to the base constructor.</param>
/// <param name="rules">The head rules forwarded to the base constructor.</param>
/// <param name="type">The event type forwarded to the base constructor.</param>
public ParserEventStream(IObjectStream<Parse> d, AbstractHeadRules rules, ParserEventTypeEnum type)
    : base(d, rules, type)
{
    // Only the attach and check generators receive the Punctuation member.
    this.buildContextGenerator = new BuildContextGenerator();
    this.attachContextGenerator = new AttachContextGenerator(Punctuation);
    this.checkContextGenerator = new CheckContextGenerator(Punctuation);
}
/// <summary>
/// Initializes a new instance of the <see cref="NameSampleSequenceStream"/> class.
/// </summary>
/// <param name="psi">The sample stream.</param>
/// <param name="pcg">The context generator.</param>
/// <param name="useOutcomes">The flag stored for later use by the stream (how it is consumed is not visible here).</param>
/// <param name="seqCodec">The sequence codec.</param>
public NameSampleSequenceStream(
    IObjectStream<NameSample> psi,
    INameContextGenerator pcg,
    bool useOutcomes,
    ISequenceCodec<string> seqCodec)
{
    // Plain field captures in parameter order; no argument validation happens here.
    this.psi = psi;
    this.pcg = pcg;
    this.useOutcomes = useOutcomes;
    this.seqCodec = seqCodec;
}
/// <summary>
/// Initializes a new instance of the <see cref="OnePassDataIndexer"/> class from an event stream,
/// a cutoff value and a flag telling whether the events should be sorted.
/// </summary>
/// <param name="eventStream">The event stream.</param>
/// <param name="cutoff">The cutoff.</param>
/// <param name="sort">if set to <c>true</c> the events will be sorted during the indexing.</param>
/// <param name="monitor">The evaluation monitor, forwarded to the base constructor.</param>
public OnePassDataIndexer(IObjectStream<Event> eventStream, int cutoff, bool sort, Monitor monitor)
    : base(monitor)
{
    this.EventStream = eventStream;
    this.Cutoff = cutoff;
    this.Sort = sort;
}
/// <summary>
/// Initializes a new instance of the <see cref="AdChunkSampleStream"/> class.
/// </summary>
/// <param name="lineStream">The line stream.</param>
/// <param name="safeParse">if set to <c>true</c> the invalid AD sentences will be skipped.</param>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="lineStream"/> is <c>null</c>.
/// </exception>
public AdChunkSampleStream(IObjectStream<string> lineStream, bool safeParse)
    : this()
{
    if (lineStream == null)
    {
        // nameof keeps the reported parameter name in sync with renames.
        throw new ArgumentNullException(nameof(lineStream));
    }

    adSentenceStream = new AdSentenceStream(lineStream, safeParse);
}
/// <summary>
/// Initializes the event stream with the sample stream it will read from.
/// </summary>
/// <param name="samples">The sample stream.</param>
/// <exception cref="System.ArgumentNullException"><paramref name="samples"/> is <c>null</c>.</exception>
protected AbstractEventStream(IObjectStream<T> samples)
{
    if (samples == null)
    {
        // nameof keeps the reported parameter name in sync with renames.
        throw new ArgumentNullException(nameof(samples));
    }

    this.samples = samples;
}
/// <summary>
/// Initializes a new instance of the <see cref="FrequencyTableSequence"/> class.
/// </summary>
/// <param name="object">The stream object forwarded to the base constructor.</param>
/// <param name="name">Optional name; when <c>null</c> the <c>Name</c> member is left untouched.</param>
public FrequencyTableSequence(IObjectStream @object, string name = null)
    : base(@object)
{
    // Only override the name when the caller actually supplied one.
    if (name != null)
    {
        Name = name;
    }

    GenerateAuto = new GenerateAutoTable();
}
/// <summary>
/// Trains a name finder model with the given parameters.
/// Delegates to the six-argument overload, passing <c>null</c> as its last argument.
/// </summary>
/// <param name="languageCode">The language of the training data.</param>
/// <param name="type">Overrides the type parameter in the provided samples. This value can be null.</param>
/// <param name="samples">The training samples.</param>
/// <param name="parameters">The machine learning train parameters.</param>
/// <param name="factory">The name finder factory.</param>
/// <returns>The newly trained <see cref="TokenNameFinderModel"/>.</returns>
public static TokenNameFinderModel Train(
    string languageCode,
    string type,
    IObjectStream<NameSample> samples,
    TrainingParameters parameters,
    TokenNameFinderFactory factory)
    => Train(languageCode, type, samples, parameters, factory, null);
/// <summary>
/// Initializes a new instance of the <see cref="TokenSampleStream"/> class.
/// </summary>
/// <param name="samples">The samples, forwarded to the base constructor.</param>
/// <param name="separatorChars">The separator chars.</param>
/// <exception cref="System.ArgumentNullException"><paramref name="separatorChars"/> is <c>null</c>.</exception>
public TokenSampleStream(IObjectStream<string> samples, string separatorChars)
    : base(samples)
{
    if (separatorChars == null)
    {
        // nameof keeps the reported parameter name in sync with renames.
        throw new ArgumentNullException(nameof(separatorChars));
    }

    this.separatorChars = separatorChars;
}
/// <summary>
/// Initializes a new instance of the <see cref="RealBasicEventStream"/> class.
/// </summary>
/// <param name="objectStream">The object stream.</param>
/// <exception cref="System.ArgumentNullException"><paramref name="objectStream"/> is <c>null</c>.</exception>
public RealBasicEventStream(IObjectStream<string> objectStream)
{
    if (objectStream == null)
    {
        // nameof keeps the reported parameter name in sync with renames.
        throw new ArgumentNullException(nameof(objectStream));
    }

    this.objectStream = objectStream;
}
/// <summary>
/// Parameterized constructor.
/// </summary>
/// <param name="object">The sequence object.</param>
/// <param name="name">The table name; when <c>null</c> the name is taken from <c>GetTitle</c>.</param>
public TableSequence(IObjectStream @object, string name = null)
    : this()
{
    TypeTable = GetTypeTable(@object);

    // GetTitle is only consulted when the caller did not provide a name.
    Name = name ?? GetTitle(@object, TypeTable);

    Data = new DataTable();
    Text = "0";
}
/// <summary>
/// Initializes a new instance of the <see cref="DelayTableSequence"/> class.
/// </summary>
/// <param name="object">The stream object forwarded to the base constructor.</param>
/// <param name="name">Optional name; when <c>null</c> the <c>Name</c> member is left untouched.</param>
public DelayTableSequence(IObjectStream @object, string name = null)
    : base(@object)
{
    // Only override the name when the caller actually supplied one.
    if (name != null)
    {
        Name = name;
    }

    DelayAuto = new DelayAutoTable();
}
/// <summary>
/// Trains a parser model with the given parameters.
/// Delegates to the six-argument overload, passing <c>null</c> as its first argument.
/// </summary>
/// <param name="languageCode">The language code.</param>
/// <param name="samples">The data samples.</param>
/// <param name="rules">The head rules.</param>
/// <param name="iterations">The number of training iterations.</param>
/// <param name="cutoff">The min number of times a feature must be seen.</param>
/// <returns>The trained <see cref="ParserModel" /> object.</returns>
public static ParserModel Train(
    string languageCode,
    IObjectStream<Parse> samples,
    AbstractHeadRules rules,
    int iterations,
    int cutoff)
    => Train(null, languageCode, samples, rules, iterations, cutoff);
/// <summary>
/// Initializes a new instance of the <see cref="ChunkSampleSequenceStream" /> class using the given parameters.
/// </summary>
/// <param name="samples">The chunk samples.</param>
/// <param name="contextGenerator">The chunker context generator.</param>
/// <exception cref="System.ArgumentNullException">
/// The <paramref name="samples"/> is null.
/// or
/// The <paramref name="contextGenerator"/> is null.
/// </exception>
public ChunkSampleSequenceStream(IObjectStream<ChunkSample> samples, IChunkerContextGenerator contextGenerator)
{
    // nameof keeps the reported parameter names in sync with renames.
    if (samples == null)
    {
        throw new ArgumentNullException(nameof(samples));
    }

    if (contextGenerator == null)
    {
        throw new ArgumentNullException(nameof(contextGenerator));
    }

    this.samples = samples;
    this.contextGenerator = contextGenerator;
}
/// <summary>
/// Trains a document categorizer model with the given parameters.
/// </summary>
/// <param name="languageCode">The language code.</param>
/// <param name="samples">The data samples.</param>
/// <param name="parameters">The machine learnable parameters.</param>
/// <param name="factory">The document categorizer factory; its feature generators drive the event stream.</param>
/// <param name="monitor">
/// An evaluation monitor that can be used to listen to the messages during the training or to cancel the training operation.
/// This argument can be a <c>null</c> value.
/// </param>
/// <returns>The trained <see cref="DocumentCategorizerModel"/> model.</returns>
public static DocumentCategorizerModel Train(
    string languageCode,
    IObjectStream<DocumentSample> samples,
    TrainingParameters parameters,
    DocumentCategorizerFactory factory,
    Monitor monitor)
{
    // Passed to the trainer factory and then stored in the resulting model.
    var manifest = new Dictionary<string, string>();

    var events = new DocumentCategorizerEventStream(samples, factory.FeatureGenerators);
    var trainedModel = TrainerFactory
        .GetEventTrainer(parameters, manifest, monitor)
        .Train(events);

    return new DocumentCategorizerModel(languageCode, trainedModel, manifest, factory);
}
/// <summary>
/// Initializes a new instance of the <see cref="PtbTokenSampleStream"/> using a <see cref="T:IObjectStream{string}"/>
/// and an evaluation monitor.
/// </summary>
/// <param name="language">The language.</param>
/// <param name="lineStream">The line stream.</param>
/// <param name="detokenizer">The detokenizer.</param>
/// <param name="monitor">The monitor.</param>
/// <exception cref="System.ArgumentNullException"><paramref name="detokenizer"/> is <c>null</c>.</exception>
public PtbTokenSampleStream(
    string language,
    IObjectStream<string> lineStream,
    IDetokenizer detokenizer,
    Monitor monitor)
    : base(new PtbStreamReader(language, lineStream, false, monitor))
{
    // The base reader is constructed before this guard runs.
    if (detokenizer == null)
    {
        throw new ArgumentNullException(nameof(detokenizer));
    }

    this.detokenizer = detokenizer;
}
/// <summary>
/// Initializes a new instance of the <see cref="AdContractionNameSampleStream" /> from a <paramref name="lineStream" /> object,
/// attaching an execution monitor.
/// </summary>
/// <param name="monitor">The execution monitor.</param>
/// <param name="lineStream">The line stream.</param>
/// <param name="safeParse">if set to <c>true</c> the invalid Ad sentences will be skipped.</param>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="monitor"/> is <c>null</c>
/// or
/// <paramref name="lineStream"/> is <c>null</c> (presumably raised by the delegated constructor).
/// </exception>
public AdContractionNameSampleStream(Monitor monitor, IObjectStream<string> lineStream, bool safeParse)
    : this(lineStream, safeParse)
{
    // The delegated constructor has already run when the monitor is checked.
    if (monitor == null)
    {
        throw new ArgumentNullException(nameof(monitor));
    }

    this.monitor = monitor;
}
/// <summary>
/// Initializes a new instance of the <see cref="AdNameSampleStream" /> from a <paramref name="lineStream" /> object.
/// </summary>
/// <param name="lineStream">The line stream.</param>
/// <param name="splitHyphenatedTokens">if set to <c>true</c> hyphenated tokens will be separated: "carros-monstro" > "carros" Hyphen "monstro".</param>
/// <param name="safeParse">if set to <c>true</c> the invalid data in the file will be skipped.</param>
/// <exception cref="System.ArgumentNullException"><paramref name="lineStream"/> is <c>null</c>.</exception>
public AdNameSampleStream(IObjectStream<string> lineStream, bool splitHyphenatedTokens, bool safeParse)
{
    if (lineStream == null)
    {
        throw new ArgumentNullException(nameof(lineStream));
    }

    this.splitHyphenatedTokens = splitHyphenatedTokens;

    // The raw lines are wrapped in a sentence stream; safeParse is handed to it unchanged.
    adSentenceStream = new AdSentenceStream(lineStream, safeParse);
}
/// <summary>
/// Returns the sequence table registered under <paramref name="nameTable"/> and, when it exists,
/// adds <paramref name="object"/> to the object list kept for that table.
/// </summary>
/// <param name="object">The stream object to associate with the table.</param>
/// <param name="nameTable">The key used to look the table up.</param>
/// <returns>The table found, or the lookup's default value when no table has that name.</returns>
internal TableSequence GetTable(IObjectStream @object, string nameTable)
{
    TableSequence found = null;

    if (!_loadTables.TryGetValue(nameTable, out found))
    {
        // Nothing registered under this name; hand back whatever the lookup produced.
        return found;
    }

    _loadTableObjects[found.Name].Add(@object);
    return found;
}
/// <summary>
/// Initializes a new instance of the <see cref="AdChunkSampleStream"/> class with an execution monitor.
/// </summary>
/// <param name="monitor">The execution monitor.</param>
/// <param name="lineStream">The line stream.</param>
/// <param name="safeParse">if set to <c>true</c> the invalid AD sentences will be skipped.</param>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="monitor"/> is <c>null</c>
/// or
/// <paramref name="lineStream"/> is <c>null</c> (presumably raised by the delegated constructor).
/// </exception>
public AdChunkSampleStream(Monitor monitor, IObjectStream<string> lineStream, bool safeParse)
    : this(lineStream, safeParse)
{
    if (monitor == null)
    {
        // nameof keeps the reported parameter name in sync with renames.
        throw new ArgumentNullException(nameof(monitor));
    }

    this.monitor = monitor;
}
/// <summary>
/// Initializes a command that replaces the object at <paramref name="index"/> of the given collection
/// with an object obtained from <paramref name="text"/>.
/// </summary>
/// <param name="document">The document, forwarded to the base constructor.</param>
/// <param name="stream">The stream whose object is replaced.</param>
/// <param name="typeCollection">The collection of the stream being addressed.</param>
/// <param name="index">The position inside the collection.</param>
/// <param name="text">The text passed to <c>GetObject</c> to obtain the new object.</param>
public SetObjectCommand(Document document, IStream stream, eTypeObjectCollection typeCollection, int index, string text)
    : base(document)
{
    // Capture the current object before asking the stream for its replacement.
    _oldObject = stream[typeCollection][index];
    _newObject = stream.GetObject(typeCollection, index, text);
    _newObject.CorrectionSequence(this);

    // A column insertion is queued only for a non-default object written to the
    // position just before EventCount in the _1D collection.
    bool needsColumn = _newObject.Type != eTypeObjectStream.Default
                       && typeCollection == eTypeObjectCollection._1D
                       && stream.EventCount == index + 1;

    if (needsColumn)
    {
        Add(new InsertColumnCommand(document, index + 1));
    }
}
/// <summary>
/// Initializes a new instance of the <see cref="AdNameSampleStream"/> from a <paramref name="lineStream"/> object,
/// attaching an execution monitor.
/// </summary>
/// <param name="monitor">The execution monitor.</param>
/// <param name="lineStream">The line stream.</param>
/// <param name="splitHyphenatedTokens">if set to <c>true</c> hyphenated tokens will be separated: "carros-monstro" > "carros" Hyphen "monstro".</param>
/// <param name="safeParse">if set to <c>true</c> the invalid data in the file will be skipped.</param>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="monitor"/> is <c>null</c>
/// or
/// <paramref name="lineStream"/> is <c>null</c> (presumably raised by the delegated constructor).
/// </exception>
public AdNameSampleStream(
    Monitor monitor,
    IObjectStream<string> lineStream,
    bool splitHyphenatedTokens,
    bool safeParse)
    : this(lineStream, splitHyphenatedTokens, safeParse)
{
    // The delegated constructor has already run when the monitor is checked.
    if (monitor == null)
    {
        throw new ArgumentNullException(nameof(monitor));
    }

    this.monitor = monitor;
}
/// <summary>
/// Initializes a new instance of the <see cref="AdTokenSampleStream"/> from a <paramref name="lineStream"/> object,
/// attaching an evaluation monitor.
/// </summary>
/// <param name="monitor">The evaluation monitor.</param>
/// <param name="lineStream">The line stream.</param>
/// <param name="detokenizer">The detokenizer used to create the samples.</param>
/// <param name="splitHyphenatedTokens">if set to <c>true</c> hyphenated tokens will be separated: "carros-monstro" > "carros" Hyphen "monstro".</param>
/// <param name="safeParse">if set to <c>true</c> the invalid data in the file will be skipped.</param>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="monitor"/> is <c>null</c>
/// or
/// <paramref name="lineStream"/> is <c>null</c> (presumably raised by the delegated constructor).
/// </exception>
public AdTokenSampleStream(
    Monitor monitor,
    IObjectStream<string> lineStream,
    IDetokenizer detokenizer,
    bool splitHyphenatedTokens,
    bool safeParse)
    : this(lineStream, detokenizer, splitHyphenatedTokens, safeParse)
{
    if (monitor == null)
    {
        // nameof keeps the reported parameter name in sync with renames.
        throw new ArgumentNullException(nameof(monitor));
    }

    this.monitor = monitor;
}
/// <summary>
/// Initializes a command that replaces the object at <paramref name="index"/> of the given collection
/// with an object of the requested type.
/// </summary>
/// <param name="document">The document, forwarded to the base constructor.</param>
/// <param name="element">The stream whose object is replaced.</param>
/// <param name="typeCollection">The collection of the stream being addressed.</param>
/// <param name="index">The position inside the collection.</param>
/// <param name="typeObject">The type of object requested from <c>GetObject</c>.</param>
/// <param name="data">Optional data passed through to <c>GetObject</c>.</param>
public SetObjectCommand(
    Document document,
    IStream element,
    eTypeObjectCollection typeCollection,
    int index,
    eTypeObjectStream typeObject,
    object data = null)
    : base(document)
{
    // Capture the current object before asking the stream for its replacement.
    _oldObject = element[typeCollection][index];
    _newObject = element.GetObject(typeCollection, index, typeObject, data);
    _newObject.CorrectionSequence(this);

    // A column insertion is queued only for a non-default object written to the
    // position just before EventCount in the _1D collection.
    bool needsColumn = _newObject.Type != eTypeObjectStream.Default
                       && typeCollection == eTypeObjectCollection._1D
                       && element.EventCount == index + 1;

    if (needsColumn)
    {
        Add(new InsertColumnCommand(document, index + 1));
    }
}
/// <summary>
/// Trains a naive Bayes model from the given events, using the default training
/// parameters with the cutoff overridden.
/// </summary>
/// <param name="samples">The event stream to train on.</param>
/// <param name="cutoff">The cutoff written into the training parameters.</param>
/// <returns>The model returned by the trainer.</returns>
internal static NaiveBayesModel TrainModel(IObjectStream<Event> samples, int cutoff = 1)
{
    var settings = TrainingParameters.DefaultParameters();

    // The cutoff is stored as text, formatted culture-independently.
    settings.Set(Parameters.Cutoff, cutoff.ToString(CultureInfo.InvariantCulture));

    var bayesTrainer = new NaiveBayesTrainer();
    bayesTrainer.Init(settings, null);

    return bayesTrainer.Train(samples);
}
/// <summary>
/// Initializes a new instance of the <see cref="NameFinderEventStream"/> class.
/// </summary>
/// <param name="dataStream">The name sample stream, forwarded to the base constructor.</param>
/// <param name="type">Currently unused by this constructor (see the TODO below).</param>
/// <param name="contextGenerator">The context generator; a window feature generator is added to it.</param>
/// <param name="codec">The sequence codec; a <c>BioCodec</c> is created when this is <c>null</c>.</param>
public NameFinderEventStream(
    IObjectStream<NameSample> dataStream,
    string type,
    INameContextGenerator contextGenerator,
    ISequenceCodec<string> codec)
    : base(dataStream)
{
    if (codec != null)
    {
        this.codec = codec;
    }
    else
    {
        this.codec = new BioCodec();
    }

    additionalContextFeatureGenerator = new AdditionalContextFeatureGenerator();

    this.contextGenerator = contextGenerator;

    // Wrap the additional-context generator in an 8/8 window and register it on the context generator.
    var windowed = new WindowFeatureGenerator(additionalContextFeatureGenerator, 8, 8);
    this.contextGenerator.AddFeatureGenerator(windowed);

    // TODO: How to make the type really do something?!
    // Type = type ?? "default";
}
/// <summary>
/// Trains a model for the <see cref="TokenizerME"/>.
/// </summary>
/// <param name="samples">The samples used for the training.</param>
/// <param name="factory">A <see cref="TokenizerFactory"/> to get resources from.</param>
/// <param name="parameters">The machine learning train parameters.</param>
/// <param name="monitor">
/// An evaluation monitor that can be used to listen to the messages during the training or to cancel the training operation.
/// This argument can be a <c>null</c> value.
/// </param>
/// <returns>The trained <see cref="TokenizerModel"/>.</returns>
public static TokenizerModel Train(
    IObjectStream<TokenSample> samples,
    TokenizerFactory factory,
    TrainingParameters parameters,
    Monitor monitor)
{
    // Passed to the trainer factory and then stored in the resulting model.
    var manifest = new Dictionary<string, string>();

    var events = new TokSpanEventStream(
        samples,
        factory.UseAlphaNumericOptimization,
        factory.AlphaNumericPattern,
        factory.ContextGenerator);

    var trainedModel = TrainerFactory
        .GetEventTrainer(parameters, manifest, monitor)
        .Train(events);

    return new TokenizerModel(trainedModel, manifest, factory);
}
/// <summary>
/// Evaluates the samples with a given number of partitions, merging the result of
/// every partition into <c>FMeasure</c>.
/// </summary>
/// <param name="samples">The samples to train and test.</param>
/// <param name="partitions">The number of folds.</param>
public void Evaluate(IObjectStream<T> samples, int partitions)
{
    var folds = new CrossValidationPartitioner<T>(samples, partitions);

    while (folds.HasNext)
    {
        // Each partition is handed to Process and its measure is folded into the total.
        var foldMeasure = Process(folds.Next());
        FMeasure.MergeInto(foldMeasure);
    }
}
/// <summary>
/// Reads every sample from <paramref name="samples"/>, counts how often each tag was seen for
/// each word, and writes the tags whose count reaches <paramref name="cutoff"/> into
/// <paramref name="dictionary"/>.
/// </summary>
/// <param name="samples">The POS samples; the stream is read until it returns <c>null</c>.</param>
/// <param name="dictionary">The tag dictionary that is consulted for existing tags and then updated.</param>
/// <param name="caseSensitive">When <c>false</c>, words are lower-cased (invariant) before counting.</param>
/// <param name="cutoff">The minimum count a tag needs in order to be stored for a word.</param>
public static void PopulatePOSDictionary(IObjectStream<POSSample> samples, IMutableTagDictionary dictionary, bool caseSensitive, int cutoff)
{
    var newEntries = new Dictionary<string, Dictionary<string, int>>();

    POSSample sample;
    while ((sample = samples.Read()) != null)
    {
        for (var i = 0; i < sample.Sentence.Length; i++)
        {
            var token = sample.Sentence[i];

            // Tokens containing digits are not added to the dictionary.
            if (StringPattern.Recognize(token).ContainsDigit)
            {
                continue;
            }

            var word = caseSensitive ? token : token.ToLowerInvariant();

            // Single lookup instead of ContainsKey followed by repeated indexer access.
            Dictionary<string, int> tagCounts;
            if (!newEntries.TryGetValue(word, out tagCounts))
            {
                tagCounts = new Dictionary<string, int>();
                newEntries.Add(word, tagCounts);
            }

            // Tags already present in the dictionary start at the cutoff so they survive the filter below.
            var dicTags = dictionary.GetTags(word);
            if (dicTags != null)
            {
                foreach (var tag in dicTags)
                {
                    if (!tagCounts.ContainsKey(tag))
                    {
                        tagCounts.Add(tag, cutoff);
                    }
                }
            }

            // First sighting counts as 1, every further sighting increments the count.
            var sampleTag = sample.Tags[i];
            int seen;
            tagCounts[sampleTag] = tagCounts.TryGetValue(sampleTag, out seen) ? seen + 1 : 1;
        }
    }

    foreach (var wordEntry in newEntries)
    {
        var tagsForWord = wordEntry.Value
            .Where(entry => entry.Value >= cutoff)
            .Select(entry => entry.Key)
            .ToArray();

        if (tagsForWord.Length > 0)
        {
            dictionary.Put(wordEntry.Key, tagsForWord);
        }
    }
}
/// <summary>
/// Trains a parser model with the given parameters.
/// </summary>
/// <param name="monitor">
/// An evaluation monitor that can be used to listen to the messages during the training or to cancel the training operation.
/// It is forwarded to every training phase (build, tagger, chunker and check).
/// This argument can be a <c>null</c> value.
/// </param>
/// <param name="languageCode">The language code.</param>
/// <param name="samples">The data samples; the stream is reset between the training phases.</param>
/// <param name="rules">The head rules.</param>
/// <param name="parameters">The machine learnable parameters; each phase reads its own namespace.</param>
/// <returns>The trained <see cref="ParserModel" /> object.</returns>
public static ParserModel Train(
    Monitor monitor,
    string languageCode,
    IObjectStream<Parse> samples,
    AbstractHeadRules rules,
    TrainingParameters parameters)
{
    var dict = BuildDictionary(samples, rules, parameters);

    samples.Reset();

    var manifestInfoEntries = new Dictionary<string, string>();

    // build
    var bes = new ParserEventStream(samples, rules, ParserEventTypeEnum.Build, dict);
    var buildReportMap = new Dictionary<string, string>();
    var buildTrainer = TrainerFactory.GetEventTrainer(parameters.GetNamespace("build"), buildReportMap, monitor);
    var buildModel = buildTrainer.Train(bes);

    MergeReportIntoManifest(manifestInfoEntries, buildReportMap, "build");

    samples.Reset();

    // tag (the monitor is forwarded so this phase can be observed and cancelled too)
    var posModel = POSTaggerME.Train(
        languageCode,
        new PosSampleStream(samples),
        parameters.GetNamespace("tagger"),
        new POSTaggerFactory(),
        monitor);

    samples.Reset();

    // chunk (the monitor is forwarded so this phase can be observed and cancelled too)
    var chunkModel = ChunkerME.Train(
        languageCode,
        new ChunkSampleStream(samples),
        parameters.GetNamespace("chunker"),
        new ChunkerFactory(),
        monitor);

    samples.Reset();

    // check
    var kes = new ParserEventStream(samples, rules, ParserEventTypeEnum.Check);
    var checkReportMap = new Dictionary<string, string>();
    var checkTrainer = TrainerFactory.GetEventTrainer(parameters.GetNamespace("check"), checkReportMap, monitor);
    var checkModel = checkTrainer.Train(kes);

    MergeReportIntoManifest(manifestInfoEntries, checkReportMap, "check");

    return new ParserModel(
        languageCode,
        buildModel,
        checkModel,
        posModel,
        chunkModel,
        rules,
        ParserType.Chunking,
        manifestInfoEntries);
}
/// <summary>
/// Trains a lemmatizer model with the given parameters.
/// </summary>
/// <param name="languageCode">The language code.</param>
/// <param name="samples">The data samples.</param>
/// <param name="parameters">The machine learnable parameters.</param>
/// <param name="factory">The lemmatizer factory.</param>
/// <param name="monitor">
/// An evaluation monitor that can be used to listen to the messages during the training or to cancel the training
/// operation. This argument can be a <c>null</c> value.
/// </param>
/// <returns>The trained <see cref="LemmatizerModel" /> object.</returns>
/// <exception cref="System.InvalidOperationException">The trainer was not specified.</exception>
/// <exception cref="System.NotSupportedException">Trainer type is not supported.</exception>
public static LemmatizerModel Train(
    string languageCode,
    IObjectStream<LemmaSample> samples,
    TrainingParameters parameters,
    LemmatizerFactory factory,
    Monitor monitor)
{
    var manifest = new Dictionary<string, string>();
    var beamSize = parameters.Get(Parameters.BeamSize, DefaultBeamSize);
    var contextGenerator = factory.GetContextGenerator();

    var trainerType = TrainerFactory.GetTrainerType(parameters);
    if (trainerType == null)
    {
        throw new InvalidOperationException("The trainer was not specified.");
    }

    // Exactly one of these is assigned by the switch below.
    IMaxentModel maxentModel = null;
    ML.Model.ISequenceClassificationModel<string> sequenceModel = null;

    switch (trainerType.Value)
    {
        case TrainerType.EventModelTrainer:
        {
            var events = new LemmaSampleEventStream(samples, contextGenerator);
            var eventTrainer = TrainerFactory.GetEventTrainer(parameters, manifest, monitor);
            maxentModel = eventTrainer.Train(events);
            break;
        }

        case TrainerType.EventModelSequenceTrainer:
        {
            var sequences = new LemmaSampleSequenceStream(samples, contextGenerator);
            var eventSequenceTrainer = TrainerFactory.GetEventModelSequenceTrainer(parameters, manifest, monitor);
            maxentModel = eventSequenceTrainer.Train(sequences);
            break;
        }

        case TrainerType.SequenceTrainer:
        {
            var sequences = new LemmaSampleSequenceStream(samples, contextGenerator);
            var sequenceTrainer = TrainerFactory.GetSequenceModelTrainer(parameters, manifest, monitor);
            sequenceModel = sequenceTrainer.Train(sequences);
            break;
        }

        default:
            throw new NotSupportedException("Trainer type is not supported.");
    }

    // The beam size is only passed along with a maxent model.
    if (maxentModel != null)
    {
        return new LemmatizerModel(languageCode, maxentModel, beamSize, manifest, factory);
    }

    return new LemmatizerModel(languageCode, sequenceModel, manifest, factory);
}
/// <summary>
/// Creates an event stream based on the specified data stream of the specified type using the specified head rules.
/// </summary>
/// <param name="samples">A 1-parse-per-line Penn Treebank Style parse.</param>
/// <param name="rules">The head rules.</param>
/// <param name="eType">The type of events desired (tag, chunk, build, or check).</param>
/// <param name="dictionary">A tri-gram dictionary to reduce feature generation.</param>
public ParserEventStream(
    IObjectStream<Parse> samples,
    AbstractHeadRules rules,
    ParserEventTypeEnum eType,
    Dictionary.Dictionary dictionary)
    : base(samples, rules, eType, dictionary)
{
    // Only the build and check event types get a context generator here; other types create nothing.
    if (eType == ParserEventTypeEnum.Build)
    {
        bcg = new BuildContextGenerator(dictionary);
    }
    else if (eType == ParserEventTypeEnum.Check)
    {
        kcg = new CheckContextGenerator();
    }
}
/// <summary>
/// Initializes a new instance of the <see cref="DocumentCategorizerEventStream"/> class with the given feature generators.
/// </summary>
/// <param name="samples">The samples, forwarded to the base constructor.</param>
/// <param name="featureGenerators">The feature generators; at least one is required.</param>
/// <exception cref="System.ArgumentNullException"><paramref name="featureGenerators"/> is <c>null</c>.</exception>
/// <exception cref="System.ArgumentOutOfRangeException"><paramref name="featureGenerators"/> is empty.</exception>
public DocumentCategorizerEventStream(IObjectStream<DocumentSample> samples, params IFeatureGenerator[] featureGenerators)
    : base(samples)
{
    // nameof keeps the reported parameter name in sync with renames.
    if (featureGenerators == null)
    {
        throw new ArgumentNullException(nameof(featureGenerators));
    }

    if (featureGenerators.Length == 0)
    {
        throw new ArgumentOutOfRangeException(nameof(featureGenerators));
    }

    cg = new DocumentCategorizerContextGenerator(featureGenerators);
}
/// <summary>
/// Creates a new data indexer for the given event stream, selected by <c>DataIndexerName</c>.
/// </summary>
/// <param name="events">The event stream.</param>
/// <returns>A one-pass or two-pass data indexer configured with the current cutoff, sort flag and monitor.</returns>
/// <exception cref="System.InvalidOperationException">The data indexer name is neither one-pass nor two-pass.</exception>
public IDataIndexer GetDataIndexer(IObjectStream<Event> events)
{
    var indexerName = DataIndexerName;

    if (indexerName == Parameters.DataIndexers.OnePass)
    {
        return new OnePassDataIndexer(events, Cutoff, IsSortAndMerge, Monitor);
    }

    if (indexerName == Parameters.DataIndexers.TwoPass)
    {
        return new TwoPassDataIndexer(events, Cutoff, IsSortAndMerge, Monitor);
    }

    throw new InvalidOperationException("Unexpected data indexer name: " + DataIndexerName);
}
/// <summary>
/// Trains a sentence detection model with the given parameters.
/// </summary>
/// <param name="languageCode">The language code.</param>
/// <param name="samples">The data samples.</param>
/// <param name="factory">The sentence detector factory; supplies the context generator and the end-of-sentence scanner.</param>
/// <param name="parameters">The machine learnable parameters.</param>
/// <param name="monitor">
/// An evaluation monitor that can be used to listen to the messages during the training or to cancel the training operation.
/// This argument can be a <c>null</c> value.
/// </param>
/// <returns>The trained <see cref="SentenceModel"/> object.</returns>
public static SentenceModel Train(
    string languageCode,
    IObjectStream<SentenceSample> samples,
    SentenceDetectorFactory factory,
    TrainingParameters parameters,
    Monitor monitor)
{
    // Passed to the trainer factory and then stored in the resulting model.
    var manifest = new Dictionary<string, string>();

    // TODO: Fix the EventStream to throw exceptions when training goes wrong
    var events = new SentenceEventStream(
        samples,
        factory.GetContextGenerator(),
        factory.GetEndOfSentenceScanner());

    var trainedModel = TrainerFactory
        .GetEventTrainer(parameters, manifest, monitor)
        .Train(events);

    return new SentenceModel(languageCode, trainedModel, manifest, factory);
}
/// <summary>
/// Initializes a new instance of the <see cref="BioNLP2004NameSampleStream"/> class.
/// The input is read line by line as UTF-8 text.
/// </summary>
/// <param name="inputStream">The input stream.</param>
/// <param name="types">The types.</param>
/// <exception cref="System.ArgumentNullException"><paramref name="inputStream"/> is <c>null</c>.</exception>
/// <exception cref="System.ArgumentException">The input stream was not readable.</exception>
public BioNLP2004NameSampleStream(Stream inputStream, int types)
{
    // nameof keeps the reported parameter name in sync with renames.
    if (inputStream == null)
    {
        throw new ArgumentNullException(nameof(inputStream));
    }

    if (!inputStream.CanRead)
    {
        throw new ArgumentException(@"The input stream was not readable.", nameof(inputStream));
    }

    lineStream = new PlainTextByLineStream(inputStream, Encoding.UTF8);
    this.types = types;
}
/// <summary>
/// Cross-validates the specified chunk samples: for every partition a chunker is trained,
/// evaluated on the partition's test stream, and its measure is merged into <c>FMeasure</c>.
/// </summary>
/// <param name="samples">The chunk samples to be evaluated.</param>
/// <param name="partitions">The partitions (folds).</param>
public void Evaluate(IObjectStream<ChunkSample> samples, int partitions)
{
    var folds = new CrossValidationPartitioner<ChunkSample>(samples, partitions);

    while (folds.HasNext)
    {
        var trainingFold = folds.Next();

        var foldModel = ChunkerME.Train(languageCode, trainingFold, parameters, chunkerFactory);
        var foldEvaluator = new ChunkerEvaluator(new ChunkerME(foldModel), listeners);

        // The test stream is requested only after training on the fold has finished.
        foldEvaluator.Evaluate(trainingFold.GetTestSampleStream());

        FMeasure.MergeInto(foldEvaluator.FMeasure);
    }
}
/// <summary>
/// Trains a parser model with the given parameters.
/// The phases run in this order: dictionary, tagger, chunker, build, check, attach;
/// the sample stream is reset between phases.
/// </summary>
/// <param name="languageCode">The language code.</param>
/// <param name="samples">The data samples.</param>
/// <param name="rules">The head rules.</param>
/// <param name="parameters">The machine learnable parameters; each phase reads its own namespace.</param>
/// <param name="monitor">
/// An evaluation monitor that can be used to listen to the messages during the training or to cancel the training operation.
/// This argument can be a <c>null</c> value.
/// </param>
/// <returns>The trained <see cref="ParserModel"/> object.</returns>
/// <exception cref="System.NotSupportedException">Trainer type is not supported (not thrown directly here; presumably raised by the trainers used).</exception>
public static ParserModel Train(
    string languageCode,
    IObjectStream<Parse> samples,
    AbstractHeadRules rules,
    TrainingParameters parameters,
    Monitor monitor)
{
    var manifest = new Dictionary<string, string>();

#if DEBUG
    System.Diagnostics.Debug.Print("Building dictionary");
#endif

    var builtDictionary = BuildDictionary(samples, rules, parameters);

    samples.Reset();

    // tag
    var taggerModel = POSTaggerME.Train(
        languageCode,
        new PosSampleStream(samples),
        parameters.GetNamespace("tagger"),
        new POSTaggerFactory(),
        monitor);

    samples.Reset();

    // chunk
    var chunkerModel = ChunkerME.Train(
        languageCode,
        new ChunkSampleStream(samples),
        parameters.GetNamespace("chunker"),
        new ParserChunkerFactory(),
        monitor);

    samples.Reset();

    // build
#if DEBUG
    System.Diagnostics.Debug.Print("Training builder");
#endif

    var buildEvents = new ParserEventStream(samples, rules, ParserEventTypeEnum.Build, builtDictionary);
    var buildReport = new Dictionary<string, string>();
    var buildTrainer = TrainerFactory.GetEventTrainer(parameters.GetNamespace("build"), buildReport, monitor);
    var buildModel = buildTrainer.Train(buildEvents);

    Chunking.Parser.MergeReportIntoManifest(manifest, buildReport, "build");

    samples.Reset();

    // check
#if DEBUG
    System.Diagnostics.Debug.Print("Training checker");
#endif

    var checkEvents = new ParserEventStream(samples, rules, ParserEventTypeEnum.Check);
    var checkReport = new Dictionary<string, string>();
    var checkTrainer = TrainerFactory.GetEventTrainer(parameters.GetNamespace("check"), checkReport, monitor);
    var checkModel = checkTrainer.Train(checkEvents);

    Chunking.Parser.MergeReportIntoManifest(manifest, checkReport, "check");

    samples.Reset();

    // attach
#if DEBUG
    System.Diagnostics.Debug.Print("Training attacher");
#endif

    var attachEvents = new ParserEventStream(samples, rules, ParserEventTypeEnum.Attach);
    var attachReport = new Dictionary<string, string>();
    var attachTrainer = TrainerFactory.GetEventTrainer(parameters.GetNamespace("attach"), attachReport, monitor);
    var attachModel = attachTrainer.Train(attachEvents);

    Chunking.Parser.MergeReportIntoManifest(manifest, attachReport, "attach");

    return new ParserModel(
        languageCode,
        buildModel,
        checkModel,
        attachModel,
        taggerModel,
        chunkerModel,
        rules,
        ParserType.TreeInsert,
        manifest);
}
/// <summary>
/// Trains a parser model with the given parameters.
/// Delegates to the five-argument overload, passing <c>null</c> as its last argument.
/// </summary>
/// <param name="languageCode">The language code.</param>
/// <param name="samples">The data samples.</param>
/// <param name="rules">The head rules.</param>
/// <param name="parameters">The machine learnable parameters.</param>
/// <returns>The trained <see cref="ParserModel"/> object.</returns>
/// <exception cref="System.NotSupportedException">Trainer type is not supported (not thrown directly here; presumably raised further down the call chain).</exception>
public static ParserModel Train(
    string languageCode,
    IObjectStream<Parse> samples,
    AbstractHeadRules rules,
    TrainingParameters parameters)
    => Train(languageCode, samples, rules, parameters, null);
/// <summary>
/// Trains a Part of Speech model with the given parameters.
/// </summary>
/// <param name="languageCode">The language code.</param>
/// <param name="samples">The data samples.</param>
/// <param name="parameters">The machine learnable parameters; they also decide which trainer type is used.</param>
/// <param name="factory">The POS tagger factory.</param>
/// <param name="monitor">
/// An evaluation monitor that can be used to listen to the messages during the training or to cancel the training operation.
/// This argument can be a <c>null</c> value.
/// </param>
/// <returns>The trained <see cref="POSModel"/> object.</returns>
/// <exception cref="System.NotSupportedException">Trainer type is not supported.</exception>
public static POSModel Train(
    string languageCode,
    IObjectStream<POSSample> samples,
    TrainingParameters parameters,
    POSTaggerFactory factory,
    Monitor monitor)
{
    var posContext = factory.GetPOSContextGenerator();
    var manifest = new Dictionary<string, string>();

    var trainerType = TrainerFactory.GetTrainerType(parameters);

    switch (trainerType)
    {
        case TrainerType.EventModelTrainer:
        {
            var events = new POSSampleEventStream(samples, posContext);
            var eventTrainer = TrainerFactory.GetEventTrainer(parameters, manifest, monitor);
            var eventModel = eventTrainer.Train(events);

            return new POSModel(languageCode, eventModel, manifest, factory);
        }

        case TrainerType.EventModelSequenceTrainer:
        {
            var sequences = new POSSampleSequenceStream(samples, posContext);
            var eventSequenceTrainer = TrainerFactory.GetEventModelSequenceTrainer(parameters, manifest, monitor);
            var eventSequenceModel = eventSequenceTrainer.Train(sequences);

            return new POSModel(languageCode, eventSequenceModel, manifest, factory);
        }

        case TrainerType.SequenceTrainer:
        {
            var sequenceTrainer = TrainerFactory.GetSequenceModelTrainer(parameters, manifest, monitor);

            // TODO: This will probably cause issue, since the feature generator uses the outcomes array
            var sequences = new POSSampleSequenceStream(samples, posContext);
            var sequenceModel = sequenceTrainer.Train(sequences);

            return new POSModel(languageCode, sequenceModel, manifest, factory);
        }

        default:
            throw new NotSupportedException("Trainer type is not supported.");
    }
}
/// <summary>
/// Initializes a new instance of the <see cref="NameSampleSequenceStream"/> class from a feature generator.
/// The generator is wrapped in a <see cref="DefaultNameContextGenerator"/> and <c>useOutcomes</c> is <c>true</c>.
/// </summary>
/// <param name="psi">The sample stream.</param>
/// <param name="featureGen">The feature generator.</param>
public NameSampleSequenceStream(IObjectStream<NameSample> psi, IAdaptiveFeatureGenerator featureGen)
    : this(psi, new DefaultNameContextGenerator(featureGen), useOutcomes: true) {
}
/// <summary>
/// Two argument constructor for DataIndexer.
/// Chains to the four-argument constructor with <c>true</c> as the third argument and
/// <c>null</c> as the fourth.
/// </summary>
/// <param name="eventStream">An event stream which contains a list of all the events seen in the training data.</param>
/// <param name="cutoff">The minimum number of times a predicate must have been observed in order to be included in the model.</param>
public TwoPassDataIndexer(IObjectStream<Event> eventStream, int cutoff)
    : this(eventStream, cutoff, true, null) {
}
/// <summary>
/// Initializes a new instance of the <see cref="NameSampleSequenceStream"/> class
/// with the given context generator and <c>useOutcomes</c> set to <c>true</c>.
/// </summary>
/// <param name="psi">The sample stream.</param>
/// <param name="pcg">The context generator.</param>
public NameSampleSequenceStream(IObjectStream<NameSample> psi, INameContextGenerator pcg)
    : this(psi, pcg, useOutcomes: true) {
}
/// <summary>
/// Trains a model for the <see cref="TokenizerME"/> without an evaluation monitor.
/// </summary>
/// <param name="samples">The samples used for the training.</param>
/// <param name="factory">A <see cref="TokenizerFactory"/> to get resources from.</param>
/// <param name="parameters">The machine learning train parameters.</param>
/// <returns>The trained <see cref="TokenizerModel"/>.</returns>
public static TokenizerModel Train(
    IObjectStream<TokenSample> samples,
    TokenizerFactory factory,
    TrainingParameters parameters) {

    // Same as the monitored overload, with a null monitor.
    return Train(samples, factory, parameters, monitor: null);
}
/// <summary>
/// Trains a document categorizer model with the given parameters, without an evaluation monitor.
/// </summary>
/// <param name="languageCode">The language code.</param>
/// <param name="samples">The data samples.</param>
/// <param name="parameters">The machine learnable parameters.</param>
/// <param name="factory">The document categorizer factory.</param>
/// <returns>The trained <see cref="DocumentCategorizerModel"/> model.</returns>
public static DocumentCategorizerModel Train(
    string languageCode,
    IObjectStream<DocumentSample> samples,
    TrainingParameters parameters,
    DocumentCategorizerFactory factory) {

    // Same as the monitored overload, with a null monitor.
    return Train(languageCode, samples, parameters, factory, monitor: null);
}
/// <summary>
/// Trains a parser model with the given parameters.
/// The build, tagger, chunker and check models are trained one after another from the same
/// sample stream, which is reset between the individual training passes.
/// </summary>
/// <param name="monitor">
/// A evaluation monitor that can be used to listen the messages during the training or it can cancel the training operation.
/// This argument can be a <c>null</c> value.
/// </param>
/// <param name="languageCode">The language code.</param>
/// <param name="samples">The data samples.</param>
/// <param name="rules">The head rules.</param>
/// <param name="parameters">
/// The machine learnable parameters. The <c>build</c>, <c>tagger</c>, <c>chunker</c> and <c>check</c>
/// namespaces are read from it.
/// </param>
/// <returns>The trained <see cref="ParserModel" /> object.</returns>
public static ParserModel Train(
    Monitor monitor,
    string languageCode,
    IObjectStream<Parse> samples,
    AbstractHeadRules rules,
    TrainingParameters parameters) {

    var dict = BuildDictionary(samples, rules, parameters);

    samples.Reset();

    var manifestInfoEntries = new Dictionary<string, string>();

    // build
    var bes = new ParserEventStream(samples, rules, ParserEventTypeEnum.Build, dict);
    var buildReportMap = new Dictionary<string, string>();
    var buildTrainer = TrainerFactory.GetEventTrainer(parameters.GetNamespace("build"), buildReportMap, monitor);
    var buildModel = buildTrainer.Train(bes);

    MergeReportIntoManifest(manifestInfoEntries, buildReportMap, "build");

    samples.Reset();

    // tag
    var posTaggerParams = parameters.GetNamespace("tagger");
    if (!posTaggerParams.Contains(Parameters.BeamSize)) {
        posTaggerParams.Set(Parameters.BeamSize, "10");
    }

    // Train with the very instance that received the default beam size. Fetching the
    // namespace a second time is not guaranteed to reflect a value that was set on a
    // previously returned instance, which would silently drop the default.
    var posModel = POSTaggerME.Train(
        languageCode,
        new PosSampleStream(samples),
        posTaggerParams,
        new POSTaggerFactory());

    samples.Reset();

    // chunk
    var chunkModel = ChunkerME.Train(
        languageCode,
        new ChunkSampleStream(samples),
        parameters.GetNamespace("chunker"),
        new ParserChunkerFactory());

    samples.Reset();

    // check
    var kes = new ParserEventStream(samples, rules, ParserEventTypeEnum.Check);
    var checkReportMap = new Dictionary<string, string>();
    var checkTrainer = TrainerFactory.GetEventTrainer(parameters.GetNamespace("check"), checkReportMap, monitor);
    var checkModel = checkTrainer.Train(kes);

    MergeReportIntoManifest(manifestInfoEntries, checkReportMap, "check");

    return new ParserModel(languageCode, buildModel, checkModel, posModel, chunkModel, rules, manifestInfoEntries);
}
/// <summary>
/// Trains a model for the <see cref="TokenizerME"/>.
/// </summary>
/// <param name="samples">The samples used for the training.</param>
/// <param name="factory">
/// A <see cref="TokenizerFactory"/> to get resources from; its alphanumeric optimization flag,
/// alphanumeric pattern and context generator configure the training event stream.
/// </param>
/// <param name="parameters">The machine learning train parameters.</param>
/// <param name="monitor">
/// A evaluation monitor that can be used to listen the messages during the training or it can cancel the training operation.
/// This argument can be a <c>null</c> value.
/// </param>
/// <returns>The trained <see cref="TokenizerModel"/>.</returns>
public static TokenizerModel Train(
    IObjectStream<TokenSample> samples,
    TokenizerFactory factory,
    TrainingParameters parameters,
    Monitor monitor) {

    var manifestInfoEntries = new Dictionary<string, string>();

    var tokenEvents = new TokSpanEventStream(
        samples,
        factory.UseAlphaNumericOptimization,
        factory.AlphaNumericPattern,
        factory.ContextGenerator);

    var eventTrainer = TrainerFactory.GetEventTrainer(parameters, manifestInfoEntries, monitor);

    return new TokenizerModel(eventTrainer.Train(tokenEvents), manifestInfoEntries, factory);
}
/// <summary>
/// Trains a name finder model, using <c>DefaultType</c> as the type argument.
/// </summary>
/// <param name="languageCode">The language of the training data.</param>
/// <param name="samples">The training samples.</param>
/// <param name="parameters">The machine learning train parameters.</param>
/// <param name="factory">The name finder factory.</param>
/// <param name="monitor">
/// A evaluation monitor that can be used to listen the messages during the training or it can cancel the training operation.
/// This argument can be a <c>null</c> value.
/// </param>
/// <returns>The newly trained <see cref="TokenNameFinderModel"/> model.</returns>
public static TokenNameFinderModel Train(
    string languageCode,
    IObjectStream<NameSample> samples,
    TrainingParameters parameters,
    TokenNameFinderFactory factory,
    Monitor monitor) {

    // Forward to the overload that takes an explicit type.
    var trainedModel = Train(languageCode, DefaultType, samples, parameters, factory, monitor);
    return trainedModel;
}
/// <summary>
/// Trains a name finder model with the given parameters, without an evaluation monitor.
/// </summary>
/// <param name="languageCode">The language of the training data.</param>
/// <param name="type">Overrides the type parameter in the provided samples. This value can be null.</param>
/// <param name="samples">The training samples.</param>
/// <param name="parameters">The machine learning train parameters.</param>
/// <param name="factory">The name finder factory.</param>
/// <returns>The newly trained <see cref="TokenNameFinderModel"/> model.</returns>
public static TokenNameFinderModel Train(
    string languageCode,
    string type,
    IObjectStream<NameSample> samples,
    TrainingParameters parameters,
    TokenNameFinderFactory factory) {

    // Same as the monitored overload, with a null monitor.
    return Train(languageCode, type, samples, parameters, factory, monitor: null);
}
/// <summary>
/// Initializes a new instance of the <see cref="NameSampleSequenceStream"/> class
/// using a new <see cref="BioCodec"/> as sequence codec.
/// </summary>
/// <param name="psi">The sample stream.</param>
/// <param name="pcg">The context generator.</param>
/// <param name="useOutcomes">The <c>useOutcomes</c> setting, passed through unchanged to the four-argument constructor.</param>
public NameSampleSequenceStream(IObjectStream<NameSample> psi, INameContextGenerator pcg, bool useOutcomes)
    : this(psi, pcg, useOutcomes, seqCodec: new BioCodec()) {
}
/// <summary>
/// Trains a parser model with the given iteration count and cutoff, without an evaluation monitor.
/// </summary>
/// <param name="languageCode">The language code.</param>
/// <param name="samples">The data samples.</param>
/// <param name="rules">The head rules.</param>
/// <param name="iterations">The number of training iterations.</param>
/// <param name="cutoff">The min number of times a feature must be seen.</param>
/// <returns>The trained <see cref="ParserModel"/> object.</returns>
/// <exception cref="System.NotSupportedException">Trainer type is not supported.</exception>
public static ParserModel Train(
    string languageCode,
    IObjectStream<Parse> samples,
    AbstractHeadRules rules,
    int iterations,
    int cutoff) {

    // Same as the monitored overload, with a null monitor.
    return Train(languageCode, samples, rules, iterations, cutoff, monitor: null);
}
/// <summary>
/// Trains a parser model with the given parameters.
/// The iteration count and cutoff are written into a new <see cref="TrainingParameters"/> object
/// (cutoff for the <c>dict</c> namespace; cutoff and iterations for the <c>tagger</c>, <c>chunker</c>,
/// <c>check</c> and <c>build</c> namespaces), which is then used for the training.
/// </summary>
/// <param name="languageCode">The language code.</param>
/// <param name="samples">The data samples.</param>
/// <param name="rules">The head rules.</param>
/// <param name="iterations">The number of training iterations.</param>
/// <param name="cutoff">The min number of times a feature must be seen.</param>
/// <param name="monitor">
/// A evaluation monitor that can be used to listen the messages during the training or it can cancel the training operation.
/// This argument can be a <c>null</c> value.
/// </param>
/// <returns>The trained <see cref="ParserModel"/> object.</returns>
/// <exception cref="System.NotSupportedException">Trainer type is not supported.</exception>
public static ParserModel Train(
    string languageCode,
    IObjectStream<Parse> samples,
    AbstractHeadRules rules,
    int iterations,
    int cutoff,
    Monitor monitor) {

    // Culture-invariant text so the stored values do not depend on the current culture.
    var cutoffText = cutoff.ToString(CultureInfo.InvariantCulture);
    var iterationsText = iterations.ToString(CultureInfo.InvariantCulture);

    var settings = new TrainingParameters();

    // The dictionary namespace only receives a cutoff.
    settings.Set("dict", Parameters.Cutoff, cutoffText);

    // Every model namespace receives the cutoff followed by the iteration count.
    foreach (var modelNamespace in new[] { "tagger", "chunker", "check", "build" }) {
        settings.Set(modelNamespace, Parameters.Cutoff, cutoffText);
        settings.Set(modelNamespace, Parameters.Iterations, iterationsText);
    }

    return Train(languageCode, samples, rules, settings, monitor);
}
/// <summary>
/// Reads all events from the given stream and converts each one into a <see cref="ComparableEvent"/>
/// made of an outcome id and the ids of its context predicates found in <paramref name="predicateIndex"/>.
/// Outcome ids are assigned in order of first appearance. Events without any indexed predicate are
/// dropped (a warning is sent to the monitor when one is available).
/// As a side effect, <c>outcomeLabels</c> and <c>predLabels</c> are rebuilt from the outcome map and
/// the predicate index.
/// </summary>
/// <param name="indexEventStream">The event stream to read until it returns <c>null</c>.</param>
/// <param name="predicateIndex">The map from predicate name to predicate id.</param>
/// <returns>The list of indexed events, in stream order.</returns>
private List<ComparableEvent> Index(
    IObjectStream<Event> indexEventStream,
    Dictionary<string, int> predicateIndex) {

    var map = new Dictionary<string, int>();
    var indexedContext = new List<int>();
    var eventsToCompare = new List<ComparableEvent>();
    int outcomeCount = 0;

    Event ev;
    while ((ev = indexEventStream.Read()) != null) {

        if (Monitor != null && Monitor.Token.CanBeCanceled)
            Monitor.Token.ThrowIfCancellationRequested();

        // Single lookup: reuse the id of a known outcome, or register the next free id.
        int ocID;
        if (!map.TryGetValue(ev.Outcome, out ocID)) {
            ocID = outcomeCount++;
            map[ev.Outcome] = ocID;
        }

        // Keep only the predicates that are part of the index.
        foreach (var pred in ev.Context) {
            int predID;
            if (predicateIndex.TryGetValue(pred, out predID)) {
                indexedContext.Add(predID);
            }
        }

        // drop events with no active features
        if (indexedContext.Count > 0) {
            eventsToCompare.Add(new ComparableEvent(ocID, indexedContext.ToArray()));
        } else {
            if (Monitor != null)
                Monitor.OnWarning(string.Format("Dropped event {0}:{1}", ev.Outcome, ev.Context.ToDisplay()));
        }

        // The scratch list is reused for the next event.
        indexedContext.Clear();
    }

    outcomeLabels = ToIndexedStringArray(map);
    predLabels = ToIndexedStringArray(predicateIndex);
    return eventsToCompare;
}
/// <summary>
/// Trains a document categorizer model with the given parameters.
/// </summary>
/// <param name="languageCode">The language code.</param>
/// <param name="samples">The data samples.</param>
/// <param name="parameters">The machine learnable parameters.</param>
/// <param name="factory">The document categorizer factory; its feature generators configure the training event stream.</param>
/// <param name="monitor">
/// A evaluation monitor that can be used to listen the messages during the training or it can cancel the training operation.
/// This argument can be a <c>null</c> value.
/// </param>
/// <returns>The trained <see cref="DocumentCategorizerModel"/> model.</returns>
public static DocumentCategorizerModel Train(
    string languageCode,
    IObjectStream<DocumentSample> samples,
    TrainingParameters parameters,
    DocumentCategorizerFactory factory,
    Monitor monitor) {

    var manifestInfoEntries = new Dictionary<string, string>();

    var documentEvents = new DocumentCategorizerEventStream(samples, factory.FeatureGenerators);
    var eventTrainer = TrainerFactory.GetEventTrainer(parameters, manifestInfoEntries, monitor);

    return new DocumentCategorizerModel(
        languageCode,
        eventTrainer.Train(documentEvents),
        manifestInfoEntries,
        factory);
}
/// <summary>
/// Initializes a new instance of the <see cref="NameSampleSequenceStream"/> class from a feature generator,
/// which is wrapped in a <see cref="DefaultNameContextGenerator"/>.
/// </summary>
/// <param name="psi">The sample stream.</param>
/// <param name="featureGen">The feature generator.</param>
/// <param name="useOutcomes">The <c>useOutcomes</c> setting, passed through unchanged.</param>
public NameSampleSequenceStream(
    IObjectStream<NameSample> psi,
    IAdaptiveFeatureGenerator featureGen,
    bool useOutcomes)
    : this(psi, new DefaultNameContextGenerator(featureGen), useOutcomes) {
}
/// <summary>
/// Builds the NGram dictionary with the given samples.
/// Every non-empty sentence read from the stream is added to an <see cref="NGramModel"/>,
/// which is then cut off and converted to a dictionary.
/// </summary>
/// <param name="samples">The samples; the stream is read until it returns <c>null</c>.</param>
/// <param name="cutoff">The cutoff, used as the lower bound of the model cut off (the upper bound is <see cref="int.MaxValue"/>).</param>
/// <returns>The NGram dictionary.</returns>
public static Dict BuildNGramDictionary(IObjectStream<POSSample> samples, int cutoff) {
    var ngrams = new NGramModel();

    for (var current = samples.Read(); current != null; current = samples.Read()) {
        // Samples with an empty sentence contribute nothing.
        if (current.Sentence.Length <= 0) {
            continue;
        }

        ngrams.Add(new StringList(current.Sentence), 1, 1);
    }

    ngrams.CutOff(cutoff, int.MaxValue);

    return ngrams.ToDictionary();
}
/// <summary>
/// Populates the given tag dictionary with the word/tag pairs observed in the samples.
/// For each token that <see cref="StringPattern"/> does not report as containing a digit, the
/// occurrences of its tag are counted. Tags the dictionary already holds for a word start with a
/// count equal to <paramref name="cutoff"/>, so they are always kept. After the stream is consumed,
/// every word is written back to the dictionary with the tags whose count reached the cutoff.
/// </summary>
/// <param name="samples">The samples; the stream is read until it returns <c>null</c>.</param>
/// <param name="dictionary">The dictionary to read the existing tags from and to write the result to.</param>
/// <param name="caseSensitive">If <c>false</c>, words are lower-cased (invariant culture) before being counted.</param>
/// <param name="cutoff">The minimum count a word/tag pair needs in order to be stored.</param>
public static void PopulatePOSDictionary(
    IObjectStream<POSSample> samples,
    IMutableTagDictionary dictionary,
    bool caseSensitive,
    int cutoff) {

    // word -> (tag -> number of occurrences)
    var newEntries = new Dictionary<string, Dictionary<string, int>>();

    POSSample sample;
    while ((sample = samples.Read()) != null) {
        for (int i = 0; i < sample.Sentence.Length; i++) {
            // only store words
            if (StringPattern.Recognize(sample.Sentence[i]).ContainsDigit) {
                continue;
            }

            string word = caseSensitive
                ? sample.Sentence[i]
                : sample.Sentence[i].ToLowerInvariant();

            // One lookup fetches (or creates) the counter table of the word.
            Dictionary<string, int> tagCounts;
            if (!newEntries.TryGetValue(word, out tagCounts)) {
                tagCounts = new Dictionary<string, int>();
                newEntries.Add(word, tagCounts);
            }

            // Tags already known to the dictionary start at the cutoff so they survive the filter below.
            var dicTags = dictionary.GetTags(word);
            if (dicTags != null) {
                foreach (var tag in dicTags) {
                    if (!tagCounts.ContainsKey(tag)) {
                        tagCounts.Add(tag, cutoff);
                    }
                }
            }

            string observedTag = sample.Tags[i];
            int seen;
            if (tagCounts.TryGetValue(observedTag, out seen)) {
                tagCounts[observedTag] = seen + 1;
            } else {
                tagCounts.Add(observedTag, 1);
            }
        }
    }

    // Store only the word/tag pairs with enough occurrences.
    foreach (var wordEntry in newEntries) {
        var tagsForWord = (from entry in wordEntry.Value
                           where entry.Value >= cutoff
                           select entry.Key).ToArray();

        if (tagsForWord.Length > 0)
            dictionary.Put(wordEntry.Key, tagsForWord);
    }
}
/// <summary>
/// One argument constructor for DataIndexer which calls the two argument constructor
/// with a cutoff of <c>0</c>, that is, assuming no cutoff.
/// </summary>
/// <param name="eventStream">An event stream which contains a list of all the events seen in the training data.</param>
public TwoPassDataIndexer(IObjectStream<Event> eventStream)
    : this(eventStream, cutoff: 0) {
}
/// <summary>
/// Trains a Part of Speech model with the given parameters, without an evaluation monitor.
/// </summary>
/// <param name="languageCode">The language code.</param>
/// <param name="samples">The data samples.</param>
/// <param name="parameters">The machine learnable parameters.</param>
/// <param name="factory">The POS tagger factory.</param>
/// <returns>The trained <see cref="POSModel"/> object.</returns>
/// <exception cref="System.NotSupportedException">Trainer type is not supported.</exception>
public static POSModel Train(
    string languageCode,
    IObjectStream<POSSample> samples,
    TrainingParameters parameters,
    POSTaggerFactory factory) {

    // Same as the monitored overload, with a null monitor.
    return Train(languageCode, samples, parameters, factory, monitor: null);
}
/// <summary>
/// Three argument constructor for DataIndexer.
/// Chains to the four-argument constructor with <c>null</c> as the last argument.
/// </summary>
/// <param name="eventStream">An event stream which contains a list of all the events seen in the training data.</param>
/// <param name="cutoff">The minimum number of times a predicate must have been observed in order to be included in the model.</param>
/// <param name="sort">if set to <c>true</c> the events will be sorted.</param>
public TwoPassDataIndexer(IObjectStream<Event> eventStream, int cutoff, bool sort)
    : this(eventStream, cutoff, sort, null) {
}
/// <summary>
/// Trains a name finder model with the given parameters.
/// The trainer type resolved from <paramref name="parameters"/> decides whether a maxent model
/// or a sequence classification model is trained.
/// </summary>
/// <param name="languageCode">The language of the training data.</param>
/// <param name="type">Overrides the type parameter in the provided samples. This value can be null.</param>
/// <param name="samples">The training samples.</param>
/// <param name="parameters">The machine learning train parameters; the beam size is also read from them.</param>
/// <param name="factory">The name finder factory.</param>
/// <param name="monitor">
/// A evaluation monitor that can be used to listen the messages during the training or it can cancel the training operation.
/// This argument can be a <c>null</c> value.
/// </param>
/// <returns>The newly trained <see cref="TokenNameFinderModel"/> model.</returns>
/// <exception cref="System.InvalidOperationException">The trainer type is not one of the handled types.</exception>
public static TokenNameFinderModel Train(
    string languageCode,
    string type,
    IObjectStream<NameSample> samples,
    TrainingParameters parameters,
    TokenNameFinderFactory factory,
    Monitor monitor) {

    var beamSize = parameters.Get(Parameters.BeamSize, DefaultBeamSize);
    var manifestInfoEntries = new Dictionary<string, string>();
    var trainerType = TrainerFactory.GetTrainerType(parameters);

    // Exactly one of the two is assigned by the branch matching the trainer type.
    IMaxentModel meModel = null;
    ML.Model.ISequenceClassificationModel<string> seqModel = null;

    if (trainerType == TrainerType.EventModelTrainer) {
        var nameEvents = new NameFinderEventStream(
            samples,
            type,
            factory.CreateContextGenerator(),
            factory.CreateSequenceCodec());
        var eventTrainer = TrainerFactory.GetEventTrainer(parameters, manifestInfoEntries, monitor);

        meModel = eventTrainer.Train(nameEvents);
    } else if (trainerType == TrainerType.EventModelSequenceTrainer) {
        var nameSequences = new NameSampleSequenceStream(samples, factory.CreateContextGenerator());
        var eventSequenceTrainer = TrainerFactory.GetEventModelSequenceTrainer(parameters, manifestInfoEntries, monitor);

        meModel = eventSequenceTrainer.Train(nameSequences);
    } else if (trainerType == TrainerType.SequenceTrainer) {
        var nameSequences = new NameSampleSequenceStream(samples, factory.CreateContextGenerator());
        var sequenceTrainer = TrainerFactory.GetSequenceModelTrainer(parameters, manifestInfoEntries, monitor);

        seqModel = sequenceTrainer.Train(nameSequences);
    } else {
        throw new InvalidOperationException("Unexpected trainer type!");
    }

    // Without a sequence model the maxent model (together with the beam size) is packaged.
    if (seqModel == null) {
        return new TokenNameFinderModel(
            languageCode,
            meModel,
            beamSize,
            factory.FeatureGenerator,
            factory.Resources,
            manifestInfoEntries,
            factory.SequenceCodec,
            factory);
    }

    return new TokenNameFinderModel(
        languageCode,
        seqModel,
        factory.FeatureGenerator,
        factory.Resources,
        manifestInfoEntries,
        factory.SequenceCodec,
        factory);
}