/// <summary>
/// Initializes a new instance of the <see cref="ParserModel"/> class without an attach model.
/// </summary>
/// <param name="languageCode">The language code.</param>
/// <param name="buildModel">The build model.</param>
/// <param name="checkModel">The check model.</param>
/// <param name="parserTagger">The pos-tagger model used by the parser.</param>
/// <param name="chunkerTagger">The chunker model used by the parser.</param>
/// <param name="headRules">The head rules.</param>
/// <param name="type">The parser model type.</param>
/// <param name="manifestInfoEntries">The manifest information entries.</param>
/// <remarks>
/// Delegates to the overload that additionally accepts an attach model, always passing
/// <c>null</c> for it.
/// NOTE(review): the delegated overload presumably rejects a <c>null</c> attach model for
/// non-chunking parser types - confirm before calling this with another <paramref name="type"/>.
/// </remarks>
public ParserModel(string languageCode,
    IMaxentModel buildModel,
    IMaxentModel checkModel,
    POSModel parserTagger,
    ChunkerModel chunkerTagger,
    AbstractHeadRules headRules,
    ParserType type,
    Dictionary <string, string> manifestInfoEntries)
    : this( languageCode,
        buildModel,
        checkModel,
        null, // no attach model
        parserTagger,
        chunkerTagger,
        headRules,
        type,
        manifestInfoEntries) { }
/// <summary>
/// Creates a new <see cref="ParserCrossEvaluator"/> and stores the supplied settings for later use.
/// </summary>
/// <param name="languageCode">The language of the training data.</param>
/// <param name="parameters">The machine learning train parameters.</param>
/// <param name="headRules">The head rules.</param>
/// <param name="parserType">The parser model type.</param>
/// <param name="monitors">The training monitors.</param>
public ParserCrossEvaluator(
    string languageCode,
    TrainingParameters parameters,
    AbstractHeadRules headRules,
    ParserType parserType,
    params IEvaluationMonitor<Parse>[] monitors)
{
    // Parser configuration.
    this.parserType = parserType;
    this.headRules = headRules;

    // Training configuration.
    this.languageCode = languageCode;
    this.parameters = parameters;

    // Observers.
    this.monitors = monitors;
}
/// <summary>
/// Replaces the child at <paramref name="parseIndex"/> with a new node that groups that child
/// together with <paramref name="node"/>.
/// </summary>
/// <param name="node">The node to place after the existing child inside the new node.</param>
/// <param name="rules">The head rules used to pick the head of the new node.</param>
/// <param name="parseIndex">The index, within this parse's parts, of the child to wrap.</param>
/// <returns>The newly created node, which now occupies <paramref name="parseIndex"/>.</returns>
public Parse AdJoinRoot(Parse node, AbstractHeadRules rules, int parseIndex)
{
    var existing = parts[parseIndex];

    // The new node starts where the existing child starts and ends where the added node ends.
    var coverage = new Span(existing.Span.Start, node.Span.End);
    var head = rules.GetHead(new[] { existing, node }, existing.Type);

    // The new node takes over the type of the child it wraps.
    var joined = new Parse(Text, coverage, existing.Type, 1, head);

    joined.parts.Add(existing);

    // Punctuation recorded before the added node is placed between the two children.
    if (node.PreviousPunctuationSet != null)
    {
        joined.parts.AddRange(node.PreviousPunctuationSet);
    }

    joined.parts.Add(node);

    parts[parseIndex] = joined;
    return joined;
}
/// <summary>
/// Initializes the shared state of an <see cref="AbstractBottomUpParser"/>.
/// </summary>
/// <param name="tagger">The pos-tagger that the parser uses.</param>
/// <param name="chunker">The chunker that the parser uses to chunk non-recursive structures.</param>
/// <param name="headRules">The head rules for the parser; its punctuation tags are cached.</param>
/// <param name="beamSize">Size of the beam; also used as the capacity of the parse heaps.</param>
/// <param name="advancePercentage">The advance percentage.</param>
protected AbstractBottomUpParser(
    IPOSTagger tagger,
    IChunker chunker,
    AbstractHeadRules headRules,
    int beamSize,
    double advancePercentage)
{
    // Collaborators.
    this.tagger = tagger;
    this.chunker = chunker;
    this.headRules = headRules;
    punctSet = headRules.PunctuationTags;

    // Search settings: M and K both take the beam size.
    M = beamSize;
    K = beamSize;
    Q = advancePercentage;
    ReportFailedParse = true;

    // Working heaps, each sized by K.
    completeParses = new ListHeap<Parse>(K);
    odh = new ListHeap<Parse>(K);
    ndh = new ListHeap<Parse>(K);
}
/// <summary>
/// Initializes a new instance of the <see cref="ParserModel"/> from the given models and head rules.
/// </summary>
/// <param name="languageCode">The language code.</param>
/// <param name="buildModel">The model to assign constituent labels.</param>
/// <param name="checkModel">The model to determine a constituent is complete.</param>
/// <param name="attachModel">The attach model.</param>
/// <param name="parserTagger">The model to assign pos-tags.</param>
/// <param name="chunkerTagger">The model to assign flat constituent labels.</param>
/// <param name="headRules">The head rules.</param>
/// <param name="modelType">Type of the model.</param>
/// <param name="manifestInfoEntries">The manifest information entries.</param>
/// <exception cref="System.ArgumentException">
/// <paramref name="modelType"/> is <see cref="Parser.ParserType.Chunking"/> and <paramref name="attachModel"/> is not <c>null</c>,
/// or
/// <paramref name="modelType"/> is <see cref="Parser.ParserType.TreeInsert"/> and <paramref name="attachModel"/> is <c>null</c>.
/// </exception>
/// <exception cref="ArgumentOutOfRangeException">
/// Unknown <paramref name="modelType"/> value.
/// </exception>
public ParserModel(
    string languageCode,
    IMaxentModel buildModel,
    IMaxentModel checkModel,
    IMaxentModel attachModel,
    POSModel parserTagger,
    ChunkerModel chunkerTagger,
    AbstractHeadRules headRules,
    ParserType modelType,
    Dictionary<string, string> manifestInfoEntries)
    : base(ComponentName, languageCode, manifestInfoEntries)
{
    if (modelType == ParserType.Chunking)
    {
        // A chunking parser has no attach step, so an attach model is a caller error.
        if (attachModel != null)
        {
            throw new ArgumentException(@"attachModel must be null for chunking parser!", nameof(attachModel));
        }

        Manifest[ParserTypeParameter] = "CHUNKING";
    }
    else if (modelType == ParserType.TreeInsert)
    {
        // A tree-insert parser cannot work without its attach model.
        if (attachModel == null)
        {
            throw new ArgumentException(@"attachModel must not be null for treeinsert parser!", nameof(attachModel));
        }

        Manifest[ParserTypeParameter] = "TREEINSERT";
        artifactMap[EntryAttachModel] = attachModel;
    }
    else
    {
        throw new ArgumentOutOfRangeException(nameof(modelType), "Unknown model type");
    }

    // Artifacts shared by every parser type.
    artifactMap[EntryBuildModel] = buildModel;
    artifactMap[EntryCheckModel] = checkModel;
    artifactMap[EntryParserTaggerModel] = parserTagger;
    artifactMap[EntryChunkerTaggerModel] = chunkerTagger;
    artifactMap[EntryHeadRules] = headRules;

    CheckArtifactMap();
}
/// <summary>
/// Initializes a new instance of the <see cref="ParserModel"/> using the specified models and head rules.
/// </summary>
/// <param name="languageCode">The language code.</param>
/// <param name="buildModel">The build model.</param>
/// <param name="checkModel">The check model.</param>
/// <param name="attachModel">
/// The attach model; must be <c>null</c> for <see cref="ParserType.Chunking"/> and
/// non-<c>null</c> for <see cref="ParserType.TreeInsert"/>.
/// </param>
/// <param name="parserTagger">The pos-tagger model used by the parser.</param>
/// <param name="chunkerTagger">The chunker model used by the parser.</param>
/// <param name="headRules">The head rules.</param>
/// <param name="modelType">Type of the model.</param>
/// <param name="manifestInfoEntries">The manifest information entries.</param>
/// <exception cref="ArgumentException">
/// <paramref name="attachModel"/> does not match the requirement of <paramref name="modelType"/>.
/// </exception>
/// <exception cref="ArgumentOutOfRangeException">
/// Unknown <paramref name="modelType"/> value.
/// </exception>
public ParserModel(
    string languageCode,
    IMaxentModel buildModel,
    IMaxentModel checkModel,
    IMaxentModel attachModel,
    POSModel parserTagger,
    ChunkerModel chunkerTagger,
    AbstractHeadRules headRules,
    ParserType modelType,
    Dictionary<string, string> manifestInfoEntries)
    : base(ComponentName, languageCode, manifestInfoEntries)
{
    switch (modelType)
    {
        case ParserType.Chunking:
            if (attachModel != null)
            {
                throw new ArgumentException(@"attachModel must be null for chunking parser!", nameof(attachModel));
            }

            Manifest[PARSER_TYPE] = "CHUNKING";
            break;

        case ParserType.TreeInsert:
            if (attachModel == null)
            {
                throw new ArgumentException(@"attachModel must not be null for treeinsert parser!", nameof(attachModel));
            }

            Manifest[PARSER_TYPE] = "TREEINSERT";
            artifactMap[ATTACH_MODEL_ENTRY_NAME] = attachModel;
            break;

        default:
            // ArgumentOutOfRangeException derives from ArgumentException, so existing
            // catch (ArgumentException) handlers keep working.
            throw new ArgumentOutOfRangeException(nameof(modelType), @"Unknown model type.");
    }

    // Artifacts shared by every parser type.
    artifactMap[BUILD_MODEL_ENTRY_NAME] = buildModel;
    artifactMap[CHECK_MODEL_ENTRY_NAME] = checkModel;
    artifactMap[PARSER_TAGGER_MODEL_ENTRY_NAME] = parserTagger;
    artifactMap[CHUNKER_TAGGER_MODEL_ENTRY_NAME] = chunkerTagger;
    artifactMap[HEAD_RULES_MODEL_ENTRY_NAME] = headRules;

    CheckArtifactMap();
}
/// <summary>
/// Initializes a new instance of the <see cref="ParserModel"/> using the specified models and head rules without manifest information entries.
/// </summary>
/// <param name="languageCode">The language code.</param>
/// <param name="buildModel">The model to assign constituent labels.</param>
/// <param name="checkModel">The model to determine a constituent is complete.</param>
/// <param name="attachModel">The attach model.</param>
/// <param name="parserTagger">The model to assign pos-tags.</param>
/// <param name="chunkerTagger">The model to assign flat constituent labels.</param>
/// <param name="headRules">The head rules.</param>
/// <param name="modelType">Type of the model.</param>
/// <remarks>
/// Delegates to the overload that also takes manifest information entries, passing <c>null</c> for them.
/// </remarks>
/// <exception cref="System.ArgumentException">
/// If the <paramref name="modelType"/> is equal to <see cref="Parser.ParserType.Chunking"/> the <paramref name="attachModel"/> must be <c>null</c>.
/// or
/// If the <paramref name="modelType"/> is equal to <see cref="Parser.ParserType.TreeInsert"/> the <paramref name="attachModel"/> must not be <c>null</c>.
/// </exception>
/// <exception cref="ArgumentOutOfRangeException">
/// Unknown <paramref name="modelType"/> value.
/// </exception>
public ParserModel(
    string languageCode,
    IMaxentModel buildModel,
    IMaxentModel checkModel,
    IMaxentModel attachModel,
    POSModel parserTagger,
    ChunkerModel chunkerTagger,
    AbstractHeadRules headRules,
    ParserType modelType)
    : this(
        languageCode,
        buildModel,
        checkModel,
        attachModel,
        parserTagger,
        chunkerTagger,
        headRules,
        modelType,
        null) { }
/// <summary>
/// Initializes a new instance of the <see cref="ParserModel"/> using the specified models and head rules.
/// </summary>
/// <param name="languageCode">The language code.</param>
/// <param name="buildModel">The model to assign constituent labels.</param>
/// <param name="checkModel">The model to determine a constituent is complete.</param>
/// <param name="attachModel">The attach model.</param>
/// <param name="parserTagger">The model to assign pos-tags.</param>
/// <param name="chunkerTagger">The model to assign flat constituent labels.</param>
/// <param name="headRules">The head rules.</param>
/// <param name="modelType">Type of the model.</param>
/// <param name="manifestInfoEntries">The manifest information entries.</param>
/// <exception cref="System.ArgumentException">
/// If the <paramref name="modelType"/> is equal to <see cref="Parser.ParserType.Chunking"/> the <paramref name="attachModel"/> must be <c>null</c>.
/// or
/// If the <paramref name="modelType"/> is equal to <see cref="Parser.ParserType.TreeInsert"/> the <paramref name="attachModel"/> must not be <c>null</c>.
/// </exception>
/// <exception cref="ArgumentOutOfRangeException">
/// Unknown <paramref name="modelType"/> value.
/// </exception>
public ParserModel(
    string languageCode,
    IMaxentModel buildModel,
    IMaxentModel checkModel,
    IMaxentModel attachModel,
    POSModel parserTagger,
    ChunkerModel chunkerTagger,
    AbstractHeadRules headRules,
    ParserType modelType,
    Dictionary<string, string> manifestInfoEntries)
    : base(ComponentName, languageCode, manifestInfoEntries)
{
    switch (modelType)
    {
        case ParserType.Chunking:
            if (attachModel != null)
            {
                // nameof keeps the reported parameter name in sync with the signature.
                throw new ArgumentException(@"attachModel must be null for chunking parser!", nameof(attachModel));
            }

            Manifest[ParserTypeParameter] = "CHUNKING";
            break;

        case ParserType.TreeInsert:
            if (attachModel == null)
            {
                throw new ArgumentException(@"attachModel must not be null for treeinsert parser!", nameof(attachModel));
            }

            Manifest[ParserTypeParameter] = "TREEINSERT";
            artifactMap[EntryAttachModel] = attachModel;
            break;

        default:
            throw new ArgumentOutOfRangeException(nameof(modelType), "Unknown model type");
    }

    // Artifacts shared by every parser type.
    artifactMap[EntryBuildModel] = buildModel;
    artifactMap[EntryCheckModel] = checkModel;
    artifactMap[EntryParserTaggerModel] = parserTagger;
    artifactMap[EntryChunkerTaggerModel] = chunkerTagger;
    artifactMap[EntryHeadRules] = headRules;

    CheckArtifactMap();
}
/// <summary>
/// Determines whether the specified head rules are equal to this instance.
/// </summary>
/// <param name="other">The head rules to compare with this instance; may be <c>null</c>.</param>
/// <returns>
/// <c>true</c> if <paramref name="other"/> is this instance, or if both the head rule keys and
/// the punctuation tags of the two instances are sequence-equal; otherwise <c>false</c>.
/// </returns>
/// <remarks>
/// Only the keys of the head rules are compared, not their values, and both comparisons
/// depend on enumeration order.
/// </remarks>
protected bool Equals(AbstractHeadRules other)
{
    // A null argument is never equal; without this guard the member access below would throw.
    if (ReferenceEquals(other, null))
    {
        return false;
    }

    if (ReferenceEquals(this, other))
    {
        return true;
    }

    return HeadRules.Keys.SequenceEqual(other.HeadRules.Keys) &&
           PunctuationTags.SequenceEqual(other.PunctuationTags);
}
/// <summary>
/// Creates an n-gram dictionary from the specified data stream using the specified head rules and cut-off.
/// </summary>
/// <param name="data">The data stream of parses; it is read until <c>Read()</c> returns <c>null</c>.</param>
/// <param name="rules">The head rules for the parses.</param>
/// <param name="parameters">Can contain a cutoff, the minimum number of entries required for the n-gram to be saved as part of the dictionary.</param>
/// <returns>A dictionary object.</returns>
public static Dic BuildDictionary(IObjectStream <Parse> data, AbstractHeadRules rules, TrainingParameters parameters)
{
    // Cutoff is looked up under the "dict" namespace; presumably 5 is the fallback when it is not configured.
    var cutoff = parameters.Get("dict", Parameters.Cutoff, 5);
    var dict = new NGramModel();
    Parse p;
    while ((p = data.Read()) != null)
    {
        p.UpdateHeads(rules);
        var pWords = p.GetTagNodes();
        var words = new string[pWords.Length];
        // add all uni-grams
        for (var wi = 0; wi < words.Length; wi++)
        {
            words[wi] = pWords[wi].CoveredText;
        }
        dict.Add(new StringList(words), 1, 1);
        // add tri-grams and bi-grams for initial sequence
        var chunks = CollapsePunctuation(AbstractParserEventStream.GetInitialChunks(p), rules.PunctuationTags);
        var cWords = new string[chunks.Length];
        for (var wi = 0; wi < cWords.Length; wi++)
        {
            cWords[wi] = chunks[wi].Head.CoveredText;
        }
        dict.Add(new StringList(cWords), 2, 3);
        // emulate reductions to produce additional n-grams
        var ci = 0;
        while (ci < chunks.Length)
        {
            /*
             * if (chunks[ci].Parent == null) {
             *     chunks[ci].Show();
             * }
             */
            if (LastChild(chunks[ci], chunks[ci].Parent, rules.PunctuationTags))
            {
                // perform reduce
                // Walk left to the first chunk that shares the parent of chunks[ci].
                var reduceStart = ci;
                while (reduceStart >= 0 && Equals(chunks[reduceStart].Parent, chunks[ci].Parent))
                {
                    reduceStart--;
                }
                reduceStart++;
                chunks = ParserEventStream.ReduceChunks(chunks, ref ci, chunks[ci].Parent);
                ci = reduceStart;
                if (chunks.Length != 0)
                {
                    // Collect the head words of up to two chunks on each side of position ci.
                    var window = new string[5];
                    var wi = 0;
                    if (ci - 2 >= 0)
                    {
                        window[wi++] = chunks[ci - 2].Head.CoveredText;
                    }
                    if (ci - 1 >= 0)
                    {
                        window[wi++] = chunks[ci - 1].Head.CoveredText;
                    }
                    window[wi++] = chunks[ci].Head.CoveredText;
                    if
                    (ci + 1 < chunks.Length)
                    {
                        window[wi++] = chunks[ci + 1].Head.CoveredText;
                    }
                    if (ci + 2 < chunks.Length)
                    {
                        window[wi++] = chunks[ci + 2].Head.CoveredText;
                    }
                    // Trim the window to the slots that were actually filled.
                    if (wi < 5)
                    {
                        var subWindow = new string[wi];
                        for (var swi = 0; swi < wi; swi++)
                        {
                            subWindow[swi] = window[swi];
                        }
                        window = subWindow;
                    }
                    // A single-word window is not added at all.
                    if (window.Length >= 3)
                    {
                        dict.Add(new StringList(window), 2, 3);
                    }
                    else if (window.Length == 2)
                    {
                        dict.Add(new StringList(window), 2, 2);
                    }
                }
                ci = reduceStart - 1; // ci will be incremented at end of loop
            }
            ci++;
        }
    }
    dict.CutOff(cutoff, int.MaxValue);
    // NOTE(review): the meaning of the 'true' argument is not visible here - presumably case sensitivity; confirm.
    return(dict.ToDictionary(true));
}
/// <summary>
/// Initializes a new instance of the <see cref="ParserModel"/> using the specified models and head rules using the model type as chunking.
/// </summary>
/// <param name="languageCode">The language code.</param>
/// <param name="buildModel">The model to assign constituent labels.</param>
/// <param name="checkModel">The model to determine a constituent is complete.</param>
/// <param name="parserTagger">The model to assign pos-tags.</param>
/// <param name="chunkerTagger">The model to assign flat constituent labels.</param>
/// <param name="headRules">The head rules.</param>
/// <param name="manifestInfoEntries">The manifest information entries.</param>
/// <remarks>
/// Delegates to the overload that takes an attach model and a model type, passing <c>null</c>
/// as the attach model and <see cref="ParserType.Chunking"/> as the model type.
/// </remarks>
public ParserModel(
    string languageCode,
    IMaxentModel buildModel,
    IMaxentModel checkModel,
    POSModel parserTagger,
    ChunkerModel chunkerTagger,
    AbstractHeadRules headRules,
    Dictionary<string, string> manifestInfoEntries)
    : this(
        languageCode,
        buildModel,
        checkModel,
        null,
        parserTagger,
        chunkerTagger,
        headRules,
        ParserType.Chunking,
        manifestInfoEntries) { }
/// <summary>
/// Initializes a new instance of the <see cref="ParserModel"/> using the specified models and head rules without manifest information entries.
/// </summary>
/// <param name="languageCode">The language code.</param>
/// <param name="buildModel">The model to assign constituent labels.</param>
/// <param name="checkModel">The model to determine a constituent is complete.</param>
/// <param name="attachModel">The attach model.</param>
/// <param name="parserTagger">The model to assign pos-tags.</param>
/// <param name="chunkerTagger">The model to assign flat constituent labels.</param>
/// <param name="headRules">The head rules.</param>
/// <param name="modelType">Type of the model.</param>
/// <remarks>
/// Delegates to the overload that also takes manifest information entries, passing <c>null</c> for them.
/// </remarks>
/// <exception cref="System.ArgumentException">
/// If the <paramref name="modelType"/> is equal to <see cref="Parser.ParserType.Chunking"/> the <paramref name="attachModel"/> must be <c>null</c>.
/// or
/// If the <paramref name="modelType"/> is equal to <see cref="Parser.ParserType.TreeInsert"/> the <paramref name="attachModel"/> must not be <c>null</c>.
/// </exception>
/// <exception cref="ArgumentOutOfRangeException">
/// Unknown <paramref name="modelType"/> value.
/// </exception>
public ParserModel(
    string languageCode,
    IMaxentModel buildModel,
    IMaxentModel checkModel,
    IMaxentModel attachModel,
    POSModel parserTagger,
    ChunkerModel chunkerTagger,
    AbstractHeadRules headRules,
    ParserType modelType)
    : this(
        languageCode,
        buildModel,
        checkModel,
        attachModel,
        parserTagger,
        chunkerTagger,
        headRules,
        modelType,
        null) { }
/// <summary>
/// Creates an n-gram dictionary from the specified data stream using the specified head rules and cut-off.
/// </summary>
/// <param name="data">The data stream of parses; it is read until <c>Read()</c> returns <c>null</c>.</param>
/// <param name="rules">The head rules for the parses.</param>
/// <param name="parameters">Can contain a cutoff, the minimum number of entries required for the n-gram to be saved as part of the dictionary.</param>
/// <returns>A dictionary object.</returns>
public static Dic BuildDictionary(IObjectStream<Parse> data, AbstractHeadRules rules, TrainingParameters parameters)
{
    // Cutoff is looked up under the "dict" namespace; presumably 5 is the fallback when it is not configured.
    var cutoff = parameters.Get("dict", Parameters.Cutoff, 5);
    var dict = new NGramModel();
    Parse p;
    while ((p = data.Read()) != null)
    {
        p.UpdateHeads(rules);
        var pWords = p.GetTagNodes();
        var words = new string[pWords.Length];
        // add all uni-grams
        for (var wi = 0; wi < words.Length; wi++)
        {
            words[wi] = pWords[wi].CoveredText;
        }
        dict.Add(new StringList(words), 1, 1);
        // add tri-grams and bi-grams for initial sequence
        var chunks = CollapsePunctuation(AbstractParserEventStream.GetInitialChunks(p), rules.PunctuationTags);
        var cWords = new string[chunks.Length];
        for (var wi = 0; wi < cWords.Length; wi++)
        {
            cWords[wi] = chunks[wi].Head.CoveredText;
        }
        dict.Add(new StringList(cWords), 2, 3);
        // emulate reductions to produce additional n-grams
        var ci = 0;
        while (ci < chunks.Length)
        {
            /*
            if (chunks[ci].Parent == null) {
                chunks[ci].Show();
            }
            */
            if (LastChild(chunks[ci], chunks[ci].Parent, rules.PunctuationTags))
            {
                // perform reduce
                // Walk left to the first chunk that shares the parent of chunks[ci].
                var reduceStart = ci;
                while (reduceStart >= 0 && Equals(chunks[reduceStart].Parent, chunks[ci].Parent))
                {
                    reduceStart--;
                }
                reduceStart++;
                chunks = ParserEventStream.ReduceChunks(chunks, ref ci, chunks[ci].Parent);
                ci = reduceStart;
                if (chunks.Length != 0)
                {
                    // Collect the head words of up to two chunks on each side of position ci.
                    var window = new string[5];
                    var wi = 0;
                    if (ci - 2 >= 0) window[wi++] = chunks[ci - 2].Head.CoveredText;
                    if (ci - 1 >= 0) window[wi++] = chunks[ci - 1].Head.CoveredText;
                    window[wi++] = chunks[ci].Head.CoveredText;
                    if (ci + 1 <
                        chunks.Length) window[wi++] = chunks[ci + 1].Head.CoveredText;
                    if (ci + 2 < chunks.Length) window[wi++] = chunks[ci + 2].Head.CoveredText;
                    // Trim the window to the slots that were actually filled.
                    if (wi < 5)
                    {
                        var subWindow = new string[wi];
                        for (var swi = 0; swi < wi; swi++)
                        {
                            subWindow[swi] = window[swi];
                        }
                        window = subWindow;
                    }
                    // A single-word window is not added at all.
                    if (window.Length >= 3)
                    {
                        dict.Add(new StringList(window), 2, 3);
                    }
                    else if (window.Length == 2)
                    {
                        dict.Add(new StringList(window), 2, 2);
                    }
                }
                ci = reduceStart - 1; // ci will be incremented at end of loop
            }
            ci++;
        }
    }
    dict.CutOff(cutoff, int.MaxValue);
    // NOTE(review): the meaning of the 'true' argument is not visible here - presumably case sensitivity; confirm.
    return dict.ToDictionary(true);
}
/// <summary>
/// Initializes the shared state of an <see cref="AbstractBottomUpParser"/>.
/// </summary>
/// <param name="tagger">The pos-tagger that the parser uses.</param>
/// <param name="chunker">The chunker that the parser uses to chunk non-recursive structures.</param>
/// <param name="headRules">The head rules for the parser; its punctuation tags are cached.</param>
/// <param name="beamSize">Size of the beam; also used as the capacity of the parse heaps.</param>
/// <param name="advancePercentage">The advance percentage.</param>
protected AbstractBottomUpParser(
    IPOSTagger tagger,
    IChunker chunker,
    AbstractHeadRules headRules,
    int beamSize,
    double advancePercentage)
{
    // Collaborators.
    this.tagger = tagger;
    this.chunker = chunker;
    this.headRules = headRules;
    punctSet = headRules.PunctuationTags;

    // Search settings: M and K both take the beam size.
    M = beamSize;
    K = beamSize;
    Q = advancePercentage;
    ReportFailedParse = true;

    // Working heaps, each sized by K.
    completeParses = new ListHeap<Parse>(K);
    odh = new ListHeap<Parse>(K);
    ndh = new ListHeap<Parse>(K);
}
/// <summary>
/// Determines whether the specified head rules are equal to this instance.
/// </summary>
/// <param name="other">The head rules to compare with this instance; may be <c>null</c>.</param>
/// <returns>
/// <c>true</c> if <paramref name="other"/> is this instance, or if both the head rule keys and
/// the punctuation tags of the two instances are sequence-equal; otherwise <c>false</c>.
/// </returns>
/// <remarks>
/// Only the keys of the head rules are compared, not their values, and both comparisons
/// depend on enumeration order.
/// </remarks>
protected bool Equals(AbstractHeadRules other)
{
    // A null argument is never equal; without this guard the member access below would throw.
    if (ReferenceEquals(other, null))
    {
        return false;
    }

    if (ReferenceEquals(this, other))
    {
        return true;
    }

    return HeadRules.Keys.SequenceEqual(other.HeadRules.Keys) &&
           PunctuationTags.SequenceEqual(other.PunctuationTags);
}