Example #1
0
 /// <summary>
 /// Initializes a new instance of the <see cref="ParserModel"/> class without an attach model.
 /// </summary>
 /// <param name="languageCode">The language code.</param>
 /// <param name="buildModel">The build model.</param>
 /// <param name="checkModel">The check model.</param>
 /// <param name="parserTagger">The pos-tagger model.</param>
 /// <param name="chunkerTagger">The chunker model.</param>
 /// <param name="headRules">The head rules.</param>
 /// <param name="type">The parser type.</param>
 /// <param name="manifestInfoEntries">The manifest information entries.</param>
 /// <remarks>
 /// All arguments are forwarded unchanged to the nine-argument constructor; <c>null</c> is passed
 /// in the fourth position (between the check model and the tagger model).
 /// </remarks>
 public ParserModel(
     string languageCode,
     IMaxentModel buildModel,
     IMaxentModel checkModel,
     POSModel parserTagger,
     ChunkerModel chunkerTagger,
     AbstractHeadRules headRules,
     ParserType type,
     Dictionary<string, string> manifestInfoEntries)
     : this(languageCode, buildModel, checkModel, null, parserTagger, chunkerTagger, headRules, type, manifestInfoEntries)
 {
 }
Example #2
0
 /// <summary>
 /// Initializes a new instance of the <see cref="ParserCrossEvaluator"/> class and stores the supplied settings.
 /// </summary>
 /// <param name="languageCode">The language of the training data.</param>
 /// <param name="parameters">The machine learning train parameters.</param>
 /// <param name="headRules">The head rules.</param>
 /// <param name="parserType">The parser model type.</param>
 /// <param name="monitors">The training monitors.</param>
 public ParserCrossEvaluator(
     string languageCode,
     TrainingParameters parameters,
     AbstractHeadRules headRules,
     ParserType parserType,
     params IEvaluationMonitor<Parse>[] monitors)
 {
     // Training configuration.
     this.languageCode = languageCode;
     this.parameters = parameters;

     // Parser configuration.
     this.parserType = parserType;
     this.headRules = headRules;

     // Observers.
     this.monitors = monitors;
 }
Example #3
0
        /// <summary>
        /// Wraps the part at <paramref name="parseIndex"/> and <paramref name="node"/> in a new node
        /// and stores that new node at <paramref name="parseIndex"/> in place of the original part.
        /// </summary>
        /// <param name="node">The node to adjoin; it is added as the last child of the new node.</param>
        /// <param name="rules">The head rules used to determine the head of the new node.</param>
        /// <param name="parseIndex">The index in this parse's parts of the child to wrap.</param>
        /// <returns>The newly created node.</returns>
        public Parse AdJoinRoot(Parse node, AbstractHeadRules rules, int parseIndex)
        {
            var existing = parts[parseIndex];

            // The new node spans from the start of the existing child to the end of the adjoined node
            // and takes the existing child's type.
            var coverage = new Span(existing.Span.Start, node.Span.End);
            var head     = rules.GetHead(new[] { existing, node }, existing.Type);
            var adjoined = new Parse(Text, coverage, existing.Type, 1, head);

            // Children in order: existing child, any punctuation preceding the node, then the node.
            adjoined.parts.Add(existing);

            if (node.PreviousPunctuationSet != null)
            {
                adjoined.parts.AddRange(node.PreviousPunctuationSet);
            }

            adjoined.parts.Add(node);

            parts[parseIndex] = adjoined;

            return adjoined;
        }
Example #4
0
 /// <summary>
 /// Initializes a new instance of the <see cref="AbstractBottomUpParser"/> with the given tagger, chunker and head rules.
 /// </summary>
 /// <param name="tagger">The pos-tagger that the parser uses.</param>
 /// <param name="chunker">The chunker that the parser uses to chunk non-recursive structures.</param>
 /// <param name="headRules">The head rules for the parser; its punctuation tags are cached as well.</param>
 /// <param name="beamSize">Size of the beam; assigned to both <c>M</c> and <c>K</c>.</param>
 /// <param name="advancePercentage">The advance percentage; assigned to <c>Q</c>.</param>
 protected AbstractBottomUpParser(
     IPOSTagger tagger,
     IChunker chunker,
     AbstractHeadRules headRules,
     int beamSize,
     double advancePercentage)
 {
     // Collaborators.
     this.tagger    = tagger;
     this.chunker   = chunker;
     this.headRules = headRules;
     punctSet       = headRules.PunctuationTags;

     // Search settings.
     M = beamSize;
     K = beamSize;
     Q = advancePercentage;

     ReportFailedParse = true;

     // Working heaps, each constructed with K.
     odh            = new ListHeap<Parse>(K);
     ndh            = new ListHeap<Parse>(K);
     completeParses = new ListHeap<Parse>(K);
 }
Example #5
0
        /// <summary>
        /// Initializes a new instance of the <see cref="ParserModel"/> from the given models and head rules,
        /// recording the parser type in the manifest and registering each model as an artifact.
        /// </summary>
        /// <param name="languageCode">The language code.</param>
        /// <param name="buildModel">The model to assign constituent labels.</param>
        /// <param name="checkModel">The model to determine a constituent is complete.</param>
        /// <param name="attachModel">The attach model; required for tree-insert, forbidden for chunking.</param>
        /// <param name="parserTagger">The model to assign pos-tags.</param>
        /// <param name="chunkerTagger">The model to assign flat constituent labels.</param>
        /// <param name="headRules">The head rules.</param>
        /// <param name="modelType">Type of the model.</param>
        /// <param name="manifestInfoEntries">The manifest information entries.</param>
        /// <exception cref="System.ArgumentException">
        /// If the <paramref name="modelType"/> is equal to <see cref="Parser.ParserType.Chunking"/> the <paramref name="attachModel"/> must be <c>null</c>.
        /// or
        /// If the <paramref name="modelType"/> is equal to <see cref="Parser.ParserType.TreeInsert"/> the <paramref name="attachModel"/> must not be <c>null</c>.
        /// </exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// Unknown <paramref name="modelType"/> value.
        /// </exception>
        public ParserModel(
            string languageCode,
            IMaxentModel buildModel,
            IMaxentModel checkModel,
            IMaxentModel attachModel,
            POSModel parserTagger,
            ChunkerModel chunkerTagger,
            AbstractHeadRules headRules,
            ParserType modelType,
            Dictionary<string, string> manifestInfoEntries)
            : base(ComponentName, languageCode, manifestInfoEntries)
        {
            if (modelType == ParserType.Chunking)
            {
                // A chunking parser has no attach step.
                if (attachModel != null)
                {
                    throw new ArgumentException(@"attachModel must be null for chunking parser!", nameof(attachModel));
                }

                Manifest[ParserTypeParameter] = "CHUNKING";
            }
            else if (modelType == ParserType.TreeInsert)
            {
                // A tree-insert parser cannot work without the attach model.
                if (attachModel == null)
                {
                    throw new ArgumentException(@"attachModel must not be null for treeinsert parser!",
                                                nameof(attachModel));
                }

                Manifest[ParserTypeParameter] = "TREEINSERT";
                artifactMap[EntryAttachModel] = attachModel;
            }
            else
            {
                throw new ArgumentOutOfRangeException(nameof(modelType), "Unknown model type");
            }

            // Artifacts shared by every parser type.
            artifactMap[EntryBuildModel] = buildModel;
            artifactMap[EntryCheckModel] = checkModel;
            artifactMap[EntryParserTaggerModel] = parserTagger;
            artifactMap[EntryChunkerTaggerModel] = chunkerTagger;
            artifactMap[EntryHeadRules] = headRules;

            CheckArtifactMap();
        }
Example #6
0
        /// <summary>
        /// Initializes a new instance of the <see cref="ParserModel"/> from the given models and head rules,
        /// recording the parser type in the manifest and registering each model as an artifact.
        /// </summary>
        /// <param name="languageCode">The language code.</param>
        /// <param name="buildModel">The build model.</param>
        /// <param name="checkModel">The check model.</param>
        /// <param name="attachModel">The attach model; must be <c>null</c> for chunking and non-null for tree-insert.</param>
        /// <param name="parserTagger">The pos-tagger model.</param>
        /// <param name="chunkerTagger">The chunker model.</param>
        /// <param name="headRules">The head rules.</param>
        /// <param name="modelType">Type of the model.</param>
        /// <param name="manifestInfoEntries">The manifest information entries.</param>
        /// <exception cref="System.ArgumentException">
        /// <paramref name="attachModel"/> is not <c>null</c> for a chunking parser,
        /// or is <c>null</c> for a tree-insert parser.
        /// </exception>
        /// <exception cref="System.ArgumentOutOfRangeException">
        /// Unknown <paramref name="modelType"/> value. This type derives from
        /// <see cref="System.ArgumentException"/>, so handlers for the base type still apply.
        /// </exception>
        public ParserModel(
            string languageCode,
            IMaxentModel buildModel,
            IMaxentModel checkModel,
            IMaxentModel attachModel,
            POSModel parserTagger,
            ChunkerModel chunkerTagger,
            AbstractHeadRules headRules,
            ParserType modelType,
            Dictionary <string, string> manifestInfoEntries) : base(ComponentName, languageCode, manifestInfoEntries)
        {
            switch (modelType)
            {
            case ParserType.Chunking:
                if (attachModel != null)
                {
                    throw new ArgumentException(@"attachModel must be null for chunking parser!", nameof(attachModel));
                }

                Manifest[PARSER_TYPE] = "CHUNKING";
                break;

            case ParserType.TreeInsert:
                if (attachModel == null)
                {
                    throw new ArgumentException(@"attachModel must not be null for treeinsert parser!",
                                                nameof(attachModel));
                }

                Manifest[PARSER_TYPE] = "TREEINSERT";

                // Only the tree-insert parser registers an attach model.
                artifactMap[ATTACH_MODEL_ENTRY_NAME] = attachModel;

                break;

            default:
                // An enum value outside the handled set is an out-of-range argument.
                throw new ArgumentOutOfRangeException(nameof(modelType), @"Unknown model type.");
            }

            artifactMap[BUILD_MODEL_ENTRY_NAME]          = buildModel;
            artifactMap[CHECK_MODEL_ENTRY_NAME]          = checkModel;
            artifactMap[PARSER_TAGGER_MODEL_ENTRY_NAME]  = parserTagger;
            artifactMap[CHUNKER_TAGGER_MODEL_ENTRY_NAME] = chunkerTagger;
            artifactMap[HEAD_RULES_MODEL_ENTRY_NAME]     = headRules;

            CheckArtifactMap();
        }
Example #7
0
        /// <summary>
        /// Initializes a new instance of the <see cref="ParserModel"/> class with no manifest information entries.
        /// </summary>
        /// <param name="languageCode">The language code.</param>
        /// <param name="buildModel">The model to assign constituent labels.</param>
        /// <param name="checkModel">The model to determine a constituent is complete.</param>
        /// <param name="attachModel">The attach model.</param>
        /// <param name="parserTagger">The model to assign pos-tags.</param>
        /// <param name="chunkerTagger">The model to assign flat constituent labels.</param>
        /// <param name="headRules">The head rules.</param>
        /// <param name="modelType">Type of the model.</param>
        /// <remarks>
        /// Every argument is forwarded to the overload that also accepts manifest information entries,
        /// with <c>null</c> supplied for those entries.
        /// </remarks>
        /// <exception cref="System.ArgumentException">
        /// If the <paramref name="modelType"/> is equal to <see cref="Parser.ParserType.Chunking"/> the <paramref name="attachModel"/> must be <c>null</c>.
        /// or
        /// If the <paramref name="modelType"/> is equal to <see cref="Parser.ParserType.TreeInsert"/> the <paramref name="attachModel"/> must not be <c>null</c>.
        /// </exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// Unknown <paramref name="modelType"/> value.
        /// </exception>
        public ParserModel(
            string languageCode,
            IMaxentModel buildModel,
            IMaxentModel checkModel,
            IMaxentModel attachModel,
            POSModel parserTagger,
            ChunkerModel chunkerTagger,
            AbstractHeadRules headRules,
            ParserType modelType)
            : this(languageCode, buildModel, checkModel, attachModel, parserTagger, chunkerTagger, headRules, modelType, null)
        {
        }
Example #8
0
        /// <summary>
        /// Initializes a new instance of the <see cref="ParserModel"/> from the given models and head rules,
        /// recording the parser type in the manifest and registering each model as an artifact.
        /// </summary>
        /// <param name="languageCode">The language code.</param>
        /// <param name="buildModel">The model to assign constituent labels.</param>
        /// <param name="checkModel">The model to determine a constituent is complete.</param>
        /// <param name="attachModel">The attach model; required for tree-insert, forbidden for chunking.</param>
        /// <param name="parserTagger">The model to assign pos-tags.</param>
        /// <param name="chunkerTagger">The model to assign flat constituent labels.</param>
        /// <param name="headRules">The head rules.</param>
        /// <param name="modelType">Type of the model.</param>
        /// <param name="manifestInfoEntries">The manifest information entries.</param>
        /// <exception cref="System.ArgumentException">
        /// If the <paramref name="modelType"/> is equal to <see cref="Parser.ParserType.Chunking"/> the <paramref name="attachModel"/> must be <c>null</c>.
        /// or
        /// If the <paramref name="modelType"/> is equal to <see cref="Parser.ParserType.TreeInsert"/> the <paramref name="attachModel"/> must not be <c>null</c>.
        /// </exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// Unknown <paramref name="modelType"/> value.
        /// </exception>
        public ParserModel(
            string languageCode,
            IMaxentModel buildModel,
            IMaxentModel checkModel,
            IMaxentModel attachModel,
            POSModel parserTagger,
            ChunkerModel chunkerTagger,
            AbstractHeadRules headRules,
            ParserType modelType,
            Dictionary<string, string> manifestInfoEntries) : base(ComponentName, languageCode, manifestInfoEntries) {

            // Reject anything that is neither of the two supported parser types up front.
            if (modelType != ParserType.Chunking && modelType != ParserType.TreeInsert) {
                throw new ArgumentOutOfRangeException("modelType", "Unknown model type");
            }

            if (modelType == ParserType.Chunking) {
                if (attachModel != null) {
                    throw new ArgumentException(@"attachModel must be null for chunking parser!", "attachModel");
                }

                Manifest[ParserTypeParameter] = "CHUNKING";
            } else {
                // Only ParserType.TreeInsert can reach this branch.
                if (attachModel == null) {
                    throw new ArgumentException(@"attachModel must not be null for treeinsert parser!",
                        "attachModel");
                }

                Manifest[ParserTypeParameter] = "TREEINSERT";
                artifactMap[EntryAttachModel] = attachModel;
            }

            // Artifacts shared by every parser type.
            artifactMap[EntryBuildModel] = buildModel;
            artifactMap[EntryCheckModel] = checkModel;
            artifactMap[EntryParserTaggerModel] = parserTagger;
            artifactMap[EntryChunkerTaggerModel] = chunkerTagger;
            artifactMap[EntryHeadRules] = headRules;

            CheckArtifactMap();
        }
Example #9
0
 /// <summary>
 /// Determines whether this instance and <paramref name="other"/> have the same head rule keys
 /// and the same punctuation tags, both compared element by element in enumeration order.
 /// </summary>
 /// <param name="other">The head rules to compare with; dereferenced without a null check.</param>
 /// <returns><c>true</c> if both sequences match; otherwise, <c>false</c>.</returns>
 /// <remarks>Only the keys of <c>HeadRules</c> are compared, not the values stored under them.</remarks>
 protected bool Equals(AbstractHeadRules other)
 {
     if (!HeadRules.Keys.SequenceEqual(other.HeadRules.Keys))
     {
         return false;
     }

     return PunctuationTags.SequenceEqual(other.PunctuationTags);
 }
Example #10
0
        /// <summary>
        /// Builds an n-gram dictionary from a stream of parses: uni-grams of the tag-node text,
        /// bi-/tri-grams of the initial chunk heads, and further n-grams from a window of chunk heads
        /// around every emulated reduction.
        /// </summary>
        /// <param name="data">The data stream of parses; read until it returns <c>null</c>.</param>
        /// <param name="rules">The head rules for the parses; also supplies the punctuation tags.</param>
        /// <param name="parameters">Can contain a cutoff, the minimum number of entries required for the n-gram to be saved as part of the dictionary; 5 is passed as the fallback value when reading it.</param>
        /// <returns>A dictionary object.</returns>
        public static Dic BuildDictionary(IObjectStream<Parse> data, AbstractHeadRules rules, TrainingParameters parameters)
        {
            var cutoff = parameters.Get("dict", Parameters.Cutoff, 5);
            var ngrams = new NGramModel();

            for (Parse parse = data.Read(); parse != null; parse = data.Read())
            {
                parse.UpdateHeads(rules);

                // Uni-grams: the covered text of every tag node.
                var tagNodes = parse.GetTagNodes();
                var tokens   = new string[tagNodes.Length];
                for (var i = 0; i < tokens.Length; i++)
                {
                    tokens[i] = tagNodes[i].CoveredText;
                }

                ngrams.Add(new StringList(tokens), 1, 1);

                // Bi-grams and tri-grams over the heads of the initial chunk sequence.
                var chunks    = CollapsePunctuation(AbstractParserEventStream.GetInitialChunks(parse), rules.PunctuationTags);
                var headWords = new string[chunks.Length];
                for (var i = 0; i < headWords.Length; i++)
                {
                    headWords[i] = chunks[i].Head.CoveredText;
                }

                ngrams.Add(new StringList(headWords), 2, 3);

                // Emulate reductions to produce additional n-grams.
                for (var ci = 0; ci < chunks.Length; ci++)
                {
                    if (!LastChild(chunks[ci], chunks[ci].Parent, rules.PunctuationTags))
                    {
                        continue;
                    }

                    // Walk left past every chunk that shares the parent of chunks[ci];
                    // the loop stops one position before the first such chunk.
                    int reduceStart;
                    for (reduceStart = ci;
                         reduceStart >= 0 && Equals(chunks[reduceStart].Parent, chunks[ci].Parent);
                         reduceStart--)
                    {
                    }

                    reduceStart++;

                    chunks = ParserEventStream.ReduceChunks(chunks, ref ci, chunks[ci].Parent);
                    ci     = reduceStart;

                    if (chunks.Length != 0)
                    {
                        // Up to two chunks on either side of ci, clipped to the array bounds.
                        var first = ci - 2 >= 0 ? ci - 2 : (ci - 1 >= 0 ? ci - 1 : ci);
                        var last  = ci + 2 < chunks.Length ? ci + 2 : (ci + 1 < chunks.Length ? ci + 1 : ci);

                        var window = new string[last - first + 1];
                        for (var i = first; i <= last; i++)
                        {
                            window[i - first] = chunks[i].Head.CoveredText;
                        }

                        // A single-word window adds nothing; two words only allow bi-grams.
                        if (window.Length >= 2)
                        {
                            var maxSize = window.Length >= 3 ? 3 : 2;
                            ngrams.Add(new StringList(window), 2, maxSize);
                        }
                    }

                    // The loop increment moves ci back onto reduceStart.
                    ci = reduceStart - 1;
                }
            }

            ngrams.CutOff(cutoff, int.MaxValue);

            return ngrams.ToDictionary(true);
        }
Example #11
0
        /// <summary>
        /// Initializes a new instance of the <see cref="ParserModel"/> class as a chunking parser model.
        /// </summary>
        /// <param name="languageCode">The language code.</param>
        /// <param name="buildModel">The model to assign constituent labels.</param>
        /// <param name="checkModel">The model to determine a constituent is complete.</param>
        /// <param name="parserTagger">The model to assign pos-tags.</param>
        /// <param name="chunkerTagger">The model to assign flat constituent labels.</param>
        /// <param name="headRules">The head rules.</param>
        /// <param name="manifestInfoEntries">The manifest information entries.</param>
        /// <remarks>
        /// Forwards to the nine-argument overload with <c>null</c> in the attach model position
        /// and <see cref="ParserType.Chunking"/> as the model type.
        /// </remarks>
        public ParserModel(
            string languageCode,
            IMaxentModel buildModel,
            IMaxentModel checkModel,
            POSModel parserTagger,
            ChunkerModel chunkerTagger,
            AbstractHeadRules headRules,
            Dictionary<string, string> manifestInfoEntries)
            : this(languageCode, buildModel, checkModel, null, parserTagger, chunkerTagger, headRules, ParserType.Chunking, manifestInfoEntries) {
        }
Example #12
0
        /// <summary>
        /// Initializes a new instance of the <see cref="ParserModel"/> class with no manifest information entries.
        /// </summary>
        /// <param name="languageCode">The language code.</param>
        /// <param name="buildModel">The model to assign constituent labels.</param>
        /// <param name="checkModel">The model to determine a constituent is complete.</param>
        /// <param name="attachModel">The attach model.</param>
        /// <param name="parserTagger">The model to assign pos-tags.</param>
        /// <param name="chunkerTagger">The model to assign flat constituent labels.</param>
        /// <param name="headRules">The head rules.</param>
        /// <param name="modelType">Type of the model.</param>
        /// <remarks>
        /// Every argument is passed through to the overload that takes one more argument,
        /// and <c>null</c> is supplied as that last argument.
        /// </remarks>
        /// <exception cref="System.ArgumentException">
        /// If the <paramref name="modelType"/> is equal to <see cref="Parser.ParserType.Chunking"/> the <paramref name="attachModel"/> must be <c>null</c>.
        /// or
        /// If the <paramref name="modelType"/> is equal to <see cref="Parser.ParserType.TreeInsert"/> the <paramref name="attachModel"/> must not be <c>null</c>.
        /// </exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// Unknown <paramref name="modelType"/> value.
        /// </exception>
        public ParserModel(
            string languageCode,
            IMaxentModel buildModel,
            IMaxentModel checkModel,
            IMaxentModel attachModel,
            POSModel parserTagger,
            ChunkerModel chunkerTagger,
            AbstractHeadRules headRules,
            ParserType modelType)
            : this(languageCode, buildModel, checkModel, attachModel, parserTagger, chunkerTagger, headRules, modelType, null) {
        }
        /// <summary>
        /// Builds an n-gram dictionary from a stream of parses: uni-grams of the tag-node text,
        /// bi-/tri-grams of the initial chunk heads, and further n-grams from a window of chunk heads
        /// around every emulated reduction.
        /// </summary>
        /// <param name="data">The data stream of parses; read until it returns <c>null</c>.</param>
        /// <param name="rules">The head rules for the parses; also supplies the punctuation tags.</param>
        /// <param name="parameters">Can contain a cutoff, the minimum number of entries required for the n-gram to be saved as part of the dictionary; 5 is passed as the fallback value when reading it.</param>
        /// <returns>A dictionary object.</returns>
        public static Dic BuildDictionary(IObjectStream<Parse> data, AbstractHeadRules rules, TrainingParameters parameters) {
            var minCount = parameters.Get("dict", Parameters.Cutoff, 5);
            var model = new NGramModel();

            Parse current;
            while ((current = data.Read()) != null) {
                current.UpdateHeads(rules);

                // Uni-grams: the covered text of every tag node.
                var tagNodes = current.GetTagNodes();
                var unigrams = new string[tagNodes.Length];
                for (var n = 0; n < unigrams.Length; n++) {
                    unigrams[n] = tagNodes[n].CoveredText;
                }
                model.Add(new StringList(unigrams), 1, 1);

                // Bi-grams and tri-grams over the heads of the initial chunk sequence.
                var chunks = CollapsePunctuation(AbstractParserEventStream.GetInitialChunks(current), rules.PunctuationTags);
                var chunkHeads = new string[chunks.Length];
                for (var n = 0; n < chunkHeads.Length; n++) {
                    chunkHeads[n] = chunks[n].Head.CoveredText;
                }
                model.Add(new StringList(chunkHeads), 2, 3);

                // Emulate reductions to produce additional n-grams.
                var ci = 0;
                while (ci < chunks.Length) {
                    if (LastChild(chunks[ci], chunks[ci].Parent, rules.PunctuationTags)) {
                        // Find the left-most chunk that shares the parent of chunks[ci].
                        var reduceStart = ci;
                        while (reduceStart >= 0 && Equals(chunks[reduceStart].Parent, chunks[ci].Parent)) {
                            reduceStart--;
                        }
                        reduceStart++;

                        chunks = ParserEventStream.ReduceChunks(chunks, ref ci, chunks[ci].Parent);
                        ci = reduceStart;

                        if (chunks.Length != 0) {
                            // Collect the heads at ci-2 .. ci+2. Left neighbours need a non-negative
                            // index, right neighbours an index inside the array; ci itself is always taken.
                            var buffer = new string[5];
                            var count = 0;
                            for (var offset = -2; offset <= 2; offset++) {
                                var index = ci + offset;
                                bool take;
                                if (offset < 0) {
                                    take = index >= 0;
                                } else if (offset == 0) {
                                    take = true;
                                } else {
                                    take = index < chunks.Length;
                                }

                                if (take) {
                                    buffer[count++] = chunks[index].Head.CoveredText;
                                }
                            }

                            // Trim the buffer to the entries actually filled.
                            var window = buffer;
                            if (count < 5) {
                                window = new string[count];
                                for (var n = 0; n < count; n++) {
                                    window[n] = buffer[n];
                                }
                            }

                            // A single-word window adds nothing; two words only allow bi-grams.
                            if (window.Length >= 3) {
                                model.Add(new StringList(window), 2, 3);
                            } else if (window.Length == 2) {
                                model.Add(new StringList(window), 2, 2);
                            }
                        }

                        // The increment below moves ci back onto reduceStart.
                        ci = reduceStart - 1;
                    }

                    ci++;
                }
            }

            model.CutOff(minCount, int.MaxValue);

            return model.ToDictionary(true);
        }
 /// <summary>
 /// Initializes a new instance of the <see cref="AbstractBottomUpParser"/> with the given tagger, chunker and head rules.
 /// </summary>
 /// <param name="tagger">The pos-tagger that the parser uses.</param>
 /// <param name="chunker">The chunker that the parser uses to chunk non-recursive structures.</param>
 /// <param name="headRules">The head rules for the parser; its punctuation tags are cached as well.</param>
 /// <param name="beamSize">Size of the beam; assigned to both <c>M</c> and <c>K</c>.</param>
 /// <param name="advancePercentage">The advance percentage; assigned to <c>Q</c>.</param>
 protected AbstractBottomUpParser(
     IPOSTagger tagger,
     IChunker chunker,
     AbstractHeadRules headRules,
     int beamSize,
     double advancePercentage) {

     // Collaborators.
     this.tagger = tagger;
     this.chunker = chunker;
     this.headRules = headRules;
     punctSet = headRules.PunctuationTags;

     // Search settings.
     M = beamSize;
     K = beamSize;
     Q = advancePercentage;

     ReportFailedParse = true;

     // Working heaps, each constructed with K.
     odh = new ListHeap<Parse>(K);
     ndh = new ListHeap<Parse>(K);
     completeParses = new ListHeap<Parse>(K);
 }
Example #15
0
 /// <summary>
 /// Determines whether this instance and <paramref name="other"/> have the same head rule keys
 /// and the same punctuation tags, both compared element by element in enumeration order.
 /// </summary>
 /// <param name="other">The head rules to compare with; dereferenced without a null check.</param>
 /// <returns><c>true</c> if both sequences match; otherwise, <c>false</c>.</returns>
 /// <remarks>Only the keys of <c>HeadRules</c> are compared, not the values stored under them.</remarks>
 protected bool Equals(AbstractHeadRules other) {
     var sameRuleKeys = HeadRules.Keys.SequenceEqual(other.HeadRules.Keys);

     // The punctuation tags are only compared when the rule keys already match.
     return sameRuleKeys && PunctuationTags.SequenceEqual(other.PunctuationTags);
 }