/// <summary>
        /// Tokenizes a single line of text, converts every token with ConvertToken and
        /// hands the resulting token sequence to the parser.
        /// </summary>
        /// <param name="line">
        /// The text to tokenize and parse.
        /// </param>
        /// <param name="requestedParses">
        /// Forwarded unchanged to the parser's FullParse call.
        /// </param>
        /// <returns>
        /// Whatever FullParse returns, or null when the tokenizer yields no tokens.
        /// </returns>
        public Parse[] DoParse(string line, int requestedParses)
        {
            string[] rawTokens  = mTokenizer.Tokenize(line);
            int      tokenCount = rawTokens.Length;

            // No tokens means there is nothing to parse.
            if (tokenCount == 0)
            {
                return null;
            }

            string[] converted = new string[tokenCount];
            for (int index = 0; index < tokenCount; index++)
            {
                converted[index] = ConvertToken(rawTokens[index]);
            }

            // The sentence text is the converted tokens separated by single spaces.
            string sentence = string.Join(" ", converted);
            Parse  root     = new Parse(sentence, new Util.Span(0, sentence.Length), "INC", 1, null);

            // Add one token node per converted token; the "+ 1" skips the separating space.
            int offset = 0;
            foreach (string word in converted)
            {
                int end = offset + word.Length;
                root.Insert(new Parse(sentence, new Util.Span(offset, end), MaximumEntropyParser.TokenNode, 0));
                offset = end + 1;
            }

            return mParser.FullParse(root, requestedParses);
        }
Exemple #2
0
        /// <summary>
        /// Converts every supplied token with ConvertToken and hands the resulting
        /// token sequence to the parser.
        /// </summary>
        /// <param name="tokens">
        /// The raw tokens of one sentence; enumerated exactly once.
        /// </param>
        /// <param name="requestedParses">
        /// Forwarded unchanged to the parser's FullParse call.
        /// </param>
        /// <returns>
        /// Whatever FullParse returns, or null when no tokens were supplied.
        /// </returns>
        private Parse[] DoParse(IEnumerable<string> tokens, int requestedParses)
        {
            var words = new List<string>();
            foreach (string rawToken in tokens)
            {
                words.Add(ConvertToken(rawToken));
            }

            // No tokens means there is nothing to parse.
            if (words.Count == 0)
            {
                return null;
            }

            // The sentence text is the converted tokens separated by single spaces.
            string sentence = string.Join(" ", words.ToArray());
            var    root     = new Parse(sentence, new Util.Span(0, sentence.Length), "INC", 1, null);

            // Add one token node per converted token; the "+ 1" skips the separating space.
            int offset = 0;
            foreach (string word in words)
            {
                int end = offset + word.Length;
                root.Insert(new Parse(sentence, new Util.Span(offset, end), MaximumEntropyParser.TokenNode, 0));
                offset = end + 1;
            }

            return _parser.FullParse(root, requestedParses);
        }
Exemple #3
0
        /// <summary>
        /// Builds a Parse structure from a tree-bank style (bracketed) parse string.
        /// </summary>
        /// <remarks>
        /// Only '(' and ')' characters drive the scan. Each '(' opens a node whose type and
        /// optional token are read from the text that follows it; each ')' closes the most
        /// recently opened node. Nodes of type "-NONE-" contribute neither text nor constituents.
        /// The text of the resulting parse is every token followed by a single space.
        /// </remarks>
        /// <param name="parse">
        /// A tree-bank style parse string.
        /// </param>
        /// <returns>
        /// A Parse of type TopNode into which all collected constituents have been inserted.
        /// </returns>
        public static Parse FromParseString(string parse)
        {
            var words        = new StringBuilder();
            var openNodes    = new Stack<Tuple<string, int>>();
            var constituents = new List<Tuple<string, Util.Span>>();
            int position     = 0;   // offset into the text being accumulated in 'words'

            for (int index = 0; index < parse.Length; index++)
            {
                switch (parse[index])
                {
                    case '(':
                    {
                        string remainder = parse.Substring(index + 1);
                        string nodeType  = GetType(remainder);
                        if (nodeType == null)
                        {
                            throw new ParseException("null type for: " + remainder);
                        }

                        string word = GetToken(remainder);
                        // Remember where this node starts so its span can be closed at ')'.
                        openNodes.Push(new Tuple<string, int>(nodeType, position));
                        if (word != null && nodeType != "-NONE-")
                        {
                            var wordSpan = new Util.Span(position, position + word.Length);
                            constituents.Add(new Tuple<string, Util.Span>(MaximumEntropyParser.TokenNode, wordSpan));
                            words.Append(word).Append(" ");
                            position += word.Length + 1;
                        }
                        break;
                    }

                    case ')':
                    {
                        Tuple<string, int> opened = openNodes.Pop();
                        if (opened.Item1 != "-NONE-")
                        {
                            // "position - 1" leaves out the space appended after the last token.
                            constituents.Add(new Tuple<string, Util.Span>(opened.Item1, new Util.Span(opened.Item2, position - 1)));
                        }
                        break;
                    }
                }
            }

            string text = words.ToString();
            var    root = new Parse(text, new Util.Span(0, text.Length), MaximumEntropyParser.TopNode, 1);

            // Insert in collection order; the root itself stands in for any TopNode entry.
            foreach (Tuple<string, Util.Span> entry in constituents)
            {
                if (entry.Item1 != MaximumEntropyParser.TopNode)
                {
                    root.Insert(new Parse(text, entry.Item2, entry.Item1, 1));
                }
            }
            return root;
        }
        /// <summary>
        /// Tokenizes and parses each of the supplied lines and concatenates the textual
        /// form of the resulting parses into a single string.
        /// </summary>
        /// <remarks>
        /// For every parse of a line the output contains the space-joined converted tokens,
        /// a space, an optional label/probability prefix (only when more than one parse was
        /// requested) and the value of the parse's Show() call. Each parse is written exactly
        /// once: the buffer holding the line is not reused across parses, so earlier parses
        /// of the same line are no longer repeated in front of later ones.
        /// A line that produces no tokens contributes "\r\n".
        /// </remarks>
        /// <param name="lines">
        /// The lines of text to parse.
        /// </param>
        /// <param name="requestedParses">
        /// Forwarded to FullParse; values above 1 also switch on the probability prefix.
        /// </param>
        /// <returns>
        /// The concatenated output for all lines.
        /// </returns>
        public string DoParse(string[] lines, int requestedParses)
        {
            System.Text.StringBuilder parseStringBuilder = new System.Text.StringBuilder();

            foreach (string line in lines)
            {
                string[] rawTokens = mTokenizer.Tokenize(line);
                if (rawTokens.Length == 0)
                {
                    parseStringBuilder.Append("\r\n");
                    continue;
                }

                string[] tokens = new string[rawTokens.Length];
                for (int tokenIndex = 0; tokenIndex < rawTokens.Length; tokenIndex++)
                {
                    tokens[tokenIndex] = ConvertToken(rawTokens[tokenIndex]);
                }

                // The sentence text is the converted tokens separated by single spaces.
                string text         = string.Join(" ", tokens);
                Parse  currentParse = new Parse(text, new Util.Span(0, text.Length), "INC", 1, null);
                int    start        = 0;

                foreach (string token in tokens)
                {
                    currentParse.Insert(new Parse(text, new Util.Span(start, start + token.Length), MaximumEntropyParser.TokenNode, 0));
                    start += token.Length + 1;   // skip the separating space
                }

                Parse[] parses = mParser.FullParse(currentParse, requestedParses);
                foreach (Parse parse in parses)
                {
                    // Every parse gets the same "tokens + space" prefix the first one always had.
                    parseStringBuilder.Append(text).Append(" ");
                    if (requestedParses > 1)
                    {
                        // NOTE(review): the label is currentParse.ToString(), identical for every
                        // parse of the line; a parse index may have been intended - confirm.
                        parseStringBuilder.Append(currentParse.ToString() + " " + parse.Probability.ToString(System.Globalization.CultureInfo.InvariantCulture) + " ");
                    }
                    parseStringBuilder.Append(parse.Show());
                }
            }
            return parseStringBuilder.ToString();
        }
Exemple #5
0
 ///<summary>
 ///Places the given constituent into this parse at the position dictated by its span.
 ///Existing parts that the constituent's span contains become children of the constituent;
 ///if an existing part's span contains the constituent, the insertion is delegated to that part.
 ///</summary>
 ///<param name="constituent">
 ///The constituent to be inserted.
 ///</param>
 ///<exception cref="ParseException">
 ///Thrown when this parse's span does not contain the constituent's span.
 ///</exception>
 public virtual void Insert(Parse constituent)
 {
     Util.Span newSpan = constituent.Span;
     if (!Span.Contains(newSpan))
     {
         throw new ParseException("Inserting constituent not contained in the sentence!");
     }

     int index = 0;
     while (index < _parts.Count)
     {
         Parse     child     = _parts[index];
         Util.Span childSpan = child.Span;

         // This part starts after the constituent ends: the insertion point is found.
         if (childSpan.Start > newSpan.End)
         {
             break;
         }

         // The constituent covers this part: move the part underneath the constituent.
         if (newSpan.Contains(childSpan))
         {
             _parts.RemoveAt(index);
             constituent._parts.Add(child);
             child.Parent = constituent;
             continue;   // the next part has shifted into 'index', so do not advance
         }

         // This part covers the constituent: it belongs further down the tree.
         if (childSpan.Contains(newSpan))
         {
             child.Insert(constituent);
             return;
         }

         index++;
     }

     _parts.Insert(index, constituent);
     constituent.Parent = this;
 }
Exemple #6
0
        ///<summary>
        ///Advances the specified parse by labeling its first unlabeled child and returns an array of
        ///advanced parses. Build outcomes are tried from most to least probable until their combined
        ///probability reaches the specified amount of probability mass, Q.
        ///</summary>
        ///<param name="inputParse">
        ///The parse to advance.
        ///</param>
        ///<param name="qParam">
        ///The amount of probability mass that should be accounted for by the advanced parses.
        ///</param>
        ///<param name="buildProbabilities">
        ///Buffer passed to the build model's Evaluate call. Its entries are overwritten with 0
        ///as outcomes are consumed, so its contents are not meaningful after this method returns.
        ///</param>
        ///<param name="checkProbabilities">
        ///Buffer passed to the check model's Evaluate call for every candidate parse.
        ///</param>
        ///<returns>
        ///The advanced parses; the array is empty when no outcome produced a usable parse.
        ///</returns>
        private Parse[] AdvanceParses(Parse inputParse, double qParam, double[] buildProbabilities, double[] checkProbabilities)
        {
            double qOpp           = 1 - qParam;
            Parse  lastStartNode  = null;               // The closest previous node which has been labeled as a start node.
            int    lastStartIndex = -1;                 // The index of the closest previous node which has been labeled as a start node.
            string lastStartType  = null;               // The type of the closest previous node which has been labeled as a start node.
            int    advanceNodeIndex;                    // The index of the node which will be labeled in this iteration of advancing the parse.
            Parse  advanceNode = null;                  // The node which will be labeled in this iteration of advancing the parse.

            Parse[] children  = inputParse.GetChildren();
            int     nodeCount = children.Length;

            //determines which node needs to be labeled and prior labels.
            for (advanceNodeIndex = 0; advanceNodeIndex < nodeCount; advanceNodeIndex++)
            {
                advanceNode = children[advanceNodeIndex];
                if (advanceNode.Label == null)
                {
                    break;
                }
                else if (startTypeMap.ContainsKey(advanceNode.Label))
                {
                    lastStartType  = startTypeMap[advanceNode.Label];
                    lastStartNode  = advanceNode;
                    lastStartIndex = advanceNodeIndex;
                }
            }
            var newParsesList = new List <Parse>(buildModel.OutcomeCount);

            //call build
            buildModel.Evaluate(buildContextGenerator.GetContext(children, advanceNodeIndex), buildProbabilities);
            double buildProbabilitiesSum = 0;

            while (buildProbabilitiesSum < qParam)
            {
                //  The largest unadvanced labeling.
                int highestBuildProbabilityIndex = 0;
                for (int probabilityIndex = 1; probabilityIndex < buildProbabilities.Length; probabilityIndex++)
                {                 //for each build outcome
                    if (buildProbabilities[probabilityIndex] > buildProbabilities[highestBuildProbabilityIndex])
                    {
                        highestBuildProbabilityIndex = probabilityIndex;
                    }
                }
                // Every remaining outcome has already been consumed (zeroed) or has no probability.
                if (buildProbabilities[highestBuildProbabilityIndex] == 0)
                {
                    break;
                }

                double highestBuildProbability = buildProbabilities[highestBuildProbabilityIndex];

                buildProbabilities[highestBuildProbabilityIndex] = 0;                 //zero out so new max can be found
                buildProbabilitiesSum += highestBuildProbability;

                string tag = buildModel.GetOutcomeName(highestBuildProbabilityIndex);
                if (highestBuildProbabilityIndex == topStartIndex)
                {                 // can't have top until complete
                    continue;
                }
                if (startTypeMap.ContainsKey(tag))
                {                 //update last start
                    lastStartIndex = advanceNodeIndex;
                    lastStartNode  = advanceNode;
                    lastStartType  = startTypeMap[tag];
                }
                else if (continueTypeMap.ContainsKey(tag))
                {
                    if (lastStartNode == null || lastStartType != continueTypeMap[tag])
                    {
                        continue;                         //Cont must match previous start or continue
                    }
                }
                var newParse1 = (Parse)inputParse.Clone();                  //clone parse
                if (CreateDerivationString)
                {
                    newParse1.AppendDerivationBuffer(highestBuildProbabilityIndex.ToString(System.Globalization.CultureInfo.InvariantCulture));
                    newParse1.AppendDerivationBuffer("-");
                }
                newParse1.SetChild(advanceNodeIndex, tag);                 //replace constituent labeled

                newParse1.AddProbability(Math.Log(highestBuildProbability));
                //check
                checkModel.Evaluate(checkContextGenerator.GetContext(newParse1.GetChildren(), lastStartType, lastStartIndex, advanceNodeIndex), checkProbabilities);
                Parse newParse2 = newParse1;
                if (checkProbabilities[completeIndex] > qOpp)
                {                 //make sure a reduce is likely
                    newParse2 = (Parse)newParse1.Clone();
                    if (CreateDerivationString)
                    {
                        newParse2.AppendDerivationBuffer("1");
                        newParse2.AppendDerivationBuffer(".");
                    }
                    // Read the same outcome slot that was just tested, rather than a hard-coded 1.
                    double completeProbability = checkProbabilities[completeIndex];
                    newParse2.AddProbability(System.Math.Log(completeProbability));
                    var  constituent = new Parse[advanceNodeIndex - lastStartIndex + 1];
                    bool isFlat      = true;
                    //first
                    constituent[0] = lastStartNode;
                    if (constituent[0].Type != constituent[0].Head.Type)
                    {
                        isFlat = false;
                    }
                    //last
                    constituent[advanceNodeIndex - lastStartIndex] = advanceNode;
                    if (isFlat && constituent[advanceNodeIndex - lastStartIndex].Type != constituent[advanceNodeIndex - lastStartIndex].Head.Type)
                    {
                        isFlat = false;
                    }
                    //middle
                    for (int constituentIndex = 1; constituentIndex < advanceNodeIndex - lastStartIndex; constituentIndex++)
                    {
                        constituent[constituentIndex] = children[constituentIndex + lastStartIndex];
                        if (isFlat && constituent[constituentIndex].Type != constituent[constituentIndex].Head.Type)
                        {
                            isFlat = false;
                        }
                    }
                    if (!isFlat)
                    {                     //flat chunks are done by chunker
                        newParse2.Insert(new Parse(inputParse.Text, new Util.Span(lastStartNode.Span.Start, advanceNode.Span.End), lastStartType, completeProbability, headRules.GetHead(constituent, lastStartType)));
                        newParsesList.Add(newParse2);
                    }
                }
                if (checkProbabilities[incompleteIndex] > qOpp)
                {                 //make sure a shift is likely
                    if (CreateDerivationString)
                    {
                        newParse1.AppendDerivationBuffer("0");
                        newParse1.AppendDerivationBuffer(".");
                    }
                    if (advanceNodeIndex != nodeCount - 1)
                    {                     //can't shift last element
                        // Read the same outcome slot that was just tested, rather than a hard-coded 0.
                        newParse1.AddProbability(Math.Log(checkProbabilities[incompleteIndex]));
                        newParsesList.Add(newParse1);
                    }
                }
            }
            Parse[] newParses = newParsesList.ToArray();
            return(newParses);
        }
Exemple #7
0
        /// <summary>
        /// Builds a Parse structure from a tree-bank style (bracketed) parse string.
        /// </summary>
        /// <remarks>
        /// Only '(' and ')' characters drive the scan. Each '(' opens a node whose type and
        /// optional token are read from the text that follows it; each ')' closes the most
        /// recently opened node. Nodes of type "-NONE-" contribute neither text nor constituents.
        /// The text of the resulting parse is every token followed by a single space.
        /// </remarks>
        /// <param name="parse">
        /// A tree-bank style parse string.
        /// </param>
        /// <returns>
        /// A Parse of type TopNode into which all collected constituents have been inserted.
        /// </returns>
        public static Parse FromParseString(string parse)
        {
            StringBuilder words = new StringBuilder();
            Stack<Util.Pair<string, int>> openNodes = new Stack<Util.Pair<string, int>>();
            List<Util.Pair<string, Util.Span>> constituents = new List<Util.Pair<string, Util.Span>>();
            int position = 0;   // offset into the text being accumulated in 'words'

            for (int index = 0; index < parse.Length; index++)
            {
                switch (parse[index])
                {
                    case '(':
                    {
                        string remainder = parse.Substring(index + 1);
                        string nodeType = GetType(remainder);
                        if (nodeType == null)
                        {
                            throw new ParseException("null type for: " + remainder);
                        }

                        string word = GetToken(remainder);
                        // Remember where this node starts so its span can be closed at ')'.
                        openNodes.Push(new Util.Pair<string, int>(nodeType, position));
                        if (word != null && nodeType != "-NONE-")
                        {
                            Util.Span wordSpan = new Util.Span(position, position + word.Length);
                            constituents.Add(new Util.Pair<string, Util.Span>(MaximumEntropyParser.TokenNode, wordSpan));
                            words.Append(word).Append(" ");
                            position += word.Length + 1;
                        }
                        break;
                    }

                    case ')':
                    {
                        Util.Pair<string, int> opened = openNodes.Pop();
                        if (opened.FirstValue != "-NONE-")
                        {
                            // "position - 1" leaves out the space appended after the last token.
                            constituents.Add(new Util.Pair<string, Util.Span>(opened.FirstValue, new Util.Span(opened.SecondValue, position - 1)));
                        }
                        break;
                    }
                }
            }

            string text = words.ToString();
            Parse root = new Parse(text, new Util.Span(0, text.Length), MaximumEntropyParser.TopNode, 1);

            // Insert in collection order; the root itself stands in for any TopNode entry.
            foreach (Util.Pair<string, Util.Span> entry in constituents)
            {
                if (entry.FirstValue != MaximumEntropyParser.TopNode)
                {
                    root.Insert(new Parse(text, entry.SecondValue, entry.FirstValue, 1));
                }
            }
            return root;
        }
        /// <summary>
        /// Tokenizes a single line of text, converts every token with ConvertToken and
        /// hands the resulting token sequence to the parser.
        /// </summary>
        /// <param name="line">
        /// The text to tokenize and parse.
        /// </param>
        /// <param name="requestedParses">
        /// Forwarded unchanged to the parser's FullParse call.
        /// </param>
        /// <returns>
        /// Whatever FullParse returns, or null when the tokenizer yields no tokens.
        /// </returns>
        public Parse[] DoParse(string line, int requestedParses)
        {
            string[] rawTokens = mTokenizer.Tokenize(line);
            int tokenCount = rawTokens.Length;

            // No tokens means there is nothing to parse.
            if (tokenCount == 0)
            {
                return null;
            }

            string[] converted = new string[tokenCount];
            for (int index = 0; index < tokenCount; index++)
            {
                converted[index] = ConvertToken(rawTokens[index]);
            }

            // The sentence text is the converted tokens separated by single spaces.
            string sentence = string.Join(" ", converted);
            Parse root = new Parse(sentence, new Util.Span(0, sentence.Length), "INC", 1, null);

            // Add one token node per converted token; the "+ 1" skips the separating space.
            int offset = 0;
            foreach (string word in converted)
            {
                int end = offset + word.Length;
                root.Insert(new Parse(sentence, new Util.Span(offset, end), MaximumEntropyParser.TokenNode, 0));
                offset = end + 1;
            }

            return mParser.FullParse(root, requestedParses);
        }
        /// <summary>
        /// Tokenizes and parses each of the supplied lines and concatenates the textual
        /// form of the resulting parses into a single string.
        /// </summary>
        /// <remarks>
        /// For every parse of a line the output contains the space-joined converted tokens,
        /// a space, an optional label/probability prefix (only when more than one parse was
        /// requested) and the value of the parse's Show() call. Each parse is written exactly
        /// once: the buffer holding the line is not reused across parses, so earlier parses
        /// of the same line are no longer repeated in front of later ones.
        /// A line that produces no tokens contributes "\r\n".
        /// </remarks>
        /// <param name="lines">
        /// The lines of text to parse.
        /// </param>
        /// <param name="requestedParses">
        /// Forwarded to FullParse; values above 1 also switch on the probability prefix.
        /// </param>
        /// <returns>
        /// The concatenated output for all lines.
        /// </returns>
        public string DoParse(string[] lines, int requestedParses)
        {
            System.Text.StringBuilder parseStringBuilder = new System.Text.StringBuilder();

            foreach (string line in lines)
            {
                string[] rawTokens = mTokenizer.Tokenize(line);
                if (rawTokens.Length == 0)
                {
                    parseStringBuilder.Append("\r\n");
                    continue;
                }

                string[] tokens = new string[rawTokens.Length];
                for (int tokenIndex = 0; tokenIndex < rawTokens.Length; tokenIndex++)
                {
                    tokens[tokenIndex] = ConvertToken(rawTokens[tokenIndex]);
                }

                // The sentence text is the converted tokens separated by single spaces.
                string text = string.Join(" ", tokens);
                Parse currentParse = new Parse(text, new Util.Span(0, text.Length), "INC", 1, null);
                int start = 0;

                foreach (string token in tokens)
                {
                    currentParse.Insert(new Parse(text, new Util.Span(start, start + token.Length), MaximumEntropyParser.TokenNode, 0));
                    start += token.Length + 1;   // skip the separating space
                }

                Parse[] parses = mParser.FullParse(currentParse, requestedParses);
                foreach (Parse parse in parses)
                {
                    // Every parse gets the same "tokens + space" prefix the first one always had.
                    parseStringBuilder.Append(text).Append(" ");
                    if (requestedParses > 1)
                    {
                        // NOTE(review): the label is currentParse.ToString(), identical for every
                        // parse of the line; a parse index may have been intended - confirm.
                        parseStringBuilder.Append(currentParse.ToString() + " " + parse.Probability.ToString(System.Globalization.CultureInfo.InvariantCulture) + " ");
                    }
                    parseStringBuilder.Append(parse.Show());
                }
            }
            return parseStringBuilder.ToString();
        }
        /// <summary>
        /// Converts every supplied token with ConvertToken and hands the resulting
        /// token sequence to the parser.
        /// </summary>
        /// <param name="tokens">
        /// The raw tokens of one sentence; enumerated exactly once.
        /// </param>
        /// <param name="requestedParses">
        /// Forwarded unchanged to the parser's FullParse call.
        /// </param>
        /// <returns>
        /// Whatever FullParse returns, or null when no tokens were supplied.
        /// </returns>
        private Parse[] DoParse(IEnumerable<string> tokens, int requestedParses)
        {
            var words = new List<string>();
            foreach (string rawToken in tokens)
            {
                words.Add(ConvertToken(rawToken));
            }

            // No tokens means there is nothing to parse.
            if (words.Count == 0)
            {
                return null;
            }

            // The sentence text is the converted tokens separated by single spaces.
            string sentence = string.Join(" ", words.ToArray());
            var root = new Parse(sentence, new Util.Span(0, sentence.Length), "INC", 1, null);

            // Add one token node per converted token; the "+ 1" skips the separating space.
            int offset = 0;
            foreach (string word in words)
            {
                int end = offset + word.Length;
                root.Insert(new Parse(sentence, new Util.Span(offset, end), MaximumEntropyParser.TokenNode, 0));
                offset = end + 1;
            }

            return _parser.FullParse(root, requestedParses);
        }