Example #1
0
        /// <summary>
        /// Runs every requested name finder over the tag nodes of a parse, passes the
        /// name spans it detects to <c>AddNames</c>, and returns the parse rendered by <c>Show()</c>.
        /// </summary>
        /// <param name="models">Keys into <c>mFinders</c> selecting the name finders to run.</param>
        /// <param name="lineParse">The parse whose tag nodes supply the tokens.</param>
        /// <returns>The text of <c>lineParse.Show()</c>, evaluated after all <c>AddNames</c> calls.</returns>
        private string ProcessParse(string[] models, Parse lineParse)
        {
            var tagsByFinder = new string[models.Length][];
            Dictionary<string, string>[] tokenHistory = CreatePreviousTokenMaps(models);

            // The tokens are the string forms of the parse's tag nodes.
            Parse[] tagNodes = lineParse.GetTagNodes();
            var words = new string[tagNodes.Length];
            for (int i = 0; i < words.Length; i++)
            {
                words[i] = tagNodes[i].ToString();
            }

            // Tag the token sequence once per model, then update the token maps
            // with the results of all finders together.
            for (int m = 0; m < models.Length; m++)
            {
                tagsByFinder[m] = mFinders[models[m]].Find(words, tokenHistory[m]);
            }
            UpdatePreviousTokenMaps(tokenHistory, words, tagsByFinder);

            // Turn each finder's tag sequence into inclusive [first, last] token spans.
            for (int m = 0; m < models.Length; m++)
            {
                string[] tags = tagsByFinder[m];
                var found = new List<Span>(5);
                int openAt = -1; // index of the first token of the name in progress, or -1

                for (int i = 0; i < words.Length; i++)
                {
                    string tag = tags[i];
                    bool beginsName = tag == MaximumEntropyNameFinder.Start;

                    if (beginsName || tag == MaximumEntropyNameFinder.Other)
                    {
                        // Both tags terminate a name that was still open on the previous token.
                        if (openAt != -1)
                        {
                            found.Add(new Span(openAt, i - 1));
                        }
                        openAt = beginsName ? i : -1;
                    }
                }

                // A name still open after the last token runs to the end of the sequence.
                if (openAt != -1)
                {
                    found.Add(new Span(openAt, words.Length - 1));
                }

                AddNames(models[m], found, tagNodes, lineParse);
            }

            // No line terminator is appended to the rendered parse.
            var rendered = new System.Text.StringBuilder();
            rendered.Append(lineParse.Show());
            return rendered.ToString();
        }
Example #2
0
        /// <summary>
        /// Wraps the names detected in a line of text in SGML-style tags named after the
        /// model that found them, keeping the text between tokens as it appears in the input.
        /// </summary>
        /// <param name="models">The model names for the name finders to be used</param>
        /// <param name="line">The input</param>
        /// <returns>
        /// The tagged line followed by "\r\n"; for an empty <paramref name="line"/> only "\r\n".
        /// </returns>
        private string ProcessText(string[] models, string line)
        {
            var tagsByFinder = new string[models.Length][];
            Dictionary<string, string>[] tokenHistory = CreatePreviousTokenMaps(models);

            // An empty line clears the token maps and yields just the line terminator.
            if (line.Length == 0)
            {
                ClearPreviousTokenMaps(tokenHistory);
                return "\r\n";
            }

            Span[] spans = TokenizeToSpans(line);
            string[] words = SpansToStrings(spans, line);

            for (int m = 0; m < models.Length; m++)
            {
                tagsByFinder[m] = mFinders[models[m]].Find(words, tokenHistory[m]);
            }
            UpdatePreviousTokenMaps(tokenHistory, words, tagsByFinder);

            var result = new System.Text.StringBuilder();

            for (int i = 0; i < words.Length; i++)
            {
                if (i > 0)
                {
                    // Close a tag when the previous token was inside a name and this
                    // token either starts a new name or is outside any name.
                    for (int m = 0; m < models.Length; m++)
                    {
                        string tagHere = tagsByFinder[m][i];
                        if (tagHere == MaximumEntropyNameFinder.Start || tagHere == MaximumEntropyNameFinder.Other)
                        {
                            string tagBefore = tagsByFinder[m][i - 1];
                            if (tagBefore == MaximumEntropyNameFinder.Start || tagBefore == MaximumEntropyNameFinder.Continue)
                            {
                                result.Append("</").Append(models[m]).Append('>');
                            }
                        }
                    }

                    // Copy the input text lying between the previous token and this one.
                    int gapFrom = spans[i - 1].End;
                    int gapTo = spans[i].Start;
                    if (gapFrom < gapTo)
                    {
                        result.Append(line.Substring(gapFrom, gapTo - gapFrom));
                    }
                }

                // Open a tag for every finder that starts a name on this token.
                for (int m = 0; m < models.Length; m++)
                {
                    if (tagsByFinder[m][i] == MaximumEntropyNameFinder.Start)
                    {
                        result.Append('<').Append(models[m]).Append('>');
                    }
                }

                result.Append(words[i]);
            }

            if (words.Length != 0)
            {
                int last = words.Length - 1;

                // Close any name that is still open on the final token.
                for (int m = 0; m < models.Length; m++)
                {
                    string finalTag = tagsByFinder[m][last];
                    if (finalTag == MaximumEntropyNameFinder.Start || finalTag == MaximumEntropyNameFinder.Continue)
                    {
                        result.Append("</").Append(models[m]).Append('>');
                    }
                }

                // Copy whatever input text follows the final token.
                int tailFrom = spans[last].End;
                if (tailFrom < line.Length)
                {
                    result.Append(line.Substring(tailFrom));
                }
            }

            result.Append("\r\n");
            return result.ToString();
        }