//public string GetNames(string[] models, Parse data)
        //{
        //    CreateModels(models);
        //    return ProcessParse(models, data);
        //}

        /// <summary>
        /// Makes sure a name finder is registered in <c>mFinders</c> for every requested model,
        /// loading each one that is not registered yet from its trained NBIN file.
        /// </summary>
        /// <param name="models">The trained model names (NBIN file names without the extension).</param>
        private void CreateModels(string[] models)
        {
            foreach (string modelKey in models)
            {
                // A finder is already registered under this name: nothing to load.
                if (mFinders.ContainsKey(modelKey))
                {
                    continue;
                }

                // The file location is the model path prefix + the model name + the ".nbin" extension.
                string modelFile = mModelPath + modelKey + ".nbin";
                SharpEntropy.IO.BinaryGisModelReader reader = new SharpEntropy.IO.BinaryGisModelReader(modelFile);
                MaxEntropyModel_Interface loadedModel = new SharpEntropy.GisModel(reader);

                mFinders.Add(modelKey, new MaximumEntropyNameFinder(loadedModel));
            }
        }
        /// <summary>
        /// Runs each requested name finder over the tokens of a parse, hands the name spans found
        /// to <c>AddNames</c>, and returns the textual form of the parse followed by a line break.
        /// </summary>
        /// <param name="models">
        /// The model names of the name finders to use; each one is looked up in <c>mFinders</c>.
        /// </param>
        /// <param name="lineParse">
        /// The parse whose tag nodes supply the tokens and which is passed on to <c>AddNames</c>.
        /// </param>
        /// <returns>The result of <c>lineParse.Show()</c> followed by "\r\n".</returns>
        private string ProcessParse(string[] models, Parse lineParse)
        {
            int modelCount = models.Length;
            string[][] tagsByFinder = new string[modelCount][];
            Dictionary<string, string>[] previousTokenMaps = CreatePreviousTokenMaps(models);

            // The tokens are the string forms of the parse's tag nodes.
            Parse[] tagNodes = lineParse.GetTagNodes();
            string[] words = new string[tagNodes.Length];
            int wordCount = words.Length;
            for (int w = 0; w < wordCount; w++)
            {
                words[w] = tagNodes[w].ToString();
            }

            // Tag the token sequence once per finder, then update the previous-token maps.
            for (int f = 0; f < modelCount; f++)
            {
                tagsByFinder[f] = mFinders[models[f]].Find(words, previousTokenMaps[f]);
            }
            UpdatePreviousTokenMaps(previousTokenMaps, words, tagsByFinder);

            // Convert each finder's tag sequence into inclusive token spans and add them as names.
            for (int f = 0; f < modelCount; f++)
            {
                string[] tags = tagsByFinder[f];
                List<Span> found = new List<Span>(5);
                int openedAt = -1; // index of the token that opened the current name, -1 if none

                for (int w = 0; w < wordCount; w++)
                {
                    string tag = tags[w];
                    bool opensName = tag == MaximumEntropyNameFinder.Start;

                    if (opensName || tag == MaximumEntropyNameFinder.Other)
                    {
                        // A Start or Other tag closes any name that was still open on the previous token.
                        if (openedAt != -1)
                        {
                            found.Add(new Span(openedAt, w - 1));
                        }

                        // A Start tag immediately opens the next name; Other leaves none open.
                        openedAt = opensName ? w : -1;
                    }
                }

                // A name still open at the end runs through the last token.
                if (openedAt != -1)
                {
                    found.Add(new Span(openedAt, wordCount - 1));
                }

                AddNames(models[f], found, tagNodes, lineParse);
            }

            return lineParse.Show() + "\r\n";
        }
Пример #3
0
 /// <summary>
 /// Creates a beam search of the specified size using the specified model with the specified context generator,
 /// and remembers the name finder it belongs to.
 /// </summary>
 /// <param name="nameFinder">
 /// The associated MaximumEntropyNameFinder instance; stored in <c>mNameFinder</c>.
 /// </param>
 /// <param name="size">
 /// The size of the beam. Forwarded unchanged to the base constructor.
 /// </param>
 /// <param name="contextGenerator">
 /// The context generator used with the specified model. Forwarded unchanged to the base constructor.
 /// </param>
 /// <param name="model">
 /// The model used to determine names. Forwarded unchanged to the base constructor.
 /// </param>
 /// <param name="beamSize">
 /// The size of the beam to use in searching. Forwarded unchanged to the base constructor.
 /// </param>
 /// <remarks>
 /// NOTE(review): <paramref name="size"/> and <paramref name="beamSize"/> are both described as a beam size;
 /// how the base class distinguishes them is not visible from this constructor - confirm against the base type.
 /// </remarks>
 public NameBeamSearch(MaximumEntropyNameFinder nameFinder, int size, Interfaces.INameContextGenerator contextGenerator, MaxEntropyModel_Interface model, int beamSize) : base(size, contextGenerator, model, beamSize)
 {
     // The only state set here; everything else is handled by the base constructor.
     mNameFinder = nameFinder;
 }
        /// <summary>
        /// Adds SGML-style name tags to the specified input line and returns the tagged text.
        /// </summary>
        /// <remarks>
        /// Each name found by a finder is wrapped in an opening and closing tag named after that
        /// finder's model. Text between consecutive token spans and text after the last token span
        /// is copied from the input; text before the first token span is not emitted.
        /// NOTE(review): confirm whether dropping text before the first token is intended.
        /// </remarks>
        /// <param name="models">
        /// The model names for the name finders to be used; each one is looked up in <c>mFinders</c>.
        /// </param>
        /// <param name="line">
        /// The input.
        /// </param>
        /// <returns>
        /// The tagged line followed by "\r\n"; just "\r\n" when <paramref name="line"/> is empty.
        /// </returns>
        private string ProcessText(string[] models, string line)
        {
            System.Text.StringBuilder tagged = new System.Text.StringBuilder();

            int modelCount = models.Length;
            string[][] tagsByFinder = new string[modelCount][];
            Dictionary<string, string>[] previousTokenMaps = CreatePreviousTokenMaps(models);

            // An empty line clears the previous-token maps and produces only a line break.
            if (line.Length == 0)
            {
                ClearPreviousTokenMaps(previousTokenMaps);
                tagged.Append("\r\n");
                return tagged.ToString();
            }

            Span[] spans = TokenizeToSpans(line);
            string[] tokens = SpansToStrings(spans, line);

            // Tag the token sequence once per finder, then update the previous-token maps.
            for (int f = 0; f < modelCount; f++)
            {
                tagsByFinder[f] = mFinders[models[f]].Find(tokens, previousTokenMaps[f]);
            }
            UpdatePreviousTokenMaps(previousTokenMaps, tokens, tagsByFinder);

            int tokenCount = tokens.Length;
            for (int t = 0; t < tokenCount; t++)
            {
                if (t > 0)
                {
                    // End tags: a name that covered the previous token ends when this token
                    // is tagged Start or Other.
                    for (int f = 0; f < modelCount; f++)
                    {
                        string current = tagsByFinder[f][t];
                        bool leavesName = current == MaximumEntropyNameFinder.Start
                                          || current == MaximumEntropyNameFinder.Other;
                        if (!leavesName)
                        {
                            continue;
                        }

                        string previous = tagsByFinder[f][t - 1];
                        if (previous == MaximumEntropyNameFinder.Start || previous == MaximumEntropyNameFinder.Continue)
                        {
                            tagged.Append("</").Append(models[f]).Append(">");
                        }
                    }

                    // Copy whatever input text lies between the previous token and this one.
                    int gapStart = spans[t - 1].End;
                    int gapEnd = spans[t].Start;
                    if (gapStart < gapEnd)
                    {
                        tagged.Append(line.Substring(gapStart, gapEnd - gapStart));
                    }
                }

                // Start tags: emitted directly in front of the token that opens a name.
                for (int f = 0; f < modelCount; f++)
                {
                    if (tagsByFinder[f][t] == MaximumEntropyNameFinder.Start)
                    {
                        tagged.Append("<").Append(models[f]).Append(">");
                    }
                }

                tagged.Append(tokens[t]);
            }

            int lastToken = tokenCount - 1;
            if (lastToken >= 0)
            {
                // Final end tags: close every name still open on the last token.
                for (int f = 0; f < modelCount; f++)
                {
                    string closing = tagsByFinder[f][lastToken];
                    if (closing == MaximumEntropyNameFinder.Start || closing == MaximumEntropyNameFinder.Continue)
                    {
                        tagged.Append("</").Append(models[f]).Append(">");
                    }
                }

                // Copy any input text that follows the last token.
                int tailStart = spans[lastToken].End;
                if (tailStart < line.Length)
                {
                    tagged.Append(line.Substring(tailStart));
                }
            }

            tagged.Append("\r\n");
            return tagged.ToString();
        }