//public string GetNames(string[] models, Parse data)
//{
//    CreateModels(models);
//    return ProcessParse(models, data);
//}

/// <summary>
/// Ensures that a name finder is registered in mFinders for every requested model,
/// loading each model that is not registered yet from its trained NBIN file.
/// </summary>
/// <param name="models">The trained models (as NBIN file names without the extension).</param>
private void CreateModels(string[] models)
{
    foreach (string modelKey in models)
    {
        // A finder registered earlier under this key is reused as-is.
        if (mFinders.ContainsKey(modelKey))
        {
            continue;
        }

        // The file name is built by plain concatenation: model path + key + ".nbin".
        string modelFile = mModelPath + modelKey + ".nbin";
        SharpEntropy.IO.BinaryGisModelReader reader = new SharpEntropy.IO.BinaryGisModelReader(modelFile);
        MaxEntropyModel_Interface model = new SharpEntropy.GisModel(reader);

        mFinders.Add(modelKey, new MaximumEntropyNameFinder(model));
    }
}
/// <summary>
/// Runs every requested name finder over the tokens of a parse, hands the names found
/// to AddNames for that parse, and returns the textual rendering of the parse.
/// </summary>
/// <param name="models">The model names for the name finders to be used.</param>
/// <param name="lineParse">The parse whose tag nodes supply the tokens.</param>
/// <returns>The result of <c>lineParse.Show()</c> followed by a CR/LF pair.</returns>
private string ProcessParse(string[] models, Parse lineParse)
{
    int finderCount = models.Length;
    string[][] finderTags = new string[finderCount][];
    Dictionary<string, string>[] previousTokenMaps = CreatePreviousTokenMaps(models);

    // The tokens are the string forms of the parse's tag nodes.
    Parse[] tagNodes = lineParse.GetTagNodes();
    int tokenCount = tagNodes.Length;
    string[] tokens = new string[tokenCount];
    for (int t = 0; t < tokenCount; t++)
    {
        tokens[t] = tagNodes[t].ToString();
    }

    // One tag sequence per finder, each computed with that finder's own previous-token map.
    for (int f = 0; f < finderCount; f++)
    {
        finderTags[f] = mFinders[models[f]].Find(tokens, previousTokenMaps[f]);
    }
    UpdatePreviousTokenMaps(previousTokenMaps, tokens, finderTags);

    for (int f = 0; f < finderCount; f++)
    {
        string[] tags = finderTags[f];
        List<Span> names = new List<Span>(5);
        int nameStart = -1; // -1 means no name is currently open

        for (int t = 0; t < tokenCount; t++)
        {
            string tag = tags[t];
            bool beginsName = tag == MaximumEntropyNameFinder.Start;

            if (beginsName || tag == MaximumEntropyNameFinder.Other)
            {
                // Either tag terminates an open name at the previous token.
                if (nameStart != -1)
                {
                    names.Add(new Span(nameStart, t - 1));
                }

                // A Start tag immediately opens a new name; Other leaves none open.
                nameStart = beginsName ? t : -1;
            }
        }

        // A name still open after the last token runs to the end of the token list.
        if (nameStart != -1)
        {
            names.Add(new Span(nameStart, tokenCount - 1));
        }

        AddNames(models[f], names, tagNodes, lineParse);
    }

    System.Text.StringBuilder output = new System.Text.StringBuilder();
    output.Append(lineParse.Show());
    output.Append("\r\n");
    return output.ToString();
}
/// <summary>
/// Creates a beam search tied to the given name finder. Every argument other than
/// <paramref name="nameFinder"/> is forwarded unchanged to the base class constructor.
/// </summary>
/// <param name="nameFinder">
/// The associated MaximumEntropyNameFinder instance; stored in mNameFinder.
/// </param>
/// <param name="size">
/// The size of the beam (first argument of the base constructor).
/// </param>
/// <param name="contextGenerator">
/// The context generator used with the specified model.
/// </param>
/// <param name="model">
/// The model used to determine names.
/// </param>
/// <param name="beamSize">
/// Forwarded as the last argument of the base constructor.
/// NOTE(review): the previous comment described this as a beam size as well; confirm
/// how it differs from <paramref name="size"/> in the base class.
/// </param>
public NameBeamSearch(
    MaximumEntropyNameFinder nameFinder,
    int size,
    Interfaces.INameContextGenerator contextGenerator,
    MaxEntropyModel_Interface model,
    int beamSize)
    : base(size, contextGenerator, model, beamSize)
{
    this.mNameFinder = nameFinder;
}
/// <summary>
/// Wraps every name found in the input line in sgml style tags named after the model
/// that found it, and returns the tagged line.
/// </summary>
/// <param name="models">
/// The model names for the name finders to be used.
/// </param>
/// <param name="line">
/// The input.
/// </param>
/// <returns>
/// The tagged text followed by a CR/LF pair; for an empty input, just the CR/LF pair.
/// </returns>
private string ProcessText(string[] models, string line)
{
    int finderCount = models.Length;
    string[][] finderTags = new string[finderCount][];
    Dictionary<string, string>[] previousTokenMaps = CreatePreviousTokenMaps(models);

    // An empty line clears the previous-token maps and produces only the line terminator.
    if (line.Length == 0)
    {
        ClearPreviousTokenMaps(previousTokenMaps);
        return "\r\n";
    }

    Span[] spans = TokenizeToSpans(line);
    string[] tokens = SpansToStrings(spans, line);
    int tokenCount = tokens.Length;

    for (int f = 0; f < finderCount; f++)
    {
        finderTags[f] = mFinders[models[f]].Find(tokens, previousTokenMaps[f]);
    }
    UpdatePreviousTokenMaps(previousTokenMaps, tokens, finderTags);

    System.Text.StringBuilder output = new System.Text.StringBuilder();

    for (int t = 0; t < tokenCount; t++)
    {
        if (t > 0)
        {
            // End tags: a name running through the previous token closes here when the
            // current token starts a new name or lies outside any name.
            for (int f = 0; f < finderCount; f++)
            {
                string current = finderTags[f][t];
                string previous = finderTags[f][t - 1];
                bool currentLeavesName = current == MaximumEntropyNameFinder.Start
                    || current == MaximumEntropyNameFinder.Other;
                bool previousInName = previous == MaximumEntropyNameFinder.Start
                    || previous == MaximumEntropyNameFinder.Continue;

                if (currentLeavesName && previousInName)
                {
                    output.Append("</").Append(models[f]).Append(">");
                }
            }

            // Copy any text of the line that sits between the two tokens.
            int gapStart = spans[t - 1].End;
            int gapEnd = spans[t].Start;
            if (gapStart < gapEnd)
            {
                output.Append(line.Substring(gapStart, gapEnd - gapStart));
            }
        }

        // Start tags are written immediately before the token that opens the name.
        for (int f = 0; f < finderCount; f++)
        {
            if (finderTags[f][t] == MaximumEntropyNameFinder.Start)
            {
                output.Append("<").Append(models[f]).Append(">");
            }
        }

        output.Append(tokens[t]);
    }

    if (tokenCount != 0)
    {
        int last = tokenCount - 1;

        // Final end tags: close every name that is still open on the last token.
        for (int f = 0; f < finderCount; f++)
        {
            string lastTag = finderTags[f][last];
            if (lastTag == MaximumEntropyNameFinder.Start || lastTag == MaximumEntropyNameFinder.Continue)
            {
                output.Append("</").Append(models[f]).Append(">");
            }
        }

        // Copy whatever follows the last token in the line.
        int tailStart = spans[last].End;
        if (tailStart < line.Length)
        {
            output.Append(line.Substring(tailStart));
        }
    }

    output.Append("\r\n");
    return output.ToString();
}