/// <summary>
/// Registers a pattern that terminates at this node.
/// The backing set is created on first use with <c>ngram_t_IEqualityComparer.Instance</c>.
/// </summary>
/// <param name="ngram">Pattern to register</param>
public void AddNgram(ngram_t ngram)
{
    var patterns = _Ngrams;
    if (patterns == null)
    {
        // First pattern for this node: allocate the set and publish it to the field
        patterns = new HashSet<ngram_t>(ngram_t_IEqualityComparer.Instance);
        _Ngrams = patterns;
    }

    patterns.Add(ngram);
}
/// <summary>
/// Creates a searcher for a single pattern; the tree is produced by <c>TreeNode.BuildTree</c>.
/// </summary>
/// <param name="ngram">Pattern the tree is built from</param>
public AhoCorasick(ngram_t ngram)
{
    var root = TreeNode.BuildTree(ngram);
    _Root = root;
}
/// <summary>
/// Builds a keyword tree holding a single pattern and assigns a failure node to every node.
/// </summary>
/// <param name="ngram">Pattern whose <c>NerOutputTypes</c> sequence is laid out as a path from the root</param>
/// <returns>Root of the built tree; its failure link is set to itself before returning</returns>
public static TreeNode BuildTree(ngram_t ngram)
{
    var root = new TreeNode();

    // Phase 1: follow (or create) one node per element of the pattern,
    // then register the pattern on the node where the path ends.
    var tail = root;
    foreach (var outputType in ngram.NerOutputTypes)
    {
        var next = tail.GetTransition(outputType);
        if (next == null)
        {
            next = new TreeNode(tail, outputType);
            tail.AddTransition(next);
        }
        tail = next;
    }
    tail.AddNgram(ngram);

    // Phase 2: failure links.
    // Children of the root fail to the root; their children seed the breadth-first pass.
    var level = new List<TreeNode>();
    var rootChildren = root.Transitions;
    if (rootChildren != null)
    {
        level.Capacity = rootChildren.Count;
        foreach (var rootChild in rootChildren)
        {
            rootChild.Failure = root;

            var grandChildren = rootChild.Transitions;
            if (grandChildren == null)
            {
                continue;
            }
            foreach (var grandChild in grandChildren)
            {
                level.Add(grandChild);
            }
        }
    }

    // Deeper nodes: one depth level per iteration of the outer loop.
    while (level.Count != 0)
    {
        var nextLevel = new List<TreeNode>(level.Count);
        foreach (var current in level)
        {
            var fallback = current.Parent.Failure;
            var label = current.NerOutputType;

            // Climb the failure chain until a node has a transition for this label.
            // root.Failure is only assigned after this loop, so the climb may end with null.
            while (fallback != null)
            {
                if (fallback.ContainsTransition(label))
                {
                    break;
                }
                fallback = fallback.Failure;
            }

            if (fallback != null)
            {
                current.Failure = fallback.GetTransition(label);

                // Patterns ending at the failure node also end at this node
                var inherited = current.Failure.Ngrams;
                if (inherited != null)
                {
                    foreach (var pattern in inherited)
                    {
                        current.AddNgram(pattern);
                    }
                }
            }
            else
            {
                current.Failure = root;
            }

            // Queue this node's children for the next depth level
            var children = current.Transitions;
            if (children != null)
            {
                foreach (var child in children)
                {
                    nextLevel.Add(child);
                }
            }
        }
        level = nextLevel;
    }

    root.Failure = root;
    return root;
}
/// <summary>
/// Inserts the pattern into the tree rooted at <c>_Root</c> and then assigns a failure node to every node.
/// </summary>
/// <param name="ngram">Pattern whose <c>NerOutputTypes</c> sequence is laid out as a path from <c>_Root</c></param>
private void BuildTree(ngram_t ngram)
{
    // Phase 1: follow (or create) one node per element of the pattern,
    // then register the pattern on the node where the path ends.
    TreeNode cursor = _Root;
    foreach (var outputType in ngram.NerOutputTypes)
    {
        // Look for an existing child labelled with this output type
        TreeNode match = null;
        foreach (TreeNode candidate in cursor.Transitions)
        {
            if (candidate.NerOutputType == outputType)
            {
                match = candidate;
                break;
            }
        }

        if (match == null)
        {
            match = new TreeNode(cursor, outputType);
            cursor.AddTransition(match);
        }
        cursor = match;
    }
    cursor.AddNgram(ngram);

    // Phase 2: failure links.
    // Children of the root fail to the root; their children seed the work list.
    var pending = new List<TreeNode>();
    foreach (TreeNode rootChild in _Root.Transitions)
    {
        rootChild.Failure = _Root;
        foreach (TreeNode grandChild in rootChild.Transitions)
        {
            pending.Add(grandChild);
        }
    }

    // Breadth-first pass: the list is walked by index while children are appended to its end,
    // so nodes are visited in the same depth-by-depth order as with one list per level.
    for (int index = 0; index < pending.Count; index++)
    {
        TreeNode current = pending[index];

        TreeNode fallback = current.Parent.Failure;
        var label = current.NerOutputType;

        // Climb the failure chain until a node has a transition for this label.
        // _Root.Failure is only assigned after this loop, so the climb may end with null.
        while (!(fallback == null || fallback.ContainsTransition(label)))
        {
            fallback = fallback.Failure;
        }

        if (fallback != null)
        {
            current.Failure = fallback.GetTransition(label);

            // Patterns ending at the failure node also end at this node
            foreach (ngram_t inherited in current.Failure.Ngrams)
            {
                current.AddNgram(inherited);
            }
        }
        else
        {
            current.Failure = _Root;
        }

        // Schedule this node's children
        foreach (TreeNode child in current.Transitions)
        {
            pending.Add(child);
        }
    }

    _Root.Failure = _Root;
}
/// <summary>
/// Creates a searcher for a single pattern: sets <c>Count</c> to 1, allocates an empty root
/// and builds the tree for the pattern.
/// </summary>
/// <param name="ngram">Pattern the tree is built from</param>
public AhoCorasick(ngram_t ngram)
{
    Count = 1;

    // The root must exist before BuildTree runs, because BuildTree starts from _Root
    _Root = new TreeNode();
    BuildTree(ngram);
}
/// <summary>
/// Registers a pattern that terminates at this node by adding it to <c>Ngrams</c>.
/// </summary>
/// <param name="ngram">Pattern to register</param>
public void AddNgram(ngram_t ngram)
{
    var patterns = Ngrams;
    patterns.Add(ngram);
}