/// <summary>
/// Loads a text file, searches it for the words "hello" and "word" with a trie whose
/// values are the indices of the words, and expects five hits in total.
/// </summary>
public void LineNumbers()
{
    // The lines are joined without any separator, exactly as the former read loop did.
    // string.Concat builds the text in one pass instead of re-copying the whole
    // accumulated string for every line read.
    string textfile = string.Concat(System.IO.File.ReadLines(@"C:\Новая папка\1.txt"));

    string[] words = new[] { "hello", "word" };
    AhoCorasick.Trie<int> trie = new AhoCorasick.Trie<int>();
    for (int i = 0; i < words.Length; i++)
    {
        // The value stored for each word is its index in the words array.
        trie.Add(words[i], i);
    }
    trie.Build();

    int[] lines = trie.Find(textfile).ToArray();

    Assert.AreEqual(5, lines.Length);
    // Assert.AreEqual(1, lines[0]);
    // Assert.AreEqual(1, lines[1]);
}
/// <summary>
/// Console entry point: builds a word-level trie, prints whether any entry is found in the
/// tokenised sample sentence, prints the elapsed time and waits for a key press.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    Stopwatch timer = Stopwatch.StartNew();

    // for true / false
    var tokens = "ini laptop thomas".Split(' ');
    var wordTrie = new AhoCorasick.Trie<string, bool>();
    wordTrie.Add(new[] { "thomasa" }, true);
    wordTrie.Build();

    /*
     * string[] masuk = "one two three four".Split(' ');
     *
     * AhoCorasick.Trie<string, bool> trie = new AhoCorasick.Trie<string, bool>();
     *
     * string text = System.IO.File.ReadAllText(@"C:\Users\Thomas Yap\Documents\CUDA Examples\testaho\lorem 1000words.txt");
     *
     * trie.Build();
     */

    bool anyHit = wordTrie.Find(tokens).Any();
    Console.WriteLine(anyHit);

    timer.Stop();
    Console.WriteLine("Elapsed={0}", timer.Elapsed);
    Console.ReadKey();
}
/// <summary>
/// Loads a text file, searches it for the word "hello" and expects five hits,
/// the first of which is reported as "hello".
/// </summary>
public void findsome()
{
    // The lines are joined without any separator, exactly as the former read loop did.
    // string.Concat builds the text in one pass instead of re-copying the whole
    // accumulated string for every line read.
    string textfile = string.Concat(System.IO.File.ReadLines(@"C:\Новая папка\1.txt"));

    AhoCorasick.Trie trie = new AhoCorasick.Trie();
    trie.Add("hello");
    // trie.Add("word");
    trie.Build();

    string[] matches = trie.Find(textfile).ToArray();

    Assert.AreEqual(5, matches.Length);
    Assert.AreEqual("hello", matches[0]);
    // Assert.AreEqual("hellonull",matches[1]);
    // Assert.AreEqual("word", matches[1]);
}
/// <summary>
/// Returns the hashtags that occur in <paramref name="text"/> as standalone words, i.e. that
/// are not directly preceded or followed by an English letter.
/// </summary>
/// <param name="trie">
/// Trie to search with. Every value it yields is parsed as a decimal index into
/// <paramref name="hashtags"/>.
/// </param>
/// <param name="hashtags">The hashtag list the trie values index into.</param>
/// <param name="text">The text to search.</param>
/// <returns>
/// One entry per trie hit whose occurrence passes the boundary check; empty when nothing matches.
/// </returns>
/// <exception cref="FormatException">A trie value is not a valid integer.</exception>
/// <exception cref="ArgumentOutOfRangeException">A trie value is not a valid index into the list.</exception>
public static List<string> Match(AhoCorasick.Trie trie, List<string> hashtags, string text)
{
    var verifiedWords = new List<string>();

    // Offset at which the search for the next occurrence of each hashtag starts. This pairs the
    // k-th trie hit of a word with its k-th occurrence in the text, instead of re-checking the
    // first occurrence every time (which missed "bar" in "foobar bar").
    var nextSearchStart = new Dictionary<int, int>();

    foreach (string position in trie.Find(text))
    {
        // int.Parse instead of Int16.Parse: indices above 32767 must not overflow.
        int wordNo = int.Parse(position, System.Globalization.CultureInfo.InvariantCulture);
        string word = hashtags[wordNo];

        int searchFrom;
        nextSearchStart.TryGetValue(wordNo, out searchFrom); // stays 0 for the first hit

        // Ordinal search: culture rules must not influence where a hashtag is located.
        int startingPosition = searchFrom <= text.Length
            ? text.IndexOf(word, searchFrom, StringComparison.Ordinal)
            : -1;
        if (startingPosition < 0)
        {
            // The word cannot be located in the text: nothing to verify for this hit.
            continue;
        }
        nextSearchStart[wordNo] = startingPosition + 1;

        int endingPosition = startingPosition + word.Length;

        // The beginning and the end of the text count as non-letter boundaries.
        bool front = startingPosition == 0
            || !Search.IsEnglishLetter(text[startingPosition - 1]);
        bool end = endingPosition == text.Length
            || !Search.IsEnglishLetter(text[endingPosition]);

        if (front && end)
        {
            verifiedWords.Add(word);
        }
    }

    return verifiedWords;
}
/// <summary>
/// Verifies that a word-level trie containing only "wol" reports no match in a token
/// sequence that does not contain that token.
/// </summary>
public void Words()
{
    var tokens = "hello:hello:wor:ddsdsdf:word:hello".Split(':');

    var wordTrie = new AhoCorasick.Trie<string, bool>();
    wordTrie.Add(new[] { "wol" }, true);
    wordTrie.Build();

    bool anyHit = wordTrie.Find(tokens).Any();
    Assert.IsFalse(anyHit);
}
/// <summary>
/// Verifies that the token sequence "three", "four" is found inside the tokenised
/// sentence "one two three four".
/// </summary>
public void Words()
{
    var tokens = "one two three four".Split(' ');

    var wordTrie = new AhoCorasick.Trie<string, bool>();
    wordTrie.Add(new[] { "three", "four" }, true);
    wordTrie.Build();

    bool anyHit = wordTrie.Find(tokens).Any();
    Assert.IsTrue(anyHit);
}
/// <summary>
/// Verifies that searching the sample sentence for "hello" and "world" yields at least one match.
/// </summary>
public void Contains()
{
    const string sentence = "hello and welcome to this beautiful world!";

    var dictionary = new AhoCorasick.Trie();
    dictionary.Add("hello");
    dictionary.Add("world");
    dictionary.Build();

    bool anyHit = dictionary.Find(sentence).Any();
    Assert.IsTrue(anyHit);
}
/// <summary>
/// Verifies that only patterns occurring verbatim in the text are reported: "hello" is
/// present, whereas "worldddddd" is not (the text contains "wo5rld").
/// </summary>
public void HelloWorld()
{
    string text = "hello and welcome to this beautiful wo5rld!";

    AhoCorasick.Trie trie = new AhoCorasick.Trie();
    trie.Add("hello");
    trie.Add("worldddddd");
    trie.Build();

    string[] matches = trie.Find(text).ToArray();

    // The former expectations (two matches, the second being "world5dd") could never hold:
    // "world5dd" is not a pattern in the trie and "worldddddd" does not occur in the text.
    Assert.AreEqual(1, matches.Length);
    Assert.AreEqual("hello", matches[0]);
}
/// <summary>
/// Verifies that "hello" and "world" are both found in the sample sentence and are
/// reported in that order.
/// </summary>
public void HelloWorld()
{
    const string sentence = "hello and welcome to this beautiful world!";

    var dictionary = new AhoCorasick.Trie();
    dictionary.Add("hello");
    dictionary.Add("world");
    dictionary.Build();

    string[] found = dictionary.Find(sentence).ToArray();

    Assert.AreEqual(2, found.Length);
    Assert.AreEqual("hello", found[0]);
    Assert.AreEqual("world", found[1]);
}
/// <summary>
/// Verifies that each match is reported as a (word, index) tuple: "hello" with 4 and
/// "world" with 40 for the sample sentence.
/// </summary>
public void HelloWorld()
{
    const string sentence = "hello and welcome to this beautiful world!";

    var dictionary = new AhoCorasick.Trie();
    dictionary.Add("hello");
    dictionary.Add("world");
    dictionary.Build();

    var found = dictionary.Find(sentence).ToArray();

    var expectedHello = Tuple.Create("hello", 4);
    var expectedWorld = Tuple.Create("world", 40);

    Assert.AreEqual(2, found.Length);
    Assert.AreEqual(expectedHello, found[0]);
    Assert.AreEqual(expectedWorld, found[1]);
}
/// <summary>
/// Verifies that a trie whose values are the word indices reports those indices in the
/// order the words appear in the text: "world" (1) first, then "hello" (0).
/// </summary>
public void LineNumbers()
{
    const string sentence = "world, i hello you!";
    var keywords = new[] { "hello", "world" };

    var indexTrie = new AhoCorasick.Trie<int>();
    int keywordIndex = 0;
    foreach (string keyword in keywords)
    {
        // Each keyword is stored with its position in the keywords array as value.
        indexTrie.Add(keyword, keywordIndex);
        keywordIndex++;
    }
    indexTrie.Build();

    int[] hits = indexTrie.Find(sentence).ToArray();

    Assert.AreEqual(2, hits.Length);
    Assert.AreEqual(1, hits[0]);
    Assert.AreEqual(0, hits[1]);
}
/// <summary>
/// Verifies that a trie with (word, id) tuples as values reports each match as a tuple of
/// that value and an index: 4 for "hello" and 40 for "world" in the sample sentence.
/// </summary>
public void WordsAndIds()
{
    const string sentence = "hello and welcome to this beautiful world!";

    var helloEntry = Tuple.Create("hello", 123);
    var worldEntry = Tuple.Create("world", 456);

    var dictionary = new AhoCorasick.Trie<Tuple<string, int>>();
    dictionary.Add("hello", helloEntry);
    dictionary.Add("world", worldEntry);
    dictionary.Build();

    var found = dictionary.Find(sentence).ToArray();

    Assert.AreEqual(2, found.Length);
    Assert.AreEqual(Tuple.Create(helloEntry, 4), found[0]);
    Assert.AreEqual(Tuple.Create(worldEntry, 40), found[1]);
}
// Remember to check whether the same key has already been added before adding it when counting words!

/// <summary>
/// Demo entry point: builds a trie for "hello" and "world" and prints every match found
/// in the sample sentence, one per line.
/// </summary>
public static void Main()
{
    var dictionary = new AhoCorasick.Trie();

    // Register the words to look for.
    dictionary.Add("hello");
    dictionary.Add("world");

    // Build the search tree.
    dictionary.Build();

    const string sentence = "hello and welcome to this beautiful world world hello!";

    // Print each word found.
    foreach (string hit in dictionary.Find(sentence))
    {
        Console.WriteLine(hit);
    }
}
/// <summary>
/// Verifies that a trie whose values are the word indices reports those indices in the
/// order the words appear in the text: "world" (1) first, then "hello" (0).
/// </summary>
public void LineNumbers()
{
    const string sentence = "world, i hello you!";
    var keywords = new[] { "hello", "world" };

    var indexTrie = new AhoCorasick.Trie<int>();
    for (int id = 0; id != keywords.Length; ++id)
    {
        // The value stored for a keyword is its index in the keywords array.
        string keyword = keywords[id];
        indexTrie.Add(keyword, id);
    }
    indexTrie.Build();

    int[] hits = indexTrie.Find(sentence).ToArray();

    Assert.AreEqual(2, hits.Length);
    Assert.AreEqual(1, hits[0]);
    Assert.AreEqual(0, hits[1]);
}
/// <summary>
/// Searches one line with the file-contents trie and, when anything matches, records the
/// line on the search file and reports the file as matched.
/// </summary>
/// <param name="sf">The file the line belongs to; its MatchedLines list is created on demand.</param>
/// <param name="line">The line text as read; stored unmodified in the recorded match.</param>
/// <param name="iLine">The line number stored with the recorded match.</param>
/// <param name="caseSensitive">When false, the line is lower-cased before searching.</param>
private void NonRegexSearch(SearchFile sf, string line, int iLine, bool caseSensitive)
{
    bool hasMatch;

    lock (MatchedFiles_Lock)
    {
        string haystack = caseSensitive ? line : line.ToLower();
        IEnumerable<string> hits = _objFileContentsSearchTrie.Find(haystack);

        hasMatch = hits.Any();
        if (hasMatch)
        {
            if (sf.MatchedLines == null)
            {
                sf.MatchedLines = new List<SearchLine>();
            }

            sf.MatchedLines.Add(new SearchLine(iLine, line));
        }
    }

    // AddMatch is called after the lock has been released, as before.
    if (!hasMatch)
    {
        return;
    }

    AddMatch(sf);
}
/// <summary>
/// Scans every <c>*.json</c> file below <paramref name="path"/> (recursively), collects the tweets
/// whose lower-cased text contains at least one hashtag as a standalone word, and writes the
/// results and statistics to disk.
/// </summary>
/// <remarks>
/// For each input file, the serialized matching tweets are written to a
/// "Matched" + file name + ".txt" file in the same directory. Afterwards the results are merged
/// via <c>MergeSearchResults</c> and the counters are written to <c>Search_stats.txt</c> under
/// <paramref name="path"/>. Files are processed in parallel.
/// </remarks>
/// <param name="path">Root folder that is searched for JSON files.</param>
/// <param name="trie">
/// Trie to search with; this method does not call Build on it. Every value it yields is parsed
/// as a decimal index into <paramref name="hashtags"/>.
/// </param>
/// <param name="hashtags">The hashtags to count; also the keys of the resulting statistics.</param>
public static void Match(string path, AhoCorasick.Trie trie, List<string> hashtags)
{
    DirectoryInfo rootFolder = new DirectoryInfo(path);
    var files = rootFolder.EnumerateFiles("*.json", SearchOption.AllDirectories);
    tags = hashtags.ToDictionary(x => x, x => 0);

    Parallel.ForEach(files, new ParallelOptions { MaxDegreeOfParallelism = 16 }, (file1) =>
    {
        int found = 0;
        int matched = 0;
        Console.WriteLine("Reading " + file1.FullName);
        var jsonText = System.IO.File.ReadAllText(file1.FullName);

        IList<_Tweet> tweets;
        try
        {
            tweets = JsonConvert.DeserializeObject<IList<_Tweet>>(jsonText);
        }
        catch (Exception e)
        {
            // Best effort: a file that cannot be parsed is skipped, but no longer silently.
            Console.WriteLine("Skipping " + file1.FullName + ": " + e.Message);
            return;
        }

        if (tweets == null)
        {
            // Nothing was deserialized (e.g. the document is a JSON null).
            return;
        }

        var matchedTweets = new List<string>();
        foreach (_Tweet tweet in tweets)
        {
            // The null check has to happen before the tweet is dereferenced.
            // Tweets without text cannot be searched and are skipped as well.
            if (tweet == null || tweet.Text == null)
            {
                continue;
            }

            string text = tweet.Text.ToLower();
            found++;

            List<string> verifiedWords = FindStandaloneHashtags(trie, hashtags, text);
            if (verifiedWords.Count == 0)
            {
                continue;
            }

            matched++;
            matchedTweets.Add(JsonConvert.SerializeObject(tweet));

            // tags is shared between the parallel workers.
            lock (keywordLock)
            {
                foreach (string word in verifiedWords)
                {
                    tags[word] += 1;
                }
            }
        }

        Interlocked.Add(ref TweetsFound, found);
        Interlocked.Add(ref TweetsMatched, matched);
        System.IO.File.WriteAllLines(file1.DirectoryName + "/Matched" + file1.Name + ".txt", matchedTweets);
    });

    bool mergeIntoOneFile = true;
    if (mergeIntoOneFile)
    {
        MergeSearchResults(path, "output.txt");
    }

    using (StreamWriter sw = new StreamWriter(path + "/Search_stats.txt"))
    {
        sw.Write(JsonConvert.SerializeObject(new Stats { Found = TweetsFound, Matched = TweetsMatched, Results = tags }, Formatting.Indented));

        // NOTE(review): this increment happens after the statistics were written and looks like
        // a leftover; it is kept because other readers of TweetsFound are not visible here.
        Interlocked.Increment(ref TweetsFound);
    }
}

/// <summary>
/// Returns, for every trie hit in <paramref name="text"/>, the corresponding hashtag when that
/// occurrence is not directly preceded or followed by an English letter.
/// </summary>
private static List<string> FindStandaloneHashtags(AhoCorasick.Trie trie, List<string> hashtags, string text)
{
    var verifiedWords = new List<string>();

    // Offset at which the search for the next occurrence of each hashtag starts. This pairs the
    // k-th trie hit of a word with its k-th occurrence in the text, instead of re-checking the
    // first occurrence every time (which missed "bar" in "foobar bar").
    var nextSearchStart = new Dictionary<int, int>();

    foreach (string position in trie.Find(text))
    {
        // int.Parse instead of Int16.Parse: indices above 32767 must not overflow.
        int wordNo = int.Parse(position, System.Globalization.CultureInfo.InvariantCulture);
        string word = hashtags[wordNo];

        int searchFrom;
        nextSearchStart.TryGetValue(wordNo, out searchFrom); // stays 0 for the first hit

        // Ordinal search: culture rules must not influence where a hashtag is located.
        int startingPosition = searchFrom <= text.Length
            ? text.IndexOf(word, searchFrom, StringComparison.Ordinal)
            : -1;
        if (startingPosition < 0)
        {
            // The word cannot be located in the text: nothing to verify for this hit.
            continue;
        }
        nextSearchStart[wordNo] = startingPosition + 1;

        int endingPosition = startingPosition + word.Length;

        // The beginning and the end of the text count as non-letter boundaries.
        bool front = startingPosition == 0
            || !IsEnglishLetter(text[startingPosition - 1]);
        bool end = endingPosition == text.Length
            || !IsEnglishLetter(text[endingPosition]);

        if (front && end)
        {
            verifiedWords.Add(word);
        }
    }

    return verifiedWords;
}
/// <summary>
/// Verifies that the token sequence "three", "four" is found inside the tokenised
/// sentence "one two three four".
/// </summary>
public void Words()
{
    var sentenceTokens = "one two three four".Split(' ');
    var phrase = new[] { "three", "four" };

    var phraseTrie = new AhoCorasick.Trie<string, bool>();
    phraseTrie.Add(phrase, true);
    phraseTrie.Build();

    var hits = phraseTrie.Find(sentenceTokens);
    Assert.IsTrue(hits.Any());
}