示例#1
0
        /// <summary>
        /// Loads the sample text file, searches it for the indexed words and checks the number of hits
        /// the trie reports.
        /// </summary>
        /// <remarks>
        /// Line terminators are dropped and the lines are joined without any separator, exactly as the
        /// searched text has always been built for this test.
        /// </remarks>
        public void LineNumbers()
        {
            // Join all lines in a single pass; appending with += inside a read loop re-copies the
            // whole accumulated text for every line of the file.
            string textfile = string.Concat(File.ReadLines(@"C:\Новая папка\1.txt"));

            string[] words = new[] { "hello", "word" };

            // Each word is stored with its index in the array as the value returned on a hit.
            AhoCorasick.Trie<int> trie = new AhoCorasick.Trie<int>();
            for (int i = 0; i < words.Length; i++)
            {
                trie.Add(words[i], i);
            }
            trie.Build();

            int[] lines = trie.Find(textfile).ToArray();

            // The expected count depends on the content of the external sample file.
            Assert.AreEqual(5, lines.Length);
        }
示例#2
0
        /// <summary>
        /// Times a single word-sequence lookup: builds a trie holding one keyword sequence, prints whether
        /// that sequence occurs in a tokenized sample sentence, then prints the elapsed time and waits
        /// for a key press.
        /// </summary>
        static void Main(string[] args)
        {
            Stopwatch timer = Stopwatch.StartNew();

            // The value type is bool: the lookup only answers "found" / "not found".
            var trie = new AhoCorasick.Trie<string, bool>();
            trie.Add(new[] { "thomasa" }, true);
            trie.Build();

            string[] tokens = "ini laptop thomas".Split(' ');
            bool anyHit = trie.Find(tokens).Any();
            Console.WriteLine(anyHit);

            timer.Stop();

            Console.WriteLine("Elapsed={0}", timer.Elapsed);
            Console.ReadKey();
        }
示例#3
0
        /// <summary>
        /// Loads the sample text file, searches it for the word "hello" and checks both the number of
        /// hits and the value reported for the first hit.
        /// </summary>
        /// <remarks>
        /// Line terminators are dropped and the lines are joined without any separator before searching.
        /// </remarks>
        public void findsome()
        {
            // One-pass join of all lines; the previous += loop copied the growing text once per line.
            string textfile = string.Concat(File.ReadLines(@"C:\Новая папка\1.txt"));

            AhoCorasick.Trie trie = new AhoCorasick.Trie();
            trie.Add("hello");
            trie.Build();

            string[] matches = trie.Find(textfile).ToArray();

            // The expected count depends on the content of the external sample file.
            Assert.AreEqual(5, matches.Length);
            Assert.AreEqual("hello", matches[0]);
        }
            /// <summary>
            /// Returns the hashtags reported by <paramref name="trie"/> for <paramref name="text"/> that
            /// also occur in the text as standalone words, i.e. not directly preceded or followed by a
            /// character for which <c>Search.IsEnglishLetter</c> returns true.
            /// </summary>
            /// <param name="trie">Built trie; every value it yields is parsed as an integer index into <paramref name="hashtags"/>.</param>
            /// <param name="hashtags">List the trie values index into.</param>
            /// <param name="text">Text to search.</param>
            /// <returns>
            /// One entry per trie hit whose word passes the boundary check, in the order the trie reported
            /// them; an empty list when nothing qualifies.
            /// </returns>
            public static List <string> Match(AhoCorasick.Trie trie, List <string> hashtags, string text)
            {
                var verifiedWords = new List <string>();

                foreach (string position in trie.Find(text))
                {
                    // Parse as a full int: Int16 overflows once the hashtag list exceeds 32767 entries.
                    int    wordNo = int.Parse(position, System.Globalization.CultureInfo.InvariantCulture);
                    string word   = hashtags[wordNo];

                    // Inspect every occurrence, not only the first one: a hashtag may first appear inside
                    // a longer word and later on its own. Ordinal search keeps the offsets consistent
                    // with word.Length (a culture-sensitive match may span a different number of chars).
                    int start = text.IndexOf(word, StringComparison.Ordinal);
                    while (start >= 0)
                    {
                        int end = start + word.Length;

                        // The beginning and the end of the text count as word boundaries.
                        bool front = start == 0 || !Search.IsEnglishLetter(text[start - 1]);
                        bool back  = end == text.Length || !Search.IsEnglishLetter(text[end]);

                        if (front && back)
                        {
                            verifiedWords.Add(word);
                            break;
                        }

                        // Resume one character further so overlapping occurrences are still considered.
                        start = start + 1 < text.Length
                            ? text.IndexOf(word, start + 1, StringComparison.Ordinal)
                            : -1;
                    }
                }

                return verifiedWords;
            }
示例#5
0
        /// <summary>
        /// Checks that a word-level trie reports nothing when its only keyword is absent from the
        /// token sequence.
        /// </summary>
        public void Words()
        {
            var trie = new AhoCorasick.Trie<string, bool>();
            trie.Add(new[] { "wol" }, true);
            trie.Build();

            // The input is searched token by token, not character by character.
            string[] tokens = "hello:hello:wor:ddsdsdf:word:hello".Split(':');
            bool anyHit = trie.Find(tokens).Any();

            Assert.IsFalse(anyHit);
        }
示例#6
0
        /// <summary>
        /// Checks that a word-level trie finds a two-word sequence inside a tokenized sentence.
        /// </summary>
        public void Words()
        {
            var trie = new AhoCorasick.Trie<string, bool>();
            trie.Add(new[] { "three", "four" }, true);
            trie.Build();

            string[] tokens = "one two three four".Split(' ');
            bool anyHit = trie.Find(tokens).Any();

            Assert.IsTrue(anyHit);
        }
示例#7
0
        /// <summary>
        /// Checks that the trie reports at least one hit when the text contains the indexed words.
        /// </summary>
        public void Contains()
        {
            var trie = new AhoCorasick.Trie();
            foreach (string keyword in new[] { "hello", "world" })
            {
                trie.Add(keyword);
            }
            trie.Build();

            string text = "hello and welcome to this beautiful world!";
            bool anyHit = trie.Find(text).Any();

            Assert.IsTrue(anyHit);
        }
示例#8
0
        /// <summary>
        /// Verifies that searching a sentence containing both indexed words yields a non-empty result.
        /// </summary>
        public void Contains()
        {
            const string text = "hello and welcome to this beautiful world!";

            var trie = new AhoCorasick.Trie();
            trie.Add("hello");
            trie.Add("world");
            trie.Build();

            var hits = trie.Find(text);

            Assert.IsTrue(hits.Any());
        }
示例#9
0
        /// <summary>
        /// Checks that only words that actually occur in the text are reported: "hello" is present,
        /// while the second pattern is not a substring of the text.
        /// </summary>
        public void HelloWorld()
        {
            string text = "hello and welcome to this beautiful wo5rld!";

            AhoCorasick.Trie trie = new AhoCorasick.Trie();
            trie.Add("hello");
            trie.Add("worldddddd");
            trie.Build();

            string[] matches = trie.Find(text).ToArray();

            // "worldddddd" never occurs in the text (it contains "wo5rld"), so "hello" is the only hit.
            // The previous expectations (two matches, the second being "world5dd") could not be met and
            // indexed past the end of the result array.
            Assert.AreEqual(1, matches.Length);
            Assert.AreEqual("hello", matches[0]);
        }
示例#10
0
        /// <summary>
        /// Checks that both indexed words are found in the sentence and reported in the order in which
        /// they appear.
        /// </summary>
        public void HelloWorld()
        {
            var trie = new AhoCorasick.Trie();
            foreach (string keyword in new[] { "hello", "world" })
            {
                trie.Add(keyword);
            }
            trie.Build();

            string text = "hello and welcome to this beautiful world!";
            string[] matches = trie.Find(text).ToArray();

            Assert.AreEqual(2, matches.Length);
            Assert.AreEqual("hello", matches[0]);
            Assert.AreEqual("world", matches[1]);
        }
示例#11
0
        /// <summary>
        /// Checks that both indexed words are found and that every hit is reported as a
        /// (word, position) tuple.
        /// </summary>
        public void HelloWorld()
        {
            var trie = new AhoCorasick.Trie();
            foreach (string keyword in new[] { "hello", "world" })
            {
                trie.Add(keyword);
            }
            trie.Build();

            string text = "hello and welcome to this beautiful world!";
            var matches = trie.Find(text).ToArray();

            // The expected positions (4 and 40) are the indexes of the last character of each word
            // in the sentence above.
            var expectedHello = Tuple.Create("hello", 4);
            var expectedWorld = Tuple.Create("world", 40);

            Assert.AreEqual(2, matches.Length);
            Assert.AreEqual(expectedHello, matches[0]);
            Assert.AreEqual(expectedWorld, matches[1]);
        }
示例#12
0
        /// <summary>
        /// Checks that a trie with integer values returns, for each hit, the value stored with the
        /// matched word, in the order the words appear in the text.
        /// </summary>
        public void LineNumbers()
        {
            string[] words = new[] { "hello", "world" };

            // Store each word together with its index in the array.
            var trie = new AhoCorasick.Trie<int>();
            int index = 0;
            foreach (string word in words)
            {
                trie.Add(word, index);
                index++;
            }
            trie.Build();

            string text = "world, i hello you!";
            int[] lines = trie.Find(text).ToArray();

            // "world" (value 0) comes after "hello" (value 1) in the array but first in the text.
            Assert.AreEqual(2, lines.Length);
            Assert.AreEqual(1, lines[0]);
            Assert.AreEqual(0, lines[1]);
        }
示例#13
0
        /// <summary>
        /// Checks that a trie whose values are (word, id) tuples reports each hit as that value paired
        /// with a position.
        /// </summary>
        public void WordsAndIds()
        {
            var helloEntry = Tuple.Create("hello", 123);
            var worldEntry = Tuple.Create("world", 456);

            var trie = new AhoCorasick.Trie<Tuple<string, int>>();
            trie.Add("hello", helloEntry);
            trie.Add("world", worldEntry);
            trie.Build();

            string text = "hello and welcome to this beautiful world!";
            var matches = trie.Find(text).ToArray();

            // The expected positions (4 and 40) are the indexes of the last character of each word
            // in the sentence above.
            Assert.AreEqual(2, matches.Length);
            Assert.AreEqual(Tuple.Create(helloEntry, 4), matches[0]);
            Assert.AreEqual(Tuple.Create(worldEntry, 40), matches[1]);
        }
示例#14
0
        /// <summary>
        /// Minimal usage sample: indexes two words, builds the search tree and prints every hit found
        /// in a sample sentence, one per line.
        /// </summary>
        // Reminder from the original author: when using this to count words, check whether a key was
        // already added before adding it again.
        public static void Main()
        {
            var trie = new AhoCorasick.Trie();

            // Register the words to look for, then build the search tree.
            foreach (string keyword in new[] { "hello", "world" })
            {
                trie.Add(keyword);
            }
            trie.Build();

            string text = "hello and welcome to this beautiful world world hello!";

            // Print every hit in the order the trie reports it.
            foreach (string word in trie.Find(text))
            {
                Console.WriteLine(word);
            }
        }
示例#15
0
        /// <summary>
        /// Checks that each hit carries the integer stored with the matched word and that hits follow
        /// the order of the words in the text.
        /// </summary>
        public void LineNumbers()
        {
            const string text = "world, i hello you!";
            string[] words = { "hello", "world" };

            var trie = new AhoCorasick.Trie<int>();
            int position = 0;
            while (position < words.Length)
            {
                // The value stored with a word is its index in the array.
                trie.Add(words[position], position);
                position++;
            }
            trie.Build();

            int[] lines = trie.Find(text).ToArray();

            // The text mentions "world" (index 0) before "hello" (index 1).
            Assert.AreEqual(2, lines.Length);
            Assert.AreEqual(1, lines[0]);
            Assert.AreEqual(0, lines[1]);
        }
示例#16
0
        /// <summary>
        /// Searches one line of a file with the content trie and, when the trie reports any hit,
        /// records the line on <paramref name="sf"/> and registers the file through <c>AddMatch</c>.
        /// </summary>
        /// <param name="sf">File the line belongs to; its <c>MatchedLines</c> list is created on first use.</param>
        /// <param name="line">Line text; it is recorded unchanged even when the search runs on a lower-cased copy.</param>
        /// <param name="iLine">Line number stored with the recorded line.</param>
        /// <param name="caseSensitive">When false, the line is lower-cased before it is searched.</param>
        private void NonRegexSearch(SearchFile sf, string line, int iLine, bool caseSensitive)
        {
            lock (MatchedFiles_Lock)
            {
                string probe = caseSensitive ? line : line.ToLower();

                if (!_objFileContentsSearchTrie.Find(probe).Any())
                {
                    // Nothing found: leave the file untouched and do not register it.
                    return;
                }

                if (sf.MatchedLines == null)
                {
                    sf.MatchedLines = new List<SearchLine>();
                }
                sf.MatchedLines.Add(new SearchLine(iLine, line));
            }

            // Runs after the lock has been released, and only when the line matched.
            AddMatch(sf);
        }
示例#17
0
            /// <summary>
            /// Scans every <c>*.json</c> file below <paramref name="path"/> (recursively), keeps the tweets
            /// whose lower-cased text contains at least one hashtag as a standalone word, and writes the
            /// results to disk.
            /// </summary>
            /// <remarks>
            /// For every input file a <c>Matched&lt;file name&gt;.txt</c> file holding the serialized
            /// matching tweets is written next to it. Afterwards the per-file results are merged through
            /// <c>MergeSearchResults</c> and the totals plus per-hashtag counts are written to
            /// <c>Search_stats.txt</c> in <paramref name="path"/>. Files that cannot be deserialized are
            /// skipped.
            /// </remarks>
            /// <param name="path">Root folder that is searched for JSON files and receives the statistics file.</param>
            /// <param name="trie">Built trie; every value it yields is parsed as an integer index into <paramref name="hashtags"/>.</param>
            /// <param name="hashtags">Hashtags to count; also the list the trie values index into.</param>
            public static void Match(string path, AhoCorasick.Trie trie, List <string> hashtags)
            {
                DirectoryInfo rootFolder = new DirectoryInfo(path);
                var           files      = rootFolder.EnumerateFiles("*.json", SearchOption.AllDirectories);

                // Reset the shared per-hashtag counters before any worker starts.
                tags = hashtags.ToDictionary(x => x, x => 0);

                Parallel.ForEach(files, new ParallelOptions {
                    MaxDegreeOfParallelism = 16
                }, (file1) =>
                {
                    int found   = 0;
                    int matched = 0;

                    Console.WriteLine("Reading " + file1.FullName);

                    var jsonText = System.IO.File.ReadAllText(file1.FullName);
                    IList <_Tweet> tweets;

                    try
                    {
                        tweets = JsonConvert.DeserializeObject <IList <_Tweet> >(jsonText);
                    }
                    catch (Exception e)
                    {
                        // Best effort: an unreadable file is skipped, but report it instead of
                        // dropping it silently.
                        Console.Error.WriteLine("Skipping " + file1.FullName + ": " + e.Message);
                        return;
                    }

                    if (tweets == null)
                    {
                        // The deserializer produced no list at all; treat the file like an unreadable one.
                        return;
                    }

                    var matchedTweets = new List <string>();
                    foreach (_Tweet tweet in tweets)
                    {
                        // Check for null BEFORE touching Text; the check used to come after the dereference.
                        if (tweet == null || tweet.Text == null)
                        {
                            continue;
                        }

                        String text = tweet.Text.ToLower();

                        var verifiedWords = new List <string>();
                        foreach (string position in trie.Find(text))
                        {
                            // Parse as a full int: Int16 overflows once the list exceeds 32767 hashtags.
                            int    wordNo = int.Parse(position, System.Globalization.CultureInfo.InvariantCulture);
                            string word   = hashtags[wordNo];

                            if (ContainsStandaloneWord(text, word))
                            {
                                verifiedWords.Add(word);
                            }
                        }

                        found++;

                        if (verifiedWords.Count == 0)
                        {
                            continue;
                        }

                        matched++;
                        matchedTweets.Add(JsonConvert.SerializeObject(tweet));

                        // The counters are shared by all workers; take the lock once per tweet.
                        lock (keywordLock)
                        {
                            foreach (string word in verifiedWords)
                            {
                                tags[word] += 1;
                            }
                        }
                    }

                    Interlocked.Add(ref TweetsFound, found);
                    Interlocked.Add(ref TweetsMatched, matched);
                    System.IO.File.WriteAllLines(file1.DirectoryName + "/Matched" + file1.Name + ".txt", matchedTweets);
                });

                bool mergeIntoOneFile = true;

                if (mergeIntoOneFile)
                {
                    MergeSearchResults(path, "output.txt");
                }

                using (StreamWriter sw = new StreamWriter(path + "/Search_stats.txt"))
                {
                    sw.Write(JsonConvert.SerializeObject(new Stats
                    {
                        Found   = TweetsFound,
                        Matched = TweetsMatched,
                        Results = tags
                    }, Formatting.Indented));

                    // NOTE(review): this bumps TweetsFound after the statistics were already written.
                    // Kept as-is because other code may read the counter - confirm whether it is intended.
                    Interlocked.Increment(ref TweetsFound);
                }
            }

            /// <summary>
            /// Tells whether <paramref name="word"/> occurs in <paramref name="text"/> at least once with
            /// no character accepted by <c>IsEnglishLetter</c> directly before or after it. The beginning
            /// and the end of the text count as boundaries.
            /// </summary>
            private static bool ContainsStandaloneWord(string text, string word)
            {
                // Ordinal search keeps the offsets consistent with word.Length; every occurrence is
                // inspected because the first one may sit inside a longer word.
                int start = text.IndexOf(word, StringComparison.Ordinal);
                while (start >= 0)
                {
                    int end = start + word.Length;

                    bool front = start == 0 || !IsEnglishLetter(text[start - 1]);
                    bool back  = end == text.Length || !IsEnglishLetter(text[end]);

                    if (front && back)
                    {
                        return true;
                    }

                    // Resume one character further so overlapping occurrences are still considered.
                    start = start + 1 < text.Length
                        ? text.IndexOf(word, start + 1, StringComparison.Ordinal)
                        : -1;
                }

                return false;
            }
示例#18
0
        /// <summary>
        /// Verifies that a trie keyed by whole words finds the sequence "three", "four" in a sentence
        /// that was split on spaces.
        /// </summary>
        public void Words()
        {
            string[] keywordSequence = { "three", "four" };

            var trie = new AhoCorasick.Trie<string, bool>();
            trie.Add(keywordSequence, true);
            trie.Build();

            string[] tokens = "one two three four".Split(' ');
            var hits = trie.Find(tokens);

            Assert.IsTrue(hits.Any());
        }