コード例 #1
0
        /// <summary>
        /// Rebuilds <c>m_MyPrivateTrie</c> from the word list stored in
        /// <c>Assets/words_alpha.txt</c> under the application base directory.
        /// </summary>
        /// <remarks>
        /// The file is expected to hold one word per line. A line is added (after trimming)
        /// only when its raw length is between 3 and 16 characters inclusive; every other
        /// line is skipped. Any line-ending convention (CRLF, LF or CR) is accepted, and a
        /// trailing newline no longer produces an empty word.
        /// </remarks>
        private void BuildFromDefaultDictionary()
        {
            m_MyPrivateTrie = new Trie();

            // Path.Combine avoids the hard-coded backslash, so the lookup also works where
            // '\' is not a directory separator.
            string l_Path = System.IO.Path.Combine(AppContext.BaseDirectory, "Assets", "words_alpha.txt");

            Console.WriteLine("Path" + l_Path);

            //Todo:  Decide if this is called on a thread, or use ReadAllTextAsync
            string l_Dict_text = System.IO.File.ReadAllText(l_Path);

            // StringReader.ReadLine splits on "\r\n", "\n" and "\r" alike, replacing the manual
            // IndexOf('\r') scan that silently assumed CRLF and let the last word skip the filter.
            using (var l_reader = new System.IO.StringReader(l_Dict_text))
            {
                string l_line;
                while ((l_line = l_reader.ReadLine()) != null)
                {
                    // The length limits are applied to the raw (untrimmed) line.
                    if ((l_line.Length > 2) && (l_line.Length < 17))
                    {
                        m_MyPrivateTrie.AddWord(l_line.Trim());
                    }
                    //Todo:  Fix dictionary file if the wrong words are included
                    //Todo:  Fix dictionary file for leading and trailing spaces on words
                }
            }

            // NOTE(review): the two key waits around releasing the text look like manual
            // pause points (presumably for observing memory use) - confirm they are still wanted.
            Console.ReadKey();
            l_Dict_text = null;
            Console.ReadKey();
        }
コード例 #2
0
        /// <summary>
        /// With a single entry in the trie, the longest-words query must return exactly that entry.
        /// </summary>
        public void GetLongestWords01()
        {
            // Arrange
            const string onlyWord = "the longest word";
            trie.AddWord(onlyWord);

            // Act
            var actual = trie.GetLongestWords();

            // Assert
            string[] expectedWords = { onlyWord };
            Assert.AreEqual(expectedWords, actual);
        }
コード例 #3
0
        /// <summary>
        /// With a single entry in the trie, the longest-words query must return one item,
        /// and that item must be the entry itself.
        /// </summary>
        public void GetLongestWords01()
        {
            // Arrange
            const string onlyWord = "the longest word";
            string[] expectedWords = { onlyWord };
            trie.AddWord(onlyWord);

            // Act
            var actual = trie.GetLongestWords();

            // Assert: same size first, then compare the first (and only) element.
            Assert.AreEqual(expectedWords.Length, actual.Count);

            IEnumerator cursor = actual.GetEnumerator();
            cursor.MoveNext();
            string firstWord = (string)cursor.Current;

            Assert.AreEqual(expectedWords[0], firstWord);
        }
コード例 #4
0
        /// <summary>
        /// Initializes the shared state: fills <c>_dictionary</c> with the lower-cased words
        /// of <c>SampleText.txt</c>, sets the sample sentence, picks a random encryption and
        /// stores the encrypted form of the sentence.
        /// </summary>
        static void Setup()
        {
            /*Setup dictionary*/
            _dictionary = TrieFactory.CreateTrie();

            // Read the whole sample text and release the file handle immediately;
            // the reader was previously never disposed.
            string sampleText;
            using (StreamReader reader = new StreamReader("..\\..\\..\\SampleText.txt"))
            {
                sampleText = reader.ReadToEnd();
            }

            // NOTE(review): consecutive separators yield empty entries, which are still
            // passed to AddWord exactly as before - confirm that is intended.
            string[] words = sampleText.Split(' ', '.', ',', '?', '!', ':', ';');
            foreach (var word in words)
            {
                _dictionary.AddWord(word.ToLower());
            }
            /*end dictionary*/

            /*setup sample sentence*/
            _sentence = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
            /*end sample sentence*/

            /*setup random encryption*/
            _encryption = Service.GetRandomEncryption();
            /*end random encryption*/

            /*encrypt sentence*/
            _encryptedSentence = Service.Encrypt(_sentence, _encryption);
            /*end encryption*/
        }
コード例 #5
0
ファイル: TrieTest.cs プロジェクト: rmandvikar/csharp-trie
 /// <summary>
 /// A fresh trie reports no words; after adding the empty string the word count is non-zero.
 /// </summary>
 public void AddWord_EmptyString01()
 {
     // Arrange: start from an empty trie.
     trie = new Trie();
     var countBefore = trie.GetWords().Count;
     Assert.AreEqual(0, countBefore);

     // Act
     trie.AddWord("");

     // Assert
     var countAfter = trie.GetWords().Count;
     Assert.AreNotEqual(0, countAfter);
 }
コード例 #6
0
        /// <summary>
        /// Loads <c>vocabulary.txt</c> from the application base directory, adds every word
        /// to a new trie, and builds two first-letter groupings of the same words
        /// (<c>_wordGroupsByFirstLetter</c> and <c>_wordGroups</c>).
        /// </summary>
        /// <remarks>
        /// Empty lines are skipped; they have no first letter and previously caused an
        /// <see cref="IndexOutOfRangeException"/>.
        /// </remarks>
        public void Setup()
        {
            var words = File.ReadAllLines(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "vocabulary.txt"))
                        .Where(w => w.Length > 0)
                        .ToArray();

            _trie = new Trie();
            _wordGroupsByFirstLetter = new Dictionary<char, IEnumerable<string>>();
            foreach (string word in words)
            {
                _trie.AddWord(word);

                // Each word goes into its first-letter bucket exactly once. The earlier code
                // ran both an AddOrUpdate call and a ContainsKey/Add sequence, which appears
                // to have stored every word twice.
                char firstLetter = word[0];
                IEnumerable<string> bucket;
                if (!_wordGroupsByFirstLetter.TryGetValue(firstLetter, out bucket))
                {
                    bucket = new List<string>();
                    _wordGroupsByFirstLetter[firstLetter] = bucket;
                }
                ((List<string>)bucket).Add(word);
            }

            _wordGroups = words.ToLookup(w => w[0]);
        }
コード例 #7
0
 /// <summary>
 /// Checks that adding the empty string to an empty trie makes the reported word count non-zero.
 /// </summary>
 public void AddWord_EmptyString01()
 {
     trie = new Trie();

     // Nothing has been added yet.
     var initialCount = trie.GetWords().Count;
     Assert.AreEqual(0, initialCount);

     trie.AddWord("");

     // The empty string counts as a stored word.
     var finalCount = trie.GetWords().Count;
     Assert.AreNotEqual(0, finalCount);
 }
コード例 #8
0
        /// <summary>
        /// Reads <paramref name="reader"/> line by line and adds every <c>\w+</c> match to <paramref name="trie"/>.
        /// </summary>
        /// <param name="reader">Text source, consumed until the end of the stream.</param>
        /// <param name="trie">Trie that receives the extracted words.</param>
        private static void ReadWordsFromText(StreamReader reader, ITrie trie)
        {
            while (!reader.EndOfStream)
            {
                MatchCollection wordMatches = Regex.Matches(reader.ReadLine(), @"\w+");

                for (int i = 0; i < wordMatches.Count; i++)
                {
                    trie.AddWord(wordMatches[i].ToString());
                }
            }
        }
コード例 #9
0
        /// <summary>
        /// Feeds every word-character run (<c>\w+</c>) found in the text into the trie.
        /// </summary>
        /// <param name="reader">Source of the text; read one line at a time until exhausted.</param>
        /// <param name="trie">Destination trie.</param>
        private static void ReadWordsFromText(StreamReader reader, ITrie trie)
        {
            while (!reader.EndOfStream)
            {
                string currentLine = reader.ReadLine();

                foreach (Match token in Regex.Matches(currentLine, @"\w+"))
                {
                    trie.AddWord(token.ToString());
                }
            }
        }
コード例 #10
0
        /// <summary>
        /// Adds every word in <paramref name="words"/> to <paramref name="trie"/> and prints
        /// the elapsed time measured with the shared stopwatch <c>sw</c>.
        /// </summary>
        /// <param name="words">Words to insert.</param>
        /// <param name="trie">Trie that receives the words.</param>
        private static void AddWordsToTrie(ICollection<string> words, ITrie trie)
        {
            Console.Write("Adding words to trie... ");
            sw.Start();

            foreach (var word in words)
            {
                trie.AddWord(word);
            }

            sw.Stop();
            // The leading '\r' rewrites the progress line. Only the elapsed time is printed,
            // so it is the only format argument; the old call passed an unused word count
            // and addressed the time as {1}.
            Console.WriteLine("\rAdding words to trie -> Elapsed time: {0}\n", sw.Elapsed);
            sw.Reset();
        }
コード例 #11
0
        /// <summary>
        /// Inserts all <paramref name="words"/> into <paramref name="trie"/>, timing the
        /// operation with the shared stopwatch <c>sw</c> and printing the elapsed time.
        /// </summary>
        /// <param name="words">Words to insert.</param>
        /// <param name="trie">Trie that receives the words.</param>
        private static void AddWordsToTrie(ICollection <string> words, ITrie trie)
        {
            Console.Write("Adding words to trie... ");
            sw.Start();

            foreach (var word in words)
            {
                trie.AddWord(word);
            }

            sw.Stop();
            // '\r' returns to the start of the progress line before overwriting it. The
            // format string now references its single argument as {0}; the old call supplied
            // an unused word count just to reach index {1}.
            Console.WriteLine("\rAdding words to trie -> Elapsed time: {0}\n", sw.Elapsed);
            sw.Reset();
        }
コード例 #12
0
        /// <summary>
        /// Reads the text file at <paramref name="filename"/> and adds every
        /// space-separated token of every line to <c>trie</c>.
        /// </summary>
        /// <param name="filename">Path of the training text file.</param>
        public void train(string filename)
        {
            // Dispose the reader so the file handle is released even if AddWord throws;
            // it was previously left open.
            using (StreamReader reader = File.OpenText(filename))
            {
                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    // NOTE(review): consecutive spaces produce empty tokens, which are still
                    // passed to AddWord as before - confirm that is intended.
                    foreach (string ss in line.Split(' '))
                    {
                        trie.AddWord(ss);
                    }
                }
            }
        }
コード例 #13
0
        /// <summary>
        /// Loads the first <c>Number</c> lines of <c>vocabulary.txt</c>, inserts them into a
        /// new trie in file order, and then replaces <c>_words</c> with a randomized copy.
        /// </summary>
        public void Setup()
        {
            string vocabularyPath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "vocabulary.txt");
            _words = File.ReadAllLines(vocabularyPath).Take(Number).ToArray();

            _trie = new Trie();
            foreach (var word in _words)
            {
                _trie.AddWord(word);
            }

            // Shuffle only after the trie has been populated.
            _words = _words.Randomize().ToArray();
        }
コード例 #14
0
        /// <summary>
        /// Builds a trie containing the digit strings "123", "1" and "23".
        /// </summary>
        /// <returns>The populated trie.</returns>
        private ITrie Digits()
        {
            string[] seedWords = { "123", "1", "23" };

            ITrie result = TrieFactory.CreateTrie();
            for (int i = 0; i < seedWords.Length; i++)
            {
                result.AddWord(seedWords[i]);
            }

            return result;
        }
コード例 #15
0
        /// <summary>
        /// Builds a trie containing a fixed set of lower-case words that all start with 't'.
        /// </summary>
        /// <returns>The populated trie.</returns>
        private ITrie LowerCaseWords()
        {
            string[] seedWords = { "this", "test", "the", "temp", "token", "take", "thump" };

            ITrie result = TrieFactory.CreateTrie();
            for (int i = 0; i < seedWords.Length; i++)
            {
                result.AddWord(seedWords[i]);
            }

            return result;
        }
コード例 #16
0
        /// <summary>
        /// Builds a trie containing a fixed set of upper-case words that all start with 'T'.
        /// </summary>
        /// <returns>The populated trie.</returns>
        private ITrie UpperCaseWords()
        {
            string[] seedWords = { "THIS", "TEST", "THE", "TEMP", "TOKEN", "TAKE", "THUMP" };

            ITrie result = TrieFactory.CreateTrie();
            for (int i = 0; i < seedWords.Length; i++)
            {
                result.AddWord(seedWords[i]);
            }

            return result;
        }
コード例 #17
0
        /// <summary>
        /// Builds a trie containing a fixed mix of lower-case and upper-case words.
        /// </summary>
        /// <returns>The populated trie.</returns>
        private ITrie Words()
        {
            string[] seedWords = { "this", "test", "the", "TEMP", "TOKEN", "TAKE", "THUMP" };

            ITrie result = TrieFactory.CreateTrie();
            for (int i = 0; i < seedWords.Length; i++)
            {
                result.AddWord(seedWords[i]);
            }

            return result;
        }
コード例 #18
0
ファイル: Program.cs プロジェクト: GaikwadPratik/CS570AlgoDS
        /// <summary>
        /// Builds a small sample trie from a fixed list of digit strings, mixed-case words
        /// and one two-word company name. The list contains "1" twice.
        /// </summary>
        /// <returns>The populated trie.</returns>
        static ITrie BuildTestTrie()
        {
            var seedWords = new List<string>
            {
                "123", "1", "23", "1",
                "this", "test", "the",
                "TEMP", "TOKEN", "TAKE", "THUMP",
                "Microsoft Inc"
            };

            ITrie result = TrieFactory.CreateTrie();
            for (int i = 0; i < seedWords.Count; i++)
            {
                result.AddWord(seedWords[i]);
            }

            return result;
        }
コード例 #19
0
        /// <summary>
        /// Creates (or extends) the trie from flattened body contents. For every word the
        /// number of occurrences in a document is stored under that document's id.
        /// </summary>
        /// <param name="webResponses">Document text keyed by document id.</param>
        /// <remarks>
        /// Words are trimmed and lower-cased before they are filtered against
        /// <c>Constants._lstExclusion</c> and counted, so tokens that differ only by case or
        /// surrounding whitespace contribute to a single count. Any exception is logged to
        /// the console and not rethrown.
        /// </remarks>
        public void CreateTrie(Dictionary <int, string> webResponses)
        {
            try
            {
                if (_trie == null)
                {
                    _trie = TrieFactory.CreateTrie();
                }

                foreach (var _webString in webResponses)
                {
                    List <string> _lstSorted = _webString.Value.Split(' ', StringSplitOptions.RemoveEmptyEntries).ToList();
                    _lstSorted.Sort();

                    // Normalize first, then filter and count. Trimming used to happen only
                    // after grouping, so "word" and "word\n" were counted separately and the
                    // later one overwrote the earlier count for the same document.
                    var _wordFrequencies = _lstSorted
                                           .Select(word => word.Trim().ToLower())
                                           .Where(word => word.Length > 0 && !Constants._lstExclusion.Contains(word))
                                           .GroupBy(word => word)
                                           .ToDictionary(group => group.Key, group => group.Count());

                    foreach (var item in _wordFrequencies)
                    {
                        // ContainsWord yields the word's node when present, null otherwise.
                        var _trieNode = _trie.ContainsWord(item.Key);
                        if (_trieNode != null)
                        {
                            _trieNode.DocReferences[_webString.Key] = item.Value;
                        }
                        else
                        {
                            _trie.AddWord(item.Key, new Dictionary <int, int>()
                            {
                                { _webString.Key, item.Value }
                            });
                        }
                    }
                }
            }
            catch (System.Exception ex)
            {
                // Deliberate best effort: report the failure and carry on.
                Console.WriteLine($"An exception occurred while creating trie {ex}");
            }
        }
コード例 #20
0
        /// <summary>
        /// Loads every token of <c>..\..\Files\text.txt</c> into a trie and prints how many
        /// times the word "lorem" was counted.
        /// </summary>
        public static void Main()
        {
            char[] separators = { ' ', '.', ',', '?', '!', ':' };
            ITrie trie = TrieFactory.CreateTrie();

            using (var reader = new StreamReader(@"..\..\Files\text.txt"))
            {
                while (!reader.EndOfStream)
                {
                    string line = reader.ReadLine();
                    foreach (string word in line.Split(separators))
                    {
                        trie.AddWord(word);
                    }
                }
            }

            Console.WriteLine("Lorem -> {0} times", trie.WordCount("lorem"));
        }
コード例 #21
0
ファイル: Program.cs プロジェクト: GaikwadPratik/CS570AlgoDS
        /// <summary>
        /// Console entry point. Loads company names (with tab-separated synonyms) from
        /// <c>companies.dat</c> into a trie, reads an article from standard input until a
        /// chunk containing two consecutive periods arrives (or input ends), matches the
        /// article's words against the trie, and prints a hit-count / relevance table for
        /// the primary company names.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            string        _strCompaniesFileName = "companies.dat";
            Regex         _lastLine             = new Regex(@"\.[.]", RegexOptions.Compiled);
            Regex         _flattenNames         = new Regex(@"[^0-9a-zA-Z ]+", RegexOptions.Compiled);
            List <string> _lstExclusion         = new List <string>()
            {
                "a", "an", "the", "and", "or", "but"
            };
            ITrie _trie = null;
            List <CompanyNodes> _lstNodes = null;
            CompanyNodes        _node     = null;
            int _nTotalWordCound          = 0;

            //Check file existance
            if (File.Exists(_strCompaniesFileName))
            {
                string _strCompaniesFileText = File.ReadAllText(_strCompaniesFileName);

                //Check for contents
                if (!string.IsNullOrEmpty(_strCompaniesFileText))
                {
                    string[] _strCompanyLines = _strCompaniesFileText.Split(new[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);

                    //Proceed only if at least one line is present
                    if (_strCompanyLines.Length > 0)
                    {
                        _trie     = TrieFactory.CreateTrie();
                        _lstNodes = new List <CompanyNodes>();

                        foreach (string _line in _strCompanyLines)
                        {
                            string _strParentName = string.Empty;

                            //Break each line by \t to get synonyms
                            string[] _namesToAdd = _line.Split(new[] { '\t' }, StringSplitOptions.RemoveEmptyEntries);

                            foreach (string _name in _namesToAdd)
                            {
                                //Remove special chars from names and then add to trie
                                string _flattenedName = _flattenNames.Replace(_name, string.Empty);
                                _trie.AddWord(_flattenedName);
                                //To calculate the frequency
                                _node = new CompanyNodes()
                                {
                                    Name       = _flattenedName,
                                    ParentName = !string.IsNullOrEmpty(_strParentName) ? _strParentName : null
                                };
                                //The first name is parent for synonyms
                                if (string.IsNullOrEmpty(_strParentName))
                                {
                                    _strParentName = _flattenedName;
                                }

                                _lstNodes.Add(_node);
                            }
                        }

                        //If trie is correctly created
                        if (_trie.GetAllWords().Count > 0)
                        {
                            Console.WriteLine("Enter article:");
                            List <string> _lstFoundWords       = new List <string>();
                            bool          _bIncrementFrequency = false;

                            string _strInArticle = string.Empty;
                            //To increase the buffer size of the console
                            using (Stream inputStream = Console.OpenStandardInput(READLINE_BUFFER_SIZE))
                            {
                                byte[] bytes = new byte[READLINE_BUFFER_SIZE];
                                char[] chars = null;
                                string temp  = "";

                                while (!_lastLine.IsMatch(temp))
                                {
                                    int outputLength = inputStream.Read(bytes, 0, READLINE_BUFFER_SIZE);
                                    if (outputLength <= 0)
                                    {
                                        // End of input (e.g. redirected stdin) without the
                                        // terminator: stop instead of spinning forever.
                                        break;
                                    }

                                    // NOTE(review): UTF-7 decoding is obsolete and treats '+' as a
                                    // shift character - confirm whether Console.InputEncoding was meant.
                                    chars = Encoding.UTF7.GetChars(bytes, 0, outputLength);

                                    temp = new string(chars);
                                    // Plain concatenation: passing user text through string.Format
                                    // threw FormatException whenever the article contained '{' or '}'.
                                    _strInArticle += temp;
                                }
                            }
                            _strInArticle = _lastLine.Replace(_strInArticle, " ");
                            string[] _strArticleWords = _strInArticle.Split(new[] { ' ', '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);
                            if (_strArticleWords.Length > 0)
                            {
                                string _strLastString = _strArticleWords.Last();

                                //1. Create the list of strings
                                //2. search the string in trie
                                //3. if found add to the list
                                //4. join the words in the list to form a string and search it again.
                                //5. continue till mismatch occurs
                                //6. if the mismatch is due to a, an, the, and, or, but search again without them
                                //7. When mismatch, increment the count of string in the list in _lstNodes and clear the list

                                foreach (string _strArticleWord in _strArticleWords)
                                {
                                    bool _bLastWord = false;

                                    // Reference comparison on purpose: identifies the final array
                                    // element itself, not an earlier word with the same text.
                                    if (ReferenceEquals(_strLastString, _strArticleWord))
                                    {
                                        _bLastWord = true;
                                    }
                                    string _strFlattenedArticleWord = _flattenNames.Replace(_strArticleWord, string.Empty);

                                    string _strCurrentWord = _strFlattenedArticleWord;
                                    if (_lstFoundWords.Count > 0)
                                    {
                                        // Candidate phrase = words matched so far + current word.
                                        _strFlattenedArticleWord = (string.Join(" ", _lstFoundWords) + " " + _strFlattenedArticleWord).Trim();
                                    }

                                    _nTotalWordCound++;
                                    if (_trie.ContainsWord(_strFlattenedArticleWord) && !string.IsNullOrEmpty(_strCurrentWord))
                                    {
                                        if (!_bLastWord)
                                        {
                                            _lstFoundWords.Add(_strCurrentWord);
                                        }
                                        else
                                        {
                                            IncrementFrequency(_lstNodes, _lstFoundWords, _strFlattenedArticleWord);
                                        }
                                    }
                                    else
                                    {
                                        if (!_lstExclusion.Contains(_strCurrentWord))
                                        {
                                            _bIncrementFrequency = true;
                                        }

                                        // Drop the current word again to recover the phrase matched so far.
                                        _strFlattenedArticleWord = _strFlattenedArticleWord.Remove(_strFlattenedArticleWord.LastIndexOf(_strCurrentWord), _strCurrentWord.Length).Trim();

                                        if (!string.IsNullOrEmpty(_strFlattenedArticleWord) && _bIncrementFrequency)
                                        {
                                            IncrementFrequency(_lstNodes, _lstFoundWords, _strFlattenedArticleWord);
                                            _bIncrementFrequency = false;
                                        }

                                        //if current word is another company, then proceed with that
                                        if (!string.IsNullOrEmpty(_strFlattenedArticleWord) &&
                                            !string.IsNullOrEmpty(_strCurrentWord) &&
                                            !_strFlattenedArticleWord.Equals(_strCurrentWord))
                                        {
                                            _strFlattenedArticleWord = _strCurrentWord;
                                            if (!_bLastWord)
                                            {
                                                _lstFoundWords.Add(_strCurrentWord);
                                            }
                                            else
                                            {
                                                IncrementFrequency(_lstNodes, _lstFoundWords, _strFlattenedArticleWord);
                                            }
                                        }
                                    }
                                }
                            }

                            // Report: only primary names (nodes without a parent) are listed.
                            int _nHitCount = 0;
                            List <CompanyNodes> _lstParentNodes = _lstNodes.Where(x => x.ParentName == null).ToList();
                            int    _maxLength = _lstParentNodes.Max(x => x.Name.Trim().Length);
                            string _spaces    = new string(' ', _maxLength);
                            Console.WriteLine($"Company{_spaces}\tHit Count\tRelevance");
                            foreach (CompanyNodes item in _lstParentNodes)
                            {
                                _nHitCount += item.Frequency;
                                // PadRight pads the name to _maxLength like the old
                                // _spaces.Remove(...) did, but does not throw when the untrimmed
                                // name is longer than the trimmed maximum.
                                Console.WriteLine($"{item.Name.PadRight(_maxLength)}\t\t{item.Frequency}\t\t{((double)item.Frequency / _nTotalWordCound) * 100}%");
                            }
                            Console.WriteLine();
                            Console.WriteLine($"Total{_spaces}\t\t{_nHitCount}\t\t{((double)_nHitCount / _nTotalWordCound) * 100}%");
                            Console.WriteLine($"Total Words{_spaces}\t{_nTotalWordCound}");
                        }
                        else
                        {
                            Console.WriteLine("Somthing went wrong while creating trie");
                        }
                    }
                    else
                    {
                        Console.WriteLine($"something went wrong with reading text of {_strCompaniesFileName}");
                    }
                }
                else
                {
                    Console.WriteLine($"'{_strCompaniesFileName}' is empty.");
                }
            }
            else
            {
                Console.WriteLine($"'{_strCompaniesFileName}' file not found.");
            }
            Console.ReadKey();
        }