/// <summary>
/// Rebuilds <c>m_MyPrivateTrie</c> from the word list stored in
/// <c>Assets/words_alpha.txt</c> under the application base directory.
/// </summary>
/// <remarks>
/// A line is accepted only when its raw length is between 3 and 16 characters
/// (inclusive); accepted lines are trimmed before being added to the trie.
/// The method pauses twice on <see cref="Console.ReadKey()"/> before returning.
/// </remarks>
private void BuildFromDefaultDictionary()
{
    // Same bounds as the original "length > 2 && length < 17" filter.
    const int l_min_length = 3;
    const int l_max_length = 16;

    m_MyPrivateTrie = new Trie();

    // Path.Combine keeps the path valid regardless of the platform's separator.
    string l_Path = System.IO.Path.Combine(AppContext.BaseDirectory, "Assets", "words_alpha.txt");
    Console.WriteLine("Path" + l_Path);

    //Todo: Decide if this is called on a thread, or use ReadAllTextAsync
    string l_Dict_text = System.IO.File.ReadAllText(l_Path);

    // StringReader.ReadLine understands "\r\n", "\n" and "\r", and does not
    // yield a phantom empty entry for a trailing newline, so every line
    // (including the last one) goes through the same length filter.
    using (var l_reader = new System.IO.StringReader(l_Dict_text))
    {
        string l_line;
        while ((l_line = l_reader.ReadLine()) != null)
        {
            if (l_line.Length < l_min_length || l_line.Length > l_max_length)
            {
                //Todo: Fix dictionary file if the wrong words are included
                //Todo: Fix dictionary file for leading and trailing spaces on words
                continue;
            }

            string l_word = l_line.Trim();
            if (l_word.Length > 0)
            {
                m_MyPrivateTrie.AddWord(l_word);
            }
        }
    }

    // NOTE(review): the two key-press pauses around releasing the text buffer
    // look like manual memory-inspection checkpoints; kept as in the original.
    Console.ReadKey();
    l_Dict_text = null;
    Console.ReadKey();
}
/// <summary>
/// Adds one multi-word entry to the trie and asserts that the result of
/// <c>GetLongestWords</c> equals an array holding just that entry.
/// </summary>
public void GetLongestWords01()
{
    // Arrange
    trie.AddWord("the longest word");
    string[] expectedWords = { "the longest word" };

    // Act
    var actualWords = trie.GetLongestWords();

    // Assert
    Assert.AreEqual(expectedWords, actualWords);
}
/// <summary>
/// Adds one multi-word entry to the trie, then asserts that
/// <c>GetLongestWords</c> reports a single result and that the first
/// enumerated element is that entry.
/// </summary>
public void GetLongestWords01()
{
    // Arrange
    trie.AddWord("the longest word");
    string[] expectedWords = { "the longest word" };

    // Act
    var actualWords = trie.GetLongestWords();

    // Assert: the size must match before the first element is inspected.
    Assert.AreEqual(expectedWords.Length, actualWords.Count);

    IEnumerator cursor = actualWords.GetEnumerator();
    cursor.MoveNext();
    string firstWord = (string)cursor.Current;

    Assert.AreEqual(expectedWords[0], firstWord);
}
/// <summary>
/// Initialises the static fixtures: a dictionary trie filled from
/// <c>SampleText.txt</c>, a sample sentence, a random encryption obtained from
/// <c>Service</c>, and the encrypted form of the sentence.
/// </summary>
static void Setup()
{
    /*Setup dictionary*/
    _dictionary = TrieFactory.CreateTrie();

    // Dispose the reader as soon as the text is loaded; the original left
    // the file handle open.
    string sampleText;
    using (StreamReader reader = new StreamReader("..\\..\\..\\SampleText.txt"))
    {
        sampleText = reader.ReadToEnd();
    }

    // NOTE(review): consecutive separators produce empty tokens, which are
    // still passed to AddWord exactly as before — confirm that is intended.
    string[] words = sampleText.Split(' ', '.', ',', '?', '!', ':', ';');
    foreach (var word in words)
    {
        _dictionary.AddWord(word.ToLower());
    }
    /*end dictionary*/

    /*setup sample sentence*/
    _sentence = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
    /*end sample sentence*/

    /*setup random encryption*/
    _encryption = Service.GetRandomEncryption();
    /*end random encryption*/

    /*encrypt sentence*/
    _encryptedSentence = Service.Encrypt(_sentence, _encryption);
    /*end encryption*/
}
/// <summary>
/// Asserts that a fresh trie reports zero words and that, after adding the
/// empty string, the reported word count is no longer zero.
/// </summary>
public void AddWord_EmptyString01()
{
    // Arrange: start from a brand-new trie.
    trie = new Trie();
    var countBefore = trie.GetWords().Count;
    Assert.AreEqual(0, countBefore);

    // Act
    trie.AddWord("");

    // Assert
    var countAfter = trie.GetWords().Count;
    Assert.AreNotEqual(0, countAfter);
}
/// <summary>
/// Loads <c>vocabulary.txt</c> from the application base directory and builds
/// three views of it: the trie, a dictionary of word lists keyed by first
/// character, and a lookup keyed by first character.
/// </summary>
/// <remarks>
/// Empty lines are skipped because they have no first character to group by.
/// Each word is stored exactly once in its first-letter group.
/// </remarks>
public void Setup()
{
    var words = File.ReadAllLines(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "vocabulary.txt"))
        .Where(line => line.Length > 0)
        .ToArray();

    _trie = new Trie();
    _wordGroupsByFirstLetter = new Dictionary<char, IEnumerable<string>>();

    foreach (string word in words)
    {
        _trie.AddWord(word);

        // Single lookup per word; the original registered the word through
        // AddOrUpdate and then appended it a second time via ContainsKey/Add.
        char firstLetter = word[0];
        IEnumerable<string> group;
        if (!_wordGroupsByFirstLetter.TryGetValue(firstLetter, out group))
        {
            group = new List<string>();
            _wordGroupsByFirstLetter[firstLetter] = group;
        }

        // Every value stored above is a List<string>, so the cast is safe.
        ((List<string>)group).Add(word);
    }

    _wordGroups = words.ToLookup(w => w[0]);
}
/// <summary>
/// Reads <paramref name="reader"/> line by line and adds every <c>\w+</c>
/// match found on each line to <paramref name="trie"/>.
/// </summary>
/// <param name="reader">Source of the text to tokenize.</param>
/// <param name="trie">Trie that receives each matched word.</param>
private static void ReadWordsFromText(StreamReader reader, ITrie trie)
{
    string currentLine;
    while ((currentLine = reader.ReadLine()) != null)
    {
        foreach (Match wordMatch in Regex.Matches(currentLine, @"\w+"))
        {
            trie.AddWord(wordMatch.Value);
        }
    }
}
/// <summary>
/// Adds every entry of <paramref name="words"/> to <paramref name="trie"/>
/// while timing the insertion with the shared stopwatch <c>sw</c>, then prints
/// the elapsed time and resets the stopwatch.
/// </summary>
/// <param name="words">Words to insert.</param>
/// <param name="trie">Trie that receives the words.</param>
private static void AddWordsToTrie(ICollection<string> words, ITrie trie)
{
    Console.Write("Adding words to trie... ");

    sw.Start();
    using (IEnumerator<string> cursor = words.GetEnumerator())
    {
        while (cursor.MoveNext())
        {
            trie.AddWord(cursor.Current);
        }
    }
    sw.Stop();

    // The format string only references argument {1} (the elapsed time);
    // the word count is passed but not printed.
    Console.WriteLine("\rAdding words to trie -> Elapsed time: {1}\n", words.Count, sw.Elapsed);

    sw.Reset();
}
/// <summary>
/// Inserts all <paramref name="words"/> into <paramref name="trie"/>, timing
/// the loop with the shared stopwatch <c>sw</c>; prints the elapsed time and
/// resets the stopwatch afterwards.
/// </summary>
/// <param name="words">Words to insert.</param>
/// <param name="trie">Trie that receives the words.</param>
private static void AddWordsToTrie(ICollection <string> words, ITrie trie)
{
    Console.Write("Adding words to trie... ");

    sw.Start();
    foreach (string entry in words)
    {
        trie.AddWord(entry);
    }
    sw.Stop();

    // Only placeholder {1} appears in the format string, so the count
    // argument is supplied but never shown.
    var elapsed = sw.Elapsed;
    Console.WriteLine("\rAdding words to trie -> Elapsed time: {1}\n", words.Count, elapsed);

    sw.Reset();
}
/// <summary>
/// Reads <paramref name="filename"/> line by line, splits each line on single
/// spaces and adds every resulting token to <c>trie</c>.
/// </summary>
/// <param name="filename">Path of the text file to read.</param>
public void train(string filename)
{
    // The reader is disposed when the block exits, even if AddWord throws;
    // the original never closed the file.
    using (StreamReader reader = File.OpenText(filename))
    {
        string line;
        while ((line = reader.ReadLine()) != null)
        {
            // NOTE(review): consecutive spaces yield empty tokens, which are
            // still forwarded to AddWord as before — confirm that is intended.
            foreach (string token in line.Split(' '))
            {
                trie.AddWord(token);
            }
        }
    }
}
/// <summary>
/// Loads the first <c>Number</c> lines of <c>vocabulary.txt</c> from the
/// application base directory, adds them to a new trie in file order, and
/// then replaces <c>_words</c> with the result of <c>Randomize()</c>.
/// </summary>
public void Setup()
{
    string vocabularyPath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "vocabulary.txt");

    _words = File.ReadAllLines(vocabularyPath)
        .Take(Number)
        .ToArray();

    _trie = new Trie();
    foreach (var word in _words)
    {
        _trie.AddWord(word);
    }

    // Reassigned only after the trie has been filled.
    _words = _words.Randomize().ToArray();
}
/// <summary>
/// Creates a trie containing the digit strings "123", "1" and "23".
/// </summary>
/// <returns>The populated trie.</returns>
private ITrie Digits()
{
    string[] digitWords = { "123", "1", "23" };

    ITrie result = TrieFactory.CreateTrie();
    for (int index = 0; index < digitWords.Length; index++)
    {
        result.AddWord(digitWords[index]);
    }

    return result;
}
/// <summary>
/// Creates a trie containing seven lower-case sample words.
/// </summary>
/// <returns>The populated trie.</returns>
private ITrie LowerCaseWords()
{
    ITrie result = TrieFactory.CreateTrie();

    foreach (string word in new[] { "this", "test", "the", "temp", "token", "take", "thump" })
    {
        result.AddWord(word);
    }

    return result;
}
/// <summary>
/// Creates a trie containing seven upper-case sample words.
/// </summary>
/// <returns>The populated trie.</returns>
private ITrie UpperCaseWords()
{
    string[] upperWords = { "THIS", "TEST", "THE", "TEMP", "TOKEN", "TAKE", "THUMP" };

    ITrie result = TrieFactory.CreateTrie();
    for (int index = 0; index < upperWords.Length; index++)
    {
        result.AddWord(upperWords[index]);
    }

    return result;
}
/// <summary>
/// Creates a trie containing a mix of lower-case and upper-case sample words.
/// </summary>
/// <returns>The populated trie.</returns>
private ITrie Words()
{
    var mixedCaseWords = new[] { "this", "test", "the", "TEMP", "TOKEN", "TAKE", "THUMP" };
    var result = TrieFactory.CreateTrie();

    foreach (var word in mixedCaseWords)
    {
        result.AddWord(word);
    }

    return result;
}
/// <summary>
/// Creates a trie from a fixed sample list that includes digit strings, a
/// repeated entry ("1"), mixed-case words and one entry containing a space.
/// </summary>
/// <returns>The populated trie.</returns>
static ITrie BuildTestTrie()
{
    var sampleWords = new List<string>
    {
        "123", "1", "23", "1",
        "this", "test", "the",
        "TEMP", "TOKEN", "TAKE", "THUMP",
        "Microsoft Inc"
    };

    ITrie result = TrieFactory.CreateTrie();
    sampleWords.ForEach(word => result.AddWord(word));

    return result;
}
/// <summary>
/// Creates the trie from flattened body contents. Stores the frequency of each
/// word in a particular document, keyed by the document's id.
/// </summary>
/// <param name="webResponses">Document id mapped to the document's flattened text.</param>
/// <remarks>
/// Tokens are split on spaces, trimmed and lower-cased before the exclusion
/// list is applied and before occurrences are counted, so whitespace-padded
/// variants of a word are counted together. Any exception is written to the
/// console and not rethrown.
/// </remarks>
public void CreateTrie(Dictionary<int, string> webResponses)
{
    try
    {
        if (_trie == null)
        {
            _trie = TrieFactory.CreateTrie();
        }

        foreach (var response in webResponses)
        {
            // Normalise first, then filter and count. The original trimmed
            // only after grouping, so "word" and "word\n" were counted apart
            // and the later one overwrote the earlier count for the document.
            var wordCounts = response.Value
                .Split(' ', StringSplitOptions.RemoveEmptyEntries)
                .Select(token => token.Trim().ToLower())
                .Where(word => word.Length > 0 && !Constants._lstExclusion.Contains(word))
                .GroupBy(word => word)
                .ToDictionary(group => group.Key, group => group.Count());

            foreach (var entry in wordCounts)
            {
                var existingNode = _trie.ContainsWord(entry.Key);
                if (existingNode != null)
                {
                    // Word already present: record its count for this document.
                    existingNode.DocReferences[response.Key] = entry.Value;
                }
                else
                {
                    _trie.AddWord(entry.Key, new Dictionary<int, int>() { { response.Key, entry.Value } });
                }
            }
        }
    }
    catch (System.Exception ex)
    {
        Console.WriteLine($"An exception occurred while creating trie {ex}");
    }
}
/// <summary>
/// Reads <c>..\..\Files\text.txt</c> line by line, splits each line on a fixed
/// set of separator characters, adds every token to a trie and prints the
/// value of <c>WordCount("lorem")</c>.
/// </summary>
public static void Main()
{
    char[] separators = { ' ', '.', ',', '?', '!', ':' };
    ITrie trie = TrieFactory.CreateTrie();

    using (var reader = new StreamReader(@"..\..\Files\text.txt"))
    {
        while (!reader.EndOfStream)
        {
            string[] tokens = reader.ReadLine().Split(separators);
            foreach (string token in tokens)
            {
                trie.AddWord(token);
            }
        }
    }

    var loremOccurrences = trie.WordCount("lorem");
    Console.WriteLine("Lorem -> {0} times", loremOccurrences);
}
/// <summary>
/// Loads company names (and tab-separated synonyms) from <c>companies.dat</c>
/// into a trie, reads an article from standard input until a chunk containing
/// ".." is seen (or input ends), and prints a hit count and relevance
/// percentage for each parent company.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    string _strCompaniesFileName = "companies.dat";
    // Two consecutive dots mark the end of the article.
    Regex _lastLine = new Regex(@"\.[.]", RegexOptions.Compiled);
    // Everything except ASCII letters, digits and spaces is stripped from names and words.
    Regex _flattenNames = new Regex(@"[^0-9a-zA-Z ]+", RegexOptions.Compiled);
    List<string> _lstExclusion = new List<string>() { "a", "an", "the", "and", "or", "but" };
    ITrie _trie = null;
    List<CompanyNodes> _lstNodes = null;
    CompanyNodes _node = null;
    int _nTotalWordCound = 0;

    //Check file existence
    if (File.Exists(_strCompaniesFileName))
    {
        string _strCompaniesFileText = File.ReadAllText(_strCompaniesFileName);

        //Check for contents
        if (!string.IsNullOrEmpty(_strCompaniesFileText))
        {
            string[] _strCompanyLines = _strCompaniesFileText.Split(new[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);

            //Proceed only if at least one line is present
            if (_strCompanyLines.Length > 0)
            {
                _trie = TrieFactory.CreateTrie();
                _lstNodes = new List<CompanyNodes>();

                foreach (string _line in _strCompanyLines)
                {
                    string _strParentName = string.Empty;

                    //Break each line by \t to get synonyms
                    string[] _namesToAdd = _line.Split(new[] { '\t' }, StringSplitOptions.RemoveEmptyEntries);

                    foreach (string _name in _namesToAdd)
                    {
                        //Remove special chars from names and then add to trie
                        string _flattenedName = _flattenNames.Replace(_name, string.Empty);
                        _trie.AddWord(_flattenedName);

                        //To calculate the frequency
                        _node = new CompanyNodes()
                        {
                            Name = _flattenedName,
                            ParentName = !string.IsNullOrEmpty(_strParentName) ? _strParentName : null
                        };

                        //The first name is parent for synonyms
                        if (string.IsNullOrEmpty(_strParentName))
                        {
                            _strParentName = _flattenedName;
                        }

                        _lstNodes.Add(_node);
                    }
                }

                //If trie is correctly created
                if (_trie.GetAllWords().Count > 0)
                {
                    Console.WriteLine("Enter article:");
                    List<string> _lstFoundWords = new List<string>();
                    bool _bIncrementFrequency = false;
                    string _strInArticle = string.Empty;

                    //To increase the buffer size of the console
                    using (Stream inputStream = Console.OpenStandardInput(READLINE_BUFFER_SIZE))
                    {
                        byte[] bytes = new byte[READLINE_BUFFER_SIZE];
                        string temp = "";
                        while (!_lastLine.IsMatch(temp))
                        {
                            int outputLength = inputStream.Read(bytes, 0, READLINE_BUFFER_SIZE);
                            if (outputLength <= 0)
                            {
                                // End of input (e.g. redirected stdin) without the ".."
                                // terminator: stop instead of spinning forever.
                                break;
                            }

                            // Decode with the console's input encoding; UTF-7 is obsolete
                            // and misreads '+' and any non-ASCII byte.
                            temp = Console.InputEncoding.GetString(bytes, 0, outputLength);

                            // Plain concatenation: the article text must never be used as
                            // a format string, since '{' or '}' in it would throw.
                            _strInArticle += temp;
                        }
                    }

                    _strInArticle = _lastLine.Replace(_strInArticle, " ");
                    string[] _strArticleWords = _strInArticle.Split(new[] { ' ', '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);

                    if (_strArticleWords.Length > 0)
                    {
                        string _strLastString = _strArticleWords.Last();

                        //1. Create the list of strings
                        //2. search the string in trie
                        //3. if found add to the list
                        //4. join the words in the list to form a string and search it again.
                        //5. continue till mismatch occurs
                        //6. if the mismatch is due to a, an, the, and, or, but search again without them
                        //7. When mismatch, increment the count of string in the list in _lstNodes and clear the list
                        // NOTE(review): IncrementFrequency is defined elsewhere; step 7 presumes
                        // it both records the hit and clears _lstFoundWords — confirm.
                        foreach (string _strArticleWord in _strArticleWords)
                        {
                            // Reference comparison: only the array's final element is the
                            // very same string instance returned by Last().
                            bool _bLastWord = false;
                            if (ReferenceEquals(_strLastString, _strArticleWord))
                            {
                                _bLastWord = true;
                            }

                            string _strFlattenedArticleWord = _flattenNames.Replace(_strArticleWord, string.Empty);
                            string _strCurrentWord = _strFlattenedArticleWord;

                            // Prefix the candidate with the words matched so far.
                            if (_lstFoundWords.Count > 0)
                            {
                                _strFlattenedArticleWord = $"{string.Join(" ", _lstFoundWords)} {_strFlattenedArticleWord}".Trim();
                            }

                            _nTotalWordCound++;

                            if (_trie.ContainsWord(_strFlattenedArticleWord) && !string.IsNullOrEmpty(_strCurrentWord))
                            {
                                if (!_bLastWord)
                                {
                                    _lstFoundWords.Add(_strCurrentWord);
                                }
                                else
                                {
                                    IncrementFrequency(_lstNodes, _lstFoundWords, _strFlattenedArticleWord);
                                }
                            }
                            else
                            {
                                if (!_lstExclusion.Contains(_strCurrentWord))
                                {
                                    _bIncrementFrequency = true;
                                }

                                // Drop the current word again to recover the phrase matched before it.
                                _strFlattenedArticleWord = _strFlattenedArticleWord.Remove(_strFlattenedArticleWord.LastIndexOf(_strCurrentWord), _strCurrentWord.Length).Trim();

                                if (!string.IsNullOrEmpty(_strFlattenedArticleWord) && _bIncrementFrequency)
                                {
                                    IncrementFrequency(_lstNodes, _lstFoundWords, _strFlattenedArticleWord);
                                    _bIncrementFrequency = false;
                                }

                                //if current word is another company, then proceed with that
                                if (!string.IsNullOrEmpty(_strFlattenedArticleWord) && !string.IsNullOrEmpty(_strCurrentWord) && !_strFlattenedArticleWord.Equals(_strCurrentWord))
                                {
                                    _strFlattenedArticleWord = _strCurrentWord;
                                    if (!_bLastWord)
                                    {
                                        _lstFoundWords.Add(_strCurrentWord);
                                    }
                                    else
                                    {
                                        IncrementFrequency(_lstNodes, _lstFoundWords, _strFlattenedArticleWord);
                                    }
                                }
                            }
                        }
                    }

                    int _nHitCount = 0;
                    List<CompanyNodes> _lstParentNodes = _lstNodes.Where(x => x.ParentName == null).ToList();
                    int _maxLength = _lstParentNodes.Max(x => x.Name.Trim().Length);
                    string _spaces = new string(' ', _maxLength);
                    Console.WriteLine($"Company{_spaces}\tHit Count\tRelevance");
                    foreach (CompanyNodes item in _lstParentNodes)
                    {
                        _nHitCount += item.Frequency;
                        // PadRight gives the same column width as trimming the spacer string,
                        // but cannot throw when a name is longer than the trimmed maximum.
                        Console.WriteLine($"{item.Name.PadRight(_maxLength)}\t\t{item.Frequency}\t\t{((double)item.Frequency / _nTotalWordCound) * 100}%");
                    }
                    Console.WriteLine();
                    Console.WriteLine($"Total{_spaces}\t\t{_nHitCount}\t\t{((double)_nHitCount / _nTotalWordCound) * 100}%");
                    Console.WriteLine($"Total Words{_spaces}\t{_nTotalWordCound}");
                }
                else
                {
                    Console.WriteLine("Somthing went wrong while creating trie");
                }
            }
            else
            {
                Console.WriteLine($"something went wrong with reading text of {_strCompaniesFileName}");
            }
        }
        else
        {
            Console.WriteLine($"'{_strCompaniesFileName}' is empty.");
        }
    }
    else
    {
        Console.WriteLine($"'{_strCompaniesFileName}' file not found.");
    }
    Console.ReadKey();
}