/// <summary>
/// Checks that reading a chain with <c>decrementChainCount</c> enabled lowers the
/// <c>ChainCount</c> reported by each subsequent read.
/// </summary>
/// <remarks>
/// "I love Hawaii." is added five times with chain breaks enabled and then read six times;
/// the asserted counts fall from 15 to 0 in steps of 3.
/// </remarks>
private void TestDecrementChainCount()
{
    const string sentence = "I love Hawaii.";
    const int additions = 5;
    int[] expectedChainCounts = { 15, 12, 9, 6, 3, 0 };

    _markovChainString = new MarkovChainNodeString(typeof(MCStorageString), true, _onDiskDirectoryBasePath, null);

    for (int added = 0; added < additions; added++)
    {
        AddMarkovChainString(Preprocess(sentence, int.MaxValue), true, null);
    }

    // Every read decrements the counts it walks over, so each expectation is lower than the last.
    foreach (int expected in expectedChainCounts)
    {
        MarkovChainString chain = GetMarkovChainString(sentence, true, false);

        Debug.Assert(chain.ChainCount == expected);
    }
}
/// <summary>
/// Gets or sets the node stored under <paramref name="key"/>, keeping a copy in the ASP.NET
/// cache in front of the serialized file on disk.
/// </summary>
/// <param name="key">Key that <c>GetActualOnDiskFilePath</c> maps to the node's file path.</param>
public override MarkovChainNodeString this[string key]
{
    get
    {
        string filePath = GetActualOnDiskFilePath(key);

        // The file path doubles as the cache key; serve from the cache when possible.
        object cached = HttpRuntime.Cache.Get(filePath);

        if (cached != null)
        {
            return (MarkovChainNodeString)cached;
        }

        MarkovChainNodeString markovChainNodeString = (MarkovChainNodeString)Serialization.DeserializeObject(filePath, typeof(MarkovChainNodeString));

        // These members are re-established after every load from disk.
        markovChainNodeString._onDiskDirectoryBasePath = _onDiskDirectoryBasePath;
        markovChainNodeString.Children = (MarkovChainNodeStorageOnDiskDictionary)Activator.CreateInstance(typeof(MarkovChainNodeStorageOnDiskDictionary), _onDiskDirectoryBasePath);
        markovChainNodeString.Children.Path = markovChainNodeString.Path;

        // Cache.NoAbsoluteExpiration is DateTime.MaxValue; the named constant matches the setter.
        HttpRuntime.Cache.Add(filePath, markovChainNodeString, null, Cache.NoAbsoluteExpiration, TimeSpan.FromSeconds(_slidingExpirationInSeconds), CacheItemPriority.Normal, _cacheItemRemovedCallback);

        return markovChainNodeString;
    }
    set
    {
        string filePath = GetActualOnDiskFilePath(key);

        // Cache.Add silently keeps an existing entry, so a different instance assigned to an
        // already-cached key would be written to disk while the getter kept serving the stale
        // one. Insert replaces the entry instead. It is skipped when the very same instance is
        // already cached, so a no-op assignment does not trigger the removal callback.
        // NOTE(review): replacing an entry invokes _cacheItemRemovedCallback for the old value
        // with reason Removed - confirm the callback tolerates that.
        object cached = HttpRuntime.Cache.Get(filePath);

        if (cached == null || !object.ReferenceEquals(cached, value))
        {
            HttpRuntime.Cache.Insert(filePath, value, null, Cache.NoAbsoluteExpiration, TimeSpan.FromSeconds(_slidingExpirationInSeconds), CacheItemPriority.Normal, _cacheItemRemovedCallback);
        }

        Serialization.SerializeObject(filePath, value);
        ManageCachedValues(Keys, value);
    }
}
/// <summary>
/// Creates a chain with the given word-extraction and casing options and a newly constructed
/// node in <c>_markovChainString</c>.
/// </summary>
/// <param name="extractText">Stored in <c>_extractText</c>.</param>
/// <param name="extractDistinctWords">Stored in <c>_extractDistinctWords</c>.</param>
/// <param name="isCaseSensitive">Stored in <c>_isCaseSensitive</c>.</param>
public MarkovChain(bool extractText, bool extractDistinctWords, bool isCaseSensitive)
{
    _markovChainString = new MarkovChainNodeString(typeof(MCStorageString), false, _onDiskDirectoryBasePath, null);

    _isCaseSensitive = isCaseSensitive;
    _extractDistinctWords = extractDistinctWords;
    _extractText = extractText;
}
/// <summary>
/// Ensures the on-disk directory for <paramref name="key"/> exists beneath this storage's
/// directory.
/// </summary>
/// <param name="key">Name of the sub-directory to create.</param>
/// <param name="value">Not used by this implementation.</param>
public override void Add(string key, MarkovChainNodeString value)
{
    string nodeDirectory = System.IO.Path.Combine(GetActualOnDiskDirectoryPath(), key);

    if (Directory.Exists(nodeDirectory))
    {
        return;
    }

    Directory.CreateDirectory(nodeDirectory);
}
/// <summary>
/// Registers <paramref name="value"/> under <paramref name="key"/>: creates its directory,
/// updates the node, refreshes the cached key and value collections, and marks the directory
/// as known in the cache.
/// </summary>
/// <param name="key">Name of the sub-directory beneath this storage's directory.</param>
/// <param name="value">Node being added; its <c>Update</c> method is invoked.</param>
public override void Add(string key, MarkovChainNodeString value)
{
    string nodeDirectory = System.IO.Path.Combine(GetActualOnDiskDirectoryPath(), key);

    // Nothing to do only when the directory is both marked in the cache and present on disk.
    bool alreadyRegistered = HttpRuntime.Cache.Get(nodeDirectory) != null && Directory.Exists(nodeDirectory);

    if (alreadyRegistered)
    {
        return;
    }

    Delimon.Win32.IO.Directory.CreateDirectory(nodeDirectory);
    value.Update();

    ManageCachedKeys(nodeDirectory, true);
    ManageCachedValues(Keys, value);

    HttpRuntime.Cache.Add(nodeDirectory, true, null, Cache.NoAbsoluteExpiration, TimeSpan.FromSeconds(_slidingExpirationInSeconds), CacheItemPriority.Normal, _cacheItemRemovedCallback);
}
/// <summary>
/// Gets or sets the node stored under <paramref name="key"/> by deserializing it from, or
/// serializing it to, the file that <c>GetActualOnDiskFilePath</c> resolves for the key.
/// </summary>
/// <param name="key">Key identifying the node's file.</param>
public override MarkovChainNodeString this[string key]
{
    get
    {
        string nodeFile = GetActualOnDiskFilePath(key);
        MarkovChainNodeString node = (MarkovChainNodeString)Serialization.DeserializeObject(nodeFile, typeof(MarkovChainNodeString));

        // These members are re-established after every load from disk.
        node._onDiskDirectoryBasePath = _onDiskDirectoryBasePath;

        Type childStorageType = typeof(MarkovChainNodeStorageOnDiskDictionaryString);
        node.Children = (MarkovChainNodeStorageOnDiskDictionaryString)Activator.CreateInstance(childStorageType, _onDiskDirectoryBasePath);

        return node;
    }
    set
    {
        string nodeFile = GetActualOnDiskFilePath(key);

        Serialization.SerializeObject(nodeFile, value);
    }
}
/// <summary>
/// Checks that a sentence and its attributes can be recalled, and that after
/// <c>ResetString</c> a second sentence is recalled without attribute totals from the first.
/// </summary>
private void TestChainRecallWithReset()
{
    const string hikingSentence = "I love hiking.";
    const string baconSentence = "I love bacon.";
    const int additionsPerSentence = 2;

    _markovChainString = new MarkovChainNodeString(typeof(MCStorageString), true, _onDiskDirectoryBasePath, null);

    Dictionary<string, double> hikingAttributes = new Dictionary<string, double>
    {
        { "hiking", 1 },
        { "bacon", 0 }
    };

    for (int added = 0; added < additionsPerSentence; added++)
    {
        AddMarkovChainString(Preprocess(hikingSentence, int.MaxValue), false, hikingAttributes);
    }

    MarkovChainString recalled = GetMarkovChainString(hikingSentence, false, false);

#if DEBUG
    Debug.Assert(recalled.ToString().StartsWith("String: i love hiking"));
    Debug.Assert(recalled.Nodes.Sum(node => node.Attributes["hiking"]) == 6);
    Debug.Assert(recalled.Nodes.Sum(node => node.Attributes["bacon"]) == 0);
#endif

    // Start over with an empty chain before teaching the second sentence.
    ResetString();

    Dictionary<string, double> baconAttributes = new Dictionary<string, double>
    {
        { "hiking", 0 },
        { "bacon", 1 }
    };

    for (int added = 0; added < additionsPerSentence; added++)
    {
        AddMarkovChainString(Preprocess(baconSentence, int.MaxValue), false, baconAttributes);
    }

    recalled = GetMarkovChainString(baconSentence, false, false);

#if DEBUG
    Debug.Assert(recalled.ToString().StartsWith("String: i love bacon"));
    Debug.Assert(recalled.Nodes.Sum(node => node.Attributes["hiking"]) == 0);
    Debug.Assert(recalled.Nodes.Sum(node => node.Attributes["bacon"]) == 6);
#endif
}
/// <summary>
/// Returns the cached set of nodes for this storage, building and caching it from
/// <paramref name="keys"/> on a cache miss, and optionally adding one more node to it.
/// </summary>
/// <param name="keys">Keys whose nodes seed the set when nothing is cached yet.</param>
/// <param name="markovChainNodeString">Node to add to the set, or null to add nothing.</param>
/// <returns>A snapshot array of the set's current contents.</returns>
private MarkovChainNodeString[] ManageCachedValues(ICollection<string> keys, MarkovChainNodeString markovChainNodeString)
{
    string cacheKey = "Values: " + Path;
    object cacheEntry = HttpRuntime.Cache.Get(cacheKey);
    HashSet<MarkovChainNodeString> cachedNodes;

    if (cacheEntry != null)
    {
        cachedNodes = (HashSet<MarkovChainNodeString>)cacheEntry;
    }
    else
    {
        // Cache miss: load every node through the indexer and cache the resulting set.
        cachedNodes = new HashSet<MarkovChainNodeString>();

        foreach (string key in keys)
        {
            cachedNodes.Add(this[key]);
        }

        HttpRuntime.Cache.Add(cacheKey, cachedNodes, null, Cache.NoAbsoluteExpiration, TimeSpan.FromSeconds(_slidingExpirationInSeconds), CacheItemPriority.Normal, _cacheItemRemovedCallback);
    }

    // The set held by the cache is mutated in place, so later calls see the added node.
    if (markovChainNodeString != null)
    {
        cachedNodes.Add(markovChainNodeString);
    }

    return cachedNodes.ToArray();
}
/// <summary>
/// Adds <paramref name="value"/> under <paramref name="key"/> by forwarding to
/// <c>_dictionary.Add</c>.
/// </summary>
/// <param name="key">Key for the new entry.</param>
/// <param name="value">Node to store.</param>
/// <remarks>
/// NOTE(review): duplicate-key behavior is whatever <c>_dictionary.Add</c> does; its type is
/// not visible here.
/// </remarks>
public override void Add(string key, MarkovChainNodeString value)
{
    _dictionary.Add(key, value);
}
/// <summary>
/// Baseline checks for chain building and recall: first with short sentences and no
/// attributes, then with longer sentences that carry attributes.
/// </summary>
private void TestBaseline()
{
    _markovChainString.Children.Clear();

    // Simple sentences, no attributes.
    AddMarkovChainString(Preprocess("I love.", int.MaxValue), false, null);
    AddMarkovChainString(Preprocess("I love.", int.MaxValue), false, null);
    AddMarkovChainString(Preprocess("I like.", int.MaxValue), false, null);

    AssertBaselineChain("I", 5, 6, "i love", "I love");
    AssertBaselineChain("I love", 5, 6, "i love", "I love");
    AssertBaselineChain("I like", 4, 6, "i like", "I like");

    _markovChainString = new MarkovChainNodeString(typeof(MCStorageString), true, _onDiskDirectoryBasePath, null);

    // Longer sentences, with attributes.
    Dictionary<string, double> attributes = new Dictionary<string, double>
    {
        { "Attribute3", 3 },
        { "Attribute1", 1 }
    };

    AddMarkovChainString(Preprocess("I love cats more than I love dogs.", int.MaxValue), false, attributes);
    //3,3,2,2,2,2
    AddMarkovChainString(Preprocess("I love cats more than I love dogs.", int.MaxValue), false, attributes);
    AddMarkovChainString(Preprocess("I love dogs more than I love cats.", int.MaxValue), false, attributes);

    AssertBaselineChain("I love", 18, 19, "i love cats more than i love dogs", "I love cats more than I love dogs");
    AssertBaselineChain("I love cats", 18, 19, "i love cats more than i love dogs", "I love cats more than I love dogs");
}

/// <summary>
/// Recalls the chain for <paramref name="input"/> (no decrement, always continuing to the end
/// of the chain) and asserts its counts and text.
/// </summary>
/// <param name="input">Text passed to <c>GetMarkovChainString</c>.</param>
/// <param name="expectedChainCount">Expected <c>ChainCount</c>.</param>
/// <param name="expectedChainCountTotal">Expected <c>ChainCountTotal</c>.</param>
/// <param name="expectedWhenCaseInsensitive">Expected <c>String</c> when <c>_isCaseSensitive</c> is false.</param>
/// <param name="expectedWhenCaseSensitive">Expected <c>String</c> when <c>_isCaseSensitive</c> is true.</param>
private void AssertBaselineChain(string input, int expectedChainCount, int expectedChainCountTotal, string expectedWhenCaseInsensitive, string expectedWhenCaseSensitive)
{
    MarkovChainString chain = GetMarkovChainString(input, false, true);

#if DEBUG
    Console.WriteLine(chain.String + " :: " + chain.ChainCount);
#endif

    Debug.Assert(chain.ChainCount == expectedChainCount);
    Debug.Assert(chain.ChainCountTotal == expectedChainCountTotal);

    string expectedString = _isCaseSensitive ? expectedWhenCaseSensitive : expectedWhenCaseInsensitive;

    Debug.Assert(chain.String == expectedString);
}
/// <summary>
/// Discards the current contents of <c>_markovChainString</c> by replacing it with a newly
/// constructed node.
/// </summary>
public void ResetString()
{
    MarkovChainNodeString freshNode = new MarkovChainNodeString(typeof(MCStorageString), true, _onDiskDirectoryBasePath, null);

    _markovChainString = freshNode;
}
/// <summary>
/// Feeds one pre-processed input into the chain, creating or updating one node per word along
/// the path that starts at the root node.
/// </summary>
/// <param name="markovChainInputString">
/// Input whose <c>Nodes</c> supply the words. When the chain is not case-sensitive the nodes'
/// strings are lower-cased in place.
/// </param>
/// <param name="addMarkovChainBreaks">
/// Experimental: when true, each word after the first is also registered directly under the
/// root if the root has no child with that key yet.
/// </param>
/// <param name="attributes">Values accumulated onto the root and every visited node; may be null.</param>
/// <returns>The root node, or null when the input is null or empty.</returns>
public MarkovChainNodeString AddMarkovChainString(MarkovChainInputString markovChainInputString, bool addMarkovChainBreaks, Dictionary<string, double> attributes)
{
    if (markovChainInputString == null
        || markovChainInputString.Nodes == null
        || markovChainInputString.Nodes.Count == 0
        || string.IsNullOrEmpty(markovChainInputString.ToString()))
    {
        return null;
    }

    if (!_isCaseSensitive)
    {
        foreach (MarkovChainInputNodeString inputNode in markovChainInputString.Nodes)
        {
            inputNode.String = inputNode.String.ToLowerInvariant();
        }
    }

    MarkovChainNodeString previous = null;
    MarkovChainNodeString current = null;
    string finalWord = null;
    string path = null;
    int position = 1;
    bool atFirstWord = true;

    foreach (MarkovChainInputNodeString inputNode in markovChainInputString.Nodes)
    {
        string word = string.Intern(inputNode.String.Trim());

        finalWord = word;
        path += word + "\\";

        if (atFirstWord)
        {
            // The first word hangs directly off the root, which is also counted and updated.
            atFirstWord = false;

            if (!MarkovChainString.Children.ContainsKey(word))
            {
                MarkovChainNodeString created = new MarkovChainNodeString(typeof(MCStorageString), false, _onDiskDirectoryBasePath, path);

                MarkovChainString.Children.Add(word, created);
            }

            MarkovChainString.ChainCount++;
            ManageAttributes(MarkovChainString, attributes);
            MarkovChainString.Update();

            current = MarkovChainString.Children[word];
            current.ChainCount++;
            current.Children.Path = path;
            current.IsInputBoundary = false;
            current.IsWordBoundary = true;

            // The Parent link (to the root) is deliberately not assigned so the model can be
            // serialized to JSON; it should be re-added once disk-backed storage is added.
            current.Index = position++;
            ManageAttributes(current, attributes);

            // Written back through the indexer; a current.Update() call is disabled here.
            MarkovChainString.Children[word] = current;
        }
        else
        {
            if (!current.Children.ContainsKey(word))
            {
                MarkovChainNodeString created = new MarkovChainNodeString(typeof(MCStorageString), false, _onDiskDirectoryBasePath, path);

                current.Children.Add(word, created);
            }

            previous = current;
            current = current.Children[word];
            current.Path = path;
            current.ChainCount++;
            current.Children.Path = path;
            current.IsInputBoundary = false;
            current.IsWordBoundary = true;

            // The Parent link (to previous) is deliberately not assigned so the model can be
            // serialized to JSON; it should be re-added once disk-backed storage is added.
            current.Index = position++;
            ManageAttributes(current, attributes);

            if (addMarkovChainBreaks)
            {
                // Experimental: expose the node at the root too, so a chain can restart here.
                if (MarkovChainString.Children.ContainsKey(word))
                {
                    // The node's own attributes are passed as the source, so each is added to itself.
                    ManageAttributes(current, current.Attributes);
                }
                else
                {
                    MarkovChainString.Children.Add(word, current);
                }
            }

            // Written back through the indexer; a current.Update() call is disabled here.
            previous.Children[word] = current;
        }
    }

    // Mark the last node as the end of the input. This needs a parent, so it only happens for
    // inputs of two or more words.
    if (current != null && previous != null && !string.IsNullOrEmpty(finalWord))
    {
        current.IsInputBoundary = true;
        previous.Children[finalWord] = current;
    }

    return MarkovChainString;
}

/// <summary>
/// Accumulates <paramref name="attributes"/> onto the node: new keys are added, existing keys
/// have the incoming value added to their current value.
/// </summary>
/// <param name="currentMarkovChainNodeString">Node whose <c>Attributes</c> are updated (created if null).</param>
/// <param name="attributes">Values to accumulate; nothing happens when null.</param>
private void ManageAttributes(MarkovChainNodeString currentMarkovChainNodeString, Dictionary<string, double> attributes)
{
    if (attributes == null)
    {
        return;
    }

    if (currentMarkovChainNodeString.Attributes == null)
    {
        currentMarkovChainNodeString.Attributes = new Dictionary<string, double>();
    }

    // Iterate over a snapshot of the keys: callers may pass the node's own Attributes as the
    // source, in which case the dictionary being read is also the one being written.
    foreach (string name in attributes.Keys.ToList())
    {
        if (currentMarkovChainNodeString.Attributes.ContainsKey(name))
        {
            currentMarkovChainNodeString.Attributes[name] += attributes[name];
        }
        else
        {
            currentMarkovChainNodeString.Attributes.Add(name, attributes[name]);
        }
    }
}

/// <summary>
/// Rebuilds the children of every node, starting at the root, as in-memory storage ordered by
/// descending chain count.
/// </summary>
public void PrepareMarkovChainString()
{
    PrepareMarkovChainString(MarkovChainString);
}

/// <summary>
/// Replaces the node's children with in-memory storage built from
/// <c>OrderByDescendingChainCount()</c>, then does the same for each child recursively.
/// </summary>
/// <param name="markovChainNodeString">Node whose subtree is prepared.</param>
private void PrepareMarkovChainString(MarkovChainNodeString markovChainNodeString)
{
    MarkovChainNodeStorageInMemoryDictionaryString orderedChildren = new MarkovChainNodeStorageInMemoryDictionaryString(markovChainNodeString.Children.OrderByDescendingChainCount(), _onDiskDirectoryBasePath);

    markovChainNodeString.Children = orderedChildren;

    foreach (KeyValuePair<string, MarkovChainNodeString> child in markovChainNodeString.Children)
    {
        PrepareMarkovChainString(child.Value);
    }
}

/// <summary>
/// Walks the chain along the words of <paramref name="input"/> and, when requested or when not
/// every word could be followed, keeps descending through the highest-count child until a node
/// without children is reached.
/// </summary>
/// <param name="input">Text to follow; lower-cased first when the chain is not case-sensitive.</param>
/// <param name="decrementChainCount">When true, the <c>ChainCount</c> of each node stepped into is decremented.</param>
/// <param name="alwaysContinueToEndOfChain">When true, the walk continues past the input even if every word was matched.</param>
/// <returns>
/// Null when <paramref name="input"/> is null or empty. Otherwise a result whose text and count
/// properties are filled in, except when the root has no children or a word is unknown while the
/// walk is at the root; in those cases the result is returned with only its initial values.
/// </returns>
public MarkovChainString GetMarkovChainString(string input, bool decrementChainCount, bool alwaysContinueToEndOfChain)
{
    if (string.IsNullOrEmpty(input))
    {
        return null;
    }

    MarkovChainString result = new MarkovChainString
    {
        IsBrokenChain = false,
        Nodes = new List<MarkovChainNodeString>(),
        Input = input
    };

    if (MarkovChainString.Children.Count == 0)
    {
        return result;
    }

    MarkovChainNodeString cursor = MarkovChainString;
    decimal chainCount = 0;
    decimal chainCountTotal = 0;
    string path = null;
    StringBuilder text = new StringBuilder();

    if (!_isCaseSensitive)
    {
        input = input.ToLowerInvariant();
    }

    // Steps from the cursor into its child for the given word, recording the node being left
    // and accumulating the counts. Any exception is swallowed and reported as false; the
    // callers below ignore the return value.
    Func<string, bool> stepInto = word =>
    {
        try
        {
            result.Nodes.Add(cursor);
            chainCount += cursor.Children[word].ChainCount;
            chainCountTotal += cursor.Children.Values.Sum(child => child.ChainCount);
            text.Append(word + " ");
            cursor = cursor.Children[word];

            if (decrementChainCount)
            {
                cursor.ChainCount--;
            }

            cursor.Path = path;
            cursor.Children.Path = path;
        }
        catch (Exception)
        {
            return false;
        }

        return true;
    };

    List<string> words = UserDefinedFunctions.ExtractWords(input, _extractText, _extractDistinctWords).Cast<string>().ToList();

    foreach (string word in words)
    {
        path += word + "\\";

        if (cursor.Children.ContainsKey(word))
        {
            stepInto(word);
        }
        else if (MarkovChainString.Children.ContainsKey(word))
        {
            // The word does not follow the current node but is known at the root: restart there.
            cursor = MarkovChainString;
            result.IsBrokenChain = true;
            stepInto(word);
        }
        else
        {
            if (cursor == MarkovChainString)
            {
                return result;
            }

            break;
        }
    }

    // One node is recorded per word followed, so equal counts mean the whole input matched.
    bool continueProcessing = result.Nodes.Count != words.Count;

    result.ChainCountKnown = chainCount;
    result.ChainCountTotalKnown = chainCountTotal;

    if (continueProcessing || alwaysContinueToEndOfChain)
    {
        KeyValuePair<string, MarkovChainNodeString> best = cursor.Children.OrderByDescendingChainCount().FirstOrDefault();

        while (best.Key != null && cursor.Children.Values.Count != 0)
        {
            result.Nodes.Add(cursor);
            chainCount += cursor.Children[best.Key].ChainCount;
            chainCountTotal += cursor.Children.Values.Sum(child => child.ChainCount);
            text.Append(best.Key + " ");
            cursor = cursor.Children[best.Key];

            if (decrementChainCount)
            {
                best.Value.ChainCount--;
            }

            path += best.Key + "\\";
            cursor.Path = path;
            cursor.Children.Path = path;
            best = cursor.Children.OrderByDescendingChainCount().FirstOrDefault();
        }
    }

    result.String = text.ToString().Trim();
    result.ChainCount = chainCount;
    result.ChainCountTotal = chainCountTotal;

    return result;
}
/// <summary>
/// Adds <paramref name="value"/> to this storage under <paramref name="key"/>. Each concrete
/// storage class supplies its own implementation.
/// </summary>
/// <param name="key">Key under which the node is stored.</param>
/// <param name="value">Node to store.</param>
public abstract void Add(string key, MarkovChainNodeString value);