Пример #1
0
        /// <summary>
        /// Trains the same sentence five times and then recalls it repeatedly with
        /// decrementChainCount enabled, asserting that the reported ChainCount goes
        /// 15, 12, 9, 6, 3, 0 over six consecutive recalls.
        /// </summary>
        private void TestDecrementChainCount()
        {
            _markovChainString = new MarkovChainNodeString(typeof(MCStorageString), true, _onDiskDirectoryBasePath, null);

            // Five identical training passes, with chain breaks and without attributes.
            for (int pass = 0; pass < 5; pass++)
            {
                AddMarkovChainString(Preprocess("I love Hawaii.", int.MaxValue), true, null);
            }

            // Every recall is made with decrementChainCount = true; the expected total drops by three each time.
            for (int expectedChainCount = 15; expectedChainCount >= 0; expectedChainCount -= 3)
            {
                MarkovChainString recalled = GetMarkovChainString("I love Hawaii.", true, false);
                Debug.Assert(recalled.ChainCount == expectedChainCount);
            }
        }
Пример #2
0
        /// <summary>
        /// Gets or sets the node stored under <paramref name="key"/>. Reads are served from
        /// HttpRuntime.Cache when an entry exists and are otherwise deserialized from the file
        /// returned by GetActualOnDiskFilePath; writes update the cache entry and serialize the
        /// node to that file.
        /// </summary>
        /// <param name="key">Key of the node; translated to a file path by GetActualOnDiskFilePath.</param>
        public override MarkovChainNodeString this[string key]
        {
            get
            {
                string filePath = GetActualOnDiskFilePath(key);

                object o = HttpRuntime.Cache.Get(filePath);

                if (o != null)
                {
                    return((MarkovChainNodeString)o);
                }

                MarkovChainNodeString markovChainNodeString = (MarkovChainNodeString)Serialization.DeserializeObject(filePath, typeof(MarkovChainNodeString));

                // These members are re-attached here after deserialization.
                markovChainNodeString._onDiskDirectoryBasePath = _onDiskDirectoryBasePath;
                markovChainNodeString.Children      = (MarkovChainNodeStorageOnDiskDictionary)Activator.CreateInstance(typeof(MarkovChainNodeStorageOnDiskDictionary), _onDiskDirectoryBasePath);
                markovChainNodeString.Children.Path = markovChainNodeString.Path;

                // Cache.NoAbsoluteExpiration has the same value as DateTime.MaxValue; the named constant
                // matches the setter and makes the "sliding expiration only" intent explicit.
                HttpRuntime.Cache.Add(filePath, markovChainNodeString, null, Cache.NoAbsoluteExpiration, TimeSpan.FromSeconds(_slidingExpirationInSeconds), CacheItemPriority.Normal, _cacheItemRemovedCallback);

                return(markovChainNodeString);
            }
            set
            {
                string filePath = GetActualOnDiskFilePath(key);

                // Cache.Add silently keeps an entry that already exists, so assigning a different instance
                // to an already-cached path would leave the getter returning the stale node. Insert
                // overwrites the entry instead. The entry is left alone when it already holds this exact
                // instance, so the removal callback is not triggered by a plain write-back. A null value
                // is still passed through so that the cache rejects it as before.
                object cached = HttpRuntime.Cache.Get(filePath);

                if (value == null || !object.ReferenceEquals(cached, value))
                {
                    HttpRuntime.Cache.Insert(filePath, value, null, Cache.NoAbsoluteExpiration, TimeSpan.FromSeconds(_slidingExpirationInSeconds), CacheItemPriority.Normal, _cacheItemRemovedCallback);
                }

                Serialization.SerializeObject(filePath, value);

                ManageCachedValues(Keys, value);
            }
        }
Пример #3
0
        /// <summary>
        /// Initializes the chain's option flags and creates its root string node.
        /// </summary>
        /// <param name="extractText">Value stored in the _extractText field.</param>
        /// <param name="extractDistinctWords">Value stored in the _extractDistinctWords field.</param>
        /// <param name="isCaseSensitive">Value stored in the _isCaseSensitive field.</param>
        public MarkovChain(bool extractText, bool extractDistinctWords, bool isCaseSensitive)
        {
            _isCaseSensitive      = isCaseSensitive;
            _extractDistinctWords = extractDistinctWords;
            _extractText          = extractText;

            // The root node is built with the MCStorageString storage type and no path.
            MarkovChainNodeString rootNode = new MarkovChainNodeString(typeof(MCStorageString), false, _onDiskDirectoryBasePath, null);

            _markovChainString = rootNode;
        }
Пример #4
0
        /// <summary>
        /// Makes sure a directory named after <paramref name="key"/> exists below the path
        /// returned by GetActualOnDiskDirectoryPath.
        /// </summary>
        /// <param name="key">Name of the sub-directory to create.</param>
        /// <param name="value">Not used by this implementation.</param>
        public override void Add(string key, MarkovChainNodeString value)
        {
            string nodeDirectory = System.IO.Path.Combine(GetActualOnDiskDirectoryPath(), key);

            if (Directory.Exists(nodeDirectory))
            {
                // Already present; nothing to create.
                return;
            }

            Directory.CreateDirectory(nodeDirectory);
        }
Пример #5
0
        /// <summary>
        /// Registers <paramref name="value"/> under <paramref name="key"/>: creates the node's
        /// directory, updates the node, refreshes the cached key and value collections and flags
        /// the directory in HttpRuntime.Cache. Does nothing when the directory is already flagged
        /// in the cache and exists on disk.
        /// </summary>
        /// <param name="key">Name of the sub-directory that represents the node.</param>
        /// <param name="value">Node to update and add to the cached values.</param>
        public override void Add(string key, MarkovChainNodeString value)
        {
            string nodeDirectory = System.IO.Path.Combine(GetActualOnDiskDirectoryPath(), key);

            // Skip the work only when both the cache flag and the directory are present.
            bool isFlaggedInCache = HttpRuntime.Cache.Get(nodeDirectory) != null;

            if (isFlaggedInCache && Directory.Exists(nodeDirectory))
            {
                return;
            }

            Delimon.Win32.IO.Directory.CreateDirectory(nodeDirectory);

            value.Update();

            ManageCachedKeys(nodeDirectory, true);
            ManageCachedValues(Keys, value);

            HttpRuntime.Cache.Add(nodeDirectory, true, null, Cache.NoAbsoluteExpiration, TimeSpan.FromSeconds(_slidingExpirationInSeconds), CacheItemPriority.Normal, _cacheItemRemovedCallback);
        }
Пример #6
0
        /// <summary>
        /// Gets or sets the node stored under <paramref name="key"/> by deserializing it from,
        /// or serializing it to, the file returned by GetActualOnDiskFilePath.
        /// </summary>
        /// <param name="key">Key of the node; translated to a file path by GetActualOnDiskFilePath.</param>
        public override MarkovChainNodeString this[string key]
        {
            get
            {
                string nodeFile = GetActualOnDiskFilePath(key);
                MarkovChainNodeString loadedNode = (MarkovChainNodeString)Serialization.DeserializeObject(nodeFile, typeof(MarkovChainNodeString));

                // The base path and the child storage are assigned here on every read.
                loadedNode._onDiskDirectoryBasePath = _onDiskDirectoryBasePath;
                loadedNode.Children = (MarkovChainNodeStorageOnDiskDictionaryString)Activator.CreateInstance(typeof(MarkovChainNodeStorageOnDiskDictionaryString), _onDiskDirectoryBasePath);

                return loadedNode;
            }
            set
            {
                string nodeFile = GetActualOnDiskFilePath(key);
                Serialization.SerializeObject(nodeFile, value);
            }
        }
Пример #7
0
        /// <summary>
        /// Trains a sentence with one attribute set, checks the recalled chain, resets the
        /// string chain via ResetString, then repeats with a second sentence and attribute set
        /// and asserts that only the second set's values are present.
        /// </summary>
        private void TestChainRecallWithReset()
        {
            _markovChainString = new MarkovChainNodeString(typeof(MCStorageString), true, _onDiskDirectoryBasePath, null);

            Dictionary<string, double> hikingAttributes = new Dictionary<string, double>
            {
                { "hiking", 1 },
                { "bacon", 0 }
            };

            for (int pass = 0; pass < 2; pass++)
            {
                AddMarkovChainString(Preprocess("I love hiking.", int.MaxValue), false, hikingAttributes);
            }

            MarkovChainString recalled = GetMarkovChainString("I love hiking.", false, false);
#if DEBUG
            Debug.Assert(recalled.ToString().StartsWith("String: i love hiking"));

            Debug.Assert(recalled.Nodes.Sum(node => node.Attributes["hiking"]) == 6);
            Debug.Assert(recalled.Nodes.Sum(node => node.Attributes["bacon"]) == 0);
#endif

            // Start over with a new root before training the second sentence.
            ResetString();

            Dictionary<string, double> baconAttributes = new Dictionary<string, double>
            {
                { "hiking", 0 },
                { "bacon", 1 }
            };

            for (int pass = 0; pass < 2; pass++)
            {
                AddMarkovChainString(Preprocess("I love bacon.", int.MaxValue), false, baconAttributes);
            }

            recalled = GetMarkovChainString("I love bacon.", false, false);
#if DEBUG
            Debug.Assert(recalled.ToString().StartsWith("String: i love bacon"));

            Debug.Assert(recalled.Nodes.Sum(node => node.Attributes["hiking"]) == 0);
            Debug.Assert(recalled.Nodes.Sum(node => node.Attributes["bacon"]) == 6);
#endif
        }
Пример #8
0
        /// <summary>
        /// Returns the set of nodes cached under "Values: " + Path as an array, building and
        /// caching that set from <paramref name="keys"/> when no cache entry exists yet.
        /// </summary>
        /// <param name="keys">Keys whose nodes are loaded through the indexer when the set has to be built.</param>
        /// <param name="markovChainNodeString">Optional node to add to the set; ignored when null.</param>
        /// <returns>A snapshot array of the cached node set.</returns>
        private MarkovChainNodeString[] ManageCachedValues(ICollection <string> keys, MarkovChainNodeString markovChainNodeString)
        {
            string cacheKey = "Values: " + Path;

            HashSet<MarkovChainNodeString> cachedNodes = (HashSet<MarkovChainNodeString>)HttpRuntime.Cache.Get(cacheKey);

            if (cachedNodes == null)
            {
                // No entry yet: load every node by key and cache the resulting set.
                cachedNodes = new HashSet<MarkovChainNodeString>();

                foreach (string nodeKey in keys)
                {
                    cachedNodes.Add(this[nodeKey]);
                }

                HttpRuntime.Cache.Add(cacheKey, cachedNodes, null, Cache.NoAbsoluteExpiration, TimeSpan.FromSeconds(_slidingExpirationInSeconds), CacheItemPriority.Normal, _cacheItemRemovedCallback);
            }

            if (markovChainNodeString != null)
            {
                cachedNodes.Add(markovChainNodeString);
            }

            return cachedNodes.ToArray();
        }
 /// <summary>
 /// Adds <paramref name="value"/> to the backing dictionary under <paramref name="key"/>.
 /// </summary>
 /// <param name="key">Key of the new entry.</param>
 /// <param name="value">Node to store.</param>
 public override void Add(string key, MarkovChainNodeString value)
 {
     // Delegates directly to the wrapped dictionary.
     this._dictionary.Add(key, value);
 }
Пример #10
0
        /// <summary>
        /// Baseline recall test: trains a few short sentences without attributes, then a few
        /// longer ones with attributes, and asserts the recalled string and chain counts for
        /// several inputs (always continuing to the end of the chain).
        /// </summary>
        private void TestBaseline()
        {
            _markovChainString.Children.Clear();

            // Recalls the chain for one input, prints it in DEBUG builds and asserts the expected
            // counts and string.
            Action<string, int, int, string> verifyRecall = delegate(string input, int expectedChainCount, int expectedChainCountTotal, string expectedString)
            {
                MarkovChainString recalled = GetMarkovChainString(input, false, true);

#if DEBUG
                Console.WriteLine(recalled.String + " :: " + recalled.ChainCount);
#endif
                Debug.Assert(recalled.ChainCount == expectedChainCount);
                Debug.Assert(recalled.ChainCountTotal == expectedChainCountTotal);
                Debug.Assert(recalled.String == expectedString);
            };

            //test simple and w/o Attributes...
            AddMarkovChainString(Preprocess("I love.", int.MaxValue), false, null);
            AddMarkovChainString(Preprocess("I love.", int.MaxValue), false, null);
            AddMarkovChainString(Preprocess("I like.", int.MaxValue), false, null);

            // The expected string keeps the input casing only when the chain is case-sensitive.
            verifyRecall("I", 5, 6, _isCaseSensitive ? "I love" : "i love");
            verifyRecall("I love", 5, 6, _isCaseSensitive ? "I love" : "i love");
            verifyRecall("I like", 4, 6, _isCaseSensitive ? "I like" : "i like");

            _markovChainString = new MarkovChainNodeString(typeof(MCStorageString), true, _onDiskDirectoryBasePath, null);

            //test with Attributes...
            Dictionary<string, double> attributes = new Dictionary<string, double>
            {
                { "Attribute3", 3 },
                { "Attribute1", 1 }
            };

            AddMarkovChainString(Preprocess("I love cats more than I love dogs.", int.MaxValue), false, attributes); //3,3,2,2,2,2
            AddMarkovChainString(Preprocess("I love cats more than I love dogs.", int.MaxValue), false, attributes);
            AddMarkovChainString(Preprocess("I love dogs more than I love cats.", int.MaxValue), false, attributes);

            verifyRecall("I love", 18, 19, _isCaseSensitive ? "I love cats more than I love dogs" : "i love cats more than i love dogs");
            verifyRecall("I love cats", 18, 19, _isCaseSensitive ? "I love cats more than I love dogs" : "i love cats more than i love dogs");
        }
Пример #11
0
 /// <summary>
 /// Replaces the root string node with a newly constructed one.
 /// </summary>
 public void ResetString()
 {
     MarkovChainNodeString freshRoot = new MarkovChainNodeString(typeof(MCStorageString), true, _onDiskDirectoryBasePath, null);

     _markovChainString = freshRoot;
 }
Пример #12
0
        /// <summary>
        /// Adds the words of a preprocessed input to the tree rooted at MarkovChainString: the first
        /// word becomes (or reuses) a child of the root, every following word a child of the previous
        /// word's node. ChainCount is incremented on the root once and on every visited node, and
        /// <paramref name="attributes"/> are merged into the same nodes via ManageAttributes.
        /// </summary>
        /// <param name="markovChainInputString">Input whose Nodes supply the words. When the chain is not case-sensitive, each node's String is lower-cased in place.</param>
        /// <param name="addMarkovChainBreaks">When true, each non-first word's node is also added directly under the root if the root has no child with that key (marked experimental below).</param>
        /// <param name="attributes">Optional attribute values to accumulate on the root and on every visited node; may be null.</param>
        /// <returns>The root node (MarkovChainString), or null when the input is null, has no nodes, or has an empty string form.</returns>
        public MarkovChainNodeString AddMarkovChainString(MarkovChainInputString markovChainInputString, bool addMarkovChainBreaks, Dictionary <string, double> attributes)
        {
            if (markovChainInputString == null || markovChainInputString.Nodes == null || markovChainInputString.Nodes.Count == 0 || string.IsNullOrEmpty(markovChainInputString.ToString()))
            {
                return(null);
            }

            MarkovChainNodeString parent = null;
            MarkovChainNodeString currentMarkovChainNodeString = null;
            bool   isFirstWord = true;
            string lastWord    = null;

            // Normalize casing up front; note that this mutates the caller's input nodes.
            if (!_isCaseSensitive)
            {
                for (int i = 0; i < markovChainInputString.Nodes.Count; i++)
                {
                    markovChainInputString.Nodes[i].String = markovChainInputString.Nodes[i].String.ToLowerInvariant();
                }
            }

            // index is the 1-based position of the word in the input; path accumulates the words
            // seen so far, each followed by a backslash.
            int    index = 1;
            string path  = null;

            foreach (MarkovChainInputNodeString markovChainInputNodeString in markovChainInputString.Nodes)
            {
                string word2 = string.Intern(markovChainInputNodeString.String.Trim());
                lastWord = word2;
                path    += word2 + "\\";

                // The first word hangs directly off the root node.
                if (isFirstWord)
                {
                    isFirstWord = false;

                    if (!MarkovChainString.Children.ContainsKey(word2))
                    {
                        MarkovChainNodeString markovChainNodeString = new MarkovChainNodeString(typeof(MCStorageString), false, _onDiskDirectoryBasePath, path);

                        MarkovChainString.Children.Add(word2, markovChainNodeString);
                    }

                    // The root is counted and attributed once per added input.
                    MarkovChainString.ChainCount++;

                    ManageAttributes(MarkovChainString, attributes);

                    MarkovChainString.Update();

                    /**/

                    currentMarkovChainNodeString = MarkovChainString.Children[word2];
                    currentMarkovChainNodeString.ChainCount++;
                    currentMarkovChainNodeString.Children.Path   = path;
                    currentMarkovChainNodeString.IsInputBoundary = false;
                    currentMarkovChainNodeString.IsWordBoundary  = true;
                    //commented to allow the model to be serialized to JSON...  should be re-added once disk-backed storage is added...
                    //currentMarkovChainNodeString.Parent = MarkovChainString
                    currentMarkovChainNodeString.Index = index++;

                    ManageAttributes(currentMarkovChainNodeString, attributes);

                    // Write the modified node back through the storage indexer
                    // (presumably so non-in-memory storage persists the changes -- confirm).
                    MarkovChainString.Children[word2] = currentMarkovChainNodeString;
                    //currentMarkovChainNodeString.Update();

                    continue;
                }

                // Every later word hangs off the node of the previous word.
                if (!currentMarkovChainNodeString.Children.ContainsKey(word2))
                {
                    MarkovChainNodeString markovChainNodeString = new MarkovChainNodeString(typeof(MCStorageString), false, _onDiskDirectoryBasePath, path);

                    currentMarkovChainNodeString.Children.Add(word2, markovChainNodeString);
                }

                parent = currentMarkovChainNodeString;
                currentMarkovChainNodeString      = currentMarkovChainNodeString.Children[word2];
                currentMarkovChainNodeString.Path = path;
                currentMarkovChainNodeString.ChainCount++;
                currentMarkovChainNodeString.Children.Path   = path;
                currentMarkovChainNodeString.IsInputBoundary = false;
                currentMarkovChainNodeString.IsWordBoundary  = true;
                //commented to allow the model to be serialized to JSON...  should be re-added once disk-backed storage is added...
                //currentMarkovChainNodeString.Parent = parent;
                currentMarkovChainNodeString.Index = index++;

                ManageAttributes(currentMarkovChainNodeString, attributes);

                if (addMarkovChainBreaks)
                {
                    //experimental...
                    // Also expose this node as a direct child of the root when the root has no such key.
                    if (!MarkovChainString.Children.ContainsKey(word2))
                    {
                        MarkovChainString.Children.Add(word2, currentMarkovChainNodeString);
                    }
                    else
                    {
                        // NOTE(review): the node's own Attributes are passed as the source, so every
                        // existing attribute value is added to itself -- confirm this is intended.
                        ManageAttributes(currentMarkovChainNodeString, currentMarkovChainNodeString.Attributes);
                    }
                }

                // Write the modified node back through the parent's storage indexer.
                parent.Children[word2] = currentMarkovChainNodeString;
                //currentMarkovChainNodeString.Update();
            }

            // Flag the last word's node as the end of the input. parent is only set for non-first
            // words, so this is skipped for single-word inputs.
            if (currentMarkovChainNodeString != null && parent != null && !string.IsNullOrEmpty(lastWord))
            {
                currentMarkovChainNodeString.IsInputBoundary = true;
                parent.Children[lastWord] = currentMarkovChainNodeString;
                //currentMarkovChainNodeString.Update();
            }

            return(MarkovChainString);
        }

        /// <summary>
        /// Accumulates <paramref name="attributes"/> onto the node: values for keys the node already
        /// has are summed, unknown keys are added. Creates the node's Attributes dictionary when it
        /// is null. Does nothing when <paramref name="attributes"/> is null.
        /// </summary>
        /// <param name="currentMarkovChainNodeString">Node whose Attributes are updated.</param>
        /// <param name="attributes">Attribute values to merge in; may be null.</param>
        private void ManageAttributes(MarkovChainNodeString currentMarkovChainNodeString, Dictionary <string, double> attributes)
        {
            if (attributes == null)
            {
                return;
            }

            if (currentMarkovChainNodeString.Attributes == null)
            {
                currentMarkovChainNodeString.Attributes = new Dictionary<string, double>();
            }

            // Walk a snapshot of the keys: the source may be the node's own Attributes dictionary,
            // which is written to inside the loop.
            foreach (string attributeName in attributes.Keys.ToList())
            {
                double existingValue;

                if (currentMarkovChainNodeString.Attributes.TryGetValue(attributeName, out existingValue))
                {
                    currentMarkovChainNodeString.Attributes[attributeName] = existingValue + attributes[attributeName];
                }
                else
                {
                    currentMarkovChainNodeString.Attributes.Add(attributeName, attributes[attributeName]);
                }
            }
        }

        /// <summary>
        /// Prepares the whole string chain by running the recursive overload on the root node.
        /// </summary>
        public void PrepareMarkovChainString()
        {
            MarkovChainNodeString root = MarkovChainString;

            PrepareMarkovChainString(root);
        }

        /// <summary>
        /// Replaces the node's child storage with an in-memory dictionary built from the children
        /// ordered by descending chain count, then does the same for every child recursively.
        /// </summary>
        /// <param name="markovChainNodeString">Node whose subtree is prepared.</param>
        private void PrepareMarkovChainString(MarkovChainNodeString markovChainNodeString)
        {
            var orderedChildren = markovChainNodeString.Children.OrderByDescendingChainCount();

            markovChainNodeString.Children = new MarkovChainNodeStorageInMemoryDictionaryString(orderedChildren, _onDiskDirectoryBasePath);

            // Recurse over the freshly assigned in-memory storage.
            foreach (KeyValuePair<string, MarkovChainNodeString> childEntry in markovChainNodeString.Children)
            {
                MarkovChainNodeString childNode = childEntry.Value;

                PrepareMarkovChainString(childNode);
            }
        }

        /// <summary>
        /// Walks the tree rooted at MarkovChainString along the words extracted from
        /// <paramref name="input"/> and, optionally, onwards along the first child returned by
        /// OrderByDescendingChainCount until a node without children is reached. The visited nodes,
        /// the concatenated words and the accumulated chain counts are returned.
        /// </summary>
        /// <param name="input">Text to look up; lower-cased first when the chain is not case-sensitive.</param>
        /// <param name="decrementChainCount">When true, ChainCount is decremented on every node that is stepped into.</param>
        /// <param name="alwaysContinueToEndOfChain">When true, the walk continues past the input words even if all of them were matched.</param>
        /// <returns>
        /// Null when <paramref name="input"/> is null or empty; otherwise a MarkovChainString. When the
        /// root has no children, or when a word is unknown while the walk is at the root, the result is
        /// returned without String, ChainCount and ChainCountTotal having been assigned.
        /// </returns>
        public MarkovChainString GetMarkovChainString(string input, bool decrementChainCount, bool alwaysContinueToEndOfChain)
        {
            if (string.IsNullOrEmpty(input))
            {
                return(null);
            }

            MarkovChainString markovChainString = new MarkovChainString();

            markovChainString.IsBrokenChain = false;
            markovChainString.Nodes         = new List <MarkovChainNodeString>();
            markovChainString.Input         = input;

            bool continueProcessing = true;

            if (MarkovChainString.Children.Count != 0)
            {
                // startingNodeString is the current position of the walk; it starts at the root.
                MarkovChainNodeString startingNodeString = MarkovChainString;
                decimal chainCount      = 0;
                decimal chainCountTotal = 0;
                string  path            = null;

                StringBuilder chainString = new StringBuilder();

                // Always true here: the empty/null case already returned at the top of the method.
                if (!string.IsNullOrEmpty(input))
                {
                    if (!_isCaseSensitive)
                    {
                        input = input.ToLowerInvariant();
                    }

                    // Steps from the current node into its child `word`: records the current node,
                    // adds the child's ChainCount and the sum of all siblings' ChainCount to the
                    // running totals (before any decrement), appends the word and moves the position.
                    // Any exception is swallowed and reported as false; the callers below ignore
                    // the return value.
                    Func <string, bool> processWord = new Func <string, bool>(delegate(string word)
                    {
                        try
                        {
                            markovChainString.Nodes.Add(startingNodeString);

                            chainCount      += startingNodeString.Children[word].ChainCount;
                            chainCountTotal += startingNodeString.Children.Values.Sum(_ => _.ChainCount);

                            chainString.Append(word + " ");

                            startingNodeString = startingNodeString.Children[word];
                            if (decrementChainCount)
                            {
                                startingNodeString.ChainCount--;
                            }

                            startingNodeString.Path          = path;
                            startingNodeString.Children.Path = path;
                        }
                        catch (Exception)
                        {
                            return(false);
                        }

                        return(true);
                    });

                    List <string> words = UserDefinedFunctions.ExtractWords(input, _extractText, _extractDistinctWords).Cast <string>().ToList();

                    foreach (string word in words)
                    {
                        path += word + "\\";

                        if (startingNodeString.Children.ContainsKey(word))
                        {
                            processWord(word);
                        }
                        else if (MarkovChainString.Children.ContainsKey(word))
                        {
                            // The word does not follow the current node but is a child of the root:
                            // restart from the root and flag the chain as broken.
                            startingNodeString = MarkovChainString;

                            markovChainString.IsBrokenChain = true;

                            processWord(word);
                        }
                        else
                        {
                            // Unknown word. At the root, return immediately (String and the counts
                            // are not assigned on this path); otherwise stop consuming input words.
                            if (startingNodeString == MarkovChainString)
                            {
                                return(markovChainString);
                            }

                            break;
                        }
                    }

                    // One node is recorded per processed word, so equal counts mean every input
                    // word was consumed.
                    if (markovChainString.Nodes.Count == words.Count())
                    {
                        continueProcessing = false;
                    }
                }

                /**/

                // Totals accumulated from the input words only, before any continuation.
                markovChainString.ChainCountKnown      = chainCount;
                markovChainString.ChainCountTotalKnown = chainCountTotal;

                if (continueProcessing || alwaysContinueToEndOfChain)
                {
                    // Keep following the first child in descending-chain-count order.
                    KeyValuePair <string, MarkovChainNodeString> keyValuePair = startingNodeString.Children.OrderByDescendingChainCount().FirstOrDefault();

                    // FirstOrDefault yields a default pair (null Key) when there are no children.
                    while (keyValuePair.Key != null && startingNodeString.Children.Values.Count != 0)
                    {
                        markovChainString.Nodes.Add(startingNodeString);

                        chainCount      += startingNodeString.Children[keyValuePair.Key].ChainCount;
                        chainCountTotal += startingNodeString.Children.Values.Sum(_ => _.ChainCount);

                        chainString.Append(keyValuePair.Key + " ");

                        startingNodeString = startingNodeString.Children[keyValuePair.Key];
                        if (decrementChainCount)
                        {
                            // NOTE(review): decrements the node taken from the ordered enumeration rather
                            // than the one just fetched through the indexer; whether these are the same
                            // instance depends on the storage implementation -- confirm.
                            keyValuePair.Value.ChainCount--;
                        }

                        path += keyValuePair.Key + "\\";

                        startingNodeString.Path          = path;
                        startingNodeString.Children.Path = path;

                        keyValuePair = startingNodeString.Children.OrderByDescendingChainCount().FirstOrDefault();
                    }
                }

                markovChainString.String          = chainString.ToString().Trim();
                markovChainString.ChainCount      = chainCount;
                markovChainString.ChainCountTotal = chainCountTotal;
            }

            return(markovChainString);
        }
 /// <summary>
 /// Adds <paramref name="value"/> to the storage under <paramref name="key"/>. How the entry is
 /// stored is defined by the concrete storage implementation.
 /// </summary>
 /// <param name="key">Key of the node to add.</param>
 /// <param name="value">Node to add.</param>
 public abstract void Add(string key, MarkovChainNodeString value);