Ejemplo n.º 1
0
        /// <summary>
        /// Manual smoke test: clears the children of <c>_markovChainString</c>, preprocesses a fixed
        /// sentence, tags every node whose word contains the letter "i" with a per-node attribute,
        /// adds the sentence to the model together with two global attributes and finally looks the
        /// sentence up again. Nothing is asserted; the lookup result is only held in a local so it can
        /// be inspected in a debugger.
        /// </summary>
        private void TestPerInputNodeAttributes()
        {
            const string sentence = "This string contains per node attributes.";

            // Start from an empty set of children so earlier runs do not influence this one.
            _markovChainString.Children.Clear();

            MarkovChainInputString preprocessed = Preprocess(sentence, int.MaxValue);

            foreach (MarkovChainInputNodeString node in preprocessed.Nodes)
            {
                if (!node.String.Contains("i"))
                {
                    continue;
                }

                node.Attributes.Add("perNodeAttribute", 1);
            }

            // Attributes that are handed to AddMarkovChainString for the whole input.
            Dictionary <string, double> sharedAttributes = new Dictionary <string, double>
            {
                { "globalAttribute1", 1 },
                { "globalAttribute2", 1 }
            };

            AddMarkovChainString(preprocessed, false, sharedAttributes);

            MarkovChainString lookedUpChain = GetMarkovChainString(sentence, false, false);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Splits <paramref name="input"/> into words via <c>UserDefinedFunctions.ExtractWords</c> and wraps
        /// each word in a <c>MarkovChainInputNodeString</c> carrying an empty attribute dictionary.
        /// </summary>
        /// <param name="input">The raw text to split into nodes.</param>
        /// <param name="maximumNumberOfNodesToProcess">
        /// Upper bound on the number of nodes in the result; a value of zero or less yields an empty list.
        /// </param>
        /// <returns>A new input string object whose <c>Nodes</c> list holds at most the requested number of nodes, in extraction order.</returns>
        public MarkovChainInputString Preprocess(string input, int maximumNumberOfNodesToProcess)
        {
            MarkovChainInputString markovChainInputString = new MarkovChainInputString();

            markovChainInputString.Nodes = new List <MarkovChainInputNodeString>();

            foreach (string word in UserDefinedFunctions.ExtractWords(input, _extractText, _extractDistinctWords))
            {
                // Stop as soon as the limit is reached instead of building a node (and a dictionary)
                // for every word and discarding the surplus afterwards.
                if (markovChainInputString.Nodes.Count >= maximumNumberOfNodesToProcess)
                {
                    break;
                }

                MarkovChainInputNodeString markovChainInputNodeString = new MarkovChainInputNodeString();

                markovChainInputNodeString.String     = word;
                markovChainInputNodeString.Attributes = new Dictionary <string, double>();

                markovChainInputString.Nodes.Add(markovChainInputNodeString);
            }

            return(markovChainInputString);
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Adds one preprocessed input to the model as a path of nested child nodes that starts at the
        /// root node <c>MarkovChainString</c>. Every node on the path gets its <c>ChainCount</c>
        /// incremented and the supplied <paramref name="attributes"/> merged in via <c>ManageAttributes</c>.
        /// </summary>
        /// <param name="markovChainInputString">
        /// The words to add. When the model is not case sensitive the <c>String</c> of every node in this
        /// object is lower-cased in place, i.e. the caller's instance is modified.
        /// NOTE(review): the per-node <c>Attributes</c> of the input nodes are never read in this method;
        /// only <paramref name="attributes"/> is applied - confirm whether that is intended.
        /// </param>
        /// <param name="addMarkovChainBreaks">
        /// Experimental: when true, every non-first word's node is also registered directly under the root
        /// if the root has no child with that key yet.
        /// </param>
        /// <param name="attributes">Attributes merged into the root and into every node on the path; may be null.</param>
        /// <returns>
        /// The root node <c>MarkovChainString</c>, or null when the input is null, has no nodes, or its
        /// <c>ToString()</c> is null or empty.
        /// </returns>
        public MarkovChainNodeString AddMarkovChainString(MarkovChainInputString markovChainInputString, bool addMarkovChainBreaks, Dictionary <string, double> attributes)
        {
            // Nothing to add: bail out before touching the model.
            if (markovChainInputString == null || markovChainInputString.Nodes == null || markovChainInputString.Nodes.Count == 0 || string.IsNullOrEmpty(markovChainInputString.ToString()))
            {
                return(null);
            }

            MarkovChainNodeString parent = null;
            MarkovChainNodeString currentMarkovChainNodeString = null;
            bool   isFirstWord = true;
            string lastWord    = null;

            // Case-insensitive models store lower-cased keys; this rewrites the caller's nodes in place.
            if (!_isCaseSensitive)
            {
                for (int i = 0; i < markovChainInputString.Nodes.Count; i++)
                {
                    markovChainInputString.Nodes[i].String = markovChainInputString.Nodes[i].String.ToLowerInvariant();
                }
            }

            // index is the 1-based position of a word within this input; path accumulates the words seen
            // so far, each followed by a backslash.
            int    index = 1;
            string path  = null;

            foreach (MarkovChainInputNodeString markovChainInputNodeString in markovChainInputString.Nodes)
            {
                string word2 = string.Intern(markovChainInputNodeString.String.Trim());
                lastWord = word2;
                path    += word2 + "\\";

                if (isFirstWord)
                {
                    // The first word hangs directly below the root node.
                    isFirstWord = false;

                    if (!MarkovChainString.Children.ContainsKey(word2))
                    {
                        MarkovChainNodeString markovChainNodeString = new MarkovChainNodeString(typeof(MCStorageString), false, _onDiskDirectoryBasePath, path);

                        MarkovChainString.Children.Add(word2, markovChainNodeString);
                    }

                    // The root is counted and attributed once per added input.
                    MarkovChainString.ChainCount++;

                    ManageAttributes(MarkovChainString, attributes);

                    MarkovChainString.Update();

                    /**/

                    currentMarkovChainNodeString = MarkovChainString.Children[word2];
                    currentMarkovChainNodeString.ChainCount++;
                    currentMarkovChainNodeString.Children.Path   = path;
                    currentMarkovChainNodeString.IsInputBoundary = false;
                    currentMarkovChainNodeString.IsWordBoundary  = true;
                    // Commented out to allow the model to be serialized to JSON; should be re-added once disk-backed storage is added.
                    //currentMarkovChainNodeString.Parent = MarkovChainString
                    currentMarkovChainNodeString.Index = index++;

                    ManageAttributes(currentMarkovChainNodeString, attributes);

                    // NOTE(review): presumably writes the modified node back for storage implementations
                    // whose indexer does not hand out the stored instance - confirm.
                    MarkovChainString.Children[word2] = currentMarkovChainNodeString;
                    //currentMarkovChainNodeString.Update();

                    continue;
                }

                // Every later word becomes (or reuses) a child of the node reached by the previous word.
                if (!currentMarkovChainNodeString.Children.ContainsKey(word2))
                {
                    MarkovChainNodeString markovChainNodeString = new MarkovChainNodeString(typeof(MCStorageString), false, _onDiskDirectoryBasePath, path);

                    currentMarkovChainNodeString.Children.Add(word2, markovChainNodeString);
                }

                parent = currentMarkovChainNodeString;
                currentMarkovChainNodeString      = currentMarkovChainNodeString.Children[word2];
                currentMarkovChainNodeString.Path = path;
                currentMarkovChainNodeString.ChainCount++;
                currentMarkovChainNodeString.Children.Path   = path;
                currentMarkovChainNodeString.IsInputBoundary = false;
                currentMarkovChainNodeString.IsWordBoundary  = true;
                // Commented out to allow the model to be serialized to JSON; should be re-added once disk-backed storage is added.
                //currentMarkovChainNodeString.Parent = parent;
                currentMarkovChainNodeString.Index = index++;

                ManageAttributes(currentMarkovChainNodeString, attributes);

                if (addMarkovChainBreaks)
                {
                    // Experimental: also make this node reachable directly from the root.
                    if (!MarkovChainString.Children.ContainsKey(word2))
                    {
                        MarkovChainString.Children.Add(word2, currentMarkovChainNodeString);
                    }
                    else
                    {
                        // NOTE(review): this merges the node's own attributes into itself; with
                        // ManageAttributes' add-or-accumulate behavior every existing attribute value
                        // of the node is doubled - confirm that this is intended.
                        ManageAttributes(currentMarkovChainNodeString, currentMarkovChainNodeString.Attributes);
                    }
                }

                // Same write-back as for the first word, this time into the parent's children.
                parent.Children[word2] = currentMarkovChainNodeString;
                //currentMarkovChainNodeString.Update();
            }

            // Flag the node of the last word as the end of the input.
            // NOTE(review): parent is only assigned for the second and later words, so a single-word
            // input never gets IsInputBoundary set to true - confirm whether that is intended.
            if (currentMarkovChainNodeString != null && parent != null && !string.IsNullOrEmpty(lastWord))
            {
                currentMarkovChainNodeString.IsInputBoundary = true;
                parent.Children[lastWord] = currentMarkovChainNodeString;
                //currentMarkovChainNodeString.Update();
            }

            return(MarkovChainString);
        }

        /// <summary>
        /// Merges <paramref name="attributes"/> into the attributes of the given node: keys the node does
        /// not have yet are added with the supplied value, keys it already has get the supplied value
        /// added to their current value. Does nothing when <paramref name="attributes"/> is null.
        /// </summary>
        /// <param name="currentMarkovChainNodeString">The node whose attributes are updated; its dictionary is created on demand.</param>
        /// <param name="attributes">The attribute values to merge in; may be null.</param>
        private void ManageAttributes(MarkovChainNodeString currentMarkovChainNodeString, Dictionary <string, double> attributes)
        {
            if (attributes == null)
            {
                return;
            }

            if (currentMarkovChainNodeString.Attributes == null)
            {
                currentMarkovChainNodeString.Attributes = new Dictionary <string, double>();
            }

            // Iterate over a snapshot of the keys: the source dictionary can be the node's own
            // dictionary, which is written to inside the loop.
            foreach (string attributeName in attributes.Keys.ToArray())
            {
                double runningTotal;

                if (currentMarkovChainNodeString.Attributes.TryGetValue(attributeName, out runningTotal))
                {
                    currentMarkovChainNodeString.Attributes[attributeName] = runningTotal + attributes[attributeName];
                }
                else
                {
                    currentMarkovChainNodeString.Attributes.Add(attributeName, attributes[attributeName]);
                }
            }
        }

        /// <summary>
        /// Prepares the whole model by running the node-level <c>PrepareMarkovChainString</c> overload
        /// on the root node <c>MarkovChainString</c>.
        /// </summary>
        public void PrepareMarkovChainString()
        {
            MarkovChainNodeString root = MarkovChainString;

            PrepareMarkovChainString(root);
        }

        /// <summary>
        /// Replaces the <c>Children</c> storage of the given node and of every node below it with a new
        /// <c>MarkovChainNodeStorageInMemoryDictionaryString</c> built from the result of
        /// <c>OrderByDescendingChainCount()</c> on the existing children.
        /// </summary>
        /// <remarks>
        /// The traversal uses an explicit stack instead of recursion: the depth of the model grows with
        /// the number of words in an added input, so a recursive walk could exhaust the call stack on
        /// long inputs. Nodes are visited in the same parent-before-children, first-child-first order
        /// as a recursive walk.
        /// </remarks>
        /// <param name="markovChainNodeString">The node at which the traversal starts.</param>
        private void PrepareMarkovChainString(MarkovChainNodeString markovChainNodeString)
        {
            Stack <MarkovChainNodeString> pending = new Stack <MarkovChainNodeString>();

            pending.Push(markovChainNodeString);

            while (pending.Count != 0)
            {
                MarkovChainNodeString current = pending.Pop();

                current.Children = new MarkovChainNodeStorageInMemoryDictionaryString(current.Children.OrderByDescendingChainCount(), _onDiskDirectoryBasePath);

                // Collect the children of the rebuilt storage first so they can be pushed in reverse.
                List <MarkovChainNodeString> children = new List <MarkovChainNodeString>();

                foreach (KeyValuePair <string, MarkovChainNodeString> keyValuePair in current.Children)
                {
                    children.Add(keyValuePair.Value);
                }

                // Pushing in reverse makes the first child the next node to be popped.
                for (int i = children.Count - 1; i >= 0; i--)
                {
                    pending.Push(children[i]);
                }
            }
        }

        /// <summary>
        /// Walks the model along the words of <paramref name="input"/> and, optionally, onwards along the
        /// first child returned by <c>OrderByDescendingChainCount()</c> at each step, collecting the
        /// visited nodes, the resulting text and the accumulated chain counts.
        /// </summary>
        /// <param name="input">The text whose words are looked up, starting at the root node.</param>
        /// <param name="decrementChainCount">When true, the <c>ChainCount</c> of every node that is stepped into is decremented.</param>
        /// <param name="alwaysContinueToEndOfChain">
        /// When true, the walk continues past the input words even if all of them were matched.
        /// </param>
        /// <returns>
        /// Null when <paramref name="input"/> is null or empty; otherwise a new result object. When the
        /// root has no children only <c>IsBrokenChain</c>, <c>Nodes</c> and <c>Input</c> are set. When a
        /// word is unknown while the walk is positioned at the root, the result is returned immediately
        /// without <c>String</c> and the chain count properties being assigned.
        /// </returns>
        public MarkovChainString GetMarkovChainString(string input, bool decrementChainCount, bool alwaysContinueToEndOfChain)
        {
            if (string.IsNullOrEmpty(input))
            {
                return(null);
            }

            MarkovChainString markovChainString = new MarkovChainString();

            markovChainString.IsBrokenChain = false;
            markovChainString.Nodes         = new List <MarkovChainNodeString>();
            markovChainString.Input         = input;

            // Stays true unless one node was recorded for every extracted word (see below).
            bool continueProcessing = true;

            if (MarkovChainString.Children.Count != 0)
            {
                // startingNodeString is the current position of the walk; it starts at the root.
                MarkovChainNodeString startingNodeString = MarkovChainString;
                decimal chainCount      = 0;
                decimal chainCountTotal = 0;
                string  path            = null;

                StringBuilder chainString = new StringBuilder();

                // Always true at this point because of the guard at the top of the method.
                if (!string.IsNullOrEmpty(input))
                {
                    if (!_isCaseSensitive)
                    {
                        input = input.ToLowerInvariant();
                    }

                    // Steps from the current node into its child for the given word. The node that is
                    // being left (not the child) is appended to Nodes. chainCount grows by the child's
                    // ChainCount, chainCountTotal by the summed ChainCount of all children of the node
                    // being left. Uses the captured path, which the caller extends before each call.
                    // NOTE(review): any exception is swallowed and reported as false, and both call
                    // sites below ignore the returned value.
                    Func <string, bool> processWord = new Func <string, bool>(delegate(string word)
                    {
                        try
                        {
                            markovChainString.Nodes.Add(startingNodeString);

                            chainCount      += startingNodeString.Children[word].ChainCount;
                            chainCountTotal += startingNodeString.Children.Values.Sum(_ => _.ChainCount);

                            chainString.Append(word + " ");

                            startingNodeString = startingNodeString.Children[word];
                            if (decrementChainCount)
                            {
                                startingNodeString.ChainCount--;
                            }

                            startingNodeString.Path          = path;
                            startingNodeString.Children.Path = path;
                        }
                        catch (Exception)
                        {
                            return(false);
                        }

                        return(true);
                    });

                    List <string> words = UserDefinedFunctions.ExtractWords(input, _extractText, _extractDistinctWords).Cast <string>().ToList();

                    foreach (string word in words)
                    {
                        path += word + "\\";

                        if (startingNodeString.Children.ContainsKey(word))
                        {
                            // The word continues the chain from the current position.
                            processWord(word);
                        }
                        else if (MarkovChainString.Children.ContainsKey(word))
                        {
                            // The chain does not continue here, but the word is a child of the root:
                            // restart from the root and flag the result as a broken chain.
                            startingNodeString = MarkovChainString;

                            markovChainString.IsBrokenChain = true;

                            processWord(word);
                        }
                        else
                        {
                            // Unknown word: when positioned at the root, return right away (String and
                            // the chain count properties stay unassigned); otherwise stop matching
                            // input words and fall through to the code below.
                            if (startingNodeString == MarkovChainString)
                            {
                                return(markovChainString);
                            }

                            break;
                        }
                    }

                    // One node is recorded per processed word, so equal counts mean no word was left over.
                    if (markovChainString.Nodes.Count == words.Count())
                    {
                        continueProcessing = false;
                    }
                }

                /**/

                // Counts accumulated from the input words only, before any continuation below.
                markovChainString.ChainCountKnown      = chainCount;
                markovChainString.ChainCountTotalKnown = chainCountTotal;

                if (continueProcessing || alwaysContinueToEndOfChain)
                {
                    // Continue from the current position, always taking the first entry returned by
                    // OrderByDescendingChainCount(), until a node without children is reached.
                    KeyValuePair <string, MarkovChainNodeString> keyValuePair = startingNodeString.Children.OrderByDescendingChainCount().FirstOrDefault();

                    while (keyValuePair.Key != null && startingNodeString.Children.Values.Count != 0)
                    {
                        markovChainString.Nodes.Add(startingNodeString);

                        chainCount      += startingNodeString.Children[keyValuePair.Key].ChainCount;
                        chainCountTotal += startingNodeString.Children.Values.Sum(_ => _.ChainCount);

                        chainString.Append(keyValuePair.Key + " ");

                        startingNodeString = startingNodeString.Children[keyValuePair.Key];
                        // NOTE(review): the decrement goes through keyValuePair.Value while the walk
                        // steps into Children[keyValuePair.Key]; presumably both are the same
                        // instance - confirm for every storage implementation.
                        if (decrementChainCount)
                        {
                            keyValuePair.Value.ChainCount--;
                        }

                        path += keyValuePair.Key + "\\";

                        startingNodeString.Path          = path;
                        startingNodeString.Children.Path = path;

                        keyValuePair = startingNodeString.Children.OrderByDescendingChainCount().FirstOrDefault();
                    }
                }

                // The text is the space-separated list of all words stepped through.
                markovChainString.String          = chainString.ToString().Trim();
                markovChainString.ChainCount      = chainCount;
                markovChainString.ChainCountTotal = chainCountTotal;
            }

            return(markovChainString);
        }