Exemple #1
0
        /// <summary>
        /// Builds a MarkovSegment from a prototype successor table; meant to be used by MarkovStructure.
        /// </summary>
        /// <param name="prototypeSuccessors">
        /// Source entries. Each key/value pair is turned into one <c>NGramSuccessor</c>, with the key
        /// passed as the first constructor argument and the value as the second
        /// (presumably successor index and weight; confirm against <c>NGramSuccessor</c>).
        /// </param>
        internal MarkovSegment(ConcurrentDictionary<int, int> prototypeSuccessors)
        {
            // Pre-size the list: one successor per prototype entry
            var ordered = new List<NGramSuccessor>(prototypeSuccessors.Count);
            var comparer = new NGramSuccessor.ReverseComparer();

            // Insert every entry through SortAdd so the list is kept in the comparer's order
            foreach (KeyValuePair<int, int> entry in prototypeSuccessors)
            {
                var candidate = new NGramSuccessor(entry.Key, entry.Value);
                ordered.SortAdd(candidate, comparer);
            }

            SetupSuccessorsAndRunningTotals(ordered);
        }
Exemple #2
0
        // TODO: create unit test
        /// <summary>
        /// Merges the successors of this segment with those of <paramref name="other"/>. A successor of
        /// <paramref name="other"/> whose remapped index matches one of this segment's successors has its
        /// weight added to that successor; every other one is carried over under its remapped index.
        /// </summary>
        /// <param name="other">Segment whose successors are merged in.</param>
        /// <param name="ngramOtherRemap">
        /// Lookup indexed by a successor index of <paramref name="other"/>, yielding the index to use in
        /// the combined result.
        /// </param>
        /// <param name="ownNGramLength">
        /// Remapped indices at or above this value are never matched against this segment's successors
        /// (presumably the number of ngrams known to the owning structure; confirm with the caller).
        /// </param>
        /// <returns>
        /// <paramref name="other"/> if this segment has no successors, this segment if
        /// <paramref name="other"/> has none, otherwise a new segment built from the merged successors.
        /// </returns>
        public MarkovSegment Combine(MarkovSegment other,
                                     int[] ngramOtherRemap,
                                     int ownNGramLength)
        {
            // NOTE(review): neither shortcut applies ngramOtherRemap, so returning `other` as-is keeps
            // its original successor indices. Confirm callers expect that.
            if (successors.Length == 0)
            {
                return other;
            }
            if (other.successors.Length == 0)
            {
                return this;
            }

            // Working list kept in insertion order. Nothing is ever removed from or inserted into the
            // middle of it, so the positions stored in positionByIndex stay valid for the whole merge.
            // (Looking positions up in a list that is being re-sorted while merging would go stale
            // after the first insert or removal.)
            List<NGramSuccessor> merged = new List<NGramSuccessor>(successors.Length + other.successors.Length);
            Dictionary<int, int> positionByIndex = new Dictionary<int, int>(successors.Length);

            // Register own successors
            foreach (NGramSuccessor successor in successors)
            {
                // TODO: each own successorIndex is expected to be unique; if one repeats, the later position wins
                positionByIndex[successor.successorIndex] = merged.Count;
                merged.Add(successor);
            }

            // Fold in the other segment's successors
            foreach (NGramSuccessor otherSuccessor in other.successors)
            {
                int remap = ngramOtherRemap[otherSuccessor.successorIndex];

                if (remap < ownNGramLength && positionByIndex.TryGetValue(remap, out int position))
                {
                    // The succeeding ngram is not unique to other and already follows the current ngram
                    // here: sum the weights. A fresh instance is stored instead of mutating the existing
                    // one, so this segment's own successor entries are left untouched.
                    NGramSuccessor own = merged[position];
                    merged[position] = new NGramSuccessor(own.successorIndex, own.weight + otherSuccessor.weight);
                }
                else
                {
                    // Either the ngram is unique to other, or it is simply not a successor in this segment
                    merged.Add(new NGramSuccessor(remap, otherSuccessor.weight));
                }
            }

            // Order the final weights once, after all merging is done.
            // TODO: SortAdd adds one element at a time; a single bulk sort may be cheaper if the comparer allows it
            var reverseComparer = new NGramSuccessor.ReverseComparer();
            List<NGramSuccessor> combinedSuccessors = new List<NGramSuccessor>(merged.Count);

            foreach (NGramSuccessor successor in merged)
            {
                combinedSuccessors.SortAdd(successor, reverseComparer);
            }

            return new MarkovSegment(combinedSuccessors);
        }