Example #1
0
        /// <summary>
        /// Builds a new <see cref="MarkovStructure"/> containing the union of this structure's
        /// and <paramref name="other"/>'s dictionary words, ngrams, chain links and seeds.
        /// </summary>
        /// <remarks>
        /// This structure's entries keep their existing indices in the result. Entries that
        /// only exist in <paramref name="other"/> are appended after them, and every index
        /// taken from <paramref name="other"/> (word indices inside ngrams, ngram indices used
        /// by links and seeds) is translated into the combined index space.
        /// </remarks>
        /// <param name="other">The structure to merge with this one.</param>
        /// <returns>A new structure built from the combined arrays.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="other"/> is null.</exception>
        public MarkovStructure Combine(MarkovStructure other)
        {
            // TODO: create unit test

            if (other == null)
            {
                throw new ArgumentNullException(nameof(other));
            }

            // --- Dictionary combining

            int dictionaryCapacity = dictionary.Length + other.dictionary.Length;

            // Combined dictionary starts as a copy of our own words (presized for the worst case
            // where none of other's words are shared).
            List<string> combinedDictionary = new List<string>(dictionaryCapacity);
            combinedDictionary.AddRange(dictionary);

            // Word -> index in combinedDictionary
            Dictionary<string, int> dictionaryMap = new Dictionary<string, int>(dictionaryCapacity);
            for (int ownIndex = 0; ownIndex < dictionary.Length; ++ownIndex)
            {
                dictionaryMap[dictionary[ownIndex]] = ownIndex;
            }

            // Append other's words that we do not already know
            foreach (string word in other.dictionary)
            {
                if (!dictionaryMap.ContainsKey(word))
                {
                    dictionaryMap[word] = combinedDictionary.Count;
                    combinedDictionary.Add(word);
                }
            }

            // Remap array that maps other's index to combined index
            // (remap[i] = j where other.dictionary[i] == combinedDictionary[j])
            int[] dictionaryOtherRemap = new int[other.dictionary.Length];
            for (int otherIndex = 0; otherIndex < dictionaryOtherRemap.Length; ++otherIndex)
            {
                dictionaryOtherRemap[otherIndex] = dictionaryMap[other.dictionary[otherIndex]];
            }

            // --- NGram combining

            // TODO: it's possible to combine ngrams and their links at the same time instead of doing more work

            int ngramCapacity = grams.Length + other.grams.Length;

            List<NGram> combinedNGrams = new List<NGram>(ngramCapacity);
            combinedNGrams.AddRange(grams);

            // NGram -> index in combinedNGrams, populated with our own grams only
            Dictionary<NGram, int> ngramMap = new Dictionary<NGram, int>(ngramCapacity);
            for (int ownIndex = 0; ownIndex < grams.Length; ++ownIndex)
            {
                ngramMap[grams[ownIndex]] = ownIndex;
            }

            // Go through other's ngrams, populate onto combined, and populate ngram remap
            // (ngramOtherRemap[i] = j where other.grams[i] corresponds to combinedNGrams[j])
            int[] ngramOtherRemap = new int[other.grams.Length];
            // TODO: consider parallelizing, would involve an add queue and a lock potentially
            for (int otherIndex = 0; otherIndex < other.grams.Length; ++otherIndex)
            {
                NGram gram = other.grams[otherIndex];

                // Translate the ngram's word indices using the dictionary remap.
                // -1 is passed through untouched (presumably a "no word" sentinel - TODO confirm).
                var   translated = gram.gram.Select((e) => (e == -1) ? -1 : dictionaryOtherRemap[e]);
                NGram remapped   = new NGram(translated);

                if (ngramMap.TryGetValue(remapped, out int existingIndex))
                {
                    // We already own this ngram: remap points at our copy
                    ngramOtherRemap[otherIndex] = existingIndex;
                }
                else
                {
                    // New ngram: append it, remap points at the appended slot
                    ngramOtherRemap[otherIndex] = combinedNGrams.Count;
                    combinedNGrams.Add(remapped);
                }
            }

            // --- Chain links combining

            //	Other's unique chain links will not need to be touched
            //		Can tell if it's unique by testing whether ngram remap index >= original.length
            //		Remember that ngrams and the links are associated together despite being in separate arrays (i.e. ngram[0] corresponds with links[0])
            //	For those which need to be combined, use MarkovSegment combine method

            MarkovSegment[] combinedLinks = new MarkovSegment[combinedNGrams.Count];

            // Populate combinedLinks with our own links. Only the first chainLinks.Length slots
            // have a counterpart in chainLinks; the remaining slots belong to other's unique
            // ngrams and are filled in below. (Copying combinedLinks.Length elements would read
            // past the end of chainLinks whenever other adds a new ngram.)
            Array.Copy(chainLinks, combinedLinks, chainLinks.Length);

            // Populate combinedLinks with other's links
            // TODO: make parallel when done testing
            for (int otherIndex = 0; otherIndex < other.chainLinks.Length; ++otherIndex)
            {
                MarkovSegment otherSegment = other.chainLinks[otherIndex];
                int           remap        = ngramOtherRemap[otherIndex];

                if (remap >= chainLinks.Length)
                {
                    // Unique link needs to be associated with its remap spot
                    combinedLinks[remap] = otherSegment;
                }
                else
                {
                    // Shared ngram: combine the two segments and replace our entry
                    MarkovSegment ownSegment = chainLinks[remap];
                    combinedLinks[remap] = ownSegment.Combine(otherSegment, ngramOtherRemap, grams.Length);
                }
            }

            // TODO: remove when done testing
            if (combinedLinks.Contains(null))
            {
                Console.WriteLine("yeah crazy");
            }

            // --- Seed combining

            //	Seeds are used as indices into the ngram remap, i.e. they are ngram indices.
            //	Translate each of other's seeds into the combined index space and append it
            //	unless that ngram is already a seed.

            List<int> combinedSeeds = new List<int>(seeds.Length + other.seeds.Length);
            combinedSeeds.AddRange(seeds);

            HashSet<int> knownSeeds = new HashSet<int>(seeds);
            foreach (int otherSeed in other.seeds)
            {
                int remappedSeed = ngramOtherRemap[otherSeed];
                if (knownSeeds.Add(remappedSeed))
                {
                    combinedSeeds.Add(remappedSeed);
                }
            }

            // Put it all together
            return new MarkovStructure(combinedDictionary.ToArray(),
                                       combinedNGrams.ToArray(),
                                       combinedLinks,
                                       combinedSeeds.ToArray());
        }