/// <summary>
/// Value equality for n-grams: two instances are equal when they have the
/// exact same runtime type and their <c>gram</c> sequences hold the same
/// elements in the same order.
/// </summary>
/// <param name="obj">Object to compare against; may be null.</param>
/// <returns>
/// <c>true</c> when <paramref name="obj"/> is an n-gram of the same runtime type
/// with an element-wise identical <c>gram</c>; otherwise <c>false</c>.
/// </returns>
public override bool Equals(object obj)
{
    // Guard clause: null or a different runtime type can never be equal.
    bool sameType = (obj != null) && GetType().Equals(obj.GetType());
    if (!sameType)
    {
        return false;
    }

    // Compare element by element; the default comparison would not look at the contents.
    NGram candidate = (NGram)obj;
    return Enumerable.SequenceEqual(gram, candidate.gram);
}
/// <summary>
/// Builds a new <see cref="MarkovStructure"/> that merges this structure with
/// <paramref name="other"/>.
/// </summary>
/// <remarks>
/// The merge runs in four stages:
/// <list type="number">
/// <item><description>Dictionaries: this dictionary keeps its indices; words only found in
/// <paramref name="other"/> are appended.</description></item>
/// <item><description>N-grams: each of other's n-grams is translated into combined dictionary
/// indices, then matched against this structure's n-grams or appended.</description></item>
/// <item><description>Chain links: links whose n-gram exists in both structures are merged with
/// <c>MarkovSegment.Combine</c>; links for new n-grams are placed at their remapped slot.</description></item>
/// <item><description>Seeds: this structure's seeds followed by other's seeds translated into
/// combined n-gram indices, without duplicates.</description></item>
/// </list>
/// </remarks>
/// <param name="other">The structure to merge with this one.</param>
/// <returns>A new structure built from the combined dictionary, n-grams, links and seeds.</returns>
/// <exception cref="ArgumentNullException"><paramref name="other"/> is null.</exception>
public MarkovStructure Combine(MarkovStructure other)
{
    // TODO: create unit test
    if (other is null)
    {
        throw new ArgumentNullException(nameof(other));
    }

    // --- Dictionary combining
    int combinedWordCapacity = dictionary.Length + other.dictionary.Length;
    List<string> combinedDictionary = new List<string>(dictionary)
    {
        Capacity = combinedWordCapacity
    };
    Dictionary<string, int> dictionaryMap = new Dictionary<string, int>(combinedWordCapacity);

    // Own words keep their positions in the combined dictionary.
    for (int w = 0; w < dictionary.Length; ++w)
    {
        dictionaryMap[dictionary[w]] = w;
    }

    // dictionaryOtherRemap[i] = j where other.dictionary[i] == combinedDictionary[j].
    // Words not seen before are appended to the combined dictionary as they are met.
    int[] dictionaryOtherRemap = new int[other.dictionary.Length];
    for (int w = 0; w < other.dictionary.Length; ++w)
    {
        string word = other.dictionary[w];
        if (!dictionaryMap.TryGetValue(word, out int combinedWordIndex))
        {
            combinedWordIndex = combinedDictionary.Count;
            dictionaryMap[word] = combinedWordIndex;
            combinedDictionary.Add(word);
        }

        dictionaryOtherRemap[w] = combinedWordIndex;
    }

    // --- NGram combining
    // TODO: it's possible to combine ngrams and their links at the same time instead of doing more work
    int combinedGramCapacity = grams.Length + other.grams.Length;
    List<NGram> combinedNGrams = new List<NGram>(grams)
    {
        Capacity = combinedGramCapacity
    };
    Dictionary<NGram, int> ngramMap = new Dictionary<NGram, int>(combinedGramCapacity);

    // Own n-grams keep their positions in the combined list.
    for (int g = 0; g < grams.Length; ++g)
    {
        ngramMap[grams[g]] = g;
    }

    // ngramOtherRemap[i] = j where other.grams[i] (after translation) == combinedNGrams[j].
    // TODO: consider parallelizing, would involve an add queue and a lock potentially
    int[] ngramOtherRemap = new int[other.grams.Length];
    for (int g = 0; g < other.grams.Length; ++g)
    {
        // Translate word indices through the dictionary remap. -1 is passed through
        // unchanged (presumably a "no word" sentinel -- confirm against NGram).
        var translated = other.grams[g].gram.Select((e) => (e == -1) ? -1 : dictionaryOtherRemap[e]);
        NGram remapped = new NGram(translated);

        if (!ngramMap.TryGetValue(remapped, out int combinedGramIndex))
        {
            // Not one of our own n-grams: append it, the remap points at the new slot.
            combinedGramIndex = combinedNGrams.Count;
            combinedNGrams.Add(remapped);
        }

        ngramOtherRemap[g] = combinedGramIndex;
    }

    // --- Chain links combining
    // N-grams and links are associated by position despite living in separate arrays
    // (i.e. grams[0] corresponds with chainLinks[0]). A remapped index >= chainLinks.Length
    // therefore identifies an n-gram that only other has.
    MarkovSegment[] combinedLinks = new MarkovSegment[combinedNGrams.Count];

    // Copy only our own links. The combined array is longer than chainLinks whenever other
    // contributes new n-grams, so iterating over combinedLinks.Length would read past the
    // end of chainLinks.
    Array.Copy(chainLinks, combinedLinks, chainLinks.Length);

    // TODO: make parallel when done testing
    for (int index = 0; index < other.chainLinks.Length; ++index)
    {
        MarkovSegment otherSegment = other.chainLinks[index];
        int remap = ngramOtherRemap[index];

        if (remap >= chainLinks.Length)
        {
            // Link for an n-gram only other has: place it at its remapped slot.
            // NOTE(review): the segment is stored as-is. If MarkovSegment holds successor
            // n-gram indices they would still be in other's index space -- confirm whether
            // they need translating through ngramOtherRemap.
            combinedLinks[remap] = otherSegment;
        }
        else
        {
            // Shared n-gram: merge both segments and replace our copy.
            combinedLinks[remap] = chainLinks[remap].Combine(otherSegment, ngramOtherRemap, grams.Length);
        }
    }

    // TODO: remove when done testing
    if (combinedLinks.Contains(null))
    {
        Console.WriteLine("yeah crazy");
    }

    // --- Seed combining
    // Seeds index into the n-gram array (other's seeds are used to index ngramOtherRemap),
    // so other's seeds must be translated into combined n-gram indices before being stored.
    // Keep our own seeds first, then append every translated seed not already present.
    List<int> combinedSeeds = new List<int>(seeds)
    {
        Capacity = seeds.Length + other.seeds.Length
    };
    HashSet<int> knownSeeds = new HashSet<int>(seeds);
    foreach (int otherSeed in other.seeds)
    {
        int remappedSeed = ngramOtherRemap[otherSeed];
        if (knownSeeds.Add(remappedSeed))
        {
            combinedSeeds.Add(remappedSeed);
        }
    }

    // Put it all together
    return new MarkovStructure(
        combinedDictionary.ToArray(),
        combinedNGrams.ToArray(),
        combinedLinks,
        combinedSeeds.ToArray());
}