/// <summary>
/// Builds a segment from a prototype successor table. Meant to be called by MarkovStructure.
/// </summary>
/// <param name="prototypeSuccessors">
/// Prototype table whose entries become this segment's successors: each key is passed as the
/// first argument and each value as the second argument of the NGramSuccessor constructor
/// (per the original notes: successor index and its associated weight).
/// </param>
internal MarkovSegment(ConcurrentDictionary<int, int> prototypeSuccessors)
{
    // Pre-size the list to the number of prototype entries
    var orderedSuccessors = new List<NGramSuccessor>(prototypeSuccessors.Count);
    var ordering = new NGramSuccessor.ReverseComparer();

    // Insert every entry at its sorted position, as decided by the reverse comparer
    foreach (KeyValuePair<int, int> entry in prototypeSuccessors)
    {
        NGramSuccessor candidate = new NGramSuccessor(entry.Key, entry.Value);
        orderedSuccessors.SortAdd(candidate, ordering);
    }

    SetupSuccessorsAndRunningTotals(orderedSuccessors);
}
/// <summary>
/// Merges the successors of this segment with the successors of <paramref name="other"/>
/// and returns the result as a segment.
/// </summary>
/// <param name="other">
/// Segment to merge in. Its successor indices are translated through
/// <paramref name="ngramOtherRemap"/> before being merged.
/// </param>
/// <param name="ngramOtherRemap">
/// Lookup indexed by a successor index of <paramref name="other"/>; the stored value is the
/// index used for that successor in the combined result.
/// </param>
/// <param name="ownNGramLength">
/// Remapped indices at or above this value are treated as unique to <paramref name="other"/>
/// and are never matched against this segment's own successors.
/// </param>
/// <returns>
/// This instance when <paramref name="other"/> has no successors; otherwise a new segment in
/// which successors present on both sides carry the sum of both weights.
/// </returns>
// TODO: create unit test
public MarkovSegment Combine(MarkovSegment other, int[] ngramOtherRemap, int ownNGramLength)
{
    // Nothing to merge in: this segment already is the combined result
    if (other.successors.Length == 0)
    {
        return this;
    }

    // NOTE: there is deliberately no early "return other" when this segment is empty.
    // Successor indices of other must always pass through ngramOtherRemap, and the
    // general path below handles an empty own successor list correctly.

    // Accumulate into an append-only list. Positions in it never shift, so the positions
    // stored in the map stay valid for the whole merge (removing from and re-inserting
    // into a sorted list while merging would invalidate them).
    List<NGramSuccessor> merged = new List<NGramSuccessor>(successors.Length + other.successors.Length);
    Dictionary<int, int> positionBySuccessor = new Dictionary<int, int>(successors.Length);

    // Register own successors
    foreach (NGramSuccessor ownSuccessor in successors)
    {
        // TODO: each entry is expected to be unique; if it is not, the last occurrence wins
        positionBySuccessor[ownSuccessor.successorIndex] = merged.Count;
        merged.Add(ownSuccessor);
    }

    // Fold in the successors of other
    foreach (NGramSuccessor otherSuccessor in other.successors)
    {
        int remap = ngramOtherRemap[otherSuccessor.successorIndex];

        if (remap < ownNGramLength && positionBySuccessor.TryGetValue(remap, out int position))
        {
            // The succeeding ngram also follows the current ngram in this segment: sum the weights.
            // A fresh entry is stored instead of mutating the existing one, so this segment's
            // own successors are left untouched.
            NGramSuccessor existing = merged[position];
            merged[position] = new NGramSuccessor(remap, existing.weight + otherSuccessor.weight);
        }
        else
        {
            // Either the ngram is unique to other, or it is simply not a successor in this segment
            merged.Add(new NGramSuccessor(remap, otherSuccessor.weight));
        }
    }

    // All weights are final now, so build the sorted list in a single pass
    var reverseComparer = new NGramSuccessor.ReverseComparer();
    List<NGramSuccessor> combinedSuccessors = new List<NGramSuccessor>(merged.Count);
    foreach (NGramSuccessor successor in merged)
    {
        combinedSuccessors.SortAdd(successor, reverseComparer);
    }

    return new MarkovSegment(combinedSuccessors);
}