/// <summary>
/// Pairs <paramref name="s"/> with its presence mask in a new DString.
/// </summary>
/// <remarks>
/// The mask works like tally marks: it records how many times each tracked
/// character occurs in the string, regardless of position. Every entry of
/// <c>bits</c> contributes a group of <c>Count</c> bits, appended in the order
/// the entries are enumerated. Within a group, one bit is set per occurrence
/// reported by <c>CCount.Decompose</c>, capped at the group width; the set bits
/// come first (most significant) and the rest of the group stays zero.
/// </remarks>
/// <param name="s">String to decompose and wrap.</param>
/// <returns>A DString built from the string and its presence mask.</returns>
private DString wrap(string s) {
    Dictionary<char, CCount> occurrences = CCount.Decompose(s);
    ulong mask = 0UL;

    foreach (CCount slot in bits) {
        // Number of tally marks still to record for this character; stays 0
        // when the character does not appear in the decomposition at all.
        ushort remaining = 0;
        CCount found;
        if (occurrences.TryGetValue(slot.Chr, out found)) {
            remaining = found.Count;
        }

        // Emit exactly slot.Count bits for this character, one shift at a time.
        for (int width = slot.Count; width > 0; width--) {
            mask <<= 1;
            if (remaining > 0) {
                mask |= 1UL;
                remaining--;
            }
        }
    }

    return new DString(s, mask);
}
/// <summary>
/// Distributes up to 64 mask bits among characters and builds a FuzzyMatcher
/// from the resulting per-character bit counts.
/// </summary>
/// <remarks>
/// Each character's cost queue (from <c>computeCosts</c>) is consumed front to
/// back. On every round the entry that the sorted set reports as its maximum
/// receives one more bit, and the next cost for that character, if any, is put
/// back into the set. The loop stops after 64 rounds (the width of a long) or
/// as soon as the set is empty.
/// </remarks>
/// <returns>A FuzzyMatcher constructed from the sorted (character, bits) array.</returns>
public FuzzyMatcher Build() {
    Dictionary<char, Queue<uint>> costsByChar = computeCosts();

    // Seed the ordered set with the first cost value of every character.
    var pending = new SortedSet<WeightedChar>();
    foreach (KeyValuePair<char, Queue<uint>> entry in costsByChar) {
        pending.Add(new WeightedChar(entry.Key, entry.Value.Dequeue()));
    }

    var allotted = new Dictionary<char, ushort>();
    for (int remaining = 64; remaining > 0 && pending.Count != 0; remaining--) {
        // Take the largest candidate out of the set.
        WeightedChar top = pending.Max;
        pending.Remove(top);

        // Give this character one more bit.
        ushort soFar;
        if (!allotted.TryGetValue(top.Chr, out soFar)) {
            soFar = 0;
        }
        soFar++;
        allotted[top.Chr] = soFar;

        // If the character has further cost values, re-enter it with the next one.
        Queue<uint> rest = costsByChar[top.Chr];
        if (rest != null && rest.Count != 0) {
            pending.Add(new WeightedChar(top.Chr, rest.Dequeue()));
        }
    }

    // Flatten the map into an array of (character, bits) and sort it.
    var allocation = new CCount[allotted.Count];
    int slot = 0;
    foreach (KeyValuePair<char, ushort> pair in allotted) {
        allocation[slot] = new CCount(pair.Key, pair.Value);
        slot++;
    }
    Array.Sort(allocation);

    return new FuzzyMatcher(allocation);
}
/// <summary>
/// Folds one sample string into the accumulated per-character count statistics.
/// </summary>
/// <remarks>
/// For every CCount produced by <c>CCount.Decompose</c>, the list stored in
/// <c>counts</c> under that character is grown with zeros as needed and the
/// slot at index <c>Count - 1</c> is incremented. <c>total</c> is incremented
/// once per string that is actually processed.
/// A null or empty string is ignored: nothing is recorded and <c>total</c> is
/// left unchanged. Previously a null argument raised a NullReferenceException.
/// </remarks>
/// <param name="str">Sample string; null and empty values are skipped.</param>
public void Update(string str) {
    // Treat null exactly like the empty string instead of dereferencing it.
    if (string.IsNullOrEmpty(str)) {
        return;
    }

    foreach (CCount cc in CCount.Decompose(str).Values) {
        List<uint> vals;
        if (!counts.TryGetValue(cc.Chr, out vals)) {
            vals = new List<uint>();
            counts.Add(cc.Chr, vals);
        }

        // Pad with zeros until the list can hold the slot we are about to bump.
        while (vals.Count < cc.Count) {
            vals.Add(0);
        }

        // A count of 0 cannot occur, so count k is stored at index k - 1.
        vals[cc.Count - 1]++;
    }

    total++;
}