/// <summary>
/// Pairs <paramref name="s"/> with a 64-bit "presence" value that encodes, tally-mark style,
/// how many times each tracked character occurs in it (position is ignored).
/// </summary>
/// <param name="s">The string to decompose and wrap.</param>
/// <returns>A new <c>DString</c> built from <paramref name="s"/> and its presence bits.</returns>
private DString wrap(string s)
        {
            // Per-character counts of the input, keyed by character
            Dictionary <char, CCount> occurrences = CCount.Decompose(s);

            ulong tally = 0UL;

            foreach (CCount slot in bits)
            {
                // Number of occurrences of this slot's character that still need a mark;
                // stays 0 when the character does not appear in the input at all
                ushort remaining = 0;
                CCount found;
                if (occurrences.TryGetValue(slot.Chr, out found))
                {
                    remaining = found.Count;
                }

                // Emit exactly slot.Count bits for this character: one set bit per
                // occurrence (while any remain), then zero bits to fill the slot
                for (int pending = slot.Count; pending > 0; pending--)
                {
                    tally <<= 1;
                    if (remaining == 0)
                    {
                        continue;
                    }
                    tally |= 1UL;
                    remaining--;
                }
            }

            return new DString(s, tally);
        }
예제 #2
0
        /// <summary>
        /// Hands out up to 64 bits, one at a time, to whichever character currently has the
        /// largest pending cost, then builds a <c>FuzzyMatcher</c> from the sorted
        /// (character, bit count) pairs.
        /// </summary>
        /// <returns>A <c>FuzzyMatcher</c> constructed from the resulting bit allocation.</returns>
        public FuzzyMatcher Build()
        {
            Dictionary <char, Queue <uint> > costs = computeCosts();

            // Ordered set used as a largest-first priority queue, seeded with the
            // first cost value of every character
            var pending = new SortedSet <WeightedChar>();

            foreach (KeyValuePair <char, Queue <uint> > entry in costs)
            {
                uint firstCost = entry.Value.Dequeue();
                pending.Add(new WeightedChar(entry.Key, firstCost));
            }

            var allotted = new Dictionary <char, ushort>();

            // One pass per available bit (64 = bits in a Long), stopping early if
            // there are no more candidates
            for (int bitsLeft = 64; bitsLeft > 0 && pending.Count != 0; bitsLeft--)
            {
                WeightedChar top = pending.Max;
                pending.Remove(top);

                // Grant one more bit to this character; TryGetValue leaves the
                // out value at 0 when the character has no entry yet
                ushort granted;
                allotted.TryGetValue(top.Chr, out granted);
                granted++;
                allotted[top.Chr] = granted;

                Queue <uint> rest = costs[top.Chr];
                if (rest == null || rest.Count == 0)
                {
                    // No further cost values for this character
                    continue;
                }

                // The character still has cost values left, so it competes again
                pending.Add(new WeightedChar(top.Chr, rest.Dequeue()));
            }

            // Flatten the allocation map into an array of (character, bits)
            CCount[] flattened = new CCount[allotted.Count];
            int      slot      = 0;

            foreach (KeyValuePair <char, ushort> entry in allotted)
            {
                flattened[slot++] = new CCount(entry.Key, entry.Value);
            }

            Array.Sort(flattened);
            return new FuzzyMatcher(flattened);
        }
예제 #3
0
 /// <summary>
 /// Folds the character counts of <paramref name="str"/> into the running statistics.
 /// For every character reported by <c>CCount.Decompose</c>, the entry at index
 /// <c>Count - 1</c> of that character's list is incremented, and the number of
 /// processed strings is increased by one.
 /// </summary>
 /// <param name="str">
 /// The string to record. A null or empty string is ignored and leaves all state unchanged.
 /// </param>
 public void Update(string str)
 {
     // Treat null like the empty string instead of failing on str.Length
     if (string.IsNullOrEmpty(str))
     {
         return;
     }
     foreach (CCount cc in CCount.Decompose(str).Values)
     {
         List <uint> vals;
         if (!counts.TryGetValue(cc.Chr, out vals))
         {
             // First time this character is seen: start an empty list for it
             vals = new List <uint>();
             counts.Add(cc.Chr, vals);
         }
         while (vals.Count < cc.Count)
         {
             // Insert zeros until the list of counts can support the value we want to insert
             vals.Add(0);
         }
         // We can't have a count of 0, so count positions need to be offset by 1
         vals[cc.Count - 1]++;
     }
     total++;
 }