/// <summary>
/// Descends from <paramref name="current"/> one level per key byte and returns the node reached.
/// </summary>
/// <remarks>
/// While the key still matches the selected children the search stays in MIDDLE state. As soon as
/// the key byte is greater (or smaller) than the glyph of the child that was selected, the state
/// switches to HIGH (or LOW) and every remaining level simply follows the last (or first) child.
/// </remarks>
/// <param name="current">Node the descent starts from.</param>
/// <param name="word">Buffer containing the key bytes.</param>
/// <param name="keyoffset">Index in <paramref name="word"/> of the first key byte.</param>
/// <param name="keylength">Number of key bytes, i.e. the maximum number of levels descended.</param>
/// <returns>
/// The node reached after <paramref name="keylength"/> levels, or earlier the first node without
/// children that is met while still in MIDDLE state.
/// </returns>
public GlyphNode FindWord(GlyphNode current, IList<byte> word, int keyoffset, int keylength)
{
    SearchState mode = SearchState.MIDDLE;

    for (int depth = 0; depth < keylength; depth++)
    {
        // The key byte is fetched on every level, even when LOW/HIGH no longer needs its value.
        byte glyph = word[keyoffset + depth];

        switch (mode)
        {
            case SearchState.LOW:
                current = current.child_glyphs[0];
                break;

            case SearchState.HIGH:
                current = current.child_glyphs[current.child_glyphs.Count - 1];
                break;

            default:
                if (current.child_glyphs == null)
                {
                    return current;
                }

                current = current.SelectChild(glyph);
                if (glyph > current.Glyph)
                {
                    mode = SearchState.HIGH;
                }
                else if (glyph < current.Glyph)
                {
                    mode = SearchState.LOW;
                }
                break;
        }
    }

    return current;
}
/// <summary>
/// Maps a key to the major/minor range IDs of the tree node that <c>tree.FindWord</c> resolves it to.
/// </summary>
/// <remarks>
/// When the root has no children the root's own IDs are returned and the arguments are not
/// validated at all.
/// </remarks>
/// <param name="key">Buffer containing the key bytes.</param>
/// <param name="keyoffset">Index in <paramref name="key"/> of the first key byte.</param>
/// <param name="keylength">Number of key bytes; must equal the sample key length.</param>
/// <returns>A <c>RangeNode</c> carrying the MajorID and MinorID of the node found.</returns>
/// <exception cref="Exception">
/// The key length differs from the sample key length, the key does not fit in the buffer, or the
/// offset/length is negative.
/// </exception>
public override RangeNode Distro(IList<byte> key, int keyoffset, int keylength)
{
    RangeNode range;

    if (tree.child_glyphs != null)
    {
        if (m_iSampleKeyLength != keylength)
        {
            string wrongLength = "Key length not of expected size (expected " + m_iSampleKeyLength.ToString() + ", got " + keylength.ToString() + ")";
            throw new Exception(wrongLength);
        }

        if (key.Count < keyoffset + keylength)
        {
            string tooSmall = "Key buffer of size " + key.Count.ToString() + " not big enough to hold key of length " + keylength.ToString() + " at offset " + keyoffset.ToString();
            throw new Exception(tooSmall);
        }

        if (!(keyoffset >= 0 && keylength >= 0))
        {
            string negative = "Key buffer of size " + key.Count.ToString() + " not big enough to hold key of BAD length " + keylength.ToString() + " at BAD offset " + keyoffset.ToString();
            throw new Exception(negative);
        }

        GlyphNode found = tree.FindWord(tree, key, keyoffset, keylength);
        range.MajorID = found.iMajorId;
        range.MinorID = found.iMinorId;
    }
    else
    {
        // Root without children: answer with the root's own IDs.
        range.MajorID = tree.iMajorId;
        range.MinorID = tree.iMinorId;
    }

    return range;
}
/// <summary>
/// Returns the child chosen for <paramref name="childGlyph"/> using this node's lookup mode.
/// </summary>
/// <remarks>
/// FLAT scans the children for an exact glyph match. BINARY indexes the children with the position
/// returned by <c>BinarySearch</c>. HASH indexes the children with <c>hash[childGlyph]</c>.
/// </remarks>
/// <param name="childGlyph">Glyph to look up.</param>
/// <returns>The selected child node.</returns>
/// <exception cref="Exception">
/// No child has the glyph while in FLAT mode, or the node type is none of FLAT, BINARY, HASH.
/// </exception>
public GlyphNode SelectChild(byte childGlyph)
{
    if (nodtype == SearchType_FLAT)
    {
        foreach (GlyphNode candidate in child_glyphs)
        {
            if (candidate.Glyph == childGlyph)
            {
                return candidate;
            }
        }

        throw new Exception("child not found while parent in flat mode");
    }

    if (nodtype == SearchType_BINARY)
    {
        int slot = BinarySearch(child_glyphs, 0, child_glyphs.Count, childGlyph);
        return child_glyphs[slot];
    }

    if (nodtype == SearchType_HASH)
    {
        int slot = hash[(int)childGlyph];
        return child_glyphs[slot];
    }

    // Message text is kept verbatim.
    throw new Exception("unkown node type");
}
/// <summary>
/// Inserts the bytes of <paramref name="word"/> from <paramref name="index"/> onwards below this
/// node, creating one child per missing glyph and recursing into the child for the next byte.
/// </summary>
/// <param name="word">The complete word being inserted.</param>
/// <param name="index">Position in <paramref name="word"/> handled by this node.</param>
/// <exception cref="Exception">This node is not in FLAT mode.</exception>
public void EatWord(IList<byte> word, int index)
{
    if (nodtype != SearchType_FLAT)
    {
        throw new Exception("node type must be FLAT for this operation");
    }

    if (index >= word.Count)
    {
        return;
    }

    byte glyph = word[index];
    if (!ContainsChild(glyph))
    {
        // The child created for the final byte of the word is flagged as a leaf.
        bool endsWord = (index == word.Count - 1);
        AppendChild(glyph, endsWord, word);
    }

    SelectChild(glyph).EatWord(word, index + 1);
}
/// <summary>
/// Prepares one node for lookups: sorts its children and, when it has more than one child,
/// builds its hash via <c>HashChildren</c>. Nodes without a child list are left untouched.
/// </summary>
/// <param name="current">Node to prepare.</param>
public static void VisitNode(GlyphNode current)
{
    if (current.child_glyphs == null)
    {
        return;
    }

    current.SortChildren();

    bool hasSeveralChildren = current.child_glyphs.Count > 1;
    if (hasSeveralChildren)
    {
        current.HashChildren();
    }
}
/// <summary>
/// Applies <c>VisitNode</c> to <paramref name="current"/> and then, depth first, to every node
/// below it.
/// </summary>
/// <param name="current">Root of the subtree to index.</param>
public static void IndexNodes(GlyphNode current)
{
    // The node itself is visited before its children are walked.
    VisitNode(current);

    if (current.child_glyphs == null)
    {
        return;
    }

    foreach (GlyphNode child in current.child_glyphs)
    {
        IndexNodes(child);
    }
}
/// <summary>
/// Creates a new FLAT node for <paramref name="newGlyph"/> with the given range IDs and with no
/// children, no hash and no leaf word attached.
/// </summary>
/// <param name="newGlyph">Glyph stored in the node.</param>
/// <param name="major">Value stored in <c>iMajorId</c>.</param>
/// <param name="minor">Value stored in <c>iMinorId</c>.</param>
/// <returns>The newly created node.</returns>
public static GlyphNode Prepare(byte newGlyph, ushort major, ushort minor)
{
    GlyphNode gn = new GlyphNode();

    gn.Glyph = newGlyph;
    gn.iMajorId = major;
    gn.iMinorId = minor;
    gn.nodtype = SearchType_FLAT;

    // Each reference member is cleared exactly once.
    gn.child_glyphs = null;
    gn.hash = null;
    gn.leaf_word_ref = null;

    return gn;
}
/// <summary>
/// Builds a fresh node in FLAT mode that holds <paramref name="newGlyph"/> and the supplied
/// major/minor IDs; its child list, hash and leaf word reference all start out as null.
/// </summary>
/// <param name="newGlyph">Glyph the node represents.</param>
/// <param name="major">Initial <c>iMajorId</c>.</param>
/// <param name="minor">Initial <c>iMinorId</c>.</param>
/// <returns>The initialized node.</returns>
public static GlyphNode Prepare(byte newGlyph, ushort major, ushort minor)
{
    GlyphNode gn = new GlyphNode();

    // References start out empty; hash is assigned only once here.
    gn.hash = null;
    gn.leaf_word_ref = null;
    gn.child_glyphs = null;

    gn.Glyph = newGlyph;
    gn.iMajorId = major;
    gn.iMinorId = minor;
    gn.nodtype = SearchType_FLAT;

    return gn;
}
/// <summary>
/// Builds the glyph tree from <paramref name="samples"/>, indexes it, and assigns major/minor
/// range IDs to the collected leaf nodes.
/// </summary>
/// <remarks>
/// Leaves are collected in the static <c>GlyphNode.leaf_nodes</c> list while words are inserted.
/// That list is always reset to null before the constructor exits, including when it exits with
/// an exception, so leaves of a failed build cannot end up in the next instance's partitioning.
/// </remarks>
/// <param name="samples">Sample keys; all must have the same length as the first one.</param>
/// <param name="iMajorCount">Number of major ranges.</param>
/// <param name="iMinorCount">Number of minor ranges per major range.</param>
/// <exception cref="Exception">A sample's length differs from the first sample's length.</exception>
public MdoDistro(IList<IList<byte>> samples, int iMajorCount, int iMinorCount)
{
    if (samples.Count > 0)
    {
        m_iSampleKeyLength = samples[0].Count;
    }
    m_iMajorCount = iMajorCount;
    m_iMinorCount = iMinorCount;
    iSampleCount = samples.Count;

    try
    {
        for (int i = 0; i < iSampleCount; i++)
        {
            IList<byte> word = samples[i];
            if (word.Count != m_iSampleKeyLength)
            {
                // Fail case: include the offending key as comma-separated byte values.
                StringBuilder keysb = new StringBuilder(word.Count);
                for (int ki = 0; ki < word.Count; ki++)
                {
                    if (keysb.Length > 0)
                    {
                        keysb.Append(',');
                    }
                    keysb.Append((byte)word[ki]);
                }
                throw new Exception("Not all samples have the same key length (expected " + m_iSampleKeyLength.ToString() + ", got " + word.Count.ToString() + "). Consider recreate input or delete cache file if caching on. {key=" + keysb.ToString() + "}");
            }

            tree.EatWord(word, 0);
        }

        GlyphNode.IndexNodes(tree);

        // No leaf was collected (e.g. no samples): there is nothing to sort or partition.
        if (GlyphNode.leaf_nodes != null)
        {
            try
            {
                GlyphNode.leaf_nodes.Sort(new cmprGlyphNode());
                PartitionMajorAndMinor(GlyphNode.leaf_nodes, m_iMajorCount, m_iMinorCount);
            }
            catch
            {
                // Partitioning stays best-effort exactly as before: a failure here leaves the
                // IDs assigned so far in place and does not abort construction.
            }
        }
    }
    finally
    {
        // Not needed anymore, and must not survive into the next MdoDistro that gets built.
        GlyphNode.leaf_nodes = null;
    }
}
/// <summary>
/// Adds a new child node for <paramref name="childGlyph"/> to this node. When the child is a leaf
/// it also remembers <paramref name="word"/> and is registered in the shared <c>leaf_nodes</c> list.
/// </summary>
/// <param name="childGlyph">Glyph of the child to create.</param>
/// <param name="is_leaf">True when the child ends a word.</param>
/// <param name="word">Word stored on the child when it is a leaf.</param>
/// <exception cref="Exception">This node is not in FLAT mode.</exception>
public void AppendChild(byte childGlyph, bool is_leaf, IList<byte> word)
{
    if (nodtype != SearchType_FLAT)
    {
        throw new Exception("node type must be FLAT for this operation");
    }

    // The child list is created lazily on the first append.
    if (child_glyphs == null)
    {
        child_glyphs = new List<GlyphNode>();
    }

    GlyphNode child = GlyphNode.Prepare(childGlyph);
    child_glyphs.Add(child);

    if (!is_leaf)
    {
        return;
    }

    child.leaf_word_ref = word;

    // The leaf registry is created lazily as well.
    if (leaf_nodes == null)
    {
        leaf_nodes = new List<GlyphNode>();
    }
    leaf_nodes.Add(child);
}
/// <summary>
/// Self-test on natural-language words: builds an MdoDistro from the words on the first line of
/// <c>MySpaceText</c>, looks up every word of the whole text, and verifies the resulting ranges.
/// </summary>
/// <remarks>
/// Three things are checked, each failing with an exception:
/// the returned major/minor IDs are below the configured counts; with the words sorted, the
/// (major, minor) IDs never decrease; and walking the ranges in order, the first two bytes of the
/// leaf words never decrease.
/// An ordering failure now throws immediately instead of spinning forever in a debugging loop.
/// </remarks>
static void RunWordTest()
{
    const int KeyLength = 16;
    const uint iSlaveCount = 1;
    const uint iZblocksPerSlave = 113;

    // Sample from first line.
    List<List<byte>> samples = new List<List<byte>>(1000);
    foreach (string part in MySpaceText.Split('\n')[0].Split(' '))
    {
        string word = part.Trim(WordTrimChars).ToLower();
        if (0 != word.Length)
        {
            samples.Add(MakeWordKey(word, KeyLength));
        }
    }

    MdoDistro pCRangeEstimator = new MdoDistro(samples, iSlaveCount, iZblocksPerSlave);

    // Look up every word of the full text, grouping the hits by "major_minor".
    Dictionary<string, List<GlyphNode>> results = new Dictionary<string, List<GlyphNode>>();
    List<Word> words = new List<Word>(1000);
    foreach (string part in MySpaceText.Split(' ', '\n'))
    {
        string word = part.Trim(WordTrimChars).ToLower();
        if (0 == word.Length)
        {
            continue;
        }

        List<byte> key = MakeWordKey(word, KeyLength);
        GlyphNode pRangeItem = pCRangeEstimator.Distro(key);
        if (pRangeItem.iMajorId >= iSlaveCount)
        {
            throw new Exception("pRangeItem.iMajorId >= iSlaveCount");
        }
        if (pRangeItem.iMinorId >= iZblocksPerSlave)
        {
            throw new Exception("pRangeItem.iMinorId >= iZblocksPerSlave");
        }

        Word w = new Word();
        w.word = word;
        w.range = pRangeItem;
        words.Add(w);

        string sExchangePath = pRangeItem.iMajorId.ToString() + "_" + pRangeItem.iMinorId.ToString();
        List<GlyphNode> bucket;
        if (!results.TryGetValue(sExchangePath, out bucket))
        {
            bucket = new List<GlyphNode>();
            results[sExchangePath] = bucket;
        }
        bucket.Add(pRangeItem);
    }

    words.Sort();

    // Check 1: in sorted word order the (major, minor) pair must never go backwards.
    {
        Console.WriteLine("Testing WordTest...");
        int lastmajor = 0;
        int lastminor = 0;
        for (int i = 0; i < words.Count; i++)
        {
            if (words[i].range.iMajorId < lastmajor)
            {
                throw new Exception("Whoops!");
            }
            if (words[i].range.iMajorId != lastmajor)
            {
                // A new major range restarts the minor sequence.
                lastminor = 0;
            }
            if (words[i].range.iMinorId < lastminor)
            {
                throw new Exception("Whoops!");
            }
            lastmajor = words[i].range.iMajorId;
            lastminor = words[i].range.iMinorId;
        }
        Console.WriteLine("Done with WordTest!");
    }

    // Check 2: walking ranges in order, the leading two bytes of the leaf words must not decrease.
    {
        Console.WriteLine("Testing WordTest...");
        byte last = 0;
        byte lastn = 0;
        for (int major = 0; major < iSlaveCount; major++)
        {
            for (int minor = 0; minor < iZblocksPerSlave; minor++)
            {
                string key = major.ToString() + "_" + minor.ToString();
                List<GlyphNode> bucket;
                if (!results.TryGetValue(key, out bucket))
                {
                    continue;
                }

                bucket.Sort(new cmprGlyphNode());
                for (int ig = 0; ig < bucket.Count; ig++)
                {
                    byte current = bucket[ig].leaf_word_ref[0];
                    byte currentn = bucket[ig].leaf_word_ref[1];
                    if (current < last)
                    {
                        throw new Exception("Whoops");
                    }
                    if (current != last)
                    {
                        lastn = 0;
                    }
                    if (currentn < lastn)
                    {
                        throw new Exception("Whoops");
                    }
                    last = current;
                    lastn = currentn;
                }
            }
        }
        Console.WriteLine("Done with WordTest!");
    }
}

/// <summary>
/// Encodes <paramref name="word"/> as ASCII bytes after right-padding it with NUL characters to
/// at least <paramref name="keyLength"/> characters.
/// </summary>
static List<byte> MakeWordKey(string word, int keyLength)
{
    List<byte> key = new List<byte>(keyLength);
    key.AddRange(Encoding.ASCII.GetBytes(word.PadRight(keyLength, '\0')));
    return key;
}
/// <summary>
/// Test harness entry point: runs <c>RunWordTest</c>, then builds an MdoDistro from generated
/// samples, looks up randomly picked samples, and verifies the returned ranges.
/// </summary>
/// <remarks>
/// Every lookup must return major/minor IDs below the configured counts. Afterwards the ranges
/// are walked in order and the first two bytes of the leaf words must never decrease.
/// </remarks>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    // Number of random lookups performed; also the divisor of the averages below.
    const int LookupCount = 100000;

    RunWordTest();

    List<List<byte>> samples = MdoDistro.GenerateRandomSamples_DenseInLowValues(100000, 0);
    uint iSlaveCount = 31;
    uint iZblocksPerSlave = 113;
    MdoDistro pCRangeEstimator = new MdoDistro(samples, iSlaveCount, iZblocksPerSlave);
    Random rnd = new Random();
    long total_major = 0;
    long total_minor = 0;

    // Hit counter for every (major, minor) combination, initialised to zero.
    List<List<int>> lii = new List<List<int>>((int)iSlaveCount);
    for (int i = 0; i < iSlaveCount; i++)
    {
        List<int> ilist = new List<int>((int)iZblocksPerSlave);
        lii.Add(ilist);
        for (int j = 0; j < iZblocksPerSlave; j++)
        {
            ilist.Add(0);
        }
    }

    Dictionary<string, List<GlyphNode>> results = new Dictionary<string, List<GlyphNode>>();
    for (int i = 0; i < LookupCount; i++)
    {
        GlyphNode pRangeItem = pCRangeEstimator.Distro(samples[rnd.Next(0, samples.Count)]);
        if (pRangeItem.iMajorId >= iSlaveCount)
        {
            throw new Exception("pRangeItem.iMajorId >= iSlaveCount");
        }
        if (pRangeItem.iMinorId >= iZblocksPerSlave)
        {
            throw new Exception("pRangeItem.iMinorId >= iZblocksPerSlave");
        }

        string sExchangePath = pRangeItem.iMajorId.ToString() + "_" + pRangeItem.iMinorId.ToString();
        total_major += pRangeItem.iMajorId;
        total_minor += pRangeItem.iMinorId;
        lii[pRangeItem.iMajorId][pRangeItem.iMinorId]++;

        List<GlyphNode> bucket;
        if (!results.TryGetValue(sExchangePath, out bucket))
        {
            bucket = new List<GlyphNode>();
            results[sExchangePath] = bucket;
        }
        bucket.Add(pRangeItem);
    }

    // Average ID relative to the ID count. The division is done in floating point; dividing the
    // long totals first would drop the fractional part of the average.
    // NOTE(review): these values and lii are not read afterwards - presumably they are meant
    // to be inspected in a debugger; confirm before removing.
    double majorpercent = ((double)total_major / LookupCount) / iSlaveCount;
    double minorpercent = ((double)total_minor / LookupCount) / iZblocksPerSlave;

    {
        Console.WriteLine("Testing...");
        byte last = 0;
        byte lastn = 0;
        for (int major = 0; major < iSlaveCount; major++)
        {
            for (int minor = 0; minor < iZblocksPerSlave; minor++)
            {
                string key = major.ToString() + "_" + minor.ToString();
                List<GlyphNode> bucket;
                if (!results.TryGetValue(key, out bucket))
                {
                    continue;
                }

                bucket.Sort(new cmprGlyphNode());
                for (int ig = 0; ig < bucket.Count; ig++)
                {
                    byte current = bucket[ig].leaf_word_ref[0];
                    byte currentn = bucket[ig].leaf_word_ref[1];
                    if (current < last)
                    {
                        throw new Exception("Whoops");
                    }
                    if (current != last)
                    {
                        lastn = 0;
                    }
                    if (currentn < lastn)
                    {
                        throw new Exception("Whoops");
                    }
                    last = current;
                    lastn = currentn;
                }
            }
        }
        Console.WriteLine("Done with test!");
    }
}
/// <summary>
/// Walks down the tree from <paramref name="current"/>, consuming one key byte per level, and
/// returns the node where the walk ends.
/// </summary>
/// <remarks>
/// In MIDDLE state the child is chosen with <c>SelectChild</c>. If the key byte is above the
/// chosen child's glyph the walk becomes HIGH and from then on always takes the last child; if it
/// is below, the walk becomes LOW and always takes the first child.
/// </remarks>
/// <param name="current">Starting node.</param>
/// <param name="word">Buffer holding the key.</param>
/// <param name="keyoffset">Offset of the key's first byte in <paramref name="word"/>.</param>
/// <param name="keylength">Length of the key in bytes.</param>
/// <returns>
/// The node reached once all key bytes are consumed, or the first childless node encountered
/// while in MIDDLE state.
/// </returns>
public GlyphNode FindWord(GlyphNode current, IList<byte> word, int keyoffset, int keylength)
{
    SearchState state = SearchState.MIDDLE;

    for (int step = 0; step < keylength; step++)
    {
        // Read unconditionally so an out-of-range key position fails in every state.
        byte wanted = word[keyoffset + step];

        if (state == SearchState.LOW)
        {
            current = current.child_glyphs[0];
            continue;
        }

        if (state == SearchState.HIGH)
        {
            int lastIndex = current.child_glyphs.Count - 1;
            current = current.child_glyphs[lastIndex];
            continue;
        }

        if (current.child_glyphs == null)
        {
            return current;
        }

        current = current.SelectChild(wanted);

        if (wanted > current.Glyph)
        {
            state = SearchState.HIGH;
        }
        else if (wanted < current.Glyph)
        {
            state = SearchState.LOW;
        }
    }

    return current;
}
/// <summary>
/// Creates a node for <paramref name="newGlyph"/> whose major and minor IDs are both
/// <c>ushort.MaxValue</c>.
/// </summary>
/// <param name="newGlyph">Glyph stored in the node.</param>
/// <returns>The node produced by the three-argument <c>Prepare</c> overload.</returns>
public static GlyphNode Prepare(byte newGlyph)
{
    const ushort defaultId = ushort.MaxValue;

    GlyphNode node = GlyphNode.Prepare(newGlyph, defaultId, defaultId);
    return node;
}
/// <summary>
/// Assigns consecutive (major, minor) IDs to the nodes of <paramref name="rConverted_Samples"/>
/// in list order, spreading the nodes as evenly as possible over <paramref name="iMajorCount"/>
/// majors and, inside each major, over <paramref name="iMinorCount"/> minors.
/// </summary>
/// <remarks>
/// When the node count does not divide evenly, the first <c>count % iMajorCount</c> majors hold
/// one node more than the rest; minors inside a major are handled the same way. The per-minor
/// slot size is recomputed from scratch whenever a new major starts, so it cannot drift when the
/// per-major size changes or when a major has fewer slots than there are minors.
/// </remarks>
/// <param name="rConverted_Samples">Nodes to label; expected to be sorted by the caller.</param>
/// <param name="iMajorCount">Number of major ranges.</param>
/// <param name="iMinorCount">Number of minor ranges per major range.</param>
/// <exception cref="Exception">A computed major or minor ID falls outside its count.</exception>
private void PartitionMajorAndMinor(List<GlyphNode> rConverted_Samples, int iMajorCount, int iMinorCount)
{
    // Slots per major, rounded up when the nodes do not divide evenly.
    int slotspermajor = rConverted_Samples.Count / iMajorCount;
    int extramajorslots = rConverted_Samples.Count % iMajorCount;
    if (0 != extramajorslots)
    {
        slotspermajor++;
    }

    // Slots per minor inside the current major, rounded up the same way.
    int slotsperminor = slotspermajor / iMinorCount;
    int extraminorslots = slotspermajor % iMinorCount;
    if (0 != extraminorslots)
    {
        slotsperminor++;
    }

    ushort major = 0;
    int majorslot = 0;
    ushort minor = 0;
    int minorslot = 0;

    for (int i = 0; i < rConverted_Samples.Count; i++)
    {
        if (major >= iMajorCount)
        {
            throw new Exception("MajorID miscalculation for " + this.GetType().Name + ": out of range");
        }
        if (minor >= iMinorCount)
        {
            throw new Exception("MinorID miscalculation for " + this.GetType().Name + ": out of range");
        }

        GlyphNode gn = rConverted_Samples[i];
        gn.iMajorId = major;
        gn.iMinorId = minor;
        rConverted_Samples[i] = gn;

        if (++majorslot >= slotspermajor)
        {
            // Current major is full: move on to the next one and restart its minors.
            major++;
            minor = 0;
            majorslot = 0;
            minorslot = 0;

            // Only the first 'extramajorslots' majors carry the extra slot.
            if (major == extramajorslots)
            {
                slotspermajor--;
            }

            // Recompute the minor layout for the new major from its actual size.
            slotsperminor = slotspermajor / iMinorCount;
            extraminorslots = slotspermajor % iMinorCount;
            if (0 != extraminorslots)
            {
                slotsperminor++;
            }
        }
        else if (++minorslot >= slotsperminor)
        {
            // Current minor is full: move on to the next minor of the same major.
            minor++;
            minorslot = 0;

            // Only the first 'extraminorslots' minors carry the extra slot.
            if (minor == extraminorslots)
            {
                slotsperminor--;
            }
        }
    }
}