/// <summary>
/// Serialises <paramref name="bn"/> and everything reachable from it through
/// its sibling and child links, in the order sibling chain first, then children.
/// </summary>
/// <param name="bn">Node to write; <c>null</c> means there is nothing to write.</param>
/// <param name="indexFile">Writer that receives the serialised node fields.</param>
/// <param name="offset">
/// Position, counted in nodes, at which the next record will land. It advances
/// by one for every node written.
/// </param>
/// <returns>The offset immediately after the last node written by this call.</returns>
/// <exception cref="ArgumentException">
/// The node's assigned offset lies beyond the current write position.
/// </exception>
internal UInt32 WriteNode(BuildNode bn, BinaryWriter indexFile, UInt32 offset)
{
    // Nothing to do for a missing node, or for one whose assigned slot is
    // already behind the write position (it has been emitted earlier).
    if (bn == null || bn.Offset < offset)
    {
        return offset;
    }

    if (bn.Offset > offset)
    {
        string message = "Offset " + offset + " does not match node at " + bn.Offset;
        throw new ArgumentException(message);
    }

    // Emit the record fields in their fixed order.
    Node record = bn.ToNode();
    indexFile.Write(record.letter);
    indexFile.Write(record.firstChild);
    indexFile.Write(record.nextSibling);
    indexFile.Write(record.terminal);

    UInt32 afterSelf = offset + 1;
    UInt32 afterSiblings = WriteNode(bn.NextSibling, indexFile, afterSelf);
    return WriteNode(bn.FirstChild, indexFile, afterSiblings);
}
/// <summary>
/// Gives <paramref name="bn"/> the next free offset and then recurses into its
/// sibling chain followed by its children. Nodes whose offset is already 1 or
/// greater are treated as assigned and left untouched.
/// </summary>
/// <param name="bn">Node to number; <c>null</c> is ignored.</param>
/// <param name="offsets">
/// Not read or modified here; it is only handed on to the recursive calls.
/// </param>
private void AssignOffsets(BuildNode bn, Dictionary <BuildNode, UInt32> offsets)
{
    if (bn == null || bn.Offset >= 1)
    {
        return;
    }

    bn.Offset = nextOffset++;

    // Siblings are numbered before children.
    AssignOffsets(bn.NextSibling, offsets);
    AssignOffsets(bn.FirstChild, offsets);
}
/// <summary>
/// Inserts <paramref name="word"/> into the tree rooted at <c>root</c>, walking
/// or creating one child node per character and flagging the final node as terminal.
/// </summary>
/// <param name="word">Word to insert, one node per character.</param>
private void Add(string word)
{
    BuildNode current = root;
    for (int i = 0; i < word.Length; i++)
    {
        current = current.GetOrCreateChild(word[i]);
    }

    // The node reached after the last character marks the end of a word.
    current.Terminal = true;
}
/// <summary>
/// Counts this node plus every node reachable through its children that has
/// not yet been stamped with <paramref name="nextMark"/>.
/// </summary>
/// <param name="nextMark">
/// Visit stamp for this traversal. A node already carrying this value
/// contributes 0, so a node reachable by several paths is counted once.
/// </param>
/// <returns>The number of nodes newly stamped by this call.</returns>
internal int GetCount(int nextMark)
{
    if (mark == nextMark)
    {
        return 0;
    }

    mark = nextMark;

    int total = 1; // this node
    for (BuildNode child = FirstChild; child != null; child = child.NextSibling)
    {
        total += child.GetCount(nextMark);
    }

    return total;
}
/// <summary>
/// Returns the child of this node labelled <paramref name="l"/>, creating it
/// and linking it into the sibling chain when no such child exists yet.
/// </summary>
/// <remarks>
/// Children are kept in ascending letter order along the <c>NextSibling</c>
/// chain, including the head. A letter smaller than the current first child's
/// becomes the new first child, so the chain order depends only on the set of
/// letters and not on the order in which they were inserted.
/// </remarks>
/// <param name="l">Letter identifying the child.</param>
/// <returns>The existing or newly created child node for <paramref name="l"/>.</returns>
/// <exception cref="InvalidOperationException">
/// The node's hash has already been calculated, so its children must not change.
/// </exception>
internal BuildNode GetOrCreateChild(char l)
{
    if (hashCalculated)
    {
        throw new InvalidOperationException("Hash already calculated");
    }

    // No children yet, or the new letter sorts before the current head:
    // the new node becomes the first child.
    if (FirstChild == null || l < FirstChild.letter)
    {
        BuildNode newHead = new BuildNode(l, this);
        newHead.NextSibling = FirstChild;
        FirstChild = newHead;
        return newHead;
    }

    // The first child is the matching one.
    if (FirstChild.letter == l)
    {
        return FirstChild;
    }

    // Advance to the last sibling whose successor still sorts before l.
    BuildNode n = FirstChild;
    while (n.NextSibling != null && n.NextSibling.letter < l)
    {
        n = n.NextSibling;
    }

    // Found an existing child for this letter.
    if (n.NextSibling != null && n.NextSibling.letter == l)
    {
        return n.NextSibling;
    }

    // Otherwise splice a new child in right after n, keeping the order.
    BuildNode inserted = new BuildNode(l, this);
    inserted.NextSibling = n.NextSibling;
    n.NextSibling = inserted;
    return inserted;
}
/// <summary>
/// Builds the word tree from the lines of <paramref name="reader"/>, then
/// deduplicates it, assigns node offsets and writes the result to a newly
/// created file at <paramref name="indexPath"/>. Progress is reported on the console.
/// </summary>
/// <param name="reader">Source of words, one per line; each line is trimmed before use.</param>
/// <param name="indexPath">Path of the index file to create (an existing file is overwritten).</param>
public void Build(StreamReader reader, String indexPath)
{
    root = new BuildNode('\0', null);

    int added = 0;
    for (String raw = reader.ReadLine(); raw != null; raw = reader.ReadLine())
    {
        String word = raw.Trim();
        if (!IsValid(word))
        {
            continue;
        }

        // Echo every 1000th accepted word as a progress indicator.
        if (added % 1000 == 0)
        {
            Console.WriteLine(word);
        }

        Add(word);
        added++;
    }

    Console.WriteLine("Done, added " + added + " words, " + BuildNode.Count + " nodes");

    // Report the reachable node count before and after deduplication;
    // each traversal needs its own mark value.
    Console.WriteLine("GetCount() is " + root.GetCount(1));
    Deduplicate();
    Console.WriteLine("GetCount() is " + root.GetCount(2));

    AssignOffsets();
    Console.WriteLine("Offsets assigned");

    using (FileStream indexFile = new FileStream(indexPath, FileMode.Create))
    {
        Console.WriteLine("Attempting to create index of size " + (Node.SIZE * nextOffset));
        using (BinaryWriter indexWriter = new BinaryWriter(indexFile, System.Text.Encoding.UTF32))
        {
            WriteNode(root, indexWriter, 0);
            indexWriter.Flush();
        }
    }
}
/// <summary>
/// Creates a node for <paramref name="letter"/> attached to
/// <paramref name="parent"/> and bumps the <c>Count</c> counter.
/// </summary>
/// <param name="letter">Letter stored in this node.</param>
/// <param name="parent">Node this one hangs under; callers pass <c>null</c> for the root.</param>
internal BuildNode(char letter, BuildNode parent)
{
    this.letter = letter;
    Parent = parent;

    // Every construction is tallied.
    Count += 1;
}