/// <summary>
/// Builds the index: reads candidate words line by line, adds every trimmed line accepted by
/// <c>IsValid</c> to a fresh trie, then calls <c>Deduplicate</c> and <c>AssignOffsets</c> and
/// writes the nodes into a memory-mapped file.
/// </summary>
/// <param name="reader">Text source, one candidate word per line.</param>
/// <param name="indexPath">Path of the index file, opened with <c>FileMode.Create</c>.</param>
/// <returns>A task that completes once the index view has been flushed and disposed.</returns>
public async Task Build(StreamReader reader, String indexPath)
{
    root = new BuildNode('\0', null);
    int count = 0;

    for (;;)
    {
        String line = await reader.ReadLineAsync();
        if (line == null)
        {
            break; // end of input
        }

        line = line.Trim();
        if (!IsValid(line))
        {
            continue;
        }

        // Progress output: echo the word about to be added whenever the
        // number of words added so far is a multiple of 1000.
        if (count % 1000 == 0)
        {
            Console.WriteLine(line);
        }

        Add(line);
        count++;
    }

    Console.WriteLine($"Done, added {count} words, {BuildNode.Count} nodes");

    // GetCount skips nodes already carrying the given mark, so each traversal
    // needs a mark value different from the previous one.
    Console.WriteLine($"GetCount() is {root.GetCount(1)}");
    Deduplicate();
    Console.WriteLine($"GetCount() is {root.GetCount(2)}");

    AssignOffsets();
    Console.WriteLine("Offsets assigned");

    // Size of the file and of the view: one nodeSize-sized slot per assigned offset.
    var indexSize = nodeSize * nextOffset;
    using (var indexFile = MemoryMappedFile.CreateFromFile(indexPath, FileMode.Create, null, indexSize))
    {
        Console.WriteLine($"Attempting to create view of size {indexSize}");
        using (var indexView = indexFile.CreateViewAccessor(0, indexSize))
        {
            WriteNode(root, indexView, 0);
            indexView.Flush();
        }
    }
}
/// <summary>
/// Gives <paramref name="bn"/> the next free offset if it has none yet, then continues with
/// its next sibling and after that its first child.
/// </summary>
/// <param name="bn">Node to number; <c>null</c> is ignored.</param>
/// <param name="offsets">Not read here; only handed on to the recursive calls.</param>
private void AssignOffsets(BuildNode bn, Dictionary <BuildNode, UInt32> offsets)
{
    // Nothing to do for a missing node or one whose Offset is already 1 or more.
    if (bn == null || bn.Offset >= 1)
    {
        return;
    }

    bn.Offset = nextOffset++;

    // Sibling chain first, then children: the same order WriteNode walks in.
    AssignOffsets(bn.NextSibling, offsets);
    AssignOffsets(bn.FirstChild, offsets);
}
/// <summary>
/// Inserts <paramref name="word"/> into the trie, creating nodes as needed, and marks the
/// node reached after the last character as terminal.
/// </summary>
/// <param name="word">Word to insert; for an empty string the root itself is marked terminal.</param>
private void Add(string word)
{
    BuildNode current = root;
    for (int i = 0; i < word.Length; i++)
    {
        current = current.GetOrCreateChild(word[i]);
    }

    current.Terminal = true;
}
/// <summary>
/// Counts this node plus every node reachable through its children that does not already
/// carry <paramref name="nextMark"/>, stamping each counted node with that mark.
/// </summary>
/// <param name="nextMark">Mark for this traversal; nodes already carrying it count as 0.</param>
/// <returns>Number of nodes newly stamped by this call.</returns>
internal int GetCount(int nextMark)
{
    if (mark == nextMark)
    {
        return 0; // already visited during this traversal
    }

    mark = nextMark;

    int total = 1; // this node
    for (BuildNode child = FirstChild; child != null; child = child.NextSibling)
    {
        total += child.GetCount(nextMark);
    }

    return total;
}
/// <summary>
/// Returns the child of this node for letter <paramref name="l"/>, creating it if it does
/// not exist. Children are kept as a singly linked sibling list in ascending letter order.
/// </summary>
/// <param name="l">Letter of the child to find or create.</param>
/// <returns>The existing or newly created child node.</returns>
/// <exception cref="InvalidOperationException">
/// Thrown when <c>hashCalculated</c> is set on this node.
/// </exception>
internal BuildNode GetOrCreateChild(char l)
{
    if (hashCalculated)
    {
        throw new InvalidOperationException("Hash already calculated");
    }

    // Case 1: no children yet, start the list.
    if (FirstChild == null)
    {
        FirstChild = new BuildNode(l, this);
        return FirstChild;
    }

    // Case 2: the first child is the matching one.
    if (FirstChild.letter == l)
    {
        return FirstChild;
    }

    // Case 3: the new letter sorts before the current head, so it becomes the new head.
    // Without this case the node would be linked in after the head and the list would
    // no longer be in ascending order.
    if (l < FirstChild.letter)
    {
        BuildNode head = new BuildNode(l, this);
        head.NextSibling = FirstChild;
        FirstChild = head;
        return head;
    }

    // Case 4: advance to the last sibling whose successor is still smaller than l.
    BuildNode n = FirstChild;
    while (n.NextSibling != null && n.NextSibling.letter < l)
    {
        n = n.NextSibling;
    }

    // Found an existing child for this letter.
    if (n.NextSibling != null && n.NextSibling.letter == l)
    {
        return n.NextSibling;
    }

    // Otherwise splice a new node in between n and its successor.
    BuildNode created = new BuildNode(l, this);
    created.NextSibling = n.NextSibling;
    n.NextSibling = created;
    return created;
}
/// <summary>
/// Writes <paramref name="bn"/> into <paramref name="indexView"/> at slot
/// <paramref name="offset"/>, then writes its sibling chain and after that its children.
/// </summary>
/// <param name="bn">Node to write; <c>null</c> writes nothing.</param>
/// <param name="indexView">View accessor that receives the node records.</param>
/// <param name="offset">Next free slot; the byte position is <c>nodeSize * offset</c>.</param>
/// <returns>The next free slot after everything written by this call.</returns>
/// <exception cref="ArgumentException">
/// Thrown when the node's assigned offset is greater than <paramref name="offset"/>.
/// </exception>
internal UInt32 WriteNode(BuildNode bn, MemoryMappedViewAccessor indexView, UInt32 offset)
{
    if (bn == null)
    {
        return offset; // No node
    }

    if (bn.Offset != offset)
    {
        if (bn.Offset > offset)
        {
            throw new ArgumentException($"Offset {offset} does not match node at {bn.Offset}");
        }

        return offset; // Smaller offset: already written
    }

    Node record = bn.ToNode();
    indexView.Write(nodeSize * offset, ref record);

    // Sibling chain first, then children, threading the next free slot through both.
    UInt32 next = offset + 1;
    next = WriteNode(bn.NextSibling, indexView, next);
    next = WriteNode(bn.FirstChild, indexView, next);
    return next;
}
/// <summary>
/// Creates a node for <paramref name="letter"/> with the given parent and increments
/// <c>Count</c>.
/// </summary>
/// <param name="letter">Letter stored in this node.</param>
/// <param name="parent">Node this one is created under; the root is created with <c>null</c>.</param>
internal BuildNode(char letter, BuildNode parent)
{
    this.letter = letter;
    Parent = parent;
    Count += 1;
}