/// <summary>
/// Appends one (document id, position) record to <paramref name="stream"/> for every
/// entry in <c>term.Positions</c>.
/// </summary>
/// <remarks>
/// Each record is the first <c>sizeof(int)</c> bytes of the document id followed by the first
/// <c>sizeof(int)</c> bytes of the position. Both values are emitted in little-endian byte
/// order regardless of the host architecture. Nothing is written when the term has no positions.
/// </remarks>
/// <param name="term">The term whose document id and positions are written.</param>
/// <param name="stream">The destination stream; it is written to at its current position.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="term"/> or <paramref name="stream"/> is null.
/// </exception>
public static void Serialize(this AnalyzedTerm term, Stream stream)
{
    if (term == null)
    {
        throw new ArgumentNullException("term");
    }

    if (stream == null)
    {
        throw new ArgumentNullException("stream");
    }

    // The document id is identical for every record, so encode it once up front
    // instead of once per position.
    byte[] idBytes = BitConverter.GetBytes(term.DocumentId);
    if (!BitConverter.IsLittleEndian)
    {
        Array.Reverse(idBytes);
    }

    foreach (var position in term.Positions)
    {
        byte[] dataBytes = BitConverter.GetBytes(position);
        if (!BitConverter.IsLittleEndian)
        {
            // Normalise to little-endian so the output does not depend on the host.
            Array.Reverse(dataBytes);
        }

        stream.Write(idBytes, 0, sizeof(int));
        stream.Write(dataBytes, 0, sizeof(int));
    }
}
// Once a node's Size exceeds this value its postings are moved from memory to a temporary file.
// Size is advanced by term.Positions.Count on each add, i.e. it counts positions, not bytes.
private const int InMemoryPositionLimit = 100000;

/// <summary>
/// Adds the characters of <paramref name="word"/>, starting at <paramref name="index"/>, as
/// descendants of this node and appends the postings of <paramref name="term"/> to the node
/// that represents the last character of the word.
/// </summary>
/// <remarks>
/// Children are kept in ascending order of their <c>Value</c>. The method recurses once per
/// remaining character. When the end-of-word node's <c>Size</c> exceeds the in-memory limit,
/// its postings stream is replaced by a delete-on-close temporary file in the current directory.
/// </remarks>
/// <param name="word">The word being indexed.</param>
/// <param name="index">Zero-based position in <paramref name="word"/> of the character to add
/// below this node. When it equals <c>word.Length</c> the call is a no-op.</param>
/// <param name="term">The term whose postings are serialized into the end-of-word node.</param>
/// <exception cref="ArgumentException"><paramref name="word"/> is null, empty or whitespace.</exception>
/// <exception cref="ArgumentNullException"><paramref name="term"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="index"/> is negative or greater than <c>word.Length</c>.
/// </exception>
public void Add(string word, int index, AnalyzedTerm term)
{
    if (string.IsNullOrWhiteSpace(word))
    {
        throw new ArgumentException("Value cannot be null, empty or whitespace.", "word");
    }

    // Validate before touching the trie so a bad call cannot leave half-inserted nodes behind.
    if (term == null)
    {
        throw new ArgumentNullException("term");
    }

    if (index < 0 || index > word.Length)
    {
        throw new ArgumentOutOfRangeException("index");
    }

    if (index == word.Length)
    {
        return;
    }

    var key = word[index];
    var eow = word.Length == index + 1;

    LcrsTrie node;
    if (!TryGetChild(key, out node))
    {
        node = new LcrsTrie(key, eow);
        InsertChildInLexicalOrder(node);
    }

    if (!eow)
    {
        node.Add(word, index + 1, term);
        return;
    }

    // An existing node may have been created as an interior node; mark it as a word end now.
    node.EndOfWord = true;

    if (node.PostingsStream == null)
    {
        node.PostingsStream = new MemoryStream();
    }

    term.Serialize(node.PostingsStream);

    if (node.WriteToDisk == false)
    {
        // The limit is checked before Size is advanced, so the spill happens on the
        // first add after the limit has been crossed.
        if (node.Size > InMemoryPositionLimit)
        {
            SpillPostingsToDisk(node);
        }
        else
        {
            node.Size += term.Positions.Count;
        }
    }
}

// Links a new child into this node's sibling chain, keeping the chain sorted by Value.
private void InsertChildInLexicalOrder(LcrsTrie node)
{
    if (LeftChild == null)
    {
        LeftChild = node;
        return;
    }

    if (LeftChild.Value > node.Value)
    {
        // New node sorts before the current first child and becomes the new head.
        node.RightSibling = LeftChild;
        LeftChild = node;
        return;
    }

    // Walk to the last sibling that sorts before the new node.
    // NOTE(review): relies on the caller having established (via TryGetChild) that no
    // existing sibling carries the same Value - confirm against TryGetChild.
    var sibling = LeftChild;
    while (sibling.RightSibling != null && sibling.RightSibling.Value < node.Value)
    {
        sibling = sibling.RightSibling;
    }

    node.RightSibling = sibling.RightSibling;
    sibling.RightSibling = node;
}

// Moves a node's in-memory postings to a delete-on-close temporary file and flags the node.
private static void SpillPostingsToDisk(LcrsTrie node)
{
    var fileName = Path.Combine(Directory.GetCurrentDirectory(), Path.GetRandomFileName());
    var fileStream = new FileStream(
        fileName,
        FileMode.Create,
        FileAccess.ReadWrite,
        FileShare.None,
        4096,
        FileOptions.DeleteOnClose);

    try
    {
        node.PostingsStream.Position = 0;
        node.PostingsStream.CopyTo(fileStream);
    }
    catch
    {
        // Do not leak the file handle, and leave the in-memory stream positioned
        // at its end so later appends still land after the existing postings.
        fileStream.Dispose();
        node.PostingsStream.Seek(0, SeekOrigin.End);
        throw;
    }

    // After CopyTo the file stream is positioned at its end, ready for further appends.
    node.PostingsStream.Dispose();
    node.PostingsStream = fileStream;
    node.WriteToDisk = true;
}
/// <summary>
/// Adds <paramref name="value"/> and the postings of <paramref name="term"/> to the tree
/// that <c>GetTree</c> returns for <paramref name="key"/>.
/// </summary>
/// <param name="key">Passed to <c>GetTree</c> to select the target tree.</param>
/// <param name="value">The word to insert, starting from its first character.</param>
/// <param name="term">The term forwarded to the tree's <c>Add</c> method.</param>
public void Add(string key, string value, AnalyzedTerm term)
{
    // Insertion always begins at character index 0 of the value.
    GetTree(key).Add(value, 0, term);
}