private long Truncate(string srcIxFileName)
{
    Log.InfoFormat("truncating {0}", srcIxFileName);

    var srcIx = SegmentInfo.Load(srcIxFileName);
    var srcDataFileName = Path.Combine(_directory, srcIx.Version + ".rdb");
    long version;

    using (var source = new FileStream(srcDataFileName, FileMode.Open))
    using (var documentStream = new DocumentTableStream(source, srcIx))
    {
        // Rewrite the segment's documents into a fresh segment.
        using (var upsert = new FullTextUpsertTransaction(
            _directory, _analyzer, srcIx.Compression, documentStream))
        {
            version = upsert.Write();
        }

        Log.InfoFormat("truncated ix {0}", version);
    }

    File.Delete(srcIxFileName);

    return version;
}
static void Rewrite(string[] args)
{
    var take = int.MaxValue;
    var skip = 0;
    string pk = null;
    bool gzip = false;
    bool lz = false;
    string dir = null;

    if (Array.IndexOf(args, "--take") > 0)
    {
        take = int.Parse(args[Array.IndexOf(args, "--take") + 1]);
    }
    if (Array.IndexOf(args, "--skip") > 0)
    {
        skip = int.Parse(args[Array.IndexOf(args, "--skip") + 1]);
    }
    if (Array.IndexOf(args, "--pk") > 0)
    {
        // pk is parsed here but not used by this command.
        pk = args[Array.IndexOf(args, "--pk") + 1];
    }
    if (Array.IndexOf(args, "--gzip") > 0)
    {
        gzip = true;
    }
    if (Array.IndexOf(args, "--lz") > 0)
    {
        lz = true;
    }
    if (Array.IndexOf(args, "--dir") > 0)
    {
        dir = args[Array.IndexOf(args, "--dir") + 1];
    }

    var compression = gzip
        ? Compression.GZip
        : lz ? Compression.Lz : Compression.NoCompression;

    var dataFileName = args[Array.IndexOf(args, "--file") + 1];

    // Pick the first index file next to the data file, ordered by file name.
    var ixFileName = Directory
        .GetFiles(Path.GetDirectoryName(dataFileName), "*.ix")
        .OrderBy(s => s)
        .First();

    var ix = SegmentInfo.Load(ixFileName);

    Console.WriteLine("rewriting...");

    var writeTimer = new Stopwatch();
    writeTimer.Start();

    using (var stream = new FileStream(dataFileName, FileMode.Open))
    using (var documents = new DocumentTableStream(stream, ix, skip, take))
    using (var upsert = new FullTextUpsertTransaction(dir, new Analyzer(), compression, documents))
    {
        upsert.Write();
    }

    Console.WriteLine("write operation took {0}", writeTimer.Elapsed);
}
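// A minimal sketch (not in the original source) of how the repeated
// Array.IndexOf option parsing above could be factored out. The names
// GetOption and HasFlag are assumptions, not existing APIs in this codebase.
private static string GetOption(string[] args, string name)
{
    // args[0] is the command name, so a flag is only recognized at index > 0.
    var i = Array.IndexOf(args, name);

    return (i > 0 && i + 1 < args.Length) ? args[i + 1] : null;
}

private static bool HasFlag(string[] args, string name)
{
    return Array.IndexOf(args, name) > 0;
}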
public static IDictionary<long, SegmentInfo> GetIndexVersionInfoInChronologicalOrder(string directory)
{
    // A Dictionary does not guarantee enumeration order, so use a SortedList
    // keyed on version to actually honor the "chronological order" contract.
    var list = new SortedList<long, SegmentInfo>();

    foreach (var file in Directory.GetFiles(directory, "*.ix"))
    {
        var version = long.Parse(Path.GetFileNameWithoutExtension(file));

        list.Add(version, SegmentInfo.Load(file));
    }

    return list;
}
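// Hypothetical usage sketch (not part of the original source): list segments
// oldest-first. Assumes SegmentInfo exposes the Length property used by Merge below.
private static void PrintSegments(string directory)
{
    foreach (var pair in GetIndexVersionInfoInChronologicalOrder(directory))
    {
        Console.WriteLine("segment {0} is {1} bytes", pair.Key, pair.Value.Length);
    }
}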
static void Export(string[] args)
{
    var take = int.MaxValue;
    var skip = 0;

    if (Array.IndexOf(args, "--take") > 0)
    {
        take = int.Parse(args[Array.IndexOf(args, "--take") + 1]);
    }
    if (Array.IndexOf(args, "--skip") > 0)
    {
        skip = int.Parse(args[Array.IndexOf(args, "--skip") + 1]);
    }

    var sourceFileName = args[Array.IndexOf(args, "--source-file") + 1];
    var targetFileName = args[Array.IndexOf(args, "--target-file") + 1];
    var dir = Path.GetDirectoryName(sourceFileName);
    var version = Path.GetFileNameWithoutExtension(sourceFileName);
    var ix = SegmentInfo.Load(Path.Combine(dir, version + ".ix"));

    Console.WriteLine("exporting...");

    var writeTimer = new Stopwatch();
    writeTimer.Start();

    using (var sourceStream = new FileStream(sourceFileName, FileMode.Open))
    using (var targetStream = new FileStream(targetFileName, FileMode.Create))
    using (var jsonWriter = new StreamWriter(targetStream, Encoding.UTF8))
    using (var documents = new DocumentTableStream(sourceStream, ix, skip, take))
    {
        var isFirst = true;

        jsonWriter.WriteLine("[");

        foreach (var document in documents.ReadSource())
        {
            // Separate array elements with commas so the output is valid JSON.
            if (!isFirst)
            {
                jsonWriter.WriteLine(",");
            }
            isFirst = false;

            var dic = document.Fields.ToDictionary(x => x.Key, y => y.Value.Value);
            var json = JsonConvert.SerializeObject(dic, Formatting.None);

            jsonWriter.Write(json);
        }

        jsonWriter.WriteLine();
        jsonWriter.Write("]");
    }

    Console.WriteLine("write operation took {0}", writeTimer.Elapsed);
}
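// Alternative sketch (an assumption, not the original approach): stream the
// array with Newtonsoft's JsonTextWriter so bracket and comma bookkeeping is
// handled by the writer instead of by hand. The helper name is hypothetical;
// it assumes the same document shape as Export above and `using Newtonsoft.Json;`.
static void ExportWithJsonWriter(DocumentTableStream documents, Stream targetStream)
{
    using (var writer = new StreamWriter(targetStream, Encoding.UTF8))
    using (var json = new JsonTextWriter(writer))
    {
        var serializer = new JsonSerializer();

        json.WriteStartArray();

        foreach (var document in documents.ReadSource())
        {
            var dic = document.Fields.ToDictionary(x => x.Key, y => y.Value.Value);

            serializer.Serialize(json, dic);
        }

        json.WriteEndArray();
    }
}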
private long Merge(string srcIxFileName, string targetIxFileName)
{
    Log.InfoFormat("merging branch {0} with trunk {1}", srcIxFileName, targetIxFileName);

    var srcIx = SegmentInfo.Load(srcIxFileName);
    var targetIx = SegmentInfo.Load(targetIxFileName);
    var srcDataFileName = Path.Combine(_directory, srcIx.Version + ".rdb");
    var targetDataFileName = Path.Combine(_directory, targetIx.Version + ".rdb");

    FileStream lockFile;

    if (!LockUtil.TryAquireWriteLock(_directory, out lockFile))
    {
        throw new InvalidOperationException(
            "Cannot merge because there are other writes in progress.");
    }

    using (lockFile)
    using (var source = new FileStream(srcDataFileName, FileMode.Open))
    using (var target = new FileStream(
        targetDataFileName, FileMode.Append, FileAccess.Write, FileShare.ReadWrite))
    {
        // The source segment is appended to the end of the target data file,
        // so every source offset shifts by the current length of the target.
        var newStartIndex = targetIx.Length;
        var fieldOffsets = new Dictionary<ulong, long>();

        foreach (var field in srcIx.FieldOffsets)
        {
            fieldOffsets[field.Key] = field.Value + newStartIndex;
        }

        srcIx.FieldOffsets = fieldOffsets;

        // Slice the source segment into its five sections.
        var tree = new byte[srcIx.PostingsOffset];
        var postings = new byte[srcIx.DocHashOffset - srcIx.PostingsOffset];
        var docHashes = new byte[srcIx.DocAddressesOffset - srcIx.DocHashOffset];
        var docAddresses = new byte[srcIx.KeyIndexOffset - srcIx.DocAddressesOffset];
        var documents = new byte[srcIx.Length - srcIx.KeyIndexOffset];

        var sum = tree.Length + postings.Length + docHashes.Length +
                  docAddresses.Length + documents.Length;

        if (sum != srcIx.Length)
        {
            throw new DataMisalignedException("Size of segment does not compute.");
        }

        // Note: these single Read calls assume each section is read in one pass;
        // see the stricter ReadExact sketch after this method.
        source.Read(tree, 0, tree.Length);
        source.Read(postings, 0, postings.Length);
        source.Read(docHashes, 0, docHashes.Length);
        source.Read(docAddresses, 0, docAddresses.Length);
        source.Read(documents, 0, documents.Length);

        // Append each section and record its new offset in the source index.
        target.Write(tree, 0, tree.Length);
        srcIx.PostingsOffset = target.Position;
        target.Write(postings, 0, postings.Length);
        srcIx.DocHashOffset = target.Position;
        target.Write(docHashes, 0, docHashes.Length);
        srcIx.DocAddressesOffset = target.Position;
        target.Write(docAddresses, 0, docAddresses.Length);
        srcIx.KeyIndexOffset = target.Position;
        target.Write(documents, 0, documents.Length);

        srcIx.Serialize(srcIxFileName);

        Log.InfoFormat("merged {0} with {1} creating a segmented index",
            srcIxFileName, targetIxFileName);
    }

    File.Delete(srcDataFileName);

    return srcIx.Version;
}
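// Stream.Read may legally return fewer bytes than requested, so the single
// Read calls in Merge rely on FileStream filling each buffer in one call.
// A minimal sketch of a stricter helper (ReadExact is an assumption, not an
// existing API in this codebase):
private static void ReadExact(Stream stream, byte[] buffer)
{
    var total = 0;

    while (total < buffer.Length)
    {
        var read = stream.Read(buffer, total, buffer.Length - total);

        if (read == 0)
        {
            throw new EndOfStreamException("Unexpected end of segment data.");
        }

        total += read;
    }
}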