/// <summary>
/// Concatenates two serialized doc lists into <paramref name="destStream"/>.
/// Each source is read as: a VInt document count, a VInt-encoded body and a
/// trailing 4-byte last doc id. The output is: the summed count, the body of
/// <paramref name="src1"/> (without its trailing last doc id), the gap between
/// the first doc id of <paramref name="src2"/> and the last doc id of
/// <paramref name="src1"/>, and then the rest of <paramref name="src2"/>
/// (including its trailing last doc id).
/// </summary>
/// <param name="src1">First source, positioned at the start of its segment.</param>
/// <param name="src1Length">Length in bytes of the segment in <paramref name="src1"/>.</param>
/// <param name="src2">Second source, positioned at the start of its segment.</param>
/// <param name="src2Length">Length in bytes of the segment in <paramref name="src2"/>.</param>
/// <param name="destStream">Stream that receives the merged data.</param>
/// <exception cref="System.IO.EndOfStreamException">
/// <paramref name="src1"/> ends before its trailing last doc id could be read completely.
/// </exception>
/// <remarks>
/// NOTE(review): unlike the IList overload of Merge, this overload does not
/// treat a leading count of 0 as a skip-doc-index marker - confirm that callers
/// never pass such input here.
/// </remarks>
static public void Merge(System.IO.Stream src1, long src1Length, System.IO.Stream src2, long src2Length, System.IO.Stream destStream)
{
    long src1EndPosition = src1.Position + src1Length;
    long src2EndPosition = src2.Position + src2Length;

    int src1DocsCount = VInt.sReadFromStream(src1);
    int src2DocsCount = VInt.sReadFromStream(src2);

    //Write docs count
    VInt.sWriteToStream(src1DocsCount + src2DocsCount, destStream);

    //Merge src1: copy everything up to (not including) the trailing last doc id.
    //The remaining byte count is kept as long so segments of 2 GiB or more do not overflow.
    byte[] buf = new byte[8192];
    long remain = src1EndPosition - sizeof(int) - src1.Position;
    int len = src1.Read(buf, 0, (int)Math.Min((long)buf.Length, remain));

    while (len > 0)
    {
        destStream.Write(buf, 0, len);
        remain -= len;
        len = src1.Read(buf, 0, (int)Math.Min((long)buf.Length, remain));
    }

    //Get last docid of src1.
    //Stream.Read may return fewer bytes than requested, so keep reading until
    //all four bytes have arrived instead of decoding a partly filled buffer.
    byte[] lastDocIdBuf = new byte[sizeof(int)];
    int filled = 0;

    while (filled < lastDocIdBuf.Length)
    {
        int got = src1.Read(lastDocIdBuf, filled, lastDocIdBuf.Length - filled);

        if (got <= 0)
        {
            throw new System.IO.EndOfStreamException("Unexpected end of stream while reading the last doc id of src1.");
        }

        filled += got;
    }

    int lastDocid = BitConverter.ToInt32(lastDocIdBuf, 0);

    //Get first docid of src2
    int src2FirstDocId = VInt.sReadFromStream(src2);

    //Write gap between above
    VInt.sWriteToStream(src2FirstDocId - lastDocid, destStream);

    //Merge src2: copy the rest of the segment, trailing last doc id included
    remain = src2EndPosition - src2.Position;
    len = src2.Read(buf, 0, (int)Math.Min((long)buf.Length, remain));

    while (len > 0)
    {
        destStream.Write(buf, 0, len);
        remain -= len;
        len = src2.Read(buf, 0, (int)Math.Min((long)buf.Length, remain));
    }
}
/// <summary>
/// Writes a skip doc index to <paramref name="stream"/> as VInts: a 0 begin
/// flag, the number of entries, and then for every entry the doc id and the
/// position, each stored as the difference from the previous entry (the first
/// entry is relative to 0). Nothing is written when the list is empty.
/// </summary>
/// <param name="stream">Stream that receives the encoded index.</param>
/// <param name="skipDocIndex">Entries to write, in list order.</param>
static private void SerializeSkipDocIndex(System.IO.Stream stream, List <DocumentPosition> skipDocIndex)
{
    int entryCount = skipDocIndex.Count;

    if (entryCount > 0)
    {
        //A leading zero marks the start of a skip doc index
        VInt.sWriteToStream(0, stream);

        //Number of entries that follow
        VInt.sWriteToStream(entryCount, stream);

        int previousDocId = 0;
        int previousPosition = 0;

        foreach (DocumentPosition entry in skipDocIndex)
        {
            //Doc id as a delta from the previous entry
            int currentDocId = entry.DocId;
            VInt.sWriteToStream(currentDocId - previousDocId, stream);
            previousDocId = currentDocId;

            //Position as a delta from the previous entry
            int currentPosition = entry.Position;
            VInt.sWriteToStream(currentPosition - previousPosition, stream);
            previousPosition = currentPosition;
        }
    }
}
/// <summary>
/// Merges several serialized doc lists, in list order, into
/// <paramref name="destStream"/>.
/// Each source is read as: a VInt count (a count of 0 means a skip doc index
/// follows, and the real count comes after it), a VInt first doc id, the rest
/// of the body, and a trailing 4-byte last doc id.
/// The output is: an optional skip doc index, the summed document count, the
/// merged body and the 4-byte last doc id of the final source. The merged body
/// is assembled in a memory buffer first because the skip doc index, which is
/// written ahead of it, is only complete once every source has been processed.
/// </summary>
/// <param name="srcList">
/// Sources to merge; each stream must be positioned at the start of its
/// segment. <c>Count</c> and <c>SkipDocIndex</c> of every item are updated
/// while the headers are read.
/// </param>
/// <param name="destStream">Stream that receives the merged data.</param>
/// <exception cref="System.IO.EndOfStreamException">
/// A source ends before its trailing last doc id could be read completely.
/// </exception>
static public void Merge(IList <MergeStream> srcList, System.IO.Stream destStream)
{
    //Capture every end position before any header is consumed
    List <long> srcEndPositionList = new List <long>();

    for (int i = 0; i < srcList.Count; i++)
    {
        srcEndPositionList.Add(srcList[i].Stream.Position + srcList[i].Length);
    }

    int docsCount = 0;
    List <DocumentPosition> skipDocIndex = new List <DocumentPosition>();

    foreach (MergeStream ms in srcList)
    {
        int count = VInt.sReadFromStream(ms.Stream);

        //NOTE(review): Count keeps the raw first value, i.e. 0 for a source that
        //carries a skip doc index - confirm whether the real count was intended.
        ms.Count = count;

        if (count == 0)
        {
            //This index has skip doc index
            ms.SkipDocIndex = (DeserializeSkipDocIndex(ms.Stream, true));
            count = VInt.sReadFromStream(ms.Stream);
        }

        docsCount += count;
    }

    int lastDocId = -1;
    int remainCount = 0; //remain count does not build in skip doc index

    //Scratch buffers shared by all sources instead of being reallocated per source
    byte[] buf = new byte[8192];
    byte[] lastDocIdBuf = new byte[sizeof(int)];

    using (System.IO.MemoryStream body = new System.IO.MemoryStream(8192))
    {
        for (int i = 0; i < srcList.Count; i++)
        {
            System.IO.Stream src = srcList[i].Stream;

            long srcFirstDocIdPosition = src.Position;
            int firstDocId = VInt.sReadFromStream(src);
            long srcFirstDocIdLength = src.Position - srcFirstDocIdPosition;

            long destFirstDocIdPosition = body.Position;

            //The very first doc id is written as is; later ones as the gap
            //from the last doc id of the previous source.
            if (lastDocId < 0)
            {
                VInt.sWriteToStream(firstDocId, body);
            }
            else
            {
                VInt.sWriteToStream(firstDocId - lastDocId, body);
            }

            long destFirstDocIdLength = body.Position - destFirstDocIdPosition;

            //Re-encoding the first doc id can change its byte length; delta
            //shifts the skip positions taken from the source accordingly.
            int delta = (int)(destFirstDocIdLength - srcFirstDocIdLength);

            //build skip doc index
            if (srcList[i].SkipDocIndex != null)
            {
                //Merge skip doc index
                if (i > 0)
                {
                    skipDocIndex.Add(new DocumentPosition(lastDocId, (int)destFirstDocIdPosition));
                }

                foreach (DocumentPosition dp in srcList[i].SkipDocIndex)
                {
                    skipDocIndex.Add(new DocumentPosition(dp.DocId, (int)(destFirstDocIdPosition + dp.Position + delta)));
                }
            }
            else
            {
                //NOTE(review): when an entry is added, the current source's
                //Count is not carried into remainCount - confirm this is intended.
                if (remainCount > 1024)
                {
                    skipDocIndex.Add(new DocumentPosition(lastDocId, (int)destFirstDocIdPosition));
                    remainCount = 0;
                }
                else
                {
                    remainCount += srcList[i].Count;
                }
            }

            //Copy the body up to (not including) the trailing last doc id.
            //The remaining byte count is kept as long so large segments do not overflow.
            long remain = srcEndPositionList[i] - sizeof(int) - src.Position;
            int len = src.Read(buf, 0, (int)Math.Min((long)buf.Length, remain));

            while (len > 0)
            {
                body.Write(buf, 0, len);
                remain -= len;
                len = src.Read(buf, 0, (int)Math.Min((long)buf.Length, remain));
            }

            //Get last docid of src.
            //Stream.Read may return fewer bytes than requested, so keep reading
            //until all four bytes have arrived.
            int filled = 0;

            while (filled < lastDocIdBuf.Length)
            {
                int got = src.Read(lastDocIdBuf, filled, lastDocIdBuf.Length - filled);

                if (got <= 0)
                {
                    throw new System.IO.EndOfStreamException("Unexpected end of stream while reading the last doc id of a merge source.");
                }

                filled += got;
            }

            lastDocId = BitConverter.ToInt32(lastDocIdBuf, 0);
        }

        //Write last doc id
        body.Write(BitConverter.GetBytes(lastDocId), 0, sizeof(int));

        //Write skip doc index
        if (skipDocIndex.Count > 0)
        {
            SerializeSkipDocIndex(destStream, skipDocIndex);
        }

        //Write docs count
        VInt.sWriteToStream(docsCount, destStream);

        //Write memory buffer to dest stream (WriteTo copies the whole buffer
        //regardless of the current position)
        body.WriteTo(destStream);
    }
}
/// <summary>
/// Writes a doc list to <paramref name="stream"/>: the VInt document count,
/// then one record for <paramref name="first"/> followed by one record for
/// every element of <paramref name="docPositions"/> except its first element,
/// and finally the last written doc id as 4 raw bytes.
/// A record holds the doc id (the first one absolute, later ones as the
/// difference from the previous doc id), a packed count value and, unless
/// <paramref name="simple"/> is set, the first position.
/// </summary>
/// <param name="first">Entry whose record is written first.</param>
/// <param name="docsCount">Document count stored at the head of the output.</param>
/// <param name="docPositions">
/// Entries to write; the first element yielded is skipped (presumably it is
/// the same entry as <paramref name="first"/> - verify against callers).
/// </param>
/// <param name="stream">Stream that receives the encoded data.</param>
/// <param name="simple">When true, first positions are left out.</param>
static public void Serialize(DocumentPositionList first, int docsCount, IEnumerable <DocumentPositionList> docPositions, System.IO.Stream stream, bool simple)
{
    //Header: number of documents
    VInt.sWriteToStream(docsCount, stream);

    //Record for the first document, with its doc id stored absolute
    int previousDocId = first.DocumentId;
    VInt.sWriteToStream(previousDocId, stream);

    //Packed value: count capped at 32767, multiplied by 8 (a 3 bit shift),
    //plus _TotalWordsInThisDocumentIndex
    int packed = Math.Min(first.Count, 32767) * 8 + first._TotalWordsInThisDocumentIndex;
    VInt.sWriteToStream(packed, stream);

    if (!simple)
    {
        VInt.sWriteToStream(first.FirstPosition, stream);
    }

    bool firstElementSkipped = false;

    foreach (DocumentPositionList docPosition in docPositions)
    {
        if (!firstElementSkipped)
        {
            //The leading element of the sequence is never written here
            firstElementSkipped = true;
            continue;
        }

        //Doc id as the gap from the previously written one
        int currentDocId = docPosition.DocumentId;
        VInt.sWriteToStream(currentDocId - previousDocId, stream);

        packed = Math.Min(docPosition.Count, 32767) * 8 + docPosition._TotalWordsInThisDocumentIndex;
        VInt.sWriteToStream(packed, stream);

        if (!simple)
        {
            VInt.sWriteToStream(docPosition.FirstPosition, stream);
        }

        previousDocId = currentDocId;
    }

    //Trailer: last written doc id as raw bytes
    byte[] trailer = BitConverter.GetBytes(previousDocId);
    stream.Write(trailer, 0, trailer.Length);
}