Exemplo n.º 1
0
        /// <summary>
        /// Reads the skip document index from <paramref name="stream"/>.
        /// The skip index marks a document id and a position for every 1000 records or more
        /// inside the index buffer.
        /// </summary>
        /// <param name="stream">Stream of the index.</param>
        /// <param name="skipBeginByte">
        /// True when the leading 0 value has already been read by the caller, so it is
        /// not read again here; false to read and check the leading value in this method.
        /// </param>
        /// <param name="count">
        /// When a skip index is present, the number of skip entries; otherwise the leading
        /// value that was read (the count of document records of this index).
        /// </param>
        /// <returns>
        /// The decoded skip entries, or null when the leading value is positive
        /// (the index has no skip index).
        /// </returns>
        static public List <DocumentPosition> DeserializeSkipDocIndex(System.IO.Stream stream, bool skipBeginByte, out int count)
        {
            if (!skipBeginByte)
            {
                // The leading value doubles as a marker: a positive value means that
                // there is no skip index and the value itself is the record count.
                count = VInt.sReadFromStream(stream);

                if (count > 0)
                {
                    return null;
                }
            }

            // Number of entries stored in the skip index.
            count = VInt.sReadFromStream(stream);

            List <DocumentPosition> entries = new List <DocumentPosition>();

            // Both fields are accumulated from the values read, starting at zero.
            // The position is relative to the first byte of the index.
            int runningDocId    = 0;
            int runningPosition = 0;

            for (int remaining = count; remaining > 0; remaining--)
            {
                runningDocId    += VInt.sReadFromStream(stream);
                runningPosition += VInt.sReadFromStream(stream);

                entries.Add(new DocumentPosition(runningDocId, runningPosition));
            }

            return entries;
        }
Exemplo n.º 2
0
        /// <summary>
        /// Reads the next document entry from <paramref name="stream"/>.
        /// </summary>
        /// <param name="docId">
        /// On entry, the previous document id, or a negative value when there is none.
        /// On exit, the id of the entry just read: the value read from the stream when the
        /// incoming id was negative, otherwise that value added to the incoming id.
        /// </param>
        /// <param name="stream">Stream positioned at the start of a document entry.</param>
        /// <param name="simple">
        /// When false, one additional value (the first position) is read and passed to the result.
        /// </param>
        /// <returns>The entry built from the values read.</returns>
        static public DocumentPositionList GetNextDocumentPositionList(ref int docId, System.IO.Stream stream, bool simple)
        {
            int storedId = VInt.sReadFromStream(stream);

            // A negative incoming id means there is no previous document, so the stored
            // value is used as-is; otherwise it is added to the previous id.
            docId = docId < 0 ? storedId : storedId + docId;

            // One stored value carries two fields: the quotient and remainder of 8.
            int   packed    = VInt.sReadFromStream(stream);
            int   quotient  = packed / 8;
            Int16 remainder = (Int16)(packed % 8);

            if (simple)
            {
                return new DocumentPositionList(docId, quotient, remainder);
            }

            int firstPosition = VInt.sReadFromStream(stream);

            return new DocumentPositionList(docId, quotient, remainder, firstPosition);
        }
Exemplo n.º 3
0
        /// <summary>
        /// Merges two index segments into <paramref name="destStream"/>.
        /// The output is: the sum of both docs counts, the body of src1 (without its
        /// trailing 4-byte last doc id), the first doc id of src2 rewritten as a gap from
        /// the last doc id of src1, then the remaining bytes of src2 copied verbatim.
        /// </summary>
        /// <param name="src1">First segment, positioned at its docs count.</param>
        /// <param name="src1Length">Length in bytes of the first segment, measured from the current position of src1.</param>
        /// <param name="src2">Second segment, positioned at its docs count.</param>
        /// <param name="src2Length">Length in bytes of the second segment, measured from the current position of src2.</param>
        /// <param name="destStream">Stream that receives the merged segment.</param>
        /// <exception cref="System.IO.EndOfStreamException">
        /// src1 ends before its trailing 4-byte last doc id could be read completely.
        /// </exception>
        static public void Merge(System.IO.Stream src1, long src1Length, System.IO.Stream src2, long src2Length, System.IO.Stream destStream)
        {
            long src1EndPosition = src1.Position + src1Length;
            long src2EndPosition = src2.Position + src2Length;

            int src1DocsCount = VInt.sReadFromStream(src1);
            int src2DocsCount = VInt.sReadFromStream(src2);

            //Write docs count
            VInt.sWriteToStream(src1DocsCount + src2DocsCount, destStream);

            //Merge src1: copy everything except the trailing 4-byte last doc id
            byte[] buf    = new byte[8192];
            int    remain = (int)(src1EndPosition - sizeof(int) - src1.Position);

            int len = src1.Read(buf, 0, Math.Min(buf.Length, remain));

            while (len > 0)
            {
                destStream.Write(buf, 0, len);

                remain -= len;
                len     = src1.Read(buf, 0, Math.Min(buf.Length, remain));
            }

            //Get last docid of src1
            byte[] lastDocIdBuf = new byte[sizeof(int)];

            // Stream.Read may return fewer bytes than requested, so keep reading until
            // all 4 bytes are available instead of trusting a single call.
            int filled = 0;

            while (filled < lastDocIdBuf.Length)
            {
                int got = src1.Read(lastDocIdBuf, filled, lastDocIdBuf.Length - filled);

                if (got <= 0)
                {
                    throw new System.IO.EndOfStreamException("Unexpected end of stream while reading the last doc id of src1.");
                }

                filled += got;
            }

            int lastDocid = BitConverter.ToInt32(lastDocIdBuf, 0);

            //Get first docid of src2
            int src2FirstDocId = VInt.sReadFromStream(src2);

            //Write gap between above
            VInt.sWriteToStream(src2FirstDocId - lastDocid, destStream);

            //Merge src2: the rest of src2 is copied as-is, up to its end position
            remain = (int)(src2EndPosition - src2.Position);

            len = src2.Read(buf, 0, Math.Min(buf.Length, remain));

            while (len > 0)
            {
                destStream.Write(buf, 0, len);

                remain -= len;
                len     = src2.Read(buf, 0, Math.Min(buf.Length, remain));
            }
        }
Exemplo n.º 4
0
        /// <summary>
        /// Merges several index segments into <paramref name="destStream"/>.
        /// The merged body is first built in a memory buffer; the destination then receives,
        /// in order: the merged skip doc index (only when it has entries), the total docs
        /// count, and the buffered body ending with the 4-byte last doc id.
        /// </summary>
        /// <param name="srcList">
        /// Source segments in merge order. Each stream must be positioned at the start of its
        /// segment; Count and SkipDocIndex of each item are assigned by this method.
        /// </param>
        /// <param name="destStream">Stream that receives the merged segment.</param>
        /// <exception cref="System.IO.EndOfStreamException">
        /// A source ends before its trailing 4-byte last doc id could be read completely.
        /// </exception>
        static public void Merge(IList <MergeStream> srcList, System.IO.Stream destStream)
        {
            // End position of every source, captured before anything is read from it.
            List <long> srcEndPositionList = new List <long>();

            for (int i = 0; i < srcList.Count; i++)
            {
                srcEndPositionList.Add(srcList[i].Stream.Position + srcList[i].Length);
            }

            int docsCount = 0;

            List <DocumentPosition> skipDocIndex = new List <DocumentPosition>();

            foreach (MergeStream ms in srcList)
            {
                int count = VInt.sReadFromStream(ms.Stream);
                // NOTE(review): ms.Count keeps this first value, i.e. 0 when the source
                // carries a skip doc index - confirm that is intended.
                ms.Count = count;

                if (count == 0)
                {
                    //This index has skip doc index
                    ms.SkipDocIndex = (DeserializeSkipDocIndex(ms.Stream, true));

                    // The real docs count follows the skip doc index.
                    count = VInt.sReadFromStream(ms.Stream);
                }

                docsCount += count;
            }

            int lastDocId = -1;

            // The body is assembled in memory because the skip doc index and docs count,
            // which are only known after the merge, must precede it in the output.
            System.IO.Stream originalDestStream = destStream;

            destStream = new System.IO.MemoryStream(8192);

            int remainCount = 0; //remain count does not build in skip doc index

            // Scratch buffers shared by all sources.
            byte[] buf          = new byte[8192];
            byte[] lastDocIdBuf = new byte[sizeof(int)];

            for (int i = 0; i < srcList.Count; i++)
            {
                System.IO.Stream src = srcList[i].Stream;

                long srcFirstDocIdPosition = src.Position;

                int firstDocId = VInt.sReadFromStream(src);

                long srcFirstDocIdLength = src.Position - srcFirstDocIdPosition;

                long destFirstDocIdPosition = destStream.Position;

                // The first source keeps its first doc id; later ones store the gap
                // from the last doc id of the previous source.
                if (lastDocId < 0)
                {
                    VInt.sWriteToStream(firstDocId, destStream);
                }
                else
                {
                    VInt.sWriteToStream(firstDocId - lastDocId, destStream);
                }

                long destFirstDocIdLength = destStream.Position - destFirstDocIdPosition;

                // Difference in encoded size of the first doc id between source and
                // destination; applied to the skip positions copied from the source.
                int delta = (int)(destFirstDocIdLength - srcFirstDocIdLength);

                //build skip doc index
                if (srcList[i].SkipDocIndex != null)
                {
                    //Merge skip doc index
                    if (i > 0)
                    {
                        skipDocIndex.Add(new DocumentPosition(lastDocId, (int)destFirstDocIdPosition));
                    }

                    foreach (DocumentPosition dp in srcList[i].SkipDocIndex)
                    {
                        skipDocIndex.Add(new DocumentPosition(dp.DocId,
                                                              (int)(destFirstDocIdPosition + dp.Position + delta)));
                    }
                }
                else
                {
                    // NOTE(review): when the threshold is exceeded the counter is reset and
                    // the current source's Count is not added - confirm that is intended.
                    if (remainCount > 1024)
                    {
                        skipDocIndex.Add(new DocumentPosition(lastDocId, (int)destFirstDocIdPosition));
                        remainCount = 0;
                    }
                    else
                    {
                        remainCount += srcList[i].Count;
                    }
                }

                // Copy the rest of the source except its trailing 4-byte last doc id.
                int remain = (int)(srcEndPositionList[i] - sizeof(int) - src.Position);

                int len = src.Read(buf, 0, Math.Min(buf.Length, remain));

                while (len > 0)
                {
                    destStream.Write(buf, 0, len);

                    remain -= len;

                    len = src.Read(buf, 0, Math.Min(buf.Length, remain));
                }

                //Get last docid of src
                // Stream.Read may return fewer bytes than requested, so keep reading until
                // all 4 bytes are available instead of trusting a single call.
                int filled = 0;

                while (filled < lastDocIdBuf.Length)
                {
                    int got = src.Read(lastDocIdBuf, filled, lastDocIdBuf.Length - filled);

                    if (got <= 0)
                    {
                        throw new System.IO.EndOfStreamException("Unexpected end of stream while reading the last doc id.");
                    }

                    filled += got;
                }

                lastDocId = BitConverter.ToInt32(lastDocIdBuf, 0);
            }

            //Write last doc id
            destStream.Write(BitConverter.GetBytes(lastDocId), 0, sizeof(int));

            //Write skip doc index
            if (skipDocIndex.Count > 0)
            {
                SerializeSkipDocIndex(originalDestStream, skipDocIndex);
            }

            //Write docs count
            VInt.sWriteToStream(docsCount, originalDestStream);


            //Write memory buffer to original dest stream
            destStream.Position = 0;

            byte[] buffer = new byte[8192];
            int    c      = 0;

            do
            {
                c = destStream.Read(buffer, 0, buffer.Length);

                if (c > 0)
                {
                    originalDestStream.Write(buffer, 0, c);
                }
            } while (c > 0);
        }
Exemplo n.º 5
0
        /// <summary>
        /// Reads document entries from an index stream.
        /// </summary>
        /// <param name="stream">Stream positioned at the docs count of the index.</param>
        /// <param name="documentsCount">
        /// On entry, the maximum number of entries to return. On exit, the docs count
        /// stored in the stream.
        /// </param>
        /// <param name="simple">
        /// When false, one additional value (the first position) is read for every entry.
        /// </param>
        /// <param name="wordCountSum">
        /// Sum of the per-entry counts of the entries after the first one; set to 1 when
        /// exactly one entry is returned.
        /// </param>
        /// <returns>
        /// The entries read; at most the smaller of the stored docs count and the incoming
        /// <paramref name="documentsCount"/>.
        /// </returns>
        static public DocumentPositionList[] Deserialize(System.IO.Stream stream, ref int documentsCount, bool simple, out long wordCountSum)
        {
            wordCountSum = 0;

            int docsCount = VInt.sReadFromStream(stream);

            if (docsCount == 0)
            {
                //This index has skip doc index
                // It is read only to advance the stream; the result is discarded.
                DeserializeSkipDocIndex(stream, true);

                docsCount = VInt.sReadFromStream(stream);
            }

            // Docs count as stored in the stream, reported back through documentsCount.
            int relDocCount = docsCount;

            int lastDocId = VInt.sReadFromStream(stream);

            int count = VInt.sReadFromStream(stream);

            docsCount = Math.Min(docsCount, documentsCount);

            DocumentPositionList[] result = new DocumentPositionList[docsCount];

            if (docsCount <= 0)
            {
                documentsCount = relDocCount;
                return(result);
            }

            if (!simple)
            {
                int firstPosition = VInt.sReadFromStream(stream);
                result[0] = new DocumentPositionList(lastDocId, count / 8, (Int16)(count % 8), firstPosition);
            }
            else
            {
                result[0] = new DocumentPositionList(lastDocId, count / 8, (Int16)(count % 8));
            }

            // NOTE(review): the count of the first entry is never added to wordCountSum;
            // a single-entry result reports 1 instead - confirm that is intended.
            if (docsCount == 1)
            {
                wordCountSum = 1;
            }

            for (int i = 1; i < docsCount; i++)
            {
                // Doc ids after the first are stored as gaps from the previous doc id.
                lastDocId = VInt.sReadFromStream(stream) + lastDocId;
                count     = VInt.sReadFromStream(stream);

                // Clamp in int space. Casting to Int16 before the comparison would wrap
                // large values and make the clamp unreachable.
                int docCount = count / 8;

                if (docCount > Int16.MaxValue)
                {
                    docCount = Int16.MaxValue;
                }

                if (!simple)
                {
                    int firstPosition = VInt.sReadFromStream(stream);
                    result[i] = new DocumentPositionList(lastDocId, docCount, (Int16)(count % 8), firstPosition);
                }
                else
                {
                    result[i] = new DocumentPositionList(lastDocId, docCount, (Int16)(count % 8));
                }

                wordCountSum += docCount;
            }

            documentsCount = relDocCount;

            return(result);
        }
Exemplo n.º 6
0
 /// <summary>
 /// Reads one value from <paramref name="stream"/> with VInt.sReadFromStream and
 /// returns it as the documents count.
 /// </summary>
 /// <param name="stream">Stream positioned at the documents count.</param>
 /// <returns>The value read from the stream.</returns>
 static public int GetDocumentsCount(System.IO.Stream stream)
 {
     int documentsCount = VInt.sReadFromStream(stream);

     return documentsCount;
 }