Пример #1
0
        /// <summary>
        /// Serializes a posting list into a byte array.
        /// The integer sequence handed to <c>VariableBytes.Compress</c> has the layout
        /// &lt; df, (docGap tf posGap1 posGap2 ...), (docGap tf posGap1 ...), ... &gt;
        /// where each document id and each position is stored as the difference
        /// from the previous one (the first of each is relative to 0).
        /// </summary>
        /// <param name="postings">the postings to encode, processed in list order</param>
        /// <returns>the byte array returned by <c>VariableBytes.Compress</c></returns>
        public byte[] Encoding(List <Posting> postings)
        {
            List<int> buffer = new List<int>();

            // Header: how many postings follow (document frequency).
            buffer.Add(postings.Count);

            int lastDocId = 0;
            for (int i = 0; i < postings.Count; i++)
            {
                Posting posting = postings[i];
                int     docId   = posting.DocumentId;

                // Document id, stored as the gap from the previous posting's id.
                buffer.Add(docId - lastDocId);

                // Term frequency, i.e. how many positions this posting carries.
                List<int> positions = posting.Positions;
                buffer.Add(positions.Count);

                // Positions, each stored as the gap from the previous position;
                // the gap base restarts at 0 for every posting.
                int lastPosition = 0;
                for (int k = 0; k < positions.Count; k++)
                {
                    int position = positions[k];
                    buffer.Add(position - lastPosition);
                    lastPosition = position;
                }

                lastDocId = docId;
            }

            return VariableBytes.Compress(buffer);
        }
Пример #2
0
        /// <summary>
        /// Serializes a list of (document id, term frequency) entries into a byte array.
        /// The integer sequence handed to <c>VariableBytes.Compress</c> has the layout
        /// &lt; df, (docGap tf), (docGap tf), ... &gt;
        /// where each document id is stored as the difference from the previous one.
        /// </summary>
        /// <param name="queue">
        /// the entries to encode; this list is sorted in place by ascending document id
        /// </param>
        /// <returns>the byte array returned by <c>VariableBytes.Compress</c></returns>
        public byte[] Encoding(List <MaxPriorityQueue.InvertedIndex> queue)
        {
            // Order by ascending document id so the gap between consecutive
            // entries is never negative. Note that this reorders the caller's list.
            queue.Sort((left, right) =>
            {
                int leftId  = left.GetDocumentId();
                int rightId = right.GetDocumentId();
                return leftId.CompareTo(rightId);
            });

            List<int> buffer = new List<int>();

            // Header: how many entries follow (document frequency).
            buffer.Add(queue.Count);

            int lastDocId = 0;
            for (int i = 0; i < queue.Count; i++)
            {
                MaxPriorityQueue.InvertedIndex entry = queue[i];

                // Document id, stored as the gap from the previous entry's id.
                int docId = entry.GetDocumentId();
                buffer.Add(docId - lastDocId);

                // Term frequency, stored as-is.
                int frequency = entry.GetTermFreq();
                buffer.Add(frequency);

                lastDocId = docId;
            }

            return VariableBytes.Compress(buffer);
        }
Пример #3
0
        /// <summary>
        /// Rebuilds the posting list of a term from its encoded byte form.
        /// After <c>VariableBytes.DecompressToInts</c>, the integer sequence is read as
        /// &lt; df, (docGap tf posGap1 posGap2 ...), (docGap tf posGap1 ...), ... &gt;
        /// where document ids and positions are gaps that are summed back
        /// into absolute values while reading.
        /// </summary>
        /// <param name="value">the encoded bytes of one posting list</param>
        /// <returns>the postings, in stored order, with absolute document ids and positions</returns>
        public List <Posting> Decoding(byte[] value)
        {
            List<int> stream = VariableBytes.DecompressToInts(value);

            // No length validation is performed: a sequence shorter than its header
            // implies surfaces as an indexing exception from the list.
            int cursor = 0;

            // Header: number of postings that follow (document frequency).
            int postingCount = stream[cursor++];

            List<Posting> result = new List<Posting>();

            int runningDocId = 0;
            for (int doc = 0; doc < postingCount; doc++)
            {
                // Accumulate the gap to recover the absolute document id.
                runningDocId += stream[cursor++];

                List<int> positions = new List<int>();

                // Number of positions stored for this posting (term frequency).
                int positionCount = stream[cursor++];

                // Position gaps restart from 0 for every posting.
                int runningPosition = 0;
                for (int k = 0; k < positionCount; k++)
                {
                    runningPosition += stream[cursor++];
                    positions.Add(runningPosition);
                }

                result.Add(new Posting(runningDocId, positions));
            }

            return result;
        }
Пример #4
0
        /// <summary>
        /// Rebuilds a list of (document id, term frequency) entries from its encoded byte form.
        /// After <c>VariableBytes.DecompressToInts</c>, the integer sequence is read as
        /// &lt; df, (docGap tf), (docGap tf), ... &gt;
        /// where document ids are gaps that are summed back into absolute values.
        /// </summary>
        /// <param name="value">the encoded bytes</param>
        /// <returns>the decoded entries, in stored order, with absolute document ids</returns>
        public List <MaxPriorityQueue.InvertedIndex> Decoding(byte[] value)
        {
            List<int> stream = VariableBytes.DecompressToInts(value);

            int cursor = 0;

            // Header: number of entries that follow (document frequency).
            int entryCount = stream[cursor++];

            List<MaxPriorityQueue.InvertedIndex> entries = new List<MaxPriorityQueue.InvertedIndex>();

            int runningDocId = 0;
            for (int n = 0; n < entryCount; n++)
            {
                // Accumulate the gap to recover the absolute document id.
                runningDocId += stream[cursor++];

                // Term frequency is stored as-is right after the document gap.
                int frequency = stream[cursor++];

                entries.Add(new MaxPriorityQueue.InvertedIndex(frequency, runningDocId));
            }

            return entries;
        }
Пример #5
0
 /// <summary>
 /// Decodes a byte array into a single integer by delegating to
 /// <c>VariableBytes.Decode</c>.
 /// </summary>
 /// <param name="value">the encoded bytes</param>
 /// <returns>the integer returned by <c>VariableBytes.Decode</c></returns>
 public int Decoding(byte[] value)
 {
     int decoded = VariableBytes.Decode(value);
     return decoded;
 }
Пример #6
0
 /// <summary>
 /// Encodes a single integer into a byte array by delegating to
 /// <c>VariableBytes.Encode</c>.
 /// </summary>
 /// <param name="value">the integer to encode</param>
 /// <returns>the byte array returned by <c>VariableBytes.Encode</c></returns>
 public byte[] Encoding(int value)
 {
     byte[] encoded = VariableBytes.Encode(value);
     return encoded;
 }
 /// <summary>
 /// Decodes a byte array into a list of integers by delegating to
 /// <c>VariableBytes.DecompressToInts</c>.
 /// </summary>
 /// <param name="value">the encoded bytes</param>
 /// <returns>the integers returned by <c>VariableBytes.DecompressToInts</c></returns>
 public List <int> Decoding(byte[] value)
 {
     List<int> decoded = VariableBytes.DecompressToInts(value);
     return decoded;
 }
 /// <summary>
 /// Encodes a list of integers into a byte array by delegating to
 /// <c>VariableBytes.Compress</c>.
 /// </summary>
 /// <param name="value">the integers to encode</param>
 /// <returns>the byte array returned by <c>VariableBytes.Compress</c></returns>
 public byte[] Encoding(List <int> value)
 {
     byte[] encoded = VariableBytes.Compress(value);
     return encoded;
 }