/// <summary>
/// Encodes a posting list as bytes.
/// The integer sequence handed to <c>VariableBytes.Compress</c> has the form
/// &lt; df, (docGap tf posGap1 posGap2 ...), (docGap tf posGap1 ...), ... &gt;
/// where every document ID is written as its difference from the previous
/// posting's document ID (the first one relative to 0) and every position as
/// its difference from the previous position in the same posting (the first
/// one relative to 0).
/// </summary>
/// <param name="postings">The posting list to encode.</param>
/// <returns>The byte array produced by <c>VariableBytes.Compress</c> for that sequence.</returns>
public byte[] Encoding(List<Posting> postings)
{
    List<int> buffer = new List<int>();

    // Header: document frequency (number of postings)
    buffer.Add(postings.Count);

    int lastDocId = 0;
    for (int i = 0; i < postings.Count; i++)
    {
        Posting current = postings[i];
        int docId = current.DocumentId;

        // Document ID, stored as the gap from the previous posting
        buffer.Add(docId - lastDocId);
        lastDocId = docId;

        // Term frequency (number of positions in this posting)
        List<int> termPositions = current.Positions;
        buffer.Add(termPositions.Count);

        // Positions, each stored as the gap from the one before it;
        // the running position restarts at 0 for every posting
        int lastPosition = 0;
        for (int j = 0; j < termPositions.Count; j++)
        {
            int position = termPositions[j];
            buffer.Add(position - lastPosition);
            lastPosition = position;
        }
    }

    return VariableBytes.Compress(buffer);
}
/// <summary>
/// Encodes a list of (document ID, term frequency) entries as bytes.
/// The integer sequence handed to <c>VariableBytes.Compress</c> has the form
/// &lt; df, (docGap tf), (docGap tf), ... &gt;
/// where every document ID is written as its difference from the previous
/// entry's document ID (the first one relative to 0).
/// </summary>
/// <param name="queue">
/// The entries to encode. Note that this list is sorted in place by ascending
/// document ID before encoding, so the caller's list is reordered.
/// </param>
/// <returns>The byte array produced by <c>VariableBytes.Compress</c> for that sequence.</returns>
public byte[] Encoding(List<MaxPriorityQueue.InvertedIndex> queue)
{
    // Order the entries by ascending document ID so each gap is taken
    // against the entry that precedes it in document-ID order.
    queue.Sort((left, right) =>
    {
        int leftId = left.GetDocumentId();
        int rightId = right.GetDocumentId();
        return leftId.CompareTo(rightId);
    });

    List<int> buffer = new List<int>();

    // Header: document frequency (number of entries)
    buffer.Add(queue.Count);

    int lastDocId = 0;
    foreach (MaxPriorityQueue.InvertedIndex entry in queue)
    {
        int docId = entry.GetDocumentId();

        // Document ID, stored as the gap from the previous entry
        buffer.Add(docId - lastDocId);

        // Term frequency, stored as-is
        buffer.Add(entry.GetTermFreq());

        lastDocId = docId;
    }

    return VariableBytes.Compress(buffer);
}
/// <summary>
/// Converts a byte array into the list of postings for a term.
/// After <c>VariableBytes.DecompressToInts</c>, the integers are read in the form
/// &lt; df, (docGap tf posGap1 posGap2 ...), (docGap tf posGap1 ...), ... &gt;
/// where document IDs and positions are stored as gaps and are rebuilt here
/// by accumulating them (document IDs across postings, positions within a posting).
/// </summary>
/// <param name="value">The encoded bytes.</param>
/// <returns>The reconstructed posting list, in the order it was stored.</returns>
public List<Posting> Decoding(byte[] value)
{
    List<int> numbers = VariableBytes.DecompressToInts(value);
    List<Posting> result = new List<Posting>();

    // cursor always points at the next unread integer
    int cursor = 0;

    // Header: document frequency (number of postings that follow)
    int docCount = numbers[cursor];
    cursor++;

    int docId = 0;
    for (int doc = 0; doc < docCount; doc++)
    {
        // Document ID: add the stored gap to the previous document ID
        docId += numbers[cursor];
        cursor++;

        // Term frequency: how many position gaps follow
        int positionCount = numbers[cursor];
        cursor++;

        // Positions: accumulate gaps, starting again from 0 for each posting
        List<int> positions = new List<int>();
        int position = 0;
        for (int k = 0; k < positionCount; k++)
        {
            position += numbers[cursor];
            cursor++;
            positions.Add(position);
        }

        result.Add(new Posting(docId, positions));
    }

    return result;
}
/// <summary>
/// Converts a byte array into a list of (term frequency, document ID) entries.
/// After <c>VariableBytes.DecompressToInts</c>, the integers are read in the form
/// &lt; df, (docGap tf), (docGap tf), ... &gt;
/// where document IDs are stored as gaps and are rebuilt here by accumulating them.
/// </summary>
/// <param name="value">The encoded bytes.</param>
/// <returns>The reconstructed entries, in the order they were stored.</returns>
public List<MaxPriorityQueue.InvertedIndex> Decoding(byte[] value)
{
    List<int> numbers = VariableBytes.DecompressToInts(value);
    List<MaxPriorityQueue.InvertedIndex> entries = new List<MaxPriorityQueue.InvertedIndex>();

    // cursor always points at the next unread integer
    int cursor = 0;

    // Header: document frequency (number of entries that follow)
    int entryCount = numbers[cursor];
    cursor++;

    int docId = 0;
    for (int n = 0; n < entryCount; n++)
    {
        // Document ID: add the stored gap to the previous document ID
        docId += numbers[cursor];
        cursor++;

        // Term frequency is stored as-is
        int frequency = numbers[cursor];
        cursor++;

        entries.Add(new MaxPriorityQueue.InvertedIndex(frequency, docId));
    }

    return entries;
}
/// <summary>
/// Decodes a byte array into a single integer by delegating to
/// <c>VariableBytes.Decode</c>.
/// </summary>
/// <param name="value">The encoded bytes.</param>
/// <returns>The integer returned by <c>VariableBytes.Decode</c>.</returns>
public int Decoding(byte[] value)
{
    int decoded = VariableBytes.Decode(value);
    return decoded;
}
/// <summary>
/// Encodes a single integer as bytes by delegating to
/// <c>VariableBytes.Encode</c>.
/// </summary>
/// <param name="value">The integer to encode.</param>
/// <returns>The byte array returned by <c>VariableBytes.Encode</c>.</returns>
public byte[] Encoding(int value)
{
    byte[] encoded = VariableBytes.Encode(value);
    return encoded;
}
/// <summary>
/// Decodes a byte array into a list of integers by delegating to
/// <c>VariableBytes.DecompressToInts</c>.
/// </summary>
/// <param name="value">The encoded bytes.</param>
/// <returns>The list of integers returned by <c>VariableBytes.DecompressToInts</c>.</returns>
public List<int> Decoding(byte[] value)
{
    List<int> decoded = VariableBytes.DecompressToInts(value);
    return decoded;
}
/// <summary>
/// Encodes a list of integers as bytes by delegating to
/// <c>VariableBytes.Compress</c>.
/// </summary>
/// <param name="value">The integers to encode.</param>
/// <returns>The byte array returned by <c>VariableBytes.Compress</c>.</returns>
public byte[] Encoding(List<int> value)
{
    byte[] encoded = VariableBytes.Compress(value);
    return encoded;
}