// Writes two blocks that share some terms, merges them through
// SpimiBlockReader.BeginBlockMerge, and checks the merged sequence entry by entry.
public void TestMerge()
{
    List<IEnumerator<PostingList>> blocks = new List<IEnumerator<PostingList>>();
    SpimiBlockReader blockReader = new SpimiBlockReader();

    // First block: "aTerm" -> aDoc, bDoc and "bTerm" -> aDoc.
    SpimiBlockWriter firstBlockWriter = new SpimiBlockWriter();
    firstBlockWriter.AddPosting("aTerm", "aDoc");
    firstBlockWriter.AddPosting("aTerm", "bDoc");
    firstBlockWriter.AddPosting("bTerm", "aDoc");
    blocks.Add(blockReader.Read(firstBlockWriter.FlushToFile()).GetEnumerator());

    // Second block: postings are added out of term order, and the
    // "bTerm"/"aDoc" posting repeats one already present in the first block.
    SpimiBlockWriter secondBlockWriter = new SpimiBlockWriter();
    secondBlockWriter.AddPosting("cTerm", "dDoc");
    secondBlockWriter.AddPosting("aTerm", "zDoc");
    secondBlockWriter.AddPosting("bTerm", "aDoc");
    blocks.Add(blockReader.Read(secondBlockWriter.FlushToFile()).GetEnumerator());

    using (IEnumerator<PostingList> mergedEntries = blockReader.BeginBlockMerge(blocks).GetEnumerator())
    {
        // "aTerm": postings from both blocks combined, in the order aDoc, bDoc, zDoc.
        Assert.IsTrue(mergedEntries.MoveNext());
        PostingList aTermEntry = mergedEntries.Current;
        Assert.AreEqual("aTerm", aTermEntry.Term);
        Assert.AreEqual(3, aTermEntry.Postings.Count);
        Assert.AreEqual("aDoc", aTermEntry.Postings[0]);
        Assert.AreEqual("bDoc", aTermEntry.Postings[1]);
        Assert.AreEqual("zDoc", aTermEntry.Postings[2]);

        // "bTerm": the posting present in both blocks is expected exactly once.
        Assert.IsTrue(mergedEntries.MoveNext());
        PostingList bTermEntry = mergedEntries.Current;
        Assert.AreEqual("bTerm", bTermEntry.Term);
        Assert.AreEqual(1, bTermEntry.Postings.Count);
        Assert.AreEqual("aDoc", bTermEntry.Postings[0]);

        // "cTerm": only present in the second block.
        Assert.IsTrue(mergedEntries.MoveNext());
        PostingList cTermEntry = mergedEntries.Current;
        Assert.AreEqual("cTerm", cTermEntry.Term);
        Assert.AreEqual(1, cTermEntry.Postings.Count);
        Assert.AreEqual("dDoc", cTermEntry.Postings[0]);

        // No entries beyond the three distinct terms.
        Assert.IsFalse(mergedEntries.MoveNext());
    }
}
// Merges every block file listed in termIndexBlockFilePaths and writes the
// resulting term -> postings entries to indexStream through a FileIndexWriter.
private void MergeBlocks()
{
    // Flush first when the block writer still reports buffered postings.
    // NOTE(review): presumably FlushBlockWriter registers the new block file
    // in termIndexBlockFilePaths so it takes part in the merge below - confirm.
    if (termIndexBlockWriter.Postings > 0)
    {
        FlushBlockWriter();
    }

    using (FileIndexWriter<string, IList<Posting>> indexWriter =
        new FileIndexWriter<string, IList<Posting>>(
            new StringEncoder(),
            new PostingListEncoder(),
            indexStream))
    {
        SpimiBlockReader reader = new SpimiBlockReader();
        List<IEnumerator<PostingList>> blockCursors =
            reader.OpenBlocks(this.termIndexBlockFilePaths);

        // Each merged posting list is added to the index writer as it is produced.
        foreach (PostingList mergedList in reader.BeginBlockMerge(blockCursors))
        {
            indexWriter.Add(mergedList.Term, mergedList.Postings);
        }

        // WriteOut is called once, after all merged entries have been added.
        indexWriter.WriteOut();
    }
}