Пример #1
0
 // Checks the binary layout of the file produced by SpimiBlockWriter.FlushToFile:
 // an Int32 header of 2 (presumably the number of distinct terms - confirm against
 // the writer), followed by each term string and its encoded posting list.
 // The test expects "aTerm" to come before "bTerm" even though "bTerm" was added first.
 public void TestWrite()
 {
     PostingListEncoder decoder = new PostingListEncoder();
     SpimiBlockWriter writer = new SpimiBlockWriter();
     writer.AddPosting("bTerm", DocA);
     writer.AddPosting("aTerm", DocA);
     writer.AddPosting("aTerm", DocB);
     string filePath = writer.FlushToFile();
     using (FileStream file = File.Open(filePath, FileMode.Open))
     {
         BinaryReader reader = new BinaryReader(file);
         Assert.AreEqual(2, reader.ReadInt32());
         Assert.AreEqual("aTerm", reader.ReadString());

         // Postings expected for "aTerm": one occurrence in each of the two documents.
         IList<Posting> postings = new List<Posting>();
         postings.Add(new Posting(DocA, 1));
         postings.Add(new Posting(DocB, 1));
         IList<Posting> readPostings = decoder.read(reader);

         // Compare sizes first so that extra postings in the file are detected too.
         Assert.AreEqual(postings.Count, readPostings.Count);
         for (int i = 0; i < postings.Count; i++)
         {
             // The comparison result must be asserted; a bare Equals call verifies nothing.
             Assert.AreEqual(postings[i], readPostings[i]);
         }

         Assert.AreEqual("bTerm", reader.ReadString());
         readPostings = decoder.read(reader);
         Assert.AreEqual(new Posting(DocA, 1), readPostings[0]);
     }
 }
Пример #2
0
 /// <summary>
 /// Creates an indexer that keeps the supplied lexer and builds its own
 /// block reader, block writer and file index writer.
 /// </summary>
 /// <param name="lexer">Lexer instance stored on the indexer.</param>
 public SpimiIndexer(ILexer lexer)
 {
     // "this." is only required where the parameter hides the field name.
     this.lexer = lexer;

     // Collaborators are created in the same order as before.
     blockReader = new SpimiBlockReader();
     blockWriter = new SpimiBlockWriter();
     fileIndexWriter = new FileIndexWriter();
 }
Пример #3
0
 /// <summary>
 /// Creates an indexer from the supplied lexer, parser and index stream, and
 /// builds a block writer plus a metadata writer over the given metadata stream.
 /// </summary>
 /// <param name="lexer">Lexer instance stored on the indexer.</param>
 /// <param name="parser">Parser instance stored on the indexer.</param>
 /// <param name="indexStream">Stream stored as the index stream.</param>
 /// <param name="metadata">Stream handed to the <c>CollectionMetadataWriter</c>.</param>
 public SpimiIndexer(ILexer lexer, IParser parser, Stream indexStream, Stream metadata)
 {
     // Plain assignments of the caller-supplied collaborators.
     this.lexer = lexer;
     this.parser = parser;
     this.indexStream = indexStream;

     // Objects owned by the indexer; block writer first, then metadata writer.
     termIndexBlockWriter = new SpimiBlockWriter();
     metadataWriter = new CollectionMetadataWriter(metadata);
 }
Пример #4
0
        // Writes two blocks with overlapping terms, reads both back and checks that
        // BeginBlockMerge yields one posting list per term, in the order
        // "aTerm", "bTerm", "cTerm", with the posting contents asserted below.
        public void TestMerge()
        {
            SpimiBlockReader blockReader = new SpimiBlockReader();
            List<IEnumerator<PostingList>> blockEnumerators = new List<IEnumerator<PostingList>>();

            // First block: aTerm -> {aDoc, bDoc}, bTerm -> {aDoc}.
            SpimiBlockWriter firstBlock = new SpimiBlockWriter();
            firstBlock.AddPosting("aTerm", "aDoc");
            firstBlock.AddPosting("aTerm", "bDoc");
            firstBlock.AddPosting("bTerm", "aDoc");
            string firstBlockPath = firstBlock.FlushToFile();
            blockEnumerators.Add(blockReader.Read(firstBlockPath).GetEnumerator());

            // Second block: cTerm -> {dDoc}, aTerm -> {zDoc}, bTerm -> {aDoc}.
            SpimiBlockWriter secondBlock = new SpimiBlockWriter();
            secondBlock.AddPosting("cTerm", "dDoc");
            secondBlock.AddPosting("aTerm", "zDoc");
            secondBlock.AddPosting("bTerm", "aDoc");
            string secondBlockPath = secondBlock.FlushToFile();
            blockEnumerators.Add(blockReader.Read(secondBlockPath).GetEnumerator());

            using (IEnumerator<PostingList> cursor = blockReader.BeginBlockMerge(blockEnumerators).GetEnumerator())
            {
                PostingList current;

                // "aTerm" appears in both blocks: three postings are expected.
                Assert.IsTrue(cursor.MoveNext());
                current = cursor.Current;
                Assert.AreEqual("aTerm", current.Term);
                Assert.AreEqual(3, current.Postings.Count);
                Assert.AreEqual("aDoc", current.Postings[0]);
                Assert.AreEqual("bDoc", current.Postings[1]);
                Assert.AreEqual("zDoc", current.Postings[2]);

                // "bTerm" has the same document in both blocks: a single posting is expected.
                Assert.IsTrue(cursor.MoveNext());
                current = cursor.Current;
                Assert.AreEqual("bTerm", current.Term);
                Assert.AreEqual(1, current.Postings.Count);
                Assert.AreEqual("aDoc", current.Postings[0]);

                // "cTerm" only exists in the second block.
                Assert.IsTrue(cursor.MoveNext());
                current = cursor.Current;
                Assert.AreEqual("cTerm", current.Term);
                Assert.AreEqual(1, current.Postings.Count);
                Assert.AreEqual("dDoc", current.Postings[0]);

                // Nothing may follow the third term.
                Assert.IsFalse(cursor.MoveNext());
            }
        }
Пример #5
0
        // Writes a block with two terms and checks that SpimiBlockReader.Read
        // returns the posting lists for "aTerm" and then "bTerm" with the
        // documents that were added.
        public void TestRead()
        {
            SpimiBlockWriter writer = new SpimiBlockWriter();
            writer.AddPosting("aTerm", "aDoc");
            writer.AddPosting("aTerm", "bDoc");
            writer.AddPosting("bTerm", "aDoc");
            string filePath = writer.FlushToFile();

            SpimiBlockReader reader = new SpimiBlockReader();
            IEnumerable<PostingList> postingLists = reader.Read(filePath);

            // IEnumerator<T> is IDisposable; dispose it so that any resource held by
            // the reader's enumeration (presumably the block file) is released even
            // when an assertion fails.
            using (IEnumerator<PostingList> enumerator = postingLists.GetEnumerator())
            {
                Assert.AreEqual(true, enumerator.MoveNext());
                Assert.AreEqual("aTerm", enumerator.Current.Term);
                Assert.AreEqual("aDoc", enumerator.Current.Postings[0]);
                Assert.AreEqual("bDoc", enumerator.Current.Postings[1]);
                Assert.AreEqual(true, enumerator.MoveNext());

                Assert.AreEqual("bTerm", enumerator.Current.Term);
                Assert.AreEqual("aDoc", enumerator.Current.Postings[0]);
            }
        }
Пример #6
0
 // Checks the raw binary layout written by SpimiBlockWriter.FlushToFile when
 // postings are plain document strings: an Int32 of 2 first, then for each term
 // (the test expects "aTerm" before "bTerm") the term string, an Int32 count and
 // that many document strings.
 public void TestWrite()
 {
     SpimiBlockWriter blockWriter = new SpimiBlockWriter();
     blockWriter.AddPosting("bTerm", "aDoc");
     blockWriter.AddPosting("aTerm", "aDoc");
     blockWriter.AddPosting("aTerm", "bDoc");
     string blockPath = blockWriter.FlushToFile();

     using (FileStream blockFile = File.Open(blockPath, FileMode.Open))
     {
         BinaryReader input = new BinaryReader(blockFile);

         // Header value.
         Assert.AreEqual(2, input.ReadInt32());

         // First entry: "aTerm" with two documents.
         Assert.AreEqual("aTerm", input.ReadString());
         Assert.AreEqual(2, input.ReadInt32());
         Assert.AreEqual("aDoc", input.ReadString());
         Assert.AreEqual("bDoc", input.ReadString());

         // Second entry: "bTerm" with one document.
         Assert.AreEqual("bTerm", input.ReadString());
         Assert.AreEqual(1, input.ReadInt32());
         Assert.AreEqual("aDoc", input.ReadString());
     }
 }
Пример #7
0
        // Writes a block with two terms and checks that SpimiBlockReader.Read
        // returns "aTerm" with postings for DocA and DocB (each constructed with
        // the value 1) followed by "bTerm" with a posting for DocA.
        public void TestRead()
        {
            SpimiBlockWriter writer = new SpimiBlockWriter();
            writer.AddPosting("aTerm", DocA);
            writer.AddPosting("aTerm", DocB);
            writer.AddPosting("bTerm", DocA);
            string filePath = writer.FlushToFile();

            SpimiBlockReader reader = new SpimiBlockReader();

            // IEnumerator<T> is IDisposable; dispose it so that any resource held by
            // the reader's enumeration (presumably the block file) is released even
            // when an assertion fails.
            using (IEnumerator<PostingList> postingLists = reader.Read(filePath).GetEnumerator())
            {
                Assert.AreEqual(true, postingLists.MoveNext());
                Assert.AreEqual("aTerm", postingLists.Current.Term);
                Assert.AreEqual(2, postingLists.Current.Postings.Count);
                Assert.AreEqual(new Posting(DocA, 1), postingLists.Current.Postings[0]);
                Assert.AreEqual(new Posting(DocB, 1), postingLists.Current.Postings[1]);
                Assert.AreEqual(true, postingLists.MoveNext());

                Assert.AreEqual("bTerm", postingLists.Current.Term);
                Assert.AreEqual(new Posting(DocA, 1), postingLists.Current.Postings[0]);
            }
        }