Beispiel #1
0
        /// <summary>
        /// Merges this posting list with <paramref name="other"/>, producing one posting per
        /// document that occurs in either list.
        /// </summary>
        /// <param name="other">The posting list to merge with this one.</param>
        /// <param name="max">
        /// When <c>true</c>, a document present in both lists receives the larger of the two
        /// <c>Bm25F</c> values; when <c>false</c>, the two values are added.
        /// </param>
        /// <returns>
        /// <paramref name="other"/> itself when this list is empty, this instance when
        /// <paramref name="other"/> is empty, otherwise a newly built <see cref="PostingArray"/>.
        /// </returns>
        /// <remarks>
        /// The merge advances through both lists in step and orders entries by
        /// <c>Document.hash</c>, so it presumably expects both lists to be sorted ascending by
        /// that value (not verified here).
        /// </remarks>
        public PostingArray Union(PostingArray other, bool max)
        {
            if (this.list.Length == 0)
            {
                return(other);
            }
            if (other.list.Length == 0)
            {
                return(this);
            }

            // The union holds at least as many entries as the longer input.
            int            capacity = Math.Max(this.list.Length, other.list.Length);
            List <Posting> result   = new List <Posting>(capacity);
            int            i0       = 0;
            int            i1       = 0;
            Posting        item0    = this.list[i0];
            Posting        item1    = other.list[i1];

            // Keep going while EITHER list still has entries. Stopping as soon as one list is
            // exhausted (the previous "&&" condition) silently dropped the tail of the longer
            // list and left the two exhaustion branches below unreachable.
            while (i0 < this.list.Length || i1 < other.list.Length)
            {
                if (i0 >= this.list.Length)
                {
                    // This list is exhausted: drain the remainder of the other list.
                    result.Add(item1);
                    i1 = other.Next(i1, out item1);
                }
                else if (i1 >= other.list.Length)
                {
                    // The other list is exhausted: drain the remainder of this list.
                    result.Add(item0);
                    i0 = this.Next(i0, out item0);
                }
                else if (item0.Document == item1.Document)
                {
                    // Same document in both lists: emit a single combined posting.
                    if (max)
                    {
                        result.Add(new Posting(item0.Document, Math.Max(item0.Bm25F, item1.Bm25F)));
                    }
                    else
                    {
                        result.Add(new Posting(item0.Document, item0.Bm25F + item1.Bm25F));
                    }

                    i0 = this.Next(i0, out item0);
                    i1 = other.Next(i1, out item1);
                }
                else if (item0.Document.hash < item1.Document.hash)
                {
                    result.Add(item0);
                    i0 = this.Next(i0, out item0);
                }
                else
                {
                    result.Add(item1);
                    i1 = other.Next(i1, out item1);
                }
            }

            return(new PostingArray(result.ToArray()));
        }
        /// <summary>
        /// Collapses every entry of <c>arrays</c> into a single posting array by chaining
        /// <c>Union</c> calls, then leaves only that combined array in <c>arrays</c>.
        /// Does nothing when <c>arrays</c> already holds exactly one entry.
        /// </summary>
        void Resolve()
        {
            int total = arrays.Count();

            if (total == 1)
            {
                return;
            }

            // Order the arrays by ascending Count() before merging them one by one.
            arrays.Sort((left, right) => left.Count().CompareTo(right.Count()));

            PostingArray merged = arrays[0];
            int          index  = 1;

            while (index < total)
            {
                merged = merged.Union(arrays[index], max) as PostingArray;
                index++;
            }

            arrays.Clear();
            arrays.Add(merged);
        }
Beispiel #3
0
        /// <summary>
        /// Builds the posting list of documents that occur in both this list and
        /// <paramref name="other"/>.
        /// </summary>
        /// <param name="other">The posting list to intersect with this one.</param>
        /// <param name="max">
        /// When <c>true</c>, each shared document receives the larger of the two <c>Bm25F</c>
        /// values; when <c>false</c>, the two values are added.
        /// </param>
        /// <returns>
        /// A new <see cref="PostingArray"/>; it wraps an empty array when either input is empty.
        /// </returns>
        /// <remarks>
        /// Entries are compared by <c>Document.hash</c>, so both lists are presumably expected to
        /// be sorted ascending by that value (not verified here).
        /// </remarks>
        public PostingArray Intersection(PostingArray other, bool max)
        {
            int ownCount   = this.list.Length;
            int otherCount = other.list.Length;
            int shorter    = Math.Min(ownCount, otherCount);

            if (shorter == 0)
            {
                return new PostingArray(new Posting[0]);
            }

            // Span arguments for SkipAhead: the integer ratio of one list's length to the other's.
            int ownSpan   = ownCount / otherCount;
            int otherSpan = otherCount / ownCount;

            List<Posting> matches    = new List<Posting>(shorter / 10 + 1);
            int           ownIndex   = 0;
            int           otherIndex = 0;
            Posting       own        = this.list[ownIndex];
            Posting       theirs     = other.list[otherIndex];

            while (ownIndex < ownCount && otherIndex < otherCount)
            {
                if (own.Document == theirs.Document)
                {
                    // Shared document: emit one combined posting and advance both cursors.
                    Posting combined;

                    if (max)
                    {
                        combined = new Posting(own.Document, Math.Max(own.Bm25F, theirs.Bm25F));
                    }
                    else
                    {
                        combined = new Posting(own.Document, own.Bm25F + theirs.Bm25F);
                    }

                    matches.Add(combined);
                    ownIndex   = this.Next(ownIndex, out own);
                    otherIndex = other.Next(otherIndex, out theirs);
                    continue;
                }

                // No match: move the cursor with the smaller hash towards the other's document.
                if (own.Document.hash < theirs.Document.hash)
                {
                    ownIndex = this.SkipAhead(ownIndex, ownSpan, theirs.Document, out own);
                }
                else
                {
                    otherIndex = other.SkipAhead(otherIndex, otherSpan, own.Document, out theirs);
                }
            }

            return new PostingArray(matches.ToArray());
        }
Beispiel #4
0
        /// <summary>
        /// Transfers one file from <paramref name="src"/> to <paramref name="dst"/> and prints a
        /// progress line to the console.
        /// </summary>
        /// <param name="src">Path of the file to read.</param>
        /// <param name="dst">Path of the file to write; an existing file is overwritten when copying.</param>
        /// <remarks>
        /// A file whose name starts with an underscore is loaded as a <see cref="PostingArray"/>,
        /// passed through <c>Sort()</c> and <c>RemoveRepeats()</c>, and written to
        /// <paramref name="dst"/>. Any other file is copied unchanged. The size of
        /// <paramref name="src"/> is then added to the running <c>sofar</c> counter.
        /// </remarks>
        static void Process(string src, string dst)
        {
            string fn = Path.GetFileName(src);

            // Ordinal comparison: the leading underscore is a file-naming marker, not linguistic
            // text, so the check must not depend on the current culture.
            if (fn.StartsWith("_", StringComparison.Ordinal))
            {
                PostingArray plist = new PostingArray(src);
                plist.Sort();
                plist = plist.RemoveRepeats();
                plist.Write(dst);
            }
            else
            {
                File.Copy(src, dst, true);
            }

            sofar += new FileInfo(src).Length;

            // Estimate: elapsed hours scaled by the ratio of outstanding to processed amount
            // (tot is presumably the total byte count of all inputs; confirm at its definition).
            double remain = (DateTime.Now - start).TotalHours * (tot - sofar) / sofar;

            Console.WriteLine($"{sofar / 1e6:0,000} MBs processed {remain:0.00} hours remaining");
        }
 /// <summary>
 /// Creates an instance and adds <paramref name="a"/> to its <c>arrays</c> collection.
 /// </summary>
 /// <param name="a">The posting array to add.</param>
 public PostingUnion(PostingArray a)
 {
     arrays.Add(a);
 }