/// <summary>
/// Merges this posting array with <paramref name="other"/>, producing one posting for every
/// document that occurs in either input.
/// </summary>
/// <remarks>
/// The two arrays are walked in lock-step and compared by <c>Document.hash</c>; this assumes both
/// inputs are already ordered by that hash (TODO confirm against callers).
/// </remarks>
/// <param name="other">The posting array to merge with this one.</param>
/// <param name="max">
/// When <c>true</c>, a document present in both inputs receives the larger of the two
/// <c>Bm25F</c> values; when <c>false</c>, the two values are added together.
/// </param>
/// <returns>
/// <paramref name="other"/> itself when this array is empty, this instance when
/// <paramref name="other"/> is empty, otherwise a newly built <c>PostingArray</c>.
/// </returns>
public PostingArray Union(PostingArray other, bool max) {
    if (this.list.Length == 0) {
        return other;
    }
    if (other.list.Length == 0) {
        return this;
    }

    // A union holds at least as many postings as the longer input, so presize to that.
    int capacity = Math.Max(this.list.Length, other.list.Length);
    List<Posting> result = new List<Posting>(capacity);

    int i0 = 0;
    int i1 = 0;
    Posting item0 = this.list[i0];
    Posting item1 = other.list[i1];

    // Lock-step phase: both cursors still point at a valid posting.
    while (i0 < this.list.Length && i1 < other.list.Length) {
        if (item0.Document == item1.Document) {
            if (max) {
                result.Add(new Posting(item0.Document, Math.Max(item0.Bm25F, item1.Bm25F)));
            } else {
                result.Add(new Posting(item0.Document, item0.Bm25F + item1.Bm25F));
            }
            i0 = this.Next(i0, out item0);
            i1 = other.Next(i1, out item1);
        } else if (item0.Document.hash < item1.Document.hash) {
            result.Add(item0);
            i0 = this.Next(i0, out item0);
        } else {
            result.Add(item1);
            i1 = other.Next(i1, out item1);
        }
    }

    // Drain phase: at most one of these loops runs. Without it, every posting left in the
    // longer input after the shorter one is exhausted would be missing from the union.
    while (i0 < this.list.Length) {
        result.Add(item0);
        i0 = this.Next(i0, out item0);
    }
    while (i1 < other.list.Length) {
        result.Add(item1);
        i1 = other.Next(i1, out item1);
    }

    return new PostingArray(result.ToArray());
}
/// <summary>
/// Collapses every entry of <c>arrays</c> into a single posting array by repeatedly calling
/// <c>Union</c>, then replaces the contents of <c>arrays</c> with that one merged array.
/// </summary>
/// <remarks>
/// Does nothing when <c>arrays</c> holds zero or one entry. The <c>max</c> member is forwarded
/// to each <c>Union</c> call.
/// </remarks>
void Resolve() {
    int count = arrays.Count();

    // Zero or one entry needs no merging; an empty collection must not reach arrays[0] below.
    if (count <= 1) {
        return;
    }

    // Order by ascending Count() so the merge starts from the smallest arrays.
    arrays.Sort((x, y) => x.Count().CompareTo(y.Count()));

    PostingArray merged = arrays[0];
    for (int i = 1; i < count; i++) {
        merged = merged.Union(arrays[i], max);
    }

    arrays.Clear();
    arrays.Add(merged);
}
/// <summary>
/// Builds the posting array containing only the documents that occur in both this array and
/// <paramref name="other"/>.
/// </summary>
/// <remarks>
/// Documents are compared by <c>Document.hash</c>; this assumes both inputs are ordered by that
/// hash (TODO confirm against callers). On a mismatch, the cursor on the side with the smaller
/// hash is moved with <c>SkipAhead</c>, which is given the integer length ratio of the two
/// arrays and the document on the opposite side.
/// </remarks>
/// <param name="other">The posting array to intersect with this one.</param>
/// <param name="max">
/// When <c>true</c>, a matching document receives the larger of the two <c>Bm25F</c> values;
/// when <c>false</c>, the two values are added together.
/// </param>
/// <returns>A new <c>PostingArray</c>; it wraps an empty array when either input is empty.</returns>
public PostingArray Intersection(PostingArray other, bool max) {
    int shorter = Math.Min(this.list.Length, other.list.Length);
    if (shorter == 0) {
        return new PostingArray(new Posting[0]);
    }

    // Integer length ratios handed to SkipAhead; the ratio for the shorter side truncates to 0.
    int stride0 = this.list.Length / other.list.Length;
    int stride1 = other.list.Length / this.list.Length;

    List<Posting> matches = new List<Posting>(shorter / 10 + 1);

    int pos0 = 0;
    int pos1 = 0;
    Posting cur0 = this.list[pos0];
    Posting cur1 = other.list[pos1];

    while (pos0 < this.list.Length && pos1 < other.list.Length) {
        if (cur0.Document == cur1.Document) {
            // Same document on both sides: emit one combined posting and step both cursors.
            Posting combined;
            if (max) {
                combined = new Posting(cur0.Document, Math.Max(cur0.Bm25F, cur1.Bm25F));
            } else {
                combined = new Posting(cur0.Document, cur0.Bm25F + cur1.Bm25F);
            }
            matches.Add(combined);

            pos0 = this.Next(pos0, out cur0);
            pos1 = other.Next(pos1, out cur1);
            continue;
        }

        if (cur0.Document.hash < cur1.Document.hash) {
            pos0 = this.SkipAhead(pos0, stride0, cur1.Document, out cur0);
        } else {
            pos1 = other.SkipAhead(pos1, stride1, cur0.Document, out cur1);
        }
    }

    return new PostingArray(matches.ToArray());
}
/// <summary>
/// Produces <paramref name="dst"/> from <paramref name="src"/> and prints a progress line.
/// </summary>
/// <remarks>
/// A source whose file name starts with "_" is loaded as a <c>PostingArray</c>, sorted, passed
/// through <c>RemoveRepeats</c> and written to <paramref name="dst"/>. Any other file is copied
/// verbatim, overwriting an existing destination. Afterwards the source file's length is added to
/// the static <c>sofar</c> counter and a remaining-time estimate is derived from <c>start</c>,
/// <c>tot</c> and <c>sofar</c>.
/// </remarks>
/// <param name="src">Path of the input file.</param>
/// <param name="dst">Path of the output file.</param>
static void Process(string src, string dst) {
    string fn = Path.GetFileName(src);

    // The "_" prefix is a file-name marker, not linguistic text, so compare ordinally
    // instead of using the current culture.
    if (fn.StartsWith("_", StringComparison.Ordinal)) {
        PostingArray plist = new PostingArray(src);
        plist.Sort();
        plist = plist.RemoveRepeats();
        plist.Write(dst);
    } else {
        File.Copy(src, dst, true);
    }

    sofar += new FileInfo(src).Length;

    // Linear extrapolation: elapsed hours scaled by the ratio of remaining to processed bytes.
    double remain = (DateTime.Now - start).TotalHours * (tot - sofar) / sofar;
    Console.WriteLine($"{sofar / 1e6:0,000} MBs processed {remain:0.00} hours remaining");
}
/// <summary>
/// Creates a union and adds <paramref name="a"/> to its <c>arrays</c> collection.
/// </summary>
/// <param name="a">The posting array to add.</param>
public PostingUnion(PostingArray a) {
    arrays.Add(a);
}