/// <summary>
/// Expert: highlights the top-N passages of several fields for the supplied
/// document IDs, producing whatever custom object the
/// <see cref="PassageFormatter"/> returns. Use this API when the output should be
/// rendered as something other than <see cref="string"/>.
/// </summary>
/// <param name="fieldsIn">Names of the fields to highlight. Each must have a stored string value and also be indexed with offsets. Must not be empty.</param>
/// <param name="query">Query to highlight.</param>
/// <param name="searcher">Searcher that was previously used to execute the query.</param>
/// <param name="docidsIn">The document IDs to highlight.</param>
/// <param name="maxPassagesIn">
/// The maximum number of top-N ranked passages per field used to form the highlighted snippets.
/// Must have the same length as <paramref name="fieldsIn"/>.
/// </param>
/// <returns>
/// <see cref="T:IDictionary{string, object[]}"/> keyed on field name. Each value holds the formatted
/// snippets in the same order as the documents in <paramref name="docidsIn"/>; an entry is
/// <c>null</c> when the per-field highlighting produced nothing for that document ID.
/// If no highlights were found for a document, the
/// first <paramref name="maxPassagesIn"/> from the field will
/// be returned.
/// </returns>
/// <exception cref="IOException">if an I/O error occurred during processing</exception>
/// <exception cref="ArgumentException">
/// if <paramref name="fieldsIn"/> is empty, if its length differs from that of
/// <paramref name="maxPassagesIn"/>, or if <c>field</c> was indexed without
/// <see cref="IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS"/>
/// </exception>
protected internal virtual IDictionary<string, object[]> HighlightFieldsAsObjects(string[] fieldsIn, Query query, IndexSearcher searcher, int[] docidsIn, int[] maxPassagesIn)
{
    if (fieldsIn.Length == 0)
    {
        throw new ArgumentException("fieldsIn must not be empty");
    }
    if (maxPassagesIn.Length != fieldsIn.Length)
    {
        throw new ArgumentException("invalid number of maxPassagesIn");
    }

    IndexReader indexReader = searcher.IndexReader;

    // Collect every term of the rewritten query; they are narrowed per field below.
    Query rewrittenQuery = Rewrite(query);
    TreeSet<Term> allQueryTerms = new TreeSet<Term>();
    rewrittenQuery.ExtractTerms(allQueryTerms);

    IList<AtomicReaderContext> leafContexts = indexReader.Context.Leaves;

    // The inputs are sorted in place below, so work on private copies and
    // leave the caller's arrays untouched.
    int[] sortedDocIds = (int[])docidsIn.Clone();
    string[] sortedFields = (string[])fieldsIn.Clone();
    int[] sortedMaxPassages = (int[])maxPassagesIn.Clone();

    // Sort for sequential IO. The helper sorts the field names and is handed
    // the max-passages array alongside them.
    ArrayUtil.TimSort(sortedDocIds);
    new InPlaceMergeSorterAnonymousHelper(sortedFields, sortedMaxPassages).Sort(0, sortedFields.Length);

    // Pull stored data.
    IList<string[]> storedContents = LoadFieldValues(searcher, sortedFields, sortedDocIds, maxLength);

    IDictionary<string, object[]> highlightsByField = new Dictionary<string, object[]>();
    for (int fieldIndex = 0; fieldIndex < sortedFields.Length; fieldIndex++)
    {
        string fieldName = sortedFields[fieldIndex];
        int passageLimit = sortedMaxPassages[fieldIndex];

        // LUCENENET NOTE: System.Collections.Generic.SortedSet<T>.GetViewBetween treats the
        // ceiling as inclusive, whereas Java's subSet ceiling is exclusive. SortedSet<T> also
        // doesn't seem to have the correct logic, but C5.TreeSet<T> does - hence RangeFromTo
        // in place of SubSet(floor, ceiling).
        Term lowerBound = new Term(fieldName, "");
        Term upperBound = new Term(fieldName, UnicodeUtil.BIG_TERM);
        var termsOfField = allQueryTerms.RangeFromTo(lowerBound, upperBound);

        // TODO: should we have some reasonable defaults for term pruning? (e.g. stopwords)

        // Strip off the redundant field: only the term bytes are passed on.
        BytesRef[] termBytes = new BytesRef[termsOfField.Count];
        int nextSlot = 0;
        foreach (Term fieldTerm in termsOfField)
        {
            termBytes[nextSlot] = fieldTerm.Bytes;
            nextSlot++;
        }

        IDictionary<int, object> snippetsByDocId = HighlightField(
            fieldName,
            storedContents[fieldIndex],
            GetBreakIterator(fieldName),
            termBytes,
            sortedDocIds,
            leafContexts,
            passageLimit,
            query);

        // Map the results back into the caller's original document order.
        object[] orderedSnippets = new object[sortedDocIds.Length];
        for (int docIndex = 0; docIndex < docidsIn.Length; docIndex++)
        {
            object snippet;
            snippetsByDocId.TryGetValue(docidsIn[docIndex], out snippet);
            orderedSnippets[docIndex] = snippet;
        }

        highlightsByField[fieldName] = orderedSnippets;
    }

    return highlightsByField;
}
/// <summary>
/// Finds the two closest points with a plane sweep: the points are ordered with
/// <c>XComparer.XCompare</c> and swept left to right while a set of candidates,
/// ordered by <c>YComparer</c>, is maintained. Adapted from
/// http://www.baptiste-wicht.com/2010/04/closest-pair-of-point-plane-sweep-algorithm/
/// </summary>
/// <param name="points">The points to search; they are copied into a private list before sorting.</param>
/// <returns>
/// A two-element array holding the closest pair found. Both slots keep their default
/// value when no pair was closer than <c>MaxDistance</c>.
/// </returns>
/// <exception cref="ApplicationException">if a computed distance is negative, which is reported as a number overflow</exception>
static P[] ClosestPair(IEnumerable<P> points)
{
    var best = new P[2];

    // Nothing has been measured yet, so start from the largest distance.
    var bestDistance = MaxDistance;

    // Sweep order: the points sorted by the X comparison.
    var byX = new List<P>(points);
    byX.Sort(XComparer.XCompare);

    // Candidates still within reach of the sweep line, ordered vertically (C5 data structure).
    var window = new TreeSet<P>(new YComparer());

    // Index in byX of the left-most point that is still a candidate.
    var windowStart = 0;

    for (var i = 0; i < byX.Count; i++)
    {
        var current = byX[i];

        // Drop candidates that are further left than the best distance found so far.
        while (current.X - byX[windowStart].X > bestDistance)
        {
            window.Remove(byX[windowStart]);
            windowStart++;
        }

        // Vertical bounds of the candidates worth examining. The arithmetic is checked
        // so that an overflow is not silently wrapped (assuming integral coordinates).
        var lower = new P { X = current.X, Y = checked(current.Y - bestDistance) };
        var upper = new P { X = current.X, Y = checked(current.Y + bestDistance) };

        foreach (var candidate in window.RangeFromTo(lower, upper))
        {
            var separation = current.Distance(candidate);
            if (separation < 0)
            {
                throw new ApplicationException("number overflow");
            }

            // Keep the pair only when it is strictly closer than the best so far.
            if (separation < bestDistance)
            {
                bestDistance = separation;
                best[0] = current;
                best[1] = candidate;
            }
        }

        // The current point becomes a candidate for the points to its right.
        window.Add(current);
    }

    return best;
}