예제 #1
0
        /// <summary>
        /// Expert: highlights the top-N passages of several fields for the supplied
        /// document ids and returns the objects produced by the
        /// <see cref="PassageFormatter"/> instead of plain strings. Use this API
        /// to render to something other than <see cref="string"/>.
        /// </summary>
        /// <param name="fieldsIn">Names of the fields to highlight. Must not be empty. Each field must have a stored string value and also be indexed with offsets.</param>
        /// <param name="query">The query to highlight.</param>
        /// <param name="searcher">The searcher that was previously used to execute the query.</param>
        /// <param name="docidsIn">The ids of the documents to highlight. The array itself is not modified; a sorted copy is used internally.</param>
        /// <param name="maxPassagesIn">
        /// Per-field maximum number of top-N ranked passages used to form the highlighted snippets.
        /// Must have the same length as <paramref name="fieldsIn"/>; entry <c>i</c> belongs to field <c>i</c>.
        /// </param>
        /// <returns>
        /// <see cref="T:IDictionary{string, object[]}"/> keyed on field name. Each value is an array of
        /// formatted snippets whose positions line up with <paramref name="docidsIn"/> (the caller's order,
        /// not the internally sorted order).
        /// If no highlights were found for a document, the
        /// first <paramref name="maxPassagesIn"/> from the field will
        /// be returned.
        /// </returns>
        /// <exception cref="IOException">if an I/O error occurred during processing</exception>
        /// <exception cref="ArgumentException">
        /// if <paramref name="fieldsIn"/> is empty, if <paramref name="maxPassagesIn"/> has a different length than
        /// <paramref name="fieldsIn"/>, or if <c>field</c> was indexed without <see cref="IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS"/>
        /// </exception>
        protected internal virtual IDictionary <string, object[]> HighlightFieldsAsObjects(string[] fieldsIn, Query query, IndexSearcher searcher, int[] docidsIn, int[] maxPassagesIn)
        {
            if (fieldsIn.Length == 0)
            {
                throw new ArgumentException("fieldsIn must not be empty");
            }
            if (maxPassagesIn.Length != fieldsIn.Length)
            {
                throw new ArgumentException("invalid number of maxPassagesIn");
            }

            IndexReader indexReader = searcher.IndexReader;

            // Gather every term of the rewritten query into one sorted set; the
            // per-field slices are cut out of it further down.
            Query rewrittenQuery = Rewrite(query);
            TreeSet<Term> allQueryTerms = new TreeSet<Term>();
            rewrittenQuery.ExtractTerms(allQueryTerms);

            IList<AtomicReaderContext> leafContexts = indexReader.Context.Leaves;

            // Work on private copies: the sorts below are in-place and must not
            // disturb the caller's arrays.
            int[] sortedDocIds = new int[docidsIn.Length];
            System.Array.Copy(docidsIn, sortedDocIds, sortedDocIds.Length);

            string[] sortedFields = new string[fieldsIn.Length];
            System.Array.Copy(fieldsIn, sortedFields, sortedFields.Length);

            int[] sortedMaxPassages = new int[maxPassagesIn.Length];
            System.Array.Copy(maxPassagesIn, sortedMaxPassages, sortedMaxPassages.Length);

            // Sorted doc ids give sequential I/O; the helper sorter is handed both
            // the field names and their passage limits.
            ArrayUtil.TimSort(sortedDocIds);
            new InPlaceMergeSorterAnonymousHelper(sortedFields, sortedMaxPassages).Sort(0, sortedFields.Length);

            // Load the stored field values for all fields and documents up front.
            IList<string[]> storedValues = LoadFieldValues(searcher, sortedFields, sortedDocIds, maxLength);

            IDictionary<string, object[]> highlightsByField = new Dictionary<string, object[]>();

            for (int fieldIndex = 0; fieldIndex < sortedFields.Length; fieldIndex++)
            {
                string fieldName    = sortedFields[fieldIndex];
                int    passageLimit = sortedMaxPassages[fieldIndex];

                // LUCENENET NOTE: System.Collections.Generic.SortedSet<T>.GetViewBetween ceiling is inclusive.
                // However, in Java, subSet ceiling is exclusive. Also,
                // SortedSet<T> doesn't seem to have the correct logic, but C5.TreeSet<T> does.
                Term lowerBound = new Term(fieldName, "");
                Term upperBound = new Term(fieldName, UnicodeUtil.BIG_TERM);
                var termsOfField = allQueryTerms.RangeFromTo(lowerBound, upperBound); //SubSet(floor, ceiling);
                // TODO: should we have some reasonable defaults for term pruning? (e.g. stopwords)

                // Only the term bytes are needed from here on; the field name is redundant.
                BytesRef[] termBytes = new BytesRef[termsOfField.Count];
                int        nextSlot  = 0;
                foreach (Term queryTerm in termsOfField)
                {
                    termBytes[nextSlot] = queryTerm.Bytes;
                    nextSlot++;
                }

                IDictionary<int, object> snippetsByDocId = HighlightField(fieldName, storedValues[fieldIndex], GetBreakIterator(fieldName), termBytes, sortedDocIds, leafContexts, passageLimit, query);

                // Emit the snippets in the caller's original doc id order. A doc id
                // without an entry in the lookup leaves its slot at null.
                object[] orderedSnippets = new object[sortedDocIds.Length];
                for (int position = 0; position < docidsIn.Length; position++)
                {
                    object snippet;
                    snippetsByDocId.TryGetValue(docidsIn[position], out snippet);
                    orderedSnippets[position] = snippet;
                }
                highlightsByField[fieldName] = orderedSnippets;
            }

            return highlightsByField;
        }
예제 #2
0
        /// <summary>
        /// Searches <paramref name="points"/> for the two points with the smallest
        /// mutual distance using a plane sweep along the X axis. Adapted from
        /// http://www.baptiste-wicht.com/2010/04/closest-pair-of-point-plane-sweep-algorithm/
        /// </summary>
        /// <param name="points">
        /// The points to examine. They are copied into a private list before sorting,
        /// so the caller's sequence is not reordered.
        /// </param>
        /// <returns>
        /// A two-element array containing the closest pair that was found. Both slots keep
        /// their default value when no pair with a distance below <c>MaxDistance</c> was
        /// encountered (for example when fewer than two points are supplied).
        /// </returns>
        /// <exception cref="ApplicationException">if a computed distance is negative, which is treated as a numeric overflow.</exception>
        static P[] ClosestPair(IEnumerable <P> points)
        {
            var bestPair = new P[2];

            // Nothing measured yet, so the best known distance starts at "infinity".
            var bestDistance = MaxDistance;

            // Private copy of the input, ordered from left to right.
            var byX = new List<P>(points);
            byX.Sort(XComparer.XCompare);

            // Sweep window: the points still close enough on the X axis, kept in
            // a C5 tree set ordered by YComparer.
            // NOTE(review): if YComparer considers two distinct points with the same Y
            // equal, the set would keep only one of them - confirm against YComparer.
            var window = new TreeSet<P>(new YComparer());

            // Index (into byX) of the left-most point that is still inside the window.
            var windowStart = 0;

            for (var index = 0; index < byX.Count; index++)
            {
                var current = byX[index];

                // Evict points that lie further left than the best distance allows.
                while (current.X - byX[windowStart].X > bestDistance)
                {
                    window.Remove(byX[windowStart]);
                    windowStart++;
                }

                // Probe points bounding the vertical band around the current point.
                var lowerBound = new P {
                    X = current.X, Y = checked (current.Y - bestDistance)
                };
                var upperBound = new P {
                    X = current.X, Y = checked (current.Y + bestDistance)
                };

                // Only window members inside that band are worth measuring.
                foreach (var candidate in window.RangeFromTo(lowerBound, upperBound))
                {
                    var separation = current.Distance(candidate);
                    if (separation < 0)
                    {
                        throw new ApplicationException("number overflow");
                    }

                    // Remember the pair whenever it beats the best one so far.
                    if (separation < bestDistance)
                    {
                        bestDistance = separation;
                        bestPair[0]  = current;
                        bestPair[1]  = candidate;
                    }
                }

                // From now on the current point is itself part of the window.
                window.Add(current);
            }

            return bestPair;
        }