Beispiel #1
0
        /// <summary>
        /// Feeds comma-separated rule lines (none of which contains the <c>=&gt;</c> separator) to
        /// <c>SlowSynonymFilterFactory.ParseRules</c> and verifies the shape of the
        /// <c>SlowSynonymMap</c> that is filled in. The arrow sketches in the comments show the
        /// map expected for each rule line.
        /// </summary>
        public virtual void TestRead1waySynonymRules()
        {
            // Separator arguments handed to ParseRules in every scenario below.
            const string arrow = "=>";
            const string comma = ",";

            IList <string> ruleLines = new JCG.List <string>();
            SlowSynonymMap parsed;

            // Scenario 1: "a,b"
            //   (a)->[a]
            //   (b)->[a]
            ruleLines.Add("a,b");
            parsed = new SlowSynonymMap(true);
            SlowSynonymFilterFactory.ParseRules(ruleLines, parsed, arrow, comma, false, null);
            assertEquals(2, parsed.Submap.size());
            AssertTokIncludes(parsed, "a", "a");
            AssertTokIncludes(parsed, "b", "a");

            // Scenario 2: "a,b,c"
            //   (a)->[a]
            //   (b)->[a]
            //   (c)->[a]
            ruleLines.Clear();
            ruleLines.Add("a,b,c");
            parsed = new SlowSynonymMap(true);
            SlowSynonymFilterFactory.ParseRules(ruleLines, parsed, arrow, comma, false, null);
            assertEquals(3, parsed.Submap.size());
            AssertTokIncludes(parsed, "a", "a");
            AssertTokIncludes(parsed, "b", "a");
            AssertTokIncludes(parsed, "c", "a");

            // Scenario 3: "a,b1 b2" -- the two-word entry nests one level deep
            //   (a)->[a]
            //   (b1)->(b2)->[a]
            ruleLines.Clear();
            ruleLines.Add("a,b1 b2");
            parsed = new SlowSynonymMap(true);
            SlowSynonymFilterFactory.ParseRules(ruleLines, parsed, arrow, comma, false, null);
            assertEquals(2, parsed.Submap.size());
            AssertTokIncludes(parsed, "a", "a");
            assertEquals(1, GetSubSynonymMap(parsed, "b1").Submap.size());
            AssertTokIncludes(GetSubSynonymMap(parsed, "b1"), "b2", "a");

            // Scenario 4: "a1 a2,b" -- the two-word entry comes first
            //   (a1)->(a2)->[a1][a2]
            //   (b)->[a1][a2]
            ruleLines.Clear();
            ruleLines.Add("a1 a2,b");
            parsed = new SlowSynonymMap(true);
            SlowSynonymFilterFactory.ParseRules(ruleLines, parsed, arrow, comma, false, null);
            assertEquals(2, parsed.Submap.size());
            assertEquals(1, GetSubSynonymMap(parsed, "a1").Submap.size());
            AssertTokIncludes(GetSubSynonymMap(parsed, "a1"), "a2", "a1");
            AssertTokIncludes(GetSubSynonymMap(parsed, "a1"), "a2", "a2");
            AssertTokIncludes(parsed, "b", "a1");
            AssertTokIncludes(parsed, "b", "a2");
        }
Beispiel #2
0
        /// <summary>
        /// Checks <c>FieldQuery.SearchPhrase</c> for the phrase "a b c" built once through
        /// <c>pqF("a", "b", "c")</c> and once through <c>pqF(1F, 1, "a", "b", "c")</c>, using
        /// candidate term lists whose fourth <c>TermInfo</c> argument steps by 2 and by 3.
        /// </summary>
        public void TestSearchPhraseSlop()
        {
            // "a b c"~0 with phraseHighlight = true, fieldMatch = true
            Query      exactQuery      = pqF("a", "b", "c");
            FieldQuery exactFieldQuery = new FieldQuery(exactQuery, true, true);

            // "a b c" w/ position-gap = 2
            IList <TermInfo> phraseCandidate = new JCG.List <TermInfo>
            {
                new TermInfo("a", 0, 1, 0, 1),
                new TermInfo("b", 2, 3, 2, 1),
                new TermInfo("c", 4, 5, 4, 1)
            };
            assertNull(exactFieldQuery.SearchPhrase(F, phraseCandidate));

            // "a b c"~1 with phraseHighlight = true, fieldMatch = true
            Query      sloppyQuery      = pqF(1F, 1, "a", "b", "c");
            FieldQuery sloppyFieldQuery = new FieldQuery(sloppyQuery, true, true);

            // the same candidate (position-gap = 2) is now expected to be found
            assertNotNull(sloppyFieldQuery.SearchPhrase(F, phraseCandidate));

            // "a b c" w/ position-gap = 3 is expected to be rejected again
            phraseCandidate.Clear();
            phraseCandidate.Add(new TermInfo("a", 0, 1, 0, 1));
            phraseCandidate.Add(new TermInfo("b", 2, 3, 3, 1));
            phraseCandidate.Add(new TermInfo("c", 4, 5, 6, 1));
            assertNull(sloppyFieldQuery.SearchPhrase(F, phraseCandidate));
        }
 /// <summary>
 /// Resets stems accumulator and hands over to superclass.
 /// </summary>
 /// <remarks>
 /// Rewinds <c>lemmaListIndex</c> to 0, replaces <c>lemmaList</c> with a new empty list,
 /// empties <c>tagsList</c> and then calls the base implementation.
 /// </remarks>
 public override void Reset()
 {
     lemmaListIndex = 0;
     lemmaList      = new List <WordData>();
     tagsList.Clear();
     // Local state is cleared first; the base class reset runs last.
     base.Reset();
 }
 /// <summary>
 /// Empties <c>updates</c>, sets <c>nextGen</c> back to 1 and zeroes the
 /// <c>numTerms</c> and <c>bytesUsed</c> values, all while holding the lock on this instance.
 /// </summary>
 public virtual void Clear()
 {
     // NOTE(review): the lock target is the instance itself; presumably other members
     // synchronize on the same object -- confirm before changing the lock target.
     lock (this)
     {
         updates.Clear();
         nextGen         = 1;
         numTerms.Value  = 0;
         bytesUsed.Value = 0;
     }
 }
Beispiel #5
0
 /// <summary>
 /// Advances to the next match.
 /// </summary>
 /// <returns><c>true</c> if and only if another match exists.</returns>
 public override bool MoveNext()
 {
     if (firstTime)
     {
         firstTime = false;

         // Advance each sub-span once, in order, stopping at the first one that cannot move.
         int primed = 0;
         while (primed < subSpans.Length && subSpans[primed].MoveNext())
         {
             primed++;
         }

         bool allPrimed = primed == subSpans.Length;
         more = allPrimed;
         if (!allPrimed)
         {
             return false;
         }
     }

     // Payloads gathered for the previous match are discarded before advancing.
     if (collectPayloads)
     {
         matchPayload.Clear();
     }

     return AdvanceAfterOrdered();
 }
Beispiel #6
0
        /// <summary>
        /// Emits the next token. Stems still buffered from the previous input token are emitted
        /// first (with position increment 0, on top of the restored state); otherwise the next
        /// input token is read and, unless it is a keyword, replaced by its first stem.
        /// </summary>
        /// <returns><c>false</c> once the wrapped input is exhausted; otherwise <c>true</c>.</returns>
        public override bool IncrementToken()
        {
            bool hasBufferedStems = buffer != null && buffer.Count > 0;
            if (hasBufferedStems)
            {
                // Emit one more stem of the previous token at the same position.
                CharsRef pendingStem = buffer[0];
                buffer.RemoveAt(0);
                RestoreState(savedState);
                posIncAtt.PositionIncrement = 0;
                termAtt.SetEmpty().Append(pendingStem);
                return true;
            }

            if (!m_input.IncrementToken())
            {
                return false;
            }

            // Keyword tokens pass through without stemming.
            if (keywordAtt.IsKeyword)
            {
                return true;
            }

            var stems = dedup
                ? stemmer.UniqueStems(termAtt.Buffer, termAtt.Length)
                : stemmer.Stem(termAtt.Buffer, termAtt.Length);
            buffer = new JCG.List <CharsRef>(stems);

            // we do not know this word, return it unchanged
            if (buffer.Count == 0)
            {
                return true;
            }

            if (longestOnly && buffer.Count > 1)
            {
                buffer.Sort(lengthComparer);
            }

            CharsRef firstStem = buffer[0];
            buffer.RemoveAt(0);
            termAtt.SetEmpty().Append(firstStem);

            if (longestOnly)
            {
                // Only the first stem (after sorting) is kept; the rest are dropped.
                buffer.Clear();
            }
            else if (buffer.Count > 0)
            {
                // Remember the attribute state so the remaining stems can be emitted later.
                savedState = CaptureState();
            }

            return true;
        }
Beispiel #7
0
 /// <summary>
 /// Empties <c>updates</c>, sets <c>nextGen</c> back to 1 and zeroes the
 /// <c>numTerms</c> and <c>bytesUsed</c> values, all while holding the monitor on this instance.
 /// </summary>
 public virtual void Clear()
 {
     // NOTE(review): the monitor target is the instance itself; presumably other members
     // synchronize on the same object -- confirm before changing it.
     UninterruptableMonitor.Enter(this);
     try
     {
         updates.Clear();
         nextGen         = 1;
         numTerms.Value  = 0;
         bytesUsed.Value = 0;
     }
     finally
     {
         // Exit is paired with Enter in a finally block so the monitor is released even if a reset throws.
         UninterruptableMonitor.Exit(this);
     }
 }
Beispiel #8
0
        /// <summary>
        /// Appends random strings to a <c>BytesRefArray</c> and to a parallel string list, then
        /// verifies that sequential access, random access and enumeration of the array return
        /// the same strings as the list. The whole cycle runs twice; before the second round
        /// both collections are randomly either cleared or kept.
        /// </summary>
        public virtual void TestAppendIterator()
        {
            Random         random     = Random;
            BytesRefArray  list       = new BytesRefArray(Util.Counter.NewCounter());
            IList <string> stringList = new JCG.List <string>();

            for (int j = 0; j < 2; j++)
            {
                if (j > 0 && random.NextBoolean())
                {
                    list.Clear();
                    stringList.Clear();
                }
                int      entries  = AtLeast(500);
                BytesRef spare    = new BytesRef();
                int      initSize = list.Length;

                // Append: each call must return the index of the newly added entry.
                for (int i = 0; i < entries; i++)
                {
                    string randomRealisticUnicodeString = TestUtil.RandomRealisticUnicodeString(random);
                    spare.CopyChars(randomRealisticUnicodeString);
                    Assert.AreEqual(i + initSize, list.Append(spare));
                    stringList.Add(randomRealisticUnicodeString);
                }

                // check sequential access
                for (int i = 0; i < entries; i++)
                {
                    Assert.IsNotNull(list.Get(spare, i));
                    Assert.AreEqual(stringList[i], spare.Utf8ToString(), "entry " + i + " doesn't match");
                }

                // check random
                for (int i = 0; i < entries; i++)
                {
                    int e = random.Next(entries);
                    Assert.IsNotNull(list.Get(spare, e));
                    // Report the index that was actually checked (e), not the loop counter.
                    Assert.AreEqual(stringList[e], spare.Utf8ToString(), "entry " + e + " doesn't match");
                }

                // check enumeration, twice, with a new enumerator each time
                for (int i = 0; i < 2; i++)
                {
                    IBytesRefEnumerator iterator = list.GetEnumerator();
                    foreach (string @string in stringList)
                    {
                        Assert.IsTrue(iterator.MoveNext());
                        Assert.AreEqual(@string, iterator.Current.Utf8ToString());
                    }
                }
            }
        }
Beispiel #9
0
        /// <summary>
        /// Checks <c>FieldQuery.SearchPhrase</c> for the phrase query <c>pqF("a", "b", "c")</c>
        /// under three combinations of the two boolean <c>FieldQuery</c> constructor flags
        /// (labelled phraseHighlight and fieldMatch below), growing the candidate term list
        /// one term at a time.
        /// </summary>
        public void TestSearchPhrase()
        {
            Query            phraseQuery     = pqF("a", "b", "c");
            IList <TermInfo> phraseCandidate = new JCG.List <TermInfo>();

            // ---- phraseHighlight = true, fieldMatch = true ----
            FieldQuery highlightAndMatch = new FieldQuery(phraseQuery, true, true);

            // "a"
            phraseCandidate.Add(new TermInfo("a", 0, 1, 0, 1));
            assertNull(highlightAndMatch.SearchPhrase(F, phraseCandidate));

            // "a b"
            phraseCandidate.Add(new TermInfo("b", 2, 3, 1, 1));
            assertNull(highlightAndMatch.SearchPhrase(F, phraseCandidate));

            // "a b c"
            phraseCandidate.Add(new TermInfo("c", 4, 5, 2, 1));
            assertNotNull(highlightAndMatch.SearchPhrase(F, phraseCandidate));
            assertNull(highlightAndMatch.SearchPhrase("x", phraseCandidate));

            // ---- phraseHighlight = true, fieldMatch = false ----
            FieldQuery highlightOnly = new FieldQuery(phraseQuery, true, false);

            // "a b c" is found for field F and for the other field name "x" as well
            assertNotNull(highlightOnly.SearchPhrase(F, phraseCandidate));
            assertNotNull(highlightOnly.SearchPhrase("x", phraseCandidate));

            // ---- phraseHighlight = false, fieldMatch = true ----
            FieldQuery matchOnly = new FieldQuery(phraseQuery, false, true);

            // "a"
            phraseCandidate.Clear();
            phraseCandidate.Add(new TermInfo("a", 0, 1, 0, 1));
            assertNotNull(matchOnly.SearchPhrase(F, phraseCandidate));

            // "a b"
            phraseCandidate.Add(new TermInfo("b", 2, 3, 1, 1));
            assertNull(matchOnly.SearchPhrase(F, phraseCandidate));

            // "a b c"
            phraseCandidate.Add(new TermInfo("c", 4, 5, 2, 1));
            assertNotNull(matchOnly.SearchPhrase(F, phraseCandidate));
            assertNull(matchOnly.SearchPhrase("x", phraseCandidate));
        }
Beispiel #10
0
        /// <summary>
        /// Appends random strings to a <c>BytesRefArray</c> and to a parallel string list, sorts
        /// the list, and verifies that the array's sorted iterator yields the same strings in the
        /// same order and then reports exhaustion. The whole cycle runs twice; before the second
        /// round both collections are randomly either cleared or kept.
        /// </summary>
        public virtual void TestSortIterator()
        {
            Random         random     = Random;
            BytesRefArray  list       = new BytesRefArray(Util.Counter.NewCounter());
            IList <string> stringList = new JCG.List <string>();

            for (int j = 0; j < 2; j++)
            {
                if (j > 0 && random.NextBoolean())
                {
                    list.Clear();
                    stringList.Clear();
                }
                int      entries  = AtLeast(500);
                BytesRef spare    = new BytesRef();
                int      initSize = list.Length;

                // Append: each call must return the index of the newly added entry.
                for (int i = 0; i < entries; i++)
                {
                    string randomRealisticUnicodeString = TestUtil.RandomRealisticUnicodeString(random);
                    spare.CopyChars(randomRealisticUnicodeString);
                    Assert.AreEqual(initSize + i, list.Append(spare));
                    stringList.Add(randomRealisticUnicodeString);
                }

                // LUCENENET NOTE: Must sort using ArrayUtil.GetNaturalComparator<T>()
                // to ensure culture isn't taken into consideration during the sort,
                // which will match the sort order of BytesRef.UTF8SortedAsUTF16Comparer.
                CollectionUtil.TimSort(stringList);
#pragma warning disable 612, 618
                IBytesRefIterator iter = list.GetIterator(BytesRef.UTF8SortedAsUTF16Comparer);
#pragma warning restore 612, 618
                int a = 0;
                while ((spare = iter.Next()) != null)
                {
                    Assert.AreEqual(stringList[a], spare.Utf8ToString(), "entry " + a + " doesn't match");
                    a++;
                }

                // Once exhausted, the iterator must keep returning null.
                Assert.IsNull(iter.Next());

                // Expected value first, like every other assertion here: the number of
                // strings added is the expectation, the number iterated is the actual value.
                Assert.AreEqual(stringList.Count, a);
            }
        }
Beispiel #11
0
        /// <summary>
        /// Runs the DM Soundex encoding over <paramref name="source"/>.
        /// </summary>
        /// <param name="source">The string to encode; <c>null</c> yields <c>null</c>.</param>
        /// <param name="branching">Whether alternative replacements fork additional branches.</param>
        /// <returns>
        /// One code per branch that is alive at the end of the input. Without branching only the
        /// single initial branch exists, so the array has exactly one element.
        /// </returns>
        /// <remarks>
        /// Whitespace characters and characters without an entry in <c>RULES</c> are skipped.
        /// NOTE(review): earlier documentation listed an <c>ArgumentException</c> for unmapped
        /// characters; nothing in this method throws it -- confirm whether <c>Cleanup</c> can.
        /// </remarks>
        private string[] GetSoundex(string source, bool branching)
        {
            if (source == null)
            {
                return null;
            }

            string cleaned = Cleanup(source);

            // A list keeps the branches in insertion order.
            IList <Branch> currentBranches = new JCG.List <Branch>();
            currentBranches.Add(new Branch());

            char previousChar = '\0';

            for (int index = 0; index < cleaned.Length; index++)
            {
                char ch = cleaned[index];

                // ignore whitespace inside a name
                if (char.IsWhiteSpace(ch))
                {
                    continue;
                }

                // characters that have no rules are skipped entirely
                if (!RULES.TryGetValue(ch, out IList <Rule> rules) || rules == null)
                {
                    continue;
                }

                string remainder = cleaned.Substring(index);

                // the shared empty list is only a placeholder for the non-branching mode
                IList <Branch> nextBranches;
                if (branching)
                {
                    nextBranches = new JCG.List <Branch>();
                }
                else
                {
                    nextBranches = Collections.EmptyList <Branch>() as IList <Branch>;
                }

                // special rule: occurrences of mn or nm are treated differently
                bool force = (previousChar == 'm' && ch == 'n') || (previousChar == 'n' && ch == 'm');

                foreach (Rule rule in rules)
                {
                    if (!rule.Matches(remainder))
                    {
                        continue;
                    }

                    // Only the first matching rule is applied (see the break at the end).
                    if (branching)
                    {
                        nextBranches.Clear();
                    }

                    string[] replacements = rule.GetReplacements(remainder, previousChar == '\0');
                    bool     mustFork     = replacements.Length > 1 && branching;

                    foreach (Branch branch in currentBranches)
                    {
                        foreach (string replacement in replacements)
                        {
                            // if we have multiple replacements, always create a new branch
                            Branch target = mustFork ? branch.CreateBranch() : branch;
                            target.ProcessNextReplacement(replacement, force);

                            // without branching only the first replacement is used
                            if (!branching)
                            {
                                break;
                            }

                            if (!nextBranches.Contains(target))
                            {
                                nextBranches.Add(target);
                            }
                        }
                    }

                    if (branching)
                    {
                        currentBranches.Clear();
                        currentBranches.AddRange(nextBranches);
                    }

                    // skip the rest of the matched pattern; the loop increment adds the final step
                    index += rule.PatternLength - 1;
                    break;
                }

                previousChar = ch;
            }

            string[] codes = new string[currentBranches.Count];
            for (int i = 0; i < codes.Length; i++)
            {
                Branch finished = currentBranches[i];
                finished.Finish();
                codes[i] = finished.ToString();
            }

            return codes;
        }
Beispiel #12
0
 /// <summary>
 /// Returns <c>true</c> if the given string is accepted by the automaton.
 /// <para/>
 /// Complexity: linear in the length of the string.
 /// <para/>
 /// <b>Note:</b> for full performance, use the <see cref="RunAutomaton"/> class.
 /// </summary>
 /// <param name="a">The automaton to run; singleton, deterministic and non-deterministic
 /// automata are each handled by their own branch.</param>
 /// <param name="s">The input string, consumed one Unicode code point at a time.</param>
 /// <returns><c>true</c> if <paramref name="s"/> is accepted; otherwise <c>false</c>.</returns>
 public static bool Run(Automaton a, string s)
 {
     if (a.IsSingleton)
     {
         return s.Equals(a.singleton, StringComparison.Ordinal);
     }
     if (a.deterministic)
     {
         // Deterministic case: follow the single transition for each code point.
         State p = a.initial;
         int   cp; // LUCENENET: Removed unnecessary assignment
         for (int i = 0; i < s.Length; i += Character.CharCount(cp))
         {
             State q = p.Step(cp = Character.CodePointAt(s, i));
             if (q == null)
             {
                 return false;
             }
             p = q;
         }
         return p.accept;
     }
     else
     {
         // Non-deterministic case: track the set of states reachable after each code point.
         // pp / bb hold the current set (as a list and as a bit set indexed by state number);
         // pp_other / bb_other are rebuilt for the next step and then swapped in.
         State[]            states   = a.GetNumberedStates();
         LinkedList <State> pp       = new LinkedList <State>();
         LinkedList <State> pp_other = new LinkedList <State>();
         OpenBitSet         bb       = new OpenBitSet(states.Length);
         OpenBitSet         bb_other = new OpenBitSet(states.Length);
         pp.AddLast(a.initial);
         JCG.List <State> dest   = new JCG.List <State>();
         bool             accept = a.initial.accept;
         int c; // LUCENENET: Removed unnecessary assignment
         for (int i = 0; i < s.Length; i += Character.CharCount(c))
         {
             c      = Character.CodePointAt(s, i);
             accept = false;
             pp_other.Clear();
             // Clear every bit. Per the OpenBitSet API documentation the end index of
             // Clear(start, end) is exclusive, so the previous "Length - 1" left the last
             // bit stale; a state with that number could then be wrongly treated as
             // already queued and dropped from the next step.
             bb_other.Clear(0, bb_other.Length);
             foreach (State p in pp)
             {
                 dest.Clear();
                 p.Step(c, dest);
                 foreach (State q in dest)
                 {
                     if (q.accept)
                     {
                         accept = true;
                     }
                     // Queue each destination state at most once per step.
                     if (!bb_other.Get(q.number))
                     {
                         bb_other.Set(q.number);
                         pp_other.AddLast(q);
                     }
                 }
             }
             // Swap the current and next state sets.
             LinkedList <State> tp = pp;
             pp       = pp_other;
             pp_other = tp;
             OpenBitSet tb = bb;
             bb       = bb_other;
             bb_other = tb;
         }
         return accept;
     }
 }
Beispiel #13
0
        /// <summary>
        /// Runs the configured analyzer over the text of a textable query node and replaces the
        /// node according to how many tokens the analyzer produced and where they are positioned.
        /// </summary>
        /// <remarks>
        /// Wildcard, fuzzy and regexp nodes, nodes whose parent is a range node, and nodes that
        /// are not <see cref="ITextableQueryNode"/> are returned unchanged. For all other nodes:
        /// <list type="bullet">
        /// <item><description>no tokens (or no term attribute): a <see cref="NoTokenFoundQueryNode"/>;</description></item>
        /// <item><description>one token: the same field node with its text replaced by the token;</description></item>
        /// <item><description>several tokens, and either some share a position or the node is not quoted:
        /// a grouped boolean query, or a <see cref="MultiPhraseQueryNode"/> when the node is quoted
        /// and more than one position is covered;</description></item>
        /// <item><description>otherwise (quoted, every token at its own position): a <see cref="TokenizedPhraseQueryNode"/>.</description></item>
        /// </list>
        /// </remarks>
        /// <param name="node">The node being post-processed.</param>
        /// <returns>The replacement node, or <paramref name="node"/> itself when it is left untouched.</returns>
        protected override IQueryNode PostProcessNode(IQueryNode node)
        {
            if (node is ITextableQueryNode &&
                !(node is WildcardQueryNode) &&
                !(node is FuzzyQueryNode) &&
                !(node is RegexpQueryNode) &&
                !(node.Parent is IRangeQueryNode))
            {
                // NOTE(review): the cast assumes every remaining ITextableQueryNode is a FieldQueryNode -- confirm.
                FieldQueryNode fieldNode = ((FieldQueryNode)node);
                string         text      = fieldNode.GetTextAsString();
                string         field     = fieldNode.GetFieldAsString();

                CachingTokenFilter          buffer     = null;
                IPositionIncrementAttribute posIncrAtt = null;
                int  numTokens     = 0;
                int  positionCount = 0;
                bool severalTokensAtSamePosition = false;

                // First pass: analyze the text once, caching the tokens in 'buffer' while counting
                // tokens and positions and noting whether any token has a position increment of 0.
                TokenStream source = null;
                try
                {
                    source = this.analyzer.GetTokenStream(field, text);
                    source.Reset();
                    buffer = new CachingTokenFilter(source);

                    if (buffer.HasAttribute <IPositionIncrementAttribute>())
                    {
                        posIncrAtt = buffer.GetAttribute <IPositionIncrementAttribute>();
                    }

                    try
                    {
                        while (buffer.IncrementToken())
                        {
                            numTokens++;
                            // Without a position increment attribute every token counts as one position.
                            int positionIncrement = (posIncrAtt != null) ? posIncrAtt
                                                    .PositionIncrement : 1;
                            if (positionIncrement != 0)
                            {
                                positionCount += positionIncrement;
                            }
                            else
                            {
                                severalTokensAtSamePosition = true;
                            }
                        }
                    }
                    catch (Exception e) when(e.IsIOException())
                    {
                        // ignored: counting simply stops at the tokens read so far
                    }
                }
                catch (Exception e) when(e.IsIOException())
                {
                    // I/O failures while obtaining or resetting the stream are rethrown wrapped.
                    throw RuntimeException.Create(e);
                }
                finally
                {
                    IOUtils.DisposeWhileHandlingException(source);
                }

                // rewind the buffer stream
                buffer.Reset();

                if (!buffer.HasAttribute <ICharTermAttribute>())
                {
                    return(new NoTokenFoundQueryNode());
                }

                ICharTermAttribute termAtt = buffer.GetAttribute <ICharTermAttribute>();

                if (numTokens == 0)
                {
                    return(new NoTokenFoundQueryNode());
                }
                else if (numTokens == 1)
                {
                    // Exactly one token: reuse the original node with the analyzed text.
                    string term = null;
                    try
                    {
                        bool hasNext;
                        hasNext = buffer.IncrementToken();
                        if (Debugging.AssertsEnabled)
                        {
                            Debugging.Assert(hasNext == true);
                        }
                        term = termAtt.ToString();
                    }
                    catch (Exception e) when(e.IsIOException())
                    {
                        // safe to ignore, because we know the number of tokens
                    }

                    // NOTE(review): 'term' is still null here if the catch above was taken -- confirm that cannot happen.
                    fieldNode.Text = term.AsCharSequence();

                    return(fieldNode);
                }
                else if (severalTokensAtSamePosition || !(node is QuotedFieldQueryNode))
                {
                    if (positionCount == 1 || !(node is QuotedFieldQueryNode))
                    {
                        // no phrase query:

                        if (positionCount == 1)
                        {
                            // simple case: only one position, with synonyms
                            IList <IQueryNode> children = new JCG.List <IQueryNode>();

                            for (int i = 0; i < numTokens; i++)
                            {
                                string term = null;
                                try
                                {
                                    bool hasNext = buffer.IncrementToken();
                                    if (Debugging.AssertsEnabled)
                                    {
                                        Debugging.Assert(hasNext == true);
                                    }
                                    term = termAtt.ToString();
                                }
                                catch (Exception e) when(e.IsIOException())
                                {
                                    // safe to ignore, because we know the number of tokens
                                }

                                children.Add(new FieldQueryNode(field, term, -1, -1));
                            }
                            // positionCount == 1 always holds in this branch, so the second argument is true.
                            return(new GroupQueryNode(
                                       new StandardBooleanQueryNode(children, positionCount == 1)));
                        }
                        else
                        {
                            // multiple positions
                            // 'q' collects one clause per position; 'currentQuery' is the clause being
                            // built for the current position.
                            IQueryNode q            = new StandardBooleanQueryNode(Collections.EmptyList <IQueryNode>(), false);
                            IQueryNode currentQuery = null;
                            for (int i = 0; i < numTokens; i++)
                            {
                                string term = null;
                                try
                                {
                                    bool hasNext = buffer.IncrementToken();
                                    if (Debugging.AssertsEnabled)
                                    {
                                        Debugging.Assert(hasNext == true);
                                    }
                                    term = termAtt.ToString();
                                }
                                catch (Exception e) when(e.IsIOException())
                                {
                                    // safe to ignore, because we know the number of tokens
                                }
                                if (posIncrAtt != null && posIncrAtt.PositionIncrement == 0)
                                {
                                    // Same position as the previous token: wrap the current clause in a
                                    // boolean node (once) and add this term to it.
                                    if (!(currentQuery is BooleanQueryNode))
                                    {
                                        IQueryNode t = currentQuery;
                                        currentQuery = new StandardBooleanQueryNode(Collections.EmptyList <IQueryNode>(), true);
                                        ((BooleanQueryNode)currentQuery).Add(t);
                                    }
                                    ((BooleanQueryNode)currentQuery).Add(new FieldQueryNode(field, term, -1, -1));
                                }
                                else
                                {
                                    // New position: flush the clause of the previous position, added as is
                                    // for the OR default operator and wrapped as required otherwise.
                                    if (currentQuery != null)
                                    {
                                        if (this.defaultOperator == Operator.OR)
                                        {
                                            q.Add(currentQuery);
                                        }
                                        else
                                        {
                                            q.Add(new ModifierQueryNode(currentQuery, Modifier.MOD_REQ));
                                        }
                                    }
                                    currentQuery = new FieldQueryNode(field, term, -1, -1);
                                }
                            }
                            // Flush the clause of the last position.
                            if (this.defaultOperator == Operator.OR)
                            {
                                q.Add(currentQuery);
                            }
                            else
                            {
                                q.Add(new ModifierQueryNode(currentQuery, Modifier.MOD_REQ));
                            }

                            if (q is BooleanQueryNode)
                            {
                                q = new GroupQueryNode(q);
                            }
                            return(q);
                        }
                    }
                    else
                    {
                        // phrase query:
                        MultiPhraseQueryNode mpq = new MultiPhraseQueryNode();

                        // 'multiTerms' holds the terms collected for the current position until the
                        // next position starts.
                        IList <FieldQueryNode> multiTerms = new JCG.List <FieldQueryNode>();
                        int position       = -1;
                        int i              = 0;
                        int termGroupCount = 0;
                        for (; i < numTokens; i++)
                        {
                            string term = null;
                            int    positionIncrement = 1;
                            try
                            {
                                bool hasNext = buffer.IncrementToken();
                                if (Debugging.AssertsEnabled)
                                {
                                    Debugging.Assert(hasNext == true);
                                }
                                term = termAtt.ToString();
                                if (posIncrAtt != null)
                                {
                                    positionIncrement = posIncrAtt.PositionIncrement;
                                }
                            }
                            catch (Exception e) when(e.IsIOException())
                            {
                                // safe to ignore, because we know the number of tokens
                            }

                            // A positive increment starts a new position: flush the collected group first.
                            if (positionIncrement > 0 && multiTerms.Count > 0)
                            {
                                foreach (FieldQueryNode termNode in multiTerms)
                                {
                                    if (this.positionIncrementsEnabled)
                                    {
                                        termNode.PositionIncrement = position;
                                    }
                                    else
                                    {
                                        termNode.PositionIncrement = termGroupCount;
                                    }

                                    mpq.Add(termNode);
                                }

                                // Only increment once for each "group" of
                                // terms that were in the same position:
                                termGroupCount++;

                                multiTerms.Clear();
                            }

                            position += positionIncrement;
                            multiTerms.Add(new FieldQueryNode(field, term, -1, -1));
                        }

                        // Flush the group collected for the last position.
                        foreach (FieldQueryNode termNode in multiTerms)
                        {
                            if (this.positionIncrementsEnabled)
                            {
                                termNode.PositionIncrement = position;
                            }
                            else
                            {
                                termNode.PositionIncrement = termGroupCount;
                            }

                            mpq.Add(termNode);
                        }

                        return(mpq);
                    }
                }
                else
                {
                    // Quoted node with every token at its own position: plain tokenized phrase.
                    TokenizedPhraseQueryNode pq = new TokenizedPhraseQueryNode();

                    int position = -1;

                    for (int i = 0; i < numTokens; i++)
                    {
                        string term = null;
                        int    positionIncrement = 1;

                        try
                        {
                            bool hasNext = buffer.IncrementToken();
                            if (Debugging.AssertsEnabled)
                            {
                                Debugging.Assert(hasNext == true);
                            }
                            term = termAtt.ToString();

                            if (posIncrAtt != null)
                            {
                                positionIncrement = posIncrAtt.PositionIncrement;
                            }
                        }
                        catch (Exception e) when(e.IsIOException())
                        {
                            // safe to ignore, because we know the number of tokens
                        }

                        FieldQueryNode newFieldNode = new FieldQueryNode(field, term, -1, -1);

                        // With position increments enabled the accumulated position is used;
                        // otherwise the token's ordinal is used.
                        if (this.positionIncrementsEnabled)
                        {
                            position += positionIncrement;
                            newFieldNode.PositionIncrement = position;
                        }
                        else
                        {
                            newFieldNode.PositionIncrement = i;
                        }

                        pq.Add(newFieldNode);
                    }

                    return(pq);
                }
            }

            return(node);
        }
Beispiel #14
0
        /// <summary>
        /// Sorts the contents of <paramref name="input"/> into <paramref name="output"/>.
        /// The input is read partition by partition; each partition is sorted into a temporary
        /// file, and the temporary files are merged (with intermediate merges whenever
        /// <c>maxTempFiles</c> partitions have piled up) to produce the output.
        /// </summary>
        /// <param name="input">The file to read.</param>
        /// <param name="output">The file to write; it is deleted first, and deleted again if sorting fails.</param>
        /// <returns>The <c>SortInfo</c> collected for this run; it is also stored in <c>sortInfo</c>.</returns>
        public SortInfo Sort(FileInfo input, FileInfo output)
        {
            // TotalTime first holds the start timestamp; it is replaced by the elapsed time at the end.
            // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results
            sortInfo = new SortInfo(this)
            {
                TotalTime = J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond
            };

            output.Delete();

            var  partitionFiles = new JCG.List <FileInfo>();
            bool sortCompleted  = false;

            try
            {
                var  reader       = new ByteSequencesReader(input);
                bool readFinished = false;
                try
                {
                    int lines;
                    while ((lines = ReadPartition(reader)) > 0)
                    {
                        partitionFiles.Add(SortPartition(/*lines*/)); // LUCENENET specific - removed unused parameter
                        sortInfo.TempMergeFiles++;
                        sortInfo.Lines += lines;

                        // Handle intermediate merges.
                        if (partitionFiles.Count == maxTempFiles)
                        {
                            var mergedFile = new FileInfo(Path.GetTempFileName());
                            try
                            {
                                MergePartitions(partitionFiles, mergedFile);
                            }
                            finally
                            {
                                // Whether or not the merge worked, the merged inputs are removed and
                                // the intermediate file becomes the only tracked partition.
                                foreach (var partition in partitionFiles)
                                {
                                    partition.Delete();
                                }
                                partitionFiles.Clear();
                                partitionFiles.Add(mergedFile);
                            }
                            sortInfo.TempMergeFiles++;
                        }
                    }
                    readFinished = true;
                }
                finally
                {
                    // On failure the reader is disposed without masking the original exception.
                    if (readFinished)
                    {
                        IOUtils.Dispose(reader);
                    }
                    else
                    {
                        IOUtils.DisposeWhileHandlingException(reader);
                    }
                }

                if (partitionFiles.Count != 1)
                {
                    // merge the partitions with a priority queue.
                    MergePartitions(partitionFiles, output);
                }
                else
                {
                    // One partition: copy it to the output, then try to remove the temporary file.
                    FileInfo onlyPartition = partitionFiles[0];
                    Copy(onlyPartition, output);
                    try
                    {
                        File.Delete(onlyPartition.FullName);
                    }
#pragma warning disable CA1031 // Do not catch general exception types
                    catch
                    {
                        // ignored
                    }
#pragma warning restore CA1031 // Do not catch general exception types
                }
                sortCompleted = true;
            }
            finally
            {
                // Remove every temporary partition that is still tracked.
                foreach (FileInfo partition in partitionFiles)
                {
                    partition.Delete();
                }
                if (!sortCompleted)
                {
                    output.Delete();
                }
            }

            // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results
            sortInfo.TotalTime = ((J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond) - sortInfo.TotalTime);
            return sortInfo;
        }
Beispiel #15
0
        /// <summary>
        /// Builds the phrase list by popping terms off <paramref name="fieldTermStack"/> and
        /// matching them against the term/phrase maps supplied by <paramref name="fieldQuery"/>.
        /// The outer loop stops as soon as the stack is empty or the phrase list holds
        /// <paramref name="phraseLimit"/> entries.
        /// </summary>
        /// <param name="fieldTermStack"><see cref="FieldTermStack"/> object</param>
        /// <param name="fieldQuery"><see cref="FieldQuery"/> object</param>
        /// <param name="phraseLimit">maximum size of phraseList</param>
        public FieldPhraseList(FieldTermStack fieldTermStack, FieldQuery fieldQuery, int phraseLimit)
        {
            string field = fieldTermStack.FieldName;

            // Terms of the phrase currently being assembled; reused (cleared) for every attempt.
            IList <TermInfo> phraseCandidate = new JCG.List <TermInfo>();
            QueryPhraseMap   currMap; // LUCENENET: IDE0059: Remove unnecessary value assignment
            QueryPhraseMap   nextMap; // LUCENENET: IDE0059: Remove unnecessary value assignment

            while (!fieldTermStack.IsEmpty && (phraseList.Count < phraseLimit))
            {
                phraseCandidate.Clear();

                TermInfo ti;    // LUCENENET: IDE0059: Remove unnecessary value assignment
                TermInfo first; // LUCENENET: IDE0059: Remove unnecessary value assignment

                first   = ti = fieldTermStack.Pop();
                currMap = fieldQuery.GetFieldTermMap(field, ti.Text);
                // No map for the popped term: follow the Next links until a term with a map is
                // found or the chain leads back to the popped term.
                // NOTE(review): the Next chain presumably links terms at the same position - confirm.
                while (currMap is null && ti.Next != first)
                {
                    ti      = ti.Next;
                    currMap = fieldQuery.GetFieldTermMap(field, ti.Text);
                }

                // if not found, discard top TermInfo from stack, then try next element
                if (currMap is null)
                {
                    continue;
                }

                // if found, search the longest phrase
                phraseCandidate.Add(ti);
                while (true)
                {
                    first   = ti = fieldTermStack.Pop();
                    nextMap = null;
                    // Pop() may hand back null here (the null check below guards for it).
                    if (ti != null)
                    {
                        // Same Next-chain walk as above, but looking the term up relative to the
                        // map reached so far (currMap) instead of the field's root map.
                        nextMap = currMap.GetTermMap(ti.Text);
                        while (nextMap is null && ti.Next != first)
                        {
                            ti      = ti.Next;
                            nextMap = currMap.GetTermMap(ti.Text);
                        }
                    }
                    // The candidate cannot be extended any further: either no term was popped
                    // or the popped term does not continue the phrase.
                    if (ti is null || nextMap is null)
                    {
                        if (ti != null)
                        {
                            // Return the non-matching term to the stack so the outer loop sees it again.
                            fieldTermStack.Push(ti);
                        }
                        if (currMap.IsValidTermOrPhrase(phraseCandidate))
                        {
                            AddIfNoOverlap(new WeightedPhraseInfo(phraseCandidate, currMap.Boost, currMap.TermOrPhraseNumber));
                        }
                        else
                        {
                            // The full candidate is not a valid phrase: shorten it one term at a
                            // time from the end, pushing each removed term back onto the stack,
                            // until a shorter phrase is found or only one term remains.
                            while (phraseCandidate.Count > 1)
                            {
                                //fieldTermStack.Push(phraseCandidate.Last.Value);
                                //phraseCandidate.RemoveLast();

                                TermInfo last = phraseCandidate[phraseCandidate.Count - 1];
                                phraseCandidate.Remove(last);
                                fieldTermStack.Push(last);

                                currMap = fieldQuery.SearchPhrase(field, phraseCandidate);
                                if (currMap != null)
                                {
                                    AddIfNoOverlap(new WeightedPhraseInfo(phraseCandidate, currMap.Boost, currMap.TermOrPhraseNumber));
                                    break;
                                }
                            }
                        }
                        break;
                    }
Beispiel #16
0
        /// <summary>
        /// Consumes the phrases of <paramref name="fieldPhraseList"/> in order, groups the ones
        /// that pass <c>AcceptPhrase</c> into fragments and appends every non-empty fragment to
        /// <paramref name="fieldFragList"/>.
        /// </summary>
        /// <param name="fieldPhraseList">source of the weighted phrases</param>
        /// <param name="fieldFragList">fragment list that receives the computed fragments</param>
        /// <param name="fragCharSize">requested fragment size; must not be below <c>minFragCharSize</c></param>
        /// <returns>the <paramref name="fieldFragList"/> instance that was passed in</returns>
        /// <exception cref="ArgumentNullException"><paramref name="fieldPhraseList"/> or <paramref name="fieldFragList"/> is <c>null</c></exception>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="fragCharSize"/> is smaller than <c>minFragCharSize</c></exception>
        protected virtual FieldFragList CreateFieldFragList(FieldPhraseList fieldPhraseList, FieldFragList fieldFragList, int fragCharSize)
        {
            // LUCENENET specific - added guard clauses to check for nulls
            if (fieldPhraseList is null)
            {
                throw new ArgumentNullException(nameof(fieldPhraseList));
            }
            if (fieldFragList is null)
            {
                throw new ArgumentNullException(nameof(fieldFragList));
            }
            if (fragCharSize < minFragCharSize)
            {
                throw new ArgumentOutOfRangeException(nameof(fragCharSize), "fragCharSize(" + fragCharSize + ") is too small. It must be " + minFragCharSize + " or higher."); // LUCENENET specific - changed from IllegalArgumentException to ArgumentOutOfRangeException (.NET convention)
            }

            // Phrases accepted for the fragment under construction; reused for every fragment.
            var accepted = new JCG.List<WeightedPhraseInfo>();
            using (var pending = new IteratorQueue<WeightedPhraseInfo>(fieldPhraseList.PhraseList.GetEnumerator()))
            {
                // End of the previously emitted fragment; nothing may start before it.
                int fragmentFloor = 0;
                WeightedPhraseInfo candidate;
                while ((candidate = pending.Top()) != null)
                {
                    // A phrase reaching back into the previous fragment is dropped.
                    if (candidate.StartOffset < fragmentFloor)
                    {
                        pending.RemoveTop();
                        continue;
                    }

                    accepted.Clear();
                    int firstStart = candidate.StartOffset;
                    int lastEnd    = candidate.EndOffset;
                    // Provisional right border used only to decide which following phrases join in.
                    int scanLimit = Math.Max(lastEnd, Math.Max(firstStart - margin, fragmentFloor) + fragCharSize);
                    if (AcceptPhrase(pending.RemoveTop(), lastEnd - firstStart, fragCharSize))
                    {
                        accepted.Add(candidate);
                    }

                    // Pull phrases until one ends beyond the provisional border.
                    for (candidate = pending.Top(); candidate != null; candidate = pending.Top())
                    {
                        if (candidate.EndOffset > scanLimit)
                        {
                            break;
                        }
                        lastEnd = candidate.EndOffset;
                        if (AcceptPhrase(pending.RemoveTop(), lastEnd - firstStart, fragCharSize))
                        {
                            accepted.Add(candidate);
                        }
                    }

                    if (accepted.Count == 0)
                    {
                        continue;
                    }

                    // Re-centre the fragment around the matched text. The match may be longer
                    // than fragCharSize, hence the clamp of the margin at zero.
                    int matchLen       = lastEnd - firstStart;
                    int centeredMargin = Math.Max(0, (fragCharSize - matchLen) / 2);
                    int fragStart      = Math.Max(firstStart - centeredMargin, fragmentFloor);
                    // Grow to whichever is larger: the match itself or the requested size.
                    int fragEnd = fragStart + Math.Max(matchLen, fragCharSize);
                    fragmentFloor = fragEnd;
                    fieldFragList.Add(fragStart, fragEnd, accepted);
                }
            }
            return fieldFragList;
        }
Beispiel #17
0
        /// <summary>
        /// Merging constructor. Combines the phrase lists of all <paramref name="toMerge"/>
        /// entries into this instance, collapsing every run of phrases whose offsets overlap
        /// (or touch) into a single merged <see cref="WeightedPhraseInfo"/>.
        /// </summary>
        /// <param name="toMerge"><see cref="FieldPhraseList"/>s to merge to build this one</param>
        /// <exception cref="ArgumentNullException"><paramref name="toMerge"/> is <c>null</c></exception>
        public FieldPhraseList(FieldPhraseList[] toMerge)
        {
            // Fail with a descriptive exception instead of a NullReferenceException at toMerge.Length.
            if (toMerge is null)
            {
                throw new ArgumentNullException(nameof(toMerge));
            }

            // Merge all overlapping WeightedPhraseInfos
            // Step 1.  Sort by startOffset, endOffset, and boost, in that order.

            IEnumerator<WeightedPhraseInfo>[] allInfos = new IEnumerator<WeightedPhraseInfo>[toMerge.Length];
            try
            {
                int index = 0;
                foreach (FieldPhraseList fplToMerge in toMerge)
                {
                    allInfos[index++] = fplToMerge.phraseList.GetEnumerator();
                }
                using MergedEnumerator<WeightedPhraseInfo> itr = new MergedEnumerator<WeightedPhraseInfo>(false, allInfos);
                // Step 2.  Walk the sorted list merging infos that overlap
                phraseList = new JCG.List<WeightedPhraseInfo>();
                if (!itr.MoveNext())
                {
                    // Nothing to merge; the finally block still disposes the enumerators.
                    return;
                }

                // "work" holds the current run of mutually overlapping infos and
                // workEndOffset is the largest end offset seen in that run.
                IList<WeightedPhraseInfo> work  = new JCG.List<WeightedPhraseInfo>();
                WeightedPhraseInfo        first = itr.Current;
                work.Add(first);
                int workEndOffset = first.EndOffset;
                while (itr.MoveNext())
                {
                    WeightedPhraseInfo current = itr.Current;
                    if (current.StartOffset <= workEndOffset)
                    {
                        // Starts inside (or right at the end of) the current run: extend the run.
                        workEndOffset = Math.Max(workEndOffset, current.EndOffset);
                        work.Add(current);
                    }
                    else
                    {
                        // Gap found: flush the finished run and start a new one with "current".
                        if (work.Count == 1)
                        {
                            // A run of one is passed through unchanged, no merged info is built.
                            phraseList.Add(work[0]);
                            work[0] = current;
                        }
                        else
                        {
                            phraseList.Add(new WeightedPhraseInfo(work));
                            work.Clear();
                            work.Add(current);
                        }
                        workEndOffset = current.EndOffset;
                    }
                }

                // Flush the last run.
                if (work.Count == 1)
                {
                    phraseList.Add(work[0]);
                }
                else
                {
                    phraseList.Add(new WeightedPhraseInfo(work));
                    work.Clear();
                }
            }
            finally
            {
                IOUtils.Dispose(allInfos);
            }
        }
Beispiel #18
0
        /// <summary>
        /// Parses explicit mapping rules ("lhs=>rhs") and checks the shape of the resulting
        /// <see cref="SlowSynonymMap"/> for single-token and multi-token left-hand sides.
        /// </summary>
        public virtual void TestReadMappingRules()
        {
            // One rule list is shared by all scenarios; it is emptied and refilled each time.
            IList<string> ruleLines = new JCG.List<string>();

            // Replaces the shared rule list with the given lines and parses it into a new map.
            SlowSynonymMap ParseMappings(params string[] lines)
            {
                ruleLines.Clear();
                foreach (string line in lines)
                {
                    ruleLines.Add(line);
                }
                SlowSynonymMap parsed = new SlowSynonymMap(true);
                SlowSynonymFilterFactory.ParseRules(ruleLines, parsed, "=>", ",", true, null);
                return parsed;
            }

            // (a)->[b]
            SlowSynonymMap map = ParseMappings("a=>b");
            assertEquals(1, map.Submap.size());
            AssertTokIncludes(map, "a", "b");

            // (a)->[c]
            // (b)->[c]
            map = ParseMappings("a,b=>c");
            assertEquals(2, map.Submap.size());
            AssertTokIncludes(map, "a", "c");
            AssertTokIncludes(map, "b", "c");

            // (a)->[b][c]
            map = ParseMappings("a=>b,c");
            assertEquals(1, map.Submap.size());
            AssertTokIncludes(map, "a", "b");
            AssertTokIncludes(map, "a", "c");

            // (a)->(b)->[a2]
            //      [a1]
            map = ParseMappings("a=>a1", "a b=>a2");
            assertEquals(1, map.Submap.size());
            AssertTokIncludes(map, "a", "a1");
            assertEquals(1, GetSubSynonymMap(map, "a").Submap.size());
            AssertTokIncludes(GetSubSynonymMap(map, "a"), "b", "a2");

            // (a)->(b)->[a2]
            //      (c)->[a3]
            //      [a1]
            map = ParseMappings("a=>a1", "a b=>a2", "a c=>a3");
            assertEquals(1, map.Submap.size());
            AssertTokIncludes(map, "a", "a1");
            assertEquals(2, GetSubSynonymMap(map, "a").Submap.size());
            AssertTokIncludes(GetSubSynonymMap(map, "a"), "b", "a2");
            AssertTokIncludes(GetSubSynonymMap(map, "a"), "c", "a3");

            // (a)->(b)->[a2]
            //      [a1]
            // (b)->(c)->[b2]
            //      [b1]
            map = ParseMappings("a=>a1", "a b=>a2", "b=>b1", "b c=>b2");
            assertEquals(2, map.Submap.size());
            AssertTokIncludes(map, "a", "a1");
            assertEquals(1, GetSubSynonymMap(map, "a").Submap.size());
            AssertTokIncludes(GetSubSynonymMap(map, "a"), "b", "a2");
            AssertTokIncludes(map, "b", "b1");
            assertEquals(1, GetSubSynonymMap(map, "b").Submap.size());
            AssertTokIncludes(GetSubSynonymMap(map, "b"), "c", "b2");
        }
Beispiel #19
0
        /// <summary>
        /// Indexes seven documents (three author groups plus one document without an author),
        /// then verifies grouping by the author field and grouping by document blocks that are
        /// terminated with a "groupend" marker field.
        /// </summary>
        public virtual void TestBasic()
        {
            string groupField = "author";

            FieldType storedOnly = new FieldType { IsStored = true };

            Directory         dir    = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(
                Random,
                dir,
                NewIndexWriterConfig(TEST_VERSION_CURRENT,
                                     new MockAnalyzer(Random)).SetMergePolicy(NewLogMergePolicy()));
            bool canUseIDV = !"Lucene3x".Equals(writer.IndexWriter.Config.Codec.Name, StringComparison.Ordinal);

            // Creates one document. A null author leaves the group field out; endsBlock adds
            // the "groupend" marker that tags the last document of a block.
            Document MakeDoc(string author, string content, string id, bool endsBlock)
            {
                Document created = new Document();
                if (author != null)
                {
                    AddGroupField(created, groupField, author, canUseIDV);
                }
                created.Add(new TextField("content", content, Field.Store.YES));
                created.Add(new Field("id", id, storedOnly));
                if (endsBlock)
                {
                    created.Add(new StringField("groupend", "x", Field.Store.NO));
                }
                return created;
            }

            // Checks the doc ids of a group in rank order and that the scores strictly decrease.
            void AssertRankedDocs(IGroupDocs<object> ranked, params int[] expectedDocIds)
            {
                assertEquals(expectedDocIds.Length, ranked.ScoreDocs.Length);
                for (int i = 0; i < expectedDocIds.Length; i++)
                {
                    assertEquals(expectedDocIds[i], ranked.ScoreDocs[i].Doc);
                }
                for (int i = 1; i < expectedDocIds.Length; i++)
                {
                    assertTrue(ranked.ScoreDocs[i - 1].Score > ranked.ScoreDocs[i].Score);
                }
            }

            JCG.List<Document> block = new JCG.List<Document>();

            // docs 0, 1, 2 -- one block for author1
            block.Add(MakeDoc("author1", "random text", "1", false));
            block.Add(MakeDoc("author1", "some more random text", "2", false));
            block.Add(MakeDoc("author1", "some more random textual data", "3", true));
            writer.AddDocuments(block);
            block.Clear();

            // doc 3 -- author2, a block of its own
            writer.AddDocument(MakeDoc("author2", "some random text", "4", true));

            // docs 4, 5 -- one block for author3
            block.Add(MakeDoc("author3", "some more random text", "5", false));
            block.Add(MakeDoc("author3", "random", "6", true));
            writer.AddDocuments(block);
            block.Clear();

            // doc 6 -- no author field
            writer.AddDocument(MakeDoc(null, "random word stuck in alot of other text", "6", true));

            IndexSearcher indexSearcher = NewSearcher(writer.GetReader());

            writer.Dispose();

            GroupingSearch groupingSearch = CreateRandomGroupingSearch(groupField, Sort.RELEVANCE, 5, canUseIDV);

            ITopGroups<object> groups = groupingSearch.Search(indexSearcher, (Filter)null, new TermQuery(new Index.Term("content", "random")), 0, 10);

            assertEquals(7, groups.TotalHitCount);
            assertEquals(7, groups.TotalGroupedHitCount);
            assertEquals(4, groups.Groups.Length);

            // relevance order: 5, 0, 3, 4, 1, 2, 6

            // the later a document is added the higher this docId
            // value
            IGroupDocs<object> group = groups.Groups[0];
            CompareGroupValue("author3", group);
            AssertRankedDocs(group, 5, 4);

            group = groups.Groups[1];
            CompareGroupValue("author1", group);
            AssertRankedDocs(group, 0, 1, 2);

            group = groups.Groups[2];
            CompareGroupValue("author2", group);
            AssertRankedDocs(group, 3);

            group = groups.Groups[3];
            CompareGroupValue(null, group);
            AssertRankedDocs(group, 6);

            // Second pass: group by index-time blocks, using the "groupend" marker as block delimiter.
            Filter lastDocInBlock = new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Index.Term("groupend", "x"))));

            groupingSearch = new GroupingSearch(lastDocInBlock);
            groups         = groupingSearch.Search(indexSearcher, null, new TermQuery(new Index.Term("content", "random")), 0, 10);

            assertEquals(7, groups.TotalHitCount);
            assertEquals(7, groups.TotalGroupedHitCount);
            assertEquals(4, groups.TotalGroupCount.GetValueOrDefault());
            assertEquals(4, groups.Groups.Length);

            indexSearcher.IndexReader.Dispose();
            dir.Dispose();
        }
Beispiel #20
0
        /// <summary>
        /// Dumps an <see cref="FST{T}"/> to a GraphViz's <c>dot</c> language description
        /// for visualization. Example of use:
        ///
        /// <code>
        /// using (TextWriter sw = new StreamWriter(&quot;out.dot&quot;))
        /// {
        ///     Util.ToDot(fst, sw, true, true);
        /// }
        /// </code>
        ///
        /// and then, from command line:
        ///
        /// <code>
        /// dot -Tpng -o out.png out.dot
        /// </code>
        ///
        /// <para/>
        /// Note: larger FSTs (a few thousand nodes) won't even
        /// render, don't bother.  If the FST is &gt; 2.1 GB in size
        /// then this method will throw strange exceptions.
        /// <para/>
        /// See also <a href="http://www.graphviz.org/">http://www.graphviz.org/</a>.
        /// </summary>
        /// <typeparam name="T">The type of the outputs stored in the FST.</typeparam>
        /// <param name="fst">The FST to dump.</param>
        /// <param name="out">
        ///          Writer that receives the <c>dot</c> text. It is flushed before this
        ///          method returns.
        /// </param>
        /// <param name="sameRank">
        ///          If <c>true</c>, the resulting <c>dot</c> file will try
        ///          to order states in layers of breadth-first traversal. This may
        ///          mess up arcs, but makes the output FST's structure a bit clearer.
        /// </param>
        /// <param name="labelStates">
        ///          If <c>true</c> states will have labels equal to their offsets in their
        ///          binary format. Expands the graph considerably.
        /// </param>
        public static void ToDot <T>(FST <T> fst, TextWriter @out, bool sameRank, bool labelStates)
        {
            const string expandedNodeColor = "blue";

            // This is the start arc in the automaton (from the epsilon state to the first state
            // with outgoing transitions).
            FST.Arc <T> startArc = fst.GetFirstArc(new FST.Arc <T>());

            // A queue of transitions to consider for the next level.
            IList <FST.Arc <T> > thisLevelQueue = new JCG.List <FST.Arc <T> >();

            // A queue of transitions to consider when processing the next level.
            IList <FST.Arc <T> > nextLevelQueue = new JCG.List <FST.Arc <T> >();

            nextLevelQueue.Add(startArc);
            //System.out.println("toDot: startArc: " + startArc);

            // A list of states on the same level (for ranking).
            IList <int?> sameLevelStates = new JCG.List <int?>();

            // A bitset of already seen states (target offset).
            BitArray seen = new BitArray(32);

            seen.SafeSet((int)startArc.Target, true);

            // Shape for states.
            const string stateShape      = "circle";
            const string finalStateShape = "doublecircle";

            // Emit DOT prologue.
            @out.Write("digraph FST {\n");
            @out.Write("  rankdir = LR; splines=true; concentrate=true; ordering=out; ranksep=2.5; \n");

            if (!labelStates)
            {
                @out.Write("  node [shape=circle, width=.2, height=.2, style=filled]\n");
            }

            EmitDotState(@out, "initial", "point", "white", "");

            T   NO_OUTPUT = fst.Outputs.NoOutput;
            var r         = fst.GetBytesReader();

            // final FST.Arc<T> scratchArc = new FST.Arc<>();

            // Emit the state the start arc points to. The braces only scope the locals, whose
            // names (stateColor, finalOutput) are declared again inside the traversal loop below.
            {
                string stateColor;
                if (fst.IsExpandedTarget(startArc, r))
                {
                    stateColor = expandedNodeColor;
                }
                else
                {
                    stateColor = null;
                }

                bool isFinal;
                T    finalOutput;
                if (startArc.IsFinal)
                {
                    isFinal     = true;
                    finalOutput = startArc.NextFinalOutput.Equals(NO_OUTPUT) ? default(T) : startArc.NextFinalOutput;
                }
                else
                {
                    isFinal     = false;
                    finalOutput = default(T);
                }

                EmitDotState(@out, Convert.ToString(startArc.Target), isFinal ? finalStateShape : stateShape, stateColor, finalOutput == null ? "" : fst.Outputs.OutputToString(finalOutput));
            }

            @out.Write("  initial -> " + startArc.Target + "\n");

            int level = 0;

            // Level-by-level traversal: each pass drains the arcs queued by the previous pass
            // and queues the arcs leading to states seen for the first time.
            while (nextLevelQueue.Count > 0)
            {
                // we could double buffer here, but it doesn't matter probably.
                //System.out.println("next level=" + level);
                thisLevelQueue.AddRange(nextLevelQueue);
                nextLevelQueue.Clear();

                level++;
                @out.Write("\n  // Transitions and states at level: " + level + "\n");
                while (thisLevelQueue.Count > 0)
                {
                    // Take arcs from the end of the list.
                    FST.Arc <T> arc = thisLevelQueue[thisLevelQueue.Count - 1];
                    thisLevelQueue.RemoveAt(thisLevelQueue.Count - 1);
                    //System.out.println("  pop: " + arc);
                    if (FST <T> .TargetHasArcs(arc))
                    {
                        // scan all target arcs
                        //System.out.println("  readFirstTarget...");

                        // Capture the source state first: the same arc instance is handed to the
                        // read calls below and is used afterwards to describe each outgoing
                        // transition of that state.
                        long node = arc.Target;

                        fst.ReadFirstRealTargetArc(arc.Target, arc, r);

                        //System.out.println("    firstTarget: " + arc);

                        while (true)
                        {
                            //System.out.println("  cycle arc=" + arc);
                            // Emit the unseen state and add it to the queue for the next level.
                            if (arc.Target >= 0 && !seen.SafeGet((int)arc.Target))
                            {
                                /*
                                 * boolean isFinal = false;
                                 * T finalOutput = null;
                                 * fst.readFirstTargetArc(arc, scratchArc);
                                 * if (scratchArc.isFinal() && fst.targetHasArcs(scratchArc)) {
                                 * // target is final
                                 * isFinal = true;
                                 * finalOutput = scratchArc.output == NO_OUTPUT ? null : scratchArc.output;
                                 * System.out.println("dot hit final label=" + (char) scratchArc.label);
                                 * }
                                 */
                                string stateColor;
                                if (fst.IsExpandedTarget(arc, r))
                                {
                                    stateColor = expandedNodeColor;
                                }
                                else
                                {
                                    stateColor = null;
                                }

                                string finalOutput;
                                if (arc.NextFinalOutput != null && !arc.NextFinalOutput.Equals(NO_OUTPUT))
                                {
                                    finalOutput = fst.Outputs.OutputToString(arc.NextFinalOutput);
                                }
                                else
                                {
                                    finalOutput = "";
                                }

                                EmitDotState(@out, Convert.ToString(arc.Target), stateShape, stateColor, finalOutput);
                                // To see the node address, use this instead:
                                //emitDotState(out, Integer.toString(arc.target), stateShape, stateColor, String.valueOf(arc.target));
                                seen.SafeSet((int)arc.Target, true);
                                // Queue a copy, because this arc instance keeps being reused by the loop.
                                nextLevelQueue.Add((new FST.Arc <T>()).CopyFrom(arc));
                                sameLevelStates.Add((int)arc.Target);
                            }

                            // Text appended to the arc label: "/<output>" when the arc carries an output.
                            string outs;
                            if (!arc.Output.Equals(NO_OUTPUT))
                            {
                                outs = "/" + fst.Outputs.OutputToString(arc.Output);
                            }
                            else
                            {
                                outs = "";
                            }

                            if (!FST <T> .TargetHasArcs(arc) && arc.IsFinal && !arc.NextFinalOutput.Equals(NO_OUTPUT))
                            {
                                // Tricky special case: sometimes, due to
                                // pruning, the builder can [sillily] produce
                                // an FST with an arc into the final end state
                                // (-1) but also with a next final output; in
                                // this case we pull that output up onto this
                                // arc
                                outs = outs + "/[" + fst.Outputs.OutputToString(arc.NextFinalOutput) + "]";
                            }

                            // Arcs with the BIT_TARGET_NEXT flag set are drawn in red, all others in black.
                            string arcColor;
                            if (arc.Flag(FST.BIT_TARGET_NEXT))
                            {
                                arcColor = "red";
                            }
                            else
                            {
                                arcColor = "black";
                            }

                            Debug.Assert(arc.Label != FST.END_LABEL);
                            @out.Write("  " + node + " -> " + arc.Target + " [label=\"" + PrintableLabel(arc.Label) + outs + "\"" + (arc.IsFinal ? " style=\"bold\"" : "") + " color=\"" + arcColor + "\"]\n");

                            // Break the loop if we're on the last arc of this state.
                            if (arc.IsLast)
                            {
                                //System.out.println("    break");
                                break;
                            }
                            fst.ReadNextRealArc(arc, r);
                        }
                    }
                }

                // Emit state ranking information.
                if (sameRank && sameLevelStates.Count > 1)
                {
                    @out.Write("  {rank=same; ");
                    foreach (int state in sameLevelStates)
                    {
                        @out.Write(state + "; ");
                    }
                    @out.Write(" }\n");
                }
                sameLevelStates.Clear();
            }

            // Emit terminating state (always there anyway).
            @out.Write("  -1 [style=filled, color=black, shape=doublecircle, label=\"\"]\n\n");
            @out.Write("  {rank=sink; -1 }\n");

            @out.Write("}\n");
            @out.Flush();
        }
Beispiel #21
0
            /// <summary>
            /// Indexing loop for a single thread. Until <c>stopTime</c> passes, a failure has been
            /// flagged in <c>outerInstance.m_failed</c>, or <c>docs</c> returns no more documents,
            /// each iteration takes the next document and randomly either adds/updates it as part
            /// of a doc block keyed by a "packID" term, adds it as a single document, or updates it
            /// as a single document. IDs are occasionally buffered and later deleted in a batch.
            /// Any exception is printed, recorded in <c>outerInstance.m_failed</c> and rethrown
            /// through <c>RuntimeException.Create</c>.
            /// </summary>
            public override void Run()
            {
                // TODO: would be better if this were cross thread, so that we make sure one thread deleting another's added docs works:
                IList<string>  toDeleteIDs     = new JCG.List<string>();
                IList<SubDocs> toDeleteSubDocs = new JCG.List<SubDocs>();

                while (J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond < stopTime && !outerInstance.m_failed) // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results
                {
                    try
                    {
                        // Occasional longish pause if running
                        // nightly
                        if (LuceneTestCase.TestNightly && Random.Next(6) == 3)
                        {
                            if (Verbose)
                            {
                                Console.WriteLine(Thread.CurrentThread.Name + ": now long sleep");
                            }
                            //Thread.Sleep(TestUtil.NextInt32(Random, 50, 500));
                            // LUCENENET specific - Reduced amount of pause to keep the total
                            // Nightly test time under 1 hour
                            Thread.Sleep(TestUtil.NextInt32(Random, 50, 250));
                        }

                        // Rate limit ingest rate:
                        if (Random.Next(7) == 5)
                        {
                            Thread.Sleep(TestUtil.NextInt32(Random, 1, 10));
                            if (Verbose)
                            {
                                Console.WriteLine(Thread.CurrentThread.Name + ": done sleep");
                            }
                        }

                        Document doc = docs.NextDoc();
                        if (doc is null)
                        {
                            // No more documents to index.
                            break;
                        }

                        // Maybe add randomly named field
                        string addedField;
                        if (Random.NextBoolean())
                        {
                            addedField = "extra" + Random.Next(40);
                            doc.Add(NewTextField(addedField, "a random field", Field.Store.YES));
                        }
                        else
                        {
                            addedField = null;
                        }

                        if (Random.NextBoolean())
                        {
                            if (Random.NextBoolean())
                            {
                                // Add/update doc block:
                                string  packID;
                                SubDocs delSubDocs;
                                if (toDeleteSubDocs.Count > 0 && Random.NextBoolean())
                                {
                                    delSubDocs = toDeleteSubDocs[Random.Next(toDeleteSubDocs.Count)];
                                    if (Debugging.AssertsEnabled)
                                    {
                                        Debugging.Assert(!delSubDocs.Deleted);
                                    }
                                    toDeleteSubDocs.Remove(delSubDocs);
                                    // Update doc block, replacing prior packID
                                    packID = delSubDocs.PackID;
                                }
                                else
                                {
                                    delSubDocs = null;
                                    // Add doc block, using new packID
                                    packID = outerInstance.m_packCount.GetAndIncrement().ToString(CultureInfo.InvariantCulture);
                                }

                                Field           packIDField = NewStringField("packID", packID, Field.Store.YES);
                                IList<string>   docIDs      = new JCG.List<string>();
                                SubDocs         subDocs     = new SubDocs(packID, docIDs);
                                IList<Document> docsList    = new JCG.List<Document>();

                                allSubDocs.Enqueue(subDocs);
                                doc.Add(packIDField);
                                docsList.Add(TestUtil.CloneDocument(doc));
                                docIDs.Add(doc.Get("docid"));

                                // Fill the block with up to maxDocCount documents. If the source runs
                                // out part-way through, doc is left null and the (shorter) block is
                                // still indexed below.
                                int maxDocCount = TestUtil.NextInt32(Random, 1, 10);
                                while (docsList.Count < maxDocCount)
                                {
                                    doc = docs.NextDoc();
                                    if (doc is null)
                                    {
                                        break;
                                    }
                                    docsList.Add(TestUtil.CloneDocument(doc));
                                    docIDs.Add(doc.Get("docid"));
                                }
                                outerInstance.m_addCount.AddAndGet(docsList.Count);

                                Term packIDTerm = new Term("packID", packID);

                                if (delSubDocs != null)
                                {
                                    // Replacing an existing block: its sub-documents count as deleted.
                                    delSubDocs.Deleted = true;
                                    delIDs.UnionWith(delSubDocs.SubIDs);
                                    outerInstance.m_delCount.AddAndGet(delSubDocs.SubIDs.Count);
                                    if (Verbose)
                                    {
                                        Console.WriteLine(Thread.CurrentThread.Name + ": update pack packID=" + delSubDocs.PackID +
                                                          " count=" + docsList.Count + " docs=" + string.Format(J2N.Text.StringFormatter.InvariantCulture, "{0}", docIDs));
                                    }
                                    outerInstance.UpdateDocuments(packIDTerm, docsList);
                                }
                                else
                                {
                                    if (Verbose)
                                    {
                                        Console.WriteLine(Thread.CurrentThread.Name + ": add pack packID=" + packID +
                                                          " count=" + docsList.Count + " docs=" + string.Format(J2N.Text.StringFormatter.InvariantCulture, "{0}", docIDs));
                                    }
                                    outerInstance.AddDocuments(packIDTerm, docsList);
                                }

                                // doc is null here when the inner loop above ran out of documents;
                                // guard so that exhaustion does not surface as a NullReferenceException.
                                doc?.RemoveField("packID");

                                if (Random.Next(5) == 2)
                                {
                                    if (Verbose)
                                    {
                                        Console.WriteLine(Thread.CurrentThread.Name + ": buffer del id:" + packID);
                                    }
                                    toDeleteSubDocs.Add(subDocs);
                                }
                            }
                            else
                            {
                                // Add single doc
                                string docid = doc.Get("docid");
                                if (Verbose)
                                {
                                    Console.WriteLine(Thread.CurrentThread.Name + ": add doc docid:" + docid);
                                }
                                outerInstance.AddDocument(new Term("docid", docid), doc);
                                outerInstance.m_addCount.GetAndIncrement();

                                if (Random.Next(5) == 3)
                                {
                                    if (Verbose)
                                    {
                                        Console.WriteLine(Thread.CurrentThread.Name + ": buffer del id:" + doc.Get("docid"));
                                    }
                                    toDeleteIDs.Add(docid);
                                }
                            }
                        }
                        else
                        {
                            // Update single doc, but we never re-use
                            // an ID so the delete will never
                            // actually happen:
                            if (Verbose)
                            {
                                Console.WriteLine(Thread.CurrentThread.Name + ": update doc id:" + doc.Get("docid"));
                            }
                            string docid = doc.Get("docid");
                            outerInstance.UpdateDocument(new Term("docid", docid), doc);
                            outerInstance.m_addCount.GetAndIncrement();

                            if (Random.Next(5) == 3)
                            {
                                if (Verbose)
                                {
                                    Console.WriteLine(Thread.CurrentThread.Name + ": buffer del id:" + doc.Get("docid"));
                                }
                                toDeleteIDs.Add(docid);
                            }
                        }

                        // Occasionally flush everything buffered for deletion.
                        if (Random.Next(30) == 17)
                        {
                            if (Verbose)
                            {
                                Console.WriteLine(Thread.CurrentThread.Name + ": apply " + toDeleteIDs.Count + " deletes");
                            }
                            foreach (string id in toDeleteIDs)
                            {
                                if (Verbose)
                                {
                                    Console.WriteLine(Thread.CurrentThread.Name + ": del term=id:" + id);
                                }
                                outerInstance.DeleteDocuments(new Term("docid", id));
                            }
                            int count = outerInstance.m_delCount.AddAndGet(toDeleteIDs.Count);
                            if (Verbose)
                            {
                                Console.WriteLine(Thread.CurrentThread.Name + ": tot " + count + " deletes");
                            }
                            delIDs.UnionWith(toDeleteIDs);
                            toDeleteIDs.Clear();

                            foreach (SubDocs subDocs in toDeleteSubDocs)
                            {
                                if (Debugging.AssertsEnabled)
                                {
                                    Debugging.Assert(!subDocs.Deleted);
                                }
                                delPackIDs.Add(subDocs.PackID);
                                outerInstance.DeleteDocuments(new Term("packID", subDocs.PackID));
                                subDocs.Deleted = true;
                                if (Verbose)
                                {
                                    Console.WriteLine(Thread.CurrentThread.Name + ": del subs: " + subDocs.SubIDs + " packID=" + subDocs.PackID);
                                }
                                delIDs.UnionWith(subDocs.SubIDs);
                                outerInstance.m_delCount.AddAndGet(subDocs.SubIDs.Count);
                            }
                            toDeleteSubDocs.Clear();
                        }

                        // Undo the random extra field added above. doc may be null if the doc-block
                        // loop exhausted the source, in which case there is nothing to clean up.
                        if (addedField != null && doc != null)
                        {
                            doc.RemoveField(addedField);
                        }
                    }
                    catch (Exception t) when (t.IsThrowable())
                    {
                        Console.WriteLine(Thread.CurrentThread.Name + ": hit exc");
                        Console.WriteLine(t.ToString());
                        Console.Write(t.StackTrace);
                        // Flag the failure so the loop condition above stops other iterations.
                        outerInstance.m_failed.Value = true;
                        throw RuntimeException.Create(t);
                    }
                }
                if (Verbose)
                {
                    Console.WriteLine(Thread.CurrentThread.Name + ": indexing done");
                }

                outerInstance.DoAfterIndexingThreadDone();
            }
Beispiel #22
0
        /// <summary>
        /// Minimizes the given automaton using Hopcroft's algorithm.
        /// <para/>
        /// The automaton is modified in place: it is determinized and totalized first, then its
        /// states are partitioned into blocks, one new state is created per block, and
        /// <c>a.initial</c> is replaced by the new state of the block containing the old initial state.
        /// </summary>
        /// <param name="a">The automaton to minimize (mutated by this call).</param>
        public static void MinimizeHopcroft(Automaton a)
        {
            a.Determinize();
            // Shortcut: if the initial state's only transition loops back to itself over the whole
            // code point range, return without doing any further work.
            if (a.initial.numTransitions == 1)
            {
                Transition t = a.initial.TransitionsArray[0];
                if (t.to == a.initial && t.min == Character.MinCodePoint && t.max == Character.MaxCodePoint)
                {
                    return;
                }
            }
            // NOTE(review): presumably makes the transition function total so that Step() below
            // never returns null - confirm against Automaton.Totalize.
            a.Totalize();

            // initialize data structures
            int[]   sigma = a.GetStartPoints();
            State[] states = a.GetNumberedStates();
            int     sigmaLen = sigma.Length, statesLen = states.Length;

            // reverse[r, x]: states whose Step(sigma[x]) leads to the state numbered r (lazily allocated).
            JCG.List <State>[,] reverse = new JCG.List <State> [statesLen, sigmaLen];
            // partition[j]: the states currently in block j.
            ISet <State>[]     partition  = new JCG.HashSet <State> [statesLen];
            // splitblock[j]: states of block j marked to be split off in the current round.
            JCG.List <State>[] splitblock = new JCG.List <State> [statesLen];
            // block[q]: index of the block that state number q currently belongs to.
            int[] block = new int[statesLen];
            // active[j, x]: states of block j that have a non-null reverse[., x] entry;
            // active2[q, x]: the list node of state q inside one of the active[., x] lists.
            StateList[,] active      = new StateList[statesLen, sigmaLen];
            StateListNode[,] active2 = new StateListNode[statesLen, sigmaLen];
            // pending: work queue of (block, symbol index) pairs; pending2 mirrors its membership
            // as a bit at index (symbol index * statesLen + block).
            Queue <Int32Pair> pending = new Queue <Int32Pair>(); // LUCENENET specific - Queue is much more performant than LinkedList
            OpenBitSet        pending2 = new OpenBitSet(sigmaLen * statesLen);
            // split: states already added to a splitblock; refine/refine2: blocks touched this round.
            OpenBitSet        split = new OpenBitSet(statesLen),
                              refine = new OpenBitSet(statesLen), refine2 = new OpenBitSet(statesLen);

            for (int q = 0; q < statesLen; q++)
            {
                splitblock[q] = new JCG.List <State>();
                partition[q]  = new JCG.HashSet <State>();
                for (int x = 0; x < sigmaLen; x++)
                {
                    active[q, x] = new StateList();
                }
            }
            // find initial partition and reverse edges
            for (int q = 0; q < statesLen; q++)
            {
                State qq = states[q];
                // Block 0 holds the accepting states, block 1 the non-accepting ones.
                int   j  = qq.accept ? 0 : 1;
                partition[j].Add(qq);
                block[q] = j;
                for (int x = 0; x < sigmaLen; x++)
                {
                    //JCG.List<State>[] r = reverse[qq.Step(sigma[x]).number];
                    // r is the number of the state reached from qq on sigma[x].
                    var r = qq.Step(sigma[x]).number;
                    if (reverse[r, x] == null)
                    {
                        reverse[r, x] = new JCG.List <State>();
                    }
                    reverse[r, x].Add(qq);
                }
            }
            // initialize active sets
            for (int j = 0; j <= 1; j++)
            {
                for (int x = 0; x < sigmaLen; x++)
                {
                    foreach (State qq in partition[j])
                    {
                        if (reverse[qq.number, x] != null)
                        {
                            active2[qq.number, x] = active[j, x].Add(qq);
                        }
                    }
                }
            }
            // initialize pending
            for (int x = 0; x < sigmaLen; x++)
            {
                // For each symbol, enqueue whichever of the two initial blocks has the smaller
                // active list (block 0 on a tie).
                int j = (active[0, x].Count <= active[1, x].Count) ? 0 : 1;
                pending.Enqueue(new Int32Pair(j, x));
                pending2.Set(x * statesLen + j);
            }
            // process pending until fixed point
            // k is the index of the next unused block (blocks 0 and 1 are already in use).
            int k = 2;

            while (pending.Count > 0)
            {
                Int32Pair ip = pending.Dequeue();
                int       p  = ip.n1; // block index
                int       x  = ip.n2; // symbol index into sigma
                pending2.Clear(x * statesLen + p);
                // find states that need to be split off their blocks
                for (StateListNode m = active[p, x].First; m != null; m = m.Next)
                {
                    JCG.List <State> r = reverse[m.Q.number, x];
                    if (r != null)
                    {
                        foreach (State s in r)
                        {
                            int i = s.number;
                            // Record each predecessor state at most once per round.
                            if (!split.Get(i))
                            {
                                split.Set(i);
                                int j = block[i];
                                splitblock[j].Add(s);
                                if (!refine2.Get(j))
                                {
                                    refine2.Set(j);
                                    refine.Set(j);
                                }
                            }
                        }
                    }
                }
                // refine blocks
                for (int j = refine.NextSetBit(0); j >= 0; j = refine.NextSetBit(j + 1))
                {
                    JCG.List <State> sb = splitblock[j];
                    // Only split when a proper subset of block j was marked.
                    if (sb.Count < partition[j].Count)
                    {
                        ISet <State> b1 = partition[j];
                        ISet <State> b2 = partition[k];
                        // Move the marked states from block j into the new block k, keeping the
                        // block index and the per-symbol active lists in sync.
                        foreach (State s in sb)
                        {
                            b1.Remove(s);
                            b2.Add(s);
                            block[s.number] = k;
                            for (int c = 0; c < sigmaLen; c++)
                            {
                                StateListNode sn = active2[s.number, c];
                                if (sn != null && sn.Sl == active[j, c])
                                {
                                    sn.Remove();
                                    active2[s.number, c] = active[k, c].Add(s);
                                }
                            }
                        }
                        // update pending
                        for (int c = 0; c < sigmaLen; c++)
                        {
                            int aj = active[j, c].Count, ak = active[k, c].Count, ofs = c * statesLen;
                            // Enqueue block j when it is not already pending, is non-empty and is
                            // no larger than block k; in every other case enqueue block k.
                            if (!pending2.Get(ofs + j) && 0 < aj && aj <= ak)
                            {
                                pending2.Set(ofs + j);
                                pending.Enqueue(new Int32Pair(j, c));
                            }
                            else
                            {
                                pending2.Set(ofs + k);
                                pending.Enqueue(new Int32Pair(k, c));
                            }
                        }
                        k++;
                    }
                    // Reset the per-round markers for this block.
                    refine2.Clear(j);
                    foreach (State s in sb)
                    {
                        split.Clear(s.number);
                    }
                    sb.Clear();
                }
                refine.Clear(0, refine.Length);
            }
            // make a new state for each equivalence class, set initial state
            State[] newstates = new State[k];
            for (int n = 0; n < newstates.Length; n++)
            {
                State s = new State();
                newstates[n] = s;
                foreach (State q in partition[n])
                {
                    if (q == a.initial)
                    {
                        a.initial = s;
                    }
                    s.accept = q.accept;
                    // s.number temporarily stores the original number of a member of the block
                    // (the last one iterated); q.number is overwritten with the block index.
                    s.number = q.number; // select representative
                    q.number = n;
                }
            }
            // build transitions and set acceptance
            for (int n = 0; n < newstates.Length; n++)
            {
                State s = newstates[n];
                // states[s.number] is the representative chosen above; its targets' numbers now
                // hold block indices, so they index directly into newstates.
                s.accept = states[s.number].accept;
                foreach (Transition t in states[s.number].GetTransitions())
                {
                    s.AddTransition(new Transition(t.min, t.max, newstates[t.to.number]));
                }
            }
            a.ClearNumberedStates();
            a.RemoveDeadTransitions();
        }
Beispiel #23
0
        /// <summary>
        /// Indexes seven blocks of three child documents followed by a parent document (plus one
        /// childless parent and three unrelated documents), then checks the order and sort values
        /// returned by <see cref="ToParentBlockJoinSortField"/> on the child field "field2" for
        /// four different sort/filter configurations.
        /// </summary>
        public void TestNestedSorting()
        {
            // Child document: "field2" is the value sorted on, "filter_1" the flag filtered on.
            Document ChildDoc(string field2, string filter1)
            {
                Document child = new Document();
                child.Add(new StringField("field2", field2, Field.Store.NO));
                child.Add(new StringField("filter_1", filter1, Field.Store.NO));
                return child;
            }

            // Parent document: marked with "__type"="parent" so the parent filter can find it.
            Document ParentDoc(string field1)
            {
                Document parent = new Document();
                parent.Add(new StringField("__type", "parent", Field.Store.NO));
                parent.Add(new StringField("field1", field1, Field.Store.NO));
                return parent;
            }

            // Document that is neither a parent nor a child of any block.
            Document GarbageDoc()
            {
                Document garbage = new Document();
                garbage.Add(new StringField("fieldXXX", "x", Field.Store.NO));
                return garbage;
            }

            // Asserts the doc ID and the (string) sort value of the hit at the given rank.
            void AssertHit(TopFieldDocs hits, int rank, int expectedDoc, string expectedSortValue)
            {
                assertEquals(expectedDoc, hits.ScoreDocs[rank].Doc);
                assertEquals(expectedSortValue, ((BytesRef)((FieldDoc)hits.ScoreDocs[rank]).Fields[0]).Utf8ToString());
            }

            Directory         dir = NewDirectory();
            RandomIndexWriter w   = new RandomIndexWriter(Random, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT,
                                                                                            new MockAnalyzer(Random)).SetMergePolicy(NoMergePolicy.COMPOUND_FILES));

            IList<Document> docs = new JCG.List<Document>();

            // Block 1
            docs.Add(ChildDoc("a", "T"));
            docs.Add(ChildDoc("b", "T"));
            docs.Add(ChildDoc("c", "T"));
            docs.Add(ParentDoc("a"));
            w.AddDocuments(docs);
            w.Commit();

            // Block 2
            docs.Clear();
            docs.Add(ChildDoc("c", "T"));
            docs.Add(ChildDoc("d", "T"));
            docs.Add(ChildDoc("e", "T"));
            docs.Add(ParentDoc("b"));
            w.AddDocuments(docs);

            // Block 3
            docs.Clear();
            docs.Add(ChildDoc("e", "T"));
            docs.Add(ChildDoc("f", "T"));
            docs.Add(ChildDoc("g", "T"));
            docs.Add(ParentDoc("c"));
            w.AddDocuments(docs);

            // Block 4
            docs.Clear();
            docs.Add(ChildDoc("g", "T"));
            docs.Add(ChildDoc("h", "F"));
            docs.Add(ChildDoc("i", "F"));
            docs.Add(ParentDoc("d"));
            w.AddDocuments(docs);
            w.Commit();

            // Block 5
            docs.Clear();
            docs.Add(ChildDoc("i", "F"));
            docs.Add(ChildDoc("j", "F"));
            docs.Add(ChildDoc("k", "F"));
            docs.Add(ParentDoc("f"));
            w.AddDocuments(docs);

            // Block 6
            docs.Clear();
            docs.Add(ChildDoc("k", "T"));
            docs.Add(ChildDoc("l", "T"));
            docs.Add(ChildDoc("m", "T"));
            docs.Add(ParentDoc("g"));
            w.AddDocuments(docs);

            // This doc will not be included, because it doesn't have nested docs
            w.AddDocument(ParentDoc("h"));

            // Block 7
            docs.Clear();
            docs.Add(ChildDoc("m", "T"));
            docs.Add(ChildDoc("n", "F"));
            docs.Add(ChildDoc("o", "F"));
            docs.Add(ParentDoc("i"));
            w.AddDocuments(docs);
            w.Commit();

            // Some garbage docs, just to check if the NestedFieldComparer can deal with this.
            w.AddDocument(GarbageDoc());
            w.AddDocument(GarbageDoc());
            w.AddDocument(GarbageDoc());

            IndexSearcher searcher = new IndexSearcher(DirectoryReader.Open(w.IndexWriter, false));

            w.Dispose();
            Filter parentFilter          = new QueryWrapperFilter(new TermQuery(new Term("__type", "parent")));
            Filter childFilter           = new QueryWrapperFilter(new PrefixQuery(new Term("field2")));
            ToParentBlockJoinQuery query = new ToParentBlockJoinQuery(new FilteredQuery(new MatchAllDocsQuery(), childFilter), new FixedBitSetCachingWrapperFilter(parentFilter), ScoreMode.None);

            // Sort by field ascending, order first
            ToParentBlockJoinSortField sortField = new ToParentBlockJoinSortField("field2", SortFieldType.STRING, false, Wrap(parentFilter), Wrap(childFilter));
            Sort         sort    = new Sort(sortField);
            TopFieldDocs topDocs = searcher.Search(query, 5, sort);

            assertEquals(7, topDocs.TotalHits);
            assertEquals(5, topDocs.ScoreDocs.Length);
            AssertHit(topDocs, 0, 3, "a");
            AssertHit(topDocs, 1, 7, "c");
            AssertHit(topDocs, 2, 11, "e");
            AssertHit(topDocs, 3, 15, "g");
            AssertHit(topDocs, 4, 19, "i");

            // Sort by field ascending, order last
            sortField = new ToParentBlockJoinSortField("field2", SortFieldType.STRING, false, true, Wrap(parentFilter), Wrap(childFilter));
            sort      = new Sort(sortField);
            topDocs   = searcher.Search(query, 5, sort);
            assertEquals(7, topDocs.TotalHits);
            assertEquals(5, topDocs.ScoreDocs.Length);
            AssertHit(topDocs, 0, 3, "c");
            AssertHit(topDocs, 1, 7, "e");
            AssertHit(topDocs, 2, 11, "g");
            AssertHit(topDocs, 3, 15, "i");
            AssertHit(topDocs, 4, 19, "k");

            // Sort by field descending, order last
            sortField = new ToParentBlockJoinSortField("field2", SortFieldType.STRING, true, Wrap(parentFilter), Wrap(childFilter));
            sort      = new Sort(sortField);
            topDocs   = searcher.Search(query, 5, sort);
            // Expected value first, matching every other assertion in this test.
            assertEquals(7, topDocs.TotalHits);
            assertEquals(5, topDocs.ScoreDocs.Length);
            AssertHit(topDocs, 0, 28, "o");
            AssertHit(topDocs, 1, 23, "m");
            AssertHit(topDocs, 2, 19, "k");
            AssertHit(topDocs, 3, 15, "i");
            AssertHit(topDocs, 4, 11, "g");

            // Sort by field descending, order last, sort filter (filter_1:T)
            childFilter = new QueryWrapperFilter(new TermQuery((new Term("filter_1", "T"))));
            query       = new ToParentBlockJoinQuery(
                new FilteredQuery(new MatchAllDocsQuery(), childFilter),
                new FixedBitSetCachingWrapperFilter(parentFilter),
                ScoreMode.None);
            sortField = new ToParentBlockJoinSortField("field2", SortFieldType.STRING, true, Wrap(parentFilter), Wrap(childFilter));
            sort      = new Sort(sortField);
            topDocs   = searcher.Search(query, 5, sort);
            assertEquals(6, topDocs.TotalHits);
            assertEquals(5, topDocs.ScoreDocs.Length);
            AssertHit(topDocs, 0, 23, "m");
            AssertHit(topDocs, 1, 28, "m");
            AssertHit(topDocs, 2, 11, "g");
            AssertHit(topDocs, 3, 15, "g");
            AssertHit(topDocs, 4, 7, "e");

            searcher.IndexReader.Dispose();
            dir.Dispose();
        }