예제 #1
0
        /// <summary>
        /// Builds the query set from the standard queries plus the prebuilt queries for the body field.
        /// </summary>
        /// <returns>The queries produced by <c>CreateQueries</c> for the combined list.</returns>
        protected override Query[] PrepareQueries()
        {
            // Look up the analyzer type name in the configuration; StandardAnalyzer is the fallback.
            string fallbackAnalyzerName = typeof(Lucene.Net.Analysis.Standard.StandardAnalyzer).AssemblyQualifiedName;
            Analyzer analyzer = NewAnalyzerTask.CreateAnalyzer(m_config.Get("analyzer", fallbackAnalyzerName));

            // Standard queries first, then the prebuilt ones for the body field.
            var rawQueries = new JCG.List<object>(20);
            rawQueries.AddRange(STANDARD_QUERIES);
            rawQueries.AddRange(GetPrebuiltQueries(DocMaker.BODY_FIELD));

            return CreateQueries(rawQueries, analyzer);
        }
예제 #2
0
        /// <summary>
        /// Builds the query set from the standard queries and, unless the
        /// "enwikiQueryMaker.disableSpanQueries" setting is true, the prebuilt queries for the body field.
        /// </summary>
        /// <returns>The queries produced by <c>CreateQueries</c> for the combined list.</returns>
        protected override Query[] PrepareQueries()
        {
            // Look up the analyzer type name in the configuration; StandardAnalyzer is the fallback.
            string fallbackAnalyzerName = typeof(StandardAnalyzer).AssemblyQualifiedName;
            Analyzer analyzer = NewAnalyzerTask.CreateAnalyzer(m_config.Get("analyzer", fallbackAnalyzerName));

            var rawQueries = new JCG.List<object>(20);
            rawQueries.AddRange(STANDARD_QUERIES);

            // The prebuilt queries are optional and are switched off through configuration.
            bool prebuiltDisabled = m_config.Get("enwikiQueryMaker.disableSpanQueries", false);
            if (prebuiltDisabled == false)
            {
                rawQueries.AddRange(GetPrebuiltQueries(DocMaker.BODY_FIELD));
            }

            return CreateQueries(rawQueries, analyzer);
        }
예제 #3
0
        /// <summary>
        /// Compares the two scorer variants over all common, medium and rare terms,
        /// for every minimum-should-match value from 1 up to the number of terms.
        /// </summary>
        public virtual void TestNextAllTerms()
        {
            // Collect every term into a single array, in common/medium/rare order.
            int totalTermCount = commonTerms.Length + mediumTerms.Length + rareTerms.Length;
            IList<string> allTerms = new JCG.List<string>(totalTermCount);
            allTerms.AddRange(commonTerms);
            allTerms.AddRange(mediumTerms);
            allTerms.AddRange(rareTerms);
            string[] terms = allTerms.ToArray();

            int minNrShouldMatch = 1;
            while (minNrShouldMatch <= terms.Length)
            {
                // The scorer built with 'true' acts as the reference for the one built with 'false'.
                var expected = Scorer(terms, minNrShouldMatch, true);
                var actual = Scorer(terms, minNrShouldMatch, false);
                AssertNext(expected, actual);
                minNrShouldMatch++;
            }
        }
        /// <summary>
        /// Picks a random set of facet categories: 1-3 distinct entries from CATEGORIES_A,
        /// 1-2 distinct entries from CATEGORIES_B, and one entry each from CATEGORIES_C and CATEGORIES_D.
        /// </summary>
        /// <param name="random">Random source used to decide how many A/B categories to take and to shuffle them.</param>
        /// <returns>The chosen categories: the A entries, then the B entries, then the C and D entries.</returns>
        private static IList<FacetField> RandomCategories(Random random)
        {
            // add random categories from the two dimensions, ensuring that the same
            // category is not added twice.
            int numFacetsA = random.Next(3) + 1; // 1-3
            int numFacetsB = random.Next(2) + 1; // 1-2

            JCG.List<FacetField> categories_a = new JCG.List<FacetField>();
            categories_a.AddRange(CATEGORIES_A);
            JCG.List<FacetField> categories_b = new JCG.List<FacetField>();
            categories_b.AddRange(CATEGORIES_B);

            // Shuffle with the caller-supplied random (not the static Random property) so the
            // whole A/B selection is driven by the single source the caller passed in.
            categories_a.Shuffle(random);
            categories_b.Shuffle(random);

            List<FacetField> categories = new List<FacetField>();

            // Taking a prefix of a shuffled list yields distinct random entries.
            categories.AddRange(categories_a.GetView(0, numFacetsA)); // LUCENENET: Checked length for correctness
            categories.AddRange(categories_b.GetView(0, numFacetsB)); // LUCENENET: Checked length for correctness

            // add the NO_PARENT categories
            categories.Add(CATEGORIES_C[Util.LuceneTestCase.Random.Next(NUM_CHILDREN_CP_C)]);
            categories.Add(CATEGORIES_D[Util.LuceneTestCase.Random.Next(NUM_CHILDREN_CP_D)]);

            return categories;
        }
예제 #5
0
 // TODO: this should use inputstreams from the loader, not File!
 /// <summary>
 /// Loads the mapping rules named by <c>mapping</c> and builds the normalization map from them.
 /// Does nothing when no mapping is configured.
 /// </summary>
 /// <param name="loader">Resource loader used to read the rule lines.</param>
 public virtual void Inform(IResourceLoader loader)
 {
     if (mapping == null)
     {
         return;
     }

     IList<string> ruleLines;
     if (File.Exists(mapping))
     {
         // The whole value names one existing file.
         ruleLines = new JCG.List<string>(GetLines(loader, mapping));
     }
     else
     {
         // Otherwise treat the value as a list of file names and concatenate their lines.
         ruleLines = new JCG.List<string>();
         foreach (string fileName in SplitFileNames(mapping))
         {
             ruleLines.AddRange(GetLines(loader, fileName.Trim()));
         }
     }

     var mapBuilder = new NormalizeCharMap.Builder();
     ParseRules(ruleLines, mapBuilder);
     m_normMap = mapBuilder.Build();

     // if the inner FST is null, it means it accepts nothing (e.g. the file is empty)
     // so just set the whole map to null
     if (m_normMap.map == null)
     {
         m_normMap = null;
     }
 }
예제 #6
0
        /// <summary>
        /// Indexes every document in <paramref name="docs"/> on the calling thread, adding each
        /// document's fields in the order defined by <c>fieldNameComparer</c>.
        /// <para/>
        /// LUCENENET specific
        /// Is non-static because NewIndexWriterConfig is no longer static.
        /// </summary>
        /// <param name="random">Random source passed to the writer configuration and the analyzer.</param>
        /// <param name="docs">Documents to index; only the dictionary values are used.</param>
        /// <param name="dir">Directory the index is written to.</param>
        public void IndexSerial(Random random, IDictionary<string, Document> docs, Directory dir)
        {
            IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(random, TEST_VERSION_CURRENT, new MockAnalyzer(random)).SetMergePolicy(NewLogMergePolicy()));

            // index all docs in a single thread
            // (foreach disposes the enumerator, which the manual MoveNext loop did not do)
            foreach (Document d in docs.Values)
            {
                IList<IIndexableField> fields = new JCG.List<IIndexableField>();
                fields.AddRange(d.Fields);
                // put fields in same order each time
                fields.Sort(fieldNameComparer);

                Document d1 = new Document();
                foreach (IIndexableField field in fields)
                {
                    d1.Add(field);
                }
                w.AddDocument(d1);
                // System.out.println("indexing "+d1);
            }

            w.Dispose();
        }
예제 #7
0
            /// <summary>
            /// Merging constructor.  Note that this just grabs seqnum from the first info.
            /// The boost is the sum of all merged boosts, and overlapping term offsets are coalesced.
            /// </summary>
            /// <param name="toMerge">The infos to merge; must contain at least one element.</param>
            /// <exception cref="ArgumentException">If <paramref name="toMerge"/> is empty.</exception>
            public WeightedPhraseInfo(ICollection<WeightedPhraseInfo> toMerge)
            {
                var offsetEnumerators = new IEnumerator<Toffs>[toMerge.Count];
                try
                {
                    // Pretty much the same idea as merging FieldPhraseLists:
                    // Step 1.  Sort by startOffset, endOffset
                    //          While we are here merge the boosts and termInfos
                    using IEnumerator<WeightedPhraseInfo> sources = toMerge.GetEnumerator();
                    if (!sources.MoveNext())
                    {
                        throw new ArgumentException("toMerge must contain at least one WeightedPhraseInfo.");
                    }

                    // The first info seeds seqnum and boost; its termsInfos are not copied.
                    WeightedPhraseInfo head = sources.Current;
                    termsInfos = new JCG.List<TermInfo>();
                    seqnum = head.seqnum;
                    boost = head.boost;
                    offsetEnumerators[0] = head.termsOffsets.GetEnumerator();

                    for (int slot = 1; sources.MoveNext(); slot++)
                    {
                        WeightedPhraseInfo other = sources.Current;
                        boost += other.boost;
                        termsInfos.AddRange(other.termsInfos);
                        offsetEnumerators[slot] = other.termsOffsets.GetEnumerator();
                    }

                    // Step 2.  Walk the sorted list merging overlaps
                    using MergedEnumerator<Toffs> merged = new MergedEnumerator<Toffs>(false, offsetEnumerators);
                    termsOffsets = new JCG.List<Toffs>();
                    if (!merged.MoveNext())
                    {
                        return;
                    }

                    Toffs pending = merged.Current;
                    while (merged.MoveNext())
                    {
                        Toffs next = merged.Current;
                        if (next.StartOffset > pending.EndOffset)
                        {
                            // Gap found: the pending range is complete, start a new one.
                            termsOffsets.Add(pending);
                            pending = next;
                        }
                        else
                        {
                            // Overlapping or touching: extend the pending range.
                            pending.EndOffset = Math.Max(pending.EndOffset, next.EndOffset);
                        }
                    }
                    termsOffsets.Add(pending);
                }
                finally
                {
                    IOUtils.Dispose(offsetEnumerators);
                }
            }
        /// <summary>
        /// Unzips every old index (regular and single-segment) from the embedded resources into a
        /// temporary directory and records an opened directory for each under its name in <c>oldIndexDirs</c>.
        /// </summary>
        public override void BeforeClass()
        {
            base.BeforeClass();

            assertFalse("test infra is broken!", OldFormatImpersonationIsActive);

            var indexNames = new JCG.List<string>(oldNames.Length + oldSingleSegmentNames.Length);
            indexNames.AddRange(oldNames);
            indexNames.AddRange(oldSingleSegmentNames);

            oldIndexDirs = new Dictionary<string, Directory>();
            foreach (string indexName in indexNames)
            {
                DirectoryInfo unzipTarget = CreateTempDir(indexName);

                // The archive is an embedded resource named "index.<name>.zip".
                using (Stream archive = this.GetType().FindAndGetManifestResourceStream("index." + indexName + ".zip"))
                {
                    TestUtil.Unzip(archive, unzipTarget);
                }

                oldIndexDirs[indexName] = NewFSDirectory(unzipTarget);
            }
        }
예제 #9
0
        /// <summary>
        /// Compares the two scorer variants over growing prefixes (2..all) of the shuffled term list,
        /// trying every minimum-should-match value from 1 up to the prefix length.
        /// </summary>
        public virtual void TestNextVaryingNumberOfTerms()
        {
            int totalTermCount = commonTerms.Length + mediumTerms.Length + rareTerms.Length;
            IList<string> shuffledTerms = new JCG.List<string>(totalTermCount);
            shuffledTerms.AddRange(commonTerms);
            shuffledTerms.AddRange(mediumTerms);
            shuffledTerms.AddRange(rareTerms);
            shuffledTerms.Shuffle(Random);

            for (int prefixLength = 2; prefixLength <= shuffledTerms.Count; prefixLength++)
            {
                // LUCENENET: Checked length of GetView() for correctness
                string[] terms = shuffledTerms.GetView(0, prefixLength).ToArray();

                int minNrShouldMatch = 1;
                while (minNrShouldMatch <= terms.Length)
                {
                    // The scorer built with 'true' acts as the reference for the one built with 'false'.
                    var expected = Scorer(terms, minNrShouldMatch, true);
                    var actual = Scorer(terms, minNrShouldMatch, false);
                    AssertNext(expected, actual);
                    minNrShouldMatch++;
                }
            }
        }
        /// <summary>
        /// Runs the index upgrader on a copy of every old index (regular and single-segment)
        /// and checks that all of its segments were upgraded.
        /// </summary>
        public virtual void TestUpgradeOldIndex()
        {
            var indexNames = new JCG.List<string>(oldNames.Length + oldSingleSegmentNames.Length);
            indexNames.AddRange(oldNames);
            indexNames.AddRange(oldSingleSegmentNames);

            foreach (string indexName in indexNames)
            {
                if (Verbose)
                {
                    Console.WriteLine("testUpgradeOldIndex: index=" + indexName);
                }

                // Work on a new directory created from the stored one for this index name.
                Directory workDir = NewDirectory(oldIndexDirs[indexName]);

                var upgrader = new IndexUpgrader(workDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, null), false);
                upgrader.Upgrade();

                CheckAllSegmentsUpgraded(workDir);

                workDir.Dispose();
            }
        }
예제 #11
0
        /// <summary>
        /// Collects, into a single list, all rules that apply to a combination of
        /// name type, rule type and languages.
        /// </summary>
        /// <param name="nameType">The <see cref="NameType"/> to consider.</param>
        /// <param name="rt">The <see cref="RuleType"/> to consider.</param>
        /// <param name="langs">The set of languages to consider.</param>
        /// <returns>A list of <see cref="Rule"/>s that apply.</returns>
        public static IList<Rule> GetInstance(NameType nameType, RuleType rt,
                                              LanguageSet langs)
        {
            IDictionary<string, IList<Rule>> rulesByKey = GetInstanceMap(nameType, rt, langs);

            // Flatten the per-key rule lists, in the order the map enumerates its values.
            IList<Rule> flattened = new JCG.List<Rule>();
            foreach (IList<Rule> bucket in rulesByKey.Values)
            {
                flattened.AddRange(bucket);
            }

            return flattened;
        }
예제 #12
0
        /// <summary>
        /// Creates all documents for one segment: AMOUNT_OF_PARENT_DOCS blocks, each produced by
        /// <c>CreateParentDocWithChildren</c>, concatenated in block order.
        /// </summary>
        /// <param name="segmentNumber">Segment number forwarded to each block's creation.</param>
        /// <returns>The flattened list of documents for the segment.</returns>
        private IList<Document> CreateDocsForSegment(int segmentNumber)
        {
            IList<Document> segmentDocs = new JCG.List<Document>(AMOUNT_OF_DOCS_IN_SEGMENT);

            // Each block is appended as soon as it is created; blocks are still created in index order.
            for (int parentIndex = 0; parentIndex < AMOUNT_OF_PARENT_DOCS; parentIndex++)
            {
                IList<Document> block = CreateParentDocWithChildren(segmentNumber, parentIndex);
                segmentDocs.AddRange(block);
            }

            return segmentDocs;
        }
예제 #13
0
        /// <summary>
        /// Collects stems for a word: first the dictionary forms of the word itself that pass the
        /// KEEPCASE / NEEDAFFIX / ONLYINCOMPOUND flag checks, then the stems found by affix removal.
        /// </summary>
        /// <param name="word">Buffer holding the word.</param>
        /// <param name="length">Number of valid characters in <paramref name="word"/>.</param>
        /// <param name="caseVariant">True when the word is a case variant of the original input.</param>
        /// <returns>The stems found; empty when there are none.</returns>
        private IList<CharsRef> DoStem(char[] word, int length, bool caseVariant)
        {
            JCG.List<CharsRef> results = new JCG.List<CharsRef>();
            Int32sRef entries = dictionary.LookupWord(word, 0, length);

            if (entries != null)
            {
                for (int pos = 0; pos < entries.Length; pos += formStep)
                {
                    bool keepCaseApplies = caseVariant && dictionary.keepcase != -1;
                    bool needAffixApplies = dictionary.needaffix != -1;
                    bool onlyInCompoundApplies = dictionary.onlyincompound != -1;

                    // Flags are only decoded when at least one of the checks can reject the form.
                    if (keepCaseApplies || needAffixApplies || onlyInCompoundApplies)
                    {
                        dictionary.flagLookup.Get(entries.Int32s[entries.Offset + pos], scratch);
                        char[] flags = Dictionary.DecodeFlags(scratch);

                        bool rejected =
                            // we are looking for a case variant, but this word does not allow it
                            (keepCaseApplies && Dictionary.HasFlag(flags, (char)dictionary.keepcase))
                            // we can't add this form, its a pseudostem requiring an affix
                            || (needAffixApplies && Dictionary.HasFlag(flags, (char)dictionary.needaffix))
                            // we can't add this form, it only belongs inside a compound word
                            || (onlyInCompoundApplies && Dictionary.HasFlag(flags, (char)dictionary.onlyincompound));

                        if (rejected)
                        {
                            continue;
                        }
                    }

                    results.Add(NewStem(word, length, entries, pos));
                }
            }

            try
            {
                results.AddRange(Stem(word, length, -1, -1, -1, 0, true, true, false, false, caseVariant));
            }
            catch (Exception bogus) when (bogus.IsIOException())
            {
                throw RuntimeException.Create(bogus);
            }

            return results;
        }
예제 #14
0
 /// <summary>
 /// Loads the protected word set (when <c>wordFiles</c> is set) and the type table
 /// (when <c>types</c> is set) through the given resource loader.
 /// </summary>
 /// <param name="loader">Resource loader used to read the configured files.</param>
 public virtual void Inform(IResourceLoader loader)
 {
     if (wordFiles != null)
     {
         protectedWords = GetWordSet(loader, wordFiles, false);
     }

     if (types == null)
     {
         return;
     }

     // Concatenate the lines of every listed file before parsing them as one type table.
     IList<string> typeLines = new JCG.List<string>();
     foreach (string fileName in SplitFileNames(types))
     {
         typeLines.AddRange(GetLines(loader, fileName.Trim()));
     }

     typeTable = ParseTypes(typeLines);
 }
예제 #15
0
 /// <summary>
 /// Reads the rule lines from <paramref name="synonyms"/>: a single file when that exact path
 /// exists, otherwise every file in the list obtained by splitting the value into file names.
 /// </summary>
 /// <param name="synonyms">A file path, or a list of file names.</param>
 /// <param name="loader">Resource loader used to read the lines.</param>
 /// <returns> a list of all rules </returns>
 private IEnumerable<string> LoadRules(string synonyms, IResourceLoader loader)
 {
     if (File.Exists(synonyms))
     {
         return new JCG.List<string>(GetLines(loader, synonyms));
     }

     var ruleLines = new JCG.List<string>();
     foreach (string fileName in SplitFileNames(synonyms))
     {
         ruleLines.AddRange(GetLines(loader, fileName.Trim()));
     }

     return ruleLines;
 }
예제 #16
0
 /// <summary>
 /// Builds the counting sum scorer for the case where at least one required scorer exists,
 /// combining required, optional and prohibited scorers according to <c>minNrShouldMatch</c>.
 /// </summary>
 private Scorer MakeCountingSumScorerSomeReq(/* bool disableCoord // LUCENENET: Not Referenced */) // At least one required scorer.
 {
     // all optional scorers also required.
     if (optionalScorers.Count == minNrShouldMatch)
     {
         JCG.List<Scorer> everyScorer = new JCG.List<Scorer>(requiredScorers);
         everyScorer.AddRange(optionalScorers);
         return AddProhibitedScorers(CountingConjunctionSumScorer(/* disableCoord, // LUCENENET: Not Referenced */ everyScorer));
     }

     // optionalScorers.size() > minNrShouldMatch, and at least one required scorer
     Scorer requiredSide;
     if (requiredScorers.Count == 1)
     {
         requiredSide = new SingleMatchScorer(this, requiredScorers[0]);
     }
     else
     {
         requiredSide = CountingConjunctionSumScorer(/* disableCoord, // LUCENENET: Not Referenced */ requiredScorers);
     }

     if (minNrShouldMatch > 0)
     {
         // use a required disjunction scorer over the optional scorers (non counting)
         Scorer requiredDisjunction = CountingDisjunctionSumScorer(optionalScorers, minNrShouldMatch);
         return AddProhibitedScorers(DualConjunctionSumScorer(/* disableCoord, // LUCENENET: Not Referenced */ requiredSide, requiredDisjunction));
     }

     // minNrShouldMatch == 0
     Scorer requiredWithProhibited = AddProhibitedScorers(requiredSide);

     Scorer optionalSide;
     if (optionalScorers.Count == 1)
     {
         optionalSide = new SingleMatchScorer(this, optionalScorers[0]);
     }
     else
     {
         // require 1 in combined, optional scorer.
         optionalSide = CountingDisjunctionSumScorer(optionalScorers, 1);
     }

     return new ReqOptSumScorer(requiredWithProhibited, optionalSide);
 }
예제 #17
0
        /// <summary>
        /// Perform the actual DM Soundex algorithm on the input string.
        /// <para/>
        /// The cleaned-up input is scanned left to right. For each character that has rules, the
        /// first matching rule supplies the replacement codes, which are appended to every current
        /// branch; in branching mode a rule with several replacements forks each branch.
        /// </summary>
        /// <param name="source">A string to encode.</param>
        /// <param name="branching">If branching shall be performed.</param>
        /// <returns>A string array containing all DM Soundex codes corresponding to the string supplied depending on the selected branching mode,
        /// or <c>null</c> if <paramref name="source"/> is <c>null</c>.</returns>
        /// <exception cref="ArgumentException">If a character is not mapped.</exception>
        private string[] GetSoundex(string source, bool branching)
        {
            if (source == null)
            {
                return(null);
            }

            string input = Cleanup(source);

            // LinkedHashSet preserves input order. In .NET we can use List for that purpose.
            // Encoding starts from a single fresh branch.
            IList <Branch> currentBranches = new JCG.List <Branch>
            {
                new Branch()
            };

            // '\0' means no character has been recorded yet (see the assignment at the end of the loop).
            char lastChar = '\0';

            for (int index = 0; index < input.Length; index++)
            {
                char ch = input[index];

                // ignore whitespace inside a name
                if (char.IsWhiteSpace(ch))
                {
                    continue;
                }

                // Rules are matched against the remainder of the input starting at this character.
                string inputContext = input.Substring(index);
                // Characters without any rules are skipped; they do not update lastChar.
                if (!RULES.TryGetValue(ch, out IList <Rule> rules) || rules == null)
                {
                    continue;
                }

                // use an EMPTY_LIST to avoid false positive warnings wrt potential null pointer access
                // (the list is only mutated below when branching is true)
                IList <Branch> nextBranches = branching ? new JCG.List <Branch>() : Collections.EmptyList <Branch>() as IList <Branch>;

                // Only the first matching rule is applied (note the break at the end of the if body).
                foreach (Rule rule in rules)
                {
                    if (rule.Matches(inputContext))
                    {
                        if (branching)
                        {
                            nextBranches.Clear();
                        }
                        // The second argument is true only while lastChar is still '\0'.
                        string[] replacements      = rule.GetReplacements(inputContext, lastChar == '\0');
                        bool     branchingRequired = replacements.Length > 1 && branching;

                        foreach (Branch branch in currentBranches)
                        {
                            foreach (string nextReplacement in replacements)
                            {
                                // if we have multiple replacements, always create a new branch
                                Branch nextBranch = branchingRequired ? branch.CreateBranch() : branch;

                                // special rule: occurrences of mn or nm are treated differently
                                bool force = (lastChar == 'm' && ch == 'n') || (lastChar == 'n' && ch == 'm');

                                nextBranch.ProcessNextReplacement(nextReplacement, force);

                                if (branching)
                                {
                                    // Keep only one copy of equal branches, preserving first-seen order.
                                    if (!nextBranches.Contains(nextBranch))
                                    {
                                        nextBranches.Add(nextBranch);
                                    }
                                }
                                else
                                {
                                    // Without branching only the first replacement is applied to each branch.
                                    break;
                                }
                            }
                        }

                        if (branching)
                        {
                            // The branches produced for this rule replace the current set.
                            currentBranches.Clear();
                            currentBranches.AddRange(nextBranches);
                        }
                        // Skip the rest of the matched pattern; the loop's own index++ accounts for one character.
                        index += rule.PatternLength - 1;
                        break;
                    }
                }

                lastChar = ch;
            }

            // Finish every branch and return its string form, in branch order.
            string[] result = new string[currentBranches.Count];
            int      idx    = 0;

            foreach (Branch branch in currentBranches)
            {
                branch.Finish();
                result[idx++] = branch.ToString();
            }

            return(result);
        }
예제 #18
0
        /// <summary>
        /// Applies the affix rule to the given word, producing a list of stems if any are found
        /// </summary>
        /// <param name="strippedWord"> Word the affix has been removed and the strip added </param>
        /// <param name="length"> valid length of stripped word </param>
        /// <param name="affix"> HunspellAffix representing the affix rule itself </param>
        /// <param name="prefixFlag"> when we already stripped a prefix, we cant simply recurse and check the suffix, unless both are compatible
        ///                   so we must check dictionary form against both to add it as a stem! </param>
        /// <param name="recursionDepth"> current recursion depth </param>
        /// <param name="prefix"> true if we are removing a prefix (false if its a suffix) </param>
        /// <param name="circumfix"> true if the previous prefix removal was signed as a circumfix
        ///        this means inner most suffix must also contain circumfix flag. </param>
        /// <param name="caseVariant"> true if we are searching for a case variant. if the word has KEEPCASE flag it cannot succeed. </param>
        /// <returns> <see cref="IList{CharsRef}"/> of stems for the word, or an empty list if none are found </returns>
        internal IList <CharsRef> ApplyAffix(char[] strippedWord, int length, int affix, int prefixFlag, int recursionDepth, bool prefix, bool circumfix, bool caseVariant)
        {
            // TODO: just pass this in from before, no need to decode it twice
            // The reads below consume 8 bytes per affix entry: flag (2), strip (2, skipped),
            // condition (2) and append (2), which matches the 8 * affix positioning.
            affixReader.Position = 8 * affix;
            char flag = (char)(affixReader.ReadInt16() & 0xffff);

            affixReader.SkipBytes(2); // strip
            int  condition    = (char)(affixReader.ReadInt16() & 0xffff);
            // The lowest bit of the condition value carries the cross-product marker.
            bool crossProduct = (condition & 1) == 1;

            // NOTE(review): the shifted condition value is not read again in this method.
            condition = condition.TripleShift(1);
            char append = (char)(affixReader.ReadInt16() & 0xffff);

            JCG.List <CharsRef> stems = new JCG.List <CharsRef>();

            Int32sRef forms = dictionary.LookupWord(strippedWord, 0, length);

            // Part 1: accept dictionary forms of the stripped word that carry this affix's flag
            // and pass the prefix, circumfix, KEEPCASE and ONLYINCOMPOUND checks.
            if (forms != null)
            {
                for (int i = 0; i < forms.Length; i += formStep)
                {
                    dictionary.flagLookup.Get(forms.Int32s[forms.Offset + i], scratch);
                    char[] wordFlags = Dictionary.DecodeFlags(scratch);
                    if (Dictionary.HasFlag(wordFlags, flag))
                    {
                        // confusing: in this one exception, we already chained the first prefix against the second,
                        // so it doesnt need to be checked against the word
                        bool chainedPrefix = dictionary.complexPrefixes && recursionDepth == 1 && prefix;
                        if (chainedPrefix == false && prefixFlag >= 0 && !Dictionary.HasFlag(wordFlags, (char)prefixFlag))
                        {
                            // see if we can chain prefix thru the suffix continuation class (only if it has any!)
                            dictionary.flagLookup.Get(append, scratch);
                            char[] appendFlags = Dictionary.DecodeFlags(scratch);
                            if (!HasCrossCheckedFlag((char)prefixFlag, appendFlags, false))
                            {
                                continue;
                            }
                        }

                        // if circumfix was previously set by a prefix, we must check this suffix,
                        // to ensure it has it, and vice versa
                        if (dictionary.circumfix != -1)
                        {
                            dictionary.flagLookup.Get(append, scratch);
                            char[] appendFlags     = Dictionary.DecodeFlags(scratch);
                            bool   suffixCircumfix = Dictionary.HasFlag(appendFlags, (char)dictionary.circumfix);
                            if (circumfix != suffixCircumfix)
                            {
                                continue;
                            }
                        }

                        // we are looking for a case variant, but this word does not allow it
                        if (caseVariant && dictionary.keepcase != -1 && Dictionary.HasFlag(wordFlags, (char)dictionary.keepcase))
                        {
                            continue;
                        }
                        // we aren't decompounding (yet)
                        if (dictionary.onlyincompound != -1 && Dictionary.HasFlag(wordFlags, (char)dictionary.onlyincompound))
                        {
                            continue;
                        }
                        stems.Add(NewStem(strippedWord, length, forms, i));
                    }
                }
            }

            // if a circumfix flag is defined in the dictionary, and we are a prefix, we need to check if we have that flag
            if (dictionary.circumfix != -1 && !circumfix && prefix)
            {
                dictionary.flagLookup.Get(append, scratch);
                char[] appendFlags = Dictionary.DecodeFlags(scratch);
                circumfix = Dictionary.HasFlag(appendFlags, (char)dictionary.circumfix);
            }

            // Part 2: for cross-product affixes, recurse to strip a further affix.
            // Each Stem call receives the already-incremented recursion depth (++recursionDepth).
            if (crossProduct)
            {
                if (recursionDepth == 0)
                {
                    if (prefix)
                    {
                        // we took away the first prefix.
                        // COMPLEXPREFIXES = true:  combine with a second prefix and another suffix
                        // COMPLEXPREFIXES = false: combine with a suffix
                        stems.AddRange(Stem(strippedWord, length, affix, flag, flag, ++recursionDepth, dictionary.complexPrefixes && dictionary.twoStageAffix, true, true, circumfix, caseVariant));
                    }
                    else if (dictionary.complexPrefixes == false && dictionary.twoStageAffix)
                    {
                        // we took away a suffix.
                        // COMPLEXPREFIXES = true: we don't recurse! only one suffix allowed
                        // COMPLEXPREFIXES = false: combine with another suffix
                        stems.AddRange(Stem(strippedWord, length, affix, flag, prefixFlag, ++recursionDepth, false, true, false, circumfix, caseVariant));
                    }
                }
                else if (recursionDepth == 1)
                {
                    if (prefix && dictionary.complexPrefixes)
                    {
                        // we took away the second prefix: go look for another suffix
                        stems.AddRange(Stem(strippedWord, length, affix, flag, flag, ++recursionDepth, false, true, true, circumfix, caseVariant));
                    }
                    else if (prefix == false && dictionary.complexPrefixes == false && dictionary.twoStageAffix)
                    {
                        // we took away a prefix, then a suffix: go look for another suffix
                        stems.AddRange(Stem(strippedWord, length, affix, flag, prefixFlag, ++recursionDepth, false, true, false, circumfix, caseVariant));
                    }
                }
            }

            return(stems);
        }
예제 #19
0
        /// <summary>
        /// Redistributes fragment infos over the individual values of a multi-valued field so that
        /// no resulting fragment spans more than one value. Field boundaries are derived from the
        /// cumulative lengths of the fields' string values (plus one per field).
        /// </summary>
        /// <param name="fragInfos">The fragment infos to split. NOTE: sub-infos and term offsets that are
        /// moved into a new fragment are removed from the supplied objects.</param>
        /// <param name="fields">The field values, in the order their offsets are accumulated.</param>
        /// <returns>The resulting fragment infos of all field names, sorted by start offset.</returns>
        protected virtual IList <WeightedFragInfo> DiscreteMultiValueHighlighting(IList <WeightedFragInfo> fragInfos, Field[] fields)
        {
            // One result bucket per field name.
            IDictionary <string, IList <WeightedFragInfo> > fieldNameToFragInfos = new Dictionary <string, IList <WeightedFragInfo> >();

            foreach (Field field in fields)
            {
                fieldNameToFragInfos[field.Name] = new JCG.List <WeightedFragInfo>();
            }

            foreach (WeightedFragInfo fragInfo in fragInfos)
            {
                // [fieldStart, fieldEnd] is the offset window of the field value currently examined.
                int fieldStart;
                int fieldEnd = 0;
                foreach (Field field in fields)
                {
                    // An empty value still advances the running offset by one.
                    if (field.GetStringValue().Length == 0)
                    {
                        fieldEnd++;
                        continue;
                    }
                    fieldStart = fieldEnd;
                    fieldEnd  += field.GetStringValue().Length + 1; // + 1 for going to next field with same name.

                    // The fragment lies entirely inside this value: keep it as is and go to the next fragment.
                    if (fragInfo.StartOffset >= fieldStart && fragInfo.EndOffset >= fieldStart &&
                        fragInfo.StartOffset <= fieldEnd && fragInfo.EndOffset <= fieldEnd)
                    {
                        fieldNameToFragInfos[field.Name].Add(fragInfo);

                        goto fragInfos_continue;
                    }

                    // Nothing (left) to distribute for this fragment.
                    if (fragInfo.SubInfos.Count == 0)
                    {
                        goto fragInfos_continue;
                    }

                    // The fragment, or its first remaining term offset, starts at or beyond this value's end: try the next value.
                    Toffs firstToffs = fragInfo.SubInfos[0].TermsOffsets[0];
                    if (fragInfo.StartOffset >= fieldEnd || firstToffs.StartOffset >= fieldEnd)
                    {
                        continue;
                    }

                    // Clip the fragment boundaries to this value's window.
                    int fragStart = fieldStart;
                    if (fragInfo.StartOffset > fieldStart && fragInfo.StartOffset < fieldEnd)
                    {
                        fragStart = fragInfo.StartOffset;
                    }

                    int fragEnd = fieldEnd;
                    if (fragInfo.EndOffset > fieldStart && fragInfo.EndOffset < fieldEnd)
                    {
                        fragEnd = fragInfo.EndOffset;
                    }

                    // LUCENENET NOTE: Instead of removing during iteration (which isn't allowed in .NET when using an IEnumerator),
                    // We use the IList<T>.RemoveAll() extension method of J2N. This removal happens in a forward way, but since it
                    // accepts a predicate, we can put in the rest of Lucene's logic without doing something expensive like keeping
                    // track of the items to remove in a separate collection. In a nutshell, any time Lucene calls iterator.remove(),
                    // we return true and any time it is skipped, we return false.

                    IList <SubInfo> subInfos = new JCG.List <SubInfo>();
                    float           boost    = 0.0f; //  The boost of the new info will be the sum of the boosts of its SubInfos
                    fragInfo.SubInfos.RemoveAll((subInfo) =>
                    {
                        // Move the term offsets that fall inside this value's window into a new list.
                        IList <Toffs> toffsList = new JCG.List <Toffs>();
                        subInfo.TermsOffsets.RemoveAll((toffs) =>
                        {
                            if (toffs.StartOffset >= fieldStart && toffs.EndOffset <= fieldEnd)
                            {
                                toffsList.Add(toffs);
                                return(true); // Remove
                            }
                            return(false);
                        });
                        // A sub-info contributes to the new fragment only if some of its offsets were moved.
                        if (toffsList.Count > 0)
                        {
                            subInfos.Add(new SubInfo(subInfo.Text, toffsList, subInfo.Seqnum, subInfo.Boost));
                            boost += subInfo.Boost;
                        }

                        // Drop the original sub-info once all of its offsets have been moved.
                        if (subInfo.TermsOffsets.Count == 0)
                        {
                            return(true); // Remove
                        }
                        return(false);
                    });

                    WeightedFragInfo weightedFragInfo = new WeightedFragInfo(fragStart, fragEnd, subInfos, boost);
                    fieldNameToFragInfos[field.Name].Add(weightedFragInfo);
                }
                // Jump target that emulates "continue" on the outer fragInfos loop.
                fragInfos_continue : { }
            }

            // Flatten the per-field buckets and order the fragments by start offset.
            JCG.List <WeightedFragInfo> result = new JCG.List <WeightedFragInfo>();
            foreach (IList <WeightedFragInfo> weightedFragInfos in fieldNameToFragInfos.Values)
            {
                result.AddRange(weightedFragInfos);
            }
            CollectionUtil.TimSort(result, Comparer <WeightedFragInfo> .Create((info1, info2) => info1.StartOffset - info2.StartOffset));

            return(result);
        }
예제 #20
0
        /// <summary>
        /// Generates a list of stems for the provided word by stripping every applicable prefix
        /// (when <paramref name="doPrefix"/> is set) and suffix (when <paramref name="doSuffix"/> is set)
        /// found in the dictionary's affix FSTs and passing each de-affixed candidate to <c>ApplyAffix</c>.
        /// </summary>
        /// <param name="word"> Word to generate the stems for </param>
        /// <param name="length"> number of leading chars of <paramref name="word"/> that make up the word </param>
        /// <param name="previous"> previous affix that was removed (so we dont remove same one twice) </param>
        /// <param name="prevFlag"> Flag from a previous stemming step that need to be cross-checked with any affixes in this recursive step </param>
        /// <param name="prefixFlag"> flag of the most inner removed prefix, so that when removing a suffix, its also checked against the word </param>
        /// <param name="recursionDepth"> current recursiondepth; also selects the per-depth FST reader and arc to reuse </param>
        /// <param name="doPrefix"> true if we should remove prefixes </param>
        /// <param name="doSuffix"> true if we should remove suffixes </param>
        /// <param name="previousWasPrefix"> true if the previous removal was a prefix:
        ///        if we are removing a suffix, and it has no continuation requirements, its ok.
        ///        but two prefixes (COMPLEXPREFIXES) or two suffixes must have continuation requirements to recurse. </param>
        /// <param name="circumfix"> true if the previous prefix removal was signed as a circumfix
        ///        this means inner most suffix must also contain circumfix flag. </param>
        /// <param name="caseVariant"> true if we are searching for a case variant. if the word has KEEPCASE flag it cannot succeed. </param>
        /// <returns> <see cref="IList{CharsRef}"/> of stems, or empty list if no stems are found </returns>
        private IList <CharsRef> Stem(char[] word, int length, int previous, int prevFlag, int prefixFlag, int recursionDepth, bool doPrefix, bool doSuffix, bool previousWasPrefix, bool circumfix, bool caseVariant)
        {
            // TODO: allow this stuff to be reused by tokenfilter
            JCG.List <CharsRef> stems = new JCG.List <CharsRef>();

            if (doPrefix && dictionary.prefixes != null)
            {
                FST <Int32sRef>     fst         = dictionary.prefixes;
                Outputs <Int32sRef> outputs     = fst.Outputs;
                FST.BytesReader     bytesReader = prefixReaders[recursionDepth];
                FST.Arc <Int32sRef> arc         = prefixArcs[recursionDepth];
                fst.GetFirstArc(arc);
                Int32sRef NO_OUTPUT = outputs.NoOutput;
                Int32sRef output    = NO_OUTPUT;
                // Unless full stripping is enabled, at least one char of the word must remain.
                int       limit     = dictionary.fullStrip ? length : length - 1;
                // Walk the word left to right; i is the length of the prefix consumed so far.
                for (int i = 0; i < limit; i++)
                {
                    if (i > 0)
                    {
                        int ch = word[i - 1];
                        if (fst.FindTargetArc(ch, arc, arc, bytesReader) == null)
                        {
                            // No prefix in the FST continues with this char: nothing longer can match.
                            break;
                        }
                        else if (arc.Output != NO_OUTPUT)
                        {
                            output = fst.Outputs.Add(output, arc.Output);
                        }
                    }
                    Int32sRef prefixes; // LUCENENET: IDE0059 - Removed unnecessary value assignment
                    if (!arc.IsFinal)
                    {
                        continue;
                    }
                    else
                    {
                        prefixes = fst.Outputs.Add(output, arc.NextFinalOutput);
                    }

                    for (int j = 0; j < prefixes.Length; j++)
                    {
                        int prefix = prefixes.Int32s[prefixes.Offset + j];
                        if (prefix == previous)
                        {
                            continue;
                        }
                        // Each affix record is 8 bytes: four 16-bit fields read in sequence below.
                        affixReader.Position = 8 * prefix;
                        // The flag value is not used here, but the read is kept so the following fields line up.
                        char flag         = (char)(affixReader.ReadInt16() & 0xffff);
                        char stripOrd     = (char)(affixReader.ReadInt16() & 0xffff);
                        int  condition    = (char)(affixReader.ReadInt16() & 0xffff);
                        // The lowest bit carries the cross-product marker; the rest is the condition.
                        bool crossProduct = (condition & 1) == 1;
                        condition = condition.TripleShift(1);
                        char append = (char)(affixReader.ReadInt16() & 0xffff);

                        bool compatible;
                        if (recursionDepth == 0)
                        {
                            if (dictionary.onlyincompound == -1)
                            {
                                compatible = true;
                            }
                            else
                            {
                                // check if affix is allowed in a non-compound word
                                dictionary.flagLookup.Get(append, scratch);
                                char[] appendFlags = Dictionary.DecodeFlags(scratch);
                                compatible = !Dictionary.HasFlag(appendFlags, (char)dictionary.onlyincompound);
                            }
                        }
                        else if (crossProduct)
                        {
                            // cross check incoming continuation class (flag of previous affix) against list.
                            dictionary.flagLookup.Get(append, scratch);
                            char[] appendFlags = Dictionary.DecodeFlags(scratch);
                            if (Debugging.AssertsEnabled)
                            {
                                Debugging.Assert(prevFlag >= 0);
                            }
                            bool allowed = dictionary.onlyincompound == -1 ||
                                           !Dictionary.HasFlag(appendFlags, (char)dictionary.onlyincompound);
                            compatible = allowed && HasCrossCheckedFlag((char)prevFlag, appendFlags, false);
                        }
                        else
                        {
                            compatible = false;
                        }

                        if (compatible)
                        {
                            int deAffixedStart  = i;
                            int deAffixedLength = length - deAffixedStart;

                            int stripStart  = dictionary.stripOffsets[stripOrd];
                            int stripEnd    = dictionary.stripOffsets[stripOrd + 1];
                            int stripLength = stripEnd - stripStart;

                            if (!CheckCondition(condition, dictionary.stripData, stripStart, stripLength, word, deAffixedStart, deAffixedLength))
                            {
                                continue;
                            }

                            // Candidate = strip chars followed by the remainder of the word.
                            char[] strippedWord = new char[stripLength + deAffixedLength];
                            Array.Copy(dictionary.stripData, stripStart, strippedWord, 0, stripLength);
                            Array.Copy(word, deAffixedStart, strippedWord, stripLength, deAffixedLength);

                            IList <CharsRef> stemList = ApplyAffix(strippedWord, strippedWord.Length, prefix, -1, recursionDepth, true, circumfix, caseVariant);

                            stems.AddRange(stemList);
                        }
                    }
                }
            }

            if (doSuffix && dictionary.suffixes != null)
            {
                FST <Int32sRef>     fst         = dictionary.suffixes;
                Outputs <Int32sRef> outputs     = fst.Outputs;
                FST.BytesReader     bytesReader = suffixReaders[recursionDepth];
                FST.Arc <Int32sRef> arc         = suffixArcs[recursionDepth];
                fst.GetFirstArc(arc);
                Int32sRef NO_OUTPUT = outputs.NoOutput;
                Int32sRef output    = NO_OUTPUT;
                // Unless full stripping is enabled, at least one char of the word must remain.
                int       limit     = dictionary.fullStrip ? 0 : 1;
                // Walk the word right to left; i is the index where the candidate suffix starts.
                for (int i = length; i >= limit; i--)
                {
                    if (i < length)
                    {
                        int ch = word[i];
                        if (fst.FindTargetArc(ch, arc, arc, bytesReader) == null)
                        {
                            // No suffix in the FST continues with this char: nothing longer can match.
                            break;
                        }
                        else if (arc.Output != NO_OUTPUT)
                        {
                            output = fst.Outputs.Add(output, arc.Output);
                        }
                    }
                    Int32sRef suffixes; // LUCENENET: IDE0059 - Removed unnecessary value assignment
                    if (!arc.IsFinal)
                    {
                        continue;
                    }
                    else
                    {
                        suffixes = fst.Outputs.Add(output, arc.NextFinalOutput);
                    }

                    for (int j = 0; j < suffixes.Length; j++)
                    {
                        int suffix = suffixes.Int32s[suffixes.Offset + j];
                        if (suffix == previous)
                        {
                            continue;
                        }
                        // Each affix record is 8 bytes: four 16-bit fields read in sequence below.
                        affixReader.Position = 8 * suffix;
                        // The flag value is not used here, but the read is kept so the following fields line up.
                        char flag         = (char)(affixReader.ReadInt16() & 0xffff);
                        char stripOrd     = (char)(affixReader.ReadInt16() & 0xffff);
                        int  condition    = (char)(affixReader.ReadInt16() & 0xffff);
                        // The lowest bit carries the cross-product marker; the rest is the condition.
                        bool crossProduct = (condition & 1) == 1;
                        condition = condition.TripleShift(1);
                        char append = (char)(affixReader.ReadInt16() & 0xffff);

                        bool compatible;
                        if (recursionDepth == 0)
                        {
                            if (dictionary.onlyincompound == -1)
                            {
                                compatible = true;
                            }
                            else
                            {
                                // check if affix is allowed in a non-compound word
                                dictionary.flagLookup.Get(append, scratch);
                                char[] appendFlags = Dictionary.DecodeFlags(scratch);
                                compatible = !Dictionary.HasFlag(appendFlags, (char)dictionary.onlyincompound);
                            }
                        }
                        else if (crossProduct)
                        {
                            // cross check incoming continuation class (flag of previous affix) against list.
                            dictionary.flagLookup.Get(append, scratch);
                            char[] appendFlags = Dictionary.DecodeFlags(scratch);
                            if (Debugging.AssertsEnabled)
                            {
                                Debugging.Assert(prevFlag >= 0);
                            }
                            bool allowed = dictionary.onlyincompound == -1 ||
                                           !Dictionary.HasFlag(appendFlags, (char)dictionary.onlyincompound);
                            // The only-in-compound restriction must gate suffixes exactly as it gates
                            // prefixes above; previously "allowed" was computed here but never applied.
                            compatible = allowed && HasCrossCheckedFlag((char)prevFlag, appendFlags, previousWasPrefix);
                        }
                        else
                        {
                            compatible = false;
                        }

                        if (compatible)
                        {
                            int appendLength    = length - i;
                            int deAffixedLength = length - appendLength;

                            int stripStart  = dictionary.stripOffsets[stripOrd];
                            int stripEnd    = dictionary.stripOffsets[stripOrd + 1];
                            int stripLength = stripEnd - stripStart;

                            if (!CheckCondition(condition, word, 0, deAffixedLength, dictionary.stripData, stripStart, stripLength))
                            {
                                continue;
                            }

                            // Candidate = head of the word followed by the strip chars.
                            char[] strippedWord = new char[stripLength + deAffixedLength];
                            Array.Copy(word, 0, strippedWord, 0, deAffixedLength);
                            Array.Copy(dictionary.stripData, stripStart, strippedWord, deAffixedLength, stripLength);

                            IList <CharsRef> stemList = ApplyAffix(strippedWord, strippedWord.Length, suffix, prefixFlag, recursionDepth, false, circumfix, caseVariant);

                            stems.AddRange(stemList);
                        }
                    }
                }
            }

            return(stems);
        }
예제 #21
0
        /// <summary>
        /// Examines the given cache entries and reports every indication of "insane" cache usage.
        /// <para>
        /// <b>NOTE:</b> entries whose value is an <c>IBits</c> or a FieldCache CreationPlaceholder
        /// are skipped and never contribute to the result.
        /// (:TODO: is this a bad idea? are we masking a real problem?)
        /// </para>
        /// </summary>
        /// <param name="cacheEntries"> the entries to inspect; may be <c>null</c> or empty </param>
        /// <returns> the detected problems, or an empty array when there is nothing to inspect </returns>
        public Insanity[] Check(params FieldCache.CacheEntry[] cacheEntries)
        {
            if (cacheEntries is null || cacheEntries.Length == 0)
            {
                return Arrays.Empty<Insanity>();
            }

            if (estimateRam)
            {
                for (int idx = 0; idx < cacheEntries.Length; idx++)
                {
                    cacheEntries[idx].EstimateSize();
                }
            }

            // Identity hash of each cached value -> the entries holding that value.
            // Going through the id lets MapOfSets collapse identical values for us.
            var entriesByValueId = new MapOfSets<int, FieldCache.CacheEntry>(
                new Dictionary<int, ISet<FieldCache.CacheEntry>>(17));

            // (reader, field) key -> the distinct value ids seen under that key.
            var valueIdsByReaderField = new MapOfSets<ReaderField, int>(
                new Dictionary<ReaderField, ISet<int>>(17));

            // Keys already known to map to more than one value id.
            ISet<ReaderField> mismatchedKeys = new JCG.HashSet<ReaderField>();

            // Build the mappings from every relevant entry.
            foreach (FieldCache.CacheEntry entry in cacheEntries)
            {
                object cached = entry.Value;

                // Duplicates are fine when one of them is the Bits (from getDocWithField()),
                // and creation placeholders are not real values yet.
                if (cached is IBits || cached is FieldCache.ICreationPlaceholder)
                {
                    continue;
                }

                var key = new ReaderField(entry.ReaderKey, entry.FieldName);
                int valueId = RuntimeHelpers.GetHashCode(cached);

                entriesByValueId.Put(valueId, entry);
                if (valueIdsByReaderField.Put(key, valueId) > 1)
                {
                    mismatchedKeys.Add(key);
                }
            }

            var findings = new JCG.List<Insanity>(mismatchedKeys.Count * 3);
            findings.AddRange(CheckValueMismatch(entriesByValueId, valueIdsByReaderField, mismatchedKeys));
            findings.AddRange(CheckSubreaders(entriesByValueId, valueIdsByReaderField));

            return findings.ToArray();
        }
예제 #22
0
 /// <summary>
 /// Walks <paramref name="query"/> (descending into boolean, disjunction-max and span wrappers)
 /// and, for each <see cref="MultiTermQuery"/> on <paramref name="field"/>, produces an equivalent
 /// automaton that will match terms.
 /// </summary>
 /// <param name="query"> the query to inspect </param>
 /// <param name="field"> only queries on this field (ordinal comparison) contribute automata </param>
 /// <returns> the collected automata; empty when nothing applies </returns>
 internal static CharacterRunAutomaton[] ExtractAutomata(Query query, string field)
 {
     var automata = new JCG.List<CharacterRunAutomaton>();

     switch (query)
     {
         case BooleanQuery booleanQuery:
         {
             // Prohibited clauses never produce highlights, so they are skipped.
             foreach (BooleanClause clause in booleanQuery.GetClauses())
             {
                 if (clause.IsProhibited)
                 {
                     continue;
                 }
                 automata.AddRange(ExtractAutomata(clause.Query, field));
             }
             break;
         }

         case DisjunctionMaxQuery disjunctionMaxQuery:
         {
             foreach (Query disjunct in disjunctionMaxQuery.Disjuncts)
             {
                 automata.AddRange(ExtractAutomata(disjunct, field));
             }
             break;
         }

         case SpanOrQuery spanOrQuery:
         {
             foreach (Query clause in spanOrQuery.GetClauses())
             {
                 automata.AddRange(ExtractAutomata(clause, field));
             }
             break;
         }

         case SpanNearQuery spanNearQuery:
         {
             foreach (Query clause in spanNearQuery.GetClauses())
             {
                 automata.AddRange(ExtractAutomata(clause, field));
             }
             break;
         }

         case SpanNotQuery spanNotQuery:
             automata.AddRange(ExtractAutomata(spanNotQuery.Include, field));
             break;

         case SpanPositionCheckQuery spanPositionCheckQuery:
             automata.AddRange(ExtractAutomata(spanPositionCheckQuery.Match, field));
             break;

         case ISpanMultiTermQueryWrapper spanMultiTermQueryWrapper:
             automata.AddRange(ExtractAutomata(spanMultiTermQueryWrapper.WrappedQuery, field));
             break;

         case AutomatonQuery automatonQuery:
         {
             if (automatonQuery.Field.Equals(field, StringComparison.Ordinal))
             {
                 automata.Add(new CharacterRunAutomatonToStringAnonymousClass(
                     automatonQuery.Automaton,
                     () => automatonQuery.ToString()));
             }
             break;
         }

         case PrefixQuery prefixQuery:
         {
             Term prefixTerm = prefixQuery.Prefix;
             if (prefixTerm.Field.Equals(field, StringComparison.Ordinal))
             {
                 // The prefix text followed by any string.
                 Automaton literal  = BasicAutomata.MakeString(prefixTerm.Text);
                 Automaton anything = BasicAutomata.MakeAnyString();
                 automata.Add(new CharacterRunAutomatonToStringAnonymousClass(
                     BasicOperations.Concatenate(literal, anything),
                     () => prefixQuery.ToString()));
             }
             break;
         }

         case FuzzyQuery fuzzyQuery:
         {
             if (fuzzyQuery.Field.Equals(field, StringComparison.Ordinal))
             {
                 // Decode the term into code points so the prefix length is counted in code points.
                 string text       = fuzzyQuery.Term.Text;
                 int[]  codePoints = new int[text.CodePointCount(0, text.Length)];
                 int    charIndex  = 0;
                 int    cpIndex    = 0;
                 while (charIndex < text.Length)
                 {
                     int codePoint = text.CodePointAt(charIndex);
                     codePoints[cpIndex++] = codePoint;
                     charIndex += Character.CharCount(codePoint);
                 }

                 int    exactLength = Math.Min(fuzzyQuery.PrefixLength, codePoints.Length);
                 string fuzzyPart   = UnicodeUtil.NewString(codePoints, exactLength, codePoints.Length - exactLength);

                 LevenshteinAutomata levenshtein = new LevenshteinAutomata(fuzzyPart, fuzzyQuery.Transpositions);
                 Automaton           result      = levenshtein.ToAutomaton(fuzzyQuery.MaxEdits);
                 if (exactLength > 0)
                 {
                     // The leading code points must match exactly; prepend them as a literal.
                     Automaton exactPart = BasicAutomata.MakeString(UnicodeUtil.NewString(codePoints, 0, exactLength));
                     result = BasicOperations.Concatenate(exactPart, result);
                 }
                 automata.Add(new CharacterRunAutomatonToStringAnonymousClass(result, () => fuzzyQuery.ToString()));
             }
             break;
         }

         case TermRangeQuery termRangeQuery:
         {
             if (termRangeQuery.Field.Equals(field, StringComparison.Ordinal))
             {
                 // this is *not* an automaton, but its very simple
                 automata.Add(new SimpleCharacterRunAutomatonAnonymousClass(BasicAutomata.MakeEmpty(), termRangeQuery));
             }
             break;
         }
     }

     return automata.ToArray();
 }
예제 #23
0
        /// <summary>
        /// Applies the buffered delete/update packets held in <c>updates</c> to the given segments.
        /// The whole operation runs while holding the monitor on this instance.
        /// <para>
        /// The segments are copied and sorted with <c>sortSegInfoByDelGen</c>, then segments and packets
        /// are both walked from their last index towards the first. Packets newer than the current segment
        /// are accumulated into a <see cref="CoalescedUpdates"/>; a packet whose generation equals the
        /// segment's generation is applied to that segment together with whatever has been coalesced so far.
        /// Every visited segment gets its buffered-deletes generation set to the generation taken for this call.
        /// </para>
        /// </summary>
        /// <param name="readerPool"> pool used to obtain and release the <see cref="ReadersAndUpdates"/> of each segment </param>
        /// <param name="infos"> segments to apply deletes and updates to; the list itself is not reordered (a copy is sorted) </param>
        /// <returns>
        /// a result carrying whether any new deletes were applied, the generation used for this call, and
        /// the segments that ended up 100% deleted (<c>null</c> when there are none). When
        /// <paramref name="infos"/> is empty or there is nothing buffered, a result with <c>false</c> and
        /// <c>null</c> is returned after still consuming one generation.
        /// </returns>
        public virtual ApplyDeletesResult ApplyDeletesAndUpdates(IndexWriter.ReaderPool readerPool, IList <SegmentCommitInfo> infos)
        {
            UninterruptableMonitor.Enter(this);
            try
            {
                // Start time, only used for the "took N msec" diagnostic at the end.
                long t0 = J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond; // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results

                if (infos.Count == 0)
                {
                    return(new ApplyDeletesResult(false, nextGen++, null));
                }

                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(CheckDeleteStats());
                }

                if (!Any())
                {
                    if (infoStream.IsEnabled("BD"))
                    {
                        infoStream.Message("BD", "applyDeletes: no deletes; skipping");
                    }
                    return(new ApplyDeletesResult(false, nextGen++, null));
                }

                if (infoStream.IsEnabled("BD"))
                {
                    infoStream.Message("BD", "applyDeletes: infos=" + string.Format(J2N.Text.StringFormatter.InvariantCulture, "{0}", infos) + " packetCount=" + updates.Count);
                }

                // Generation stamped onto every segment visited below and reported in the result.
                long gen = nextGen++;

                // Sort a private copy so the caller's list keeps its order.
                JCG.List <SegmentCommitInfo> infos2 = new JCG.List <SegmentCommitInfo>();
                infos2.AddRange(infos);
                infos2.Sort(sortSegInfoByDelGen);

                CoalescedUpdates coalescedUpdates = null;
                bool             anyNewDeletes    = false;

                // Both cursors start at the end and move towards index 0.
                int infosIDX = infos2.Count - 1;
                int delIDX   = updates.Count - 1;

                // Lazily created; stays null unless some segment becomes fully deleted.
                IList <SegmentCommitInfo> allDeleted = null;

                while (infosIDX >= 0)
                {
                    //System.out.println("BD: cycle delIDX=" + delIDX + " infoIDX=" + infosIDX);

                    // packet is null once all packets have been consumed.
                    FrozenBufferedUpdates packet = delIDX >= 0 ? updates[delIDX] : null;
                    SegmentCommitInfo     info   = infos2[infosIDX];
                    long segGen = info.BufferedDeletesGen;

                    if (packet != null && segGen < packet.DelGen)
                    {
                        // Packet is newer than this segment: fold it into the coalesced set and
                        // move on to the next packet, staying on the same segment.
                        //        System.out.println("  coalesce");
                        if (coalescedUpdates is null)
                        {
                            coalescedUpdates = new CoalescedUpdates();
                        }
                        if (!packet.isSegmentPrivate)
                        {
                            /*
                             * Only coalesce if we are NOT on a segment private del packet: the segment private del packet
                             * must only applied to segments with the same delGen.  Yet, if a segment is already deleted
                             * from the SI since it had no more documents remaining after some del packets younger than
                             * its segPrivate packet (higher delGen) have been applied, the segPrivate packet has not been
                             * removed.
                             */
                            coalescedUpdates.Update(packet);
                        }

                        delIDX--;
                    }
                    else if (packet != null && segGen == packet.DelGen)
                    {
                        // Packet belongs to exactly this segment: apply coalesced updates (if any)
                        // plus the packet's own query deletes and doc-values updates.
                        if (Debugging.AssertsEnabled)
                        {
                            Debugging.Assert(packet.isSegmentPrivate, "Packet and Segments deletegen can only match on a segment private del packet gen={0}", segGen);
                        }
                        //System.out.println("  eq");

                        // Lock order: IW -> BD -> RP
                        if (Debugging.AssertsEnabled)
                        {
                            Debugging.Assert(readerPool.InfoIsLive(info));
                        }
                        ReadersAndUpdates rld    = readerPool.Get(info, true);
                        SegmentReader     reader = rld.GetReader(IOContext.READ);
                        int  delCount            = 0;
                        bool segAllDeletes;
                        try
                        {
                            DocValuesFieldUpdates.Container dvUpdates = new DocValuesFieldUpdates.Container();
                            if (coalescedUpdates != null)
                            {
                                //System.out.println("    del coalesced");
                                delCount += (int)ApplyTermDeletes(coalescedUpdates.TermsIterable(), rld, reader);
                                delCount += (int)ApplyQueryDeletes(coalescedUpdates.QueriesIterable(), rld, reader);
                                ApplyDocValuesUpdates(coalescedUpdates.numericDVUpdates, rld, reader, dvUpdates);
                                ApplyDocValuesUpdates(coalescedUpdates.binaryDVUpdates, rld, reader, dvUpdates);
                            }
                            //System.out.println("    del exact");
                            // Don't delete by Term here; DocumentsWriterPerThread
                            // already did that on flush:
                            delCount += (int)ApplyQueryDeletes(packet.GetQueriesEnumerable(), rld, reader);
                            ApplyDocValuesUpdates(packet.numericDVUpdates, rld, reader, dvUpdates);
                            ApplyDocValuesUpdates(packet.binaryDVUpdates, rld, reader, dvUpdates);
                            if (dvUpdates.Any())
                            {
                                rld.WriteFieldUpdates(info.Info.Dir, dvUpdates);
                            }
                            // Already-committed deletes plus pending ones; equal to DocCount means nothing is left.
                            int fullDelCount = rld.Info.DelCount + rld.PendingDeleteCount;
                            if (Debugging.AssertsEnabled)
                            {
                                Debugging.Assert(fullDelCount <= rld.Info.Info.DocCount);
                            }
                            segAllDeletes = fullDelCount == rld.Info.Info.DocCount;
                        }
                        finally
                        {
                            // Always hand the reader and the pooled entry back, even if applying failed.
                            rld.Release(reader);
                            readerPool.Release(rld);
                        }
                        anyNewDeletes |= delCount > 0;

                        if (segAllDeletes)
                        {
                            if (allDeleted is null)
                            {
                                allDeleted = new JCG.List <SegmentCommitInfo>();
                            }
                            allDeleted.Add(info);
                        }

                        if (infoStream.IsEnabled("BD"))
                        {
                            infoStream.Message("BD", "seg=" + info + " segGen=" + segGen + " segDeletes=[" + packet + "]; coalesced deletes=[" + (coalescedUpdates is null ? "null" : coalescedUpdates.ToString()) + "] newDelCount=" + delCount + (segAllDeletes ? " 100% deleted" : ""));
                        }

                        if (coalescedUpdates is null)
                        {
                            coalescedUpdates = new CoalescedUpdates();
                        }

                        /*
                         * Since we are on a segment private del packet we must not
                         * update the coalescedDeletes here! We can simply advance to the
                         * next packet and seginfo.
                         */
                        delIDX--;
                        infosIDX--;
                        info.SetBufferedDeletesGen(gen);
                    }
                    else
                    {
                        // No packet left, or the packet is older than this segment: only the
                        // coalesced updates (if any) apply; the packet cursor does not move.
                        //System.out.println("  gt");

                        if (coalescedUpdates != null)
                        {
                            // Lock order: IW -> BD -> RP
                            if (Debugging.AssertsEnabled)
                            {
                                Debugging.Assert(readerPool.InfoIsLive(info));
                            }
                            ReadersAndUpdates rld    = readerPool.Get(info, true);
                            SegmentReader     reader = rld.GetReader(IOContext.READ);
                            int  delCount            = 0;
                            bool segAllDeletes;
                            try
                            {
                                delCount += (int)ApplyTermDeletes(coalescedUpdates.TermsIterable(), rld, reader);
                                delCount += (int)ApplyQueryDeletes(coalescedUpdates.QueriesIterable(), rld, reader);
                                DocValuesFieldUpdates.Container dvUpdates = new DocValuesFieldUpdates.Container();
                                ApplyDocValuesUpdates(coalescedUpdates.numericDVUpdates, rld, reader, dvUpdates);
                                ApplyDocValuesUpdates(coalescedUpdates.binaryDVUpdates, rld, reader, dvUpdates);
                                if (dvUpdates.Any())
                                {
                                    rld.WriteFieldUpdates(info.Info.Dir, dvUpdates);
                                }
                                // Already-committed deletes plus pending ones; equal to DocCount means nothing is left.
                                int fullDelCount = rld.Info.DelCount + rld.PendingDeleteCount;
                                if (Debugging.AssertsEnabled)
                                {
                                    Debugging.Assert(fullDelCount <= rld.Info.Info.DocCount);
                                }
                                segAllDeletes = fullDelCount == rld.Info.Info.DocCount;
                            }
                            finally
                            {
                                // Always hand the reader and the pooled entry back, even if applying failed.
                                rld.Release(reader);
                                readerPool.Release(rld);
                            }
                            anyNewDeletes |= delCount > 0;

                            if (segAllDeletes)
                            {
                                if (allDeleted is null)
                                {
                                    allDeleted = new JCG.List <SegmentCommitInfo>();
                                }
                                allDeleted.Add(info);
                            }

                            if (infoStream.IsEnabled("BD"))
                            {
                                infoStream.Message("BD", "seg=" + info + " segGen=" + segGen + " coalesced deletes=[" + coalescedUpdates + "] newDelCount=" + delCount + (segAllDeletes ? " 100% deleted" : ""));
                            }
                        }
                        info.SetBufferedDeletesGen(gen);

                        infosIDX--;
                    }
                }

                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(CheckDeleteStats());
                }
                if (infoStream.IsEnabled("BD"))
                {
                    infoStream.Message("BD", "applyDeletes took " + ((J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond) - t0) + " msec"); // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results
                }
                // assert infos != segmentInfos || !any() : "infos=" + infos + " segmentInfos=" + segmentInfos + " any=" + any;

                return(new ApplyDeletesResult(anyNewDeletes, gen, allDeleted));
            }
            finally
            {
                UninterruptableMonitor.Exit(this);
            }
        }
예제 #24
0
        /// <summary>
        /// Tightens a candidate match once all <see cref="SubSpans"/> are ordered within the same document.
        /// <para/>
        /// The match initially covers the current position of the last sub-spans. The remaining
        /// sub-spans are then visited from the second-to-last down to the first; each one is advanced
        /// for as long as it stays in the match document and remains ordered before the span that
        /// follows it, so that the latest such position is used. The gaps between consecutive,
        /// non-overlapping positions are summed into the slop of the match.
        /// <para/>
        /// Side effects: updates <c>matchStart</c> and <c>matchEnd</c>, may clear <c>inSameDoc</c> and
        /// <c>more</c> when a sub-spans leaves the document or is exhausted, and appends the gathered
        /// payloads to <c>matchPayload</c> when payload collection is on and the match is accepted.
        /// </summary>
        /// <returns><c>true</c> if the accumulated slop does not exceed <c>allowedSlop</c>; otherwise <c>false</c>.</returns>
        private bool ShrinkToAfterShortestMatch()
        {
            // The last sub-spans is never advanced here; it anchors the end of the match.
            Spans tailSpans = subSpans[subSpans.Length - 1];
            matchStart = tailSpans.Start;
            matchEnd = tailSpans.End;

            // Payloads gathered here are only published to matchPayload at the very end,
            // and only when collectPayloads is set and the match is within the allowed slop.
            var candidatePayloads = new JCG.HashSet<byte[]>();
            if (tailSpans.IsPayloadAvailable)
            {
                candidatePayloads.UnionWith(tailSpans.GetPayload());
            }

            IList<byte[]> pendingPayload = null;

            int slopTotal = 0;
            int boundStart = matchStart;
            int boundEnd = matchEnd;

            for (int idx = subSpans.Length - 2; idx >= 0; idx--)
            {
                Spans current = subSpans[idx];
                if (collectPayloads && current.IsPayloadAvailable)
                {
                    // Snapshot the payload of the current position (copy constructor rather than AddRange()).
                    pendingPayload = new JCG.List<byte[]>(current.GetPayload());
                }

                int curStart = current.Start;
                int curEnd = current.End;

                // Move 'current' forward until it is no longer ordered before (boundStart, boundEnd).
                for (;;)
                {
                    if (!current.MoveNext())
                    {
                        // Exhausted: the remaining sub-spans are still checked for a final match.
                        inSameDoc = false;
                        more = false;
                        break;
                    }

                    if (matchDoc != current.Doc)
                    {
                        // Left the document: the remaining sub-spans are still checked for the
                        // last match in this document. The last sub-spans is not advanced here.
                        inSameDoc = false;
                        break;
                    }

                    int nextStart = current.Start;
                    int nextEnd = current.End; // reading End cannot be avoided here
                    if (!DocSpansOrdered(nextStart, nextEnd, boundStart, boundEnd))
                    {
                        // Went past the bound: keep the previously recorded position.
                        break;
                    }

                    // Still before (boundStart, boundEnd): remember this later position.
                    curStart = nextStart;
                    curEnd = nextEnd;
                    if (collectPayloads && current.IsPayloadAvailable)
                    {
                        pendingPayload = new JCG.List<byte[]>(current.GetPayload());
                    }
                }

                if (collectPayloads && pendingPayload != null)
                {
                    candidatePayloads.UnionWith(pendingPayload);
                }

                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(curStart <= matchStart);
                }

                // Overlapping spans contribute nothing to the slop.
                if (matchStart > curEnd)
                {
                    slopTotal += matchStart - curEnd;
                }

                // Deliberately no early exit when slopTotal exceeds allowedSlop:
                // subSpans[0] must still be advanced past the match, if any.
                matchStart = curStart;
                boundStart = curStart;
                boundEnd = curEnd;
            }

            bool withinSlop = slopTotal <= allowedSlop;

            if (collectPayloads && withinSlop && candidatePayloads.Count > 0)
            {
                matchPayload.AddRange(candidatePayloads);
            }

            return withinSlop; // ordered, and within the allowed slop
        }