Ejemplo n.º 1
0
        /// <summary>
        /// Checks that <c>AddTaxonomy</c> and <c>AddCategory</c> can be used in parallel on the
        /// same destination writer: a source taxonomy is merged into an empty one while a helper
        /// thread is running against the same writer, and afterwards every path in the
        /// destination must be unique.
        /// </summary>
        public virtual void TestConcurrency()
        {
            int categoryCount = AtLeast(10000);

            // Build the input taxonomy: one category "a/<n>" for each n in [0, categoryCount).
            Directory src = NewDirectory();
            var sourceWriter = new DirectoryTaxonomyWriter(src);
            int next = 0;
            while (next < categoryCount)
            {
                string leaf = Convert.ToString(next, CultureInfo.InvariantCulture);
                sourceWriter.AddCategory(new FacetLabel("a", leaf));
                next++;
            }
            sourceWriter.Dispose();

            // Merge the source taxonomy into an empty destination while the helper thread
            // works on the same destination writer -- no duplicate categories may result.
            Directory dest = NewDirectory();
            var destWriter = new DirectoryTaxonomyWriter(dest);
            var helperThread = new ThreadAnonymousInnerClassHelper2(this, categoryCount, destWriter);
            helperThread.Start();

            IOrdinalMap ordinalMap = new MemoryOrdinalMap();
            destWriter.AddTaxonomy(src, ordinalMap);
            helperThread.Join();
            destWriter.Dispose();

            // Validate the merged taxonomy.
            var taxoReader = new DirectoryTaxonomyReader(dest);

            // +2 accounts for the root category and "a".
            Assert.AreEqual(categoryCount + 2, taxoReader.Count);

            var seenLabels = new JCG.HashSet<FacetLabel>();
            for (int ordinal = 1; ordinal < taxoReader.Count; ordinal++)
            {
                FacetLabel label = taxoReader.GetPath(ordinal);
                bool isNew = seenLabels.Add(label);
                Assert.True(isNew, "category " + label + " already existed");
            }
            taxoReader.Dispose();

            IOUtils.Dispose(src, dest);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// For every option set returned by <c>ValidOptions()</c>: indexes random documents,
        /// deletes a random subset by id, force-merges to a single segment and checks that the
        /// term vectors of each surviving document still match the document it was built from.
        /// </summary>
        public virtual void TestMerge()
        {
            RandomDocumentFactory factory = new RandomDocumentFactory(this, 5, 20);
            int docCount = AtLeast(100);
            int deleteCount = Random.Next(docCount);

            // Choose the distinct ids to delete once; they are reused for every option set.
            ISet<int> deletedIds = new JCG.HashSet<int>();
            while (deletedIds.Count < deleteCount)
            {
                deletedIds.Add(Random.Next(docCount));
            }

            foreach (Options options in ValidOptions())
            {
                RandomDocument[] randomDocs = new RandomDocument[docCount];
                for (int d = 0; d < docCount; d++)
                {
                    randomDocs[d] = factory.NewDocument(TestUtil.NextInt32(Random, 1, 3), AtLeast(10), options);
                }

                using Directory dir = NewDirectory();
                using RandomIndexWriter indexWriter = new RandomIndexWriter(Random, dir);
                for (int d = 0; d < docCount; d++)
                {
                    string id = "" + d;
                    indexWriter.AddDocument(AddId(randomDocs[d].ToDocument(), id));
                    if (Rarely())
                    {
                        indexWriter.Commit();
                    }
                }

                foreach (int deletedId in deletedIds)
                {
                    indexWriter.DeleteDocuments(new Term("id", "" + deletedId));
                }

                // Merge with the deletes in place.
                indexWriter.ForceMerge(1);

                using IndexReader reader = indexWriter.GetReader();
                for (int d = 0; d < docCount; d++)
                {
                    if (deletedIds.Contains(d))
                    {
                        continue;
                    }

                    int docID = DocID(reader, "" + d);
                    AssertEquals(randomDocs[d], reader.GetTermVectors(docID));
                }
            }
        }
Ejemplo n.º 3
0
 /// <summary>
 /// Loads the "stoptags.txt" resource associated with <see cref="JapaneseAnalyzer"/>
 /// (using "#" as the comment marker argument) and copies its entries into a new set.
 /// </summary>
 /// <returns>A freshly created set holding every entry of the loaded stop tag set.</returns>
 /// <exception cref="Exception">
 /// Thrown, wrapping the original <see cref="IOException"/>, when the resource cannot be loaded.
 /// </exception>
 private static ISet<string> LoadDefaultStopTagSet() // LUCENENET: Avoid static constructors (see https://github.com/apache/lucenenet/pull/224#issuecomment-469284006)
 {
     try
     {
         var stopTags = new JCG.HashSet<string>();
         CharArraySet loaded = LoadStopwordSet(false, typeof(JapaneseAnalyzer), "stoptags.txt", "#");
         foreach (string tag in loaded)
         {
             stopTags.Add(tag);
         }
         return stopTags;
     }
     catch (IOException cause)
     {
         // The default set is expected to always be present, as it ships with the distribution.
         throw new Exception("Unable to load default stoptag set", cause);
     }
 }
Ejemplo n.º 4
0
        /// <summary>
        /// Executes the strategy's query for <paramref name="args"/> and asserts the number of
        /// hits and, when <paramref name="assertIds"/> is given, that each of those ids is among
        /// the returned documents.
        /// </summary>
        /// <param name="args">Spatial arguments used to build the query.</param>
        /// <param name="assertNumFound">Expected value of the result's <c>numFound</c>.</param>
        /// <param name="assertIds">Ids that must be present in the results; may be <c>null</c> to skip the check.</param>
        private void checkHits(SpatialArgs args, int assertNumFound, int[] assertIds)
        {
            SearchResults found = executeQuery(strategy.MakeQuery(args), 100);
            assertEquals("" + args, assertNumFound, found.numFound);

            if (assertIds == null)
            {
                return;
            }

            // Collect the "id" field of every hit so the membership checks are cheap.
            ISet<int> returnedIds = new JCG.HashSet<int>();
            foreach (SearchResult hit in found.results)
            {
                string rawId = hit.document.Get("id");
                returnedIds.Add(int.Parse(rawId, CultureInfo.InvariantCulture));
            }

            foreach (int expectedId in assertIds)
            {
                assertTrue("has " + expectedId, returnedIds.Contains(expectedId));
            }
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Removes from <paramref name="source"/> every element that is not contained in
        /// <paramref name="collection"/>.
        /// </summary>
        /// <typeparam name="T">Element type of both collections.</typeparam>
        /// <param name="source">The collection to modify.</param>
        /// <param name="collection">The elements that are allowed to remain.</param>
        /// <returns>
        /// <c>true</c> if the set or list fast path changed <paramref name="source"/>; on the
        /// slow path, the value returned by <c>RemoveAll</c>, or <c>false</c> if nothing had to be removed.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="source"/> or <paramref name="collection"/> is <c>null</c>.
        /// </exception>
        public static bool RetainAll<T>(this ICollection<T> source, ICollection<T> collection)
        {
            if (source is null)
            {
                throw new ArgumentNullException(nameof(source));
            }
            if (collection is null)
            {
                throw new ArgumentNullException(nameof(collection));
            }

            // Nothing to remove from an empty collection.
            if (source.Count == 0)
            {
                return false;
            }

            switch (source)
            {
                case ISet<T> set:
                {
                    // Sets can intersect in place; a change shows up as a different count.
                    int countBefore = set.Count;
                    set.IntersectWith(collection);
                    return set.Count != countBefore;
                }
                case IList<T> list:
                {
                    int removedCount = list.RemoveAll(item => !collection.Contains(item));
                    return removedCount > 0;
                }
            }

            // Slow path for unknown collection types: gather the rejects first so the
            // source is not modified while it is being enumerated.
            var rejected = new JCG.HashSet<T>();
            foreach (T item in source)
            {
                if (!collection.Contains(item))
                {
                    rejected.Add(item);
                }
            }

            return rejected.Count > 0 && source.RemoveAll(rejected);
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Reverses the language of the given (non-singleton) automaton while returning
        /// the set of new initial states.
        /// </summary>
        /// <remarks>
        /// The automaton is modified in place: every transition is flipped, the previous
        /// initial state becomes the only accepting state, and a fresh initial state gets an
        /// epsilon edge to each previously accepting state. The automaton is flagged as
        /// non-deterministic and its numbered states are cleared.
        /// </remarks>
        /// <param name="a">The automaton to reverse in place.</param>
        /// <returns>The states that were accepting before the reversal.</returns>
        public static ISet<State> Reverse(Automaton a)
        {
            a.ExpandSingleton();

            State[] numbered = a.GetNumberedStates();
            var incoming = new Dictionary<State, ISet<Transition>>();
            ISet<State> formerlyAccepting = new JCG.HashSet<State>();

            // Remember which states accept, give each state an empty bucket for its
            // reversed transitions, and clear its accept flag.
            foreach (State state in numbered)
            {
                if (state.Accept)
                {
                    formerlyAccepting.Add(state);
                }
                incoming[state] = new JCG.HashSet<Transition>();
                state.accept = false;
            }

            // Reverse all edges: a transition from -> to becomes to -> from.
            foreach (State from in numbered)
            {
                foreach (Transition edge in from.GetTransitions())
                {
                    incoming[edge.to].Add(new Transition(edge.min, edge.max, from));
                }
            }

            foreach (State state in numbered)
            {
                state.SetTransitions(incoming[state].ToArray());
            }

            // Make the new initial and final states.
            a.initial.accept = true;
            State newInitial = new State();
            a.initial = newInitial;
            foreach (State target in formerlyAccepting)
            {
                newInitial.AddEpsilon(target); // ensures that all initial states are reachable
            }

            a.deterministic = false;
            a.ClearNumberedStates();
            return formerlyAccepting;
        }
            /// <summary>
            /// Creates a factory holding <paramref name="distinctFieldNames"/> distinct random
            /// field names (never "id") and <paramref name="disctinctTerms"/> random terms
            /// together with their <see cref="BytesRef"/> form.
            /// </summary>
            /// <param name="baseTermVectorsFormatTestCase">The owning test case instance.</param>
            /// <param name="distinctFieldNames">Number of distinct field names to generate.</param>
            /// <param name="disctinctTerms">Number of terms to generate.</param>
            protected internal RandomDocumentFactory(BaseTermVectorsFormatTestCase baseTermVectorsFormatTestCase, int distinctFieldNames, int disctinctTerms)
            {
                this.outerInstance = baseTermVectorsFormatTestCase;

                // Keep drawing names until enough distinct ones exist; "id" is dropped right
                // after each draw so it can never end up in the final set.
                ISet<string> uniqueNames = new JCG.HashSet<string>();
                while (uniqueNames.Count < distinctFieldNames)
                {
                    string candidate = TestUtil.RandomSimpleString(Random);
                    uniqueNames.Add(candidate);
                    uniqueNames.Remove("id");
                }
                this.fieldNames = uniqueNames.ToArray();

                terms = new string[disctinctTerms];
                termBytes = new BytesRef[disctinctTerms];
                int slot = 0;
                while (slot < disctinctTerms)
                {
                    string term = TestUtil.RandomRealisticUnicodeString(Random);
                    terms[slot] = term;
                    termBytes[slot] = new BytesRef(term);
                    slot++;
                }
            }
Ejemplo n.º 8
0
        /// <summary>
        /// Collects the consumer of every field reachable from <c>fieldHash</c>, following the
        /// <c>next</c> chain of each slot.
        /// </summary>
        /// <returns>A new collection (backed by a hash set) of the per-field consumers.</returns>
        public ICollection<DocFieldConsumerPerField> Fields()
        {
            ICollection<DocFieldConsumerPerField> consumers = new JCG.HashSet<DocFieldConsumerPerField>();

            foreach (DocFieldProcessorPerField head in fieldHash)
            {
                // Each slot holds a linked chain of fields; walk it to the end.
                for (DocFieldProcessorPerField node = head; node != null; node = node.next)
                {
                    consumers.Add(node.consumer);
                }
            }

            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(consumers.Count == totalFieldCount);
            }

            return consumers;
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Indexes the single document "a b c d e f g h i j a k" using <c>TestPayloadAnalyzer</c>,
        /// runs a <see cref="SpanNearQuery"/> over the terms "a" and "k", and checks that exactly
        /// the payloads "a:Noise:10" and "k:Noise:11" are collected from the matching spans.
        /// </summary>
        public virtual void TestShrinkToAfterShortestMatch()
        {
            Directory directory = NewDirectory();
            var config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new TestPayloadAnalyzer(this));
            RandomIndexWriter indexWriter = new RandomIndexWriter(Random, directory, config);

            Document doc = new Document();
            doc.Add(new TextField("content", new StringReader("a b c d e f g h i j a k")));
            indexWriter.AddDocument(doc);

            IndexReader reader = indexWriter.GetReader();
            IndexSearcher searcher = NewSearcher(reader);
            indexWriter.Dispose();

            SpanQuery[] clauses = new SpanQuery[]
            {
                new SpanTermQuery(new Term("content", "a")),
                new SpanTermQuery(new Term("content", "k"))
            };
            SpanNearQuery nearQuery = new SpanNearQuery(clauses, 1, true);
            Spans spans = MultiSpansWrapper.Wrap(searcher.TopReaderContext, nearQuery);
            TopDocs topDocs = searcher.Search(nearQuery, 1);

            // Gather the distinct payload strings of every span match.
            ISet<string> payloadTexts = new JCG.HashSet<string>();
            for (int hit = 0; hit < topDocs.ScoreDocs.Length; hit++)
            {
                while (spans.Next())
                {
                    foreach (var payload in spans.GetPayload())
                    {
                        payloadTexts.Add(Encoding.UTF8.GetString(payload));
                    }
                }
            }

            Assert.AreEqual(2, payloadTexts.Count);
            Assert.IsTrue(payloadTexts.Contains("a:Noise:10"));
            Assert.IsTrue(payloadTexts.Contains("k:Noise:11"));

            reader.Dispose();
            directory.Dispose();
        }
Ejemplo n.º 10
0
        /// <summary>
        /// Builds a <see cref="Languages"/> instance from an embedded resource that lists one
        /// entry per line. Lines are trimmed; empty lines are skipped, and so is everything from
        /// a line starting with <c>ResourceConstants.EXT_CMT_START</c> up to and including the
        /// next line ending with <c>ResourceConstants.EXT_CMT_END</c>.
        /// </summary>
        /// <param name="languagesResourceName">Name of the manifest resource to read.</param>
        /// <returns>A <see cref="Languages"/> wrapping a read-only view of the parsed entries.</returns>
        /// <exception cref="ArgumentException">The resource could not be resolved.</exception>
        public static Languages GetInstance(string languagesResourceName)
        {
            Stream resource = typeof(Languages).FindAndGetManifestResourceStream(languagesResourceName);
            if (resource == null)
            {
                throw new ArgumentException("Unable to resolve required resource: " + languagesResourceName);
            }

            ISet<string> entries = new JCG.HashSet<string>();
            using (TextReader reader = new StreamReader(resource, ResourceConstants.ENCODING))
            {
                bool insideExtendedComment = false;
                for (string raw = reader.ReadLine(); raw != null; raw = reader.ReadLine())
                {
                    string line = raw.Trim();

                    if (insideExtendedComment)
                    {
                        // Stay in comment mode until a line closes it; the closing line itself is skipped too.
                        if (line.EndsWith(ResourceConstants.EXT_CMT_END, StringComparison.Ordinal))
                        {
                            insideExtendedComment = false;
                        }
                        continue;
                    }

                    if (line.StartsWith(ResourceConstants.EXT_CMT_START, StringComparison.Ordinal))
                    {
                        insideExtendedComment = true;
                    }
                    else if (line.Length > 0)
                    {
                        entries.Add(line);
                    }
                }
            }

            return new Languages(entries.AsReadOnly());
        }
Ejemplo n.º 11
0
            public bool MoveNext()
            {
                while (currentDocId < docCount)
                {
                    currentDocId++;
                    if (liveDocs != null && !liveDocs.Get(currentDocId))
                    {
                        continue;
                    }

                    Document doc = outerInstance.m_reader.Document(currentDocId, relevantFields);

                    BytesRef        tempPayload  = null;
                    ISet <BytesRef> tempContexts = new JCG.HashSet <BytesRef>();

                    if (hasPayloads)
                    {
                        IIndexableField payload = doc.GetField(outerInstance.m_payloadField);
                        if (payload is null || (payload.GetBinaryValue() is null && payload.GetStringValue() is null))
                        {
                            continue;
                        }
                        tempPayload = payload.GetBinaryValue() ?? new BytesRef(payload.GetStringValue());
                    }

                    if (hasContexts)
                    {
                        IIndexableField[] contextFields = doc.GetFields(outerInstance.m_contextsField);
                        foreach (IIndexableField contextField in contextFields)
                        {
                            if (contextField.GetBinaryValue() is null && contextField.GetStringValue() is null)
                            {
                                continue;
                            }
                            else
                            {
                                tempContexts.Add(contextField.GetBinaryValue() ?? new BytesRef(contextField.GetStringValue()));
                            }
                        }
                    }
Ejemplo n.º 12
0
        /// <summary>
        /// <seealso cref="FieldFragList.Add(int, int, IList{WeightedPhraseInfo})"/>.
        /// <para/>
        /// Builds one <see cref="SubInfo"/> per phrase. A phrase's boost is the sum of
        /// <c>weight * phrase boost</c> over those of its terms whose text has not been seen
        /// earlier in this fragment; each boost is then scaled by a length-based norm before
        /// the fragment is added to <c>FragInfos</c>.
        /// </summary>
        /// <param name="startOffset">Start offset passed on to the created fragment info.</param>
        /// <param name="endOffset">End offset passed on to the created fragment info.</param>
        /// <param name="phraseInfoList">The phrases that make up this fragment.</param>
        public override void Add(int startOffset, int endOffset, IList<WeightedPhraseInfo> phraseInfoList)
        {
            IList<SubInfo> unscaledInfos = new JCG.List<SubInfo>();
            ISet<string> seenTermTexts = new JCG.HashSet<string>();
            int termCount = 0;

            foreach (WeightedPhraseInfo phrase in phraseInfoList)
            {
                float phraseBoost = 0;
                foreach (TermInfo term in phrase.TermsInfos)
                {
                    // Only the first occurrence of a term text contributes to the boost,
                    // but every occurrence counts towards the fragment length.
                    if (seenTermTexts.Add(term.Text))
                    {
                        phraseBoost += term.Weight * phrase.Boost;
                    }
                    termCount++;
                }

                unscaledInfos.Add(new SubInfo(phrase.GetText(), phrase.TermsOffsets, phrase.Seqnum, phraseBoost));
            }

            // The number of terms per fragment is folded into the weight; otherwise a one-word
            // query would weigh all fragments equally no matter how many words they contain.
            // The length is "bent" with a standard normalization so that very long fragments
            // do not outrank more relevant ones.
            float norm = termCount * (1 / (float)Math.Sqrt(termCount));

            IList<SubInfo> scaledInfos = new JCG.List<SubInfo>();
            float totalBoost = 0;
            foreach (SubInfo unscaled in unscaledInfos)
            {
                float scaledBoost = unscaled.Boost * norm;
                scaledInfos.Add(new SubInfo(unscaled.Text, unscaled.TermsOffsets, unscaled.Seqnum, scaledBoost));
                totalBoost += scaledBoost;
            }

            FragInfos.Add(new WeightedFragInfo(startOffset, endOffset, scaledInfos, totalBoost));
        }
Ejemplo n.º 13
0
        /// <summary>
        /// Returns the files required for replication. By default, this method returns
        /// all files that exist in the new revision, but not in the handler.
        /// </summary>
        /// <param name="newRevisionFiles">Files of the new revision, grouped by source.</param>
        /// <returns>
        /// <paramref name="newRevisionFiles"/> itself when the handler has no current revision
        /// files; otherwise a new dictionary that maps every source known to the handler to the
        /// new-revision files the handler does not have yet, in their original order.
        /// </returns>
        protected virtual IDictionary<string, IList<RevisionFile>> RequiredFiles(IDictionary<string, IList<RevisionFile>> newRevisionFiles)
        {
            IDictionary<string, IList<RevisionFile>> currentFiles = handler.CurrentRevisionFiles;
            if (currentFiles == null)
            {
                return newRevisionFiles;
            }

            var required = new Dictionary<string, IList<RevisionFile>>();
            foreach (var entry in currentFiles)
            {
                string source = entry.Key;

                // Index the handler's file names in a set for fast membership checks below.
                ISet<string> knownNames = new JCG.HashSet<string>();
                foreach (RevisionFile existing in entry.Value)
                {
                    knownNames.Add(existing.FileName);
                }

                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(newRevisionFiles.ContainsKey(source), "source not found in newRevisionFiles: {0}", newRevisionFiles);
                }

                // A list keeps the order of the new revision's files.
                IList<RevisionFile> missing = new JCG.List<RevisionFile>();
                foreach (RevisionFile candidate in newRevisionFiles[source])
                {
                    if (!knownNames.Contains(candidate.FileName))
                    {
                        missing.Add(candidate);
                    }
                }

                required[source] = missing;
            }

            return required;
        }
Ejemplo n.º 14
0
        /// <summary>
        /// Runs <c>HTMLStripCharFilter</c> with "reserved" as an escaped tag over a sample
        /// string and checks that the three occurrences of "reserved" are found at the expected
        /// offsets of the output while the "other" tag is gone.
        /// </summary>
        public virtual void TestReserved()
        {
            string input = "aaa bbb <reserved ccc=\"ddddd\"> eeee </reserved> ffff <reserved ggg=\"hhhh\"/> <other/>";
            ISet<string> escapedTags = new JCG.HashSet<string>();
            escapedTags.Add("reserved");

            TextReader filter = new HTMLStripCharFilter(new StringReader(input), escapedTags);
            StringBuilder output = new StringBuilder();
            for (int ch = filter.Read(); ch > 0; ch = filter.Read())
            {
                output.Append((char)ch);
            }
            string result = output.ToString();

            int firstIndex = result.IndexOf("reserved", StringComparison.Ordinal);
            assertTrue("Escaped tag not preserved: " + firstIndex, firstIndex == 9);

            int secondIndex = result.IndexOf("reserved", 15, StringComparison.Ordinal);
            assertTrue("Escaped tag not preserved: " + secondIndex, secondIndex == 38);

            int thirdIndex = result.IndexOf("reserved", 41, StringComparison.Ordinal);
            assertTrue("Escaped tag not preserved: " + thirdIndex, thirdIndex == 54);

            assertTrue("Other tag should be removed", result.IndexOf("other", StringComparison.Ordinal) == -1);
        }
Ejemplo n.º 15
0
        /// <summary>
        /// Searches with a circle around <paramref name="pt"/> (or that circle's bounding box)
        /// using the Intersects operation, randomly either as a query or as a filter over all
        /// documents, and asserts the hit count and, optionally, the presence of specific ids.
        /// </summary>
        /// <param name="bbox">When <c>true</c>, the circle's bounding box is used as the shape.</param>
        /// <param name="pt">Center of the circle.</param>
        /// <param name="distKM">Radius in kilometers; converted to degrees using the earth mean radius.</param>
        /// <param name="assertNumFound">Expected value of the result's <c>numFound</c>.</param>
        /// <param name="assertIds">Ids that must be present in the results; may be <c>null</c> to skip the check.</param>
        private void _CheckHits(bool bbox, IPoint pt, double distKM, int assertNumFound, params int[] assertIds)
        {
            double radiusDegrees = DistanceUtils.Dist2Degrees(distKM, DistanceUtils.EarthMeanRadiusKilometers);
            IShape shape = ctx.MakeCircle(pt, radiusDegrees);
            if (bbox)
            {
                shape = shape.BoundingBox;
            }

            SpatialArgs args = new SpatialArgs(SpatialOperation.Intersects, shape);
            //args.setDistPrecision(0.025);

            // Randomly exercise both the query and the filter code path of the strategy.
            Query query;
            bool useQueryPath = Random.nextBoolean();
            if (useQueryPath)
            {
                query = strategy.MakeQuery(args);
            }
            else
            {
                query = new FilteredQuery(new MatchAllDocsQuery(), strategy.MakeFilter(args));
            }

            SearchResults found = executeQuery(query, 100);
            assertEquals("" + shape, assertNumFound, found.numFound);

            if (assertIds == null)
            {
                return;
            }

            ISet<int> returnedIds = new JCG.HashSet<int>();
            foreach (SearchResult hit in found.results)
            {
                string rawId = hit.document.Get("id");
                returnedIds.Add(int.Parse(rawId, CultureInfo.InvariantCulture));
            }

            foreach (int expectedId in assertIds)
            {
                assertTrue("has " + expectedId, returnedIds.Contains(expectedId));
            }
        }
Ejemplo n.º 16
0
        /// <summary>
        /// Builds the expected group counts for <paramref name="term"/> from
        /// <c>context.searchTermToGroupCounts</c>, taking at most <paramref name="topN"/> groups
        /// in the enumeration order of the dictionary's keys.
        /// </summary>
        /// <param name="context">Index context holding the expected counts per search term.</param>
        /// <param name="term">The search term to look up.</param>
        /// <param name="groupSort">Not used by this method.</param>
        /// <param name="topN">Maximum number of groups to return.</param>
        /// <returns>One group count per selected group, with strings converted to <see cref="BytesRef"/> (null stays null).</returns>
        private List<AbstractDistinctValuesCollector.IGroupCount<IComparable>> CreateExpectedResult(IndexContext context, string term, Sort groupSort, int topN)
        {
            var expected = new List<AbstractDistinctValuesCollector.IGroupCount<IComparable>>();
            IDictionary<string, ISet<string>> countsByGroup = context.searchTermToGroupCounts[term];

            int taken = 0;
            foreach (string group in countsByGroup.Keys)
            {
                if (taken >= topN)
                {
                    break;
                }
                taken++;

                ISet<BytesRef> distinctValues = new JCG.HashSet<BytesRef>();
                foreach (string value in countsByGroup[group])
                {
                    if (value != null)
                    {
                        distinctValues.Add(new BytesRef(value));
                    }
                    else
                    {
                        distinctValues.Add(null);
                    }
                }

                BytesRef groupValue = group != null ? new BytesRef(group) : (BytesRef)null;
                var groupCount = new GroupCount(groupValue, distinctValues);
                expected.Add(groupCount);
            }

            return expected;
        }
Ejemplo n.º 17
0
        /// <summary>
        /// Returns an automaton that accepts the union of the languages of the given
        /// automata.
        /// <para/>
        /// Complexity: linear in number of states.
        /// </summary>
        /// <remarks>
        /// When the number of distinct hash codes among the inputs differs from the number of
        /// inputs, the inputs are treated as possibly aliased and every non-empty one is cloned
        /// with <c>CloneExpanded()</c>; otherwise <c>CloneExpandedIfRequired()</c> is used.
        /// Empty automata are skipped.
        /// </remarks>
        /// <param name="l">The automata to unite.</param>
        /// <returns>A new non-deterministic automaton whose initial state has an epsilon edge to each input's initial state.</returns>
        public static Automaton Union(ICollection<Automaton> l)
        {
            // Collect hash codes to detect whether some inputs may be the same automaton.
            JCG.HashSet<int> hashCodes = new JCG.HashSet<int>();
            foreach (Automaton candidate in l)
            {
                hashCodes.Add(candidate.GetHashCode());
            }
            bool hasAliases = hashCodes.Count != l.Count;

            State unionStart = new State();
            foreach (Automaton member in l)
            {
                if (BasicOperations.IsEmpty(member))
                {
                    continue;
                }

                Automaton copy = hasAliases
                    ? member.CloneExpanded()
                    : member.CloneExpandedIfRequired();
                unionStart.AddEpsilon(copy.initial);
            }

            Automaton union = new Automaton();
            union.initial = unionStart;
            union.deterministic = false;

            //a.clearHashCode();
            union.ClearNumberedStates();
            union.CheckMinimizeAlways();
            return union;
        }
Ejemplo n.º 18
0
        /// <summary>
        /// Cleans up the index directory from old index files. This method uses the
        /// last commit found by <see cref="GetLastCommit(Directory)"/>. If it matches the
        /// expected <paramref name="segmentsFile"/>, then all files not referenced by this commit point
        /// are deleted.
        /// </summary>
        /// <remarks>
        /// <b>NOTE:</b> This method does a best effort attempt to clean the index
        /// directory. It suppresses any exceptions that occur, as this can be retried
        /// the next time.
        /// </remarks>
        /// <param name="directory">The index directory to clean.</param>
        /// <param name="segmentsFile">The segments file name the last commit is expected to have.</param>
        public static void CleanupOldIndexFiles(Directory directory, string segmentsFile)
        {
            try
            {
                IndexCommit lastCommit = GetLastCommit(directory);

                // A null commit means weird IO errors occurred; a mismatching segments file
                // means the expected commit point could not be read. Either way, do nothing.
                bool isExpectedCommit = lastCommit != null
                    && lastCommit.SegmentsFileName.Equals(segmentsFile, StringComparison.Ordinal);
                if (!isExpectedCommit)
                {
                    return;
                }

                ISet<string> keep = new JCG.HashSet<string>(lastCommit.FileNames);
                keep.Add(IndexFileNames.SEGMENTS_GEN);

                Regex codecFilePattern = IndexFileNames.CODEC_FILE_PATTERN;
                foreach (string name in directory.ListAll())
                {
                    if (keep.Contains(name))
                    {
                        continue;
                    }

                    // Only touch files that look like index files.
                    bool looksLikeIndexFile = codecFilePattern.IsMatch(name)
                        || name.StartsWith(IndexFileNames.SEGMENTS, StringComparison.Ordinal);
                    if (!looksLikeIndexFile)
                    {
                        continue;
                    }

                    try
                    {
                        directory.DeleteFile(name);
                    }
                    catch
                    {
                        // suppress, it's just a best effort
                    }
                }
            }
            catch
            {
                // Errors in this state are deliberately ignored: the cleanup will have a
                // chance to succeed the next time we get a new revision.
            }
        }
Ejemplo n.º 19
0
        /// <summary>
        /// Walks all terms of field "f" in <paramref name="r"/> and asserts that they are strictly
        /// increasing and contained in <paramref name="allTerms"/>; afterwards every term seen is
        /// looked up again through <c>SeekCeil</c> and must be found.
        /// </summary>
        /// <param name="r">The reader whose terms are checked.</param>
        /// <param name="allTerms">All terms that were added to the index.</param>
        /// <param name="isTop">When <c>true</c>, the terms seen must equal <paramref name="allTerms"/> exactly.</param>
        private void CheckTermsOrder(IndexReader r, ISet<string> allTerms, bool isTop)
        {
            TermsEnum termsEnum = MultiFields.GetFields(r).GetTerms("f").GetIterator(null);
            BytesRef previous = new BytesRef();
            ISet<string> seen = new JCG.HashSet<string>();

            BytesRef current;
            while ((current = termsEnum.Next()) != null)
            {
                // Each term must sort strictly after the one before it.
                Assert.IsTrue(previous.CompareTo(current) < 0);
                previous.CopyBytes(current);

                string text = current.Utf8ToString();
                Assert.IsTrue(allTerms.Contains(text), "term " + TermDesc(text) + " was not added to index (count=" + allTerms.Count + ")");
                seen.Add(text);
            }

            if (isTop)
            {
                Assert.IsTrue(allTerms.SetEquals(seen));
            }

            // Test seeking:
            foreach (string text in seen)
            {
                BytesRef target = new BytesRef(text);
                Assert.AreEqual(TermsEnum.SeekStatus.FOUND, termsEnum.SeekCeil(target), "seek failed for term=" + TermDesc(target.Utf8ToString()));
            }
        }
Ejemplo n.º 20
0
 /// <summary>
 /// Waits for the starting gun, then keeps draining tokens from <c>postings</c> into documents
 /// of at most <c>maxTermsPerDoc</c> distinct tokens each and adds every document to the writer,
 /// until the queue is empty.
 /// </summary>
 /// <exception cref="Exception">Any failure is rethrown wrapped in a new exception with the same message.</exception>
 public override void Run()
 {
     try
     {
         Document document = new Document();
         Field field = NewTextField("field", "", Field.Store.NO);
         document.Add(field);
         startingGun.Wait();

         while (postings.Count != 0)
         {
             StringBuilder body = new StringBuilder();
             ISet<string> tokensInDoc = new JCG.HashSet<string>();

             int taken = 0;
             while (taken < maxTermsPerDoc && postings.TryDequeue(out string token))
             {
                 if (tokensInDoc.Contains(token))
                 {
                     // Already in this document: put it back and finish the document.
                     postings.Enqueue(token);
                     break;
                 }

                 body.Append(' ').Append(token);
                 tokensInDoc.Add(token);
                 taken++;
             }

             field.SetStringValue(body.ToString());
             iw.AddDocument(document);
         }
     }
     catch (Exception cause)
     {
         throw new Exception(cause.Message, cause);
     }
 }
Ejemplo n.º 21
0
        /// <summary>
        /// Checks, via reflection, that every non-static method of <c>IndexWriterConfig</c> whose
        /// name starts with "get" (but not "get_") is declared by <c>IndexWriterConfig</c> itself
        /// and also exists, by name, among the non-static "get" methods of <c>LiveIndexWriterConfig</c>.
        /// </summary>
        public virtual void TestOverrideGetters()
        {
            // Names of all non-static "get*" methods on LiveIndexWriterConfig.
            ISet<string> liveGetterNames = new JCG.HashSet<string>();
            foreach (MethodInfo method in typeof(LiveIndexWriterConfig).GetMethods())
            {
                if (!method.Name.StartsWith("get", StringComparison.Ordinal) || method.IsStatic)
                {
                    continue;
                }
                liveGetterNames.Add(method.Name);
            }

            foreach (MethodInfo method in typeof(IndexWriterConfig).GetMethods())
            {
                string name = method.Name;
                bool isCandidate = name.StartsWith("get", StringComparison.Ordinal)
                    && !name.StartsWith("get_", StringComparison.Ordinal)
                    && !method.IsStatic;
                if (!isCandidate)
                {
                    continue;
                }

                Assert.AreEqual(typeof(IndexWriterConfig), method.DeclaringType, "method " + name + " not overrided by IndexWriterConfig");
                Assert.IsTrue(liveGetterNames.Contains(name), "method " + name + " not declared on LiveIndexWriterConfig");
            }
        }
Ejemplo n.º 22
0
        /// <summary>
        /// Removes duplicate output strings from each synonym entry in place, keeping the first
        /// occurrence of every string. Duplicates are tracked per entry only: the same string may
        /// still appear in several different entries.
        /// </summary>
        /// <param name="syns">The synonym entries whose <c>@out</c> lists are de-duplicated.</param>
        private void PruneDups(IList<OneSyn> syns)
        {
            ISet<string> seen = new JCG.HashSet<string>();

            foreach (OneSyn syn in syns)
            {
                int idx = 0;
                while (idx < syn.@out.Count)
                {
                    string @out = syn.@out[idx];
                    if (!seen.Contains(@out))
                    {
                        seen.Add(@out);
                        idx++;
                    }
                    else
                    {
                        // Do not advance: the next element has shifted into position idx.
                        syn.@out.RemoveAt(idx);
                    }
                }

                // The set is reused, so reset it before handling the next entry.
                seen.Clear();
            }
        }
Ejemplo n.º 23
0
        /// <summary>
        /// Builds a <see cref="NormalizeCharMap"/> from up to four random key/value mappings.
        /// Empty keys and keys that were already used are skipped, so fewer mappings may result.
        /// </summary>
        /// <returns>The map produced by the builder.</returns>
        private NormalizeCharMap RandomMap()
        {
            Random random = Random;
            NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder();

            // Duplicate keys must not reach the builder, or NormalizeCharMap gets angry.
            ISet<string> usedKeys = new JCG.HashSet<string>();
            int attempts = random.Next(5);

            for (int attempt = 0; attempt < attempts; attempt++)
            {
                string key = TestUtil.RandomSimpleString(random);
                if (usedKeys.Contains(key) || key.Length == 0)
                {
                    continue;
                }

                string value = TestUtil.RandomSimpleString(random);
                builder.Add(key, value);
                usedKeys.Add(key);
            }

            return builder.Build();
        }
Ejemplo n.º 24
0
 /// <summary>
 /// Creates an iterator over the range [<paramref name="begin"/>, <paramref name="end"/>) of
 /// <paramref name="attrString"/>, positioned at <paramref name="begin"/>. When
 /// <paramref name="attributes"/> is given, its elements are stored as the allowed attribute set.
 /// </summary>
 /// <param name="attrString">The attributed string to iterate over.</param>
 /// <param name="attributes">The attributes to allow, or <c>null</c> to leave the allowed set unassigned.</param>
 /// <param name="begin">Start index; must not be negative or greater than <paramref name="end"/>.</param>
 /// <param name="end">End index; must not exceed the length of the string's text.</param>
 /// <exception cref="ArgumentException">The range is invalid.</exception>
 internal AttributedIterator(AttributedString attrString,
                             AttributedCharacterIteratorAttribute[] attributes, int begin,
                             int end)
 {
     bool rangeIsValid = begin >= 0 && end <= attrString.text.Length && begin <= end;
     if (!rangeIsValid)
     {
         throw new ArgumentException();
     }

     this.attrString = attrString;
     this.begin = begin;
     this.end = end;
     offset = begin;

     if (attributes == null)
     {
         return;
     }

     // Pre-size the set for the number of attributes and fill it from last to first.
     var allowed = new JCG.HashSet<AttributedCharacterIteratorAttribute>(
         (attributes.Length * 4 / 3) + 1);
     for (int i = attributes.Length - 1; i >= 0; i--)
     {
         allowed.Add(attributes[i]);
     }
     attributesAllowed = allowed;
 }
Ejemplo n.º 25
0
        /// <summary>
        /// Asserts that <paramref name="fields"/> matches <paramref name="doc"/>: both are
        /// null or both are non-null, they have the same number of fields and the same set
        /// of field names, and each field's terms pass the per-field <c>AssertEquals</c>
        /// overload.
        /// </summary>
        /// <param name="doc">The expected document; may be null.</param>
        /// <param name="fields">The fields to check; may be null.</param>
        protected virtual void AssertEquals(RandomDocument doc, Fields fields)
        {
            // compare field names
            Assert.AreEqual(doc is null, fields is null);
            if (doc is null)
            {
                // the assertion above passed, so fields is null too: nothing left to
                // compare (dereferencing doc here would throw NullReferenceException)
                return;
            }

            Assert.AreEqual(doc.fieldNames.Length, fields.Count);
            ISet <string> fields1 = new JCG.HashSet <string>();
            ISet <string> fields2 = new JCG.HashSet <string>();

            for (int i = 0; i < doc.fieldNames.Length; ++i)
            {
                fields1.Add(doc.fieldNames[i]);
            }
            foreach (string field in fields)
            {
                fields2.Add(field);
            }
            Assert.IsTrue(fields1.SetEquals(fields2));

            // compare the terms of every field against its expected token stream and type
            for (int i = 0; i < doc.fieldNames.Length; ++i)
            {
                AssertEquals(doc.tokenStreams[i], doc.fieldTypes[i], fields.GetTerms(doc.fieldNames[i]));
            }
        }
Ejemplo n.º 26
0
                /// <summary>
                /// Rewrites the query through the base implementation and then, for every
                /// node other than this one, fetches the statistics of any query term that
                /// is not yet in <c>termStatsCache</c> and stores them there.
                /// </summary>
                /// <param name="original">The query to rewrite.</param>
                /// <returns>The query returned by the base <c>Rewrite</c>.</returns>
                public override Query Rewrite(Query original)
                {
                    Query result = base.Rewrite(original);

                    ISet<Term> queryTerms = new JCG.HashSet<Term>();
                    result.ExtractTerms(queryTerms);

                    // One request per remote node, covering all of its uncached terms
                    for (int node = 0; node < nodeVersions.Length; node++)
                    {
                        if (node != MyNodeID)
                        {
                            ISet<Term> uncached = new JCG.HashSet<Term>();
                            foreach (Term queryTerm in queryTerms)
                            {
                                TermAndShardVersion lookupKey = new TermAndShardVersion(node, nodeVersions[node], queryTerm);
                                bool alreadyCached = outerInstance.termStatsCache.ContainsKey(lookupKey);
                                if (!alreadyCached)
                                {
                                    uncached.Add(queryTerm);
                                }
                            }

                            if (uncached.Count > 0)
                            {
                                foreach (KeyValuePair<Term, TermStatistics> fetched in outerInstance.outerInstance.GetNodeTermStats(uncached, node, nodeVersions[node]))
                                {
                                    TermAndShardVersion storeKey = new TermAndShardVersion(node, nodeVersions[node], fetched.Key);
                                    outerInstance.termStatsCache[storeKey] = fetched.Value;
                                }
                            }
                        }
                    }

                    return result;
                }
Ejemplo n.º 27
0
        /// <summary>
        /// Returns the file names that occur in exactly one of the two arrays
        /// (their symmetric difference).
        /// </summary>
        /// <param name="files1">First list of file names.</param>
        /// <param name="files2">Second list of file names.</param>
        /// <returns>Names found in <paramref name="files1"/> but not in
        /// <paramref name="files2"/>, plus names found in <paramref name="files2"/> but
        /// not in <paramref name="files1"/>; empty when both hold the same names.</returns>
        private static ISet <string> DifFiles(string[] files1, string[] files2)
        {
            ISet<string> set1  = new JCG.HashSet<string>();
            ISet<string> set2  = new JCG.HashSet<string>();
            ISet<string> extra = new JCG.HashSet<string>();

            foreach (string file in files1)
            {
                set1.Add(file);
            }
            foreach (string file in files2)
            {
                set2.Add(file);
            }

            // foreach disposes the enumerators, which the manual MoveNext loops did not
            foreach (string file in set1)
            {
                if (!set2.Contains(file))
                {
                    extra.Add(file);
                }
            }
            foreach (string file in set2)
            {
                if (!set1.Contains(file))
                {
                    extra.Add(file);
                }
            }

            return extra;
        }
Ejemplo n.º 28
0
 /// <summary>
 /// Looks up <paramref name="name"/> in <paramref name="args"/>, removes that entry,
 /// and returns the whitespace- and/or comma-separated values it held as a set.
 /// </summary>
 /// <param name="args">Argument map; the entry for <paramref name="name"/> is removed when present.</param>
 /// <param name="name">Key of the argument to read.</param>
 /// <returns>The set of values, or null if the key is absent or no values are found.</returns>
 public virtual ISet <string> GetSet(IDictionary <string, string> args, string name)
 {
     if (!args.TryGetValue(name, out string raw))
     {
         return null;
     }
     args.Remove(name);

     // stays null unless ITEM_PATTERN matches at least once
     ISet<string> values = null;
     for (Match item = ITEM_PATTERN.Match(raw); item.Success; item = item.NextMatch())
     {
         if (values is null)
         {
             values = new JCG.HashSet<string>();
         }
         values.Add(item.Groups[0].Value);
     }
     return values;
 }
Ejemplo n.º 29
0
 /// <summary>
 /// Restricts this language set to the languages also present in
 /// <paramref name="other"/>.
 /// </summary>
 /// <param name="other">The set to restrict against. Anything other than
 /// <c>Languages.NO_LANGUAGES</c> or <c>Languages.ANY_LANGUAGE</c> is cast to
 /// <c>SomeLanguages</c>.</param>
 /// <returns><paramref name="other"/> when it is <c>NO_LANGUAGES</c>, this instance
 /// when it is <c>ANY_LANGUAGE</c>, otherwise the result of <c>From</c> on the
 /// languages common to both sets.</returns>
 public override LanguageSet RestrictTo(LanguageSet other)
 {
     if (other == Languages.NO_LANGUAGES)
     {
         return other;
     }
     if (other == Languages.ANY_LANGUAGE)
     {
         return this;
     }

     SomeLanguages that = (SomeLanguages)other;
     // the intersection cannot hold more entries than the smaller of the two sets
     int capacity = Math.Min(languages.Count, that.languages.Count);
     ISet<string> shared = new JCG.HashSet<string>(capacity);
     foreach (string candidate in languages)
     {
         if (that.languages.Contains(candidate))
         {
             shared.Add(candidate);
         }
     }
     return From(shared);
 }
Ejemplo n.º 30
0
        /// <summary>
        /// Analyzes <c>f.queryString</c> and, for each distinct token, enumerates fuzzy
        /// variants of that token from the reader's terms for <c>f.fieldName</c>. The
        /// variants kept in a per-token queue are re-scored with an IDF factor and
        /// inserted into the queue <c>q</c>.
        /// Returns without doing anything when <c>f.queryString</c> is null or when
        /// <c>MultiFields.GetTerms</c> returns null for the field.
        /// </summary>
        /// <param name="reader">Reader supplying the terms, the document count and document frequencies.</param>
        /// <param name="f">Supplies the field name, query string, minimum similarity and prefix length.</param>
        private void AddTerms(IndexReader reader, FieldVals f)
        {
            if (f.queryString is null)
            {
                return;
            }
            Terms terms = MultiFields.GetTerms(reader, f.fieldName);

            if (terms is null)
            {
                return;
            }
            TokenStream ts = analyzer.GetTokenStream(f.fieldName, f.queryString);

            try
            {
                ICharTermAttribute termAtt = ts.AddAttribute <ICharTermAttribute>();

                int           corpusNumDocs  = reader.NumDocs;
                ISet <string> processedTerms = new JCG.HashSet <string>();
                ts.Reset();
                while (ts.IncrementToken())
                {
                    string term = termAtt.ToString();
                    // handle each distinct token text only once
                    if (!processedTerms.Contains(term))
                    {
                        processedTerms.Add(term);
                        ScoreTermQueue  variantsQ = new ScoreTermQueue(MAX_VARIANTS_PER_TERM); //maxNum variants considered for any one term
                        float           minScore  = 0;
                        Term            startTerm = new Term(f.fieldName, term);
                        AttributeSource atts      = new AttributeSource();
                        IMaxNonCompetitiveBoostAttribute maxBoostAtt =
                            atts.AddAttribute <IMaxNonCompetitiveBoostAttribute>();
#pragma warning disable 612, 618
                        SlowFuzzyTermsEnum fe = new SlowFuzzyTermsEnum(terms, atts, startTerm, f.minSimilarity, f.prefixLength);
#pragma warning restore 612, 618
                        //store the df so all variants use same idf
                        int             df                   = reader.DocFreq(startTerm);
                        int             numVariants          = 0;
                        int             totalVariantDocFreqs = 0;
                        BytesRef        possibleMatch;
                        IBoostAttribute boostAtt =
                            fe.Attributes.AddAttribute <IBoostAttribute>();
                        // visit every candidate term produced by the fuzzy enumeration
                        while (fe.MoveNext())
                        {
                            possibleMatch = fe.Term;
                            numVariants++;
                            totalVariantDocFreqs += fe.DocFreq;
                            float score = boostAtt.Boost;
                            // queue the candidate while the queue has room, or when its score exceeds minScore
                            if (variantsQ.Count < MAX_VARIANTS_PER_TERM || score > minScore)
                            {
                                // deep-copy the bytes before storing them in the queued term
                                ScoreTerm st = new ScoreTerm(new Term(startTerm.Field, BytesRef.DeepCopyOf(possibleMatch)), score, startTerm);
                                variantsQ.InsertWithOverflow(st);
                                minScore = variantsQ.Top.Score; // maintain minScore
                            }
                            // once the queue is full, publish minScore through the attribute; until then use negative infinity
                            // NOTE(review): presumably this lets the enumerator skip non-competitive candidates -- confirm
                            maxBoostAtt.MaxNonCompetitiveBoost = variantsQ.Count >= MAX_VARIANTS_PER_TERM ? minScore : float.NegativeInfinity;
                        }

                        if (numVariants > 0)
                        {
                            // integer division: the average is truncated
                            int avgDf = totalVariantDocFreqs / numVariants;
                            if (df == 0)    //no direct match we can use as df for all variants
                            {
                                df = avgDf; //use avg df of all variants
                            }

                            // take the top variants (scored by edit distance) and reset the score
                            // to include an IDF factor then add to the global queue for ranking
                            // overall top query terms
                            int size = variantsQ.Count;
                            for (int i = 0; i < size; i++)
                            {
                                ScoreTerm st = variantsQ.Pop();
                                // new score = squared variant score times the IDF computed from the shared df
                                st.Score = (st.Score * st.Score) * sim.Idf(df, corpusNumDocs);
                                q.InsertWithOverflow(st);
                            }
                        }
                    }
                }
                ts.End();
            }
            finally
            {
                // runs on both normal and exceptional exit
                IOUtils.DisposeWhileHandlingException(ts);
            }
        }