Пример #1
0
        /// <summary>
        /// Writes a small Lucene40 index through a <c>FileSwitchDirectory</c> and checks that
        /// files with the stored-fields extensions land only in the primary directory while
        /// every other file lands only in the secondary directory.
        /// </summary>
        public virtual void TestBasic()
        {
            ISet <string> fileExtensions = new JCG.HashSet <string>();

            fileExtensions.Add(Lucene40StoredFieldsWriter.FIELDS_EXTENSION);
            fileExtensions.Add(Lucene40StoredFieldsWriter.FIELDS_INDEX_EXTENSION);

            MockDirectoryWrapper primaryDir = new MockDirectoryWrapper(Random, new RAMDirectory());

            primaryDir.CheckIndexOnDispose = false; // only part of an index
            MockDirectoryWrapper secondaryDir = new MockDirectoryWrapper(Random, new RAMDirectory());

            secondaryDir.CheckIndexOnDispose = false; // only part of an index

            FileSwitchDirectory fsd = new FileSwitchDirectory(fileExtensions, primaryDir, secondaryDir, true);
            // for now we wire Lucene40Codec because we rely upon its specific impl
            bool oldValue = OldFormatImpersonationIsActive;

            OldFormatImpersonationIsActive = true;
            try
            {
                IndexWriter writer = new IndexWriter(fsd, (new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))).SetMergePolicy(NewLogMergePolicy(false)).SetCodec(Codec.ForName("Lucene40")).SetUseCompoundFile(false));

                TestIndexWriterReader.CreateIndexNoClose(true, "ram", writer);
                IndexReader reader = DirectoryReader.Open(writer, true);

                Assert.AreEqual(100, reader.MaxDoc);
                writer.Commit();
                // we should see only fdx,fdt files here
                string[] files = primaryDir.ListAll();
                Assert.IsTrue(files.Length > 0);
                for (int x = 0; x < files.Length; x++)
                {
                    string ext = FileSwitchDirectory.GetExtension(files[x]);
                    Assert.IsTrue(fileExtensions.Contains(ext));
                }
                files = secondaryDir.ListAll();
                Assert.IsTrue(files.Length > 0);
                // we should not see fdx,fdt files here
                for (int x = 0; x < files.Length; x++)
                {
                    string ext = FileSwitchDirectory.GetExtension(files[x]);
                    Assert.IsFalse(fileExtensions.Contains(ext));
                }
                reader.Dispose();
                writer.Dispose();

                // the combined listing must not contain null entries
                files = fsd.ListAll();
                for (int i = 0; i < files.Length; i++)
                {
                    Assert.IsNotNull(files[i]);
                }
                fsd.Dispose();
            }
            finally
            {
                // Restore the flag even when an assertion above fails, so a failure
                // here cannot change the behavior of tests that run afterwards.
                OldFormatImpersonationIsActive = oldValue;
            }
        }
Пример #2
0
        /// <summary>
        /// Fills an array with unique random terms (occasionally forcing a batch of them to
        /// share a common prefix of at least five characters), indexes them and runs the
        /// random seek checks against the resulting reader.
        /// </summary>
        public virtual void TestRandomTerms()
        {
            var terms = new string[TestUtil.NextInt32(Random, 1, AtLeast(1000))];
            var seen = new JCG.HashSet<string>();

            var allowEmptyString = Random.NextBoolean();

            // Sometimes add a bunch of terms sharing a longish common prefix:
            if (Random.Next(10) == 7 && terms.Length > 2)
            {
                int sharedPrefixCount = Random.Next(terms.Length / 2);
                if (sharedPrefixCount > 0)
                {
                    // keep drawing until the prefix is long enough
                    string prefix;
                    do
                    {
                        prefix = RandomString;
                    } while (prefix.Length < 5);

                    while (seen.Count < sharedPrefixCount)
                    {
                        string candidate = prefix + RandomString;
                        if (seen.Add(candidate))
                        {
                            // the term was new; it occupies the slot matching its insertion order
                            terms[seen.Count - 1] = candidate;
                        }
                    }
                }
            }

            // Top up with fully random unique terms until the array is full.
            while (seen.Count < terms.Length)
            {
                string candidate = RandomString;
                if (!allowEmptyString && candidate.Length == 0)
                {
                    continue;
                }
                if (seen.Add(candidate))
                {
                    terms[seen.Count - 1] = candidate;
                }
            }

            using var d = NewDirectory();
            using var r = MakeIndex(d, terms);
            TestRandomSeeks(r, terms);
        }
Пример #3
0
        /// <summary>
        /// Checks that terms filters built from the same set of terms are found in a hash set
        /// regardless of term order or duplicates, and that a filter with an additional
        /// term is not.
        /// </summary>
        public void TestCachability()
        {
            TermsFilter original = TermsFilter(Random.NextBoolean(), new Term("field1", "a"), new Term("field1", "b"));
            ISet<Filter> cachedFilters = new JCG.HashSet<Filter>();
            cachedFilters.Add(original);

            // same terms, different order
            TermsFilter reordered = TermsFilter(Random.NextBoolean(), new Term("field1", "b"), new Term("field1", "a"));
            assertTrue("Must be cached", cachedFilters.Contains(reordered));

            // duplicate term
            TermsFilter withDuplicate = TermsFilter(true, new Term("field1", "a"), new Term("field1", "a"), new Term("field1", "b"));
            assertTrue("Must be cached", cachedFilters.Contains(withDuplicate));

            // one extra term
            TermsFilter withExtraTerm = TermsFilter(Random.NextBoolean(), new Term("field1", "a"), new Term("field1", "a"), new Term("field1", "b"), new Term("field1", "v"));
            assertFalse("Must not be cached", cachedFilters.Contains(withExtraTerm));
        }
Пример #4
0
        /// <summary>
        /// Indexes a single document, runs an ordered zero-slop span-near query for "a"
        /// followed by "k", and checks that exactly the two expected payloads are
        /// collected from the matching spans.
        /// </summary>
        public virtual void TestShrinkToAfterShortestMatch3()
        {
            Directory directory = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random, directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new TestPayloadAnalyzer(this)));

            Document doc = new Document();
            doc.Add(new TextField("content", new StringReader("j k a l f k k p a t a k l k t a")));
            writer.AddDocument(doc);

            IndexReader reader = writer.GetReader();
            IndexSearcher searcher = NewSearcher(reader);
            writer.Dispose();

            SpanQuery[] clauses = new SpanQuery[]
            {
                new SpanTermQuery(new Term("content", "a")),
                new SpanTermQuery(new Term("content", "k"))
            };
            SpanNearQuery nearQuery = new SpanNearQuery(clauses, 0, true);
            Spans spans = MultiSpansWrapper.Wrap(searcher.TopReaderContext, nearQuery);

            TopDocs topDocs = searcher.Search(nearQuery, 1);
            ISet<string> payloadSet = new JCG.HashSet<string>();

            for (int hit = 0; hit < topDocs.ScoreDocs.Length; hit++)
            {
                // drain every span and decode each payload as UTF-8 text
                while (spans.Next())
                {
                    foreach (var rawPayload in spans.GetPayload())
                    {
                        payloadSet.Add(Encoding.UTF8.GetString(rawPayload));
                    }
                }
            }

            Assert.AreEqual(2, payloadSet.Count);
            if (Verbose)
            {
                foreach (var decoded in payloadSet)
                {
                    Console.WriteLine("match:" + decoded);
                }
            }
            Assert.IsTrue(payloadSet.Contains("a:Noise:10"));
            Assert.IsTrue(payloadSet.Contains("k:Noise:11"));

            reader.Dispose();
            directory.Dispose();
        }
Пример #5
0
 /// <summary>
 /// Collects into <paramref name="strings"/> the strings that can be produced from the
 /// given state. Returns <c>false</c> if a transition leads back to a state on the
 /// current path, or if more than <paramref name="limit"/> strings are found.
 /// <paramref name="limit"/>&lt;0 means "infinite".
 /// </summary>
 private static bool GetFiniteStrings(State s, JCG.HashSet<State> pathstates, JCG.HashSet<Int32sRef> strings, Int32sRef path, int limit)
 {
     pathstates.Add(s);
     foreach (Transition edge in s.GetTransitions())
     {
         State target = edge.to;
         if (pathstates.Contains(target))
         {
             // target is already on the current path
             return false;
         }
         for (int label = edge.min; label <= edge.max; label++)
         {
             // append this label to the current path
             path.Grow(path.Length + 1);
             path.Int32s[path.Length] = label;
             path.Length++;

             if (target.accept)
             {
                 strings.Add(Int32sRef.DeepCopyOf(path));
                 bool overLimit = limit >= 0 && strings.Count > limit;
                 if (overLimit)
                 {
                     return false;
                 }
             }

             bool finite = GetFiniteStrings(target, pathstates, strings, path, limit);
             if (!finite)
             {
                 return false;
             }

             // drop the label again before trying the next one
             path.Length--;
         }
     }
     pathstates.Remove(s);
     return true;
 }
Пример #6
0
        /// <summary>
        /// Returns the bytes that form a common prefix of the strings accepted by
        /// <paramref name="a"/>, following single-label transitions from the initial
        /// state and visiting each state at most once.
        /// </summary>
        /// <param name="a"> Automaton to inspect. </param>
        /// <returns> Common prefix; for a singleton automaton, the singleton itself. </returns>
        // TODO: this currently requires a determinized machine,
        // but it need not -- we can speed it up by walking the
        // NFA instead.  it'd still be fail fast.
        public static BytesRef GetCommonPrefixBytesRef(Automaton a)
        {
            if (a.IsSingleton)
            {
                return(new BytesRef(a.singleton));
            }
            BytesRef @ref = new BytesRef(10);

            JCG.HashSet <State> visited = new JCG.HashSet <State>();
            State s = a.initial;
            bool  done;

            do
            {
                done = true;
                visited.Add(s);
                // The prefix can only be extended through a non-accepting state
                // that has exactly one outgoing transition.
                if (!s.accept && s.NumTransitions == 1)
                {
                    Transition t = s.GetTransitions().First();

                    if (t.min == t.max && !visited.Contains(t.to))
                    {
                        // Lengthen the prefix by one, make sure the buffer can hold
                        // it, then store the new byte in the last slot.
                        @ref.Grow(++@ref.Length);
                        @ref.Bytes[@ref.Length - 1] = (byte)t.min;
                        s    = t.to;
                        done = false;
                    }
                }
            } while (!done);
            return(@ref);
        }
Пример #7
0
        /// <summary>
        /// Parses <paramref name="qString"/> with a complex phrase parser, runs it and
        /// asserts that the ids of the matched documents are exactly the non-empty,
        /// comma-separated ids listed in <paramref name="expectedVals"/>.
        /// </summary>
        private void CheckMatches(string qString, string expectedVals)
        {
            ComplexPhraseQueryParser parser = new ComplexPhraseQueryParser(TEST_VERSION_CURRENT, defaultFieldName, analyzer)
            {
                InOrder = inOrder,
                FuzzyPrefixLength = 1 // usually a good idea
            };

            Query query = parser.Parse(qString);

            // ids we still expect to see; each one is removed once it has matched
            ISet<string> expecteds = new JCG.HashSet<string>();
            foreach (string expectedId in expectedVals.Split(',').TrimEnd())
            {
                if (expectedId.Length > 0)
                {
                    expecteds.Add(expectedId);
                }
            }

            TopDocs results = searcher.Search(query, 10);

            foreach (ScoreDoc hit in results.ScoreDocs)
            {
                Document matched = searcher.Doc(hit.Doc);
                string id = matched.Get("id");
                assertTrue(qString + "matched doc#" + id + " not expected", expecteds.Contains(id));
                expecteds.Remove(id);
            }

            assertEquals(qString + " missing some matches ", 0, expecteds.Count);
        }
Пример #8
0
        /// <summary>
        /// Ensures that every instance "Set*" method declared by IndexWriterConfig returns
        /// IndexWriterConfig, so that setter calls can be chained.
        /// </summary>
        public virtual void TestSettersChaining()
        {
            ISet<string> liveSetters = new JCG.HashSet<string>();
            ISet<string> allSetters = new JCG.HashSet<string>();

            foreach (MethodInfo method in typeof(IndexWriterConfig).GetMethods())
            {
                bool isInstanceSetter = method.Name.StartsWith("Set", StringComparison.Ordinal) && !method.IsStatic;
                if (!isInstanceSetter)
                {
                    continue;
                }

                allSetters.Add(method.Name);

                // Setters overridden from LiveIndexWriterConfig are reported twice: once with an
                // IndexWriterConfig return type and once with LiveIndexWriterConfig. In Java the
                // latter are marked 'synthetic'; C# has no such marker, so the declaring type is
                // inspected instead. Those are collected and checked at the end.
                if (method.DeclaringType.Name == "LiveIndexWriterConfig")
                {
                    liveSetters.Add(method.Name);
                    continue;
                }

                Assert.AreEqual(typeof(IndexWriterConfig), method.ReturnType, "method " + method.Name + " does not return IndexWriterConfig");
            }

            foreach (string liveSetter in liveSetters)
            {
                Assert.IsTrue(allSetters.Contains(liveSetter), "setter method not overridden by IndexWriterConfig: " + liveSetter);
            }
        }
Пример #9
0
        /// <summary>
        /// Feeds the same sequence of random values to an identity-comparing JCG hash set and
        /// to RamUsageEstimator.IdentityHashSet, asserting that Contains and Add agree at every
        /// step, then compares the collected contents of both sets.
        /// </summary>
        public virtual void TestCheck()
        {
            Random rnd = Random;

            ISet<object> reference = new JCG.HashSet<object>(IdentityEqualityComparer<object>.Default);
            RamUsageEstimator.IdentityHashSet<object> candidate = new RamUsageEstimator.IdentityHashSet<object>();

            const int max = 100000;
            const int threshold = 256;

            for (int round = 0; round < max; round++)
            {
                // some of these will be interned and some will not so there will be collisions.
                // NOTE: v stays an int on purpose; each call below boxes it independently.
                int v = rnd.Next(threshold);

                Assert.AreEqual(reference.Contains(v), candidate.Contains(v));
                Assert.AreEqual(reference.Add(v), candidate.Add(v));
            }

            ISet<object> collected = new JCG.HashSet<object>(IdentityEqualityComparer<object>.Default);
            foreach (var element in candidate)
            {
                collected.Add(element);
            }

            // LUCENENET: We have 2 J2N hashsets, so no need to use aggressive mode
            assertEquals(collected, reference, aggressive: false);
        }
Пример #10
0
        /// <summary>
        /// Returns the files required for replication. By default, this method returns,
        /// for every source known to the handler, the files of the new revision whose
        /// names the handler does not already have. If the handler has no current
        /// revision files, <paramref name="newRevisionFiles"/> is returned as is.
        /// </summary>
        protected virtual IDictionary<string, IList<RevisionFile>> RequiredFiles(IDictionary<string, IList<RevisionFile>> newRevisionFiles)
        {
            IDictionary<string, IList<RevisionFile>> current = handler.CurrentRevisionFiles;
            if (current == null)
            {
                return newRevisionFiles;
            }

            var requiredFiles = new Dictionary<string, IList<RevisionFile>>();

            foreach (KeyValuePair<string, IList<RevisionFile>> entry in current)
            {
                // put the handler files in a Set, for faster contains() checks later
                IEnumerable<string> knownNames = entry.Value.Select(v => v.FileName);
                ISet<string> handlerFiles = new JCG.HashSet<string>(knownNames);

                // make sure to preserve revisionFiles order
                string source = entry.Key;
                Debug.Assert(newRevisionFiles.ContainsKey(source), string.Format("source not found in newRevisionFiles: {0}", newRevisionFiles));

                IEnumerable<RevisionFile> missing = newRevisionFiles[source]
                    .Where(file => !handlerFiles.Contains(file.FileName));
                List<RevisionFile> res = missing.ToList();

                requiredFiles.Add(source, res);
            }

            return requiredFiles;
        }
Пример #11
0
 /// <summary>
 /// Reports whether <c>lockName</c> is currently present in <c>locks</c>.
 /// The check is made while holding the monitor on <c>locks</c>.
 /// </summary>
 public override bool IsLocked()
 {
     bool held;
     lock (locks)
     {
         held = locks.Contains(lockName);
     }
     return held;
 }
Пример #12
0
        /// <summary>
        /// Returns the longest string that is a prefix of all accepted strings and
        /// visits each state at most once.
        /// </summary>
        /// <returns> Common prefix. </returns>
        public static string GetCommonPrefix(Automaton a)
        {
            if (a.IsSingleton)
            {
                return a.singleton;
            }

            StringBuilder prefix = new StringBuilder();
            JCG.HashSet<State> visited = new JCG.HashSet<State>();
            State current = a.initial;

            while (true)
            {
                visited.Add(current);

                // stop at an accepting state or wherever the path branches or ends
                if (current.accept || current.NumTransitions != 1)
                {
                    break;
                }

                Transition only = current.GetTransitions().First();

                // stop on a multi-label transition or when we would revisit a state
                if (only.min != only.max || visited.Contains(only.to))
                {
                    break;
                }

                prefix.AppendCodePoint(only.min);
                current = only.to;
            }

            return prefix.ToString();
        }
Пример #13
0
        /// <summary>
        /// Prepares the lookup data for the given automaton. For a singleton automaton
        /// <c>leadsToAccept</c> is left <c>null</c>. Otherwise a backward breadth-first
        /// search from the accept states records in <c>leadsToAccept</c> the transition
        /// through which each state is first reached.
        /// </summary>
        public RandomAcceptedStrings(Automaton a)
        {
            this.a = a;
            if (a.IsSingleton)
            {
                leadsToAccept = null;
                return;
            }

            // must use IdentityHashmap because two Transitions w/
            // different start nodes can be considered the same
            leadsToAccept = new JCG.Dictionary<Transition, bool?>(IdentityEqualityComparer<Transition>.Default);
            IDictionary<State, IList<ArrivingTransition>> arrivingByTarget = new Dictionary<State, IList<ArrivingTransition>>();

            Queue<State> pending = new Queue<State>();
            ISet<State> seen = new JCG.HashSet<State>();

            // reverse map the transitions, so we can quickly look
            // up all arriving transitions to a given state
            foreach (State state in a.GetNumberedStates())
            {
                for (int idx = 0; idx < state.numTransitions; idx++)
                {
                    Transition outgoing = state.TransitionsArray[idx];
                    if (!arrivingByTarget.TryGetValue(outgoing.to, out IList<ArrivingTransition> incoming) || incoming == null)
                    {
                        incoming = new List<ArrivingTransition>();
                        arrivingByTarget[outgoing.to] = incoming;
                    }
                    incoming.Add(new ArrivingTransition(state, outgoing));
                }

                // accept states seed the backward search
                if (state.Accept)
                {
                    pending.Enqueue(state);
                    seen.Add(state);
                }
            }

            // Breadth-first search, from accept states,
            // backwards:
            while (pending.Count > 0)
            {
                State reached = pending.Dequeue();
                if (!arrivingByTarget.TryGetValue(reached, out IList<ArrivingTransition> arriving) || arriving == null)
                {
                    continue;
                }

                foreach (ArrivingTransition arrival in arriving)
                {
                    State origin = arrival.from;
                    if (seen.Contains(origin))
                    {
                        continue;
                    }
                    pending.Enqueue(origin);
                    seen.Add(origin);
                    leadsToAccept[arrival.t] = true;
                }
            }
        }
Пример #14
0
 /// <summary>
 /// Removes <paramref name="lockName"/> from <c>locks</c> if it is present; does
 /// nothing otherwise. The removal is made while holding the monitor on <c>locks</c>.
 /// </summary>
 /// <param name="lockName"> Name of the lock to clear. </param>
 public override void ClearLock(string lockName)
 {
     lock (locks)
     {
         // Remove is already a no-op for a name that is not present, so a
         // separate Contains check would only repeat the lookup.
         locks.Remove(lockName);
     }
 }
Пример #15
0
        /// <summary>
        /// Helper function to create a HashSet fulfilling the given specific parameters. The
        /// set is created with the comparer constructor and is first given the first
        /// <paramref name="numberOfMatchingElements"/> elements of
        /// <paramref name="enumerableToMatchTo"/>, followed by generated (deterministic)
        /// elements that do not occur in it until <paramref name="count"/> is reached.
        /// </summary>
        protected IEnumerable<T> CreateHashSet(IEnumerable<T> enumerableToMatchTo, int count, int numberOfMatchingElements)
        {
            JCG.HashSet<T> result = new JCG.HashSet<T>(GetIEqualityComparer());
            int nextSeed = 528;

            JCG.List<T> match = null;

            // Add Matching elements
            if (enumerableToMatchTo != null)
            {
                match = enumerableToMatchTo.ToList();
                for (int index = 0; index < numberOfMatchingElements; index++)
                {
                    result.Add(match[index]);
                }
            }

            // Add elements to reach the desired count
            while (result.Count < count)
            {
                T candidate;
                do
                {
                    candidate = CreateT(nextSeed++);
                }
                // Don't want any unexpectedly duplicate values
                while (result.Contains(candidate) || (match != null && match.Contains(candidate, GetIEqualityComparer())));

                result.Add(candidate);
            }

            // Validate that the Enumerable fits the guidelines as expected
            Debug.Assert(result.Count == count);
            if (match != null)
            {
                int actualMatchingCount = 0;
                foreach (T expected in match)
                {
                    if (result.Contains(expected))
                    {
                        actualMatchingCount++;
                    }
                }
                Assert.Equal(numberOfMatchingElements, actualMatchingCount);
            }

            return result;
        }
Пример #16
0
        /// <summary>
        /// Indexes documents whose "field" is randomly "a" or "b", deletes some of them by id,
        /// and checks that filtering a match-all query on field:a returns exactly the
        /// surviving "a" documents.
        /// </summary>
        public virtual void TestRandom()
        {
            Directory d = NewDirectory();
            RandomIndexWriter w = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, d);

            w.IndexWriter.Config.SetMaxBufferedDocs(17);
            int numDocs = AtLeast(100);

            // ids of the live documents whose "field" value is "a"
            ISet<string> aDocs = new JCG.HashSet<string>();

            for (int i = 0; i < numDocs; i++)
            {
                string id = "" + i;
                Document doc = new Document();

                bool isA = Random.Next(5) == 4;
                if (isA)
                {
                    aDocs.Add(id);
                }

                Field f = NewStringField("field", isA ? "a" : "b", Field.Store.NO);
                doc.Add(f);
                doc.Add(NewStringField("id", id, Field.Store.YES));
                w.AddDocument(doc);
            }

            int numDelDocs = AtLeast(10);
            for (int del = 0; del < numDelDocs; del++)
            {
                string delID = "" + Random.Next(numDocs);
                w.DeleteDocuments(new Term("id", delID));
                aDocs.Remove(delID);
            }

            IndexReader r = w.GetReader();
            w.Dispose();

            var onlyA = new QueryWrapperFilter(new TermQuery(new Term("field", "a")));
            TopDocs hits = NewSearcher(r).Search(new MatchAllDocsQuery(), onlyA, numDocs);

            Assert.AreEqual(aDocs.Count, hits.TotalHits);
            foreach (ScoreDoc hit in hits.ScoreDocs)
            {
                string hitId = r.Document(hit.Doc).Get("id");
                Assert.IsTrue(aDocs.Contains(hitId));
            }

            r.Dispose();
            d.Dispose();
        }
Пример #17
0
        private readonly object lineFileLock = new object(); // LUCENENET specific - lock to ensure writes don't collide for this instance


        /// <summary>
        /// Creates the task, opening the output file named by the <c>line.file.out</c>
        /// configuration property and writing the header to it. The fields to write are
        /// read from <c>line.fields</c> (comma separated; defaults are used when it is not
        /// set) and the sufficient fields from <c>sufficient.fields</c> (a lone ","
        /// disables the sufficient-fields check).
        /// </summary>
        /// <param name="runData"> Run data supplying the configuration and the doc maker. </param>
        /// <exception cref="ArgumentException">
        /// If <c>line.file.out</c> is not set, or <c>line.fields</c> contains the separator char.
        /// </exception>
        public WriteLineDocTask(PerfRunData runData)
            : base(runData)
        {
            Config config = runData.Config;

            m_fname = config.Get("line.file.out", null);
            if (m_fname == null)
            {
                throw new ArgumentException("line.file.out must be set");
            }
            Stream @out = StreamUtils.GetOutputStream(new FileInfo(m_fname));

            lineFileOut = new StreamWriter(@out, Encoding.UTF8);
            try
            {
                docMaker = runData.DocMaker;

                // init fields
                string f2r = config.Get("line.fields", null);

                if (f2r == null)
                {
                    fieldsToWrite = DEFAULT_FIELDS;
                }
                else
                {
                    if (f2r.IndexOf(SEP) >= 0)
                    {
                        throw new ArgumentException("line.fields " + f2r + " should not contain the separator char: " + SEP);
                    }
                    fieldsToWrite = f2r.Split(',').TrimEnd();
                }

                // init sufficient fields
                sufficientFields = new bool[fieldsToWrite.Length];
                string suff = config.Get("sufficient.fields", DEFAULT_SUFFICIENT_FIELDS);

                if (",".Equals(suff, StringComparison.Ordinal))
                {
                    checkSufficientFields = false;
                }
                else
                {
                    checkSufficientFields = true;
                    ISet <string> sf = new JCG.HashSet <string>(suff.Split(',').TrimEnd());
                    for (int i = 0; i < fieldsToWrite.Length; i++)
                    {
                        // flag every written field that is also listed as sufficient
                        if (sf.Contains(fieldsToWrite[i]))
                        {
                            sufficientFields[i] = true;
                        }
                    }
                }

                WriteHeader(lineFileOut);
            }
            catch
            {
                // The output file is already open at this point. If construction fails
                // nobody else holds a reference to the writer, so release it here
                // instead of leaking the file handle, then let the error propagate.
                lineFileOut.Dispose();
                throw;
            }
        }
Пример #18
0
 /// <summary>
 /// Reports whether <c>lockName</c> is currently present in <c>locks</c>. The check
 /// is made between <c>UninterruptableMonitor.Enter</c> and <c>Exit</c> on <c>locks</c>.
 /// </summary>
 public override bool IsLocked()
 {
     bool held;
     UninterruptableMonitor.Enter(locks);
     try
     {
         held = locks.Contains(lockName);
     }
     finally
     {
         UninterruptableMonitor.Exit(locks);
     }
     return held;
 }
Пример #19
0
 /// <summary>
 /// Checks whether there is a loop containing s. (this is sufficient since
 /// there are never transitions to dead states.) Returns <c>false</c> as soon as
 /// a transition leads to a state that is already on <paramref name="path"/>.
 /// </summary>
 // TODO: not great that this is recursive... in theory a
 // large automata could exceed the stack
 private static bool IsFiniteSlow(State s, JCG.HashSet<State> path)
 {
     path.Add(s);
     foreach (Transition edge in s.GetTransitions())
     {
         State target = edge.to;
         if (path.Contains(target))
         {
             // target is already on the current path
             return false;
         }
         if (!IsFiniteSlow(target, path))
         {
             return false;
         }
     }
     path.Remove(s);
     return true;
 }
Пример #20
0
 /// <summary>
 /// Removes <paramref name="lockName"/> from <c>locks</c> if it is present; does
 /// nothing otherwise. The removal is made between
 /// <c>UninterruptableMonitor.Enter</c> and <c>Exit</c> on <c>locks</c>.
 /// </summary>
 /// <param name="lockName"> Name of the lock to clear. </param>
 public override void ClearLock(string lockName)
 {
     UninterruptableMonitor.Enter(locks);
     try
     {
         // Remove is already a no-op for a name that is not present, so a
         // separate Contains check would only repeat the lookup.
         locks.Remove(lockName);
     }
     finally
     {
         UninterruptableMonitor.Exit(locks);
     }
 }
Пример #21
0
        /// <summary>
        /// Indexes documents under randomly named fields, adds extra terms for fields that
        /// were not collected from the indexed documents, and checks that a TermsFilter over
        /// all collected terms still matches every document of the single merged segment.
        /// </summary>
        public void TestSkipField()
        {
            Directory dir = NewDirectory();
            RandomIndexWriter w = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, dir);
            int num = AtLeast(10);
            var terms = new JCG.HashSet<Term>();

            for (int docIndex = 0; docIndex < num; docIndex++)
            {
                string field = "field" + Random.Next(100);
                terms.Add(new Term(field, "content1"));

                Document doc = new Document();
                doc.Add(NewStringField(field, "content1", Field.Store.YES));
                w.AddDocument(doc);
            }

            int randomFields = Random.Next(10);
            for (int extra = 0; extra < randomFields; extra++)
            {
                // keep drawing field names until one yields a term not collected yet
                Term unseen;
                do
                {
                    unseen = new Term("field" + Random.Next(100), "content1");
                } while (!terms.Add(unseen));
            }

            w.ForceMerge(1);
            IndexReader reader = w.GetReader();
            w.Dispose();

            assertEquals(1, reader.Leaves.size());
            AtomicReaderContext context = reader.Leaves[0];
            TermsFilter tf = new TermsFilter(terms.ToList());

            FixedBitSet bits = (FixedBitSet)tf.GetDocIdSet(context, context.AtomicReader.LiveDocs);

            assertEquals(context.Reader.NumDocs, bits.Cardinality);
            reader.Dispose();
            dir.Dispose();
        }
Пример #22
0
        /// <summary>
        /// For every option set returned by <c>ValidOptions()</c>: indexes random documents,
        /// deletes a random subset by id, force-merges to one segment and checks that the
        /// term vectors of each surviving document still match the generated document.
        /// </summary>
        public virtual void TestMerge()
        {
            RandomDocumentFactory factory = new RandomDocumentFactory(this, 5, 20);
            int docCount    = AtLeast(100);
            int deleteCount = Random.Next(docCount);

            // Pick distinct document ids to delete; the same ids are reused for every option set.
            ISet<int> deletedIds = new JCG.HashSet<int>();
            while (deletedIds.Count < deleteCount)
            {
                deletedIds.Add(Random.Next(docCount));
            }

            foreach (Options options in ValidOptions())
            {
                RandomDocument[] generated = new RandomDocument[docCount];
                for (int slot = 0; slot < docCount; ++slot)
                {
                    generated[slot] = factory.NewDocument(TestUtil.NextInt32(Random, 1, 3), AtLeast(10), options);
                }

                using Directory dir            = NewDirectory();
                using RandomIndexWriter writer = new RandomIndexWriter(Random, dir);
                for (int slot = 0; slot < docCount; ++slot)
                {
                    string id = "" + slot;
                    writer.AddDocument(AddId(generated[slot].ToDocument(), id));
                    if (Rarely())
                    {
                        writer.Commit();
                    }
                }

                foreach (int deletedId in deletedIds)
                {
                    writer.DeleteDocuments(new Term("id", "" + deletedId));
                }

                // merge with deletes
                writer.ForceMerge(1);
                using IndexReader reader = writer.GetReader();
                for (int slot = 0; slot < docCount; ++slot)
                {
                    if (deletedIds.Contains(slot))
                    {
                        continue;
                    }

                    int docID = DocID(reader, "" + slot);
                    AssertEquals(generated[slot], reader.GetTermVectors(docID));
                }
            }
        }
Пример #23
0
        /// <summary>
        /// Runs the strategy's query for <paramref name="args"/> (at most 100 results requested)
        /// and asserts the number of hits and, optionally, that specific ids are among them.
        /// </summary>
        /// <param name="args">Spatial arguments the query is built from.</param>
        /// <param name="assertNumFound">Expected value of the result's <c>numFound</c>.</param>
        /// <param name="assertIds">Ids that must each appear in the results; <c>null</c> skips the id check.</param>
        private void checkHits(SpatialArgs args, int assertNumFound, int[] assertIds)
        {
            SearchResults found = executeQuery(strategy.MakeQuery(args), 100);
            assertEquals("" + args, assertNumFound, found.numFound);

            if (assertIds == null)
            {
                return;
            }

            // Collect the "id" field of every returned document for membership checks.
            ISet<int> foundIds = new JCG.HashSet<int>();
            foreach (SearchResult hit in found.results)
            {
                string idText = hit.document.Get("id");
                foundIds.Add(int.Parse(idText, CultureInfo.InvariantCulture));
            }

            foreach (int expectedId in assertIds)
            {
                assertTrue("has " + expectedId, foundIds.Contains(expectedId));
            }
        }
Пример #24
0
        /// <summary>
        /// Set a list of BCP47 extensions and private use subtags.
        /// BCP47 extensions are already validated and well-formed, but may contain duplicates.
        /// When the same singleton occurs more than once, only its first occurrence is applied.
        /// </summary>
        /// <param name="bcpExtensions">Extension strings, each starting with its singleton
        /// (e.g. "a-abc-def"); may be <c>null</c> or empty.</param>
        /// <param name="privateuse">Private use string including its prefix (e.g. "x-abc-def");
        /// may be <c>null</c> or empty.</param>
        /// <returns>This builder.</returns>
        private InternalLocaleBuilder SetExtensions(IList <string> bcpExtensions, string privateuse)
        {
            ClearExtensions();

            if (bcpExtensions != null && bcpExtensions.Count > 0)
            {
                var processedExtensions = new JCG.HashSet <CaseInsensitiveChar>(bcpExtensions.Count);
                foreach (string bcpExt in bcpExtensions)
                {
                    CaseInsensitiveChar key = new CaseInsensitiveChar(bcpExt[0]);

                    // Ignore duplicates. Add both records the singleton and reports (by
                    // returning false) that it was already seen; without recording it the
                    // duplicate check could never fire and later entries would overwrite
                    // earlier ones.
                    if (!processedExtensions.Add(key))
                    {
                        continue;
                    }

                    // each extension string contains singleton, e.g. "a-abc-def";
                    // Substring(2) drops the singleton and the separator after it
                    if (UnicodeLocaleExtension.IsSingletonChar(key.Value))
                    {
                        SetUnicodeLocaleExtension(bcpExt.Substring(2));
                    }
                    else
                    {
                        if (_extensions == null)
                        {
                            _extensions = new Dictionary <CaseInsensitiveChar, string>(4);
                        }
                        _extensions[key] = bcpExt.Substring(2);
                    }
                }
            }

            if (!string.IsNullOrEmpty(privateuse))
            {
                // privateuse string contains prefix, e.g. "x-abc-def"
                if (_extensions == null)
                {
                    _extensions = new Dictionary <CaseInsensitiveChar, string>(1);
                }
                _extensions[new CaseInsensitiveChar(privateuse[0])] = privateuse.Substring(2);
            }

            return this;
        }
Пример #25
0
        /// <summary>
        /// Removes old index files from <paramref name="directory"/>. The last commit is
        /// obtained through <see cref="GetLastCommit(Directory)"/>; only when its segments file
        /// name equals <paramref name="segmentsFile"/> are the files it does not reference deleted.
        /// </summary>
        /// <remarks>
        /// <b>NOTE:</b> This is a best effort cleanup. Every exception raised along the way
        /// is suppressed, since the cleanup can be retried the next time.
        /// </remarks>
        /// <param name="directory">Index directory to clean.</param>
        /// <param name="segmentsFile">Segments file name the last commit is expected to have.</param>
        public static void CleanupOldIndexFiles(Directory directory, string segmentsFile)
        {
            try
            {
                IndexCommit lastCommit = GetLastCommit(directory);

                // A null commit means weird IO errors occurred; a segments file mismatch
                // means this is not the expected commit point. Nothing is deleted in either case.
                if (lastCommit == null || !lastCommit.SegmentsFileName.Equals(segmentsFile, StringComparison.Ordinal))
                {
                    return;
                }

                // Files to keep: everything the commit references, plus the segments.gen file.
                ISet<string> keep = new JCG.HashSet<string>(lastCommit.FileNames);
                keep.Add(IndexFileNames.SEGMENTS_GEN);

                Regex codecFilePattern = IndexFileNames.CODEC_FILE_PATTERN;
                foreach (string name in directory.ListAll())
                {
                    if (keep.Contains(name))
                    {
                        continue;
                    }

                    // Only codec files and segments files are candidates for deletion.
                    bool isIndexFile = codecFilePattern.IsMatch(name)
                        || name.StartsWith(IndexFileNames.SEGMENTS, StringComparison.Ordinal);
                    if (!isIndexFile)
                    {
                        continue;
                    }

                    try
                    {
                        directory.DeleteFile(name);
                    }
                    catch
                    {
                        // suppress, it's just a best effort
                    }
                }
            }
            catch
            {
                // Deliberately ignore any error raised during cleanup: it will have a
                // chance to succeed the next time we get a new revision.
            }
        }
Пример #26
0
        /// <summary>
        /// Copies a plain set of stop words into a <see cref="CharArraySet"/> and verifies that
        /// the copy has the same size, contains the original words but not their upper-cased
        /// forms, and that words added to the copy afterwards do not appear in the source set.
        /// </summary>
        public virtual void TestCopyJDKSet()
        {
            ISet <string> set = new JCG.HashSet <string>();

            IList <string> stopwords      = TEST_STOP_WORDS;
            IList <string> stopwordsUpper = new List <string>();

            foreach (string word in stopwords)
            {
                stopwordsUpper.Add(word.ToUpperInvariant());
            }
            set.addAll(TEST_STOP_WORDS);

            CharArraySet copy = CharArraySet.Copy(TEST_VERSION_CURRENT, set);

            // The size check used to be asserted twice in a row; once is sufficient.
            assertEquals(set.Count, copy.size());

            assertTrue(copy.containsAll(stopwords));
            foreach (string upper in stopwordsUpper)
            {
                // the copy must not match the upper-cased variants
                assertFalse(copy.contains(upper));
            }

            IList <string> newWords = new List <string>();

            foreach (string word in stopwords)
            {
                newWords.Add(word + "_1");
            }
            copy.addAll(newWords);

            assertTrue(copy.containsAll(stopwords));
            assertTrue(copy.containsAll(newWords));
            // new added terms are not in the source set
            foreach (string added in newWords)
            {
                assertFalse(set.Contains(added));
            }
        }
Пример #27
0
        /// <summary>
        /// Returns the files required for replication. By default, this method returns
        /// all files that exist in the new revision, but not in the handler.
        /// </summary>
        /// <param name="newRevisionFiles">Files of the new revision, keyed by source.</param>
        /// <returns>
        /// <paramref name="newRevisionFiles"/> itself when the handler has no current revision
        /// files; otherwise a new dictionary with one entry per source of the handler, holding
        /// the new revision's files whose names the handler does not already have, in the
        /// new revision's order.
        /// </returns>
        protected virtual IDictionary <string, IList <RevisionFile> > RequiredFiles(IDictionary <string, IList <RevisionFile> > newRevisionFiles)
        {
            IDictionary<string, IList<RevisionFile>> current = handler.CurrentRevisionFiles;
            if (current == null)
            {
                return newRevisionFiles;
            }

            var required = new Dictionary<string, IList<RevisionFile>>();
            foreach (KeyValuePair<string, IList<RevisionFile>> entry in current)
            {
                string source = entry.Key;

                // Collect the handler's file names in a set for fast membership checks below.
                ISet<string> knownNames = new JCG.HashSet<string>();
                foreach (RevisionFile known in entry.Value)
                {
                    knownNames.Add(known.FileName);
                }

                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(newRevisionFiles.ContainsKey(source), "source not found in newRevisionFiles: {0}", newRevisionFiles);
                }

                // Walk the new revision's list so that its order is preserved in the result.
                IList<RevisionFile> missing = new JCG.List<RevisionFile>();
                foreach (RevisionFile candidate in newRevisionFiles[source])
                {
                    if (knownNames.Contains(candidate.FileName))
                    {
                        continue;
                    }
                    missing.Add(candidate);
                }

                required[source] = missing;
            }

            return required;
        }
Пример #28
0
 /// <summary>
 /// Advances the wrapped input by one token and replaces the term text with its stem,
 /// unless the token is flagged as a keyword or its text is in the exclusion set.
 /// </summary>
 /// <returns><c>true</c> if the input produced a token; <c>false</c> otherwise.</returns>
 public override bool IncrementToken()
 {
     if (!m_input.IncrementToken())
     {
         return false;
     }

     string term = termAtt.ToString();

     // Keyword tokens and terms in the exclusion table are passed through untouched.
     bool skipStemming = keywordAttr.IsKeyword || (exclusions != null && exclusions.Contains(term));
     if (!skipStemming)
     {
         string stemmed = stemmer.Stem(term);

         // If not stemmed, don't waste the time adjusting the token.
         if (stemmed != null && !stemmed.Equals(term, StringComparison.Ordinal))
         {
             termAtt.SetEmpty().Append(stemmed);
         }
     }

     return true;
 }
Пример #29
0
        /// <summary>
        /// Queries for documents intersecting a circle around <paramref name="pt"/> (or the
        /// circle's bounding box) and asserts the hit count and, optionally, specific ids.
        /// The search is randomly issued either as a plain query or as a filter applied to
        /// a match-all query.
        /// </summary>
        /// <param name="bbox">When <c>true</c>, the circle's bounding box is used as the query shape.</param>
        /// <param name="pt">Center of the circle.</param>
        /// <param name="distKM">Distance in kilometers, converted to degrees for the circle.</param>
        /// <param name="assertNumFound">Expected value of the result's <c>numFound</c>.</param>
        /// <param name="assertIds">Ids that must each appear in the results; <c>null</c> skips the id check.</param>
        private void _CheckHits(bool bbox, IPoint pt, double distKM, int assertNumFound, params int[] assertIds)
        {
            SpatialOperation operation = SpatialOperation.Intersects;
            double radiusDeg = DistanceUtils.Dist2Degrees(distKM, DistanceUtils.EarthMeanRadiusKilometers);

            IShape queryShape = ctx.MakeCircle(pt, radiusDeg);
            if (bbox)
            {
                queryShape = queryShape.BoundingBox;
            }

            SpatialArgs args = new SpatialArgs(operation, queryShape);
            //args.setDistPrecision(0.025);

            // Randomly exercise either the query path or the filter path of the strategy.
            bool useQuery = Random.nextBoolean();
            Query query;
            if (useQuery)
            {
                query = strategy.MakeQuery(args);
            }
            else
            {
                query = new FilteredQuery(new MatchAllDocsQuery(), strategy.MakeFilter(args));
            }

            SearchResults found = executeQuery(query, 100);
            assertEquals("" + queryShape, assertNumFound, found.numFound);

            if (assertIds == null)
            {
                return;
            }

            // Collect the "id" field of every returned document for membership checks.
            ISet<int> foundIds = new JCG.HashSet<int>();
            foreach (SearchResult hit in found.results)
            {
                string idText = hit.document.Get("id");
                foundIds.Add(int.Parse(idText, CultureInfo.InvariantCulture));
            }

            foreach (int expectedId in assertIds)
            {
                assertTrue("has " + expectedId, foundIds.Contains(expectedId));
            }
        }
Пример #30
0
 /// <summary>
 /// Waits on <c>startingGun</c>, then repeatedly drains tokens from <c>postings</c> into
 /// a single reusable document (at most <c>maxTermsPerDoc</c> tokens, none repeated within
 /// one document) and adds it through <c>iw</c> until <c>postings</c> is empty.
 /// Any exception is rethrown wrapped in a new <see cref="Exception"/>.
 /// </summary>
 public override void Run()
 {
     try
     {
         // One document and one field instance are reused for every AddDocument call.
         Document doc        = new Document();
         Field    termsField = NewTextField("field", "", Field.Store.NO);
         doc.Add(termsField);
         startingGun.Wait();

         while (postings.Count != 0)
         {
             StringBuilder buffer = new StringBuilder();
             ISet<string>  seen   = new JCG.HashSet<string>();
             for (int taken = 0; taken < maxTermsPerDoc; taken++)
             {
                 string token;
                 if (!postings.TryDequeue(out token))
                 {
                     break;
                 }

                 // Add returns false when this document already holds the token.
                 if (!seen.Add(token))
                 {
                     // Put it back:
                     postings.Enqueue(token);
                     break;
                 }

                 buffer.Append(' ').Append(token);
             }

             termsField.SetStringValue(buffer.ToString());
             iw.AddDocument(doc);
         }
     }
     catch (Exception ex)
     {
         throw new Exception(ex.Message, ex);
     }
 }