public virtual void TestBasic()
{
    ISet<string> fileExtensions = new JCG.HashSet<string>();
    fileExtensions.Add(Lucene40StoredFieldsWriter.FIELDS_EXTENSION);
    fileExtensions.Add(Lucene40StoredFieldsWriter.FIELDS_INDEX_EXTENSION);

    MockDirectoryWrapper primaryDir = new MockDirectoryWrapper(Random, new RAMDirectory());
    primaryDir.CheckIndexOnDispose = false; // only part of an index
    MockDirectoryWrapper secondaryDir = new MockDirectoryWrapper(Random, new RAMDirectory());
    secondaryDir.CheckIndexOnDispose = false; // only part of an index

    FileSwitchDirectory fsd = new FileSwitchDirectory(fileExtensions, primaryDir, secondaryDir, true);

    // for now we wire Lucene40Codec because we rely upon its specific impl
    bool oldValue = OldFormatImpersonationIsActive;
    OldFormatImpersonationIsActive = true;
    IndexWriter writer = new IndexWriter(fsd, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))
        .SetMergePolicy(NewLogMergePolicy(false))
        .SetCodec(Codec.ForName("Lucene40"))
        .SetUseCompoundFile(false));
    TestIndexWriterReader.CreateIndexNoClose(true, "ram", writer);
    IndexReader reader = DirectoryReader.Open(writer, true);
    Assert.AreEqual(100, reader.MaxDoc);
    writer.Commit();

    // we should see only fdx,fdt files here
    string[] files = primaryDir.ListAll();
    Assert.IsTrue(files.Length > 0);
    for (int x = 0; x < files.Length; x++)
    {
        string ext = FileSwitchDirectory.GetExtension(files[x]);
        Assert.IsTrue(fileExtensions.Contains(ext));
    }

    // we should not see fdx,fdt files here
    files = secondaryDir.ListAll();
    Assert.IsTrue(files.Length > 0);
    for (int x = 0; x < files.Length; x++)
    {
        string ext = FileSwitchDirectory.GetExtension(files[x]);
        Assert.IsFalse(fileExtensions.Contains(ext));
    }

    reader.Dispose();
    writer.Dispose();

    files = fsd.ListAll();
    for (int i = 0; i < files.Length; i++)
    {
        Assert.IsNotNull(files[i]);
    }
    fsd.Dispose();
    OldFormatImpersonationIsActive = oldValue;
}
public virtual void TestRandomTerms()
{
    var terms = new string[TestUtil.NextInt32(Random, 1, AtLeast(1000))];
    var seen = new JCG.HashSet<string>();

    var allowEmptyString = Random.NextBoolean();

    if (Random.Next(10) == 7 && terms.Length > 2)
    {
        // Sometimes add a bunch of terms sharing a longish common prefix:
        int numTermsSamePrefix = Random.Next(terms.Length / 2);
        if (numTermsSamePrefix > 0)
        {
            string prefix;
            do
            {
                prefix = RandomString;
            } while (prefix.Length < 5);

            while (seen.Count < numTermsSamePrefix)
            {
                string t = prefix + RandomString;
                if (!seen.Contains(t))
                {
                    terms[seen.Count] = t;
                    seen.Add(t);
                }
            }
        }
    }

    while (seen.Count < terms.Length)
    {
        string t = RandomString;
        if (!seen.Contains(t) && (allowEmptyString || t.Length != 0))
        {
            terms[seen.Count] = t;
            seen.Add(t);
        }
    }

    using var d = NewDirectory();
    using var r = MakeIndex(d, terms);
    TestRandomSeeks(r, terms);
}
public void TestCachability()
{
    TermsFilter a = TermsFilter(Random.NextBoolean(), new Term("field1", "a"), new Term("field1", "b"));
    ISet<Filter> cachedFilters = new JCG.HashSet<Filter>();
    cachedFilters.Add(a);
    TermsFilter b = TermsFilter(Random.NextBoolean(), new Term("field1", "b"), new Term("field1", "a"));
    assertTrue("Must be cached", cachedFilters.Contains(b));
    // duplicate term
    assertTrue("Must be cached", cachedFilters.Contains(TermsFilter(true,
        new Term("field1", "a"), new Term("field1", "a"), new Term("field1", "b"))));
    assertFalse("Must not be cached", cachedFilters.Contains(TermsFilter(Random.NextBoolean(),
        new Term("field1", "a"), new Term("field1", "a"), new Term("field1", "b"), new Term("field1", "v"))));
}
public virtual void TestShrinkToAfterShortestMatch3()
{
    Directory directory = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random, directory,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, new TestPayloadAnalyzer(this)));

    Document doc = new Document();
    doc.Add(new TextField("content", new StringReader("j k a l f k k p a t a k l k t a")));
    writer.AddDocument(doc);
    IndexReader reader = writer.GetReader();
    IndexSearcher @is = NewSearcher(reader);
    writer.Dispose();

    SpanTermQuery stq1 = new SpanTermQuery(new Term("content", "a"));
    SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k"));
    SpanQuery[] sqs = new SpanQuery[] { stq1, stq2 };
    SpanNearQuery snq = new SpanNearQuery(sqs, 0, true);
    Spans spans = MultiSpansWrapper.Wrap(@is.TopReaderContext, snq);

    TopDocs topDocs = @is.Search(snq, 1);
    ISet<string> payloadSet = new JCG.HashSet<string>();
    for (int i = 0; i < topDocs.ScoreDocs.Length; i++)
    {
        while (spans.Next())
        {
            var payloads = spans.GetPayload();
            foreach (var payload in payloads)
            {
                payloadSet.Add(Encoding.UTF8.GetString(payload));
            }
        }
    }
    Assert.AreEqual(2, payloadSet.Count);
    if (Verbose)
    {
        foreach (string payload in payloadSet)
        {
            Console.WriteLine("match:" + payload);
        }
    }
    Assert.IsTrue(payloadSet.Contains("a:Noise:10"));
    Assert.IsTrue(payloadSet.Contains("k:Noise:11"));
    reader.Dispose();
    directory.Dispose();
}
/// <summary>
/// Collects the strings that can be produced from the given state into
/// <paramref name="strings"/>, returning <c>false</c> if more than
/// <paramref name="limit"/> strings are found.
/// <paramref name="limit"/> &lt; 0 means "infinite".
/// </summary>
private static bool GetFiniteStrings(State s, JCG.HashSet<State> pathstates, JCG.HashSet<Int32sRef> strings, Int32sRef path, int limit)
{
    pathstates.Add(s);
    foreach (Transition t in s.GetTransitions())
    {
        // A state revisited on the current path means a cycle: infinitely many strings.
        if (pathstates.Contains(t.to))
        {
            return false;
        }
        for (int n = t.min; n <= t.max; n++)
        {
            path.Grow(path.Length + 1);
            path.Int32s[path.Length] = n;
            path.Length++;
            if (t.to.accept)
            {
                strings.Add(Int32sRef.DeepCopyOf(path));
                if (limit >= 0 && strings.Count > limit)
                {
                    return false;
                }
            }
            if (!GetFiniteStrings(t.to, pathstates, strings, path, limit))
            {
                return false;
            }
            path.Length--;
        }
    }
    pathstates.Remove(s);
    return true;
}
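A minimal usage sketch, assuming this private helper backs the public SpecialOperations.GetFiniteStrings(Automaton, int) entry point and that BasicAutomata.MakeString is available from the same automaton package:

// Hedged sketch: enumerate all accepted strings of a finite automaton.
Automaton a = BasicAutomata.MakeString("dog");                       // accepts exactly "dog"
ISet<Int32sRef> strings = SpecialOperations.GetFiniteStrings(a, -1); // limit < 0 = no limit
// strings now holds a single Int32sRef containing the code points of "dog".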
// TODO: this currently requires a determinized machine,
// but it need not -- we can speed it up by walking the
// NFA instead. it'd still be fail fast.
public static BytesRef GetCommonPrefixBytesRef(Automaton a)
{
    if (a.IsSingleton)
    {
        return new BytesRef(a.singleton);
    }
    BytesRef @ref = new BytesRef(10);
    JCG.HashSet<State> visited = new JCG.HashSet<State>();
    State s = a.initial;
    bool done;
    do
    {
        done = true;
        visited.Add(s);
        if (!s.accept && s.NumTransitions == 1)
        {
            Transition t = s.GetTransitions().First();
            if (t.min == t.max && !visited.Contains(t.to))
            {
                // Append the single possible byte and follow the transition.
                @ref.Grow(++@ref.Length);
                @ref.Bytes[@ref.Length - 1] = (byte)t.min;
                s = t.to;
                done = false;
            }
        }
    } while (!done);
    return @ref;
}
private void CheckMatches(string qString, string expectedVals)
{
    ComplexPhraseQueryParser qp = new ComplexPhraseQueryParser(TEST_VERSION_CURRENT, defaultFieldName, analyzer);
    qp.InOrder = inOrder;
    qp.FuzzyPrefixLength = 1; // usually a good idea

    Query q = qp.Parse(qString);

    ISet<string> expecteds = new JCG.HashSet<string>();
    string[] vals = expectedVals.Split(',').TrimEnd();
    for (int i = 0; i < vals.Length; i++)
    {
        if (vals[i].Length > 0)
        {
            expecteds.Add(vals[i]);
        }
    }

    TopDocs td = searcher.Search(q, 10);
    ScoreDoc[] sd = td.ScoreDocs;
    for (int i = 0; i < sd.Length; i++)
    {
        Document doc = searcher.Doc(sd[i].Doc);
        string id = doc.Get("id");
        assertTrue(qString + " matched doc#" + id + " not expected", expecteds.Contains(id));
        expecteds.Remove(id);
    }

    assertEquals(qString + " missing some matches", 0, expecteds.Count);
}
public virtual void TestSettersChaining()
{
    // Ensures that every setter returns IndexWriterConfig to allow chaining.
    ISet<string> liveSetters = new JCG.HashSet<string>();
    ISet<string> allSetters = new JCG.HashSet<string>();
    foreach (MethodInfo m in typeof(IndexWriterConfig).GetMethods())
    {
        if (m.Name.StartsWith("Set", StringComparison.Ordinal) && !m.IsStatic)
        {
            allSetters.Add(m.Name);
            // setters overridden from LiveIndexWriterConfig are returned twice, once with
            // IndexWriterConfig return type and second with LiveIndexWriterConfig. The ones
            // from LiveIndexWriterConfig are marked 'synthetic', so just collect them and
            // assert in the end that we also received them from IWC.
            // In C# we do not have them marked synthetic so we look at the declaring type instead.
            if (m.DeclaringType.Name == "LiveIndexWriterConfig")
            {
                liveSetters.Add(m.Name);
            }
            else
            {
                Assert.AreEqual(typeof(IndexWriterConfig), m.ReturnType,
                    "method " + m.Name + " does not return IndexWriterConfig");
            }
        }
    }
    foreach (string setter in liveSetters)
    {
        Assert.IsTrue(allSetters.Contains(setter),
            "setter method not overridden by IndexWriterConfig: " + setter);
    }
}
public virtual void TestCheck()
{
    Random rnd = Random;
    ISet<object> jdk = new JCG.HashSet<object>(IdentityEqualityComparer<object>.Default);
    RamUsageEstimator.IdentityHashSet<object> us = new RamUsageEstimator.IdentityHashSet<object>();

    int max = 100000;
    int threshold = 256;
    for (int i = 0; i < max; i++)
    {
        // some of these will be interned and some will not so there will be collisions.
        int v = rnd.Next(threshold);

        bool e1 = jdk.Contains(v);
        bool e2 = us.Contains(v);
        Assert.AreEqual(e1, e2);

        e1 = jdk.Add(v);
        e2 = us.Add(v);
        Assert.AreEqual(e1, e2);
    }

    ISet<object> collected = new JCG.HashSet<object>(IdentityEqualityComparer<object>.Default);
    foreach (var o in us)
    {
        collected.Add(o);
    }

    // LUCENENET: We have 2 J2N hashsets, so no need to use aggressive mode
    assertEquals(collected, jdk, aggressive: false);
}
/// <summary>
/// Returns the files required for replication. By default, this method returns
/// all files that exist in the new revision, but not in the handler.
/// </summary>
protected virtual IDictionary<string, IList<RevisionFile>> RequiredFiles(IDictionary<string, IList<RevisionFile>> newRevisionFiles)
{
    IDictionary<string, IList<RevisionFile>> handlerRevisionFiles = handler.CurrentRevisionFiles;
    if (handlerRevisionFiles == null)
    {
        return newRevisionFiles;
    }

    Dictionary<string, IList<RevisionFile>> requiredFiles = new Dictionary<string, IList<RevisionFile>>();
    foreach (KeyValuePair<string, IList<RevisionFile>> pair in handlerRevisionFiles)
    {
        // put the handler files in a Set, for faster contains() checks later
        ISet<string> handlerFiles = new JCG.HashSet<string>(pair.Value.Select(v => v.FileName));

        // make sure to preserve revisionFiles order
        string source = pair.Key;
        Debug.Assert(newRevisionFiles.ContainsKey(source), string.Format("source not found in newRevisionFiles: {0}", newRevisionFiles));
        List<RevisionFile> res = newRevisionFiles[source]
            .Where(file => !handlerFiles.Contains(file.FileName))
            .ToList();
        requiredFiles.Add(source, res);
    }
    return requiredFiles;
}
public override bool IsLocked()
{
    lock (locks)
    {
        return locks.Contains(lockName);
    }
}
/// <summary>
/// Returns the longest string that is a prefix of all accepted strings and
/// visits each state at most once.
/// </summary>
/// <returns> Common prefix. </returns>
public static string GetCommonPrefix(Automaton a)
{
    if (a.IsSingleton)
    {
        return a.singleton;
    }
    StringBuilder b = new StringBuilder();
    JCG.HashSet<State> visited = new JCG.HashSet<State>();
    State s = a.initial;
    bool done;
    do
    {
        done = true;
        visited.Add(s);
        if (!s.accept && s.NumTransitions == 1)
        {
            Transition t = s.GetTransitions().First();
            if (t.min == t.max && !visited.Contains(t.to))
            {
                b.AppendCodePoint(t.min);
                s = t.to;
                done = false;
            }
        }
    } while (!done);
    return b.ToString();
}
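A hedged usage sketch; BasicAutomata.MakeStringUnion (which builds a minimal, deterministic automaton from a sorted collection) is assumed from the same package, satisfying the determinized-machine requirement noted above:

// Input must be sorted; the resulting automaton is minimal and deterministic.
Automaton a = BasicAutomata.MakeStringUnion(new List<BytesRef>
{
    new BytesRef("applepie"),
    new BytesRef("applesauce"),
});
string prefix = SpecialOperations.GetCommonPrefix(a); // "apple"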
public RandomAcceptedStrings(Automaton a)
{
    this.a = a;
    if (a.IsSingleton)
    {
        leadsToAccept = null;
        return;
    }

    // must use IdentityHashmap because two Transitions w/
    // different start nodes can be considered the same
    leadsToAccept = new JCG.Dictionary<Transition, bool?>(IdentityEqualityComparer<Transition>.Default);
    IDictionary<State, IList<ArrivingTransition>> allArriving = new Dictionary<State, IList<ArrivingTransition>>();

    Queue<State> q = new Queue<State>();
    ISet<State> seen = new JCG.HashSet<State>();

    // reverse map the transitions, so we can quickly look
    // up all arriving transitions to a given state
    foreach (State s in a.GetNumberedStates())
    {
        for (int i = 0; i < s.numTransitions; i++)
        {
            Transition t = s.TransitionsArray[i];
            if (!allArriving.TryGetValue(t.to, out IList<ArrivingTransition> tl) || tl == null)
            {
                tl = new List<ArrivingTransition>();
                allArriving[t.to] = tl;
            }
            tl.Add(new ArrivingTransition(s, t));
        }
        if (s.Accept)
        {
            q.Enqueue(s);
            seen.Add(s);
        }
    }

    // Breadth-first search, from accept states, backwards:
    while (q.Count > 0)
    {
        State s = q.Dequeue();
        if (allArriving.TryGetValue(s, out IList<ArrivingTransition> arriving) && arriving != null)
        {
            foreach (ArrivingTransition at in arriving)
            {
                State from = at.from;
                if (!seen.Contains(from))
                {
                    q.Enqueue(from);
                    seen.Add(from);
                    leadsToAccept[at.t] = true;
                }
            }
        }
    }
}
public override void ClearLock(string lockName)
{
    lock (locks)
    {
        if (locks.Contains(lockName))
        {
            locks.Remove(lockName);
        }
    }
}
/// <summary>
/// Helper function to create a HashSet fulfilling the given specific parameters. The function will
/// create a HashSet using the comparer constructor and then add values
/// to it until it is full. It will begin by adding the desired number of matching elements,
/// followed by random (deterministic) elements until the desired count is reached.
/// </summary>
protected IEnumerable<T> CreateHashSet(IEnumerable<T> enumerableToMatchTo, int count, int numberOfMatchingElements)
{
    JCG.HashSet<T> set = new JCG.HashSet<T>(GetIEqualityComparer());
    int seed = 528;
    JCG.List<T> match = null;

    // Add matching elements
    if (enumerableToMatchTo != null)
    {
        match = enumerableToMatchTo.ToList();
        for (int i = 0; i < numberOfMatchingElements; i++)
        {
            set.Add(match[i]);
        }
    }

    // Add elements to reach the desired count
    while (set.Count < count)
    {
        T toAdd = CreateT(seed++);
        while (set.Contains(toAdd) || (match != null && match.Contains(toAdd, GetIEqualityComparer()))) // Don't want any unexpectedly duplicate values
        {
            toAdd = CreateT(seed++);
        }
        set.Add(toAdd);
    }

    // Validate that the enumerable fits the guidelines as expected
    Debug.Assert(set.Count == count);
    if (match != null)
    {
        int actualMatchingCount = 0;
        foreach (T lookingFor in match)
        {
            actualMatchingCount += set.Contains(lookingFor) ? 1 : 0;
        }
        Assert.Equal(numberOfMatchingElements, actualMatchingCount);
    }

    return set;
}
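A hypothetical call site, assuming a derived test class that supplies CreateT and GetIEqualityComparer:

// Build a 10-element set sharing exactly 3 elements with a previously
// created enumerable (the variable names here are illustrative only).
IEnumerable<T> other = CreateHashSet(null, 5, 0);
IEnumerable<T> overlapping = CreateHashSet(other, 10, 3);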
public virtual void TestRandom()
{
    Directory d = NewDirectory();
    RandomIndexWriter w = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, d);
    w.IndexWriter.Config.SetMaxBufferedDocs(17);
    int numDocs = AtLeast(100);
    ISet<string> aDocs = new JCG.HashSet<string>();
    for (int i = 0; i < numDocs; i++)
    {
        Document doc = new Document();
        string v;
        if (Random.Next(5) == 4)
        {
            v = "a";
            aDocs.Add("" + i);
        }
        else
        {
            v = "b";
        }
        Field f = NewStringField("field", v, Field.Store.NO);
        doc.Add(f);
        doc.Add(NewStringField("id", "" + i, Field.Store.YES));
        w.AddDocument(doc);
    }

    int numDelDocs = AtLeast(10);
    for (int i = 0; i < numDelDocs; i++)
    {
        string delID = "" + Random.Next(numDocs);
        w.DeleteDocuments(new Term("id", delID));
        aDocs.Remove(delID);
    }

    IndexReader r = w.GetReader();
    w.Dispose();
    TopDocs hits = NewSearcher(r).Search(new MatchAllDocsQuery(),
        new QueryWrapperFilter(new TermQuery(new Term("field", "a"))), numDocs);
    Assert.AreEqual(aDocs.Count, hits.TotalHits);
    foreach (ScoreDoc sd in hits.ScoreDocs)
    {
        Assert.IsTrue(aDocs.Contains(r.Document(sd.Doc).Get("id")));
    }
    r.Dispose();
    d.Dispose();
}
private readonly object lineFileLock = new object(); // LUCENENET specific - lock to ensure writes don't collide for this instance

public WriteLineDocTask(PerfRunData runData)
    : base(runData)
{
    Config config = runData.Config;
    m_fname = config.Get("line.file.out", null);
    if (m_fname == null)
    {
        throw new ArgumentException("line.file.out must be set");
    }
    Stream @out = StreamUtils.GetOutputStream(new FileInfo(m_fname));
    lineFileOut = new StreamWriter(@out, Encoding.UTF8);
    docMaker = runData.DocMaker;

    // init fields
    string f2r = config.Get("line.fields", null);
    if (f2r == null)
    {
        fieldsToWrite = DEFAULT_FIELDS;
    }
    else
    {
        if (f2r.IndexOf(SEP) >= 0)
        {
            throw new ArgumentException("line.fields " + f2r + " should not contain the separator char: " + SEP);
        }
        fieldsToWrite = f2r.Split(',').TrimEnd();
    }

    // init sufficient fields
    sufficientFields = new bool[fieldsToWrite.Length];
    string suff = config.Get("sufficient.fields", DEFAULT_SUFFICIENT_FIELDS);
    if (",".Equals(suff, StringComparison.Ordinal))
    {
        checkSufficientFields = false;
    }
    else
    {
        checkSufficientFields = true;
        ISet<string> sf = new JCG.HashSet<string>(suff.Split(',').TrimEnd());
        for (int i = 0; i < fieldsToWrite.Length; i++)
        {
            if (sf.Contains(fieldsToWrite[i]))
            {
                sufficientFields[i] = true;
            }
        }
    }

    WriteHeader(lineFileOut);
}
public override bool IsLocked()
{
    UninterruptableMonitor.Enter(locks);
    try
    {
        return locks.Contains(lockName);
    }
    finally
    {
        UninterruptableMonitor.Exit(locks);
    }
}
/// <summary>
/// Checks whether there is a loop containing <paramref name="s"/>. (This is sufficient since
/// there are never transitions to dead states.)
/// </summary>
// TODO: not great that this is recursive... in theory a
// large automaton could exceed the stack
private static bool IsFiniteSlow(State s, JCG.HashSet<State> path)
{
    path.Add(s);
    foreach (Transition t in s.GetTransitions())
    {
        // A transition back onto the current path is a cycle: not finite.
        if (path.Contains(t.to) || !IsFiniteSlow(t.to, path))
        {
            return false;
        }
    }
    path.Remove(s);
    return true;
}
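A hedged sketch of the expected entry point, using the automaton's initial field as it appears elsewhere in this package:

// Start the cycle check from the initial state with an empty path set;
// any reachable cycle means the accepted language is infinite.
bool finite = IsFiniteSlow(a.initial, new JCG.HashSet<State>());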
public override void ClearLock(string lockName)
{
    UninterruptableMonitor.Enter(locks);
    try
    {
        if (locks.Contains(lockName))
        {
            locks.Remove(lockName);
        }
    }
    finally
    {
        UninterruptableMonitor.Exit(locks);
    }
}
public void TestSkipField()
{
    Directory dir = NewDirectory();
    RandomIndexWriter w = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, dir);
    int num = AtLeast(10);
    var terms = new JCG.HashSet<Term>();
    for (int i = 0; i < num; i++)
    {
        string field = "field" + Random.Next(100);
        terms.Add(new Term(field, "content1"));
        Document doc = new Document();
        doc.Add(NewStringField(field, "content1", Field.Store.YES));
        w.AddDocument(doc);
    }
    int randomFields = Random.Next(10);
    for (int i = 0; i < randomFields; i++)
    {
        while (true)
        {
            string field = "field" + Random.Next(100);
            Term t = new Term(field, "content1");
            if (!terms.Contains(t))
            {
                terms.Add(t);
                break;
            }
        }
    }
    w.ForceMerge(1);
    IndexReader reader = w.GetReader();
    w.Dispose();
    assertEquals(1, reader.Leaves.size());
    AtomicReaderContext context = reader.Leaves[0];
    TermsFilter tf = new TermsFilter(terms.ToList());

    FixedBitSet bits = (FixedBitSet)tf.GetDocIdSet(context, context.AtomicReader.LiveDocs);
    assertEquals(context.Reader.NumDocs, bits.Cardinality);
    reader.Dispose();
    dir.Dispose();
}
public virtual void TestMerge()
{
    RandomDocumentFactory docFactory = new RandomDocumentFactory(this, 5, 20);
    int numDocs = AtLeast(100);
    int numDeletes = Random.Next(numDocs);
    ISet<int> deletes = new JCG.HashSet<int>();
    while (deletes.Count < numDeletes)
    {
        deletes.Add(Random.Next(numDocs));
    }
    foreach (Options options in ValidOptions())
    {
        RandomDocument[] docs = new RandomDocument[numDocs];
        for (int i = 0; i < numDocs; ++i)
        {
            docs[i] = docFactory.NewDocument(TestUtil.NextInt32(Random, 1, 3), AtLeast(10), options);
        }
        using Directory dir = NewDirectory();
        using RandomIndexWriter writer = new RandomIndexWriter(Random, dir);
        for (int i = 0; i < numDocs; ++i)
        {
            writer.AddDocument(AddId(docs[i].ToDocument(), "" + i));
            if (Rarely())
            {
                writer.Commit();
            }
        }
        foreach (int delete in deletes)
        {
            writer.DeleteDocuments(new Term("id", "" + delete));
        }
        // merge with deletes
        writer.ForceMerge(1);
        using IndexReader reader = writer.GetReader();
        for (int i = 0; i < numDocs; ++i)
        {
            if (!deletes.Contains(i))
            {
                int docID = DocID(reader, "" + i);
                AssertEquals(docs[i], reader.GetTermVectors(docID));
            }
        }
    }
}
private void checkHits(SpatialArgs args, int assertNumFound, int[] assertIds)
{
    SearchResults got = executeQuery(strategy.MakeQuery(args), 100);
    assertEquals("" + args, assertNumFound, got.numFound);
    if (assertIds != null)
    {
        ISet<int> gotIds = new JCG.HashSet<int>();
        foreach (SearchResult result in got.results)
        {
            gotIds.Add(int.Parse(result.document.Get("id"), CultureInfo.InvariantCulture));
        }
        foreach (int assertId in assertIds)
        {
            assertTrue("has " + assertId, gotIds.Contains(assertId));
        }
    }
}
/// <summary>
/// Set a list of BCP47 extensions and private use subtags.
/// BCP47 extensions are already validated and well-formed, but may contain duplicates.
/// </summary>
private InternalLocaleBuilder SetExtensions(IList<string> bcpExtensions, string privateuse)
{
    ClearExtensions();

    if (bcpExtensions != null && bcpExtensions.Count > 0)
    {
        var processedExtensions = new JCG.HashSet<CaseInsensitiveChar>(bcpExtensions.Count);
        foreach (string bcpExt in bcpExtensions)
        {
            CaseInsensitiveChar key = new CaseInsensitiveChar(bcpExt[0]);
            // ignore duplicates
            if (!processedExtensions.Contains(key))
            {
                // each extension string contains singleton, e.g. "a-abc-def"
                if (UnicodeLocaleExtension.IsSingletonChar(key.Value))
                {
                    SetUnicodeLocaleExtension(bcpExt.Substring(2));
                }
                else
                {
                    if (_extensions == null)
                    {
                        _extensions = new Dictionary<CaseInsensitiveChar, string>(4);
                    }
                    _extensions[key] = bcpExt.Substring(2);
                }
                // NOTE: record the processed key; without this the duplicate
                // check above never fires and later duplicates would win.
                processedExtensions.Add(key);
            }
        }
    }
    if (privateuse != null && privateuse.Length > 0)
    {
        // privateuse string contains prefix, e.g. "x-abc-def"
        if (_extensions == null)
        {
            _extensions = new Dictionary<CaseInsensitiveChar, string>(1);
        }
        _extensions[new CaseInsensitiveChar(privateuse[0])] = privateuse.Substring(2);
    }
    return this;
}
/// <summary>
/// Cleans up the index directory from old index files. This method uses the
/// last commit found by <see cref="GetLastCommit(Directory)"/>. If it matches the
/// expected <paramref name="segmentsFile"/>, then all files not referenced by this commit point
/// are deleted.
/// </summary>
/// <remarks>
/// <b>NOTE:</b> This method does a best effort attempt to clean the index
/// directory. It suppresses any exceptions that occur, as this can be retried
/// the next time.
/// </remarks>
public static void CleanupOldIndexFiles(Directory directory, string segmentsFile)
{
    try
    {
        IndexCommit commit = GetLastCommit(directory);
        // commit == null means weird IO errors occurred, ignore them.
        // if there were any IO errors reading the expected commit point (i.e.
        // segments files mismatch), then ignore that commit as well.
        if (commit != null && commit.SegmentsFileName.Equals(segmentsFile, StringComparison.Ordinal))
        {
            ISet<string> commitFiles = new JCG.HashSet<string>(commit.FileNames)
            {
                IndexFileNames.SEGMENTS_GEN
            };
            Regex matcher = IndexFileNames.CODEC_FILE_PATTERN;
            foreach (string file in directory.ListAll())
            {
                if (!commitFiles.Contains(file)
                    && (matcher.IsMatch(file) || file.StartsWith(IndexFileNames.SEGMENTS, StringComparison.Ordinal)))
                {
                    try
                    {
                        directory.DeleteFile(file);
                    }
                    catch
                    {
                        // suppress, it's just a best effort
                    }
                }
            }
        }
    }
    catch
    {
        // ignore any errors that happen during this state and only log it. this
        // cleanup will have a chance to succeed the next time we get a new
        // revision.
    }
}
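A hedged usage sketch; the host class (the replicator's IndexReplicationHandler) and the local variable names are assumptions for illustration:

// After a new revision has been copied in, remove files that the expected
// commit no longer references. 'indexDir' and 'expectedSegmentsFile' are
// hypothetical locals of the calling handler.
string expectedSegmentsFile = lastCommit.SegmentsFileName; // e.g. a "segments_N" name
IndexReplicationHandler.CleanupOldIndexFiles(indexDir, expectedSegmentsFile);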
public virtual void TestCopyJDKSet()
{
    ISet<string> set = new JCG.HashSet<string>();
    IList<string> stopwords = TEST_STOP_WORDS;
    IList<string> stopwordsUpper = new List<string>();
    foreach (string @string in stopwords)
    {
        stopwordsUpper.Add(@string.ToUpperInvariant());
    }
    set.addAll(TEST_STOP_WORDS);
    CharArraySet copy = CharArraySet.Copy(TEST_VERSION_CURRENT, set);

    assertEquals(set.Count, copy.size());
    assertTrue(copy.containsAll(stopwords));
    foreach (string @string in stopwordsUpper)
    {
        assertFalse(copy.contains(@string));
    }

    IList<string> newWords = new List<string>();
    foreach (string @string in stopwords)
    {
        newWords.Add(@string + "_1");
    }
    copy.addAll(newWords);

    assertTrue(copy.containsAll(stopwords));
    assertTrue(copy.containsAll(newWords));
    // new added terms are not in the source set
    foreach (string @string in newWords)
    {
        assertFalse(set.Contains(@string));
    }
}
/// <summary>
/// Returns the files required for replication. By default, this method returns
/// all files that exist in the new revision, but not in the handler.
/// </summary>
protected virtual IDictionary<string, IList<RevisionFile>> RequiredFiles(IDictionary<string, IList<RevisionFile>> newRevisionFiles)
{
    IDictionary<string, IList<RevisionFile>> handlerRevisionFiles = handler.CurrentRevisionFiles;
    if (handlerRevisionFiles == null)
    {
        return newRevisionFiles;
    }

    Dictionary<string, IList<RevisionFile>> requiredFiles = new Dictionary<string, IList<RevisionFile>>();
    foreach (var e in handlerRevisionFiles)
    {
        // put the handler files in a Set, for faster contains() checks later
        ISet<string> handlerFiles = new JCG.HashSet<string>();
        foreach (RevisionFile file in e.Value)
        {
            handlerFiles.Add(file.FileName);
        }

        // make sure to preserve revisionFiles order
        IList<RevisionFile> res = new JCG.List<RevisionFile>();
        string source = e.Key;
        if (Debugging.AssertsEnabled)
        {
            Debugging.Assert(newRevisionFiles.ContainsKey(source), "source not found in newRevisionFiles: {0}", newRevisionFiles);
        }
        foreach (RevisionFile file in newRevisionFiles[source])
        {
            if (!handlerFiles.Contains(file.FileName))
            {
                res.Add(file);
            }
        }
        requiredFiles[source] = res;
    }

    return requiredFiles;
}
public override bool IncrementToken()
{
    if (m_input.IncrementToken())
    {
        string term = termAtt.ToString();
        // Check the exclusion table.
        if (!keywordAttr.IsKeyword && (exclusions == null || !exclusions.Contains(term)))
        {
            string s = stemmer.Stem(term);
            // If not stemmed, don't waste the time adjusting the token.
            if ((s != null) && !s.Equals(term, StringComparison.Ordinal))
            {
                termAtt.SetEmpty().Append(s);
            }
        }
        return true;
    }
    else
    {
        return false;
    }
}
private void _CheckHits(bool bbox, IPoint pt, double distKM, int assertNumFound, params int[] assertIds)
{
    SpatialOperation op = SpatialOperation.Intersects;
    double distDEG = DistanceUtils.Dist2Degrees(distKM, DistanceUtils.EarthMeanRadiusKilometers);
    IShape shape = ctx.MakeCircle(pt, distDEG);
    if (bbox)
    {
        shape = shape.BoundingBox;
    }

    SpatialArgs args = new SpatialArgs(op, shape);
    //args.setDistPrecision(0.025);
    Query query;
    if (Random.nextBoolean())
    {
        query = strategy.MakeQuery(args);
    }
    else
    {
        query = new FilteredQuery(new MatchAllDocsQuery(), strategy.MakeFilter(args));
    }
    SearchResults results = executeQuery(query, 100);
    assertEquals("" + shape, assertNumFound, results.numFound);
    if (assertIds != null)
    {
        ISet<int> resultIds = new JCG.HashSet<int>();
        foreach (SearchResult result in results.results)
        {
            resultIds.Add(int.Parse(result.document.Get("id"), CultureInfo.InvariantCulture));
        }
        foreach (int assertId in assertIds)
        {
            assertTrue("has " + assertId, resultIds.Contains(assertId));
        }
    }
}
public override void Run()
{
    try
    {
        Document document = new Document();
        Field field = NewTextField("field", "", Field.Store.NO);
        document.Add(field);
        startingGun.Wait();
        while (postings.Count != 0)
        {
            StringBuilder text = new StringBuilder();
            ISet<string> visited = new JCG.HashSet<string>();
            for (int i = 0; i < maxTermsPerDoc; i++)
            {
                if (!postings.TryDequeue(out string token))
                {
                    break;
                }
                if (visited.Contains(token))
                {
                    // Put it back:
                    postings.Enqueue(token);
                    break;
                }
                text.Append(' ');
                text.Append(token);
                visited.Add(token);
            }
            field.SetStringValue(text.ToString());
            iw.AddDocument(document);
        }
    }
    catch (Exception e)
    {
        throw new Exception(e.Message, e);
    }
}