public SpanWeight(SpanQuery query, Searcher searcher)
{
    this.similarity = query.GetSimilarity(searcher);
    this.query = query;

    terms = new Support.Set<Lucene.Net.Index.Term>();
    query.ExtractTerms(terms);

    idfExp = similarity.idfExplain(terms.ToArray(), searcher);
    idf = idfExp.GetIdf();
}
public void Add(Support.Set<T> items)
{
    foreach (T item in items)
    {
        if (_Set.Contains(item))
        {
            continue;
        }
        _Set.Add(item);
        base.Add(item);
    }
}
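// Added illustration (not part of the original source): a minimal sketch of the
// de-duplicating union performed by Add(Set<T>) above. Names are invented.
private static void SetUnionExample()
{
    Support.Set<string> a = new Support.Set<string>();
    a.Add("body");
    a.Add("title");

    Support.Set<string> b = new Support.Set<string>();
    b.Add("title");
    b.Add("author");

    // "title" is already present in a, so it is skipped; a now holds
    // "body", "title", "author" in insertion order.
    a.Add(b);
}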
// Prevent instantiation.
private IndexFileNameFilter()
{
    extensions = new Support.Set<string>();
    for (int i = 0; i < IndexFileNames.INDEX_EXTENSIONS.Length; i++)
    {
        extensions.Add(IndexFileNames.INDEX_EXTENSIONS[i]);
    }

    extensionsInCFS = new Support.Set<string>();
    for (int i = 0; i < IndexFileNames.INDEX_EXTENSIONS_IN_COMPOUND_FILE.Length; i++)
    {
        extensionsInCFS.Add(IndexFileNames.INDEX_EXTENSIONS_IN_COMPOUND_FILE[i]);
    }
}
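// Added note (not in the original source): the private constructor implies the
// usual singleton pattern; a plausible sketch of the accessor follows. The
// member name GetFilter is assumed from the Java original and may differ here.
private static readonly IndexFileNameFilter singleton = new IndexFileNameFilter();

public static IndexFileNameFilter GetFilter()
{
    return singleton;
}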
/// <summary>Expert: called when re-writing queries under MultiSearcher.
///
/// Create a single query suitable for use by all subsearchers (in 1-1
/// correspondence with queries). This is an optimization of the OR of
/// all queries. We handle the common optimization cases of equal
/// queries and overlapping clauses of boolean OR queries (as generated
/// by MultiTermQuery.rewrite()).
/// Be careful overriding this method as queries[0] determines which
/// method will be called and is not necessarily of the same type as
/// the other queries.
/// </summary>
public virtual Query Combine(params Query[] queries)
{
    Support.Set<Query> uniques = new Support.Set<Query>();
    for (int i = 0; i < queries.Length; i++)
    {
        Query query = queries[i];
        BooleanClause[] clauses = null;
        // check if we can split the query into clauses
        bool splittable = (query is BooleanQuery);
        if (splittable)
        {
            BooleanQuery bq = (BooleanQuery) query;
            splittable = bq.IsCoordDisabled();
            clauses = bq.GetClauses();
            for (int j = 0; splittable && j < clauses.Length; j++)
            {
                splittable = (clauses[j].GetOccur() == BooleanClause.Occur.SHOULD);
            }
        }
        if (splittable)
        {
            for (int j = 0; j < clauses.Length; j++)
            {
                uniques.Add(clauses[j].GetQuery());
            }
        }
        else
        {
            uniques.Add(query);
        }
    }
    // optimization: if we have just one query, just return it
    if (uniques.Count == 1)
    {
        foreach (Query key in uniques)
        {
            return key;
        }
    }
    BooleanQuery result = new BooleanQuery(true);
    foreach (Query key in uniques)
    {
        result.Add(key, BooleanClause.Occur.SHOULD);
    }
    return result;
}
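// Added illustration (not in the original source): how Combine collapses the
// per-subsearcher rewrites of one query. Field and term names are invented.
private static void CombineExample()
{
    // Two subsearchers rewrote the same query to equal TermQuerys:
    Query q1 = new TermQuery(new Term("body", "lucene"));
    Query q2 = new TermQuery(new Term("body", "lucene"));

    // Equal queries collapse in the uniques set, so the single survivor is
    // returned as-is instead of being wrapped in a one-clause OR.
    Query combined = q1.Combine(new Query[] { q1, q2 });
}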
private bool IsOptimized(SegmentInfos infos, int maxNumSegments, Support.Set<SegmentInfo> segmentsToOptimize)
{
    int numSegments = infos.Count;
    int numToOptimize = 0;
    SegmentInfo optimizeInfo = null;
    for (int i = 0; i < numSegments && numToOptimize <= maxNumSegments; i++)
    {
        SegmentInfo info = infos.Info(i);
        if (segmentsToOptimize.Contains(info))
        {
            numToOptimize++;
            optimizeInfo = info;
        }
    }
    return numToOptimize <= maxNumSegments && (numToOptimize != 1 || IsOptimized(optimizeInfo));
}
/// <summary> Create weight in multiple index scenario.
///
/// Distributed query processing is done in the following steps:
/// 1. rewrite query
/// 2. extract necessary terms
/// 3. collect dfs for these terms from the Searchables
/// 4. create query weight using aggregate dfs.
/// 5. distribute that weight to Searchables
/// 6. merge results
///
/// Steps 1-4 are done here, 5+6 in the search() methods
///
/// </summary>
/// <returns> rewritten queries
/// </returns>
public /*protected internal*/ override Weight CreateWeight(Query original)
{
    // step 1
    Query rewrittenQuery = Rewrite(original);

    // step 2
    Support.Set<Lucene.Net.Index.Term> terms = new Support.Set<Term>();
    rewrittenQuery.ExtractTerms(terms);

    // step 3
    Term[] allTermsArray = new Term[terms.Count];
    int index = 0;
    foreach (Term t in terms)
    {
        allTermsArray[index++] = t;
    }

    int[] aggregatedDfs = new int[terms.Count];
    for (int i = 0; i < searchables.Length; i++)
    {
        int[] dfs = searchables[i].DocFreqs(allTermsArray);
        for (int j = 0; j < aggregatedDfs.Length; j++)
        {
            aggregatedDfs[j] += dfs[j];
        }
    }

    IDictionary<Term, int> dfMap = new Support.Dictionary<Term, int>();
    for (int i = 0; i < allTermsArray.Length; i++)
    {
        dfMap[allTermsArray[i]] = aggregatedDfs[i];
    }

    // step 4
    int numDocs = MaxDoc();
    CachedDfSource cacheSim = new CachedDfSource(dfMap, numDocs, GetSimilarity());

    return rewrittenQuery.Weight(cacheSim);
}
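// Added worked example (numbers invented) for step 3 above: document
// frequencies from each searchable are summed per term, so idf is computed
// once as if the sub-indexes were a single collection.
private static void AggregateDfsExample()
{
    int[] dfsFromIndexA = new int[] { 3, 7 }; // df for [t0, t1] in searchables[0]
    int[] dfsFromIndexB = new int[] { 2, 0 }; // df for [t0, t1] in searchables[1]

    int[] aggregatedDfs = new int[2];
    for (int j = 0; j < aggregatedDfs.Length; j++)
    {
        aggregatedDfs[j] = dfsFromIndexA[j] + dfsFromIndexB[j];
    }
    // aggregatedDfs == { 5, 7 }; these collection-wide counts feed CachedDfSource.
}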
/// <summary>Expert: merges the clauses of a set of BooleanQuery's into a single
/// BooleanQuery.
///
/// <p/>A utility for use by {@link #Combine(Query[])} implementations.
/// </summary>
public static Query MergeBooleanQueries(params BooleanQuery[] queries)
{
    Support.Set<BooleanClause> allClauses = new Support.Set<BooleanClause>();
    for (int i = 0; i < queries.Length; i++)
    {
        BooleanClause[] clauses = queries[i].GetClauses();
        for (int j = 0; j < clauses.Length; j++)
        {
            allClauses.Add(clauses[j]);
        }
    }

    bool coordDisabled = queries.Length > 0 && queries[0].IsCoordDisabled();
    BooleanQuery result = new BooleanQuery(coordDisabled);
    foreach (BooleanClause bc in allClauses)
    {
        result.Add(bc);
    }
    return result;
}
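// Added illustration (not in the original source): merging two rewritten
// BooleanQuerys. Because allClauses is a set, a clause appearing in both
// inputs is added only once. Field and term names are invented.
private static void MergeExample()
{
    BooleanQuery bq1 = new BooleanQuery();
    bq1.Add(new TermQuery(new Term("body", "apache")), BooleanClause.Occur.SHOULD);

    BooleanQuery bq2 = new BooleanQuery();
    bq2.Add(new TermQuery(new Term("body", "apache")), BooleanClause.Occur.SHOULD);
    bq2.Add(new TermQuery(new Term("body", "lucene")), BooleanClause.Occur.SHOULD);

    // merged carries two clauses, not three; coord follows bq1's setting.
    Query merged = Query.MergeBooleanQueries(bq1, bq2);
}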
public virtual void TestLazyFields()
{
    Assert.IsTrue(dir != null);
    Assert.IsTrue(fieldInfos != null);
    FieldsReader reader = new FieldsReader(dir, TEST_SEGMENT_NAME, fieldInfos);
    Assert.IsTrue(reader != null);
    Assert.IsTrue(reader.Size() == 1);

    Support.Set<string> loadFieldNames = new Support.Set<string>();
    loadFieldNames.Add(DocHelper.TEXT_FIELD_1_KEY);
    loadFieldNames.Add(DocHelper.TEXT_FIELD_UTF1_KEY);

    Support.Set<string> lazyFieldNames = new Support.Set<string>();
    lazyFieldNames.Add(DocHelper.LARGE_LAZY_FIELD_KEY);
    lazyFieldNames.Add(DocHelper.LAZY_FIELD_KEY);
    lazyFieldNames.Add(DocHelper.LAZY_FIELD_BINARY_KEY);
    lazyFieldNames.Add(DocHelper.TEXT_FIELD_UTF2_KEY);
    lazyFieldNames.Add(DocHelper.COMPRESSED_TEXT_FIELD_2_KEY);

    SetBasedFieldSelector fieldSelector = new SetBasedFieldSelector(loadFieldNames, lazyFieldNames);
    Document doc = reader.Doc(0, fieldSelector);
    Assert.IsTrue(doc != null, "doc is null and it shouldn't be");

    Fieldable field = doc.GetFieldable(DocHelper.LAZY_FIELD_KEY);
    Assert.IsTrue(field != null, "field is null and it shouldn't be");
    Assert.IsTrue(field.IsLazy(), "field is not lazy and it should be");
    System.String value_Renamed = field.StringValue();
    Assert.IsTrue(value_Renamed != null, "value is null and it shouldn't be");
    Assert.IsTrue(value_Renamed.Equals(DocHelper.LAZY_FIELD_TEXT), value_Renamed + " is not equal to " + DocHelper.LAZY_FIELD_TEXT);

    field = doc.GetFieldable(DocHelper.COMPRESSED_TEXT_FIELD_2_KEY);
    Assert.IsTrue(field != null, "field is null and it shouldn't be");
    Assert.IsTrue(field.IsLazy(), "field is not lazy and it should be");
    Assert.IsTrue(field.BinaryValue() == null, "binaryValue isn't null for lazy string field");
    value_Renamed = field.StringValue();
    Assert.IsTrue(value_Renamed != null, "value is null and it shouldn't be");
    Assert.IsTrue(value_Renamed.Equals(DocHelper.FIELD_2_COMPRESSED_TEXT), value_Renamed + " is not equal to " + DocHelper.FIELD_2_COMPRESSED_TEXT);

    field = doc.GetFieldable(DocHelper.TEXT_FIELD_1_KEY);
    Assert.IsTrue(field != null, "field is null and it shouldn't be");
    Assert.IsTrue(field.IsLazy() == false, "Field is lazy and it should not be");

    field = doc.GetFieldable(DocHelper.TEXT_FIELD_UTF1_KEY);
    Assert.IsTrue(field != null, "field is null and it shouldn't be");
    Assert.IsTrue(field.IsLazy() == false, "Field is lazy and it should not be");
    Assert.IsTrue(field.StringValue().Equals(DocHelper.FIELD_UTF1_TEXT), field.StringValue() + " is not equal to " + DocHelper.FIELD_UTF1_TEXT);

    field = doc.GetFieldable(DocHelper.TEXT_FIELD_UTF2_KEY);
    Assert.IsTrue(field != null, "field is null and it shouldn't be");
    Assert.IsTrue(field.IsLazy() == true, "field is not lazy and it should be");
    Assert.IsTrue(field.StringValue().Equals(DocHelper.FIELD_UTF2_TEXT), field.StringValue() + " is not equal to " + DocHelper.FIELD_UTF2_TEXT);

    field = doc.GetFieldable(DocHelper.LAZY_FIELD_BINARY_KEY);
    Assert.IsTrue(field != null, "field is null and it shouldn't be");
    Assert.IsTrue(field.StringValue() == null, "stringValue isn't null for lazy binary field");

    byte[] bytes = field.BinaryValue();
    Assert.IsTrue(bytes != null, "bytes is null and it shouldn't be");
    Assert.IsTrue(DocHelper.LAZY_FIELD_BINARY_BYTES.Length == bytes.Length, "lazy binary field has the wrong length");
    for (int i = 0; i < bytes.Length; i++)
    {
        Assert.IsTrue(bytes[i] == DocHelper.LAZY_FIELD_BINARY_BYTES[i], "byte[" + i + "] is mismatched");
    }
}
/// <summary>Returns the merges necessary to optimize the index.
/// This merge policy defines "optimized" to mean only one
/// segment in the index, where that segment has no
/// deletions pending nor separate norms, and it is in
/// compound file format if the current useCompoundFile
/// setting is true. This method returns multiple merges
/// (mergeFactor at a time) so the {@link MergeScheduler}
/// in use may make use of concurrency.
/// </summary>
public override MergeSpecification FindMergesForOptimize(SegmentInfos infos, int maxNumSegments, Support.Set<SegmentInfo> segmentsToOptimize)
{
    MergeSpecification spec;

    System.Diagnostics.Debug.Assert(maxNumSegments > 0);

    if (!IsOptimized(infos, maxNumSegments, segmentsToOptimize))
    {
        // Find the newest (rightmost) segment that needs to
        // be optimized (other segments may have been flushed
        // since optimize started):
        int last = infos.Count;
        while (last > 0)
        {
            SegmentInfo info = infos.Info(--last);
            if (segmentsToOptimize.Contains(info))
            {
                last++;
                break;
            }
        }

        if (last > 0)
        {
            spec = new MergeSpecification();

            // First, enroll all "full" merges (size
            // mergeFactor) to potentially be run concurrently:
            while (last - maxNumSegments + 1 >= mergeFactor)
            {
                spec.Add(MakeOneMerge(infos, infos.Range(last - mergeFactor, last)));
                last -= mergeFactor;
            }

            // Only if there are no full merges pending do we
            // add a final partial (< mergeFactor segments) merge:
            if (0 == spec.merges.Count)
            {
                if (maxNumSegments == 1)
                {
                    // Since we must optimize down to 1 segment, the
                    // choice is simple:
                    if (last > 1 || !IsOptimized(infos.Info(0)))
                    {
                        spec.Add(MakeOneMerge(infos, infos.Range(0, last)));
                    }
                }
                else if (last > maxNumSegments)
                {
                    // Take care to pick a partial merge that is
                    // least cost, but does not make the index too
                    // lopsided. If we always just picked the
                    // partial tail then we could produce a highly
                    // lopsided index over time:

                    // We must merge this many segments to leave
                    // maxNumSegments in the index (from when
                    // optimize was first kicked off):
                    int finalMergeSize = last - maxNumSegments + 1;

                    // Consider all possible starting points:
                    long bestSize = 0;
                    int bestStart = 0;

                    for (int i = 0; i < last - finalMergeSize + 1; i++)
                    {
                        long sumSize = 0;
                        for (int j = 0; j < finalMergeSize; j++)
                        {
                            sumSize += Size(infos.Info(j + i));
                        }
                        if (i == 0 || (sumSize < 2 * Size(infos.Info(i - 1)) && sumSize < bestSize))
                        {
                            bestStart = i;
                            bestSize = sumSize;
                        }
                    }

                    spec.Add(MakeOneMerge(infos, infos.Range(bestStart, bestStart + finalMergeSize)));
                }
            }
        }
        else
        {
            spec = null;
        }
    }
    else
    {
        spec = null;
    }
    return spec;
}
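// Added worked trace (segment sizes invented) for the partial-merge window
// selection above: sizes = { 100, 80, 10, 10, 10 }, maxNumSegments = 3,
// last = 5, so finalMergeSize = 5 - 3 + 1 = 3. Window sums: [0,3) = 190,
// [1,4) = 100, [2,5) = 30. A window after the first is accepted only if its
// sum is below 2x the size of the segment to its left (the lopsidedness
// guard) and below the best so far, so [2,5) wins and the three small
// trailing segments are merged, leaving the two large ones untouched.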
public virtual void TestLazyFieldsAfterClose()
{
    Assert.IsTrue(dir != null);
    Assert.IsTrue(fieldInfos != null);
    FieldsReader reader = new FieldsReader(dir, TEST_SEGMENT_NAME, fieldInfos);
    Assert.IsTrue(reader != null);
    Assert.IsTrue(reader.Size() == 1);

    Support.Set<string> loadFieldNames = new Support.Set<string>();
    loadFieldNames.Add(DocHelper.TEXT_FIELD_1_KEY);
    loadFieldNames.Add(DocHelper.TEXT_FIELD_UTF1_KEY);

    Support.Set<string> lazyFieldNames = new Support.Set<string>();
    lazyFieldNames.Add(DocHelper.LARGE_LAZY_FIELD_KEY);
    lazyFieldNames.Add(DocHelper.LAZY_FIELD_KEY);
    lazyFieldNames.Add(DocHelper.LAZY_FIELD_BINARY_KEY);
    lazyFieldNames.Add(DocHelper.TEXT_FIELD_UTF2_KEY);
    lazyFieldNames.Add(DocHelper.COMPRESSED_TEXT_FIELD_2_KEY);

    SetBasedFieldSelector fieldSelector = new SetBasedFieldSelector(loadFieldNames, lazyFieldNames);
    Document doc = reader.Doc(0, fieldSelector);
    Assert.IsTrue(doc != null, "doc is null and it shouldn't be");

    Fieldable field = doc.GetFieldable(DocHelper.LAZY_FIELD_KEY);
    Assert.IsTrue(field != null, "field is null and it shouldn't be");
    Assert.IsTrue(field.IsLazy(), "field is not lazy and it should be");

    reader.Close();

    try
    {
        field.StringValue();
        Assert.Fail("did not hit AlreadyClosedException as expected");
    }
    catch (AlreadyClosedException)
    {
        // expected
    }
}
/// <summary> Pass in the Set of {@link Field} names to load and the Set of {@link Field} names to load lazily. If both are null, the
/// Document will not have any {@link Field} on it.
/// </summary>
/// <param name="fieldsToLoad">A Set of {@link String} field names to load. May be empty, but not null
/// </param>
/// <param name="lazyFieldsToLoad">A Set of {@link String} field names to load lazily. May be empty, but not null
/// </param>
public SetBasedFieldSelector(Support.Set<string> fieldsToLoad, Support.Set<string> lazyFieldsToLoad)
{
    this.fieldsToLoad = fieldsToLoad;
    this.lazyFieldsToLoad = lazyFieldsToLoad;
}
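// Added usage sketch (not in the original source; field names invented):
// load "title" eagerly, defer "body" until its value is first requested,
// and skip fields named in neither set.
private static FieldSelector MakeSelectorExample()
{
    Support.Set<string> eager = new Support.Set<string>();
    eager.Add("title");

    Support.Set<string> lazy = new Support.Set<string>();
    lazy.Add("body");

    return new SetBasedFieldSelector(eager, lazy);
    // Typically passed to a document-loading overload, as the tests above do:
    // Document doc = reader.Doc(0, selector);
}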
public virtual void TestLazyPerformance()
{
    System.String tmpIODir = Support.AppSettings.Get("tempDir", "");
    System.String userName = System.Environment.UserName;
    System.String path = tmpIODir + System.IO.Path.DirectorySeparatorChar.ToString() + "lazyDir" + userName;
    System.IO.FileInfo file = new System.IO.FileInfo(path);
    _TestUtil.RmDir(file);
    FSDirectory tmpDir = FSDirectory.Open(file);
    Assert.IsTrue(tmpDir != null);

    IndexWriter writer = new IndexWriter(tmpDir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    writer.SetUseCompoundFile(false);
    writer.AddDocument(testDoc);
    writer.Close();

    Assert.IsTrue(fieldInfos != null);
    FieldsReader reader;
    long lazyTime = 0;
    long regularTime = 0;
    int length = 50;

    Support.Set<string> lazyFieldNames = new Support.Set<string>();
    lazyFieldNames.Add(DocHelper.LARGE_LAZY_FIELD_KEY);
    SetBasedFieldSelector fieldSelector = new SetBasedFieldSelector(new Support.Set<string>(), lazyFieldNames);

    for (int i = 0; i < length; i++)
    {
        reader = new FieldsReader(tmpDir, TEST_SEGMENT_NAME, fieldInfos);
        Assert.IsTrue(reader != null);
        Assert.IsTrue(reader.Size() == 1);

        Document doc;
        doc = reader.Doc(0, null); // Load all of them
        Assert.IsTrue(doc != null, "doc is null and it shouldn't be");
        Fieldable field = doc.GetFieldable(DocHelper.LARGE_LAZY_FIELD_KEY);
        Assert.IsTrue(field.IsLazy() == false, "field is lazy");
        System.String value_Renamed;
        long start;
        long finish;
        start = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond);
        // On my machine this was always 0ms.
        value_Renamed = field.StringValue();
        finish = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond);
        Assert.IsTrue(value_Renamed != null, "value is null and it shouldn't be");
        Assert.IsTrue(field != null, "field is null and it shouldn't be");
        regularTime += (finish - start);
        reader.Close();
        reader = null;
        doc = null;
        // Hmmm, are we still in cache???
        System.GC.Collect();

        reader = new FieldsReader(tmpDir, TEST_SEGMENT_NAME, fieldInfos);
        doc = reader.Doc(0, fieldSelector);
        field = doc.GetFieldable(DocHelper.LARGE_LAZY_FIELD_KEY);
        Assert.IsTrue(field.IsLazy() == true, "field is not lazy");
        start = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond);
        // On my machine this took around 50 - 70ms
        value_Renamed = field.StringValue();
        finish = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond);
        Assert.IsTrue(value_Renamed != null, "value is null and it shouldn't be");
        lazyTime += (finish - start);
        reader.Close();
    }

    System.Console.Out.WriteLine("Average Non-lazy time (should be very close to zero): " + regularTime / length + " ms for " + length + " reads");
    System.Console.Out.WriteLine("Average Lazy Time (should be greater than zero): " + lazyTime / length + " ms for " + length + " reads");
}
public SingleInstanceLock(Support.Set<string> locks, System.String lockName)
{
    this.locks = locks;
    this.lockName = lockName;
}
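// Added sketch (assumed logic, not the port's actual implementation): a lock
// built on a shared in-memory set typically obtains by inserting its name,
// which is why the constructor above takes the factory's shared set. Locks
// are therefore scoped to one Directory instance within one process.
public override bool Obtain()
{
    lock (locks)
    {
        if (locks.Contains(lockName))
        {
            return false; // someone in this process already holds the lock
        }
        locks.Add(lockName);
        return true;
    }
}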
/// <summary> Determine what set of merge operations is necessary in order to optimize
/// the index. {@link IndexWriter} calls this when its
/// {@link IndexWriter#Optimize()} method is called. This call is always
/// synchronized on the {@link IndexWriter} instance so only one thread at a
/// time will call this method.
/// </summary>
/// <param name="segmentInfos">the total set of segments in the index
/// </param>
/// <param name="maxSegmentCount">requested maximum number of segments in the index (currently this
/// is always 1)
/// </param>
/// <param name="segmentsToOptimize">contains the specific SegmentInfo instances that must be merged
/// away. This may be a subset of all SegmentInfos.
/// </param>
public abstract MergeSpecification FindMergesForOptimize(SegmentInfos segmentInfos, int maxSegmentCount, Support.Set<SegmentInfo> segmentsToOptimize);
public virtual void TestRewrite2()
{
    SpanQuery q1 = new SpanTermQuery(new Term("last", "smith"));
    SpanQuery q2 = new SpanTermQuery(new Term("last", "jones"));
    SpanQuery q = new SpanNearQuery(new SpanQuery[] { q1, new FieldMaskingSpanQuery(q2, "last") }, 1, true);
    Query qr = searcher.Rewrite(q);

    QueryUtils.CheckEqual(q, qr);

    Support.Set<Term> set_Renamed = new Support.Set<Term>();
    qr.ExtractTerms(set_Renamed);
    Assert.AreEqual(2, set_Renamed.Count);
}