public virtual Search.Query MakeLuceneQueryNoBoost(BasicQueryFactory qf)
{
    if (fieldNames.Count == 1)
    {
        /* single field name: no new queries needed */
        return q.MakeLuceneQueryFieldNoBoost(fieldNames[0], qf);
    }
    else
    {
        /* OR query over the fields */
        IList<SrndQuery> queries = new JCG.List<SrndQuery>();
        foreach (var fieldName in fieldNames)
        {
            var qc = (SrndQuery)q.Clone();
            queries.Add(new FieldsQuery(qc, fieldName, fieldOp));
        }
        OrQuery oq = new OrQuery(queries, true /* infix OR for field names */, orOperatorName);
        // System.out.println(getClass().toString() + ", fields expanded: " + oq.toString()); /* needs testing */
        return oq.MakeLuceneQueryField(null, qf);
    }
}
/// <summary>
/// Return a <see cref="T:IList{SegToken}"/> of all tokens in the map, ordered by startOffset.
/// </summary>
/// <returns><see cref="T:IList{SegToken}"/> of all tokens in the map.</returns>
public virtual IList<SegToken> ToTokenList()
{
    IList<SegToken> result = new JCG.List<SegToken>();
    int s = -1, count = 0, size = tokenListTable.Count;
    IList<SegToken> tokenList;
    while (count < size)
    {
        if (IsStartExist(s))
        {
            tokenList = tokenListTable[s];
            foreach (SegToken st in tokenList)
            {
                result.Add(st);
            }
            count++;
        }
        s++;
    }
    return result;
}
public override object Add(object prefix, object output)
{
    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(!(prefix is IList));
    }
    if (!(output is IList))
    {
        return outputs.Add((T)prefix, (T)output);
    }
    else
    {
        IList outputList = (IList)output;
        IList<T> addedList = new JCG.List<T>(outputList.Count);
        foreach (object _output in outputList)
        {
            addedList.Add(outputs.Add((T)prefix, (T)_output));
        }
        return addedList;
    }
}
private void NextSentence()
{
    var tokenList = new JCG.List<string>();
    var typeList = new JCG.List<string>();
    sentenceTokenAttrs.Clear();
    bool endOfSentence = false;
    while (!endOfSentence && (moreTokensAvailable = m_input.IncrementToken()))
    {
        if (!keywordAtt.IsKeyword)
        {
            tokenList.Add(termAtt.ToString());
            typeList.Add(typeAtt.Type);
        }
        endOfSentence = 0 != (flagsAtt.Flags & OpenNLPTokenizer.EOS_FLAG_BIT);
        sentenceTokenAttrs.Add(m_input.CloneAttributes());
    }
    sentenceTokens = tokenList.Count > 0 ? tokenList.ToArray() : null;
    sentenceTokenTypes = typeList.Count > 0 ? typeList.ToArray() : null;
}
/// <summary>
/// Do the measurements.
/// </summary>
private BenchmarkResult Measure(ICallable<int> callable)
{
    double NANOS_PER_MS = 1000000;
    try
    {
        JCG.List<double> times = new JCG.List<double>();
        for (int i = 0; i < warmup + rounds; i++)
        {
            long start = J2N.Time.NanoTime();
            guard = Convert.ToInt32(callable.Call());
            times.Add((J2N.Time.NanoTime() - start) / NANOS_PER_MS);
        }
        return new BenchmarkResult(times, warmup, rounds);
    }
    catch (Exception e) when (e.IsException())
    {
        e.printStackTrace();
        throw RuntimeException.Create(e);
    }
}
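A minimal standalone sketch of the same warmup-plus-rounds timing loop, using System.Diagnostics.Stopwatch instead of J2N.Time.NanoTime; the names (TimingSketch, MeasureMillis, action) are illustrative and not part of the benchmark code above.

using System;
using System.Collections.Generic;
using System.Diagnostics;

internal static class TimingSketch
{
    // Times 'action' for warmup + rounds iterations and returns the per-iteration
    // durations in milliseconds; callers would normally drop the first 'warmup' entries.
    public static List<double> MeasureMillis(Func<int> action, int warmup, int rounds)
    {
        var times = new List<double>(warmup + rounds);
        for (int i = 0; i < warmup + rounds; i++)
        {
            var sw = Stopwatch.StartNew();
            action(); // the work under measurement
            sw.Stop();
            times.Add(sw.Elapsed.TotalMilliseconds);
        }
        return times;
    }
}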
public virtual void TestCopyJDKSet()
{
    ISet<string> set = new JCG.HashSet<string>();
    IList<string> stopwords = TEST_STOP_WORDS;
    IList<string> stopwordsUpper = new JCG.List<string>();
    foreach (string @string in stopwords)
    {
        stopwordsUpper.Add(@string.ToUpperInvariant());
    }
    set.addAll(TEST_STOP_WORDS);
    CharArraySet copy = CharArraySet.Copy(TEST_VERSION_CURRENT, set);

    assertEquals(set.Count, copy.size());
    assertEquals(set.Count, copy.size());

    assertTrue(copy.containsAll(stopwords));
    foreach (string @string in stopwordsUpper)
    {
        assertFalse(copy.contains(@string));
    }

    IList<string> newWords = new JCG.List<string>();
    foreach (string @string in stopwords)
    {
        newWords.Add(@string + "_1");
    }
    copy.addAll(newWords);

    assertTrue(copy.containsAll(stopwords));
    assertTrue(copy.containsAll(newWords));
    // newly added terms are not in the source set
    foreach (string @string in newWords)
    {
        assertFalse(set.Contains(@string));
    }
}
/// <summary>
/// <seealso cref="FieldFragList.Add(int, int, IList{WeightedPhraseInfo})"/>.
/// </summary>
public override void Add(int startOffset, int endOffset, IList<WeightedPhraseInfo> phraseInfoList)
{
    IList<SubInfo> tempSubInfos = new JCG.List<SubInfo>();
    IList<SubInfo> realSubInfos = new JCG.List<SubInfo>();
    ISet<string> distinctTerms = new JCG.HashSet<string>();
    int length = 0;

    foreach (WeightedPhraseInfo phraseInfo in phraseInfoList)
    {
        float phraseTotalBoost = 0;
        foreach (TermInfo ti in phraseInfo.TermsInfos)
        {
            if (distinctTerms.Add(ti.Text))
            {
                phraseTotalBoost += ti.Weight * phraseInfo.Boost;
            }
            length++;
        }
        tempSubInfos.Add(new SubInfo(phraseInfo.GetText(), phraseInfo.TermsOffsets, phraseInfo.Seqnum, phraseTotalBoost));
    }

    // We want the number of terms per fragment (length) to factor into the weight; otherwise a
    // one-word query would give every fragment the same weight regardless of how many words it
    // contains. To keep fragments with many words from "outranking" more relevant fragments,
    // we dampen ("bend") the length with a standard normalization.
    float norm = length * (1 / (float)Math.Sqrt(length));

    float totalBoost = 0;
    foreach (SubInfo tempSubInfo in tempSubInfos)
    {
        float subInfoBoost = tempSubInfo.Boost * norm;
        realSubInfos.Add(new SubInfo(tempSubInfo.Text, tempSubInfo.TermsOffsets, tempSubInfo.Seqnum, subInfoBoost));
        totalBoost += subInfoBoost;
    }

    FragInfos.Add(new WeightedFragInfo(startOffset, endOffset, realSubInfos, totalBoost));
}
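For intuition, the "bent" length above reduces algebraically to sqrt(length), so longer fragments still gain weight, just sub-linearly; a tiny hypothetical check (not part of the highlighter code):

using System;

internal static class NormSketch
{
    public static void Main()
    {
        // length * (1 / sqrt(length)) == sqrt(length)
        foreach (int length in new[] { 1, 4, 16, 64 })
        {
            float norm = length * (1 / (float)Math.Sqrt(length));
            Console.WriteLine($"length={length} norm={norm}"); // prints 1, 2, 4, 8
        }
    }
}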
/// <summary>
/// Reads the stream, consuming a format of tab-separated values with 3 columns:
/// an "id", a "name" and the "shape". Empty lines and lines starting with a '#' are skipped.
/// The stream is closed.
/// </summary>
public static IEnumerator<SpatialTestData> GetTestData(Stream @in, SpatialContext ctx)
{
    IList<SpatialTestData> results = new JCG.List<SpatialTestData>();
    TextReader bufInput = new StreamReader(@in, Encoding.UTF8);
    try
    {
        String line;
        while ((line = bufInput.ReadLine()) != null)
        {
            if (line.Length == 0 || line[0] == '#')
            {
                continue;
            }
            SpatialTestData data = new SpatialTestData();
            String[] vals = line.Split('\t').TrimEnd();
            if (vals.Length != 3)
            {
                throw RuntimeException.Create("bad format; expecting 3 tab-separated values for line: " + line);
            }
            data.id = vals[0];
            data.name = vals[1];
            try
            {
                data.shape = ctx.ReadShapeFromWkt(vals[2]);
            }
            catch (Spatial4n.Exceptions.ParseException e) // LUCENENET: Spatial4n has its own ParseException that is different than the one in Support
            {
                throw RuntimeException.Create(e);
            }
            results.Add(data);
        }
    }
    finally
    {
        bufInput.Dispose();
    }
    return results.GetEnumerator();
}
public override void Build(IInputEnumerator enumerator)
{
    // LUCENENET: Added guard clause for null
    if (enumerator is null)
    {
        throw new ArgumentNullException(nameof(enumerator));
    }
    if (enumerator.HasPayloads)
    {
        throw new ArgumentException("this suggester doesn't support payloads");
    }
    if (enumerator.HasContexts)
    {
        throw new ArgumentException("this suggester doesn't support contexts");
    }
    root = new TernaryTreeNode();

    // buffer first
#pragma warning disable 612, 618
    if (enumerator.Comparer != BytesRef.UTF8SortedAsUTF16Comparer)
    {
        // make sure it's sorted and the comparer uses UTF16 sort order
        enumerator = new SortedInputEnumerator(enumerator, BytesRef.UTF8SortedAsUTF16Comparer);
    }
#pragma warning restore 612, 618

    JCG.List<string> tokens = new JCG.List<string>();
    JCG.List<object> vals = new JCG.List<object>();
    BytesRef spare;
    CharsRef charsSpare = new CharsRef();
    while (enumerator.MoveNext())
    {
        spare = enumerator.Current;
        charsSpare.Grow(spare.Length);
        UnicodeUtil.UTF8toUTF16(spare.Bytes, spare.Offset, spare.Length, charsSpare);
        tokens.Add(charsSpare.ToString());
        vals.Add(enumerator.Weight);
    }
    autocomplete.BalancedTree(tokens.ToArray(), vals.ToArray(), 0, tokens.Count - 1, root);
}
private void MergeNorms(SegmentWriteState segmentWriteState)
{
    DocValuesConsumer consumer = codec.NormsFormat.NormsConsumer(segmentWriteState);
    bool success = false;
    try
    {
        foreach (FieldInfo field in mergeState.FieldInfos)
        {
            if (field.HasNorms)
            {
                IList<NumericDocValues> toMerge = new JCG.List<NumericDocValues>();
                IList<IBits> docsWithField = new JCG.List<IBits>();
                foreach (AtomicReader reader in mergeState.Readers)
                {
                    NumericDocValues norms = reader.GetNormValues(field.Name);
                    if (norms == null)
                    {
                        norms = DocValues.EMPTY_NUMERIC;
                    }
                    toMerge.Add(norms);
                    docsWithField.Add(new Lucene.Net.Util.Bits.MatchAllBits(reader.MaxDoc));
                }
                consumer.MergeNumericField(field, mergeState, toMerge, docsWithField);
            }
        }
        success = true;
    }
    finally
    {
        if (success)
        {
            IOUtils.Dispose(consumer);
        }
        else
        {
            IOUtils.DisposeWhileHandlingException(consumer);
        }
    }
}
private void MergeTerms(SegmentWriteState segmentWriteState)
{
    IList<Fields> fields = new JCG.List<Fields>();
    IList<ReaderSlice> slices = new JCG.List<ReaderSlice>();

    int docBase = 0;
    for (int readerIndex = 0; readerIndex < mergeState.Readers.Count; readerIndex++)
    {
        AtomicReader reader = mergeState.Readers[readerIndex];
        Fields f = reader.Fields;
        int maxDoc = reader.MaxDoc;
        if (f != null)
        {
            slices.Add(new ReaderSlice(docBase, maxDoc, readerIndex));
            fields.Add(f);
        }
        docBase += maxDoc;
    }

    FieldsConsumer consumer = codec.PostingsFormat.FieldsConsumer(segmentWriteState);
    bool success = false;
    try
    {
        consumer.Merge(mergeState, new MultiFields(fields.ToArray(/*Fields.EMPTY_ARRAY*/), slices.ToArray(/*ReaderSlice.EMPTY_ARRAY*/)));
        success = true;
    }
    finally
    {
        if (success)
        {
            IOUtils.Dispose(consumer);
        }
        else
        {
            IOUtils.DisposeWhileHandlingException(consumer);
        }
    }
}
public virtual void TestCapitalization()
{
    CharArraySet keep = new CharArraySet(TEST_VERSION_CURRENT, new string[] { "and", "the", "it", "BIG" }, false);

    AssertCapitalizesTo("kiTTEN", new string[] { "Kitten" }, true, keep, true, null, 0, CapitalizationFilter.DEFAULT_MAX_WORD_COUNT, CapitalizationFilter.DEFAULT_MAX_TOKEN_LENGTH);
    AssertCapitalizesTo("and", new string[] { "And" }, true, keep, true, null, 0, CapitalizationFilter.DEFAULT_MAX_WORD_COUNT, CapitalizationFilter.DEFAULT_MAX_TOKEN_LENGTH);
    AssertCapitalizesTo("AnD", new string[] { "And" }, true, keep, true, null, 0, CapitalizationFilter.DEFAULT_MAX_WORD_COUNT, CapitalizationFilter.DEFAULT_MAX_TOKEN_LENGTH);

    // first is not forced, but it's not a keep word, either
    AssertCapitalizesTo("AnD", new string[] { "And" }, true, keep, false, null, 0, CapitalizationFilter.DEFAULT_MAX_WORD_COUNT, CapitalizationFilter.DEFAULT_MAX_TOKEN_LENGTH);

    AssertCapitalizesTo("big", new string[] { "Big" }, true, keep, true, null, 0, CapitalizationFilter.DEFAULT_MAX_WORD_COUNT, CapitalizationFilter.DEFAULT_MAX_TOKEN_LENGTH);
    AssertCapitalizesTo("BIG", new string[] { "BIG" }, true, keep, true, null, 0, CapitalizationFilter.DEFAULT_MAX_WORD_COUNT, CapitalizationFilter.DEFAULT_MAX_TOKEN_LENGTH);

    AssertCapitalizesToKeyword("Hello thEre my Name is Ryan", "Hello there my name is ryan", true, keep, true, null, 0, CapitalizationFilter.DEFAULT_MAX_WORD_COUNT, CapitalizationFilter.DEFAULT_MAX_TOKEN_LENGTH);

    // now each token
    AssertCapitalizesTo("Hello thEre my Name is Ryan", new string[] { "Hello", "There", "My", "Name", "Is", "Ryan" }, false, keep, true, null, 0, CapitalizationFilter.DEFAULT_MAX_WORD_COUNT, CapitalizationFilter.DEFAULT_MAX_TOKEN_LENGTH);

    // now only the long words
    AssertCapitalizesTo("Hello thEre my Name is Ryan", new string[] { "Hello", "There", "my", "Name", "is", "Ryan" }, false, keep, true, null, 3, CapitalizationFilter.DEFAULT_MAX_WORD_COUNT, CapitalizationFilter.DEFAULT_MAX_TOKEN_LENGTH);

    // without prefix
    AssertCapitalizesTo("McKinley", new string[] { "Mckinley" }, true, keep, true, null, 0, CapitalizationFilter.DEFAULT_MAX_WORD_COUNT, CapitalizationFilter.DEFAULT_MAX_TOKEN_LENGTH);

    // Now try some prefixes
    IList<char[]> okPrefix = new JCG.List<char[]>();
    okPrefix.Add("McK".ToCharArray());
    AssertCapitalizesTo("McKinley", new string[] { "McKinley" }, true, keep, true, okPrefix, 0, CapitalizationFilter.DEFAULT_MAX_WORD_COUNT, CapitalizationFilter.DEFAULT_MAX_TOKEN_LENGTH);

    // now try some stuff with numbers
    AssertCapitalizesTo("1st 2nd third", new string[] { "1st", "2nd", "Third" }, false, keep, false, null, 0, CapitalizationFilter.DEFAULT_MAX_WORD_COUNT, CapitalizationFilter.DEFAULT_MAX_TOKEN_LENGTH);
    AssertCapitalizesToKeyword("the The the", "The The the", false, keep, true, null, 0, CapitalizationFilter.DEFAULT_MAX_WORD_COUNT, CapitalizationFilter.DEFAULT_MAX_TOKEN_LENGTH);
}
public virtual void TestMergeRandom()
{
    PrefixCodedTerms[] pb = new PrefixCodedTerms[TestUtil.NextInt32(Random, 2, 10)];
    JCG.SortedSet<Term> superSet = new JCG.SortedSet<Term>();

    for (int i = 0; i < pb.Length; i++)
    {
        JCG.SortedSet<Term> terms = new JCG.SortedSet<Term>();
        int nterms = TestUtil.NextInt32(Random, 0, 10000);
        for (int j = 0; j < nterms; j++)
        {
            Term term = new Term(TestUtil.RandomUnicodeString(Random, 2), TestUtil.RandomUnicodeString(Random, 4));
            terms.Add(term);
        }
        superSet.UnionWith(terms);

        PrefixCodedTerms.Builder b = new PrefixCodedTerms.Builder();
        foreach (Term @ref in terms)
        {
            b.Add(@ref);
        }
        pb[i] = b.Finish();
    }

    JCG.List<IEnumerator<Term>> subs = new JCG.List<IEnumerator<Term>>();
    for (int i = 0; i < pb.Length; i++)
    {
        subs.Add(pb[i].GetEnumerator());
    }

    IEnumerator<Term> expected = superSet.GetEnumerator();
    IEnumerator<Term> actual = new MergedEnumerator<Term>(subs.ToArray());
    while (actual.MoveNext())
    {
        Assert.IsTrue(expected.MoveNext());
        Assert.AreEqual(expected.Current, actual.Current);
    }
    Assert.IsFalse(expected.MoveNext());
}
// extract properties to array, e.g. for "10.7:100.4:-2.3" return int[]{10.7,100.4,-2.3}. private double[] PropToDoubleArray(string s) { if (s.IndexOf(':') < 0) { return(new double[] { double.Parse(s, CultureInfo.InvariantCulture) }); } IList <double> a = new JCG.List <double>(); StringTokenizer st = new StringTokenizer(s, ":"); while (st.MoveNext()) { string t = st.Current; a.Add(double.Parse(t, CultureInfo.InvariantCulture)); } double[] res = new double[a.Count]; for (int i = 0; i < a.Count; i++) { res[i] = a[i]; } return(res); }
// extract properties to array, e.g. for "true:true:false" return boolean[]{true,false,false}. private bool[] PropToBooleanArray(string s) { if (s.IndexOf(':') < 0) { return(new bool[] { bool.Parse(s) }); } IList <bool> a = new JCG.List <bool>(); StringTokenizer st = new StringTokenizer(s, ":"); while (st.MoveNext()) { string t = st.Current; a.Add(bool.Parse(t)); } bool[] res = new bool[a.Count]; for (int i = 0; i < a.Count; i++) { res[i] = a[i]; } return(res); }
/// <summary>
/// Returns the files required for replication. By default, this method returns
/// all files that exist in the new revision, but not in the handler.
/// </summary>
protected virtual IDictionary<string, IList<RevisionFile>> RequiredFiles(IDictionary<string, IList<RevisionFile>> newRevisionFiles)
{
    IDictionary<string, IList<RevisionFile>> handlerRevisionFiles = handler.CurrentRevisionFiles;
    if (handlerRevisionFiles == null)
    {
        return newRevisionFiles;
    }

    Dictionary<string, IList<RevisionFile>> requiredFiles = new Dictionary<string, IList<RevisionFile>>();
    foreach (var e in handlerRevisionFiles)
    {
        // put the handler files in a Set, for faster contains() checks later
        ISet<string> handlerFiles = new JCG.HashSet<string>();
        foreach (RevisionFile file in e.Value)
        {
            handlerFiles.Add(file.FileName);
        }

        // make sure to preserve revisionFiles order
        IList<RevisionFile> res = new JCG.List<RevisionFile>();
        string source = e.Key;
        if (Debugging.AssertsEnabled)
        {
            Debugging.Assert(newRevisionFiles.ContainsKey(source), "source not found in newRevisionFiles: {0}", newRevisionFiles);
        }
        foreach (RevisionFile file in newRevisionFiles[source])
        {
            if (!handlerFiles.Contains(file.FileName))
            {
                res.Add(file);
            }
        }

        requiredFiles[source] = res;
    }

    return requiredFiles;
}
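Per source, the computation above is a set difference (files in the new revision minus files the handler already has), preserving the revision's order; a hypothetical standalone sketch over plain file names, with made-up names:

using System.Collections.Generic;
using System.Linq;

internal static class RequiredFilesSketch
{
    public static List<string> Missing(IEnumerable<string> newRevision, IEnumerable<string> handlerHas)
    {
        var have = new HashSet<string>(handlerHas); // fast Contains checks
        return newRevision.Where(f => !have.Contains(f)).ToList();
    }
}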
/// <summary>
/// Returns a list of facet entries to be rendered based on the specified offset and limit.
/// The facet entries are retrieved from the facet entries collected during merging.
/// </summary>
/// <param name="offset">The offset in the collected facet entries during merging</param>
/// <param name="limit">The number of facets to return starting from the offset.</param>
/// <returns>a list of facet entries to be rendered based on the specified offset and limit</returns>
public virtual IList<FacetEntry> GetFacetEntries(int offset, int limit)
{
    IList<FacetEntry> entries = new JCG.List<FacetEntry>();

    int skipped = 0;
    int included = 0;
    foreach (FacetEntry facetEntry in facetEntries)
    {
        if (skipped < offset)
        {
            skipped++;
            continue;
        }
        if (included++ >= limit)
        {
            break;
        }
        entries.Add(facetEntry);
    }
    return entries;
}
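The offset/limit walk above is equivalent in effect to Skip(offset).Take(limit) over the collected entries; a hypothetical LINQ restatement for reference, not how the class is actually implemented:

using System.Collections.Generic;
using System.Linq;

internal static class FacetPageSketch
{
    // Returns at most 'limit' items, starting 'offset' items into the sequence.
    public static List<T> Page<T>(IEnumerable<T> entries, int offset, int limit) =>
        entries.Skip(offset).Take(limit).ToList();
}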
/// <summary>
/// Create the scorer used to score our associated <see cref="DisjunctionMaxQuery"/>.
/// </summary>
public override Scorer GetScorer(AtomicReaderContext context, IBits acceptDocs)
{
    IList<Scorer> scorers = new JCG.List<Scorer>();
    foreach (Weight w in m_weights)
    {
        // we will advance() subscorers
        Scorer subScorer = w.GetScorer(context, acceptDocs);
        if (subScorer != null)
        {
            scorers.Add(subScorer);
        }
    }
    if (scorers.Count == 0)
    {
        // no sub-scorers had any documents
        return null;
    }
    DisjunctionMaxScorer result = new DisjunctionMaxScorer(this, outerInstance.tieBreakerMultiplier, scorers.ToArray());
    return result;
}
public virtual void TestOffsetCorrection()
{
    const string INPUT = "G&uuml;nther G&uuml;nther is here";

    // create MappingCharFilter
    IList<string> mappingRules = new JCG.List<string>();
    mappingRules.Add("\"&uuml;\" => \"ü\"");
    NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder();
    builder.Add("&uuml;", "ü");
    NormalizeCharMap normMap = builder.Build();
    CharFilter charStream = new MappingCharFilter(normMap, new StringReader(INPUT));

    // create PatternTokenizer
    TokenStream stream = new PatternTokenizer(charStream, new Regex("[,;/\\s]+", RegexOptions.Compiled), -1);
    AssertTokenStreamContents(stream, new string[] { "Günther", "Günther", "is", "here" }, new int[] { 0, 13, 26, 29 }, new int[] { 12, 25, 28, 33 }, INPUT.Length);

    charStream = new MappingCharFilter(normMap, new StringReader(INPUT));
    stream = new PatternTokenizer(charStream, new Regex("Günther", RegexOptions.Compiled), 0);
    AssertTokenStreamContents(stream, new string[] { "Günther", "Günther" }, new int[] { 0, 13 }, new int[] { 12, 25 }, INPUT.Length);
}
/// <summary>
/// Optimize (remove empty rows) from the given Trie and return the resulting
/// Trie.
/// </summary>
/// <param name="orig">the <see cref="Trie"/> to consolidate</param>
/// <returns>the newly consolidated Trie</returns>
public override Trie Optimize(Trie orig)
{
    IList<string> cmds = orig.cmds;
    IList<Row> rows = new JCG.List<Row>();
    IList<Row> orows = orig.rows;
    int[] remap = new int[orows.Count];

    for (int j = orows.Count - 1; j >= 0; j--)
    {
        Row now = new Remap(orows[j], remap);
        bool merged = false;

        for (int i = 0; i < rows.Count; i++)
        {
            Row q = Merge(now, rows[i]);
            if (q != null)
            {
                rows[i] = q;
                merged = true;
                remap[j] = i;
                break;
            }
        }

        if (merged == false)
        {
            remap[j] = rows.Count;
            rows.Add(now);
        }
    }

    int root = remap[orig.root];
    Arrays.Fill(remap, -1);
    rows = RemoveGaps(root, rows, new JCG.List<Row>(), remap);

    return new Trie(orig.forward, remap[root], cmds, rows);
}
/// <summary>
/// Guesses the languages of a word.
/// </summary>
/// <param name="input">The word.</param>
/// <returns>A Set of Strings of language names that are potential matches for the input word.</returns>
public virtual LanguageSet GuessLanguages(string input)
{
    string text = input.ToLowerInvariant();

    ISet<string> langs = new JCG.HashSet<string>(this.languages.GetLanguages());
    foreach (LangRule rule in this.rules)
    {
        if (rule.Matches(text))
        {
            if (rule.acceptOnMatch)
            {
                IList<string> toRemove = new JCG.List<string>();
                foreach (var item in langs)
                {
                    if (!rule.languages.Contains(item))
                    {
                        toRemove.Add(item);
                    }
                }
                foreach (var item in toRemove)
                {
                    langs.Remove(item);
                }
            }
            else
            {
                foreach (var item in rule.languages)
                {
                    langs.Remove(item);
                }
            }
        }
    }

    LanguageSet ls = LanguageSet.From(langs);
    return ls.Equals(Languages.NO_LANGUAGES) ? Languages.ANY_LANGUAGE : ls;
}
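Each matching rule above either keeps only its own languages (acceptOnMatch) or removes them from the candidate set; the same filtering step can be expressed with HashSet operations, shown here as an illustrative sketch with made-up names:

using System.Collections.Generic;

internal static class LangFilterSketch
{
    public static void Apply(HashSet<string> candidates, ISet<string> ruleLanguages, bool acceptOnMatch)
    {
        if (acceptOnMatch)
        {
            candidates.IntersectWith(ruleLanguages); // keep only the rule's languages
        }
        else
        {
            candidates.ExceptWith(ruleLanguages);    // drop the rule's languages
        }
    }
}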
public SrndQuery NotQuery()
{
    SrndQuery q;
    IList<SrndQuery> queries = null;
    Token oprt = null;
    q = NQuery();
    while (true)
    {
        switch ((jj_ntk == -1) ? Jj_ntk() : jj_ntk)
        {
            case RegexpToken.NOT:
                ;
                break;
            default:
                jj_la1[2] = jj_gen;
                goto label_4;
        }
        oprt = Jj_consume_token(RegexpToken.NOT); /* keep only last used operator */
        if (queries is null)
        {
            queries = new JCG.List<SrndQuery>();
            queries.Add(q);
        }
        q = NQuery();
        queries.Add(q);
    }
label_4:
    { if (true) { return (queries is null) ? q : GetNotQuery(queries, oprt); } }
    throw Error.Create("Missing return statement in function");
}
public virtual void TestIntersection()
{
    int numBits = TestUtil.NextInt32(Random, 100, 1 << 20);
    int numDocIdSets = TestUtil.NextInt32(Random, 1, 4);
    IList<OpenBitSet> fixedSets = new JCG.List<OpenBitSet>(numDocIdSets);
    for (int i = 0; i < numDocIdSets; ++i)
    {
        fixedSets.Add(RandomOpenSet(numBits, Random.NextSingle()));
    }
    IList<WAH8DocIdSet> compressedSets = new JCG.List<WAH8DocIdSet>(numDocIdSets);
    foreach (OpenBitSet set in fixedSets)
    {
        compressedSets.Add(CopyOf(set, numBits));
    }

    WAH8DocIdSet union = WAH8DocIdSet.Intersect(compressedSets);
    OpenBitSet expected = new OpenBitSet(numBits);
    expected.Set(0, expected.Length);
    foreach (OpenBitSet set in fixedSets)
    {
        for (int previousDoc = -1, doc = set.NextSetBit(0); ; previousDoc = doc, doc = set.NextSetBit(doc + 1))
        {
            if (doc == -1)
            {
                expected.Clear(previousDoc + 1, set.Length);
                break;
            }
            else
            {
                expected.Clear(previousDoc + 1, doc);
            }
        }
    }
    AssertEquals(numBits, expected, union);
}
/// <summary>
/// Appends a new packet of buffered deletes to the stream,
/// setting its generation.
/// </summary>
public virtual long Push(FrozenBufferedUpdates packet)
{
    UninterruptableMonitor.Enter(this);
    try
    {
        /*
         * The insert operation must be atomic. If we let threads increment the gen
         * and push the packet afterwards we risk that packets are out of order.
         * With DWPT this is possible if two or more flushes are racing for pushing
         * updates. If the pushed packets got out of order we would lose documents,
         * since deletes are applied to the wrong segments.
         */
        packet.DelGen = nextGen++;
        if (Debugging.AssertsEnabled)
        {
            Debugging.Assert(packet.Any());
            Debugging.Assert(CheckDeleteStats());
            Debugging.Assert(packet.DelGen < nextGen);
            Debugging.Assert(updates.Count == 0 || updates[updates.Count - 1].DelGen < packet.DelGen, "Delete packets must be in order");
        }
        updates.Add(packet);
        numTerms.AddAndGet(packet.numTermDeletes);
        bytesUsed.AddAndGet(packet.bytesUsed);
        if (infoStream.IsEnabled("BD"))
        {
            infoStream.Message("BD", "push deletes " + packet + " delGen=" + packet.DelGen + " packetCount=" + updates.Count + " totBytesUsed=" + bytesUsed);
        }
        if (Debugging.AssertsEnabled)
        {
            Debugging.Assert(CheckDeleteStats());
        }
        return packet.DelGen;
    }
    finally
    {
        UninterruptableMonitor.Exit(this);
    }
}
/// <summary>
/// Read quality queries from a TREC 1MQ format topics file.
/// </summary>
/// <param name="reader">where queries are read from.</param>
/// <returns>the result quality queries.</returns>
/// <exception cref="IOException">if the queries cannot be read.</exception>
public virtual QualityQuery[] ReadQueries(TextReader reader)
{
    IList<QualityQuery> res = new JCG.List<QualityQuery>();
    string line;
    try
    {
        while (null != (line = reader.ReadLine()))
        {
            line = line.Trim();
            if (line.StartsWith("#", StringComparison.Ordinal))
            {
                continue;
            }
            // id
            int k = line.IndexOf(':');
            string id = line.Substring(0, k).Trim();
            // qtext
            string qtext = line.Substring(k + 1).Trim();
            // we got a topic!
            IDictionary<string, string> fields = new Dictionary<string, string>
            {
                [name] = qtext
            };
            //System.out.println("id: "+id+" qtext: "+qtext+" line: "+line);
            QualityQuery topic = new QualityQuery(id, fields);
            res.Add(topic);
        }
    }
    finally
    {
        reader.Dispose();
    }
    // sort result array (by ID)
    QualityQuery[] qq = res.ToArray();
    Array.Sort(qq);
    return qq;
}
internal virtual string FieldValue(int maxTF)
{
    IList<string> shuffled = new JCG.List<string>();
    StringBuilder sb = new StringBuilder();
    int i = Random.Next(terms.Length);
    while (i < terms.Length)
    {
        int tf = TestUtil.NextInt32(Random, 1, maxTF);
        for (int j = 0; j < tf; j++)
        {
            shuffled.Add(terms[i]);
        }
        i++;
    }
    shuffled.Shuffle(Random);
    foreach (string term in shuffled)
    {
        sb.Append(term);
        sb.Append(' ');
    }
    return sb.ToString();
}
private TermsFilter TermsFilter(bool singleField, IEnumerable<Term> termList)
{
    if (!singleField)
    {
        return new TermsFilter(termList.ToList());
    }
    TermsFilter filter;
    var bytes = new JCG.List<BytesRef>();
    string field = null;
    foreach (Term term in termList)
    {
        bytes.Add(term.Bytes);
        if (field != null)
        {
            assertEquals(term.Field, field);
        }
        field = term.Field;
    }
    assertNotNull(field);
    filter = new TermsFilter(field, bytes);
    return filter;
}
public MultiPhraseWeight(MultiPhraseQuery outerInstance, IndexSearcher searcher)
{
    this.outerInstance = outerInstance;
    this.similarity = searcher.Similarity;
    IndexReaderContext context = searcher.TopReaderContext;

    // compute idf
    var allTermStats = new JCG.List<TermStatistics>();
    foreach (Term[] terms in outerInstance.termArrays)
    {
        foreach (Term term in terms)
        {
            if (!termContexts.TryGetValue(term, out TermContext termContext) || termContext is null)
            {
                termContext = TermContext.Build(context, term);
                termContexts[term] = termContext;
            }
            allTermStats.Add(searcher.TermStatistics(term, termContext));
        }
    }
    stats = similarity.ComputeWeight(outerInstance.Boost, searcher.CollectionStatistics(outerInstance.field), allTermStats.ToArray());
}
/// <summary>
/// Constructor which deserializes from the given <see cref="IDataInput"/>.
/// </summary>
/// <exception cref="IOException"></exception>
public SessionToken(IDataInput reader)
{
    Id = reader.ReadUTF();
    Version = reader.ReadUTF();

    Dictionary<string, IList<RevisionFile>> sourceFiles = new Dictionary<string, IList<RevisionFile>>();
    int numSources = reader.ReadInt32();
    while (numSources > 0)
    {
        string source = reader.ReadUTF();
        int numFiles = reader.ReadInt32();
        IList<RevisionFile> files = new JCG.List<RevisionFile>(numFiles);
        for (int i = 0; i < numFiles; i++)
        {
            files.Add(new RevisionFile(reader.ReadUTF(), reader.ReadInt64()));
        }
        sourceFiles.Add(source, files);
        --numSources;
    }
    SourceFiles = sourceFiles;
}
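The constructor above implies a writer that emits id, version, a source count, and per source a name, a file count, and (fileName, length) pairs. A hypothetical sketch with BinaryWriter, purely to illustrate the mirrored field order; the real code uses IDataOutput/IDataInput, not BinaryWriter, and all names here are made up:

using System.IO;

internal static class SessionTokenFormatSketch
{
    // Writes fields in the same order the constructor above reads them.
    public static void Write(BinaryWriter w, string id, string version,
        (string source, (string fileName, long length)[] files)[] sources)
    {
        w.Write(id);
        w.Write(version);
        w.Write(sources.Length);
        foreach (var (source, files) in sources)
        {
            w.Write(source);
            w.Write(files.Length);
            foreach (var (fileName, length) in files)
            {
                w.Write(fileName);
                w.Write(length);
            }
        }
    }
}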
public override void SetUp()
{
    base.SetUp();
    dir = NewDirectory();
    fieldName = Random.NextBoolean() ? "field" : ""; // sometimes use an empty string as field name
    RandomIndexWriter writer = new RandomIndexWriter(Random, dir,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random, MockTokenizer.KEYWORD, false))
            .SetMaxBufferedDocs(TestUtil.NextInt32(Random, 50, 1000)));
    Document doc = new Document();
    Field field = NewStringField(fieldName, "", Field.Store.NO);
    doc.Add(field);
    JCG.List<string> terms = new JCG.List<string>();
    int num = AtLeast(200);
    for (int i = 0; i < num; i++)
    {
        string s = TestUtil.RandomUnicodeString(Random);
        field.SetStringValue(s);
        terms.Add(s);
        writer.AddDocument(doc);
    }

    if (Verbose)
    {
        // utf16 order
        terms.Sort();
        Console.WriteLine("UTF16 order:");
        foreach (string s in terms)
        {
            Console.WriteLine("  " + UnicodeUtil.ToHexString(s));
        }
    }

    reader = writer.GetReader();
    searcher1 = NewSearcher(reader);
    searcher2 = NewSearcher(reader);
    writer.Dispose();
}