public virtual void TestConcurrency()
{
    // tests that addTaxonomy and addCategory work in parallel
    int numCategories = AtLeast(10000);

    // build an input taxonomy index
    Directory src = NewDirectory();
    var tw = new DirectoryTaxonomyWriter(src);
    for (int i = 0; i < numCategories; i++)
    {
        tw.AddCategory(new FacetLabel("a", Convert.ToString(i, CultureInfo.InvariantCulture)));
    }
    tw.Dispose();

    // now add the taxonomy to an empty taxonomy, while adding the categories
    // again, in parallel -- in the end, no duplicate categories should exist.
    Directory dest = NewDirectory();
    var destTw = new DirectoryTaxonomyWriter(dest);
    var t = new ThreadAnonymousInnerClassHelper2(this, numCategories, destTw);
    t.Start();

    IOrdinalMap map = new MemoryOrdinalMap();
    destTw.AddTaxonomy(src, map);
    t.Join();
    destTw.Dispose();

    // now validate
    var dtr = new DirectoryTaxonomyReader(dest);
    // +2 to account for the root category + "a"
    Assert.AreEqual(numCategories + 2, dtr.Count);
    var categories = new JCG.HashSet<FacetLabel>();
    for (int i = 1; i < dtr.Count; i++)
    {
        FacetLabel cat = dtr.GetPath(i);
        Assert.True(categories.Add(cat), "category " + cat + " already existed");
    }
    dtr.Dispose();

    IOUtils.Dispose(src, dest);
}
public virtual void TestMerge()
{
    RandomDocumentFactory docFactory = new RandomDocumentFactory(this, 5, 20);
    int numDocs = AtLeast(100);
    int numDeletes = Random.Next(numDocs);
    ISet<int> deletes = new JCG.HashSet<int>();
    while (deletes.Count < numDeletes)
    {
        deletes.Add(Random.Next(numDocs));
    }
    foreach (Options options in ValidOptions())
    {
        RandomDocument[] docs = new RandomDocument[numDocs];
        for (int i = 0; i < numDocs; ++i)
        {
            docs[i] = docFactory.NewDocument(TestUtil.NextInt32(Random, 1, 3), AtLeast(10), options);
        }
        using Directory dir = NewDirectory();
        using RandomIndexWriter writer = new RandomIndexWriter(Random, dir);
        for (int i = 0; i < numDocs; ++i)
        {
            writer.AddDocument(AddId(docs[i].ToDocument(), "" + i));
            if (Rarely())
            {
                writer.Commit();
            }
        }
        foreach (int delete in deletes)
        {
            writer.DeleteDocuments(new Term("id", "" + delete));
        }
        // merge with deletes
        writer.ForceMerge(1);
        using IndexReader reader = writer.GetReader();
        for (int i = 0; i < numDocs; ++i)
        {
            if (!deletes.Contains(i))
            {
                int docID = DocID(reader, "" + i);
                AssertEquals(docs[i], reader.GetTermVectors(docID));
            }
        }
    }
}
private static ISet<string> LoadDefaultStopTagSet() // LUCENENET: Avoid static constructors (see https://github.com/apache/lucenenet/pull/224#issuecomment-469284006)
{
    try
    {
        CharArraySet tagset = LoadStopwordSet(false, typeof(JapaneseAnalyzer), "stoptags.txt", "#");
        var DEFAULT_STOP_TAGS = new JCG.HashSet<string>();
        foreach (string element in tagset)
        {
            DEFAULT_STOP_TAGS.Add(element);
        }
        return DEFAULT_STOP_TAGS;
    }
    catch (IOException ex)
    {
        // default set should always be present as it is part of the distribution (JAR)
        throw new Exception("Unable to load default stoptag set", ex);
    }
}
private void checkHits(SpatialArgs args, int assertNumFound, int[] assertIds)
{
    SearchResults got = executeQuery(strategy.MakeQuery(args), 100);
    assertEquals("" + args, assertNumFound, got.numFound);
    if (assertIds != null)
    {
        ISet<int> gotIds = new JCG.HashSet<int>();
        foreach (SearchResult result in got.results)
        {
            gotIds.Add(int.Parse(result.document.Get("id"), CultureInfo.InvariantCulture));
        }
        foreach (int assertId in assertIds)
        {
            assertTrue("has " + assertId, gotIds.Contains(assertId));
        }
    }
}
public static bool RetainAll<T>(this ICollection<T> source, ICollection<T> collection)
{
    if (source is null)
    {
        throw new ArgumentNullException(nameof(source));
    }
    if (collection is null)
    {
        throw new ArgumentNullException(nameof(collection));
    }
    if (source.Count == 0)
    {
        return false;
    }

    if (source is ISet<T> set)
    {
        int originalCount = set.Count;
        set.IntersectWith(collection);
        return originalCount != set.Count;
    }
    else if (source is IList<T> list)
    {
        int removed = list.RemoveAll((value) => !collection.Contains(value));
        return removed > 0;
    }

    // Slow path for unknown collection types
    var toRemove = new JCG.HashSet<T>();
    foreach (var e in source)
    {
        if (!collection.Contains(e))
        {
            toRemove.Add(e);
        }
    }
    if (toRemove.Count > 0)
    {
        return source.RemoveAll(toRemove);
    }
    return false;
}
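// Hypothetical usage sketch (not part of the Lucene.NET sources): it shows how the
// RetainAll extension above behaves when the source is an ISet<T>, which takes the
// fast IntersectWith path. It assumes the extension's containing static class is in
// scope; that class/namespace name is not shown in the snippet above.
using System;
using System.Collections.Generic;

public static class RetainAllUsageSketch
{
    public static void Main()
    {
        ICollection<string> source = new HashSet<string> { "a", "b", "c" };
        ICollection<string> keep = new[] { "b", "c", "d" };

        // Removes "a"; "b" and "c" remain because they also appear in keep.
        bool changed = source.RetainAll(keep);

        Console.WriteLine(changed);                   // True
        Console.WriteLine(string.Join(", ", source)); // "b" and "c" (set order not guaranteed)
    }
}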
/// <summary>
/// Reverses the language of the given (non-singleton) automaton while returning
/// the set of new initial states.
/// </summary>
public static ISet<State> Reverse(Automaton a)
{
    a.ExpandSingleton();
    // reverse all edges
    Dictionary<State, ISet<Transition>> m = new Dictionary<State, ISet<Transition>>();
    State[] states = a.GetNumberedStates();
    ISet<State> accept = new JCG.HashSet<State>();
    foreach (State s in states)
    {
        if (s.Accept)
        {
            accept.Add(s);
        }
    }
    foreach (State r in states)
    {
        m[r] = new JCG.HashSet<Transition>();
        r.accept = false;
    }
    foreach (State r in states)
    {
        foreach (Transition t in r.GetTransitions())
        {
            m[t.to].Add(new Transition(t.min, t.max, r));
        }
    }
    foreach (State r in states)
    {
        ISet<Transition> tr = m[r];
        r.SetTransitions(tr.ToArray(/*new Transition[tr.Count]*/));
    }
    // make new initial+final states
    a.initial.accept = true;
    a.initial = new State();
    foreach (State r in accept)
    {
        a.initial.AddEpsilon(r); // ensures that all initial states are reachable
    }
    a.deterministic = false;
    a.ClearNumberedStates();
    return accept;
}
protected internal RandomDocumentFactory(BaseTermVectorsFormatTestCase baseTermVectorsFormatTestCase, int distinctFieldNames, int disctinctTerms)
{
    this.outerInstance = baseTermVectorsFormatTestCase;
    ISet<string> fieldNames = new JCG.HashSet<string>();
    while (fieldNames.Count < distinctFieldNames)
    {
        fieldNames.Add(TestUtil.RandomSimpleString(Random));
        fieldNames.Remove("id");
    }
    this.fieldNames = fieldNames.ToArray(/*new string[0]*/);
    terms = new string[disctinctTerms];
    termBytes = new BytesRef[disctinctTerms];
    for (int i = 0; i < disctinctTerms; ++i)
    {
        terms[i] = TestUtil.RandomRealisticUnicodeString(Random);
        termBytes[i] = new BytesRef(terms[i]);
    }
}
public ICollection<DocFieldConsumerPerField> Fields()
{
    ICollection<DocFieldConsumerPerField> fields = new JCG.HashSet<DocFieldConsumerPerField>();
    for (int i = 0; i < fieldHash.Length; i++)
    {
        DocFieldProcessorPerField field = fieldHash[i];
        while (field != null)
        {
            fields.Add(field.consumer);
            field = field.next;
        }
    }
    if (Debugging.AssertsEnabled) Debugging.Assert(fields.Count == totalFieldCount);
    return fields;
}
public virtual void TestShrinkToAfterShortestMatch()
{
    Directory directory = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random, directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new TestPayloadAnalyzer(this)));

    Document doc = new Document();
    doc.Add(new TextField("content", new StringReader("a b c d e f g h i j a k")));
    writer.AddDocument(doc);

    IndexReader reader = writer.GetReader();
    IndexSearcher @is = NewSearcher(reader);
    writer.Dispose();

    SpanTermQuery stq1 = new SpanTermQuery(new Term("content", "a"));
    SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k"));
    SpanQuery[] sqs = new SpanQuery[] { stq1, stq2 };
    SpanNearQuery snq = new SpanNearQuery(sqs, 1, true);

    Spans spans = MultiSpansWrapper.Wrap(@is.TopReaderContext, snq);

    TopDocs topDocs = @is.Search(snq, 1);
    ISet<string> payloadSet = new JCG.HashSet<string>();
    for (int i = 0; i < topDocs.ScoreDocs.Length; i++)
    {
        while (spans.Next())
        {
            var payloads = spans.GetPayload();
            foreach (var payload in payloads)
            {
                payloadSet.Add(Encoding.UTF8.GetString(payload));
            }
        }
    }
    Assert.AreEqual(2, payloadSet.Count);
    Assert.IsTrue(payloadSet.Contains("a:Noise:10"));
    Assert.IsTrue(payloadSet.Contains("k:Noise:11"));
    reader.Dispose();
    directory.Dispose();
}
public static Languages GetInstance(string languagesResourceName)
{
    // read languages list
    ISet<string> ls = new JCG.HashSet<string>();
    Stream langIS = typeof(Languages).FindAndGetManifestResourceStream(languagesResourceName);
    if (langIS == null)
    {
        throw new ArgumentException("Unable to resolve required resource: " + languagesResourceName);
    }
    using (TextReader reader = new StreamReader(langIS, ResourceConstants.ENCODING))
    {
        bool inExtendedComment = false;
        string rawLine;
        while ((rawLine = reader.ReadLine()) != null)
        {
            string line = rawLine.Trim();
            if (inExtendedComment)
            {
                if (line.EndsWith(ResourceConstants.EXT_CMT_END, StringComparison.Ordinal))
                {
                    inExtendedComment = false;
                }
            }
            else
            {
                if (line.StartsWith(ResourceConstants.EXT_CMT_START, StringComparison.Ordinal))
                {
                    inExtendedComment = true;
                }
                else if (line.Length > 0)
                {
                    ls.Add(line);
                }
            }
        }
    }
    return new Languages(ls.AsReadOnly());
}
public bool MoveNext()
{
    while (currentDocId < docCount)
    {
        currentDocId++;
        if (liveDocs != null && !liveDocs.Get(currentDocId))
        {
            continue;
        }

        Document doc = outerInstance.m_reader.Document(currentDocId, relevantFields);

        BytesRef tempPayload = null;
        ISet<BytesRef> tempContexts = new JCG.HashSet<BytesRef>();

        if (hasPayloads)
        {
            IIndexableField payload = doc.GetField(outerInstance.m_payloadField);
            if (payload is null || (payload.GetBinaryValue() is null && payload.GetStringValue() is null))
            {
                continue;
            }
            tempPayload = payload.GetBinaryValue() ?? new BytesRef(payload.GetStringValue());
        }

        if (hasContexts)
        {
            IIndexableField[] contextFields = doc.GetFields(outerInstance.m_contextsField);
            foreach (IIndexableField contextField in contextFields)
            {
                if (contextField.GetBinaryValue() is null && contextField.GetStringValue() is null)
                {
                    continue;
                }
                else
                {
                    tempContexts.Add(contextField.GetBinaryValue() ?? new BytesRef(contextField.GetStringValue()));
                }
            }
        }
/// <summary>
/// <seealso cref="FieldFragList.Add(int, int, IList{WeightedPhraseInfo})"/>.
/// </summary>
public override void Add(int startOffset, int endOffset, IList<WeightedPhraseInfo> phraseInfoList)
{
    IList<SubInfo> tempSubInfos = new JCG.List<SubInfo>();
    IList<SubInfo> realSubInfos = new JCG.List<SubInfo>();
    ISet<string> distinctTerms = new JCG.HashSet<string>();
    int length = 0;

    foreach (WeightedPhraseInfo phraseInfo in phraseInfoList)
    {
        float phraseTotalBoost = 0;
        foreach (TermInfo ti in phraseInfo.TermsInfos)
        {
            if (distinctTerms.Add(ti.Text))
            {
                phraseTotalBoost += ti.Weight * phraseInfo.Boost;
            }
            length++;
        }
        tempSubInfos.Add(new SubInfo(phraseInfo.GetText(), phraseInfo.TermsOffsets, phraseInfo.Seqnum, phraseTotalBoost));
    }

    // We want the number of terms per fragment (length) to be included in the weight. Otherwise a
    // one-word query would produce an equal weight for all fragments regardless of how many words
    // they contain. To avoid letting fragments with a high number of words "outrank" more relevant
    // fragments, we "bend" the length with a standard normalization a little bit.
    float norm = length * (1 / (float)Math.Sqrt(length));

    float totalBoost = 0;
    foreach (SubInfo tempSubInfo in tempSubInfos)
    {
        float subInfoBoost = tempSubInfo.Boost * norm;
        realSubInfos.Add(new SubInfo(tempSubInfo.Text, tempSubInfo.TermsOffsets, tempSubInfo.Seqnum, subInfoBoost));
        totalBoost += subInfoBoost;
    }

    FragInfos.Add(new WeightedFragInfo(startOffset, endOffset, realSubInfos, totalBoost));
}
/// <summary>
/// Returns the files required for replication. By default, this method returns
/// all files that exist in the new revision, but not in the handler.
/// </summary>
protected virtual IDictionary<string, IList<RevisionFile>> RequiredFiles(IDictionary<string, IList<RevisionFile>> newRevisionFiles)
{
    IDictionary<string, IList<RevisionFile>> handlerRevisionFiles = handler.CurrentRevisionFiles;
    if (handlerRevisionFiles == null)
    {
        return newRevisionFiles;
    }

    Dictionary<string, IList<RevisionFile>> requiredFiles = new Dictionary<string, IList<RevisionFile>>();
    foreach (var e in handlerRevisionFiles)
    {
        // put the handler files in a Set, for faster contains() checks later
        ISet<string> handlerFiles = new JCG.HashSet<string>();
        foreach (RevisionFile file in e.Value)
        {
            handlerFiles.Add(file.FileName);
        }

        // make sure to preserve revisionFiles order
        IList<RevisionFile> res = new JCG.List<RevisionFile>();
        string source = e.Key;
        if (Debugging.AssertsEnabled) Debugging.Assert(newRevisionFiles.ContainsKey(source), "source not found in newRevisionFiles: {0}", newRevisionFiles);
        foreach (RevisionFile file in newRevisionFiles[source])
        {
            if (!handlerFiles.Contains(file.FileName))
            {
                res.Add(file);
            }
        }

        requiredFiles[source] = res;
    }

    return requiredFiles;
}
public virtual void TestReserved()
{
    string test = "aaa bbb <reserved ccc=\"ddddd\"> eeee </reserved> ffff <reserved ggg=\"hhhh\"/> <other/>";
    ISet<string> set = new JCG.HashSet<string>();
    set.Add("reserved");
    TextReader reader = new HTMLStripCharFilter(new StringReader(test), set);
    StringBuilder builder = new StringBuilder();
    int ch = 0;
    while ((ch = reader.Read()) > 0)
    {
        builder.Append((char)ch);
    }
    string result = builder.ToString();
    // System.out.println("Result: " + result);
    assertTrue("Escaped tag not preserved: " + result.IndexOf("reserved", StringComparison.Ordinal), result.IndexOf("reserved", StringComparison.Ordinal) == 9);
    assertTrue("Escaped tag not preserved: " + result.IndexOf("reserved", 15, StringComparison.Ordinal), result.IndexOf("reserved", 15, StringComparison.Ordinal) == 38);
    assertTrue("Escaped tag not preserved: " + result.IndexOf("reserved", 41, StringComparison.Ordinal), result.IndexOf("reserved", 41, StringComparison.Ordinal) == 54);
    assertTrue("Other tag should be removed", result.IndexOf("other", StringComparison.Ordinal) == -1);
}
private void _CheckHits(bool bbox, IPoint pt, double distKM, int assertNumFound, params int[] assertIds)
{
    SpatialOperation op = SpatialOperation.Intersects;
    double distDEG = DistanceUtils.Dist2Degrees(distKM, DistanceUtils.EarthMeanRadiusKilometers);
    IShape shape = ctx.MakeCircle(pt, distDEG);
    if (bbox)
    {
        shape = shape.BoundingBox;
    }

    SpatialArgs args = new SpatialArgs(op, shape);
    //args.setDistPrecision(0.025);
    Query query;
    if (Random.nextBoolean())
    {
        query = strategy.MakeQuery(args);
    }
    else
    {
        query = new FilteredQuery(new MatchAllDocsQuery(), strategy.MakeFilter(args));
    }

    SearchResults results = executeQuery(query, 100);
    assertEquals("" + shape, assertNumFound, results.numFound);
    if (assertIds != null)
    {
        ISet<int> resultIds = new JCG.HashSet<int>();
        foreach (SearchResult result in results.results)
        {
            resultIds.Add(int.Parse(result.document.Get("id"), CultureInfo.InvariantCulture));
        }
        foreach (int assertId in assertIds)
        {
            assertTrue("has " + assertId, resultIds.Contains(assertId));
        }
    }
}
private List<AbstractDistinctValuesCollector.IGroupCount<IComparable>> CreateExpectedResult(IndexContext context, string term, Sort groupSort, int topN)
{
    List<AbstractDistinctValuesCollector.IGroupCount<IComparable>> result = new List<AbstractDistinctValuesCollector.IGroupCount<IComparable>>();
    IDictionary<string, ISet<string>> groupCounts = context.searchTermToGroupCounts[term];
    int i = 0;
    foreach (string group in groupCounts.Keys)
    {
        if (topN <= i++)
        {
            break;
        }
        ISet<BytesRef> uniqueValues = new JCG.HashSet<BytesRef>();
        foreach (string val in groupCounts[group])
        {
            uniqueValues.Add(val != null ? new BytesRef(val) : null);
        }
        var gc = new GroupCount(group != null ? new BytesRef(group) : (BytesRef)null, uniqueValues);
        result.Add(gc);
    }
    return result;
}
/// <summary>
/// Returns an automaton that accepts the union of the languages of the given
/// automata.
/// <para/>
/// Complexity: linear in number of states.
/// </summary>
public static Automaton Union(ICollection<Automaton> l)
{
    JCG.HashSet<int> ids = new JCG.HashSet<int>();
    foreach (Automaton a in l)
    {
        ids.Add(a.GetHashCode());
    }
    bool has_aliases = ids.Count != l.Count;
    State s = new State();
    foreach (Automaton b in l)
    {
        if (BasicOperations.IsEmpty(b))
        {
            continue;
        }
        Automaton bb = b;
        if (has_aliases)
        {
            bb = bb.CloneExpanded();
        }
        else
        {
            bb = bb.CloneExpandedIfRequired();
        }
        s.AddEpsilon(bb.initial);
    }
    Automaton a_ = new Automaton
    {
        initial = s,
        deterministic = false
    };
    //a.clearHashCode();
    a_.ClearNumberedStates();
    a_.CheckMinimizeAlways();
    return a_;
}
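// Hypothetical usage sketch (not part of the snippet above): unions two simple string
// automata and tests membership. It assumes Lucene.Net.Util.Automaton's
// BasicAutomata.MakeString and BasicOperations.Run mirror their Lucene/Java
// counterparts; verify the exact API against the version you are using.
using System;
using Lucene.Net.Util.Automaton;

public static class UnionUsageSketch
{
    public static void Main()
    {
        Automaton foo = BasicAutomata.MakeString("foo");
        Automaton bar = BasicAutomata.MakeString("bar");

        // The resulting automaton accepts exactly the language { "foo", "bar" }.
        Automaton union = BasicOperations.Union(new[] { foo, bar });

        Console.WriteLine(BasicOperations.Run(union, "foo")); // True
        Console.WriteLine(BasicOperations.Run(union, "baz")); // False
    }
}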
/// <summary>
/// Cleans up the index directory from old index files. This method uses the
/// last commit found by <see cref="GetLastCommit(Directory)"/>. If it matches the
/// expected <paramref name="segmentsFile"/>, then all files not referenced by this commit point
/// are deleted.
/// </summary>
/// <remarks>
/// <b>NOTE:</b> This method does a best effort attempt to clean the index
/// directory. It suppresses any exceptions that occur, as this can be retried
/// the next time.
/// </remarks>
public static void CleanupOldIndexFiles(Directory directory, string segmentsFile)
{
    try
    {
        IndexCommit commit = GetLastCommit(directory);
        // commit == null means weird IO errors occurred, ignore them
        // if there were any IO errors reading the expected commit point (i.e.
        // segments files mismatch), then ignore that commit either.
        if (commit != null && commit.SegmentsFileName.Equals(segmentsFile, StringComparison.Ordinal))
        {
            ISet<string> commitFiles = new JCG.HashSet<string>(commit.FileNames);
            commitFiles.Add(IndexFileNames.SEGMENTS_GEN);
            Regex matcher = IndexFileNames.CODEC_FILE_PATTERN;
            foreach (string file in directory.ListAll())
            {
                if (!commitFiles.Contains(file) && (matcher.IsMatch(file) || file.StartsWith(IndexFileNames.SEGMENTS, StringComparison.Ordinal)))
                {
                    try
                    {
                        directory.DeleteFile(file);
                    }
                    catch
                    {
                        // suppress, it's just a best effort
                    }
                }
            }
        }
    }
    catch
    {
        // ignore any errors that happen during this state and only log it. this
        // cleanup will have a chance to succeed the next time we get a new
        // revision.
    }
}
private void CheckTermsOrder(IndexReader r, ISet<string> allTerms, bool isTop)
{
    TermsEnum terms = MultiFields.GetFields(r).GetTerms("f").GetIterator(null);

    BytesRef last = new BytesRef();

    ISet<string> seenTerms = new JCG.HashSet<string>();

    while (true)
    {
        BytesRef term = terms.Next();
        if (term == null)
        {
            break;
        }
        Assert.IsTrue(last.CompareTo(term) < 0);
        last.CopyBytes(term);

        string s = term.Utf8ToString();
        Assert.IsTrue(allTerms.Contains(s), "term " + TermDesc(s) + " was not added to index (count=" + allTerms.Count + ")");
        seenTerms.Add(s);
    }

    if (isTop)
    {
        Assert.IsTrue(allTerms.SetEquals(seenTerms));
    }

    // Test seeking:
    IEnumerator<string> it = seenTerms.GetEnumerator();
    while (it.MoveNext())
    {
        BytesRef tr = new BytesRef(it.Current);
        Assert.AreEqual(TermsEnum.SeekStatus.FOUND, terms.SeekCeil(tr), "seek failed for term=" + TermDesc(tr.Utf8ToString()));
    }
}
public override void Run()
{
    try
    {
        Document document = new Document();
        Field field = NewTextField("field", "", Field.Store.NO);
        document.Add(field);
        startingGun.Wait();
        while (!(postings.Count == 0))
        {
            StringBuilder text = new StringBuilder();
            ISet<string> visited = new JCG.HashSet<string>();
            for (int i = 0; i < maxTermsPerDoc; i++)
            {
                string token;
                if (!postings.TryDequeue(out token))
                {
                    break;
                }
                if (visited.Contains(token))
                {
                    // Put it back:
                    postings.Enqueue(token);
                    break;
                }
                text.Append(' ');
                text.Append(token);
                visited.Add(token);
            }
            field.SetStringValue(text.ToString());
            iw.AddDocument(document);
        }
    }
    catch (Exception e)
    {
        throw new Exception(e.Message, e);
    }
}
public virtual void TestOverrideGetters()
{
    // Test that IndexWriterConfig overrides all getters, so that javadocs
    // contain all methods for the users. Also, ensures that IndexWriterConfig
    // doesn't declare getters that are not declared on LiveIWC.
    ISet<string> liveGetters = new JCG.HashSet<string>();
    foreach (MethodInfo m in typeof(LiveIndexWriterConfig).GetMethods())
    {
        if (m.Name.StartsWith("get", StringComparison.Ordinal) && !m.IsStatic)
        {
            liveGetters.Add(m.Name);
        }
    }

    foreach (MethodInfo m in typeof(IndexWriterConfig).GetMethods())
    {
        if (m.Name.StartsWith("get", StringComparison.Ordinal) && !m.Name.StartsWith("get_", StringComparison.Ordinal) && !m.IsStatic)
        {
            Assert.AreEqual(typeof(IndexWriterConfig), m.DeclaringType, "method " + m.Name + " not overrided by IndexWriterConfig");
            Assert.IsTrue(liveGetters.Contains(m.Name), "method " + m.Name + " not declared on LiveIndexWriterConfig");
        }
    }
}
private void PruneDups(IList<OneSyn> syns)
{
    ISet<string> seen = new JCG.HashSet<string>();
    foreach (OneSyn syn in syns)
    {
        int idx = 0;
        while (idx < syn.@out.Count)
        {
            string @out = syn.@out[idx];
            if (!seen.Contains(@out))
            {
                seen.Add(@out);
                idx++;
            }
            else
            {
                syn.@out.RemoveAt(idx);
            }
        }
        seen.Clear();
    }
}
private NormalizeCharMap RandomMap()
{
    Random random = Random;
    NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder();
    // we can't add duplicate keys, or NormalizeCharMap gets angry
    ISet<string> keys = new JCG.HashSet<string>();
    int num = random.Next(5);
    //System.out.println("NormalizeCharMap=");
    for (int i = 0; i < num; i++)
    {
        string key = TestUtil.RandomSimpleString(random);
        if (!keys.Contains(key) && key.Length != 0)
        {
            string value = TestUtil.RandomSimpleString(random);
            builder.Add(key, value);
            keys.Add(key);
            //System.out.println("mapping: '" + key + "' => '" + value + "'");
        }
    }
    return builder.Build();
}
internal AttributedIterator(AttributedString attrString, AttributedCharacterIteratorAttribute[] attributes, int begin, int end)
{
    if (begin < 0 || end > attrString.text.Length || begin > end)
    {
        throw new ArgumentException();
    }
    this.begin = begin;
    this.end = end;
    offset = begin;
    this.attrString = attrString;
    if (attributes != null)
    {
        var set = new JCG.HashSet<AttributedCharacterIteratorAttribute>((attributes.Length * 4 / 3) + 1);
        for (int i = attributes.Length; --i >= 0;)
        {
            set.Add(attributes[i]);
        }
        attributesAllowed = set;
    }
}
protected virtual void AssertEquals(RandomDocument doc, Fields fields)
{
    // compare field names
    Assert.AreEqual(doc is null, fields is null);
    Assert.AreEqual(doc.fieldNames.Length, fields.Count);
    ISet<string> fields1 = new JCG.HashSet<string>();
    ISet<string> fields2 = new JCG.HashSet<string>();
    for (int i = 0; i < doc.fieldNames.Length; ++i)
    {
        fields1.Add(doc.fieldNames[i]);
    }
    foreach (string field in fields)
    {
        fields2.Add(field);
    }
    Assert.IsTrue(fields1.SetEquals(fields2));

    for (int i = 0; i < doc.fieldNames.Length; ++i)
    {
        AssertEquals(doc.tokenStreams[i], doc.fieldTypes[i], fields.GetTerms(doc.fieldNames[i]));
    }
}
public override Query Rewrite(Query original)
{
    Query rewritten = base.Rewrite(original);
    ISet<Term> terms = new JCG.HashSet<Term>();
    rewritten.ExtractTerms(terms);

    // Make a single request to remote nodes for term
    // stats:
    for (int nodeID = 0; nodeID < nodeVersions.Length; nodeID++)
    {
        if (nodeID == MyNodeID)
        {
            continue;
        }

        ISet<Term> missing = new JCG.HashSet<Term>();
        foreach (Term term in terms)
        {
            TermAndShardVersion key = new TermAndShardVersion(nodeID, nodeVersions[nodeID], term);
            if (!outerInstance.termStatsCache.ContainsKey(key))
            {
                missing.Add(term);
            }
        }
        if (missing.Count != 0)
        {
            foreach (KeyValuePair<Term, TermStatistics> ent in outerInstance.outerInstance.GetNodeTermStats(missing, nodeID, nodeVersions[nodeID]))
            {
                TermAndShardVersion key = new TermAndShardVersion(nodeID, nodeVersions[nodeID], ent.Key);
                outerInstance.termStatsCache[key] = ent.Value;
            }
        }
    }

    return rewritten;
}
private static ISet<string> DifFiles(string[] files1, string[] files2)
{
    ISet<string> set1 = new JCG.HashSet<string>();
    ISet<string> set2 = new JCG.HashSet<string>();
    ISet<string> extra = new JCG.HashSet<string>();

    for (int x = 0; x < files1.Length; x++)
    {
        set1.Add(files1[x]);
    }
    for (int x = 0; x < files2.Length; x++)
    {
        set2.Add(files2[x]);
    }

    IEnumerator<string> i1 = set1.GetEnumerator();
    while (i1.MoveNext())
    {
        string o = i1.Current;
        if (!set2.Contains(o))
        {
            extra.Add(o);
        }
    }
    IEnumerator<string> i2 = set2.GetEnumerator();
    while (i2.MoveNext())
    {
        string o = i2.Current;
        if (!set1.Contains(o))
        {
            extra.Add(o);
        }
    }
    return extra;
}
/// <summary>
/// Returns whitespace- and/or comma-separated set of values, or null if none are found </summary>
public virtual ISet<string> GetSet(IDictionary<string, string> args, string name)
{
    if (args.TryGetValue(name, out string s))
    {
        args.Remove(name);
        ISet<string> set = null;
        Match matcher = ITEM_PATTERN.Match(s);
        if (matcher.Success)
        {
            set = new JCG.HashSet<string>
            {
                matcher.Groups[0].Value
            };
            matcher = matcher.NextMatch();
            while (matcher.Success)
            {
                set.Add(matcher.Groups[0].Value);
                matcher = matcher.NextMatch();
            }
        }
        return set;
    }
    return null;
}
public override LanguageSet RestrictTo(LanguageSet other)
{
    if (other == Languages.NO_LANGUAGES)
    {
        return other;
    }
    else if (other == Languages.ANY_LANGUAGE)
    {
        return this;
    }
    else
    {
        SomeLanguages sl = (SomeLanguages)other;
        ISet<string> ls = new JCG.HashSet<string>(Math.Min(languages.Count, sl.languages.Count));
        foreach (string lang in languages)
        {
            if (sl.languages.Contains(lang))
            {
                ls.Add(lang);
            }
        }
        return From(ls);
    }
}
private void AddTerms(IndexReader reader, FieldVals f)
{
    if (f.queryString is null)
    {
        return;
    }
    Terms terms = MultiFields.GetTerms(reader, f.fieldName);
    if (terms is null)
    {
        return;
    }
    TokenStream ts = analyzer.GetTokenStream(f.fieldName, f.queryString);
    try
    {
        ICharTermAttribute termAtt = ts.AddAttribute<ICharTermAttribute>();

        int corpusNumDocs = reader.NumDocs;
        ISet<string> processedTerms = new JCG.HashSet<string>();
        ts.Reset();
        while (ts.IncrementToken())
        {
            string term = termAtt.ToString();
            if (!processedTerms.Contains(term))
            {
                processedTerms.Add(term);
                ScoreTermQueue variantsQ = new ScoreTermQueue(MAX_VARIANTS_PER_TERM); //maxNum variants considered for any one term
                float minScore = 0;
                Term startTerm = new Term(f.fieldName, term);
                AttributeSource atts = new AttributeSource();
                IMaxNonCompetitiveBoostAttribute maxBoostAtt = atts.AddAttribute<IMaxNonCompetitiveBoostAttribute>();
#pragma warning disable 612, 618
                SlowFuzzyTermsEnum fe = new SlowFuzzyTermsEnum(terms, atts, startTerm, f.minSimilarity, f.prefixLength);
#pragma warning restore 612, 618
                //store the df so all variants use same idf
                int df = reader.DocFreq(startTerm);
                int numVariants = 0;
                int totalVariantDocFreqs = 0;
                BytesRef possibleMatch;
                IBoostAttribute boostAtt = fe.Attributes.AddAttribute<IBoostAttribute>();
                while (fe.MoveNext())
                {
                    possibleMatch = fe.Term;
                    numVariants++;
                    totalVariantDocFreqs += fe.DocFreq;
                    float score = boostAtt.Boost;
                    if (variantsQ.Count < MAX_VARIANTS_PER_TERM || score > minScore)
                    {
                        ScoreTerm st = new ScoreTerm(new Term(startTerm.Field, BytesRef.DeepCopyOf(possibleMatch)), score, startTerm);
                        variantsQ.InsertWithOverflow(st);
                        minScore = variantsQ.Top.Score; // maintain minScore
                    }
                    maxBoostAtt.MaxNonCompetitiveBoost = variantsQ.Count >= MAX_VARIANTS_PER_TERM ? minScore : float.NegativeInfinity;
                }

                if (numVariants > 0)
                {
                    int avgDf = totalVariantDocFreqs / numVariants;
                    if (df == 0) //no direct match we can use as df for all variants
                    {
                        df = avgDf; //use avg df of all variants
                    }

                    // take the top variants (scored by edit distance) and reset the score
                    // to include an IDF factor then add to the global queue for ranking
                    // overall top query terms
                    int size = variantsQ.Count;
                    for (int i = 0; i < size; i++)
                    {
                        ScoreTerm st = variantsQ.Pop();
                        st.Score = (st.Score * st.Score) * sim.Idf(df, corpusNumDocs);
                        q.InsertWithOverflow(st);
                    }
                }
            }
        }
        ts.End();
    }
    finally
    {
        IOUtils.DisposeWhileHandlingException(ts);
    }
}