예제 #1
0
 /// <summary>
 /// Builds the un-boosted Lucene query for the wrapped query across the configured field names.
 /// With exactly one field name the wrapped query is used directly; otherwise one
 /// <see cref="FieldsQuery"/> per field name is created and the results are combined in an <see cref="OrQuery"/>.
 /// </summary>
 /// <param name="qf">Query factory passed through to the wrapped query.</param>
 /// <returns>The resulting Lucene query.</returns>
 public virtual Search.Query MakeLuceneQueryNoBoost(BasicQueryFactory qf)
 {
     // Single field name: no new queries needed.
     if (fieldNames.Count == 1)
     {
         return q.MakeLuceneQueryFieldNoBoost(fieldNames[0], qf);
     }

     // OR query over the fields: each field gets its own clone of the wrapped query.
     IList<SrndQuery> perFieldQueries = new JCG.List<SrndQuery>();
     foreach (var name in fieldNames)
     {
         perFieldQueries.Add(new FieldsQuery((SrndQuery)q.Clone(), name, fieldOp));
     }

     var disjunction = new OrQuery(perFieldQueries, true /* infix OR for field names */, orOperatorName);
     return disjunction.MakeLuceneQueryField(null, qf);
 }
예제 #2
0
        /// <summary>
        /// Flattens the token table into a single <see cref="T:IList{SegToken}"/>, visiting the
        /// table keys in ascending order starting at -1.
        /// </summary>
        /// <returns><see cref="T:IList{SegToken}"/> containing every token stored in the table.</returns>
        public virtual IList<SegToken> ToTokenList()
        {
            IList<SegToken> flattened = new JCG.List<SegToken>();

            // Number of table entries that still have to be visited; the scan stops once it reaches zero.
            int remaining = tokenListTable.Count;

            for (int start = -1; remaining > 0; start++)
            {
                if (!IsStartExist(start))
                {
                    continue;
                }

                foreach (SegToken token in tokenListTable[start])
                {
                    flattened.Add(token);
                }
                remaining--;
            }

            return flattened;
        }
예제 #3
0
 /// <summary>
 /// Adds <paramref name="prefix"/> to <paramref name="output"/> using the wrapped outputs.
 /// When <paramref name="output"/> is a list, the prefix is added to every element and a new list is returned.
 /// </summary>
 /// <param name="prefix">A single output; asserted not to be a list.</param>
 /// <param name="output">Either a single output or an <see cref="IList"/> of outputs.</param>
 /// <returns>The single combined output, or a list of combined outputs.</returns>
 public override object Add(object prefix, object output)
 {
     if (Debugging.AssertsEnabled)
     {
         Debugging.Assert(!(prefix is IList));
     }

     if (output is IList outputList)
     {
         IList<T> combined = new JCG.List<T>(outputList.Count);
         foreach (object element in outputList)
         {
             combined.Add(outputs.Add((T)prefix, (T)element));
         }
         return combined;
     }

     return outputs.Add((T)prefix, (T)output);
 }
예제 #4
0
        /// <summary>
        /// Consumes input tokens up to and including the next token flagged with
        /// <c>OpenNLPTokenizer.EOS_FLAG_BIT</c> (or until the input is exhausted), capturing the attribute
        /// state of every token and the term/type of every non-keyword token.
        /// </summary>
        private void NextSentence()
        {
            var terms = new JCG.List<string>();
            var types = new JCG.List<string>();

            sentenceTokenAttrs.Clear();

            while (true)
            {
                moreTokensAvailable = m_input.IncrementToken();
                if (!moreTokensAvailable)
                {
                    break;
                }

                // Keyword tokens are kept in the captured state but excluded from the term/type arrays.
                if (!keywordAtt.IsKeyword)
                {
                    terms.Add(termAtt.ToString());
                    types.Add(typeAtt.Type);
                }

                bool reachedEndOfSentence = (flagsAtt.Flags & OpenNLPTokenizer.EOS_FLAG_BIT) != 0;
                sentenceTokenAttrs.Add(m_input.CloneAttributes());
                if (reachedEndOfSentence)
                {
                    break;
                }
            }

            // Empty sentences are represented by null rather than by empty arrays.
            sentenceTokens = terms.Count > 0 ? terms.ToArray() : null;
            sentenceTokenTypes = types.Count > 0 ? types.ToArray() : null;
        }
예제 #5
0
        /// <summary>
        /// Runs <paramref name="callable"/> <c>warmup + rounds</c> times and records the elapsed
        /// time of each run in milliseconds.
        /// </summary>
        /// <param name="callable">The code to measure; its result is stored in <c>guard</c>.</param>
        /// <returns>A <see cref="BenchmarkResult"/> built from the recorded times.</returns>
        private BenchmarkResult Measure(ICallable<int> callable)
        {
            const double NanosPerMillisecond = 1000000;

            try
            {
                var timings = new JCG.List<double>();
                int run = 0;
                while (run < warmup + rounds)
                {
                    long startedAt = J2N.Time.NanoTime();
                    guard = Convert.ToInt32(callable.Call());
                    long elapsedNanos = J2N.Time.NanoTime() - startedAt;
                    timings.Add(elapsedNanos / NanosPerMillisecond);
                    run++;
                }
                return new BenchmarkResult(timings, warmup, rounds);
            }
            catch (Exception e) when (e.IsException())
            {
                e.printStackTrace();
                throw RuntimeException.Create(e);
            }
        }
예제 #6
0
        public virtual void TestCopyJDKSet()
        {
            // Checks that CharArraySet.Copy of a plain hash set produces a copy that contains the
            // same words, does not match upper-cased variants, and can be modified without
            // affecting the source set.
            ISet<string> set = new JCG.HashSet<string>();

            IList<string> stopwords = TEST_STOP_WORDS;
            IList<string> stopwordsUpper = new JCG.List<string>();

            foreach (string word in stopwords)
            {
                stopwordsUpper.Add(word.ToUpperInvariant());
            }
            set.addAll(TEST_STOP_WORDS);

            CharArraySet copy = CharArraySet.Copy(TEST_VERSION_CURRENT, set);

            // The size check used to be repeated verbatim; a single assertion is sufficient.
            assertEquals(set.Count, copy.size());

            assertTrue(copy.containsAll(stopwords));
            foreach (string upper in stopwordsUpper)
            {
                assertFalse(copy.contains(upper));
            }

            IList<string> newWords = new JCG.List<string>();

            foreach (string word in stopwords)
            {
                newWords.Add(word + "_1");
            }
            copy.addAll(newWords);

            assertTrue(copy.containsAll(stopwords));
            assertTrue(copy.containsAll(newWords));

            // new added terms are not in the source set
            foreach (string added in newWords)
            {
                assertFalse(set.Contains(added));
            }
        }
예제 #7
0
        /// <summary>
        /// Adds a weighted fragment covering <paramref name="startOffset"/> to <paramref name="endOffset"/>.
        /// Each phrase contributes the weight of its terms (a term text counts only the first time it is
        /// seen in the fragment), and every phrase boost is then scaled by a length-based norm.
        /// <seealso cref="FieldFragList.Add(int, int, IList{WeightedPhraseInfo})"/>.
        /// </summary>
        public override void Add(int startOffset, int endOffset, IList<WeightedPhraseInfo> phraseInfoList)
        {
            ISet<string> seenTerms = new JCG.HashSet<string>();
            IList<SubInfo> unnormalized = new JCG.List<SubInfo>();
            int termCount = 0;

            foreach (WeightedPhraseInfo phrase in phraseInfoList)
            {
                float phraseBoost = 0;
                foreach (TermInfo term in phrase.TermsInfos)
                {
                    // Only the first occurrence of a term text adds to the boost ...
                    if (seenTerms.Add(term.Text))
                    {
                        phraseBoost += term.Weight * phrase.Boost;
                    }
                    // ... but every occurrence counts towards the fragment length.
                    termCount++;
                }

                unnormalized.Add(new SubInfo(phrase.GetText(), phrase.TermsOffsets, phrase.Seqnum, phraseBoost));
            }

            // We want that terms per fragment (length) is included into the weight. Otherwise a one-word-query
            // would cause an equal weight for all fragments regardless of how many words they contain.
            // To avoid that fragments containing a high number of words possibly "outrank" more relevant fragments
            // we "bend" the length with a standard-normalization a little bit.
            float norm = termCount * (1 / (float)Math.Sqrt(termCount));

            IList<SubInfo> normalized = new JCG.List<SubInfo>();
            float fragmentBoost = 0;
            foreach (SubInfo raw in unnormalized)
            {
                float scaledBoost = raw.Boost * norm;
                normalized.Add(new SubInfo(raw.Text, raw.TermsOffsets, raw.Seqnum, scaledBoost));
                fragmentBoost += scaledBoost;
            }

            FragInfos.Add(new WeightedFragInfo(startOffset, endOffset, normalized, fragmentBoost));
        }
예제 #8
0
        /// <summary>
        /// Reads the stream, consuming a format of tab-separated values with 3 columns:
        /// an "id", a "name" and the "shape". Empty lines and lines starting with a '#' are skipped.
        /// The stream is closed.
        /// </summary>
        /// <param name="in">UTF-8 encoded input stream.</param>
        /// <param name="ctx">Spatial context used to parse the WKT shape column.</param>
        /// <returns>An enumerator over the parsed rows, in file order.</returns>
        public static IEnumerator<SpatialTestData> GetTestData(Stream @in, SpatialContext ctx)
        {
            IList<SpatialTestData> parsed = new JCG.List<SpatialTestData>();

            using (TextReader reader = new StreamReader(@in, Encoding.UTF8))
            {
                for (string line = reader.ReadLine(); line != null; line = reader.ReadLine())
                {
                    bool isBlankOrComment = line.Length == 0 || line[0] == '#';
                    if (isBlankOrComment)
                    {
                        continue;
                    }

                    string[] columns = line.Split('\t').TrimEnd();
                    if (columns.Length != 3)
                    {
                        throw RuntimeException.Create("bad format; expecting 3 tab-separated values for line: " + line);
                    }

                    SpatialTestData row = new SpatialTestData();
                    row.id = columns[0];
                    row.name = columns[1];
                    try
                    {
                        row.shape = ctx.ReadShapeFromWkt(columns[2]);
                    }
                    catch (Spatial4n.Exceptions.ParseException e) // LUCENENET: Spatial4n has its own ParseException that is different than the one in Support
                    {
                        throw RuntimeException.Create(e);
                    }
                    parsed.Add(row);
                }
            }

            return parsed.GetEnumerator();
        }
예제 #9
0
        /// <summary>
        /// Builds the ternary tree from the entries supplied by <paramref name="enumerator"/>.
        /// All entries are buffered first and then inserted as a balanced tree.
        /// </summary>
        /// <param name="enumerator">Source of terms and weights; payloads and contexts are not supported.</param>
        /// <exception cref="ArgumentNullException"><paramref name="enumerator"/> is <c>null</c>.</exception>
        /// <exception cref="ArgumentException">The enumerator has payloads or contexts.</exception>
        public override void Build(IInputEnumerator enumerator)
        {
            // LUCENENET: Added guard clause for null
            if (enumerator is null)
            {
                throw new ArgumentNullException(nameof(enumerator));
            }

            if (enumerator.HasPayloads)
            {
                throw new ArgumentException("this suggester doesn't support payloads");
            }

            if (enumerator.HasContexts)
            {
                throw new ArgumentException("this suggester doesn't support contexts");
            }

            root = new TernaryTreeNode();

            // buffer first
#pragma warning disable 612, 618
            bool needsSorting = enumerator.Comparer != BytesRef.UTF8SortedAsUTF16Comparer;
            if (needsSorting)
            {
                // make sure it's sorted and the comparer uses UTF16 sort order
                enumerator = new SortedInputEnumerator(enumerator, BytesRef.UTF8SortedAsUTF16Comparer);
            }
#pragma warning restore 612, 618

            var keys = new JCG.List<string>();
            var weights = new JCG.List<object>();
            var utf16Scratch = new CharsRef();

            while (enumerator.MoveNext())
            {
                BytesRef utf8Term = enumerator.Current;
                utf16Scratch.Grow(utf8Term.Length);
                UnicodeUtil.UTF8toUTF16(utf8Term.Bytes, utf8Term.Offset, utf8Term.Length, utf16Scratch);
                keys.Add(utf16Scratch.ToString());
                weights.Add(enumerator.Weight);
            }

            autocomplete.BalancedTree(keys.ToArray(), weights.ToArray(), 0, keys.Count - 1, root);
        }
예제 #10
0
        /// <summary>
        /// Merges the norms of every field that has norms, across all readers in the merge state,
        /// into a norms consumer obtained from the codec.
        /// </summary>
        /// <param name="segmentWriteState">Write state used to create the norms consumer.</param>
        private void MergeNorms(SegmentWriteState segmentWriteState)
        {
            DocValuesConsumer normsConsumer = codec.NormsFormat.NormsConsumer(segmentWriteState);
            bool completed = false;

            try
            {
                foreach (FieldInfo field in mergeState.FieldInfos)
                {
                    if (!field.HasNorms)
                    {
                        continue;
                    }

                    IList<NumericDocValues> normsPerReader = new JCG.List<NumericDocValues>();
                    IList<IBits> docsWithField = new JCG.List<IBits>();
                    foreach (AtomicReader reader in mergeState.Readers)
                    {
                        // Readers without norms for this field contribute the empty placeholder.
                        normsPerReader.Add(reader.GetNormValues(field.Name) ?? DocValues.EMPTY_NUMERIC);
                        docsWithField.Add(new Lucene.Net.Util.Bits.MatchAllBits(reader.MaxDoc));
                    }

                    normsConsumer.MergeNumericField(field, mergeState, normsPerReader, docsWithField);
                }

                completed = true;
            }
            finally
            {
                // After a failure, dispose via the exception-handling variant instead of the plain one.
                if (!completed)
                {
                    IOUtils.DisposeWhileHandlingException(normsConsumer);
                }
                else
                {
                    IOUtils.Dispose(normsConsumer);
                }
            }
        }
예제 #11
0
        /// <summary>
        /// Merges the postings of all readers in the merge state into a fields consumer obtained from the codec.
        /// Readers whose <c>Fields</c> is <c>null</c> are left out, but still advance the document base.
        /// </summary>
        /// <param name="segmentWriteState">Write state used to create the fields consumer.</param>
        private void MergeTerms(SegmentWriteState segmentWriteState)
        {
            IList<Fields> readerFields = new JCG.List<Fields>();
            IList<ReaderSlice> readerSlices = new JCG.List<ReaderSlice>();

            int nextDocBase = 0;
            for (int index = 0; index < mergeState.Readers.Count; index++)
            {
                AtomicReader current = mergeState.Readers[index];
                Fields currentFields = current.Fields;
                int docCount = current.MaxDoc;

                if (currentFields != null)
                {
                    readerSlices.Add(new ReaderSlice(nextDocBase, docCount, index));
                    readerFields.Add(currentFields);
                }

                nextDocBase += docCount;
            }

            FieldsConsumer postingsConsumer = codec.PostingsFormat.FieldsConsumer(segmentWriteState);
            bool completed = false;

            try
            {
                var merged = new MultiFields(readerFields.ToArray(/*Fields.EMPTY_ARRAY*/), readerSlices.ToArray(/*ReaderSlice.EMPTY_ARRAY*/));
                postingsConsumer.Merge(mergeState, merged);
                completed = true;
            }
            finally
            {
                // After a failure, dispose via the exception-handling variant instead of the plain one.
                if (!completed)
                {
                    IOUtils.DisposeWhileHandlingException(postingsConsumer);
                }
                else
                {
                    IOUtils.Dispose(postingsConsumer);
                }
            }
        }
예제 #12
0
        public virtual void TestCapitalization()
        {
            // Every assertion below passes the filter's default limits unchanged; name them once.
            var maxWords = CapitalizationFilter.DEFAULT_MAX_WORD_COUNT;
            var maxTokenLength = CapitalizationFilter.DEFAULT_MAX_TOKEN_LENGTH;

            CharArraySet keep = new CharArraySet(TEST_VERSION_CURRENT, new string[] { "and", "the", "it", "BIG" }, false);

            AssertCapitalizesTo("kiTTEN", new string[] { "Kitten" }, true, keep, true, null, 0, maxWords, maxTokenLength);
            AssertCapitalizesTo("and", new string[] { "And" }, true, keep, true, null, 0, maxWords, maxTokenLength);
            AssertCapitalizesTo("AnD", new string[] { "And" }, true, keep, true, null, 0, maxWords, maxTokenLength);

            // first is not forced, but it's not a keep word, either
            AssertCapitalizesTo("AnD", new string[] { "And" }, true, keep, false, null, 0, maxWords, maxTokenLength);

            AssertCapitalizesTo("big", new string[] { "Big" }, true, keep, true, null, 0, maxWords, maxTokenLength);
            AssertCapitalizesTo("BIG", new string[] { "BIG" }, true, keep, true, null, 0, maxWords, maxTokenLength);

            AssertCapitalizesToKeyword("Hello thEre my Name is Ryan", "Hello there my name is ryan", true, keep, true, null, 0, maxWords, maxTokenLength);

            // now each token
            AssertCapitalizesTo("Hello thEre my Name is Ryan", new string[] { "Hello", "There", "My", "Name", "Is", "Ryan" }, false, keep, true, null, 0, maxWords, maxTokenLength);

            // now only the long words
            AssertCapitalizesTo("Hello thEre my Name is Ryan", new string[] { "Hello", "There", "my", "Name", "is", "Ryan" }, false, keep, true, null, 3, maxWords, maxTokenLength);

            // without prefix
            AssertCapitalizesTo("McKinley", new string[] { "Mckinley" }, true, keep, true, null, 0, maxWords, maxTokenLength);

            // Now try some prefixes
            IList<char[]> okPrefix = new JCG.List<char[]> { "McK".ToCharArray() };

            AssertCapitalizesTo("McKinley", new string[] { "McKinley" }, true, keep, true, okPrefix, 0, maxWords, maxTokenLength);

            // now try some stuff with numbers
            AssertCapitalizesTo("1st 2nd third", new string[] { "1st", "2nd", "Third" }, false, keep, false, null, 0, maxWords, maxTokenLength);

            AssertCapitalizesToKeyword("the The the", "The The the", false, keep, true, null, 0, maxWords, maxTokenLength);
        }
예제 #13
0
        public virtual void TestMergeRandom()
        {
            // Builds several randomly filled PrefixCodedTerms instances and checks that merging their
            // enumerators yields the same sequence as the sorted set of all the terms.
            var encoded = new PrefixCodedTerms[TestUtil.NextInt32(Random, 2, 10)];
            var allTerms = new JCG.SortedSet<Term>();

            for (int slot = 0; slot < encoded.Length; slot++)
            {
                var slotTerms = new JCG.SortedSet<Term>();
                for (int remaining = TestUtil.NextInt32(Random, 0, 10000); remaining > 0; remaining--)
                {
                    slotTerms.Add(new Term(TestUtil.RandomUnicodeString(Random, 2), TestUtil.RandomUnicodeString(Random, 4)));
                }
                allTerms.UnionWith(slotTerms);

                var builder = new PrefixCodedTerms.Builder();
                foreach (Term term in slotTerms)
                {
                    builder.Add(term);
                }
                encoded[slot] = builder.Finish();
            }

            var enumerators = new JCG.List<IEnumerator<Term>>();
            foreach (PrefixCodedTerms terms in encoded)
            {
                enumerators.Add(terms.GetEnumerator());
            }

            IEnumerator<Term> expected = allTerms.GetEnumerator();
            IEnumerator<Term> actual = new MergedEnumerator<Term>(enumerators.ToArray());

            while (actual.MoveNext())
            {
                Assert.IsTrue(expected.MoveNext());
                Assert.AreEqual(expected.Current, actual.Current);
            }

            // The merged stream must not end before the expected one does.
            Assert.IsFalse(expected.MoveNext());
        }
예제 #14
0
파일: Config.cs 프로젝트: ywscr/lucenenet
        // Parses a colon-separated property into an array using the invariant culture,
        // e.g. "10.7:100.4:-2.3" yields double[]{10.7,100.4,-2.3}. A value without ':' yields a one-element array.
        private double[] PropToDoubleArray(string s)
        {
            bool isSingleValue = s.IndexOf(':') < 0;
            if (isSingleValue)
            {
                return new double[] { double.Parse(s, CultureInfo.InvariantCulture) };
            }

            IList<double> parsed = new JCG.List<double>();
            StringTokenizer tokens = new StringTokenizer(s, ":");
            while (tokens.MoveNext())
            {
                parsed.Add(double.Parse(tokens.Current, CultureInfo.InvariantCulture));
            }

            double[] result = new double[parsed.Count];
            parsed.CopyTo(result, 0);
            return result;
        }
예제 #15
0
파일: Config.cs 프로젝트: ywscr/lucenenet
        // Parses a colon-separated property into an array,
        // e.g. "true:true:false" yields bool[]{true,true,false}. A value without ':' yields a one-element array.
        private bool[] PropToBooleanArray(string s)
        {
            bool isSingleValue = s.IndexOf(':') < 0;
            if (isSingleValue)
            {
                return new bool[] { bool.Parse(s) };
            }

            IList<bool> parsed = new JCG.List<bool>();
            StringTokenizer tokens = new StringTokenizer(s, ":");
            while (tokens.MoveNext())
            {
                parsed.Add(bool.Parse(tokens.Current));
            }

            bool[] result = new bool[parsed.Count];
            parsed.CopyTo(result, 0);
            return result;
        }
예제 #16
0
        /// <summary>
        /// Returns the files required for replication. By default, this method returns
        /// all files that exist in the new revision, but not in the handler.
        /// </summary>
        /// <param name="newRevisionFiles">Files of the new revision, keyed by source.</param>
        /// <returns>
        /// <paramref name="newRevisionFiles"/> itself when the handler has no current revision files;
        /// otherwise a new dictionary holding, for each source known to the handler, the new-revision files
        /// whose file names the handler does not have yet.
        /// </returns>
        protected virtual IDictionary<string, IList<RevisionFile>> RequiredFiles(IDictionary<string, IList<RevisionFile>> newRevisionFiles)
        {
            IDictionary<string, IList<RevisionFile>> currentFiles = handler.CurrentRevisionFiles;
            if (currentFiles == null)
            {
                return newRevisionFiles;
            }

            var required = new Dictionary<string, IList<RevisionFile>>();

            foreach (var entry in currentFiles)
            {
                // put the handler files in a Set, for faster contains() checks later
                ISet<string> knownNames = new JCG.HashSet<string>();
                foreach (RevisionFile known in entry.Value)
                {
                    knownNames.Add(known.FileName);
                }

                string source = entry.Key;
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(newRevisionFiles.ContainsKey(source), "source not found in newRevisionFiles: {0}", newRevisionFiles);
                }

                // make sure to preserve revisionFiles order
                IList<RevisionFile> missing = new JCG.List<RevisionFile>();
                foreach (RevisionFile candidate in newRevisionFiles[source])
                {
                    if (knownNames.Contains(candidate.FileName))
                    {
                        continue;
                    }
                    missing.Add(candidate);
                }

                required[source] = missing;
            }

            return required;
        }
예제 #17
0
            /// <summary>
            /// Returns a list of facet entries to be rendered based on the specified offset and limit.
            /// The facet entries are retrieved from the facet entries collected during merging.
            /// </summary>
            /// <param name="offset">The offset in the collected facet entries during merging</param>
            /// <param name="limit">The number of facets to return starting from the offset.</param>
            /// <returns>a list of facet entries to be rendered based on the specified offset and limit</returns>
            public virtual IList<FacetEntry> GetFacetEntries(int offset, int limit)
            {
                IList<FacetEntry> page = new JCG.List<FacetEntry>();

                int leftToSkip = offset;
                int taken = 0;

                foreach (FacetEntry candidate in facetEntries)
                {
                    // Drop the leading entries that fall before the requested offset.
                    if (leftToSkip > 0)
                    {
                        leftToSkip--;
                        continue;
                    }

                    // Stop as soon as the requested number of entries has been collected.
                    if (taken >= limit)
                    {
                        break;
                    }

                    page.Add(candidate);
                    taken++;
                }

                return page;
            }
예제 #18
0
            /// <summary>
            /// Create the scorer used to score our associated <see cref="DisjunctionMaxQuery"/>.
            /// </summary>
            /// <returns>
            /// A <see cref="DisjunctionMaxScorer"/> over the non-null sub-scorers, or <c>null</c> when
            /// none of the weights produced a scorer.
            /// </returns>
            public override Scorer GetScorer(AtomicReaderContext context, IBits acceptDocs)
            {
                IList<Scorer> subScorers = new JCG.List<Scorer>();

                foreach (Weight weight in m_weights)
                {
                    // we will advance() subscorers
                    Scorer candidate = weight.GetScorer(context, acceptDocs);
                    if (candidate == null)
                    {
                        continue;
                    }
                    subScorers.Add(candidate);
                }

                // no sub-scorers had any documents
                if (subScorers.Count == 0)
                {
                    return null;
                }

                return new DisjunctionMaxScorer(this, outerInstance.tieBreakerMultiplier, subScorers.ToArray());
            }
예제 #19
0
        public virtual void TestOffsetCorrection()
        {
            // Checks that token offsets produced by PatternTokenizer on top of a MappingCharFilter
            // refer to positions in the original, un-mapped INPUT (where "&uuml;" occupies six characters).
            const string INPUT = "G&uuml;nther G&uuml;nther is here";

            // create MappingCharFilter
            // (the previously built "mappingRules" list was never read and has been removed)
            NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder();
            builder.Add("&uuml;", "ü");
            NormalizeCharMap normMap = builder.Build();
            CharFilter charStream = new MappingCharFilter(normMap, new StringReader(INPUT));

            // create PatternTokenizer; group -1 makes the pattern act as a delimiter
            TokenStream stream = new PatternTokenizer(charStream, new Regex("[,;/\\s]+", RegexOptions.Compiled), -1);

            AssertTokenStreamContents(stream, new string[] { "Günther", "Günther", "is", "here" }, new int[] { 0, 13, 26, 29 }, new int[] { 12, 25, 28, 33 }, INPUT.Length);

            // same input again, this time emitting the whole match (group 0) as the token
            charStream = new MappingCharFilter(normMap, new StringReader(INPUT));
            stream = new PatternTokenizer(charStream, new Regex("Günther", RegexOptions.Compiled), 0);
            AssertTokenStreamContents(stream, new string[] { "Günther", "Günther" }, new int[] { 0, 13 }, new int[] { 12, 25 }, INPUT.Length);
        }
예제 #20
0
        /// <summary>
        /// Optimize (remove empty rows) from the given Trie and return the resulting
        /// Trie.
        /// </summary>
        /// <param name="orig">the <see cref="Trie"/> to consolidate</param>
        /// <returns>the newly consolidated Trie</returns>
        public override Trie Optimize(Trie orig)
        {
            IList<string> commands = orig.cmds;
            IList<Row> sourceRows = orig.rows;
            IList<Row> mergedRows = new JCG.List<Row>();

            // remap[j] records the index in mergedRows that original row j ended up in.
            int[] remap = new int[sourceRows.Count];

            // Walk the original rows from last to first, folding each one into the first
            // already-collected row it can be merged with, or appending it otherwise.
            for (int j = sourceRows.Count - 1; j >= 0; j--)
            {
                Row candidate = new Remap(sourceRows[j], remap);
                int target = -1;

                for (int i = 0; i < mergedRows.Count; i++)
                {
                    Row combined = Merge(candidate, mergedRows[i]);
                    if (combined != null)
                    {
                        mergedRows[i] = combined;
                        target = i;
                        break;
                    }
                }

                if (target < 0)
                {
                    target = mergedRows.Count;
                    mergedRows.Add(candidate);
                }

                remap[j] = target;
            }

            int root = remap[orig.root];

            // Reuse the remap array for the gap-removal pass; -1 marks "not assigned yet".
            Arrays.Fill(remap, -1);
            mergedRows = RemoveGaps(root, mergedRows, new JCG.List<Row>(), remap);

            return new Trie(orig.forward, remap[root], commands, mergedRows);
        }
예제 #21
0
        /// <summary>
        /// Guesses the languages of a word.
        /// </summary>
        /// <param name="input">The word.</param>
        /// <returns>
        /// The set of language names that are potential matches for the input word, or
        /// <c>Languages.ANY_LANGUAGE</c> when the rules leave no language at all.
        /// </returns>
        public virtual LanguageSet GuessLanguages(string input)
        {
            string lowered = input.ToLowerInvariant();

            // Start from every known language and let each matching rule narrow the candidates down.
            ISet<string> candidates = new JCG.HashSet<string>(this.languages.GetLanguages());

            foreach (LangRule rule in this.rules)
            {
                if (!rule.Matches(lowered))
                {
                    continue;
                }

                if (!rule.acceptOnMatch)
                {
                    // Rejecting rule: the rule's languages are ruled out.
                    foreach (var rejected in rule.languages)
                    {
                        candidates.Remove(rejected);
                    }
                    continue;
                }

                // Accepting rule: keep only candidates the rule lists. Removal is deferred
                // because the set cannot be modified while it is being enumerated.
                IList<string> notListed = new JCG.List<string>();
                foreach (var candidate in candidates)
                {
                    if (!rule.languages.Contains(candidate))
                    {
                        notListed.Add(candidate);
                    }
                }
                foreach (var name in notListed)
                {
                    candidates.Remove(name);
                }
            }

            LanguageSet guessed = LanguageSet.From(candidates);
            if (guessed.Equals(Languages.NO_LANGUAGES))
            {
                return Languages.ANY_LANGUAGE;
            }
            return guessed;
        }
예제 #22
0
        /// <summary>
        /// Parses one <c>NQuery</c> followed by zero or more repetitions of a NOT token and a
        /// further <c>NQuery</c>.
        /// </summary>
        /// <returns>
        /// The single parsed query when no NOT token was consumed; otherwise the result of
        /// <c>GetNotQuery</c> applied to all parsed operands and the last NOT token consumed.
        /// </returns>
        public SrndQuery NotQuery()
        {
            SrndQuery         q;
            IList <SrndQuery> queries = null;
            Token             oprt    = null;

            // First operand.
            q = NQuery();

            while (true)
            {
                // Uses jj_ntk unless it is -1, in which case Jj_ntk() supplies the value
                // (presumably the kind of the next token -- confirm against the generated parser).
                switch ((jj_ntk == -1) ? Jj_ntk() : jj_ntk)
                {
                case RegexpToken.NOT:
                    ;
                    break;

                default:
                    // Anything other than NOT ends the operand list.
                    jj_la1[2] = jj_gen;
                    goto label_4;
                }
                oprt = Jj_consume_token(RegexpToken.NOT);
                /* keep only last used operator */
                // The operand list is created lazily on the first NOT and seeded with the first operand.
                if (queries is null)
                {
                    queries = new JCG.List <SrndQuery>();
                    queries.Add(q);
                }
                q = NQuery();
                queries.Add(q);
            }
label_4:
            // queries is still null here when the loop never consumed a NOT token.
            { if (true)
              {
                  return((queries is null) ? q : GetNotQuery(queries, oprt));
              }
            }
            // Not reachable: the block above always returns.
            throw Error.Create("Missing return statement in function");
        }
예제 #23
0
        public virtual void TestIntersection()
        {
            // Intersects several random compressed sets and compares the result with an
            // intersection computed directly on the uncompressed bit sets.
            int numBits = TestUtil.NextInt32(Random, 100, 1 << 20);
            int numDocIdSets = TestUtil.NextInt32(Random, 1, 4);

            IList<OpenBitSet> plainSets = new JCG.List<OpenBitSet>(numDocIdSets);
            for (int created = 0; created < numDocIdSets; ++created)
            {
                plainSets.Add(RandomOpenSet(numBits, Random.NextSingle()));
            }

            IList<WAH8DocIdSet> compressedSets = new JCG.List<WAH8DocIdSet>(numDocIdSets);
            foreach (OpenBitSet plain in plainSets)
            {
                compressedSets.Add(CopyOf(plain, numBits));
            }

            WAH8DocIdSet intersection = WAH8DocIdSet.Intersect(compressedSets);

            // Start with every bit set, then clear each range between consecutive set bits of every input set.
            OpenBitSet expected = new OpenBitSet(numBits);
            expected.Set(0, expected.Length);

            foreach (OpenBitSet plain in plainSets)
            {
                int previousDoc = -1;
                int doc = plain.NextSetBit(0);
                while (doc != -1)
                {
                    expected.Clear(previousDoc + 1, doc);
                    previousDoc = doc;
                    doc = plain.NextSetBit(doc + 1);
                }

                // Clear everything after the last set bit.
                expected.Clear(previousDoc + 1, plain.Length);
            }

            AssertEquals(numBits, expected, intersection);
        }
예제 #24
0
 /// <summary>
 /// Appends a new packet of buffered deletes to the stream,
 /// setting its generation.
 /// </summary>
 /// <param name="packet">The packet to append; its <c>DelGen</c> is assigned by this call.</param>
 /// <returns>The generation that was assigned to <paramref name="packet"/>.</returns>
 public virtual long Push(FrozenBufferedUpdates packet)
 {
     // The whole operation runs between UninterruptableMonitor.Enter(this) and Exit(this).
     UninterruptableMonitor.Enter(this);
     try
     {
         /*
          * The insert operation must be atomic. If we let threads increment the gen
          * and push the packet afterwards we risk that packets are out of order.
          * With DWPT this is possible if two or more flushes are racing for pushing
          * updates. If the pushed packets get out of order we would lose documents
          * since deletes are applied to the wrong segments.
          */
         packet.DelGen = nextGen++;
         if (Debugging.AssertsEnabled)
         {
             Debugging.Assert(packet.Any());
             Debugging.Assert(CheckDeleteStats());
             Debugging.Assert(packet.DelGen < nextGen);
             Debugging.Assert(updates.Count == 0 || updates[updates.Count - 1].DelGen < packet.DelGen, "Delete packets must be in order");
         }
         updates.Add(packet);
         // Keep the running totals in step with the packet that was just added.
         numTerms.AddAndGet(packet.numTermDeletes);
         bytesUsed.AddAndGet(packet.bytesUsed);
         if (infoStream.IsEnabled("BD"))
         {
             infoStream.Message("BD", "push deletes " + packet + " delGen=" + packet.DelGen + " packetCount=" + updates.Count + " totBytesUsed=" + bytesUsed);
         }
         if (Debugging.AssertsEnabled)
         {
             // Re-check the statistics now that the packet has been added.
             Debugging.Assert(CheckDeleteStats());
         }
         return(packet.DelGen);
     }
     finally
     {
         UninterruptableMonitor.Exit(this);
     }
 }
예제 #25
0
        /// <summary>
        /// Read quality queries from trec 1MQ format topics file.
        /// </summary>
        /// <remarks>
        /// Each topic line is split at its first <c>':'</c>: the text before it becomes the
        /// query id and the text after it becomes the query text, both trimmed. Lines that
        /// start with <c>#</c> and lines that are empty after trimming are skipped.
        /// The <paramref name="reader"/> is disposed before this method returns or throws.
        /// </remarks>
        /// <param name="reader">where queries are read from.</param>
        /// <returns>the result quality queries, sorted with <see cref="Array.Sort(Array)"/>.</returns>
        /// <exception cref="IOException">if cannot read the queries.</exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// if a non-empty, non-comment line contains no <c>':'</c> separator.
        /// </exception>
        public virtual QualityQuery[] ReadQueries(TextReader reader)
        {
            IList<QualityQuery> res = new JCG.List<QualityQuery>();
            string line;

            try
            {
                while (null != (line = reader.ReadLine()))
                {
                    line = line.Trim();
                    // Blank lines carry no topic; without this check IndexOf(':') would
                    // return -1 and Substring(0, -1) would throw on them.
                    if (line.Length == 0 || line.StartsWith("#", StringComparison.Ordinal))
                    {
                        continue;
                    }
                    // id
                    int    k  = line.IndexOf(':');
                    string id = line.Substring(0, k).Trim();
                    // qtext
                    string qtext = line.Substring(k + 1).Trim();
                    // we got a topic!
                    IDictionary<string, string> fields = new Dictionary<string, string>
                    {
                        [name] = qtext
                    };
                    QualityQuery topic = new QualityQuery(id, fields);
                    res.Add(topic);
                }
            }
            finally
            {
                reader.Dispose();
            }
            // sort result array (by ID)
            QualityQuery[] qq = res.ToArray();
            Array.Sort(qq);
            return qq;
        }
예제 #26
0
        /// <summary>
        /// Builds a field value out of entries of <c>terms</c>: starting at a random index,
        /// every remaining term is repeated a random number of times, the repetitions are
        /// shuffled, and the result is written out with a space after each term.
        /// </summary>
        /// <param name="maxTF">upper bound handed to <c>TestUtil.NextInt32</c> when picking each term's repeat count.</param>
        /// <returns>the shuffled terms, each followed by a single space.</returns>
        internal virtual string FieldValue(int maxTF)
        {
            IList<string> pool = new JCG.List<string>();

            // The start index is drawn first, then one repeat count per term.
            for (int idx = Random.Next(terms.Length); idx < terms.Length; idx++)
            {
                int repeats = TestUtil.NextInt32(Random, 1, maxTF);
                while (repeats-- > 0)
                {
                    pool.Add(terms[idx]);
                }
            }
            pool.Shuffle(Random);

            StringBuilder text = new StringBuilder();
            foreach (string entry in pool)
            {
                text.Append(entry).Append(' ');
            }
            return text.ToString();
        }
예제 #27
0
        /// <summary>
        /// Creates a <c>TermsFilter</c> for the given terms through one of its two constructors.
        /// </summary>
        /// <param name="singleField">
        /// when <c>false</c>, the filter is built straight from the term list; when <c>true</c>,
        /// the terms are asserted to share one field and the filter is built from that field
        /// plus the terms' bytes.
        /// </param>
        /// <param name="termList">the terms to filter on; enumerated once.</param>
        /// <returns>the newly created filter.</returns>
        private TermsFilter TermsFilter(bool singleField, IEnumerable<Term> termList)
        {
            if (singleField)
            {
                string commonField = null;
                var    termBytes   = new JCG.List<BytesRef>();

                foreach (Term current in termList)
                {
                    termBytes.Add(current.Bytes);
                    // Every term after the first must use the same field as its predecessor.
                    if (commonField != null)
                    {
                        assertEquals(current.Field, commonField);
                    }
                    commonField = current.Field;
                }
                // Fails when the list was empty (or the last field was null).
                assertNotNull(commonField);
                return new TermsFilter(commonField, termBytes);
            }

            return new TermsFilter(termList.ToList());
        }
예제 #28
0
            /// <summary>
            /// Creates the weight for <paramref name="outerInstance"/>: gathers term statistics
            /// for every term in the query's term arrays (building and caching a
            /// <c>TermContext</c> for each term that has none yet) and asks the searcher's
            /// similarity to compute the weight from them.
            /// </summary>
            /// <param name="outerInstance">the query this weight belongs to.</param>
            /// <param name="searcher">supplies the similarity, the top reader context and the statistics.</param>
            public MultiPhraseWeight(MultiPhraseQuery outerInstance, IndexSearcher searcher)
            {
                this.outerInstance = outerInstance;
                this.similarity    = searcher.Similarity;

                IndexReaderContext topContext = searcher.TopReaderContext;
                var termStats = new JCG.List<TermStatistics>();

                // compute idf
                foreach (Term[] position in outerInstance.termArrays)
                {
                    foreach (Term t in position)
                    {
                        bool cached = termContexts.TryGetValue(t, out TermContext ctx);
                        if (!cached || ctx is null)
                        {
                            // Nothing usable cached for this term: build it and remember it.
                            ctx             = TermContext.Build(topContext, t);
                            termContexts[t] = ctx;
                        }
                        termStats.Add(searcher.TermStatistics(t, ctx));
                    }
                }

                var boost           = outerInstance.Boost;
                var collectionStats = searcher.CollectionStatistics(outerInstance.field);
                stats = similarity.ComputeWeight(boost, collectionStats, termStats.ToArray());
            }
예제 #29
0
        /// <summary>
        /// Constructor which deserializes from the given <see cref="IDataInput"/>.
        /// </summary>
        /// <remarks>
        /// Values are read in this order: id, version, number of sources, then for each
        /// source its name, its number of files and, per file, a name followed by a
        /// 64-bit value.
        /// </remarks>
        /// <param name="reader">the input to read the token from.</param>
        /// <exception cref="IOException">propagated from <paramref name="reader"/>, if it throws one.</exception>
        public SessionToken(IDataInput reader)
        {
            Id      = reader.ReadUTF();
            Version = reader.ReadUTF();

            var filesBySource = new Dictionary<string, IList<RevisionFile>>();

            for (int remaining = reader.ReadInt32(); remaining > 0; remaining--)
            {
                string sourceName = reader.ReadUTF();
                int    fileCount  = reader.ReadInt32();

                IList<RevisionFile> revisionFiles = new JCG.List<RevisionFile>(fileCount);
                for (int f = 0; f < fileCount; f++)
                {
                    string fileName  = reader.ReadUTF();
                    long   fileValue = reader.ReadInt64();
                    revisionFiles.Add(new RevisionFile(fileName, fileValue));
                }
                // Add (not the indexer) so a repeated source name still throws.
                filesBySource.Add(sourceName, revisionFiles);
            }

            SourceFiles = filesBySource;
        }
예제 #30
0
        /// <summary>
        /// Test fixture setup: indexes at least 200 documents, each holding one random
        /// unicode string in a single string field, then opens a reader and two searchers
        /// over the result.
        /// </summary>
        public override void SetUp()
        {
            base.SetUp();

            dir = NewDirectory();
            // sometimes use an empty string as field name
            fieldName = Random.NextBoolean() ? "field" : "";

            // Built step by step, in the same order the nested expression would evaluate.
            var analyzer = new MockAnalyzer(Random, MockTokenizer.KEYWORD, false);
            var config   = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)
                               .SetMaxBufferedDocs(TestUtil.NextInt32(Random, 50, 1000));
            var writer   = new RandomIndexWriter(Random, dir, config);

            // A single document/field pair is reused; only the field value changes per add.
            var   doc   = new Document();
            Field field = NewStringField(fieldName, "", Field.Store.NO);
            doc.Add(field);

            JCG.List<string> indexedValues = new JCG.List<string>();
            int docCount = AtLeast(200);
            for (int n = 0; n < docCount; n++)
            {
                string value = TestUtil.RandomUnicodeString(Random);
                field.SetStringValue(value);
                indexedValues.Add(value);
                writer.AddDocument(doc);
            }

            if (Verbose)
            {
                // utf16 order
                indexedValues.Sort();
                Console.WriteLine("UTF16 order:");
                foreach (string value in indexedValues)
                {
                    Console.WriteLine("  " + UnicodeUtil.ToHexString(value));
                }
            }

            reader    = writer.GetReader();
            searcher1 = NewSearcher(reader);
            searcher2 = NewSearcher(reader);
            writer.Dispose();
        }