Example #1
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public synchronized void checkIn(T t) throws org.maltparser.core.exception.MaltChainedException
        public override void checkIn(T t)
        {
            lock (this)
            {
                resetObject(t);
                inuse.remove(t);
                if (available.size() < keepThreshold)
                {
                    available.add(t);
                }
            }
        }
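A minimal sketch of the matching check-out side of this object pool, assuming the same inuse/available collections and the Java-style collection API used above; the create() factory is a hypothetical stand-in for however the pool allocates new instances:

        public T checkOut()
        {
            lock (this)
            {
                T t;
                if (available.size() > 0)
                {
                    // reuse a kept instance instead of allocating a new one
                    t = available.iterator().next();
                    available.remove(t);
                }
                else
                {
                    t = create(); // hypothetical factory for a fresh instance
                }
                inuse.add(t);
                return t;
            }
        }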
Example #2
        private void TestRandomWords(int maxNumWords, int numIter)
        {
            Random random = new Random(Random().Next());

            for (int iter = 0; iter < numIter; iter++)
            {
                if (VERBOSE)
                {
                    Console.WriteLine("\nTEST: iter " + iter);
                }
                for (int inputMode = 0; inputMode < 2; inputMode++)
                {
                    int            numWords = random.nextInt(maxNumWords + 1);
                    ISet <IntsRef> termsSet = new HashSet <IntsRef>();
                    IntsRef[]      terms    = new IntsRef[numWords];
                    while (termsSet.size() < numWords)
                    {
                        string term = FSTTester <object> .GetRandomString(random);

                        termsSet.Add(FSTTester <object> .ToIntsRef(term, inputMode));
                    }
                    DoTest(inputMode, termsSet.ToArray());
                }
            }
        }
Example #3
 private void CreateRandomIndexes(int maxSegments)
 {
     dir = NewDirectory();
     numDocs = AtLeast(150);
     int numTerms = TestUtil.NextInt(Random(), 1, numDocs / 5);
     ISet<string> randomTerms = new HashSet<string>();
     while (randomTerms.size() < numTerms)
     {
         randomTerms.add(TestUtil.RandomSimpleString(Random()));
     }
     terms = new List<string>(randomTerms);
     int seed = Random().Next();
     IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(new Random(seed)));
     iwc.SetMergePolicy(TestSortingMergePolicy.NewSortingMergePolicy(sort));
     iw = new RandomIndexWriter(new Random(seed), dir, iwc);
     for (int i = 0; i < numDocs; ++i)
     {
         Document doc = RandomDocument();
         iw.AddDocument(doc);
         if (i == numDocs / 2 || (i != numDocs - 1 && Random().nextInt(8) == 0))
         {
             iw.Commit();
         }
         if (Random().nextInt(15) == 0)
         {
             string term = RandomInts.RandomFrom(Random(), terms);
             iw.DeleteDocuments(new Term("s", term));
         }
     }
     reader = iw.Reader;
 }
Example #4
 /// <summary>
 /// Sets up the iterator with the inner array of the hash set and a link to the
 /// parent set.
 /// </summary>
 /// <param name="the_array">the array of entries.</param>
 /// <param name="the_parent">the parent set.</param>
 public HashIterator(ref HashEntry <T>[] the_array, HashSet <T> the_parent)
 {
     my_array       = the_array;
     my_parent      = the_parent;
     my_mod_count   = my_parent.getModCount();
     my_total_items = my_parent.size();
 }
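The stored modification count is the usual fail-fast guard: every mutation of the parent set bumps its mod count, and the iterator compares its snapshot against the live value before each step. A minimal sketch of that check, assuming the fields captured by the constructor above:

     private void checkForComodification()
     {
         // fail fast if the parent set was mutated after this iterator was created
         if (my_parent.getModCount() != my_mod_count)
         {
             throw new InvalidOperationException("HashSet was modified during iteration");
         }
     }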
Example #5
        private void sameSizeDifferentElements(List <DSString> the_one, List <DSString> the_two)
        {
            //make sure the number of items returned is the same
            Assert.AreEqual(the_one.size(), the_two.size());

            //make sure the two orderings are not identical
            bool found = false;

            for (int i = 0; i < the_one.size(); i++)
            {
                if (!the_one.get(i).Equals(the_two.get(i)))
                {
                    found = true;
                }
            }
            Assert.IsTrue(found);

            //make sure all elements found cover all vertices possible
            Set <DSString> one = new HashSet <DSString>();
            Set <DSString> two = new HashSet <DSString>();

            for (int i = 0; i < the_one.size(); i++)
            {
                one.add(the_one.get(i));
                two.add(the_two.get(i));
            }

            Assert.AreEqual(one.size(), two.size());
        }
Example #6
        static void HashSetTest()
        {
            HashSet <int> evenNumbers = new HashSet <int>();
            HashSet <int> oddNumbers  = new HashSet <int>();

            for (int i = 0; i < 5; i++)
            {
                evenNumbers.add(i * 2);

                oddNumbers.add((i * 2) + 1);
            }

            Console.WriteLine("Even numbers: " + evenNumbers.size());
            Console.WriteLine("Odd numbers: " + oddNumbers.size());

            Console.WriteLine("\n\nEvens:");
            evenNumbers.print();
            Console.WriteLine("\n\nOdds:");
            oddNumbers.print();

            oddNumbers.remove(7);
            oddNumbers.remove(3);

            Console.WriteLine("\n\nOdds:");
            oddNumbers.print();
        }
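Note that add/size/remove/print above belong to a custom HashSet implementation; the BCL System.Collections.Generic.HashSet<T> spells these Add/Count/Remove. A sketch of the odd-numbers half against the BCL type, with string.Join standing in for print():

        static void BclHashSetSketch()
        {
            var oddNumbers = new System.Collections.Generic.HashSet<int>();
            for (int i = 0; i < 5; i++)
            {
                oddNumbers.Add((i * 2) + 1);
            }
            Console.WriteLine("Odd numbers: " + oddNumbers.Count);
            oddNumbers.Remove(7); // returns true when the element was present
            Console.WriteLine(string.Join(" ", oddNumbers)); // stand-in for print()
        }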
Example #8
        /**
         * True iff simplices are neighbors.
         * Two simplices are neighbors if they are the same dimension and they share
         * a facet.
         * @param simplex the other Simplex
         * @return true iff this Simplex is a neighbor of simplex
         */
        public bool isNeighbor(Simplex simplex)
        {
            HashSet h = new HashSet(this);

            h.removeAll(simplex);
            return((this.size() == simplex.size()) && (h.size() == 1));
        }
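The neighbor test is a set difference: copy this simplex's vertices, remove the other's, and exactly one vertex should remain. In BCL terms Java's removeAll maps to ISet<T>.ExceptWith; a sketch assuming the two simplices expose their vertices as plain sets:

        public static bool IsNeighborSketch<T>(ISet<T> a, ISet<T> b)
        {
            var h = new HashSet<T>(a); // copy so the original set is untouched
            h.ExceptWith(b);           // set difference, the analogue of removeAll
            return a.Count == b.Count && h.Count == 1;
        }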
Example #9
        public virtual Collection getNbest(int n)
        {
            HashSet hashSet = new HashSet();
            BoundedPriorityQueue boundedPriorityQueue = new BoundedPriorityQueue(n);

            boundedPriorityQueue.add(new Nbest.NBestPath(this, "<s>", this.lattice.getInitialNode(), (double)0f, (double)0f));
            while (hashSet.size() < n && boundedPriorityQueue.size() > 0)
            {
                Nbest.NBestPath nbestPath = (Nbest.NBestPath)boundedPriorityQueue.poll();
                if (nbestPath.node.equals(this.lattice.terminalNode))
                {
                    hashSet.add(nbestPath.path);
                }
                else
                {
                    Iterator iterator = nbestPath.node.getLeavingEdges().iterator();
                    while (iterator.hasNext())
                    {
                        Edge            edge          = (Edge)iterator.next();
                        Node            toNode        = edge.getToNode();
                        double          num           = nbestPath.forwardScore + edge.getAcousticScore() + edge.getLMScore();
                        double          num2          = num + toNode.getBackwardScore();
                        string          newPathString = this.getNewPathString(nbestPath, toNode);
                        Nbest.NBestPath item          = new Nbest.NBestPath(this, newPathString, toNode, num2, num);
                        boundedPriorityQueue.add(item);
                    }
                }
            }
            return(hashSet);
        }
Example #10
        /// <summary>
        /// Determines whether the_first set is a subset of the_other set.
        /// </summary>
        /// <param name="the_first">The set tested for containment in the other set.</param>
        /// <param name="the_other">The other set to consider in the subset operation.</param>
        /// <returns>True if the_first is a subset of the_other set, otherwise false.</returns>
        public static bool subset <T>(Set <T> the_first, Set <T> the_other) where T : class, Comparable <T>
        {
            //if the difference between the_first and the_other is the empty set, then
            //the_first is a subset of the_other set.
            Set <T> check = new HashSet <T>();

            addDifferentEntries(ref check, the_first, the_other.toArray());

            return(check.size() == 0);
        }
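With the BCL set type the subset check is built in, so the difference-set detour is unnecessary; a sketch assuming both sides are materialized as HashSet<T>:

        public static bool SubsetSketch<T>(HashSet<T> the_first, HashSet<T> the_other)
        {
            // true when every element of the_first is also contained in the_other
            return the_first.IsSubsetOf(the_other);
        }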
Example #11
    public static int test()
    {
        Set <Short> s = new HashSet <Short>();

        for (short i = 0; i < 100; i++)
        {
            s.add(i);
            s.remove(i - 1);
        }
        return(s.size());
    }
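This is the classic boxed-removal puzzle from the Java original: i - 1 promotes to int and boxes to an Integer, which never equals a Short stored in the set, so nothing is removed and test() returns 100 rather than the intended 1. A sketch of the corrected loop with an explicit cast, assuming the same Java-style Set<Short> port:

    for (short i = 0; i < 100; i++)
    {
        s.add(i);
        s.remove((short)(i - 1)); // the cast keeps add and remove operating on the same type
    }
    // with the cast in place, s.size() returns 1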
Example #12
        public static bool moreInfo     = false;     // True iff more info in toString

        /**
         * Constructor.
         * @param collection a Collection holding the Simplex vertices
         * @throws IllegalArgumentException if there are duplicate vertices
         */
        public Simplex(Collection collection)
        {
            this.vertices = Collections.unmodifiableList(new java.util.ArrayList(collection));
            this.idNumber = idGenerator++;
            Set noDups = new HashSet(this);

            if (noDups.size() != this.vertices.size())
            {
                throw new InvalidOperationException("Duplicate vertices in Simplex");
            }
        }
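Duplicate detection by comparing set size against list size works the same with BCL types; a sketch assuming the vertices arrive as an IList<T>:

        static void RequireNoDuplicates<T>(IList<T> vertices)
        {
            // a set collapses duplicates, so a size mismatch means a repeated vertex
            var noDups = new HashSet<T>(vertices);
            if (noDups.Count != vertices.Count)
            {
                throw new InvalidOperationException("Duplicate vertices in Simplex");
            }
        }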
Example #13
        public void TestWithContexts()
        {
            Directory         dir = NewDirectory();
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));

            iwc.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, iwc);
            KeyValuePair <List <string>, IDictionary <string, Document> > res = GenerateIndexDocuments(AtLeast(1000), true, true);
            IDictionary <string, Document> docs = res.Value;
            List <string> invalidDocTerms       = res.Key;

            foreach (Document doc in docs.Values)
            {
                writer.AddDocument(doc);
            }
            writer.Commit();
            writer.Dispose();
            IndexReader    ir            = DirectoryReader.Open(dir);
            IDictionary    dictionary    = new DocumentDictionary(ir, FIELD_NAME, WEIGHT_FIELD_NAME, PAYLOAD_FIELD_NAME, CONTEXT_FIELD_NAME);
            IInputIterator inputIterator = dictionary.GetEntryIterator();
            BytesRef       f;

            while ((f = inputIterator.Next()) != null)
            {
                string   field = f.Utf8ToString();
                Document doc   = docs.ContainsKey(field) ? docs[field] : null;
                docs.Remove(field);
                //Document doc = docs.remove(f.utf8ToString());
                assertTrue(f.equals(new BytesRef(doc.Get(FIELD_NAME))));
                IIndexableField weightField = doc.GetField(WEIGHT_FIELD_NAME);
                assertEquals(inputIterator.Weight, (weightField != null) ? Convert.ToInt64(weightField.GetNumericValue()) : 0);
                assertTrue(inputIterator.Payload.equals(doc.GetField(PAYLOAD_FIELD_NAME).GetBinaryValue()));
                ISet <BytesRef>        oriCtxs    = new HashSet <BytesRef>();
                IEnumerable <BytesRef> contextSet = inputIterator.Contexts;
                foreach (IIndexableField ctxf in doc.GetFields(CONTEXT_FIELD_NAME))
                {
                    oriCtxs.add(ctxf.GetBinaryValue());
                }
                assertEquals(oriCtxs.size(), contextSet.Count());
            }

            foreach (string invalidTerm in invalidDocTerms)
            {
                var invalid = docs[invalidTerm];
                docs.Remove(invalidTerm);
                assertNotNull(invalid);
            }
            assertTrue(!docs.Any());

            ir.Dispose();
            dir.Dispose();
        }
Example #14
        public void TestMultiThreaded()
        {
            FileInfo    file    = new FileInfo(Path.Combine(getWorkDir().FullName, "one-line"));
            PerfRunData runData = createPerfRunData(file, false, typeof(ThreadingDocMaker).AssemblyQualifiedName);

            ThreadClass[] threads = new ThreadClass[10];
            using (WriteLineDocTask wldt = new WriteLineDocTask(runData))
            {
                for (int i = 0; i < threads.Length; i++)
                {
                    threads[i] = new ThreadAnonymousHelper("t" + i, wldt);
                }

                foreach (ThreadClass t in threads)
                {
                    t.Start();
                }
                foreach (ThreadClass t in threads)
                {
                    t.Join();
                }
            } // wldt.Dispose();

            ISet <String> ids = new HashSet <string>();
            TextReader    br  = new StreamReader(new FileStream(file.FullName, FileMode.Open, FileAccess.Read, FileShare.None), Encoding.UTF8);

            try
            {
                String line = br.ReadLine();
                assertHeaderLine(line); // header line is written once, no matter how many threads there are
                for (int i = 0; i < threads.Length; i++)
                {
                    line = br.ReadLine();
                    assertNotNull($"line for index {i.ToString()} is missing", line); // LUCENENET specific - ensure the line is there before splitting
                    String[] parts = line.Split(WriteLineDocTask.SEP).TrimEnd();
                    assertEquals(line, 3, parts.Length);
                    // check that all thread names written are the same in the same line
                    String tname = parts[0].Substring(parts[0].IndexOf('_'));
                    ids.add(tname);
                    assertEquals(tname, parts[1].Substring(parts[1].IndexOf('_')));
                    assertEquals(tname, parts[2].Substring(parts[2].IndexOf('_')));
                }
                // only threads.length lines should exist
                assertNull(br.ReadLine());
                assertEquals(threads.Length, ids.size());
            }
            finally
            {
                br.Dispose();
            }
        }
Example #15
        /**
         * Makes a bunch of single-char tokens (the max # unique terms will at most be 26).
         * puts the # unique terms into expected, to be checked against the norm.
         */
        private string AddValue()
        {
            StringBuilder    sb    = new StringBuilder();
            HashSet <string> terms = new HashSet <string>();
            int num = TestUtil.NextInt(Random(), 0, 255);

            for (int i = 0; i < num; i++)
            {
                sb.append(' ');
                char term = (char)TestUtil.NextInt(Random(), 'a', 'z');
                sb.append(term);
                terms.add("" + term);
            }
            expected.Add(terms.size());
            return(sb.toString());
        }
Example #16
        public void TestFastFilter()
        {
            DuplicateFilter df = new DuplicateFilter(KEY_FIELD);

            df.ProcessingMode = (ProcessingMode.PM_FAST_INVALIDATION);
            HashSet <string> results = new HashSet <string>();

            ScoreDoc[] hits = searcher.Search(tq, df, 1000).ScoreDocs;
            assertTrue("Filtered searching should have found some matches", hits.Length > 0);

            foreach (ScoreDoc hit in hits)
            {
                Document d   = searcher.Doc(hit.Doc);
                string   url = d.Get(KEY_FIELD);
                assertFalse("No duplicate urls should be returned", results.contains(url));
                results.add(url);
            }
            assertEquals("Two urls found", 2, results.size());
        }
Example #17
        public Optional <Pair <Graph, int> > kernelProcedure3(Graph g, HashSet <int> A, HashSet <int> B, int k)
        {
            int kPrime = k;

            // P3
            foreach (Edge nonEdge in g.inducedBy(A).getNonEdges())
            {
                int x = nonEdge.from, y = nonEdge.to;

                HashSet <int> bNeighbors = g.neighborhood(x).toSet().intersect(g.neighborhood(y).toSet()).intersect(B);
                HashSet <int> Axy        = new HashSet <int>();

                foreach (int b in bNeighbors)
                {
                    Graph gPrime = g.inducedBy(g.vertices().remove(b));

                    if (gPrime.hasPath(x, y))
                    {
                        Axy.Add(b);
                    }
                }

                if (Axy.size() > 2 * k)
                {
                    g = g.addEdge(nonEdge);
                    kPrime--;

                    if (kPrime < 0)
                    {
                        return(Optional.empty());
                    }
                }
                else
                {
                    HashSet <int> set = Set.of(Axy);
                    A = A.union(set);
                    B = B.minus(set);
                }
            }

            return(Optional.of(Tuple.of(g.inducedBy(A), kPrime)));
        }
Example #18
 private void initUnitMaps(GrammarNode grammarNode)
 {
     if (this.nodeToNextUnitArrayMap.get(grammarNode) == null)
     {
         HashSet      hashSet    = new HashSet();
         HashSet      hashSet2   = new HashSet();
         GrammarArc[] successors = grammarNode.getSuccessors();
         GrammarArc[] array      = successors;
         int          num        = array.Length;
         for (int i = 0; i < num; i++)
         {
             GrammarArc  grammarArc   = array[i];
             GrammarNode grammarNode2 = grammarArc.getGrammarNode();
             this.collectNextUnits(grammarNode2, hashSet, hashSet2);
         }
         int[] array2 = new int[hashSet2.size()];
         num = 0;
         Iterator iterator = hashSet2.iterator();
         while (iterator.hasNext())
         {
             Unit  unit   = (Unit)iterator.next();
             int[] array3 = array2;
             int   num2   = num;
             num++;
             array3[num2] = unit.getBaseID();
         }
         this.nodeToNextUnitArrayMap.put(grammarNode, array2);
     }
     if (this.nodeToUnitSetMap.get(grammarNode) == null)
     {
         HashSet hashSet  = new HashSet();
         HashSet hashSet2 = new HashSet();
         this.collectNextUnits(grammarNode, hashSet, hashSet2);
         this.nodeToUnitSetMap.put(grammarNode, hashSet2);
     }
 }
Example #19
        //searches for words in the dictionary that match unique permutations of all subsets
        //of the user input.
        private void searchForWords()
        {
            string input = txtLetters.Text;

            //check for bad input
            if (input.Length == 0)
            {
                MessageBox.Show("You must enter letters in the textbox to get results.");
                return;
            }
            else if (input.Length > 8)
            {
                MessageBox.Show("A maximum of eight characters is allowed due to the time-complexity of this algorithm.");
                return;
            }

            //tree to hold words
            if (!my_dictionary_loaded)
            {
                my_dictionary_loaded = true;
                addDictionary();
            }

            //get characters
            char[] letters = input.ToLower().ToCharArray();

            //get unique permutations
            Subsets s = new Subsets();

            s.addObserver(this);

            lblInfo.Text = "Getting letter permutations...";
            List <List <DSInteger> > permutation_indices = s.getUniquePermutationIndices <DSInteger>(letters.Length);

            lblInfo.Text        = "Building possible words...";
            pbrProgress.Value   = 0;
            pbrProgress.Maximum = permutation_indices.size();

            //get word candidates from the permutation indices
            Set <DSString> word_candidates = new HashSet <DSString>();

            for (int i = 0; i < permutation_indices.size(); i++)
            {
                StringBuilder    builder     = new StringBuilder();
                List <DSInteger> permutation = permutation_indices.get(i);
                for (int j = 0; j < permutation.size(); j++)
                {
                    builder.Append(letters[permutation.get(j).value]);
                }

                DSString possible = new DSString(builder.ToString());
                if (!word_candidates.contains(possible))
                {
                    word_candidates.add(possible);
                }

                //show progress
                updateProgress();
            }

            pbrProgress.Value   = 0;
            pbrProgress.Maximum = word_candidates.size();
            lblInfo.Text        = "Check Search Tree for words...";

            //sort candidates according to length and then alphabetically
            DSString[] sorted = word_candidates.toArray();
            Sorting <DSString> .Sort(Sorts.QuickSort, ref sorted, new StringLengthComparator());

            //clear old lookups
            lstWords.Items.Clear();

            //lookup each word in the bst
            for (int i = sorted.Length - 1; i >= 0; i--)
            {
                DSString current = sorted[i];
                if (my_bst.contains(current))
                {
                    lstWords.Items.Add(current.value);
                }

                //show progress
                updateProgress();
            }

            //show words found
            lblInfo.Text = "Words found: " + lstWords.Items.Count;
        }
Example #21
        public void TestRandom()
        {
            string[]      terms = new string[TestUtil.NextInt32(Random, 2, 10)];
            ISet <string> seen  = new HashSet <string>();

            while (seen.size() < terms.Length)
            {
                string token = TestUtil.RandomSimpleString(Random, 1, 5);
                if (!seen.contains(token))
                {
                    terms[seen.size()] = token;
                    seen.add(token);
                }
            }

            Analyzer a = new MockAnalyzer(Random);

            int  numDocs   = AtLeast(10);
            long totTokens = 0;

            string[][] docs = new string[numDocs][];
            for (int i = 0; i < numDocs; i++)
            {
                docs[i] = new string[AtLeast(100)];
                if (VERBOSE)
                {
                    Console.Write("  doc " + i + ":");
                }
                for (int j = 0; j < docs[i].Length; j++)
                {
                    docs[i][j] = GetZipfToken(terms);
                    if (VERBOSE)
                    {
                        Console.Write(" " + docs[i][j]);
                    }
                }
                if (VERBOSE)
                {
                    Console.WriteLine();
                }
                totTokens += docs[i].Length;
            }

            int grams = TestUtil.NextInt32(Random, 1, 4);

            if (VERBOSE)
            {
                Console.WriteLine("TEST: " + terms.Length + " terms; " + numDocs + " docs; " + grams + " grams");
            }

            // Build suggester model:
            FreeTextSuggester sug = new FreeTextSuggester(a, a, grams, (byte)0x20);

            sug.Build(new TestRandomInputIterator(this, docs));

            // Build inefficient but hopefully correct model:
            List <IDictionary <string, int?> > gramCounts = new List <IDictionary <string, int?> >(grams);

            for (int gram = 0; gram < grams; gram++)
            {
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: build model for gram=" + gram);
                }
                IDictionary <string, int?> model = new HashMap <string, int?>();
                gramCounts.Add(model);
                foreach (string[] doc in docs)
                {
                    for (int i = 0; i < doc.Length - gram; i++)
                    {
                        StringBuilder b = new StringBuilder();
                        for (int j = i; j <= i + gram; j++)
                        {
                            if (j > i)
                            {
                                b.append(' ');
                            }
                            b.append(doc[j]);
                        }
                        string token    = b.toString();
                        int?   curCount = model.ContainsKey(token) ? model[token] : null;
                        if (curCount == null)
                        {
                            model.Put(token, 1);
                        }
                        else
                        {
                            model.Put(token, 1 + curCount);
                        }
                        if (VERBOSE)
                        {
                            Console.WriteLine("  add '" + token + "' -> count=" + (model.ContainsKey(token) ? model[token].ToString() : ""));
                        }
                    }
                }
            }

            int lookups = AtLeast(100);

            for (int iter = 0; iter < lookups; iter++)
            {
                string[] tokens = new string[TestUtil.NextInt32(Random, 1, 5)];
                for (int i = 0; i < tokens.Length; i++)
                {
                    tokens[i] = GetZipfToken(terms);
                }

                // Maybe trim last token; be sure not to create the
                // empty string:
                int trimStart;
                if (tokens.Length == 1)
                {
                    trimStart = 1;
                }
                else
                {
                    trimStart = 0;
                }
                int trimAt = TestUtil.NextInt32(Random, trimStart, tokens[tokens.Length - 1].Length);
                tokens[tokens.Length - 1] = tokens[tokens.Length - 1].Substring(0, trimAt);

                int           num = TestUtil.NextInt32(Random, 1, 100);
                StringBuilder b   = new StringBuilder();
                foreach (string token in tokens)
                {
                    b.append(' ');
                    b.append(token);
                }
                string query = b.toString();
                query = query.Substring(1);

                if (VERBOSE)
                {
                    Console.WriteLine("\nTEST: iter=" + iter + " query='" + query + "' num=" + num);
                }

                // Expected:
                List <Lookup.LookupResult> expected = new List <Lookup.LookupResult>();
                double backoff = 1.0;
                seen = new HashSet <string>();

                if (VERBOSE)
                {
                    Console.WriteLine("  compute expected");
                }
                for (int i = grams - 1; i >= 0; i--)
                {
                    if (VERBOSE)
                    {
                        Console.WriteLine("    grams=" + i);
                    }

                    if (tokens.Length < i + 1)
                    {
                        // Don't have enough tokens to use this model
                        if (VERBOSE)
                        {
                            Console.WriteLine("      skip");
                        }
                        continue;
                    }

                    if (i == 0 && tokens[tokens.Length - 1].Length == 0)
                    {
                        // Never suggest unigrams from empty string:
                        if (VERBOSE)
                        {
                            Console.WriteLine("      skip unigram priors only");
                        }
                        continue;
                    }

                    // Build up "context" ngram:
                    b = new StringBuilder();
                    for (int j = tokens.Length - i - 1; j < tokens.Length - 1; j++)
                    {
                        b.append(' ');
                        b.append(tokens[j]);
                    }
                    string context = b.toString();
                    if (context.Length > 0)
                    {
                        context = context.Substring(1);
                    }
                    if (VERBOSE)
                    {
                        Console.WriteLine("      context='" + context + "'");
                    }
                    long contextCount;
                    if (context.Length == 0)
                    {
                        contextCount = totTokens;
                    }
                    else
                    {
                        //int? count = gramCounts.get(i - 1).get(context);
                        var gramCount = gramCounts[i - 1];
                        int? count = gramCount.ContainsKey(context) ? gramCount[context] : null;
                        if (count == null)
                        {
                            // We never saw this context:
                            backoff *= FreeTextSuggester.ALPHA;
                            if (VERBOSE)
                            {
                                Console.WriteLine("      skip: never saw context");
                            }
                            continue;
                        }
                        contextCount = count.GetValueOrDefault();
                    }
                    if (VERBOSE)
                    {
                        Console.WriteLine("      contextCount=" + contextCount);
                    }
                    IDictionary <string, int?> model = gramCounts[i];

                    // First pass, gather all predictions for this model:
                    if (VERBOSE)
                    {
                        Console.WriteLine("      find terms w/ prefix=" + tokens[tokens.Length - 1]);
                    }
                    List <Lookup.LookupResult> tmp = new List <Lookup.LookupResult>();
                    foreach (string term in terms)
                    {
                        if (term.StartsWith(tokens[tokens.Length - 1], StringComparison.Ordinal))
                        {
                            if (VERBOSE)
                            {
                                Console.WriteLine("        term=" + term);
                            }
                            if (seen.contains(term))
                            {
                                if (VERBOSE)
                                {
                                    Console.WriteLine("          skip seen");
                                }
                                continue;
                            }
                            string ngram = (context + " " + term).Trim();
                            //Integer count = model.get(ngram);
                            int? count = model.ContainsKey(ngram) ? model[ngram] : null;
                            if (count != null)
                            {
                                // LUCENENET NOTE: We need to calculate this as decimal because when using double it can sometimes
                                // return numbers that are greater than long.MaxValue, which results in a negative long number.
                                // This is also the way it is being done in the FreeTextSuggester to work around the issue.
                                Lookup.LookupResult lr = new Lookup.LookupResult(ngram, (long)(long.MaxValue * ((decimal)backoff * (decimal)count / contextCount)));
                                tmp.Add(lr);
                                if (VERBOSE)
                                {
                                    Console.WriteLine("      add tmp key='" + lr.Key + "' score=" + lr.Value);
                                }
                            }
                        }
                    }

                    // Second pass, trim to only top N, and fold those
                    // into overall suggestions:
                    tmp.Sort(byScoreThenKey);
                    if (tmp.size() > num)
                    {
                        //tmp.subList(num, tmp.size()).clear();
                        tmp.RemoveRange(num, tmp.size() - num);
                    }
                    foreach (Lookup.LookupResult result in tmp)
                    {
                        string key = result.Key.toString();
                        int    idx = key.LastIndexOf(' ');
                        string lastToken;
                        if (idx != -1)
                        {
                            lastToken = key.Substring(idx + 1);
                        }
                        else
                        {
                            lastToken = key;
                        }
                        if (!seen.contains(lastToken))
                        {
                            seen.add(lastToken);
                            expected.Add(result);
                            if (VERBOSE)
                            {
                                Console.WriteLine("      keep key='" + result.Key + "' score=" + result.Value);
                            }
                        }
                    }

                    backoff *= FreeTextSuggester.ALPHA;
                }

                expected.Sort(byScoreThenKey);

                if (expected.size() > num)
                {
                    expected.RemoveRange(num, expected.size() - num);
                }

                // Actual:
                IList <Lookup.LookupResult> actual = sug.DoLookup(query, num);

                if (VERBOSE)
                {
                    Console.WriteLine("  expected: " + expected);
                    Console.WriteLine("    actual: " + actual);
                }

                assertEquals(expected.ToString(), actual.ToString());
            }
        }
Example #22
        private void CreateRandomIndexes()
        {
            dir1 = NewDirectory();
            dir2 = NewDirectory();
            int numDocs = AtLeast(150);
            int numTerms = TestUtil.NextInt(Random(), 1, numDocs / 5);
            ISet<string> randomTerms = new HashSet<string>();
            while (randomTerms.size() < numTerms)
            {
                randomTerms.add(TestUtil.RandomSimpleString(Random()));
            }
            terms = new List<string>(randomTerms);
            long seed = Random().NextLong();
            IndexWriterConfig iwc1 = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(new Random((int)seed)));
            IndexWriterConfig iwc2 = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(new Random((int)seed)));
            iwc2.SetMergePolicy(NewSortingMergePolicy(sort));
            RandomIndexWriter iw1 = new RandomIndexWriter(new Random((int)seed), dir1, iwc1);
            RandomIndexWriter iw2 = new RandomIndexWriter(new Random((int)seed), dir2, iwc2);
            for (int i = 0; i < numDocs; ++i)
            {
                if (Random().nextInt(5) == 0 && i != numDocs - 1)
                {
                    string term = RandomInts.RandomFrom(Random(), terms);
                    iw1.DeleteDocuments(new Term("s", term));
                    iw2.DeleteDocuments(new Term("s", term));
                }
                Document doc = randomDocument();
                iw1.AddDocument(doc);
                iw2.AddDocument(doc);
                if (Random().nextInt(8) == 0)
                {
                    iw1.Commit();
                    iw2.Commit();
                }
            }
            // Make sure we have something to merge
            iw1.Commit();
            iw2.Commit();
            Document doc2 = randomDocument();
            // NOTE: don't use RIW.addDocument directly, since it sometimes commits
            // which may trigger a merge, in which case forceMerge may not do anything.
            // With field updates this is a problem, since the updates can go into the
            // single segment in the index, and therefore the index won't be sorted.
            // This hurts the assumption of the test later on, that the index is sorted
            // by SortingMP.
            iw1.w.AddDocument(doc2);
            iw2.w.AddDocument(doc2);

            if (DefaultCodecSupportsFieldUpdates())
            {
                // update NDV of docs belonging to one term (covers many documents)
                long value = Random().NextLong();
                string term = RandomInts.RandomFrom(Random(), terms);
                iw1.w.UpdateNumericDocValue(new Term("s", term), "ndv", value);
                iw2.w.UpdateNumericDocValue(new Term("s", term), "ndv", value);
            }

            iw1.ForceMerge(1);
            iw2.ForceMerge(1);
            iw1.Dispose();
            iw2.Dispose();
            reader = DirectoryReader.Open(dir1);
            sortedReader = DirectoryReader.Open(dir2);
        }
Example #23
 public String[] getFolders()
 {
     String[] folders = new String[listFolders.size()];
     return(this.listFolders.toArray(folders));
 }
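The two-step copy above is the Java idiom for producing a typed array; on a BCL List<string> the same result is a one-liner (a sketch assuming listFolders is a List<string>):

 public string[] GetFoldersSketch()
 {
     return this.listFolders.ToArray(); // List<T>.ToArray() allocates and copies in one call
 }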
Example #24
        public TypeBuilder[] getDependencies()
        {
            checkCreated();
            var dependencies = new HashSet <TypeBuilder>();

            addType(dependencies, baseType);
            foreach (var t in interfaces)
            {
                addType(dependencies, t);
            }
            foreach (var a in annotations)
            {
                addType(dependencies, a.Type);
            }
            foreach (var nt in nestedTypes)
            {
                foreach (var t in ((TypeBuilder)nt).getDependencies())
                {
                    dependencies.add((TypeBuilder)t);
                }
            }
            foreach (var f in fields)
            {
                foreach (var a in f.Annotations)
                {
                    addType(dependencies, a.Type);
                }
                addType(dependencies, f.Type);
            }
            foreach (var m in methods)
            {
                if (!m.IsExcludedFromCompilation)
                {
                    foreach (var a in m.Annotations)
                    {
                        addType(dependencies, a.Type);
                    }
                    addType(dependencies, m.ReturnType);
                    foreach (var p in m.Parameters)
                    {
                        foreach (var a in p.Annotations)
                        {
                            addType(dependencies, a.Type);
                        }
                        addType(dependencies, p.Type);
                    }
                    foreach (var e in m.Exceptions)
                    {
                        addType(dependencies, e);
                    }
                    foreach (var instruction in ((MethodBuilder)m).CodeGenerator.Instructions)
                    {
                        switch (instruction.Opcode)
                        {
                        case Getfield:
                        case Getstatic:
                        case Putfield:
                        case Putstatic:
                            addType(dependencies, instruction.Field.DeclaringType);
                            break;

                        case Invokedynamic:
                        case Invokeinterface:
                        case Invokespecial:
                        case Invokestatic:
                        case Invokevirtual:
                            addType(dependencies, instruction.Method.DeclaringType);
                            break;

                        case Anewarray:
                        case Checkcast:
                        case Instanceof:
                        case New:
                            addType(dependencies, instruction.Type);
                            break;
                        }
                    }
                }
            }
            return(dependencies.toArray(new TypeBuilder[dependencies.size()]));
        }
Example #26
        public void TestWithContexts()
        {
            Directory dir = NewDirectory();
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            iwc.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, iwc);
            KeyValuePair<List<string>, IDictionary<string, Document>> res = GenerateIndexDocuments(AtLeast(1000), true, true);
            IDictionary<string, Document> docs = res.Value;
            List<string> invalidDocTerms = res.Key;
            foreach (Document doc in docs.Values)
            {
                writer.AddDocument(doc);
            }
            writer.Commit();
            writer.Dispose();
            IndexReader ir = DirectoryReader.Open(dir);
            IDictionary dictionary = new DocumentDictionary(ir, FIELD_NAME, WEIGHT_FIELD_NAME, PAYLOAD_FIELD_NAME, CONTEXT_FIELD_NAME);
            IInputIterator inputIterator = dictionary.EntryIterator;
            BytesRef f;
            while ((f = inputIterator.Next()) != null)
            {
                string field = f.Utf8ToString();
                Document doc = docs.ContainsKey(field) ? docs[field] : null;
                docs.Remove(field);
                //Document doc = docs.remove(f.utf8ToString());
                assertTrue(f.equals(new BytesRef(doc.Get(FIELD_NAME))));
                IndexableField weightField = doc.GetField(WEIGHT_FIELD_NAME);
                assertEquals(inputIterator.Weight, (weightField != null) ? Convert.ToInt64(weightField.NumericValue) : 0);
                assertTrue(inputIterator.Payload.equals(doc.GetField(PAYLOAD_FIELD_NAME).BinaryValue));
                ISet<BytesRef> oriCtxs = new HashSet<BytesRef>();
                IEnumerable<BytesRef> contextSet = inputIterator.Contexts;
                foreach (IndexableField ctxf in doc.GetFields(CONTEXT_FIELD_NAME))
                {
                    oriCtxs.add(ctxf.BinaryValue);
                }
                assertEquals(oriCtxs.size(), contextSet.Count());
            }

            foreach (string invalidTerm in invalidDocTerms)
            {
                var invalid = docs[invalidTerm];
                docs.Remove(invalidTerm);
                assertNotNull(invalid);
            }
            assertTrue(!docs.Any());

            ir.Dispose();
            dir.Dispose();
        }
Example #27
 protected internal virtual void showTokenCount()
 {
     if (this.logger.isLoggable(Level.INFO))
     {
         HashSet  hashSet  = new HashSet();
         Iterator iterator = this.activeList.iterator();
         while (iterator.hasNext())
         {
             for (Token token = (Token)iterator.next(); token != null; token = token.getPredecessor())
             {
                 hashSet.add(token);
             }
         }
         this.logger.info(new StringBuilder().append("Token Lattice size: ").append(hashSet.size()).toString());
         hashSet  = new HashSet();
         iterator = this.resultList.iterator();
         while (iterator.hasNext())
         {
             for (Token token = (Token)iterator.next(); token != null; token = token.getPredecessor())
             {
                 hashSet.add(token);
             }
         }
         this.logger.info(new StringBuilder().append("Result Lattice size: ").append(hashSet.size()).toString());
     }
 }
Example #28
        protected internal virtual void showTokenCount()
        {
            HashSet  hashSet  = new HashSet();
            Iterator iterator = this.activeList.iterator();

            while (iterator.hasNext())
            {
                for (Token token = (Token)iterator.next(); token != null; token = token.getPredecessor())
                {
                    hashSet.add(token);
                }
            }
            this.logger.info(new StringBuilder().append("Token Lattice size: ").append(hashSet.size()).toString());
            hashSet  = new HashSet();
            iterator = this.resultList.iterator();
            while (iterator.hasNext())
            {
                for (Token token = (Token)iterator.next(); token != null; token = token.getPredecessor())
                {
                    hashSet.add(token);
                }
            }
            this.logger.info(new StringBuilder().append("Result Lattice size: ").append(hashSet.size()).toString());
        }