Ejemplo n.º 1
0
        // Rotates { 1, 2, 3, 4, 5 } around the position two steps before the end
        // and expects the copied output to read 4, 5, 1, 2, 3.
        public void RotateCopyTest2()
        {
            int[] source = { 1, 2, 3, 4, 5 };
            IList<int> rotated = new List<int>();

            using (IInputIterator<int> first = new InputIterator<int>(source))
            {
                // Work on a clone so the original iterator keeps its position.
                IInputIterator<int> middle = ((IShallowClone)first).ShallowClone() as IInputIterator<int>;
                middle.End();

                // Step the clone back two positions from where End() left it; the
                // expected output below starts with the element 4.
                ICursor middleCursor = (ICursor)middle;
                int endPosition = middleCursor.GetPosition();
                middleCursor.SetPosition(endPosition - 2);

                using (IOutputIterator<int> sink = new BackInsertIterator<int>(rotated))
                {
                    Algorithm.RotateCopy(first, middle, sink);
                }
            }

            // Compare element by element, stopping at the first mismatch.
            int[] expected = { 4, 5, 1, 2, 3 };
            bool isCorrectData = true;
            for (int i = 0; i < expected.Length; i++)
            {
                if (rotated[i] != expected[i])
                {
                    isCorrectData = false;
                    break;
                }
            }

            Assert.IsTrue(isCorrectData);
        }
 /// <summary>
 /// Creates a new iterator, buffering entries from the specified iterator.
 /// </summary>
 /// <param name="source">Iterator that is drained until <c>Next()</c> returns <c>null</c>.</param>
 public BufferedInputIterator(IInputIterator source)
 {
     hasPayloads = source.HasPayloads;
     hasContexts = source.HasContexts;

     int buffered = 0;
     for (BytesRef term = source.Next(); term != null; term = source.Next())
     {
         entries.Append(term);
         if (hasPayloads)
         {
             payloads.Append(source.Payload);
         }
         if (hasContexts)
         {
             contextSets.Add(source.Contexts);
         }

         // Enlarge the weight array when it has no free slot left.
         if (buffered >= freqs.Length)
         {
             freqs = ArrayUtil.Grow(freqs, freqs.Length + 1);
         }
         freqs[buffered] = source.Weight;
         buffered++;
     }

     comp = source.Comparator;
 }
        /// <summary>
        /// Indexes generated documents, reads them back through a
        /// <c>DocumentValueSourceDictionary</c> built with a constant value source of 10,
        /// and checks the term, weight and payload of every entry.
        /// </summary>
        public void TestWithValueSource()
        {
            Directory dir = NewDirectory();
            IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            config.SetMergePolicy(NewLogMergePolicy());

            RandomIndexWriter indexWriter = new RandomIndexWriter(Random(), dir, config);
            IDictionary<string, Document> pending = GenerateIndexDocuments(AtLeast(100));
            foreach (Document generated in pending.Values)
            {
                indexWriter.AddDocument(generated);
            }
            indexWriter.Commit();
            indexWriter.Dispose();

            IndexReader reader = DirectoryReader.Open(dir);
            IDictionary dictionary = new DocumentValueSourceDictionary(reader, FIELD_NAME, new DoubleConstValueSource(10), PAYLOAD_FIELD_NAME);
            IInputIterator entryIterator = dictionary.GetEntryIterator();

            for (BytesRef term = entryIterator.Next(); term != null; term = entryIterator.Next())
            {
                string key = term.Utf8ToString();

                // Take the matching document out of the map so that leftovers can
                // be detected once the iterator is exhausted.
                Document expected;
                pending.TryGetValue(key, out expected);
                pending.Remove(key);

                assertTrue(term.equals(new BytesRef(expected.Get(FIELD_NAME))));
                assertEquals(entryIterator.Weight, 10);
                assertTrue(entryIterator.Payload.equals(expected.GetField(PAYLOAD_FIELD_NAME).GetBinaryValue()));
            }

            assertTrue(!pending.Any());
            reader.Dispose();
            dir.Dispose();
        }
        /// <summary>
        /// Matches the input against an arithmetic-expression PEG grammar and
        /// returns the resulting AST.
        /// </summary>
        /// <param name="inputIterator">Input handed to the parser visitor.</param>
        /// <returns>The AST produced by the visitor when the grammar matches.</returns>
        /// <exception cref="InvalidInputException">The input does not match the grammar.</exception>
        public AstNode Parse(IInputIterator inputIterator)
        {
            // Sums of products over numbers, \k<Name> variables and parenthesised
            // sub-expressions; S consumes whitespace and /* */ comments.
            string grammarText = @"
                WhiteSpace: [\s\n\t ]+;
                Comment: '/*' (!'*/' .)* '*/';
                (?<S>): (WhiteSpace / Comment)*;
                (?<Variable>): '\k<' S (?<Name>[a-zA-Z][a-zA-Z0-9]*) S '>';
                (?<Digit>): [0-9]+('.'[0-9]+)?;
                Value: Variable / Digit / '(' S Expr S ')';
                (?<Product \rsc>): Value S ((?<Symbol> '*' / '/') S Value)*;
                (?<Sum \rsc>): Product S ((?<Symbol>'+' / '-') S Product)*;
                (?<Expr \rsc>): S Sum S;
            ".Trim();

            AExpression root = PEGrammar.Load(grammarText);
            var parser = new NpegParserVisitor(inputIterator);
            root.Accept(parser);

            if (!parser.IsMatch)
            {
                throw new InvalidInputException();
            }

            return parser.AST;
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Reads a generated file that uses " , " as the field delimiter and checks
        /// that every entry comes back with the expected term, weight and payload.
        /// </summary>
        public void TestFileWithDifferentDelimiter()
        {
            const string delimiter = " , ";

            KeyValuePair<List<List<string>>, string> generated = generateFileInput(AtLeast(100), delimiter, true, true);
            Stream input = new MemoryStream(generated.Value.getBytes(Encoding.UTF8));
            FileDictionary dictionary = new FileDictionary(input, delimiter);
            List<List<string>> expectedEntries = generated.Key;
            IInputIterator iterator = dictionary.GetEntryIterator();

            assertTrue(iterator.HasPayloads);

            int seen = 0;
            for (BytesRef term = iterator.Next(); term != null; term = iterator.Next())
            {
                assertTrue(expectedEntries.size() > seen);
                List<string> expected = expectedEntries[seen];

                // Each entry holds at least a term and a weight.
                assertTrue(expected.size() >= 2);
                assertEquals(expected[0], term.Utf8ToString());
                assertEquals(long.Parse(expected[1], CultureInfo.InvariantCulture), iterator.Weight);

                // A third column, when present, is the payload; otherwise the
                // payload is expected to be empty.
                if (expected.size() != 3)
                {
                    assertEquals(iterator.Payload.Length, 0);
                }
                else
                {
                    assertEquals(expected[2], iterator.Payload.Utf8ToString());
                }

                seen++;
            }

            assertEquals(seen, expectedEntries.size());
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Creates a new iterator, buffering entries from the specified iterator.
        /// </summary>
        /// <param name="source">Iterator that is drained until <c>Next()</c> returns <c>null</c>.</param>
        public BufferedInputIterator(IInputIterator source)
        {
            hasPayloads = source.HasPayloads;
            hasContexts = source.HasContexts;

            int buffered = 0;
            for (BytesRef term = source.Next(); term != null; term = source.Next())
            {
                m_entries.Append(term);
                if (hasPayloads)
                {
                    m_payloads.Append(source.Payload);
                }
                if (hasContexts)
                {
                    m_contextSets.Add(source.Contexts);
                }

                // Enlarge the weight array when it has no free slot left.
                if (buffered >= m_freqs.Length)
                {
                    m_freqs = ArrayUtil.Grow(m_freqs, m_freqs.Length + 1);
                }
                m_freqs[buffered] = source.Weight;
                buffered++;
            }

            comp = source.Comparer;
        }
        /// <summary>
        /// Matches the input against an arithmetic-expression PEG grammar and
        /// returns the resulting AST.
        /// </summary>
        /// <param name="inputIterator">Input handed to the parser visitor.</param>
        /// <returns>The AST produced by the visitor when the grammar matches.</returns>
        /// <exception cref="InvalidInputException">The input does not match the grammar.</exception>
        public AstNode Parse(IInputIterator inputIterator)
        {
            // Sums of products over numbers, \k<Name> variables and parenthesised
            // sub-expressions; S consumes whitespace and /* */ comments.
            string grammarText = @"
				WhiteSpace: [\s\n\t ]+;
				Comment: '/*' (!'*/' .)* '*/';
				(?<S>): (WhiteSpace / Comment)*;
				(?<Variable>): '\k<' S (?<Name>[a-zA-Z][a-zA-Z0-9]*) S '>';
				(?<Digit>): [0-9]+('.'[0-9]+)?;
				Value: Variable / Digit / '(' S Expr S ')';
				(?<Product \rsc>): Value S ((?<Symbol> '*' / '/') S Value)*;
				(?<Sum \rsc>): Product S ((?<Symbol>'+' / '-') S Product)*;
				(?<Expr \rsc>): S Sum S;
            ".Trim();

            AExpression root = PEGrammar.Load(grammarText);
            var parser = new NpegParserVisitor(inputIterator);
            root.Accept(parser);

            if (!parser.IsMatch)
            {
                throw new InvalidInputException();
            }

            return parser.AST;
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Rebuilds the ternary tree from the terms and weights of <paramref name="tfit"/>.
        /// </summary>
        /// <param name="tfit">Input terms; must carry neither payloads nor contexts.</param>
        /// <exception cref="System.ArgumentException">The input has payloads or contexts.</exception>
        public override void Build(IInputIterator tfit)
        {
            if (tfit.HasPayloads)
            {
                throw new System.ArgumentException("this suggester doesn't support payloads");
            }
            if (tfit.HasContexts)
            {
                throw new System.ArgumentException("this suggester doesn't support contexts");
            }

            root = new TernaryTreeNode();

            // The comparer is referenced under a suppression for warnings 612/618.
#pragma warning disable 612, 618
            var utf16Order = BytesRef.UTF8SortedAsUTF16Comparer;
#pragma warning restore 612, 618

            // Make sure the input is sorted and the comparer uses UTF16 sort order.
            if (tfit.Comparer != utf16Order)
            {
                tfit = new SortedInputIterator(tfit, utf16Order);
            }

            // Buffer every term (converted to UTF-16) together with its weight.
            List<string> keys = new List<string>();
            List<object> weights = new List<object>();
            CharsRef utf16 = new CharsRef();
            for (BytesRef term = tfit.Next(); term != null; term = tfit.Next())
            {
                utf16.Grow(term.Length);
                UnicodeUtil.UTF8toUTF16(term.Bytes, term.Offset, term.Length, utf16);
                keys.Add(utf16.ToString());
                weights.Add(Convert.ToInt64(tfit.Weight));
            }

            autocomplete.BalancedTree(keys.ToArray(), weights.ToArray(), 0, keys.Count - 1, root);
        }
 // Stores the two iterators and the variable values, and pushes an initial
 // empty stack of doubles onto the result stack.
 public LimitingRepetitionVariableLengthExpressionSolver(IInputIterator userExpressionIterator, IInputIterator inputIterator, List<AstNode> variableValues)
 {
     _results.Push(new Stack<double>());

     _variableValues = variableValues;
     _inputIterator = inputIterator;
     _userExpressionIterator = userExpressionIterator;
 }
Ejemplo n.º 10
0
        /// <summary>
        /// Rebuilds the suggester index from the entries of <paramref name="iter"/>.
        /// Any previously opened searcher manager and writer are disposed first.
        /// </summary>
        /// <param name="iter">Suggestions to index; payloads are read only when
        /// <c>HasPayloads</c> is true.</param>
        public override void Build(IInputIterator iter)
        {
            if (m_searcherMgr != null)
            {
                m_searcherMgr.Dispose();
                m_searcherMgr = null;
            }

            if (writer != null)
            {
                writer.Dispose();
                writer = null;
            }

            bool success = false;

            try
            {
                // First pass: build a temporary normal Lucene index,
                // just indexing the suggestions as they iterate:
                writer = new IndexWriter(dir, GetIndexWriterConfig(matchVersion, GramAnalyzer, OpenMode.CREATE));

                // TODO: use threads?
                BytesRef text;
                while ((text = iter.Next()) != null)
                {
                    BytesRef payload = iter.HasPayloads ? iter.Payload : null;

                    Add(text, iter.Contexts, iter.Weight, payload);
                }

                m_searcherMgr = new SearcherManager(writer, true, null);
                success       = true;
            }
            finally
            {
                // On failure, close the half-built writer without masking the
                // original exception. Nothing needs closing on success: the
                // reader local that used to be closed here was never assigned.
                if (!success)
                {
                    IOUtils.CloseWhileHandlingException(writer);
                    writer = null;
                }
            }
        }
Ejemplo n.º 11
0
 /// <summary>
 /// Creates a new sorted wrapper, sorting by BytesRef
 /// (ascending) then cost (ascending).
 /// </summary>
 /// <param name="source">Iterator whose entries are sorted.</param>
 /// <param name="comparator">Comparator stored for ordering the terms.</param>
 public SortedInputIterator(IInputIterator source, IComparer<BytesRef> comparator)
 {
     tieBreakByCostComparator = new ComparatorAnonymousInnerClassHelper(this);

     this.source = source;
     this.comparator = comparator;
     hasPayloads = source.HasPayloads;
     hasContexts = source.HasContexts;

     // Sort() runs last; presumably it reads the fields assigned above.
     reader = Sort();
 }
Ejemplo n.º 12
0
 // Wraps the source for the given lookup instance. When assertions are
 // enabled, asserts that the source does not carry payloads.
 internal WFSTInputIterator(WFSTCompletionLookup outerInstance, IInputIterator source)
     : base(source)
 {
     this.outerInstance = outerInstance;

     if (Debugging.AssertsEnabled)
     {
         Debugging.Assert(!source.HasPayloads);
     }
 }
Ejemplo n.º 13
0
        /// <summary>
        /// Creates a new sorted wrapper, sorting by BytesRef
        /// (ascending) then cost (ascending).
        /// </summary>
        /// <param name="source">Iterator whose entries are sorted.</param>
        /// <param name="comparer">Comparer used to order the decoded terms.</param>
        public SortedInputIterator(IInputIterator source, IComparer<BytesRef> comparer)
        {
            this.tieBreakByCostComparer = Comparer<BytesRef>.Create((left, right) =>
            {
                // Decode shallow copies in case decoding changes the BytesRef.
                BytesRef leftTerm = new BytesRef
                {
                    Bytes = left.Bytes,
                    Offset = left.Offset,
                    Length = left.Length
                };
                BytesRef rightTerm = new BytesRef
                {
                    Bytes = right.Bytes,
                    Offset = right.Offset,
                    Length = right.Length
                };
                ByteArrayDataInput scratchInput = new ByteArrayDataInput();

                // Decode the cost first, then payload and contexts when present,
                // always left before right.
                long leftCost = this.Decode(leftTerm, scratchInput);
                long rightCost = this.Decode(rightTerm, scratchInput);
                if (this.HasPayloads)
                {
                    this.DecodePayload(leftTerm, scratchInput);
                    this.DecodePayload(rightTerm, scratchInput);
                }
                if (this.HasContexts)
                {
                    this.DecodeContexts(leftTerm, scratchInput);
                    this.DecodeContexts(rightTerm, scratchInput);
                }

                // LUCENENET NOTE: this uses the comparer field, which is not the
                // same thing as the Comparer property.
                int termOrder = this.comparer.Compare(leftTerm, rightTerm);

                // Equal terms are ordered by ascending cost.
                return termOrder != 0 ? termOrder : leftCost.CompareTo(rightCost);
            });

            this.hasPayloads = source.HasPayloads;
            this.hasContexts = source.HasContexts;
            this.source = source;
            this.comparer = comparer;
            this.reader = Sort();
        }
Ejemplo n.º 14
0
        /// <summary>
        /// Indexes generated documents with payloads and contexts, then checks
        /// the term, weight, payload and number of contexts of every dictionary entry.
        /// </summary>
        public void TestWithContexts()
        {
            Directory dir = NewDirectory();
            IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            config.SetMergePolicy(NewLogMergePolicy());

            RandomIndexWriter indexWriter = new RandomIndexWriter(Random(), dir, config);
            KeyValuePair<List<string>, IDictionary<string, Document>> generated = GenerateIndexDocuments(AtLeast(1000), true, true);
            IDictionary<string, Document> pending = generated.Value;
            List<string> invalidTerms = generated.Key;

            foreach (Document toIndex in pending.Values)
            {
                indexWriter.AddDocument(toIndex);
            }
            indexWriter.Commit();
            indexWriter.Dispose();

            IndexReader reader = DirectoryReader.Open(dir);
            IDictionary dictionary = new DocumentDictionary(reader, FIELD_NAME, WEIGHT_FIELD_NAME, PAYLOAD_FIELD_NAME, CONTEXT_FIELD_NAME);
            IInputIterator entryIterator = dictionary.GetEntryIterator();

            for (BytesRef term = entryIterator.Next(); term != null; term = entryIterator.Next())
            {
                string key = term.Utf8ToString();

                // Take the matching document out of the map so that only the
                // invalid ones remain after the loop.
                Document expected;
                pending.TryGetValue(key, out expected);
                pending.Remove(key);

                assertTrue(term.equals(new BytesRef(expected.Get(FIELD_NAME))));

                // A document without a weight field is expected to report weight 0.
                IIndexableField weightField = expected.GetField(WEIGHT_FIELD_NAME);
                assertEquals(entryIterator.Weight, (weightField != null) ? Convert.ToInt64(weightField.GetNumericValue()) : 0);
                assertTrue(entryIterator.Payload.equals(expected.GetField(PAYLOAD_FIELD_NAME).GetBinaryValue()));

                ISet<BytesRef> expectedContexts = new HashSet<BytesRef>();
                IEnumerable<BytesRef> actualContexts = entryIterator.Contexts;
                foreach (IIndexableField contextField in expected.GetFields(CONTEXT_FIELD_NAME))
                {
                    expectedContexts.add(contextField.GetBinaryValue());
                }
                assertEquals(expectedContexts.size(), actualContexts.Count());
            }

            // The invalid documents were not returned by the iterator, so they
            // must still be in the map.
            foreach (string invalidTerm in invalidTerms)
            {
                var leftover = pending[invalidTerm];
                pending.Remove(invalidTerm);
                assertNotNull(leftover);
            }
            assertTrue(!pending.Any());

            reader.Dispose();
            dir.Dispose();
        }
 /// <summary>
 /// Creates a new iterator, wrapping the specified iterator and
 /// returning elements in a random order.
 /// </summary>
 /// <param name="source">Iterator passed to the base constructor.</param>
 public UnsortedInputIterator(IInputIterator source)
     : base(source)
 {
     // Start from the identity permutation over the entries.
     ords = new int[entries.Size()];
     for (int i = 0; i < ords.Length; i++)
     {
         ords[i] = i;
     }

     // Fisher-Yates shuffle: each slot is swapped with a position drawn from
     // the not-yet-fixed prefix [0, i], which makes every permutation equally
     // likely. Drawing from the whole range on every step (as before) is biased.
     Random random = new Random();
     for (int i = ords.Length - 1; i > 0; i--)
     {
         int randomPosition = random.Next(i + 1);
         int temp = ords[i];
         ords[i] = ords[randomPosition];
         ords[randomPosition] = temp;
     }
 }
Ejemplo n.º 16
0
        /// <summary>
        /// Creates an iterator limited to a number of elements, starting at the
        /// current location of <paramref name="inputIterator"/>.
        /// </summary>
        /// <param name="inputIterator">
        /// InputIterator is set to the location we wanted to start with
        /// Requires IShallowClone</param>
        /// <param name="count">how many elements starting from the begin position;
        /// negative values are treated as 0</param>
        public LimitInputIterator(IInputIterator <T> inputIterator, int count)
        {
            this.inputIterator = inputIterator;

            beginIterator = ShallowClone() as IInputIterator <T>;

            // Clamp BEFORE deriving the field. The clamp used to run after the
            // assignment below, so it only changed the parameter and never
            // affected this.count.
            // NOTE(review): assumes the intent was to treat negative counts like
            // zero — confirm against how this.count is consumed.
            if (count < 0)
            {
                count = 0;
            }

            this.count = count - 1;

            this.currentIndex = 0;

            Begin();
        }
Ejemplo n.º 17
0
        /// <summary>
        /// Indexes generated documents without payloads or contexts and checks
        /// that every dictionary entry reports the expected term and weight and a null payload.
        /// </summary>
        public void TestWithoutPayload()
        {
            Directory dir = NewDirectory();
            IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
            config.SetMergePolicy(NewLogMergePolicy());

            RandomIndexWriter indexWriter = new RandomIndexWriter(Random, dir, config);
            KeyValuePair<List<string>, IDictionary<string, Document>> generated = GenerateIndexDocuments(AtLeast(1000), false, false);
            IDictionary<string, Document> pending = generated.Value;
            List<string> invalidTerms = generated.Key;

            foreach (Document toIndex in pending.Values)
            {
                indexWriter.AddDocument(toIndex);
            }
            indexWriter.Commit();
            indexWriter.Dispose();

            IndexReader reader = DirectoryReader.Open(dir);
            IDictionary dictionary = new DocumentDictionary(reader, FIELD_NAME, WEIGHT_FIELD_NAME);
            IInputIterator entryIterator = dictionary.GetEntryIterator();

            for (BytesRef term = entryIterator.Next(); term != null; term = entryIterator.Next())
            {
                // Take the matching document out of the map; the indexer throws
                // when the term is unknown.
                string key = term.Utf8ToString();
                Document expected = pending[key];
                pending.Remove(key);

                assertTrue(term.equals(new BytesRef(expected.Get(FIELD_NAME))));

                // A document without a weight field is expected to report weight 0.
                IIndexableField weightField = expected.GetField(WEIGHT_FIELD_NAME);
                assertEquals(entryIterator.Weight, (weightField != null) ? weightField.GetInt64ValueOrDefault() : 0);
                assertEquals(entryIterator.Payload, null);
            }

            // The invalid documents were not returned by the iterator, so they
            // must still be in the map.
            foreach (string invalidTerm in invalidTerms)
            {
                var leftover = pending[invalidTerm];
                pending.Remove(invalidTerm);
                assertNotNull(leftover);
            }

            assertTrue(!pending.Any());

            reader.Dispose();
            dir.Dispose();
        }
        /// <summary>
        /// Uses the sum of three weight fields as the value source and checks the
        /// term, weight, payload and contexts of every dictionary entry.
        /// </summary>
        public void TestWithContext()
        {
            Directory dir = NewDirectory();
            IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            config.SetMergePolicy(NewLogMergePolicy());

            RandomIndexWriter indexWriter = new RandomIndexWriter(Random(), dir, config);
            IDictionary<string, Document> pending = GenerateIndexDocuments(AtLeast(100));
            foreach (Document toIndex in pending.Values)
            {
                indexWriter.AddDocument(toIndex);
            }
            indexWriter.Commit();
            indexWriter.Dispose();

            IndexReader reader = DirectoryReader.Open(dir);

            ValueSource[] weightSources = new ValueSource[]
            {
                new Int64FieldSource(WEIGHT_FIELD_NAME_1),
                new Int64FieldSource(WEIGHT_FIELD_NAME_2),
                new Int64FieldSource(WEIGHT_FIELD_NAME_3)
            };
            IDictionary dictionary = new DocumentValueSourceDictionary(reader, FIELD_NAME, new SumSingleFunction(weightSources), PAYLOAD_FIELD_NAME, CONTEXTS_FIELD_NAME);
            IInputIterator entryIterator = dictionary.GetEntryIterator();

            for (BytesRef term = entryIterator.Next(); term != null; term = entryIterator.Next())
            {
                string key = term.Utf8ToString();

                // Take the matching document out of the map so that leftovers can
                // be detected once the iterator is exhausted.
                Document expected;
                pending.TryGetValue(key, out expected);
                pending.Remove(key);

                long weight1 = expected.GetField(WEIGHT_FIELD_NAME_1).GetInt64ValueOrDefault();
                long weight2 = expected.GetField(WEIGHT_FIELD_NAME_2).GetInt64ValueOrDefault();
                long weight3 = expected.GetField(WEIGHT_FIELD_NAME_3).GetInt64ValueOrDefault();

                assertTrue(term.equals(new BytesRef(expected.Get(FIELD_NAME))));
                assertEquals(entryIterator.Weight, (weight1 + weight2 + weight3));
                assertTrue(entryIterator.Payload.equals(expected.GetField(PAYLOAD_FIELD_NAME).GetBinaryValue()));

                ISet<BytesRef> expectedContexts = new HashSet<BytesRef>();
                foreach (IIndexableField contextField in expected.GetFields(CONTEXTS_FIELD_NAME))
                {
                    expectedContexts.add(contextField.GetBinaryValue());
                }
                assertEquals(expectedContexts, entryIterator.Contexts);
            }

            assertTrue(!pending.Any());
            reader.Dispose();
            dir.Dispose();
        }
Ejemplo n.º 19
0
        /// <summary>
        /// Creates a new iterator, wrapping the specified iterator and
        /// returning elements in a random order.
        /// </summary>
        /// <param name="source">Iterator passed to the base constructor.</param>
        public UnsortedInputIterator(IInputIterator source)
            : base(source)
        {
            // Start from the identity permutation over the entries.
            ords = new int[m_entries.Length];
            for (int i = 0; i < ords.Length; i++)
            {
                ords[i] = i;
            }

            // Fisher-Yates shuffle: each slot is swapped with a position drawn
            // from the not-yet-fixed prefix [0, i], which makes every permutation
            // equally likely. Drawing from the whole range on every step (as
            // before) is biased.
            Random random = new Random();
            for (int i = ords.Length - 1; i > 0; i--)
            {
                int randomPosition = random.Next(i + 1);
                int temp           = ords[i];
                ords[i] = ords[randomPosition];
                ords[randomPosition] = temp;
            }
        }
        /// <summary>
        /// Uses the sum of three weight fields as the value source and checks the
        /// term, weight and payload of every dictionary entry.
        /// </summary>
        public void TestBasic()
        {
            Directory dir = NewDirectory();
            IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            config.SetMergePolicy(NewLogMergePolicy());

            RandomIndexWriter indexWriter = new RandomIndexWriter(Random(), dir, config);
            IDictionary<string, Document> pending = GenerateIndexDocuments(AtLeast(100));
            foreach (Document toIndex in pending.Values)
            {
                indexWriter.AddDocument(toIndex);
            }
            indexWriter.Commit();
            indexWriter.Dispose();

            IndexReader reader = DirectoryReader.Open(dir);

            ValueSource[] weightSources = new ValueSource[]
            {
                new LongFieldSource(WEIGHT_FIELD_NAME_1),
                new LongFieldSource(WEIGHT_FIELD_NAME_2),
                new LongFieldSource(WEIGHT_FIELD_NAME_3)
            };
            IDictionary dictionary = new DocumentValueSourceDictionary(reader, FIELD_NAME, new SumFloatFunction(weightSources), PAYLOAD_FIELD_NAME);
            IInputIterator entryIterator = dictionary.EntryIterator;

            for (BytesRef term = entryIterator.Next(); term != null; term = entryIterator.Next())
            {
                string key = term.Utf8ToString();

                // Take the matching document out of the map so that leftovers can
                // be detected once the iterator is exhausted.
                Document expected;
                pending.TryGetValue(key, out expected);
                pending.Remove(key);

                long weight1 = Convert.ToInt64(expected.GetField(WEIGHT_FIELD_NAME_1).NumericValue);
                long weight2 = Convert.ToInt64(expected.GetField(WEIGHT_FIELD_NAME_2).NumericValue);
                long weight3 = Convert.ToInt64(expected.GetField(WEIGHT_FIELD_NAME_3).NumericValue);

                assertTrue(term.equals(new BytesRef(expected.Get(FIELD_NAME))));
                assertEquals(entryIterator.Weight, (weight1 + weight2 + weight3));
                assertTrue(entryIterator.Payload.equals(expected.GetField(PAYLOAD_FIELD_NAME).BinaryValue));
            }

            assertTrue(!pending.Any());
            reader.Dispose();
            dir.Dispose();
        }
Ejemplo n.º 21
0
        /// <summary>
        /// Uses the sum of three weight fields as the value source, without a
        /// payload field, and checks that every entry reports a null payload.
        /// </summary>
        public void TestWithoutPayload()
        {
            Directory dir = NewDirectory();
            IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
            config.SetMergePolicy(NewLogMergePolicy());

            RandomIndexWriter indexWriter = new RandomIndexWriter(Random, dir, config);
            IDictionary<string, Document> pending = GenerateIndexDocuments(AtLeast(100));
            foreach (Document toIndex in pending.Values)
            {
                indexWriter.AddDocument(toIndex);
            }
            indexWriter.Commit();
            indexWriter.Dispose();

            IndexReader reader = DirectoryReader.Open(dir);

            ValueSource[] weightSources = new ValueSource[]
            {
                new Int64FieldSource(WEIGHT_FIELD_NAME_1),
                new Int64FieldSource(WEIGHT_FIELD_NAME_2),
                new Int64FieldSource(WEIGHT_FIELD_NAME_3)
            };
            IDictionary dictionary = new DocumentValueSourceDictionary(reader, FIELD_NAME, new SumSingleFunction(weightSources));
            IInputIterator entryIterator = dictionary.GetEntryIterator();

            for (BytesRef term = entryIterator.Next(); term != null; term = entryIterator.Next())
            {
                // Take the matching document out of the map; the indexer throws
                // when the term is unknown.
                string key = term.Utf8ToString();
                Document expected = pending[key];
                pending.Remove(key);

                long weight1 = expected.GetField(WEIGHT_FIELD_NAME_1).GetInt64ValueOrDefault();
                long weight2 = expected.GetField(WEIGHT_FIELD_NAME_2).GetInt64ValueOrDefault();
                long weight3 = expected.GetField(WEIGHT_FIELD_NAME_3).GetInt64ValueOrDefault();

                assertTrue(term.equals(new BytesRef(expected.Get(FIELD_NAME))));
                assertEquals(entryIterator.Weight, (weight1 + weight2 + weight3));
                assertEquals(entryIterator.Payload, null);
            }

            // Every generated document must have been consumed.
            bool allConsumed = pending.Count == 0;
            assertTrue(allConsumed);

            reader.Dispose();
            dir.Dispose();
        }
Ejemplo n.º 22
0
        /// <summary>
        /// Checks that a dictionary over an index without documents yields no
        /// term, a weight of 0 and a null payload.
        /// </summary>
        public void TestEmptyReader()
        {
            Directory dir = NewDirectory();
            IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            config.SetMergePolicy(NewLogMergePolicy());

            // Commit with no documents added (make sure the index is created?).
            RandomIndexWriter emptyWriter = new RandomIndexWriter(Random(), dir, config);
            emptyWriter.Commit();
            emptyWriter.Dispose();

            IndexReader reader = DirectoryReader.Open(dir);
            IDictionary dictionary = new DocumentDictionary(reader, FIELD_NAME, WEIGHT_FIELD_NAME, PAYLOAD_FIELD_NAME);
            IInputIterator entryIterator = dictionary.GetEntryIterator();

            assertNull(entryIterator.Next());
            assertEquals(entryIterator.Weight, 0);
            assertNull(entryIterator.Payload);

            reader.Dispose();
            dir.Dispose();
        }
Ejemplo n.º 23
0
        /// <summary>
        /// Builds the FST from the terms and weights of <paramref name="iterator"/>.
        /// </summary>
        /// <param name="iterator">Input terms; must carry neither payloads nor contexts.</param>
        /// <exception cref="ArgumentException">The input has payloads or contexts.</exception>
        public override void Build(IInputIterator iterator)
        {
            if (iterator.HasPayloads)
            {
                throw new ArgumentException("this suggester doesn't support payloads");
            }
            if (iterator.HasContexts)
            {
                throw new ArgumentException("this suggester doesn't support contexts");
            }
            count = 0;
            IInputIterator iter        = new WFSTInputIterator(this, iterator);
            var            scratchInts = new Int32sRef();
            BytesRef       previous    = null;
            var            outputs     = PositiveInt32Outputs.Singleton;
            var            builder     = new Builder <long?>(FST.INPUT_TYPE.BYTE1, outputs);

            // No initial allocation: scratch is assigned by Next() before any read.
            BytesRef scratch;
            while ((scratch = iter.Next()) != null)
            {
                long cost = iter.Weight;

                if (previous == null)
                {
                    // First term: allocate the buffer that remembers the last added term.
                    previous = new BytesRef();
                }
                else if (scratch.Equals(previous))
                {
                    continue; // for duplicate suggestions, the best weight is actually
                    // added
                }
                Lucene.Net.Util.Fst.Util.ToInt32sRef(scratch, scratchInts);
                builder.Add(scratchInts, cost);
                previous.CopyBytes(scratch);
                count++;
            }
            fst = builder.Finish();
        }
Ejemplo n.º 24
0
        /// <summary>
        /// Reads a generated file that contains only terms and checks that every
        /// entry reports a weight of 1 and a null payload.
        /// </summary>
        public void TestFileWithTerm()
        {
            KeyValuePair<List<List<string>>, string> generated = generateFileInput(AtLeast(100), FileDictionary.DEFAULT_FIELD_DELIMITER, false, false);
            Stream input = new MemoryStream(generated.Value.getBytes(Encoding.UTF8));
            FileDictionary dictionary = new FileDictionary(input);
            List<List<string>> expectedEntries = generated.Key;
            IInputIterator iterator = dictionary.GetEntryIterator();

            assertFalse(iterator.HasPayloads);

            int seen = 0;
            for (BytesRef term = iterator.Next(); term != null; term = iterator.Next())
            {
                assertTrue(expectedEntries.size() > seen);
                List<string> expected = expectedEntries[seen];

                // Each entry holds at least a term.
                assertTrue(expected.size() >= 1);
                assertEquals(expected[0], term.Utf8ToString());
                assertEquals(1, iterator.Weight);
                assertNull(iterator.Payload);

                seen++;
            }

            assertEquals(seen, expectedEntries.size());
        }
Ejemplo n.º 25
0
        // Runs Algorithm.FindEnd with { 1, 2, 3 } against { 1, 2, 3, 4, 1, 2, 3, 4 }
        // and expects the returned iterator to sit at position 4.
        public void FindEndTest()
        {
            int[] haystack = { 1, 2, 3, 4, 1, 2, 3, 4 };
            int[] needle = { 1, 2, 3 };
            const int expectedIndex = 4;

            int actualIndex = -1;

            using (IInputIterator<int> haystackIterator = new InputIterator<int>(haystack))
            using (IInputIterator<int> needleIterator = new InputIterator<int>(needle))
            {
                IInputIterator<int> found = Algorithm.FindEnd(haystackIterator, needleIterator);

                // Read the position while both iterators are still open.
                actualIndex = ((ICursor)found).GetPosition();
            }

            Assert.IsTrue(expectedIndex == actualIndex);
        }
Ejemplo n.º 26
0
        /// <summary>
        /// Rebuilds the ternary search trie from the terms and weights of
        /// <paramref name="tfit"/>. Empty terms are skipped.
        /// </summary>
        /// <param name="tfit">Input terms; must carry neither payloads nor contexts.</param>
        /// <exception cref="ArgumentException">The input has payloads or contexts.</exception>
        public override void Build(IInputIterator tfit)
        {
            // Reject unsupported input up front, before the input is wrapped below.
            // NOTE(review): assumes the wrapper reports the same HasContexts as the
            // iterator it wraps — confirm against UnsortedInputIterator.
            if (tfit.HasPayloads)
            {
                throw new ArgumentException("this suggester doesn't support payloads");
            }
            if (tfit.HasContexts)
            {
                throw new ArgumentException("this suggester doesn't support contexts");
            }
            if (tfit.Comparer != null)
            {
                // make sure it's unsorted
                // (this could result in yet another sorted iteration)
                tfit = new UnsortedInputIterator(tfit);
            }
            count = 0;
            trie  = new JaspellTernarySearchTrie {
                MatchAlmostDiff = editDistance
            };
            BytesRef spare;

            var charsSpare = new CharsRef();

            while ((spare = tfit.Next()) != null)
            {
                long weight = tfit.Weight;
                if (spare.Length == 0)
                {
                    continue;
                }
                // Convert the UTF-8 term to UTF-16 before storing it in the trie.
                charsSpare.Grow(spare.Length);
                UnicodeUtil.UTF8toUTF16(spare.Bytes, spare.Offset, spare.Length, charsSpare);
                trie.Put(charsSpare.ToString(), weight);
            }
        }
        /// <summary>
        /// Indexes generated documents, deletes a random subset of them, and checks that the entry
        /// iterator of a <c>DocumentValueSourceDictionary</c> returns exactly the surviving documents,
        /// each with the sum of its two weight fields and its stored payload.
        /// </summary>
        public void TestWithDeletions()
        {
            Directory         dir = NewDirectory();
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));

            iwc.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter writer            = new RandomIndexWriter(Random(), dir, iwc);
            IDictionary <string, Document> docs = GenerateIndexDocuments(AtLeast(100));
            Random        rand       = Random();
            List <string> termsToDel = new List <string>();

            foreach (Document doc in docs.Values)
            {
                // The size guard keeps at least one document out of the deletion list.
                if (rand.nextBoolean() && termsToDel.size() < docs.size() - 1)
                {
                    termsToDel.Add(doc.Get(FIELD_NAME));
                }
                writer.AddDocument(doc);
            }
            writer.Commit();

            foreach (string termToDel in termsToDel)
            {
                writer.DeleteDocuments(new Term(FIELD_NAME, termToDel));
            }
            writer.Commit();
            writer.Dispose();

            // Remove the deleted documents from the expected set; each must have been generated.
            foreach (string termToDel in termsToDel)
            {
                Document toDel;
                assertTrue("no generated document for deleted term " + termToDel,
                           docs.TryGetValue(termToDel, out toDel) && toDel != null);
                docs.Remove(termToDel);
            }

            IndexReader ir = DirectoryReader.Open(dir);

            assertTrue("NumDocs should be > 0 but was " + ir.NumDocs, ir.NumDocs > 0);
            assertEquals(ir.NumDocs, docs.size());
            ValueSource[] toAdd = new ValueSource[] { new Int64FieldSource(WEIGHT_FIELD_NAME_1), new Int64FieldSource(WEIGHT_FIELD_NAME_2) };

            IDictionary    dictionary    = new DocumentValueSourceDictionary(ir, FIELD_NAME, new SumSingleFunction(toAdd), PAYLOAD_FIELD_NAME);
            IInputIterator inputIterator = dictionary.GetEntryIterator();
            BytesRef       f;

            while ((f = inputIterator.Next()) != null)
            {
                string   field = f.Utf8ToString();
                Document doc;

                // Fail with a readable message (rather than a NullReferenceException below) when
                // the iterator returns a term that is not expected or was already consumed.
                assertTrue("iterator returned an unexpected or duplicate term: " + field,
                           docs.TryGetValue(field, out doc) && doc != null);
                docs.Remove(field);

                long w1 = doc.GetField(WEIGHT_FIELD_NAME_1).GetInt64ValueOrDefault();
                long w2 = doc.GetField(WEIGHT_FIELD_NAME_2).GetInt64ValueOrDefault();
                assertTrue(f.equals(new BytesRef(doc.Get(FIELD_NAME))));
                assertEquals(inputIterator.Weight, w2 + w1);
                assertTrue(inputIterator.Payload.equals(doc.GetField(PAYLOAD_FIELD_NAME).GetBinaryValue()));
            }

            // Every surviving document must have been returned by the iterator.
            assertTrue(!docs.Any());
            ir.Dispose();
            dir.Dispose();
        }
Ejemplo n.º 28
0
 /// <summary>
 /// Returns the bytes that <paramref name="iterator"/> reports for the span delimited by
 /// <paramref name="match"/>.
 /// </summary>
 /// <param name="match">Match whose <c>Start</c> and <c>End</c> are passed to the iterator.</param>
 /// <param name="iterator">Input queried through its <c>Text</c> method.</param>
 /// <returns>The result of <c>iterator.Text(match.Start, match.End)</c>.</returns>
 public static byte[] ValueAsBytes(this TokenMatch match, IInputIterator iterator)
 {
     var start = match.Start;
     var end   = match.End;

     return iterator.Text(start, end);
 }
Ejemplo n.º 29
0
        /// <summary>
        /// Builds the suggester's FST from the surface forms, weights and (when the iterator has
        /// them) payloads supplied by <paramref name="iterator"/>.
        /// </summary>
        /// <remarks>
        /// One record per analyzed path of each surface form is written to a temporary file, the
        /// file is sorted offline, and the sorted records are then added to the FST builder.
        /// Deletion of both temporary files is attempted in the <c>finally</c> block.
        /// </remarks>
        /// <param name="iterator">Source of surface forms, weights and optional payloads.</param>
        /// <exception cref="System.ArgumentException">
        /// The iterator reports contexts; an analyzed form (or, with payloads, a surface form) is
        /// longer than <c>ushort.MaxValue - 2</c>; or a surface form with a payload contains
        /// <c>PAYLOAD_SEP</c>.
        /// </exception>
        public override void Build(IInputIterator iterator)
        {
            if (iterator.HasContexts)
            {
                throw new System.ArgumentException("this suggester doesn't support contexts");
            }
            string prefix     = this.GetType().Name;
            var    directory  = OfflineSorter.DefaultTempDir();
            var    tempInput  = FileSupport.CreateTempFile(prefix, ".input", directory);
            var    tempSorted = FileSupport.CreateTempFile(prefix, ".sorted", directory);

            hasPayloads = iterator.HasPayloads;

            var writer = new OfflineSorter.ByteSequencesWriter(tempInput);

            OfflineSorter.ByteSequencesReader reader = null;
            var scratch = new BytesRef();

            TokenStreamToAutomaton ts2a = GetTokenStreamToAutomaton();

            bool success = false;

            count = 0;
            byte[] buffer = new byte[8];
            try
            {
                var      output = new ByteArrayDataOutput(buffer);
                BytesRef surfaceForm;

                while ((surfaceForm = iterator.Next()) != null)
                {
                    ISet <Int32sRef> paths = ToFiniteStrings(surfaceForm, ts2a);

                    maxAnalyzedPathsForOneInput = Math.Max(maxAnalyzedPathsForOneInput, paths.Count);

                    foreach (Int32sRef path in paths)
                    {
                        Util.Fst.Util.ToBytesRef(path, scratch);

                        // length of the analyzed text (FST input)
                        if (scratch.Length > ushort.MaxValue - 2)
                        {
                            throw new System.ArgumentException("cannot handle analyzed forms > " + (ushort.MaxValue - 2) +
                                                               " in length (got " + scratch.Length + ")");
                        }
                        ushort analyzedLength = (ushort)scratch.Length;

                        // compute the required length:
                        // analyzed sequence + weight (4) + surface + analyzedLength (short)
                        int requiredLength = analyzedLength + 4 + surfaceForm.Length + 2;

                        BytesRef payload;

                        if (hasPayloads)
                        {
                            if (surfaceForm.Length > (ushort.MaxValue - 2))
                            {
                                throw new ArgumentException("cannot handle surface form > " + (ushort.MaxValue - 2) +
                                                            " in length (got " + surfaceForm.Length + ")");
                            }
                            payload = iterator.Payload;
                            // payload + surfaceLength (short)
                            requiredLength += payload.Length + 2;
                        }
                        else
                        {
                            payload = null;
                        }

                        buffer = ArrayUtil.Grow(buffer, requiredLength);

                        output.Reset(buffer);

                        // Record layout: analyzedLength, analyzed bytes, weight, then either
                        // (surfaceLength, surface, payload) or just the surface bytes.
                        output.WriteInt16((short)analyzedLength);

                        output.WriteBytes(scratch.Bytes, scratch.Offset, scratch.Length);

                        output.WriteInt32(EncodeWeight(iterator.Weight));

                        if (hasPayloads)
                        {
                            // Scan only the bytes that belong to the surface form: the slice
                            // starts at Offset, not at index 0 of the backing array.
                            for (int i = 0; i < surfaceForm.Length; i++)
                            {
                                if (surfaceForm.Bytes[surfaceForm.Offset + i] == PAYLOAD_SEP)
                                {
                                    throw new ArgumentException(
                                              "surface form cannot contain unit separator character U+001F; this character is reserved");
                                }
                            }
                            output.WriteInt16((short)surfaceForm.Length);
                            output.WriteBytes(surfaceForm.Bytes, surfaceForm.Offset, surfaceForm.Length);
                            output.WriteBytes(payload.Bytes, payload.Offset, payload.Length);
                        }
                        else
                        {
                            output.WriteBytes(surfaceForm.Bytes, surfaceForm.Offset, surfaceForm.Length);
                        }

                        Debug.Assert(output.Position == requiredLength, output.Position + " vs " + requiredLength);

                        writer.Write(buffer, 0, output.Position);
                    }
                    count++;
                }
                writer.Dispose();

                // Sort all input/output pairs (required by FST.Builder):
                (new OfflineSorter(new AnalyzingComparer(hasPayloads))).Sort(tempInput, tempSorted);

                // Free disk space:
                tempInput.Delete();

                reader = new OfflineSorter.ByteSequencesReader(tempSorted);

                var outputs = new PairOutputs <long?, BytesRef>(PositiveInt32Outputs.Singleton,
                                                                ByteSequenceOutputs.Singleton);
                var builder = new Builder <PairOutputs <long?, BytesRef> .Pair>(FST.INPUT_TYPE.BYTE1, outputs);

                // Build FST:
                BytesRef  previousAnalyzed = null;
                BytesRef  analyzed         = new BytesRef();
                BytesRef  surface          = new BytesRef();
                Int32sRef scratchInts      = new Int32sRef();
                var       input            = new ByteArrayDataInput();

                // Used to remove duplicate surface forms (but we
                // still index the highest-weight one).  We clear
                // this when we see a new analyzed form, so it cannot
                // grow unbounded (at most 256 entries):
                var seenSurfaceForms = new JCG.HashSet <BytesRef>();

                var dedup = 0;
                while (reader.Read(scratch))
                {
                    input.Reset(scratch.Bytes, scratch.Offset, scratch.Length);
                    ushort analyzedLength = (ushort)input.ReadInt16();

                    // Two spare bytes are reserved for the (0, dedup) suffix appended below.
                    analyzed.Grow(analyzedLength + 2);
                    input.ReadBytes(analyzed.Bytes, 0, analyzedLength);
                    analyzed.Length = analyzedLength;

                    long cost = input.ReadInt32();

                    // surface points into the scratch buffer; it is deep-copied before being kept.
                    surface.Bytes = scratch.Bytes;
                    if (hasPayloads)
                    {
                        surface.Length = (ushort)input.ReadInt16();
                        surface.Offset = input.Position;
                    }
                    else
                    {
                        surface.Offset = input.Position;
                        surface.Length = scratch.Length - surface.Offset;
                    }

                    if (previousAnalyzed == null)
                    {
                        previousAnalyzed = new BytesRef();
                        previousAnalyzed.CopyBytes(analyzed);
                        seenSurfaceForms.Add(BytesRef.DeepCopyOf(surface));
                    }
                    else if (analyzed.Equals(previousAnalyzed))
                    {
                        dedup++;
                        if (dedup >= maxSurfaceFormsPerAnalyzedForm)
                        {
                            // More than maxSurfaceFormsPerAnalyzedForm
                            // dups: skip the rest:
                            continue;
                        }
                        if (seenSurfaceForms.Contains(surface))
                        {
                            continue;
                        }
                        seenSurfaceForms.Add(BytesRef.DeepCopyOf(surface));
                    }
                    else
                    {
                        dedup = 0;
                        previousAnalyzed.CopyBytes(analyzed);
                        seenSurfaceForms.Clear();
                        seenSurfaceForms.Add(BytesRef.DeepCopyOf(surface));
                    }

                    // TODO: I think we can avoid the extra 2 bytes when
                    // there is no dup (dedup==0), but we'd have to fix
                    // the exactFirst logic ... which would be sort of
                    // hairy because we'd need to special case the two
                    // (dup/not dup)...

                    // NOTE: must be byte 0 so we sort before whatever
                    // is next
                    analyzed.Bytes[analyzed.Offset + analyzed.Length]     = 0;
                    analyzed.Bytes[analyzed.Offset + analyzed.Length + 1] = (byte)dedup;
                    analyzed.Length += 2;

                    Util.Fst.Util.ToInt32sRef(analyzed, scratchInts);
                    if (!hasPayloads)
                    {
                        builder.Add(scratchInts, outputs.NewPair(cost, BytesRef.DeepCopyOf(surface)));
                    }
                    else
                    {
                        // FST output is: surface bytes, PAYLOAD_SEP, payload bytes.
                        int      payloadOffset = input.Position + surface.Length;
                        int      payloadLength = scratch.Length - payloadOffset;
                        BytesRef br            = new BytesRef(surface.Length + 1 + payloadLength);
                        Array.Copy(surface.Bytes, surface.Offset, br.Bytes, 0, surface.Length);
                        br.Bytes[surface.Length] = PAYLOAD_SEP;
                        Array.Copy(scratch.Bytes, payloadOffset, br.Bytes, surface.Length + 1, payloadLength);
                        br.Length = br.Bytes.Length;
                        builder.Add(scratchInts, outputs.NewPair(cost, br));
                    }
                }
                fst = builder.Finish();

                success = true;
            }
            finally
            {
                if (success)
                {
                    IOUtils.Dispose(reader, writer);
                }
                else
                {
                    IOUtils.DisposeWhileHandlingException(reader, writer);
                }

                tempInput.Delete();
                tempSorted.Delete();
            }
        }
Ejemplo n.º 30
0
        /// <summary>
        /// Indexes generated documents (some of them flagged as invalid by the generator), deletes
        /// a random subset of the others, and checks that the entry iterator of a
        /// <c>DocumentDictionary</c> returns only expected documents, each with the value of its
        /// weight field (0 when the field is absent) and a null payload.
        /// </summary>
        public void TestWithDeletions()
        {
            Directory         dir = NewDirectory();
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));

            iwc.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, iwc);
            KeyValuePair <List <string>, IDictionary <string, Document> > res = GenerateIndexDocuments(AtLeast(1000), false, false);
            IDictionary <string, Document> docs = res.Value;
            List <String> invalidDocTerms       = res.Key;
            Random        rand       = Random();
            List <string> termsToDel = new List <string>();

            foreach (Document doc in docs.Values)
            {
                // Only documents that have the field and are not flagged invalid may be deleted.
                IIndexableField f2 = doc.GetField(FIELD_NAME);
                if (rand.nextBoolean() && f2 != null && !invalidDocTerms.Contains(f2.GetStringValue()))
                {
                    termsToDel.Add(doc.Get(FIELD_NAME));
                }
                writer.AddDocument(doc);
            }
            writer.Commit();

            foreach (string termToDel in termsToDel)
            {
                writer.DeleteDocuments(new Term(FIELD_NAME, termToDel));
            }
            writer.Commit();
            writer.Dispose();

            // Remove the deleted documents from the expected set; each must have been generated.
            foreach (string termToDel in termsToDel)
            {
                Document toDel;
                assertTrue("no generated document for deleted term " + termToDel,
                           docs.TryGetValue(termToDel, out toDel) && toDel != null);
                docs.Remove(termToDel);
            }

            IndexReader ir = DirectoryReader.Open(dir);

            assertEquals(ir.NumDocs, docs.size());
            IDictionary    dictionary    = new DocumentDictionary(ir, FIELD_NAME, WEIGHT_FIELD_NAME);
            IInputIterator inputIterator = dictionary.GetEntryIterator();
            BytesRef       f;

            while ((f = inputIterator.Next()) != null)
            {
                var      field = f.Utf8ToString();
                Document doc;

                // Fail with a readable message (rather than a NullReferenceException below) when
                // the iterator returns a term that is not expected or was already consumed.
                assertTrue("iterator returned an unexpected or duplicate term: " + field,
                           docs.TryGetValue(field, out doc) && doc != null);
                docs.Remove(field);

                assertTrue(f.equals(new BytesRef(doc.Get(FIELD_NAME))));
                IIndexableField weightField = doc.GetField(WEIGHT_FIELD_NAME);
                assertEquals(inputIterator.Weight, (weightField != null) ? Convert.ToInt64(weightField.GetNumericValue()) : 0);
                assertEquals(inputIterator.Payload, null);
            }

            // What is left in the expected set must be exactly the invalid documents.
            foreach (string invalidTerm in invalidDocTerms)
            {
                Document invalid;
                assertTrue("invalid term was not left over in the expected set: " + invalidTerm,
                           docs.TryGetValue(invalidTerm, out invalid) && invalid != null);
                docs.Remove(invalidTerm);
            }
            assertTrue(!docs.Any());

            ir.Dispose();
            dir.Dispose();
        }
        /// <summary>
        /// Rebuilds the suggester index from <paramref name="iter"/>. Any searcher manager and
        /// writer held from an earlier build are disposed first; on success a new writer and
        /// searcher manager are left in place.
        /// </summary>
        /// <param name="iter">Source of texts, contexts, weights and optional payloads.</param>
        public override void Build(IInputIterator iter)
        {
            // Release whatever a previous build left open.
            if (searcherMgr != null)
            {
                searcherMgr.Dispose();
                searcherMgr = null;
            }
            if (writer != null)
            {
                writer.Dispose();
                writer = null;
            }

            // Never assigned in this method; it is only handed to the close helpers below.
            AtomicReader reader = null;
            bool         built  = false;
            try
            {
                // First pass: build a temporary normal Lucene index,
                // just indexing the suggestions as they iterate.
                // TODO: use threads?
                writer = new IndexWriter(dir, GetIndexWriterConfig(matchVersion, GramAnalyzer, IndexWriterConfig.OpenMode_e.CREATE));

                for (BytesRef text = iter.Next(); text != null; text = iter.Next())
                {
                    BytesRef payload = iter.HasPayloads ? iter.Payload : null;

                    Add(text, iter.Contexts, iter.Weight, payload);
                }

                searcherMgr = new SearcherManager(writer, true, null);
                built       = true;
            }
            finally
            {
                if (!built)
                {
                    // Failure path: close quietly and drop the half-built writer.
                    IOUtils.CloseWhileHandlingException(writer, reader);
                    writer = null;
                }
                else
                {
                    IOUtils.Close(reader);
                }
            }
        }
Ejemplo n.º 32
0
        /// <summary>
        /// Builds the suggest index from <paramref name="iterator"/>, giving the temporary index
        /// writer a RAM buffer of <paramref name="ramBufferSizeMB"/> megabytes. The weights
        /// supplied by the iterator are not read.
        /// </summary>
        /// <param name="iterator">Source of surface forms; must not report payloads or contexts.</param>
        /// <param name="ramBufferSizeMB">Value passed to <c>IndexWriterConfig.SetRAMBufferSizeMB</c>.</param>
        /// <exception cref="System.ArgumentException">
        /// The iterator reports payloads or contexts, no terms were indexed, or a token yields
        /// more grams than <c>grams</c>.
        /// </exception>
        /// <exception cref="InvalidOperationException">
        /// A temporary index file or the temporary index directory could not be removed.
        /// </exception>
        public virtual void Build(IInputIterator iterator, double ramBufferSizeMB)
        {
            if (iterator.HasPayloads)
            {
                throw new System.ArgumentException("this suggester doesn't support payloads");
            }
            if (iterator.HasContexts)
            {
                throw new System.ArgumentException("this suggester doesn't support contexts");
            }

            string prefix    = this.GetType().Name;
            var    directory = OfflineSorter.DefaultTempDir();

            // TODO: messy -- pick random directory names until one exists after Create().
            DirectoryInfo tempIndexPath;
            Random        random = new Random();

            do
            {
                string candidate = Path.Combine(directory.FullName, prefix + ".index." + random.Next(int.MaxValue));
                tempIndexPath = new DirectoryInfo(candidate);
                tempIndexPath.Create();
            }
            while (!System.IO.Directory.Exists(tempIndexPath.FullName));

            using (Directory dir = FSDirectory.Open(tempIndexPath))
            {
#pragma warning disable 612, 618
                IndexWriterConfig iwc = new IndexWriterConfig(LuceneVersion.LUCENE_CURRENT, indexAnalyzer);
#pragma warning restore 612, 618
                iwc.SetOpenMode(OpenMode.CREATE);
                iwc.SetRAMBufferSizeMB(ramBufferSizeMB);
                IndexWriter writer = new IndexWriter(dir, iwc);

                // TODO: if only we had IndexOptions.TERMS_ONLY...
                var ft = new FieldType(TextField.TYPE_NOT_STORED)
                {
                    IndexOptions = IndexOptions.DOCS_AND_FREQS,
                    OmitNorms    = true
                };
                ft.Freeze();

                // A single document/field pair is reused for every surface form.
                Document doc       = new Document();
                Field    bodyField = new Field("body", "", ft);
                doc.Add(bodyField);

                totTokens = 0;
                IndexReader reader = null;

                bool success = false;
                count = 0;
                try
                {
                    BytesRef surfaceForm;
                    while ((surfaceForm = iterator.Next()) != null)
                    {
                        bodyField.SetStringValue(surfaceForm.Utf8ToString());
                        writer.AddDocument(doc);
                        count++;
                    }
                    reader = DirectoryReader.Open(writer, false);

                    Terms terms = MultiFields.GetTerms(reader, "body");
                    if (terms == null)
                    {
                        throw new System.ArgumentException("need at least one suggestion");
                    }

                    // Move all ngrams into an FST:
                    TermsEnum termsEnum = terms.GetIterator(null);

                    Outputs<long?> outputs = PositiveInt32Outputs.Singleton;
                    Builder<long?> builder = new Builder<long?>(FST.INPUT_TYPE.BYTE1, outputs);

                    Int32sRef scratchInts = new Int32sRef();
                    BytesRef  term;
                    while ((term = termsEnum.Next()) != null)
                    {
                        int ngramCount = CountGrams(term);
                        if (ngramCount > grams)
                        {
                            throw new System.ArgumentException("tokens must not contain separator byte; got token=" + term + " but gramCount=" + ngramCount + ", which is greater than expected max ngram size=" + grams);
                        }
                        if (ngramCount == 1)
                        {
                            // Only single-gram terms contribute to the total token count.
                            totTokens += termsEnum.TotalTermFreq;
                        }

                        builder.Add(Lucene.Net.Util.Fst.Util.ToInt32sRef(term, scratchInts), EncodeWeight(termsEnum.TotalTermFreq));
                    }

                    fst = builder.Finish();
                    if (fst == null)
                    {
                        throw new System.ArgumentException("need at least one suggestion");
                    }

                    success = true;
                }
                finally
                {
                    try
                    {
                        if (!success)
                        {
                            IOUtils.DisposeWhileHandlingException(writer, reader);
                        }
                        else
                        {
                            IOUtils.Dispose(writer, reader);
                        }
                    }
                    finally
                    {
                        // Remove every file of the temporary index, then the directory itself.
                        foreach (string file in dir.ListAll())
                        {
                            FileInfo path = new FileInfo(Path.Combine(tempIndexPath.FullName, file));
                            try
                            {
                                path.Delete();
                            }
                            catch (Exception e)
                            {
                                throw new InvalidOperationException("failed to remove " + path, e);
                            }
                        }

                        try
                        {
                            tempIndexPath.Delete();
                        }
                        catch (Exception e)
                        {
                            throw new InvalidOperationException("failed to remove " + tempIndexPath, e);
                        }
                    }
                }
            }
        }
Ejemplo n.º 33
0
 /// <summary>
 /// Creates the node and stores <paramref name="inputIterator"/> in the
 /// <c>_inputIterator</c> field.
 /// </summary>
 /// <param name="inputIterator">Iterator kept by this instance; stored as given, without validation.</param>
 public InterpreterAstNode(IInputIterator inputIterator)
 {
     this._inputIterator = inputIterator;
 }
Ejemplo n.º 34
0
 /// <summary>
 /// Builds from <paramref name="iterator"/> by delegating to the two-argument overload with
 /// <c>IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB</c> as the RAM buffer size.
 /// </summary>
 /// <param name="iterator">Input passed through unchanged to the two-argument overload.</param>
 public override void Build(IInputIterator iterator)
 {
     var defaultRamBufferSizeMB = IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB;

     this.Build(iterator, defaultRamBufferSizeMB);
 }
Ejemplo n.º 35
0
 /// <summary>
 /// Decodes, as UTF-8, the bytes that <paramref name="iterator"/> reports for the span
 /// delimited by <paramref name="match"/>.
 /// </summary>
 /// <param name="match">Match whose <c>Start</c> and <c>End</c> are passed to the iterator.</param>
 /// <param name="iterator">Input queried through its <c>Text</c> method.</param>
 /// <returns>The matched bytes decoded with <c>Encoding.UTF8</c>.</returns>
 public static string ValueAsString(this TokenMatch match, IInputIterator iterator)
 {
     var utf8Bytes = iterator.Text(match.Start, match.End);
     var byteCount = utf8Bytes.Length;

     return Encoding.UTF8.GetString(utf8Bytes, 0, byteCount);
 }
Ejemplo n.º 36
0
 /// <summary>
 /// Creates the factory and stores <paramref name="inputIterator"/> in the
 /// <c>_inputIterator</c> field.
 /// </summary>
 /// <param name="inputIterator">Iterator kept by this instance; stored as given, without validation.</param>
 public PeGrammarAstNodeFactory(IInputIterator inputIterator)
 {
     this._inputIterator = inputIterator;
 }
Ejemplo n.º 37
0
 /// <summary>
 /// Creates the node and stores <paramref name="inputIterator"/> in the
 /// <c>_inputIterator</c> field.
 /// </summary>
 /// <param name="inputIterator">Iterator kept by this instance; stored as given, without validation.</param>
 public StatementAstNode(IInputIterator inputIterator)
 {
     this._inputIterator = inputIterator;
 }
Ejemplo n.º 38
0
        /// <summary>
        /// Replaces the current trie with one built from the terms and weights supplied by
        /// <paramref name="tfit"/>. Zero-length terms are skipped.
        /// </summary>
        /// <param name="tfit">Source of terms and weights. When it reports a comparator it is
        /// wrapped in an <c>UnsortedInputIterator</c> before being consumed.</param>
        /// <exception cref="ArgumentException">The iterator reports payloads or contexts.</exception>
        public override void Build(IInputIterator tfit)
        {
            if (tfit.HasPayloads)
            {
                throw new ArgumentException("this suggester doesn't support payloads");
            }
            if (tfit.Comparator != null)
            {
                // A comparator means the input is sorted; feed the trie unsorted input instead.
                tfit = new UnsortedInputIterator(tfit);
            }
            if (tfit.HasContexts)
            {
                throw new System.ArgumentException("this suggester doesn't support contexts");
            }

            // NOTE(review): count is reset here but never incremented in this method -- confirm
            // whether it is maintained elsewhere.
            count = 0;
            trie = new JaspellTernarySearchTrie
            {
                MatchAlmostDiff = editDistance
            };

            var utf16 = new CharsRef();

            for (BytesRef term = tfit.Next(); term != null; term = tfit.Next())
            {
                long weight = tfit.Weight;
                if (term.Length != 0)
                {
                    utf16.Grow(term.Length);
                    UnicodeUtil.UTF8toUTF16(term.Bytes, term.Offset, term.Length, utf16);
                    trie.Put(utf16.ToString(), Convert.ToInt64(weight));
                }
            }
        }
Ejemplo n.º 39
0
 /// <summary>
 /// Creates a new sorted wrapper, using <see cref="BytesRef.UTF8SortedAsUnicodeComparer"/>
 /// for sorting.
 /// </summary>
 /// <param name="source">Iterator to wrap; forwarded unchanged to the two-argument constructor.</param>
 public SortedInputIterator(IInputIterator source)
     : this(source, BytesRef.UTF8SortedAsUnicodeComparer)
 {
 }
Ejemplo n.º 40
0
        /// <summary>
        /// Builds the two completion automata from the terms and weights supplied by
        /// <paramref name="iterator"/>.
        /// </summary>
        /// <remarks>
        /// Each term is written to a temporary file prefixed with its encoded weight, the file is
        /// sorted offline, and every sorted entry is assigned a bucket from its position in the
        /// sorted order; an entry whose score equals the previous entry's score reuses that
        /// entry's bucket. Deletion of both temporary files is attempted in the <c>finally</c> block.
        /// </remarks>
        /// <param name="iterator">Source of terms and weights; must not report payloads or contexts.</param>
        /// <exception cref="System.ArgumentException">The iterator reports payloads or contexts.</exception>
        public override void Build(IInputIterator iterator)
        {
            if (iterator.HasPayloads)
            {
                throw new System.ArgumentException("this suggester doesn't support payloads");
            }
            if (iterator.HasContexts)
            {
                throw new System.ArgumentException("this suggester doesn't support contexts");
            }

            string   lookupName = typeof(FSTCompletionLookup).Name;
            FileInfo tempInput  = FileSupport.CreateTempFile(lookupName, ".input", OfflineSorter.DefaultTempDir());
            FileInfo tempSorted = FileSupport.CreateTempFile(lookupName, ".sorted", OfflineSorter.DefaultTempDir());

            var writer = new OfflineSorter.ByteSequencesWriter(tempInput);
            OfflineSorter.ByteSequencesReader reader = null;
            ExternalRefSorter                 sorter = null;

            // Weights are pushed up front, before the term bytes, so that sorting orders by them.
            // For now they are assumed to be non-negative; negative values would need extra
            // work to get a usable byte order.
            bool success = false;

            count = 0;
            try
            {
                byte[] buffer = new byte[0];
                var    output = new ByteArrayDataOutput(buffer);

                for (BytesRef spare = iterator.Next(); spare != null; spare = iterator.Next())
                {
                    // 4 bytes of encoded weight precede the term bytes.
                    int required = spare.Length + 4;
                    if (buffer.Length <= required)
                    {
                        buffer = ArrayUtil.Grow(buffer, required);
                    }

                    output.Reset(buffer);
                    output.WriteInt32(EncodeWeight(iterator.Weight));
                    output.WriteBytes(spare.Bytes, spare.Offset, spare.Length);
                    writer.Write(buffer, 0, output.Position);
                }
                writer.Dispose();

                // We don't know the distribution of scores and we need to bucket them, so we'll sort
                // and divide into equal buckets.
                OfflineSorter.SortInfo info = new OfflineSorter().Sort(tempInput, tempSorted);
                tempInput.Delete();

                sorter = new ExternalRefSorter(new OfflineSorter());
                var builder = new FSTCompletionBuilder(buckets, sorter, sharedTailLength);

                int inputLines = info.Lines;
                reader = new OfflineSorter.ByteSequencesReader(tempSorted);

                long line           = 0;
                int  previousBucket = 0;
                int  previousScore  = 0;
                var  input          = new ByteArrayDataInput();
                var  record         = new BytesRef();
                var  term           = new BytesRef();

                while (reader.Read(record))
                {
                    input.Reset(record.Bytes);
                    int currentScore = input.ReadInt32();

                    bool sameScoreAsPrevious = line > 0 && currentScore == previousScore;
                    int  bucket = sameScoreAsPrevious
                        ? previousBucket
                        : (int)(line * buckets / inputLines);

                    previousScore  = currentScore;
                    previousBucket = bucket;

                    // Only append the input, discard the weight.
                    term.Bytes  = record.Bytes;
                    term.Offset = input.Position;
                    term.Length = record.Length - input.Position;
                    builder.Add(term, bucket);

                    line++;
                    count++;
                }

                // The two FSTCompletions share the same automaton.
                this.higherWeightsCompletion = builder.Build();
                this.normalCompletion        = new FSTCompletion(higherWeightsCompletion.FST, false, exactMatchFirst);

                success = true;
            }
            finally
            {
                if (!success)
                {
                    IOUtils.CloseWhileHandlingException(reader, writer, sorter);
                }
                else
                {
                    IOUtils.Close(reader, writer, sorter);
                }

                tempInput.Delete();
                tempSorted.Delete();
            }
        }
        /// <summary>
        /// Builds the completion automata for this lookup from the terms and weights
        /// produced by <paramref name="iterator"/>.
        /// <para/>
        /// Every term is written to a temporary file prefixed with its encoded weight,
        /// the file is sorted offline, and the sorted records are then fed to an
        /// <see cref="FSTCompletionBuilder"/> together with a bucket number derived
        /// from the record's position in the sorted output. Both temporary files are
        /// deleted before the method returns.
        /// </summary>
        /// <param name="iterator">Source of terms and weights. It must not expose payloads or contexts.</param>
        /// <exception cref="System.ArgumentException">
        /// Thrown when <paramref name="iterator"/> reports payloads or contexts.
        /// </exception>
        public override void Build(IInputIterator iterator)
        {
            if (iterator.HasPayloads)
            {
                throw new System.ArgumentException("this suggester doesn't support payloads");
            }
            if (iterator.HasContexts)
            {
                throw new System.ArgumentException("this suggester doesn't support contexts");
            }

            string tempPrefix = typeof(FSTCompletionLookup).Name;
            FileInfo unsortedFile = FileSupport.CreateTempFile(tempPrefix, ".input", OfflineSorter.DefaultTempDir());
            FileInfo sortedFile = FileSupport.CreateTempFile(tempPrefix, ".sorted", OfflineSorter.DefaultTempDir());

            OfflineSorter.ByteSequencesWriter sequenceWriter = new OfflineSorter.ByteSequencesWriter(unsortedFile);
            OfflineSorter.ByteSequencesReader sequenceReader = null;
            ExternalRefSorter refSorter = null;

            // The encoded weight is placed in front of each term so the records can be sorted.
            // Weights are assumed to be non-negative for now; negative values would need
            // extra work to obtain a usable byte order.
            bool completed = false;
            count = 0;
            try
            {
                // Phase 1: dump "<weight><term bytes>" records into the unsorted temp file.
                byte[] scratch = new byte[0];
                ByteArrayDataOutput scratchOut = new ByteArrayDataOutput(scratch);
                for (BytesRef term = iterator.Next(); term != null; term = iterator.Next())
                {
                    // 4 extra bytes hold the encoded weight written ahead of the term.
                    int needed = term.Length + 4;
                    if (needed >= scratch.Length)
                    {
                        scratch = ArrayUtil.Grow(scratch, needed);
                    }

                    scratchOut.Reset(scratch);
                    scratchOut.WriteInt(EncodeWeight(iterator.Weight));
                    scratchOut.WriteBytes(term.Bytes, term.Offset, term.Length);
                    sequenceWriter.Write(scratch, 0, scratchOut.Position);
                }
                sequenceWriter.Dispose();

                // Phase 2: the score distribution is unknown, so sort the records and
                // split the sorted sequence into equally sized buckets.
                OfflineSorter.SortInfo sortInfo = (new OfflineSorter()).Sort(unsortedFile, sortedFile);
                unsortedFile.Delete();

                refSorter = new ExternalRefSorter(new OfflineSorter());
                FSTCompletionBuilder fstBuilder = new FSTCompletionBuilder(buckets, refSorter, sharedTailLength);

                int totalLines = sortInfo.Lines;
                sequenceReader = new OfflineSorter.ByteSequencesReader(sortedFile);

                long lineNo = 0;
                int lastBucket = 0;
                int lastScore = 0;
                ByteArrayDataInput recordIn = new ByteArrayDataInput();
                BytesRef record = new BytesRef();
                BytesRef suggestion = new BytesRef();
                while (sequenceReader.Read(record))
                {
                    recordIn.Reset(record.Bytes);
                    int score = recordIn.ReadInt();

                    // Records with the same score as their predecessor stay in the same bucket;
                    // otherwise the bucket follows from the position in the sorted file.
                    int bucket = (lineNo > 0 && score == lastScore)
                        ? lastBucket
                        : (int)(lineNo * buckets / totalLines);
                    lastScore = score;
                    lastBucket = bucket;

                    // Hand over only the term bytes; the leading weight is dropped.
                    suggestion.Bytes = record.Bytes;
                    suggestion.Offset = recordIn.Position;
                    suggestion.Length = record.Length - recordIn.Position;
                    fstBuilder.Add(suggestion, bucket);

                    lineNo++;
                    count++;
                }

                // Both FSTCompletion instances are backed by the same automaton.
                higherWeightsCompletion = fstBuilder.Build();
                normalCompletion = new FSTCompletion(higherWeightsCompletion.FST, false, exactMatchFirst);

                completed = true;
            }
            finally
            {
                if (!completed)
                {
                    // A failure is already propagating; close without raising further exceptions.
                    IOUtils.CloseWhileHandlingException(sequenceReader, sequenceWriter, refSorter);
                }
                else
                {
                    IOUtils.Close(sequenceReader, sequenceWriter, refSorter);
                }

                unsortedFile.Delete();
                sortedFile.Delete();
            }
        }
Ejemplo n.º 42
0
        /// <summary>
        /// Replaces the current tree root with a fresh node and fills it from the
        /// terms and weights produced by <paramref name="tfit"/>.
        /// <para/>
        /// If the iterator's comparator is not <c>BytesRef.UTF8SortedAsUTF16Comparer</c>,
        /// the iterator is first wrapped in a <see cref="SortedInputIterator"/> that uses it.
        /// All terms are converted to UTF-16 strings and collected with their weights before
        /// being passed to <c>autocomplete.BalancedTree</c>.
        /// </summary>
        /// <param name="tfit">Source of terms and weights. It must not expose payloads or contexts.</param>
        /// <exception cref="System.ArgumentException">
        /// Thrown when <paramref name="tfit"/> reports payloads or contexts.
        /// </exception>
        public override void Build(IInputIterator tfit)
        {
            if (tfit.HasPayloads)
            {
                throw new System.ArgumentException("this suggester doesn't support payloads");
            }
            if (tfit.HasContexts)
            {
                throw new System.ArgumentException("this suggester doesn't support contexts");
            }

            root = new TernaryTreeNode();

            // The input must be sorted with a comparator that yields UTF-16 order.
            #pragma warning disable 612, 618
            bool needsSorting = tfit.Comparator != BytesRef.UTF8SortedAsUTF16Comparer;
            if (needsSorting)
            {
                tfit = new SortedInputIterator(tfit, BytesRef.UTF8SortedAsUTF16Comparer);
            }
            #pragma warning restore 612, 618

            // Buffer every key and weight before building the tree.
            List<string> keys = new List<string>();
            // LUCENENET TODO: Should this be long? In Java it was Number, but we can probably do better than object.
            List<object> weights = new List<object>();
            CharsRef utf16 = new CharsRef();
            for (BytesRef term = tfit.Next(); term != null; term = tfit.Next())
            {
                utf16.Grow(term.Length);
                UnicodeUtil.UTF8toUTF16(term.Bytes, term.Offset, term.Length, utf16);
                keys.Add(utf16.ToString());
                weights.Add(Convert.ToInt64(tfit.Weight));
            }

            int lastIndex = keys.Count - 1;
            autocomplete.BalancedTree(keys.ToArray(), weights.ToArray(), 0, lastIndex, root);
        }