Example #1
            internal SortingDocsEnum(int maxDoc, SortingDocsEnum reuse, DocsEnum input, bool withFreqs, Sorter.DocMap docMap)
                : base(input)
            {
                this.maxDoc    = maxDoc;
                this.withFreqs = withFreqs;
                if (reuse != null)
                {
                    if (reuse.maxDoc == maxDoc)
                    {
                        sorter = reuse.sorter;
                    }
                    else
                    {
                        sorter = new DocFreqSorter(maxDoc);
                    }
                    docs  = reuse.docs;
                    freqs = reuse.freqs; // maybe null
                }
                else
                {
                    docs   = new int[64];
                    sorter = new DocFreqSorter(maxDoc);
                }
                docIt = -1;
                int i = 0;
                int doc;

                if (withFreqs)
                {
                    if (freqs == null || freqs.Length < docs.Length)
                    {
                        freqs = new int[docs.Length];
                    }
                    while ((doc = input.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
                    {
                        if (i >= docs.Length)
                        {
                            docs  = ArrayUtil.Grow(docs, docs.Length + 1);
                            freqs = ArrayUtil.Grow(freqs, freqs.Length + 1);
                        }
                        docs[i]  = docMap.OldToNew(doc);
                        freqs[i] = input.Freq;
                        ++i;
                    }
                }
                else
                {
                    freqs = null;
                    while ((doc = input.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
                    {
                        if (i >= docs.Length)
                        {
                            docs = ArrayUtil.Grow(docs, docs.Length + 1);
                        }
                        docs[i++] = docMap.OldToNew(doc);
                    }
                }
                // TimSort can save much time compared to other sorts in case of
                // reverse sorting, or when sorting a concatenation of sorted readers
                sorter.Reset(docs, freqs);
                sorter.Sort(0, i);
                upto = i;
            }
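Example #1 leans on ArrayUtil.Grow for amortized buffer growth: despite the "+ 1" at the call sites, Grow oversizes the new array internally, so repeated appends stay cheap. A minimal sketch of that idiom, assuming Lucene.Net.Util (names below are illustrative, not from the example):

using Lucene.Net.Util;

class GrowSketch
{
    // Appends value, growing the buffer on demand; returns the (possibly new) buffer.
    static int[] Append(int[] buffer, ref int count, int value)
    {
        if (count >= buffer.Length)
        {
            // Grow returns an array of length >= count + 1, padded internally
            // (via ArrayUtil.Oversize) so repeated appends stay amortized O(1).
            buffer = ArrayUtil.Grow(buffer, count + 1);
        }
        buffer[count++] = value;
        return buffer;
    }

    static void Main()
    {
        int[] docs = new int[4];
        int upto = 0;
        for (int d = 0; d < 100; d++)
        {
            docs = Append(docs, ref upto, d);
        }
        System.Console.WriteLine($"{upto} values, buffer length {docs.Length}");
    }
}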
Example #2
 /// <summary>
 ///     Classify the specified input into a group.
 /// </summary>
 /// <param name="input">The input data.</param>
 /// <returns>The group the data was classified into.</returns>
 public int ComputeClassification(double[] input)
 {
     return(ArrayUtil.IndexOfLargest(ComputeRegression(input)));
 }
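ComputeClassification is an argmax: the predicted group is the index of the largest regression output. A stand-in sketch for a helper like ArrayUtil.IndexOfLargest (a sketch of the evident semantics, not the library's actual implementation):

static class ArgMaxSketch
{
    // Index of the maximum element; the first occurrence wins on ties.
    public static int IndexOfLargest(double[] data)
    {
        int best = 0;
        for (int i = 1; i < data.Length; i++)
        {
            if (data[i] > data[best])
            {
                best = i;
            }
        }
        return best;
    }
}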
Example #3
            public override Scorer GetScorer(AtomicReaderContext context, IBits acceptDocs)
            {
                Debug.Assert(outerInstance.termArrays.Count > 0);
                AtomicReader reader   = context.AtomicReader;
                IBits        liveDocs = acceptDocs;

                PhraseQuery.PostingsAndFreq[] postingsFreqs = new PhraseQuery.PostingsAndFreq[outerInstance.termArrays.Count];

                Terms fieldTerms = reader.GetTerms(outerInstance.field);

                if (fieldTerms == null)
                {
                    return(null);
                }

                // Reuse single TermsEnum below:
                TermsEnum termsEnum = fieldTerms.GetIterator(null);

                for (int pos = 0; pos < postingsFreqs.Length; pos++)
                {
                    Term[] terms = outerInstance.termArrays[pos];

                    DocsAndPositionsEnum postingsEnum;
                    int docFreq;

                    if (terms.Length > 1)
                    {
                        postingsEnum = new UnionDocsAndPositionsEnum(liveDocs, context, terms, termContexts, termsEnum);

                        // coarse -- this overcounts since a given doc can
                        // have more than one term:
                        docFreq = 0;
                        for (int termIdx = 0; termIdx < terms.Length; termIdx++)
                        {
                            Term      term      = terms[termIdx];
                            TermState termState = termContexts[term].Get(context.Ord);
                            if (termState == null)
                            {
                                // Term not in reader
                                continue;
                            }
                            termsEnum.SeekExact(term.Bytes, termState);
                            docFreq += termsEnum.DocFreq;
                        }

                        if (docFreq == 0)
                        {
                            // None of the terms are in this reader
                            return(null);
                        }
                    }
                    else
                    {
                        Term      term      = terms[0];
                        TermState termState = termContexts[term].Get(context.Ord);
                        if (termState == null)
                        {
                            // Term not in reader
                            return(null);
                        }
                        termsEnum.SeekExact(term.Bytes, termState);
                        postingsEnum = termsEnum.DocsAndPositions(liveDocs, null, DocsAndPositionsFlags.NONE);

                        if (postingsEnum == null)
                        {
                            // term does exist, but has no positions
                            Debug.Assert(termsEnum.Docs(liveDocs, null, DocsFlags.NONE) != null, "termstate found but no term exists in reader");
                            throw new InvalidOperationException("field \"" + term.Field + "\" was indexed without position data; cannot run PhraseQuery (term=" + term.Text() + ")");
                        }

                        docFreq = termsEnum.DocFreq;
                    }

                    postingsFreqs[pos] = new PhraseQuery.PostingsAndFreq(postingsEnum, docFreq, (int)outerInstance.positions[pos], terms);
                }

                // sort by increasing docFreq order
                if (outerInstance.slop == 0)
                {
                    ArrayUtil.TimSort(postingsFreqs);
                }

                if (outerInstance.slop == 0)
                {
                    ExactPhraseScorer s = new ExactPhraseScorer(this, postingsFreqs, similarity.GetSimScorer(stats, context));
                    if (s.noDocs)
                    {
                        return(null);
                    }
                    else
                    {
                        return(s);
                    }
                }
                else
                {
                    return(new SloppyPhraseScorer(this, postingsFreqs, outerInstance.slop, similarity.GetSimScorer(stats, context)));
                }
            }
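Example #3 sorts the postings by increasing docFreq so the rarest term drives the exact-phrase loop, and uses ArrayUtil.TimSort because, as the comment in Example #1 notes, TimSort is adaptive: it is cheap on already-sorted or reverse-sorted input. A small sketch of the one-argument overload, assuming Lucene.Net.Util (any T implementing IComparable<T> works, as PostingsAndFreq does above):

using System;
using Lucene.Net.Util;

class TimSortSketch
{
    static void Main()
    {
        int[] docFreqs = { 42, 7, 19, 7 };
        ArrayUtil.TimSort(docFreqs); // stable, adaptive merge sort
        Console.WriteLine(string.Join(", ", docFreqs)); // 7, 7, 19, 42
    }
}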
Example #4
        private TermsFilter(FieldAndTermEnum iter, int length)
        {
            // TODO: maybe use oal.index.PrefixCodedTerms instead?
            // If number of terms is more than a few hundred it
            // should be a win

            // TODO: we also pack terms in FieldCache/DocValues
            // ... maybe we can refactor to share that code

            // TODO: yet another option is to build the union of the terms in
            // an automaton and call intersect on the termsenum if the density is high

            int hash            = 9;
            var serializedTerms = Arrays.Empty <byte>();

            this.offsets = new int[length + 1];
            int           lastEndOffset     = 0;
            int           index             = 0;
            var           termsAndFields    = new List <TermsAndField>();
            TermsAndField lastTermsAndField = null;
            BytesRef      previousTerm      = null;
            string        previousField     = null;
            BytesRef      currentTerm;
            string        currentField;

            while (iter.MoveNext())
            {
                currentTerm  = iter.Current;
                currentField = iter.Field;
                if (currentField == null)
                {
                    throw new ArgumentException("Field must not be null");
                }
                if (previousField != null)
                {
                    // deduplicate
                    if (previousField.Equals(currentField, StringComparison.Ordinal))
                    {
                        if (previousTerm.BytesEquals(currentTerm))
                        {
                            continue;
                        }
                    }
                    else
                    {
                        int _start = lastTermsAndField == null ? 0 : lastTermsAndField.end;
                        lastTermsAndField = new TermsAndField(_start, index, previousField);
                        termsAndFields.Add(lastTermsAndField);
                    }
                }
                hash = PRIME * hash + currentField.GetHashCode();
                hash = PRIME * hash + currentTerm.GetHashCode();
                if (serializedTerms.Length < lastEndOffset + currentTerm.Length)
                {
                    serializedTerms = ArrayUtil.Grow(serializedTerms, lastEndOffset + currentTerm.Length);
                }
                Array.Copy(currentTerm.Bytes, currentTerm.Offset, serializedTerms, lastEndOffset, currentTerm.Length);
                offsets[index] = lastEndOffset;
                lastEndOffset += currentTerm.Length;
                index++;
                previousTerm  = currentTerm;
                previousField = currentField;
            }
            offsets[index] = lastEndOffset;
            int start = lastTermsAndField == null ? 0 : lastTermsAndField.end;

            lastTermsAndField = new TermsAndField(start, index, previousField);
            termsAndFields.Add(lastTermsAndField);
            this.termsBytes     = ArrayUtil.Shrink(serializedTerms, lastEndOffset);
            this.termsAndFields = termsAndFields.ToArray();
            this.hashCode       = hash;
        }
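The byte-packing loop above grows serializedTerms ahead of each copy, then trims the oversized buffer once at the end with ArrayUtil.Shrink. A sketch of that Grow-then-Shrink pairing, assuming Lucene.Net.Util (Shrink only reallocates when it saves enough space, so the result may still be slightly oversized):

using System;
using Lucene.Net.Util;

class ShrinkSketch
{
    static void Main()
    {
        byte[] buf = new byte[16];
        int written = 0;
        byte[] term = { 1, 2, 3, 4, 5 };
        if (buf.Length < written + term.Length)
        {
            buf = ArrayUtil.Grow(buf, written + term.Length); // oversized growth
        }
        Array.Copy(term, 0, buf, written, term.Length);
        written += term.Length;
        // Returns a smaller array when the savings are substantial,
        // otherwise the original array unchanged.
        byte[] trimmed = ArrayUtil.Shrink(buf, written);
        Console.WriteLine($"wrote {written} bytes, buffer length {trimmed.Length}");
    }
}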
Example #5
        internal readonly bool isSegmentPrivate; // set to true iff this frozen packet represents
        // a segment-private deletes packet; in that case it should
        // only have Queries

        public FrozenBufferedUpdates(BufferedUpdates deletes, bool isSegmentPrivate)
        {
            this.isSegmentPrivate = isSegmentPrivate;
            Debug.Assert(!isSegmentPrivate || deletes.terms.Count == 0, "segment private package should only have del queries");
            Term[] termsArray = deletes.terms.Keys.ToArray(/*new Term[deletes.Terms.Count]*/);
            termCount = termsArray.Length;
            ArrayUtil.TimSort(termsArray);
            PrefixCodedTerms.Builder builder = new PrefixCodedTerms.Builder();
            foreach (Term term in termsArray)
            {
                builder.Add(term);
            }
            terms = builder.Finish();

            queries     = new Query[deletes.queries.Count];
            queryLimits = new int[deletes.queries.Count];
            int upto = 0;

            foreach (KeyValuePair <Query, int?> ent in deletes.queries)
            {
                queries[upto] = ent.Key;
                if (ent.Value.HasValue)
                {
                    queryLimits[upto] = ent.Value.Value;
                }
                else
                {
                    // LUCENENET NOTE: According to this: http://stackoverflow.com/a/13914344
                    // we are supposed to throw an exception in this case, rather than
                    // silently fail.
                    throw new NullReferenceException();
                }
                upto++;
            }

            // TODO if a Term affects multiple fields, we could keep the updates key'd by Term
            // so that it maps to all fields it affects, sorted by their docUpto, and traverse
            // that Term only once, applying the update to all fields that still need to be
            // updated.
            IList <NumericDocValuesUpdate> allNumericUpdates = new List <NumericDocValuesUpdate>();
            int numericUpdatesSize = 0;

            foreach (var numericUpdates in deletes.numericUpdates.Values)
            {
                foreach (NumericDocValuesUpdate update in numericUpdates.Values)
                {
                    allNumericUpdates.Add(update);
                    numericUpdatesSize += update.GetSizeInBytes();
                }
            }
            numericDVUpdates = allNumericUpdates.ToArray();

            // TODO if a Term affects multiple fields, we could keep the updates key'd by Term
            // so that it maps to all fields it affects, sorted by their docUpto, and traverse
            // that Term only once, applying the update to all fields that still need to be
            // updated.
            IList <BinaryDocValuesUpdate> allBinaryUpdates = new List <BinaryDocValuesUpdate>();
            int binaryUpdatesSize = 0;

            foreach (var binaryUpdates in deletes.binaryUpdates.Values)
            {
                foreach (BinaryDocValuesUpdate update in binaryUpdates.Values)
                {
                    allBinaryUpdates.Add(update);
                    binaryUpdatesSize += update.GetSizeInBytes();
                }
            }
            binaryDVUpdates = allBinaryUpdates.ToArray();

            bytesUsed = (int)terms.GetSizeInBytes() + queries.Length * BYTES_PER_DEL_QUERY + numericUpdatesSize + numericDVUpdates.Length * RamUsageEstimator.NUM_BYTES_OBJECT_REF + binaryUpdatesSize + binaryDVUpdates.Length * RamUsageEstimator.NUM_BYTES_OBJECT_REF;

            numTermDeletes = deletes.numTermDeletes;
        }
Example #6
        /// <summary>
        /// Add a new position &amp; payload </summary>
        public override void AddPosition(int position, BytesRef payload, int startOffset, int endOffset)
        {
            // if (DEBUG) {
            //   System.out.println("FPW.addPosition pos=" + position + " posBufferUpto=" + posBufferUpto + (fieldHasPayloads ? " payloadByteUpto=" + payloadByteUpto: ""));
            // }
            posDeltaBuffer[posBufferUpto] = position - lastPosition;
            if (fieldHasPayloads)
            {
                if (payload == null || payload.Length == 0)
                {
                    // no payload
                    payloadLengthBuffer[posBufferUpto] = 0;
                }
                else
                {
                    payloadLengthBuffer[posBufferUpto] = payload.Length;
                    if (payloadByteUpto + payload.Length > payloadBytes.Length)
                    {
                        payloadBytes = ArrayUtil.Grow(payloadBytes, payloadByteUpto + payload.Length);
                    }
                    Array.Copy(payload.Bytes, payload.Offset, payloadBytes, payloadByteUpto, payload.Length);
                    payloadByteUpto += payload.Length;
                }
            }

            if (fieldHasOffsets)
            {
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(startOffset >= lastStartOffset);
                    Debugging.Assert(endOffset >= startOffset);
                }
                offsetStartDeltaBuffer[posBufferUpto] = startOffset - lastStartOffset;
                offsetLengthBuffer[posBufferUpto]     = endOffset - startOffset;
                lastStartOffset = startOffset;
            }

            posBufferUpto++;
            lastPosition = position;
            if (posBufferUpto == Lucene41PostingsFormat.BLOCK_SIZE)
            {
                // if (DEBUG) {
                //   System.out.println("  write pos bulk block @ fp=" + posOut.getFilePointer());
                // }
                forUtil.WriteBlock(posDeltaBuffer, encoded, posOut);

                if (fieldHasPayloads)
                {
                    forUtil.WriteBlock(payloadLengthBuffer, encoded, payOut);
                    payOut.WriteVInt32(payloadByteUpto);
                    payOut.WriteBytes(payloadBytes, 0, payloadByteUpto);
                    payloadByteUpto = 0;
                }
                if (fieldHasOffsets)
                {
                    forUtil.WriteBlock(offsetStartDeltaBuffer, encoded, payOut);
                    forUtil.WriteBlock(offsetLengthBuffer, encoded, payOut);
                }
                posBufferUpto = 0;
            }
        }
Example #7
        public override int Merge(MergeState mergeState)
        {
            int docCount = 0;
            int idx      = 0;

            foreach (AtomicReader reader in mergeState.Readers)
            {
                SegmentReader matchingSegmentReader = mergeState.MatchingSegmentReaders[idx++];
                CompressingStoredFieldsReader matchingFieldsReader = null;
                if (matchingSegmentReader != null)
                {
                    StoredFieldsReader fieldsReader = matchingSegmentReader.FieldsReader;
                    // we can only bulk-copy if the matching reader is also a CompressingStoredFieldsReader
                    if (fieldsReader != null && fieldsReader is CompressingStoredFieldsReader)
                    {
                        matchingFieldsReader = (CompressingStoredFieldsReader)fieldsReader;
                    }
                }

                int  maxDoc   = reader.MaxDoc;
                Bits liveDocs = reader.LiveDocs;

                // The way data is decompressed depends on the chunk size, and a version
                // mismatch means the reader version is not the same as the writer version.
                if (matchingFieldsReader == null || matchingFieldsReader.Version != VERSION_CURRENT || matchingFieldsReader.CompressionMode != CompressionMode || matchingFieldsReader.ChunkSize != ChunkSize)
                {
                    // naive merge...
                    for (int i = NextLiveDoc(0, liveDocs, maxDoc); i < maxDoc; i = NextLiveDoc(i + 1, liveDocs, maxDoc))
                    {
                        Document doc = reader.Document(i);
                        AddDocument(doc, mergeState.FieldInfos);
                        ++docCount;
                        mergeState.checkAbort.Work(300);
                    }
                }
                else
                {
                    int docID = NextLiveDoc(0, liveDocs, maxDoc);
                    if (docID < maxDoc)
                    {
                        // not all docs were deleted
                        CompressingStoredFieldsReader.ChunkIterator it = matchingFieldsReader.GetChunkIterator(docID);
                        int[] startOffsets = new int[0];
                        do
                        {
                            // go to the next chunk that contains docID
                            it.Next(docID);
                            // transform lengths into offsets
                            if (startOffsets.Length < it.ChunkDocs)
                            {
                                startOffsets = new int[ArrayUtil.Oversize(it.ChunkDocs, 4)];
                            }
                            for (int i = 1; i < it.ChunkDocs; ++i)
                            {
                                startOffsets[i] = startOffsets[i - 1] + it.Lengths[i - 1];
                            }

                            if (NumBufferedDocs == 0 && // starting a new chunk
                                startOffsets[it.ChunkDocs - 1] < ChunkSize && // chunk is small enough
                                startOffsets[it.ChunkDocs - 1] + it.Lengths[it.ChunkDocs - 1] >= ChunkSize && // chunk is large enough
                                NextDeletedDoc(it.DocBase, liveDocs, it.DocBase + it.ChunkDocs) == it.DocBase + it.ChunkDocs) // no deletion in the chunk
                            {
                                Debug.Assert(docID == it.DocBase);

                                // no need to decompress, just copy data
                                IndexWriter.WriteIndex(it.ChunkDocs, FieldsStream.FilePointer);
                                WriteHeader(this.DocBase, it.ChunkDocs, it.NumStoredFields, it.Lengths);
                                it.CopyCompressedData(FieldsStream);
                                this.DocBase += it.ChunkDocs;
                                docID         = NextLiveDoc(it.DocBase + it.ChunkDocs, liveDocs, maxDoc);
                                docCount     += it.ChunkDocs;
                                mergeState.checkAbort.Work(300 * it.ChunkDocs);
                            }
                            else
                            {
                                // decompress
                                it.Decompress();
                                if (startOffsets[it.ChunkDocs - 1] + it.Lengths[it.ChunkDocs - 1] != it.Bytes.Length)
                                {
                                    throw new CorruptIndexException("Corrupted: expected chunk size=" + (startOffsets[it.ChunkDocs - 1] + it.Lengths[it.ChunkDocs - 1]) + ", got " + it.Bytes.Length);
                                }
                                // copy non-deleted docs
                                for (; docID < it.DocBase + it.ChunkDocs; docID = NextLiveDoc(docID + 1, liveDocs, maxDoc))
                                {
                                    int diff = docID - it.DocBase;
                                    StartDocument(it.NumStoredFields[diff]);
                                    BufferedDocs.WriteBytes(it.Bytes.Bytes, it.Bytes.Offset + startOffsets[diff], it.Lengths[diff]);
                                    FinishDocument();
                                    ++docCount;
                                    mergeState.checkAbort.Work(300);
                                }
                            }
                        } while (docID < maxDoc);

                        it.CheckIntegrity();
                    }
                }
            }
            Finish(mergeState.FieldInfos, docCount);
            return(docCount);
        }
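The merge loop above sizes its scratch array with ArrayUtil.Oversize(minSize, bytesPerElement), which returns a padded length so repeated regrowth stays amortized; the literal 4 is the width of an int in bytes. A sketch, assuming Lucene.Net.Util:

using System;
using Lucene.Net.Util;

class OversizeSketch
{
    static void Main()
    {
        int needed = 130;
        int padded = ArrayUtil.Oversize(needed, 4 /* bytes per int */);
        int[] startOffsets = new int[padded]; // padded >= needed
        Console.WriteLine($"asked for {needed}, allocated {startOffsets.Length}");
    }
}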
Example #8
            private void LoadTerms()
            {
                var posIntOutputs = PositiveInt32Outputs.Singleton;
                var outputsInner  = new PairOutputs <long?, long?>(posIntOutputs, posIntOutputs);
                var outputs       = new PairOutputs <long?, PairOutputs <long?, long?> .Pair>(posIntOutputs, outputsInner);

                // honestly, wtf kind of generic mess is this.
                var b     = new Builder <PairOutputs <long?, PairOutputs <long?, long?> .Pair> .Pair>(FST.INPUT_TYPE.BYTE1, outputs);
                var input = (IndexInput)_outerInstance._input.Clone();

                input.Seek(_termsStart);

                var  lastTerm      = new BytesRef(10);
                long lastDocsStart = -1;
                int  docFreq       = 0;
                long totalTermFreq = 0;
                var  visitedDocs   = new FixedBitSet(_maxDoc);

                var scratchIntsRef = new Int32sRef();

                while (true)
                {
                    SimpleTextUtil.ReadLine(input, _scratch);
                    if (_scratch.Equals(SimpleTextFieldsWriter.END) || StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.FIELD))
                    {
                        if (lastDocsStart != -1)
                        {
                            b.Add(Util.ToInt32sRef(lastTerm, scratchIntsRef),
                                  outputs.NewPair(lastDocsStart, outputsInner.NewPair(docFreq, totalTermFreq)));
                            _sumTotalTermFreq += totalTermFreq;
                        }
                        break;
                    }

                    if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.DOC))
                    {
                        docFreq++;
                        _sumDocFreq++;
                        UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + SimpleTextFieldsWriter.DOC.Length, _scratch.Length - SimpleTextFieldsWriter.DOC.Length,
                                                _scratchUtf16);
                        int docId = ArrayUtil.ParseInt32(_scratchUtf16.Chars, 0, _scratchUtf16.Length);
                        visitedDocs.Set(docId);
                    }
                    else if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.FREQ))
                    {
                        UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + SimpleTextFieldsWriter.FREQ.Length,
                                                _scratch.Length - SimpleTextFieldsWriter.FREQ.Length, _scratchUtf16);
                        totalTermFreq += ArrayUtil.ParseInt32(_scratchUtf16.Chars, 0, _scratchUtf16.Length);
                    }
                    else if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.TERM))
                    {
                        if (lastDocsStart != -1)
                        {
                            b.Add(Util.ToInt32sRef(lastTerm, scratchIntsRef),
                                  outputs.NewPair(lastDocsStart, outputsInner.NewPair(docFreq, totalTermFreq)));
                        }
                        lastDocsStart = input.GetFilePointer();
                        int len = _scratch.Length - SimpleTextFieldsWriter.TERM.Length;
                        if (len > lastTerm.Length)
                        {
                            lastTerm.Grow(len);
                        }
                        Array.Copy(_scratch.Bytes, SimpleTextFieldsWriter.TERM.Length, lastTerm.Bytes, 0, len);
                        lastTerm.Length    = len;
                        docFreq            = 0;
                        _sumTotalTermFreq += totalTermFreq;
                        totalTermFreq      = 0;
                        _termCount++;
                    }
                }
                _docCount = visitedDocs.Cardinality();
                _fst      = b.Finish();
            }
Example #9
        private IEnumerable <long?> GetMergeSortedSetOrdsEnumerable(AtomicReader[] readers, SortedSetDocValues[] dvs, OrdinalMap map)
        {
            int          readerUpto      = -1;
            int          docIDUpto       = 0;
            AtomicReader currentReader   = null;
            IBits        currentLiveDocs = null;
            var          ords            = new long[8];
            int          ordUpto         = 0;
            int          ordLength       = 0;

            while (true)
            {
                if (readerUpto == readers.Length)
                {
                    yield break;
                }

                if (ordUpto < ordLength)
                {
                    var value = ords[ordUpto];
                    ordUpto++;
                    yield return(value);

                    continue;
                }

                if (currentReader == null || docIDUpto == currentReader.MaxDoc)
                {
                    readerUpto++;
                    if (readerUpto < readers.Length)
                    {
                        currentReader   = readers[readerUpto];
                        currentLiveDocs = currentReader.LiveDocs;
                    }
                    docIDUpto = 0;
                    continue;
                }

                if (currentLiveDocs == null || currentLiveDocs.Get(docIDUpto))
                {
                    Debug.Assert(docIDUpto < currentReader.MaxDoc);
                    SortedSetDocValues dv = dvs[readerUpto];
                    dv.SetDocument(docIDUpto);
                    ordUpto = ordLength = 0;
                    long ord;
                    while ((ord = dv.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
                    {
                        if (ordLength == ords.Length)
                        {
                            ords = ArrayUtil.Grow(ords, ordLength + 1);
                        }
                        ords[ordLength] = map.GetGlobalOrd(readerUpto, ord);
                        ordLength++;
                    }
                    docIDUpto++;
                    continue;
                }

                docIDUpto++;
            }
        }
Example #10
            public override int NextDoc()
            {
                bool first = true;

                _in.Seek(_nextDocStart);
                long posStart = 0;

                while (true)
                {
                    long lineStart = _in.GetFilePointer();
                    SimpleTextUtil.ReadLine(_in, _scratch);
                    //System.out.println("NEXT DOC: " + scratch.utf8ToString());
                    if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.DOC))
                    {
                        if (!first && (_liveDocs == null || _liveDocs.Get(_docId)))
                        {
                            _nextDocStart = lineStart;
                            _in.Seek(posStart);
                            return(_docId);
                        }
                        UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + SimpleTextFieldsWriter.DOC.Length, _scratch.Length - SimpleTextFieldsWriter.DOC.Length,
                                                _scratchUtf16);
                        _docId = ArrayUtil.ParseInt32(_scratchUtf16.Chars, 0, _scratchUtf16.Length);
                        _tf    = 0;
                        first  = false;
                    }
                    else if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.FREQ))
                    {
                        UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + SimpleTextFieldsWriter.FREQ.Length,
                                                _scratch.Length - SimpleTextFieldsWriter.FREQ.Length, _scratchUtf16);
                        _tf      = ArrayUtil.ParseInt32(_scratchUtf16.Chars, 0, _scratchUtf16.Length);
                        posStart = _in.GetFilePointer();
                    }
                    else if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.POS))
                    {
                        // skip
                    }
                    else if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.START_OFFSET))
                    {
                        // skip
                    }
                    else if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.END_OFFSET))
                    {
                        // skip
                    }
                    else if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.PAYLOAD))
                    {
                        // skip
                    }
                    else
                    {
                        if (Debugging.AssertsEnabled)
                        {
                            Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.TERM) || StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.FIELD) ||
                                             StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.END));
                        }

                        if (!first && (_liveDocs == null || _liveDocs.Get(_docId)))
                        {
                            _nextDocStart = lineStart;
                            _in.Seek(posStart);
                            return(_docId);
                        }
                        return(_docId = NO_MORE_DOCS);
                    }
                }
            }
Example #11
            public override int NextPosition()
            {
                int pos;

                if (_readPositions)
                {
                    SimpleTextUtil.ReadLine(_in, _scratch);
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.POS), () => "got line=" + _scratch.Utf8ToString());
                    }
                    UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + SimpleTextFieldsWriter.POS.Length, _scratch.Length - SimpleTextFieldsWriter.POS.Length,
                                            _scratchUtf162);
                    pos = ArrayUtil.ParseInt32(_scratchUtf162.Chars, 0, _scratchUtf162.Length);
                }
                else
                {
                    pos = -1;
                }

                if (_readOffsets)
                {
                    SimpleTextUtil.ReadLine(_in, _scratch);
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.START_OFFSET), () => "got line=" + _scratch.Utf8ToString());
                    }
                    UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + SimpleTextFieldsWriter.START_OFFSET.Length,
                                            _scratch.Length - SimpleTextFieldsWriter.START_OFFSET.Length, _scratchUtf162);
                    _startOffset = ArrayUtil.ParseInt32(_scratchUtf162.Chars, 0, _scratchUtf162.Length);
                    SimpleTextUtil.ReadLine(_in, _scratch);
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.END_OFFSET), () => "got line=" + _scratch.Utf8ToString());
                    }
                    UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + SimpleTextFieldsWriter.END_OFFSET.Length,
                                            _scratch.Length - SimpleTextFieldsWriter.END_OFFSET.Length, _scratchUtf162);
                    _endOffset = ArrayUtil.ParseInt32(_scratchUtf162.Chars, 0, _scratchUtf162.Length);
                }

                long fp = _in.GetFilePointer();

                SimpleTextUtil.ReadLine(_in, _scratch);
                if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.PAYLOAD))
                {
                    int len = _scratch.Length - SimpleTextFieldsWriter.PAYLOAD.Length;
                    if (_scratch2.Bytes.Length < len)
                    {
                        _scratch2.Grow(len);
                    }
                    Array.Copy(_scratch.Bytes, SimpleTextFieldsWriter.PAYLOAD.Length, _scratch2.Bytes, 0, len);
                    _scratch2.Length = len;
                    _payload         = _scratch2;
                }
                else
                {
                    _payload = null;
                    _in.Seek(fp);
                }
                return(pos);
            }
Example #12
            public override int NextDoc()
            {
                if (_docId == NO_MORE_DOCS)
                {
                    return(_docId);
                }
                bool first    = true;
                int  termFreq = 0;

                while (true)
                {
                    long lineStart = _in.GetFilePointer();
                    SimpleTextUtil.ReadLine(_in, _scratch);
                    if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.DOC))
                    {
                        if (!first && (_liveDocs == null || _liveDocs.Get(_docId)))
                        {
                            _in.Seek(lineStart);
                            if (!_omitTf)
                            {
                                _tf = termFreq;
                            }
                            return(_docId);
                        }
                        UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + SimpleTextFieldsWriter.DOC.Length, _scratch.Length - SimpleTextFieldsWriter.DOC.Length,
                                                _scratchUtf16);
                        _docId   = ArrayUtil.ParseInt32(_scratchUtf16.Chars, 0, _scratchUtf16.Length);
                        termFreq = 0;
                        first    = false;
                    }
                    else if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.FREQ))
                    {
                        UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + SimpleTextFieldsWriter.FREQ.Length,
                                                _scratch.Length - SimpleTextFieldsWriter.FREQ.Length, _scratchUtf16);
                        termFreq = ArrayUtil.ParseInt32(_scratchUtf16.Chars, 0, _scratchUtf16.Length);
                    }
                    else if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.POS))
                    {
                        // skip termFreq++;
                    }
                    else if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.START_OFFSET))
                    {
                        // skip
                    }
                    else if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.END_OFFSET))
                    {
                        // skip
                    }
                    else if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.PAYLOAD))
                    {
                        // skip
                    }
                    else
                    {
                        if (Debugging.AssertsEnabled)
                        {
                            Debugging.Assert(
                                StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.TERM) || StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.FIELD) ||
                                StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.END), () => "scratch=" + _scratch.Utf8ToString());
                        }

                        if (!first && (_liveDocs == null || _liveDocs.Get(_docId)))
                        {
                            _in.Seek(lineStart);
                            if (!_omitTf)
                            {
                                _tf = termFreq;
                            }
                            return(_docId);
                        }
                        return(_docId = NO_MORE_DOCS);
                    }
                }
            }
Example #13
        /// <summary>
        /// Low level api. Returns a token stream generated from a <see cref="Terms"/>. This
        /// can be used to feed the highlighter with a pre-parsed token
        /// stream.  The <see cref="Terms"/> must have offsets available.
        /// <para/>
        /// In my tests the speeds to recreate 1000 token streams using this method are:
        /// <list type="bullet">
        ///     <item><description>
        ///     with TermVector offset only data stored - 420  milliseconds
        ///     </description></item>
        ///     <item><description>
        ///     with TermVector offset AND position data stored - 271 milliseconds
        ///     (nb timings for TermVector with position data are based on a tokenizer with contiguous
        ///     positions - no overlaps or gaps)
        ///     </description></item>
        ///     <item><description>
        ///     The cost of not using TermPositionVector to store
        ///     pre-parsed content and using an analyzer to re-parse the original content:
        ///     - reanalyzing the original content - 980 milliseconds
        ///     </description></item>
        /// </list>
        ///
        /// The re-analyze timings will typically vary depending on -
        /// <list type="number">
        ///     <item><description>
        ///     The complexity of the analyzer code (timings above were using a
        ///     stemmer/lowercaser/stopword combo)
        ///     </description></item>
        ///     <item><description>
        ///     The  number of other fields (Lucene reads ALL fields off the disk
        ///     when accessing just one document field - can cost dear!)
        ///     </description></item>
        ///     <item><description>
        ///     Use of compression on field storage - could be faster due to compression (less disk IO)
        ///     or slower (more CPU burn) depending on the content.
        ///     </description></item>
        /// </list>
        /// </summary>
        /// <param name="tpv"></param>
        /// <param name="tokenPositionsGuaranteedContiguous">true if the token position numbers have no overlaps or gaps. If looking
        /// to eek out the last drops of performance, set to true. If in doubt, set to false.</param>
        /// <exception cref="ArgumentException">if no offsets are available</exception>
        public static TokenStream GetTokenStream(Terms tpv,
                                                 bool tokenPositionsGuaranteedContiguous)
        {
            if (!tpv.HasOffsets)
            {
                throw new ArgumentException("Cannot create TokenStream from Terms without offsets");
            }

            if (!tokenPositionsGuaranteedContiguous && tpv.HasPositions)
            {
                return(new TokenStreamFromTermPositionVector(tpv));
            }

            bool hasPayloads = tpv.HasPayloads;

            // code to reconstruct the original sequence of Tokens
            TermsEnum termsEnum   = tpv.GetIterator(null);
            int       totalTokens = 0;

            while (termsEnum.Next() != null)
            {
                totalTokens += (int)termsEnum.TotalTermFreq;
            }
            Token[]      tokensInOriginalOrder = new Token[totalTokens];
            List <Token> unsortedTokens        = null;

            termsEnum = tpv.GetIterator(null);
            BytesRef             text;
            DocsAndPositionsEnum dpEnum = null;

            while ((text = termsEnum.Next()) != null)
            {
                dpEnum = termsEnum.DocsAndPositions(null, dpEnum);
                if (dpEnum == null)
                {
                    throw new ArgumentException("Required TermVector Offset information was not found");
                }
                string term = text.Utf8ToString();

                dpEnum.NextDoc();
                int freq = dpEnum.Freq;
                for (int posUpto = 0; posUpto < freq; posUpto++)
                {
                    int pos = dpEnum.NextPosition();
                    if (dpEnum.StartOffset < 0)
                    {
                        throw new ArgumentException("Required TermVector Offset information was not found");
                    }
                    Token token = new Token(term, dpEnum.StartOffset, dpEnum.EndOffset);
                    if (hasPayloads)
                    {
                        // Must make a deep copy of the returned payload,
                        // since D&PEnum API is allowed to re-use on every
                        // call:
                        token.Payload = BytesRef.DeepCopyOf(dpEnum.GetPayload());
                    }

                    if (tokenPositionsGuaranteedContiguous && pos != -1)
                    {
                        // We have positions stored and a guarantee that the token position
                        // information is contiguous

                        // This may be fast BUT won't work if a tokenizer was used that
                        // creates more than one token in the same position, or creates
                        // jumps in position numbers - this code would fail under those
                        // circumstances

                        // tokens stored with positions - can use this to index straight into
                        // sorted array
                        tokensInOriginalOrder[pos] = token;
                    }
                    else
                    {
                        // tokens NOT stored with positions or not guaranteed contiguous - must
                        // add to list and sort later
                        if (unsortedTokens == null)
                        {
                            unsortedTokens = new List <Token>();
                        }
                        unsortedTokens.Add(token);
                    }
                }
            }

            // If the field has been stored without position data we must perform a sort
            if (unsortedTokens != null)
            {
                tokensInOriginalOrder = unsortedTokens.ToArray();
                ArrayUtil.TimSort(tokensInOriginalOrder, new TokenComparer());
                //tokensInOriginalOrder = tokensInOriginalOrder
                //    .OrderBy(t => t, new TokenComparer() )
                //    .ToArray();
            }
            return(new StoredTokenStream(tokensInOriginalOrder));
        }
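A hedged usage sketch for the method above, assuming it lives on Lucene.NET's highlighter TokenSources class as in the stock library; the docId and field name are illustrative:

using Lucene.Net.Analysis;
using Lucene.Net.Index;
using Lucene.Net.Search.Highlight;

static class TokenStreamSketch
{
    // Rebuild a TokenStream for one document field from its term vector;
    // returns null if the field has no term vector. The field must have
    // been indexed with term vector offsets, or GetTokenStream throws.
    public static TokenStream FromTermVector(IndexReader reader, int docId, string field)
    {
        Terms tpv = reader.GetTermVector(docId, field);
        return tpv == null
            ? null
            : TokenSources.GetTokenStream(tpv, tokenPositionsGuaranteedContiguous: false);
    }
}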
Example #14
        private void Trigger_Imp(object sender, object incomingArg, object outgoingArg, BlockingTriggerYieldInstruction instruction)
        {
            switch (this._activationType)
            {
            case TriggerActivationType.TriggerAllOnTarget:
            {
                if (_triggerAllCache == null)
                {
                    //_triggerAllCache = _triggerable.GetComponentsAlt<ITriggerableMechanism>();
                    var go = GameObjectUtil.GetGameObjectFromSource(_triggerable);
                    if (go != null)
                    {
                        _triggerAllCache = go.GetComponents <ITriggerableMechanism>();
                    }
                    else if (_triggerable is ITriggerableMechanism)
                    {
                        _triggerAllCache = new ITriggerableMechanism[] { _triggerable as ITriggerableMechanism };
                    }
                    else
                    {
                        _triggerAllCache = ArrayUtil.Empty <ITriggerableMechanism>();
                    }

                    if (_triggerAllCache.Length > 1)
                    {
                        System.Array.Sort(_triggerAllCache, TriggerableMechanismOrderComparer.Default);
                    }
                }
                if (instruction != null)
                {
                    foreach (var t in _triggerAllCache)
                    {
                        if (t.CanTrigger)
                        {
                            if (t is IBlockingTriggerableMechanism)
                            {
                                (t as IBlockingTriggerableMechanism).Trigger(sender, outgoingArg, instruction);
                            }
                            else
                            {
                                t.Trigger(sender, outgoingArg);
                            }
                        }
                    }
                }
                else
                {
                    foreach (var t in _triggerAllCache)
                    {
                        if (t.CanTrigger)
                        {
                            t.Trigger(sender, outgoingArg);
                        }
                    }
                }
            }
            break;

            case TriggerActivationType.TriggerSelectedTarget:
            {
                //UnityEngine.Object targ = _triggerable;
                //if (targ is IProxy) targ = (targ as IProxy).GetTarget(incomingArg);
                //TriggerSelectedTarget(targ, sender, outgoingArg, instruction);
                TriggerSelectedTarget(_triggerable, sender, outgoingArg, instruction);
            }
            break;

            case TriggerActivationType.SendMessage:
            {
                object targ = _triggerable;
                if (targ is IProxy)
                {
                    targ = (targ as IProxy).GetTarget(incomingArg);
                }
                SendMessageToTarget(targ, _methodName, outgoingArg);
            }
            break;

            case TriggerActivationType.CallMethodOnSelectedTarget:
            {
                CallMethodOnSelectedTarget(_triggerable, _methodName, _triggerableArgs);
            }
            break;

            case TriggerActivationType.EnableTarget:
            {
                object targ = _triggerable;
                if (targ is IProxy)
                {
                    targ = (targ as IProxy).GetTarget(incomingArg);
                }
                EnableTarget(targ, ConvertUtil.ToEnum <EnableMode>(_methodName));
            }
            break;

            case TriggerActivationType.DestroyTarget:
            {
                object targ = _triggerable;
                if (targ is IProxy)
                {
                    targ = (targ as IProxy).GetTarget(incomingArg);
                }
                DestroyTarget(targ);
            }
            break;
            }
        }
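This snippet comes from a Unity trigger framework, so the ArrayUtil here is that project's own utility class rather than Lucene's. A hedged stand-in for its Empty<T>() fallback, equivalent to System.Array.Empty<T>():

static class EmptyArraySketch
{
    // One shared, immutable empty instance per element type,
    // so the fallback path allocates nothing.
    public static T[] Empty<T>() => System.Array.Empty<T>();
}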
Example #15
        public void LibraryTransitionTest()
        {
            LibraryManager        libraryManager;
            TestDocumentContainer docContainer;
            int         startRev;
            SrmDocument docLoaded = CreateNISTLibraryDocument(out libraryManager, out docContainer, out startRev);

            // Test tolerance range
            SrmSettings settings = docLoaded.Settings.ChangeTransitionLibraries(l =>
                                                                                l.ChangeIonMatchTolerance(TransitionLibraries.MIN_MATCH_TOLERANCE));
            SrmDocument docLowTol = docLoaded.ChangeSettings(settings);

            // Use the original low tolerance for transition testing, since
            // the new low tolerance is for high accuracy data.
            docLowTol = docLowTol.ChangeSettings(settings.ChangeTransitionLibraries(l =>
                                                                                    l.ChangeIonMatchTolerance(0.1)));
            settings = docLowTol.Settings.ChangeTransitionLibraries(l =>
                                                                    l.ChangeIonMatchTolerance(TransitionLibraries.MAX_MATCH_TOLERANCE));
            SrmDocument docHighTol = docLoaded.ChangeSettings(settings);

            Assert.AreEqual(docLowTol.PeptideTransitionCount, docHighTol.PeptideTransitionCount);

            var transLow  = docLowTol.PeptideTransitions.ToArray();
            var transHigh = docHighTol.PeptideTransitions.ToArray();
            int diffCount = 0;

            for (int i = 0; i < transLow.Length; i++)
            {
                if (!Equals(transLow[i], transHigh[i]))
                {
                    diffCount++;
                }
            }
            Assert.AreEqual(2, diffCount);

            Assert.IsTrue(ArrayUtil.ReferencesEqual(docLoaded.PeptideTransitionGroups.ToArray(), docHighTol.PeptideTransitionGroups.ToArray()));
            Assert.IsTrue(HasMaxTransitionRank(docHighTol, 3));

            SrmSettings setThrow = settings;

            AssertEx.ThrowsException <InvalidDataException>(() =>
                                                            setThrow.ChangeTransitionLibraries(l => l.ChangeIonMatchTolerance(TransitionLibraries.MAX_MATCH_TOLERANCE * 2)));
            AssertEx.ThrowsException <InvalidDataException>(() =>
                                                            setThrow.ChangeTransitionLibraries(l => l.ChangeIonMatchTolerance(TransitionLibraries.MIN_MATCH_TOLERANCE / 2)));

            // Picked transition count
            settings = docLoaded.Settings.ChangeTransitionLibraries(l => l.ChangeIonCount(5));
            SrmDocument docHighIons = docLoaded.ChangeSettings(settings);

            AssertEx.IsDocumentState(docHighIons, ++startRev, 2, 4, 20);
            Assert.IsTrue(HasMaxTransitionRank(docHighIons, 5));
            Assert.IsFalse(HasMinTransitionOrdinal(docHighIons, 4));

            settings = settings.ChangeTransitionLibraries(l => l.ChangePick(TransitionLibraryPick.none));
            SrmDocument docFilteredIons = docHighIons.ChangeSettings(settings);

            AssertEx.IsDocumentState(docFilteredIons, ++startRev, 2, 4, 15); // Proline ions
            Assert.IsFalse(HasMaxTransitionRank(docFilteredIons, 5));

            settings = settings.ChangeTransitionFilter(f => f.ChangeFragmentRangeFirstName("ion 4")
                                                       .ChangeFragmentRangeLastName("last ion").ChangeMeasuredIons(new MeasuredIon[0]));
            settings = settings.ChangeTransitionLibraries(l => l.ChangePick(TransitionLibraryPick.filter));
            SrmDocument docRankedFiltered = docFilteredIons.ChangeSettings(settings);

            AssertEx.IsDocumentState(docRankedFiltered, ++startRev, 2, 4, 20);
            Assert.IsTrue(HasMaxTransitionRank(docRankedFiltered, 5));
            Assert.IsTrue(HasMinTransitionOrdinal(docRankedFiltered, 4));
            AssertEx.Serializable(docRankedFiltered, (doc1, doc2) => ValidateLibraryDocs(doc1, doc2, libraryManager));
        }
Example #16
        public int[] GetRandomAcceptedString(Random r)
        {
            IList <int?> soFar = new List <int?>();

            if (a.IsSingleton)
            {
                // accepts only one
                var s = a.Singleton;

                int charUpto = 0;
                while (charUpto < s.Length)
                {
                    int cp = s.CodePointAt(charUpto);
                    charUpto += Character.CharCount(cp);
                    soFar.Add(cp);
                }
            }
            else
            {
                var s = a.initial;

                while (true)
                {
                    if (s.accept)
                    {
                        if (s.numTransitions == 0)
                        {
                            // stop now
                            break;
                        }
                        else
                        {
                            if (r.NextBoolean())
                            {
                                break;
                            }
                        }
                    }

                    if (s.numTransitions == 0)
                    {
                        throw new Exception("this automaton has dead states");
                    }

                    bool cheat = r.NextBoolean();

                    Transition t;
                    if (cheat)
                    {
                        // pick a transition that we know is the fastest
                        // path to an accept state
                        IList <Transition> toAccept = new List <Transition>();
                        for (int i = 0; i < s.numTransitions; i++)
                        {
                            Transition t0 = s.TransitionsArray[i];
                            if (leadsToAccept.ContainsKey(t0))
                            {
                                toAccept.Add(t0);
                            }
                        }
                        if (toAccept.Count == 0)
                        {
                            // this is OK -- it means we jumped into a cycle
                            t = s.TransitionsArray[r.Next(s.numTransitions)];
                        }
                        else
                        {
                            t = toAccept[r.Next(toAccept.Count)];
                        }
                    }
                    else
                    {
                        t = s.TransitionsArray[r.Next(s.numTransitions)];
                    }
                    soFar.Add(AutomatonTestUtil.GetRandomCodePoint(r, t));
                    s = t.to;
                }
            }

            return(ArrayUtil.ToInt32Array(soFar));
        }
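GetRandomAcceptedString accumulates code points in an IList<int?> and converts once at the end. A sketch of that ArrayUtil.ToInt32Array conversion, assuming Lucene.Net.Util (presumably it throws if an entry is null, mirroring the Java original):

using System;
using System.Collections.Generic;
using Lucene.Net.Util;

class ToInt32ArraySketch
{
    static void Main()
    {
        IList<int?> codePoints = new List<int?> { 0x61, 0x62, 0x1F600 };
        int[] ints = ArrayUtil.ToInt32Array(codePoints);
        Console.WriteLine(string.Join(", ", ints)); // 97, 98, 128512
    }
}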
Example #17
        /// <summary>
        /// Suggest similar words.
        ///
        /// <para>Unlike <seealso cref="SpellChecker"/>, the similarity used to fetch the most
        /// relevant terms is an edit distance, therefore typically a low value
        /// for numSug will work very well.
        ///
        /// </para>
        /// </summary>
        /// <param name="term"> Term you want to spell check on </param>
        /// <param name="numSug"> the maximum number of suggested words </param>
        /// <param name="ir"> IndexReader to find terms from </param>
        /// <param name="suggestMode"> specifies when to return suggested words </param>
        /// <param name="accuracy"> return only suggested words that match with this similarity </param>
        /// <returns> sorted list of the suggested words according to the comparator </returns>
        /// <exception cref="IOException"> If there is a low-level I/O error. </exception>
        public virtual SuggestWord[] SuggestSimilar(Term term, int numSug, IndexReader ir, SuggestMode suggestMode, float accuracy)
        {
            CharsRef spare = new CharsRef();
            string   text  = term.Text();

            if (minQueryLength > 0 && text.CodePointCount(0, text.Length) < minQueryLength)
            {
                return(new SuggestWord[0]);
            }

            if (lowerCaseTerms)
            {
                term = new Term(term.Field(), text.ToLower(Locale.ROOT));
            }

            int docfreq = ir.DocFreq(term);

            if (suggestMode == SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX && docfreq > 0)
            {
                return(new SuggestWord[0]);
            }

            int maxDoc = ir.MaxDoc();

            if (maxQueryFrequency >= 1f && docfreq > maxQueryFrequency)
            {
                return(new SuggestWord[0]);
            }
            else if (docfreq > (int)Math.Ceiling(maxQueryFrequency * (float)maxDoc))
            {
                return(new SuggestWord[0]);
            }

            if (suggestMode != SuggestMode.SUGGEST_MORE_POPULAR)
            {
                docfreq = 0;
            }

            if (thresholdFrequency >= 1f)
            {
                docfreq = Math.Max(docfreq, (int)thresholdFrequency);
            }
            else if (thresholdFrequency > 0f)
            {
                docfreq = Math.Max(docfreq, (int)(thresholdFrequency * (float)maxDoc) - 1);
            }

            ICollection <ScoreTerm> terms = null;
            int inspections = numSug * maxInspections;

            // try ed=1 first, in case we get lucky
            terms = suggestSimilar(term, inspections, ir, docfreq, 1, accuracy, spare);
            if (maxEdits > 1 && terms.Count < inspections)
            {
                var moreTerms = new HashSet <ScoreTerm>();
                moreTerms.AddAll(terms);
                moreTerms.AddAll(suggestSimilar(term, inspections, ir, docfreq, maxEdits, accuracy, spare));
                terms = moreTerms;
            }

            // create the suggestword response, sort it, and trim it to size.

            var suggestions = new SuggestWord[terms.Count];
            int index       = suggestions.Length - 1;

            foreach (ScoreTerm s in terms)
            {
                SuggestWord suggestion = new SuggestWord();
                if (s.termAsString == null)
                {
                    UnicodeUtil.UTF8toUTF16(s.term, spare);
                    s.termAsString = spare.ToString();
                }
                suggestion.@string   = s.termAsString;
                suggestion.score     = s.score;
                suggestion.freq      = s.docfreq;
                suggestions[index--] = suggestion;
            }

            ArrayUtil.TimSort(suggestions, Collections.ReverseOrder(comparator));
            if (numSug < suggestions.Length)
            {
                SuggestWord[] trimmed = new SuggestWord[numSug];
                Array.Copy(suggestions, 0, trimmed, 0, numSug);
                suggestions = trimmed;
            }
            return(suggestions);
        }
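A hedged usage sketch for SuggestSimilar as declared above (assumes the enclosing class is a DirectSpellChecker-style checker, that dir is an already-open Lucene.NET Directory, and that the field name and misspelling are placeholders):

using (IndexReader reader = DirectoryReader.Open(dir))
{
    var spellChecker = new DirectSpellChecker();
    SuggestWord[] suggestions = spellChecker.SuggestSimilar(
        new Term("contents", "recieve"),       // term to check
        5,                                     // numSug: max suggestions
        reader,
        SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX, // only suggest for absent terms
        0.5f);                                 // accuracy threshold
    foreach (SuggestWord s in suggestions)
        Console.WriteLine(s.@string + " freq=" + s.freq + " score=" + s.score);
}

Because the underlying similarity is an edit distance, a small numSug (here 5) is usually enough, per the remarks above.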
        internal readonly bool isSegmentPrivate; // set to true iff this frozen packet represents
        // segment-private deletes; in that case it should
        // only have queries


        public FrozenBufferedUpdates(BufferedUpdates deletes, bool isSegmentPrivate)
        {
            this.isSegmentPrivate = isSegmentPrivate;
            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(!isSegmentPrivate || deletes.terms.Count == 0, "segment private package should only have del queries");
            }
            Term[] termsArray = deletes.terms.Keys.ToArray(/*new Term[deletes.terms.Count]*/);

            termCount = termsArray.Length;
            ArrayUtil.TimSort(termsArray);
            PrefixCodedTerms.Builder builder = new PrefixCodedTerms.Builder();
            foreach (Term term in termsArray)
            {
                builder.Add(term);
            }
            terms = builder.Finish();

            queries     = new Query[deletes.queries.Count];
            queryLimits = new int[deletes.queries.Count];
            int upto = 0;

            foreach (KeyValuePair <Query, int> ent in deletes.queries)
            {
                queries[upto]     = ent.Key;
                queryLimits[upto] = ent.Value;
                upto++;
            }

            // TODO if a Term affects multiple fields, we could keep the updates key'd by Term
            // so that it maps to all fields it affects, sorted by their docUpto, and traverse
            // that Term only once, applying the update to all fields that still need to be
            // updated.
            IList <NumericDocValuesUpdate> allNumericUpdates = new JCG.List <NumericDocValuesUpdate>();
            int numericUpdatesSize = 0;

            foreach (var numericUpdates in deletes.numericUpdates.Values)
            {
                foreach (NumericDocValuesUpdate update in numericUpdates.Values)
                {
                    allNumericUpdates.Add(update);
                    numericUpdatesSize += update.GetSizeInBytes();
                }
            }
            numericDVUpdates = allNumericUpdates.ToArray();

            // TODO if a Term affects multiple fields, we could keep the updates key'd by Term
            // so that it maps to all fields it affects, sorted by their docUpto, and traverse
            // that Term only once, applying the update to all fields that still need to be
            // updated.
            IList <BinaryDocValuesUpdate> allBinaryUpdates = new JCG.List <BinaryDocValuesUpdate>();
            int binaryUpdatesSize = 0;

            foreach (var binaryUpdates in deletes.binaryUpdates.Values)
            {
                foreach (BinaryDocValuesUpdate update in binaryUpdates.Values)
                {
                    allBinaryUpdates.Add(update);
                    binaryUpdatesSize += update.GetSizeInBytes();
                }
            }
            binaryDVUpdates = allBinaryUpdates.ToArray();

            bytesUsed = (int)terms.GetSizeInBytes() + queries.Length * BYTES_PER_DEL_QUERY + numericUpdatesSize + numericDVUpdates.Length * RamUsageEstimator.NUM_BYTES_OBJECT_REF + binaryUpdatesSize + binaryDVUpdates.Length * RamUsageEstimator.NUM_BYTES_OBJECT_REF;

            numTermDeletes = deletes.numTermDeletes;
        }
Exemple #19
0
 public object[] GetSingleRowData()
 {
     return(ArrayUtil.ResizeArrayIfDifferent <object>(this.InitialiseNavigator().GetNext(), this.MetaData.GetColumnCount()));
 }
        /// <summary>
        /// Expert: highlights the top-N passages from multiple fields,
        /// for the provided int[] docids, to custom object as
        /// returned by the <see cref="PassageFormatter"/>.  Use
        /// this API to render to something other than <see cref="string"/>.
        /// </summary>
        /// <param name="fieldsIn">field names to highlight. Must have a stored string value and also be indexed with offsets.</param>
        /// <param name="query">query to highlight.</param>
        /// <param name="searcher">searcher that was previously used to execute the query.</param>
        /// <param name="docidsIn">containing the document IDs to highlight.</param>
        /// <param name="maxPassagesIn">The maximum number of top-N ranked passages per-field used to form the highlighted snippets.</param>
        /// <returns>
        /// <see cref="T:IDictionary{string, object[]}"/> keyed on field name, containing the array of formatted snippets
        /// corresponding to the documents in <paramref name="docidsIn"/>.
        /// If no highlights were found for a document, the
        /// first <paramref name="maxPassagesIn"/> from the field will
        /// be returned.
        /// </returns>
        /// <exception cref="IOException">if an I/O error occurred during processing</exception>
        /// <exception cref="ArgumentException">if <c>field</c> was indexed without <see cref="IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS"/></exception>
        protected internal virtual IDictionary <string, object[]> HighlightFieldsAsObjects(string[] fieldsIn, Query query, IndexSearcher searcher, int[] docidsIn, int[] maxPassagesIn)
        {
            if (fieldsIn.Length < 1)
            {
                throw new ArgumentException("fieldsIn must not be empty");
            }
            if (fieldsIn.Length != maxPassagesIn.Length)
            {
                throw new ArgumentException("invalid number of maxPassagesIn");
            }
            IndexReader reader    = searcher.IndexReader;
            Query       rewritten = Rewrite(query);

            JCG.SortedSet <Term> queryTerms = new JCG.SortedSet <Term>();
            rewritten.ExtractTerms(queryTerms);

            IndexReaderContext          readerContext = reader.Context;
            IList <AtomicReaderContext> leaves        = readerContext.Leaves;

            // Make our own copies because we sort in-place:
            int[] docids = new int[docidsIn.Length];
            System.Array.Copy(docidsIn, 0, docids, 0, docidsIn.Length);
            string[] fields = new string[fieldsIn.Length];
            System.Array.Copy(fieldsIn, 0, fields, 0, fieldsIn.Length);
            int[] maxPassages = new int[maxPassagesIn.Length];
            System.Array.Copy(maxPassagesIn, 0, maxPassages, 0, maxPassagesIn.Length);

            // sort for sequential io
            ArrayUtil.TimSort(docids);
            new InPlaceMergeSorterAnonymousHelper(fields, maxPassages).Sort(0, fields.Length);

            // pull stored data:
            IList <string[]> contents = LoadFieldValues(searcher, fields, docids, maxLength);

            IDictionary <string, object[]> highlights = new Dictionary <string, object[]>();

            for (int i = 0; i < fields.Length; i++)
            {
                string field       = fields[i];
                int    numPassages = maxPassages[i];
                Term   floor       = new Term(field, "");
                Term   ceiling     = new Term(field, UnicodeUtil.BIG_TERM);

                // LUCENENET: Call custom GetViewBetween overload to mimic Java's exclusive upper bound behavior.
                var fieldTerms = queryTerms.GetViewBetween(floor, lowerValueInclusive: true, ceiling, upperValueInclusive: false);

                // TODO: should we have some reasonable defaults for term pruning? (e.g. stopwords)

                // Strip off the redundant field:
                BytesRef[] terms    = new BytesRef[fieldTerms.Count];
                int        termUpto = 0;
                foreach (Term term in fieldTerms)
                {
                    terms[termUpto++] = term.Bytes;
                }
                IDictionary <int, object> fieldHighlights = HighlightField(field, contents[i], GetBreakIterator(field), terms, docids, leaves, numPassages, query);

                object[] result = new object[docids.Length];
                for (int j = 0; j < docidsIn.Length; j++)
                {
                    fieldHighlights.TryGetValue(docidsIn[j], out result[j]);
                }
                highlights[field] = result;
            }
            return(highlights);
        }
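HighlightFieldsAsObjects is protected, so it is normally surfaced through a subclass that also swaps in the custom PassageFormatter. A hedged sketch of that pattern (assumes the enclosing type is PostingsHighlighter and that GetFormatter(string) is the overridable formatter hook, as in the Java original):

// Hypothetical subclass; all names below are illustrative.
public class ObjectPostingsHighlighter : PostingsHighlighter
{
    private readonly PassageFormatter formatter;

    public ObjectPostingsHighlighter(PassageFormatter formatter)
    {
        this.formatter = formatter;
    }

    protected override PassageFormatter GetFormatter(string field)
    {
        return formatter; // a custom formatter may return non-string objects
    }

    // surface the protected object-based API
    public IDictionary<string, object[]> HighlightToObjects(
        string[] fields, Query query, IndexSearcher searcher,
        int[] docids, int[] maxPassages)
    {
        return HighlightFieldsAsObjects(fields, query, searcher, docids, maxPassages);
    }
}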
Exemple #21
0
 public virtual void ResolveReferences(Session session, RangeVariable[] outerRanges)
 {
     this._leftQueryExpression.ResolveReferences(session, outerRanges);
     this._rightQueryExpression.ResolveReferences(session, outerRanges);
     this.AddUnresolvedExpressions(this._leftQueryExpression.UnresolvedExpressions);
     this.AddUnresolvedExpressions(this._rightQueryExpression.UnresolvedExpressions);
     if (this._unionCorresponding)
     {
         string[] columnNames = this._leftQueryExpression.GetColumnNames();
         string[] array       = this._rightQueryExpression.GetColumnNames();
         if (this._unionCorrespondingColumns == null)
         {
             this._unionCorrespondingColumns = new OrderedHashSet <string>();
             OrderedIntHashSet set  = new OrderedIntHashSet();
             OrderedIntHashSet set2 = new OrderedIntHashSet();
             for (int i = 0; i < columnNames.Length; i++)
             {
                 string str   = columnNames[i];
                 int    index = ArrayUtil.Find(array, str);
                 if ((str.Length > 0) && (index != -1))
                 {
                     if (!this._leftQueryExpression.AccessibleColumns[i])
                     {
                         throw Error.GetError(0x15ca);
                     }
                     if (!this._rightQueryExpression.AccessibleColumns[index])
                     {
                         throw Error.GetError(0x15ca);
                     }
                     set.Add(i);
                     set2.Add(index);
                     this._unionCorrespondingColumns.Add(str);
                 }
             }
             if (this._unionCorrespondingColumns.IsEmpty())
             {
                 throw Error.GetError(0x15ca);
             }
             this._leftQueryExpression.UnionColumnMap  = set.ToArray();
             this._rightQueryExpression.UnionColumnMap = set2.ToArray();
         }
         else
         {
             this._leftQueryExpression.UnionColumnMap  = new int[this._unionCorrespondingColumns.Size()];
             this._rightQueryExpression.UnionColumnMap = new int[this._unionCorrespondingColumns.Size()];
             for (int i = 0; i < this._unionCorrespondingColumns.Size(); i++)
             {
                 string str2  = this._unionCorrespondingColumns.Get(i);
                 int    index = ArrayUtil.Find(columnNames, str2);
                 if (index == -1)
                 {
                     throw Error.GetError(0x157d);
                 }
                 if (!this._leftQueryExpression.AccessibleColumns[index])
                 {
                     throw Error.GetError(0x15ca);
                 }
                 this._leftQueryExpression.UnionColumnMap[i] = index;
                 index = ArrayUtil.Find(array, str2);
                 if (index == -1)
                 {
                     throw Error.GetError(0x157d);
                 }
                 if (!this._rightQueryExpression.AccessibleColumns[index])
                 {
                     throw Error.GetError(0x15ca);
                 }
                 this._rightQueryExpression.UnionColumnMap[i] = index;
             }
         }
         this._columnCount           = this._unionCorrespondingColumns.Size();
         this.UnionColumnTypes       = new SqlType[this._columnCount];
         this.UnionColumnNullability = new byte[this._columnCount];
         this.ResolveColumnRefernecesInUnionOrderBy();
     }
     else
     {
         this._columnCount = this._leftQueryExpression.GetColumnCount();
         int columnCount = this._rightQueryExpression.GetColumnCount();
         if (this._columnCount != columnCount)
         {
             throw Error.GetError(0x15da);
         }
         this.UnionColumnTypes       = new SqlType[this._columnCount];
         this.UnionColumnNullability = new byte[this._columnCount];
         this._leftQueryExpression.UnionColumnMap = this._rightQueryExpression.UnionColumnMap = new int[this._columnCount];
         ArrayUtil.FillSequence(this._leftQueryExpression.UnionColumnMap);
         this.ResolveColumnRefernecesInUnionOrderBy();
     }
 }
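The union-corresponding branch above boils down to intersecting the two column-name lists while recording each match's position on both sides. A stripped-down sketch of just that matching step, using plain arrays instead of the HSQLDB collection types (the helper itself is hypothetical):

// Sketch: build left/right column maps for UNION CORRESPONDING.
// Columns present on both sides keep their per-side positions.
static void BuildUnionMaps(string[] left, string[] right,
                           out int[] leftMap, out int[] rightMap)
{
    var l = new List<int>();
    var r = new List<int>();
    for (int i = 0; i < left.Length; i++)
    {
        int j = Array.IndexOf(right, left[i]); // first match, like ArrayUtil.Find
        if (left[i].Length > 0 && j != -1)
        {
            l.Add(i); // position in the left SELECT list
            r.Add(j); // position in the right SELECT list
        }
    }
    if (l.Count == 0)
        throw new InvalidOperationException("no corresponding columns");
    leftMap = l.ToArray();
    rightMap = r.ToArray();
}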
        // algorithm: treat sentence snippets as miniature documents
        // we can intersect these with the postings lists via BreakIterator.Preceding(offset),
        // score each sentence as norm(sentenceStartOffset) * sum(weight * tf(freq))
        private Passage[] HighlightDoc(string field, BytesRef[] terms, int contentLength, BreakIterator bi, int doc,
                                       TermsEnum termsEnum, DocsAndPositionsEnum[] postings, int n)
        {
            PassageScorer scorer = GetScorer(field);

            if (scorer == null)
            {
                throw new NullReferenceException("PassageScorer cannot be null");
            }
            JCG.PriorityQueue <OffsetsEnum> pq = new JCG.PriorityQueue <OffsetsEnum>();
            float[] weights = new float[terms.Length];
            // initialize postings
            for (int i = 0; i < terms.Length; i++)
            {
                DocsAndPositionsEnum de = postings[i];
                int pDoc;
                if (de == EMPTY)
                {
                    continue;
                }
                else if (de == null)
                {
                    postings[i] = EMPTY; // initially
                    if (!termsEnum.SeekExact(terms[i]))
                    {
                        continue; // term not found
                    }
                    de = postings[i] = termsEnum.DocsAndPositions(null, null, DocsAndPositionsFlags.OFFSETS);
                    if (de == null)
                    {
                        // no positions available
                        throw new ArgumentException("field '" + field + "' was indexed without offsets, cannot highlight");
                    }
                    pDoc = de.Advance(doc);
                }
                else
                {
                    pDoc = de.DocID;
                    if (pDoc < doc)
                    {
                        pDoc = de.Advance(doc);
                    }
                }

                if (doc == pDoc)
                {
                    weights[i] = scorer.Weight(contentLength, de.Freq);
                    de.NextPosition();
                    pq.Add(new OffsetsEnum(de, i));
                }
            }

            pq.Add(new OffsetsEnum(EMPTY, int.MaxValue)); // a sentinel for termination

            JCG.PriorityQueue <Passage> passageQueue = new JCG.PriorityQueue <Passage>(n, Comparer <Passage> .Create((left, right) =>
            {
                if (left.score < right.score)
                {
                    return(-1);
                }
                else if (left.score > right.score)
                {
                    return(1);
                }
                else
                {
                    return(left.startOffset - right.startOffset);
                }
            }));
            Passage current = new Passage();

            while (pq.TryDequeue(out OffsetsEnum off))
            {
                DocsAndPositionsEnum dp = off.dp;
                int start = dp.StartOffset;
                if (start == -1)
                {
                    throw new ArgumentException("field '" + field + "' was indexed without offsets, cannot highlight");
                }
                int end = dp.EndOffset;
                // LUCENE-5166: this hit would span the content limit... however more valid
                // hits may exist (they are sorted by start). so we pretend like we never
                // saw this term, it won't cause a passage to be added to passageQueue or anything.
                Debug.Assert(EMPTY.StartOffset == int.MaxValue);
                if (start < contentLength && end > contentLength)
                {
                    continue;
                }
                if (start >= current.endOffset)
                {
                    if (current.startOffset >= 0)
                    {
                        // finalize current
                        current.score *= scorer.Norm(current.startOffset);
                        // new sentence: first add 'current' to queue
                        if (passageQueue.Count == n && current.score < passageQueue.Peek().score)
                        {
                            current.Reset(); // can't compete, just reset it
                        }
                        else
                        {
                            passageQueue.Enqueue(current);
                            if (passageQueue.Count > n)
                            {
                                current = passageQueue.Dequeue();
                                current.Reset();
                            }
                            else
                            {
                                current = new Passage();
                            }
                        }
                    }
                    // if we exceed limit, we are done
                    if (start >= contentLength)
                    {
                        Passage[] passages = passageQueue.ToArray();
                        foreach (Passage p in passages)
                        {
                            p.Sort();
                        }
                        // sort in ascending order
                        ArrayUtil.TimSort(passages, Comparer <Passage> .Create((left, right) => left.startOffset - right.startOffset));
                        return(passages);
                    }
                    // advance breakiterator
                    Debug.Assert(BreakIterator.Done < 0);
                    current.startOffset = Math.Max(bi.Preceding(start + 1), 0);
                    current.endOffset   = Math.Min(bi.Next(), contentLength);
                }
                int tf = 0;
                while (true)
                {
                    tf++;
                    BytesRef term = terms[off.id];
                    if (term == null)
                    {
                        // multitermquery match, pull from payload
                        term = off.dp.GetPayload();
                        Debug.Assert(term != null);
                    }
                    current.AddMatch(start, end, term);
                    if (off.pos == dp.Freq)
                    {
                        break; // removed from pq
                    }
                    else
                    {
                        off.pos++;
                        dp.NextPosition();
                        start = dp.StartOffset;
                        end   = dp.EndOffset;
                    }
                    if (start >= current.endOffset || end > contentLength)
                    {
                        pq.Enqueue(off);
                        break;
                    }
                }
                current.score += weights[off.id] * scorer.Tf(tf, current.endOffset - current.startOffset);
            }

            // Dead code but compiler disagrees:
            Debug.Assert(false);
            return(null);
        }
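The comment preceding HighlightDoc compresses to score(passage) = norm(startOffset) * sum_i(weight_i * tf(freq_i)). A hedged numeric sketch with stand-in norm and tf functions (the real PassageScorer's formulas may differ):

// Illustrative only: the norm and tf below are simple stand-ins,
// not the actual PassageScorer implementations.
static float ScorePassage(int startOffset, float[] weights, int[] termFreqs)
{
    float norm = 1f + 1f / (float)Math.Log(10 + startOffset); // favor early passages
    float sum = 0f;
    for (int i = 0; i < weights.Length; i++)
        sum += weights[i] * (float)Math.Sqrt(termFreqs[i]);   // dampened term frequency
    return norm * sum;
}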
Exemple #23
0
    private void GenerateAtlasFromBinary()
    {
        UInt32 num = this.ATLAS_W * this.ATLAS_H;

        Color32[] array = new Color32[num];
        UInt32    num2  = 0u;
        UInt32    num3  = 1u;

        for (Int32 i = 0; i < (Int32)this.overlayCount; i++)
        {
            BGOVERLAY_DEF bGOVERLAY_DEF = this.overlayList[i];
            for (Int32 j = 0; j < (Int32)bGOVERLAY_DEF.spriteCount; j++)
            {
                BGSPRITE_LOC_DEF bGSPRITE_LOC_DEF = bGOVERLAY_DEF.spriteList[j];
                bGSPRITE_LOC_DEF.atlasX = (UInt16)num2;
                bGSPRITE_LOC_DEF.atlasY = (UInt16)num3;
                if (bGSPRITE_LOC_DEF.res == 0)
                {
                    Int32 index = ArrayUtil.GetIndex(bGSPRITE_LOC_DEF.clutX * 16, bGSPRITE_LOC_DEF.clutY, (Int32)this.vram.width, (Int32)this.vram.height);
                    for (UInt32 num4 = 0u; num4 < (UInt32)bGSPRITE_LOC_DEF.h; num4 += 1u)
                    {
                        Int32  index2 = ArrayUtil.GetIndex(bGSPRITE_LOC_DEF.texX * 64 + bGSPRITE_LOC_DEF.u / 4, (Int32)(bGSPRITE_LOC_DEF.texY * 256u + bGSPRITE_LOC_DEF.v + num4), (Int32)this.vram.width, (Int32)this.vram.height);
                        Int32  index3 = ArrayUtil.GetIndex((Int32)num2, (Int32)(num3 + num4), (Int32)this.ATLAS_W, (Int32)this.ATLAS_H);
                        UInt32 num5   = 0u;
                        while (num5 < (UInt64)(bGSPRITE_LOC_DEF.w / 2))
                        {
                            Byte   b    = this.vram.rawData[index2 * 2 + (Int32)num5];
                            Byte   b2   = (Byte)(b & 15);
                            Byte   b3   = (Byte)(b >> 4 & 15);
                            Int32  num6 = (index + b2) * 2;
                            UInt16 num7 = (UInt16)(this.vram.rawData[num6] | this.vram.rawData[num6 + 1] << 8);
                            Int32  num8 = index3 + (Int32)(num5 * 2u);
                            ConvertColor16toColor32(num7, out array[num8]);
                            if (bGSPRITE_LOC_DEF.trans != 0 && num7 != 0)
                            {
                                if (bGSPRITE_LOC_DEF.alpha == 0)
                                {
                                    array[num8].a = 127;
                                }
                                else if (bGSPRITE_LOC_DEF.alpha == 3)
                                {
                                    array[num8].a = 63;
                                }
                            }
                            num6 = (index + b3) * 2;
                            num7 = (UInt16)(this.vram.rawData[num6] | this.vram.rawData[num6 + 1] << 8);
                            num8 = index3 + (Int32)(num5 * 2u) + 1;
                            ConvertColor16toColor32(num7, out array[num8]);
                            if (bGSPRITE_LOC_DEF.trans != 0 && num7 != 0)
                            {
                                if (bGSPRITE_LOC_DEF.alpha == 0)
                                {
                                    array[num8].a = 127;
                                }
                                else if (bGSPRITE_LOC_DEF.alpha == 3)
                                {
                                    array[num8].a = 63;
                                }
                            }
                            num5 += 1u;
                        }
                    }
                }
                else if (bGSPRITE_LOC_DEF.res == 1)
                {
                    Int32 index4 = ArrayUtil.GetIndex(bGSPRITE_LOC_DEF.clutX * 16, bGSPRITE_LOC_DEF.clutY, (Int32)this.vram.width, (Int32)this.vram.height);
                    for (UInt32 num9 = 0u; num9 < (UInt32)bGSPRITE_LOC_DEF.h; num9 += 1u)
                    {
                        Int32 index5 = ArrayUtil.GetIndex(bGSPRITE_LOC_DEF.texX * 64 + bGSPRITE_LOC_DEF.u / 2, (Int32)(bGSPRITE_LOC_DEF.texY * 256u + bGSPRITE_LOC_DEF.v + num9), (Int32)this.vram.width, (Int32)this.vram.height);
                        Int32 index6 = ArrayUtil.GetIndex((Int32)num2, (Int32)(num3 + num9), (Int32)this.ATLAS_W, (Int32)this.ATLAS_H);
                        for (UInt32 num10 = 0u; num10 < (UInt32)bGSPRITE_LOC_DEF.w; num10 += 1u)
                        {
                            Byte   b4    = this.vram.rawData[index5 * 2 + (Int32)num10];
                            Int32  num11 = (index4 + b4) * 2;
                            UInt16 num12 = (UInt16)(this.vram.rawData[num11] | this.vram.rawData[num11 + 1] << 8);
                            Int32  num13 = index6 + (Int32)num10;
                            ConvertColor16toColor32(num12, out array[num13]);
                            if (bGSPRITE_LOC_DEF.trans != 0 && num12 != 0)
                            {
                                if (bGSPRITE_LOC_DEF.alpha == 0)
                                {
                                    array[num13].a = 127;
                                }
                                else if (bGSPRITE_LOC_DEF.alpha == 3)
                                {
                                    array[num13].a = 63;
                                }
                            }
                        }
                    }
                }
                for (UInt32 num14 = 0u; num14 < (UInt32)bGSPRITE_LOC_DEF.h; num14 += 1u)
                {
                    Int32 index7 = ArrayUtil.GetIndex((Int32)(num2 + this.SPRITE_W), (Int32)(num3 + num14), (Int32)this.ATLAS_W, (Int32)this.ATLAS_H);
                    array[index7] = array[index7 - 1];
                }
                for (UInt32 num15 = 0u; num15 < (UInt32)bGSPRITE_LOC_DEF.w; num15 += 1u)
                {
                    Int32 index8 = ArrayUtil.GetIndex((Int32)(num2 + num15), (Int32)num3, (Int32)this.ATLAS_W, (Int32)this.ATLAS_H);
                    Int32 index9 = ArrayUtil.GetIndex((Int32)(num2 + num15), (Int32)(num3 - 1u), (Int32)this.ATLAS_W, (Int32)this.ATLAS_H);
                    array[index9] = array[index8];
                }
                Int32 index10 = ArrayUtil.GetIndex((Int32)(num2 + this.SPRITE_W - 1u), (Int32)num3, (Int32)this.ATLAS_W, (Int32)this.ATLAS_H);
                Int32 index11 = ArrayUtil.GetIndex((Int32)(num2 + this.SPRITE_W), (Int32)(num3 - 1u), (Int32)this.ATLAS_W, (Int32)this.ATLAS_H);
                array[index11] = array[index10];
                num2          += this.SPRITE_W + 1u;
                if (num2 >= this.ATLAS_W || this.ATLAS_W - num2 < this.SPRITE_W + 1u)
                {
                    num2  = 0u;
                    num3 += this.SPRITE_H + 1u;
                }
            }
        }
        this.atlas.SetPixels32(array);
        this.atlas.Apply();
    }
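In the res == 0 branch above, VRAM is 4-bit paletted: each byte packs two palette indices into its low and high nibbles, and each index selects a little-endian 16-bit color from the CLUT. A minimal sketch of that unpack step with raw byte arrays (no Unity types; names are illustrative):

// Sketch: expand one 4bpp byte into two 16-bit CLUT colors.
// 'clutByteOffset' points at the 16-entry palette inside the raw VRAM bytes.
static (ushort lo, ushort hi) DecodePacked4Bpp(byte packed, byte[] vram, int clutByteOffset)
{
    int loIdx = packed & 0x0F;        // low nibble -> first pixel
    int hiIdx = (packed >> 4) & 0x0F; // high nibble -> second pixel
    ushort lo = (ushort)(vram[clutByteOffset + loIdx * 2]
                       | vram[clutByteOffset + loIdx * 2 + 1] << 8);
    ushort hi = (ushort)(vram[clutByteOffset + hiIdx * 2]
                       | vram[clutByteOffset + hiIdx * 2 + 1] << 8);
    return (lo, hi);
}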
Exemple #24
0
 public static string[] RootedPath(this AwsS3ClientConfig config, params string[] path)
 {
     return(ArrayUtil.Concat(new[] { config.Bucket }, config.RootPath.Split('/'), path));
 }
 protected override void Swap(int i, int j)
 {
     ArrayUtil.Swap(outerInstance.subSpansByDoc, i, j);
 }
        /// <summary>
        /// Prints the filename and size of each file within a given compound file.
        /// Add the -extract flag to extract files to the current working directory.
        /// In order to make the extracted version of the index work, you have to copy
        /// the segments file from the compound index into the directory where the extracted files are stored. </summary>
        ///// <param name="args"> Usage: org.apache.lucene.index.CompoundFileExtractor [-extract] &lt;cfsfile&gt; </param>
        public static void Main(string[] args)
        {
            string filename = null;
            bool   extract  = false;
            string dirImpl  = null;

            int j = 0;

            while (j < args.Length)
            {
                string arg = args[j];
                if ("-extract".Equals(arg, StringComparison.Ordinal))
                {
                    extract = true;
                }
                else if ("-dir-impl".Equals(arg, StringComparison.Ordinal))
                {
                    if (j == args.Length - 1)
                    {
                        // LUCENENET specific - our wrapper console shows the correct usage
                        throw new ArgumentException("ERROR: missing value for --directory-type option");
                        //Console.WriteLine("ERROR: missing value for -dir-impl option");
                        //Environment.Exit(1);
                    }
                    j++;
                    dirImpl = args[j];
                }
                else if (filename == null)
                {
                    filename = arg;
                }
                j++;
            }

            if (filename == null)
            {
                // LUCENENET specific - our wrapper console shows the correct usage
                throw new ArgumentException("ERROR: CFS-FILE is required");
                //Console.WriteLine("Usage: org.apache.lucene.index.CompoundFileExtractor [-extract] [-dir-impl X] <cfsfile>");
                //return;
            }

            Store.Directory       dir     = null;
            CompoundFileDirectory cfr     = null;
            IOContext             context = IOContext.READ;

            try
            {
                FileInfo file    = new FileInfo(filename);
                string   dirname = file.DirectoryName;
                filename = file.Name;
                if (dirImpl == null)
                {
                    dir = FSDirectory.Open(new DirectoryInfo(dirname));
                }
                else
                {
                    dir = CommandLineUtil.NewFSDirectory(dirImpl, new DirectoryInfo(dirname));
                }

                cfr = new CompoundFileDirectory(dir, filename, IOContext.DEFAULT, false);

                string[] files = cfr.ListAll();
                ArrayUtil.TimSort(files); // sort the array of filename so that the output is more readable

                for (int i = 0; i < files.Length; ++i)
                {
                    long len = cfr.FileLength(files[i]);

                    if (extract)
                    {
                        Console.WriteLine("extract " + files[i] + " with " + len + " bytes to local directory...");
                        using (IndexInput ii = cfr.OpenInput(files[i], context))
                        {
                            // FileMode.Create: the extracted file may not exist yet
                            using (FileStream f = new FileStream(files[i], FileMode.Create, FileAccess.ReadWrite))
                            {
                                // read and write with a small buffer, which is more effective than reading byte by byte
                                byte[] buffer = new byte[1024];
                                int    chunk  = buffer.Length;
                                while (len > 0)
                                {
                                    int bufLen = (int)Math.Min(chunk, len);
                                    ii.ReadBytes(buffer, 0, bufLen);
                                    f.Write(buffer, 0, bufLen);
                                    len -= bufLen;
                                }
                            }
                        }
                    }
                    else
                    {
                        Console.WriteLine(files[i] + ": " + len + " bytes");
                    }
                }
            }
            catch (IOException ioe)
            {
                Console.WriteLine(ioe.ToString());
                //Console.Write(ioe.StackTrace);
            }
            finally
            {
                try
                {
                    if (dir != null)
                    {
                        dir.Dispose();
                    }
                    if (cfr != null)
                    {
                        cfr.Dispose();
                    }
                }
                catch (IOException ioe)
                {
                    Console.WriteLine(ioe.ToString());
                    //Console.Write(ioe.StackTrace);
                }
            }
        }
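A hedged invocation sketch for the extractor above (the class name is taken from the usage string it prints; the .cfs file name and directory type are placeholders):

// Extract all sub-files of a compound file into the working directory:
CompoundFileExtractor.Main(new[] { "-extract", "_0.cfs" });

// List contents only, with an explicit FSDirectory implementation:
CompoundFileExtractor.Main(new[] { "-dir-impl", "MMapDirectory", "_0.cfs" });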
Exemple #27
0
        //--- Class Methods ---
        public static ServiceBE StartService(ServiceBE service, bool forceRefresh, bool disableOnFailure)
        {
            // create subordinate request id for service start
            var dreamContext = DreamContext.Current;
            var requestId    = dreamContext.GetState <string>(DreamHeaders.DREAM_REQUEST_ID);

            dreamContext.SetState(DreamHeaders.DREAM_REQUEST_ID, requestId + "-service_" + service.Id);

            try {
                var stopwatch = Stopwatch.StartNew();
                service.ServiceLastStatus = string.Empty;
                StopService(service.Id, service, ServiceStopType.Restart);
                DekiContext context            = DekiContext.Current;
                bool        dirtyServiceEntity = false;
                XUri        location;
                ServiceRepository.IServiceInfo serviceInfo = null;
                try {
                    // check if service is local
                    if (service.ServiceLocal)
                    {
                        if (string.IsNullOrEmpty(service.SID))
                        {
                            throw new Exception("missing SID");
                        }

                        // start service
                        if (IsLocalAuthService(service))
                        {
                            // this service is the built-in authentication provider; no need to start it
                            location = context.Deki.Self;
                        }
                        else
                        {
                            // convert local service configuration into an xdoc
                            XDoc config = new XDoc("config");
                            foreach (KeyValuePair <string, string> configEntry in ArrayUtil.AllKeyValues(service.Config))
                            {
                                config.InsertValueAt(configEntry.Key, configEntry.Value);
                            }

                            // if no apikey was provided, create a random one so that CreateService doesn't inject the parent one
                            if (config["apikey"].IsEmpty)
                            {
                                config.Elem("apikey", StringUtil.CreateAlphaNumericKey(16));
                            }

                            // add information for service to callback into deki
                            if (config["uri.deki"].IsEmpty)
                            {
                                config.Elem("uri.deki", context.Deki.Self);
                                config.Elem("wikiid.deki", context.Instance.Id);

                                // Providing master apikey to service for setups that don't use per instance keys
                                config.Elem("apikey.deki", context.Instance.ApiKey.IfNullOrEmpty(context.Deki.MasterApiKey));
                            }

                            // the service location must use the service ID and the instance ID
                            string servicePath = string.Format("services/{0}/{1}", context.Instance.Id, service.Id);
                            _log.DebugFormat("starting service '{0}' at path {1} w/ namespace {2}", service.SID, servicePath, service.Preferences["namespace"]);
                            serviceInfo = context.Instance.CreateLocalService(service, servicePath, config);
                            location    = serviceInfo.ServiceUri;
                        }

                        // check if the service uri has changed since last invocation (happens when service is started for the first time or server GUID has changed)
                        if (!service.Uri.EqualsInvariantIgnoreCase(location.ToString()))
                        {
                            dirtyServiceEntity = true;
                            service.Uri        = location.ToString();
                        }
                    }
                    else
                    {
                        _log.DebugFormat("registering remote service '{0}'", service.SID);
                        if (string.IsNullOrEmpty(service.Uri))
                        {
                            throw new Exception("missing URI");
                        }
                        location    = new XUri(service.Uri);
                        serviceInfo = context.Instance.RegisterRemoteService(service, location);
                    }

                    // check if service is an Extension service
                    if (service.Type == ServiceType.EXT)
                    {
                        if (service.ServiceLocal)
                        {
                            _log.DebugFormat("registering service '{0}' as extension", service.SID);
                        }
                        ExtensionBL.StartExtensionService(context, service, serviceInfo, forceRefresh);
                    }

                    // Successfully starting a service enables it.
                    if (!service.ServiceEnabled)
                    {
                        dirtyServiceEntity     = true;
                        service.ServiceEnabled = true;
                    }
                } catch (Exception e) {
                    dirtyServiceEntity = true;
                    DreamMessage dm = null;
                    if (e is DreamResponseException)
                    {
                        dm = ((DreamResponseException)e).Response;
                        string message = dm.HasDocument ? dm.ToDocument()[".//message"].AsText.IfNullOrEmpty(e.Message) : dm.ToText();
                        service.ServiceLastStatus = string.Format("unable to initialize service ({0})", message);
                    }
                    else
                    {
                        service.ServiceLastStatus = e.GetCoroutineStackTrace();
                    }
                    if (serviceInfo != null)
                    {
                        try {
                            context.Instance.DeregisterService(service.Id);
                        } catch { }
                    }

                    // A service that fails to start becomes disabled if it's started explicitly (not during deki startup)
                    if (disableOnFailure)
                    {
                        service.ServiceEnabled = false;
                    }
                    _log.ErrorExceptionMethodCall(e, "StartService", string.Format("Unable to start local service id '{0}' with SID '{1}' Error: '{2}'", service.Id, service.SID, service.ServiceLastStatus));
                    if (dm != null)
                    {
                        throw new ExternalServiceResponseException(dm);
                    }
                    else
                    {
                        throw;
                    }
                } finally {
                    // don't update remote services that haven't changed
                    if (dirtyServiceEntity)
                    {
                        service = UpdateService(service);
                    }
                }
                stopwatch.Stop();
                _log.InfoFormat("Service '{0}' ({1}) started in {2}ms", service.Description, service.SID, stopwatch.ElapsedMilliseconds);
                return(service);
            } finally {
                // restore the request id
                dreamContext.SetState(DreamHeaders.DREAM_REQUEST_ID, requestId);
            }
        }
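The local-service branch above assembles its start-up configuration from flat key/value pairs; a condensed sketch of just that conversion, reusing only the XDoc and StringUtil calls already shown (the helper itself is hypothetical):

// Sketch: flat key/value pairs -> service config document.
XDoc BuildServiceConfig(IEnumerable<KeyValuePair<string, string>> pairs)
{
    XDoc config = new XDoc("config");
    foreach (KeyValuePair<string, string> pair in pairs)
        config.InsertValueAt(pair.Key, pair.Value); // key is treated as an insertion path
    if (config["apikey"].IsEmpty)
        config.Elem("apikey", StringUtil.CreateAlphaNumericKey(16)); // keep the parent key out
    return config;
}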
Exemple #28
0
        public void LibraryOnlyPeptidesTest()
        {
            LibraryManager        libraryManager;
            TestDocumentContainer docContainer;
            int         startRev;
            SrmDocument docLoaded = CreateNISTLibraryDocument(out libraryManager, out docContainer, out startRev);

            Assert.IsTrue(HasAllLibraryInfo(docLoaded));
            AssertEx.Serializable(docLoaded, (doc1, doc2) => ValidateLibraryDocs(doc1, doc2, libraryManager));

            SrmSettings settings = docLoaded.Settings.ChangePeptideFilter(f => f.ChangeMaxPeptideLength(14));

            settings = settings.ChangePeptideLibraries(l => l.ChangePick(PeptidePick.filter));
            SrmDocument docFilter = docLoaded.ChangeSettings(settings);

            Assert.IsFalse(HasAllLibraryInfo(docFilter));
            AssertEx.IsDocumentState(docFilter, ++startRev, 2, 14, 45);

            settings  = settings.ChangePeptideLibraries(l => l.ChangePick(PeptidePick.either));
            docFilter = docFilter.ChangeSettings(settings);
            Assert.IsFalse(HasAllLibraryInfo(docFilter));
            AssertEx.IsDocumentState(docFilter, ++startRev, 2, 16, 51);
            AssertEx.Serializable(docFilter, (doc1, doc2) => ValidateLibraryDocs(doc1, doc2, libraryManager));

            settings  = settings.ChangePeptideLibraries(l => l.ChangePick(PeptidePick.both));
            docFilter = docFilter.ChangeSettings(settings);
            Assert.IsTrue(HasAllLibraryInfo(docFilter));
            AssertEx.IsDocumentState(docFilter, ++startRev, 2, 2, 6);

            // Check all possible rankings
            settings = docLoaded.Settings.ChangePeptideLibraries(l => l.ChangeRankId(LibrarySpec.PEP_RANK_COPIES));
            SrmDocument docRank = docLoaded.ChangeSettings(settings);

            CheckRanks(docRank, null);
            settings = settings.ChangePeptideLibraries(l => l.ChangeRankId(LibrarySpec.PEP_RANK_TOTAL_INTENSITY));
            SrmDocument docRank2 = docRank.ChangeSettings(settings);

            CheckRanks(docRank2, docRank);
            settings = settings.ChangePeptideLibraries(l => l.ChangeRankId(LibrarySpec.PEP_RANK_PICKED_INTENSITY));
// ReSharper disable InconsistentNaming
            SrmDocument docRank3_1 = docRank2.ChangeSettings(settings);

// ReSharper restore InconsistentNaming
            CheckRanks(docRank3_1, docRank2);
            // Turns out TFRatio and picked intensity rank the same with these peptides
            settings = settings.ChangePeptideLibraries(l => l.ChangeRankId(NistLibSpecBase.PEP_RANK_TFRATIO));
// ReSharper disable InconsistentNaming
            SrmDocument docRank3_2 = docRank2.ChangeSettings(settings);

// ReSharper restore InconsistentNaming
            CheckRanks(docRank3_2, docRank2);
            AssertEx.Serializable(docRank3_2, (doc1, doc2) => ValidateLibraryDocs(doc1, doc2, libraryManager));

            SrmSettings setThrow1 = settings;

            AssertEx.ThrowsException <InvalidDataException>(() => setThrow1.ChangePeptideLibraries(l => l.ChangeRankId(XHunterLibSpec.PEP_RANK_EXPECT)));
            AssertEx.ThrowsException <InvalidDataException>(() => setThrow1.ChangePeptideLibraries(l => l.ChangePick(PeptidePick.filter)));
            AssertEx.ThrowsException <InvalidDataException>(() => setThrow1.ChangePeptideLibraries(l => l.ChangePick(PeptidePick.either)));

            // Check peptide limits based on rank
            settings = settings.ChangePeptideLibraries(l => l.ChangePeptideCount(1));
            SrmDocument docLimited = docRank3_1.ChangeSettings(settings);

            // Should now be 2 peptides with rank 1
            AssertEx.IsDocumentState(docLimited, ++startRev, 2, 2, 6);
            foreach (var nodePep in docLimited.Peptides)
            {
                Assert.AreEqual(1, nodePep.Rank ?? 0);
            }
            settings   = settings.ChangePeptideLibraries(l => l.ChangePeptideCount(3));
            docLimited = docRank3_1.ChangeSettings(settings);
            Assert.IsTrue(ArrayUtil.ReferencesEqual(docRank3_1.Peptides.ToArray(), docLimited.Peptides.ToArray()));
            AssertEx.Serializable(docLimited, (doc1, doc2) => ValidateLibraryDocs(doc1, doc2, libraryManager));

            SrmSettings setThrow2 = settings;

            AssertEx.ThrowsException <InvalidDataException>(() => setThrow2.ChangePeptideLibraries(l => l.ChangePick(PeptidePick.filter)));
            AssertEx.ThrowsException <InvalidDataException>(() => setThrow2.ChangePeptideLibraries(l => l.ChangePick(PeptidePick.either)));
            AssertEx.NoExceptionThrown <InvalidDataException>(() => setThrow2.ChangePeptideLibraries(l => l.ChangeRankId(null)));

            settings = settings.ChangePeptideLibraries(l => l.ChangePeptideCount(null));

            SrmDocument docUnlimited = docLimited.ChangeSettings(settings);

            Assert.IsTrue(ArrayUtil.ReferencesEqual(docLimited.Peptides.ToArray(), docUnlimited.Peptides.ToArray()));

            SrmSettings setThrow3 = settings;

            AssertEx.ThrowsException <InvalidDataException>(() => setThrow3.ChangePeptideLibraries(l => l.ChangePeptideCount(PeptideLibraries.MIN_PEPTIDE_COUNT - 1)));
            AssertEx.ThrowsException <InvalidDataException>(() => setThrow3.ChangePeptideLibraries(l => l.ChangePeptideCount(PeptideLibraries.MAX_PEPTIDE_COUNT + 1)));
        }
        public PeptideGroupDocNode ChangeSettings(SrmSettings settingsNew, SrmSettingsDiff diff)
        {
            if (diff.Monitor != null)
            {
                diff.Monitor.ProcessGroup(this);
            }

            if (diff.DiffPeptides && settingsNew.PeptideSettings.Filter.AutoSelect && AutoManageChildren)
            {
                IList <DocNode> childrenNew = new List <DocNode>();

                int countPeptides = 0;
                int countIons     = 0;

                Dictionary <int, DocNode>           mapIndexToChild = CreateGlobalIndexToChildMap();
                Dictionary <PeptideModKey, DocNode> mapIdToChild    = CreatePeptideModToChildMap();

                foreach (PeptideDocNode nodePep in GetPeptideNodes(settingsNew, true))
                {
                    PeptideDocNode  nodePepResult = nodePep;
                    SrmSettingsDiff diffNode      = SrmSettingsDiff.ALL;

                    DocNode existing;
                    // Add values that existed before the change. First check for exact match by
                    // global index, which will happen when explicit modifications are added,
                    // and then by content identity.
                    if (mapIndexToChild.TryGetValue(nodePep.Id.GlobalIndex, out existing) ||
                        mapIdToChild.TryGetValue(nodePep.Key, out existing))
                    {
                        nodePepResult = (PeptideDocNode)existing;
                        diffNode      = diff;
                    }

                    if (nodePepResult != null)
                    {
                        // Materialize children of the peptide.
                        nodePepResult = nodePepResult.ChangeSettings(settingsNew, diffNode);

                        childrenNew.Add(nodePepResult);

                        // Make sure a single peptide group does not exceed document limits.
                        countPeptides++;
                        countIons += nodePepResult.TransitionCount;
                        if (countIons > SrmDocument.MAX_TRANSITION_COUNT)
                        {
                            throw new InvalidDataException(String.Format(
                                                               Resources.PeptideGroupDocNode_ChangeSettings_The_current_document_settings_would_cause_the_number_of_targeted_transitions_to_exceed__0_n0___The_document_settings_must_be_more_restrictive_or_add_fewer_proteins_,
                                                               SrmDocument.MAX_TRANSITION_COUNT));
                        }
                        if (countPeptides > SrmDocument.MAX_PEPTIDE_COUNT)
                        {
                            throw new InvalidDataException(String.Format(
                                                               Resources.PeptideGroupDocNode_ChangeSettings_The_current_document_settings_would_cause_the_number_of_peptides_to_exceed__0_n0___The_document_settings_must_be_more_restrictive_or_add_fewer_proteins_,
                                                               SrmDocument.MAX_PEPTIDE_COUNT));
                        }
                    }
                }

                if (PeptideGroup.Sequence != null)
                {
                    childrenNew = PeptideGroup.RankPeptides(childrenNew, settingsNew, true);
                }

                return((PeptideGroupDocNode)ChangeChildrenChecked(childrenNew));
            }
            else
            {
                var nodeResult = this;

                if (diff.DiffPeptides && diff.SettingsOld != null)
                {
                    // If variable modifications changed, remove all peptides with variable
                    // modifications which are no longer possible.
                    var modsNew    = settingsNew.PeptideSettings.Modifications;
                    var modsVarNew = modsNew.VariableModifications.ToArray();
                    var modsOld    = diff.SettingsOld.PeptideSettings.Modifications;
                    var modsVarOld = modsOld.VariableModifications.ToArray();
                    if (modsNew.MaxVariableMods < modsOld.MaxVariableMods ||
                        !ArrayUtil.EqualsDeep(modsVarNew, modsVarOld))
                    {
                        IList <DocNode> childrenNew = new List <DocNode>();
                        foreach (PeptideDocNode nodePeptide in nodeResult.Children)
                        {
                            if (nodePeptide.AreVariableModsPossible(modsNew.MaxVariableMods, modsVarNew))
                            {
                                childrenNew.Add(nodePeptide);
                            }
                        }

                        nodeResult = (PeptideGroupDocNode)nodeResult.ChangeChildrenChecked(childrenNew);
                    }
                }

                // Check for changes affecting children
                if (diff.DiffPeptideProps || diff.DiffExplicit ||
                    diff.DiffTransitionGroups || diff.DiffTransitionGroupProps ||
                    diff.DiffTransitions || diff.DiffTransitionProps ||
                    diff.DiffResults)
                {
                    IList <DocNode> childrenNew = new List <DocNode>();

                    // Enumerate the nodes making necessary changes.
                    foreach (PeptideDocNode nodePeptide in nodeResult.Children)
                    {
                        childrenNew.Add(nodePeptide.ChangeSettings(settingsNew, diff));
                    }

                    childrenNew = RankChildren(settingsNew, childrenNew);

                    nodeResult = (PeptideGroupDocNode)nodeResult.ChangeChildrenChecked(childrenNew);
                }
                return(nodeResult);
            }
        }
 /// <summary>
 /// NOTE: This was parseIntAt() in Lucene.
 /// </summary>
 private int ParseInt32At(int offset)
 {
     UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + offset, _scratch.Length - offset, _scratchUtf16);
     return(ArrayUtil.ParseInt32(_scratchUtf16.Chars, 0, _scratchUtf16.Length));
 }
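For reference, a hedged sketch of what the scratch-buffer round-trip above amounts to in plain BCL calls (UTF-8 bytes to UTF-16 chars to int):

static int ParseInt32At(byte[] utf8, int offset, int length)
{
    // decode the UTF-8 slice, then parse the digits culture-invariantly
    string s = System.Text.Encoding.UTF8.GetString(utf8, offset, length);
    return int.Parse(s, System.Globalization.CultureInfo.InvariantCulture);
}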