Example #1
0
        /// <summary>
        /// Walks two lists of encoded masks in parallel and reports the closest
        /// proximity level observed between a mask from <paramref name="Left"/>
        /// and a mask from <paramref name="Right"/>.
        /// </summary>
        /// <param name="Left">Encoded masks of the first entry.</param>
        /// <param name="Right">Encoded masks of the second entry.</param>
        /// <returns>
        /// <c>EntryProximity.Phrase</c> as soon as an examined pair shares a sentence
        /// and satisfies <c>isPhraseProximity</c>; otherwise <c>EntryProximity.Sentence</c>
        /// if any examined pair shared a sentence; otherwise <c>EntryProximity.Document</c>.
        /// </returns>
        /// <remarks>
        /// NOTE(review): the two-cursor walk presumably relies on both lists being
        /// ordered by <c>MaskEncoder.OffsetNormal</c> - confirm against the callers.
        /// </remarks>
        internal static EntryProximity  EstimateProximity(List <long> Left, List <long> Right)
        {
            EntryProximity closest = EntryProximity.Document;
            int leftPos = 0;
            int rightPos = 0;

            while (leftPos < Left.Count && rightPos < Right.Count)
            {
                long leftMask = Left[leftPos];
                long rightMask = Right[rightPos];

                if (MaskEncoder.Sentence(leftMask) == MaskEncoder.Sentence(rightMask))
                {
                    //  Phrase is the tightest level this method reports, so the
                    //  scan can stop at the first hit.
                    if (isPhraseProximity(MaskEncoder.TokenOrder(leftMask), MaskEncoder.TokenOrder(rightMask)))
                    {
                        return EntryProximity.Phrase;
                    }

                    closest = EntryProximity.Sentence;
                }

                //  Advance the cursor whose current mask has the smaller normal
                //  offset; on a tie the right-hand cursor moves.
                if (MaskEncoder.OffsetNormal(leftMask) < MaskEncoder.OffsetNormal(rightMask))
                {
                    leftPos++;
                }
                else
                {
                    rightPos++;
                }
            }

            return closest;
        }
Example #2
0
        /// <summary>
        /// Filters <paramref name="offsets"/> down to those whose
        /// <c>MaskEncoder.SectionId</c> equals <paramref name="sectionId"/>.
        /// </summary>
        /// <param name="offsets">Encoded offsets to filter.</param>
        /// <param name="sectionId">Section identifier every kept offset must carry.</param>
        /// <returns>
        /// The matching offsets in their original order, or <c>null</c> (not an
        /// empty list) when nothing matched.
        /// </returns>
        private static List <long> SelectRestrictedMatchedEntries(IEnumerable <long> offsets, uint sectionId)
        {
            //  Stays null until the first match, which yields the "null when
            //  empty" result without a final count check.
            List<long> matched = null;

            foreach (long candidate in offsets)
            {
                if (MaskEncoder.SectionId(candidate) != sectionId)
                {
                    continue;
                }

                if (matched == null)
                {
                    matched = new List<long>();
                }
                matched.Add(candidate);
            }

            return matched;
        }
Example #3
0
        /// <summary>
        /// Records one occurrence of <paramref name="word"/>: bumps its counter in
        /// <c>_termCounterInDoc</c>, raises <c>_termMaxFrequency</c> if needed, and
        /// stores the word's encoded position mask in <c>_tokens</c>.
        /// </summary>
        /// <param name="word">The word occurrence to register; keyed by <c>word.HC</c>.</param>
        private void LogTerm(Word word)
        {
            int key = word.HC;

            //  Step 1: update the term's count in this doc.
            int occurrences;
            IntHashTableOfInt.Entry counter = _termCounterInDoc.GetEntry(key);
            if (counter != null)
            {
                occurrences = counter.Value + 1;
                counter.Value = occurrences;
            }
            else
            {
                occurrences = 1;
                _termCounterInDoc[key] = occurrences;
            }

            //  Step 2: track the highest count seen. _termMaxFrequency is a
            //  ushort, so the value is capped at _ciMaxMeaningfulCount before the
            //  narrowing cast to keep it from overflowing.
            if (occurrences > _termMaxFrequency)
            {
                _termMaxFrequency = (ushort)Math.Min(occurrences, _ciMaxMeaningfulCount);
            }

            //  Step 3: remember where the word occurred.
            long mask = MaskEncoder.Mask(word.TokenOrder, word.SentenceNumber, word.StartOffset);

            IntHashTable.Entry slot = _tokens.GetEntry(key);
            if (slot == null)
            {
                //  First occurrence: the bare mask is stored, no list yet.
                _tokens[key] = mask;
                return;
            }

            List<long> positions = slot.Value as List<long>;
            if (positions == null)
            {
                //  Second occurrence: the slot still holds the single boxed mask,
                //  so promote it to a list seeded with that first mask.
                long firstMask = (long)slot.Value;
                positions = new List<long>(4);
                positions.Add(firstMask);
                slot.Value = positions;
            }
            positions.Add(mask);
        }
Example #4
0
        /// <summary>
        /// Combines the encoded masks of two entries into a single new list.
        /// </summary>
        /// <param name="left">Masks of the first entry.</param>
        /// <param name="right">Masks of the second entry.</param>
        /// <param name="requiredProximity">
        /// When <c>EntryProximity.Phrase</c>, only pairs accepted by
        /// <c>ProximityEstimator.isPhraseProximity</c> are kept; for any other
        /// value both lists are concatenated unchanged.
        /// </param>
        /// <returns>A newly allocated list; empty when nothing qualifies.</returns>
        private static List <long> JoinInstancesOfEntries(List <long> left, List <long> right,
                                                          EntryProximity requiredProximity)
        {
            List<long> merged = new List<long>();

            if (requiredProximity != EntryProximity.Phrase)
            {
                merged.AddRange(left);
                merged.AddRange(right);
                return merged;
            }

            //  The walk below assumes that the offsets in both entries are
            //  sorted in ascending order.
            int leftPos = 0;
            int rightPos = 0;

            while (leftPos < left.Count && rightPos < right.Count)
            {
                long leftMask = left[leftPos];
                long rightMask = right[rightPos];

                int leftOrder = MaskEncoder.TokenOrder(leftMask);
                int rightOrder = MaskEncoder.TokenOrder(rightMask);

                if (ProximityEstimator.isPhraseProximity(leftOrder, rightOrder))
                {
                    merged.Add(leftMask);
                    merged.Add(rightMask);
                }

                //  Advance the side with the smaller token order; on a tie the
                //  right-hand cursor moves.
                if (leftOrder < rightOrder)
                {
                    leftPos++;
                }
                else
                {
                    rightPos++;
                }
            }

            return merged;
        }