/// <summary>
/// Estimates the tightest proximity level observed between two lists of
/// encoded entry offsets by walking both lists in parallel.
/// </summary>
/// <param name="Left">Encoded offsets of the first entry.</param>
/// <param name="Right">Encoded offsets of the second entry.</param>
/// <returns>
/// <c>EntryProximity.Phrase</c> as soon as an examined pair lies in the same
/// sentence and satisfies <c>isPhraseProximity</c>;
/// <c>EntryProximity.Sentence</c> if at least one examined pair shares a
/// sentence; otherwise <c>EntryProximity.Document</c>.
/// </returns>
/// <remarks>
/// Only the pairs visited by the merge-style walk are compared, so this
/// presumably relies on both lists being sorted by offset - confirm with callers.
/// </remarks>
internal static EntryProximity EstimateProximity(List<long> Left, List<long> Right)
{
    EntryProximity best = EntryProximity.Document;
    int leftPos = 0;
    int rightPos = 0;

    while (leftPos < Left.Count && rightPos < Right.Count)
    {
        long leftMask = Left[leftPos];
        long rightMask = Right[rightPos];

        if (MaskEncoder.Sentence(leftMask) == MaskEncoder.Sentence(rightMask))
        {
            // Phrase is the tightest level there is, so stop on the first hit.
            if (isPhraseProximity(MaskEncoder.TokenOrder(leftMask), MaskEncoder.TokenOrder(rightMask)))
            {
                return EntryProximity.Phrase;
            }

            best = EntryProximity.Sentence;
        }

        // Advance whichever side currently has the smaller normalized offset;
        // on a tie the right side moves.
        if (MaskEncoder.OffsetNormal(leftMask) < MaskEncoder.OffsetNormal(rightMask))
        {
            leftPos++;
        }
        else
        {
            rightPos++;
        }
    }

    return best;
}
/// <summary>
/// Filters encoded offsets down to those whose section id, as decoded by
/// <c>MaskEncoder.SectionId</c>, equals <paramref name="sectionId"/>.
/// </summary>
/// <param name="offsets">Encoded offsets to filter.</param>
/// <param name="sectionId">Section id an offset must carry to be kept.</param>
/// <returns>
/// The matching offsets in their original order, or <c>null</c> (not an
/// empty list) when nothing matches.
/// </returns>
private static List<long> SelectRestrictedMatchedEntries(IEnumerable<long> offsets, uint sectionId)
{
    List<long> matches = new List<long>();

    foreach (long candidate in offsets)
    {
        if (MaskEncoder.SectionId(candidate) != sectionId)
        {
            continue;
        }

        matches.Add(candidate);
    }

    // Callers receive null rather than an empty list when there are no matches.
    if (matches.Count == 0)
    {
        return null;
    }

    return matches;
}
/// <summary>
/// Records one occurrence of <paramref name="word"/>: bumps its counter in
/// <c>_termCounterInDoc</c>, raises <c>_termMaxFrequency</c> if needed, and
/// stores the word's encoded position mask in <c>_tokens</c>.
/// </summary>
/// <param name="word">
/// The word to record; its <c>HC</c> is the key into both tables.
/// </param>
private void LogTerm(Word word)
{
    int hash = word.HC;

    // Update the term's occurrence count in this document.
    IntHashTableOfInt.Entry counter = _termCounterInDoc.GetEntry(hash);
    int occurrences;
    if (counter != null)
    {
        occurrences = counter.Value + 1;
        counter.Value = occurrences;
    }
    else
    {
        _termCounterInDoc[hash] = 1;
        occurrences = 1;
    }

    // _termMaxFrequency is declared as ushort, so its upper value is
    // artificially capped (at _ciMaxMeaningfulCount, near UInt16.MaxValue)
    // to avoid integer overflow.
    if (occurrences > _termMaxFrequency)
    {
        _termMaxFrequency = (ushort)Math.Min(occurrences, _ciMaxMeaningfulCount);
    }

    long mask = MaskEncoder.Mask(word.TokenOrder, word.SentenceNumber, word.StartOffset);
    IntHashTable.Entry slot = _tokens.GetEntry(hash);
    if (slot == null)
    {
        // First occurrence: the mask is stored directly, with no list.
        _tokens[hash] = mask;
        return;
    }

    List<long> positions = slot.Value as List<long>;
    if (positions == null)
    {
        // Second occurrence: the slot still holds the single mask, so move
        // it into a list that can take further occurrences.
        positions = new List<long>(4);
        positions.Add((long)slot.Value);
        slot.Value = positions;
    }

    positions.Add(mask);
}
/// <summary>
/// Combines the encoded offsets of two entries into one list, according to
/// the required proximity.
/// </summary>
/// <param name="left">Encoded offsets of the first entry.</param>
/// <param name="right">Encoded offsets of the second entry.</param>
/// <param name="requiredProximity">
/// With <c>EntryProximity.Phrase</c>, only pairs accepted by
/// <c>ProximityEstimator.isPhraseProximity</c> are emitted (left offset, then
/// right offset, per pair). With any other value, all of
/// <paramref name="left"/> is followed by all of <paramref name="right"/>.
/// </param>
/// <returns>A new list; empty when no phrase pairs are found.</returns>
private static List<long> JoinInstancesOfEntries(List<long> left, List<long> right, EntryProximity requiredProximity)
{
    List<long> combined = new List<long>();

    if (requiredProximity != EntryProximity.Phrase)
    {
        combined.AddRange(left);
        combined.AddRange(right);
        return combined;
    }

    // Assumption: all offsets in both entries are sorted in ascending order.
    int i = 0;
    int j = 0;
    while (i < left.Count && j < right.Count)
    {
        long leftMask = left[i];
        long rightMask = right[j];
        int leftOrder = MaskEncoder.TokenOrder(leftMask);
        int rightOrder = MaskEncoder.TokenOrder(rightMask);

        if (ProximityEstimator.isPhraseProximity(leftOrder, rightOrder))
        {
            combined.Add(leftMask);
            combined.Add(rightMask);
        }

        // Advance the side with the smaller token order; on a tie the right side moves.
        if (leftOrder < rightOrder)
        {
            i++;
        }
        else
        {
            j++;
        }
    }

    return combined;
}