/// <summary>
/// Searches the index for entries matching every token of <paramref name="query"/>.
/// </summary>
/// <param name="query">Raw query text; it is split into tokens by getQueryTokensHandler.</param>
/// <param name="maxScore">Only word matches whose score is strictly lower than this value are kept.</param>
/// <param name="patternMatchLimit">Maximum number of matches collected for the first token (after sorting).</param>
/// <returns>
/// The matching entries, or an empty sequence when the index is not ready, the query yields
/// no token, or the first token has no match. The result is fully materialized before the
/// lock is released.
/// </returns>
public IEnumerable<EntryResult> Search(string query, int maxScore = int.MaxValue, int patternMatchLimit = 2999)
{
    if (!m_IndexReady)
    {
        return Enumerable.Empty<EntryResult>();
    }

    var tokens = getQueryTokensHandler(query);
    if (tokens.Length == 0)
    {
        return Enumerable.Empty<EntryResult>();
    }

    // NOTE(review): presumably orders tokens so the most selective one is evaluated first,
    // based on previously recorded match counts - confirm against SortTokensByPatternMatches.
    Array.Sort(tokens, SortTokensByPatternMatches);
    var lengths = tokens.Select(p => p.Length).ToArray();
    var patterns = tokens.Select(p => p.GetHashCode()).ToArray();

    var wiec = new WordIndexEntryComparer();
    var entryIndexes = new HashSet<int>();
    lock (this)
    {
        // The first pattern seeds both the candidate list and the set of accepted file indexes.
        var remains = GetPatternFileIndexes(patterns[0], lengths[0], maxScore, wiec, entryIndexes, patternMatchLimit).ToList();
        patternMatchCount[patterns[0]] = remains.Count;
        if (remains.Count == 0)
        {
            return Enumerable.Empty<EntryResult>();
        }

        // Every following pattern narrows the candidates down to the entries it also matches.
        for (int i = 1; i < patterns.Length; ++i)
        {
            var newMatches = GetPatternFileIndexes(patterns[i], lengths[i], maxScore, wiec, entryIndexes).ToArray();
            IntersectPatternMatches(remains, newMatches);
        }

        // Materialize while the lock is still held: a deferred query would read m_Entries
        // only when the caller enumerates it, i.e. after the lock has been released and
        // possibly after the index was refreshed.
        return remains
            .Select(fi => new EntryResult { path = m_Entries[fi.index], index = fi.index, score = fi.score })
            .ToArray();
    }
}
/// <summary>
/// Searches the index for entries matching every token of <paramref name="query"/> and
/// returns them ordered by ascending score.
/// </summary>
/// <param name="query">Raw query text; it is split into tokens by getQueryTokensHandler.</param>
/// <param name="maxScore">Only word matches whose score is strictly lower than this value are kept.</param>
/// <returns>
/// The matching entries ordered by score, or an empty sequence when the index is not ready,
/// the query yields no token, or the first token has no match. The result is fully
/// materialized before the lock is released.
/// </returns>
public IEnumerable<EntryResult> Search(string query, int maxScore = int.MaxValue)
{
    if (!m_IndexReady)
    {
        return Enumerable.Empty<EntryResult>();
    }

    var tokens = getQueryTokensHandler(query);
    var lengths = tokens.Select(p => p.Length).ToArray();
    var patterns = tokens.Select(p => p.GetHashCode()).ToArray();
    if (patterns.Length == 0)
    {
        return Enumerable.Empty<EntryResult>();
    }

    var wiec = new WordIndexEntryComparer();
    lock (this)
    {
        // The first pattern seeds the candidate list.
        var remains = GetPatternFileIndexes(patterns[0], lengths[0], maxScore, wiec).ToList();
        if (remains.Count == 0)
        {
            return Enumerable.Empty<EntryResult>();
        }

        // Every following pattern narrows the candidates down to the entries it also matches.
        for (int i = 1; i < patterns.Length; ++i)
        {
            var newMatches = GetPatternFileIndexes(patterns[i], lengths[i], maxScore, wiec).ToArray();
            IntersectPatternMatches(remains, newMatches);
        }

        // Materialize while the lock is still held: a deferred query would read m_Entries
        // only when the caller enumerates it, i.e. after the lock has been released and
        // possibly after the index was refreshed.
        return remains
            .OrderBy(r => r.score)
            .Select(fi => new EntryResult { path = m_Entries[fi.index], index = fi.index, score = fi.score })
            .ToArray();
    }
}
/// <summary>
/// Applies an incremental change to the index: indexes the entries of
/// <paramref name="updated"/> that are not yet known and discards the indexed words of the
/// entries listed in <paramref name="removed"/>. Does nothing while the index is not ready.
/// </summary>
/// <param name="updated">Candidate entries to index; those already present in m_Entries are ignored.</param>
/// <param name="removed">Entries whose words are removed from the word index.</param>
/// <param name="moved">Not read by this method.</param>
protected void UpdateIndexWithNewContent(string[] updated, string[] removed, string[] moved)
{
    if (!m_IndexReady)
    {
        return;
    }

    lock (this)
    {
        List<string> entries = null;
        List<WordIndexEntry> words = null;

        // Filter already known entries. A set lookup replaces a linear scan of m_Entries
        // for every updated entry.
        if (updated.Length > 0)
        {
            var knownEntries = new HashSet<string>(m_Entries);
            updated = updated.Where(u => !knownEntries.Contains(u)).ToArray();
        }

        bool updateIndex = false;
        if (updated.Length > 0)
        {
            entries = new List<string>(m_Entries);
            words = new List<WordIndexEntry>(m_WordIndexEntries);

            var wiec = new WordIndexEntryComparer();
            var partialIndex = BuildPartialIndex(String.Empty, 0, updated, 0);

            // Entry index resolved for each slot of `updated` (-1 = not resolved yet).
            // Entries are only ever appended, so an index found once stays valid and the
            // list does not have to be scanned again for every word of the same entry.
            var resolvedFileIndexes = new int[updated.Length];
            for (int i = 0; i < resolvedFileIndexes.Length; ++i)
            {
                resolvedFileIndexes[i] = -1;
            }

            // Update entry file indexes
            for (int i = 0; i < partialIndex.Count; ++i)
            {
                var pk = partialIndex[i];
                var matchedFileIndex = resolvedFileIndexes[pk.fileIndex];
                if (matchedFileIndex == -1)
                {
                    var updatedEntry = updated[pk.fileIndex];
                    matchedFileIndex = entries.FindIndex(e => e == updatedEntry);
                    if (matchedFileIndex == -1)
                    {
                        entries.Add(updatedEntry);
                        matchedFileIndex = entries.Count - 1;
                    }

                    resolvedFileIndexes[pk.fileIndex] = matchedFileIndex;
                }

                var newWordIndex = new WordIndexEntry(pk.key, pk.length, matchedFileIndex, pk.score);

                // BinarySearch returns the bitwise complement of the insertion point when
                // no equal element exists.
                var insertIndex = words.BinarySearch(newWordIndex, wiec);
                words.Insert(insertIndex < 0 ? ~insertIndex : insertIndex, newWordIndex);
            }

            updateIndex = true;
        }

        // Remove items
        if (removed.Length > 0)
        {
            entries = entries ?? new List<string>(m_Entries);
            words = words ?? new List<WordIndexEntry>(m_WordIndexEntries);

            for (int i = 0; i < removed.Length; ++i)
            {
                var entryToBeRemoved = removed[i];
                var entryIndex = entries.FindIndex(e => e == entryToBeRemoved);
                if (entryIndex > -1)
                {
                    // Only the words are dropped; the entry itself stays in the list, which
                    // keeps the file indexes stored in the remaining words valid.
                    updateIndex |= words.RemoveAll(w => w.fileIndex == entryIndex) > 0;
                }
            }
        }

        if (updateIndex)
        {
            UpdateIndexes(entries.ToArray(), SortIndexes(words));
        }
    }
}
/// <summary>
/// Collects the word index entries stored for the given key and length, optionally
/// restricted to a set of already accepted file indexes.
/// </summary>
/// <param name="key">Key of the word index entries to look up.</param>
/// <param name="length">Length of the word index entries to look up.</param>
/// <param name="maxScore">Entries are kept only when their score is strictly lower than this value.</param>
/// <param name="wiec">Comparer used to binary-search m_WordIndexEntries.</param>
/// <param name="entryIndexes">
/// When null or empty on entry, every entry is accepted and, if the set is not null, the
/// file index of each kept entry is added to it. Otherwise only entries whose file index
/// is already in the set are accepted.
/// </param>
/// <param name="limit">Collection stops as soon as this many matches have been gathered.</param>
/// <returns>The kept matches, or an empty sequence when the binary search finds nothing.</returns>
private IEnumerable<PatternMatch> GetPatternFileIndexes(int key, int length, int maxScore, WordIndexEntryComparer wiec, HashSet<int> entryIndexes, int limit = int.MaxValue)
{
    // Locate any entry for (key, length) in the sorted word index.
    int cursor = Array.BinarySearch(m_WordIndexEntries, new WordIndexEntry(key, length), wiec);
    if (cursor < 0)
    {
        return Enumerable.Empty<PatternMatch>();
    }

    // Step back to the first entry of the run sharing the same key and length.
    while (cursor > 0 && m_WordIndexEntries[cursor - 1].key == key && m_WordIndexEntries[cursor - 1].length == length)
    {
        cursor--;
    }

    // Both flags are decided once, from the state of the set on entry.
    bool acceptAll = entryIndexes == null || entryIndexes.Count == 0;
    bool recordFileIndexes = acceptAll && entryIndexes != null;

    var results = new List<PatternMatch>();
    bool inRun = true;
    while (inRun)
    {
        var candidate = m_WordIndexEntries[cursor];
        bool accepted = acceptAll || entryIndexes.Contains(candidate.fileIndex);
        if (accepted && candidate.score < maxScore)
        {
            if (recordFileIndexes)
            {
                entryIndexes.Add(candidate.fileIndex);
            }

            results.Add(new PatternMatch(candidate.fileIndex, candidate.score));
            if (results.Count >= limit)
            {
                return results;
            }
        }

        // Move forward while the next entry still belongs to the same run.
        cursor++;
        inRun = cursor < m_WordIndexEntries.Length
            && m_WordIndexEntries[cursor].key == key
            && m_WordIndexEntries[cursor].length == length;
    }

    return results;
}
/// <summary>
/// Collects the word index entries stored for the given key and length.
/// </summary>
/// <param name="key">Key of the word index entries to look up.</param>
/// <param name="length">Length of the word index entries to look up.</param>
/// <param name="maxScore">Entries are kept only when their score is strictly lower than this value.</param>
/// <param name="wiec">Comparer used to binary-search m_WordIndexEntries.</param>
/// <returns>The kept matches, or an empty sequence when the binary search finds nothing.</returns>
private IEnumerable<PatternMatch> GetPatternFileIndexes(int key, int length, int maxScore, WordIndexEntryComparer wiec)
{
    // Locate any entry for (key, length) in the sorted word index.
    int cursor = Array.BinarySearch(m_WordIndexEntries, new WordIndexEntry(key, length), wiec);
    if (cursor < 0)
    {
        return Enumerable.Empty<PatternMatch>();
    }

    // Step back to the first entry of the run sharing the same key and length.
    while (cursor > 0 && m_WordIndexEntries[cursor - 1].key == key && m_WordIndexEntries[cursor - 1].length == length)
    {
        cursor--;
    }

    var results = new List<PatternMatch>();
    bool inRun = true;
    while (inRun)
    {
        var candidate = m_WordIndexEntries[cursor];
        if (candidate.score < maxScore)
        {
            results.Add(new PatternMatch(candidate.fileIndex, candidate.score));
        }

        // Move forward while the next entry still belongs to the same run.
        cursor++;
        inRun = cursor < m_WordIndexEntries.Length
            && m_WordIndexEntries[cursor].key == key
            && m_WordIndexEntries[cursor].length == length;
    }

    return results;
}