/// <summary>
/// Comparison used to order keyword matches by the index of their match result.
/// A null reference sorts before any non-null match; two nulls compare equal.
/// </summary>
/// <param name="x">Left-hand match (may be null).</param>
/// <param name="y">Right-hand match (may be null).</param>
/// <returns>Negative, zero or positive when x sorts before, with, or after y.</returns>
private static int CompareByIndex(KeywordMatch x, KeywordMatch y)
{
    bool leftMissing = (x == null);
    bool rightMissing = (y == null);

    // Both missing: nothing to tell them apart.
    if (leftMissing && rightMissing)
    {
        return 0;
    }

    // Only the left side is missing: the right side is the greater one.
    if (leftMissing)
    {
        return -1;
    }

    // Only the right side is missing: the left side is the greater one.
    if (rightMissing)
    {
        return 1;
    }

    // Both present: order by the index of the underlying match result.
    return x.MatchResult.Index.CompareTo(y.MatchResult.Index);
}
/// <summary>
/// Returns every (first keyword, second keyword) position pair found in the range dictionary.
/// Step 1 - collect the positions whose text equals the first or the second keyword.
/// Step 2 - add every combination of a first-keyword position with a second-keyword position.
/// </summary>
/// <param name="firstKeyword">Text identifying a first-keyword entry.</param>
/// <param name="secondKeyword">
/// Text identifying a second-keyword entry. An entry equal to both keywords is
/// classified as a first keyword only.
/// </param>
/// <param name="rangeDic">
/// Map of position to text. Keys do not need to be contiguous; a null or empty
/// dictionary yields an empty result.
/// </param>
/// <returns>
/// All position pairs, ordered by ascending first position and then ascending second
/// position; empty when either keyword is absent.
/// </returns>
private List<KeywordMatch> GetRangeMatches(string firstKeyword, string secondKeyword, Dictionary<int, string> rangeDic)
{
    List<KeywordMatch> matchList = new List<KeywordMatch>();

    if (rangeDic == null || rangeDic.Count == 0)
    {
        return matchList;
    }

    List<int> firstPositions = new List<int>();
    List<int> secondPositions = new List<int>();

    // Step 1 - walk the entries that actually exist, in ascending key order, instead of
    // probing every integer between the smallest and largest key (which fails on gaps).
    foreach (KeyValuePair<int, string> entry in rangeDic.OrderBy(e => e.Key))
    {
        if (entry.Value == firstKeyword)
        {
            firstPositions.Add(entry.Key);
        }
        else if (entry.Value == secondKeyword)
        {
            secondPositions.Add(entry.Key);
        }
    }

    // Step 2 - cartesian product; produces nothing when either list is empty.
    foreach (int firstPosition in firstPositions)
    {
        foreach (int secondPosition in secondPositions)
        {
            KeywordMatch keywordMatch = new KeywordMatch()
            {
                FirstPosition = firstPosition,
                SecondPosition = secondPosition
            };
            matchList.Add(keywordMatch);
        }
    }

    return matchList;
}
/// <summary>
/// Searches a file byte-by-byte for the ASCII encoding of <c>searchParams.Keywords</c>.
/// The file is read in chunks that overlap by (keyword length - 1) bytes so that a keyword
/// straddling a chunk boundary is still found, and each offset is examined exactly once.
/// When <c>searchParams.ContextLinesCount</c> is positive, non-matching bytes are recorded
/// as two-digit hex strings in the pre/post context queues of the surrounding matches.
/// </summary>
/// <param name="searchParams">Keyword, case-sensitivity and context settings.</param>
/// <param name="file">Path of the file to scan; opened read-only with shared read/write access.</param>
/// <returns>
/// The matches in file order, or null when nothing matched (including when the keyword is empty).
/// </returns>
public List<KeywordMatch> BinarySearch(SearchParams searchParams, string file)
{
    m_terminate.Reset();

    List<KeywordMatch> matches = new List<KeywordMatch>();
    KeywordMatch match = new KeywordMatch();

    System.Text.ASCIIEncoding ascii = new System.Text.ASCIIEncoding();
    byte[] keywords = ascii.GetBytes(searchParams.Keywords);
    if (keywords.Length == 0)
    {
        // Nothing to search for; indexing keywords[0] below would otherwise fail.
        return null;
    }

    // Invariant casing: the keyword is reduced to ASCII bytes, so culture-specific
    // case rules (e.g. Turkish dotless i) must not influence the variants.
    byte[] keywordsLowerCase = ascii.GetBytes(searchParams.Keywords.ToLowerInvariant());
    byte[] keywordsUpperCase = ascii.GetBytes(searchParams.Keywords.ToUpperInvariant());

    int overlap = keywords.Length - 1;
    const int bufflen = 64 * 1024;
    byte[] buff = new byte[bufflen + overlap];

    using (BinaryReader br = new BinaryReader(File.Open(file, FileMode.Open, FileAccess.Read, FileShare.ReadWrite)))
    {
        long maxlen = br.BaseStream.Length;
        long idx = 0; // file offset of buff[0] for the current chunk

        while (idx < maxlen && !m_terminate.WaitOne(0))
        {
            // Rewind to the first offset not yet tested as a match start, so the last
            // 'overlap' bytes of the previous chunk are read again at the front of this one.
            br.BaseStream.Position = idx;
            int bytesRead = br.Read(buff, 0, buff.Length);
            if (bytesRead <= overlap)
            {
                // Fewer bytes left than a whole keyword needs.
                break;
            }

            // Number of start offsets in this chunk at which a full keyword still fits.
            int startCount = bytesRead - overlap;

            for (int bidx = 0; bidx < startCount; bidx++)
            {
                bool found = IsBinaryKeywordAt(buff, bidx, bytesRead, keywords, keywordsLowerCase,
                                               keywordsUpperCase, searchParams.UseCaseSensitiveMatch);

                if (found)
                {
                    // Give every match its own copy of the bytes as they appear in the file,
                    // rather than a shared array that later matches would overwrite.
                    byte[] matchedBytes = new byte[keywords.Length];
                    Array.Copy(buff, bidx, matchedBytes, 0, keywords.Length);

                    match.SetMatch(idx + bidx, matchedBytes, matchedBytes, FileContentType.Binary);
                    matches.Add(match);
                    match = new KeywordMatch();
                }
                else if (searchParams.ContextLinesCount > 0)
                {
                    string hex = String.Format("{0:X2}", buff[bidx]);
                    if (matches.Count > 0 && matches[matches.Count - 1].ContextPost.Count < searchParams.ContextLinesCount)
                    {
                        // Still filling the trailing context of the most recent match.
                        matches[matches.Count - 1].ContextPost.Enqueue(hex);
                    }
                    else
                    {
                        // Sliding window of leading context for the next match.
                        if (match.ContextPre.Count != 0 && match.ContextPre.Count >= searchParams.ContextLinesCount)
                        {
                            match.ContextPre.Dequeue();
                        }
                        match.ContextPre.Enqueue(hex);
                    }
                }
            }

            idx += startCount;
        }
    }

    return (matches.Count == 0 ? null : matches);
}

/// <summary>
/// Checks whether the keyword occurs in <paramref name="buff"/> beginning at <paramref name="start"/>.
/// </summary>
/// <param name="buff">Chunk buffer.</param>
/// <param name="start">Index of the candidate first byte.</param>
/// <param name="validLength">Number of valid bytes in <paramref name="buff"/>.</param>
/// <param name="exact">Keyword bytes as given (used for case-sensitive matching).</param>
/// <param name="lower">Lower-case keyword bytes.</param>
/// <param name="upper">Upper-case keyword bytes.</param>
/// <param name="caseSensitive">True to compare against <paramref name="exact"/> only.</param>
/// <returns>True when every keyword byte matches.</returns>
private static bool IsBinaryKeywordAt(byte[] buff, int start, int validLength, byte[] exact, byte[] lower, byte[] upper, bool caseSensitive)
{
    if (start + exact.Length > validLength)
    {
        return false;
    }

    for (int kidx = 0; kidx < exact.Length; kidx++)
    {
        byte current = buff[start + kidx];
        bool same = caseSensitive
            ? current == exact[kidx]
            : (current == lower[kidx] || current == upper[kidx]);
        if (!same)
        {
            return false;
        }
    }

    return true;
}
/// <summary>
/// Searches a text file line by line for <c>searchParams.Keywords</c>, either as a regular
/// expression or as a plain (optionally whole-word, optionally case-insensitive) substring.
/// With <c>UseNegateSearch</c> set, the lines that do NOT contain the keyword are reported.
/// When <c>ContextLinesCount</c> is positive, unreported lines are kept as pre/post context
/// of the neighbouring matches.
/// </summary>
/// <param name="searchParams">Keyword and matching options.</param>
/// <param name="file">Path of the file to scan; opened read-only with shared read/write access.</param>
/// <returns>
/// The matches in line order (line numbers start at 1), or null when nothing matched or
/// the regular expression could not be constructed.
/// </returns>
public List<KeywordMatch> TextSearch(SearchParams searchParams, string file)
{
    m_terminate.Reset();

    // Build the regex up front so an invalid pattern is rejected before the file is opened.
    Regex regex = null;
    if (searchParams.UseRegexMatch)
    {
        try
        {
            regex = new Regex(searchParams.Keywords,
                              searchParams.UseCaseSensitiveMatch ? RegexOptions.None : RegexOptions.IgnoreCase);
        }
        catch (ArgumentException)
        {
            return (null);
        }
    }

    StringComparison comparison = searchParams.UseCaseSensitiveMatch
        ? StringComparison.CurrentCulture
        : StringComparison.CurrentCultureIgnoreCase;

    List<KeywordMatch> matches = new List<KeywordMatch>();
    KeywordMatch match = new KeywordMatch();

    using (StreamReader sr = new StreamReader(File.Open(file, FileMode.Open, FileAccess.Read, FileShare.ReadWrite)))
    {
        Int64 lineCount = 1;

        while (!m_terminate.WaitOne(0))
        {
            string line = sr.ReadLine();
            if (null == line)
            {
                // End of file.
                break;
            }

            string keywordsMatched = string.Empty;
            bool found;

            if (searchParams.UseRegexMatch)
            {
                // Regex.Match never returns null; Success tells whether the pattern matched.
                Match regexmatch = regex.Match(line);
                found = regexmatch.Success;
                if (found)
                {
                    keywordsMatched = regexmatch.Value;
                }
            }
            else
            {
                found = TryFindKeyword(line, searchParams.Keywords, comparison,
                                       searchParams.UseWholeWordMatch, out keywordsMatched);
            }

            // store match (a "hit" is a non-matching line when the search is negated)
            if (!searchParams.UseNegateSearch == found)
            {
                match.SetMatch(lineCount, line, keywordsMatched, FileContentType.Text);
                matches.Add(match);
                match = new KeywordMatch();
            }
            else if (searchParams.ContextLinesCount > 0)
            {
                if (matches.Count > 0 && matches[matches.Count - 1].ContextPost.Count < searchParams.ContextLinesCount)
                {
                    // Still filling the trailing context of the most recent match.
                    matches[matches.Count - 1].ContextPost.Enqueue(line);
                }
                else
                {
                    // Sliding window of leading context for the next match.
                    if (match.ContextPre.Count != 0 && match.ContextPre.Count >= searchParams.ContextLinesCount)
                    {
                        match.ContextPre.Dequeue();
                    }
                    match.ContextPre.Enqueue(line);
                }
            }

            lineCount++;
        }
    }

    return (matches.Count == 0 ? null : matches);
}

/// <summary>
/// Looks for <paramref name="keyword"/> in <paramref name="line"/>. In whole-word mode every
/// occurrence is tried until one is delimited by whitespace or the line boundaries.
/// </summary>
/// <param name="line">Line of text to search.</param>
/// <param name="keyword">Plain-text keyword.</param>
/// <param name="comparison">String comparison used to locate the keyword.</param>
/// <param name="wholeWord">True to accept only whitespace/line-boundary delimited occurrences.</param>
/// <param name="matched">The matched slice of the line, or an empty string when not found.</param>
/// <returns>True when an acceptable occurrence exists.</returns>
private static bool TryFindKeyword(string line, string keyword, StringComparison comparison, bool wholeWord, out string matched)
{
    matched = string.Empty;

    int searchStart = 0;
    while (searchStart <= line.Length)
    {
        int matchIndex = line.IndexOf(keyword, searchStart, comparison);
        if (-1 == matchIndex)
        {
            return false;
        }

        // Clamp: a culture-sensitive match may cover fewer characters than the keyword has.
        int matchEnd = Math.Min(matchIndex + keyword.Length, line.Length);

        bool startsOnBoundary = matchIndex == 0 || char.IsWhiteSpace(line[matchIndex - 1]);
        bool endsOnBoundary = matchEnd >= line.Length || char.IsWhiteSpace(line[matchEnd]);

        if (!wholeWord || (startsOnBoundary && endsOnBoundary))
        {
            matched = line.Substring(matchIndex, matchEnd - matchIndex);
            return true;
        }

        // This occurrence is embedded in a longer word; try the next one.
        searchStart = matchIndex + 1;
    }

    return false;
}