/// <summary>
/// Prepares the search helpers and loads the reference list into <c>refs</c>.
/// </summary>
/// <remarks>
/// Reads every line of <c>refs\prenom.ref</c> (path relative to the current directory),
/// passes the lines through <c>SearchTools.ArrayToLower</c>, then hands the result to
/// <c>SearchTools.buildArray</c> with a second argument of 1.
/// NOTE(review): the 1 is presumably the grouping prefix length - confirm against buildArray.
/// </remarks>
private void init()
{
    SearchTools.Init();

    string[] rawLines = File.ReadAllLines(@"refs\prenom.ref");
    string[] loweredLines = SearchTools.ArrayToLower(rawLines);

    refs = SearchTools.buildArray(loweredLines, 1);
}
/// <summary>
/// Searches a text for a list of reference strings and returns a score equal to the
/// sum of the lengths of every reference string that was matched.
/// </summary>
/// <param name="txt">Text to scan; <c>null</c> yields a score of 0.</param>
/// <param name="refs">Reference strings, grouped so that every string of a group shares the
/// same first <paramref name="length"/> characters. Null or empty groups are skipped.</param>
/// <param name="allowPartofWord">When true, matches embedded inside a longer word are counted too.</param>
/// <param name="length">Number of leading characters shared by the strings of a group.</param>
/// <returns>Sum of the lengths of the matched reference strings.</returns>
public static int Search4(string txt, string[][] refs, bool allowPartofWord, int length)
{
    if (txt == null)
    {
        return 0;
    }

    var note = 0;
    Parallel.ForEach(refs, (refgroup) =>
    {
        if (refgroup == null || refgroup.Length == 0)
        {
            return; // nothing to search for in this group
        }

        // The group prefix is located first; the tails of the group members are compared afterwards.
        var pref = refgroup[0].Substring(0, length);
        int pos = txt.IndexOf(pref, StringComparison.InvariantCultureIgnoreCase);
        while (pos > -1)
        {
            foreach (string s in refgroup)
            {
                // The first 'length' characters were already matched by IndexOf.
                var co = length;
                while (pos + co < txt.Length && co < s.Length)
                {
                    char textChar = txt[pos + co];
                    char refChar = s[co];
                    bool same = textChar == refChar
                        // Case fold through the framework: the previous "+ 32" arithmetic also
                        // matched unrelated characters such as '\r' against '-'.
                        || char.ToLowerInvariant(textChar) == refChar
                        // NOTE(review): presumably accent-insensitive equivalence - confirm in SearchTools.
                        || SearchTools.includeAccent(textChar, refChar);
                    if (!same)
                    {
                        break;
                    }

                    co++;
                }

                if (co == s.Length)
                {
                    int end = pos + s.Length;
                    // Both boundaries must hold. The previous expression relied on '&&' binding
                    // tighter than '||', so a match at offset 0 was always treated as a whole word.
                    bool startsWord = pos == 0 || SearchTools.isWordSep(txt[pos - 1]);
                    bool endsWord = end >= txt.Length || SearchTools.isWordSep(txt[end]);
                    bool isWord = startsWord && endsWord;
                    if (isWord || allowPartofWord)
                    {
                        // The lambda runs concurrently; a plain '+=' on the shared counter loses updates.
                        System.Threading.Interlocked.Add(ref note, s.Length);
                    }

                    break; // first matching member of the group wins for this position
                }
            }

            if (pos + 1 >= txt.Length)
            {
                break;
            }

            pos = txt.IndexOf(pref, pos + 1, StringComparison.InvariantCultureIgnoreCase);
        }
    });

    return note;
}
/// <summary>
/// Searches a text for a list of reference strings and records every match in
/// <paramref name="result"/>.
/// Original author's timing note: sample test 16.6763 seconds.
/// </summary>
/// <param name="txt">Text to scan; <c>null</c> produces no results.</param>
/// <param name="refs">Reference strings, grouped so that every string of a group shares the
/// same first <paramref name="length"/> characters. Null or empty groups are skipped.</param>
/// <param name="allowPartofWord">When true, matches embedded inside a longer word are reported too
/// (flagged through <c>isPartOfWord</c>).</param>
/// <param name="length">Number of leading characters shared by the strings of a group.</param>
/// <param name="result">List receiving the matches. Groups are processed in parallel, so the
/// order of the added items is not deterministic.</param>
public static void Search4(string txt, string[][] refs, bool allowPartofWord, int length, List<SearchResult> result)
{
    if (txt == null)
    {
        return; // consistent with the scoring overload, which treats null text as "no match"
    }

    // List<T> is not safe for concurrent writers; every Add from the parallel body goes through this gate.
    var gate = new object();

    Parallel.ForEach(refs, (refgroup) =>
    {
        if (refgroup == null || refgroup.Length == 0)
        {
            return; // nothing to search for in this group
        }

        // The group prefix is located first; the tails of the group members are compared afterwards.
        var pref = refgroup[0].Substring(0, length);
        int pos = txt.IndexOf(pref, StringComparison.InvariantCultureIgnoreCase);
        while (pos > -1)
        {
            foreach (string s in refgroup)
            {
                // The first 'length' characters were already matched by IndexOf.
                var co = length;
                while (pos + co < txt.Length && co < s.Length)
                {
                    char textChar = txt[pos + co];
                    char refChar = s[co];
                    bool same = textChar == refChar
                        // Case fold through the framework: the previous "+ 32" arithmetic also
                        // matched unrelated characters such as '\r' against '-'.
                        || char.ToLowerInvariant(textChar) == refChar
                        // NOTE(review): presumably accent-insensitive equivalence - confirm in SearchTools.
                        || SearchTools.includeAccent(textChar, refChar);
                    if (!same)
                    {
                        break;
                    }

                    co++;
                }

                if (co == s.Length)
                {
                    int end = pos + s.Length;
                    // Both boundaries must hold. The previous expression relied on '&&' binding
                    // tighter than '||', so a match at offset 0 was always treated as a whole word.
                    bool startsWord = pos == 0 || SearchTools.isWordSep(txt[pos - 1]);
                    // '>=' (not '>'): a match ending exactly at the end of the text must not read txt[txt.Length].
                    bool endsWord = end >= txt.Length || SearchTools.isWordSep(txt[end]);
                    bool isWord = startsWord && endsWord;
                    if (isWord || allowPartofWord)
                    {
                        var hit = new SearchResult
                        {
                            Word = s,
                            // NOTE(review): 'pos > 2' skips the valid case pos == 2 (txt[0]); confirm whether '>= 2' was intended.
                            NearNumber = pos > 2 && SearchTools.isNumber(txt[pos - 2]),
                            isPartOfWord = !isWord,
                            Position = pos
                        };
                        lock (gate)
                        {
                            result.Add(hit);
                        }
                    }

                    break; // first matching member of the group wins for this position
                }
            }

            if (pos + 1 >= txt.Length)
            {
                break;
            }

            pos = txt.IndexOf(pref, pos + 1, StringComparison.InvariantCultureIgnoreCase);
        }
    });
}
/// <summary>
/// Reads the text content of a zip-packaged document.
/// </summary>
/// <remarks>
/// The file extension selects a comma-separated list of archive entry targets from
/// <c>OfficeContentPath</c>. A target ending with <c>*</c> selects every entry under the
/// directory named by the rest of the target; any other target is passed to
/// <c>SelectEntries</c> as-is. The selected entries are decoded as UTF-8, concatenated, and
/// converted with <c>SearchTools.XmlToText</c>.
/// </remarks>
/// <param name="fi">File to read.</param>
/// <returns>
/// The converted text, or <c>null</c> when the extension is not registered in
/// <c>OfficeContentPath</c> or the file name starts with <c>~</c> or <c>$</c>.
/// </returns>
public static string GetContent(FileInfo fi)
{
    if (!OfficeContentPath.ContainsKey(fi.Extension))
    {
        return null;
    }

    // Single-character markers: an ordinal comparison is exact and culture-independent.
    if (fi.Name.StartsWith("~", StringComparison.Ordinal)
        || fi.Name.StartsWith("$", StringComparison.Ordinal))
    {
        return null; // avoid temp files or files in the recycle bin
    }

    var result = new StringBuilder();
    var targets = OfficeContentPath[fi.Extension].Split(',');

    using (ZipFile zipFile = ZipFile.Read(fi.FullName))
    {
        foreach (string target in targets)
        {
            List<ZipEntry> filesList = target.EndsWith("*", StringComparison.Ordinal)
                ? zipFile.SelectEntries("*", target.Substring(0, target.Length - 1)).ToList()
                : zipFile.SelectEntries(target).ToList();

            foreach (ZipEntry zEntry in filesList)
            {
                // 'using' releases the buffer even when extraction throws.
                using (var tempS = new MemoryStream())
                {
                    zEntry.Extract(tempS);
                    result.Append(Encoding.UTF8.GetString(tempS.ToArray()));
                }
            }
        }

        return SearchTools.XmlToText(result.ToString());
    }
}
/// <summary>
/// Searches a text for a list of reference strings, matching whole words only.
/// Original author's timing note: faster than the prefix search; sample 14,496 for 20000 iterations.
/// </summary>
/// <param name="txt">Text to scan; <c>null</c> produces no results.</param>
/// <param name="refs">Reference strings grouped by first letter; the first character of the first
/// entry of each group is used as the group letter. Null or empty groups are skipped.</param>
/// <param name="length">Number of characters used for grouping. Not needed by this whole-word
/// search; kept so existing callers continue to compile.</param>
/// <param name="result">List receiving the matches. Groups are processed in parallel, so the
/// order of the added items is not deterministic.</param>
public static void SearchWord(string txt, string[][] refs, int length, List<SearchResult> result)
{
    if (txt == null)
    {
        return;
    }

    // List<T> is not safe for concurrent writers; every Add from the parallel body goes through this gate.
    var gate = new object();

    Parallel.ForEach(refs, (refgroup) =>
    {
        if (refgroup == null || refgroup.Length == 0 || string.IsNullOrEmpty(refgroup[0]))
        {
            return; // no group letter to look for
        }

        // NOTE(review): the group letter is compared case-sensitively while the full word is compared
        // ignoring case; confirm that callers normalise the case of txt and refs beforehand.
        char groupLetter = refgroup[0][0];
        int pos = 0;
        while (pos < txt.Length)
        {
            // A candidate is a word start whose first character equals the group letter.
            if (groupLetter == txt[pos] && (pos == 0 || SearchTools.isWordSep(txt[pos - 1])))
            {
                // Find the end of the current word, then slice it out in one allocation.
                int end = pos + 1;
                while (end < txt.Length && !SearchTools.isWordSep(txt[end]))
                {
                    end++;
                }

                string word = txt.Substring(pos, end - pos);

                foreach (string s in refgroup)
                {
                    if (word.Length == s.Length && string.Equals(word, s, StringComparison.CurrentCultureIgnoreCase))
                    {
                        var hit = new SearchResult
                        {
                            Word = s,
                            // NOTE(review): 'pos > 2' skips the valid case pos == 2 (txt[0]); confirm whether '>= 2' was intended.
                            NearNumber = pos > 2 && SearchTools.isNumber(txt[pos - 2]),
                            Position = pos
                        };
                        lock (gate)
                        {
                            result.Add(hit);
                        }

                        pos += word.Length; // jump past the matched word
                        break;
                    }
                }
            }

            pos++;
        }
    });
}