Example #1
0
        // Initialises SearchTools, then loads the reference list from refs\prenom.ref,
        // lower-cases it and stores the grouped result (group key length 1) in `refs`.
        private void init()
        {
            SearchTools.Init();

            string[] loweredLines = SearchTools.ArrayToLower(File.ReadAllLines(@"refs\prenom.ref"));

            refs = SearchTools.buildArray(loweredLines, 1);
        }
        /// <summary>
        /// Scores a text against a list of reference words: each accepted occurrence
        /// adds the length of the matched reference word to the score.
        /// Groups are scanned in parallel.
        /// </summary>
        /// <param name="txt">text to scan; <c>null</c> gives a score of 0</param>
        /// <param name="refs">
        /// reference words in groups; the search prefix is taken from the first word of each
        /// group, and the other words of the group are only compared from index
        /// <paramref name="length"/> onwards
        /// </param>
        /// <param name="allowPartofWord">if set, a match embedded in a longer word is counted too</param>
        /// <param name="length">number of leading characters used as the group prefix</param>
        /// <returns>sum of the lengths of the accepted matches</returns>
        public static int Search4(string txt, string[][] refs, bool allowPartofWord, int length)
        {
            if (txt == null)
            {
                return(0);
            }
            var note = 0;

            Parallel.ForEach(refs, (refgroup) =>
            {
                var pref = refgroup[0].Substring(0, length);
                int pos  = txt.IndexOf(pref, StringComparison.InvariantCultureIgnoreCase);

                while (pos > -1)
                {
                    foreach (string s in refgroup)
                    {
                        var co = length;

                        while (pos + co < txt.Length &&
                               co < s.Length && (
                                   txt[pos + co] == s[co] ||                         //same character
                                   txt[pos + co] + 32 == s[co] ||                    //ASCII upper case in txt, lower case in s
                                   SearchTools.includeAccent(txt[pos + co], s[co]))) //presumably accent-insensitive match; see SearchTools
                        {
                            co++;
                        }

                        if (co == s.Length)
                        {
                            // Both boundaries must hold. The explicit parentheses matter:
                            // "a || b && c" would accept every match at pos == 0.
                            bool isWord =
                                (pos == 0 || SearchTools.isWordSep(txt[pos - 1])) &&
                                (pos + s.Length >= txt.Length || SearchTools.isWordSep(txt[pos + s.Length]));
                            if (isWord || allowPartofWord)
                            {
                                // The lambda runs concurrently for several groups: a plain += would lose updates.
                                System.Threading.Interlocked.Add(ref note, s.Length);
                                break;
                            }
                            // Rejected candidate: keep trying the other words of the group,
                            // a longer one may match as a whole word at this position.
                        }
                    }

                    if (pos + 1 >= txt.Length)
                    {
                        break;
                    }
                    pos = txt.IndexOf(pref, pos + 1, StringComparison.InvariantCultureIgnoreCase);
                }
            });

            return(note);
        }
        /// <summary>
        /// Search for a list of string in a text
        /// sample test 16.6763 second
        /// Groups are scanned in parallel, so the order of the entries added to
        /// <paramref name="result"/> is not deterministic.
        /// </summary>
        /// <param name="txt">text; <c>null</c> adds nothing to the result</param>
        /// <param name="refs">array of string to search (grouped by first letter)</param>
        /// <param name="allowPartofWord">if set allow word or part of word match</param>
        /// <param name="length">number of character to use in group</param>
        /// <param name="result">result list; one entry is appended per accepted match</param>
        public static void Search4(string txt, string[][] refs, bool allowPartofWord, int length, List <SearchResult> result)
        {
            if (txt == null)
            {
                return; // same contract as the scoring overload: nothing to search
            }

            // List<T>.Add is not thread-safe; every append from the parallel body goes through this lock.
            var gate = new object();

            Parallel.ForEach(refs, (refgroup) =>
            {
                var pref = refgroup[0].Substring(0, length);
                int pos  = txt.IndexOf(pref, StringComparison.InvariantCultureIgnoreCase);

                while (pos > -1)
                {
                    foreach (string s in refgroup)
                    {
                        var co = length;

                        while (pos + co < txt.Length &&
                               co < s.Length && (
                                   txt[pos + co] == s[co] ||                         //same character
                                   txt[pos + co] + 32 == s[co] ||                    //ASCII upper case in txt, lower case in s
                                   SearchTools.includeAccent(txt[pos + co], s[co]))) //presumably accent-insensitive match; see SearchTools
                        {
                            co++;
                        }

                        if (co == s.Length)
                        {
                            // Both boundaries must hold. ">=" (not ">") keeps txt[pos + s.Length]
                            // from being read when the match ends exactly at the end of the text.
                            bool isWord =
                                (pos == 0 || SearchTools.isWordSep(txt[pos - 1])) &&
                                (pos + s.Length >= txt.Length || SearchTools.isWordSep(txt[pos + s.Length]));
                            if (isWord || allowPartofWord)
                            {
                                var hit = new SearchResult
                                {
                                    Word         = s,
                                    // NOTE(review): "pos > 2" never inspects txt[0] for a match at index 2;
                                    // confirm whether "pos >= 2" was intended.
                                    NearNumber   = pos > 2 && SearchTools.isNumber(txt[pos - 2]),
                                    isPartOfWord = !isWord,
                                    Position     = pos
                                };

                                lock (gate)
                                {
                                    result.Add(hit);
                                }

                                break;
                            }
                            // Rejected candidate: keep trying the other words of the group,
                            // a longer one may match as a whole word at this position.
                        }
                    }

                    if (pos + 1 >= txt.Length)
                    {
                        break;
                    }
                    pos = txt.IndexOf(pref, pos + 1, StringComparison.InvariantCultureIgnoreCase);
                }
            });
        }
Example #4
0
        /// <summary>
        /// Opens the file as a zip archive, concatenates the UTF-8 decoded content of the
        /// entries configured for its extension in <c>OfficeContentPath</c> and passes the
        /// result through <c>SearchTools.XmlToText</c>.
        /// </summary>
        /// <param name="fi">file to read</param>
        /// <returns>
        /// the value returned by <c>SearchTools.XmlToText</c>, or <c>null</c> when the extension
        /// is not configured or the file name starts with "~" or "$"
        /// </returns>
        public static string GetContent(FileInfo fi)
        {
            if (!OfficeContentPath.ContainsKey(fi.Extension))
            {
                return(null);
            }

            if (fi.Name.StartsWith("~", StringComparison.Ordinal) ||
                fi.Name.StartsWith("$", StringComparison.Ordinal))
            {
                return(null);                                                                    //avoid temp file or in recycle bin
            }
            var result = new StringBuilder();

            // The configured value is a comma separated list of entry paths; a trailing "*" selects a whole directory.
            var targets = OfficeContentPath[fi.Extension].Split(',');

            using (ZipFile zipFile = ZipFile.Read(fi.FullName))
            {
                foreach (string target in targets)
                {
                    List <ZipEntry> filesList = target.EndsWith("*", StringComparison.Ordinal)
                        ? zipFile.SelectEntries("*", target.Substring(0, target.Length - 1)).ToList()
                        : zipFile.SelectEntries(target).ToList();

                    foreach (ZipEntry zEntry in filesList)
                    {
                        // using: the buffer is released even when Extract throws
                        using (var tempS = new MemoryStream())
                        {
                            zEntry.Extract(tempS);
                            result.Append(Encoding.UTF8.GetString(tempS.ToArray()));
                        }
                    }
                }
            }

            return(SearchTools.XmlToText(result.ToString()));
        }
        /// <summary>
        /// Search for a list of string in a text
        /// Faster but full word only
        /// Sample 14,496 for 20000 iteration
        /// Groups are scanned in parallel, so the order of the entries added to
        /// <paramref name="result"/> is not deterministic.
        /// </summary>
        /// <param name="txt">text; <c>null</c> adds nothing to the result</param>
        /// <param name="refs">array of string to search (grouped by first letter)</param>
        /// <param name="length">
        /// number of character to use in group; not used by this implementation (only the first
        /// character of the first word of each group is compared), kept for signature compatibility
        /// </param>
        /// <param name="result">result list; one entry is appended per matched word</param>
        public static void SearchWord(string txt, string[][] refs, int length, List <SearchResult> result)
        {
            if (txt == null)
            {
                return;
            }

            // List<T>.Add is not thread-safe; every append from the parallel body goes through this lock.
            var gate = new object();

            Parallel.ForEach(refs, (refgroup) =>
            {
                if (refgroup == null || refgroup.Length == 0 || string.IsNullOrEmpty(refgroup[0]))
                {
                    return; // nothing to look for in an empty group
                }

                // NOTE(review): this first-letter test is case-sensitive while the word comparison
                // below ignores case; confirm that callers normalise the case of txt.
                char groupLetter = refgroup[0][0];
                int pos          = 0;

                while (pos < txt.Length)
                {
                    if (groupLetter == txt[pos] && (pos == 0 || SearchTools.isWordSep(txt[pos - 1])))//new word start and first char equal group letter
                    {
                        var co = 1;

                        while (pos + co < txt.Length && !SearchTools.isWordSep(txt[pos + co]))//search end of current word
                        {
                            co++;
                        }

                        // Single allocation instead of growing the word one character at a time.
                        string word = txt.Substring(pos, co);

                        foreach (string s in refgroup)
                        {
                            if (word.Length == s.Length && string.Equals(word, s, StringComparison.CurrentCultureIgnoreCase))
                            {
                                var hit = new SearchResult
                                {
                                    Word       = s,
                                    NearNumber = pos > 2 && SearchTools.isNumber(txt[pos - 2]),
                                    Position   = pos
                                };

                                lock (gate)
                                {
                                    result.Add(hit);
                                }

                                pos += word.Length; // jump to the separator that ends the matched word
                                break;
                            }
                        }
                    }

                    pos++;
                }
            });
        }