Example #1
0
 /// <summary>
 /// Walks <paramref name="di"/> and all of its subdirectories and, for every
 /// "*.txt" file found, passes the file to K_Google.AddFile and then to FileSeprator.
 /// </summary>
 /// <param name="di">Root directory whose text files are processed.</param>
 public FactoryFileSeparator(DirectoryInfo di)
 {
     FileInfo[] textFiles = di.GetFiles("*.txt", SearchOption.AllDirectories);

     for (int index = 0; index < textFiles.Length; index++)
     {
         FileInfo current = textFiles[index];

         // Register the file first, then split it.
         K_Google.AddFile(current);
         FileSeprator(current);
     }
 }
Example #2
0
        /// <summary>
        /// Passes <paramref name="url"/> to K_Google.AddWeb and, when it is a usable
        /// absolute http/https address, feeds the text extracted from that address
        /// through Separator into SetOccurence.
        /// </summary>
        /// <param name="url">
        /// Address entered by the user. A null, empty or malformed value is still handed
        /// to K_Google.AddWeb but is otherwise ignored.
        /// </param>
        public Web(string url)
        {
            K_Google.AddWeb(url);

            // Regex checking that the url entered by the user really is a url.
            Regex urlRegex = new Regex("(http|https):\\/\\/(\\w+:{0,1}\\w*@)?(\\S+)(:[0-9]+)?(\\/|\\/([\\w#!:.?+=&%@!\\-]))?");

            // The pattern is not anchored, so a match alone does not guarantee that the
            // whole string parses as an address; Uri.TryCreate filters out the remainder
            // instead of letting the Uri constructor throw.
            Uri uri;
            if (string.IsNullOrEmpty(url)
                || !urlRegex.IsMatch(url)
                || !Uri.TryCreate(url, UriKind.Absolute, out uri))
            {
                return;
            }

            // The address is a url: retrieve the page source, keep only its text,
            // and record the resulting words.
            SetOccurence(Separator(getTextInHTML(uri)), null, lstOccurence, url);
        }
Example #3
0
        /// <summary>
        /// Processes every "*.txt" file under <paramref name="di"/> (subdirectories
        /// included). For each file it: passes the file to K_Google.AddFile, detects the
        /// charset with Ude.CharsetDetector, reads and lower-cases the content, splits it
        /// into words, calls AddWord once per distinct word, and updates lstOccurence.
        /// After all files are processed, SendToDataBase is called on every entry of
        /// lstOccurence.
        /// </summary>
        /// <param name="di">Root directory whose text files are processed.</param>
        public WordOnTxt(DirectoryInfo di)
        {
            // Charset name used when detection fails for a file.
            // NOTE(review): confirm that "1252" is accepted by Encoding.GetEncoding on the target runtime.
            const string defaultEncodingName = "1252";

            // Word separators (loop-invariant, so built once).
            char[] delimiter = new char[] { '[', ']', '#', '^', '¦', '|', '£', '<', '>', '_', '$', '\n', '\r', '.', ' ', ',', '\'', '!', '?', '(', ')', '%', '&', '"', '=', '+', '{', '}', '*', ';', ':', '\\', '-', '/' };

            foreach (var fi in di.GetFiles("*.txt", SearchOption.AllDirectories))
            {
                K_Google.AddFile(fi);

                // Start from the fallback for every file so that a failed detection
                // never reuses the charset detected for the previous file.
                string encodingName = defaultEncodingName;

                using (FileStream fs = File.OpenRead(fi.FullName))
                {
                    Ude.CharsetDetector cdet = new Ude.CharsetDetector();
                    cdet.Feed(fs);
                    cdet.DataEnd();
                    if (cdet.Charset != null)
                    {
                        encodingName = cdet.Charset;
                    }
                    else
                    {
                        Console.WriteLine("Detection failed.");
                    }
                }

                // Same path that was opened for detection above.
                string path = fi.FullName;
                Console.WriteLine(path);

                // The using block closes the reader even when ReadToEnd throws.
                string text;
                using (StreamReader reader = new StreamReader(path, Encoding.GetEncoding(encodingName)))
                {
                    text = reader.ReadToEnd();
                }
                text = text.ToLower();

                // Non-empty tokens, sorted with the default string comparer.
                List<string> words = new List<string>(text.Split(delimiter, StringSplitOptions.RemoveEmptyEntries));
                words.Sort();

                // Register each distinct word once, in sorted order, before any
                // occurrence is recorded.
                foreach (string word in words.Distinct())
                {
                    AddWord(word);
                }

                // Words for which an occurrence entry has been added while processing this file.
                HashSet<string> recordedWords = new HashSet<string>();
                foreach (string word in words)
                {
                    if (!recordedWords.Contains(word))
                    {
                        Ocurrence candidate = new Ocurrence(fi, word);
                        if (!lstOccurence.Contains(candidate))
                        {
                            lstOccurence.Add(candidate);
                            recordedWords.Add(word);
                        }
                    }
                    else
                    {
                        // NOTE(review): this matches on Word alone across the whole of
                        // lstOccurence, which is not cleared between files, so entries
                        // added for earlier files with the same word are incremented
                        // too. Confirm whether counts are meant to be per file.
                        foreach (Ocurrence occurence in lstOccurence)
                        {
                            if (occurence.Word == word)
                            {
                                occurence.IncreamentOccurence();
                            }
                        }
                    }
                }
            }

            foreach (Ocurrence ocu in lstOccurence)
            {
                ocu.SendToDataBase();
            }
        }