/// <summary>
/// Walks <paramref name="di"/> recursively and, for every <c>*.txt</c> file found,
/// registers the file through <c>K_Google.AddFile</c> and then hands it to <c>FileSeprator</c>.
/// </summary>
/// <param name="di">Root directory whose text files are processed.</param>
public FactoryFileSeparator(DirectoryInfo di)
{
    // GetFiles returns the complete array up front, so it can be walked by index.
    FileInfo[] textFiles = di.GetFiles("*.txt", SearchOption.AllDirectories);

    for (int index = 0; index < textFiles.Length; index++)
    {
        FileInfo current = textFiles[index];

        // Same order as before: register first, then separate.
        K_Google.AddFile(current);
        FileSeprator(current);
    }
}
/// <summary>
/// Registers <paramref name="url"/> through <c>K_Google.AddWeb</c> and, when it is a well-formed
/// absolute http/https-looking address, feeds the result of
/// <c>Separator(getTextInHTML(uri))</c> to <c>SetOccurence</c> together with <c>lstOccurence</c>.
/// </summary>
/// <param name="url">
/// Address entered by the user. A null, empty or malformed value is still passed to
/// <c>K_Google.AddWeb</c> but is otherwise ignored.
/// </param>
public Web(string url)
{
    K_Google.AddWeb(url);

    // Regex checking that the text entered by the user looks like a URL.
    // The pattern is not anchored, so it only proves the string *contains* a URL-like part.
    Regex urlRegex = new Regex("(http|https):\\/\\/(\\w+:{0,1}\\w*@)?(\\S+)(:[0-9]+)?(\\/|\\/([\\w#!:.?+=&%@!\\-]))?");

    // A null url used to reach Regex.IsMatch and throw; treat it like the empty string.
    if (string.IsNullOrEmpty(url) || !urlRegex.IsMatch(url))
    {
        return;
    }

    // Because the regex is unanchored, confirm the whole string parses as an absolute URI
    // instead of letting the Uri constructor throw UriFormatException.
    Uri uri;
    if (!Uri.TryCreate(url, UriKind.Absolute, out uri))
    {
        return;
    }

    // Presumably getTextInHTML downloads the page and strips the markup (per the original
    // comment) - its implementation is not visible here.
    SetOccurence(Separator(getTextInHTML(uri)), null, lstOccurence, url);
}
/// <summary>
/// Processes every <c>*.txt</c> file found under <paramref name="di"/> (searched recursively).
/// Each file is registered through <c>K_Google.AddFile</c>, read using the character set reported
/// by <c>Ude.CharsetDetector</c>, lower-cased and split into words. Every distinct word is passed
/// to <c>AddWord</c>, and an <c>Ocurrence</c> per word is added to / incremented in
/// <c>lstOccurence</c>. After all files are handled, <c>SendToDataBase</c> is called on every
/// entry of <c>lstOccurence</c>.
/// </summary>
/// <param name="di">Root directory whose text files are processed.</param>
public WordOnTxt(DirectoryInfo di)
{
    // Encoding name used when charset detection fails.
    // NOTE(review): confirm "1252" is a name Encoding.GetEncoding accepts on the target runtime.
    const string defaultEncodingName = "1252";

    // Word separators; loop-invariant, so built once.
    char[] delimiters = new char[] { '[', ']', '#', '^', '¦', '|', '£', '<', '>', '_', '$', '\n', '\r', '.', ' ', ',', '\'', '!', '?', '(', ')', '%', '&', '"', '=', '+', '{', '}', '*', ';', ':', '\\', '-', '/' };

    foreach (FileInfo fi in di.GetFiles("*.txt", SearchOption.AllDirectories))
    {
        K_Google.AddFile(fi);

        // Reset per file: a failed detection must fall back to the default rather than
        // silently reuse the charset detected for a previous, unrelated file.
        string encodingName = defaultEncodingName;
        using (FileStream fs = File.OpenRead(fi.FullName))
        {
            Ude.CharsetDetector cdet = new Ude.CharsetDetector();
            cdet.Feed(fs);
            cdet.DataEnd();
            if (cdet.Charset != null)
            {
                encodingName = cdet.Charset;
            }
            else
            {
                Console.WriteLine("Detection failed.");
            }
        }

        // FullName avoids hand-building the path with a hard-coded '\' separator.
        string path = fi.FullName;
        Console.WriteLine(path);

        string content;
        // 'using' guarantees the reader is closed even if ReadToEnd throws.
        using (StreamReader reader = new StreamReader(path, Encoding.GetEncoding(encodingName)))
        {
            content = reader.ReadToEnd();
        }
        content = content.ToLower();

        // RemoveEmptyEntries drops the empty strings produced by adjacent delimiters.
        List<string> words = new List<string>(content.Split(delimiters, StringSplitOptions.RemoveEmptyEntries));
        words.Sort();

        // Distinct keeps first-occurrence order, so words are still added in sorted order.
        foreach (string word in words.Distinct())
        {
            AddWord(word);
        }

        // Words of this file that already have an Ocurrence added to lstOccurence.
        HashSet<string> countedWords = new HashSet<string>();
        foreach (string word in words)
        {
            if (!countedWords.Contains(word))
            {
                Ocurrence candidate = new Ocurrence(fi, word);
                if (!lstOccurence.Contains(candidate))
                {
                    lstOccurence.Add(candidate);
                    countedWords.Add(word);
                }
            }
            else
            {
                // NOTE(review): this matches on Word only, so entries that earlier files added
                // to lstOccurence for the same word appear to be incremented too - confirm
                // against Ocurrence whether that is intended.
                foreach (Ocurrence occurence in lstOccurence)
                {
                    if (occurence.Word == word)
                    {
                        occurence.IncreamentOccurence();
                    }
                }
            }
        }
    }

    foreach (Ocurrence ocu in lstOccurence)
    {
        ocu.SendToDataBase();
    }
}