Beispiel #1
0
        /// <summary>
        /// Returns the shared TXTCreating instance, building it on the first call.
        /// When the URL passes VerifyURL, a directory is created whose name is the second
        /// ':'-separated segment of the URL without its first two characters.
        /// On every call codeHTML is reset to "" and nbrTooLong to 0.
        /// </summary>
        /// <param name="URL">Address of the website the instance will work on</param>
        /// <returns>The shared TXTCreating instance</returns>
        public static TXTCreating GetTXT(string URL)
        {
            //Build the shared instance the first time it is requested
            if (creator == null)
            {
                creator = new TXTCreating();
            }

            bool urlAccepted = creator.VerifyURL(URL);

            if (urlAccepted)
            {
                //"scheme://host/..." split on ':' leaves "//host/..." in the second slot
                string[] urlParts = URL.Split(':');
                TempDirectorySplit = urlParts;

                //Drop the two leading characters of that segment to get the directory name
                string afterColon = urlParts[1];
                directoryName = afterColon.Substring(2, afterColon.Length - 2);

                Directory.CreateDirectory(directoryName);
            }

            //Reset values
            nbrTooLong = 0;
            codeHTML   = "";

            return(creator);
        }
        /// <summary>
        /// Fetches the HTML behind the URL through the given TXTCreating object and extracts
        /// the lower-cased words of its text content (the characters found between a '>' and
        /// the next '&lt;'). Characters are ignored while the script flag returned by IsScript is set.
        /// </summary>
        /// <param name="Creator">TXTCreating object whose TakeHTML method supplies the HTML</param>
        /// <param name="URL">URL of the HTML to analyse</param>
        /// <returns>List of words in document order, or null when TakeHTML returned null</returns>
        public List <string> WordSearching(TXTCreating Creator, string URL)
        {
            //Taking HTML
            string HTML = Creator.TakeHTML(URL);

            if (HTML == null)
            {
                //Callers test for null to know that the page could not be read
                return(null);
            }

            //Line breaks and tabs separate words exactly like spaces do
            HTML = HTML.Replace('\n', ' ').Replace('\r', ' ').Replace('\t', ' ');

            //Variables
            bool   takeChr  = false;
            bool   scripted = false;
            string word     = "";

            //Objects
            List <string> lstWords = new List <string>();

            //Loop to search for words
            for (int x = 0; x < HTML.Length; ++x)
            {
                char currentChr = HTML[x];

                //The keyword "script" ends on this character: IsScript receives the character
                //in front of the keyword (a space when there is none) and decides the script state
                //NOTE(review): IsScript is defined elsewhere; presumably '<' opens and '/' closes a script - confirm
                if (currentChr == 't' && x >= 5 && string.CompareOrdinal(HTML, x - 5, "script", 0, 6) == 0)
                {
                    scripted = IsScript(x >= 6 ? HTML[x - 6] : ' ');
                }

                if (currentChr == '>')
                {
                    //End of a tag: what follows is text content
                    takeChr = true;
                }
                else if (currentChr == '<')
                {
                    if (takeChr)
                    {
                        //Start of a tag: a tag ends the current word, otherwise the text of
                        //adjacent elements ("<li>one</li><li>two</li>") would be glued together
                        FlushWord(lstWords, ref word);
                        takeChr = false;
                    }
                }
                else if (takeChr && !scripted)
                {
                    if (currentChr == ' ')
                    {
                        FlushWord(lstWords, ref word);
                    }
                    else
                    {
                        word += currentChr;
                    }
                }
            }

            //Keep the last word when the document does not end with a separator
            FlushWord(lstWords, ref word);

            return(lstWords);
        }

        /// <summary>
        /// Appends the pending word in lower case to the list and clears it; does nothing when no word is pending
        /// </summary>
        /// <param name="words">List receiving the word</param>
        /// <param name="word">Pending word, reset to "" once stored</param>
        private static void FlushWord(List <string> words, ref string word)
        {
            if (word != "")
            {
                words.Add(word.ToLower());
                word = "";
            }
        }
        /// <summary>
        /// Fetches the HTML behind the URL through the given TXTCreating object and collects
        /// the values of the href attributes found between each "&lt;a" and the next "&lt;/a&gt;".
        /// Values may be enclosed in double or single quotes and white space around '=' is
        /// accepted. Empty values and unquoted values are skipped.
        /// </summary>
        /// <param name="Creator">TXTCreating object whose TakeHTML method supplies the HTML</param>
        /// <param name="URL">URL of the HTML to analyse</param>
        /// <returns>List of links in document order, or null when TakeHTML returned null</returns>
        public List <string> Finder(TXTCreating Creator, string URL)
        {
            //Taking HTML
            string HTML = Creator.TakeHTML(URL);

            if (HTML == null)
            {
                //Callers test for null to know that the page could not be read
                return(null);
            }

            //Objects
            List <string> lstLinks = new List <string>();

            //Loop to search for links
            int position = 0;

            while (position < HTML.Length)
            {
                //NOTE(review): "<a" also matches tags such as <abbr> or <article>; kept as it was
                int regionStart = HTML.IndexOf("<a", position, StringComparison.Ordinal);
                if (regionStart < 0)
                {
                    break;
                }

                //The region runs up to the next closing anchor, or to the end of the document
                int closingTag = HTML.IndexOf("</a>", regionStart, StringComparison.Ordinal);
                int regionEnd  = closingTag < 0 ? HTML.Length : closingTag;

                CollectHrefValues(HTML, regionStart + 2, regionEnd, lstLinks);

                if (closingTag < 0)
                {
                    break;
                }
                position = closingTag + 4;
            }

            return(lstLinks);
        }

        /// <summary>
        /// Adds every non-empty quoted href value located in html[start, end) to the list
        /// </summary>
        /// <param name="html">Document being scanned</param>
        /// <param name="start">Index of the first character to inspect</param>
        /// <param name="end">Index just after the last character to inspect</param>
        /// <param name="links">List receiving the values</param>
        private static void CollectHrefValues(string html, int start, int end, List <string> links)
        {
            int cursor = start;

            while (cursor < end)
            {
                int hrefAt = html.IndexOf("href", cursor, end - cursor, StringComparison.Ordinal);
                if (hrefAt < 0)
                {
                    return;
                }

                //Resume right after this occurrence when it turns out not to be an attribute
                cursor = hrefAt + 4;

                int valueAt = SkipWhiteSpace(html, cursor, end);
                if (valueAt >= end || html[valueAt] != '=')
                {
                    //"href" inside text or inside another value, not an attribute
                    continue;
                }

                valueAt = SkipWhiteSpace(html, valueAt + 1, end);
                if (valueAt >= end || (html[valueAt] != '"' && html[valueAt] != '\''))
                {
                    //Unquoted value: its end cannot be determined reliably, skip it
                    continue;
                }

                char quote = html[valueAt];
                valueAt++;

                int closingQuote = html.IndexOf(quote, valueAt, end - valueAt);
                if (closingQuote < 0)
                {
                    //Value never terminated inside this region
                    return;
                }

                //Empty values carry no address and would break callers reading the first character
                if (closingQuote > valueAt)
                {
                    links.Add(html.Substring(valueAt, closingQuote - valueAt));
                }

                //Searching on after the closing quote keeps "href" inside a value from being parsed again
                cursor = closingQuote + 1;
            }
        }

        /// <summary>
        /// Returns the index of the first non white space character of html at or after index, bounded by end
        /// </summary>
        private static int SkipWhiteSpace(string html, int index, int end)
        {
            while (index < end && char.IsWhiteSpace(html[index]))
            {
                index++;
            }
            return(index);
        }
Beispiel #4
0
        /// <summary>
        /// Drives the whole analysis of a website: collects the links of the start page and of
        /// every page it links to, extracts the words of each followed page, then asks the
        /// TXTCreating object to create one text per analysed page. Progress is written to the
        /// console. Nothing is done when VerifyURL rejects the URL.
        /// </summary>
        /// <param name="URL">URL of the website to analyse</param>
        public void Start(string URL)
        {
            //Variables
            int Analysed = 0;
            int Created  = 0;

            //Objects
            List <List <string> > TempWordLst  = new List <List <string> >();
            List <string>         lstLinksName = new List <string>();
            TXTCreating           Creator      = TXTCreating.GetTXT(URL);
            HTMLTxtAnalyser       Analyser     = HTMLTxtAnalyser.GetHTMLTxt();

            //Verify the link of the website
            if (!Creator.VerifyURL(URL))
            {
                return;
            }

            //Search of links on the website
            HTMLLinkFinder Finder = HTMLLinkFinder.GetHTMLLink();

            //Finder returns null when the page could not be read: carry on with no links
            List <string> firstLevel = Finder.Finder(Creator, URL) ?? new List <string>();
            List <string> lstLinks   = firstLevel;

            //Only the links of the start page are followed; lstLinks is rebuilt while firstLevel is read
            foreach (string link in firstLevel)
            {
                string address = ResolveAddress(URL, link);
                if (address == null)
                {
                    continue;
                }

                List <string> TempLst = Finder.Finder(Creator, address);
                if (TempLst != null)
                {
                    lstLinks = lstLinks.Union(TempLst).ToList();
                }
            }
            if (!lstLinks.Contains(URL))
            {
                lstLinks.Add(URL);
            }

            //Search of words
            foreach (string link in lstLinks)
            {
                string address = ResolveAddress(URL, link);
                if (address != null)
                {
                    List <string> words = Analyser.WordSearching(Creator, address);
                    if (words != null)
                    {
                        TempWordLst.Add(words);
                        lstLinksName.Add(PageName(URL, link));
                    }
                }
                Analysed++;
                Console.SetCursorPosition(0, 5);
                Console.WriteLine("Analysed : " + Analysed * 100 / lstLinks.Count + "%");
            }

            //Text creation
            for (int x = 0; x < lstLinksName.Count; ++x)
            {
                Creator.CreateTXT(lstLinksName[x], TempWordLst[x]);
                Created++;
                Console.SetCursorPosition(0, 6);
                Console.WriteLine("Created : " + Created * 100 / lstLinksName.Count + "%");
            }
            Console.WriteLine("\n\nWebsite analysed!!!");
        }

        /// <summary>
        /// Turns a link found on the site into the address to fetch, or returns null when the
        /// link is not followed. Followed links are: links starting with '/', links containing
        /// the site URL, and links longer than five characters ending with ".html".
        /// </summary>
        /// <param name="URL">URL of the website being analysed</param>
        /// <param name="link">Link as found in a page</param>
        private static string ResolveAddress(string URL, string link)
        {
            if (string.IsNullOrEmpty(link))
            {
                return(null);
            }
            if (link[0] == '/')
            {
                //Exactly one '/' between site and path, whether or not URL ends with one
                return(URL.TrimEnd('/') + link);
            }
            //Tested before the ".html" case so that absolute links are never appended to URL
            if (link.Contains(URL))
            {
                return(link);
            }
            if (link.Length > 5 && link.EndsWith(".html", StringComparison.Ordinal))
            {
                return(URL.TrimEnd('/') + "/" + link);
            }
            return(null);
        }

        /// <summary>
        /// Gives the name under which the text of a followed link is created: the link without
        /// its leading '/', the part after the site URL for absolute links, the link itself for
        /// ".html" links, and "Base" whenever that leaves nothing.
        /// </summary>
        /// <param name="URL">URL of the website being analysed</param>
        /// <param name="link">Non-empty link accepted by ResolveAddress</param>
        private static string PageName(string URL, string link)
        {
            string name;

            if (link[0] == '/')
            {
                name = link.Substring(1);
            }
            else if (link.Contains(URL))
            {
                //Distinct pages need distinct names, otherwise every absolute link would be "Base"
                name = link.StartsWith(URL, StringComparison.Ordinal)
                       ? link.Substring(URL.Length).TrimStart('/')
                       : "";
            }
            else
            {
                name = link;
            }

            return(name == "" ? "Base" : name);
        }