Beispiel #1
0
        /// <summary>
        /// Explores the directory <paramref name="_realPath"/> and, recursively, all of its
        /// sub-directories, registering every folder and file found in the database.
        /// </summary>
        /// <remarks>
        /// On the first iteration the folder itself is not added with <c>database.addFolder</c>;
        /// the id returned by <c>createRouteToPath</c> is used as its id instead.
        /// A folder whose name is matched by one of the "unauthorised character" regexes is
        /// skipped together with everything below it.
        /// A folder that cannot be listed (it vanished or access is denied) is reported on the
        /// debug output and skipped, so that it does not abort the rest of the exploration.
        /// </remarks>
        /// \todo Protect the non-first iterations + comments
        /// <param name="_realPath">Path of the directory to explore</param>
        /// <param name="_type">Type of exploration; file contents are only read for <c>Full_And_File_Contents</c></param>
        /// <param name="_parentID">Database id of the parent folder; overwritten on the first iteration</param>
        /// <param name="_firstIteration">True for the root call, false for the recursive calls on child folders</param>
        /// <exception cref="DriveNotFoundException">Thrown on the first iteration when <paramref name="_realPath"/> does not exist</exception>
        private void recursiveExploration(string _realPath, FileExplorationType _type = FileExplorationType.Full_And_File_Contents, string _parentID = "0", bool _firstIteration = true)
        {
            database = DB.getInstance();

            if (_firstIteration)
            {
                //check if the path is reachable
                if (!Directory.Exists(_realPath))
                {
                    throw new DriveNotFoundException();
                }

                //make sure all the folders are set up to this point
                _parentID = createRouteToPath(_realPath);
            }

            //the folder name is the last segment of the path
            string[] folderPaths = _realPath.Split(new char[] { '/', '\\' });
            string folderName = folderPaths[folderPaths.Length - 1];

            //check that the folder name is correct. if not, do not do further operations in it to avoid weird things caused by the Directory.GetDirectories()
            if (UNAUTHORISED_CHAR_IN_FILES_REGEX.IsMatch(folderName) || UNAUTHORISED_CHAR_IN_FOLDER_REGEX.IsMatch(folderName))
            {
                Debug.WriteLine("Error : Folder with name not supported : " + _realPath);
                return;
            }

            //the root folder reuses the id of its parent route, every other folder is added to the DB
            string folderID = _firstIteration
                ? _parentID
                : database.addFolder(folderName, _parentID, FileOrigin.Local);

            //List the child directories; a folder that vanished or is not readable has no children to explore
            string[] childPaths;
            try
            {
                childPaths = Directory.GetDirectories(_realPath);
            }
            catch (DirectoryNotFoundException)
            {
                Debug.Print("Folder Error");
                childPaths = new string[0];
            }
            catch (System.UnauthorizedAccessException)
            {
                Debug.WriteLine("Error : Folder not accessible : " + _realPath);
                childPaths = new string[0];
            }

            //Recursively call all the child directories
            foreach (string childPath in childPaths)
            {
                //guard each child on its own so that one failing child does not skip its siblings
                try
                {
                    recursiveExploration(childPath, _type, folderID, false);
                }
                catch (DirectoryNotFoundException)
                {
                    Debug.Print("Folder Error");
                }
            }

            //List the files with the same protection as the directories
            string[] filePaths;
            try
            {
                filePaths = Directory.GetFiles(_realPath);
            }
            catch (DirectoryNotFoundException)
            {
                Debug.Print("Folder Error");
                filePaths = new string[0];
            }
            catch (System.UnauthorizedAccessException)
            {
                Debug.WriteLine("Error : Folder not accessible : " + _realPath);
                filePaths = new string[0];
            }

            //Check once if we need to read the file contents or not
            bool readContents = _type == FileExplorationType.Full_And_File_Contents;

            //Call all the files to be added
            foreach (string filePath in filePaths)
            {
                addFile(filePath, _realPath, folderID, readContents);
            }
        }
Beispiel #2
0
        /// <summary>
        /// Start the indexation of a website: the start URL is registered as a root folder in the
        /// database, then every link collected from it is stored as a file of that folder.
        /// </summary>
        /// <remarks>
        /// Links matched by <c>READABLE_EXTENSION</c> are downloaded and passed to
        /// <c>readPageContent</c>, which receives the link set by reference (presumably to add the
        /// links found in the page); they are stored with their text and an empty extension.
        /// Every other link is stored by name and extension only.
        /// When the start URL is malformed or cannot be fetched nothing is indexed and the reason
        /// is written to the debug output.
        /// </remarks>
        /// \todo Handle the type + maybe create more folders
        /// \todo Handle website not reachable
        /// <param name="_url">The url of the website from which get the content </param>
        /// <param name="_type">The type of exploration wanted (not used by this method yet)</param>
        public void index(string _url, FileExplorationType _type)
        {
            //Check if the URL is valid
            //http://stackoverflow.com/a/5926605

            bool urlIsCorrect = false;

            try
            {
                //the creation is protected too: it throws on a malformed or unsupported URL
                WebRequest webRequest = WebRequest.Create(_url);

                //only the reachability matters here, release the connection immediately
                using (WebResponse webResponse = webRequest.GetResponse())
                {
                    urlIsCorrect = true;
                }
            }
            catch (System.Exception e)
            {
                System.Diagnostics.Debug.WriteLine("Error : Website not reachable : " + _url + " (" + e.Message + ")");
            }

            if (!urlIsCorrect)
            {
                //TODO display an error
                return;
            }

            //make sure there's a / at the end of the link
            if (!_url.EndsWith("/", System.StringComparison.Ordinal))
            {
                _url += '/';
            }

            //add the root folder to the DB
            database = DB.getInstance();

            string idParentFolder = database.addFolder(_url, "0", FileOrigin.Web);

            //Start the indexation
            #region Indexation

            //Create an hashset used to explore all the links between the pages, and add the current page to it
            HashSet<string> linksHashSet = new HashSet<string>();

            linksHashSet.Add(_url);

            string domain = DOMAIN_FINDER.Match(_url).Groups[1].Value;

            //Initialise the web client, disposed once the exploration is over
            using (WebClient wc = new WebClient())
            {
                //Loop until the end of the hashset. The set grows while it is explored, this relies on
                //the new links being enumerated after the existing ones (nothing is ever removed from it)
                for (int i = 0; i < linksHashSet.Count; i++)
                {
                    //ElementAt walks the set, so fetch the current link only once per iteration
                    string link = linksHashSet.ElementAt(i);

                    bool isReadablePage = READABLE_EXTENSION.IsMatch(link);

                    //check if this links start with a domain or no
                    string filName = link;

                    if (filName.Length == 0 || filName[0] != '/')
                    {
                        //remove the domain and stock the file name
                        filName = DOMAIN_FINDER.Replace(filName, "");
                    }

                    //split the name from the extension on the first dot (the extension part may be missing)
                    string[] nameParts = filName.Split(new char[] { '.' }, 2);

                    if (isReadablePage)
                    {
                        //try to read the content of the page, a page that cannot be downloaded is skipped
                        string filTextContent;

                        try
                        {
                            filTextContent = wc.DownloadString(link);
                        }
                        catch (System.Exception e)
                        {
                            System.Diagnostics.Debug.WriteLine("Error : Page not readable : " + link + " (" + e.Message + ")");
                            filTextContent = "";
                        }

                        //Parse only the text, and fill the hashet with links if the page contains text
                        if (filTextContent != "")
                        {
                            filTextContent = readPageContent(domain, filTextContent, ref linksHashSet);

                            //readable pages are stored without extension
                            database.addFile(nameParts[0], "", idParentFolder, filTextContent);
                        }
                    }
                    else
                    {
                        //else simply add it to the file list, with an empty extension when the link has no dot
                        string filExtension = nameParts.Length > 1 ? nameParts[1] : "";

                        database.addFile(nameParts[0], filExtension, idParentFolder);
                    }
                }
            }
            #endregion
        }
Beispiel #3
0
 /// <summary>
 /// Entry point of the local indexation: explores the given path through
 /// <c>recursiveExploration</c>.
 /// </summary>
 /// <param name="_path">Root path to index</param>
 /// <param name="_type">Type of exploration; when omitted, all the files are explored together with their content</param>
 public void index(string _path, FileExplorationType _type = FileExplorationType.Full_And_File_Contents)
 {
     // The parent id and first-iteration arguments keep their default values
     recursiveExploration(_realPath: _path, _type: _type);
 }