/// <summary>
/// Loads actors from a semicolon-separated file into a dictionary keyed by actor name.
/// </summary>
/// <remarks>
/// Each line is read as, in order: name, name URL, award count and an Oscar flag.
/// The Oscar flag is false only when the first character of the field is 'F'.
/// When a name occurs more than once, only the first occurrence is kept.
/// </remarks>
/// <param name="file">Path passed to <c>FileAO.ReadFile</c>.</param>
/// <returns>The parsed actors, indexed by <c>Actor.Name</c>.</returns>
public static Dictionary<string, Actor> ReadHashActorsFromCSV(string file)
{
    var actorsByName = new Dictionary<string, Actor>();
    string[] rows = FileAO.ReadFile(file);

    foreach (string row in rows)
    {
        string[] fields = row.Split(';');

        var parsed = new Actor
        {
            Name = fields[0],
            NameUrl = fields[1],
            Awards = int.Parse(fields[2]),
            Oscar = fields[3][0] != 'F'
        };

        // First occurrence wins; later duplicates are ignored.
        if (actorsByName.ContainsKey(parsed.Name))
        {
            continue;
        }

        actorsByName.Add(parsed.Name, parsed);
    }

    return actorsByName;
}
/// <summary>
/// Parses movies from a semicolon-separated file, one movie per line.
/// </summary>
/// <remarks>
/// Fields 0-9 are always read: name, name URL, rating, director, director URL, actors,
/// actors URL, genre, certificate and runtime. If the line has more than ten fields,
/// fields 10-13 are read as award flags; if it has more than fourteen, fields 14-30 are
/// read as content flags. A flag is true only when the field's first character is 'T'.
/// NOTE(review): <c>float.Parse</c> is called without an explicit culture, so the result
/// depends on the current culture - confirm this matches how the file was written.
/// </remarks>
/// <param name="file">Path passed to <c>FileAO.ReadFile</c>.</param>
/// <returns>The parsed movies in file order.</returns>
public static List<Movie> ReadMoviesFromCSV(string file)
{
    var parsedMovies = new List<Movie>();
    string[] rows = FileAO.ReadFile(file);

    foreach (string row in rows)
    {
        string[] fields = row.Split(';');
        var movie = new Movie();

        // Mandatory columns.
        movie.Name = fields[0];
        movie.NameUrl = fields[1];
        movie.Rating = float.Parse(fields[2]);
        movie.Director = fields[3];
        movie.DirectorUrl = fields[4];
        movie.Actors = fields[5];
        movie.ActorsUrl = fields[6];
        movie.Genre = fields[7];
        movie.Certificate = fields[8];
        movie.Runtime = float.Parse(fields[9]);

        // Optional award flags.
        if (fields.Length > 10)
        {
            movie.AwardedDirector = fields[10][0] == 'T';
            movie.OscarDirector = fields[11][0] == 'T';
            movie.AwardedActors = fields[12][0] == 'T';
            movie.OscarActors = fields[13][0] == 'T';
        }

        // Optional content flags.
        if (fields.Length > 14)
        {
            movie.isViolent = fields[14][0] == 'T';
            movie.hasFireguns = fields[15][0] == 'T';
            movie.isGoreViolent = fields[16][0] == 'T';
            movie.hasSex = fields[17][0] == 'T';
            movie.hasNudeScenes = fields[18][0] == 'T';
            movie.aboutRelationships = fields[19][0] == 'T';
            movie.aboutFamily = fields[20][0] == 'T';
            movie.hasFlashbacks = fields[21][0] == 'T';
            movie.hasSurpriseEnding = fields[22][0] == 'T';
            movie.aboutHumanDrama = fields[23][0] == 'T';
            movie.aboutNatureOrCity = fields[24][0] == 'T';
            movie.hasNowadaysTechnology = fields[25][0] == 'T';
            movie.isSequel = fields[26][0] == 'T';
            movie.isBasedOnNovel = fields[27][0] == 'T';
            movie.isWrittenByDirector = fields[28][0] == 'T';
            movie.isIndependent = fields[29][0] == 'T';
            movie.hasCameraWorks = fields[30][0] == 'T';
        }

        parsedMovies.Add(movie);
    }

    return parsedMovies;
}
/// <summary>
/// Fetches the tags of every movie in <c>hashMovies</c>, counts how often each tag occurs
/// in <c>hashTags</c> and exports the counts to text files.
/// </summary>
/// <remarks>
/// For each movie the tags are fetched with <c>GetTags()</c>, the movie is appended to
/// "movie_temp_tags.txt" and the progress bar is updated. Whenever the progress value is a
/// multiple of ten and differs from the last checkpoint, a "temp_tags{value}.txt" snapshot
/// is written. If anything throws, the counts gathered so far are written to
/// "temp_tags.txt" and the error is logged; "tags.txt" is still written afterwards, but the
/// success message is logged only when no error occurred.
/// </remarks>
private void GetMovieTags()
{
    // All output files live in this folder.
    const string csvFolder = @"C:\Users\Yvens\Documents\GitHub\DisciplinaAprendizado\Codes\ImdbCrawler\CSV files\";

    if (logging())
    {
        logInfo("Carregando tags dos filmes.");
    }

    int counter = 0;
    int lastCheckpoint = progressBarProcess.Value - 1;
    bool failed = false;

    try
    {
        foreach (string key in hashMovies.Keys)
        {
            var movie = hashMovies[key];
            List<string> tags = movie.GetTags();
            FileAO.ExportMovieTagToCSV(movie, csvFolder + "movie_temp_tags.txt");

            // Progress is computed from the number of movies finished before this one.
            progressBarProcess.Value = (int)((counter / (float)hashMovies.Count) * 100.0f);
            counter++;

            foreach (string tag in tags)
            {
                if (hashTags.ContainsKey(tag))
                {
                    hashTags[tag]++;
                }
                else
                {
                    hashTags.Add(tag, 1);
                }
            }

            // Periodic snapshot so a long crawl can be resumed or inspected.
            if (progressBarProcess.Value % 10 == 0 && progressBarProcess.Value != lastCheckpoint)
            {
                lastCheckpoint = progressBarProcess.Value;
                FileAO.ExportTagsToTxt(hashTags, csvFolder + "temp_tags" + progressBarProcess.Value + ".txt");
            }

            if (logging())
            {
                logInfo("Filme nº: " + counter);
            }
        }
    }
    catch (System.Exception ex)
    {
        // Best effort: keep what was counted so far, but do not hide the failure.
        failed = true;
        FileAO.ExportTagsToTxt(hashTags, csvFolder + "temp_tags.txt");

        if (logging())
        {
            logInfo("Erro ao carregar tags dos filmes: " + ex.Message);
        }
    }

    FileAO.ExportTagsToTxt(hashTags, csvFolder + "tags.txt");
    progressBarProcess.Value = 100;

    if (!failed && logging())
    {
        logInfo("Sucesso!");
    }
}
/// <summary>
/// Walks every movie in <c>hashMovies</c> and adds each actor that is not yet in
/// <c>hashActors</c>, keyed by the actor's URL.
/// </summary>
/// <remarks>
/// <c>movie.Actors</c> and <c>movie.ActorsUrl</c> are split on '@' and paired by position.
/// <c>actor.GetInfo()</c> is called before <c>actorsMutex</c> is acquired. The insertion
/// (and the optional export to "temp_actors.txt" when <c>tempFile()</c> is true) runs
/// between <c>WaitOne()</c> and <c>Release()</c>; the release is in a <c>finally</c> so it
/// also happens when the insertion or export throws. Membership is re-checked after
/// acquiring, so an actor inserted in the meantime is not added twice.
/// The progress bar is updated per movie and the number of added actors is logged.
/// </remarks>
private void GetActors()
{
    int counter = 0;
    int initialActorsCount = hashActors.Count;
    progressBarProcess.Value = 0;

    foreach (string key in hashMovies.Keys)
    {
        Movie movie = hashMovies[key];
        progressBarProcess.Value = (int)((counter / (float)hashMovies.Count) * 100.0f);

        if (movie.Actors.Length != 0)
        {
            string[] actorsName = movie.Actors.Split('@');
            string[] actorsUrl = movie.ActorsUrl.Split('@');

            for (int i = 0; i < actorsName.Length; i++)
            {
                Actor actor = new Actor();
                actor.Name = actorsName[i];
                actor.NameUrl = actorsUrl[i];

                if (!hashActors.ContainsKey(actor.NameUrl))
                {
                    // Done before acquiring so the critical section stays short.
                    actor.GetInfo();

                    actorsMutex.WaitOne();
                    try
                    {
                        // Re-check: the actor may have been added while GetInfo() was running.
                        if (!hashActors.ContainsKey(actor.NameUrl))
                        {
                            hashActors.Add(actor.NameUrl, actor);

                            if (tempFile())
                            {
                                FileAO.ExportActorToCSV(actor, @"C:\Users\Yvens\Documents\GitHub\DisciplinaAprendizado\Codes\ImdbCrawler\CSV files\temp_actors.txt");
                            }
                        }
                    }
                    finally
                    {
                        actorsMutex.Release();
                    }
                }
            }
        }

        counter++;
    }

    progressBarProcess.Value = 100;
    int addedActors = hashActors.Count - initialActorsCount;

    if (logging())
    {
        logInfo("Atores adicionados: " + addedActors);
    }
}
/// <summary>
/// Walks every movie in <c>hashMovies</c> and adds its director to <c>hashDirectors</c>,
/// keyed by the director's URL, when not already present.
/// </summary>
/// <remarks>
/// Movies whose <c>Director</c> string is empty are skipped. <c>director.GetInfo()</c> is
/// called before <c>directorsMutex</c> is acquired. The insertion (and the optional export
/// to "temp_directors.txt" when <c>tempFile()</c> is true) runs between <c>WaitOne()</c>
/// and <c>Release()</c>; the release is in a <c>finally</c> so it also happens when the
/// insertion or export throws. Membership is re-checked after acquiring, so a director
/// inserted in the meantime is not added twice.
/// The progress bar is updated per movie and the number of added directors is logged.
/// </remarks>
private void GetDirectors()
{
    int counter = 0;
    int initialDirectorsCount = hashDirectors.Count;
    progressBarProcess.Value = 0;

    foreach (string key in hashMovies.Keys)
    {
        Movie movie = hashMovies[key];
        progressBarProcess.Value = (int)((counter / (float)hashMovies.Count) * 100.0f);

        if (movie.Director.Length != 0)
        {
            Director director = new Director();
            director.Name = movie.Director;
            director.NameUrl = movie.DirectorUrl;

            if (!hashDirectors.ContainsKey(director.NameUrl))
            {
                // Done before acquiring so the critical section stays short.
                director.GetInfo();

                directorsMutex.WaitOne();
                try
                {
                    // Re-check: the director may have been added while GetInfo() was running.
                    if (!hashDirectors.ContainsKey(director.NameUrl))
                    {
                        hashDirectors.Add(director.NameUrl, director);

                        if (tempFile())
                        {
                            FileAO.ExportDirectorToCSV(director, @"C:\Users\Yvens\Documents\GitHub\DisciplinaAprendizado\Codes\ImdbCrawler\CSV files\temp_directors.txt");
                        }
                    }
                }
                finally
                {
                    directorsMutex.Release();
                }
            }
        }

        counter++;
    }

    progressBarProcess.Value = 100;
    int addedDirectors = hashDirectors.Count - initialDirectorsCount;

    if (logging())
    {
        logInfo("Diretores adicionados: " + addedDirectors);
    }
}
/// <summary>
/// Parses movies from a semicolon-separated file, one movie per line.
/// </summary>
/// <remarks>
/// Fields 0-9 are always read: name, name URL, rating, director, director URL, actors,
/// actors URL, genre, certificate and runtime. If the line has more than ten fields,
/// fields 10-13 are read as award flags, each true only when its first character is 'T'.
/// Any further fields are ignored.
/// NOTE(review): <c>float.Parse</c> is called without an explicit culture, so the result
/// depends on the current culture - confirm this matches how the file was written.
/// </remarks>
/// <param name="file">Path passed to <c>FileAO.ReadFile</c>.</param>
/// <returns>The parsed movies in file order.</returns>
public static List<Movie> ReadMoviesFromCSV(string file)
{
    var parsedMovies = new List<Movie>();
    string[] rows = FileAO.ReadFile(file);

    foreach (string row in rows)
    {
        string[] fields = row.Split(';');
        var movie = new Movie();

        // Mandatory columns.
        movie.Name = fields[0];
        movie.NameUrl = fields[1];
        movie.Rating = float.Parse(fields[2]);
        movie.Director = fields[3];
        movie.DirectorUrl = fields[4];
        movie.Actors = fields[5];
        movie.ActorsUrl = fields[6];
        movie.Genre = fields[7];
        movie.Certificate = fields[8];
        movie.Runtime = float.Parse(fields[9]);

        // Optional award flags.
        if (fields.Length > 10)
        {
            movie.AwardedDirector = fields[10][0] == 'T';
            movie.OscarDirector = fields[11][0] == 'T';
            movie.AwardedActors = fields[12][0] == 'T';
            movie.OscarActors = fields[13][0] == 'T';
        }

        parsedMovies.Add(movie);
    }

    return parsedMovies;
}
/// <summary>
/// Click handler that asks the user for a destination file and exports every director in
/// <c>hashDirectors</c> to it through <c>FileAO.ExportDirectorsToCSV</c>.
/// </summary>
/// <remarks>
/// Nothing is exported unless the dialog returns <c>DialogResult.OK</c>. The dialog is
/// disposed when the handler finishes, whether or not the export succeeds.
/// </remarks>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void buttonExportDirectors_Click(object sender, EventArgs e)
{
    using (SaveFileDialog saveFileDialog = new SaveFileDialog())
    {
        saveFileDialog.InitialDirectory = @"C:\Users\Yvens\Documents\GitHub\DisciplinaAprendizado\Codes\ImdbCrawler\CSV files\";
        saveFileDialog.Filter = "txt files (*.txt)|*.txt";
        saveFileDialog.FilterIndex = 1;
        saveFileDialog.RestoreDirectory = true;

        if (saveFileDialog.ShowDialog() != DialogResult.OK)
        {
            return;
        }

        string path = saveFileDialog.FileName;

        List<Director> directors = new List<Director>();
        foreach (string key in hashDirectors.Keys)
        {
            directors.Add(hashDirectors[key]);
        }

        FileAO.ExportDirectorsToCSV(directors, path);
    }
}
/// <summary>
/// Parses actors from a semicolon-separated file, one actor per line.
/// </summary>
/// <remarks>
/// Each line is read as, in order: name, name URL, award count and an Oscar flag.
/// The Oscar flag is false only when the first character of the field is 'F'.
/// Duplicates are kept; every line yields one entry.
/// </remarks>
/// <param name="file">Path passed to <c>FileAO.ReadFile</c>.</param>
/// <returns>The parsed actors in file order.</returns>
public static List<Actor> ReadActorsFromCSV(string file)
{
    var parsedActors = new List<Actor>();
    string[] rows = FileAO.ReadFile(file);

    foreach (string row in rows)
    {
        string[] fields = row.Split(';');

        parsedActors.Add(new Actor
        {
            Name = fields[0],
            NameUrl = fields[1],
            Awards = int.Parse(fields[2]),
            Oscar = fields[3][0] != 'F'
        });
    }

    return parsedActors;
}
/// <summary>
/// Builds an ARFF description of the given movies (header plus one data row per movie)
/// and hands the lines to <c>FileAO.ExportToArff</c>.
/// </summary>
/// <remarks>
/// Row layout, matching the declared attributes: duration bucket, four award flags,
/// genre category, certificate, seventeen content flags and the classification returned by
/// <c>movie.Classification</c> after <c>movie.GetClassification()</c> has been called.
/// Only the first '@'-separated entry of <c>movie.Genre</c> is used for the genre category.
/// The genre mapping accepts the correct spelling "Mystery" in addition to the legacy
/// "Mistery"; previously "Mystery" fell through to "Others".
/// A certificate outside G, R, PG_13 and PG (including a null one) is written as NOT_RATED.
/// </remarks>
/// <param name="movies">Movies to export; only the values are used.</param>
/// <param name="destination">Destination passed to <c>FileAO.ExportToArff</c>.</param>
public static void ExportMoviesToWeka(Dictionary<string, Movie> movies, string destination)
{
    List<string> fileLines = new List<string>();
    fileLines.AddRange(new string[]
    {
        "@RELATION movie",
        "@ATTRIBUTE duration {Short,Regular,Long,Very_Long}",
        "@ATTRIBUTE awardedDirector {0,1}",
        "@ATTRIBUTE oscarDirector {0,1}",
        "@ATTRIBUTE awardedActor {0,1}",
        "@ATTRIBUTE oscarActor {0,1}",
        "@ATTRIBUTE genre {Drama,Horror,Action,Comedy,Others}",
        "@ATTRIBUTE certificate {G, R, PG_13, PG, NOT_RATED}",
        "@ATTRIBUTE isViolent {0,1}",
        "@ATTRIBUTE hasFireguns {0,1}",
        "@ATTRIBUTE isGoreViolent {0,1}",
        "@ATTRIBUTE hasSex {0,1}",
        "@ATTRIBUTE hasNudeScenes {0,1}",
        "@ATTRIBUTE aboutRelationships {0,1}",
        "@ATTRIBUTE aboutFamily {0,1}",
        "@ATTRIBUTE hasFlashbacks {0,1}",
        "@ATTRIBUTE hasSurpriseEnding {0,1}",
        "@ATTRIBUTE aboutHumanDrama {0,1}",
        "@ATTRIBUTE aboutNatureOrCity {0,1}",
        "@ATTRIBUTE hasNowadaysTechnology {0,1}",
        "@ATTRIBUTE isSequel {0,1}",
        "@ATTRIBUTE isBasedOnNovel {0,1}",
        "@ATTRIBUTE isWrittenByDirector {0,1}",
        "@ATTRIBUTE isIndependent {0,1}",
        "@ATTRIBUTE hasCameraWorks {0,1}",
        "@ATTRIBUTE rating {Bad,Regular,Good}",
        "@DATA"
    });

    foreach (Movie movie in movies.Values)
    {
        List<string> columns = new List<string>();

        // Duration bucket.
        string runtime;
        if (movie.Runtime < 88.0f)
        {
            runtime = "Short";
        }
        else if (movie.Runtime < 122.0f)
        {
            runtime = "Regular";
        }
        else if (movie.Runtime < 160.0f)
        {
            runtime = "Long";
        }
        else
        {
            runtime = "Very_Long";
        }
        columns.Add(runtime);

        columns.Add(movie.AwardedDirector ? "1" : "0");
        columns.Add(movie.OscarDirector ? "1" : "0");
        columns.Add(movie.AwardedActors ? "1" : "0");
        columns.Add(movie.OscarActors ? "1" : "0");

        // Genre category, decided by the first listed genre only.
        string actualGenre = movie.Genre.Split('@')[0];
        string genre;
        switch (actualGenre)
        {
            case "Drama":
            case "Family":
            case "Romance":
            case "History":
            case "Reality-TV":
            case "Adult":
            case "Biography":
                genre = "Drama";
                break;
            case "Horror":
            case "Thriller":
            case "Mystery":
            case "Mistery": // legacy misspelling kept for backward compatibility
                genre = "Horror";
                break;
            case "Action":
            case "Adventure":
            case "Crime":
            case "Sci-Fi":
            case "Fantasy":
            case "War":
            case "Western":
            case "Sport":
                genre = "Action";
                break;
            case "Comedy":
                genre = "Comedy";
                break;
            default:
                genre = "Others";
                break;
        }
        columns.Add(genre);

        // Certificate: known values pass through, everything else is NOT_RATED.
        string certificate = movie.Certificate;
        switch (certificate)
        {
            case "R":
            case "G":
            case "PG_13":
            case "PG":
                break;
            default:
                certificate = "NOT_RATED";
                break;
        }
        columns.Add(certificate);

        movie.GetClassification();
        string rating = movie.Classification;

        // Content flags, in the same order as the attribute declarations above.
        bool[] contentFlags =
        {
            movie.IsViolent,
            movie.HasFireguns,
            movie.IsGoreViolent,
            movie.HasSex,
            movie.HasNudeScenes,
            movie.AboutRelationships,
            movie.AboutFamily,
            movie.HasFlashbacks,
            movie.HasSurpriseEnding,
            movie.AboutHumanDrama,
            movie.AboutNatureOrCity,
            movie.HasNowadaysTechnology,
            movie.IsSequel,
            movie.IsBasedOnNovel,
            movie.IsWrittenByDirector,
            movie.IsIndependent,
            movie.HasCameraWorks
        };
        foreach (bool flag in contentFlags)
        {
            columns.Add(flag ? "1" : "0");
        }

        columns.Add(rating);
        fileLines.Add(string.Join(",", columns.ToArray()));
    }

    FileAO.ExportToArff(fileLines, destination);
}
/// <summary>
/// Builds an ARFF description of the given movies (header plus one data row per movie)
/// and hands the lines to <c>FileAO.ExportToArff</c>.
/// </summary>
/// <remarks>
/// Row layout, matching the declared attributes: duration bucket, four award flags,
/// genre category, certificate and the classification returned by
/// <c>movie.Classification</c> after <c>movie.GetClassification()</c> has been called.
/// As before, <c>movie.Genre</c> is overwritten with its first '@'-separated entry and
/// <c>movie.GetMovieGenre()</c> is called. The genre column used to be written from a
/// local that was never assigned, so it was always empty; it is now derived from the first
/// genre, using the categories declared in the genre attribute.
/// The "@DATA" marker is always emitted, since the data rows follow the header directly.
/// A certificate outside G, R, PG_13 and PG (including a null one) is written as NOT_RATED.
/// NOTE(review): the rating attribute declares "Very Bad" unquoted with a space - confirm
/// this matches what <c>movie.Classification</c> produces and what the ARFF consumer accepts.
/// </remarks>
/// <param name="movies">Movies to export, in list order.</param>
/// <param name="destination">Destination passed to <c>FileAO.ExportToArff</c>.</param>
public static void ExportMoviesToWeka(List<Movie> movies, string destination)
{
    List<string> fileLines = new List<string>();
    fileLines.AddRange(new string[]
    {
        "@RELATION movie",
        "@ATTRIBUTE duration {Short,Regular,Long,Very_Long}",
        "@ATTRIBUTE awardedDirector {0,1}",
        "@ATTRIBUTE oscarDirector {0,1}",
        "@ATTRIBUTE awardedActor {0,1}",
        "@ATTRIBUTE oscarActor {0,1}",
        "@ATTRIBUTE genre {Drama,Horror,Action,Comedy,Others}",
        "@ATTRIBUTE certificate {G, R, PG_13, PG, NOT_RATED}",
        "@ATTRIBUTE rating {Very Bad, Bad,Regular,Good, Excellent}",
        "@DATA"
    });

    foreach (Movie movie in movies)
    {
        List<string> columns = new List<string>();

        // Duration bucket.
        string runtime;
        if (movie.Runtime < 88.0f)
        {
            runtime = "Short";
        }
        else if (movie.Runtime < 122.0f)
        {
            runtime = "Regular";
        }
        else if (movie.Runtime < 160.0f)
        {
            runtime = "Long";
        }
        else
        {
            runtime = "Very_Long";
        }
        columns.Add(runtime);

        columns.Add(movie.AwardedDirector ? "1" : "0");
        columns.Add(movie.OscarDirector ? "1" : "0");
        columns.Add(movie.AwardedActors ? "1" : "0");
        columns.Add(movie.OscarActors ? "1" : "0");

        // Kept from the original: the movie's genre is reduced to its first entry and
        // GetMovieGenre() is invoked for whatever it does to the movie.
        string actualGenre = movie.Genre.Split('@')[0];
        movie.Genre = actualGenre;
        movie.GetMovieGenre();

        // Genre category for the row, limited to the values declared in the header.
        string genre;
        switch (actualGenre)
        {
            case "Drama":
            case "Family":
            case "Romance":
            case "History":
            case "Reality-TV":
            case "Adult":
            case "Biography":
                genre = "Drama";
                break;
            case "Horror":
            case "Thriller":
            case "Mystery":
            case "Mistery":
                genre = "Horror";
                break;
            case "Action":
            case "Adventure":
            case "Crime":
            case "Sci-Fi":
            case "Fantasy":
            case "War":
            case "Western":
            case "Sport":
                genre = "Action";
                break;
            case "Comedy":
                genre = "Comedy";
                break;
            default:
                genre = "Others";
                break;
        }
        columns.Add(genre);

        // Certificate: known values pass through, everything else is NOT_RATED.
        string certificate = movie.Certificate;
        switch (certificate)
        {
            case "R":
            case "G":
            case "PG_13":
            case "PG":
                break;
            default:
                certificate = "NOT_RATED";
                break;
        }
        columns.Add(certificate);

        movie.GetClassification();
        columns.Add(movie.Classification);

        fileLines.Add(string.Join(",", columns.ToArray()));
    }

    FileAO.ExportToArff(fileLines, destination);
}
/// <summary>
/// Reads the given file through <c>FileAO.ReadFile</c> and forwards the resulting lines to
/// the <c>GenreStatistics</c> overload that takes them.
/// </summary>
/// <param name="filepath">Path passed to <c>FileAO.ReadFile</c>.</param>
public static void GenreStatistics(string filepath)
{
    string[] fileContents = FileAO.ReadFile(filepath);

    GenreStatistics(fileContents);
}
/// <summary>
/// Builds an ARFF description of the given movies (header plus one data row per movie),
/// hands the lines to <c>FileAO.ExportToArff</c> and prints distribution statistics to the
/// console.
/// </summary>
/// <remarks>
/// Row layout, matching the declared attributes: duration bucket, four award flags,
/// genre category, certificate and the classification returned by
/// <c>movie.Classification</c> after <c>movie.GetClassification()</c> has been called.
/// Only the first '@'-separated entry of <c>movie.Genre</c> is used for the genre category.
/// The genre mapping accepts the correct spelling "Mystery" in addition to the legacy
/// "Mistery"; previously "Mystery" fell through to "Others".
/// The Bad/Regular/Good tallies were never incremented before and always printed zero;
/// they are now counted from the classification string (values other than "Bad",
/// "Regular" and "Good" are not tallied).
/// A certificate outside G, R, PG_13 and PG (including a null one) is written as NOT_RATED.
/// </remarks>
/// <param name="movies">Movies to export, in list order.</param>
/// <param name="destination">Destination passed to <c>FileAO.ExportToArff</c>.</param>
public static void ExportMoviesToWeka(List<Movie> movies, string destination)
{
    List<string> fileLines = new List<string>();
    fileLines.AddRange(new string[]
    {
        "@RELATION movie",
        "@ATTRIBUTE duration {Short,Regular,Long,Very_Long}",
        "@ATTRIBUTE awardedDirector {0,1}",
        "@ATTRIBUTE oscarDirector {0,1}",
        "@ATTRIBUTE awardedActor {0,1}",
        "@ATTRIBUTE oscarActor {0,1}",
        "@ATTRIBUTE genre {Drama,Horror,Action,Comedy,Others}",
        "@ATTRIBUTE certificate {G, R, PG_13, PG, NOT_RATED}",
        "@ATTRIBUTE rating {Bad,Regular,Good}",
        "@DATA"
    });

    // Tallies printed at the end.
    int ratingGood = 0, ratingRegular = 0, ratingBad = 0;
    int drama = 0, horror = 0, action = 0, comedy = 0, others = 0;
    int shorte = 0, regular = 0, longe = 0, verylong = 0;

    foreach (Movie movie in movies)
    {
        List<string> columns = new List<string>();

        // Duration bucket.
        string runtime;
        if (movie.Runtime < 88.0f)
        {
            runtime = "Short";
            shorte++;
        }
        else if (movie.Runtime < 122.0f)
        {
            runtime = "Regular";
            regular++;
        }
        else if (movie.Runtime < 160.0f)
        {
            runtime = "Long";
            longe++;
        }
        else
        {
            runtime = "Very_Long";
            verylong++;
        }
        columns.Add(runtime);

        columns.Add(movie.AwardedDirector ? "1" : "0");
        columns.Add(movie.OscarDirector ? "1" : "0");
        columns.Add(movie.AwardedActors ? "1" : "0");
        columns.Add(movie.OscarActors ? "1" : "0");

        // Genre category, decided by the first listed genre only.
        string actualGenre = movie.Genre.Split('@')[0];
        string genre;
        switch (actualGenre)
        {
            case "Drama":
            case "Family":
            case "Romance":
            case "History":
            case "Reality-TV":
            case "Adult":
            case "Biography":
                genre = "Drama";
                drama++;
                break;
            case "Horror":
            case "Thriller":
            case "Mystery":
            case "Mistery": // legacy misspelling kept for backward compatibility
                genre = "Horror";
                horror++;
                break;
            case "Action":
            case "Adventure":
            case "Crime":
            case "Sci-Fi":
            case "Fantasy":
            case "War":
            case "Western":
            case "Sport":
                genre = "Action";
                action++;
                break;
            case "Comedy":
                genre = "Comedy";
                comedy++;
                break;
            default:
                genre = "Others";
                others++;
                break;
        }
        columns.Add(genre);

        // Certificate: known values pass through, everything else is NOT_RATED.
        string certificate = movie.Certificate;
        switch (certificate)
        {
            case "R":
            case "G":
            case "PG_13":
            case "PG":
                break;
            default:
                certificate = "NOT_RATED";
                break;
        }
        columns.Add(certificate);

        movie.GetClassification();
        string rating = movie.Classification;
        switch (rating)
        {
            case "Good":
                ratingGood++;
                break;
            case "Regular":
                ratingRegular++;
                break;
            case "Bad":
                ratingBad++;
                break;
        }
        columns.Add(rating);

        fileLines.Add(string.Join(",", columns.ToArray()));
    }

    Console.WriteLine("drama = " + drama);
    Console.WriteLine("horror = " + horror);
    Console.WriteLine("action = " + action);
    Console.WriteLine("comedy = " + comedy);
    Console.WriteLine("others = " + others);
    Console.WriteLine();
    Console.WriteLine("Bad = " + ratingBad);
    Console.WriteLine("Regular = " + ratingRegular);
    Console.WriteLine("Good = " + ratingGood);
    Console.WriteLine();
    Console.WriteLine("shorte = " + shorte);
    Console.WriteLine("regular = " + regular);
    Console.WriteLine("longe = " + longe);
    Console.WriteLine("verylong = " + verylong);

    FileAO.ExportToArff(fileLines, destination);
}
/// <summary>
/// Builds an ARFF description of the given movies (header plus one data row per movie)
/// and hands the lines to <c>FileAO.ExportToArff</c>.
/// </summary>
/// <remarks>
/// Row layout, matching the declared attributes: duration bucket, four award flags,
/// genre category, certificate and the classification returned by
/// <c>movie.Classification</c> after <c>movie.GetClassification()</c> has been called.
/// Only the first '@'-separated entry of <c>movie.Genre</c> is used for the genre category.
/// The genre mapping accepts the correct spelling "Mystery" in addition to the legacy
/// "Mistery"; previously "Mystery" fell through to "Others".
/// A certificate outside G, R, PG_13 and PG (including a null one) is written as NOT_RATED.
/// </remarks>
/// <param name="movies">Movies to export; only the values are used.</param>
/// <param name="destination">Destination passed to <c>FileAO.ExportToArff</c>.</param>
public static void ExportMoviesToWeka(Dictionary<string, Movie> movies, string destination)
{
    List<string> fileLines = new List<string>();
    fileLines.AddRange(new string[]
    {
        "@RELATION movie",
        "@ATTRIBUTE duration {Short,Regular,Long,Very_Long}",
        "@ATTRIBUTE awardedDirector {0,1}",
        "@ATTRIBUTE oscarDirector {0,1}",
        "@ATTRIBUTE awardedActor {0,1}",
        "@ATTRIBUTE oscarActor {0,1}",
        "@ATTRIBUTE genre {Drama,Horror,Action,Comedy,Others}",
        "@ATTRIBUTE certificate {G, R, PG_13, PG, NOT_RATED}",
        "@ATTRIBUTE rating {Bad,Regular,Good}",
        "@DATA"
    });

    foreach (Movie movie in movies.Values)
    {
        List<string> columns = new List<string>();

        // Duration bucket.
        string runtime;
        if (movie.Runtime < 88.0f)
        {
            runtime = "Short";
        }
        else if (movie.Runtime < 122.0f)
        {
            runtime = "Regular";
        }
        else if (movie.Runtime < 160.0f)
        {
            runtime = "Long";
        }
        else
        {
            runtime = "Very_Long";
        }
        columns.Add(runtime);

        columns.Add(movie.AwardedDirector ? "1" : "0");
        columns.Add(movie.OscarDirector ? "1" : "0");
        columns.Add(movie.AwardedActors ? "1" : "0");
        columns.Add(movie.OscarActors ? "1" : "0");

        // Genre category, decided by the first listed genre only.
        string actualGenre = movie.Genre.Split('@')[0];
        string genre;
        switch (actualGenre)
        {
            case "Drama":
            case "Family":
            case "Romance":
            case "History":
            case "Reality-TV":
            case "Adult":
            case "Biography":
                genre = "Drama";
                break;
            case "Horror":
            case "Thriller":
            case "Mystery":
            case "Mistery": // legacy misspelling kept for backward compatibility
                genre = "Horror";
                break;
            case "Action":
            case "Adventure":
            case "Crime":
            case "Sci-Fi":
            case "Fantasy":
            case "War":
            case "Western":
            case "Sport":
                genre = "Action";
                break;
            case "Comedy":
                genre = "Comedy";
                break;
            default:
                genre = "Others";
                break;
        }
        columns.Add(genre);

        // Certificate: known values pass through, everything else is NOT_RATED.
        string certificate = movie.Certificate;
        switch (certificate)
        {
            case "R":
            case "G":
            case "PG_13":
            case "PG":
                break;
            default:
                certificate = "NOT_RATED";
                break;
        }
        columns.Add(certificate);

        movie.GetClassification();
        columns.Add(movie.Classification);

        fileLines.Add(string.Join(",", columns.ToArray()));
    }

    FileAO.ExportToArff(fileLines, destination);
}