private async void StartProcess(WikiDump dump, string path)
{
    await Task.Delay(500);
    stopWatch.Start();
    dispatcherTimer.Start();

    // Dump download
    ActivateProgressStep(1);
    DumpDownload dumpDownload = new DumpDownload(this);
    await dumpDownload.Start(dump, path);

    // Dump decompression
    ActivateProgressStep(2);
    DumpDecompress dumpDecompress = new DumpDecompress(this);
    await dumpDecompress.Start(dump, path);

    // Reading dump
    ActivateProgressStep(3);
    DumpRead dumpRead = new DumpRead(this);
    await dumpRead.Start(dump, path);

    // Creating WG files
    ActivateProgressStep(4);
    FileCreator fileCreator = new FileCreator(this);
    await fileCreator.PrepareMaps(dump, path);

    // Finishing up (clean-up)
    ActivateProgressStep(5);
    await fileCreator.CreateGraphFiles();

    ActivateProgressStep(6);
    stopWatch.Stop();
}
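// dispatcherTimer and stopWatch are started above, but their wiring is not
// shown in this excerpt. A plausible sketch, assuming a WPF DispatcherTimer
// that refreshes an elapsed-time label once a second (elapsed_time is a
// hypothetical control name, following the snake_case naming of the other
// controls in this window):
//
//     dispatcherTimer = new System.Windows.Threading.DispatcherTimer();
//     dispatcherTimer.Interval = TimeSpan.FromSeconds(1);
//     dispatcherTimer.Tick += (s, e) =>
//     {
//         // Stopwatch.Elapsed is a TimeSpan; format it as hh:mm:ss.
//         elapsed_time.Content = stopWatch.Elapsed.ToString(@"hh\:mm\:ss");
//     };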
private void FetchAllDumpsCompleted(object sender, DownloadStringCompletedEventArgs e)
{
    if (e.Error != null)
    {
        return;
    }

    int inProgressCount = 0;
    JObject json = JObject.Parse(e.Result);
    IList<JToken> results = json["wikis"].Children().ToList();

    foreach (JToken result in results)
    {
        WikiDump dump = new WikiDump();
        dump.Name = ((JProperty)result).Name;
        long fileSize = 0;

        foreach (string job in neededJobs)
        {
            JToken jobToken = result.Children().ToList()[0]["jobs"][job];

            if ((string)jobToken["status"] == "done")
            {
                JToken fileToken = jobToken["files"].First.Children().ToList()[0];
                dump.LastUpdated = DateTime.Parse((string)jobToken["updated"]);
                dump.Date = ((string)fileToken["url"]).Substring(dump.Name.Length + 2, 8);
                dump.Files.Add((string)fileToken["url"]);
                fileSize += (long)fileToken["size"];
            }
            else
            {
                dump.Name = ((JProperty)result).Name + " (in progress)";
                dump.IsReady = false;
                inProgressCount++;
            }
        }

        dump.Size = (double)fileSize / 1000000;
        dump_list.Items.Add(dump);
    }

    fetching_status.Content = "Fetched " + results.Count + " items.";
    loading_icon.Source = new BitmapImage(new Uri(@"/Assets/ok.png", UriKind.Relative));
    this.fetchAllDumpsLoading = false;
    AnimateLoader(loading_icon, this.fetchAllDumpsLoading);

    if (inProgressCount > 0)
    {
        MessageBox.Show("Some dumps are still being generated. Try fetching again in a few hours, or choose one that is already complete.", "Dumps status");
    }
}
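// For reference, an abridged sketch of the dump-status JSON this handler
// parses. The field names follow from the accesses above; the wiki, job
// names, and values are illustrative, not taken from a real response:
//
// {
//   "wikis": {
//     "simplewiki": {
//       "jobs": {
//         "pagetable": {
//           "status": "done",
//           "updated": "2019-11-01 12:34:56",
//           "files": {
//             "simplewiki-20191101-page.sql.gz": {
//               "url": "/simplewiki/20191101/simplewiki-20191101-page.sql.gz",
//               "size": 12345678
//             }
//           }
//         }
//       }
//     }
//   }
// }
//
// Note how dump.Date is sliced out of the URL: dump.Name.Length + 2 skips
// the leading "/" + wiki name + "/" segment, and the next 8 characters are
// the YYYYMMDD date.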
public async Task Start(WikiDump dump, string path)
{
    try
    {
        var tasks = new List<Task>();
        var clients = new List<WebClient>();
        totalToRecive = dump.Size * 1000000;

        foreach (string file in dump.Files)
        {
            // Each WebClient must stay alive until its download task
            // finishes, so the clients are disposed after Task.WhenAll
            // rather than in a using block here (which would dispose them
            // while the downloads were still running).
            WebClient wc = new WebClient();
            clients.Add(wc);
            progress.Add(file, 0);
            tasks.Add(DownloadFile(wc, file, path));
        }

        await Task.WhenAll(tasks);

        foreach (WebClient wc in clients)
        {
            wc.Dispose();
        }

        window.UpdateProgress(1, 1);
    }
    catch (WebException ex)
    {
        window.ErrorProgress(1, "Error occurred when trying to download dump from server:\n" + ex.Message, "Dump download error");
    }
}
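// DownloadFile is referenced above but not shown in this excerpt. A minimal
// sketch of what it might look like, assuming progress is a
// Dictionary<string, long>, totalToRecive is a double, the files are served
// from https://dumps.wikimedia.org, and System.IO plus System.Linq are
// imported; the real helper may differ.
private async Task DownloadFile(WebClient wc, string file, string path)
{
    string localPath = path + file.Replace('/', '\\');
    Directory.CreateDirectory(Path.GetDirectoryName(localPath));

    // WebClient raises this event on the captured synchronization context
    // (the UI thread in WPF), so updating the shared progress dictionary
    // and the window from here is safe.
    wc.DownloadProgressChanged += (s, e) =>
    {
        progress[file] = e.BytesReceived;
        window.UpdateProgress(1, progress.Values.Sum() / totalToRecive);
    };

    await wc.DownloadFileTaskAsync(new Uri("https://dumps.wikimedia.org" + file), localPath);
}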
public async Task Start(WikiDump dump, string path)
{
    try
    {
        var tasks = new List<Task>();
        window.UpdateProgress(2, 0, "Decompressed " + filesDecompressed + " out of " + window.numberOfFiles + " files");

        foreach (string file in dump.Files)
        {
            tasks.Add(DecompressGZip(new FileInfo(path + file.Replace('/', '\\'))));
        }

        await Task.WhenAll(tasks).ContinueWith((action) =>
        {
            // Delete the compressed .gz files once every archive has been
            // decompressed
            foreach (string file in dump.Files)
            {
                File.Delete(path + file.Replace('/', '\\'));
            }
        });
    }
    catch (Exception ex)
    {
        window.ErrorProgress(2, "Error occurred when trying to decompress the downloaded dump:\n" + ex.Message, "Dump decompressing error");
    }
}
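// DecompressGZip is referenced above but not shown. A sketch under the
// assumption that each archive is decompressed next to itself with the .gz
// suffix stripped (matching the .Replace(".gz", "") in DumpRead.Start) and
// that System.IO.Compression is imported; the real helper may report
// progress differently.
private async Task DecompressGZip(FileInfo fileToDecompress)
{
    string targetPath = fileToDecompress.FullName.Replace(".gz", "");

    using (FileStream originalFileStream = fileToDecompress.OpenRead())
    using (FileStream decompressedFileStream = File.Create(targetPath))
    using (GZipStream decompressionStream = new GZipStream(originalFileStream, CompressionMode.Decompress))
    {
        // Stream the decompressed bytes straight to disk instead of
        // buffering the whole file in memory.
        await decompressionStream.CopyToAsync(decompressedFileStream);
    }

    filesDecompressed++;
    window.UpdateProgress(2, (double)filesDecompressed / window.numberOfFiles,
        "Decompressed " + filesDecompressed + " out of " + window.numberOfFiles + " files");
}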
public async Task Start(WikiDump dump, string path)
{
    try
    {
        List<FileInfo> files = new List<FileInfo>();
        window.UpdateProgress(3, 1, "Processed " + filesCreated + " out of " + numberOfFiles + " files");

        // Page and category titles
        files.Add(new FileInfo(path + Array.Find(dump.Files.ToArray(), (el) => el.Contains("page.sql")).Replace('/', '\\').Replace(".gz", "")));
        await CreateTitlesMaps(files[0]);
        await sortMapnumeric(files[0].FullName.Substring(0, files[0].FullName.Length - 4), files[0].DirectoryName);
        await sortMapnumeric(files[0].FullName.Substring(0, files[0].FullName.Length - 8) + "category", files[0].DirectoryName);

        // Titles sorting (for search use)
        //await sortTitles(files[0].FullName.Substring(0, files[0].FullName.Length - 4), files[0].DirectoryName);
        //await sortTitles(files[0].FullName.Substring(0, files[0].FullName.Length - 8) + "category", files[0].DirectoryName);

        // Page-to-page links
        files.Add(new FileInfo(path + Array.Find(dump.Files.ToArray(), (el) => el.Contains("pagelinks.sql")).Replace('/', '\\').Replace(".gz", "")));
        await CreatePageLinksMap(files[1], DUMP_TYPE.PAGELINKS);
        await sortMapnumeric(files[1].FullName.Substring(0, files[1].FullName.Length - 4), files[1].DirectoryName);

        // Category links
        files.Add(new FileInfo(path + Array.Find(dump.Files.ToArray(), (el) => el.Contains("categorylinks.sql")).Replace('/', '\\').Replace(".gz", "")));
        await CreateCategoryLinksMap(files[2], DUMP_TYPE.CATEGORYLINKS);
        await sortMapnumeric(files[2].FullName.Substring(0, files[2].FullName.Length - 4) + "frompage", files[2].DirectoryName);
        await sortMapnumeric(files[2].FullName.Substring(0, files[2].FullName.Length - 4) + "fromcategory", files[2].DirectoryName);

        // Delete the raw .sql files once the maps have been written
        foreach (FileInfo file in files)
        {
            File.Delete(file.FullName);
        }
    }
    catch (Exception ex)
    {
        window.ErrorProgress(3, "Error occurred when trying to read the dump:\n" + ex.Message, "Dump reading error");
    }
}
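// sortMapnumeric is referenced above but not shown. The call sites pass the
// file path with its extension stripped, and PrepareMaps later reads files
// named <base>.map, which suggests it sorts a map of tab-separated
// "id<TAB>value" lines by the numeric id. A minimal in-memory sketch under
// those assumptions (the real implementation likely streams or chunks,
// since pagelinks maps can be very large):
private async Task sortMapnumeric(string mapName, string directory)
{
    await Task.Run(() =>
    {
        // Assumption: map files carry the .map extension; directory may be
        // used by the real version for temporary chunk files.
        string mapPath = mapName + ".map";
        string[] lines = File.ReadAllLines(mapPath);

        // Sort by the numeric id in the first tab-separated column.
        Array.Sort(lines, (a, b) =>
            long.Parse(a.Split('\t')[0]).CompareTo(long.Parse(b.Split('\t')[0])));

        File.WriteAllLines(mapPath, lines);
    });
}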
public async Task PrepareMaps(WikiDump dump, string path)
{
    // Dump name variables:
    // dump.Name => "simplewiki"
    // dump.Date => "20191101"
    this.pathToMaps = path + "\\" + dump.Name + "\\" + dump.Date + "\\";
    this.pageTitlesMap = dump.Name + "-" + dump.Date + "-" + "page.map";
    this.categoryTitlesMap = dump.Name + "-" + dump.Date + "-" + "category.map";
    this.pageLinksMap = dump.Name + "-" + dump.Date + "-" + "pagelinks.map";
    this.catFromCatMap = dump.Name + "-" + dump.Date + "-" + "categorylinksfromcategory.map";
    this.catFromPageMap = dump.Name + "-" + dump.Date + "-" + "categorylinksfrompage.map";

    string extension = ".wg";
    this.outOffsetFileName = pathToMaps + dump.Name + extension + "m";
    this.outTitleFileName = pathToMaps + dump.Name + extension + "t";
    this.outGraphFileName = pathToMaps + dump.Name + extension + "g";
    this.outInfoFileName = pathToMaps + dump.Name + extension + "i";
    this.outSortedFileName = pathToMaps + dump.Name + extension + "s";

    // Progress update:
    window.UpdateProgress(4, 1, "Generating reverse maps: Page Links");

    // Counting all pages
    using (FileStream fs = File.Open(pathToMaps + pageTitlesMap, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
    using (BufferedStream bs = new BufferedStream(fs))
    using (StreamReader sr = new StreamReader(bs))
    {
        string line;
        while ((line = sr.ReadLine()) != null)
        {
            this.amountOfPages += 1;
        }
    }

    // Creating the reverse mappings
    await Task.Run(() => createReverseMap(pageLinksMap));
    window.UpdateProgress(4, 1, "Generating reverse maps: Category From Page");
    await Task.Run(() => createReverseMap(catFromPageMap));
    //await createReverseMap(catFromPageMap);
    window.UpdateProgress(4, 1, "Generating reverse maps: Category From Category");
    await Task.Run(() => createReverseMap(catFromCatMap));
    //await createReverseMap(catFromCatMap);

    window.UpdateProgress(4, 1, "Sorting Titles and mapping Wiki IDs");
    await Task.Delay(5000);

    Dictionary<int, string> sortedTitles = new Dictionary<int, string>();

    // Walks the titles of all articles, mapping each article's ID to its
    // position in the ordering
    using (FileStream fs = File.Open(pathToMaps + this.pageTitlesMap, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
    using (BufferedStream bs = new BufferedStream(fs))
    using (StreamReader sr = new StreamReader(bs))
    {
        string line;
        while ((line = sr.ReadLine()) != null)
        {
            GraphObject g = new GraphObject();
            g.id = System.Convert.ToInt32(line.Split('\t')[0]);
            g.title = line.Split('\t')[1];
            g.order = this.currentAmount;
            pageDict[g.id] = g.order;
            sortedTitles[g.order] = g.title;
            this.currentAmount += 1;
        }
    }

    // The same mapping for categories
    using (FileStream fs = File.Open(pathToMaps + this.categoryTitlesMap, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
    using (BufferedStream bs = new BufferedStream(fs))
    using (StreamReader sr = new StreamReader(bs))
    {
        string line;
        while ((line = sr.ReadLine()) != null)
        {
            GraphObject g = new GraphObject();
            g.id = System.Convert.ToInt32(line.Split('\t')[0]);
            g.title = line.Split('\t')[1];
            g.order = this.currentAmount;
            sortedTitles[g.order] = g.title;
            catDict[g.id] = g.order;
            this.currentAmount += 1;
        }
    }

    // Sorting and writing the titles
    // .wgs FILE
    BinaryWriter bwSortedTitles = createNewBinaryFile(this.outSortedFileName);
    List<KeyValuePair<int, string>> sortedTitlesList = sortedTitles.ToList();

    // Free the memory held by the dictionary (ToList made a copy)
    sortedTitles.Clear();

    sortedTitlesList.Sort(
        delegate (KeyValuePair<int, string> pair1, KeyValuePair<int, string> pair2)
        {
            return pair1.Value.CompareTo(pair2.Value);
        }
    );

    foreach (var v in sortedTitlesList)
    {
        bwSortedTitles.Write(Encoding.UTF8.GetBytes(v.Value));
        bwSortedTitles.Write(Encoding.UTF8.GetBytes(";"));
        bwSortedTitles.Write(Encoding.UTF8.GetBytes(v.Key.ToString()));
        bwSortedTitles.Write(Encoding.UTF8.GetBytes("\n"));
    }
    bwSortedTitles.Close();
}
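// Since BinaryWriter.Write(byte[]) emits raw bytes with no length prefix,
// the .wgs file written above is effectively a plain UTF-8 text stream of
// "title;order\n" records sorted by title. A minimal sketch of reading it
// back (LoadSortedTitles is a hypothetical helper, not part of the original
// code):
private Dictionary<string, int> LoadSortedTitles(string wgsPath)
{
    var titleToOrder = new Dictionary<string, int>();

    foreach (string line in File.ReadLines(wgsPath, Encoding.UTF8))
    {
        // Split on the last ';' so titles that themselves contain ';'
        // stay intact; the order suffix is always purely numeric.
        int sep = line.LastIndexOf(';');
        titleToOrder[line.Substring(0, sep)] = int.Parse(line.Substring(sep + 1));
    }

    return titleToOrder;
}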