/// <summary>
        /// Runs the dump-processing pipeline for <paramref name="dump"/>: download,
        /// decompression, reading, map preparation and graph file creation. Each stage is
        /// awaited before the next begins, and the matching progress step is activated
        /// just before the stage starts.
        /// </summary>
        /// <param name="dump">Dump handed to every stage.</param>
        /// <param name="path">Working directory handed to every stage.</param>
        /// <remarks>
        /// NOTE(review): this method is <c>async void</c> and has no try/catch, so an
        /// exception escaping any awaited stage cannot be observed by the caller.
        /// </remarks>
        private async void StartProcess(WikiDump dump, string path)
        {
            // Short pause before any work starts.
            await Task.Delay(500);

            // Start measuring elapsed time.
            stopWatch.Start();
            dispatcherTimer.Start();

            // Step 1: download the dump
            ActivateProgressStep(1);
            await new DumpDownload(this).Start(dump, path);

            // Step 2: decompress the dump
            ActivateProgressStep(2);
            await new DumpDecompress(this).Start(dump, path);

            // Step 3: read the dump
            ActivateProgressStep(3);
            await new DumpRead(this).Start(dump, path);

            // Step 4: prepare the maps used for the WG files
            ActivateProgressStep(4);
            var graphFileCreator = new FileCreator(this);
            await graphFileCreator.PrepareMaps(dump, path);

            // Step 5: create the graph files
            ActivateProgressStep(5);
            await graphFileCreator.CreateGraphFiles();

            // Step 6: done - only the stopwatch is stopped here
            ActivateProgressStep(6);
            stopWatch.Stop();
        }
        /// <summary>
        /// Handles completion of the dump index download. Parses the JSON in
        /// <c>e.Result</c>, adds one <c>WikiDump</c> per entry under "wikis" to
        /// <c>dump_list</c>, then updates the fetch status controls and, when at least one
        /// needed job is not finished, informs the user.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Download result; ignored when it carries an error or was cancelled.</param>
        private void FetchAllDumpsCompleted(object sender, DownloadStringCompletedEventArgs e)
        {
            // Reading e.Result throws for failed AND for cancelled requests, so leave before touching it.
            // NOTE(review): this path leaves the status label and the loader untouched - confirm that is intended.
            if (e.Error != null || e.Cancelled)
            {
                return;
            }

            int            inProgressCount = 0;
            JObject        json            = JObject.Parse(e.Result);
            IList <JToken> results         = json["wikis"].Children().ToList();

            foreach (JToken result in results)
            {
                // Each entry is a property: <wiki name> -> { "jobs": { <job>: { status, updated, files } } }
                JProperty wiki     = (JProperty)result;
                string    wikiName = wiki.Name;
                JToken    jobs     = wiki.Value["jobs"];

                WikiDump dump = new WikiDump();
                dump.Name = wikiName;
                long fileSize = 0;
                foreach (string job in neededJobs)
                {
                    // A job that is absent from the index is treated like one that is not finished yet.
                    JToken jobInfo = jobs == null ? null : jobs[job];
                    if (jobInfo != null && (string)jobInfo["status"] == "done")
                    {
                        // Only the first file listed for the job is used.
                        JToken firstFile = jobInfo["files"].First.Children().ToList()[0];
                        string url       = (string)firstFile["url"];

                        dump.LastUpdated = DateTime.Parse((string)jobInfo["updated"], System.Globalization.CultureInfo.InvariantCulture);
                        // The date is the 8 characters following "/<wiki name>/" in the url. The offset must be
                        // based on the plain wiki name: dump.Name may already carry the " (in progress)" suffix.
                        dump.Date = url.Substring(wikiName.Length + 2, 8);
                        dump.Files.Add(url);
                        fileSize += (long)firstFile["size"];
                    }
                    else
                    {
                        dump.Name    = wikiName + " (in progress)";
                        dump.IsReady = false;
                        inProgressCount++;
                    }
                }
                // Accumulated file size divided by 1,000,000.
                dump.Size = (double)fileSize / 1000000;
                dump_list.Items.Add(dump);
            }

            fetching_status.Content   = "Fetched " + results.Count + " items.";
            loading_icon.Source       = new BitmapImage(new Uri(@"/Assets/ok.png", UriKind.Relative));
            this.fetchAllDumpsLoading = false;
            AnimateLoader(loading_icon, this.fetchAllDumpsLoading);
            if (inProgressCount > 0)
            {
                MessageBox.Show("Some dumps are being created right now. Try fetching them again in a few hours or choose created one.", "Dumps status");
            }
        }
Esempio n. 3
0
        /// <summary>
        /// Downloads every file of <paramref name="dump"/> concurrently and reports step 1
        /// as complete once all downloads have finished.
        /// </summary>
        /// <param name="dump">Dump whose <c>Files</c> are downloaded; <c>Size</c> sets the expected total.</param>
        /// <param name="path">Destination passed through to <c>DownloadFile</c>.</param>
        /// <returns>A task that completes when all downloads finished or a <see cref="WebException"/> was reported.</returns>
        public async Task Start(WikiDump dump, string path)
        {
            // The clients must stay alive until their downloads have finished, so they are
            // collected here and disposed only after Task.WhenAll has completed.
            var clients = new List <WebClient>();
            try {
                var tasks = new List <Task>();
                totalToRecive = dump.Size * 1000000;
                foreach (string file in dump.Files)
                {
                    WebClient wc = new WebClient();
                    clients.Add(wc);
                    progress.Add(file, 0);
                    tasks.Add(DownloadFile(wc, file, path));
                }
                await Task.WhenAll(tasks);

                window.UpdateProgress(1, 1);
            } catch (WebException ex) {
                window.ErrorProgress(1, "Error occured when trying to download dump from server:\n" + ex.Message, "Dump download error");
            } finally {
                foreach (WebClient wc in clients)
                {
                    wc.Dispose();
                }
            }
        }
 /// <summary>
 /// Decompresses every file of <paramref name="dump"/> concurrently and, once all of
 /// them succeeded, deletes the compressed originals.
 /// </summary>
 /// <param name="dump">Dump whose <c>Files</c> (relative, '/'-separated) are decompressed.</param>
 /// <param name="path">Directory prefix the relative file names are appended to.</param>
 /// <returns>A task that completes when the step finished or its failure was reported.</returns>
 public async Task Start(WikiDump dump, string path)
 {
     try {
         var tasks = new List <Task>();
         window.UpdateProgress(2, 0, "Decompressed " + filesDecompressed + " out of " + window.numberOfFiles + " files");
         foreach (string file in dump.Files)
         {
             tasks.Add(DecompressGZip(new FileInfo(path + file.Replace('/', '\\'))));
         }

         // Await the decompression itself (not a continuation of it) so that a failure
         // reaches the catch block below instead of being silently dropped.
         await Task.WhenAll(tasks);

         // Delete the compressed gz files - only reached when every file was decompressed.
         foreach (string file in dump.Files)
         {
             File.Delete(path + file.Replace('/', '\\'));
         }
     } catch (Exception ex) {
         window.ErrorProgress(2, "Error occured when trying to decompress downloaded dump:\n" + ex.Message, "Dump decompressing error");
     }
 }
Esempio n. 5
0
        /// <summary>
        /// Reads the decompressed SQL files of <paramref name="dump"/> (page, pagelinks,
        /// categorylinks), builds the corresponding maps, sorts them, and finally deletes
        /// the SQL files. Any exception is reported through <c>window.ErrorProgress</c>.
        /// </summary>
        /// <param name="dump">Dump whose <c>Files</c> list is searched for the three SQL files.</param>
        /// <param name="path">Directory prefix the relative file names are appended to.</param>
        public async Task Start(WikiDump dump, string path)
        {
            try {
                var sqlFiles = new List <FileInfo>();
                window.UpdateProgress(3, 1, "Processed " + filesCreated + " out of " + numberOfFiles + " files");

                // Page and category titles
                FileInfo pageSql = LocateSqlFile(dump, path, "page.sql");
                sqlFiles.Add(pageSql);
                await CreateTitlesMaps(pageSql);
                // Base name without ".sql"; the category variant replaces the trailing "page.sql".
                await sortMapnumeric(pageSql.FullName.Substring(0, pageSql.FullName.Length - 4), pageSql.DirectoryName);
                await sortMapnumeric(pageSql.FullName.Substring(0, pageSql.FullName.Length - 8) + "category", pageSql.DirectoryName);

                // Title sorting for search use (sortTitles on the same two base names) is currently disabled.

                // Page to page links
                FileInfo pageLinksSql = LocateSqlFile(dump, path, "pagelinks.sql");
                sqlFiles.Add(pageLinksSql);
                await CreatePageLinksMap(pageLinksSql, DUMP_TYPE.PAGELINKS);
                await sortMapnumeric(pageLinksSql.FullName.Substring(0, pageLinksSql.FullName.Length - 4), pageLinksSql.DirectoryName);

                // Category links
                FileInfo categoryLinksSql = LocateSqlFile(dump, path, "categorylinks.sql");
                sqlFiles.Add(categoryLinksSql);
                await CreateCategoryLinksMap(categoryLinksSql, DUMP_TYPE.CATEGORYLINKS);
                await sortMapnumeric(categoryLinksSql.FullName.Substring(0, categoryLinksSql.FullName.Length - 4) + "frompage", categoryLinksSql.DirectoryName);
                await sortMapnumeric(categoryLinksSql.FullName.Substring(0, categoryLinksSql.FullName.Length - 4) + "fromcategory", categoryLinksSql.DirectoryName);

                // All maps are built - the sql files are no longer needed
                foreach (FileInfo sqlFile in sqlFiles)
                {
                    File.Delete(sqlFile.FullName);
                }
            } catch (Exception ex) {
                window.ErrorProgress(3, "Error occured when trying to read dump:\n" + ex.Message, "Dump reading error");
            }
        }

        /// <summary>
        /// Finds the first dump file whose name contains <paramref name="marker"/> and maps
        /// it to the local decompressed file ('/' becomes '\', ".gz" is removed).
        /// </summary>
        private static FileInfo LocateSqlFile(WikiDump dump, string path, string marker)
        {
            string relativeName = Array.Find(dump.Files.ToArray(), (el) => el.Contains(marker));
            return new FileInfo(path + relativeName.Replace('/', '\\').Replace(".gz", ""));
        }
Esempio n. 6
0
        /// <summary>
        /// Prepares everything needed to create the graph files for <paramref name="dump"/>:
        /// derives the map and output file names, counts the pages, generates the three
        /// reverse maps, assigns every page and category title a consecutive order number
        /// and writes the titles, sorted, to the ".wgs" file as "title;order" lines.
        /// </summary>
        /// <param name="dump">Dump whose <c>Name</c> and <c>Date</c> form the file names.</param>
        /// <param name="path">Root directory; the maps live in "path\Name\Date\".</param>
        /// <returns>A task that completes when the sorted titles file has been written.</returns>
        public async Task PrepareMaps(WikiDump dump, string path)
        {
            // Dump name variables:
            // dump.Name => "simplewiki"
            // dump.Date => "20191101"
            string mapPrefix = dump.Name + "-" + dump.Date + "-";

            this.pathToMaps        = path + "\\" + dump.Name + "\\" + dump.Date + "\\";
            this.pageTitlesMap     = mapPrefix + "page.map";
            this.categoryTitlesMap = mapPrefix + "category.map";
            this.pageLinksMap      = mapPrefix + "pagelinks.map";
            this.catFromCatMap     = mapPrefix + "categorylinksfromcategory.map";
            this.catFromPageMap    = mapPrefix + "categorylinksfrompage.map";

            string outputBase = pathToMaps + dump.Name + ".wg";

            this.outOffsetFileName = outputBase + "m";
            this.outTitleFileName  = outputBase + "t";
            this.outGraphFileName  = outputBase + "g";
            this.outInfoFileName   = outputBase + "i";
            this.outSortedFileName = outputBase + "s";

            // Progress update:
            window.UpdateProgress(4, 1, "Generating reverse maps: Page Links");

            // Count all pages: one per line of the page titles map.
            using (StreamReader sr = OpenMapReader(pathToMaps + pageTitlesMap))
            {
                while (sr.ReadLine() != null)
                {
                    this.amountOfPages += 1;
                }
            }

            // Create the reverse maps.
            await Task.Run(() => createReverseMap(pageLinksMap));

            window.UpdateProgress(4, 1, "Generating reverse maps: Category From Page");
            await Task.Run(() => createReverseMap(catFromPageMap));

            window.UpdateProgress(4, 1, "Generating reverse maps: Category From Category");
            await Task.Run(() => createReverseMap(catFromCatMap));

            window.UpdateProgress(4, 1, "Sorting Titles and mapping Wiki IDs");
            // NOTE(review): fixed 5 second pause kept from the original; its purpose is not visible here.
            await Task.Delay(5000);

            // (order, title) pairs, collected directly into the list that gets sorted so the
            // titles are not held twice in memory.
            var titles = new List <KeyValuePair <int, string> >();

            // Walk the titles of all pages, mapping each page ID to its order number ...
            LoadTitleMap(pathToMaps + this.pageTitlesMap, titles, (id, order) => pageDict[id] = order);
            // ... then do the same for the categories, continuing the same numbering.
            LoadTitleMap(pathToMaps + this.categoryTitlesMap, titles, (id, order) => catDict[id] = order);

            // Sort and write the titles (.wgs file).
            // NOTE(review): string.CompareTo is culture-sensitive; confirm the consumer of the file expects that order.
            titles.Sort((left, right) => left.Value.CompareTo(right.Value));

            byte[] separator = Encoding.UTF8.GetBytes(";");
            byte[] newLine   = Encoding.UTF8.GetBytes("\n");

            // 'using' closes the file even when a write fails.
            using (BinaryWriter bwSortedTitles = createNewBinaryFile(this.outSortedFileName))
            {
                foreach (KeyValuePair <int, string> entry in titles)
                {
                    bwSortedTitles.Write(Encoding.UTF8.GetBytes(entry.Value));
                    bwSortedTitles.Write(separator);
                    bwSortedTitles.Write(Encoding.UTF8.GetBytes(entry.Key.ToString()));
                    bwSortedTitles.Write(newLine);
                }
            }
        }

        /// <summary>
        /// Opens a map file for buffered, shared reading. Disposing the returned reader
        /// also closes the underlying streams.
        /// </summary>
        private static StreamReader OpenMapReader(string fullPath)
        {
            FileStream fs = File.Open(fullPath, FileMode.Open, FileAccess.Read, FileShare.ReadWrite);
            return new StreamReader(new BufferedStream(fs));
        }

        /// <summary>
        /// Reads a tab-separated "id, title" map file, gives each line the next order number
        /// from <c>currentAmount</c>, reports the (id, order) pair through
        /// <paramref name="registerOrder"/> and appends (order, title) to <paramref name="titles"/>.
        /// </summary>
        private void LoadTitleMap(string fullPath, List <KeyValuePair <int, string> > titles, System.Action <int, int> registerOrder)
        {
            using (StreamReader sr = OpenMapReader(fullPath))
            {
                string line;
                while ((line = sr.ReadLine()) != null)
                {
                    string[] fields = line.Split('\t');
                    int      id     = System.Convert.ToInt32(fields[0]);
                    string   title  = fields[1];
                    int      order  = this.currentAmount;

                    registerOrder(id, order);
                    titles.Add(new KeyValuePair <int, string>(order, title));

                    this.currentAmount += 1;
                }
            }
        }