示例#1
0
        private async Task saveGraphObjectInitialParameters(GraphObject g, BinaryWriter bw)
        {
            g.offsetGraph = this.currentGraphOffset;
            g.offsetTitle = this.currentTitleOffset;

            bw.Write(BitConverter.GetBytes(g.offsetGraph));
            bw.Write(BitConverter.GetBytes(g.offsetTitle));
            bw.Write(BitConverter.GetBytes(g.id));
        }
示例#2
0
        public async Task PrepareMaps(WikiDump dump, string path)
        {
            // Dump name variables:
            // dump.Name => "simplewiki"
            // dump.Date => "20191101"
            this.pathToMaps        = path + "\\" + dump.Name + "\\" + dump.Date + "\\";
            this.pageTitlesMap     = dump.Name + "-" + dump.Date + "-" + "page.map";
            this.categoryTitlesMap = dump.Name + "-" + dump.Date + "-" + "category.map";
            this.pageLinksMap      = dump.Name + "-" + dump.Date + "-" + "pagelinks.map";
            this.catFromCatMap     = dump.Name + "-" + dump.Date + "-" + "categorylinksfromcategory.map";
            this.catFromPageMap    = dump.Name + "-" + dump.Date + "-" + "categorylinksfrompage.map";

            string extension = ".wg";

            this.outOffsetFileName = pathToMaps + dump.Name + extension + "m";
            this.outTitleFileName  = pathToMaps + dump.Name + extension + "t";
            this.outGraphFileName  = pathToMaps + dump.Name + extension + "g";
            this.outInfoFileName   = pathToMaps + dump.Name + extension + "i";
            this.outSortedFileName = pathToMaps + dump.Name + extension + "s";

            // Progress update:
            window.UpdateProgress(4, 1, "Generating reverse maps: Page Links");

            // ZLICZANIE WSZYSTKICH STRON
            using (FileStream fs = File.Open(pathToMaps + pageTitlesMap, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
                using (BufferedStream bs = new BufferedStream(fs))
                    using (StreamReader sr = new StreamReader(bs)) {
                        string line;
                        while ((line = sr.ReadLine()) != null)
                        {
                            this.amountOfPages += 1;
                        }
                    }
            // TWORZENIE ODWROTNYCH ODWZOROWAŃ
            await Task.Run(() => createReverseMap(pageLinksMap));

            window.UpdateProgress(4, 1, "Generating reverse maps: Category From Page");
            await Task.Run(() => createReverseMap(catFromPageMap));

            //await createReverseMap(catFromPageMap);
            window.UpdateProgress(4, 1, "Generating reverse maps: Category From Category");
            await Task.Run(() => createReverseMap(catFromCatMap));

            //await createReverseMap(catFromCatMap);
            window.UpdateProgress(4, 1, "Sorting Titles and mapping Wiki IDs");
            await Task.Delay(5000);

            Dictionary <int, string> sortedTitles = new Dictionary <int, string>();

            // Przechodzi po tytułach wszytstkich artykułów, tworzy odwzorowanie ID artykułu na jego miejsce w kolejności
            using (FileStream fs = File.Open(pathToMaps + this.pageTitlesMap, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
                using (BufferedStream bs = new BufferedStream(fs))
                    using (StreamReader sr = new StreamReader(bs)) {
                        string line;
                        while ((line = sr.ReadLine()) != null)
                        {
                            GraphObject g = new GraphObject();
                            g.id    = System.Convert.ToInt32(line.Split('\t')[0]);
                            g.title = line.Split('\t')[1];
                            g.order = this.currentAmount;

                            pageDict[g.id]        = g.order;
                            sortedTitles[g.order] = g.title;

                            this.currentAmount += 1;
                        }
                    }

            // odwzorowanie kategorii
            using (FileStream fs = File.Open(pathToMaps + this.categoryTitlesMap, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
                using (BufferedStream bs = new BufferedStream(fs))
                    using (StreamReader sr = new StreamReader(bs)) {
                        string line;
                        while ((line = sr.ReadLine()) != null)
                        {
                            GraphObject g = new GraphObject();
                            g.id    = System.Convert.ToInt32(line.Split('\t')[0]);
                            g.title = line.Split('\t')[1];
                            g.order = this.currentAmount;

                            sortedTitles[g.order] = g.title;
                            catDict[g.id]         = g.order;

                            this.currentAmount += 1;
                        }
                    }

            // Sortowanie i zapis tytułów
            // PLIK .wgs
            BinaryWriter bwSortedTitles = createNewBinaryFile(this.outSortedFileName);
            List <KeyValuePair <int, string> > sortedTitlesList = sortedTitles.ToList();

            // Zwalnianie pamieci
            sortedTitles.Clear();

            sortedTitlesList.Sort(
                delegate(KeyValuePair <int, string> pair1,
                         KeyValuePair <int, string> pair2) {
                return(pair1.Value.CompareTo(pair2.Value));
            }
                );
            foreach (var v in sortedTitlesList)
            {
                bwSortedTitles.Write(Encoding.UTF8.GetBytes(v.Value));
                bwSortedTitles.Write(Encoding.UTF8.GetBytes(";"));
                bwSortedTitles.Write(Encoding.UTF8.GetBytes(v.Key.ToString()));
                bwSortedTitles.Write(Encoding.UTF8.GetBytes("\n"));
            }
            bwSortedTitles.Close();
        }
示例#3
0
        private async Task saveGraphObjectNeightbours(GraphObject g, BinaryWriter bw, FileStream[] linkStream)
        {
            const int BYTES_MAP_ID       = 3;
            const int UP_NB_HEADER_BYTES = 3;
            // linkStream[0] - linki w dol
            // linkStream[1] - linki w gore
            // linkStream[2] - powiazane artykuly z kategoria / kategorie do ktorych nalezy artykul
            int upNb = 0, oldUpNb = 0;
            int downNb = 0, oldDownNb = 0;

            string[] nbArray    = { };
            string[] upArray    = { };
            string[] thirdArray = { };

            string[] lines = new string[3];
            lines[0] = this.retrieveConnections(linkStream[0], 0, g.id);
            lines[1] = this.retrieveConnections(linkStream[1], 1, g.id);
            lines[2] = this.retrieveConnections(linkStream[2], 2, g.id);

            string id;

            id = lines[0].Split('\t')[0];
            if (g.id == Int32.Parse(id))
            {
                nbArray = (lines[0].Split('\t')[1]).Split(',');
                downNb  = nbArray.Length;
            }
            id = lines[1].Split('\t')[0];
            if (g.id == Int32.Parse(id))
            {
                upArray = (lines[1].Split('\t')[1]).Split(',');
                upNb    = upArray.Length;
            }
            oldUpNb   = upNb;
            oldDownNb = downNb;

            id = lines[2].Split('\t')[0];
            if (g.id == Int32.Parse(id))
            {
                thirdArray = (lines[2].Split('\t')[1]).Split(',');
                if (g.isArticle)
                {
                    upNb += thirdArray.Length;
                    string[] newArray = new string[upNb];
                    Array.Copy(upArray, newArray, upArray.Length);
                    Array.Copy(thirdArray, 0, newArray, upArray.Length, thirdArray.Length);
                    upArray = newArray;
                }
                else
                {
                    downNb += thirdArray.Length;
                    string[] newArray = new string[downNb];
                    Array.Copy(nbArray, newArray, nbArray.Length);
                    Array.Copy(thirdArray, 0, newArray, nbArray.Length, thirdArray.Length);
                    nbArray = newArray;
                }
            }


            string[] nbArrayFixed = new string[downNb];
            string[] upArrayFixed = new string[upNb];

            for (int i = 0; i < downNb; ++i)
            {
                if (g.isArticle == true)
                {
                    nbArrayFixed[i] = this.findOrderOfId(nbArray[i], true);                                      // Artykuly "w dol" lacza sie tylko z artykulami
                }
                else
                {
                    if (i < oldDownNb)
                    {
                        nbArrayFixed[i] = this.findOrderOfId(nbArray[i], false);                                    // Kategorie "w dol" lacza sie z artykulami i kategoriami
                    }
                    else
                    {
                        nbArrayFixed[i] = this.findOrderOfId(nbArray[i], true);
                    }
                }
            }
            for (int i = 0; i < upNb; ++i)
            {
                if (g.isArticle == false)
                {
                    upArrayFixed[i] = this.findOrderOfId(upArray[i], false);                                      // Kategorie "w gore" lacza sie tylko z kategoriami
                }
                else
                {
                    if (i < oldUpNb)
                    {
                        upArrayFixed[i] = this.findOrderOfId(upArray[i], true);                                  // Artykuly "w dol" lacza sie z artykulami i kategoriami
                    }
                    else
                    {
                        upArrayFixed[i] = this.findOrderOfId(upArray[i], false);
                    }
                }
            }
            this.appendPagelinkInfo(bw, upArrayFixed, nbArrayFixed);
            this.currentGraphOffset += (upNb + downNb) * BYTES_MAP_ID + UP_NB_HEADER_BYTES;
        }
示例#4
0
 private async Task saveGraphObjectTitle(GraphObject g, BinaryWriter bw)
 {
     byte[] nodeData = Encoding.UTF8.GetBytes(g.title);
     bw.Write(nodeData);
 }
示例#5
0
        public async Task CreateGraphFiles()
        {
            // Progress update:
            window.UpdateProgress(5, 1, "Parsing page maps");
            // PLIK .wgi
            BinaryWriter bwInfoOffset = createNewBinaryFile(this.outInfoFileName);
            // PLIK .wgm
            BinaryWriter bwStreamOffset = createNewBinaryFile(this.outOffsetFileName);
            // PLIK .wgt
            BinaryWriter bwStreamTitles = createNewBinaryFile(this.outTitleFileName);
            // PLIK .wgg
            BinaryWriter bwStreamGraph = createNewBinaryFile(this.outGraphFileName);

            FileStream[] linkStream_Page = new FileStream[3];
            linkStream_Page[0] = new FileStream(pathToMaps + this.pageLinksMap, FileMode.Open);              // Strona -> Linki w dol (strony)
            linkStream_Page[1] = new FileStream(pathToMaps + "R_" + this.pageLinksMap, FileMode.Open);       // Strona -> Linki w gore (strony)
            linkStream_Page[2] = new FileStream(pathToMaps + this.catFromPageMap, FileMode.Open);            // Strona -> Powiazane kategorie


            FileStream[] linkStream_Cat = new FileStream[3];
            linkStream_Cat[0] = new FileStream(pathToMaps + "R_" + this.catFromCatMap, FileMode.Open);      // Kategoria -> Linki w dol (kategorie)
            linkStream_Cat[1] = new FileStream(pathToMaps + this.catFromCatMap, FileMode.Open);             // Kategoria -> Linki w gore (kategorie)
            linkStream_Cat[2] = new FileStream(pathToMaps + "R_" + this.catFromPageMap, FileMode.Open);     // Kategoria -> Artykuly w kategorii

            this.currentTitleOffset = 0;
            this.currentGraphOffset = 0;
            // Na podstawie wygenerowanych map odwzorowan id na numer porzadkowy tworzymy MAP i GRAPH
            using (FileStream fs = File.Open(pathToMaps + this.pageTitlesMap, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
                using (BufferedStream bs = new BufferedStream(fs))
                    using (StreamReader sr = new StreamReader(bs)) {
                        string line;
                        while ((line = sr.ReadLine()) != null)
                        {
                            GraphObject g = new GraphObject();
                            g.id        = System.Convert.ToInt32(line.Split('\t')[0]);
                            g.isArticle = true;
                            g.title     = line.Split('\t')[1];

                            await saveGraphObjectInitialParameters(g, bwStreamOffset);
                            await saveGraphObjectNeightbours(g, bwStreamGraph, linkStream_Page);
                            await saveGraphObjectTitle(g, bwStreamTitles);

                            this.currentTitleOffset += Encoding.UTF8.GetBytes(g.title).Length;
                            this.amountOfObjects    += 1;
                        }
                    }
            window.UpdateProgress(5, 1, "Parsing category maps");
            // teraz kategorie
            pageLinksOffset = new int[3] {
                0, 0, 0
            };
            using (FileStream fs = File.Open(pathToMaps + this.categoryTitlesMap, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
                using (BufferedStream bs = new BufferedStream(fs))
                    using (StreamReader sr = new StreamReader(bs)) {
                        string line;
                        while ((line = sr.ReadLine()) != null)
                        {
                            GraphObject g = new GraphObject();
                            g.id        = System.Convert.ToInt32(line.Split('\t')[0]);
                            g.isArticle = false;
                            g.title     = line.Split('\t')[1];

                            await saveGraphObjectInitialParameters(g, bwStreamOffset);
                            await saveGraphObjectNeightbours(g, bwStreamGraph, linkStream_Cat);
                            await saveGraphObjectTitle(g, bwStreamTitles);

                            this.currentTitleOffset += Encoding.UTF8.GetBytes(g.title).Length;
                            this.amountOfObjects    += 1;
                        }
                    }
            bwInfoOffset.Write(BitConverter.GetBytes(this.amountOfPages));
            bwInfoOffset.Close();
            bwStreamGraph.Close();
            bwStreamOffset.Close();
            bwStreamTitles.Close();
            for (int i = 0; i < 3; ++i)
            {
                linkStream_Page[i].Close();
                linkStream_Cat[i].Close();
            }
            window.UpdateProgress(5, 1, "Removing map files");
            await removeMapFiles();
        }