Exemplo n.º 1
        // Run1: reads a text list of author names, matches them against the authors
        // loaded from Authors.xml, then walks Publications.xml to build a weighted
        // adjacency matrix of co-authorship and exports it as "stanford.gml".
        //
        // NOTE(review): this listing has no braces and several statements appear to be
        // missing (see the notes below), so block nesting cannot be read off this text.
        // Confirm every note against the original file before relying on it.
        static void Run1()
            Console.Write("hello \n");

            // Hard-coded, machine-specific input paths.
            string authorstxt      = @"C:\Users\jakub\Documents\Visual Studio 2017\dblp-reducer\data\Authors_short.txt";
            string authorsxml      = @"C:\dblp_result\636733187090414687\Authors.xml";
            string publicationsxml = @"C:\dblp_result\636733187090414687\Publications.xml";

            // Step 1: load the list of restricted author names from the text file.
            List <string> AuthorsName = Loader.TextListLoader.LoadAuthors(authorstxt);

            // Step 2: load the authors from Authors.xml.
            Loader.XmlLoader XmlParser  = new Loader.XmlLoader();
            Authors          parsedData = XmlParser.LoadAuthors(authorsxml);

            // Step 3: look up each restricted name among the loaded authors.
            List <Author>               AuthorsList      = parsedData.AuthorsList;
            int                         TotalCount       = AuthorsName.Count;
            HashSet <Author>            FoundAuthorsList = new HashSet <Author>();
            // Maps a queried name to the Name of the first author that Compare accepted for it.
            Dictionary <string, string> FoundNames       = new Dictionary <string, string>();

            // For every queried name, take the first author accepted by Compare (a linear
            // scan of AuthorsList per name). A queried name is recorded at most once.
            // NOTE(review): nothing visible here adds to FoundAuthorsList, although it is
            // counted, searched and exported below; the statement seems to be missing.
            foreach (string queryName in AuthorsName)
                var item = AuthorsList.Where(x => Compare(x.Name, queryName)).FirstOrDefault();

                if (item != null && !FoundNames.ContainsKey(queryName))
                    FoundNames.Add(queryName, item.Name);

            // Local helper that decides whether two author names refer to the same person.
            // NOTE(review): no return statement is visible, and as listed the checks below
            // sit directly under the exact-equality test. Presumably an exact match
            // returned true and the remaining checks were the fallback; confirm.
            bool Compare(string nameA, string nameB)
                if (nameA == nameB)
                    int LastSpaceIndexA = nameA.LastIndexOf(' ');
                    int LastSpaceIndexB = nameB.LastIndexOf(' ');

                    // "> 0" skips names with no space and names whose only space is at index 0.
                    if (LastSpaceIndexA > 0 && LastSpaceIndexB > 0)
                        // Text from the last space to the end of the name; the substring
                        // starts at the space itself, so both values keep that leading space.
                        string nameALastName = nameA.Substring(LastSpaceIndexA, (nameA.Length - LastSpaceIndexA));
                        string nameBLastName = nameB.Substring(LastSpaceIndexB, (nameB.Length - LastSpaceIndexB));

                        if (nameALastName == nameBLastName)
                            // Compares the text before the first space of each name.
                            if (nameA.Substring(0, (nameA.IndexOf(' '))) == nameB.Substring(0, (nameB.IndexOf(' '))))

            // Step 4: print statistics (found count out of the number of queried names),
            // followed by every "queried name - matched name" pair.
            Console.WriteLine("Count of found = " + FoundAuthorsList.Count + " of " + TotalCount);

            foreach (KeyValuePair <string, string> kvp in FoundNames)
                Console.WriteLine(kvp.Key + " - " + kvp.Value);
            // NOTE(review): the body of this check is not present in the listing;
            // presumably it reported the names that were not found. Confirm.
            foreach (string name in AuthorsName)
                if (!FoundNames.ContainsKey(name))

            // Step 5: load Publications.xml.
            Publications publications = XmlParser.LoadPublications(publicationsxml);

            // Step 6: walk through the publications and collect the found authors of each.

            // "first" marks the first found author of the current publication, so the
            // publication gets its (initially empty) list exactly once.
            bool first = true;
            // Keyed by publication.Id; the list is indexed through NodeToIndexDict below.
            Dictionary <int, List <int> > CooperationNetwork = new Dictionary <int, List <int> >();

            // NOTE(review): no visible statement appends anything to the list created
            // here; the line that adds the author to it seems to be missing.
            // Membership in FoundAuthorsList depends on how Author defines equality and
            // hashing, which is not visible here.
            foreach (Publication publication in publications.PublicationsList)
                first = true;
                foreach (Author author in publication.AuthorsList)
                    if (FoundAuthorsList.Contains(author))
                        if (first)
                            CooperationNetwork.Add(publication.Id, new List <int>());
                            first = false;

            // AnotherDic receives the publications that have at least two list entries;
            // it is written but not read in the visible lines.
            Dictionary <int, List <int> > AnotherDic      = new Dictionary <int, List <int> >();
            // Two-way mapping between a list entry ("node") and its matrix row/column index.
            Dictionary <int, int>         NodeToIndexDict = new Dictionary <int, int>();
            Dictionary <int, int>         IndexToNodeDict = new Dictionary <int, int>();
            int index = 0;

            // Gives every distinct node seen in a publication with two or more entries an index.
            // NOTE(review): index is never incremented in the visible lines; as listed, a
            // second distinct node would reuse index 0. The increment seems to be missing.
            foreach (KeyValuePair <int, List <int> > item in CooperationNetwork)
                if (item.Value.Count >= 2)
                    AnotherDic.Add(item.Key, item.Value);

                    foreach (int author in item.Value)
                        if (!NodeToIndexDict.ContainsKey(author))
                            NodeToIndexDict.Add(author, index);
                            IndexToNodeDict.Add(index, author);

            // Square matrix with one row and one column per indexed node. This loop walks
            // CooperationNetwork rather than AnotherDic; lists with fewer than two entries
            // add nothing because the inner loop bound is Count - 1.
            // Only consecutive entries (i, i + 1) of a list are counted, and only the
            // cell [i, i + 1] is incremented.
            // NOTE(review): confirm that skipping non-adjacent pairs and the mirrored
            // cell is intended.
            int[,] WeightedAdjacencyMatrix = new int[NodeToIndexDict.Count, NodeToIndexDict.Count];
            foreach (KeyValuePair <int, List <int> > item in CooperationNetwork)
                for (int i = 0; i < item.Value.Count - 1; i++)
                    WeightedAdjacencyMatrix[NodeToIndexDict[item.Value[i]], NodeToIndexDict[item.Value[i + 1]]]++;

            // Step 7: export the co-author network to a GML file.
            GmlExporter exporter = new GmlExporter();

            exporter.Export("stanford.gml", FoundAuthorsList.ToList(), WeightedAdjacencyMatrix, IndexToNodeDict);
Exemplo n.º 2
        // Main2: reads a text list of author names, queries the DBLP author search for
        // each one, then downloads the co-author list of every found author, fills a
        // weighted adjacency matrix from it and exports the result as "stanford.gml".
        // The args parameter is not used in the visible lines.
        //
        // NOTE(review): this listing has no braces and several statements appear to be
        // missing (see the notes below), so block nesting cannot be read off this text.
        // Confirm every note against the original file before relying on it.
        static void Main2(string[] args)
            Console.Write("hello \n");

            // Hard-coded, machine-specific input path.
            string authorstxt = @"C:\Users\jakub\Documents\Visual Studio 2017\dblp-reducer\data\Authors.txt";

            // Step 1: load the list of restricted author names from the text file.
            List <string> AuthorsName = Loader.TextListLoader.LoadAuthors(authorstxt);

            // html is reused as the buffer for every HTTP response body below.
            string html = string.Empty;
            // Sample query URL; not referenced anywhere in the visible lines.
            string url  = @"https://dblp.uni-trier.de/search/author?xauthor=Jure%20Leskovec";

            // Base of the author search request; the author name is appended to it.
            string search_url = @"http://dblp.uni-trier.de/search/author?xauthor=";

            // Local helper: builds the co-author request URL from an author's url value.
            string coauthors_url(string author_url)
                return(@"http://dblp.uni-trier.de/rec/pers/" + author_url + "/xc");

            List <Author>            FoundAuthors = new List <Author>();
            // DictAuthor maps an author's url to the index assigned to that author;
            // NameAuthor maps that index back to the author's NameW value.
            Dictionary <string, int> DictAuthor   = new Dictionary <string, int>();
            Dictionary <int, string> NameAuthor   = new Dictionary <int, string>();

            int index = 0;

            // Step 2: one search request per author name; only the first result is kept.
            // The name is concatenated into the URL as-is, with no escaping.
            foreach (string author_name in AuthorsName)
                HttpWebRequest request = (HttpWebRequest)WebRequest.Create(search_url + author_name);
                //request.AutomaticDecompression = DecompressionMethods.GZip;
                // Read the whole response body into html.
                using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                    using (Stream stream = response.GetResponseStream())
                        using (StreamReader reader = new StreamReader(stream))
                            html = reader.ReadToEnd();

                // Re-encode the response text as UTF-8 and deserialize it as Authors.
                MemoryStream  memStream  = new MemoryStream(Encoding.UTF8.GetBytes(html));
                XmlSerializer serializer = new XmlSerializer(typeof(Authors));
                Authors       parsedData = (Authors)serializer.Deserialize(memStream);
                if (parsedData.AuthorsList.Count > 0)
                    // Take the first hit, give it the current index as Id and copy NameW into Name.
                    Author NewAuthor = parsedData.AuthorsList[0];
                    NewAuthor.Id   = index;
                    NewAuthor.Name = NewAuthor.NameW;

                    // Register each author url only once.
                    // NOTE(review): no visible statement adds NewAuthor to FoundAuthors or
                    // changes index, although both are used below; those lines seem to be missing.
                    if (!DictAuthor.ContainsKey(NewAuthor.url))
                        DictAuthor.Add(NewAuthor.url, index);
                        NameAuthor.Add(NewAuthor.Id, NewAuthor.NameW);


             * using (StreamWriter outputFile = new StreamWriter("FoundAuthors.txt"))
             * {
             *  foreach (Author a in FoundAuthors)
             *      outputFile.WriteLine(a.url);
             * }*/
            // NOTE(review): the five lines above end a block comment whose opening
            // delimiter is not present in this listing. They look like disabled code that
            // wrote the url of every found author to "FoundAuthors.txt"; confirm.

            // Square matrix with one row and one column per found author.
            int[,] WeightedAdjacencyMatrix = new int[FoundAuthors.Count, FoundAuthors.Count];

            // Step 3: one co-author request per found author.
            foreach (Author author in FoundAuthors)
                HttpWebRequest request = (HttpWebRequest)WebRequest.Create(coauthors_url(author.url));
                // Read the whole response body into html.
                using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                    using (Stream stream = response.GetResponseStream())
                        using (StreamReader reader = new StreamReader(stream))
                            html = reader.ReadToEnd();

                // Re-encode the response text as UTF-8 and deserialize it as Coauthors.
                MemoryStream  memStream  = new MemoryStream(Encoding.UTF8.GetBytes(html));
                XmlSerializer serializer = new XmlSerializer(typeof(Coauthors));
                Coauthors     parsedData = (Coauthors)serializer.Deserialize(memStream);
                if (parsedData.AuthorsList.Count > 0)
                    // Co-authors whose url was not registered in step 2 are ignored. For a
                    // registered one, print the pair with its count and store the count in
                    // the cell [this author's Id, the co-author's registered index].
                    foreach (Author coauthor in parsedData.AuthorsList)
                        if (DictAuthor.ContainsKey(coauthor.url))
                            Console.WriteLine(author.Name + " - " + NameAuthor[DictAuthor[coauthor.url]] + " " + coauthor.count);
                            WeightedAdjacencyMatrix[author.Id, DictAuthor[coauthor.url]] = coauthor.count;

            // Step 4: export the co-author network to a GML file.
            GmlExporter exporter = new GmlExporter();

            exporter.Export("stanford.gml", FoundAuthors, WeightedAdjacencyMatrix);