Esempio n. 1
0
        /// <summary>
        /// Reads the XML file at <paramref name="dblpXml"/>, collects the raw objects, raw links
        /// and their attributes, and writes them out as two CSV files:
        /// <c>dblpXml + ".lnk.csv"</c> for the links and <c>dblpXml + ".obj.csv"</c> for the objects.
        /// </summary>
        /// <param name="dblpXml">Path of the XML file to read; also used as the prefix of both output paths.</param>
        /// <returns>A newly created <see cref="DblpContext"/>. This method does not modify it after construction.</returns>
        private static DblpContext LoadDblpXml(string dblpXml)
        {
            var context = new DblpContext();
            var objs = new Dictionary<Int32, RawObject>();
            var links = new Dictionary<Int32, RawLink>();
            var objAtts = new HashSet<String>();
            var lnkAtts = new HashSet<String>();
            var progress = 0;
            // Number of attributes whose target id matched no link/object read so far.
            var orphanAtts = 0;

            // The using block disposes the reader, so no explicit Close() is needed.
            using (var f = System.IO.File.OpenText(dblpXml))
            {
                foreach (var e in GetXmlElements(f))
                {
                    progress++;

                    // Each element is offered to all three parsers; a null result means
                    // that parser produced nothing for this element.
                    var obj = ParseObject(e);
                    var lnk = ParseLink(e);
                    var att = ParseAttribute(e);

                    if (obj != null)
                        objs.Add(obj.Id, obj);
                    if (lnk != null)
                        links.Add(lnk.Id, lnk);

                    if (att != null)
                    {
                        // The attribute key decides whether the attribute belongs to a link
                        // or to an object; att.ObjectId is looked up in the matching dictionary.
                        switch (att.Key)
                        {
                            case "in-year":
                            case "in-number":
                            case "in-volume":
                            case "month":
                            case "pages":
                            case "in-proceedings":
                            case "link-type":
                                RawLink thelnk;
                                if (links.TryGetValue(att.ObjectId, out thelnk))
                                {
                                    thelnk.Attributes.Add(att.Key, att.Value);
                                    lnkAtts.Add(att.Key);
                                }
                                else
                                {
                                    // No such link yet: skip instead of dereferencing null.
                                    orphanAtts++;
                                }
                                break;
                            default:
                                RawObject theObj;
                                if (objs.TryGetValue(att.ObjectId, out theObj))
                                {
                                    theObj.Attributes.Add(att.Key, att.Value);
                                    objAtts.Add(att.Key);
                                }
                                else
                                {
                                    // No such object yet: skip instead of dereferencing null.
                                    orphanAtts++;
                                }
                                break;
                        }
                        if (progress % 1000 == 0)
                            Console.Write("/");
                    }
                    if (progress % 10000 == 0)
                        Console.Write(".");
                }
            }

            if (orphanAtts > 0)
            {
                Console.Error.WriteLine();
                Console.Error.WriteLine("Skipped {0} attribute(s) whose link/object id was not found", orphanAtts);
            }

            Console.WriteLine();
            Console.WriteLine("Writing the Link File");
            progress = 0;

            // Snapshot the attribute names so the header and every row use the same column order.
            var lnkAttsList = lnkAtts.ToList();

            using (var lnkFile = System.IO.File.CreateText(dblpXml + ".lnk.csv"))
            {
                // Header row.
                lnkFile.Write("LinkId, From, To");
                foreach (var column in lnkAttsList)
                    lnkFile.Write("," + column);
                lnkFile.WriteLine();

                foreach (var lnk in links.Values)
                {
                    // Count rows inside the loop so the progress marks are actually emitted.
                    progress++;
                    if (progress % 10000 == 0)
                        Console.Write(".");

                    lnkFile.Write("{0}, {1}, {2}", lnk.Id, lnk.From, lnk.To);
                    foreach (var column in lnkAttsList)
                    {
                        // A missing attribute becomes an empty cell.
                        lnkFile.Write(lnk.Attributes.ContainsKey(column) ? "," + lnk.Attributes[column] : ",");
                    }
                    lnkFile.WriteLine();
                }
            }

            Console.WriteLine();
            Console.WriteLine("Writing Object Files");

            var objAttsList = objAtts.ToList();
            progress = 0;

            using (var objFile = System.IO.File.CreateText(dblpXml + ".obj.csv"))
            {
                // Header row; commas in names are replaced by '~' so they do not split a cell.
                objFile.Write("Id");
                foreach (var column in objAttsList)
                    objFile.Write("," + column.Replace(",", "~"));
                objFile.WriteLine();

                foreach (var obj in objs.Values)
                {
                    progress++;
                    if (progress % 10000 == 0)
                        Console.Write("~");

                    objFile.Write("{0}", obj.Id);
                    foreach (var column in objAttsList)
                    {
                        // Same comma replacement for values; a missing attribute becomes an empty cell.
                        objFile.Write(obj.Attributes.ContainsKey(column) ? "," + obj.Attributes[column].Replace(",", "~") : ",");
                    }
                    objFile.WriteLine();
                }
            }

            return context;
        }
Esempio n. 2
0
        /// <summary>
        /// Converts the XML file at <paramref name="dblpXml"/> into two CSV files. The input is read
        /// once to gather raw objects, raw links and their attributes; the links are then written to
        /// <c>dblpXml + ".lnk.csv"</c> and the objects to <c>dblpXml + ".obj.csv"</c>.
        /// </summary>
        /// <param name="dblpXml">Path of the input XML file; both output paths are derived from it.</param>
        /// <returns>The <see cref="DblpContext"/> created at the start of the method, returned as constructed.</returns>
        private static DblpContext LoadDblpXml(string dblpXml)
        {
            var context  = new DblpContext();
            var objs     = new Dictionary <Int32, RawObject>();
            var links    = new Dictionary <Int32, RawLink>();
            var objAtts  = new HashSet <String>();
            var lnkAtts  = new HashSet <String>();
            var progress = 0;
            // Attributes that referenced an id not present in the matching dictionary.
            var skipped  = 0;

            // Reading phase. Disposal at the end of the using block closes the file.
            using (var f = System.IO.File.OpenText(dblpXml))
            {
                var elements = GetXmlElements(f);
                foreach (var e in elements)
                {
                    progress++;

                    // All three parsers see every element; null means "nothing parsed".
                    var obj = ParseObject(e);
                    var lnk = ParseLink(e);
                    var att = ParseAttribute(e);

                    if (obj != null)
                    {
                        objs.Add(obj.Id, obj);
                    }
                    if (lnk != null)
                    {
                        links.Add(lnk.Id, lnk);
                    }
                    if (att != null)
                    {
                        // Keys listed here are stored on links; every other key goes to an object.
                        switch (att.Key)
                        {
                        case "in-year":
                        case "in-number":
                        case "in-volume":
                        case "month":
                        case "pages":
                        case "in-proceedings":
                        case "link-type":
                            RawLink thelnk;
                            if (!links.TryGetValue(att.ObjectId, out thelnk))
                            {
                                // Unknown link id: the lookup left thelnk unset, so do not touch it.
                                skipped++;
                            }
                            else
                            {
                                thelnk.Attributes.Add(att.Key, att.Value);
                                lnkAtts.Add(att.Key);
                            }
                            break;

                        default:
                            RawObject theObj;
                            if (!objs.TryGetValue(att.ObjectId, out theObj))
                            {
                                // Unknown object id: the lookup left theObj unset, so do not touch it.
                                skipped++;
                            }
                            else
                            {
                                theObj.Attributes.Add(att.Key, att.Value);
                                objAtts.Add(att.Key);
                            }
                            break;
                        }
                        if (progress % 1000 == 0)
                        {
                            Console.Write("/");
                        }
                    }
                    if (progress % 10000 == 0)
                    {
                        Console.Write(".");
                    }
                }
            }

            if (skipped > 0)
            {
                Console.Error.WriteLine();
                Console.Error.WriteLine("Skipped {0} attribute(s) whose link/object id was not found", skipped);
            }

            Console.WriteLine();
            Console.WriteLine("Writing the Link File");

            // Fix the column order once; it is shared by the header and all rows.
            var lnkAttsList = lnkAtts.ToList();

            progress = 0;
            using (var lnkFile = System.IO.File.CreateText(dblpXml + ".lnk.csv"))
            {
                lnkFile.Write("LinkId, From, To");
                foreach (var name in lnkAttsList)
                {
                    lnkFile.Write("," + name);
                }
                lnkFile.WriteLine();

                foreach (var lnk in links.Values)
                {
                    // Progress is counted per written row; outside the loop it would stay at 1.
                    progress++;
                    if (progress % 10000 == 0)
                    {
                        Console.Write(".");
                    }

                    lnkFile.Write("{0}, {1}, {2}", lnk.Id, lnk.From, lnk.To);
                    foreach (var name in lnkAttsList)
                    {
                        // Links lacking this attribute get an empty cell.
                        lnkFile.Write(lnk.Attributes.ContainsKey(name) ? "," + lnk.Attributes[name] : ",");
                    }
                    lnkFile.WriteLine();
                }
            }

            Console.WriteLine();
            Console.WriteLine("Writing Object Files");

            var objAttsList = objAtts.ToList();

            progress = 0;
            using (var objFile = System.IO.File.CreateText(dblpXml + ".obj.csv"))
            {
                // Commas inside names and values are replaced by '~' to keep one cell per attribute.
                objFile.Write("Id");
                foreach (var name in objAttsList)
                {
                    objFile.Write("," + name.Replace(",", "~"));
                }
                objFile.WriteLine();

                foreach (var obj in objs.Values)
                {
                    progress++;
                    if (progress % 10000 == 0)
                    {
                        Console.Write("~");
                    }

                    objFile.Write("{0}", obj.Id);
                    foreach (var name in objAttsList)
                    {
                        // Objects lacking this attribute get an empty cell.
                        objFile.Write(obj.Attributes.ContainsKey(name) ? "," + obj.Attributes[name].Replace(",", "~") : ",");
                    }
                    objFile.WriteLine();
                }
            }

            return context;
        }