/// <summary>
        /// Parses a PubMed XML document (root element PubmedArticleSet) into one
        /// <see cref="PubMedItem"/> per PubmedArticle element.
        /// </summary>
        /// <param name="xml">
        /// The XML text. A null/empty string, or text containing the marker
        /// "XML not found for id", produces an empty list.
        /// </param>
        /// <returns>
        /// The parsed items in document order; never null. Optional elements that are
        /// absent are stored as empty strings, except Doi and Pubdate, which are only
        /// assigned when a value is present in the document.
        /// </returns>
        /// <exception cref="XmlException">The text is not well-formed XML.</exception>
        public List<PubMedItem> Parse(string xml)
        {
            List<PubMedItem> pmItemList = new List<PubMedItem>();

            if (string.IsNullOrEmpty(xml))
            {
                return pmItemList;
            }

            // Marker text is a fixed ASCII phrase, so compare ordinally rather than by culture.
            if (xml.IndexOf("XML not found for id", System.StringComparison.Ordinal) > -1)
            {
                return pmItemList;
            }

            // Blank out the known DTD location so the parser has nothing to download for it.
            xml = xml.Replace("http://www.ncbi.nlm.nih.gov/corehtml/query/DTD/pubmed_140101.dtd", "");

            XmlDocument xdoc = new XmlDocument();
            // Never resolve external DTDs/entities: the Replace above only covers one DTD
            // version, and resolving arbitrary external references is an XXE risk.
            xdoc.XmlResolver = null;
            xdoc.LoadXml(xml);

            XmlNodeList articleNodeList = xdoc.SelectNodes("/PubmedArticleSet/PubmedArticle");
            if (articleNodeList == null)
            {
                return pmItemList;
            }

            foreach (XmlNode articleNode in articleNodeList)
            {
                PubMedItem it = new PubMedItem();
                it.Pmid = ReadText(articleNode, "MedlineCitation/PMID");
                it.Title = ReadText(articleNode, "MedlineCitation/Article/ArticleTitle");

                // An abstract may be split into several AbstractText sections; keep all of them.
                it.Abstract = JoinTexts(
                    articleNode.SelectNodes("MedlineCitation/Article/Abstract/AbstractText"), " ");

                // Prefer the ISO abbreviation; fall back to the full journal title.
                XmlNode node = articleNode.SelectSingleNode("MedlineCitation/Article/Journal/ISOAbbreviation");
                if (node != null)
                {
                    it.Journal = node.InnerText;
                }
                else
                {
                    it.Journal = ReadText(articleNode, "MedlineCitation/Article/Journal/Title");
                }

                it.JIssn = ReadText(articleNode, "MedlineCitation/Article/Journal/ISSN");
                it.Volume = ReadText(articleNode, "MedlineCitation/Article/Journal/JournalIssue/Volume");
                it.Issue = ReadText(articleNode, "MedlineCitation/Article/Journal/JournalIssue/Issue");

                // Publication date is assembled as "Year,Month Day" from whichever parts exist.
                XmlNode yearNode = articleNode.SelectSingleNode("MedlineCitation/Article/Journal/JournalIssue/PubDate/Year");
                if (yearNode != null)
                {
                    it.Pubdate = yearNode.InnerText;
                }

                node = articleNode.SelectSingleNode("MedlineCitation/Article/Journal/JournalIssue/PubDate/Month");
                if (node != null)
                {
                    it.Pubdate = it.Pubdate + "," + node.InnerText;
                }

                node = articleNode.SelectSingleNode("MedlineCitation/Article/Journal/JournalIssue/PubDate/Day");
                if (node != null)
                {
                    it.Pubdate += " " + node.InnerText;
                }

                if (yearNode == null)
                {
                    // Some records carry a free-text MedlineDate instead of Year/Month/Day.
                    node = articleNode.SelectSingleNode("MedlineCitation/Article/Journal/JournalIssue/PubDate/MedlineDate");
                    if (node != null)
                    {
                        it.Pubdate = node.InnerText;
                    }
                }

                // Authors become a comma-separated list of "ForeName LastName".
                List<string> authorNames = new List<string>();
                XmlNodeList authorNodelist = articleNode.SelectNodes("MedlineCitation/Article/AuthorList/Author");
                if (authorNodelist != null)
                {
                    foreach (XmlNode authorNode in authorNodelist)
                    {
                        string forename = ReadText(authorNode, "./ForeName");
                        string lastname = ReadText(authorNode, "./LastName");

                        // Trim so a missing fore- or last name does not leave a stray space.
                        string fullName = (forename + " " + lastname).Trim();
                        if (fullName.Length == 0)
                        {
                            // Group authors have no personal name, only a CollectiveName.
                            fullName = ReadText(authorNode, "./CollectiveName").Trim();
                        }

                        if (fullName.Length > 0)
                        {
                            authorNames.Add(fullName);
                        }
                    }
                }
                it.Authors = string.Join(",", authorNames.ToArray());

                it.Pagination = ReadText(articleNode, "MedlineCitation/Article/Pagination/MedlinePgn");

                // Doi is only assigned when the record has one; otherwise the item keeps
                // whatever value PubMedItem starts with.
                node = articleNode.SelectSingleNode("PubmedData/ArticleIdList/ArticleId[@IdType='doi']");
                if (node != null)
                {
                    it.Doi = node.InnerText;
                }

                // Publication types are pipe-separated.
                it.PublicationType = JoinTexts(
                    articleNode.SelectNodes("MedlineCitation/Article/PublicationTypeList/PublicationType"), "|");

                pmItemList.Add(it);
            }

            return pmItemList;
        }

        /// <summary>
        /// Returns the InnerText of the first node matching <paramref name="xpath"/>
        /// under <paramref name="parent"/>, or an empty string when nothing matches.
        /// </summary>
        private static string ReadText(XmlNode parent, string xpath)
        {
            XmlNode found = parent.SelectSingleNode(xpath);
            return found != null ? found.InnerText : "";
        }

        /// <summary>
        /// Joins the InnerText of every node in <paramref name="nodes"/> with
        /// <paramref name="separator"/>; returns an empty string for a null or empty list.
        /// </summary>
        private static string JoinTexts(XmlNodeList nodes, string separator)
        {
            List<string> parts = new List<string>();
            if (nodes != null)
            {
                foreach (XmlNode item in nodes)
                {
                    parts.Add(item.InnerText);
                }
            }
            return string.Join(separator, parts.ToArray());
        }
Пример #2
0
        /// <summary>
        /// Converts a PubMed XML document (PubmedArticleSet root) into a list with one
        /// <see cref="PubMedItem"/> for each PubmedArticle element it contains.
        /// </summary>
        /// <param name="xml">
        /// XML text to parse. Null, empty, or text containing "XML not found for id"
        /// yields an empty list.
        /// </param>
        /// <returns>
        /// A non-null list of parsed items in document order. Missing optional elements
        /// map to empty strings; Doi and Pubdate are only set when the document
        /// provides a value.
        /// </returns>
        /// <exception cref="XmlException">The text cannot be parsed as XML.</exception>
        public List<PubMedItem> Parse(string xml)
        {
            List<PubMedItem> pmItemList = new List<PubMedItem>();

            // Ordinal search: the marker is a fixed ASCII phrase, not linguistic text.
            if (string.IsNullOrEmpty(xml)
                || xml.IndexOf("XML not found for id", System.StringComparison.Ordinal) > -1)
            {
                return pmItemList;
            }

            // Strip the known DTD URL so the parser is not pointed at a remote resource.
            xml = xml.Replace("http://www.ncbi.nlm.nih.gov/corehtml/query/DTD/pubmed_140101.dtd", "");

            XmlDocument xdoc = new XmlDocument();
            // Disable external resolution entirely; the Replace above handles only one
            // DTD version and external DTD/entity loading is an XXE risk.
            xdoc.XmlResolver = null;
            xdoc.LoadXml(xml);

            XmlNodeList articleNodeList = xdoc.SelectNodes("/PubmedArticleSet/PubmedArticle");
            if (articleNodeList == null)
            {
                return pmItemList;
            }

            foreach (XmlNode articleNode in articleNodeList)
            {
                PubMedItem it = new PubMedItem();
                it.Pmid  = InnerTextOrEmpty(articleNode, "MedlineCitation/PMID");
                it.Title = InnerTextOrEmpty(articleNode, "MedlineCitation/Article/ArticleTitle");

                // Abstracts can consist of several AbstractText sections; concatenate them all.
                it.Abstract = JoinInnerText(
                    articleNode.SelectNodes("MedlineCitation/Article/Abstract/AbstractText"), " ");

                // ISO abbreviation wins when present, otherwise use the full journal title.
                XmlNode node = articleNode.SelectSingleNode("MedlineCitation/Article/Journal/ISOAbbreviation");
                it.Journal = node != null
                    ? node.InnerText
                    : InnerTextOrEmpty(articleNode, "MedlineCitation/Article/Journal/Title");

                it.JIssn  = InnerTextOrEmpty(articleNode, "MedlineCitation/Article/Journal/ISSN");
                it.Volume = InnerTextOrEmpty(articleNode, "MedlineCitation/Article/Journal/JournalIssue/Volume");
                it.Issue  = InnerTextOrEmpty(articleNode, "MedlineCitation/Article/Journal/JournalIssue/Issue");

                // Build "Year,Month Day" from the parts that are present.
                XmlNode yearNode = articleNode.SelectSingleNode("MedlineCitation/Article/Journal/JournalIssue/PubDate/Year");
                if (yearNode != null)
                {
                    it.Pubdate = yearNode.InnerText;
                }

                node = articleNode.SelectSingleNode("MedlineCitation/Article/Journal/JournalIssue/PubDate/Month");
                if (node != null)
                {
                    it.Pubdate = it.Pubdate + "," + node.InnerText;
                }

                node = articleNode.SelectSingleNode("MedlineCitation/Article/Journal/JournalIssue/PubDate/Day");
                if (node != null)
                {
                    it.Pubdate += " " + node.InnerText;
                }

                if (yearNode == null)
                {
                    // Records without a Year may provide a free-text MedlineDate instead.
                    node = articleNode.SelectSingleNode("MedlineCitation/Article/Journal/JournalIssue/PubDate/MedlineDate");
                    if (node != null)
                    {
                        it.Pubdate = node.InnerText;
                    }
                }

                // Comma-separated "ForeName LastName" entries.
                List<string> authorNames = new List<string>();
                XmlNodeList authorNodelist = articleNode.SelectNodes("MedlineCitation/Article/AuthorList/Author");
                if (authorNodelist != null)
                {
                    foreach (XmlNode authorNode in authorNodelist)
                    {
                        string lastname = InnerTextOrEmpty(authorNode, "./LastName");
                        string forename = InnerTextOrEmpty(authorNode, "./ForeName");

                        // Trim removes the dangling space left when one part is missing.
                        string displayName = (forename + " " + lastname).Trim();
                        if (displayName.Length == 0)
                        {
                            // Collective (group) authors carry only a CollectiveName element.
                            displayName = InnerTextOrEmpty(authorNode, "./CollectiveName").Trim();
                        }

                        if (displayName.Length > 0)
                        {
                            authorNames.Add(displayName);
                        }
                    }
                }
                it.Authors = string.Join(",", authorNames.ToArray());

                it.Pagination = InnerTextOrEmpty(articleNode, "MedlineCitation/Article/Pagination/MedlinePgn");

                // Doi stays at PubMedItem's initial value when the record has no DOI id.
                node = articleNode.SelectSingleNode("PubmedData/ArticleIdList/ArticleId[@IdType='doi']");
                if (node != null)
                {
                    it.Doi = node.InnerText;
                }

                // Pipe-separated publication types.
                it.PublicationType = JoinInnerText(
                    articleNode.SelectNodes("MedlineCitation/Article/PublicationTypeList/PublicationType"), "|");

                pmItemList.Add(it);
            }

            return pmItemList;
        }

        /// <summary>
        /// Looks up the first node matching <paramref name="xpath"/> relative to
        /// <paramref name="context"/> and returns its InnerText, or "" when absent.
        /// </summary>
        private static string InnerTextOrEmpty(XmlNode context, string xpath)
        {
            XmlNode match = context.SelectSingleNode(xpath);
            if (match == null)
            {
                return "";
            }
            return match.InnerText;
        }

        /// <summary>
        /// Concatenates the InnerText of each node in <paramref name="nodes"/>, placing
        /// <paramref name="separator"/> between entries. A null or empty list gives "".
        /// </summary>
        private static string JoinInnerText(XmlNodeList nodes, string separator)
        {
            if (nodes == null)
            {
                return "";
            }

            List<string> texts = new List<string>();
            foreach (XmlNode entry in nodes)
            {
                texts.Add(entry.InnerText);
            }
            return string.Join(separator, texts.ToArray());
        }