Пример #1
0
        /// <summary>
        /// Rebuilds <c>Terms</c> from the rows of <c>dgvTerms</c> and saves the result to
        /// <c>xml\{ASIN}.entities.xml</c> under the current directory, where the ASIN is
        /// taken from <c>txtAsin</c>.
        /// </summary>
        /// <remarks>
        /// Column layout read from each row: 0 = type image, 1 = name, 2 = comma-separated
        /// aliases, 3 = description, 4 = description URL, 5 = description source,
        /// 6 = match, 7 = match case, 9 = regex aliases. Term ids are assigned
        /// sequentially starting at 1.
        /// </remarks>
        private void CreateTerms()
        {
            // Path.Combine avoids doubled/missing separators (e.g. when the current directory
            // is a drive root). Directory.CreateDirectory does nothing if the directory exists.
            string xmlDir = Path.Combine(Environment.CurrentDirectory, "xml");
            Directory.CreateDirectory(xmlDir);
            string outfile = Path.Combine(xmlDir, $"{txtAsin.Text}.entities.xml");

            Terms.Clear();
            var termId = 1;

            foreach (DataGridViewRow row in dgvTerms.Rows)
            {
                // The uncommitted placeholder row for new records holds no term data.
                if (row.IsNewRow)
                {
                    continue;
                }

                string aliasText = GetCellText(row.Cells[2]);
                XRay.Term newTerm = new XRay.Term
                {
                    Id       = termId++,
                    Type     = CompareImages((Bitmap)row.Cells[0].Value, Resources.character) ? "character" : "topic",
                    TermName = GetCellText(row.Cells[1]),
                    Aliases  = aliasText != ""
                        ? aliasText.Split(',').Distinct().ToList()
                        : new List <string>(),
                    Desc         = GetCellText(row.Cells[3]),
                    DescUrl      = GetCellText(row.Cells[4]),
                    DescSrc      = GetCellText(row.Cells[5]),
                    Match        = GetCellFlag(row.Cells[6]),
                    MatchCase    = GetCellFlag(row.Cells[7]),
                    RegexAliases = GetCellFlag(row.Cells[9])
                };
                Terms.Add(newTerm);
            }
            Functions.Save(Terms, outfile);
        }

        /// <summary>Returns the cell's value as text, or an empty string when the cell has no value.</summary>
        private static string GetCellText(DataGridViewCell cell)
        {
            return cell.Value?.ToString() ?? "";
        }

        /// <summary>
        /// Returns the cell's boolean value, treating a cell with no value as <c>false</c>.
        /// A non-null value that is not a <c>bool</c> still throws, as before.
        /// </summary>
        private static bool GetCellFlag(DataGridViewCell cell)
        {
            return cell.Value != null && (bool)cell.Value;
        }
Пример #2
0
        /// <summary>
        /// Extract terms from the given db.
        /// </summary>
        /// <remarks>
        /// This is an iterator: the connection is opened and the query executed only when the
        /// result is enumerated, and rows are read lazily as the caller advances. The command and
        /// reader are released when enumeration finishes, is abandoned early (the enumerator is
        /// disposed), or fails with an exception.
        /// </remarks>
        /// <param name="xrayDb">Connection to any db containing the proper dataset.</param>
        /// <param name="singleUse">
        /// If set, will close the connection when enumeration ends, including early termination
        /// and failure.
        /// </param>
        /// <returns>
        /// One term per entity row with <c>has_info_card = '1'</c>. <c>Locs</c> is filled with
        /// <c>count</c> null placeholders so that only its size is meaningful.
        /// </returns>
        public static IEnumerable <XRay.Term> ExtractTermsNew(DbConnection xrayDb, bool singleUse)
        {
            if (xrayDb.State != ConnectionState.Open)
            {
                xrayDb.Open();
            }

            // try/finally + using (rather than cleanup after the loop) so that resources are
            // released even when the consumer stops enumerating before the last row.
            try
            {
                using (var command = xrayDb.CreateCommand())
                {
                    command.CommandText = "SELECT entity.id,entity.label,entity.type,entity.count,entity_description.text,string.text as sourcetxt FROM entity"
                                          + " LEFT JOIN entity_description ON entity.id = entity_description.entity"
                                          + " LEFT JOIN source ON entity_description.source = source.id"
                                          + " LEFT JOIN string ON source.label = string.id AND string.language = 'en'"
                                          + " WHERE entity.has_info_card = '1'";

                    using (var reader = command.ExecuteReader())
                    {
                        while (reader.Read())
                        {
                            var newTerm = new XRay.Term
                            {
                                Id       = Convert.ToInt32(reader["id"]),
                                TermName = (string)reader["label"],
                                Type     = Convert.ToInt32(reader["type"]) == 1 ? "character" : "topic",
                                // Both columns come from LEFT JOINs and may therefore be NULL.
                                Desc     = reader["text"] == DBNull.Value ? "" : (string)reader["text"],
                                DescSrc  = reader["sourcetxt"] == DBNull.Value ? "" : (string)reader["sourcetxt"]
                            };

                            // Real locations aren't needed for extracting terms for preview or XML saving, but need count
                            for (var remaining = Convert.ToInt32(reader["count"]); remaining > 0; remaining--)
                            {
                                newTerm.Locs.Add(null);
                            }

                            // TODO: Should probably also confirm whether this URL exists or not
                            if (newTerm.DescSrc == "Wikipedia")
                            {
                                newTerm.DescUrl = string.Format(@"http://en.wikipedia.org/wiki/{0}", newTerm.TermName.Replace(" ", "_"));
                            }
                            yield return newTerm;
                        }
                    }
                }
            }
            finally
            {
                if (singleUse)
                {
                    xrayDb.Close();
                }
            }
        }
Пример #3
0
        /// <summary>
        /// Reads terms from a UTF-8 text file into <c>Terms</c>.
        /// </summary>
        /// <remarks>
        /// The file is a sequence of three-line records: a type line ("character" or "topic",
        /// compared case-insensitively), a name line and a description line. Empty lines where a
        /// type line is expected are skipped. Each term gets a sequential id starting at 1,
        /// <c>DescSrc</c> "shelfari", and <c>MatchCase</c> enabled only for characters.
        /// <c>Terms</c> is cleared first; if an error occurs, the terms parsed before the error
        /// remain in it.
        /// </remarks>
        /// <param name="txtfile">Path of the text file to read.</param>
        /// <returns>
        /// An empty list on success (parsed terms go to <c>Terms</c>; the returned list is never
        /// populated), or <c>null</c> after an error has been shown to the user.
        /// </returns>
        private List <T> LoadTermsFromTxt <T>(string txtfile)
        {
            List <T> itemList = new List <T>();

            using (StreamReader streamReader = new StreamReader(txtfile, Encoding.UTF8))
            {
                int termId = 1;
                // Number of lines consumed so far, i.e. the 1-based number of the line just read.
                int lineCount = 0;
                Terms.Clear();
                while (!streamReader.EndOfStream)
                {
                    try
                    {
                        // Invariant lower-casing: culture-sensitive casing can turn "TOPIC"
                        // into a string that no longer equals "topic" (e.g. Turkish dotless i).
                        string temp = streamReader.ReadLine()?.ToLowerInvariant();
                        lineCount++;
                        if (string.IsNullOrEmpty(temp))
                        {
                            continue;
                        }
                        if (temp != "character" && temp != "topic")
                        {
                            MessageBox.Show("Error: Invalid term type \"" + temp + "\" on line " + lineCount);
                            return(null);
                        }
                        XRay.Term newTerm = new XRay.Term
                        {
                            Type     = temp,
                            TermName = streamReader.ReadLine(),
                            Desc     = streamReader.ReadLine()
                        };
                        lineCount        += 2;
                        newTerm.MatchCase = temp == "character";
                        newTerm.DescSrc   = "shelfari";
                        newTerm.Id        = termId++;
                        Terms.Add(newTerm);
                    }
                    catch (Exception ex)
                    {
                        MessageBox.Show("An error occurred reading from txt file: " + ex.Message + "\r\n" + ex.StackTrace);
                        return(null);
                    }
                }
            }
            return(itemList);
        }
Пример #4
0
        // Are there actually any goodreads pages that aren't at goodreads.com for other languages??
        /// <summary>
        /// Downloads a character page and builds a "character" term from it.
        /// </summary>
        /// <param name="baseUrl">Any absolute URL on the target site; only its scheme and authority are used.</param>
        /// <param name="relativeUrl">URL of the character page, resolved against the authority of <paramref name="baseUrl"/>.</param>
        /// <returns>
        /// A term with <c>DescSrc</c> "Goodreads", <c>DescUrl</c> set to the resolved page URL and
        /// <c>TermName</c> taken from the page's h1. Aliases and description are filled in only
        /// when the corresponding boxes are found on the page.
        /// </returns>
        private async Task <XRay.Term> GetTerm(string baseUrl, string relativeUrl)
        {
            XRay.Term result  = new XRay.Term("character");
            Uri       tempUri = new Uri(baseUrl);

            tempUri        = new Uri(new Uri(tempUri.GetLeftPart(UriPartial.Authority)), relativeUrl);
            result.DescSrc = "Goodreads";
            result.DescUrl = tempUri.ToString();
            HtmlDocument charDoc = new HtmlDocument();

            charDoc.LoadHtml(await HttpDownloader.GetPageHtmlAsync(tempUri.ToString()));
            // The class attribute is matched exactly, so try the variant with a trailing space too.
            HtmlNode mainNode = charDoc.DocumentNode.SelectSingleNode("//div[@class='mainContentFloat']")
                                ?? charDoc.DocumentNode.SelectSingleNode("//div[@class='mainContentFloat ']");

            // NOTE(review): mainNode and its h1 are not null-checked; a page without them still
            // fails with a NullReferenceException here, as it always has.
            result.TermName = mainNode.SelectSingleNode("./h1").InnerText;

            HtmlNode detailsNode = mainNode.SelectSingleNode("//div[@class='grey500BoxContent']");
            if (detailsNode == null)
            {
                // No details box on the page: return the term with just its name and URL.
                return(result);
            }

            HtmlNodeCollection tempNodes = detailsNode.SelectNodes("//div[@class='floatingBox']");
            if (tempNodes == null)
            {
                return(result);
            }
            foreach (HtmlNode tempNode in tempNodes)
            {
                string boxText = tempNode.InnerText.Replace("[close]", "").Trim();
                if (tempNode.Id.Contains("_aliases")) // If present, add any aliases found
                {
                    result.Aliases.AddRange(boxText.Split(new [] { ", " }, StringSplitOptions.RemoveEmptyEntries));
                }
                else
                {
                    // Any other floating box is treated as the description; the last one wins.
                    result.Desc = boxText;
                }
            }
            return(result);
        }
Пример #5
0
        /// <summary>
        /// Scrapes the characters, organizations, settings and glossary sections of a Shelfari
        /// book page into a list of terms.
        /// </summary>
        /// <remarks>
        /// The page is downloaded only when <c>sourceHtmlDoc</c> has not been loaded yet.
        /// A list item of the form "name: description" is split at the first colon. Terms whose
        /// name was already seen are logged and skipped.
        /// </remarks>
        /// <param name="dataUrl">URL of the book page; also used as the term URL when an item has no link of its own.</param>
        /// <param name="progress">Not used by this implementation.</param>
        /// <param name="token">Not used by this implementation.</param>
        /// <returns>The terms found, in page order; empty when none of the sections are present.</returns>
        public override async Task <List <XRay.Term> > GetTerms(string dataUrl, IProgressBar progress, CancellationToken token)
        {
            // Offset of the attribute value in markup that starts with <a href="
            const int hrefValueStart = 9;

            Logger.Log("Downloading Shelfari page...");
            List <XRay.Term> terms = new List <XRay.Term>();
            // Names already added, for constant-time duplicate detection.
            HashSet <string> seenNames = new HashSet <string>();

            if (sourceHtmlDoc == null)
            {
                sourceHtmlDoc = new HtmlDocument();
                sourceHtmlDoc.LoadHtml(await HttpDownloader.GetPageHtmlAsync(dataUrl));
            }

            //Constants for wiki processing
            Dictionary <string, string> sections = new Dictionary <string, string>
            {
                { "WikiModule_Characters", "character" },
                { "WikiModule_Organizations", "topic" },
                { "WikiModule_Settings", "topic" },
                { "WikiModule_Glossary", "topic" }
            };

            foreach (KeyValuePair <string, string> section in sections)
            {
                string header = section.Key;
                HtmlNodeCollection characterNodes =
                    sourceHtmlDoc.DocumentNode.SelectNodes("//div[@id='" + header + "']//ul[@class='li_6']/li");
                if (characterNodes == null)
                {
                    continue;                         //Skip section if not found on page
                }
                foreach (HtmlNode li in characterNodes)
                {
                    string    tmpString = li.InnerText;
                    XRay.Term newTerm   = new XRay.Term(section.Value); //Create term as either character/topic
                    int       colonPos  = tmpString.IndexOf(':');
                    if (colonPos >= 0)
                    {
                        newTerm.TermName = tmpString.Substring(0, colonPos);
                        newTerm.Desc     = tmpString.Substring(colonPos + 1).Replace("&amp;", "&").Trim();
                    }
                    else
                    {
                        newTerm.TermName = tmpString;
                    }
                    newTerm.DescSrc = "shelfari";

                    //Use either the associated shelfari URL of the term or if none exists, use the book's url
                    newTerm.DescUrl = dataUrl;
                    string innerHtml = li.InnerHtml;
                    if (innerHtml.StartsWith("<a href", System.StringComparison.Ordinal) && innerHtml.Length > hrefValueStart)
                    {
                        // Only take the link when the attribute value is properly terminated;
                        // otherwise keep the book's url instead of failing on malformed markup.
                        int closingQuote = innerHtml.IndexOf('"', hrefValueStart);
                        if (closingQuote >= hrefValueStart)
                        {
                            newTerm.DescUrl = innerHtml.Substring(hrefValueStart, closingQuote - hrefValueStart);
                        }
                    }

                    //Default glossary terms to be case insensitive when searching through book
                    if (header == "WikiModule_Glossary")
                    {
                        newTerm.MatchCase = false;
                    }

                    // HashSet.Add returns false when the name is already present.
                    if (seenNames.Add(newTerm.TermName))
                    {
                        terms.Add(newTerm);
                    }
                    else
                    {
                        Logger.Log("Duplicate term \"" + newTerm.TermName + "\" found. Ignoring this duplicate.");
                    }
                }
            }
            return(terms);
        }