Example #1
0
        /// <summary>
        /// Appends a <c>&lt;u&gt;</c> line holding the document URL and a <c>&lt;t&gt;</c> line
        /// holding the document title to the end of every file in <c>Helper.DOCS_PATH</c>.
        /// </summary>
        /// <remarks>
        /// The document id is taken from the part of each file name that precedes the first
        /// '.', so file names are expected to look like <c>123.ext</c>. A name without a '.'
        /// or with a non-numeric prefix makes the id conversion throw. The loop index is
        /// written to the console after each file as a progress indicator.
        /// </remarks>
        public void AddTitleUrlTags()
        {
            string[] files = Directory.GetFiles(Helper.DOCS_PATH);

            for (int i = 0; i < files.Length; i++)
            {
                FileInfo fi     = new FileInfo(files[i]);
                int      pageId = Convert.ToInt32(fi.Name.Substring(0, fi.Name.IndexOf('.')));
                d.Doc    p      = new d.Doc(pageId);

                // Open in append mode. The using statement releases the file handle even
                // when reading p.Url / p.Title or one of the writes throws.
                using (StreamWriter sw = new StreamWriter(files[i], true))
                {
                    sw.WriteLine("<u>" + p.Url + "</u>");
                    sw.WriteLine("<t>" + p.Title + "</t>");
                }

                Console.WriteLine(i);
            }
        }
Example #2
0
        /// <summary>
        /// Scans the stored documents in <c>Helper.DOCS_PATH</c> and records the anchor text
        /// of every link that points at another known document.
        /// </summary>
        /// <remarks>
        /// For each file, the document id is parsed from the file name (the part before the
        /// first '.'), the file content is matched against the regex returned by
        /// <c>makeRegex()</c>, and for every match group 3 is used as the link target and
        /// group 4 as the link text. The target is resolved against the document URL and
        /// normalized. If <c>DocData.GetIdByUrl</c> returns a positive id and the raw link
        /// text is non-empty, the whitespace-normalized text is stored through
        /// <c>LinkData.UpdateText</c>. A link that fails to process is reported on the
        /// error stream and skipped; it does not stop the run.
        /// </remarks>
        public void Run()
        {
            // Files before this index were handled by an earlier run ("545 already done").
            // NOTE(review): Directory.GetFiles does not guarantee ordering, so resuming by
            // index assumes the listing order is the same as in that earlier run.
            const int startIndex = 545;

            UrlHelper urlHelper     = new UrlHelper();
            Regex     linkRegex     = makeRegex();
            Regex     whitespaceRun = new Regex(@"\s+");

            d.DocData  pd = new d.DocData();
            d.LinkData ld = new d.LinkData();

            string[] files = Directory.GetFiles(Helper.DOCS_PATH);

            for (int i = startIndex; i < files.Length; i++)
            {
                Console.WriteLine("processing file #" + i);

                FileInfo fi      = new FileInfo(files[i]);
                int      pageId  = Convert.ToInt32(fi.Name.Substring(0, fi.Name.IndexOf('.')));
                d.Doc    p       = new d.Doc(pageId);
                Uri      baseUri = new Uri(p.Url);

                // Dispose the reader (and the underlying file stream) as soon as the
                // content is in memory, so handles do not pile up over thousands of files.
                string html;
                using (StreamReader sr = new StreamReader(fi.OpenRead()))
                {
                    html = sr.ReadToEnd();
                }

                MatchCollection mc = linkRegex.Matches(html);
                Console.WriteLine("found " + mc.Count + " links");

                foreach (Match m in mc)
                {
                    try
                    {
                        Uri    childUri      = new Uri(baseUri, urlHelper.MakeLink(m.Groups[3].ToString()));
                        string linkToProcess = urlHelper.NormalizeUrl(childUri.AbsoluteUri);
                        string linkText      = m.Groups[4].ToString();

                        int linkId = pd.GetIdByUrl(linkToProcess);
                        if (linkId > 0 && linkText != "")                         //found page!
                        {
                            // Collapse every run of whitespace into one space, then trim.
                            linkText = whitespaceRun.Replace(linkText, " ").Trim();
                            ld.UpdateText(pageId, linkId, linkText);
                        }
                    }
                    catch (Exception ex)
                    {
                        // Best effort: one bad link must not abort the whole file, but the
                        // failure is reported instead of being silently discarded.
                        Console.Error.WriteLine("skipping link in file #" + i + ": " + ex.Message);
                    }
                }
            }
        }
Example #3
0
 /// <summary>
 /// Fills <c>lblUrl</c> with a link to the current document's URL (opening in a new
 /// window) followed by the document title in parentheses.
 /// </summary>
 /// <remarks>
 /// The document is looked up with the id returned by <c>getId()</c>. Its URL and title
 /// are HTML-encoded before being placed into the markup so that characters such as
 /// quotes or angle brackets in the stored values cannot break out of the link or
 /// inject markup into the page.
 /// </remarks>
 private void BindTitle()
 {
     d.Doc doc = new d.Doc(getId());

     // The attribute is delimited with double quotes because HtmlAttributeEncode is
     // documented to encode '"', '&' and '<' (not necessarily the single quote).
     string href  = System.Web.HttpUtility.HtmlAttributeEncode(doc.Url);
     string url   = System.Web.HttpUtility.HtmlEncode(doc.Url);
     string title = System.Web.HttpUtility.HtmlEncode(doc.Title);

     lblUrl.Text = "<a href=\"" + href + "\" target=\"_blank\">" + url + "</a> (" + title + ")";
 }