/// <summary>
/// Handles the page load: reads the "q" request value, sanitises it, then either
/// records it as a recent search or hands it to <c>SearchResource.indexText</c>,
/// and finally fills <c>pages</c> with the matches for the resulting text.
/// </summary>
/// <remarks>
/// Text starting with "http://" or "https://" is passed to the indexer with the
/// delete flag cleared; text starting with "delete" that also contains one of
/// those prefixes is passed with the delete flag set. Anything else is queued in
/// <c>SearchResource.searchList</c>, which is trimmed to 15 entries.
/// </remarks>
/// <param name="e">Event data, forwarded to the base implementation.</param>
protected override void OnLoad (EventArgs e)
{
    base.OnLoad (e);

    name = Request ["q"];
    // A request without a "q" value yields null; treat it like an over-long
    // query instead of failing on name.Length.
    // NOTE(review): as with the original length guard, this early exit leaves
    // `pages` and `begin` unassigned - confirm the markup copes with that.
    if (name == null || name.Length > 500) {
        name = "";
        return;
    }

    // String.Replace works on literal text, so the dollar sign is given
    // unescaped here (an escaped "\\$" would only match a backslash followed
    // by a dollar sign).
    name = name.Replace ("<", " ").Replace (">", " ")
        .Replace ("\"", " ").Replace (",", " ")
        .Replace ("$", " ").ToLower ().Trim ();

    // Protocol prefixes are plain ASCII markers, so compare them ordinally
    // rather than with culture-sensitive rules.
    bool isUrl = name.StartsWith ("http://", StringComparison.Ordinal)
        || name.StartsWith ("https://", StringComparison.Ordinal);
    bool isDeleteCommand = !isUrl
        && name.StartsWith ("delete", StringComparison.Ordinal)
        && (name.Contains ("http://") || name.Contains ("https://"));

    if (isUrl || isDeleteCommand) {
        name = SearchResource.indexText (name, isDeleteCommand);
    } else {
        SearchResource.searchList.Enqueue (name);
        // Keep only the 15 most recently queued search terms.
        while (SearchResource.searchList.Count > 15) {
            String t;
            SearchResource.searchList.TryDequeue (out t);
        }
    }

    pages = new List<Page> ();
    begin = DateTime.Now;
    using (var box = SDB.search_db.Cube ()) {
        foreach (KeyWord kw in SearchResource.engine.searchDistinct (box, name)) {
            Page p = box ["Page", kw.ID].Select ().Select<Page> ();
            p.keyWord = kw;
            pages.Add (p);
            // Stop collecting once the list has grown past 100 entries.
            if (pages.Count > 100) {
                break;
            }
        }
    }

    // Nothing matched: show a single placeholder entry instead of an empty list.
    if (pages.Count == 0) {
        Page p = new Page ();
        p.title = "NotFound";
        p.content = "input URL to index";
        p.url = "https://github.com/iboxdb/ftserver-cs";
        pages.Add (p);
    }
}
/// <summary>
/// Fetches the document at <paramref name="url"/> and builds a <c>Page</c> whose
/// title, description and content are extracted from it.
/// </summary>
/// <param name="url">Address to fetch; must be between 8 and 100 characters long.</param>
/// <returns>
/// The populated page, or <c>null</c> when the URL is missing or outside the
/// accepted length range, when fewer than 50 characters of text are extracted,
/// or when any exception is raised while fetching or parsing (the exception is
/// written to the console).
/// </returns>
public static Page Get (String url)
{
    const int minUrlLength = 8;
    const int maxUrlLength = 100;
    const int maxTitleLength = 80;
    const int maxDescriptionLength = 200;
    const int minContentLength = 50;
    const int maxContentLength = 5000;

    try {
        if (url == null || url.Length > maxUrlLength || url.Length < minUrlLength) {
            return null;
        }

        Page page = new Page ();
        page.url = url;

        CQ doc = CQ.CreateFromUrl (url);
        doc ["script"].Remove ();
        doc ["style"].Remove ();
        doc ["Script"].Remove ();
        doc ["Style"].Remove ();

        // Title: fall back to the URL when it is missing or too short.
        page.title = doc ["title"].Text ();
        if (page.title == null) {
            page.title = doc ["Title"].Text ();
        }
        if (page.title == null) {
            page.title = url;
        }
        page.title = page.title.Trim ();
        if (page.title.Length < 2) {
            page.title = url;
        }
        if (page.title.Length > maxTitleLength) {
            page.title = page.title.Substring (0, maxTitleLength);
        }
        page.title = page.title.Replace ("<", " ")
            .Replace (">", " ").Replace ("$", " ");

        // Description: taken from the description meta tag, empty when absent.
        page.description = doc ["meta[name='description']"].Attr ("content");
        if (page.description == null) {
            page.description = doc ["meta[name='Description']"].Attr ("content");
        }
        if (page.description == null) {
            page.description = "";
        }
        if (page.description.Length > maxDescriptionLength) {
            page.description = page.description.Substring (0, maxDescriptionLength);
        }
        page.description = page.description.Replace ("<", " ")
            .Replace (">", " ").Replace ("$", " ");

        // Re-parse the extracted text as markup and strip script/style again.
        // The previous code also chained Replace ("<", "<") and
        // Replace (">", ">") here; those calls could not alter the string and
        // have been dropped.
        doc = CQ.Create (doc.Text ());
        doc ["script"].Remove ();
        doc ["style"].Remove ();
        doc ["Script"].Remove ();
        doc ["Style"].Remove ();

        String content = doc.Text ().Trim ();
        if (content.Length < minContentLength) {
            return null;
        }
        if (content.Length > maxContentLength) {
            content = content.Substring (0, maxContentLength);
        }

        // The old chain replaced a space with a space several times, which did
        // nothing, so tabs and runs of blanks survived. Collapse every run of
        // whitespace, '<', '>' and '$' into a single space in one pass instead.
        // Regex is fully qualified so no extra using directive is required.
        String combined = content + " " + page.url + " " + page.description;
        page.content = System.Text.RegularExpressions.Regex.Replace (
            combined, "[\\s<>$]+", " ");

        return page;
    } catch (Exception ex) {
        // Best effort: any fetch or parse failure means "no page".
        Console.WriteLine (ex.ToString ());
        return null;
    }
}
/// <summary>
/// Fetches the document at <paramref name="url"/> and turns it into a <c>Page</c>
/// carrying a cleaned title, description and text content.
/// </summary>
/// <param name="url">
/// Address to fetch; rejected when null, shorter than 8 characters or longer
/// than <c>MAX_URL_LENGTH</c>.
/// </param>
/// <returns>
/// The populated page, or <c>null</c> when the URL is rejected, the title
/// contains the replacement character, the cleaned text is shorter than 50
/// characters, or an exception is raised along the way (it is written to the
/// console).
/// </returns>
public static Page Get (String url)
{
    try {
        bool urlAccepted = url != null
            && url.Length <= MAX_URL_LENGTH
            && url.Length >= 8;
        if (!urlAccepted) {
            return null;
        }

        Page page = new Page ();
        page.url = url;

        CQ doc = CQ.CreateFromUrl (url);

        // Drop elements whose text should not be indexed.
        String[] ignoredSelectors = {
            "script", "Script",
            "style", "Style",
            "textarea", "Textarea",
            "noscript", "Noscript"
        };
        foreach (String selector in ignoredSelectors) {
            doc [selector].Remove ();
        }

        // Title: first non-null candidate, falling back to the URL.
        String title = doc ["title"].Text () ?? doc ["Title"].Text () ?? url;
        title = title.Trim ();
        if (title.Length < 2) {
            title = url;
        }
        if (title.Length > 80) {
            title = title.Substring (0, 80);
        }
        title = title.Replace ("<", " ").Replace (">", " ").Replace ("$", " ");
        page.title = title;

        // The title has been captured, so take it out of the body text.
        doc ["title"].Remove ();
        doc ["Title"].Remove ();

        // A replacement character in the title is treated as a sign that the
        // document was decoded with the wrong encoding.
        if (title.Contains ("�")) {
            return null;
        }

        String zeroWidthSpace = ((char)8203).ToString ();

        // Description: content of the description meta tag, empty when absent.
        String description = doc ["meta[name='description']"].Attr ("content")
            ?? doc ["meta[name='Description']"].Attr ("content")
            ?? "";
        if (description.Length > 200) {
            description = description.Substring (0, 200);
        }
        description = description.Replace ("<", " ")
            .Replace (">", " ")
            .Replace ("$", " ")
            .Replace (zeroWidthSpace, "");
        page.description = description;

        fixSpan (doc);

        // Body text: strip unwanted characters, then collapse whitespace runs.
        String text = doc.Text ();
        text = text.Replace (" ", " ");
        text = text.Replace (zeroWidthSpace, "");
        text = Regex.Replace (text, "\t|\r|\n|�|<|>", " ");
        text = Regex.Replace (text, "\\$", " ");
        text = Regex.Replace (text, "\\s+", " ");
        text = text.Trim ();

        if (text.Length < 50) {
            return null;
        }
        if (text.Length > 5000) {
            text = text.Substring (0, 5000);
        }

        page.content = text + " " + page.url;
        return page;
    } catch (Exception ex) {
        Console.WriteLine (ex.ToString ());
        return null;
    }
}