Esempio n. 1
0
        /// <summary>
        /// Handles the page load: reads the "q" request value, sanitizes it, then either
        /// records it as a recent search term or hands it to the indexer (when it looks
        /// like a URL or a "delete ... URL" command), and finally fills <c>pages</c>
        /// with the search results for the resulting text.
        /// </summary>
        /// <param name="e">Event data, forwarded to the base implementation.</param>
        protected override void OnLoad(EventArgs e)
        {
            base.OnLoad (e);

            // Request["q"] is null when the parameter is absent; treat that as an
            // empty query instead of failing with a NullReferenceException below.
            name = Request ["q"] ?? "";

            // Over-long input is discarded; nothing else is prepared in that case.
            if (name.Length > 500) {
                name = "";
                return;
            }

            // String.Replace is a literal replace (not a regex), so the dollar sign is
            // passed unescaped. Lower-casing is culture-invariant so URLs are not
            // altered by the server's locale.
            name = name.Replace ("<", " ").Replace (">", " ")
                .Replace ("\"", " ").Replace (",", " ")
                .Replace ("$", " ").ToLowerInvariant ().Trim ();

            // null  -> plain search text
            // false -> input starts with a URL: index it
            // true  -> input starts with "delete" and contains a URL: delete request
            bool? isdelete = null;

            if (name.StartsWith ("http://", StringComparison.Ordinal)
                || name.StartsWith ("https://", StringComparison.Ordinal)) {
                isdelete = false;
            } else if (name.StartsWith ("delete", StringComparison.Ordinal)
                && (name.Contains ("http://") || name.Contains ("https://"))) {
                isdelete = true;
            }

            if (!isdelete.HasValue) {
                // Remember the search term and trim the queue back to 15 entries.
                SearchResource.searchList.Enqueue (name);
                while (SearchResource.searchList.Count > 15) {
                    String t;
                    SearchResource.searchList.TryDequeue (out t);
                }
            } else {
                // indexText's return value becomes the text that is searched for below.
                name = SearchResource.indexText (name, isdelete.Value);
            }

            pages = new List<Page> ();
            begin = DateTime.Now;

            using (var box = SDB.search_db.Cube()) {
                foreach (KeyWord kw in SearchResource.engine.searchDistinct(box, name)) {
                    Page p = box ["Page", kw.ID].Select ().Select<Page> ();
                    p.keyWord = kw;
                    pages.Add (p);
                    // Stop once more than 100 pages have been collected.
                    if (pages.Count > 100) {
                        break;
                    }
                }
            }

            // Always show at least one entry: a placeholder when nothing matched.
            if (pages.Count == 0) {
                Page p = new Page ();
                p.title = "NotFound";
                p.content = "input URL to index";
                p.url = "https://github.com/iboxdb/ftserver-cs";
                pages.Add (p);
            }
        }
Esempio n. 2
0
        /// <summary>
        /// Downloads <paramref name="url"/> and builds a <c>Page</c> from it: a sanitized
        /// title (at most 80 characters), a sanitized meta description (at most 200
        /// characters) and the page text (at most 5000 characters) followed by the URL
        /// and the description.
        /// </summary>
        /// <param name="url">Address to fetch; must be between 8 and 100 characters long.</param>
        /// <returns>
        /// The populated page, or <c>null</c> when the URL length is out of range, the
        /// extracted text is shorter than 50 characters, or any exception is thrown
        /// (the exception is written to the console).
        /// </returns>
        public static Page Get(String url)
        {
            try {
                if (url == null || url.Length > 100 || url.Length < 8) {
                    return null;
                }
                Page page = new Page ();
                page.url = url;

                CQ doc = CQ.CreateFromUrl (url);
                //Console.WriteLine(doc.Html());
                doc ["script"].Remove ();
                doc ["style"].Remove ();
                doc ["Script"].Remove ();
                doc ["Style"].Remove ();

                // Title: fall back to the URL when missing or too short.
                page.title = doc ["title"].Text ();
                if (page.title == null) {
                    page.title = doc ["Title"].Text ();
                }
                if (page.title == null) {
                    page.title = url;
                }
                page.title = page.title.Trim ();
                if (page.title.Length < 2) {
                    page.title = url;
                }
                if (page.title.Length > 80) {
                    page.title = page.title.Substring (0, 80);
                }
                page.title = page.title.Replace ("<", " ")
                    .Replace (">", " ").Replace ("$", " ");

                // Description: taken from the meta tag, empty when absent.
                page.description = doc ["meta[name='description']"].Attr ("content");
                if (page.description == null) {
                    page.description = doc ["meta[name='Description']"].Attr ("content");
                }
                if (page.description == null) {
                    page.description = "";
                }
                if (page.description.Length > 200) {
                    page.description = page.description.Substring (0, 200);
                }
                page.description = page.description.Replace ("<", " ")
                    .Replace (">", " ").Replace ("$", " ");

                // Re-parse the extracted text with escaped angle brackets restored, so
                // markup that was entity-encoded in the page is stripped as well.
                doc = CQ.Create (doc.Text ().Replace ("&lt;", "<")
                            .Replace ("&gt;", ">"));
                doc ["script"].Remove ();
                doc ["style"].Remove ();
                doc ["Script"].Remove ();
                doc ["Style"].Remove ();

                String content = doc.Text ().Trim();
                if (content.Length < 50) {
                    return null;
                }
                if (content.Length > 5000) {
                    content = content.Substring (0, 5000);
                }

                // Sanitize first, then collapse whitespace, so the blanks introduced by
                // the sanitizing step are folded too.
                String combined = (content
                    + " " + page.url
                    + " " + page.description)
                       .Replace ("<", " ")
                       .Replace (">", " ").Replace ("$", " ");

                // One regex pass folds every whitespace run (spaces, tabs, line breaks
                // and Unicode space separators) into a single blank. A chain of
                // fixed-width Replace calls leaves long runs only partly collapsed.
                page.content = System.Text.RegularExpressions.Regex
                    .Replace (combined, "\\s+", " ").Trim ();

                return page;
            } catch (Exception ex) {
                // Best effort: any failure (network, parsing) yields "no page".
                Console.WriteLine (ex.ToString ());
                return null;
            }
        }
Esempio n. 3
0
		/// <summary>
		/// Fetches <paramref name="url"/> and turns it into a <c>Page</c> holding a
		/// sanitized title, a sanitized meta description and the normalized page text
		/// followed by the URL.
		/// </summary>
		/// <param name="url">Address to fetch; 8 to MAX_URL_LENGTH characters long.</param>
		/// <returns>
		/// The populated page, or <c>null</c> when the URL length is out of range, the
		/// title contains the Unicode replacement character, the normalized text is
		/// shorter than 50 characters, or any exception is thrown (it is written to
		/// the console).
		/// </returns>
		public static Page Get (String url)
		{
			try {
				bool urlUsable = url != null
					&& url.Length <= MAX_URL_LENGTH
					&& url.Length >= 8;
				if (!urlUsable) {
					return null;
				}

				Page result = new Page ();
				result.url = url;

				// Character 8203 is U+200B (zero width space); it is removed from the
				// description and the content below.
				String zeroWidthSpace = ((char)8203).ToString ();

				CQ dom = CQ.CreateFromUrl (url);

				// Drop elements whose text should not end up in the page content.
				String[] discarded = new String[] {
					"script", "Script",
					"style", "Style",
					"textarea", "Textarea",
					"noscript", "Noscript"
				};
				foreach (String selector in discarded) {
					dom [selector].Remove ();
				}

				// Title: document title, falling back to the URL; capped at 80 characters.
				String title = dom ["title"].Text () ?? dom ["Title"].Text () ?? url;
				title = title.Trim ();
				if (title.Length < 2) {
					title = url;
				}
				if (title.Length > 80) {
					title = title.Substring (0, 80);
				}
				result.title = title.Replace ("<", " ")
					.Replace (">", " ")
					.Replace ("$", " ");

				// The title element is removed so its text is not repeated in the content.
				dom ["title"].Remove ();
				dom ["Title"].Remove ();

				// A replacement character in the title makes the page unusable here;
				// the original author's note reads "encode ??" - presumably a decoding
				// problem, to be confirmed.
				if (result.title.Contains ("�")) {
					return null;
				}

				// Description: meta tag content, empty when absent; capped at 200 characters.
				String description = dom ["meta[name='description']"].Attr ("content")
					?? dom ["meta[name='Description']"].Attr ("content")
					?? "";
				if (description.Length > 200) {
					description = description.Substring (0, 200);
				}
				result.description = description.Replace ("<", " ")
					.Replace (">", " ")
					.Replace ("$", " ")
					.Replace (zeroWidthSpace, "");

				// fixSpan is defined elsewhere in the class.
				// NOTE(review): confirm what it changes in the document.
				fixSpan (dom);

				// NOTE(review): as written here the first Replace maps a space to a
				// space; the first argument may have been a different space character
				// in the upstream file - confirm against the repository.
				String text = dom.Text ()
					.Replace (" ", " ")
					.Replace (zeroWidthSpace, "");
				text = Regex.Replace (text, "\t|\r|\n|�|<|>", " ");
				text = Regex.Replace (text, "\\$", " ");
				text = Regex.Replace (text, "\\s+", " ");
				text = text.Trim ();

				// Pages with too little text are skipped; long text is capped.
				if (text.Length < 50) {
					return null;
				}
				if (text.Length > 5000) {
					text = text.Substring (0, 5000);
				}

				result.content = text + " " + result.url;
				return result;
			} catch (Exception ex) {
				// Best effort: any failure yields "no page".
				Console.WriteLine (ex.ToString ());
				return null;
			}
		}