/// <summary>
/// Loads the script at <c>ScriptPath</c>, evaluates it through <c>WebBrowser.InvokeScript("eval", ...)</c>
/// and converts the textual output into a <see cref="SearchEngineResult"/>.
/// </summary>
/// <param name="html">Not read by this implementation.</param>
/// <param name="encoding">Not read by this implementation.</param>
/// <returns>
/// The result built from the script output, or a freshly constructed empty result when the
/// loaded script text is empty or when evaluation/conversion throws.
/// </returns>
public SearchEngineResult Parse(string html, Encoding encoding)
{
    string script = ScriptManager.LoadScript(ScriptPath);
    if (script == "")
    {
        return new SearchEngineResult();
    }

    try
    {
        object evaluated = WebBrowser.InvokeScript("eval", new String[] { script });
        XMLObject xml = new XMLObject(evaluated.ToString());
        return SearchEngineResult.GetSearchEngineResultFromXMLObject(xml);
    }
    catch (Exception)
    {
        // Best effort: any failure while evaluating or converting yields an empty result.
        return new SearchEngineResult();
    }
}
/// <summary>
/// Checks that, given three search results of which only the second links to the domain being
/// matched, the analyser reports exactly one position and that it is the second result's position.
/// </summary>
public async Task GetSearchEngineResultsPagePositions_ValidArguments_Success()
{
    // Arrange
    const string domainToMatch = "domaintomatch.com";
    const int expectedPosition = 2;

    SearchEngineResult[] pageResults =
    {
        new SearchEngineResult { Position = 1, ResultLink = "*****@*****.**" },
        new SearchEngineResult { Position = expectedPosition, ResultLink = domainToMatch },
        new SearchEngineResult { Position = 3, ResultLink = "*****@*****.**" },
    };
    var analyser = GoogleResultsPageAnalyserFactory.Create(pageResults);

    // Act
    var positions = await analyser.GetSearchEngineResultsPagePositions("random keywords", domainToMatch);

    // Assert
    Assert.Collection(positions, position => Assert.Equal(expectedPosition, position));
}
/// <summary>
/// Maps the list box that raised an event to its search engine and looks up that engine's
/// result in the current query result.
/// </summary>
/// <param name="sender">The event source; only <c>ListBox</c> instances are considered.</param>
/// <param name="searchEngineResult">
/// Receives the matching result, or <c>null</c> when the sender is not one of the known
/// output list boxes or no result is found.
/// </param>
/// <returns><c>true</c> when a result was found; otherwise <c>false</c>.</returns>
private bool FindSearchEngineResult(object sender, out SearchEngineResult searchEngineResult)
{
    ListBox origin = sender as ListBox;
    if (origin == null)
    {
        searchEngineResult = null;
        return false;
    }

    if (origin == _parent.GoogleOutputListBox)
    {
        return FindSearchEngineResult(_currentQueryResult, SearchEngineType.Google, out searchEngineResult);
    }

    if (origin == _parent.BaiduOutputListBox)
    {
        return FindSearchEngineResult(_currentQueryResult, SearchEngineType.Baidu, out searchEngineResult);
    }

    if (origin == _parent.SougouOutputListBox)
    {
        return FindSearchEngineResult(_currentQueryResult, SearchEngineType.Sougou, out searchEngineResult);
    }

    if (origin == _parent.WikipediaOutputListBox)
    {
        return FindSearchEngineResult(_currentQueryResult, SearchEngineType.Wikipedia, out searchEngineResult);
    }

    // A list box that does not belong to any known engine.
    searchEngineResult = null;
    return false;
}
/// <summary>
/// Console entry point: takes the search input from the command-line arguments (joined with
/// spaces) or, when there are none, from one line of standard input; runs the search and prints
/// per-input counts, per-engine winners and the total winner, then waits for a key press.
/// </summary>
/// <param name="args">Command-line arguments forming the search input.</param>
static void Main(string[] args)
{
    const string separator = "##############################";

    string input;
    if (args.Length == 0)
    {
        input = Console.ReadLine();
    }
    else
    {
        input = string.Join(" ", args);
    }

    SearchEngineManager manager = new SearchEngineManager();
    SearchEngineResult result = manager.Search(input);

    Console.WriteLine("Results");
    Console.WriteLine(separator);

    // One line per input, listing every engine's figure for it.
    foreach (var inputEntry in result.Inputs)
    {
        Console.Write($"{inputEntry.Input} : ");
        foreach (var engineEntry in inputEntry.Engines)
        {
            Console.Write($"{engineEntry.Engine}: {engineEntry.GetResult()} ");
        }

        Console.Write(System.Environment.NewLine);
    }

    Console.WriteLine(separator);

    // One line per engine with its winning input.
    foreach (var engineSummary in result.Engines)
    {
        Console.Write($"{engineSummary.Engine} winner: {engineSummary.GetWinnerInput()} ");
        Console.Write(System.Environment.NewLine);
    }

    Console.WriteLine(separator);
    Console.Write($"Total winner: {result.TotalWinner} ");
    Console.ReadKey();
}
/// <summary>
/// Tells whether the host part of the result's URL is absent from <c>DomainsBlacklist</c>.
/// </summary>
/// <param name="searchEngineResult">The result whose <c>Url</c> is inspected.</param>
/// <returns><c>true</c> when the host is not blacklisted; otherwise <c>false</c>.</returns>
private bool NotInDomainBlacklist(SearchEngineResult searchEngineResult)
{
    Uri resultUri = new Uri(searchEngineResult.Url);
    bool isBlacklisted = DomainsBlacklist.Contains(resultUri.Host);
    return !isBlacklisted;
}
/// <summary>
/// Callback that parses a web page once it has finished loading.
/// </summary>
/// <param name="sender">The object that raised the event.</param>
/// <param name="e">The event data.</param>
private void WebBrowserLoadCompleted(object sender, NavigationEventArgs e)
{
    // Call LoadLibrary for every entry of _libraryPath before parsing.
    foreach (string libraryPath in _libraryPath)
    {
        LoadLibrary(libraryPath);
    }

    // The parser is handed no HTML text here, only the encoding.
    _result = Parser.Parse(null, _encoding);

    isBrowserNavigating = false;
}
/// <summary>
/// Returns the search engine result associated with the given event source.
/// </summary>
/// <param name="sender">The event source; may be <c>null</c>.</param>
/// <returns>
/// The result located by <c>FindSearchEngineResult</c>, or <c>null</c> when
/// <paramref name="sender"/> is <c>null</c> or nothing was found.
/// </returns>
public SearchEngineResult GetSearchEngineResult(object sender)
{
    if (sender == null)
    {
        return null;
    }

    // Only the out value matters here; the boolean "found" flag carries no extra
    // information because the out value is null whenever nothing is found.
    SearchEngineResult searchEngineResult;
    FindSearchEngineResult(sender, out searchEngineResult);
    return searchEngineResult;
}
/// <summary>
/// Loads the page behind a search result, sanitizes it to text, splits the text into sentences
/// and checks whether any single sentence contains both the person's full name and the
/// evaluation category.
/// </summary>
/// <param name="personFullName">Text that must occur in the sentence.</param>
/// <param name="evaluationCategory">Text that must occur in the same sentence.</param>
/// <param name="searchEngineResult">The result whose <c>Url</c> is loaded.</param>
/// <returns><c>true</c> when at least one sentence contains both texts.</returns>
private bool HasRelevantSentences(string personFullName, string evaluationCategory, SearchEngineResult searchEngineResult)
{
    HtmlDocument page = _htmlDocumentLoader.LoadFromUrl(searchEngineResult.Url);
    string plainText = _htmlSanitizer.SanitizeHtmlDocument(page);
    string[] sentences = _textSentenceSplitter.SplitTextToSentences(plainText);

    return sentences.Any(
        sentence => sentence.Contains(personFullName) && sentence.Contains(evaluationCategory));
}
/// <summary>
/// Resolves the UI elements and display name that belong to the engine of a search result.
/// </summary>
/// <param name="searchEngineResult">The result whose <c>SearchEngineType</c> selects the elements.</param>
/// <param name="listBox">Receives the engine's output list box, or <c>null</c> for an unknown engine.</param>
/// <param name="tabItem">Receives the engine's tab item, or <c>null</c> for an unknown engine.</param>
/// <param name="engineName">Receives the engine's display name, or "Unknown" for an unknown engine.</param>
/// <returns><c>true</c> for Google, Baidu, Sougou and Wikipedia; otherwise <c>false</c>.</returns>
private bool FindUIElements(SearchEngineResult searchEngineResult, out ListBox listBox, out TabItem tabItem, out string engineName)
{
    SearchEngineType engineType = searchEngineResult.SearchEngineType;

    if (engineType == SearchEngineType.Google)
    {
        listBox = _parent.GoogleOutputListBox;
        tabItem = _parent.GoogleTabItem;
        engineName = "谷歌";
        return true;
    }

    if (engineType == SearchEngineType.Baidu)
    {
        listBox = _parent.BaiduOutputListBox;
        tabItem = _parent.BaiduTabItem;
        engineName = "百度";
        return true;
    }

    if (engineType == SearchEngineType.Sougou)
    {
        listBox = _parent.SougouOutputListBox;
        tabItem = _parent.SougouTabItem;
        engineName = "搜狗";
        return true;
    }

    if (engineType == SearchEngineType.Wikipedia)
    {
        listBox = _parent.WikipediaOutputListBox;
        tabItem = _parent.WikipediaTabItem;
        engineName = "维基";
        return true;
    }

    // Any other engine type has no dedicated UI elements.
    listBox = null;
    tabItem = null;
    engineName = "Unknown";
    return false;
}
/// <summary>
/// Searches Bing for the given key and extracts the titles of up to five result entries.
/// </summary>
/// <param name="key">The text to search for. It is URL-escaped before being placed in the query string.</param>
/// <returns>
/// One <see cref="SearchEngineResult"/> per extracted entry (at most five). When the page
/// contains no result entries, a single placeholder whose title is "Key {key} not found".
/// An entry whose markup does not have the expected nesting keeps a <c>null</c> title.
/// </returns>
private static async Task<List<SearchEngineResult>> GetFromBing(string key)
{
    // Escape the key so that characters such as '&', '#', '+' or spaces cannot
    // truncate or alter the query string.
    var escapedKey = Uri.EscapeDataString(key ?? string.Empty);
    var client = new RestClient($"https://www.bing.com/search?q={escapedKey}&qs=HS&pq=%D7%93&sc=1-1&&sp=1");
    var response = await client.ExecuteGetTaskAsync(new RestRequest());

    var document = new HtmlDocument();
    document.LoadHtml(response.Content);

    // Run the XPath query once; a null or empty node list means there are no results.
    var nodes = document.DocumentNode.SelectNodes("//li[contains(@class, 'b_algo')]");
    var searchEngineResults = new List<SearchEngineResult>();

    if (nodes == null || nodes.Count == 0)
    {
        searchEngineResults.Add(new SearchEngineResult
        {
            EnteredDate = DateTime.UtcNow,
            SearchEngine = SearchEngine.Bing,
            Title = $"Key {key} not found"
        });
        return searchEngineResults;
    }

    foreach (var node in nodes.Take(5))
    {
        var searchEngineResult = new SearchEngineResult
        {
            EnteredDate = DateTime.UtcNow,
            SearchEngine = SearchEngine.Bing
        };

        // The title sits either one level deeper (element child) or directly in the
        // second-level node (text child). Walk the chain defensively so unexpected
        // markup leaves the title unset instead of throwing.
        var titleHolder = node.FirstChild?.FirstChild;
        var firstInner = titleHolder?.FirstChild;
        if (firstInner != null)
        {
            switch (firstInner.NodeType)
            {
                case HtmlNodeType.Element:
                    searchEngineResult.Title = firstInner.FirstChild?.InnerHtml;
                    break;
                case HtmlNodeType.Text:
                    searchEngineResult.Title = titleHolder.InnerHtml;
                    break;
            }
        }

        searchEngineResults.Add(searchEngineResult);
    }

    return searchEngineResults;
}
/// <summary>
/// Builds the view model from a search engine result: copies the engine, wraps every
/// programming language in a <see cref="ProgrammingLanguageVM"/> and selects the winner.
/// </summary>
/// <param name="searchEngineResult">The result to wrap.</param>
/// <remarks>
/// The winner is the first language, in list order, whose <c>PopularityAmount</c> equals the
/// maximum; it is the default value when there are no languages.
/// </remarks>
public SearchEngineResultVM(SearchEngineResult searchEngineResult)
{
    SearchEngine = searchEngineResult.SearchEngine;

    ProgrammingLanguages = new List<ProgrammingLanguageVM>();
    foreach (var item in searchEngineResult.ProgrammingLanguages)
    {
        ProgrammingLanguages.Add(new ProgrammingLanguageVM(item));
    }

    if (ProgrammingLanguages.Count == 0)
    {
        ProgrammingLanguageWinner = default(ProgrammingLanguageVM);
    }
    else
    {
        // Compute the maximum once instead of once per element inside the predicate.
        var highestPopularity = ProgrammingLanguages.Max(y => y.PopularityAmount);
        ProgrammingLanguageWinner = ProgrammingLanguages
            .FirstOrDefault(x => x.PopularityAmount == highestPopularity);
    }
}
/// <summary>
/// Starts a search on every engine in <c>Engines</c> concurrently and returns the result of
/// the first search that completes successfully.
/// </summary>
/// <param name="searchStr">The text to search for.</param>
/// <returns>
/// The result of the first successfully completed search, or <c>null</c> when there are no
/// engines or every search faulted or was cancelled.
/// </returns>
/// <remarks>
/// Each task carries its own result, so no variable is shared between the worker tasks.
/// Searches still running when the first one succeeds are left to finish on their own.
/// </remarks>
public SearchEngineResult GetSearchResults(string searchStr)
{
    var pending = new List<Task<SearchEngineResult>>();
    foreach (var searchEngine in Engines)
    {
        pending.Add(Task.Run<SearchEngineResult>(async () => await searchEngine.DoSearch(searchStr)));
    }

    while (pending.Count > 0)
    {
        int finishedIndex = Task.WaitAny(pending.ToArray());
        Task<SearchEngineResult> finished = pending[finishedIndex];

        if (finished.Status == TaskStatus.RanToCompletion)
        {
            return finished.Result;
        }

        // A faulted or cancelled search has nothing to offer; keep waiting for the rest.
        pending.RemoveAt(finishedIndex);
    }

    return null;
}
/// <summary>
/// Manual round-trip check: converts a sample result to an XML object, re-creates the XML
/// object from its text, rebuilds a result from it, and prints every stage followed by
/// whether the original and rebuilt results have equal text.
/// </summary>
public static void Test()
{
    SearchEngineResult original = GetSearchResult();
    Console.WriteLine(original);

    // Result -> XML object.
    XMLObject serialized = original.ToXMLObject();
    Console.WriteLine(serialized.ToString());

    // XML text -> new XML object.
    XMLObject reparsed = new XMLObject(serialized.ToString());
    Console.WriteLine(reparsed.ToString());

    // XML object -> result.
    SearchEngineResult restored = SearchEngineResult.GetSearchEngineResultFromXMLObject(reparsed);
    Console.WriteLine(restored);

    bool sameText = original.ToString() == restored.ToString();
    Console.WriteLine(sameText);
}
/// <summary>
/// Manual check of <c>ExternelSearchEngine</c>: configures an engine with UTF-8 encoding and
/// two script libraries, runs a text query and prints the result.
/// </summary>
public static void Test()
{
    _libPath = new string[] { "jquery-1.3.2.min.js", "SmartMe-Buildin-Script.js" };

    ExternelSearchEngine externalEngine =
        new ExternelSearchEngine(_webBrowser, _googleUrlPattern, _scriptPath) { Encoding = Encoding.UTF8 };
    externalEngine.LibraryPath.AddRange(_libPath);

    InputQuery textQuery = new InputQuery(_queryString, InputQueryType.Text);

    // Search returns a more general type; print null if it is not a SearchEngineResult.
    SearchEngineResult outcome = externalEngine.Search(textQuery) as SearchEngineResult;
    Console.WriteLine(outcome);
}
/// <summary>
/// Produces a canned result containing one fixed item whose description embeds the query
/// and the current call counter, then increments the counter and sleeps for
/// <c>_waitTime</c> before returning.
/// </summary>
/// <param name="query">The query; only its text form is used, inside the item description.</param>
/// <returns>A result of type <c>SearchEngineType.Other</c> holding the single canned item.</returns>
public IQueryResultItem Search(InputQuery query)
{
    SearchEngineResult canned = new SearchEngineResult { SearchEngineType = SearchEngineType.Other };

    SearchEngineResult.ResultItem infoItem = new SearchEngineResult.ResultItem
    {
        Title = "INFO",
        CacheUrl = "http://aaa.www.com/",
        // Uses the counter value from before this call's increment.
        Description = query + " " + _count + " Done.",
        SimilarUrl = "http://similar.www.com/",
        Url = "http://info.tsinghua.edu.cn"
    };
    canned.Results.Add(infoItem);

    _count++;
    Thread.Sleep(_waitTime);
    return canned;
}
/// <summary>
/// For every stored search engine result, creates a copy that keeps only the programming
/// languages whose <c>Keywords</c> contain the lower-cased <paramref name="programmingLanguage"/>.
/// </summary>
/// <param name="programmingLanguage">The language text to match; lower-cased before matching.</param>
/// <returns>
/// One entry per stored result, in the same order, each with the same <c>Id</c> and
/// <c>SearchEngine</c> and the filtered language list (possibly empty).
/// </returns>
public async Task<List<SearchEngineResult>> GetSearchEngineResultByProgrammingLanguageAsync(string programmingLanguage)
{
    var filteredResults = new List<SearchEngineResult>();

    foreach (var stored in searchEngineResults)
    {
        var filtered = new SearchEngineResult
        {
            Id = stored.Id,
            ProgrammingLanguages = stored.ProgrammingLanguages
                .Where(x => x.Keywords.Contains(programmingLanguage.ToLower()))
                .ToList(),
            SearchEngine = stored.SearchEngine
        };
        filteredResults.Add(filtered);
    }

    return await Task.FromResult(filteredResults);
}
/// <summary>
/// Parses a Baidu result page by walking its HTML chunks and dispatching each chunk to the
/// matching handler.
/// </summary>
/// <param name="html">The page markup; it is converted to bytes with <paramref name="encoding"/>.</param>
/// <param name="encoding">The encoding used to produce the bytes and configured on the parser.</param>
/// <returns>The <c>searchResult</c> member, re-created at the start of the call with type Baidu.</returns>
/// <remarks>
/// The call re-creates the <c>searchResult</c> and <c>item</c> members before parsing. What
/// the handlers add to them is not visible from this method.
/// </remarks>
public SearchEngineResult Parse(string html, Encoding encoding)
{
    HTMLparser oP = HtmlParserFactory.GetInstance();

    searchResult = new SearchEngineResult();
    searchResult.SearchEngineType = SearchEngineType.Baidu;
    item = new SearchEngineResult.ResultItem();

    oP.Init(encoding.GetBytes(html));
    oP.SetEncoding(encoding);

    int state = 0;
    bool bEncodingSet = false;

    HTMLchunk oChunk;
    while ((oChunk = oP.ParseNext()) != null)
    {
        switch (oChunk.oType)
        {
            case HTMLchunkType.OpenTag:
                HandleOpenTag(oChunk, ref state);

                // A meta tag is additionally passed to the meta-encoding handler.
                if (oChunk.sTag == "meta")
                {
                    HandleMetaEncoding(oP, oChunk, ref bEncodingSet);
                }

                HandleParam(oChunk, ref state);
                break;

            case HTMLchunkType.CloseTag:
                HandleCloseTag(oChunk, ref state);
                break;

            case HTMLchunkType.Text:
                HandleText(oChunk, ref state);
                break;
        }
    }

    return searchResult;
}
/// <summary>
/// Splits the query on single spaces and requests data for every non-blank term through
/// <c>RequestSearch.DataFromGogoDuck</c>.
/// </summary>
/// <param name="textQuery">The space-separated query terms.</param>
/// <returns>A result whose <c>SearchEngineValues</c> holds one value per non-blank term, in order.</returns>
public async Task<SearchEngineResult> SearchOnGoGoDuck(string textQuery)
{
    var result = new SearchEngineResult();
    result.SearchEngineValues = new List<SearchEngineValueReturned>();

    foreach (var term in textQuery.Split(" "))
    {
        // Consecutive spaces produce empty terms; those are skipped.
        if (!string.IsNullOrWhiteSpace(term))
        {
            result.SearchEngineValues.Add(RequestSearch.DataFromGogoDuck(term));
        }
    }

    return await Task.FromResult(result);
}
// Disabled index-refresh endpoint, kept for reference.
//[WebGet(UriTemplate = "refresh")]
//public Message RefreshIndex()
//{
//    if (!CfIdentity.IsAuthenticated)
//    {
//        return ReturnAsJson(new List<object>() { new { Excerpt = "Refresh Failed - Not authenticated", Score = "0" } });
//    }
//    return RefreshIndexInternal();
//}

// So we can control authorization to refresh index
//private Message RefreshIndexInternal()
//{
//    try
//    {
//        AppLookups.RefreshCacheIndex();
//        var siteSearchEngine = Global.SiteSearchEngine;
//        siteSearchEngine.Dispose();
//        siteSearchEngine = null;
//        siteSearchEngine = new LuceneCfSearchEngineService();
//        new LuceneCfIndexingService(siteSearchEngine).RebuildIndex();
//        var successResult = new { Title = "Success", Score = float.Parse(siteSearchEngine.GetTotalIndexedEntryCount().ToString()) };
//        return ReturnAsJson(new List<object>() { successResult });
//    }
//    catch (Exception ex)
//    {
//        CfTracer.Error(ex);
//        var failedResult = new { Title = "Failed", Excerpt = ex.Message };
//        return ReturnAsJson(new List<object>() { failedResult });
//    }
//}

/// <summary>
/// Logs a search failure and answers with a single placeholder result asking the user to
/// retry shortly.
/// </summary>
/// <param name="ex">The exception raised by the search.</param>
/// <returns>A JSON message wrapping a one-element list that contains the placeholder result.</returns>
private Message HandelSearchException(Exception ex)
{
    CfTracer.Error(ex);
    //RefreshIndex();

    var placeholder = new SearchEngineResult();
    placeholder.Title = "Search service refreshing";
    placeholder.CountryID = 10;
    placeholder.Excerpt = "Try search again in a few seconds";
    placeholder.Url = "#";

    var payload = new List<SearchEngineResult>();
    payload.Add(placeholder);
    return ReturnAsJson(payload);
}
/// <summary>
/// Runs every input derived from <paramref name="input"/> against every search engine
/// service and aggregates per-input figures, per-engine winners and the total winner.
/// </summary>
/// <param name="input">The raw input from which the list of search inputs is derived.</param>
/// <returns>
/// The aggregated result. <c>TotalWinner</c> is the first input with the largest
/// <c>TotalResult</c> and is left unset when no input has a total above zero.
/// </returns>
public SearchEngineResult Search(string input)
{
    SearchEngineResult aggregate = new SearchEngineResult();
    var inputs = this.GetInputListToSearch(input);
    var engines = this.GetSearchEngineServices();

    // Per-input statistics: query each engine and record its figure.
    foreach (var currentInput in inputs)
    {
        SearchEngineInputItemResult inputStats = new SearchEngineInputItemResult(currentInput);
        foreach (var engine in engines)
        {
            var count = engine.Search(currentInput);
            inputStats.AddSearchEngineResult(engine.GetName(), count);
            inputStats.AddResult(count);
            engine.AddInputWinner(currentInput, count);
        }

        aggregate.AddSearchInputItemResult(inputStats);
    }

    // Per-engine statistics.
    foreach (var engine in engines)
    {
        aggregate.AddSearchEngineItemResult(engine.GetName(), engine.GetInputWinner(), engine.GetTotalWinner());
    }

    // Total winner: strictly-greater comparison keeps the earliest input on ties.
    long highestTotal = 0;
    foreach (var inputStats in aggregate.Inputs)
    {
        if (inputStats.TotalResult > highestTotal)
        {
            highestTotal = inputStats.TotalResult;
            aggregate.TotalWinner = inputStats.Input;
        }
    }

    return aggregate;
}
/// <summary>
/// Fills the "Link", "DatePublished" and "Score" controls of a repeater item from a search result.
/// </summary>
/// <param name="e">The repeater item event data whose item hosts the controls.</param>
/// <param name="searchResult">The result supplying title, URL, syndication date and score.</param>
/// <remarks>Nothing is bound when the "Link" control is missing.</remarks>
private void BindLink(RepeaterItemEventArgs e, SearchEngineResult searchResult)
{
    var link = (HyperLink)e.Item.FindControl("Link");
    var publishedLiteral = (Literal)e.Item.FindControl("DatePublished");
    var scoreLiteral = (Literal)e.Item.FindControl("Score");

    if (link == null)
    {
        return;
    }

    link.Text = searchResult.Title;
    link.NavigateUrl = Url.EntryUrl(searchResult);

    if (publishedLiteral != null)
    {
        publishedLiteral.Text = searchResult.DateSyndicated.ToShortDateString();
    }

    if (scoreLiteral != null)
    {
        scoreLiteral.Text = searchResult.Score.ToString();
    }
}
/// <summary>
/// Opens a web browser on either one entry of the sender's search result or on the search
/// page itself.
/// </summary>
/// <param name="sender">The event source used to look up the search result.</param>
/// <param name="index">
/// The entry to open; <c>-1</c> opens the result's <c>SearchUrl</c>. Any other out-of-range
/// value does nothing.
/// </param>
private void OpenSearchEngineResult(object sender, int index)
{
    SearchEngineResult result = _resultHandler.GetSearchEngineResult(sender);
    if (result == null)
    {
        return;
    }

    string target;
    if (index == -1)
    {
        target = string.Format("{0}", result.SearchUrl);
    }
    else if (0 <= index && index < result.Results.Count)
    {
        target = string.Format("{0}", result.Results[index].Url);
    }
    else
    {
        return;
    }

    new Shell().DoOpenWebBrowser(target);
}
/// <summary>
/// Shows the detailed grid for one entry of a search result, or hides the grid when the
/// index is negative.
/// </summary>
/// <param name="resultListBox">The list box used to look up the search result.</param>
/// <param name="index">The entry to show; a negative value hides the grid.</param>
/// <remarks>Nothing happens when no result is found or the index is past the last entry.</remarks>
private void DispalySearchEngineResultDetailedGrid(object resultListBox, int index)
{
    if (index < 0)
    {
        HideDetailedGrid();
        return;
    }

    SearchEngineResult result = _resultHandler.GetSearchEngineResult(resultListBox);
    if (result == null || index >= result.Results.Count)
    {
        return;
    }

    var entry = result.Results[index];
    string title = string.Format("{0}", entry.Title);
    string uri = string.Format("{0}", entry.Url);
    string description = string.Format("{0}", entry.Description);
    ShowDetailedGrid(title, description, uri);
}
/// <summary>
/// Builds a fixed sample result of type <c>SearchEngineType.Other</c> containing two items.
/// </summary>
/// <returns>The sample result with items "INFO" and "INFO2", in that order.</returns>
private static SearchEngineResult GetSearchResult()
{
    SearchEngineResult sample = new SearchEngineResult { SearchEngineType = SearchEngineType.Other };

    sample.Results.Add(new SearchEngineResult.ResultItem
    {
        Title = "INFO",
        CacheUrl = "http://aaa.www.com/",
        Description = "LinTian" + " " + 1 + " Done.",
        SimilarUrl = "http://similar.www.com/",
        Url = "http://info.tsinghua.edu.cn"
    });

    sample.Results.Add(new SearchEngineResult.ResultItem
    {
        Title = "INFO2",
        CacheUrl = "http://aaa.www.com22222/",
        Description = "LunaR" + " " + 2 + " Done2.",
        SimilarUrl = "http://similar.www.com2222/",
        Url = "http://info.tsinghua.edu.cn2"
    });

    return sample;
}
/// <summary>
/// Queries the Bing and Google result services for every value, after trimming surrounding
/// white space from it.
/// </summary>
/// <param name="values">The raw search values.</param>
/// <returns>
/// One result per value, in input order, each holding the trimmed value and the Bing match
/// followed by the Google match.
/// </returns>
private IList<SearchEngineResult> SearchValues(IList<string> values)
{
    var collected = new List<SearchEngineResult>();

    foreach (string rawValue in values)
    {
        string trimmed = rawValue.Trim();

        var matches = new List<SearchEngineMatch>
        {
            _bingApiResultService.GetSearchEngineMatch(trimmed),
            _googleApiResultService.GetSearchEngineMatch(trimmed)
        };

        collected.Add(new SearchEngineResult { SearchValue = trimmed, SearchMatchResults = matches });
    }

    return collected;
}
/// <summary>
/// Finds the first non-null search engine result of the requested engine type within a
/// query result.
/// </summary>
/// <param name="queryResult">The query result to scan; may be <c>null</c>.</param>
/// <param name="targetType">The engine type to look for.</param>
/// <param name="searchEngineResult">Receives the match, or <c>null</c> when there is none.</param>
/// <returns><c>true</c> when a match was found; otherwise <c>false</c>.</returns>
private bool FindSearchEngineResult(QueryResult queryResult, SearchEngineType targetType, out SearchEngineResult searchEngineResult)
{
    searchEngineResult = null;

    if (queryResult == null || queryResult.SearchEngineResultItems == null)
    {
        return false;
    }

    foreach (SearchEngineResult candidate in queryResult.SearchEngineResultItems)
    {
        if (candidate == null || candidate.SearchEngineType != targetType)
        {
            continue;
        }

        searchEngineResult = candidate;
        return true;
    }

    return false;
}
/// <summary>
/// Manual walkthrough of <c>QueryResultRecordManager</c>: subscribes a manager to a pipeline,
/// publishes query results, and prints the manager's result list after each of several
/// removal calls. Output goes to the console; nothing is asserted.
/// </summary>
public static void Test() {
    // Wire a record manager into a new pipeline as a query-result subscriber.
    QueryResultRecordManager manager = new QueryResultRecordManager("data", new TimeSpan(0, 1, 0));
    Pipeline.Pipeline pipeline = new Pipeline.Pipeline();
    pipeline.QueryResultSubscriberManager.AddSubscriber(manager);

    // Build a query result holding one search engine result with one item...
    QueryResult result = new QueryResult(new InputQuery("Bill Gates"));
    SearchEngineResult resultItem = new SearchEngineResult();
    resultItem.SearchEngineType = SearchEngineType.Google;
    resultItem.SearchUrl = "http://www.google.com/query.jsp";
    SearchEngineResult.ResultItem item = new SearchEngineResult.ResultItem();
    item.Title = "ddd";
    item.Url = "http://www.gfw.com/";
    item.SimilarUrl = "http://www.g.com/ddd";
    item.CacheUrl = "http://www.g.com/cache";
    item.Description = "Who cares?";
    resultItem.Results.Add(item);

    // ...and one suggestion result with one item.
    SuggestionResult resultItem1 = new SuggestionResult();
    resultItem1.SuggestionType = SuggestionType.Google;
    resultItem1.SearchUrl = "json";
    SuggestionResult.ResultItem item1 = new SuggestionResult.ResultItem();
    item1.Index = "1";
    item1.Number = "2";
    item1.Suggestion = "haha";
    resultItem1.Results.Add(item1);
    result.SearchEngineResultItems.Add(resultItem);
    result.SuggestionResultItems.Add(resultItem1);

    // Publish the result, then pause for one second before querying the manager.
    // NOTE(review): the pause presumably gives the subscriber time to record the result - confirm.
    pipeline.OnQueryResultReady(result);
    Thread.Sleep(1000);

    // Print what the manager returns for today.
    List <QueryResult> resultList = manager.GetResultList(DateTime.Today, DateTime.Today);
    foreach (QueryResult queryResult in resultList) { Console.WriteLine(queryResult); }
    Console.WriteLine("----------------------------------------------");

    // Remove everything and print today's list again.
    manager.RemoveAllResultList();
    resultList = manager.GetResultList(DateTime.Today, DateTime.Today);
    foreach (QueryResult queryResult in resultList) { Console.WriteLine(queryResult); }
    Console.WriteLine("----------------------------------------------");

    // Publish a second query result with the same search engine content.
    // NOTE(review): this one is added through result.Items, whereas the first used
    // result.SearchEngineResultItems - confirm both are intended.
    result = new QueryResult(new InputQuery("Bill Gates"));
    resultItem = new SearchEngineResult();
    resultItem.SearchEngineType = SearchEngineType.Google;
    resultItem.SearchUrl = "http://www.google.com/query.jsp";
    item = new SearchEngineResult.ResultItem();
    item.Title = "ddd";
    item.Url = "http://www.gfw.com/";
    item.SimilarUrl = "http://www.g.com/ddd";
    item.CacheUrl = "http://www.g.com/cache";
    item.Description = "Who cares?";
    resultItem.Results.Add(item);
    result.Items.Add(resultItem);
    pipeline.OnQueryResultReady(result);
    Thread.Sleep(1000);

    // Call RemoveResultListFromDate with today, then print the list from yesterday to today.
    manager.RemoveResultListFromDate(DateTime.Today);
    resultList = manager.GetResultList(DateTime.Today - new TimeSpan(1, 0, 0, 0), DateTime.Today);
    foreach (QueryResult queryResult in resultList) { Console.WriteLine(queryResult); }
    Console.WriteLine("----------------------------------------------");

    // Call RemoveResultListFromDate with tomorrow, then print the same range again.
    manager.RemoveResultListFromDate(DateTime.Today + new TimeSpan(1, 0, 0, 0));
    resultList = manager.GetResultList(DateTime.Today - new TimeSpan(1, 0, 0, 0), DateTime.Today);
    foreach (QueryResult queryResult in resultList) { Console.WriteLine(queryResult); }
    Console.WriteLine("----------------------------------------------");
}
/// <summary>
/// Merges a result into the final list: when the list already holds a result with the same
/// <c>Id</c>, the new result's programming languages are appended to it; otherwise the new
/// result is added to the list.
/// </summary>
/// <param name="finalResults">The list being accumulated; modified in place.</param>
/// <param name="result">The result to merge.</param>
private void ValidatingDuplicatingSearchEnginesResults(List<SearchEngineResult> finalResults, SearchEngineResult result)
{
    var sameIdEntry = finalResults.FirstOrDefault(x => x.Id == result.Id);

    if (sameIdEntry == null)
    {
        finalResults.Add(result);
        return;
    }

    sameIdEntry.ProgrammingLanguages.AddRange(result.ProgrammingLanguages);
}