/// <summary>
/// Parses a result page and returns the result items collected from it.
/// </summary>
/// <remarks>
/// The page is read chunk by chunk. Open tags go to <c>HandleOpenTag</c> and <c>HandleParam</c>
/// (plus <c>HandleMetaEncoding</c> for <c>meta</c> tags), close tags to <c>HandleCloseTag</c>
/// and text to <c>HandleText</c>. Those handlers share one <c>state</c> value and fill the
/// <c>item</c> and <c>searchResult</c> members, both of which are replaced on every call.
/// </remarks>
/// <param name="html">Markup of the page to parse.</param>
/// <param name="encoding">
/// Encoding used to convert <paramref name="html"/> to bytes and passed to the parser
/// through <c>SetEncoding</c>.
/// </param>
/// <returns>The populated result, tagged <c>SearchEngineType.Google</c>.</returns>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="html"/> or <paramref name="encoding"/> is <c>null</c>.
/// </exception>
public SearchEngineResult Parse(string html, Encoding encoding)
{
    // Validate before touching any member so a bad call leaves earlier results intact.
    if (html == null)
    {
        throw new System.ArgumentNullException("html");
    }
    if (encoding == null)
    {
        throw new System.ArgumentNullException("encoding");
    }

    HTMLparser parser = HtmlParserFactory.GetInstance();

    searchResult = new SearchEngineResult();
    searchResult.SearchEngineType = SearchEngineType.Google;
    item = new SearchEngineResult.ResultItem();

    parser.Init(encoding.GetBytes(html));
    parser.SetEncoding(encoding);

    int state = 0;
    bool bEncodingSet = false;
    HTMLchunk chunk;

    while ((chunk = parser.ParseNext()) != null)
    {
        switch (chunk.oType)
        {
            case HTMLchunkType.OpenTag:
                HandleOpenTag(chunk, ref state);
                if (chunk.sTag == "meta")
                {
                    // NOTE(review): presumably lets a charset declared by the page replace the
                    // initial encoding - confirm in HandleMetaEncoding.
                    HandleMetaEncoding(parser, chunk, ref bEncodingSet);
                }
                HandleParam(chunk, ref state);
                break;

            case HTMLchunkType.CloseTag:
                HandleCloseTag(chunk, ref state);
                break;

            case HTMLchunkType.Text:
                HandleText(chunk, ref state);
                break;

            default:
                // Other chunk types are ignored.
                break;
        }
    }

    return searchResult;
}
/// <summary>
/// Builds a fixed, hand-written search result holding two sample entries.
/// </summary>
/// <returns>
/// A result of type <c>SearchEngineType.Other</c> whose <c>Results</c> collection contains
/// the "INFO" entry followed by the "INFO2" entry.
/// </returns>
private static SearchEngineResult GetSearchResult()
{
    SearchEngineResult sample = new SearchEngineResult();
    sample.SearchEngineType = SearchEngineType.Other;

    SearchEngineResult.ResultItem first = new SearchEngineResult.ResultItem
    {
        Title = "INFO",
        CacheUrl = "http://aaa.www.com/",
        Description = "LinTian" + " " + 1 + " Done.",
        SimilarUrl = "http://similar.www.com/",
        Url = "http://info.tsinghua.edu.cn"
    };
    sample.Results.Add(first);

    SearchEngineResult.ResultItem second = new SearchEngineResult.ResultItem
    {
        Title = "INFO2",
        CacheUrl = "http://aaa.www.com22222/",
        Description = "LunaR" + " " + 2 + " Done2.",
        SimilarUrl = "http://similar.www.com2222/",
        Url = "http://info.tsinghua.edu.cn2"
    };
    sample.Results.Add(second);

    return sample;
}
/// <summary>
/// Advances the parse state when a closing tag is encountered and, at the end of a list
/// entry, stores the item collected so far.
/// </summary>
/// <param name="oChunk">The close-tag chunk; only its <c>sTag</c> name is inspected.</param>
/// <param name="state">
/// Current parser state. Transitions: 6 to 7 on <c>a</c>, 7 to 4 on <c>li</c>,
/// 4 to -1 on <c>ul</c>. Any other combination leaves it untouched.
/// </param>
private void HandleCloseTag(HTMLchunk oChunk, ref int state)
{
    string tag = oChunk.sTag;

    switch (state)
    {
        case 6:
            if (tag == "a")
            {
                state = 7;
            }
            break;

        case 7:
            if (tag == "li")
            {
                state = 4;

                // Only items that actually received a URL are kept; a fresh item is started
                // afterwards for the next entry.
                if (!string.IsNullOrEmpty(item.Url))
                {
                    searchResult.Results.Add(item);
                    item = new SearchEngineResult.ResultItem();
                }
            }
            break;

        case 4:
            if (tag == "ul")
            {
                state = -1;
            }
            break;
    }
}
/// <summary>
/// Inspects the attributes of an open tag, advancing the parse state on known
/// <c>class</c> values and capturing links from <c>href</c> attributes.
/// </summary>
/// <remarks>
/// NOTE(review): a commented-out <c>item.Source = "Google"</c> line in the previous revision
/// suggests this is the Google variant of the handler - confirm against the enclosing class.
/// </remarks>
/// <param name="oChunk">The open-tag chunk whose <c>sParams</c>/<c>sValues</c> pairs are scanned.</param>
/// <param name="state">
/// Current parser state. <c>class</c> transitions: "g" 2 to 3, "r" 3 to 4, "s" 6 to 7,
/// "gl" 7 to 8. <c>href</c> values are stored as the item URL in state 5, and as the
/// similar/cache URL in states 9 and 11.
/// </param>
private void HandleParam(HTMLchunk oChunk, ref int state)
{
    for (int i = 0; i < oChunk.iParams; i++)
    {
        string name = oChunk.sParams[i];
        string value = oChunk.sValues[i];

        if (name == "class")
        {
            if (value == "g" && state == 2)
            {
                state = 3;

                // A new result container starts here: store the previous item if it received a URL.
                if (!string.IsNullOrEmpty(item.Url))
                {
                    searchResult.Results.Add(item);
                    item = new SearchEngineResult.ResultItem();
                }
            }
            else if (value == "r" && state == 3)
            {
                state = 4;
            }
            else if (value == "s" && state == 6)
            {
                state = 7;
            }
            else if (value == "gl" && state == 7)
            {
                state = 8;
            }
        }
        else if (name == "href")
        {
            if (state == 5)
            {
                item.Url = value;
            }
            else if ((state == 9 || state == 11) && value != null)
            {
                // URL fragments are machine-readable tokens, so compare ordinally rather than
                // with the current culture.
                if (value.IndexOf("q=related", System.StringComparison.Ordinal) >= 0)
                {
                    item.SimilarUrl = value;
                }
                else if (value.IndexOf("q=cache", System.StringComparison.Ordinal) >= 0)
                {
                    item.CacheUrl = value;
                }
            }
        }
    }
}
/// <summary>
/// Inspects the attributes of an open tag, advancing the parse state on the result
/// container class and capturing links from <c>href</c> attributes.
/// </summary>
/// <remarks>
/// NOTE(review): a commented-out <c>item.Source = "Baidu"</c> line in the previous revision
/// suggests this is the Baidu variant of the handler - confirm against the enclosing class.
/// </remarks>
/// <param name="oChunk">The open-tag chunk whose <c>sParams</c>/<c>sValues</c> pairs are scanned.</param>
/// <param name="state">
/// Current parser state. <c>class="f"</c> moves state 2 to 3. <c>href</c> values are stored
/// as the item URL in state 4, and as the cache/similar URL in states 6 and 8.
/// </param>
private void HandleParam(HTMLchunk oChunk, ref int state)
{
    for (int i = 0; i < oChunk.iParams; i++)
    {
        string name = oChunk.sParams[i];
        string value = oChunk.sValues[i];

        if (name == "class")
        {
            if (value == "f" && state == 2)
            {
                state = 3;

                // A new result container starts here: store the previous item if it received a URL.
                if (!string.IsNullOrEmpty(item.Url))
                {
                    searchResult.Results.Add(item);
                    item = new SearchEngineResult.ResultItem();
                }
            }
        }
        else if (name == "href")
        {
            if (state == 4)
            {
                item.Url = value;
            }
            else if ((state == 6 || state == 8) && value != null)
            {
                // URL fragments are machine-readable tokens, so compare ordinally rather than
                // with the current culture.
                if (value.IndexOf("cache", System.StringComparison.Ordinal) >= 0)
                {
                    item.CacheUrl = value;
                }
                else if (value.StartsWith("s?cl=2", System.StringComparison.Ordinal))
                {
                    item.SimilarUrl = value;
                }
            }
        }
    }
}
/// <summary>
/// Manual smoke test for <c>QueryResultRecordManager</c>: pushes sample query results through
/// a pipeline, then prints what the manager returns after various removal calls.
/// </summary>
/// <remarks>
/// Output goes to the console. The method sleeps for one second after each publish.
/// </remarks>
public static void Test()
{
    QueryResultRecordManager manager = new QueryResultRecordManager("data", new TimeSpan(0, 1, 0));
    Pipeline.Pipeline pipeline = new Pipeline.Pipeline();
    pipeline.QueryResultSubscriberManager.AddSubscriber(manager);

    // Round 1: one search-engine result plus one suggestion result.
    QueryResult firstQuery = new QueryResult(new InputQuery("Bill Gates"));
    SearchEngineResult firstEngineResult = BuildSampleEngineResult();

    SuggestionResult suggestions = new SuggestionResult();
    suggestions.SuggestionType = SuggestionType.Google;
    suggestions.SearchUrl = "json";

    SuggestionResult.ResultItem suggestion = new SuggestionResult.ResultItem();
    suggestion.Index = "1";
    suggestion.Number = "2";
    suggestion.Suggestion = "haha";
    suggestions.Results.Add(suggestion);

    firstQuery.SearchEngineResultItems.Add(firstEngineResult);
    firstQuery.SuggestionResultItems.Add(suggestions);
    pipeline.OnQueryResultReady(firstQuery);
    Thread.Sleep(1000);

    PrintResults(manager.GetResultList(DateTime.Today, DateTime.Today));

    // After wiping everything.
    manager.RemoveAllResultList();
    PrintResults(manager.GetResultList(DateTime.Today, DateTime.Today));

    // Round 2: search-engine result only.
    QueryResult secondQuery = new QueryResult(new InputQuery("Bill Gates"));
    SearchEngineResult secondEngineResult = BuildSampleEngineResult();
    // NOTE(review): this round adds through Items while round 1 uses SearchEngineResultItems;
    // kept exactly as found - confirm which member is intended.
    secondQuery.Items.Add(secondEngineResult);
    pipeline.OnQueryResultReady(secondQuery);
    Thread.Sleep(1000);

    TimeSpan oneDay = new TimeSpan(1, 0, 0, 0);

    manager.RemoveResultListFromDate(DateTime.Today);
    PrintResults(manager.GetResultList(DateTime.Today - oneDay, DateTime.Today));

    manager.RemoveResultListFromDate(DateTime.Today + oneDay);
    PrintResults(manager.GetResultList(DateTime.Today - oneDay, DateTime.Today));
}

/// <summary>
/// Creates the sample Google search-engine result (one "ddd" item) used by <c>Test</c>.
/// </summary>
private static SearchEngineResult BuildSampleEngineResult()
{
    SearchEngineResult engineResult = new SearchEngineResult();
    engineResult.SearchEngineType = SearchEngineType.Google;
    engineResult.SearchUrl = "http://www.google.com/query.jsp";

    SearchEngineResult.ResultItem entry = new SearchEngineResult.ResultItem();
    entry.Title = "ddd";
    entry.Url = "http://www.gfw.com/";
    entry.SimilarUrl = "http://www.g.com/ddd";
    entry.CacheUrl = "http://www.g.com/cache";
    entry.Description = "Who cares?";
    engineResult.Results.Add(entry);

    return engineResult;
}

/// <summary>
/// Writes every query result to the console, followed by a separator line.
/// </summary>
private static void PrintResults(List<QueryResult> results)
{
    foreach (QueryResult queryResult in results)
    {
        Console.WriteLine(queryResult);
    }
    Console.WriteLine("----------------------------------------------");
}
/// <summary>
/// Returns a canned single-item result for <paramref name="query"/>, then pauses for the
/// configured wait time.
/// </summary>
/// <param name="query">The query; its text is embedded in the item description.</param>
/// <returns>
/// A <c>SearchEngineResult</c> of type <c>SearchEngineType.Other</c> with one "INFO" item whose
/// description contains the query and the current call counter.
/// </returns>
public IQueryResultItem Search( InputQuery query )
{
    SearchEngineResult response = new SearchEngineResult();
    response.SearchEngineType = SearchEngineType.Other;

    SearchEngineResult.ResultItem entry = new SearchEngineResult.ResultItem
    {
        Title = "INFO",
        CacheUrl = "http://aaa.www.com/",
        // The counter value is read before it is incremented below.
        Description = query + " " + _count + " Done.",
        SimilarUrl = "http://similar.www.com/",
        Url = "http://info.tsinghua.edu.cn"
    };
    response.Results.Add(entry);

    _count++;

    // Block the caller for the configured delay before handing the result back.
    Thread.Sleep(_waitTime);

    return response;
}
/// <summary>
/// Inspects the attributes of an open tag, advancing the parse state on the result
/// container class and on known element ids, and capturing links from <c>href</c> attributes.
/// </summary>
/// <remarks>
/// NOTE(review): a commented-out <c>item.Source = "Sogou"</c> line in the previous revision
/// suggests this is the Sogou variant of the handler - confirm against the enclosing class.
/// </remarks>
/// <param name="oChunk">The open-tag chunk whose <c>sParams</c>/<c>sValues</c> pairs are scanned.</param>
/// <param name="state">
/// Current parser state. <c>class="f"</c> moves state 2 to 3. In states 6 and 9, an <c>id</c>
/// starting with "sogou_snapshot" selects state 7 and one starting with "sogou_sis" selects
/// state 10. <c>href</c> values are stored as the item URL (state 4), cache URL (state 7) or
/// similar URL (state 10).
/// </param>
private void HandleParam(HTMLchunk oChunk, ref int state)
{
    for (int i = 0; i < oChunk.iParams; i++)
    {
        string name = oChunk.sParams[i];
        string value = oChunk.sValues[i];

        if (name == "class")
        {
            if (value == "f" && state == 2)
            {
                state = 3;

                // A new result container starts here: store the previous item if it received a URL.
                if (!string.IsNullOrEmpty(item.Url))
                {
                    searchResult.Results.Add(item);
                    item = new SearchEngineResult.ResultItem();
                }
            }
        }
        else if (name == "href")
        {
            if (state == 4)
            {
                item.Url = value;
            }
            else if (state == 7)
            {
                item.CacheUrl = value;
            }
            else if (state == 10)
            {
                item.SimilarUrl = value;
            }
        }
        else if (name == "id" && (state == 6 || state == 9) && value != null)
        {
            // Element ids are machine-readable tokens, so compare ordinally rather than with
            // the current culture.
            if (value.StartsWith("sogou_snapshot", System.StringComparison.Ordinal))
            {
                state = 7;
            }
            else if (value.StartsWith("sogou_sis", System.StringComparison.Ordinal))
            {
                state = 10;
            }
        }
    }
}