/// <summary>
/// Appends the text of a query to the failed-queries file, one entry per line.
/// </summary>
/// <remarks>
/// If the query text ends with " fund" (compared case-insensitively), that suffix
/// is removed before the text is written.
/// </remarks>
/// <param name="query">Query whose <c>query</c> text is recorded.</param>
private void AddToFailedQueriesFile(ScraperQuery query)
{
    const string fundSuffix = " fund";

    string queryStr = query.query;

    // Ordinal, case-insensitive comparison: the suffix is a fixed token, and an
    // ordinal match guarantees the matched tail is exactly fundSuffix.Length
    // characters long, so the Substring below removes precisely the suffix.
    // (A culture-sensitive EndsWith may ignore some characters while matching.)
    if (queryStr.EndsWith(fundSuffix, StringComparison.OrdinalIgnoreCase))
    {
        queryStr = queryStr.Substring(0, queryStr.Length - fundSuffix.Length);
    }

    File.AppendAllText(_failedQueriesFileName, queryStr + Environment.NewLine);
}
// Pattern for candidate tokens: 3-5 uppercase letters, or 6-10 uppercase
// letters/digits. Built once and reused instead of being re-parsed on every call.
private static readonly Regex _tickerExpression = new Regex(@"[A-Z]{3,5}|[A-Z0-9]{6,10}");

/// <summary>
/// Scans every result of <paramref name="query"/> for tokens matching the ticker
/// pattern and accumulates a score per token in <c>_matches[query.query]</c>.
/// </summary>
/// <remarks>
/// The per-query score table is created and registered in <c>_matches</c> when it
/// does not exist yet; otherwise the existing table is extended. Each occurrence
/// of a token adds the weight of the result it was found in.
/// </remarks>
/// <param name="query">Query whose <c>results</c> are scanned.</param>
private void ParseTickersMatches(ScraperQuery query)
{
    // Single lookup instead of ContainsKey + indexer; a new table is only
    // allocated when the query has not been seen before.
    Dictionary<string, double> queryMatches;
    if (!_matches.TryGetValue(query.query, out queryMatches))
    {
        queryMatches = new Dictionary<string, double>();
        _matches.Add(query.query, queryMatches);
    }

    // i counts down from Count + 1, so the first result gets the smallest
    // penalty (1 / i) and the last result the largest (1 / 2).
    int i = query.results.Count + 1;
    foreach (ScraperResult item in query.results)
    {
        string searchStr = item.snippet + " " + item.title;
        double linkRating = 1;

        // Baidu returns links that point back to itself, e.g.
        // http://www.baidu.com/link?url=LbVBBXNIgC71iKD1YJXa3zFf4... (~40 symbol URL).
        // Those are not useful for the search, so such links are neither scanned
        // nor rated; the default rating of 1 is kept for them.
        if (!item.link.Contains("baidu.com"))
        {
            searchStr += " " + item.link;
            linkRating = GetLinkRating(item.link);
        }

        // Lower the weight according to the result's position.
        linkRating -= (double)1 / i;

        foreach (Match match in _tickerExpression.Matches(searchStr))
        {
            string key = match.Value;

            double current;
            if (queryMatches.TryGetValue(key, out current))
            {
                queryMatches[key] = current + linkRating;
            }
            else
            {
                queryMatches.Add(key, linkRating);
            }
        }

        i--;
    }
}