Ejemplo n.º 1
0
        /// <summary>
        /// Appends the text of <paramref name="query"/> as a new line to the file named by
        /// <c>_failedQueriesFileName</c>, first removing a trailing " fund" suffix
        /// (matched case-insensitively) if one is present.
        /// </summary>
        /// <param name="query">Query whose <c>query</c> text is recorded.</param>
        private void AddToFailedQueriesFile(ScraperQuery query)
        {
            const string fundSuffix = " fund";

            string queryStr = query.query;

            // Ordinal, case-insensitive comparison: the suffix is a fixed ASCII token, so the
            // check must not depend on the current culture, and no lowered copy is needed.
            if (queryStr.EndsWith(fundSuffix, StringComparison.OrdinalIgnoreCase))
            {
                queryStr = queryStr.Substring(0, queryStr.Length - fundSuffix.Length);
            }

            File.AppendAllText(_failedQueriesFileName, queryStr + Environment.NewLine);
        }
Ejemplo n.º 2
0
        // Ticker-like tokens: a run of 3-5 uppercase letters, or a run of 6-10 uppercase
        // letters/digits. Built once and reused instead of being re-parsed on every call.
        // NOTE(review): the pattern has no word boundaries, so it can also match inside
        // longer uppercase tokens - confirm this is intended.
        private static readonly Regex _tickerCandidateExpression = new Regex(@"[A-Z]{3,5}|[A-Z0-9]{6,10}");

        /// <summary>
        /// Scans every result of <paramref name="query"/> for ticker-like tokens and adds a
        /// per-result weight to each token's score in <c>_matches[query.query]</c>, creating
        /// that score table if the query has none yet.
        /// </summary>
        /// <param name="query">Query whose <c>results</c> are scanned; its <c>query</c> text is the key into <c>_matches</c>.</param>
        private void ParseTickersMatches(ScraperQuery query)
        {
            Dictionary<string, double> queryMatches;

            // Single lookup: reuse the existing score table or register a fresh one.
            if (!_matches.TryGetValue(query.query, out queryMatches))
            {
                queryMatches = new Dictionary<string, double>();
                _matches.Add(query.query, queryMatches);
            }

            // Counts down from Count + 1 to 2, so the 1/i penalty below grows with each
            // later result (results are taken to be ordered by descending relevance).
            int i = query.results.Count + 1;

            foreach (ScraperResult item in query.results)
            {
                string searchStr = item.snippet + " " + item.title;
                double linkRating = 1;

                // Baidu returns redirect links to itself, e.g.
                // http://www.baidu.com/link?url=LbVBBXNIgC71iKD1YJXa3zFf4... (an opaque ~40-character URL),
                // which is useless for the search, so such links are neither scanned nor rated.
                if (!item.link.Contains("baidu.com"))
                {
                    searchStr += " " + item.link;
                    linkRating = GetLinkRating(item.link);
                }

                // Position penalty: smallest for the first result, largest for the last.
                linkRating -= (double)1 / i;

                foreach (Match match in _tickerCandidateExpression.Matches(searchStr))
                {
                    string key = match.Value;

                    // A missing key leaves 'current' at 0, so the first hit stores linkRating itself.
                    double current;
                    queryMatches.TryGetValue(key, out current);
                    queryMatches[key] = current + linkRating;
                }

                i--;
            }
        }