Ejemplo n.º 1
0
        /// <summary>
        /// Loads an HTML file through HtmlAgilityPack and collects the anchors whose
        /// <c>href</c> begins with <c>wiki/</c> followed by at least one word character.
        /// </summary>
        /// <param name="filesource">Path that is appended to <c>file://</c> and handed to <c>HtmlWeb.Load</c>.</param>
        /// <returns>
        /// The matching links without duplicates (as decided by <c>List.Contains</c>);
        /// an empty list when nothing matched or the document could not be processed.
        /// </returns>
        public List <LinkItem> FindHtmlAgility(string filesource)
        {
            List <LinkItem> list = new List <LinkItem>();

            HtmlAgilityPack.HtmlWeb htmlweb = new HtmlWeb();
            try
            {
                HtmlDocument doc   = htmlweb.Load("file://" + filesource);
                var          nodes = doc.DocumentNode.SelectNodes("//a");
                if (nodes == null)
                {
                    // SelectNodes reports "no match" as null, not as an empty collection.
                    return(list);
                }
                foreach (var nd in nodes)
                {
                    var hrefAttr = nd.Attributes["href"];
                    if (hrefAttr == null || String.IsNullOrEmpty(hrefAttr.Value))
                    {
                        continue;
                    }

                    // The pattern is a regular expression, so it has to go through Regex;
                    // String.StartsWith would compare it as literal text and never match.
                    if (!System.Text.RegularExpressions.Regex.IsMatch(hrefAttr.Value, @"^wiki/[\w]+"))
                    {
                        continue;
                    }

                    // Anchors without a title are kept with an empty text instead of
                    // failing the whole scan on a null attribute.
                    var    titleAttr = nd.Attributes["title"];
                    string title     = titleAttr != null ? titleAttr.Value : "";

                    LinkItem ln = new LinkItem(title, hrefAttr.Value);
                    if (!list.Contains(ln))
                    {
                        list.Add(ln);
                    }
                }
            }
            catch
            {
                // Best effort: a file that cannot be loaded or parsed yields whatever
                // links were gathered before the failure.
            }
            return(list);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Builds the title-to-link result set for a query: the most valuable hit first,
        /// then every best match, then the links related to the most valuable hit.
        /// </summary>
        /// <param name="queryParts">Query terms forwarded to <c>findBestHitPage</c>.</param>
        /// <param name="Did_you_mean">Set by <c>findBestHitPage</c>.</param>
        /// <returns>
        /// Dictionary keyed by title. When the same title turns up more than once,
        /// the entry that was added first is kept.
        /// </returns>
        public Dictionary <String, String> retrieveResults(List <String> queryParts, out Boolean Did_you_mean)
        {
            Dictionary <String, String> ResultSet = new Dictionary <string, string>();
            LinkItem mostValuable;

            Dictionary <String, titleHits> BestMatches = findBestHitPage(queryParts, out mostValuable, out Did_you_mean);
            if (!String.IsNullOrEmpty(mostValuable.Href))
            {
                ResultSet.Add(mostValuable.Text, mostValuable.Href);
            }
            else
            {
                // No direct hit was reported: promote the first best match, if any.
                // It is added to the result set by the loop below.
                foreach (var match in BestMatches)
                {
                    mostValuable.Text = match.Key;
                    mostValuable.Href = match.Value.Link;
                    break;
                }
            }

            foreach (var match in BestMatches)
            {
                // Explicit duplicate check instead of catching the exception Add would throw.
                if (!ResultSet.ContainsKey(match.Key))
                {
                    ResultSet.Add(match.Key, match.Value.Link);
                }
            }

            // Called even when no hit was found; mostValuable.Text then still holds
            // whatever findBestHitPage left in it.
            Dictionary <string, string> relatedLinks = findRelatedLinks2(mostValuable.Text);
            foreach (var pair in relatedLinks)
            {
                if (!ResultSet.ContainsKey(pair.Key))
                {
                    ResultSet.Add(pair.Key, pair.Value);
                }
            }
            return(ResultSet);
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Extracts article links from raw HTML text using regular expressions.
        /// </summary>
        /// <remarks>
        /// An anchor is kept only when its href starts with <c>/wiki</c>. Anchors are skipped
        /// when the href starts with <c>#</c>, contains <c>(</c> or <c>%</c>, or contains an
        /// upper-case letter followed by optional lower-case letters and a colon
        /// (for example <c>File:</c>). Kept hrefs are prefixed with
        /// <c>http://en.wikipedia.org</c>.
        /// </remarks>
        /// <param name="fileSource">HTML markup to scan; null or empty yields an empty list.</param>
        /// <returns>One item per kept anchor, in document order. Text is the anchor's title attribute, or empty when it has none.</returns>
        public List <LinkItem> Find(string fileSource)
        {
            List <LinkItem> list = new List <LinkItem>();

            if (String.IsNullOrEmpty(fileSource))
            {
                return(list);
            }

            // 1.
            // Find all <a ...>...</a> elements. The \b after "a" keeps other tags that merely
            // start with the letter a (<abbr>, <article>, <aside>, ...) from being picked up.
            MatchCollection anchors = Regex.Matches(fileSource, @"(<a\b[^>]*>.*?</a>)", RegexOptions.Singleline);

            // 2.
            // Loop over each anchor.
            foreach (Match anchor in anchors)
            {
                String markup = anchor.Groups[1].Value;

                // 3.
                // Get href attribute.
                Match hrefMatch = Regex.Match(markup, @"href=\""(.*?)\""", RegexOptions.Singleline);
                if (!hrefMatch.Success)
                {
                    continue;
                }

                String link = hrefMatch.Groups[1].Value;
                if (link.StartsWith("#", StringComparison.Ordinal) || link.Contains("(") || link.Contains("%"))
                {
                    continue;
                }
                if (Regex.IsMatch(link, "[A-Z][a-z]*:"))
                {
                    continue;
                }
                if (!link.StartsWith("/wiki", StringComparison.Ordinal))
                {
                    continue;
                }

                // Only allocate an item once the link is known to be kept.
                LinkItem item = new LinkItem("", "");
                item.Href = "http://en.wikipedia.org" + link;

                // 4.
                // Extract the link title, when present.
                Match titleMatch = Regex.Match(markup, @"title=\""(.*?)\""", RegexOptions.Singleline);
                if (titleMatch.Success)
                {
                    item.Text = titleMatch.Groups[1].Value;
                }
                list.Add(item);
            }

            return(list);
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Scans <c>LR.LoadedResultsSet1</c> for titles that satisfy every query term and
        /// sorts them by the verdict of <c>CompareTitleSuccess</c>.
        /// </summary>
        /// <remarks>
        /// A term counts for a title when it regex-matches the title (case-insensitive), or
        /// when <c>CompareSimilarMatch</c> returns a score strictly below the current
        /// threshold. The threshold starts at 0.45 and is lowered to each accepted score,
        /// so later titles must score lower still. Presumably the score is distance-like
        /// (lower means closer); confirm against <c>CompareSimilarMatch</c>.
        /// NOTE(review): each query term is passed to Regex as a pattern, not escaped;
        /// confirm callers never pass raw text containing regex metacharacters.
        /// </remarks>
        /// <param name="queryTerms">Terms that must all count for a title to be considered.</param>
        /// <param name="mostValuable">
        /// Receives the first title judged BEST. If there is none and a similar title was
        /// recorded, receives that similar title instead. Otherwise a fresh LinkItem.
        /// </param>
        /// <param name="Did_you_mean">True only when <paramref name="mostValuable"/> was filled from the similar title.</param>
        /// <returns>Titles judged BETTER, followed by the titles judged GOOD.</returns>
        public Dictionary <String, titleHits> findBestHitPage(List <String> queryTerms, out LinkItem mostValuable, out Boolean Did_you_mean)
        {
            var rankedHits  = new Dictionary <string, titleHits>();
            var derivedHits = new Dictionary <String, titleHits>();

            mostValuable = new LinkItem();
            Did_you_mean = false;

            LinkItem closest      = null;
            bool     bestTaken    = false;
            bool     closestFound = false;
            float    threshold    = 0.45F;

            foreach (var entry in LR.LoadedResultsSet1)
            {
                int matchedTerms = 0;
                foreach (var term in queryTerms)
                {
                    if (Regex.IsMatch(entry.Key, term, RegexOptions.IgnoreCase))
                    {
                        matchedTerms++;
                        continue;
                    }

                    // No direct match: fall back to the similarity score.
                    float score = CompareSimilarMatch(term, entry.Key);
                    if (score < threshold)
                    {
                        // Tighten the threshold and remember this title as the closest so far.
                        threshold    = score;
                        closest      = new LinkItem();
                        closest.Href = entry.Value.Link;
                        closest.Text = entry.Key;
                        closestFound = true;
                        matchedTerms++;
                    }
                }

                if (matchedTerms != queryTerms.Count)
                {
                    continue;
                }

                SearchQuality verdict = CompareTitleSuccess(queryTerms, entry.Key);
                if (verdict == SearchQuality.BETTER)
                {
                    rankedHits.Add(entry.Key, new titleHits(entry.Value));
                }
                else if (verdict == SearchQuality.GOOD)
                {
                    derivedHits.Add(entry.Key, new titleHits(entry.Value));
                }
                else if (verdict == SearchQuality.BEST && !bestTaken)
                {
                    // Only the first BEST title is kept.
                    mostValuable.Text = entry.Key;
                    mostValuable.Href = entry.Value.Link;
                    bestTaken         = true;
                }
            }

            // GOOD titles go after the BETTER ones.
            foreach (var derived in derivedHits)
            {
                rankedHits.Add(derived.Key, new titleHits(derived.Value));
            }

            bool hrefMissing = String.IsNullOrEmpty(mostValuable.Href);
            bool textMissing = String.IsNullOrEmpty(mostValuable.Text);

            if (hrefMissing && textMissing && closestFound && !bestTaken)
            {
                mostValuable.Href = closest.Href;
                mostValuable.Text = closest.Text;
                Did_you_mean      = true;
            }
            return(rankedHits);
        }