Beispiel #1
0
        /// <summary>
        /// Resets the discovery state for a new page and parses the HTML supplied in
        /// <paramref name="readStream"/>, recording the text fragment that follows each
        /// candidate URL as its hint text. When <c>_downloadResults</c> is set, processing
        /// of the collected candidates is started afterwards.
        /// </summary>
        /// <param name="url">Address of the page; used to build the base URI for this run.</param>
        /// <param name="readStream">Stream containing the page content.</param>
        /// <param name="charset">Charset handed to <c>OpenHTMLReader</c> together with the stream.</param>
        public void StartDiscover(string url, Stream readStream, string charset)
        {
            OnDiscoverProgress("Discovering...");

            // Every run starts from fresh result and candidate containers.
            _baseUri = new Uri(url);
            _results = new RSSDiscoverResults();
            _candidateURLs = new PriorityQueue();
            _candidateHintTexts = new HashMap();
            _candidateURLSet = new HashSet();

            using (HTMLParser htmlParser = new HTMLParser(OpenHTMLReader(readStream, charset)))
            {
                htmlParser.AddTagHandler("link", new HTMLParser.TagHandler(OnLinkTag));
                htmlParser.AddTagHandler("a", new HTMLParser.TagHandler(OnATag));
                htmlParser.AddTagHandler("/a", new HTMLParser.TagHandler(OnEndATag));

                while (!htmlParser.Finished)
                {
                    string text = htmlParser.ReadNextFragment();
                    if (_lastCandidateURL == null)
                    {
                        continue;
                    }

                    // _lastCandidateURL is presumably set by the tag handlers while the
                    // fragment is being read; the fragment becomes that candidate's hint text.
                    _candidateHintTexts[_lastCandidateURL] = text.Trim();
                    _lastCandidateURL = null;
                }

                _lastPriority = -1;
                if (_downloadResults)
                {
                    ParseNextCandidate();
                }
            }
        }
Beispiel #2
0
 /// <summary>
 /// Restarts bookmark parsing: releases the parser from any previous run and, if a
 /// "bookmarks.html" file is found under <c>_path</c>, opens the first one with a freshly
 /// configured parser. When no such file is found, <c>_parser</c> is left null.
 /// </summary>
 public void Reset()
 {
     // Dispose the previous parser before dropping the reference; otherwise the
     // StreamReader it was given in an earlier Reset would stay open.
     // The interface is fully qualified so this works regardless of the file's usings
     // and of whether HTMLParser implements Dispose explicitly.
     System.IDisposable previousParser = _parser;
     _parser = null;
     if (previousParser != null)
     {
         previousParser.Dispose();
     }

     FileInfo[] bookmarkFiles = IOTools.GetFiles(_path, "bookmarks.html");
     if (bookmarkFiles == null || bookmarkFiles.Length == 0)
     {
         return;
     }

     _parser            = new HTMLParser(new StreamReader(bookmarkFiles[0].FullName), true);
     _parser.BreakWords = false;
     _parser.AddTagHandler("dl", new HTMLParser.TagHandler(OnDLTag));
     _parser.AddTagHandler("/dl", new HTMLParser.TagHandler(OnDLClosedTag));
     _parser.AddTagHandler("h3", new HTMLParser.TagHandler(OnHeaderTag));
     _parser.AddTagHandler("/h3", new HTMLParser.TagHandler(OnHeaderClosedTag));
     _parser.AddTagHandler("a", new HTMLParser.TagHandler(OnLinkTag));
     _parser.AddTagHandler("/a", new HTMLParser.TagHandler(OnLinkClosedTag));
     _parser.AddTagHandler("dd", new HTMLParser.TagHandler(OnDescriptionTag));

     // Parsing state starts at the top level, outside any header, link or description.
     _level    = 0;
     _inHeader = _inLink = _inDescription = false;
 }
Beispiel #3
0
 /// <summary>
 /// Parses the HTML available from <paramref name="reader"/> and passes every non-empty
 /// text fragment to <paramref name="consumer"/>, as a heading or as a plain fragment
 /// depending on the parser state. Afterwards, if <paramref name="source"/> is a "Weblink"
 /// resource whose name is empty or is a prefix of its URL, the resource is renamed to the
 /// title of the parsed page.
 /// </summary>
 /// <param name="resource">Resource being indexed; its Id identifies the document for the consumer.</param>
 /// <param name="source">Resource the stream originates from; may be null.</param>
 /// <param name="reader">Reader over the HTML text. It is not closed by this method's parser.</param>
 /// <param name="consumer">Receiver of the extracted headings and fragments.</param>
 private void ProcessResourceStream(IResource resource, IResource source, TextReader reader,
                                    IResourceTextConsumer consumer)
 {
     _currentIndexedRes = resource;
     try
     {
         using (HTMLParser parser = new HTMLParser(reader))
         {
             // The reader is not owned by this parser, so disposing the parser must not close it.
             parser.CloseReader = false;
             parser.AddTagHandler("link", LinkHandler);
             int docID = resource.Id;
             while (!parser.Finished)
             {
                 string fragment = parser.ReadNextFragment();
                 if (fragment.Length == 0)
                 {
                     continue;
                 }
                 if (parser.InHeading)
                 {
                     consumer.AddDocumentHeading(docID, fragment);
                 }
                 else
                 {
                     consumer.AddDocumentFragment(docID, fragment);
                 }
             }

             // Check whether the source resource is a favorite and has a non-empty name property.
             // If it hasn't, or its name equals the URL, set the name from the title of the HTML stream.
             if (source != null && source.Type == "Weblink")
             {
                 RenameWeblinkFromPageTitle(source, parser);
             }
         }
     }
     finally
     {
         _currentIndexedRes = null;
     }
 }

 /// <summary>
 /// Renames <paramref name="source"/> to the page title reported by <paramref name="parser"/>
 /// when its current name is empty or is a prefix of its scheme-less, sanitized URL.
 /// Does nothing if the bookmark service is unavailable, the title is blank, or the
 /// owner profile does not allow renaming.
 /// </summary>
 private void RenameWeblinkFromPageTitle(IResource source, HTMLParser parser)
 {
     IBookmarkService service = (IBookmarkService)Core.PluginLoader.GetPluginService(typeof(IBookmarkService));
     if (service == null)
     {
         return;
     }

     string name = source.GetPropText(Core.Props.Name);
     string url  = string.Empty;
     if (Core.ResourceStore.PropTypes.Exist("URL"))
     {
         url = StripKnownUrlScheme(source.GetPropText("URL"));
     }

     // Replace characters that are invalid in paths before comparing with the name.
     char[] invalidChars = Path.GetInvalidPathChars();
     if (url.IndexOfAny(invalidChars) >= 0)
     {
         foreach (char invalidChar in invalidChars)
         {
             url = url.Replace(invalidChar, '-');
         }
     }

     // Ordinal comparison: a culture-sensitive StartsWith ignores zero-width characters,
     // so a name made only of such characters would match every URL.
     if (!string.IsNullOrEmpty(name) && !url.StartsWith(name, System.StringComparison.Ordinal))
     {
         return;
     }

     string title = (parser.Title ?? string.Empty).Trim();
     if (title.Length == 0)
     {
         return;
     }

     IBookmarkProfile profile = service.GetOwnerProfile(source);
     string           error;
     if (profile != null && profile.CanRename(source, out error))
     {
         profile.Rename(source, title);
         service.SetName(source, title);
     }
 }

 /// <summary>
 /// Removes a leading "http://", "file://" or "ftp://" from <paramref name="url"/>.
 /// Any other value is returned unchanged.
 /// </summary>
 private static string StripKnownUrlScheme(string url)
 {
     // NOTE(review): "https://" is not stripped, matching the original behavior -
     // confirm whether names of https bookmarks include the scheme.
     string[] schemes = { "http://", "file://", "ftp://" };
     foreach (string scheme in schemes)
     {
         // Ordinal match guarantees the matched prefix is exactly scheme.Length characters,
         // so the Substring below cannot cut into the rest of the URL.
         if (url.StartsWith(scheme, System.StringComparison.Ordinal))
         {
             return url.Substring(scheme.Length);
         }
     }
     return url;
 }