/// <summary>
/// Starts feed discovery for an HTML page: resets the discovery state,
/// parses the page with handlers registered for &lt;link&gt;, &lt;a&gt; and
/// &lt;/a&gt; tags, and optionally begins processing the collected candidates.
/// </summary>
/// <param name="url">Address of the page; used to build the base URI.</param>
/// <param name="readStream">Stream holding the page content.</param>
/// <param name="charset">Charset name handed to <c>OpenHTMLReader</c> together with the stream.</param>
public void StartDiscover(string url, Stream readStream, string charset)
{
    OnDiscoverProgress("Discovering...");

    // Fresh state for this discovery run. The order is kept as-is: the Uri
    // constructor may throw, in which case the collections are left untouched.
    _baseUri = new Uri(url);
    _results = new RSSDiscoverResults();
    _candidateURLs = new PriorityQueue();
    _candidateHintTexts = new HashMap();
    _candidateURLSet = new HashSet();

    using (HTMLParser htmlParser = new HTMLParser(OpenHTMLReader(readStream, charset)))
    {
        htmlParser.AddTagHandler("link", new HTMLParser.TagHandler(OnLinkTag));
        htmlParser.AddTagHandler("a", new HTMLParser.TagHandler(OnATag));
        htmlParser.AddTagHandler("/a", new HTMLParser.TagHandler(OnEndATag));

        while (true)
        {
            if (htmlParser.Finished)
            {
                break;
            }

            string text = htmlParser.ReadNextFragment();
            if (_lastCandidateURL == null)
            {
                continue;
            }

            // A candidate URL is pending (presumably recorded by one of the tag
            // handlers - confirm against them): the fragment just read becomes
            // its hint text, and the pending marker is cleared.
            _candidateHintTexts[_lastCandidateURL] = text.Trim();
            _lastCandidateURL = null;
        }

        _lastPriority = -1;
        if (_downloadResults)
        {
            ParseNextCandidate();
        }
    }
}
/// <summary>
/// Re-initializes parsing of the "bookmarks.html" file looked up under
/// <c>_path</c>. When no such file is found the parser is set to null and the
/// nesting/state flags are left as they were; otherwise a new parser is
/// created over the first file found, all tag handlers are registered and the
/// parsing state is reset.
/// </summary>
public void Reset()
{
    FileInfo[] found = IOTools.GetFiles(_path, "bookmarks.html");
    bool hasBookmarksFile = found != null && found.Length != 0;
    if (!hasBookmarksFile)
    {
        _parser = null;
        return;
    }

    string bookmarksPath = found[0].FullName;
    _parser = new HTMLParser(new StreamReader(bookmarksPath), true);
    _parser.BreakWords = false;

    // Opening and closing handlers for folder lists, folder headers and links,
    // plus the opening handler for descriptions.
    _parser.AddTagHandler("dl", new HTMLParser.TagHandler(OnDLTag));
    _parser.AddTagHandler("/dl", new HTMLParser.TagHandler(OnDLClosedTag));
    _parser.AddTagHandler("h3", new HTMLParser.TagHandler(OnHeaderTag));
    _parser.AddTagHandler("/h3", new HTMLParser.TagHandler(OnHeaderClosedTag));
    _parser.AddTagHandler("a", new HTMLParser.TagHandler(OnLinkTag));
    _parser.AddTagHandler("/a", new HTMLParser.TagHandler(OnLinkClosedTag));
    _parser.AddTagHandler("dd", new HTMLParser.TagHandler(OnDescriptionTag));

    // Start from the top level with no element open.
    _level = 0;
    _inDescription = false;
    _inLink = false;
    _inHeader = false;
}
/// <summary>
/// Parses an HTML stream and passes its non-empty text fragments to
/// <paramref name="consumer"/>, as headings or plain fragments depending on the
/// parser's <c>InHeading</c> state. Afterwards, if <paramref name="source"/> is
/// a "Weblink" resource whose name is empty or is a prefix of its (normalized)
/// URL, the resource is renamed to the title of the parsed page.
/// </summary>
/// <param name="resource">Resource being indexed; its Id is used as the document id.</param>
/// <param name="source">Optional source resource that may be renamed; may be null.</param>
/// <param name="reader">Reader over the HTML text. It is not closed by the parser (CloseReader is set to false).</param>
/// <param name="consumer">Receiver of the extracted headings and fragments.</param>
private void ProcessResourceStream(IResource resource, IResource source, TextReader reader, IResourceTextConsumer consumer)
{
    _currentIndexedRes = resource;
    try
    {
        using (HTMLParser parser = new HTMLParser(reader))
        {
            parser.CloseReader = false;
            parser.AddTagHandler("link", LinkHandler);

            int docID = resource.Id;
            while (!parser.Finished)
            {
                string fragment = parser.ReadNextFragment();
                if (fragment.Length > 0)
                {
                    if (parser.InHeading)
                    {
                        consumer.AddDocumentHeading(docID, fragment);
                    }
                    else
                    {
                        consumer.AddDocumentFragment(docID, fragment);
                    }
                }
            }

            // Check whether the source resource is a favorite with a non-empty
            // name. If it has no name, or its name equals (is a prefix of) its
            // URL, take the name from the title of the HTML stream.
            if (source != null && source.Type == "Weblink")
            {
                RenameWeblinkFromTitle(source, parser);
            }
        }
    }
    finally
    {
        // Always clear the marker, even when parsing or the consumer throws.
        _currentIndexedRes = null;
    }
}

/// <summary>
/// Renames a weblink to the parsed page title when its current name is empty
/// or is a prefix of its normalized URL, provided the bookmark service and the
/// owning profile allow the rename.
/// </summary>
private void RenameWeblinkFromTitle(IResource source, HTMLParser parser)
{
    IBookmarkService service = (IBookmarkService)Core.PluginLoader.GetPluginService(typeof(IBookmarkService));
    if (service == null)
    {
        return;
    }

    string name = source.GetPropText(Core.Props.Name);
    string url = string.Empty;
    if (Core.ResourceStore.PropTypes.Exist("URL"))
    {
        url = StripUrlScheme(source.GetPropText("URL"));
    }
    url = ReplaceInvalidPathChars(url);

    // URLs and names derived from them are identifiers, not linguistic text,
    // so the prefix test is ordinal rather than culture-sensitive.
    if (name.Length != 0 && !url.StartsWith(name, StringComparison.Ordinal))
    {
        return;
    }

    string title = parser.Title.Trim();
    if (title.Length == 0)
    {
        return;
    }

    IBookmarkProfile profile = service.GetOwnerProfile(source);
    string error;
    if (profile != null && profile.CanRename(source, out error))
    {
        profile.Rename(source, title);
        service.SetName(source, title);
    }
}

/// <summary>
/// Removes a leading "http://", "file://" or "ftp://" from the URL. Matching
/// is ordinal and case-sensitive; any other URL is returned unchanged.
/// </summary>
private static string StripUrlScheme(string url)
{
    string[] schemePrefixes = new string[] { "http://", "file://", "ftp://" };
    foreach (string prefix in schemePrefixes)
    {
        if (url.StartsWith(prefix, StringComparison.Ordinal))
        {
            // Strip by the matched prefix's own length rather than relying on
            // two different prefixes happening to be equally long.
            return url.Substring(prefix.Length);
        }
    }
    return url;
}

/// <summary>
/// Replaces every character that is invalid in a path with '-'.
/// </summary>
private static string ReplaceInvalidPathChars(string text)
{
    char[] invalidChars = Path.GetInvalidPathChars();
    if (text.IndexOfAny(invalidChars) < 0)
    {
        return text;
    }

    foreach (char invalidChar in invalidChars)
    {
        text = text.Replace(invalidChar, '-');
    }
    return text;
}