/// <summary>
/// Creates the index document that matches the extension of <paramref name="file"/>.
/// </summary>
/// <param name="file">Path of the file to wrap. The extension is matched case-insensitively.</param>
/// <param name="parent">
/// Data source handed to the created document. Its <c>Name</c> is read when the file is HTML.
/// </param>
/// <returns>
/// A PDF document for <c>.pdf</c>, a text document for <c>.txt</c>, the result of
/// <c>WebDataSource.FromHtml</c> for <c>.html</c>/<c>.htm</c>, or <c>null</c> when the
/// extension is not one of these.
/// </returns>
public static IIndexDocument FromFile(string file, IIndexDataSource parent)
{
    // Ordinal, case-insensitive matching: extensions are identifiers, not linguistic text.
    if (file.EndsWith(".pdf", StringComparison.OrdinalIgnoreCase))
    {
        return new IndexPDFDocument(file, parent);
    }

    if (file.EndsWith(".txt", StringComparison.OrdinalIgnoreCase))
    {
        return new IndexedTextFile(file, parent);
    }

    // The original tested ".html" twice; the second test is taken to be the short ".htm" form.
    if (file.EndsWith(".html", StringComparison.OrdinalIgnoreCase)
        || file.EndsWith(".htm", StringComparison.OrdinalIgnoreCase))
    {
        using (FileStream fs = File.OpenRead(file))
        {
            return WebDataSource.FromHtml(fs, file, parent.Name);
        }
    }

    // Unknown extension: no document type is available for this file.
    return null;
}
/// <summary>
/// Creates a PDF index document from an already opened stream.
/// </summary>
/// <param name="fname">Name of the document; forwarded to the base constructor and used in the error message.</param>
/// <param name="data">Stream holding the PDF content; opened read-only.</param>
/// <param name="parent">Owning data source; forwarded to the base constructor.</param>
/// <remarks>
/// Opening is best-effort: any exception is reported on the console and swallowed, in which case
/// <c>pdfExtractor</c> (and possibly <c>pdfDocument</c>) is left unassigned.
/// </remarks>
public IndexPDFDocument(string fname, Stream data, IIndexDataSource parent) : base(fname, parent)
{
    try
    {
        pdfDocument = PdfReader.Open(data, PdfDocumentOpenMode.ReadOnly);
        pdfExtractor = new Extractor(pdfDocument);
    }
    catch (Exception e)
    {
        // Keep the constructor non-throwing, but include the reason so the failure can be diagnosed.
        Console.WriteLine($"Error open pdf: {fname}: {e.Message}");
    }
}
/// <summary>
/// Downloads <paramref name="url"/> and creates an index document whose type depends on the
/// Content-Type of the server response: PDF, plain text, or HTML for everything else.
/// </summary>
/// <param name="url">Address to fetch with an HTTP GET request.</param>
/// <param name="parent">
/// Data source the document belongs to. The length of its <c>Path</c> is cut from the start of
/// <paramref name="url"/> to build the name of text and HTML documents.
/// </param>
/// <returns>The created document, or <c>null</c> when the request fails with a <see cref="WebException"/>.</returns>
public static IIndexDocument FromUrl(string url, IIndexDataSource parent)
{
    HttpWebRequest req = HttpWebRequest.CreateHttp(url);
    req.UserAgent = "DOCODO";
    req.Accept = "text/html, text/plain, application/pdf";
    req.Method = "GET";

    WebResponse res;
    try
    {
        res = req.GetResponse();
    }
    catch (WebException)
    {
        // Unreachable or failing URL: report "no document" to the caller.
        return null;
    }

    // Dispose the response on every path so the underlying connection is released.
    using (res)
    {
        // Servers commonly append parameters ("text/plain; charset=utf-8"); compare the media type only.
        string contentType = res.ContentType ?? string.Empty;
        int parametersStart = contentType.IndexOf(';');
        string mediaType = (parametersStart >= 0 ? contentType.Substring(0, parametersStart) : contentType).Trim();

        if (mediaType.Equals("application/pdf", StringComparison.OrdinalIgnoreCase))
        {
            // Buffer the body: the network stream is not seekable and is closed together with the
            // response. The buffer is intentionally not disposed here because it is handed to the
            // document (NOTE(review): presumably the PDF reader keeps reading from it - confirm).
            MemoryStream buffer = new MemoryStream();
            using (Stream body = res.GetResponseStream())
            {
                body.CopyTo(buffer);
            }
            buffer.Position = 0;
            return new DocumentsDataSource.IndexPDFDocument(url, buffer, parent);
        }

        if (mediaType.Equals("text/plain", StringComparison.OrdinalIgnoreCase))
        {
            using (StreamReader reader = new StreamReader(res.GetResponseStream()))
            {
                return new IndexPagedTextFile(url.Substring(parent.Path.Length), reader.ReadToEnd(), "Source=" + parent.Name);
            }
        }

        // Anything else is treated as HTML. FromFile disposes its stream right after FromHtml
        // returns, so the same is done here.
        using (Stream body = res.GetResponseStream())
        {
            return FromHtml(body, url.Substring(parent.Path.Length), parent.Name);
        }
    }
}
/// <summary>
/// Registers a data source with the base index of the service.
/// </summary>
/// <param name="ds">Data source handed to the base index.</param>
/// <returns>This configurator instance, so calls can be chained.</returns>
public DocodoServiceConfigure AddDataSource(IIndexDataSource ds)
{
    var baseIndex = _service.getBaseIndex();
    baseIndex.AddDataSource(ds);
    return this;
}
/// <summary>
/// Creates a text-file document for <paramref name="fname"/>.
/// </summary>
/// <param name="fname">Full path of the text file; stored as-is.</param>
/// <param name="parent">
/// Owning data source, may be <c>null</c>. When present, as many leading characters as its
/// <c>Path</c> is long are dropped from <paramref name="fname"/> to form <c>Name</c>.
/// </param>
public IndexedTextFile(string fname, IIndexDataSource parent)
{
    this.fname = fname;

    // Without a parent there is no prefix to strip, so Name is the whole path.
    int prefixLength = 0;
    if (parent != null)
    {
        prefixLength = parent.Path.Length;
    }

    Name = fname.Substring(prefixLength);
    this.parent = parent;
}