/// <summary>
/// Adds a record for a document stored in the file <paramref name="fname"/>,
/// resolved relative to this data source's <c>Path</c>.
/// </summary>
/// <param name="name">Name assigned to the created document.</param>
/// <param name="fname">File name, combined with <c>Path</c>; a ".pdf" extension
/// (any letter case) selects the PDF document type, anything else is indexed as a text file.</param>
/// <param name="fields">Optional header fields; when non-null the document's
/// <c>headers</c> delegate is set to return this value.</param>
/// <param name="queue">Queue passed to <c>Enqueue</c> together with the created document.</param>
/// <exception cref="InvalidDataException">The index type is not <c>IndexType.File</c>.</exception>
public virtual void AddRecord(string name, string fname, string fields, ConcurrentQueue<IIndexDocument> queue)
{
    if (indexType != IndexType.File)
    {
        throw new InvalidDataException("Adding record of wrong IndexType");
    }

    AddRecordBase(name, fields);

    // "Path" alone is this instance's member, hence the fully qualified System.IO.Path.
    string fullPath = System.IO.Path.Combine(Path, fname);

    // Ordinal, case-insensitive extension test: file extensions are not linguistic text,
    // so the result must not depend on the current culture.
    IndexTextFilesDataSource.IndexedTextFile doc;
    if (fname.EndsWith(".pdf", System.StringComparison.OrdinalIgnoreCase))
    {
        doc = new DocumentsDataSource.IndexPDFDocument(fullPath, this);
    }
    else
    {
        doc = new IndexTextFilesDataSource.IndexedTextFile(fullPath, this);
    }

    doc.Name = name;
    if (fields != null)
    {
        doc.headers = () => fields;
    }

    Enqueue(queue, doc);
}
/// <summary>
/// Downloads <paramref name="url"/> with an HTTP GET and creates an
/// <see cref="IIndexDocument"/> whose concrete type depends on the media type
/// of the server's Content-Type response header: PDF, plain text, or
/// (for anything else) HTML.
/// </summary>
/// <param name="url">Address to download. The part after the first
/// <c>parent.Path.Length</c> characters is used as the document name for
/// text and HTML documents.</param>
/// <param name="parent">Data source used as the parent of the created document.</param>
/// <returns>The created document, or <c>null</c> when the request fails with a
/// <see cref="WebException"/>.</returns>
public static IIndexDocument FromUrl(string url, IIndexDataSource parent)
{
    HttpWebRequest req = HttpWebRequest.CreateHttp(url);
    req.UserAgent = "DOCODO";
    req.Accept = "text/html, text/plain, application/pdf";
    req.Method = "GET";

    WebResponse res;
    try
    {
        res = req.GetResponse();
    }
    catch (WebException)
    {
        // Unreachable or failing URLs are skipped rather than aborting the caller.
        return null;
    }

    // The header may carry parameters (e.g. "text/plain; charset=utf-8"),
    // so only the media type in front of the first ';' is compared.
    string contentType = res.ContentType ?? "";
    int paramStart = contentType.IndexOf(';');
    string mediaType = (paramStart >= 0 ? contentType.Substring(0, paramStart) : contentType)
        .Trim()
        .ToLowerInvariant();

    if (mediaType == "application/pdf")
    {
        // NOTE(review): the response stream is handed to the PDF document; whether the
        // document closes it is not visible here - confirm ownership.
        return new DocumentsDataSource.IndexPDFDocument(url, res.GetResponseStream(), parent);
    }

    if (mediaType == "text/plain")
    {
        // Disposing the reader also closes the response stream.
        using (StreamReader reader = new StreamReader(res.GetResponseStream()))
        {
            return new IndexPagedTextFile(url.Substring(parent.Path.Length), reader.ReadToEnd(), "Source=" + parent.Name);
        }
    }

    // Every other content type is treated as HTML.
    return FromHtml(res.GetResponseStream(), url.Substring(parent.Path.Length), parent.Name);
}
/// <summary>
/// Adds a record for a document supplied as a BLOB stream. The content type is
/// detected from the first bytes of the stream: a "%PDF" signature yields a PDF
/// document, an "&lt;html" tag yields an HTML document, and anything else is
/// read as text using a detected character set.
/// </summary>
/// <param name="name">Name of the record; used as the document name for PDF and HTML content.</param>
/// <param name="stream">Document content. It is rewound with <c>Seek</c> after the
/// type detection, so it has to support seeking.</param>
/// <param name="fields">Optional header fields attached to the created document.</param>
/// <param name="queue">Queue passed to <c>Enqueue</c> together with the created document.</param>
/// <exception cref="InvalidDataException">The index type is not <c>IndexType.Blob</c>.</exception>
/// <remarks>
/// Nothing is enqueued when the content is neither PDF nor HTML and no character
/// set can be detected.
/// </remarks>
public virtual void AddRecord(string name, Stream stream, string fields, ConcurrentQueue<IIndexDocument> queue)
{
    // Validate before AddRecordBase, matching the file-based overload, so a
    // wrong-type call leaves no partially registered record behind.
    if (indexType != IndexType.Blob)
    {
        throw new InvalidDataException("Adding record of wrong IndexType");
    }

    AddRecordBase(name, fields);

    // Read the head of the stream directly. Wrapping it in a BinaryReader and
    // disposing that reader would close the stream that is still needed below.
    byte[] buff = new byte[4000];
    int read = 0;
    while (read < buff.Length)
    {
        int got = stream.Read(buff, read, buff.Length - read);
        if (got <= 0)
        {
            break;
        }
        read += got;
    }

    // Only the bytes actually read take part in the detection.
    String det = Encoding.UTF8.GetString(buff, 0, read);
    stream.Seek(0, SeekOrigin.Begin);

    IIndexDocument doc = null;

    if ((read >= 4) && (buff[0] == '%') && (buff[1] == 'P') && (buff[2] == 'D') && (buff[3] == 'F'))
    {
        DocumentsDataSource.IndexPDFDocument pdf = new DocumentsDataSource.IndexPDFDocument(name, stream, this);
        if (fields != null)
        {
            pdf.headers = () => fields;
        }
        doc = pdf;
    }
    else if (det.IndexOf("<html", StringComparison.OrdinalIgnoreCase) >= 0)
    {
        IndexPagedTextFile file = WebDataSource.FromHtml(stream, name, Name);
        if (fields != null)
        {
            file.SetHeaders(fields);
        }
        // Without this assignment the HTML document would never be enqueued.
        doc = file;
    }
    else
    {
        // Plain text: detect the character set from the sampled bytes.
        Ude.CharsetDetector detector = new Ude.CharsetDetector();
        detector.Feed(buff, 0, read);
        detector.DataEnd();
        if (detector.Charset != null)
        {
            Encoding enc = Portable.Text.Encoding.GetEncoding(detector.Charset);
            using (StreamReader sreader = new StreamReader(stream, enc, false))
            {
                // NOTE(review): the document is created with an empty name rather than
                // `name` - kept as in the original; confirm this is intended.
                doc = new IndexPagedTextFile("", sreader.ReadToEnd(), fields != null ? fields : "");
            }
        }
    }

    if (doc != null)
    {
        Enqueue(queue, doc);
    }
}