/// <summary>
/// Creates an index document for a file on disk, choosing the document
/// type from the file name extension (compared case-insensitively).
/// </summary>
/// <param name="file">Path of the file to index.</param>
/// <param name="parent">Data source that the created document belongs to.</param>
/// <returns>
/// A document for <c>.pdf</c>, <c>.txt</c>, <c>.html</c> or <c>.htm</c> files;
/// <c>null</c> for any other extension.
/// </returns>
public static IIndexDocument FromFile(string file, IIndexDataSource parent)
{
    // Ordinal, case-insensitive comparison: extensions are identifiers, not
    // linguistic text, so culture-specific casing rules must not apply.
    if (file.EndsWith(".pdf", StringComparison.OrdinalIgnoreCase))
    {
        return new IndexPDFDocument(file, parent);
    }

    if (file.EndsWith(".txt", StringComparison.OrdinalIgnoreCase))
    {
        return new IndexedTextFile(file, parent);
    }

    // The original tested ".html" twice; the second test is taken to have
    // been meant for the short ".htm" form.
    if (file.EndsWith(".html", StringComparison.OrdinalIgnoreCase) ||
        file.EndsWith(".htm", StringComparison.OrdinalIgnoreCase))
    {
        using (FileStream fs = File.OpenRead(file))
        {
            return WebDataSource.FromHtml(fs, file, parent.Name);
        }
    }

    // Unsupported extension.
    return null;
}
/// <summary>
/// Adds a document from a BLOB. The first bytes of the stream are inspected
/// to decide whether the content is a PDF, an HTML page or plain text, and
/// the resulting document (if any) is passed to <c>Enqueue</c>.
/// </summary>
/// <param name="name">Name of the record.</param>
/// <param name="stream">
/// Content of the record. It is rewound with <see cref="Stream.Seek"/> after
/// sniffing, so it has to support seeking.
/// </param>
/// <param name="fields">Optional header fields for the document; may be <c>null</c>.</param>
/// <param name="queue">Queue handed to <c>Enqueue</c> together with the created document.</param>
/// <exception cref="InvalidDataException">
/// Thrown when the index type of this data source is not <c>IndexType.Blob</c>.
/// </exception>
public virtual void AddRecord(string name, Stream stream, string fields, ConcurrentQueue<IIndexDocument> queue)
{
    const int SniffLength = 4000;

    IIndexDocument doc = null;

    AddRecordBase(name, fields);

    // "File or not Blob" in the original reduces to "not Blob".
    if (indexType != IndexType.Blob)
    {
        throw new InvalidDataException("Adding record of wrong IndexType");
    }

    // Read the sniff buffer straight from the stream. The original wrapped the
    // stream in a BinaryReader and disposed it, which also closes the
    // underlying stream and makes every later read fail.
    byte[] buff = new byte[SniffLength];
    int bytesRead = 0;
    while (bytesRead < SniffLength)
    {
        int n = stream.Read(buff, bytesRead, SniffLength - bytesRead);
        if (n <= 0)
        {
            break; // end of stream
        }

        bytesRead += n;
    }

    // Decode only the bytes actually read, not the zero padding behind them.
    String det = Encoding.UTF8.GetString(buff, 0, bytesRead);
    stream.Seek(0, SeekOrigin.Begin);

    // detect type
    bool isPdf = (bytesRead >= 4)
        && (buff[0] == '%') && (buff[1] == 'P') && (buff[2] == 'D') && (buff[3] == 'F');

    if (isPdf)
    {
        DocumentsDataSource.IndexPDFDocument pdf = new DocumentsDataSource.IndexPDFDocument(name, stream, this);
        if (fields != null)
        {
            pdf.headers = () => fields;
        }

        doc = pdf;
    }
    else if (det.IndexOf("<html", StringComparison.OrdinalIgnoreCase) >= 0)
    {
        // HTML tag names are case-insensitive, so "<HTML" is accepted as well.
        IndexPagedTextFile file = WebDataSource.FromHtml(stream, name, Name);
        if (fields != null)
        {
            file.SetHeaders(fields);
        }

        // The original never assigned the HTML document, so it was dropped.
        doc = file;
    }
    else
    {
        // detect charset
        Ude.CharsetDetector detector = new Ude.CharsetDetector();
        detector.Feed(buff, 0, bytesRead);
        detector.DataEnd();

        // Without a detected charset the content is skipped.
        if (detector.Charset != null)
        {
            Encoding enc = Portable.Text.Encoding.GetEncoding(detector.Charset);
            using (StreamReader sreader = new StreamReader(stream, enc, false))
            {
                doc = new IndexPagedTextFile("", sreader.ReadToEnd(), fields != null ? fields : "");
            }
        }
    }

    if (doc != null)
    {
        Enqueue(queue, doc);
    }
}