示例#1
0
        /// <summary>
        /// Creates an index document for a file, choosing the document type from the file extension.
        /// </summary>
        /// <param name="file">Path of the file; its extension is matched case-insensitively.</param>
        /// <param name="parent">Data source passed on to the created document.</param>
        /// <returns>
        /// An <c>IndexPDFDocument</c> for ".pdf", an <c>IndexedTextFile</c> for ".txt", the result of
        /// <c>WebDataSource.FromHtml</c> for ".html" / ".htm", or <c>null</c> for any other extension.
        /// </returns>
        public static IIndexDocument FromFile(string file, IIndexDataSource parent)
        {
            // Ordinal, case-insensitive matching: extensions are identifiers, not linguistic text,
            // so the result must not depend on the current culture.
            const System.StringComparison ignoreCase = System.StringComparison.OrdinalIgnoreCase;

            if (file.EndsWith(".pdf", ignoreCase))
            {
                // PDF
                return(new IndexPDFDocument(file, parent));
            }

            if (file.EndsWith(".txt", ignoreCase))
            {
                return(new IndexedTextFile(file, parent));
            }

            // The original tested ".html" twice; the second test was evidently meant to be ".htm".
            if (file.EndsWith(".html", ignoreCase) || file.EndsWith(".htm", ignoreCase))
            {
                using (FileStream fs = File.OpenRead(file))
                {
                    return(WebDataSource.FromHtml(fs, file, parent.Name));
                }
            }

            // Unsupported extension.
            return(null);
        }
示例#2
0
        /// <summary>
        /// Adds a document from a BLOB: inspects the beginning of the stream to decide whether the
        /// content is PDF, HTML or plain text, builds the matching index document and enqueues it.
        /// </summary>
        /// <param name="name">Record name, passed to <c>AddRecordBase</c> and to the created document.</param>
        /// <param name="stream">
        /// Record content. It is rewound with <c>Seek</c> after the type probe, so it must be seekable.
        /// </param>
        /// <param name="fields">Optional header fields attached to the document; may be <c>null</c>.</param>
        /// <param name="queue">Queue handed to <c>Enqueue</c> together with the created document.</param>
        /// <exception cref="InvalidDataException">The data source's index type is not <c>IndexType.Blob</c>.</exception>
        public virtual void AddRecord(string name, Stream stream, string fields, ConcurrentQueue <IIndexDocument> queue)
        {
            const int probeLength = 4000;
            IIndexDocument doc = null;

            AddRecordBase(name, fields);

            if (indexType != IndexType.Blob)
            {
                throw new InvalidDataException("Adding record of wrong IndexType");
            }

            // Read the probe bytes straight from the stream. Wrapping it in a BinaryReader and
            // disposing that reader would close the stream, which is still needed below.
            byte[] buff = new byte[probeLength];
            int    read = 0;
            while (read < buff.Length)
            {
                int chunk = stream.Read(buff, read, buff.Length - read);
                if (chunk <= 0)
                {
                    break;
                }
                read += chunk;
            }

            // Decode only the bytes actually read so zero padding is not treated as content.
            String det = Encoding.UTF8.GetString(buff, 0, read);

            stream.Seek(0, SeekOrigin.Begin);

            // detect type
            // (buff is zero-initialised, so indexing the first four bytes is safe for short streams)
            if ((buff[0] == '%') && (buff[1] == 'P') && (buff[2] == 'D') && (buff[3] == 'F'))
            {
                DocumentsDataSource.IndexPDFDocument pdf = new DocumentsDataSource.IndexPDFDocument(name, stream, this);
                if (fields != null)
                {
                    pdf.headers = () => { return(fields); };
                }
                doc = pdf;
            }
            else
            if (det.Contains("<html"))
            {
                IndexPagedTextFile file = WebDataSource.FromHtml(stream, name, Name);
                if (fields != null)
                {
                    file.SetHeaders(fields);
                }
                // Hand the parsed page over for enqueueing; previously it was built and then dropped.
                doc = file;
            }
            else
            {
                // detect charset
                Ude.CharsetDetector detector = new Ude.CharsetDetector();
                detector.Feed(buff, 0, read);
                detector.DataEnd();
                if (detector.Charset != null)
                {
                    Encoding enc = Portable.Text.Encoding.GetEncoding(detector.Charset);
                    using (StreamReader sreader = new StreamReader(stream, enc, false)) {
                        doc = new IndexPagedTextFile("", sreader.ReadToEnd(), fields != null ? fields : "");
                    }
                }
                // When no charset is detected, no document is created and nothing is enqueued.
            }

            if (doc != null)
            {
                Enqueue(queue, doc);
            }
        }