Exemplo n.º 1
0
        /// <summary>
        /// Downloads <paramref name="url"/> and wraps the response body in an <see cref="IIndexDocument"/>
        /// chosen by the media type of the server's Content-Type header: a PDF document for
        /// "application/pdf", a paged text file for "text/plain", and the result of FromHtml for
        /// every other media type.
        /// </summary>
        /// <param name="url">URL to fetch. For text and HTML documents the first <c>parent.Path.Length</c>
        /// characters are removed from it to build the document name; PDF documents keep the full URL.</param>
        /// <param name="parent">Data source the created document is attached to.</param>
        /// <returns>The created document, or <c>null</c> when the request fails with a <see cref="WebException"/>.</returns>
        public static IIndexDocument FromUrl(string url, IIndexDataSource parent)
        {
            HttpWebRequest req = HttpWebRequest.CreateHttp(url);

            req.UserAgent = "DOCODO";
            req.Accept    = "text/html, text/plain, application/pdf";
            req.Method    = "GET";
            IIndexDocument ret = null;
            WebResponse    res;

            try
            {
                res = req.GetResponse();
            }
            catch (WebException)
            {
                // A failed request is reported to the caller as "no document".
                return(null);
            }

            // The header may carry parameters (e.g. "text/plain; charset=utf-8") and may use any letter case,
            // so only the media type in front of the first ';' is compared, ordinally and case-insensitively.
            string contentType = res.ContentType ?? string.Empty;
            int    paramStart  = contentType.IndexOf(';');
            string mediaType   = (paramStart >= 0 ? contentType.Substring(0, paramStart) : contentType).Trim();

            // NOTE(review): the response is not disposed here because the PDF and HTML branches hand its
            // stream to code that is not visible in this method - confirm who owns and closes it.
            if (string.Equals(mediaType, "application/pdf", System.StringComparison.OrdinalIgnoreCase))
            {
                ret = new DocumentsDataSource.IndexPDFDocument(url, res.GetResponseStream(), parent);
            }
            else
            if (string.Equals(mediaType, "text/plain", System.StringComparison.OrdinalIgnoreCase))
            {
                // Disposing the reader also closes the response stream.
                using (StreamReader reader = new StreamReader(res.GetResponseStream()))
                {
                    ret = new IndexPagedTextFile(url.Substring(parent.Path.Length), reader.ReadToEnd(), "Source=" + parent.Name);
                }
            }
            else
            {
                // Everything else is handled as HTML.
                ret = FromHtml(res.GetResponseStream(), url.Substring(parent.Path.Length), parent.Name);
            }


            return(ret);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Adds a document from a BLOB: inspects up to the first 4000 bytes of <paramref name="stream"/> to decide
        /// whether the content is a PDF, an HTML page or plain text, builds the matching document and enqueues it.
        /// </summary>
        /// <param name="name">Record name; passed to AddRecordBase and used as the name of PDF and HTML documents.</param>
        /// <param name="stream">BLOB content. It must be seekable, because it is rewound after the type sniffing.
        /// This method itself does not close it, except in the plain-text path where the StreamReader does.</param>
        /// <param name="fields">Optional header fields attached to the created document; may be <c>null</c>.</param>
        /// <param name="queue">Queue the created document is handed to via Enqueue.</param>
        /// <exception cref="InvalidDataException">The index type is not <c>IndexType.Blob</c>.</exception>
        public virtual void AddRecord(string name, Stream stream, string fields, ConcurrentQueue <IIndexDocument> queue)
        {
            IIndexDocument doc = null;

            AddRecordBase(name, fields);

            if (indexType != IndexType.Blob)
            {
                throw new InvalidDataException("Adding record of wrong IndexType");
            }

            // Read the sniffing prefix straight from the stream. A BinaryReader created without leaveOpen
            // would close the stream on Dispose, and the stream is still needed below.
            byte[] buff = new byte[4000];
            int    read = 0;
            while (read < buff.Length)
            {
                int chunk = stream.Read(buff, read, buff.Length - read);
                if (chunk <= 0)
                {
                    break;
                }
                read += chunk;
            }

            // Decode only the bytes actually read, so zero padding never becomes part of the sniffed text.
            String det = Encoding.UTF8.GetString(buff, 0, read);

            stream.Seek(0, SeekOrigin.Begin);

            // detect type
            if ((read >= 4) && (buff[0] == '%') && (buff[1] == 'P') && (buff[2] == 'D') && (buff[3] == 'F'))
            {
                DocumentsDataSource.IndexPDFDocument pdf = new DocumentsDataSource.IndexPDFDocument(name, stream, this);
                if (fields != null)
                {
                    pdf.headers = () => fields;
                }
                doc = pdf;
            }
            else
            if (det.IndexOf("<html", StringComparison.OrdinalIgnoreCase) >= 0)
            {
                // HTML tag names are case-insensitive, so "<HTML" counts as well.
                IndexPagedTextFile file = WebDataSource.FromHtml(stream, name, Name);
                if (fields != null)
                {
                    file.SetHeaders(fields);
                }
                // Hand the parsed page over; without this assignment it would never be enqueued.
                doc = file;
            }
            else
            {
                // detect charset
                Ude.CharsetDetector detector = new Ude.CharsetDetector();
                detector.Feed(buff, 0, read);
                detector.DataEnd();
                if (detector.Charset != null)
                {
                    Encoding enc = Portable.Text.Encoding.GetEncoding(detector.Charset);
                    using (StreamReader sreader = new StreamReader(stream, enc, false)) {
                        // NOTE(review): the document is created with an empty name rather than `name` - confirm this is intended.
                        doc = new IndexPagedTextFile("", sreader.ReadToEnd(), fields != null ? fields : "");
                    }
                }
                // When no charset can be detected the BLOB is skipped and nothing is enqueued.
            }

            if (doc != null)
            {
                Enqueue(queue, doc);
            }
        }