Ejemplo n.º 1
0
        /// <summary>
        /// Extracts the text of <c>terms-and-conditions.pdf</c>, looked up under
        /// <c>requiredPath</c>, into <c>terms-and-conditions.txt</c> in the same folder.
        /// </summary>
        /// <remarks>
        /// Exceptions raised inside the try block (path resolution, extraction) are
        /// written to the console instead of being propagated.
        /// </remarks>
        public void readfrompdffile()
        {
            // Strip a literal "file:\" prefix and switch back-slashes to forward slashes.
            string clearedfilepath = requiredPath.Replace("file:\\", "").Replace("\\", "/");
            string file            = clearedfilepath + "/terms-and-conditions.pdf";
            string outFile         = clearedfilepath + "/terms-and-conditions.txt";

            try
            {
                if (!File.Exists(file))
                {
                    // Second attempt: resolve the path against the current directory.
                    file = Path.GetFullPath(file);
                    if (!File.Exists(file))
                    {
                        Console.WriteLine("Please give in the path to the PDF file.");

                        // Nothing to parse: stop here instead of handing a
                        // non-existent path to the extractor.
                        return;
                    }
                }

                PDFParser pdfParser = new PDFParser();
                pdfParser.ExtractText(file, outFile);
            }
            catch (Exception exc)
            {
                Console.WriteLine(exc);
            }
        }
        /// <summary>
        /// Extracts the text of the PDF attached to a service and stores it as that
        /// PDF's Facebook description, together with a generated Facebook title.
        /// </summary>
        /// <param name="serviceId">Key used to look the service up in <c>db.Services</c>.</param>
        /// <returns>
        /// <c>Json("success")</c> once the changes are saved; <c>Json("Error")</c> when the
        /// service, its PDF or the PDF path is missing, or when extraction returns "false".
        /// </returns>
        public ActionResult callExtraxtText(int serviceId)
        {
            Service found = db.Services.Find(serviceId);
            if (found == null || found.PDF == null)
            {
                return(Json("Error"));
            }

            string storedName = found.PDF.PDFPath;
            if (storedName == null)
            {
                return(Json("Error"));
            }

            // The stored name is relative to the "PDFs" folder of the configured archive.
            string    archivePath = ConfigurationManager.AppSettings["pdfArchive"] + @"\PDFs\" + storedName;
            PDFParser extractor   = new PDFParser();
            string    extracted   = extractor.ExtractText(serviceId, archivePath);
            PDF       attached    = found.PDF;

            // The extractor reports failure through the literal string "false".
            if (extracted == "false")
            {
                return(Json("Error"));
            }

            attached.FacebookDescription = extracted;
            attached.FacebookTitle       = found.FirstName + " " + found.LastName + "'s Memorial Folder";
            db.SaveChanges();
            return(Json("success"));
        }
Ejemplo n.º 3
0
 /// <summary>
 /// Runs <c>PDFParser.ExtractText</c> on a fixed address and writes the
 /// output to <c>outfile.txt</c>. The boolean result is not inspected.
 /// </summary>
 public static void ParsePdf()
 {
     // NOTE(review): the source is an HTTP URL to an .htm page rather than a
     // local PDF path - confirm that PDFParser.ExtractText accepts this.
     const string source = @"http://ec.europa.eu/health/documents/community-register/html/h_direct_anx.htm#412_et";
     const string target = "outfile.txt";

     var extractor = new PDFParser();
     extractor.ExtractText(source, target);
 }
Ejemplo n.º 4
0
        /// <summary>
        /// Console entry point: extracts the text of a fixed PDF on the desktop into
        /// a text file in the same folder, then waits for a key press.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            const string inputPdf  = @"C:\Users\unknown\Desktop\oyak.pdf";
            const string outputTxt = @"C:\Users\unknown\Desktop\output.txt";

            new PDFParser().ExtractText(inputPdf, outputTxt);

            // Keep the console window open until a key is pressed.
            Console.ReadKey();
        }
        /// <summary>
        /// Downloads the PDF at <paramref name="url"/> into the folder configured as
        /// <c>downloadLocation</c> and returns its text.
        /// </summary>
        /// <param name="url">Address of the PDF to download.</param>
        /// <returns>
        /// The text produced by <c>ConvertToText</c> from the downloaded bytes, or an
        /// empty string when any step fails (the exception is logged via <c>ErrorUtil</c>).
        /// </returns>
        public static string DownloadFile(string url)
        {
            string result = "";

            try
            {
                // Read the clock once and zero-pad every component. Separate
                // DateTime.Now reads can straddle a tick, and unpadded parts make
                // different times collide (1:23:4 and 12:3:4 both gave "1234...").
                DateTime now          = DateTime.Now;
                string   fileName     = now.ToString("HHmmssfff", System.Globalization.CultureInfo.InvariantCulture);
                string   fileLocation = ConfigurationManager.AppSettings["downloadLocation"];
                string   fullFileName = System.IO.Path.Combine(fileLocation, fileName + ".pdf");

                using (WebClient client = new WebClient())
                {
                    client.DownloadFile(url, fullFileName);
                }

                // NOTE(review): the .txt name has no directory part, so this file is
                // written relative to the current working directory, and it is not
                // read back below - confirm it is still needed.
                PDFParser pdfParser = new PDFParser();
                pdfParser.ExtractText(fullFileName, System.IO.Path.GetFileNameWithoutExtension(fullFileName) + ".txt");

                var bytes = File.ReadAllBytes(fullFileName);
                result = ConvertToText(bytes);
            }
            catch (Exception ex)
            {
                // Best effort: log the failure and fall through to return "".
                ErrorUtil.logError(ex, "");
            }

            return(result);
        }
Ejemplo n.º 6
0
 /// <summary>
 /// Shows the content of a file in <c>textBox</c>. A ".pdf" file is first run
 /// through <c>PDFParser</c> into a temporary text file; any other existing file
 /// is shown as-is. A file that does not exist is ignored.
 /// </summary>
 /// <param name="fileToConvert">Path of the file to display.</param>
 public void convertFile(string fileToConvert)
 {
     // The whole conversion is handed to O2Thread.mtaThread.
     O2Thread.mtaThread(
         () =>
         {
             if (!fileToConvert.fileExists())
             {
                 return;
             }

             if (!fileToConvert.extension(".pdf"))
             {
                 textBox.set_Text(fileToConvert.contents());
                 return;
             }

             textBox.set_Text("...processing pdf file: " + fileToConvert);
             var extractor   = new PDFParser();
             var scratchFile = PublicDI.config.getTempFileInTempDirectory(".txt");
             extractor.ExtractText(fileToConvert, scratchFile);
             textBox.set_Text(scratchFile.contents().fixCRLF());

             // The temporary file is only a hand-over buffer; remove it once shown.
             Files.deleteFile(scratchFile);
         });
 }
Ejemplo n.º 7
0
        /// <summary>
        /// Runs sample PDFs through several text extractors (ITextParse, iTextSharp,
        /// PdfSharp, PDFParser and PdfTextExtractor) and writes each result to its
        /// own text file so the outputs can be compared.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        /// <remarks>
        /// The commented-out alternatives record which sample file worked or failed
        /// with which library. Every output writer is wrapped in <c>using</c> so the
        /// file handle is released even when extraction throws.
        /// </remarks>
        static void Main(string[] args)
        {
            ITextParse.ExtractText("employe-1.pdf", "iparse.txt");

            //using (PdfReader reader = new PdfReader("letter.pdf")) //Index was outside the bounds of the array.
            //using (PdfReader reader = new PdfReader("employe-1.pdf")) //ok
            //using (PdfReader reader = new PdfReader("feuille_de_paie.pdf"))  //Rebuild failed: trailer not found.; Original message: PDF startxref not found.
            using (iTextSharp.text.pdf.PdfReader reader = new iTextSharp.text.pdf.PdfReader("modele-bulletin-de-salaire.pdf")) //ok
            {
                StringBuilder text = new StringBuilder();

                // iTextSharp page numbers are 1-based.
                for (int page = 1; page <= reader.NumberOfPages; page++)
                {
                    iTextSharp.text.pdf.parser.ITextExtractionStrategy strategy = new iTextSharp.text.pdf.parser.SimpleTextExtractionStrategy();
                    //ITextExtractionStrategy strategy = new LocationTextExtractionStrategy();

                    string currentText = iTextSharp.text.pdf.parser.PdfTextExtractor.GetTextFromPage(reader, page, strategy);

                    // NOTE(review): this round-trips the text through Encoding.Default;
                    // characters that encoding cannot represent appear to be lost - confirm it is wanted.
                    currentText = Encoding.UTF8.GetString(ASCIIEncoding.Convert(Encoding.Default, Encoding.UTF8, Encoding.Default.GetBytes(currentText)));
                    text.Append(currentText);
                }

                using (System.IO.StreamWriter file = new System.IO.StreamWriter("itextsharp.txt"))
                {
                    file.WriteLine(text);
                }

                //return text.ToString();
            }

            //PdfSharp
            using (var _document = PdfReader.Open("letter.pdf", PdfDocumentOpenMode.ReadOnly)) //ok
            //using (var _document = PdfReader.Open("employe-1.pdf", PdfDocumentOpenMode.ReadOnly)) //ok
            //using (var _document = PdfReader.Open("feuille_de_paie.pdf", PdfDocumentOpenMode.ReadOnly)) //Non-negative number required.
            //using (var _document = PdfReader.Open("modele-bulletin-de-salaire.pdf", PdfDocumentOpenMode.ReadOnly))  //ok
            {
                using (System.IO.StreamWriter file = new System.IO.StreamWriter("pdfsharp.txt"))
                {
                    foreach (PdfPage page in _document.Pages)
                    {
                        var text = ExtractText(page);

                        foreach (string s in text)
                        {
                            file.Write(s);
                        }
                    }
                }
            }

            //iTextSharp too
            PDFParser parser = new PDFParser();

            //parser.ExtractText("letter.pdf", "pdfparser.txt"); //error
            parser.ExtractText("employe-1.pdf", "pdfparser.txt"); //ok
            //parser.ExtractText("feuille_de_paie.pdf", "pdfparser.txt"); //error
            //parser.ExtractText("modele-bulletin-de-salaire.pdf", "pdfparser.txt"); //error

            //PdfSharp
            //string text2 = PdfTextExtractor.GetText("letter.pdf"); //ok
            //string text2 = PdfTextExtractor.GetText("employe-1.pdf"); //ok
            //string text2 = PdfTextExtractor.GetText("feuille_de_paie.pdf"); //Non-negative number required.
            string text2 = PdfTextExtractor.GetText("modele-bulletin-de-salaire.pdf"); //ok

            using (System.IO.StreamWriter file2 = new System.IO.StreamWriter("PdfTextExtractor.txt"))
            {
                file2.Write(text2);
            }
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Gets the content of the specified PDF file.
        /// </summary>
        /// <param name="filePath">Path of the PDF file handed to <c>PDFParser.ExtractText</c>.</param>
        /// <returns>The string that <c>PDFParser.ExtractText</c> returns for that file.</returns>
        public static string GetPdfContent(string filePath)
        {
            var    extractor = new PDFParser();
            string content   = extractor.ExtractText(filePath);

            return content;
        }
Ejemplo n.º 9
0
    // Insert logic for processing found files here.
    public static void ProcessFile(string path, string tipo, string dpto, bool actualizar)
    {
        bool indexatexto = false, indexaaudio = false, indexaimagen = false, indexahipertexto = false, indexavideo = false;

        Random rnd = new Random();


        switch (tipo)
        {
        case "texto":
            indexatexto = true;
            break;

        case "hipertexto":
            indexahipertexto = true;
            break;

        case "video":
            indexavideo = true;
            break;

        case "imagen":
            indexaimagen = true;
            break;

        case "audio":
            indexaaudio = true;
            break;

        case "":
            indexatexto      = true;
            indexahipertexto = true;
            indexavideo      = true;
            indexaimagen     = true;
            indexaaudio      = true;
            break;
        }


        if (Herramientas.EsHiperTexto(path) && indexatexto && path.Contains(dpto))
        {
            Regex           trimmer = new Regex(@"\s\s+");
            ScrapingBrowser Browser = new ScrapingBrowser();
            Browser.AllowAutoRedirect = true; // Browser has settings you can access in setup
            Browser.AllowMetaRedirect = true;
            HtmlNode html = GetNodes(new Uri(path));

            var titulo = html.CssSelect("title").FirstOrDefault().InnerText;
            var body   = html.CssSelect("body").FirstOrDefault().InnerText;

            body = Regex.Replace(body, "<.*?>", string.Empty);
            body = Regex.Replace(body, @"(?:(?:\r?\n)+ +){2,}", @"\n");

            var f = new FileInfo(path);
            var fileLengthInKB = f.Length / 1024.0;

            Hipertexto h = new Hipertexto();
            h.nombreArchivo  = titulo;
            h.textoContenido = body;
            h.tamanoArchivo  = fileLengthInKB;

            Uri    u        = new Uri(path);
            string ext      = System.IO.Path.GetExtension(path);
            string auxiliar = "http://localhost/servidores/";
            h.urlRuta = u.AbsoluteUri;
            var    puerto = u.Port;
            int    pos    = h.urlRuta.IndexOf("servidores/") + 11;
            string aux2   = h.urlRuta.Substring(pos);
            int    pos2   = aux2.IndexOf("/");
            h.urlRuta = auxiliar + aux2;
            string servidor = aux2.Substring(0, pos2);
            int    pos3     = aux2.IndexOf(servidor + "/") + servidor.Length + 1;

            string depart = aux2.Substring(pos3);
            int    pos4   = depart.IndexOf("/");
            depart = depart.Substring(0, pos4);
            depart = depart.Replace("%20", " ");

            h.departamento             = depart;
            h.urlRuta                  = auxiliar + aux2;
            h.tamanoArchivo            = fileLengthInKB;
            h.formato                  = ext;
            h.idServidor               = servidor;
            h.nombreArchivo            = System.IO.Path.GetFileName(path);
            h.fechaCreacionArchivo     = (DateTime)File.GetCreationTime(path);
            h.fechaModificacionArchivo = (DateTime)File.GetLastWriteTime(path);
            h.fechaUltimaLectura       = (DateTime)File.GetLastAccessTime(path);
            h.fechaUltimaActualizacion = DateTime.Now;
            h.hits = rnd.Next(1, 55);

            if (File.Exists(path))
            {
                h.estadoActividad = 1;
            }

            Hipertexto existente = OperacionesElasticSearch.ExisteHipertexto(h);
            if (existente == null)
            {
                OperacionesElasticSearch.InsertarHiperTexto(h);
            }
            else
            {
                OperacionesElasticSearch.actualizarHipertexto(existente, h);
            }
        }
        else if (path.EndsWith(".txt") && indexatexto && path.Contains(dpto))
        {
            var    f = new FileInfo(path);
            var    fileLengthInKB = f.Length / 1024.0;
            Uri    u        = new Uri(path);
            Texto  t        = new Texto();
            string ext      = System.IO.Path.GetExtension(path);
            string auxiliar = "http://localhost/servidores/";
            t.urlRuta = u.AbsoluteUri;
            var    puerto = u.Port;
            int    pos    = t.urlRuta.IndexOf("servidores/") + 11;
            string aux2   = t.urlRuta.Substring(pos);
            t.urlRuta = auxiliar + aux2;
            int    pos2     = aux2.IndexOf("/");
            string servidor = aux2.Substring(0, pos2);
            int    pos3     = aux2.IndexOf(servidor + "/") + servidor.Length + 1;

            string depart = aux2.Substring(pos3);
            int    pos4   = depart.IndexOf("/");
            depart = depart.Substring(0, pos4);
            depart = depart.Replace("%20", " ");

            string textoContenido = System.IO.File.ReadAllText(path);
            string user           = System.IO.File.GetAccessControl(path).GetOwner(typeof(System.Security.Principal.NTAccount)).ToString();
            t.estadoActividad          = 1;
            t.departamento             = depart;
            t.urlRuta                  = auxiliar + aux2;
            t.tamanoArchivo            = fileLengthInKB;
            t.idServidor               = servidor;
            t.textoContenido           = textoContenido;
            t.titulo                   = t.nombreArchivo;
            t.formato                  = ext;
            t.nombreArchivo            = System.IO.Path.GetFileName(path);
            t.fechaCreacionArchivo     = (DateTime)File.GetCreationTime(path);
            t.fechaModificacionArchivo = (DateTime)File.GetLastWriteTime(path);
            t.fechaUltimaLectura       = (DateTime)File.GetLastAccessTime(path);
            t.fechaUltimaActualizacion = DateTime.Now;
            t.hits         = rnd.Next(1, 55);
            t.autorArchivo = user;


            Texto existente = OperacionesElasticSearch.ExisteTexto(t);
            if (existente == null)
            {
                OperacionesElasticSearch.InsertarTexto(t);
            }
            else
            {
                OperacionesElasticSearch.actualizarTexto(existente, t);
            }
        }
        else if (Herramientas.EsWord(path) && indexatexto && path.Contains(dpto))
        {
            var f = new FileInfo(path);
            var fileLengthInKB = f.Length / 1024.0;

            var applicationWord = new Microsoft.Office.Interop.Word.Application();
            applicationWord.Visible = false;
            Word.Document w = applicationWord.Documents.Open(@path, ReadOnly: true);
            Word.Range    ContentTypeProperties = w.Content;

            Microsoft.Office.Interop.Word.Application word = new Microsoft.Office.Interop.Word.Application();
            object miss = System.Reflection.Missing.Value;

            object readOnly = true;
            Microsoft.Office.Interop.Word.Document docs = word.Documents.Open(path, ref miss, ref readOnly, ref miss, ref miss, ref miss, ref miss, ref miss, ref miss, ref miss, ref miss, ref miss, ref miss, ref miss, ref miss, ref miss);


            //Get Author Name
            object wordProperties = docs.BuiltInDocumentProperties;

            Type   typeDocBuiltInProps = wordProperties.GetType();
            Object Authorprop          = typeDocBuiltInProps.InvokeMember("Item", System.Reflection.BindingFlags.Default | System.Reflection.BindingFlags.GetProperty, null, wordProperties, new object[] { "Author" });//query for author properties
            Type   typeAuthorprop      = Authorprop.GetType();
            //string strAuthor = typeAuthorprop.InvokeMember("Value", System.Reflection.BindingFlags.Default | System.Reflection.BindingFlags.GetProperty, null, Authorprop, new object[] { }).ToString();//get author name

            string textoContenido = "";
            for (int i = 0; i < docs.Paragraphs.Count; i++)
            {
                textoContenido += " \r\n " + docs.Paragraphs[i + 1].Range.Text.ToString();
            }
            Uri    u   = new Uri(path);
            Texto  t   = new Texto();
            string ext = System.IO.Path.GetExtension(path);

            string auxiliar = "http://localhost/servidores/";
            t.urlRuta = u.AbsoluteUri;
            var    puerto = u.Port;
            int    pos    = t.urlRuta.IndexOf("servidores/") + 11;
            string aux2   = t.urlRuta.Substring(pos);
            t.urlRuta = auxiliar + aux2;
            int    pos2     = aux2.IndexOf("/");
            string servidor = aux2.Substring(0, pos2);
            int    pos3     = aux2.IndexOf(servidor + "/") + servidor.Length + 1;

            string depart = aux2.Substring(pos3);
            int    pos4   = depart.IndexOf("/");
            depart = depart.Substring(0, pos4);
            depart = depart.Replace("%20", " ");


            t.departamento             = depart;
            t.urlRuta                  = auxiliar + aux2;
            t.tamanoArchivo            = fileLengthInKB;
            t.idServidor               = servidor;
            t.textoContenido           = textoContenido;
            t.titulo                   = t.nombreArchivo;
            t.formato                  = ext;
            t.nombreArchivo            = System.IO.Path.GetFileName(path);
            t.fechaCreacionArchivo     = (DateTime)File.GetCreationTime(path);
            t.fechaModificacionArchivo = (DateTime)File.GetLastWriteTime(path);
            t.fechaUltimaLectura       = (DateTime)File.GetLastAccessTime(path);
            t.fechaUltimaActualizacion = DateTime.Now;
            t.hits = rnd.Next(1, 55);

            if (File.Exists(path))
            {
                t.estadoActividad = 1;
            }

            w.Close();


            Texto existente = OperacionesElasticSearch.ExisteTexto(t);
            if (existente == null)
            {
                OperacionesElasticSearch.InsertarTexto(t);
            }
            else
            {
                OperacionesElasticSearch.actualizarTexto(existente, t);
            }
        }
        else if (Herramientas.EsExcel(path) && indexatexto && path.Contains(dpto))
        {
            /*Microsoft.Office.Interop.Excel.Application app = new Microsoft.Office.Interop.Excel.Application();
             * Microsoft.Office.Interop.Excel.Workbook wb = app.Workbooks.Open(@path, ReadOnly: true);
             *
             * var f = new FileInfo(path);
             * var fileLengthInKB = f.Length / 1024.0;
             *
             * //Create COM Objects. Create a COM object for everything that is referenced
             * Excel.Application xlApp = new Excel.Application();
             * Excel.Workbook xlWorkbook = xlApp.Workbooks.Open(path);
             * Excel._Worksheet xlWorksheet = xlWorkbook.Sheets[0];
             * Excel.Range xlRange = xlWorksheet.UsedRange;
             * int rowCount = xlRange.Rows.Count;
             * int colCount = xlRange.Columns.Count;
             * //iterate over the rows and columns and print to the console as it appears in the file
             * //excel is not zero based!!
             * //Get Author Name
             * String autor = wb.Author;
             *
             * String textoContenido = "";
             * for (int i = 1; i <= rowCount; i++)
             * {
             *  for (int j = 1; j <= colCount; j++)
             *  {
             *      //new line
             *      if (j == 1)
             *          Console.Write("\r\n");
             *
             *      //write the value to the console
             *      if ((Excel.Range)xlRange.Cells[i, j] != null && xlRange.Cells[i, j].Value2 != null)
             *          textoContenido += xlRange.Cells[i, j].Value2.ToString() + " ";
             *
             *      //add useful things here!
             *  }
             * }
             *
             * xlWorkbook.Close();
             * Uri u = new Uri(path);
             *
             * Texto t = new Texto();
             * t.urlRuta = u.AbsoluteUri;
             * string ext = System.IO.Path.GetExtension(path);
             *
             * string auxiliar = "http://localhost/servidorIntranet/";
             * h.urlRuta = u.AbsoluteUri;
             * var puerto = u.Port;
             * int pos = h.urlRuta.IndexOf("servidores/") + 11 ;
             * string aux2 = h.urlRuta.Substring(pos);
             * t.urlRuta = auxiliar + aux2;
             * int pos2 = aux2.IndexOf("/");
             * string servidor = aux2.Substring(0, pos2);
             * int pos3 = aux2.IndexOf(servidor + "/") + servidor.Length + 1;
             *
             * string depart = aux2.Substring(pos3);
             * int pos4 = depart.IndexOf("/");
             * depart = depart.Substring(0, pos4);
             * depart = depart.Replace("%20", " ");
             *
             * string depart = aux2.Substring(pos3);
             * int pos4 = depart.IndexOf("/");
             * depart = depart.Substring(0, pos4);
             * depart = depart.Replace("%20", " ");
             * t.departamento = depart;
             *
             * t.idServidor = servidor;
             * t.urlRuta = u.AbsoluteUri;
             * t.tamanoArchivo = fileLengthInKB;
             * t.textoContenido = textoContenido;
             * t.titulo = t.nombreArchivo;
             * t.formato = ext;
             * t.nombreArchivo = System.IO.Path.GetFileName(path);
             * t.fechaCreacionArchivo = (DateTime)File.GetCreationTime(path);
             * t.fechaModificacionArchivo = (DateTime)File.GetLastWriteTime(path);
             * t.fechaUltimaLectura = (DateTime)File.GetLastAccessTime(path);
             * t.fechaUltimaActualizacion = DateTime.Now;
             * t.hits = 0;
             *
             * if (File.Exists(path))
             *  t.estadoActividad = 1;
             *
             * Texto existente = OperacionesElasticSearch.ExisteTexto(t);
             * if (existente == null)
             *  OperacionesElasticSearch.InsertarTexto(t);
             * else
             *  OperacionesElasticSearch.actualizarTexto(existente, t);
             *
             */
        }
        else if (Herramientas.EsPDF(path) && indexatexto && path.Contains(dpto))
        {
            var text = new TextExtractor().Extract(path).Text;
            text = Regex.Replace(text, @"\s+", " ");
            text = text.Replace("\r", "");
            text = text.Replace("\n", "");

            PDFParser pdfParser = new PDFParser();

            var f = new FileInfo(path);
            var fileLengthInKB = f.Length / 1024.0;

            // extract the text
            String resultado = "";
            pdfParser.ExtractText(path, "C:\\Users\\cesar\\Desktop\\DocumentosIndeaxar\\salida.txt");
            resultado = pdfParser.ToString();

            String autor          = "";
            String textoContenido = "";
            String titulo         = "";


            using (PdfReader reader = new PdfReader(path)){
                //titulo = reader.Info["Title"];
                //String ayt = reader.Info["Author"];
                titulo = "";

                StringBuilder text2 = new StringBuilder();

                for (int i = 1; i <= reader.NumberOfPages; i++)
                {
                    text2.Append(PdfTextExtractor.GetTextFromPage(reader, i));
                }

                textoContenido = text.ToString();
            }

            Texto t = new Texto();
            Uri   u = new Uri(path);

            t.urlRuta = u.AbsoluteUri;
            string auxiliar = "http://localhost/servidores/";
            t.urlRuta = u.AbsoluteUri;
            var puerto = u.Port;
            int pos    = t.urlRuta.IndexOf("servidores/") + 11;

            string aux2 = t.urlRuta.Substring(pos);
            t.urlRuta = auxiliar + aux2;
            int    pos2     = aux2.IndexOf("/");
            string servidor = aux2.Substring(0, pos2);
            int    pos3     = aux2.IndexOf(servidor + "/") + servidor.Length + 1;

            string depart = aux2.Substring(pos3);
            int    pos4   = depart.IndexOf("/");
            depart = depart.Substring(0, pos4);
            depart = depart.Replace("%20", " ");

            t.idServidor     = servidor;
            t.departamento   = depart;
            t.textoContenido = textoContenido;
            t.nombreArchivo  = path.Substring(0, path.IndexOf(".pdf"));
            t.titulo         = titulo;
            t.tamanoArchivo  = fileLengthInKB;

            string ext = System.IO.Path.GetExtension(path);
            t.formato                  = ext;
            t.nombreArchivo            = path.Substring(0, path.IndexOf(ext));
            t.nombreArchivo            = System.IO.Path.GetFileName(path);
            t.fechaCreacionArchivo     = (DateTime)File.GetCreationTime(path);
            t.fechaModificacionArchivo = (DateTime)File.GetLastWriteTime(path);
            t.fechaUltimaLectura       = (DateTime)File.GetLastAccessTime(path);
            t.fechaUltimaActualizacion = DateTime.Now;
            t.hits = rnd.Next(1, 55);

            if (File.Exists(path))
            {
                t.estadoActividad = 1;
            }

            Texto existente = OperacionesElasticSearch.ExisteTexto(t);
            if (existente == null)
            {
                OperacionesElasticSearch.InsertarTexto(t);
            }
            else
            {
                OperacionesElasticSearch.actualizarTexto(existente, t);
            }
        }
        else if (Herramientas.EsPowerPoint(path) && indexatexto && path.Contains(dpto))
        {
            Microsoft.Office.Interop.PowerPoint.Application   PowerPoint_App      = new Microsoft.Office.Interop.PowerPoint.Application();
            Microsoft.Office.Interop.PowerPoint.Presentations multi_presentations = PowerPoint_App.Presentations;
            Microsoft.Office.Interop.PowerPoint.Presentation  presentation        = multi_presentations.Open(path);

            var f = new FileInfo(path);
            var fileLengthInKB = f.Length / 1024.0;

            string textoContenido = "";
            for (int i = 0; i < presentation.Slides.Count; i++)
            {
                foreach (var item in presentation.Slides[i + 1].Shapes)
                {
                    var shape = (Powerpoint.Shape)item;
                    if (shape.HasTextFrame == MsoTriState.msoTrue)
                    {
                        if (shape.TextFrame.HasText == MsoTriState.msoTrue)
                        {
                            var textRange = shape.TextFrame.TextRange;
                            var text      = textRange.Text;
                            textoContenido += text + " ";
                        }
                    }
                }
            }
            //Get Author Name
            object wordProperties      = presentation.BuiltInDocumentProperties;
            Type   typeDocBuiltInProps = wordProperties.GetType();
            Object Authorprop          = typeDocBuiltInProps.InvokeMember("Item", System.Reflection.BindingFlags.Default | System.Reflection.BindingFlags.GetProperty, null, wordProperties, new object[] { "Author" }); //query for author properties
            Type   typeAuthorprop      = Authorprop.GetType();
            string autor = typeAuthorprop.InvokeMember("Value", System.Reflection.BindingFlags.Default | System.Reflection.BindingFlags.GetProperty, null, Authorprop, new object[] { }).ToString();                     //get author name

            Texto t = new Texto();
            t.textoContenido = textoContenido;
            Uri u = new Uri(path);
            t.urlRuta = u.AbsoluteUri;
            string ext = System.IO.Path.GetExtension(path);
            t.formato       = ext;
            t.nombreArchivo = path.Substring(0, path.IndexOf(ext));
            string auxiliar = "http://localhost/servidores/";
            t.urlRuta = u.AbsoluteUri;
            var puerto = u.Port;
            int pos    = t.urlRuta.IndexOf("servidores/") + 11;

            string aux2 = t.urlRuta.Substring(pos);
            t.urlRuta = auxiliar + aux2;
            int    pos2     = aux2.IndexOf("/");
            string servidor = aux2.Substring(0, pos2);
            int    pos3     = aux2.IndexOf(servidor + "/") + servidor.Length + 1;

            string depart = aux2.Substring(pos3);
            int    pos4   = depart.IndexOf("/");
            depart         = depart.Substring(0, pos4);
            depart         = depart.Replace("%20", " ");
            t.departamento = depart;
            t.hits         = rnd.Next(1, 55);


            PowerPoint_App.Quit();
            presentation.Close();

            t.idServidor               = servidor;
            textoContenido             = textoContenido.Trim();
            t.nombreArchivo            = System.IO.Path.GetFileName(path);
            t.fechaCreacionArchivo     = (DateTime)File.GetCreationTime(path);
            t.fechaModificacionArchivo = (DateTime)File.GetLastWriteTime(path);
            t.fechaUltimaLectura       = (DateTime)File.GetLastAccessTime(path);
            t.fechaUltimaActualizacion = DateTime.Now;

            if (File.Exists(path))
            {
                t.estadoActividad = 1;
            }

            Texto existente = OperacionesElasticSearch.ExisteTexto(t);
            if (existente == null)
            {
                OperacionesElasticSearch.InsertarTexto(t);
            }
            else
            {
                OperacionesElasticSearch.actualizarTexto(existente, t);
            }
        }
        else if (Herramientas.EsImagen(path) && indexaimagen && path.Contains(dpto))
        {
            var           f = new FileInfo(path);
            var           fileLengthInKB = f.Length / 1024.0;
            string        ext            = System.IO.Path.GetExtension(path);
            List <string> eti            = new List <string>();
            eti.Add("imagen");
            eti.Add("foto");

            String   titulo  = path.Substring(0, path.IndexOf(ext));
            FileInfo file    = new FileInfo(path);
            int      tamanio = (int)file.Length;

            Bitmap img = new Bitmap(path);

            int altura  = img.Height;
            int anchura = img.Width;

            Imagen im = new Imagen();
            im.pixelesAltura  = altura;
            im.pixelesAnchura = anchura;

            Uri u = new Uri(path);
            im.urlRuta = u.AbsoluteUri;


            string auxiliar = "http://localhost/servidores/";
            im.urlRuta = u.AbsoluteUri;
            var puerto = u.Port;
            int pos    = im.urlRuta.IndexOf("servidores/") + 11;

            string aux2 = im.urlRuta.Substring(pos);
            im.urlRuta = auxiliar + aux2;
            int    pos2     = aux2.IndexOf("/");
            string servidor = aux2.Substring(0, pos2);
            int    pos3     = aux2.IndexOf(servidor + "/") + servidor.Length + 1;

            string depart = aux2.Substring(pos3);
            int    pos4   = depart.IndexOf("/");
            depart          = depart.Substring(0, pos4);
            depart          = depart.Replace("%20", " ");
            im.departamento = depart;

            im.idServidor               = servidor;
            ext                         = System.IO.Path.GetExtension(path);
            im.formato                  = ext;
            im.nombreArchivo            = System.IO.Path.GetFileName(path);
            im.fechaCreacionArchivo     = (DateTime)File.GetCreationTime(path);
            im.fechaModificacionArchivo = (DateTime)File.GetLastWriteTime(path);
            im.fechaUltimaLectura       = (DateTime)File.GetLastAccessTime(path);
            im.fechaUltimaActualizacion = DateTime.Now;
            im.etiquetas                = eti;
            im.hits                     = rnd.Next(1, 55);


            if (File.Exists(path))
            {
                im.estadoActividad = 1;
            }

            Imagen existente = OperacionesElasticSearch.ExisteImagen(im);
            if (existente == null)
            {
                OperacionesElasticSearch.InsertarImagen(im);
            }
            else
            {
                OperacionesElasticSearch.actualizarImagen(existente, im);
            }
        }
        else if (Herramientas.EsAudio(path) && indexaaudio)
        {
            var fi             = new FileInfo(path);
            var fileLengthInKB = fi.Length / 1024.0;

            string ext    = System.IO.Path.GetExtension(path);
            string titulo = path.Substring(0, path.IndexOf(ext));

            TagLib.File f        = TagLib.File.Create(path, TagLib.ReadStyle.Average);
            var         duracion = (int)f.Properties.Duration.TotalSeconds;

            List <string> eti = new List <string>();
            eti.Add("audio");
            eti.Add("sonido");

            Audio au = new Audio();
            Uri   u  = new Uri(path);
            au.urlRuta = u.AbsoluteUri;
            string auxiliar = "http://localhost/servidores/";
            au.urlRuta = u.AbsoluteUri;
            var puerto = u.Port;
            int pos    = au.urlRuta.IndexOf("servidores/") + 11;

            string aux2 = au.urlRuta.Substring(pos);
            au.urlRuta = auxiliar + aux2;
            int    pos2     = aux2.IndexOf("/");
            string servidor = aux2.Substring(0, pos2);
            int    pos3     = aux2.IndexOf(servidor + "/") + servidor.Length + 1;

            string depart = aux2.Substring(pos3);
            int    pos4   = depart.IndexOf("/");
            depart                      = depart.Substring(0, pos4);
            depart                      = depart.Replace("%20", " ");
            au.departamento             = depart;
            au.duracion                 = duracion;
            au.etiquetas                = eti;
            au.formato                  = ext;
            au.nombreArchivo            = System.IO.Path.GetFileName(path);
            au.fechaCreacionArchivo     = (DateTime)File.GetCreationTime(path);
            au.fechaModificacionArchivo = (DateTime)File.GetLastWriteTime(path);
            au.fechaUltimaLectura       = (DateTime)File.GetLastAccessTime(path);
            au.fechaUltimaActualizacion = DateTime.Now;
            au.hits                     = rnd.Next(1, 55);
            au.idServidor               = servidor;

            if (File.Exists(path))
            {
                au.estadoActividad = 1;
            }

            Audio existente = OperacionesElasticSearch.ExisteAudio(au);
            if (existente == null)
            {
                OperacionesElasticSearch.InsertarAudio(au);
            }
            else
            {
                OperacionesElasticSearch.actualizarAudio(existente, au);
            }
        }
        else if (Herramientas.EsVideo(path) && indexavideo)
        {
            string        ext    = System.IO.Path.GetExtension(path);
            string        titulo = path.Substring(0, path.IndexOf(ext));
            List <string> eti    = new List <string>();
            eti.Add("video");
            var    fi             = new FileInfo(path);
            int    duracion       = 0;
            string calidad        = "";
            var    fileLengthInKB = fi.Length / 1024.0;
            if (ext == ".mp4")
            {
                TagLib.File f = TagLib.File.Create(path, TagLib.ReadStyle.Average);
                duracion = (int)f.Properties.Duration.TotalSeconds;

                if (f.Properties.VideoHeight != 0 && f.Properties.VideoWidth != 0)
                {
                    int height = (int)f.Properties.VideoHeight;
                    int width  = (int)f.Properties.VideoWidth;
                    calidad = height + "x" + width;
                }
            }

            Uri u = new Uri(path);

            Video v = new Video();
            v.urlRuta = u.AbsoluteUri;
            string auxiliar = "http://localhost/servidores/";
            v.urlRuta = u.AbsoluteUri;
            var puerto = u.Port;
            int pos    = v.urlRuta.IndexOf("servidores/") + 11;

            string aux2 = v.urlRuta.Substring(pos);
            v.urlRuta = auxiliar + aux2;
            int    pos2     = aux2.IndexOf("/");
            string servidor = aux2.Substring(0, pos2);
            int    pos3     = aux2.IndexOf(servidor + "/") + servidor.Length + 1;

            string depart = aux2.Substring(pos3);
            int    pos4   = depart.IndexOf("/");
            depart                     = depart.Substring(0, pos4);
            depart                     = depart.Replace("%20", " ");
            depart                     = depart.Replace("%20", " ");
            v.departamento             = depart;
            v.duracion                 = duracion;
            v.etiquetas                = eti;
            v.calidad                  = calidad;
            v.idServidor               = servidor;
            v.nombreArchivo            = System.IO.Path.GetFileName(path);
            v.fechaCreacionArchivo     = (DateTime)File.GetCreationTime(path);
            v.fechaModificacionArchivo = (DateTime)File.GetLastWriteTime(path);
            v.fechaUltimaLectura       = (DateTime)File.GetLastAccessTime(path);
            v.fechaUltimaActualizacion = DateTime.Now;
            v.formato                  = ext;
            v.hits                     = rnd.Next(1, 55);

            if (File.Exists(path))
            {
                v.estadoActividad = 1;
            }

            Video existente = OperacionesElasticSearch.ExisteVideo(v);
            if (existente == null)
            {
                OperacionesElasticSearch.InsertarVideo(v);
            }
            else
            {
                OperacionesElasticSearch.actualizarVideo(existente, v);
            }
        }
    }
Ejemplo n.º 10
0
        /// <summary>
        /// For every changed attachment field on <paramref name="record"/>, extracts the plain text of the
        /// attached document and stores it in a companion "&lt;fieldName&gt;RawText" column of the record's table.
        /// </summary>
        /// <remarks>
        /// A field is considered when its name contains "Attachment" but not "RawText" and its value differs
        /// from its original value. If the attachment name looks like a .doc/.docx/.pdf/.rtf file, the
        /// RawText column is created when missing and updated with the extracted text (only when the text is
        /// non-blank). Otherwise an existing RawText column is set to null for this record.
        /// File extensions are compared case-insensitively, so "REPORT.PDF" is treated like "report.pdf".
        /// </remarks>
        /// <param name="record">Record whose attachment fields are inspected.</param>
        /// <exception cref="NotSupportedException">
        /// An .rtf attachment is encountered and the code was compiled without <c>RTFProcessingAvailable</c>.
        /// </exception>
        public static void CheckAttachmentsForDocOrPDFText(ActiveRecord record)
        {
            // Walk the field list for this record looking for attachments.
            foreach (var fieldName in record.GetFieldNames())
            {
                if (!(fieldName.Contains("Attachment") && fieldName.DoesntContain("RawText")))
                {
                    continue;
                }

                // Only re-extract when the attachment value has actually changed.
                if (!ActiveFieldBase.IsDirtyObj(record[fieldName].ValueObject, record[fieldName].OriginalValueObject))
                {
                    continue;
                }

                string rawTextField = fieldName + "RawText";
                string fileName     = record[fieldName].ToString();

                // ".doc" is matched anywhere in the name (as before) so that ".docx" also passes the gate;
                // all comparisons ignore case so upper-case extensions are not mistaken for "no document".
                bool looksLikeDocument =
                    fileName.IndexOf(".doc", StringComparison.OrdinalIgnoreCase) >= 0 ||
                    fileName.EndsWith(".pdf", StringComparison.OrdinalIgnoreCase) ||
                    fileName.EndsWith(".rtf", StringComparison.OrdinalIgnoreCase);

                if (!looksLikeDocument)
                {
                    // No document any more: clear previously extracted text, if the column exists.
                    if (record.FieldExists(rawTextField))
                    {
                        (new Sql("update ", record.GetTableName().SqlizeName(), "set " + rawTextField + "=null where ",
                                 record.GetPrimaryKeyName().SqlizeName(), "=", record.ID_Field.Sqlize(), "")).Execute();
                    }
                    continue;
                }

                // Create the companion column on first use.
                if (!record.FieldExists(rawTextField))
                {
                    (new Sql("ALTER TABLE ", record.GetTableName().SqlizeName(), " ADD [" + rawTextField + "] nvarchar (MAX);")).Execute();
                }

                string output = "";
                if (fileName.EndsWith(".doc", StringComparison.OrdinalIgnoreCase))
                {
                    // The return value is not needed: an unsuccessful read leaves output blank.
                    OfficeFileReader.OfficeFileReader objOFR = new OfficeFileReader.OfficeFileReader();
                    objOFR.GetText(Web.MapPath(Web.Attachments) + fileName, ref output);
                }
                else if (fileName.EndsWith(".docx", StringComparison.OrdinalIgnoreCase))
                {
                    BewebCore.ThirdParty.ReadWordDocText.DocxToText objOFR = new DocxToText(Web.MapPath(Web.Attachments) + fileName);
                    output = objOFR.ExtractText();
                }
                else if (fileName.EndsWith(".pdf", StringComparison.OrdinalIgnoreCase))
                {
                    // EndsWith (not Contains) so that e.g. "a.pdf.docm" is not handed to the PDF parser.
                    PdfToText.PDFParser pdf = new PDFParser();
                    pdf.ExtractText(Web.MapPath(Web.Attachments) + fileName, ref output);
                }
                else if (fileName.EndsWith(".rtf", StringComparison.OrdinalIgnoreCase))
                {
#if RTFProcessingAvailable
                    //Create the RTF tree object
                    RtfTree tree = new RtfTree();

                    //Load and parse RTF document
                    tree.LoadRtfFile(Web.MapPath(Web.Attachments) + fileName);
                    output = tree.Text;
#else
                    throw new NotSupportedException("rtf library not included");
#endif
                }

                // Only overwrite the stored text when something was actually extracted.
                if (output != null && output.Trim() != "")
                {
                    (new Sql("update ", record.GetTableName().SqlizeName(), "set " + rawTextField + "=", output.SqlizeText(), " where ",
                             record.GetPrimaryKeyName().SqlizeName(), "=", record.ID_Field.Sqlize(), "")).Execute();
                }
            }
        }