/// <summary>
/// Extracts the text of "terms-and-conditions.pdf", located under <c>requiredPath</c>,
/// into "terms-and-conditions.txt" in the same directory.
/// </summary>
/// <remarks>
/// Exceptions raised while locating or parsing the file are written to the console
/// instead of being propagated. When the PDF cannot be found a message is printed and
/// no extraction is attempted.
/// </remarks>
public void readfrompdffile()
{
    // Strip a "file:\" prefix if present and switch to forward slashes.
    string clearedfilepath = requiredPath.Replace("file:\\", "").Replace("\\", "/");
    string file = clearedfilepath + "/terms-and-conditions.pdf";
    string outFile = clearedfilepath + "/terms-and-conditions.txt";

    try
    {
        if (!File.Exists(file))
        {
            // Second chance: resolve the path against the current directory.
            file = Path.GetFullPath(file);
            if (!File.Exists(file))
            {
                Console.WriteLine("Please give in the path to the PDF file.");
                // Nothing to parse; do not hand a missing file to the parser.
                return;
            }
        }

        PDFParser pdfParser = new PDFParser();
        pdfParser.ExtractText(file, outFile);
    }
    catch (Exception exc)
    {
        Console.WriteLine(exc);
    }
}
/// <summary>
/// Extracts the text of the PDF attached to a service and stores it, together with a
/// generated title, in the PDF's Facebook fields.
/// </summary>
/// <param name="serviceId">Key used to look the service up in <c>db.Services</c>.</param>
/// <returns>
/// JSON "success" when the text was extracted and saved; JSON "Error" when the service,
/// its PDF or the PDF path is missing, or when the parser returns the string "false".
/// </returns>
public ActionResult callExtraxtText(int serviceId)
{
    Service service = db.Services.Find(serviceId);
    if (service == null || service.PDF == null)
    {
        return Json("Error");
    }

    string pdfFileName = service.PDF.PDFPath;
    if (pdfFileName == null)
    {
        return Json("Error");
    }

    // Stored path is relative to the "PDFs" folder of the configured archive.
    string pdfFullPath = ConfigurationManager.AppSettings["pdfArchive"] + @"\PDFs\" + pdfFileName;

    PDFParser parser = new PDFParser();
    string extractedText = parser.ExtractText(serviceId, pdfFullPath);
    PDF pdf = service.PDF;

    // The parser signals failure with the literal string "false".
    if (extractedText == "false")
    {
        return Json("Error");
    }

    pdf.FacebookDescription = extractedText;
    pdf.FacebookTitle = service.FirstName + " " + service.LastName + "'s Memorial Folder";
    db.SaveChanges();
    return Json("success");
}
/// <summary>
/// Runs <c>PDFParser.ExtractText</c> against a fixed European Commission register
/// address and writes the result to "outfile.txt". The parser's boolean result is
/// not inspected.
/// </summary>
public static void ParsePdf()
{
    const string sourceAddress = @"http://ec.europa.eu/health/documents/community-register/html/h_direct_anx.htm#412_et";
    const string targetFile = "outfile.txt";

    PDFParser parser = new PDFParser();
    bool extracted = parser.ExtractText(sourceAddress, targetFile);
}
/// <summary>
/// Console entry point: extracts the text of a fixed PDF on the desktop into a text
/// file next to it, then waits for a key press before exiting.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    const string inputPdf = @"C:\Users\unknown\Desktop\oyak.pdf";
    const string outputText = @"C:\Users\unknown\Desktop\output.txt";

    PDFParser parser = new PDFParser();
    parser.ExtractText(inputPdf, outputText);

    // Keep the console window open until the user presses a key.
    Console.ReadKey();
}
/// <summary>
/// Downloads the PDF at <paramref name="url"/> into the folder configured under the
/// "downloadLocation" app setting and returns its text content.
/// </summary>
/// <param name="url">Address of the PDF to download.</param>
/// <returns>
/// The text produced by <c>ConvertToText</c> for the downloaded bytes, or an empty
/// string when any step fails (the failure is logged through <c>ErrorUtil.logError</c>).
/// </returns>
public static string DownloadFile(string url)
{
    string fileLocation = ConfigurationManager.AppSettings["downloadLocation"];

    // One zero-padded timestamp plus a GUID: the previous unpadded
    // Hour+Minute+Second+Millisecond concatenation was ambiguous (1:23:4 vs 12:3:4)
    // and could collide for downloads started within the same millisecond.
    string fileName = DateTime.Now.ToString("HHmmssfff", System.Globalization.CultureInfo.InvariantCulture)
                      + "_" + Guid.NewGuid().ToString("N");
    string fullFileName = fileLocation + @"\" + fileName + ".pdf";
    string result = "";

    try
    {
        using (WebClient client = new WebClient())
        {
            client.DownloadFile(url, fullFileName);
        }

        // Also writes "<fileName>.txt" relative to the current directory; that file is
        // not read back here, the returned text comes from ConvertToText below.
        PDFParser pdfParser = new PDFParser();
        pdfParser.ExtractText(fullFileName, System.IO.Path.GetFileNameWithoutExtension(fullFileName) + ".txt");

        var bytes = File.ReadAllBytes(fullFileName);
        result = ConvertToText(bytes);
    }
    catch (Exception ex)
    {
        ErrorUtil.logError(ex, "");
    }

    return result;
}
/// <summary>
/// Shows the contents of a file in <c>textBox</c>, working on a thread started through
/// <c>O2Thread.mtaThread</c>. PDF files are first converted to text through a temporary
/// file; any other file is shown as-is. Nothing happens when the file does not exist.
/// </summary>
/// <param name="fileToConvert">Path of the file to display.</param>
public void convertFile(string fileToConvert)
{
    O2Thread.mtaThread(
        () =>
        {
            if (fileToConvert.fileExists())
            {
                if (fileToConvert.extension(".pdf"))
                {
                    textBox.set_Text("...processing pdf file: " + fileToConvert);
                    var pdfParser = new PDFParser();
                    var tempFile = PublicDI.config.getTempFileInTempDirectory(".txt");
                    try
                    {
                        pdfParser.ExtractText(fileToConvert, tempFile);
                        textBox.set_Text(tempFile.contents().fixCRLF());
                    }
                    finally
                    {
                        // Remove the temp file even when extraction or reading fails.
                        Files.deleteFile(tempFile);
                    }
                }
                else
                {
                    textBox.set_Text(fileToConvert.contents());
                }
            }
        });
}
/// <summary>
/// Manual comparison harness: runs several PDF text extractors over sample files in the
/// current directory and writes each extractor's output to its own text file
/// ("iparse.txt", "itextsharp.txt", "pdfsharp.txt", "pdfparser.txt", "PdfTextExtractor.txt").
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    // Results recorded from earlier manual runs, per extractor and sample file:
    //
    //   iTextSharp PdfReader
    //     letter.pdf                      Index was outside the bounds of the array.
    //     employe-1.pdf                   ok
    //     feuille_de_paie.pdf             Rebuild failed: trailer not found.; Original message: PDF startxref not found.
    //     modele-bulletin-de-salaire.pdf  ok
    //   PdfSharp PdfReader.Open
    //     letter.pdf                      ok
    //     employe-1.pdf                   ok
    //     feuille_de_paie.pdf             Non-negative number required.
    //     modele-bulletin-de-salaire.pdf  ok
    //   PDFParser (iTextSharp too)
    //     letter.pdf                      error
    //     employe-1.pdf                   ok
    //     feuille_de_paie.pdf             error
    //     modele-bulletin-de-salaire.pdf  error
    //   PdfTextExtractor.GetText (PdfSharp)
    //     letter.pdf                      ok
    //     employe-1.pdf                   ok
    //     feuille_de_paie.pdf             Non-negative number required.
    //     modele-bulletin-de-salaire.pdf  ok

    ITextParse.ExtractText("employe-1.pdf", "iparse.txt");

    // iTextSharp, page by page.
    using (iTextSharp.text.pdf.PdfReader reader = new iTextSharp.text.pdf.PdfReader("modele-bulletin-de-salaire.pdf"))
    {
        StringBuilder text = new StringBuilder();
        for (int page = 1; page <= reader.NumberOfPages; page++)
        {
            iTextSharp.text.pdf.parser.ITextExtractionStrategy strategy = new iTextSharp.text.pdf.parser.SimpleTextExtractionStrategy();

            // The extracted string is appended as-is. It used to be round-tripped through
            // Encoding.Default, which replaced every character outside the ANSI code page
            // with '?' and changed nothing else.
            text.Append(iTextSharp.text.pdf.parser.PdfTextExtractor.GetTextFromPage(reader, page, strategy));
        }

        using (System.IO.StreamWriter file = new System.IO.StreamWriter("itextsharp.txt"))
        {
            file.WriteLine(text);
        }
    }

    // PdfSharp, page by page.
    using (var _document = PdfReader.Open("letter.pdf", PdfDocumentOpenMode.ReadOnly))
    using (System.IO.StreamWriter file = new System.IO.StreamWriter("pdfsharp.txt"))
    {
        foreach (PdfPage page in _document.Pages)
        {
            var text = ExtractText(page);
            foreach (string s in text)
            {
                file.Write(s);
            }
        }
    }

    // PDFParser (iTextSharp too).
    PDFParser parser = new PDFParser();
    parser.ExtractText("employe-1.pdf", "pdfparser.txt");

    // PdfTextExtractor (PdfSharp).
    string text2 = PdfTextExtractor.GetText("modele-bulletin-de-salaire.pdf");
    using (System.IO.StreamWriter file2 = new System.IO.StreamWriter("PdfTextExtractor.txt"))
    {
        file2.Write(text2);
    }
}
/// <summary>
/// Returns the content extracted from the specified PDF file.
/// </summary>
/// <param name="filePath">Path of the PDF file handed to <c>PDFParser.ExtractText</c>.</param>
/// <returns>The value returned by <c>PDFParser.ExtractText</c> for that file.</returns>
public static string GetPdfContent(string filePath)
{
    PDFParser parser = new PDFParser();
    string content = parser.ExtractText(filePath);
    return content;
}
/// <summary>
/// Indexes a single file into Elasticsearch. The file kind is detected from
/// <paramref name="path"/> and the matching document (Hipertexto, Texto, Imagen, Audio
/// or Video) is inserted, or updated when <c>OperacionesElasticSearch</c> already has it.
/// </summary>
/// <param name="path">
/// Full path of the file. Its URI must contain "servidores/&lt;server&gt;/&lt;department&gt;/";
/// server and department are read from that segment.
/// </param>
/// <param name="tipo">
/// Kind of content to index: "texto", "hipertexto", "video", "imagen", "audio", or ""
/// for every kind. Any other value indexes nothing.
/// </param>
/// <param name="dpto">
/// Text that <paramref name="path"/> must contain for hypertext, text and image files.
/// Audio and video files are not filtered by it.
/// </param>
/// <param name="actualizar">Not used by this method.</param>
public static void ProcessFile(string path, string tipo, string dpto, bool actualizar)
{
    bool indexatexto = false, indexaaudio = false, indexaimagen = false, indexahipertexto = false, indexavideo = false;

    switch (tipo)
    {
        case "texto":
            indexatexto = true;
            break;
        case "hipertexto":
            indexahipertexto = true;
            break;
        case "video":
            indexavideo = true;
            break;
        case "imagen":
            indexaimagen = true;
            break;
        case "audio":
            indexaaudio = true;
            break;
        case "":
            indexatexto = true;
            indexahipertexto = true;
            indexavideo = true;
            indexaimagen = true;
            indexaaudio = true;
            break;
    }

    Random rnd = new Random();

    // Hypertext used to be gated on the text flag only, so tipo == "hipertexto" indexed
    // nothing. The text flag is still accepted to keep the behaviour of tipo == "texto".
    if (Herramientas.EsHiperTexto(path) && (indexahipertexto || indexatexto) && path.Contains(dpto))
    {
        IndexHypertext(path, rnd);
    }
    else if (path.EndsWith(".txt") && indexatexto && path.Contains(dpto))
    {
        IndexPlainText(path, rnd);
    }
    else if (Herramientas.EsWord(path) && indexatexto && path.Contains(dpto))
    {
        IndexWordDocument(path, rnd);
    }
    else if (Herramientas.EsExcel(path) && indexatexto && path.Contains(dpto))
    {
        // Excel files have no active indexing implementation (the interop-based one was
        // commented out); matching files are skipped here so they never reach a later branch.
    }
    else if (Herramientas.EsPDF(path) && indexatexto && path.Contains(dpto))
    {
        IndexPdf(path, rnd);
    }
    else if (Herramientas.EsPowerPoint(path) && indexatexto && path.Contains(dpto))
    {
        IndexPowerPoint(path, rnd);
    }
    else if (Herramientas.EsImagen(path) && indexaimagen && path.Contains(dpto))
    {
        IndexImage(path, rnd);
    }
    else if (Herramientas.EsAudio(path) && indexaaudio)
    {
        // NOTE(review): audio and video are not filtered by dpto, unlike the other kinds — confirm this is intended.
        IndexAudio(path, rnd);
    }
    else if (Herramientas.EsVideo(path) && indexavideo)
    {
        IndexVideo(path, rnd);
    }
}

// Derives the public URL, server id and department from the "servidores/<server>/<department>/..." part of the file URI.
private static void ResolveServerLocation(string path, out string urlRuta, out string servidor, out string departamento)
{
    const string marker = "servidores/";
    string absoluteUri = new Uri(path).AbsoluteUri;

    // NOTE(review): when the marker is absent IndexOf returns -1 and the offset below is
    // meaningless; this mirrors the previous behaviour and is not validated here.
    string relative = absoluteUri.Substring(absoluteUri.IndexOf(marker, StringComparison.Ordinal) + marker.Length);

    int serverEnd = relative.IndexOf("/", StringComparison.Ordinal);
    servidor = relative.Substring(0, serverEnd);

    string afterServer = relative.Substring(serverEnd + 1);
    int departmentEnd = afterServer.IndexOf("/", StringComparison.Ordinal);
    departamento = afterServer.Substring(0, departmentEnd).Replace("%20", " ");

    urlRuta = "http://localhost/servidores/" + relative;
}

// Builds a Texto document with the location, size, name, dates and hit counter shared by every text-like file kind.
private static Texto BuildTexto(string path, string contenido, string titulo, Random rnd)
{
    string urlRuta, servidor, departamento;
    ResolveServerLocation(path, out urlRuta, out servidor, out departamento);

    Texto t = new Texto();
    t.urlRuta = urlRuta;
    t.idServidor = servidor;
    t.departamento = departamento;
    t.textoContenido = contenido;
    t.tamanoArchivo = new FileInfo(path).Length / 1024.0;
    t.formato = System.IO.Path.GetExtension(path);
    t.nombreArchivo = System.IO.Path.GetFileName(path);
    if (titulo != null)
    {
        t.titulo = titulo;
    }
    t.fechaCreacionArchivo = File.GetCreationTime(path);
    t.fechaModificacionArchivo = File.GetLastWriteTime(path);
    t.fechaUltimaLectura = File.GetLastAccessTime(path);
    t.fechaUltimaActualizacion = DateTime.Now;
    t.hits = rnd.Next(1, 55);
    if (File.Exists(path))
    {
        t.estadoActividad = 1;
    }
    return t;
}

// Inserts the Texto document, or updates the stored one when it already exists.
private static void SaveTexto(Texto t)
{
    Texto existente = OperacionesElasticSearch.ExisteTexto(t);
    if (existente == null)
    {
        OperacionesElasticSearch.InsertarTexto(t);
    }
    else
    {
        OperacionesElasticSearch.actualizarTexto(existente, t);
    }
}

// Indexes an HTML file using the inner text of its <body>.
private static void IndexHypertext(string path, Random rnd)
{
    HtmlNode html = GetNodes(new Uri(path));

    // A page without <body> yields empty content instead of a NullReferenceException.
    var bodyNode = html.CssSelect("body").FirstOrDefault();
    string body = bodyNode == null ? string.Empty : bodyNode.InnerText;
    body = Regex.Replace(body, "<.*?>", string.Empty);

    // Replacement strings do not process character escapes, so the previous @"\n"
    // inserted a literal backslash followed by 'n'; use a real newline.
    body = Regex.Replace(body, @"(?:(?:\r?\n)+ +){2,}", "\n");

    string urlRuta, servidor, departamento;
    ResolveServerLocation(path, out urlRuta, out servidor, out departamento);

    Hipertexto h = new Hipertexto();
    h.textoContenido = body;
    h.urlRuta = urlRuta;
    h.idServidor = servidor;
    h.departamento = departamento;
    h.tamanoArchivo = new FileInfo(path).Length / 1024.0;
    h.formato = System.IO.Path.GetExtension(path);
    h.nombreArchivo = System.IO.Path.GetFileName(path);
    h.fechaCreacionArchivo = File.GetCreationTime(path);
    h.fechaModificacionArchivo = File.GetLastWriteTime(path);
    h.fechaUltimaLectura = File.GetLastAccessTime(path);
    h.fechaUltimaActualizacion = DateTime.Now;
    h.hits = rnd.Next(1, 55);
    if (File.Exists(path))
    {
        h.estadoActividad = 1;
    }

    Hipertexto existente = OperacionesElasticSearch.ExisteHipertexto(h);
    if (existente == null)
    {
        OperacionesElasticSearch.InsertarHiperTexto(h);
    }
    else
    {
        OperacionesElasticSearch.actualizarHipertexto(existente, h);
    }
}

// Indexes a .txt file with its full contents and the NT account that owns it.
private static void IndexPlainText(string path, Random rnd)
{
    string contenido = System.IO.File.ReadAllText(path);
    string propietario = System.IO.File.GetAccessControl(path).GetOwner(typeof(System.Security.Principal.NTAccount)).ToString();

    // The title used to be copied from nombreArchivo before that field was assigned,
    // leaving it unset; pass the file name explicitly.
    Texto t = BuildTexto(path, contenido, System.IO.Path.GetFileName(path), rnd);
    t.estadoActividad = 1;
    t.autorArchivo = propietario;
    SaveTexto(t);
}

// Indexes a Word document using the text of all its paragraphs.
private static void IndexWordDocument(string path, Random rnd)
{
    StringBuilder contenido = new StringBuilder();

    // A single Word instance is started and always shut down again. Previously two
    // instances were started per file and neither was quit.
    Microsoft.Office.Interop.Word.Application word = new Microsoft.Office.Interop.Word.Application();
    Microsoft.Office.Interop.Word.Document doc = null;
    try
    {
        word.Visible = false;
        doc = word.Documents.Open(path, ReadOnly: true);

        // Word collections are 1-based.
        int total = doc.Paragraphs.Count;
        for (int i = 1; i <= total; i++)
        {
            contenido.Append(" \r\n ").Append(doc.Paragraphs[i].Range.Text);
        }
    }
    finally
    {
        if (doc != null)
        {
            doc.Close();
        }
        // Cast selects the Quit method rather than the Quit event of the same name.
        ((Microsoft.Office.Interop.Word._Application)word).Quit();
    }

    // Same title fix as for plain text files.
    SaveTexto(BuildTexto(path, contenido.ToString(), System.IO.Path.GetFileName(path), rnd));
}

// Indexes a PDF using the whitespace-normalised text returned by TextExtractor.
private static void IndexPdf(string path, Random rnd)
{
    // Only this extraction ever reached the index. The additional PDFParser pass (which
    // wrote to a hard-coded desktop path) and the iTextSharp pass produced unused results
    // and were removed.
    string contenido = new TextExtractor().Extract(path).Text;
    contenido = Regex.Replace(contenido, @"\s+", " ");

    SaveTexto(BuildTexto(path, contenido, "", rnd));
}

// Indexes a PowerPoint presentation using the text of every shape that has a text frame.
private static void IndexPowerPoint(string path, Random rnd)
{
    StringBuilder contenido = new StringBuilder();

    Microsoft.Office.Interop.PowerPoint.Application app = new Microsoft.Office.Interop.PowerPoint.Application();
    Microsoft.Office.Interop.PowerPoint.Presentation presentation = null;
    try
    {
        presentation = app.Presentations.Open(path);

        // PowerPoint collections are 1-based.
        int total = presentation.Slides.Count;
        for (int i = 1; i <= total; i++)
        {
            foreach (var item in presentation.Slides[i].Shapes)
            {
                var shape = (Powerpoint.Shape)item;
                if (shape.HasTextFrame == MsoTriState.msoTrue && shape.TextFrame.HasText == MsoTriState.msoTrue)
                {
                    contenido.Append(shape.TextFrame.TextRange.Text).Append(" ");
                }
            }
        }
    }
    finally
    {
        // Close the presentation before quitting the application (the order used to be reversed).
        if (presentation != null)
        {
            presentation.Close();
        }
        app.Quit();
    }

    // The content is trimmed before it is stored (the trim used to run after the
    // assignment and was lost). The file size is now recorded like for other text kinds.
    // No title is set for presentations.
    SaveTexto(BuildTexto(path, contenido.ToString().Trim(), null, rnd));
}

// Indexes an image with its pixel dimensions and the fixed tags "imagen" and "foto".
private static void IndexImage(string path, Random rnd)
{
    int altura;
    int anchura;

    // Dispose the bitmap so the file handle is released as soon as the size is known.
    using (Bitmap img = new Bitmap(path))
    {
        altura = img.Height;
        anchura = img.Width;
    }

    string urlRuta, servidor, departamento;
    ResolveServerLocation(path, out urlRuta, out servidor, out departamento);

    Imagen im = new Imagen();
    im.pixelesAltura = altura;
    im.pixelesAnchura = anchura;
    im.urlRuta = urlRuta;
    im.departamento = departamento;
    im.idServidor = servidor;
    im.formato = System.IO.Path.GetExtension(path);
    im.nombreArchivo = System.IO.Path.GetFileName(path);
    im.fechaCreacionArchivo = File.GetCreationTime(path);
    im.fechaModificacionArchivo = File.GetLastWriteTime(path);
    im.fechaUltimaLectura = File.GetLastAccessTime(path);
    im.fechaUltimaActualizacion = DateTime.Now;
    im.etiquetas = new List<string> { "imagen", "foto" };
    im.hits = rnd.Next(1, 55);
    if (File.Exists(path))
    {
        im.estadoActividad = 1;
    }

    Imagen existente = OperacionesElasticSearch.ExisteImagen(im);
    if (existente == null)
    {
        OperacionesElasticSearch.InsertarImagen(im);
    }
    else
    {
        OperacionesElasticSearch.actualizarImagen(existente, im);
    }
}

// Indexes an audio file with its duration in seconds and the fixed tags "audio" and "sonido".
private static void IndexAudio(string path, Random rnd)
{
    TagLib.File media = TagLib.File.Create(path, TagLib.ReadStyle.Average);
    int duracion = (int)media.Properties.Duration.TotalSeconds;

    string urlRuta, servidor, departamento;
    ResolveServerLocation(path, out urlRuta, out servidor, out departamento);

    Audio au = new Audio();
    au.urlRuta = urlRuta;
    au.departamento = departamento;
    au.idServidor = servidor;
    au.duracion = duracion;
    au.etiquetas = new List<string> { "audio", "sonido" };
    au.formato = System.IO.Path.GetExtension(path);
    au.nombreArchivo = System.IO.Path.GetFileName(path);
    au.fechaCreacionArchivo = File.GetCreationTime(path);
    au.fechaModificacionArchivo = File.GetLastWriteTime(path);
    au.fechaUltimaLectura = File.GetLastAccessTime(path);
    au.fechaUltimaActualizacion = DateTime.Now;
    au.hits = rnd.Next(1, 55);
    if (File.Exists(path))
    {
        au.estadoActividad = 1;
    }

    Audio existente = OperacionesElasticSearch.ExisteAudio(au);
    if (existente == null)
    {
        OperacionesElasticSearch.InsertarAudio(au);
    }
    else
    {
        OperacionesElasticSearch.actualizarAudio(existente, au);
    }
}

// Indexes a video file; duration and frame size are only read for ".mp4" files.
private static void IndexVideo(string path, Random rnd)
{
    string ext = System.IO.Path.GetExtension(path);
    int duracion = 0;
    string calidad = "";

    if (ext == ".mp4")
    {
        TagLib.File media = TagLib.File.Create(path, TagLib.ReadStyle.Average);
        duracion = (int)media.Properties.Duration.TotalSeconds;
        if (media.Properties.VideoHeight != 0 && media.Properties.VideoWidth != 0)
        {
            int height = (int)media.Properties.VideoHeight;
            int width = (int)media.Properties.VideoWidth;
            // Stored as "<height>x<width>", unchanged from the existing data format.
            calidad = height + "x" + width;
        }
    }

    string urlRuta, servidor, departamento;
    ResolveServerLocation(path, out urlRuta, out servidor, out departamento);

    Video v = new Video();
    v.urlRuta = urlRuta;
    v.departamento = departamento;
    v.idServidor = servidor;
    v.duracion = duracion;
    v.etiquetas = new List<string> { "video" };
    v.calidad = calidad;
    v.formato = ext;
    v.nombreArchivo = System.IO.Path.GetFileName(path);
    v.fechaCreacionArchivo = File.GetCreationTime(path);
    v.fechaModificacionArchivo = File.GetLastWriteTime(path);
    v.fechaUltimaLectura = File.GetLastAccessTime(path);
    v.fechaUltimaActualizacion = DateTime.Now;
    v.hits = rnd.Next(1, 55);
    if (File.Exists(path))
    {
        v.estadoActividad = 1;
    }

    Video existente = OperacionesElasticSearch.ExisteVideo(v);
    if (existente == null)
    {
        OperacionesElasticSearch.InsertarVideo(v);
    }
    else
    {
        OperacionesElasticSearch.actualizarVideo(existente, v);
    }
}
/// <summary>
/// For every changed attachment field of <paramref name="record"/>, extracts the text
/// of the attached .doc, .docx, .pdf or .rtf file and stores it in the companion
/// "&lt;field&gt;RawText" column, adding that column to the table when it is missing.
/// When the attachment is no longer such a document, an existing RawText value is
/// set to null.
/// </summary>
/// <param name="record">Record whose fields with "Attachment" in their name are inspected.</param>
/// <exception cref="Exception">
/// An .rtf attachment is found and the code was compiled without <c>RTFProcessingAvailable</c>.
/// </exception>
public static void CheckAttachmentsForDocOrPDFText(ActiveRecord record)
{
    // Walk the field list for this record looking for attachments.
    foreach (var fieldName in record.GetFieldNames())
    {
        if (!(fieldName.Contains("Attachment") && fieldName.DoesntContain("RawText")))
        {
            continue;
        }

        if (!ActiveFieldBase.IsDirtyObj(record[fieldName].ValueObject, record[fieldName].OriginalValueObject))
        {
            continue;
        }

        string attachment = record[fieldName].ToString();
        string rawTextField = fieldName + "RawText";

        // All extension tests ignore case. The outer test used to be case-sensitive, so
        // an attachment such as "REPORT.PDF" was treated as "no document" and its stored
        // text was wiped.
        bool mentionsDoc = attachment.IndexOf(".doc", StringComparison.OrdinalIgnoreCase) >= 0;
        bool endsWithPdf = attachment.EndsWith(".pdf", StringComparison.OrdinalIgnoreCase);
        bool endsWithRtf = attachment.EndsWith(".rtf", StringComparison.OrdinalIgnoreCase);

        if (!(mentionsDoc || endsWithPdf || endsWithRtf))
        {
            // No doc any more.
            if (record.FieldExists(rawTextField))
            {
                (new Sql("update ", record.GetTableName().SqlizeName(), "set [" + rawTextField + "]=null where ", record.GetPrimaryKeyName().SqlizeName(), "=", record.ID_Field.Sqlize(), "")).Execute();
            }
            continue;
        }

        if (!record.FieldExists(rawTextField))
        {
            (new Sql("ALTER TABLE ", record.GetTableName().SqlizeName(), " ADD [" + rawTextField + "] nvarchar (MAX);")).Execute();
        }

        string output = "";

        if (attachment.EndsWith(".doc", StringComparison.OrdinalIgnoreCase))
        {
            OfficeFileReader.OfficeFileReader objOFR = new OfficeFileReader.OfficeFileReader();
            // The returned count is not needed; the text is delivered through output.
            objOFR.GetText(Web.MapPath(Web.Attachments) + attachment, ref output);
        }
        else if (attachment.EndsWith(".docx", StringComparison.OrdinalIgnoreCase))
        {
            BewebCore.ThirdParty.ReadWordDocText.DocxToText objOFR = new DocxToText(Web.MapPath(Web.Attachments) + attachment);
            output = objOFR.ExtractText();
        }
        else if (attachment.IndexOf(".pdf", StringComparison.OrdinalIgnoreCase) >= 0)
        {
            PdfToText.PDFParser pdf = new PDFParser();
            // The boolean result is not needed; the text is delivered through output.
            pdf.ExtractText(Web.MapPath(Web.Attachments) + attachment, ref output);
        }
        else if (attachment.IndexOf(".rtf", StringComparison.OrdinalIgnoreCase) >= 0)
        {
#if RTFProcessingAvailable
            // Create the RTF tree object, then load and parse the RTF document.
            RtfTree tree = new RtfTree();
            tree.LoadRtfFile(Web.MapPath(Web.Attachments) + attachment);
            output = tree.Text;
#else
            throw new Exception("rtf library not included");
#endif
        }

        // Store the text only when something was extracted; an empty result leaves the
        // current RawText value untouched.
        if (output.Trim() != "")
        {
            (new Sql("update ", record.GetTableName().SqlizeName(), "set [" + rawTextField + "]=", output.SqlizeText(), " where ", record.GetPrimaryKeyName().SqlizeName(), "=", record.ID_Field.Sqlize(), "")).Execute();
        }
    }
}