/// <summary>
/// Parses the .docx file at <paramref name="path"/>, reports every parse and extraction
/// exception through <c>WriteException</c>, and for each extracted result writes a banner
/// line containing its first tuple item before visiting its second item with <c>VisitRoot</c>.
/// </summary>
/// <param name="path">Path of the .docx file handed to the parser.</param>
private void ParseDocx(string path)
{
    var numberingProcessor = new PrependNumberingToContentProcessor(new DefaultNumberingConverterRegistry());
    var processors = new List<IProcessor> { numberingProcessor };

    using (var parser = new DocxParser(path, processors))
    {
        IList<ParagraphFormatException> parseErrors;
        var document = parser.Parse(out parseErrors);

        // Report parser-level problems before any extraction happens.
        foreach (var parseError in parseErrors)
        {
            WriteException(parseError);
        }

        // Materialise all results first so extraction finishes before anything is printed.
        var sections = _extractor.Extract(document).ToList();
        foreach (var section in sections)
        {
            foreach (var extractError in section.Item3)
            {
                WriteException(extractError);
            }

            _output.WriteLine($"******{section.Item1}******");
            VisitRoot(section.Item2);
        }
    }
}
/// <summary>
/// Verifies that <c>DocxParser.GetAnnotationPropertyFrom</c> yields null for both parts of
/// its result when the expression has unbalanced brackets.
/// </summary>
public void GetAnnotationPropertyFrom_InvalidExpression_ReturnsNulls()
{
    // The trailing "]]" has no matching "[" for the value part.
    const string malformedExpression = "Some property [propaddress] [someclassaddr its value]]";

    var (property, values) = DocxParser.GetAnnotationPropertyFrom(malformedExpression);

    Assert.IsNull(property);
    Assert.IsNull(values);
}
/// <summary>
/// Handles the assessment upload: saves the question document ("btnUpload") and the answer
/// sheet ("btnAnswerSheet") into the DocxFiles folder, parses them into questions with their
/// correct answers, and stores the assessment, its questions and their answers through
/// <c>Common.dbAccess</c>.
/// </summary>
/// <returns>A redirect to the "Index" action, whether or not any questions were found.</returns>
public ActionResult createAssessment()
{
    DocxParser docx = new DocxParser();

    var questionFile = Request.Files["btnUpload"];
    var answerFile = Request.Files["btnAnswerSheet"];

    // NOTE(review): these parser paths are derived from the request mapping, not from the
    // save location used below - confirm both resolve to the same files on disk.
    string path1 = Server.MapPath(questionFile.FileName).Replace("createAssessment", "DocxFiles");
    string path2 = Server.MapPath(answerFile.FileName).Replace("createAssessment", "DocxFiles");

    // Each upload is stored under its OWN file name. Previously the question document was
    // written under the answer sheet's name, so it was never stored under its own name and
    // both saves targeted the same file.
    string uploadDirectory = AppDomain.CurrentDomain.BaseDirectory + "DocxFiles\\";
    questionFile.SaveAs(uploadDirectory + questionFile.FileName);
    answerFile.SaveAs(uploadDirectory + answerFile.FileName);

    List<Question> questions = docx.getQuestionswithCorrectAnswers(path1, path2);
    if (questions.Count != 0)
    {
        // Filled in by storeAssessment through the ref parameter.
        int newAssessmentID = 0;
        Common.dbAccess.storeAssessment(
            Request.Form["txtAssessmentTitle"],
            "CI6110",
            Convert.ToInt32(Request.Form["txtPercentageWorth"]),
            Convert.ToInt32(Request.Form["txtDuration"]),
            "KU14009",
            ref newAssessmentID);

        foreach (Question question in questions)
        {
            // Question IDs have the form "<assessment>.<questionNumber>".
            string questionID = newAssessmentID + "." + question.questionNumber;
            Common.dbAccess.storeQuestion(questionID, question.question, newAssessmentID, question.questionNumber);

            foreach (Answer answer in question.possibleAnswers)
            {
                // Answer IDs extend the question ID with the answer letter.
                string answerID = newAssessmentID + "." + question.questionNumber + "." + answer.answerLetter;
                Common.dbAccess.storeAnswer(answerID, answer.answer, newAssessmentID, answer.answerLetter, answer.correct.ToString(), questionID);
            }
        }
    }

    return RedirectToAction("Index");
}
/// <summary>
/// Verifies that <c>DocxParser.GetClassFrom</c> turns a well-formed expression into an item
/// whose <c>IriAddress</c> and <c>Name</c> match the expected values.
/// </summary>
/// <param name="expression">Expression passed to <c>DocxParser.GetClassFrom</c>.</param>
/// <param name="name">Expected value of the parsed item's <c>Name</c>.</param>
/// <param name="iriAddress">Expected value of the parsed item's <c>IriAddress</c>.</param>
public void GetClassFrom_ValidString_ReturnsClassInstants(string expression, string name, string iriAddress)
{
    var item = DocxParser.GetClassFrom(expression);

    // Assert.AreEqual takes (expected, actual); the expected value goes first so a failure
    // message labels the two values correctly.
    Assert.AreEqual(iriAddress, item.IriAddress);
    Assert.AreEqual(name, item.Name);
}
/// <summary>
/// Opens the document at <paramref name="filePath"/> with a new <c>DocxParser</c> (kept in
/// the <c>parser</c> member) and fills the content, author, title and publication-date
/// text boxes from it.
/// </summary>
/// <param name="filePath">Path of the document passed to the parser.</param>
public void HandleOpenDoc(string filePath)
{
    const string separator = ", ";

    parser = new DocxParser(filePath);

    tbFileContent.Text = parser.GetFileContent();

    // Author and title may hold several values; they are shown comma-separated.
    var authors = parser.GetMetadata(MetadataType.Author);
    tbAuthor.Text = String.Join(separator, authors);

    var titles = parser.GetMetadata(MetadataType.Title);
    tbTitle.Text = String.Join(separator, titles);

    // Only the first publish date (or the default when there is none) is displayed.
    var publishDates = parser.GetMetadata(MetadataType.PublishDate);
    tbDateOfPublication.Text = publishDates.FirstOrDefault();
}
/// <summary>
/// Verifies that <c>DocxParser.GetAnnotationPropertyFrom</c> parses a well-formed expression
/// into a property item (name and IRI address) plus a list whose first entry carries the
/// class address and its value.
/// </summary>
public void GetAnnotationPropertyFrom_ValidExpression_ReturnsAnnotationPropertyInstant()
{
    var (item, list) = DocxParser.GetAnnotationPropertyFrom("Some property [propaddress] [someclassaddr [its value]]");

    // Assert.AreEqual takes (expected, actual); the expected literal goes first so a failure
    // message labels the two values correctly.
    Assert.AreEqual("propaddress", item.IriAddress);
    Assert.AreEqual("Some property", item.Name);
    Assert.AreEqual("someclassaddr", list[0].Item1);
    Assert.AreEqual("its value", list[0].Item2);
}
/// <summary>
/// Loads the document described by <paramref name="documentModel"/> inside
/// <c>Task.Run</c>: parses the file, extracts its sections, and collects every exception
/// reported along the way.
/// </summary>
/// <param name="documentModel">Model whose <c>RelativePath</c> is combined with the document list directory.</param>
/// <returns>
/// A tuple of the collected exceptions and the extracted sections. An <see cref="IOException"/>
/// is not rethrown; it is added to the exception list as a <c>SevereException</c>.
/// </returns>
private async Task<Tuple<List<ParagraphFormatException>, List<DocumentSection>>> LoadDocument(DocumentModel documentModel)
{
    var loadTask = Task.Run(() =>
    {
        var fullPath = Path.Combine(AppModel.DocumentList.DirectoryPath, documentModel.RelativePath);
        var loadedSections = new List<DocumentSection>();
        var problems = new List<ParagraphFormatException>();

        try
        {
            using (var docxParser = new DocxParser(fullPath, _processors))
            {
                var document = docxParser.Parse(out var parseProblems);
                problems.AddRange(parseProblems);

                foreach (var (sectionName, questionRoot, extractProblems) in _extractor.Extract(document))
                {
                    problems.AddRange(extractProblems);
                    var questions = ConvertNumberRootToQuestions(questionRoot);
                    loadedSections.Add(new DocumentSection(sectionName, questions));
                }
            }
        }
        catch (IOException ioError)
        {
            // A file locked by another process gets a fixed user-facing message;
            // any other I/O failure keeps its original message.
            var lockedByOtherProcess = ioError.Message.Contains("being used by another process");
            var text = lockedByOtherProcess ? "文件被占用(打开),请先关闭" : ioError.Message;
            problems.Add(new SevereException(text));
        }

        return Tuple.Create(problems, loadedSections);
    });

    return await loadTask.ConfigureAwait(false);
}
/// <summary>
/// Creates the parser that corresponds to <paramref name="fileType"/>.
/// </summary>
/// <param name="fileType">File extension kind selecting the parser implementation.</param>
/// <returns>A new <c>DocxParser</c>, <c>OdtParser</c> or <c>PdfParser</c>.</returns>
/// <exception cref="Exception">Thrown with "Unknown file type" for any other value.</exception>
private IParser GetParser(FileExtension fileType)
{
    switch (fileType)
    {
        case FileExtension.Docx:
            return new DocxParser();

        case FileExtension.Odt:
            return new OdtParser();

        case FileExtension.Pdf:
            return new PdfParser();

        default:
            throw new Exception("Unknown file type");
    }
}
// [RequestSizeLimit(40000000)] is currently disabled for this action.
/// <summary>
/// Stores an uploaded file under <c>wwwroot\Files\{year}\{month}</c> using a timestamp-based
/// name, records it as a <c>LibDocument</c>, and then redirects to the "MyFiles" action of
/// the "File" controller. When no file was uploaded, only the redirect happens.
/// </summary>
/// <param name="file">Upload form model carrying the file and its metadata.</param>
/// <returns>A redirect to <c>File/MyFiles</c>.</returns>
public async Task<IActionResult> AddFile(AddFileModelView file)
{
    if (file.uploadedFile == null)
    {
        return RedirectToAction("MyFiles", "File");
    }

    string ext = Path.GetExtension(file.uploadedFile.FileName).ToLowerInvariant();
    var types = GetMimeTypes();

    // Sample the clock once so the file name and the year/month directory always agree,
    // even when the request straddles a month or year boundary.
    DateTime now = DateTime.Now;

    // "HH" is the 24-hour clock. The previous "hh" (12-hour, no AM/PM marker) produced the
    // same name for uploads exactly twelve hours apart, and FileMode.Create below would then
    // silently overwrite the earlier file.
    string newName = now.ToString("M-d-yyyy_HH-mm-ss") + ext;
    string monthNumber = now.Month.ToString();
    string yearNumber = now.Year.ToString();

    // The relative path is identical whether or not the directory already exists.
    string path = "\\Files\\" + yearNumber + @"\" + monthNumber + @"\" + newName;
    DirectoryInfo directory = new DirectoryInfo(_appEnvironment.WebRootPath + @"\Files\" + yearNumber + @"\" + monthNumber);
    if (!directory.Exists)
    {
        directory.Create();
    }

    // Save the file into the Files folder under wwwroot.
    using (var fileStream = new FileStream(_appEnvironment.WebRootPath + path, FileMode.Create))
    {
        await file.uploadedFile.CopyToAsync(fileStream);
    }

    string userId = HttpContext.User.FindFirst(ClaimTypes.NameIdentifier).Value.ToString();

    // Builds the database record; only the Desc2 text differs between file types.
    LibDocument CreateDocument(string desc2)
    {
        return new LibDocument
        {
            Name = file.Name,
            Path = path,
            Desc1 = file.Desc1,
            Desc2 = desc2,
            UserId = Int32.Parse(userId),
            CategoryId = file.categories,
            DbName = newName,
            Published = file.Published,
            AccessLinkOnly = file.AccessLinkOnly,
            ContentType = types[ext]
        };
    }

    LibDocument newfile;
    switch (ext)
    {
        case ".pdf":
        case ".mp4":
            // No text is extracted at upload time for these types.
            newfile = CreateDocument("");
            break;

        case ".doc":
        case ".docx":
            DocxParser docx = new DocxParser(_appEnvironment);
            newfile = CreateDocument(docx.GetText(path));
            break;

        default:
            // Any other extension is still recorded, as an empty document entry.
            newfile = new LibDocument();
            break;
    }

    db.Documents.Add(newfile);
    await db.SaveChangesAsync();

    // NOTE(review): if InsertPdfTextToDbAsync returns a Task, this await only waits for the
    // work to be started, not for it to finish - confirm whether that is intended.
    await Task.Factory.StartNew(() => InsertPdfTextToDbAsync(newfile.id, path));

    List<Category> categories = db.Categories.ToList();
    ViewBag.Categories = new SelectList(categories, "Id", "Name");

    return RedirectToAction("MyFiles", "File");
}
/// <summary>
/// Smoke test: parses "test1.docx" and passes as long as parsing does not throw.
/// </summary>
public void Parse()
{
    // The parser is used inside a using block so it is disposed once parsing is done;
    // previously it was never disposed.
    using (var parser = new DocxParser("test1.docx"))
    {
        var res = parser.Parse(out var exceptions);
    }
}
/// <summary>
/// Verifies that <c>DocxParser.GetClassFrom</c> returns null for an expression it cannot parse.
/// </summary>
/// <param name="expression">Malformed expression passed to <c>DocxParser.GetClassFrom</c>.</param>
public void GetClassFrom_InvalidString_ReturnsNull(string expression)
{
    var parsedClass = DocxParser.GetClassFrom(expression);

    Assert.IsNull(parsedClass);
}
/// <summary>
/// Stores an uploaded file under <c>wwwroot\Files\{year}\{month}</c> using a timestamp-based
/// name, records it as a <c>LibDocument</c>, and then redirects to the "MyFiles" action of
/// the "File" controller. When no file was uploaded, only the redirect happens.
/// </summary>
/// <param name="file">Upload form model carrying the file and its metadata.</param>
/// <returns>A redirect to <c>File/MyFiles</c>.</returns>
public async Task<IActionResult> AddFile(AddFileModelView file)
{
    if (file.uploadedFile == null)
    {
        return RedirectToAction("MyFiles", "File");
    }

    string ext = Path.GetExtension(file.uploadedFile.FileName).ToLowerInvariant();
    var types = GetMimeTypes();

    // Sample the clock once so the file name and the year/month directory always agree,
    // even when the request straddles a month or year boundary.
    DateTime now = DateTime.Now;

    // "HH" is the 24-hour clock. The previous "hh" (12-hour, no AM/PM marker) produced the
    // same name for uploads exactly twelve hours apart, and FileMode.Create below would then
    // silently overwrite the earlier file.
    string newName = now.ToString("M-d-yyyy_HH-mm-ss") + ext;
    string monthNumber = now.Month.ToString();
    string yearNumber = now.Year.ToString();

    // The relative path is identical whether or not the directory already exists.
    string path = "\\Files\\" + yearNumber + @"\" + monthNumber + @"\" + newName;
    DirectoryInfo directory = new DirectoryInfo(_appEnvironment.WebRootPath + @"\Files\" + yearNumber + @"\" + monthNumber);
    if (!directory.Exists)
    {
        directory.Create();
    }

    using (var fileStream = new FileStream(_appEnvironment.WebRootPath + path, FileMode.Create))
    {
        await file.uploadedFile.CopyToAsync(fileStream);
    }

    string userId = HttpContext.User.FindFirst(ClaimTypes.NameIdentifier).Value.ToString();

    // Builds the database record shared by all supported file types; Desc2 is left untouched
    // here because only Word documents set it.
    LibDocument CreateDocument()
    {
        return new LibDocument
        {
            Name = file.Name,
            Path = path,
            Desc1 = file.Desc1,
            UserId = Int32.Parse(userId),
            CategoryId = file.categories,
            DbName = newName,
            Published = file.Published,
            AccessLinkOnly = file.AccessLinkOnly,
            ContentType = types[ext]
        };
    }

    LibDocument newfile;
    switch (ext)
    {
        case ".pdf":
        case ".mp4":
            newfile = CreateDocument();
            break;

        case ".doc":
        case ".docx":
            // Word documents also store their extracted text in Desc2.
            DocxParser docx = new DocxParser(_appEnvironment);
            newfile = CreateDocument();
            newfile.Desc2 = docx.GetText(path);
            break;

        default:
            // Any other extension is still recorded, as an empty document entry.
            newfile = new LibDocument();
            break;
    }

    db.Documents.Add(newfile);
    await db.SaveChangesAsync();

    // NOTE(review): if InsertPdfTextToDbAsync returns a Task, this await only waits for the
    // work to be started, not for it to finish - confirm whether that is intended.
    await Task.Factory.StartNew(() => InsertPdfTextToDbAsync(newfile.id, path));

    List<Category> categories = db.Categories.ToList();
    ViewBag.Categories = new SelectList(categories, "Id", "Name");

    return RedirectToAction("MyFiles", "File");
}