Пример #1
0
        /// <summary>
        /// Parses the document at <paramref name="path"/>, reports every parse and extraction
        /// exception through <c>WriteException</c>, then writes each extracted name to the
        /// output and visits the corresponding root.
        /// </summary>
        /// <param name="path">Path passed to the <c>DocxParser</c> constructor.</param>
        private void ParseDocx(string path)
        {
            var numberingProcessor = new PrependNumberingToContentProcessor(new DefaultNumberingConverterRegistry());
            var processors         = new List <IProcessor>();
            processors.Add(numberingProcessor);

            using (var parser = new DocxParser(path, processors))
            {
                IList <ParagraphFormatException> parseErrors;
                var document = parser.Parse(out parseErrors);

                foreach (var parseError in parseErrors)
                {
                    WriteException(parseError);
                }

                // The extraction is materialised in full before any result is reported.
                var extractions = _extractor.Extract(document).ToList();

                foreach (var extraction in extractions)
                {
                    var title            = extraction.Item1;
                    var root             = extraction.Item2;
                    var extractionErrors = extraction.Item3;

                    foreach (var extractionError in extractionErrors)
                    {
                        WriteException(extractionError);
                    }

                    _output.WriteLine($"******{title}******");
                    VisitRoot(root);
                }
            }
        }
Пример #2
0
        /// <summary>
        /// An expression whose value part has unbalanced brackets must yield a null
        /// property and a null value list.
        /// </summary>
        public void GetAnnotationPropertyFrom_InvalidExpression_ReturnsNulls()
        {
            const string malformedExpression = "Some property [propaddress] [someclassaddr its value]]";

            var(property, values) = DocxParser.GetAnnotationPropertyFrom(malformedExpression);

            Assert.IsNull(property);
            Assert.IsNull(values);
        }
Пример #3
0
        /// <summary>
        /// Saves the uploaded question document and answer sheet into the application's
        /// <c>DocxFiles</c> folder, parses both, and stores the resulting assessment with
        /// its questions and answers.
        /// </summary>
        /// <returns>A redirect to the <c>Index</c> action in every case.</returns>
        public ActionResult createAssessment()
        {
            var questionFile = Request.Files["btnUpload"];
            var answerFile   = Request.Files["btnAnswerSheet"];

            // Nothing can be parsed unless both uploads are present.
            if (questionFile == null || answerFile == null)
            {
                return(RedirectToAction("Index"));
            }

            // Keep only the file-name part: the client controls FileName and may send a
            // full path, which must not influence where the file lands on the server.
            string uploadDirectory = System.IO.Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "DocxFiles");
            string path1           = System.IO.Path.Combine(uploadDirectory, System.IO.Path.GetFileName(questionFile.FileName));
            string path2           = System.IO.Path.Combine(uploadDirectory, System.IO.Path.GetFileName(answerFile.FileName));

            // Each upload is saved under its own name and the parser reads exactly the
            // paths that were written.
            questionFile.SaveAs(path1);
            answerFile.SaveAs(path2);

            DocxParser      docx      = new DocxParser();
            List <Question> questions = docx.getQuestionswithCorrectAnswers(path1, path2);

            if (questions.Count != 0)
            {
                int newAssessmentID = 0;
                Common.dbAccess.storeAssessment(Request.Form["txtAssessmentTitle"], "CI6110", Convert.ToInt32(Request.Form["txtPercentageWorth"]), Convert.ToInt32(Request.Form["txtDuration"]), "KU14009", ref newAssessmentID);
                foreach (Question question in questions)
                {
                    // Question ids have the form "<assessment>.<question number>".
                    string questionID = newAssessmentID + "." + question.questionNumber;
                    Common.dbAccess.storeQuestion(questionID, question.question, newAssessmentID, question.questionNumber);
                    foreach (Answer answer in question.possibleAnswers)
                    {
                        // Answer ids extend the question id with the answer letter.
                        string answerID = newAssessmentID + "." + question.questionNumber + "." + answer.answerLetter;
                        Common.dbAccess.storeAnswer(answerID, answer.answer, newAssessmentID, answer.answerLetter, answer.correct.ToString(), questionID);
                    }
                }
            }
            return(RedirectToAction("Index"));
        }
Пример #4
0
        /// <summary>
        /// A well-formed expression must produce an instance carrying the expected name
        /// and IRI address.
        /// </summary>
        /// <param name="expression">Expression handed to <c>DocxParser.GetClassFrom</c>.</param>
        /// <param name="name">Expected value of the result's <c>Name</c>.</param>
        /// <param name="iriAddress">Expected value of the result's <c>IriAddress</c>.</param>
        public void GetClassFrom_ValidString_ReturnsClassInstants(string expression, string name, string iriAddress)
        {
            var item = DocxParser.GetClassFrom(expression);

            // Fail with a clear assertion instead of a NullReferenceException below.
            Assert.IsNotNull(item);

            // Expected value first, actual second, so failure messages read correctly.
            Assert.AreEqual(iriAddress, item.IriAddress);
            Assert.AreEqual(name, item.Name);
        }
Пример #5
0
 /// <summary>
 /// Opens the document at <paramref name="filePath"/> with a new <c>DocxParser</c> and
 /// fills the content, author, title and publication-date text boxes from it.
 /// </summary>
 /// <param name="filePath">Path passed to the <c>DocxParser</c> constructor.</param>
 public void HandleOpenDoc(string filePath)
 {
     const string separator = ", ";

     parser = new DocxParser(filePath);

     var content = parser.GetFileContent();
     tbFileContent.Text = content;

     // Author and title may hold several values; they are shown comma-separated.
     var authors = parser.GetMetadata(MetadataType.Author);
     tbAuthor.Text = String.Join(separator, authors);

     var titles = parser.GetMetadata(MetadataType.Title);
     tbTitle.Text = String.Join(separator, titles);

     // Only the first publication date (if any) is displayed.
     var publishDates = parser.GetMetadata(MetadataType.PublishDate);
     tbDateOfPublication.Text = publishDates.FirstOrDefault();
 }
Пример #6
0
        /// <summary>
        /// A well-formed expression must yield the property name and address plus one
        /// (class address, value) pair.
        /// </summary>
        public void GetAnnotationPropertyFrom_ValidExpression_ReturnsAnnotationPropertyInstant()
        {
            var(item, list) = DocxParser.GetAnnotationPropertyFrom("Some property [propaddress] [someclassaddr [its value]]");

            // Fail with a clear assertion instead of a NullReferenceException below.
            Assert.IsNotNull(item);
            Assert.IsNotNull(list);

            // Expected value first, actual second, so failure messages read correctly.
            Assert.AreEqual("propaddress", item.IriAddress);
            Assert.AreEqual("Some property", item.Name);
            Assert.AreEqual("someclassaddr", list[0].Item1);
            Assert.AreEqual("its value", list[0].Item2);
        }
Пример #7
0
        /// <summary>
        /// Parses the document behind <paramref name="documentModel"/> via <c>Task.Run</c>
        /// and converts every extracted section into a <c>DocumentSection</c>.
        /// </summary>
        /// <param name="documentModel">Supplies the path relative to the document list directory.</param>
        /// <returns>
        /// The collected parse/extraction exceptions paired with the converted sections.
        /// An <c>IOException</c> is not propagated; it is recorded as a <c>SevereException</c>
        /// in the returned exception list.
        /// </returns>
        private async Task <Tuple <List <ParagraphFormatException>, List <DocumentSection> > > LoadDocument(DocumentModel documentModel)
        {
            Tuple <List <ParagraphFormatException>, List <DocumentSection> > ParseDocument()
            {
                var fullPath        = Path.Combine(AppModel.DocumentList.DirectoryPath, documentModel.RelativePath);
                var loadedSections  = new List <DocumentSection>();
                var collectedErrors = new List <ParagraphFormatException>();

                try
                {
                    using (var docxParser = new DocxParser(fullPath, _processors))
                    {
                        var document = docxParser.Parse(out var parseErrors);
                        collectedErrors.AddRange(parseErrors);

                        foreach (var(name, root, extractErrors) in _extractor.Extract(document))
                        {
                            collectedErrors.AddRange(extractErrors);
                            loadedSections.Add(new DocumentSection(name, ConvertNumberRootToQuestions(root)));
                        }
                    }
                }
                catch (IOException ioError)
                {
                    // A sharing violation gets a dedicated user-facing message; any other
                    // I/O failure is reported with its own message text.
                    var lockedByOtherProcess = ioError.Message.Contains("being used by another process");
                    var text = lockedByOtherProcess ? "文件被占用(打开),请先关闭" : ioError.Message;

                    collectedErrors.Add(new SevereException(text));
                }

                return Tuple.Create(collectedErrors, loadedSections);
            }

            var outcome = await Task.Run(() => ParseDocument()).ConfigureAwait(false);
            return(outcome);
        }
        /// <summary>
        /// Creates the parser that matches <paramref name="fileType"/>.
        /// </summary>
        /// <param name="fileType">File extension the parser is needed for.</param>
        /// <returns>A new <c>DocxParser</c>, <c>OdtParser</c> or <c>PdfParser</c>.</returns>
        /// <exception cref="NotSupportedException">
        /// <paramref name="fileType"/> is none of <c>Docx</c>, <c>Odt</c> or <c>Pdf</c>.
        /// </exception>
        private IParser GetParser(FileExtension fileType)
        {
            switch (fileType)
            {
            case FileExtension.Docx:
                return(new DocxParser());

            case FileExtension.Odt:
                return(new OdtParser());

            case FileExtension.Pdf:
                return(new PdfParser());

            default:
                // A specific exception type instead of bare Exception; it still derives from
                // Exception, so existing catch blocks keep working.
                throw new NotSupportedException("Unknown file type");
            }
        }
Пример #9
0
        //[RequestSizeLimit(40000000)]
        //public async Task<IActionResult> AddFile(IFormFile uploadedFile)
        /// <summary>
        /// Stores an uploaded .pdf, .mp4, .doc or .docx file under
        /// <c>wwwroot\Files\{year}\{month}</c> and registers it as a <c>LibDocument</c>.
        /// </summary>
        /// <param name="file">
        /// Upload form model. Nothing is stored when <c>uploadedFile</c> is null or its
        /// extension is not one of the supported ones.
        /// </param>
        /// <returns>A redirect to the <c>MyFiles</c> action of the <c>File</c> controller.</returns>
        public async Task <IActionResult> AddFile(AddFileModelView file)
        {
            if (file.uploadedFile == null)
            {
                return(RedirectToAction("MyFiles", "File"));
            }

            string ext            = Path.GetExtension(file.uploadedFile.FileName).ToLowerInvariant();
            bool   isWordDocument = ext == ".doc" || ext == ".docx";

            // The extension comes from the client. Anything outside the supported set is
            // rejected before touching the disk, so arbitrary file types never land in the
            // web root and no blank document record is inserted.
            if (!isWordDocument && ext != ".pdf" && ext != ".mp4")
            {
                return(RedirectToAction("MyFiles", "File"));
            }

            // Resolve everything that can throw before the file is written, so a failure
            // here does not leave an orphaned file behind.
            var types       = GetMimeTypes();
            var contentType = types[ext];
            int userId      = Int32.Parse(HttpContext.User.FindFirst(ClaimTypes.NameIdentifier).Value);

            // One timestamp for both the name and the folder, so they cannot disagree
            // around a month boundary. "HH" is the 24-hour clock: "hh" produced the same
            // name for uploads made 12 hours apart.
            DateTime now               = DateTime.Now;
            string   newName           = now.ToString("M-d-yyyy_HH-mm-ss", System.Globalization.CultureInfo.InvariantCulture) + ext;
            string   relativeDirectory = @"\Files\" + now.Year + @"\" + now.Month;
            string   path              = relativeDirectory + @"\" + newName;

            // CreateDirectory is a no-op when the folder already exists.
            Directory.CreateDirectory(_appEnvironment.WebRootPath + relativeDirectory);

            // Save the upload into the Files folder under wwwroot.
            using (var fileStream = new FileStream(_appEnvironment.WebRootPath + path, FileMode.Create))
            {
                await file.uploadedFile.CopyToAsync(fileStream);
            }

            LibDocument newfile = new LibDocument {
                Name           = file.Name,
                Path           = path,
                Desc1          = file.Desc1,
                // Only Word documents get their text extracted at this point.
                Desc2          = isWordDocument ? new DocxParser(_appEnvironment).GetText(path) : "",
                UserId         = userId,
                CategoryId     = file.categories,
                DbName         = newName,
                Published      = file.Published,
                AccessLinkOnly = file.AccessLinkOnly,
                ContentType    = contentType
            };

            db.Documents.Add(newfile);

            await db.SaveChangesAsync();

            // NOTE(review): if InsertPdfTextToDbAsync returns a Task, StartNew yields a nested
            // task and this await only covers the outer one - confirm whether the text
            // extraction is meant to finish before the redirect.
            await Task.Factory.StartNew(() => InsertPdfTextToDbAsync(newfile.id, path));

            return(RedirectToAction("MyFiles", "File"));
        }
Пример #10
0
 /// <summary>
 /// Runs <c>DocxParser.Parse</c> on <c>test1.docx</c>; the result and the reported
 /// exceptions are not inspected.
 /// </summary>
 public void Parse()
 {
     const string documentPath = "test1.docx";

     // NOTE(review): if this DocxParser is disposable it is never disposed here - confirm.
     var docxParser = new DocxParser(documentPath);
     var parsed     = docxParser.Parse(out var parseErrors);
 }
Пример #11
0
        /// <summary>
        /// <c>DocxParser.GetClassFrom</c> must return null for each supplied expression.
        /// </summary>
        /// <param name="expression">Expression handed to <c>DocxParser.GetClassFrom</c>.</param>
        public void GetClassFrom_InvalidString_ReturnsNull(string expression)
        {
            Assert.IsNull(DocxParser.GetClassFrom(expression));
        }
Пример #12
0
        /// <summary>
        /// Stores an uploaded .pdf, .mp4, .doc or .docx file under
        /// <c>wwwroot\Files\{year}\{month}</c> and registers it as a <c>LibDocument</c>.
        /// </summary>
        /// <param name="file">
        /// Upload form model. Nothing is stored when <c>uploadedFile</c> is null or its
        /// extension is not one of the supported ones.
        /// </param>
        /// <returns>A redirect to the <c>MyFiles</c> action of the <c>File</c> controller.</returns>
        public async Task <IActionResult> AddFile(AddFileModelView file)
        {
            if (file.uploadedFile == null)
            {
                return(RedirectToAction("MyFiles", "File"));
            }

            string ext            = Path.GetExtension(file.uploadedFile.FileName).ToLowerInvariant();
            bool   isWordDocument = ext == ".doc" || ext == ".docx";

            // The extension comes from the client. Anything outside the supported set is
            // rejected before touching the disk, so arbitrary file types never land in the
            // web root and no blank document record is inserted.
            if (!isWordDocument && ext != ".pdf" && ext != ".mp4")
            {
                return(RedirectToAction("MyFiles", "File"));
            }

            // Resolve everything that can throw before the file is written, so a failure
            // here does not leave an orphaned file behind.
            var types       = GetMimeTypes();
            var contentType = types[ext];
            int userId      = Int32.Parse(HttpContext.User.FindFirst(ClaimTypes.NameIdentifier).Value);

            // One timestamp for both the name and the folder, so they cannot disagree
            // around a month boundary. "HH" is the 24-hour clock: "hh" produced the same
            // name for uploads made 12 hours apart.
            DateTime now               = DateTime.Now;
            string   newName           = now.ToString("M-d-yyyy_HH-mm-ss", System.Globalization.CultureInfo.InvariantCulture) + ext;
            string   relativeDirectory = @"\Files\" + now.Year + @"\" + now.Month;
            string   path              = relativeDirectory + @"\" + newName;

            // CreateDirectory is a no-op when the folder already exists.
            Directory.CreateDirectory(_appEnvironment.WebRootPath + relativeDirectory);

            using (var fileStream = new FileStream(_appEnvironment.WebRootPath + path, FileMode.Create))
            {
                await file.uploadedFile.CopyToAsync(fileStream);
            }

            LibDocument newfile = new LibDocument {
                Name           = file.Name,
                Path           = path,
                Desc1          = file.Desc1,
                UserId         = userId,
                CategoryId     = file.categories,
                DbName         = newName,
                Published      = file.Published,
                AccessLinkOnly = file.AccessLinkOnly,
                ContentType    = contentType
            };

            // Only Word documents get their text extracted at this point; Desc2 is left
            // unset for the other types.
            if (isWordDocument)
            {
                newfile.Desc2 = new DocxParser(_appEnvironment).GetText(path);
            }

            db.Documents.Add(newfile);
            await db.SaveChangesAsync();

            // NOTE(review): if InsertPdfTextToDbAsync returns a Task, StartNew yields a nested
            // task and this await only covers the outer one - confirm whether the text
            // extraction is meant to finish before the redirect.
            await Task.Factory.StartNew(() => InsertPdfTextToDbAsync(newfile.id, path));

            return(RedirectToAction("MyFiles", "File"));
        }