/**
 * <summary>
 * Walks every page of the given PDF and collects the words built from its text
 * objects. Image and path objects met along the way are appended to the
 * _imageObjects and _pathObjects fields.
 * </summary>
 * <param name="pdfDoc">Document to read. It is disposed before this method returns,
 * both on success and when extraction throws.</param>
 * <returns>Dictionary keyed by the zero-based page index (boxed int); each value is
 * the list of Words produced for that page.</returns>
 * <exception cref="ArgumentNullException">pdfDoc is null.</exception>
 */
public Dictionary<object, object> ReadinputFile(PDFDocument pdfDoc)
{
    if (pdfDoc == null)
    {
        throw new ArgumentNullException("pdfDoc");
    }

    Dictionary<object, object> wordsByPage = new Dictionary<object, object>();
    PDFObjectsProcessor objectsProcessor = new PDFObjectsProcessor();

    try
    {
        for (int pageIndex = 0; pageIndex < pdfDoc.Pages.Count; pageIndex++)
        {
            PDFImportedPage importedPage = (PDFImportedPage)pdfDoc.Pages[pageIndex];
            PDFPageObjectCollection pageObjects = importedPage.ExtractPageObjects();
            List<Words> pageWords = new List<Words>();

            for (int j = 0; j < pageObjects.Count; j++)
            {
                // Read the element once instead of indexing the collection for
                // every type check and cast.
                var pageObject = pageObjects[j];

                if (pageObject is PDFTextPageObject)
                {
                    // GetWords may continue a word left unfinished by the previous
                    // text object, so the running list is passed in and handed back.
                    pageWords = objectsProcessor.GetWords((PDFTextPageObject)pageObject, pageWords);
                }
                else if (pageObject is PDFImagePageObject)
                {
                    _imageObjects.Add((PDFImagePageObject)pageObject);
                }
                else if (pageObject is PDFPathPageObject)
                {
                    _pathObjects.Add((PDFPathPageObject)pageObject);
                }
            }

            wordsByPage.Add(pageIndex, pageWords);
        }
    }
    finally
    {
        // Release the document even when a page fails to extract; previously it
        // was only disposed on the success path.
        pdfDoc.Dispose();
    }

    return wordsByPage;
}
/**
 * <summary>
 * Feeds the glyphs of one text object into a running list of text blocks.
 * A block is marked complete and stored as soon as its text is exactly 16
 * characters long. If the last block in the list is not complete it is taken
 * out of the list, continued with the new glyphs and appended again at the end.
 * </summary>
 * <param name="TextObject">Text object whose glyphs, font name, font size and
 * fill colour are read.</param>
 * <param name="BlockList">Blocks gathered so far; modified in place.</param>
 * <returns>The same BlockList instance, with the (possibly still incomplete)
 * current block appended last unless the glyphs ended exactly on a block
 * boundary.</returns>
 */
public List<Blocks> GetBlocks(PDFTextPageObject TextObject, List<Blocks> BlockList)
{
    FontsProcessor fonts = new FontsProcessor();
    PDFGlyphCollection glyphs = TextObject.Glyphs;

    Blocks current;
    bool startsBlock;
    int lastIndex = BlockList.Count - 1;

    if (lastIndex >= 0 && !BlockList[lastIndex].BlockComplete)
    {
        // Continue the unfinished block; it is re-appended once updated.
        current = BlockList[lastIndex];
        BlockList.RemoveAt(lastIndex);
        startsBlock = false;
    }
    else
    {
        current = new Blocks();
        startsBlock = true;
    }

    int index = 0;
    while (index < glyphs.Count)
    {
        current.Block += glyphs[index].Text;

        if (current.Block.Length == 16)
        {
            current.BlockComplete = true;
            BlockList.Add(current);

            // Move on to the glyph that opens the next block. The glyph that
            // completed the block gets no position bookkeeping below.
            index++;
            if (index >= glyphs.Count)
            {
                return BlockList;
            }

            current = new Blocks();
            current.Block += glyphs[index].Text;
            startsBlock = true;
        }

        if (startsBlock)
        {
            // First glyph of a block fixes its font, brush and starting position.
            startsBlock = false;
            current.FontBase = fonts.GetFontBase(TextObject.FontName, TextObject.FontSize);
            current.FontBrush = new PDFBrush(TextObject.FillColor);
            current.Top.Add(glyphs[index].DisplayBounds.Top);
            current.Left.Add(glyphs[index].DisplayBounds.Left);
        }
        else if (glyphs[index].DisplayBounds.Top > current.TopOfLastCharacter)
        {
            // The glyph sits lower than the previous one (presumably a new line):
            // record where the block text splits and the position of the new chunk.
            current.Chunks.Add(current.Block.Length - 1);
            current.Top.Add(glyphs[index].DisplayBounds.Top);
            current.Left.Add(glyphs[index].DisplayBounds.Left);
        }

        current.TopOfLastCharacter = glyphs[index].DisplayBounds.Top;
        index++;
    }

    BlockList.Add(current);
    return BlockList;
}
/**
 * <summary>
 * For every entry of _encryptedDocuments, checks whether the cipher text of
 * _keyword, generated for each candidate coefficient, equals the text of any
 * text object in the first document of that entry. On a match the shares are
 * reconstructed and highlighted through ShareAssemblerController; otherwise a
 * "not found" message is printed. Elapsed times are written to the console.
 * </summary>
 */
public void SearchKeyWord()
{
    foreach (KeyValuePair<string, List<PDFDocument>> document in _encryptedDocuments)
    {
        Stopwatch watch = new Stopwatch();
        watch.Start();

        List<PDFDocument> documentsList = document.Value;
        List<string[]> coefficientList = RetrieveCoefficients(documentsList);
        List<int> decryptedCoef = new CoefficientCodec(0, _numOfShares, _shareforRec, "Decrypt", coefficientList).coefficients;
        List<int> coefNeededToSearch = PerformCombinationOfCoef(decryptedCoef, _shareforRec - 1);
        PDFDocument docShare = documentsList[0];

        Console.WriteLine("CoefNeededToSearch : " + coefNeededToSearch.Count);

        bool keywordFound = false;
        if (coefNeededToSearch.Count > 0)
        {
            // The page contents do not depend on the coefficient, so the text of
            // every text object is gathered once instead of re-extracting all
            // pages for each coefficient.
            HashSet<string> shareTexts = new HashSet<string>();
            for (int pageIndex = 0; pageIndex < docShare.Pages.Count; pageIndex++)
            {
                PDFImportedPage importedPage = (PDFImportedPage)docShare.Pages[pageIndex];
                PDFPageObjectCollection pageObjects = importedPage.ExtractPageObjects();
                for (int j = 0; j < pageObjects.Count; j++)
                {
                    var pageObject = pageObjects[j];
                    if (pageObject is PDFTextPageObject)
                    {
                        shareTexts.Add(((PDFTextPageObject)pageObject).Text);
                    }
                }
            }

            for (int coefIndex = 0; coefIndex < coefNeededToSearch.Count; coefIndex++)
            {
                Share[] share = ShareGenerator.GenerateShares(_keyword, _numOfShares, _shareforRec, new int[] { coefNeededToSearch[coefIndex] }, new SortedDictionary<int, int>());
                if (shareTexts.Contains(share[0].GetCipherText()))
                {
                    // One hit is enough; the remaining coefficients cannot change
                    // the outcome.
                    keywordFound = true;
                    break;
                }
            }
        }

        watch.Stop();

        if (keywordFound)
        {
            Console.WriteLine("Search Time : " + watch.ElapsedMilliseconds);

            watch.Reset();
            watch.Start();
            ShareAssemblerController shareAssembler = new ShareAssemblerController(_numOfShares, _shareforRec, _keyword);
            shareAssembler.SharesReconstructor();
            watch.Stop();
            Console.WriteLine("Decrypt and Highlight Time : " + watch.ElapsedMilliseconds);
        }
        else
        {
            Console.WriteLine("NO FILES CONTAIN THE KEYWORD YOU'RE LOOKING FOR.");
        }
    }
}
/**
 * <summary>
 * Splits the glyphs of one text object into words and appends them to a running
 * list. Glyphs whose text is null or white space end the current word. If the
 * last word in the list is not complete it is taken out of the list, continued
 * with the new glyphs and appended again.
 * </summary>
 * <param name="TextObject">Text object whose glyphs, font name, font size and
 * fill colour are read.</param>
 * <param name="WordObjects">Words gathered so far; modified in place.</param>
 * <returns>The same WordObjects instance. A trailing word that was not followed
 * by white space is appended with WordComplete left unset so that a later call
 * can continue it.</returns>
 */
public List<Words> GetWords(PDFTextPageObject TextObject, List<Words> WordObjects)
{
    FontsProcessor fontsProcessor = new FontsProcessor();
    PDFGlyphCollection glyphs = TextObject.Glyphs;

    Words word;
    bool firstCharacter;
    int lastIndex = WordObjects.Count - 1;

    if (lastIndex < 0 || WordObjects[lastIndex].WordComplete)
    {
        word = new Words();
        firstCharacter = true;
    }
    else
    {
        // Continue the word left open by the previous text object. Its font and
        // position were already captured, so it is not treated as a first character.
        word = WordObjects[lastIndex];
        word.Added = false;
        WordObjects.RemoveAt(lastIndex);
        firstCharacter = false;
    }

    for (int i = 0; i < glyphs.Count; i++)
    {
        string glyphText = glyphs[i].Text;

        if (String.IsNullOrWhiteSpace(glyphText))
        {
            // A separator: the next visible glyph starts a new word.
            firstCharacter = true;
            if (!String.IsNullOrEmpty(word.Word))
            {
                word.WordComplete = true;
                if (!word.Added)
                {
                    word.Added = true;
                    WordObjects.Add(word);
                }
                word = new Words();
            }
            continue;
        }

        word.Word = word.Word + glyphText;
        if (firstCharacter)
        {
            // The first glyph of a word fixes its font, brush and position.
            firstCharacter = false;
            word.FontBase = fontsProcessor.GetFontBase(TextObject.FontName, TextObject.FontSize);
            word.FontBrush = new PDFBrush(TextObject.FillColor);
            word.Top = glyphs[i].DisplayBounds.Top;
            word.Left = glyphs[i].DisplayBounds.Left;
        }
    }

    // Use the same emptiness test as inside the loop. Comparing against "" alone
    // lets a word whose text is still null through; such a placeholder would be
    // resumed by the next call without ever receiving a font or position.
    if (!String.IsNullOrEmpty(word.Word))
    {
        word.Added = true;
        WordObjects.Add(word);
    }

    return WordObjects;
}
/**
 * <summary>
 * Reconstructs the original PDF from the encrypted shares and highlights every
 * occurrence of the searched keyword, both in the decrypted document and in the
 * encrypted share documents. All documents are saved at the end: share i is
 * written to _encryptedFilePath + (i + 1) + ".pdf" and the decrypted document
 * to _decryptedFilePath.
 * </summary>
 */
public void SharesReconstructor()
{
    // Text object inserted by the evaluation build of the PDF library; it is not
    // part of the shared content and must not be reconstructed.
    const string EvaluationWatermark = "PDF4NET evaluation version 5.0.1.0";

    // Outer list: one entry per page. Inner list: one object collection per share.
    List<List<PDFPageObjectCollection>> pageObjectsByPage = ExtractObjects();

    PDFRgbColor highlightColor = new PDFRgbColor(204, 255, 51);
    PDFBrush highlightBrush = new PDFBrush(highlightColor);
    PDFPen highlightPen = new PDFPen(new PDFRgbColor(204, 255, 51), 1);

    for (int pageIndex = 0; pageIndex < pageObjectsByPage.Count; pageIndex++)
    {
        _decryptedPDF.AddPage();
        List<PDFPageObjectCollection> objectsPerShare = pageObjectsByPage[pageIndex];

        // The first share's collection determines how many objects are visited.
        for (int wordIndex = 0; wordIndex < objectsPerShare[0].Count; wordIndex++)
        {
            string[] shares = new string[_shareforRec];
            PDFTextPageObject textObject = null;
            bool shouldProcess = false;

            for (int shareIndex = 0; shareIndex < _shareforRec; shareIndex++)
            {
                var candidate = objectsPerShare[shareIndex][wordIndex];
                if (candidate is PDFTextPageObject)
                {
                    textObject = (PDFTextPageObject)candidate;
                    if (textObject.Text == EvaluationWatermark)
                    {
                        shouldProcess = false;
                        break;
                    }

                    shares[shareIndex] = textObject.Text;
                    shouldProcess = true;
                }
                // NOTE(review): a non-text object in one share leaves a null slot in
                // shares while a text object in another share still triggers
                // reconstruction - confirm TextReconstruction tolerates that.
            }

            if (!shouldProcess)
            {
                continue;
            }

            string decryptedText = ShareAssembler.TextReconstruction(shares, _shareNumbers, _numOfShares, _shareforRec);
            PDFBrush textBrush = new PDFBrush(textObject.FillColor);
            PDFFontBase fontBase = new FontsProcessor().GetFontBase(textObject.FontName, textObject.FontSize);
            var bounds = textObject.DisplayBounds;

            // Case-insensitive match without culture-dependent lower-casing.
            if (decryptedText.Equals(_keyword, StringComparison.OrdinalIgnoreCase))
            {
                // Width is approximated as 4 units per character - TODO confirm
                // against the real glyph widths.
                double highlightWidth = 4 * decryptedText.Length;

                _decryptedPDF.Pages[pageIndex].Canvas.DrawRectangle(highlightPen, highlightBrush, bounds.Left, bounds.Top, highlightWidth, fontBase.Size, 0);

                for (int i = 0; i < _encryptedPdf.Count; i++)
                {
                    // There may be more encrypted documents than collected shares;
                    // the extra ones reuse the first share's text.
                    string shareText = i < shares.Length ? shares[i] : shares[0];

                    _encryptedPdf[i].Pages[pageIndex].Canvas.DrawRectangle(highlightPen, highlightBrush, bounds.Left, bounds.Top, highlightWidth, fontBase.Size, 0);
                    _encryptedPdf[i].Pages[pageIndex].Canvas.DrawText(shareText, fontBase, textBrush, bounds.Left, bounds.Top);
                }
            }

            // Drawn after the highlight so the text sits on top of the rectangle.
            _decryptedPDF.Pages[pageIndex].Canvas.DrawText(decryptedText, fontBase, textBrush, bounds.Left, bounds.Top);
        }
    }

    for (int i = 0; i < _encryptedPdf.Count; i++)
    {
        _encryptedPdf[i].Save(_encryptedFilePath + (i + 1) + ".pdf");
    }

    _decryptedPDF.Save(_decryptedFilePath);
}