Example #1
        /// <summary>
        /// Builds a <see cref="DocumentJSON.Document"/> from <paramref name="docCore"/>: every table cell,
        /// every picture (keyed by the hash of its bytes) and every non-empty paragraph of each section is
        /// turned into its JSON counterpart, stamped with today's date and status "n".
        /// </summary>
        /// <remarks>
        /// NOTE(review): this listing appears to have lost its brace-only and <c>else</c> lines, and possibly
        /// a few statements (list additions, the final return). Nesting is shown by indentation only; the
        /// visible statements are kept as they are and nothing missing has been re-invented.
        /// </remarks>
        /// <param name="docCore">Source document whose sections, tables, paragraphs and pictures are walked.</param>
        /// <param name="version">Version number stamped on the document, its images and its paragraphs.</param>
        /// <returns>The document being populated (the return statement itself is not visible in this excerpt).</returns>
        public static DocumentJSON.Document CreateJSON(DocumentCore docCore, int version)
            DocumentJSON.Document docJSON = new DocumentJSON.Document();
            // "MM" is the month specifier; the lowercase "mm" used previously formats the minute,
            // which produced year + minute + day instead of a calendar date.
            string now = DateTime.Now.ToString("yyyyMMdd");

            docJSON.date    = now;
            docJSON.version = version;
            // Accumulators for everything found in the document.
            List <DocumentJSON.Paragraph> parJSONList   = new List <DocumentJSON.Paragraph>();
            List <DocumentJSON.Image>     imageJSONList = new List <DocumentJSON.Image>();
            List <DocumentJSON.Cell>      cellJSONList  = new List <DocumentJSON.Cell>();
            // Running ids; the *Loop flags record whether at least one picture / paragraph was seen,
            // so the "last id" fields are only written when an id was actually handed out.
            int  parId   = 0;
            int  imgId   = 0;
            bool imgLoop = false;
            bool parLoop = false;
            int  tableId = 0;

            foreach (Section sec in docCore.GetChildElements(false, ElementType.Section))
                // Tables: one JSON cell per table cell, hashed over the UTF-8 bytes of its content.
                foreach (Table t in sec.GetChildElements(false, ElementType.Table))
                    foreach (TableRow r in t.GetChildElements(false, ElementType.TableRow))
                        foreach (TableCell c in r.GetChildElements(false, ElementType.TableCell))
                            DocumentJSON.Cell cell = new DocumentJSON.Cell();
                            cell.hash    = CalculateHash(Encoding.UTF8.GetBytes(c.Content.ToString()));
                            cell.content = c.Content.ToString();
                            // NOTE(review): cells get a fixed version 1 rather than `version` - confirm this is intended.
                            cell.version = 1;
                            cell.date    = now;
                            cell.status  = "n";
                            // NOTE(review): no statement adding `cell` to cellJSONList is visible in this excerpt.
                docJSON.cells = cellJSONList.ToArray();
                // Paragraphs of the section: pictures first, then the text itself.
                foreach (Paragraph par in sec.GetChildElements(false, ElementType.Paragraph))
                    // Pictures are identified by the hash of their raw bytes.
                    foreach (Picture p in par.GetChildElements(false, ElementType.Picture))
                        imgLoop = true;
                        // Take the picture's stream directly; the original allocated an empty
                        // MemoryStream first and overwrote it on the next line.
                        MemoryStream stream = p.ImageData.GetStream();
                        BinaryReader binaryReader = new BinaryReader(stream);
                        Byte[]       data         = binaryReader.ReadBytes((int)stream.Length);
                        string       hash         = Document.CalculateHash(data);
                        // First time this hash is seen: describe a new image.
                        if (!imageJSONList.Any(x => x.hash.Equals(hash)))
                            DocumentJSON.Image image = new DocumentJSON.Image();
                            image.hash = hash;
                            image.date = now;
                            image.numberOfRepetition = 0;
                            image.id      = imgId++;
                            image.status  = "n";
                            image.version = version;
                            // NOTE(review): presumably the `else` branch (hash already known) starts here - confirm;
                            // what is done with the looked-up image is not visible in this excerpt.
                            DocumentJSON.Image image = imageJSONList.First(x => x.hash.Equals(hash));
                    // Only paragraphs with actual text are processed.
                    if (!par.Content.ToString().Equals("\r\n") && !par.Content.ToString().Equals(""))
                        parLoop = true;
                        DocumentJSON.Paragraph parJSON = new DocumentJSON.Paragraph();
                        // Strip the "trial" marker and, when present, the whole trial banner text.
                        parJSON.content = par.Content.ToString().Replace("trial", "");
                        if (parJSON.content.Contains("Created by the  version of Document .Net!"))
                            parJSON.content = parJSON.content.Replace("Created by the  version of Document .Net!\r\nThe  version sometimes inserts \"\" into random places.\r\nGet the full version of Document .Net.\r\n", "");
                        parJSON.hash     = CalculateHash(Encoding.UTF8.GetBytes(parJSON.content));
                        parJSON.id       = parId++;
                        parJSON.version  = version;
                        parJSON.date     = now;
                        parJSON.status   = "n";
                        parJSON.sentence = null;
                        if (!parJSON.content.Equals(""))
                            // NOTE(review): the body of this check is not visible in this excerpt -
                            // presumably it appends parJSON to parJSONList; confirm.
            docJSON.images = imageJSONList.ToArray();
            // parId / imgId are post-incremented on use, so step back once to get the last id handed out.
            if (parLoop)
                docJSON.lastParId = --parId;
            if (imgLoop)
                docJSON.lastImageId = --imgId;
            docJSON.paragraphs = parJSONList.ToArray();
Example #2
        /// <summary>
        /// Classifies every paragraph of <paramref name="newDocument"/> against <paramref name="oldDocument"/>
        /// by setting its status, and carries id / version / date over from the matching old paragraph:
        /// an identical hash gives "o" (or "d" when the old paragraph was already "d"), a sentence-overlap
        /// ratio above <c>paragraphSensivity</c> gives "m", anything else gives "n" with a fresh id.
        /// Old paragraphs still left in the working list and not already "d" are then marked "d".
        /// </summary>
        /// <remarks>
        /// Both arguments are modified: <paramref name="newDocument"/> gets a new <c>lastParId</c> and
        /// <c>paragraphs</c> array, and paragraph objects of <paramref name="oldDocument"/> may have their
        /// status, version and date changed.
        /// NOTE(review): this listing appears to have lost its brace-only and <c>else</c> lines, and possibly
        /// a few statements (e.g. list additions, a loop counter increment, the final return). Nesting is
        /// shown by indentation only; the visible statements are kept and nothing missing has been re-invented.
        /// </remarks>
        /// <param name="oldDocument">Previous version of the document, used as the reference.</param>
        /// <param name="newDocument">Current version of the document; updated in place.</param>
        /// <returns>The updated document (the return statement itself is not visible in this excerpt).</returns>
        public static DocumentJSON.Document CompareParagraphs(DocumentJSON.Document oldDocument, DocumentJSON.Document newDocument)
            //DocumentJSON.Document refDoc = getJSONObject(docId, version - 1);
            // "MM" is the month specifier; the lowercase "mm" used previously formats the minute,
            // which produced year + minute + day instead of a calendar date.
            string now = DateTime.Now.ToString("yyyyMMdd");
            List <DocumentJSON.Paragraph> newParList = new List <DocumentJSON.Paragraph>(newDocument.paragraphs);
            List <DocumentJSON.Paragraph> oldParList = new List <DocumentJSON.Paragraph>(oldDocument.paragraphs);

            // New ids continue from the last id used by the old document.
            newDocument.lastParId = oldDocument.lastParId;
            foreach (DocumentJSON.Paragraph par in newParList)
                // Unchanged paragraph: an old paragraph with the same hash exists.
                if (oldParList.Any(x => x.hash.Equals(par.hash)))
                    // NOTE(review): the two assignments below look like the two arms of an if/else
                    // ("o" when the old paragraph is not deleted, "d" otherwise) - confirm.
                    if (!oldParList.First(x => x.hash.Equals(par.hash)).status.Equals("d"))
                        par.status = "o";
                        par.status = "d";
                    // Inherit identity and stamps from the matching old paragraph.
                    DocumentJSON.Paragraph refPar = oldParList.First(x => x.hash == par.hash);
                    par.version = refPar.version;
                    par.date    = refPar.date;
                    par.id      = refPar.id;
                // No identical hash: decide between "modified" and "new".
                    // Split into sentences on '.' and look for each one inside the old paragraphs.
                    // NOTE(review): Split('.') yields empty entries (e.g. after a trailing '.'), and
                    // Contains("") is true for every string, so such entries match every old paragraph - confirm intended.
                    string[] sentences = par.content.Split('.');
                    List <DocumentJSON.Sentence> sentenceList = new List <DocumentJSON.Sentence>();
                    // Per old-paragraph hit counter: Id = old paragraph id, Text = number of matching sentences.
                    List <KeyVal <int, int> > similarityList = new List <KeyVal <int, int> >();
                    // Old paragraphs containing the sentence currently being checked.
                    List <DocumentJSON.Paragraph> refPars = new List <DocumentJSON.Paragraph>();
                    // A paragraph without an identical match is stamped with the new document's version and today's date.
                    par.version = newDocument.version;
                    par.date    = now;
                    // Count, per old paragraph, how many sentences of this paragraph it contains.
                    foreach (string s in sentences)
                        refPars = oldParList.Where(x => x.content.Contains(s.Replace(".", ""))).ToList();
                        if (refPars.Count > 0)
                            foreach (DocumentJSON.Paragraph refPar in refPars)
                                // NOTE(review): increment vs. first insertion look like the two arms of an if/else - confirm.
                                if (similarityList.Any(x => x.Id == refPar.id))
                                    similarityList.First(x => x.Id == refPar.id).Text += 1;
                                    similarityList.Add(new KeyVal <int, int>(refPar.id, 1));
                            DocumentJSON.Sentence stc = new DocumentJSON.Sentence()
                                content = s, status = "m"
                    // Pick the old paragraph with the highest hit count; (-1, -1) means "none found".
                    KeyVal <int, int> mostSimilarPar = new KeyVal <int, int>(-1, -1);
                    foreach (KeyVal <int, int> key in similarityList)
                        if (key.Text > mostSimilarPar.Text)
                            mostSimilarPar = key;

                    // Modified when the share of matching sentences exceeds paragraphSensivity
                    // (the original comment speaks of 50%; the threshold itself is defined outside this excerpt).
                    if (mostSimilarPar.Text / ((double)sentences.Length) > paragraphSensivity)
                        DocumentJSON.Paragraph oldPar = oldParList.First(x => x.id == mostSimilarPar.Id);
                        string[] oldSentences         = oldPar.content.Split('.');
                        // Old sentences that no longer occur in the new paragraph are recorded with status "d".
                        foreach (string s in oldSentences)
                            int i = 0;
                            foreach (string c in sentences)
                                sentences[i] = c.Trim();
                            if (!sentences.Contains(s.Trim()))
                                sentenceList.Add(new DocumentJSON.Sentence()
                                    content = s, status = "d"

                        par.status   = "m";
                        par.id       = mostSimilarPar.Id;
                        par.sentence = sentenceList.ToArray();
                        // Drop the matched old paragraph from the working list when it is among the latest matches.
                        if (refPars.Any(x => x.id == mostSimilarPar.Id))
                            oldParList.Remove(refPars.First(x => x.id == mostSimilarPar.Id));
                    // Otherwise (similarity at or below the threshold) the paragraph is new.
                        par.status = "n";
                        // New paragraphs take the next free id.
                        par.id = ++newDocument.lastParId;
            // Old paragraphs still in the working list and not already deleted are marked as deleted.
            // NOTE(review): the original comment says they are "added" to the document; no such add is visible here.
            foreach (DocumentJSON.Paragraph par in oldParList)
                if (!par.status.Equals("d"))
                    par.status  = "d";
                    par.version = newDocument.version;
                    par.date    = now;
            newDocument.paragraphs = newParList.ToArray();