Exemple #1
0
        /// <summary>
        /// Reads the Word document at <paramref name="filepath"/> and lists in <c>lstResult</c> every
        /// top-level body element for which <c>DataExtractUtilities.IsMatchColor(element, "green")</c>
        /// reports a match. Matching elements found after the first page break are also collected
        /// into <paramref name="nTexts"/>.
        /// </summary>
        /// <param name="nTexts">
        /// Receives the inner text of matching, non-blank elements located after the first page break.
        /// Every entry of this list (including entries supplied by the caller) is then added to
        /// <c>lstResult</c> unless it is blank or consists only of ",", "." or ":".
        /// </param>
        /// <param name="pageCount">Fallback value returned when the document stores no usable page count.</param>
        /// <param name="pageviseContent">Receives one entry per detected page break, keyed by 1-based page number.</param>
        /// <param name="filepath">Path of the document to open.</param>
        /// <returns>
        /// The page count stored in the document's extended file properties, or
        /// <paramref name="pageCount"/> unchanged when that property is missing or not a number.
        /// </returns>
        private int ExtractColorData(List <string> nTexts, int pageCount, Dictionary <int, string> pageviseContent, string filepath)
        {
            const string PageBreakMarkup = "<w:br w:type=\"page\" />";

            // Opened read-only: this method only inspects the document, and an editable package
            // can be written back to disk when it is disposed.
            using (WordprocessingDocument wordDocument = WordprocessingDocument.Open(filepath, false))
            {
                // The extended-properties part and its Pages element are optional, so every step is null-checked.
                var propertiesPart = wordDocument.ExtendedFilePropertiesPart;
                var pages = (propertiesPart != null && propertiesPart.Properties != null)
                    ? propertiesPart.Properties.Pages
                    : null;
                int storedPageCount;
                if (pages != null && int.TryParse(pages.Text, out storedPageCount))
                {
                    pageCount = storedPageCount;
                }

                var mainPart = wordDocument.MainDocumentPart;
                Body body = (mainPart != null && mainPart.Document != null) ? mainPart.Document.Body : null;

                if (body != null)
                {
                    int pageNumber = 1;
                    foreach (OpenXmlElement element in body.ChildElements)
                    {
                        bool hasPageBreak = element.InnerXml.IndexOf(PageBreakMarkup, StringComparison.OrdinalIgnoreCase) >= 0;
                        if (hasPageBreak)
                        {
                            // NOTE(review): no page text is accumulated in this method (the append was
                            // already commented out in the original), so each page is recorded with
                            // empty content and the final page is never recorded. Confirm whether
                            // per-page text is wanted here.
                            pageviseContent.Add(pageNumber, string.Empty);
                            pageNumber++;
                            continue;
                        }

                        string result       = DataExtractUtilities.GetTagData(element.InnerText);
                        var    isMatchColor = DataExtractUtilities.IsMatchColor(element, "green");
                        if (isMatchColor)
                        {
                            lstResult.Items.Add("Green Color Text --> " + result);

                            // Elements before the first page break are listed above but not collected.
                            if (pageNumber > 1 && !string.IsNullOrWhiteSpace(result))
                            {
                                nTexts.Add(element.InnerText);
                            }
                        }
                    }
                }
            }

            foreach (string ntextItem in nTexts)
            {
                if (ntextItem == null)
                {
                    continue;
                }

                string trimmed = ntextItem.Trim();
                bool isNoise = trimmed.Length == 0 || trimmed == "," || trimmed == "." || trimmed == ":";
                if (!isNoise)
                {
                    lstResult.Items.Add(ntextItem);
                }
            }

            return pageCount;
        }
Exemple #2
0
        /// <summary>
        /// Reads the Word document at <paramref name="filepath"/>, splits the tag-stripped text of its
        /// top-level body elements into pages at each page break, and lists in <c>lstResult</c> the
        /// text fragments collected after the first page break.
        /// </summary>
        /// <param name="nTexts">
        /// Receives the fragments returned by <c>DataExtractUtilities.GetNonStrikeTextWithOutTags</c>
        /// for every non-blank element after the first page break. Every entry of this list
        /// (including entries supplied by the caller) is then added to <c>lstResult</c>, skipping
        /// the "&lt;" and "&gt;" entries, everything between them, and entries that are blank or
        /// consist only of ",", "." or ":".
        /// </param>
        /// <param name="pageCount">Fallback value returned when the document stores no usable page count.</param>
        /// <param name="pageviseContent">Receives the accumulated text of each page, keyed by 1-based page number.</param>
        /// <param name="filepath">Path of the document to open.</param>
        /// <returns>
        /// The page count stored in the document's extended file properties, or
        /// <paramref name="pageCount"/> unchanged when that property is missing or not a number.
        /// </returns>
        private int ExtractDataFRomDoc(List <string> nTexts, int pageCount, Dictionary <int, string> pageviseContent, string filepath)
        {
            const string PageBreakMarkup = "<w:br w:type=\"page\" />";

            // The package is disposed by the using block so the file handle is released, and it is
            // opened read-only because this method only inspects the document.
            using (WordprocessingDocument wordDocument = WordprocessingDocument.Open(filepath, false))
            {
                // The extended-properties part and its Pages element are optional, so every step is null-checked.
                var propertiesPart = wordDocument.ExtendedFilePropertiesPart;
                var pages = (propertiesPart != null && propertiesPart.Properties != null)
                    ? propertiesPart.Properties.Pages
                    : null;
                int storedPageCount;
                if (pages != null && int.TryParse(pages.Text, out storedPageCount))
                {
                    pageCount = storedPageCount;
                }

                var mainPart = wordDocument.MainDocumentPart;
                Body body = (mainPart != null && mainPart.Document != null) ? mainPart.Document.Body : null;

                if (body != null)
                {
                    int pageNumber = 1;
                    StringBuilder pageContentBuilder = new StringBuilder();

                    foreach (OpenXmlElement element in body.ChildElements)
                    {
                        bool hasPageBreak = element.InnerXml.IndexOf(PageBreakMarkup, StringComparison.OrdinalIgnoreCase) >= 0;
                        if (hasPageBreak)
                        {
                            // The element carrying the break closes the current page; its own text is not recorded.
                            pageviseContent.Add(pageNumber, pageContentBuilder.ToString());
                            pageNumber++;
                            pageContentBuilder.Clear();
                            continue;
                        }

                        string result = DataExtractUtilities.RemoveTagsFromData(element.InnerText);
                        pageContentBuilder.Append(result);

                        // Fragments are collected only from the second page onwards.
                        if (pageNumber > 1 && !string.IsNullOrWhiteSpace(result))
                        {
                            nTexts.AddRange(DataExtractUtilities.GetNonStrikeTextWithOutTags(element));
                        }
                    }

                    // Flush whatever follows the last page break as the final page.
                    if (pageContentBuilder.Length > 0)
                    {
                        pageviseContent.Add(pageNumber, pageContentBuilder.ToString());
                    }
                }
            }

            bool insideTag = false;
            foreach (string ntextItem in nTexts)
            {
                if (ntextItem == ">")
                {
                    insideTag = false;
                    continue;
                }

                if (ntextItem == "<")
                {
                    insideTag = true;
                    continue;
                }

                if (insideTag || ntextItem == null)
                {
                    continue;
                }

                string trimmed = ntextItem.Trim();
                bool isNoise = trimmed.Length == 0 || trimmed == "," || trimmed == "." || trimmed == ":";
                if (!isNoise)
                {
                    lstResult.Items.Add(ntextItem);
                }
            }

            return pageCount;
        }
Exemple #3
0
        /// <summary>
        /// Scans the top-level body elements of the Word document at <paramref name="filepath"/> for
        /// the "[IF " and " END IF]" markers and lists the extracted condition text in <c>lstResult</c>.
        /// </summary>
        /// <param name="nTexts">
        /// Not modified by this method. Each entry is added to <c>lstResult</c> after the scan unless
        /// it is blank or consists only of ",", "." or ":".
        /// </param>
        /// <param name="pageCount">Not used by this method; kept so the signature matches its sibling extractors.</param>
        /// <param name="pageviseContent">Not used by this method; kept so the signature matches its sibling extractors.</param>
        /// <param name="filepath">Path of the document to open.</param>
        /// <returns>Always 0.</returns>
        private int GetTextBySearch(List <string> nTexts, int pageCount, Dictionary <int, string> pageviseContent, string filepath)
        {
            String[] startCondition = new String[] { "[IF " };
            String[] endCondition   = new String[] { " END IF]" };
            StringBuilder conditionText = new StringBuilder();

            // Opened read-only: this method only inspects the document, and an editable package
            // can be written back to disk when it is disposed.
            using (WordprocessingDocument wordDocument = WordprocessingDocument.Open(filepath, false))
            {
                var mainPart = wordDocument.MainDocumentPart;
                Body body = (mainPart != null && mainPart.Document != null) ? mainPart.Document.Body : null;

                if (body != null)
                {
                    foreach (OpenXmlElement element in body.ChildElements)
                    {
                        Console.WriteLine("***InnerText  --> " + element.InnerText);

                        if (DataExtractUtilities.IsMatchCondition(element, startCondition))
                        {
                            // Concatenate the marker string itself. Concatenating the array would emit
                            // "System.String[]", and the text must not be used as a format string
                            // because braces in the document would throw FormatException.
                            string opening = startCondition[0] + DataExtractUtilities.ExtractConditionText(element, startCondition);
                            conditionText.Append(opening);
                            lstResult.Items.Add(opening);
                        }

                        if (DataExtractUtilities.IsMatchCondition(element, endCondition))
                        {
                            conditionText.Append(DataExtractUtilities.ExtractEndConditionText(element, endCondition) + endCondition[0]);
                            lstResult.Items.Add(conditionText.ToString());

                            // Start the next condition from a clean buffer so earlier conditions are not repeated.
                            conditionText.Clear();
                        }
                    }
                }
            }

            foreach (string ntextItem in nTexts)
            {
                if (ntextItem == null)
                {
                    continue;
                }

                string trimmed = ntextItem.Trim();
                bool isNoise = trimmed.Length == 0 || trimmed == "," || trimmed == "." || trimmed == ":";
                if (!isNoise)
                {
                    lstResult.Items.Add(ntextItem);
                }
            }

            // NOTE(review): the sibling extractors return the document's page count; this one has
            // always returned 0. Left unchanged because callers are not visible here - confirm intent.
            return 0;
        }