Exemple #1
0
        /// <summary>
        /// Merges consecutive entries of <paramref name="rectAndTextList"/> that share the same
        /// <c>Rect.Bottom</c> value into one entry and appends every resulting entry to
        /// <c>myPoints</c>.
        /// </summary>
        /// <remarks>
        /// When two neighbouring entries are merged, the later entry's text is appended to the
        /// earlier one (separated by a single space) and the earlier entry's <c>Rect.Top</c> and
        /// <c>Rect.Right</c> are overwritten with the later entry's values.
        /// NOTE(review): the merge writes into the first entry of each run; if
        /// <c>TextWithRect</c> is a reference type this also alters the object held by the
        /// caller's list - confirm that this is intended.
        /// </remarks>
        /// <param name="rectAndTextList">
        /// Entries to merge, in the order they should be compared. A null or empty list adds
        /// nothing to <c>myPoints</c>.
        /// </param>
        private void generateFinalRect(List <TextWithRect> rectAndTextList)
        {
            // Guard: without it an empty list would fail on the element-0 access below.
            if (rectAndTextList == null || rectAndTextList.Count == 0)
            {
                return;
            }

            // A single-element list needs no special case: the loop simply does not run
            // and the lone entry is added after it.
            TextWithRect current = rectAndTextList[0];
            for (int i = 1; i < rectAndTextList.Count; i++)
            {
                TextWithRect next = rectAndTextList[i];
                if (next.Rect.Bottom == current.Rect.Bottom) // same line: extend the running entry
                {
                    current.Text      += " " + next.Text;
                    current.Rect.Top   = next.Rect.Top;
                    current.Rect.Right = next.Rect.Right;
                }
                else
                {
                    // Line changed: emit the finished entry and start a new run.
                    this.myPoints.Add(current);
                    current = next;
                }
            }

            // Emit the last run, which the loop never flushes.
            this.myPoints.Add(current);
        }
Exemple #2
0
        /// <summary>
        /// Merges all entries of <paramref name="tmpList"/> into the first entry and, unless the
        /// merged text is filtered out, records it in <c>docText</c> and appends its normalised
        /// text to <c>docTextString</c>.
        /// </summary>
        /// <remarks>
        /// The merged entry's <c>Rect.Right</c> and <c>Rect.Top</c> end up as those of the last
        /// entry, and its text is the concatenation of all entry texts without separators.
        /// The entry is stored in <c>docText</c> under the current value of <c>index</c>, which
        /// is then advanced by the length of the lower-cased, trimmed text plus one for the
        /// space appended to <c>docTextString</c>.
        /// Chunks whose lower-cased, trimmed text is empty or equals "table", "figure",
        /// ".rev." or "inc." are discarded.
        /// </remarks>
        /// <param name="tmpList">Entries to merge, in order. A null or empty list is ignored.</param>
        private void mergeAndStoreChunk(List <TextWithRect> tmpList)
        {
            // Guard: without it an empty list would fail on the element-0 access below.
            if (tmpList == null || tmpList.Count == 0)
            {
                return;
            }

            TextWithRect mergedChunk = tmpList[0];

            for (int i = 1; i < tmpList.Count; i++)
            {
                TextWithRect nowChunk = tmpList[i];
                mergedChunk.Rect.Right = nowChunk.Rect.Right;
                mergedChunk.Rect.Top   = nowChunk.Rect.Top;
                mergedChunk.Text      += nowChunk.Text;
            }

            string chunkText = mergedChunk.Text.ToLower().Trim();

            // Skip empty chunks and the fixed set of excluded labels.
            if (chunkText.Length == 0 || chunkText.Equals("table") || chunkText.Equals("figure") || chunkText.Equals(".rev.") || chunkText.Equals("inc."))
            {
                return;
            }

            // The key is the offset in docTextString at which this chunk's text starts.
            docText.Add(index, mergedChunk);
            docTextString += chunkText + " ";
            index         += chunkText.Length + 1;
        }
Exemple #3
0
        /// <summary>
        /// Searches <c>docTextString</c> for every entry of <c>topicTerms</c> and, for each term
        /// with at least one accepted occurrence, passes the matching <c>docText</c> entries to
        /// <c>generateFinalRect</c>.
        /// </summary>
        /// <remarks>
        /// An occurrence is accepted only when the character following it is 's' or is not an
        /// ASCII letter, or when the occurrence ends at the end of <c>docTextString</c>.
        /// For a single-word term the first <c>docText</c> entry whose key range covers the
        /// match position is collected. For a multi-word term the covering entry and the
        /// entries that follow it (one per additional word, as far as entries exist) are
        /// collected when their combined lower-cased text contains the term followed by
        /// " ", "s", ".", "]" or "es".
        /// Null or empty terms are skipped.
        /// </remarks>
        private void matchTopicTerms()
        {
            // docText is not modified in this method, so the key list is taken once
            // instead of being rebuilt for every occurrence.
            List <int> keys     = docText.Keys.ToList();
            int        keyCount = keys.Count;

            foreach (string term in topicTerms)
            {
                // An empty term is "found" at every start index without ever advancing
                // the search position, which would never terminate.
                if (string.IsNullOrEmpty(term))
                {
                    continue;
                }

                List <TextWithRect> rectAndTextList = new List <TextWithRect>();

                // Split yields a single element when the term contains no space.
                string[] subTerms     = term.Split(' ');
                int      subTermCount = subTerms.Length;

                int tmpIndex = docTextString.IndexOf(term, 0);

                while (tmpIndex >= 0)
                {
                    int  afterMatch = tmpIndex + term.Length;
                    bool atTextEnd  = afterMatch >= docTextString.Length;

                    // A match ending at the very end of the text has no following
                    // character and therefore counts as ending on a boundary.
                    if (atTextEnd || isTermBoundary(docTextString[afterMatch]))
                    {
                        for (int i = 0; i < keyCount; i++)
                        {
                            int          curKey   = keys[i];
                            TextWithRect textRect = docText[curKey];
                            string       curText  = textRect.Text.ToLower();

                            // Only entries whose [key, key + length) range covers the match
                            // position and that were not collected already are of interest.
                            bool coversMatch = curKey <= tmpIndex && curKey + curText.Length > tmpIndex;
                            if (!coversMatch || rectAndTextList.Contains(textRect))
                            {
                                continue;
                            }

                            if (subTermCount == 1)
                            {
                                rectAndTextList.Add(textRect);
                                break;
                            }

                            if (!curText.Contains(subTerms[0]))
                            {
                                continue;
                            }

                            // Gather this entry plus one following entry per additional
                            // word, stopping at the end of the key list.
                            List <TextWithRect> rectList      = new List <TextWithRect> { textRect };
                            string              tmpTextAmount = curText + " ";

                            for (int j = 1; j < subTermCount && i + j < keyCount; j++)
                            {
                                TextWithRect following = docText[keys[i + j]];
                                rectList.Add(following);
                                tmpTextAmount += following.Text.ToLower() + " ";
                            }

                            if (tmpTextAmount.Contains(term + " ") || tmpTextAmount.Contains(term + "s") || tmpTextAmount.Contains(term + ".") || tmpTextAmount.Contains(term + "]") || tmpTextAmount.Contains(term + "es"))
                            {
                                rectAndTextList.AddRange(rectList);
                            }
                        }
                    }

                    // Nothing can follow a match that already reaches the end of the text.
                    if (atTextEnd)
                    {
                        break;
                    }

                    tmpIndex = docTextString.IndexOf(term, afterMatch);
                }

                if (rectAndTextList.Count > 0)
                {
                    generateFinalRect(rectAndTextList);
                }
            }
        }

        /// <summary>
        /// Returns true when <paramref name="c"/> may directly follow a matched term:
        /// it is 's' or it is not an ASCII letter (a-z, A-Z, both ends inclusive).
        /// </summary>
        private static bool isTermBoundary(char c)
        {
            bool isAsciiLetter = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
            return c == 's' || !isAsciiLetter;
        }