// Parses the given stream and checks that the root element of the first parsed
        // segment has the same text and type as the expected tree's first segment.
        public async Task ParseTextTestAsync(Stream inputFile, DocumentTree expected)
        {
            var actual = await _parser.ParseFile(inputFile);

            var expectedRoot = expected.RootSegment.Children.First().RootElement;
            var actualRoot   = actual.RootSegment.Children.First().RootElement;

            // Assert.Equal takes (expected, actual); passing them the other way round
            // mislabels the two values in the failure message.
            Assert.Equal(expectedRoot.Text, actualRoot.Text);
            Assert.Equal(expectedRoot.Type, actualRoot.Type);
        }
        // Runs character-based chunking with a 1000-character limit over the parsed
        // document and compares the produced chunks with the expected ones.
        public void CharChunkingTest(DocumentTree parseResult, List <ChunkInfo> expectedChunks)
        {
            const int charLimit = 1000;

            IChunkerService chunker = new ChunkerService();
            List<ChunkInfo> produced = chunker.Chunk(parseResult, ChunkMethod.Char, charLimit, ElementType.Other);

            // Check the count first so a length mismatch is reported on its own,
            // then compare the chunks element by element with the custom comparer.
            Assert.Equal(expectedChunks.Count, produced.Count);
            Assert.Equal(expectedChunks, produced, new ChunkInfoComparer());
        }
        // Runs page-based chunking, capped at Constants.CustomTextPredictionMaxCharLimit
        // characters, and compares the produced chunks with the expected ones.
        public void PageChunkingTest(DocumentTree parseResult, List <ChunkInfo> expectedChunks)
        {
            IChunkerService chunker = new ChunkerService();
            List<ChunkInfo> produced = chunker.Chunk(
                parseResult,
                ChunkMethod.Page,
                Constants.CustomTextPredictionMaxCharLimit,
                ElementType.Other);

            // Check the count first so a length mismatch is reported on its own,
            // then compare the chunks element by element with the custom comparer.
            Assert.Equal(expectedChunks.Count, produced.Count);
            Assert.Equal(expectedChunks, produced, new ChunkInfoComparer());
        }
示例#4
0
        // Draws the panel inside `pos`: a search bar on top, the document tree in the
        // middle and a tool bar at the bottom. The tree view is created on first use.
        public void OnGUI(Rect pos)
        {
            m_Position = pos;

            // Tree area: inset by the border on both sides, below the search bar and
            // above the bottom tool strip.
            var docTreeRect = new Rect(
                m_Position.x + k_BorderWidth,
                m_Position.y + k_SearchBarHeight,
                m_Position.width - 2 * k_BorderWidth,
                m_Position.height - k_BorderWidth - k_ToolsHeight - k_SearchBarHeight);

            // Search bar: same horizontal extent as the tree, pinned to the top edge.
            var searchBarRect = new Rect(
                docTreeRect.x,
                m_Position.y,
                docTreeRect.width,
                k_SearchBarHeight);

            if (m_TreeView == null)
            {
                // Start from the default header layout and, when the stored state is
                // compatible, copy its serialized fields over the fresh defaults.
                var headerState = DocumentTree.CreateDefaultMultiColumnHeaderState(docTreeRect.width);
                if (MultiColumnHeaderState.CanOverwriteSerializedFields(m_MultiColumnHeaderState, headerState))
                {
                    MultiColumnHeaderState.OverwriteSerializedFields(m_MultiColumnHeaderState, headerState);
                }
                m_MultiColumnHeaderState = headerState;

                if (m_TreeViewState == null)
                {
                    m_TreeViewState = new TreeViewState();
                }

                m_TreeView = new DocumentTree(m_TreeViewState, new MultiColumnHeader(m_MultiColumnHeaderState));
            }

            OnGUISearchBar(searchBarRect);

            HandleHorizontalResize();

            m_TreeView.OnGUI(docTreeRect);

            // Tool bar: directly under the tree, separated from it by one border width.
            var bottomRect = new Rect(
                docTreeRect.x,
                docTreeRect.y + docTreeRect.height + k_BorderWidth,
                docTreeRect.width,
                k_ToolsHeight - k_BorderWidth);

            BottomToolBar(bottomRect);

            // Request a repaint from the parent while the splitter is being dragged.
            if (m_ResizingHorizontalSplitter)
            {
                m_Parent.Repaint();
            }
        }
        // Supplies the theory row for the character-chunking test: a DocumentTree
        // deserialized from the lorem-ipsum fixture, paired with the chunk list
        // deserialized from the matching expected-output fixture.
        public static TheoryData CharChunkingTestData()
        {
            // Build the paths with Path.Combine: a hard-coded '\' is not a directory
            // separator on non-Windows hosts, so the fixtures would not be found there.
            string inputPath    = Path.Combine("TestData", "Chunker", "loremipsum-4.json");
            string expectedPath = Path.Combine("TestData", "Chunker", "CharChunking", "loremipsum-4_chunks.json");

            DocumentTree testInput1 = JsonConvert.DeserializeObject<DocumentTree>(File.ReadAllText(inputPath));

            // Deserialize as List<ChunkInfo> so the row's static type matches the
            // List<ChunkInfo> parameter declared by CharChunkingTest.
            List<ChunkInfo> output1 = JsonConvert.DeserializeObject<List<ChunkInfo>>(File.ReadAllText(expectedPath));

            return new TheoryData<DocumentTree, List<ChunkInfo>>
            {
                { testInput1, output1 }
            };
        }
        /*
         *  No-chunking strategy:
         *      The text produced for every top-level segment is concatenated, trimmed,
         *      and returned as a single chunk numbered 1. The chunk's page span runs
         *      from the first segment's page to the last segment's page.
         */
        private List <ChunkInfo> ApplyNoChunking(DocumentTree documentTree)
        {
            var segments = documentTree.RootSegment.Children;
            var combined = new StringBuilder();

            foreach (var segment in segments)
            {
                combined.Append(ApplyNoChunkingInternal(segment));
            }

            var wholeText = combined.ToString().Trim();

            // Both page values come out null when there are no segments.
            var startPage = segments.FirstOrDefault()?.RootElement.PageNumber;
            var endPage   = segments.LastOrDefault()?.RootElement.PageNumber;

            var chunks = new List<ChunkInfo>();
            chunks.Add(new ChunkInfo(chunkNumber: 1, wholeText, startPage, endPage));
            return chunks;
        }
        /*
         *  Section chunking entry point:
         *      Sets up the running state (text buffer, chunk number, page span and the
         *      "can end chunk" flag), hands the root segment to ChunkBySectionInternal,
         *      and finally emits whatever text is still buffered as the last chunk.
         */
        private List <ChunkInfo> ChunkBySection(DocumentTree documentTree, ElementType chunkLevel, int charLimit)
        {
            var chunks      = new List<ChunkInfo>();
            var pendingText = new StringBuilder();
            var chunkNumber = 1;

            // Set to true once the current chunk contains a simple element.
            var chunkIsClosable = false;

            // Start and end page both begin at the first segment's page (null when
            // the document has no segments).
            var startPage = documentTree.RootSegment.Children.FirstOrDefault()?.RootElement.PageNumber;
            var endPage   = startPage;

            ChunkBySectionInternal(
                documentTree.RootSegment,
                pendingText,
                chunks,
                ref chunkIsClosable,
                chunkLevel,
                charLimit,
                ref chunkNumber,
                ref startPage,
                ref endPage);

            // Nothing left in the buffer: every chunk has already been emitted.
            if (pendingText.Length <= 0)
            {
                return chunks;
            }

            chunks.Add(new ChunkInfo(chunkNumber, pendingText.ToString(), startPage, endPage));
            return chunks;
        }
示例#8
0
        // Builds the tree model for the nodes directly under ParentID and flags
        // whether that level, and the level below its first node, have any entries.
        public DocumentTree GetList(Guid ParentID)
        {
            var model = new DocumentTree
            {
                IsLastTree      = true, // by default the node itself is the last level
                IsChildLastTree = true, // ...and therefore has no children of its own
                TreeList        = _cDocumentTreeAdapter.GetDocumentTreeListByID(ParentID).ToList()
            };

            // No children: both defaults stand.
            if (model.TreeList.Count <= 0)
            {
                return model;
            }

            model.IsLastTree = false;

            // Probe only the first child to see whether it has children of its own.
            var firstChildChildren = _cDocumentTreeAdapter.GetDocumentTreeListByID(model.TreeList[0].ID).ToList();
            if (firstChildChildren.Count > 0)
            {
                model.IsChildLastTree = false;
            }

            return model;
        }
示例#9
0
        // Initializes the control: hooks the Load and property-changed handlers and,
        // when a ManagementControl instance exists, creates the backing document and
        // titles and expands the document tree. Otherwise a fatal message is logged.
        public DocumentControl()
        {
            InitializeComponent();

            Load += DocumentControl_Load;

            var manager = ManagementControl.Instance;
            if (manager == null)
            {
                Log.Fatal("CAD-система не инициализирована!");
            }
            else
            {
                UsedDocument = manager.CreateDocument(this);

                // Title the tree "untitled" followed by the new document's id.
                DocumentTree.Text = "untitled" + UsedDocument.DocumentID;
                DocumentTree.ExpandAll();
            }

            ElementProperties.PropertyValueChanged += ElementProperties_PropertyValueChanged;
        }
        // Dispatches to the chunking routine selected by chunkMethod.
        //   documentTree - the parsed document to split
        //   chunkMethod  - which strategy to apply
        //   charLimit    - passed to the Char, Page and Section strategies
        //   chunkLevel   - passed to the Section strategy only
        // Throws NotSupportedException for any other chunkMethod value.
        public List <ChunkInfo> Chunk(DocumentTree documentTree, ChunkMethod chunkMethod, int charLimit, ElementType chunkLevel)
        {
            if (chunkMethod == ChunkMethod.NoChunking)
            {
                return ApplyNoChunking(documentTree);
            }

            if (chunkMethod == ChunkMethod.Char)
            {
                return ChunkByCharacterLimit(documentTree, charLimit);
            }

            if (chunkMethod == ChunkMethod.Page)
            {
                return ChunkByPage(documentTree, charLimit);
            }

            if (chunkMethod == ChunkMethod.Section)
            {
                return ChunkBySection(documentTree, chunkLevel, charLimit);
            }

            throw new NotSupportedException($"The chunk type {chunkMethod} isn't supported.");
        }
        /*
         *  Page chunking
         *  Idea:
         *      Elements that belong to the same page are concatenated into one chunk,
         *      as long as the chunk stays within the character limit.
         *  Guarantee:
         *      1- A chunk never exceeds the character limit
         *  Special case:
         *      1- An element longer than the character limit
         *          - is split across several chunks
         */
        private List <ChunkInfo> ChunkByPage(DocumentTree documentTree, int charLimit)
        {
            // running state shared with the per-segment worker
            var chunks      = new List<ChunkInfo>();
            var buffer      = new StringBuilder();
            var chunkNumber = 1;
            var pageNumber  = documentTree.RootSegment.Children.FirstOrDefault()?.RootElement.PageNumber;

            // feed each top-level segment to the worker in document order
            foreach (var segment in documentTree.RootSegment.Children)
            {
                ChunkByPageInternal(segment, charLimit, chunks, ref chunkNumber, ref pageNumber, buffer);
            }

            // flush the text still buffered for the last page
            if (buffer.Length <= 0)
            {
                return chunks;
            }

            chunks.Add(new ChunkInfo(chunkNumber, buffer.ToString(), pageNumber, pageNumber));
            return chunks;
        }
        /*
         *  Character-limit chunking
         *  Idea:
         *      Elements are concatenated into the current chunk as long as the chunk
         *      stays within the character limit.
         *  Guarantee:
         *      1- A chunk never exceeds the character limit
         *  Special case:
         *      1- An element longer than the character limit
         *          - is split across several chunks
         */
        private List <ChunkInfo> ChunkByCharacterLimit(DocumentTree documentTree, int charLimit)
        {
            // running state shared with the per-segment worker
            var chunks      = new List<ChunkInfo>();
            var buffer      = new StringBuilder();
            var chunkNumber = 1;

            // start and end page both begin at the first segment's page
            var startPage = documentTree.RootSegment.Children.FirstOrDefault()?.RootElement.PageNumber;
            var endPage   = startPage;

            // feed each top-level segment to the worker in document order
            foreach (var segment in documentTree.RootSegment.Children)
            {
                ChunkByCharacterLimitInternal(segment, charLimit, chunks, ref chunkNumber, ref startPage, ref endPage, buffer);
            }

            // flush the text still buffered after the last segment
            if (buffer.Length <= 0)
            {
                return chunks;
            }

            chunks.Add(new ChunkInfo(chunkNumber, buffer.ToString(), startPage, endPage));
            return chunks;
        }
        // Parses ParentID as a Guid and returns the tree model that
        // C_DocumentTreeOperator builds for it. Guid.Parse throws when the
        // string is null or not a valid Guid.
        public DocumentTree GetDTreeListByID(string ParentID)
        {
            var parentGuid = Guid.Parse(ParentID);

            return C_DocumentTreeOperator.Instance.GetList(parentGuid);
        }
 // Theme-change handler: invalidates the tree's ItemsSource property and then
 // forces a layout update on the tree.
 private void OnCurrentThemeChanged(object sender, EventArgs e)
 {
     var tree = DocumentTree;

     tree.InvalidateProperty(ItemsControl.ItemsSourceProperty);
     tree.UpdateLayout();
 }
        // Parses the given file stream and checks the resulting tree against the
        // expected one using DocumentTreeComparer.
        public async Task ParseDocxTest(Stream file, DocumentTree expected)
        {
            DocumentTree parsed = await _parser.ParseFile(file);
            var treeComparer = new DocumentTreeComparer();

            Assert.Equal(expected, parsed, treeComparer);
        }
        // Maps a read-operation result to a document tree and checks it against the
        // expected tree using DocumentTreeComparer.
        public void MapMsReadParserServiceTest(ReadOperationResult parsingResult, DocumentTree expected)
        {
            DocumentTree mapped = _parser.MapMsReadResult(parsingResult);
            var treeComparer = new DocumentTreeComparer();

            Assert.Equal(expected, mapped, treeComparer);
        }