/// <summary>
/// Parses <paramref name="inputFile"/> and checks that the root element of the first child
/// segment carries the same text and type as the one in <paramref name="expected"/>.
/// </summary>
/// <param name="inputFile">Stream passed to the parser under test.</param>
/// <param name="expected">Tree whose first child segment supplies the expected text and type.</param>
public async Task ParseTextTestAsync(Stream inputFile, DocumentTree expected)
{
    var actual = await _parser.ParseFile(inputFile);

    // Resolve the compared elements once instead of re-walking the tree for every assertion.
    var actualElement = actual.RootSegment.Children.First().RootElement;
    var expectedElement = expected.RootSegment.Children.First().RootElement;

    // Assert.Equal takes (expected, actual); the original passed them reversed, which
    // swaps the "Expected"/"Actual" labels in the failure output.
    Assert.Equal(expectedElement.Text, actualElement.Text);
    Assert.Equal(expectedElement.Type, actualElement.Type);
}
/// <summary>
/// Chunks <paramref name="parseResult"/> with the character method and a 1000-character
/// limit, then compares the produced chunks with <paramref name="expectedChunks"/>.
/// </summary>
/// <param name="parseResult">Document tree fed to the chunker.</param>
/// <param name="expectedChunks">Chunks the chunker is expected to produce.</param>
public void CharChunkingTest(DocumentTree parseResult, List<ChunkInfo> expectedChunks)
{
    const int charLimit = 1000;
    IChunkerService chunker = new ChunkerService();

    List<ChunkInfo> producedChunks = chunker.Chunk(parseResult, ChunkMethod.Char, charLimit, ElementType.Other);

    // Compare the sizes first, then compare element-wise through the dedicated comparer.
    var chunkComparer = new ChunkInfoComparer();
    Assert.Equal(expectedChunks.Count, producedChunks.Count);
    Assert.Equal(expectedChunks, producedChunks, chunkComparer);
}
/// <summary>
/// Chunks <paramref name="parseResult"/> with the page method, using
/// Constants.CustomTextPredictionMaxCharLimit as the character limit, then compares the
/// produced chunks with <paramref name="expectedChunks"/>.
/// </summary>
/// <param name="parseResult">Document tree fed to the chunker.</param>
/// <param name="expectedChunks">Chunks the chunker is expected to produce.</param>
public void PageChunkingTest(DocumentTree parseResult, List<ChunkInfo> expectedChunks)
{
    IChunkerService chunker = new ChunkerService();

    List<ChunkInfo> producedChunks = chunker.Chunk(
        parseResult,
        ChunkMethod.Page,
        Constants.CustomTextPredictionMaxCharLimit,
        ElementType.Other);

    // Compare the sizes first, then compare element-wise through the dedicated comparer.
    var chunkComparer = new ChunkInfoComparer();
    Assert.Equal(expectedChunks.Count, producedChunks.Count);
    Assert.Equal(expectedChunks, producedChunks, chunkComparer);
}
/// <summary>
/// Lays out and draws the search bar, the document tree and the bottom tool bar inside
/// <paramref name="pos"/>. The tree view and its header state are created on the first
/// call in which <c>m_TreeView</c> is still null.
/// </summary>
/// <param name="pos">Rectangle available to this view; stored in <c>m_Position</c>.</param>
public void OnGUI(Rect pos)
{
    m_Position = pos;

    // Tree area: inset by the border on both sides, starting below the search bar and
    // leaving room for the border and the tools strip at the bottom.
    var docTreeRect = new Rect(
        m_Position.x + k_BorderWidth,
        m_Position.y + k_SearchBarHeight,
        m_Position.width - 2 * k_BorderWidth,
        m_Position.height - k_BorderWidth - k_ToolsHeight - k_SearchBarHeight);

    // The search bar sits directly above the tree and shares its horizontal extent.
    var searchBarRect = new Rect(
        docTreeRect.x,
        m_Position.y,
        docTreeRect.width,
        k_SearchBarHeight);

    if (m_TreeView == null)
    {
        var headerState = DocumentTree.CreateDefaultMultiColumnHeaderState(docTreeRect.width);

        // Carry the fields of the previously stored header state over to the fresh
        // default state when the two are compatible.
        if (MultiColumnHeaderState.CanOverwriteSerializedFields(m_MultiColumnHeaderState, headerState))
        {
            MultiColumnHeaderState.OverwriteSerializedFields(m_MultiColumnHeaderState, headerState);
        }

        m_MultiColumnHeaderState = headerState;

        if (m_TreeViewState == null)
        {
            m_TreeViewState = new TreeViewState();
        }

        m_TreeView = new DocumentTree(m_TreeViewState, new MultiColumnHeader(m_MultiColumnHeaderState));
    }

    OnGUISearchBar(searchBarRect);
    HandleHorizontalResize();
    m_TreeView.OnGUI(docTreeRect);

    // The tool bar occupies the strip below the tree, separated from it by one border width.
    var bottomRect = new Rect(
        docTreeRect.x,
        docTreeRect.y + docTreeRect.height + k_BorderWidth,
        docTreeRect.width,
        k_ToolsHeight - k_BorderWidth);
    BottomToolBar(bottomRect);

    // Keep repainting the parent while the splitter is being dragged.
    if (m_ResizingHorizontalSplitter)
    {
        m_Parent.Repaint();
    }
}
/// <summary>
/// Supplies the theory data for the character chunking test: a single row pairing the
/// deserialized input tree with the chunks expected from it.
/// </summary>
/// <returns>Theory data holding one (input tree, expected chunks) row.</returns>
public static TheoryData CharChunkingTestData()
{
    // Build the paths from segments rather than embedding '\' so the files are also
    // found on platforms whose directory separator is '/'.
    string chunkerDataDirectory = Path.Combine("TestData", "Chunker");

    string inputString1 = File.ReadAllText(Path.Combine(chunkerDataDirectory, "loremipsum-4.json"));
    DocumentTree testInput1 = JsonConvert.DeserializeObject<DocumentTree>(inputString1);

    string expectedString1 = File.ReadAllText(Path.Combine(chunkerDataDirectory, "CharChunking", "loremipsum-4_chunks.json"));
    IEnumerable<ChunkInfo> output1 = JsonConvert.DeserializeObject<IEnumerable<ChunkInfo>>(expectedString1);

    return new TheoryData<DocumentTree, IEnumerable<ChunkInfo>>
    {
        { testInput1, output1 }
    };
}
/*
 * No chunking: the text produced for every top-level segment is concatenated,
 * trimmed, and returned as one chunk numbered 1.
 */
private List<ChunkInfo> ApplyNoChunking(DocumentTree documentTree)
{
    var combined = new StringBuilder();
    foreach (var segment in documentTree.RootSegment.Children)
    {
        combined.Append(ApplyNoChunkingInternal(segment));
    }

    var wholeText = combined.ToString().Trim();

    // The chunk spans from the first segment's page to the last segment's page
    // (both are null when there are no segments).
    var startPage = documentTree.RootSegment.Children.FirstOrDefault()?.RootElement.PageNumber;
    var endPage = documentTree.RootSegment.Children.LastOrDefault()?.RootElement.PageNumber;

    var singleChunk = new ChunkInfo(chunkNumber: 1, wholeText, startPage, endPage);
    return new List<ChunkInfo> { singleChunk };
}
/*
 * Section chunking entry point: sets up the running state (text buffer, chunk number,
 * start/end page, "can end chunk" flag), hands the root segment to ChunkBySectionInternal,
 * and finally emits whatever text is still buffered as the last chunk.
 */
private List<ChunkInfo> ChunkBySection(DocumentTree documentTree, ElementType chunkLevel, int charLimit)
{
    var chunks = new List<ChunkInfo>();
    var pendingText = new StringBuilder();
    var canEndChunk = false; // true if current chunk contains a simple element
    var chunkNumber = 1;

    // Start and end page both begin at the page of the first top-level segment.
    var initialPage = documentTree.RootSegment.Children.FirstOrDefault()?.RootElement.PageNumber;
    var startPage = initialPage;
    var endPage = initialPage;

    ChunkBySectionInternal(
        documentTree.RootSegment,
        pendingText,
        chunks,
        ref canEndChunk,
        chunkLevel,
        charLimit,
        ref chunkNumber,
        ref startPage,
        ref endPage);

    // Nothing buffered: every chunk has already been emitted.
    if (pendingText.Length <= 0)
    {
        return chunks;
    }

    chunks.Add(new ChunkInfo(chunkNumber, pendingText.ToString(), startPage, endPage));
    return chunks;
}
/// <summary>
/// Loads the document-tree entries under <paramref name="ParentID"/> and flags whether
/// that level, and the level beneath its first entry, contain anything.
/// </summary>
/// <param name="ParentID">Identifier whose entries are fetched through the adapter.</param>
/// <returns>
/// A model holding the fetched list, with <c>IsLastTree</c> false when the list is
/// non-empty and <c>IsChildLastTree</c> false when the first entry has entries of its own.
/// </returns>
public DocumentTree GetList(Guid ParentID)
{
    DocumentTree result = new DocumentTree();
    result.IsLastTree = true;      // by default the node itself is treated as the last level
    result.IsChildLastTree = true; // a last-level node has no children, so this starts as true
    result.TreeList = _cDocumentTreeAdapter.GetDocumentTreeListByID(ParentID).ToList();

    if (result.TreeList.Count <= 0)
    {
        return result;
    }

    result.IsLastTree = false;

    // Take the first item of the children and check whether it has children of its own.
    List<C_DocumentTree> firstChildEntries = _cDocumentTreeAdapter.GetDocumentTreeListByID(result.TreeList[0].ID).ToList();
    if (firstChildEntries.Count > 0)
    {
        result.IsChildLastTree = false;
    }

    return result;
}
/// <summary>
/// Initializes the control, subscribes to its Load event and, when a ManagementControl
/// instance is available, creates the document shown by this control.
/// </summary>
public DocumentControl()
{
    InitializeComponent();
    Load += DocumentControl_Load;

    if (ManagementControl.Instance == null)
    {
        // No management instance: log that the CAD system has not been initialized.
        Log.Fatal("CAD-система не инициализирована!");
    }
    else
    {
        UsedDocument = ManagementControl.Instance.CreateDocument(this);

        // Caption the tree with a placeholder title based on the new document's id.
        DocumentTree.Text = "untitled" + UsedDocument.DocumentID;
        DocumentTree.ExpandAll();
    }

    ElementProperties.PropertyValueChanged += ElementProperties_PropertyValueChanged;
}
/// <summary>
/// Dispatches to the chunking routine selected by <paramref name="chunkMethod"/>.
/// </summary>
/// <param name="documentTree">Tree to split into chunks.</param>
/// <param name="chunkMethod">Chunking strategy to apply.</param>
/// <param name="charLimit">Character limit forwarded to the Char, Page and Section strategies.</param>
/// <param name="chunkLevel">Element level forwarded to the Section strategy only.</param>
/// <returns>The chunks produced by the selected strategy.</returns>
/// <exception cref="NotSupportedException">
/// <paramref name="chunkMethod"/> is not one of the handled values.
/// </exception>
public List<ChunkInfo> Chunk(DocumentTree documentTree, ChunkMethod chunkMethod, int charLimit, ElementType chunkLevel)
{
    if (chunkMethod == ChunkMethod.NoChunking)
    {
        return ApplyNoChunking(documentTree);
    }

    if (chunkMethod == ChunkMethod.Char)
    {
        return ChunkByCharacterLimit(documentTree, charLimit);
    }

    if (chunkMethod == ChunkMethod.Page)
    {
        return ChunkByPage(documentTree, charLimit);
    }

    if (chunkMethod == ChunkMethod.Section)
    {
        return ChunkBySection(documentTree, chunkLevel, charLimit);
    }

    throw new NotSupportedException($"The chunk type {chunkMethod} isn't supported.");
}
/*
 * Page chunking entry point.
 * Intended rules (the per-segment work is delegated to ChunkByPageInternal):
 *   - elements belonging to the same page are concatenated into one chunk;
 *   - a chunk never exceeds the character limit;
 *   - an element longer than the character limit is split across several chunks.
 * Whatever text is still buffered after the last segment is emitted as the final chunk.
 */
private List<ChunkInfo> ChunkByPage(DocumentTree documentTree, int charLimit)
{
    // running state shared with the per-segment helper
    var pageChunks = new List<ChunkInfo>();
    var chunkNumber = 1;
    var pageNumber = documentTree.RootSegment.Children.FirstOrDefault()?.RootElement.PageNumber;
    var buffer = new StringBuilder();

    // walk the top-level segments in order
    foreach (var segment in documentTree.RootSegment.Children)
    {
        ChunkByPageInternal(segment, charLimit, pageChunks, ref chunkNumber, ref pageNumber, buffer);
    }

    // flush the last page; it starts and ends on the current page number
    if (buffer.Length > 0)
    {
        var lastChunk = new ChunkInfo(chunkNumber, buffer.ToString(), pageNumber, pageNumber);
        pageChunks.Add(lastChunk);
    }

    return pageChunks;
}
/*
 * Character-limit chunking entry point.
 * Intended rules (the per-segment work is delegated to ChunkByCharacterLimitInternal):
 *   - elements are concatenated into a chunk as long as the character limit is respected;
 *   - an element longer than the character limit is split across several chunks.
 * Whatever text is still buffered after the last segment is emitted as the final chunk.
 */
private List<ChunkInfo> ChunkByCharacterLimit(DocumentTree documentTree, int charLimit)
{
    // running state shared with the per-segment helper
    var chunks = new List<ChunkInfo>();
    var chunkNumber = 1;
    var startPage = documentTree.RootSegment.Children.FirstOrDefault()?.RootElement.PageNumber;
    var endPage = documentTree.RootSegment.Children.FirstOrDefault()?.RootElement.PageNumber;
    var buffer = new StringBuilder();

    // walk the top-level segments in order
    foreach (var segment in documentTree.RootSegment.Children)
    {
        ChunkByCharacterLimitInternal(segment, charLimit, chunks, ref chunkNumber, ref startPage, ref endPage, buffer);
    }

    // flush the text that did not fill a complete chunk
    if (buffer.Length > 0)
    {
        var lastChunk = new ChunkInfo(chunkNumber, buffer.ToString(), startPage, endPage);
        chunks.Add(lastChunk);
    }

    return chunks;
}
/// <summary>
/// Parses <paramref name="ParentID"/> as a GUID and returns the document tree that
/// C_DocumentTreeOperator provides for it.
/// </summary>
/// <param name="ParentID">Textual GUID of the parent; passed to <see cref="Guid.Parse(string)"/>.</param>
/// <returns>The model returned by the operator's GetList call.</returns>
public DocumentTree GetDTreeListByID(string ParentID)
{
    var treeOperator = C_DocumentTreeOperator.Instance;
    Guid parentGuid = Guid.Parse(ParentID);
    return treeOperator.GetList(parentGuid);
}
/// <summary>
/// Theme-change handler: invalidates the ItemsSource property of the document tree and
/// then forces a layout update on it.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void OnCurrentThemeChanged(object sender, EventArgs e)
{
    var tree = DocumentTree;

    // Invalidate ItemsSource first, then refresh the layout.
    tree.InvalidateProperty(ItemsControl.ItemsSourceProperty);
    tree.UpdateLayout();
}
/// <summary>
/// Parses <paramref name="file"/> and asserts that the resulting tree equals
/// <paramref name="expected"/> according to DocumentTreeComparer.
/// </summary>
/// <param name="file">Stream passed to the parser under test.</param>
/// <param name="expected">Tree the parser is expected to produce.</param>
public async Task ParseDocxTest(Stream file, DocumentTree expected)
{
    var parsedTree = await _parser.ParseFile(file);

    var treeComparer = new DocumentTreeComparer();
    Assert.Equal(expected, parsedTree, treeComparer);
}
/// <summary>
/// Maps <paramref name="parsingResult"/> through the parser's MapMsReadResult and asserts
/// that the mapped tree equals <paramref name="expected"/> according to DocumentTreeComparer.
/// </summary>
/// <param name="parsingResult">Read operation result to map.</param>
/// <param name="expected">Tree the mapping is expected to produce.</param>
public void MapMsReadParserServiceTest(ReadOperationResult parsingResult, DocumentTree expected)
{
    var mappedTree = _parser.MapMsReadResult(parsingResult);

    var treeComparer = new DocumentTreeComparer();
    Assert.Equal(expected, mappedTree, treeComparer);
}