/// <summary>
/// Breaks the text content of every node in the block's model into streams.
/// </summary>
/// <remarks>
/// Every created stream is registered with <paramref name="block"/> through <c>Add</c> and is also
/// collected in the returned list. Stream names ("S00001", "S00002", ...) restart for each node.
/// </remarks>
/// <param name="block">The block whose <c>blockModel</c> nodes are processed.</param>
/// <returns>All streams created during the call, in creation order.</returns>
public List<imbMCStream> process(imbMCBlock block)
{
    List<imbMCStream> result = new List<imbMCStream>();

    foreach (htmlWrapper node in block.blockModel)
    {
        // Take the node's text output and decode HTML entities before splitting on line breaks.
        String text = HtmlEntity.DeEntitize(node.GetContent(nodeBlockOutputEnum.text));
        var segments = text.SplitSmart(Environment.NewLine, "", true, true);

        // NOTE(review): numbering restarts per node, so names repeat across nodes of the
        // same block — confirm this is intended.
        Int32 ordinal = 0;
        foreach (String segment in segments)
        {
            ordinal++;

            imbMCStream stream = new imbMCStream();
            stream.name = "S" + ordinal.ToString("D5");

            // The stream is attached to the block once it is named, before its content is set.
            block.Add(stream);

            stream.content = segment;
            stream.htmlNode = node.html;
            result.Add(stream);
        }
    }

    return result;
}
/// <summary>
/// Processes the specified task: a subject at the <c>mcBlock</c> content level is decomposed
/// into streams, and a new task is scheduled for every stream.
/// </summary>
/// <remarks>
/// For each stream produced by <c>streamComposer</c>, a child subject at the <c>mcTokenStream</c>
/// level is created, attached to the block subject, wrapped in a new task and pushed onto
/// <c>task.context.scheduledTasks</c>.
/// </remarks>
/// <param name="task">The task.</param>
/// <returns>
/// <c>next</c> when the task is not a <c>pipelineTask&lt;pipelineTaskSubjectContentToken&gt;</c>,
/// when its subject is not at the <c>mcBlock</c> level, or when the subject's element is not an
/// <c>imbMCBlock</c>; otherwise <c>forward</c>.
/// </returns>
public override IPipelineNode process(IPipelineTask task)
{
    pipelineTask<pipelineTaskSubjectContentToken> realTask = task as pipelineTask<pipelineTaskSubjectContentToken>;
    if (realTask == null)
    {
        return next;
    }

    pipelineTaskSubjectContentToken realSubject = realTask.subject;
    if (realSubject.contentLevelType != flags.token.cnt_level.mcBlock)
    {
        return next;
    }

    // The element may be missing or of another type even though the level flag says mcBlock;
    // skip this node instead of handing null to the stream composer.
    imbMCBlock mcBlock = realSubject.mcElement as imbMCBlock;
    if (mcBlock == null)
    {
        return next;
    }

    var streams = streamComposer.process(mcBlock);

    foreach (imbMCStream stream in streams)
    {
        pipelineTaskSubjectContentToken streamSubject = new pipelineTaskSubjectContentToken();
        streamSubject.contentLevelType = flags.token.cnt_level.mcTokenStream;
        streamSubject.mcElement = stream;
        streamSubject.name = stream.name;

        // Both forms start out as the raw stream content.
        streamSubject.currentForm = stream.content;
        streamSubject.initialForm = stream.content;

        streamSubject.parent = realSubject;
        realSubject.Add(streamSubject);

        pipelineTask<pipelineTaskSubjectContentToken> newTask = new pipelineTask<pipelineTaskSubjectContentToken>(streamSubject);
        task.context.scheduledTasks.Push(newTask);
    }

    // <---- tagging code

    return forward;
}
/// <summary>
/// Builds a content tree from the HTML document, extracts its scored blocks and wraps each one
/// in an <c>imbMCBlock</c>.
/// </summary>
/// <param name="html">The HTML document to decompose.</param>
/// <param name="name">The name passed to <c>buildTree</c> for the content tree.</param>
/// <returns>
/// One <c>imbMCBlock</c> per extracted block, named "B001", "B002", ... in iteration order;
/// empty when no blocks were extracted.
/// </returns>
public List<imbMCBlock> process(HtmlDocument html, String name)
{
    // NOTE(review): this tree is never read afterwards; it is kept because the constructor's
    // side effects are not visible from here — confirm whether it can be removed.
    nodeTree documentTree = new nodeTree("document", html);

    var contentTree = html.buildTree(name, true, false);
    var scoredBlocks = contentTree.getBlocks(targetBlockCount);
    scoredBlocks.CalculateScores();

    List<imbMCBlock> result = new List<imbMCBlock>();
    Int32 ordinal = 1;

    foreach (nodeBlock sourceBlock in scoredBlocks)
    {
        imbMCBlock wrapped = new imbMCBlock
        {
            name = "B" + ordinal.ToString("D3"),
            blockModel = sourceBlock
        };

        result.Add(wrapped);
        ordinal++;
    }

    return result;
}