/// <summary>
/// Renders the web site documents of the data set into text document sets, using the cache provider when it is enabled.
/// </summary>
/// <param name="context">The operation context providing the data set and receiving the rendered output collections.</param>
/// <param name="log">The log builder to write progress messages to.</param>
/// <param name="EnableRendering">Forwarded to the site document renderer.</param>
public void TextRendering(OperationContext context, ILogBuilder log, Boolean EnableRendering = true)
{
    log.log("Text rendering");

    foreach (KeyValuePair<string, WebSiteDocumentsSet> pair in context.dataset)
    {
        foreach (WebSiteDocuments site in pair.Value)
        {
            TextDocumentSet tds = null;

            // try to reuse a previously rendered text document set from the cache
            if (DoUseCache)
            {
                if (CacheProvider.IsReady)
                {
                    tds = CacheProvider.GetCached<TextDocumentSet>(setupSignature, context.dataSetSignature, site.domain);
                }
            }

            // cache miss (or caching disabled): render the site documents
            if (tds == null)
            {
                tds = render.RenderSiteDocuments(site, log, EnableRendering);
            }

            tds.name = site.domain;
            context.renderSiteByDomain.Add(tds.name, tds);

            foreach (var td in tds)
            {
                context.renderLayersByAssignedID.Add(td.name, td);
            }

            // store the rendered set back into the cache for later runs
            if (DoUseCache)
            {
                if (CacheProvider.IsReady)
                {
                    CacheProvider.SetCached(setupSignature, context.dataSetSignature, tds.name, tds);
                }
            }
        }
    }
}
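For readers coming to this method cold: it is a cache-aside loop. A previously rendered `TextDocumentSet` is looked up under a composite key (setup signature, data set signature, site domain), rendering happens only on a miss, and the result is written back. The stand-alone sketch below restates just that pattern under stated assumptions; `SimpleCache`, `RenderResult`, `RenderWithCache` and the string key format are hypothetical illustrations, not types or APIs of imbNLP.Toolkit.

```csharp
using System;
using System.Collections.Generic;

// Minimal stand-in types; SimpleCache and RenderResult are hypothetical,
// not part of imbNLP.Toolkit.
public class RenderResult
{
    public string Name { get; set; }
    public string Text { get; set; }
}

public class SimpleCache
{
    private readonly Dictionary<string, RenderResult> _store = new Dictionary<string, RenderResult>();

    private static string Key(string setup, string dataset, string domain)
        => setup + "|" + dataset + "|" + domain;

    public RenderResult GetCached(string setup, string dataset, string domain)
        => _store.TryGetValue(Key(setup, dataset, domain), out var hit) ? hit : null;

    public void SetCached(string setup, string dataset, string domain, RenderResult value)
        => _store[Key(setup, dataset, domain)] = value;
}

public static class CacheAsideDemo
{
    // Cache-aside: try the cache first, render only on a miss, then write back.
    public static RenderResult RenderWithCache(SimpleCache cache, string setup, string dataset,
        string domain, Func<string, RenderResult> render)
    {
        RenderResult result = cache.GetCached(setup, dataset, domain);
        if (result == null)
        {
            result = render(domain);                      // expensive rendering only on a miss
            result.Name = domain;                         // name the result after the domain, as TextRendering does
            cache.SetCached(setup, dataset, domain, result);
        }
        return result;
    }

    public static void Main()
    {
        var cache = new SimpleCache();
        Func<string, RenderResult> render = d => new RenderResult { Text = "rendered text of " + d };

        var first = RenderWithCache(cache, "setupA", "dataset1", "example.com", render);   // miss: renders
        var second = RenderWithCache(cache, "setupA", "dataset1", "example.com", render);  // hit: cached

        Console.WriteLine(object.ReferenceEquals(first, second)); // True
    }
}
```

Keying on both signatures means that a change in either the render setup or the data set naturally misses the cache instead of reusing stale renders, which appears to be the intent of the composite key above.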
/// <summary>
/// Executes the plane method, invoking contained functions according to the settings
/// </summary>
/// <param name="inputContext">The input context - related to this plane.</param>
/// <param name="generalContext">General execution context, attached to the <see cref="T:imbNLP.Toolkit.Planes.PlanesMethodDesign" /></param>
/// <param name="logger">The logger.</param>
/// <returns>
/// The corpus plane context produced from the rendered, filtered and blended entity plane input.
/// </returns>
public IPlaneContext ExecutePlaneMethod(IPlaneContext inputContext, ExperimentModelExecutionContext generalContext, ILogBuilder logger)
{
    if (notes != null)
    {
        notes.logStartPhase("[1] Entity Plane - execution", "");
    }

    IEntityPlaneContext context = inputContext as IEntityPlaneContext;
    CorpusPlaneContext outputContext = new CorpusPlaneContext();
    outputContext.provider.StoreAndReceive(context);
    outputContext.dataset = context.dataset;

    // ---------------- rendering procedure

    Dictionary<WebSiteDocumentsSet, List<TextDocumentSet>> renderIndex = new Dictionary<WebSiteDocumentsSet, List<TextDocumentSet>>();
    Dictionary<string, SpaceLabel> labels = new Dictionary<string, SpaceLabel>();
    Dictionary<WebSiteDocuments, TextDocumentSet> sitesToRenders = new Dictionary<WebSiteDocuments, TextDocumentSet>();
    Dictionary<String, WebSiteDocuments> inputSites = new Dictionary<string, WebSiteDocuments>();
    Dictionary<String, TextDocumentSet> inputTextRenders = new Dictionary<string, TextDocumentSet>();
    Dictionary<WebSiteDocuments, List<SpaceLabel>> inputSiteVsLabels = new Dictionary<WebSiteDocuments, List<SpaceLabel>>();

    Int32 c = 0;

    // rendering
    foreach (WebSiteDocumentsSet docSet in context.dataset)
    {
        // resolve the space label for this data set category
        SpaceLabel lab = null;

        if (docSet.name.isNullOrEmpty() || docSet.name == SpaceLabel.UNKNOWN)
        {
            // the unnamed category becomes the "unknown" label of the space
            outputContext.space.label_unknown = new SpaceLabel(SpaceLabel.UNKNOWN);
            lab = outputContext.space.label_unknown;
            labels.Add(SpaceLabel.UNKNOWN, lab);
        }
        else
        {
            lab = new SpaceLabel(docSet.name);
            labels.Add(lab.name, lab);
            outputContext.space.labels.Add(lab);
        }

        String datasetSignature = context.dataset.GetDataSetSignature();

        // ---- render
        List<TextDocumentSet> textSetForLabel = new List<TextDocumentSet>();

        if (CacheProvider.IsReady)
        {
            foreach (WebSiteDocuments site in docSet)
            {
                TextDocumentSet tds = CacheProvider.GetCached<TextDocumentSet>(setupSignature, datasetSignature, site.domain);

                if (tds == null)
                {
                    tds = render.RenderSiteDocuments(site, logger);
                    CacheProvider.SetCached(setupSignature, datasetSignature, tds.name, tds);
                }
                else
                {
                    tds.name = site.domain;
                }

                textSetForLabel.Add(tds);
            }
        }
        else
        {
            textSetForLabel = render.RenderDocumentSet(docSet, logger);

            foreach (TextDocumentSet ws in textSetForLabel)
            {
                CacheProvider.SetCached(setupSignature, datasetSignature, ws.name, ws);
            }
        }

        // index the rendered sets by entity name
        textSetForLabel.ForEach(x => inputTextRenders.Add(x.name, x));

        // --- rest of indexing
        docSet.ForEach(x => inputSites.Add(x.domain, x));

        renderIndex.Add(docSet, textSetForLabel);

        foreach (WebSiteDocuments site in docSet)
        {
            inputSiteVsLabels.Add(site, new List<SpaceLabel>());
            inputSiteVsLabels[site].Add(lab);
            c++;
        }
    }

    if (notes != null)
    {
        notes.log("Text documents for [" + c + "] entities created");
    }

    // temporary index: map each web site to its rendered text document set
    foreach (String key in inputSites.Keys)
    {
        sitesToRenders.Add(inputSites[key], inputTextRenders[key]);
    }

    // page-in-site filtering
    if (filter.IsEnabled)
    {
        Dictionary<WebSiteDocuments, TextDocumentSet> renderIndexFiltered = new Dictionary<WebSiteDocuments, TextDocumentSet>();
        filter.Learn(inputTextRenders.Values);

        foreach (KeyValuePair<WebSiteDocuments, TextDocumentSet> pair in sitesToRenders)
        {
            renderIndexFiltered.Add(pair.Key, filter.FilterDocumentSet(pair.Value));
        }

        sitesToRenders = renderIndexFiltered;
    }

    Dictionary<String, TextDocumentSet> TextDocumentsByDomainName = new Dictionary<string, TextDocumentSet>();

    foreach (var pair in sitesToRenders)
    {
        TextDocumentsByDomainName.Add(pair.Key.domain, pair.Value);
    }

    // blending pages into corpus documents: one document per page, or one per web site
    // (illustrated in the stand-alone sketch after this method)
    Boolean keepSeparated = blender.DoKeepPagesSeparated;

    foreach (var pair in renderIndex)
    {
        foreach (TextDocumentSet entitySet in pair.Value)
        {
            TextDocumentSet selectedTexts = TextDocumentsByDomainName[entitySet.name];
            WebSiteDocuments web = inputSites[entitySet.name];
            IEnumerable<string> label = inputSiteVsLabels[web].Select(x => x.name);

            if (keepSeparated)
            {
                // one corpus document per rendered page
                var docs = blender.blendToSeparateTextDocuments(selectedTexts);
                foreach (TextDocument doc in docs)
                {
                    doc.labels.AddRange(label);
                    outputContext.corpus_documents.Add(doc);
                }
            }
            else
            {
                // all pages of the web site blended into a single corpus document
                TextDocument doc = blender.blendToTextDocument(selectedTexts);
                doc.labels.AddRange(label);
                outputContext.corpus_documents.Add(doc);
            }
        }
    }

    if (notes != null)
    {
        notes.logEndPhase();
    }

    return outputContext;
}
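The last step above decides, per web site, whether each rendered page becomes its own corpus document or whether all pages are blended into a single document carrying the site's labels. The sketch below isolates that decision with placeholder types; `Page`, `CorpusDocument` and the `Blend` helper are hypothetical stand-ins for the toolkit's `TextDocumentSet`/`TextDocument` machinery, not reproductions of it.

```csharp
using System;
using System.Collections.Generic;
using System.Linq;

// Hypothetical stand-ins for the page/document types; not the imbNLP.Toolkit classes.
public class Page
{
    public string Name { get; set; }
    public string Text { get; set; }
}

public class CorpusDocument
{
    public string Name { get; set; }
    public string Text { get; set; }
    public List<string> Labels { get; } = new List<string>();
}

public static class BlendDemo
{
    // Either keep every page as its own corpus document, or blend all pages of one
    // site into a single document - mirroring the keepSeparated branch above.
    public static List<CorpusDocument> Blend(string siteName, IEnumerable<Page> pages,
        IEnumerable<string> labels, bool keepPagesSeparated)
    {
        List<CorpusDocument> output;

        if (keepPagesSeparated)
        {
            // one corpus document per page
            output = pages.Select(p => new CorpusDocument { Name = p.Name, Text = p.Text }).ToList();
        }
        else
        {
            // one corpus document per site, pages concatenated
            output = new List<CorpusDocument>
            {
                new CorpusDocument
                {
                    Name = siteName,
                    Text = string.Join(Environment.NewLine, pages.Select(p => p.Text))
                }
            };
        }

        // every produced document inherits the labels of the site's data set category
        foreach (var doc in output)
        {
            doc.Labels.AddRange(labels);
        }
        return output;
    }

    public static void Main()
    {
        var pages = new[]
        {
            new Page { Name = "index", Text = "home page text" },
            new Page { Name = "about", Text = "about page text" }
        };

        var perPage = Blend("example.com", pages, new[] { "categoryA" }, keepPagesSeparated: true);
        var perSite = Blend("example.com", pages, new[] { "categoryA" }, keepPagesSeparated: false);

        Console.WriteLine(perPage.Count); // 2
        Console.WriteLine(perSite.Count); // 1
    }
}
```

Either way, every produced document inherits the label(s) of the data set category its web site belongs to, so the label assignment is independent of how the pages are blended.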