/// <summary>
/// Uses lexic information to transform a token: the current form is replaced with the lemma form
/// of its inflection graph, and the <c>pos_type</c> tag of every grammar case in the graph is
/// added to the subject's flag bag.
/// </summary>
/// <param name="task">The task; its subject is expected to be a <see cref="pipelineTaskSubjectContentToken"/>.</param>
/// <returns>
/// <c>next</c> when the subject is not a content token subject or is not at the <c>mcToken</c> level;
/// otherwise <c>forward</c>.
/// </returns>
public override IPipelineNode process(IPipelineTask task)
{
    pipelineTaskSubjectContentToken realSubject = task.subject as pipelineTaskSubjectContentToken;

    // The "as" cast yields null for any other subject type; without this guard the
    // level check below would throw a NullReferenceException.
    if (realSubject == null)
    {
        return next;
    }

    if (realSubject.contentLevelType != flags.token.cnt_level.mcToken)
    {
        return next;
    }

    var graph = parser.GetInflectionGraph(realSubject.currentForm, -1, task.context.logger);

    realSubject.graph = graph;
    realSubject.currentForm = graph.lemmaForm;

    foreach (lexicGrammarCase grammarCase in graph)
    {
        realSubject.flagBag.AddUnique(grammarCase.tags.Get<pos_type>(pos_type.none));
    }

    return forward;
}
/// <summary>
/// Language filter for page-level tasks. The page text is tokenized and evaluated; pages whose
/// detected language is not <c>languagePrimary</c>, or that exceed the valid page limit
/// counted per parent subject name, are sent to the trash bin.
/// </summary>
/// <param name="task">The task; handled only when it is a <c>pipelineTask&lt;pipelineTaskMCPageSubject&gt;</c>.</param>
/// <returns>
/// <c>next</c> for tasks of another type, the model's trash bin for rejected pages,
/// and <c>forward</c> for accepted pages.
/// </returns>
public override IPipelineNode process(IPipelineTask task)
{
    var pageTask = task as pipelineTask<pipelineTaskMCPageSubject>;
    if (pageTask == null)
    {
        return next;
    }

    pipelineTaskMCPageSubject pageSubject = pageTask.subject;

    var properTokens = mLanguageEval.GetAllProperTokensSortedByFrequency(pageSubject.MCPage.TextContent, settings.tokenLengthMin);
    var evaluation = mLanguageEval.evaluate(settings, properTokens);

    if (evaluation.result_language == languagePrimary)
    {
        // The counter is only touched for pages in the primary language.
        Int32 validPages = task.context.GetAndChangeCustomDataProperty("validPageCount_" + pageSubject.parent.name, 1);

        if (validPages > limitValidPageCount)
        {
            return task.model.trashBin;
        }

        return forward;
    }

    return task.model.trashBin;
}
/// <summary>
/// Converts the current form of a content token subject through <c>pairSet</c>
/// (B to A when <c>inverseUse</c> is set, A to B otherwise) and mirrors the result
/// into the attached content element, when there is one.
/// </summary>
/// <param name="task">The task; its subject is expected to be a <see cref="pipelineTaskSubjectContentToken"/>.</param>
/// <returns><c>next</c> when the subject is of another type; otherwise <c>forward</c>.</returns>
public override IPipelineNode process(IPipelineTask task)
{
    var tokenSubject = task.subject as pipelineTaskSubjectContentToken;
    if (tokenSubject == null)
    {
        return next;
    }

    if (!inverseUse)
    {
        tokenSubject.currentForm = pairSet.ConvertFromAtoB(tokenSubject.currentForm);
    }
    else
    {
        tokenSubject.currentForm = pairSet.ConvertFromBtoA(tokenSubject.currentForm);
    }

    var element = tokenSubject.mcElement;
    if (element != null)
    {
        element.content = tokenSubject.currentForm;
    }

    return forward;
}
/// <summary>
/// Passes an existing task to the repository for updating.
/// </summary>
/// <param name="task">The task to update; a null task is ignored.</param>
/// <returns>A task that completes when the repository call has finished.</returns>
public async Task UpdateAsync(IPipelineTask task)
{
    if (task != null)
    {
        await _repository.UpdateAsync(task);
    }
}
/// <summary>
/// Replaces the stored document whose <c>_id</c> matches the task's id with a fresh
/// <see cref="PipelineTaskMongoDbModel"/> built from the task.
/// </summary>
/// <param name="task">The task to store; a null task is ignored.</param>
/// <returns>A task that completes when the replace operation has finished.</returns>
public async Task UpdateAsync(IPipelineTask task)
{
    if (task != null)
    {
        var replacement = new PipelineTaskMongoDbModel(task);
        var idFilter = new BsonDocument("_id", new ObjectId(task.Id));

        await TaskCollection.ReplaceOneAsync(idFilter, replacement);
    }
}
/// <summary>
/// Inserts a task immediately after an existing task in <c>Tasks</c>.
/// </summary>
/// <param name="newTask">The task to insert.</param>
/// <param name="after">The task to insert behind; when it is not in <c>Tasks</c> nothing is inserted.</param>
public virtual void AddAfter(IPipelineTask newTask, IPipelineTask after)
{
    int anchor = this.Tasks.IndexOf(after);
    if (anchor < 0)
    {
        return;
    }

    this.Tasks.Insert(anchor + 1, newTask);
}
/// <summary>
/// Creates a storage model by copying the id, name, creation time, average time and
/// user details from a task.
/// </summary>
/// <param name="task">The task to copy values from.</param>
public PipelineTaskMongoDbModel(IPipelineTask task)
{
    this.Id = task.Id;
    this.Name = task.Name;
    this.CreatedAt = task.CreatedAt;
    this.AverageTime = task.AverageTime;
    this.UserId = task.UserId;
    this.UserName = task.UserName;
}
/// <summary>
/// Inserts a task immediately before an existing task in <c>Tasks</c>.
/// </summary>
/// <param name="newTask">The task to insert.</param>
/// <param name="before">The task to insert in front of; when it is not in <c>Tasks</c> nothing is inserted.</param>
public virtual void AddBefore(IPipelineTask newTask, IPipelineTask before)
{
    int anchor = this.Tasks.IndexOf(before);
    if (anchor < 0)
    {
        return;
    }

    this.Tasks.Insert(anchor, newTask);
}
/// <summary>
/// Inserts a new <see cref="PipelineTaskMongoDbModel"/> built from the task into the task collection.
/// </summary>
/// <param name="task">The task to store; a null task is ignored.</param>
/// <returns>A task that completes when the insert operation has finished.</returns>
public async Task CreateAsync(IPipelineTask task)
{
    if (task != null)
    {
        var document = new PipelineTaskMongoDbModel(task);
        await TaskCollection.InsertOneAsync(document);
    }
}
/// <summary>
/// Task builder for the site level of subject: loads the web pages of the site from the parent
/// repository subject and schedules one <see cref="pipelineTaskMCPageSubject"/> task per page.
/// Sends to next if the task is not a <c>pipelineTask&lt;pipelineTaskMCSiteSubject&gt;</c>.
/// </summary>
/// <param name="task">The task.</param>
/// <returns>
/// <c>next</c> when the task is of another type or no repository is available for the site;
/// otherwise <c>forward</c>.
/// </returns>
public override IPipelineNode process(IPipelineTask task)
{
    var realTask = task as pipelineTask<pipelineTaskMCSiteSubject>;
    if (realTask == null)
    {
        return next;
    }

    pipelineTaskMCSiteSubject realSubject = realTask.subject;

    // The parent may not be a repository subject, in which case the "as" cast yields null.
    var repoSubject = realSubject.parent as pipelineTaskMCRepoSubject;
    var repo = (repoSubject != null) ? repoSubject.MCRepo : null;

    if (repo == null)
    {
        // Previously this was only logged and the code then dereferenced the null repository.
        task.context.logger.log("MCRepo is null at [" + task.GetStringInfo() + "]");
        return next;
    }

    List<imbMCWebPage> listPages = repo.GetAllWebPages(realSubject.MCSite, null, takeSetup);

    if (doFilterOutDuplicates)
    {
        listPages = listPages.GetUniquePages();
    }

    if (doSortPagesByTextSize)
    {
        listPages.Sort(SortByPageSize);
    }

    foreach (imbMCWebPage page in listPages)
    {
        var mCPageSubject = new pipelineTaskMCPageSubject();

        imbMCDocument doc = new imbMCDocument();
        doc.webPage = page;
        realSubject.mcElement.Add(doc);

        mCPageSubject.mcElement = doc;
        mCPageSubject.MCPage = page;
        mCPageSubject.parent = realSubject;
        realSubject.Add(mCPageSubject);

        // Each page becomes its own task on the context's scheduled task stack.
        pipelineTask<pipelineTaskMCPageSubject> taskForPage = new pipelineTask<pipelineTaskMCPageSubject>(mCPageSubject);
        task.context.scheduledTasks.Push(taskForPage);
    }

    return forward;
}
/// <summary>
/// Directs the task by testing the subject's flag bag against this node's <c>flags</c>
/// using <c>queryType</c>.
/// </summary>
/// <param name="task">The task; handled only when it is a <c>pipelineTask&lt;T&gt;</c>.</param>
/// <returns>
/// <c>forward</c> when the flag bag satisfies the query; <c>next</c> when it does not
/// or when the task is of another type.
/// </returns>
public override IPipelineNode process(IPipelineTask task)
{
    pipelineTask<T> realTask = task as pipelineTask<T>;

    // The "as" cast yields null for other task types; treat them as "no match"
    // instead of failing with a NullReferenceException.
    if (realTask == null)
    {
        return next;
    }

    if (realTask.subject.flagBag.ContainsByEnum(flags.ToArray(), queryType))
    {
        return forward;
    }

    return next;
}
/// <summary>
/// Tags a content token subject with container type flags derived from the tag names
/// reported by its HTML node: <c>a</c> maps to the link flag, while <c>title</c> and the
/// heading tags (<c>h</c>, <c>h1</c> to <c>h6</c>) map to the title flag.
/// </summary>
/// <param name="task">The task; handled only when it is a <c>pipelineTask&lt;pipelineTaskSubjectContentToken&gt;</c>.</param>
/// <returns><c>next</c> when the task is of another type; otherwise <c>forward</c>.</returns>
public override IPipelineNode process(IPipelineTask task)
{
    var tokenTask = task as pipelineTask<pipelineTaskSubjectContentToken>;
    if (tokenTask == null)
    {
        return next;
    }

    pipelineTaskSubjectContentToken subject = tokenTask.subject;

    // Nothing to tag without a content element or without an HTML node behind it.
    if (subject.mcElement == null)
    {
        return forward;
    }

    HtmlNode htmlNode = subject.mcElement.htmlNode;
    if (htmlNode == null)
    {
        return forward;
    }

    List<String> tagNames = htmlNode.GetTagNames();

    foreach (String tagName in tagNames)
    {
        switch (tagName)
        {
            case "a":
                subject.flagBag.AddUnique(cnt_containerType.link);
                break;

            case "title":
            case "h":
            case "h1":
            case "h2":
            case "h3":
            case "h4":
            case "h5":
            case "h6":
                subject.flagBag.AddUnique(cnt_containerType.title);
                break;
        }
    }

    return forward;
}
/// <summary>
/// Stamps a new task with the current UTC time and, when supplied, the user id,
/// then passes it to the repository for creation.
/// </summary>
/// <param name="task">The task to create; a null task is ignored.</param>
/// <param name="userId">User id to assign; null, empty or whitespace leaves the task's user id untouched.</param>
/// <returns>A task that completes when the repository call has finished.</returns>
public async Task CreateAsync(IPipelineTask task, string userId)
{
    if (task != null)
    {
        task.CreatedAt = DateTime.UtcNow;

        bool hasUserId = !string.IsNullOrWhiteSpace(userId);
        if (hasUserId)
        {
            task.UserId = userId;
        }

        await _repository.CreateAsync(task);
    }
}
/// <summary>
/// Appends a task to the end of the internal linked list of tasks.
/// </summary>
/// <param name="task">The task to append.</param>
public void EnqueueTask(IPipelineTask<TContext> task)
{
    var item = new TaskLinkedListItem(task);

    if (_rootTask == null)
    {
        // First item: it is both the head and the tail of the list.
        _rootTask = item;
    }
    else
    {
        _lastTask.NextTask = item;
    }

    _lastTask = item;
}
/// <summary>
/// Task builder for the page level of subject: parses the page HTML, lets <c>blockComposer</c>
/// split it into blocks and schedules one <see cref="pipelineTaskSubjectContentToken"/> task
/// (at <c>mcBlock</c> level) per block.
/// Sends to next if the task is not a <c>pipelineTask&lt;pipelineTaskMCPageSubject&gt;</c>.
/// </summary>
/// <param name="task">The task.</param>
/// <returns><c>next</c> when the task is of another type; otherwise <c>forward</c>.</returns>
public override IPipelineNode process(IPipelineTask task)
{
    pipelineTask<pipelineTaskMCPageSubject> realTask = task as pipelineTask<pipelineTaskMCPageSubject>;
    if (realTask == null)
    {
        return next;
    }

    pipelineTaskMCPageSubject realSubject = realTask.subject;

    HtmlDocument html = new HtmlDocument();
    html.LoadHtml(realSubject.MCPage.HtmlSourceCode);
    realSubject.htmlDocument = html;

    List<imbMCBlock> blocks = blockComposer.process(html, realSubject.name);

    if (!blocks.Any())
    {
        // The parent is not guaranteed to be a site subject; fall back to the page's own
        // name so that a diagnostics message can never throw.
        var siteSubject = realSubject.parent as pipelineTaskMCSiteSubject;
        var ownerName = (siteSubject != null) ? siteSubject.name : realSubject.name;

        task.context.logger.log("Block composer returned zero blocks for [" + ownerName + "]");
    }

    foreach (imbMCBlock block in blocks)
    {
        pipelineTaskSubjectContentToken tokenSubject = new pipelineTaskSubjectContentToken();
        tokenSubject.name = block.name;
        tokenSubject.contentLevelType = flags.token.cnt_level.mcBlock;
        tokenSubject.mcElement = block;
        tokenSubject.currentForm = block.content;

        realSubject.mcElement.Add(tokenSubject.mcElement);
        realSubject.Add(tokenSubject);

        // Each block becomes its own task on the context's scheduled task stack.
        pipelineTask<pipelineTaskSubjectContentToken> taskForElement = new pipelineTask<pipelineTaskSubjectContentToken>(tokenSubject);
        task.context.scheduledTasks.Push(taskForElement);
    }

    return forward;
}
/// <summary>
/// Tests the subject's current form against the <c>test</c> expression. On a match the node,
/// depending on the flags set in <c>testType</c>, applies <c>tagsToApply</c>, replaces the
/// current form with the value of capture group <c>groupToCurrent</c>, and/or replaces the
/// matched text with <c>replacement</c>.
/// </summary>
/// <param name="task">The task; handled only when it is a <c>pipelineTask&lt;pipelineTaskSubjectContentToken&gt;</c>.</param>
/// <returns>
/// <c>forward</c> when the current form matches; <c>next</c> when it does not
/// or when the task is of another type.
/// </returns>
public override IPipelineNode process(IPipelineTask task)
{
    var realTask = task as pipelineTask<pipelineTaskSubjectContentToken>;
    if (realTask == null)
    {
        return next;
    }

    if (!test.IsMatch(realTask.subject.currentForm))
    {
        return next;
    }

    if (testType.HasFlag(pipelineRegexTestTypeEnum.tagger))
    {
        realTask.subject.flagBag.AddRange(tagsToApply);
    }

    if (testType.HasFlag(pipelineRegexTestTypeEnum.groupreplacer))
    {
        var match = test.Match(realTask.subject.currentForm);

        // Valid group indexes are 0 .. Count - 1. The previous ">=" check let an index equal
        // to Count through, and the group indexer answers an out-of-range index with an
        // empty group, which silently blanked the current form.
        if (groupToCurrent >= 0 && groupToCurrent < match.Groups.Count)
        {
            realTask.subject.currentForm = match.Groups[groupToCurrent].Value;
        }
    }

    if (testType.HasFlag(pipelineRegexTestTypeEnum.replacer))
    {
        realTask.subject.currentForm = test.Replace(realTask.subject.currentForm, replacement);
    }

    return forward;
}
/// <summary>
/// Splits a token stream subject into tokens with <c>tokenComposer</c> and schedules one
/// <c>mcToken</c>-level task per token, registering each token under the stream.
/// </summary>
/// <param name="task">The task; handled only when it is a <c>pipelineTask&lt;pipelineTaskSubjectContentToken&gt;</c>.</param>
/// <returns>
/// <c>next</c> when the task is of another type or its subject is not at the
/// <c>mcTokenStream</c> level; otherwise <c>forward</c>.
/// </returns>
public override IPipelineNode process(IPipelineTask task)
{
    var streamTask = task as pipelineTask<pipelineTaskSubjectContentToken>;
    if (streamTask == null)
    {
        return next;
    }

    pipelineTaskSubjectContentToken streamSubject = streamTask.subject;
    if (streamSubject.contentLevelType != flags.token.cnt_level.mcTokenStream)
    {
        return next;
    }

    List<imbMCToken> composedTokens = tokenComposer.process(streamSubject.mcElement as imbMCStream);

    foreach (imbMCToken composed in composedTokens)
    {
        var childSubject = new pipelineTaskSubjectContentToken();
        childSubject.mcElement = composed;

        streamSubject.mcElement.Add(composed);

        childSubject.name = composed.name;
        childSubject.contentLevelType = flags.token.cnt_level.mcToken;
        childSubject.parent = streamSubject;
        childSubject.currentForm = composed.content;

        streamSubject.Add(childSubject);

        // Each token becomes its own task on the context's scheduled task stack.
        var childTask = new pipelineTask<pipelineTaskSubjectContentToken>(childSubject);
        task.context.scheduledTasks.Push(childTask);
    }

    return forward;
}
/// <summary>
/// Runs <c>resolver</c> on the subject's current form and adds the resulting flags
/// to the subject's flag bag.
/// </summary>
/// <param name="task">The task; handled only when it is a <c>pipelineTask&lt;pipelineTaskSubjectContentToken&gt;</c>.</param>
/// <returns><c>next</c> when the task is of another type; otherwise <c>forward</c>.</returns>
public override IPipelineNode process(IPipelineTask task)
{
    var tokenTask = task as pipelineTask<pipelineTaskSubjectContentToken>;
    if (tokenTask == null)
    {
        return next;
    }

    pipelineTaskSubjectContentToken subject = tokenTask.subject;

    var resolvedFlags = resolver.process(subject.currentForm);
    subject.flagBag.AddRange(resolvedFlags, true);

    return forward;
}
/// <summary>
/// Language filter for token-level subjects. The current form is evaluated; when the detected
/// languages include <c>languagePrimary</c>, all detected languages are added to the flag bag
/// and the task moves forward, otherwise it is sent to the trash bin.
/// </summary>
/// <param name="task">The task; handled only when it is a <c>pipelineTask&lt;pipelineTaskSubjectContentToken&gt;</c>.</param>
/// <returns>
/// <c>next</c> when the task is of another type or its subject is not at the <c>mcToken</c> level;
/// <c>forward</c> for accepted tokens; the model's trash bin for rejected tokens.
/// </returns>
public override IPipelineNode process(IPipelineTask task)
{
    pipelineTask<pipelineTaskSubjectContentToken> realTask = task as pipelineTask<pipelineTaskSubjectContentToken>;
    if (realTask == null)
    {
        return next;
    }

    pipelineTaskSubjectContentToken realSubject = realTask.subject;
    if (realSubject.contentLevelType != cnt_level.mcToken)
    {
        return next;
    }

    var tkns = mLanguageEval.GetAllProperTokensSortedByFrequency(realSubject.currentForm, settings.tokenLengthMin);
    var mle = mLanguageEval.evaluate(settings, tkns);

    if (!mle.languageEnums.Contains(languagePrimary))
    {
        return task.model.trashBin;
    }

    // The flag bag is fed a list of objects, so the language values are copied over one by one.
    List<Object> languageFlags = new List<object>();
    mle.languageEnums.ForEach(x => languageFlags.Add(x));
    realSubject.flagBag.AddRange(languageFlags);

    return forward;
}
/// <summary>
/// Splits a block subject into token streams with <c>streamComposer</c> and schedules one
/// <c>mcTokenStream</c>-level task per stream.
/// </summary>
/// <param name="task">The task; handled only when it is a <c>pipelineTask&lt;pipelineTaskSubjectContentToken&gt;</c>.</param>
/// <returns>
/// <c>next</c> when the task is of another type or its subject is not at the
/// <c>mcBlock</c> level; otherwise <c>forward</c>.
/// </returns>
public override IPipelineNode process(IPipelineTask task)
{
    var blockTask = task as pipelineTask<pipelineTaskSubjectContentToken>;
    if (blockTask == null)
    {
        return next;
    }

    pipelineTaskSubjectContentToken blockSubject = blockTask.subject;
    if (blockSubject.contentLevelType != flags.token.cnt_level.mcBlock)
    {
        return next;
    }

    imbMCBlock block = blockSubject.mcElement as imbMCBlock;
    var composedStreams = streamComposer.process(block);

    foreach (imbMCStream composed in composedStreams)
    {
        var childSubject = new pipelineTaskSubjectContentToken();
        childSubject.contentLevelType = flags.token.cnt_level.mcTokenStream;
        childSubject.mcElement = composed;
        childSubject.name = composed.name;

        // Both forms start out as the raw stream content.
        childSubject.currentForm = composed.content;
        childSubject.initialForm = composed.content;
        childSubject.parent = blockSubject;

        blockSubject.Add(childSubject);

        // Each stream becomes its own task on the context's scheduled task stack.
        var childTask = new pipelineTask<pipelineTaskSubjectContentToken>(childSubject);
        task.context.scheduledTasks.Push(childTask);
    }

    return forward;
}
/// <summary>
/// Replaces the subject's current form with the output of <c>resolver</c>.
/// </summary>
/// <param name="task">The task; its subject is expected to be a <see cref="pipelineTaskSubjectContentToken"/>.</param>
/// <returns>
/// <c>next</c> when the subject is of another type or its current form is null or empty;
/// otherwise <c>forward</c>.
/// </returns>
public override IPipelineNode process(IPipelineTask task)
{
    var tokenSubject = task.subject as pipelineTaskSubjectContentToken;

    // Short-circuit keeps the emptiness check from running on a null subject.
    if (tokenSubject == null || tokenSubject.currentForm.isNullOrEmpty())
    {
        return next;
    }

    tokenSubject.currentForm = resolver.process(tokenSubject.currentForm);

    return forward;
}
/// <summary>
/// Creates a runtime wrapper around the given task.
/// </summary>
/// <param name="task">The task stored in <c>Task</c>.</param>
public PipelineTaskRuntime(IPipelineTask<TContext> task)
{
    this.Task = task;
}
/// <summary>
/// Task builder for <see cref="imbMCRepository"/> level of subject: loads the repository named by
/// the subject, then schedules one <see cref="pipelineTaskMCSiteSubject"/> task per web site.
/// When the subject lists site targets only those sites are taken, and a site name is never
/// scheduled twice. Sends to next if task is not with <see cref="pipelineTaskMCRepoSubject"/>.
/// </summary>
/// <param name="task">The task.</param>
/// <returns><c>next</c> when the task is of another type; otherwise <c>forward</c>.</returns>
/// <exception cref="aceScienceException">
/// Thrown when some of the requested site targets were not found in the repository.
/// </exception>
public override IPipelineNode process(IPipelineTask task)
{
    pipelineTask<pipelineTaskMCRepoSubject> realTask = task as pipelineTask<pipelineTaskMCRepoSubject>;
    if (realTask == null)
    {
        return next;
    }

    pipelineTaskMCRepoSubject realSubject = realTask.subject;

    folderNode repoFolder = appManager.Application.folder_resources.Add(imbMCManager.MCRepo_DefaultDirectoryName, "MC Repositories", "Root directory with repositories of Crawled Web content");

    imbMCRepository repo = realSubject.MCRepoName.LoadDataStructure<imbMCRepository>(repoFolder, task.context.logger);

    imbMCDocumentRepositorium docRepo = new imbMCDocumentRepositorium();
    docRepo.webRepository = repo;
    realSubject.mcElement = docRepo;
    realSubject.MCRepo = repo;

    List<imbMCWebSite> websites = repo.GetAllWebSites(task.context.logger, takeSetup);

    if (!websites.Any())
    {
        task.context.logger.log(this.name + " Failed --- no web sites loaded");
    }

    // Normalized names of the requested sites.
    List<String> needle = new List<string>();
    realSubject.MCSiteTargets.ForEach(x => needle.Add(pipelineSubjectTools.GetCleanCaseName(x)));

    // Normalized names of the sites that were actually scheduled.
    List<String> urls = new List<string>();

    foreach (imbMCWebSite site in websites)
    {
        String sName = pipelineSubjectTools.GetCleanCaseName(site.domainInfo.urlProper);

        // With explicit targets, only the requested sites are taken.
        if (realSubject.MCSiteTargets.Any() && !needle.Contains(sName))
        {
            continue;
        }

        // A site name is scheduled only once.
        if (urls.Contains(sName))
        {
            continue;
        }

        pipelineTaskMCSiteSubject mCSiteSubject = new pipelineTaskMCSiteSubject();
        mCSiteSubject.MCSite = site;

        imbMCDocumentSet docSet = new imbMCDocumentSet();
        docRepo.Add(docSet);

        mCSiteSubject.mcElement = docSet;
        mCSiteSubject.contentLevelType = flags.token.cnt_level.mcSite;
        mCSiteSubject.name = sName;
        mCSiteSubject.parent = realSubject;
        realSubject.Add(mCSiteSubject);

        urls.Add(mCSiteSubject.name);

        pipelineTask<pipelineTaskMCSiteSubject> taskForSite = new pipelineTask<pipelineTaskMCSiteSubject>(mCSiteSubject);
        task.context.scheduledTasks.Push(taskForSite);
    }

    if (urls.Count < needle.Count)
    {
        // Whatever is left in the needle list after removing the scheduled names was not found.
        urls.ForEach(x => needle.Remove(x));

        if (needle.Any())
        {
            String nd = "";
            needle.ForEach(x => nd += x + " ");

            throw new aceScienceException("Some sites are not found in the MC Repository!! [" + nd + "]", null, realSubject, "Sites not loaded [" + nd + "]", this);
        }
    }

    return forward;
}
/// <summary>
/// Feeds the input into the task and returns what the task produced, so calls can be chained.
/// </summary>
/// <typeparam name="INPUT">Type of the value handed to the task.</typeparam>
/// <typeparam name="OUTPUT">Type of the value produced by the task.</typeparam>
/// <param name="input">The value to process.</param>
/// <param name="task">The task that processes the value.</param>
/// <returns>The result of <c>task.Process(input)</c>.</returns>
public static OUTPUT Pipe<INPUT, OUTPUT>(this INPUT input, IPipelineTask<INPUT, OUTPUT> task)
{
    OUTPUT produced = task.Process(input);
    return produced;
}
/// <summary>
/// Processes the specified task. Re-declared as abstract, so every concrete derived
/// node type has to supply its own implementation.
/// </summary>
/// <param name="task">The task to process.</param>
/// <returns>
/// An <see cref="IPipelineNode"/>; presumably the node that should receive the task next
/// (confirm against the base declaration).
/// </returns>
public abstract override IPipelineNode process(IPipelineTask task);
/// <summary>
/// Creates a switch task from a set of conditional cases and an optional default task.
/// </summary>
/// <param name="cases">The conditional tasks stored as the cases of the switch.</param>
/// <param name="default">The task stored as the default; may be null.</param>
public SwitchPipelineTask(IEnumerable<ConditionalPipelineTask<TContext>> cases, IPipelineTask<TContext> @default = null)
{
    _cases = cases;
    _default = @default;
}
/// <summary>
/// Redirects the task by <see cref="cnt_level"/> to (if not null) corresponding pipeline.
/// </summary>
/// <param name="task">The task; its subject is expected to be a <see cref="pipelineTaskSubjectContentToken"/>.</param>
/// <returns>
/// The pipeline configured for the subject's content level; <c>forward</c> when no pipeline is
/// configured for that level; <c>next</c> for the <c>none</c> and <c>mcSubtoken</c> levels, for
/// unknown levels and for subjects of an incompatible type.
/// </returns>
public override IPipelineNode process(IPipelineTask task)
{
    pipelineTaskSubjectContentToken realSubject = task.subject as pipelineTaskSubjectContentToken;

    if (realSubject == null)
    {
        if (task.context.RunInDebugMode)
        {
            // The subject itself may be null, so its type name is resolved defensively.
            String subjectTypeName = (task.subject == null) ? "null" : task.subject.GetType().Name;

            task.context.logger.log("Node " + name + " received a task [" + task.GetType().Name + "] with non compatibile task subject [" + subjectTypeName + "]");
        }

        // Previously execution fell through to the switch and dereferenced the null subject.
        return next;
    }

    switch (realSubject.contentLevelType)
    {
        case cnt_level.mcBlock:
            if (blockPipeline != null)
            {
                return blockPipeline;
            }
            break;

        case cnt_level.mcChunk:
            if (chunkPipeline != null)
            {
                return chunkPipeline;
            }
            break;

        case cnt_level.mcPage:
            if (pagePipeline != null)
            {
                return pagePipeline;
            }
            break;

        case cnt_level.mcRepo:
            if (repoPipeline != null)
            {
                return repoPipeline;
            }
            break;

        case cnt_level.mcSite:
            if (sitePipeline != null)
            {
                return sitePipeline;
            }
            break;

        case cnt_level.mcToken:
            if (tokenPipeline != null)
            {
                return tokenPipeline;
            }
            break;

        case cnt_level.mcTokenStream:
            if (streamPipeline != null)
            {
                return streamPipeline;
            }
            break;

        default:
        case cnt_level.none:
        case cnt_level.mcSubtoken:
            return next;
    }

    // A known level without a configured pipeline continues forward.
    return forward;
}
/// <summary>
/// Process call: performs no work on the task and simply hands it on via <c>forward</c>.
/// </summary>
/// <param name="task">The task (not inspected).</param>
/// <returns>Always <c>forward</c>.</returns>
public override IPipelineNode process(IPipelineTask task)
{
    return forward;
}
/// <summary>
/// Creates a for-each task from an element selector and the task to be used with
/// the per-element context.
/// </summary>
/// <param name="selector">Function that yields the elements for a given context.</param>
/// <param name="task">Task that operates on an <c>IForEachTaskContext</c> of context and element.</param>
public ForEachTask(Func<TContext, IEnumerable<TElement>> selector, IPipelineTask<IForEachTaskContext<TContext, TElement>> task)
{
    _selector = selector;
    _task = task;
}
/// <summary>
/// Process call. Abstract: every concrete node type supplies its own implementation.
/// </summary>
/// <param name="task">The task.</param>
/// <returns>
/// An <see cref="IPipelineNode"/>; presumably the node that should receive the task next
/// (confirm against the implementations).
/// </returns>
public abstract IPipelineNode process(IPipelineTask task);