Exemplo n.º 1
0
        /// <summary>
        /// Uses lexic information to transform a token-level subject: its current form is replaced by the
        /// lemma form of the inflection graph returned by the parser, and the <see cref="pos_type"/> tag of
        /// every grammar case in that graph is added to the subject's flag bag.
        /// </summary>
        /// <param name="task">The task whose subject is examined.</param>
        /// <returns>
        /// <c>next</c> when the subject is not a <see cref="pipelineTaskSubjectContentToken"/> or is not at
        /// token level; otherwise <c>forward</c>.
        /// </returns>
        public override IPipelineNode process(IPipelineTask task)
        {
            pipelineTaskSubjectContentToken realSubject = task.subject as pipelineTaskSubjectContentToken;

            // The "as" cast yields null for any other subject type; such tasks are not handled by this node.
            if (realSubject == null)
            {
                return(next);
            }

            if (realSubject.contentLevelType != flags.token.cnt_level.mcToken)
            {
                return(next);
            }

            var g = parser.GetInflectionGraph(realSubject.currentForm, -1, task.context.logger);

            realSubject.graph       = g;
            realSubject.currentForm = g.lemmaForm;

            foreach (lexicGrammarCase chld in g)
            {
                // pos_type.none is passed as the second argument of Get - presumably the default value; verify.
                realSubject.flagBag.AddUnique(chld.tags.Get <pos_type>(pos_type.none));
            }

            return(forward);
        }
        /// <summary>
        /// Language filter for page-level tasks. The text content of the page is tokenized and evaluated;
        /// pages whose evaluated language differs from <c>languagePrimary</c> go to the trash bin, as do
        /// pages whose per-parent counter exceeds <c>limitValidPageCount</c>.
        /// </summary>
        /// <param name="task">The task.</param>
        /// <returns>
        /// <c>next</c> when the task does not carry a <see cref="pipelineTaskMCPageSubject"/>;
        /// <c>task.model.trashBin</c> for rejected pages; otherwise <c>forward</c>.
        /// </returns>
        public override IPipelineNode process(IPipelineTask task)
        {
            var pageTask = task as pipelineTask <pipelineTaskMCPageSubject>;
            if (pageTask == null)
            {
                return next;
            }

            pipelineTaskMCPageSubject pageSubject = pageTask.subject;

            var tokens     = mLanguageEval.GetAllProperTokensSortedByFrequency(pageSubject.MCPage.TextContent, settings.tokenLengthMin);
            var evaluation = mLanguageEval.evaluate(settings, tokens);

            bool isPrimaryLanguage = evaluation.result_language == languagePrimary;
            if (!isPrimaryLanguage)
            {
                return task.model.trashBin;
            }

            // The counter is keyed by the parent subject's name and is only touched for primary-language pages.
            // Presumably the call changes the stored value by 1 and returns a count - confirm against the context API.
            Int32 validPages = task.context.GetAndChangeCustomDataProperty("validPageCount_" + pageSubject.parent.name, 1);
            if (validPages > limitValidPageCount)
            {
                return task.model.trashBin;
            }

            return forward;
        }
Exemplo n.º 3
0
        /// <summary>
        /// Converts the current form of a content-token subject through <c>pairSet</c> (A-to-B by default,
        /// B-to-A when <c>inverseUse</c> is set) and mirrors the result into the subject's content element, if any.
        /// </summary>
        /// <param name="task">The task.</param>
        /// <returns><c>next</c> when the subject is not a <see cref="pipelineTaskSubjectContentToken"/>; otherwise <c>forward</c>.</returns>
        public override IPipelineNode process(IPipelineTask task)
        {
            var tokenSubject = task.subject as pipelineTaskSubjectContentToken;
            if (tokenSubject == null)
            {
                return next;
            }

            if (!inverseUse)
            {
                tokenSubject.currentForm = pairSet.ConvertFromAtoB(tokenSubject.currentForm);
            }
            else
            {
                tokenSubject.currentForm = pairSet.ConvertFromBtoA(tokenSubject.currentForm);
            }

            // Keep the attached content element in sync with the converted form.
            if (tokenSubject.mcElement != null)
            {
                tokenSubject.mcElement.content = tokenSubject.currentForm;
            }

            return forward;
        }
Exemplo n.º 4
0
 /// <summary>
 /// Passes the task to the repository's <c>UpdateAsync</c>. A <c>null</c> task is silently ignored.
 /// </summary>
 /// <param name="task">The task to update; may be <c>null</c>.</param>
 public async Task UpdateAsync(IPipelineTask task)
 {
     if (task != null)
     {
         await _repository.UpdateAsync(task);
     }
 }
 /// <summary>
 /// Replaces the stored document whose <c>_id</c> equals the task's id with a fresh
 /// <see cref="PipelineTaskMongoDbModel"/> built from the task. A <c>null</c> task is silently ignored.
 /// </summary>
 /// <param name="task">The task to persist; may be <c>null</c>.</param>
 public async Task UpdateAsync(IPipelineTask task)
 {
     if (task != null)
     {
         var document = new PipelineTaskMongoDbModel(task);
         var byId     = new BsonDocument("_id", new ObjectId(task.Id));

         await TaskCollection.ReplaceOneAsync(byId, document);
     }
 }
Exemplo n.º 6
0
        /// <summary>
        /// Inserts <paramref name="newTask"/> directly behind <paramref name="after"/> in <c>Tasks</c>.
        /// Nothing happens when <paramref name="after"/> is not found in the list.
        /// </summary>
        /// <param name="newTask">The task to insert.</param>
        /// <param name="after">The existing task that the new one should follow.</param>
        public virtual void AddAfter(IPipelineTask newTask, IPipelineTask after)
        {
            int anchor = Tasks.IndexOf(after);
            if (anchor < 0)
            {
                return;
            }

            Tasks.Insert(anchor + 1, newTask);
        }
Exemplo n.º 7
0
 /// <summary>
 /// Creates a storage model by copying the id, name, creation time, average time and user
 /// information from the given task.
 /// </summary>
 /// <param name="task">The task to copy from; must not be <c>null</c>.</param>
 /// <exception cref="System.ArgumentNullException"><paramref name="task"/> is <c>null</c>.</exception>
 public PipelineTaskMongoDbModel(IPipelineTask task)
 {
     // Fail with a descriptive exception instead of a bare NullReferenceException on the first property read.
     if (task == null)
     {
         throw new System.ArgumentNullException("task");
     }

     Id          = task.Id;
     Name        = task.Name;
     CreatedAt   = task.CreatedAt;
     AverageTime = task.AverageTime;
     UserId      = task.UserId;
     UserName    = task.UserName;
 }
Exemplo n.º 8
0
        /// <summary>
        /// Inserts <paramref name="newTask"/> directly in front of <paramref name="before"/> in <c>Tasks</c>.
        /// Nothing happens when <paramref name="before"/> is not found in the list.
        /// </summary>
        /// <param name="newTask">The task to insert.</param>
        /// <param name="before">The existing task that the new one should precede.</param>
        public virtual void AddBefore(IPipelineTask newTask, IPipelineTask before)
        {
            int anchor = Tasks.IndexOf(before);
            if (anchor < 0)
            {
                return;
            }

            Tasks.Insert(anchor, newTask);
        }
 /// <summary>
 /// Inserts a new <see cref="PipelineTaskMongoDbModel"/> built from the task into <c>TaskCollection</c>.
 /// A <c>null</c> task is silently ignored.
 /// </summary>
 /// <param name="task">The task to store; may be <c>null</c>.</param>
 public async Task CreateAsync(IPipelineTask task)
 {
     if (task != null)
     {
         var document = new PipelineTaskMongoDbModel(task);
         await TaskCollection.InsertOneAsync(document);
     }
 }
Exemplo n.º 10
0
        /// <summary>
        /// Task builder for the site level of subject: loads the web pages of the subject's site from the
        /// repository of the parent subject and schedules one page-level task per page.
        /// Sends to next if the task is not with <see cref="pipelineTaskMCSiteSubject"/> or no repository is available.
        /// </summary>
        /// <param name="task">The task.</param>
        /// <returns><c>next</c> when the task cannot be handled here; otherwise <c>forward</c>.</returns>
        public override IPipelineNode process(IPipelineTask task)
        {
            var realTask = task as pipelineTask <pipelineTaskMCSiteSubject>;

            if (realTask == null)
            {
                return(next);
            }

            pipelineTaskMCSiteSubject realSubject = realTask.subject;

            // The parent may be of another subject type, in which case the cast yields null.
            var repoSubject = realSubject.parent as pipelineTaskMCRepoSubject;

            var repo = (repoSubject == null) ? null : repoSubject.MCRepo;

            if (repo == null)
            {
                task.context.logger.log("MCRepo is null at [" + task.GetStringInfo() + "]");

                // Without a repository there is nothing to load; previously execution continued into a null dereference.
                return(next);
            }

            List <imbMCWebPage> listPages = repo.GetAllWebPages(realSubject.MCSite, null, takeSetup);

            if (doFilterOutDuplicates)
            {
                listPages = listPages.GetUniquePages();
            }

            if (doSortPagesByTextSize)
            {
                listPages.Sort(SortByPageSize);
            }

            foreach (imbMCWebPage page in listPages)
            {
                var mCPageSubject = new pipelineTaskMCPageSubject();

                // Each page gets its own document, attached to the site's content element.
                imbMCDocument doc = new imbMCDocument();
                doc.webPage = page;
                realSubject.mcElement.Add(doc);

                mCPageSubject.mcElement = doc;
                mCPageSubject.MCPage    = page;
                mCPageSubject.parent    = realSubject;

                realSubject.Add(mCPageSubject);

                pipelineTask <pipelineTaskMCPageSubject> taskForPage = new pipelineTask <pipelineTaskMCPageSubject>(mCPageSubject);

                task.context.scheduledTasks.Push(taskForPage);
            }

            return(forward);
        }
        /// <summary>
        /// Forwards the task when the flag bag of its subject satisfies the flag query
        /// (<c>flags</c> evaluated with <c>queryType</c>); otherwise sends it to next.
        /// </summary>
        /// <param name="task">The task.</param>
        /// <returns><c>forward</c> on a positive query; <c>next</c> otherwise or when the task is not a <c>pipelineTask&lt;T&gt;</c>.</returns>
        public override IPipelineNode process(IPipelineTask task)
        {
            pipelineTask <T> realTask = task as pipelineTask <T>;

            // A task of another type cannot be queried; pass it on instead of dereferencing null.
            if (realTask == null)
            {
                return(next);
            }

            if (realTask.subject.flagBag.ContainsByEnum(flags.ToArray(), queryType))
            {
                return(forward);
            }

            return(next);
        }
Exemplo n.º 12
0
        /// <summary>
        /// Flags a content-token subject according to the HTML tag names reported by the HTML node of its
        /// content element: <c>a</c> adds <c>cnt_containerType.link</c>; <c>title</c>, <c>h</c> and
        /// <c>h1</c>-<c>h6</c> add <c>cnt_containerType.title</c>.
        /// </summary>
        /// <param name="task">The task.</param>
        /// <returns><c>next</c> when the task does not carry a <see cref="pipelineTaskSubjectContentToken"/>; otherwise <c>forward</c>.</returns>
        public override IPipelineNode process(IPipelineTask task)
        {
            var tokenTask = task as pipelineTask <pipelineTaskSubjectContentToken>;
            if (tokenTask == null)
            {
                return next;
            }

            pipelineTaskSubjectContentToken subject = tokenTask.subject;

            // Subjects without a content element or without an HTML node are forwarded untouched.
            if (subject.mcElement == null)
            {
                return forward;
            }

            HtmlNode htmlNode = subject.mcElement.htmlNode;
            if (htmlNode == null)
            {
                return forward;
            }

            List <String> tagNames = htmlNode.GetTagNames();

            foreach (var tagName in tagNames)
            {
                switch (tagName)
                {
                case "a":
                    subject.flagBag.AddUnique(cnt_containerType.link);
                    break;

                case "title":
                case "h":
                case "h1":
                case "h2":
                case "h3":
                case "h4":
                case "h5":
                case "h6":
                    subject.flagBag.AddUnique(cnt_containerType.title);
                    break;
                }
            }

            return forward;
        }
Exemplo n.º 13
0
 /// <summary>
 /// Stamps the task with the current UTC time, assigns the user id when one is supplied,
 /// and passes the task to the repository's <c>CreateAsync</c>. A <c>null</c> task is silently ignored.
 /// </summary>
 /// <param name="task">The task to create; may be <c>null</c>.</param>
 /// <param name="userId">The id of the owning user; ignored when null, empty or whitespace.</param>
 public async Task CreateAsync(IPipelineTask task, string userId)
 {
     if (task != null)
     {
         task.CreatedAt = DateTime.UtcNow;

         bool hasUserId = !string.IsNullOrWhiteSpace(userId);
         if (hasUserId)
         {
             task.UserId = userId;
         }

         await _repository.CreateAsync(task);
     }
 }
Exemplo n.º 14
0
 /// <summary>
 /// Appends the task to the end of the internal linked list; the first enqueued task also becomes the root.
 /// </summary>
 /// <param name="task">The task to enqueue.</param>
 public void EnqueueTask(IPipelineTask <TContext> task)
 {
     var item = new TaskLinkedListItem(task);

     if (_rootTask == null)
     {
         _rootTask = item;
     }
     else
     {
         _lastTask.NextTask = item;
     }

     // In both cases the new item is now the tail of the list.
     _lastTask = item;
 }
Exemplo n.º 15
0
        /// <summary>
        /// Task builder for the page level of subject: parses the HTML source of the page, lets the block
        /// composer split it into blocks and schedules one block-level task per block.
        /// Sends to next if the task is not with <see cref="pipelineTaskMCPageSubject"/>.
        /// </summary>
        /// <param name="task">The task.</param>
        /// <returns><c>next</c> when the task is of another type; otherwise <c>forward</c>.</returns>
        public override IPipelineNode process(IPipelineTask task)
        {
            pipelineTask <pipelineTaskMCPageSubject> realTask = task as pipelineTask <pipelineTaskMCPageSubject>;

            if (realTask == null)
            {
                return(next);
            }

            pipelineTaskMCPageSubject realSubject = realTask.subject;

            HtmlDocument html = new HtmlDocument();

            html.LoadHtml(realSubject.MCPage.HtmlSourceCode);

            realSubject.htmlDocument = html;

            List <imbMCBlock> blocks = blockComposer.process(html, realSubject.name);

            if (!blocks.Any())
            {
                // The parent is only used for the log message; it may be missing or of another type,
                // so fall back to the page subject's own name instead of dereferencing null.
                pipelineTaskMCSiteSubject siteSubject = realSubject.parent as pipelineTaskMCSiteSubject;
                var ownerName = (siteSubject != null) ? siteSubject.name : realSubject.name;

                task.context.logger.log("Block composer returned zero blocks for [" + ownerName + "]");
            }

            foreach (imbMCBlock block in blocks)
            {
                pipelineTaskSubjectContentToken tokenSubject = new pipelineTaskSubjectContentToken();
                tokenSubject.name             = block.name;
                tokenSubject.contentLevelType = flags.token.cnt_level.mcBlock;
                tokenSubject.mcElement        = block;
                tokenSubject.currentForm      = block.content;

                // NOTE(review): unlike the stream and token builders, parent is not assigned here - confirm that Add() sets it.
                realSubject.mcElement.Add(tokenSubject.mcElement);
                realSubject.Add(tokenSubject);

                pipelineTask <pipelineTaskSubjectContentToken> taskForElement = new pipelineTask <pipelineTaskSubjectContentToken>(tokenSubject);

                task.context.scheduledTasks.Push(taskForElement);
            }

            return(forward);
        }
Exemplo n.º 16
0
        /// <summary>
        /// Tests the current form of a content-token subject against the <c>test</c> regular expression.
        /// On a match the task is forwarded and, depending on the <c>testType</c> flags, the subject is tagged
        /// with <c>tagsToApply</c>, its current form is replaced by capture group <c>groupToCurrent</c>,
        /// and/or the regex replacement is applied to its current form.
        /// </summary>
        /// <param name="task">The task.</param>
        /// <returns><c>forward</c> when the expression matches; <c>next</c> otherwise or when the task is of another type.</returns>
        public override IPipelineNode process(IPipelineTask task)
        {
            var realTask = task as pipelineTask <pipelineTaskSubjectContentToken>;

            if (realTask == null)
            {
                return(next);
            }

            if (!test.IsMatch(realTask.subject.currentForm))
            {
                return(next);
            }

            if (testType.HasFlag(pipelineRegexTestTypeEnum.tagger))
            {
                realTask.subject.flagBag.AddRange(tagsToApply);
            }

            if (testType.HasFlag(pipelineRegexTestTypeEnum.groupreplacer))
            {
                var mch = test.Match(realTask.subject.currentForm);

                // Valid group indexes are 0 .. Count-1. With ">=" an index equal to Count slipped through,
                // the indexer returned an empty group and the current form was overwritten with "".
                if (mch.Groups.Count > groupToCurrent)
                {
                    realTask.subject.currentForm = mch.Groups[groupToCurrent].Value;
                }
            }

            if (testType.HasFlag(pipelineRegexTestTypeEnum.replacer))
            {
                realTask.subject.currentForm = test.Replace(realTask.subject.currentForm, replacement);
            }

            return(forward);
        }
        /// <summary>
        /// Splits a token-stream subject into tokens using <c>tokenComposer</c> and schedules one
        /// token-level task for every token produced.
        /// </summary>
        /// <param name="task">The task.</param>
        /// <returns>
        /// <c>next</c> when the task does not carry a <see cref="pipelineTaskSubjectContentToken"/> at
        /// token-stream level; otherwise <c>forward</c>.
        /// </returns>
        public override IPipelineNode process(IPipelineTask task)
        {
            var streamTask = task as pipelineTask <pipelineTaskSubjectContentToken>;
            if (streamTask == null)
            {
                return next;
            }

            pipelineTaskSubjectContentToken streamSubject = streamTask.subject;
            if (streamSubject.contentLevelType != flags.token.cnt_level.mcTokenStream)
            {
                return next;
            }

            List <imbMCToken> composedTokens = tokenComposer.process(streamSubject.mcElement as imbMCStream);

            foreach (imbMCToken composed in composedTokens)
            {
                var child = new pipelineTaskSubjectContentToken();
                child.mcElement = composed;

                // Register the token both in the content tree and in the subject tree.
                streamSubject.mcElement.Add(composed);

                child.name             = composed.name;
                child.contentLevelType = flags.token.cnt_level.mcToken;
                child.parent           = streamSubject;
                child.currentForm      = composed.content;

                streamSubject.Add(child);

                var scheduled = new pipelineTask <pipelineTaskSubjectContentToken>(child);
                task.context.scheduledTasks.Push(scheduled);
            }

            return forward;
        }
Exemplo n.º 18
0
        /// <summary>
        /// Runs <c>resolver</c> on the current form of a content-token subject and adds the result to the
        /// subject's flag bag.
        /// </summary>
        /// <param name="task">The task.</param>
        /// <returns><c>next</c> when the task does not carry a <see cref="pipelineTaskSubjectContentToken"/>; otherwise <c>forward</c>.</returns>
        public override IPipelineNode process(IPipelineTask task)
        {
            var tokenTask = task as pipelineTask <pipelineTaskSubjectContentToken>;
            if (tokenTask == null)
            {
                return next;
            }

            pipelineTaskSubjectContentToken subject = tokenTask.subject;

            var resolved = resolver.process(subject.currentForm);

            // The meaning of the second argument (true) is defined by the flag bag API - not visible here.
            subject.flagBag.AddRange(resolved, true);

            return forward;
        }
Exemplo n.º 19
0
        /// <summary>
        /// Language filter for token-level subjects. The current form is evaluated; when the evaluated
        /// languages contain <c>languagePrimary</c>, all of them are added to the subject's flag bag and the
        /// task is forwarded, otherwise the task is sent to the trash bin.
        /// </summary>
        /// <param name="task">The task.</param>
        /// <returns>
        /// <c>next</c> when the task does not carry a <see cref="pipelineTaskSubjectContentToken"/> at token
        /// level; <c>task.model.trashBin</c> when the primary language is not detected; otherwise <c>forward</c>.
        /// </returns>
        public override IPipelineNode process(IPipelineTask task)
        {
            pipelineTask <pipelineTaskSubjectContentToken> realTask = task as pipelineTask <pipelineTaskSubjectContentToken>;

            if (realTask == null)
            {
                return(next);
            }

            pipelineTaskSubjectContentToken realSubject = realTask.subject;

            if (realSubject.contentLevelType != cnt_level.mcToken)
            {
                return(next);
            }

            var tkns = mLanguageEval.GetAllProperTokensSortedByFrequency(realSubject.currentForm, settings.tokenLengthMin);

            var mle = mLanguageEval.evaluate(settings, tkns);

            if (!mle.languageEnums.Contains(languagePrimary))
            {
                return(task.model.trashBin);
            }

            // The values are copied into a List<Object> before being handed to the flag bag.
            List <Object> l = new List <object>();
            mle.languageEnums.ForEach(x => l.Add(x));
            realSubject.flagBag.AddRange(l);

            return(forward);
        }
        /// <summary>
        /// Splits a block-level subject into token streams using <c>streamComposer</c> and schedules one
        /// stream-level task for every stream produced.
        /// </summary>
        /// <param name="task">The task.</param>
        /// <returns>
        /// <c>next</c> when the task does not carry a <see cref="pipelineTaskSubjectContentToken"/> at block
        /// level; otherwise <c>forward</c>.
        /// </returns>
        public override IPipelineNode process(IPipelineTask task)
        {
            var blockTask = task as pipelineTask <pipelineTaskSubjectContentToken>;
            if (blockTask == null)
            {
                return next;
            }

            pipelineTaskSubjectContentToken blockSubject = blockTask.subject;
            if (blockSubject.contentLevelType != flags.token.cnt_level.mcBlock)
            {
                return next;
            }

            imbMCBlock block = blockSubject.mcElement as imbMCBlock;

            var composedStreams = streamComposer.process(block);

            foreach (imbMCStream composed in composedStreams)
            {
                var child = new pipelineTaskSubjectContentToken();
                child.contentLevelType = flags.token.cnt_level.mcTokenStream;
                child.mcElement        = composed;
                child.name             = composed.name;

                // Initial and current form both start out as the raw stream content.
                child.currentForm = composed.content;
                child.initialForm = composed.content;
                child.parent      = blockSubject;
                blockSubject.Add(child);

                var scheduled = new pipelineTask <pipelineTaskSubjectContentToken>(child);
                task.context.scheduledTasks.Push(scheduled);
            }

            return forward;
        }
Exemplo n.º 21
0
        /// <summary>
        /// Replaces the current form of a content-token subject with the output of <c>resolver</c>.
        /// </summary>
        /// <param name="task">The task.</param>
        /// <returns>
        /// <c>next</c> when the subject is not a <see cref="pipelineTaskSubjectContentToken"/> or its current
        /// form is null or empty; otherwise <c>forward</c>.
        /// </returns>
        public override IPipelineNode process(IPipelineTask task)
        {
            var tokenSubject = task.subject as pipelineTaskSubjectContentToken;

            // Nothing to resolve for foreign subject types or for subjects without any text.
            if (tokenSubject == null || tokenSubject.currentForm.isNullOrEmpty())
            {
                return next;
            }

            tokenSubject.currentForm = resolver.process(tokenSubject.currentForm);

            return forward;
        }
Exemplo n.º 22
0
 /// <summary>
 /// Creates a runtime wrapper that stores the given pipeline task in <c>Task</c>.
 /// </summary>
 /// <param name="task">The pipeline task to wrap.</param>
 public PipelineTaskRuntime(IPipelineTask <TContext> task)
 {
     Task = task;
 }
Exemplo n.º 23
0
        /// <summary>
        /// Task builder for <see cref="imbMCRepository"/> level of subject: loads the repository named by the
        /// subject, and schedules one site-level task for each of its web sites that passes the target filter.
        /// Sends to next if task is not with <see cref="pipelineTaskMCRepoSubject"/>
        /// </summary>
        /// <param name="task">The task.</param>
        /// <returns><c>next</c> when the task is of another type; otherwise <c>forward</c>.</returns>
        /// <exception cref="aceScienceException">
        /// Thrown when fewer sites were scheduled than targets were requested and some requested targets remain unmatched.
        /// </exception>
        public override IPipelineNode process(IPipelineTask task)
        {
            pipelineTask <pipelineTaskMCRepoSubject> realTask = task as pipelineTask <pipelineTaskMCRepoSubject>;

            if (realTask == null)
            {
                return(next);
            }

            pipelineTaskMCRepoSubject realSubject = realTask.subject;

            folderNode repoFolder = appManager.Application.folder_resources.Add(imbMCManager.MCRepo_DefaultDirectoryName, "MC Repositories", "Root directory with repositories of Crawled Web content");

            imbMCRepository           repo    = realSubject.MCRepoName.LoadDataStructure <imbMCRepository>(repoFolder, task.context.logger);
            imbMCDocumentRepositorium docRepo = new imbMCDocumentRepositorium();

            docRepo.webRepository = repo;
            realSubject.mcElement = docRepo;
            realSubject.MCRepo    = repo;

            List <imbMCWebSite> websites = repo.GetAllWebSites(task.context.logger, takeSetup);

            if (!websites.Any())
            {
                task.context.logger.log(this.name + " Failed --- no web sites loaded");
            }

            // Clean-case names of the requested target sites; an empty list means "take every site".
            List <String> needle = new List <string>();

            realSubject.MCSiteTargets.ForEach(x => needle.Add(pipelineSubjectTools.GetCleanCaseName(x)));

            // Names of the sites already scheduled, used to skip duplicates.
            List <String> urls = new List <string>();

            foreach (imbMCWebSite site in websites)
            {
                String sName = pipelineSubjectTools.GetCleanCaseName(site.domainInfo.urlProper);

                Boolean isTargeted = !realSubject.MCSiteTargets.Any() || needle.Contains(sName);

                if (!isTargeted || urls.Contains(sName))
                {
                    continue;
                }

                pipelineTaskMCSiteSubject mCSiteSubject = new pipelineTaskMCSiteSubject();
                mCSiteSubject.MCSite = site;

                imbMCDocumentSet docSet = new imbMCDocumentSet();

                docRepo.Add(docSet);
                mCSiteSubject.mcElement        = docSet;
                mCSiteSubject.contentLevelType = flags.token.cnt_level.mcSite;
                mCSiteSubject.name             = sName;
                mCSiteSubject.parent           = realSubject;
                realSubject.Add(mCSiteSubject);

                urls.Add(mCSiteSubject.name);

                pipelineTask <pipelineTaskMCSiteSubject> taskForSite = new pipelineTask <pipelineTaskMCSiteSubject>(mCSiteSubject);

                task.context.scheduledTasks.Push(taskForSite);
            }

            if (urls.Count < needle.Count)
            {
                // Whatever remains in needle after removing the scheduled names was requested but never matched.
                urls.ForEach(x => needle.Remove(x));

                if (needle.Any())
                {
                    String nd = "";
                    needle.ForEach(x => nd += x + " ");

                    throw new aceScienceException("Some sites are not found in the MC Repository!! [" + nd + "]", null, realSubject, "Sites not loaded [" + nd + "]", this);
                }
            }

            return(forward);
        }
Exemplo n.º 24
0
 /// <summary>
 /// Extension helper that feeds <paramref name="input"/> into the task's <c>Process</c> method and returns its result.
 /// </summary>
 /// <param name="input">The value to process.</param>
 /// <param name="task">The task that processes the value.</param>
 /// <returns>Whatever <c>task.Process(input)</c> returns.</returns>
 public static OUTPUT Pipe <INPUT, OUTPUT>(this INPUT input, IPipelineTask <INPUT, OUTPUT> task)
 {
     OUTPUT result = task.Process(input);
     return result;
 }
 /// <summary>
 /// Processes the specified task. Re-declared as abstract, so every concrete subclass must supply its own implementation.
 /// </summary>
 /// <param name="task">The task.</param>
 /// <returns>The pipeline node selected by the implementation.</returns>
 public abstract override IPipelineNode process(IPipelineTask task);
Exemplo n.º 26
0
 /// <summary>
 /// Creates a switch task from a set of conditional cases and an optional default task.
 /// </summary>
 /// <param name="cases">The conditional tasks, stored as given.</param>
 /// <param name="default">The default task; <c>null</c> when omitted.</param>
 public SwitchPipelineTask(IEnumerable <ConditionalPipelineTask <TContext> > cases, IPipelineTask <TContext> @default = null)
 {
     _cases   = cases;
     _default = @default;
 }
        /// <summary>
        /// Redirects the task by <see cref="cnt_level"/> of its subject to the corresponding pipeline, when
        /// that pipeline is not null.
        /// </summary>
        /// <param name="task">The task.</param>
        /// <returns>
        /// The pipeline matching the subject's content level; <c>forward</c> when that pipeline is null;
        /// <c>next</c> for levels without a dedicated pipeline or when the subject is not a
        /// <see cref="pipelineTaskSubjectContentToken"/>.
        /// </returns>
        public override IPipelineNode process(IPipelineTask task)
        {
            pipelineTaskSubjectContentToken realSubject = task.subject as pipelineTaskSubjectContentToken;

            if (realSubject == null)
            {
                if (task.context.RunInDebugMode)
                {
                    // The subject itself may be null, so do not call GetType() on it blindly.
                    String subjectTypeName = (task.subject == null) ? "null" : task.subject.GetType().Name;

                    task.context.logger.log("Node " + name + " received a task [" + task.GetType().Name + "] with non compatibile task subject [" + subjectTypeName + "]");
                }

                // An incompatible subject cannot be routed; previously execution fell through into a null dereference.
                return(next);
            }

            switch (realSubject.contentLevelType)
            {
            case cnt_level.mcBlock:
                if (blockPipeline != null)
                {
                    return(blockPipeline);
                }
                break;

            case cnt_level.mcChunk:
                if (chunkPipeline != null)
                {
                    return(chunkPipeline);
                }
                break;

            case cnt_level.mcPage:
                if (pagePipeline != null)
                {
                    return(pagePipeline);
                }
                break;

            case cnt_level.mcRepo:
                if (repoPipeline != null)
                {
                    return(repoPipeline);
                }
                break;

            case cnt_level.mcSite:
                if (sitePipeline != null)
                {
                    return(sitePipeline);
                }
                break;

            case cnt_level.mcToken:
                if (tokenPipeline != null)
                {
                    return(tokenPipeline);
                }
                break;

            case cnt_level.mcTokenStream:
                if (streamPipeline != null)
                {
                    return(streamPipeline);
                }
                break;

            default:
            case cnt_level.none:
            case cnt_level.mcSubtoken:
                return(next);
            }

            // Reached only when the pipeline for the subject's level is null.
            return(forward);
        }
Exemplo n.º 28
0
 /// <summary>
 /// Process call: the task is not inspected, the node always answers with <c>forward</c>.
 /// </summary>
 /// <param name="task">The task (unused).</param>
 /// <returns>Always <c>forward</c>.</returns>
 public override IPipelineNode process(IPipelineTask task)
 {
     return forward;
 }
Exemplo n.º 29
0
 /// <summary>
 /// Creates a for-each task from an element selector and the task to associate with the selected elements.
 /// </summary>
 /// <param name="selector">Function that yields the elements for a given context.</param>
 /// <param name="task">The task stored alongside the selector; presumably run once per element - confirm in the execution code.</param>
 public ForEachTask(Func <TContext, IEnumerable <TElement> > selector, IPipelineTask <IForEachTaskContext <TContext, TElement> > task)
 {
     _selector = selector;
     _task     = task;
 }
Exemplo n.º 30
0
 /// <summary>
 /// Process call: examines the task and decides which pipeline node should receive it.
 /// Must be implemented by every concrete node.
 /// </summary>
 /// <param name="task">The task.</param>
 /// <returns>The pipeline node selected by the implementation.</returns>
 public abstract IPipelineNode process(IPipelineTask task);