示例#1
0
        /// <summary>
        /// Primary flagging pass - works from the micro level towards the macro level.
        /// </summary>
        /// <remarks>
        /// Flagging relies only on this element's own data and on the data of its items.
        /// After delegating to the base implementation, three link-related flags may be set on
        /// <c>sentenceFlags</c>: <c>navigationContainer</c> (the single item is an "a" element),
        /// <c>navigationLink</c> (this element is an "a" element) and <c>titleForLink</c>
        /// (the parent is an "a" element). Tag names are compared case-sensitively.
        /// </remarks>
        /// <param name="resources">Resources forwarded unchanged to the base implementation.</param>
        public override void primaryFlaging(params object[] resources)
        {
            base.primaryFlaging(resources);
            // the algorithm specific to this type goes below

            // Exactly one item, and that item is an anchor: this element merely wraps a link.
            IHtmlContentElement onlyChild = (items.Count == 1) ? items.First() as IHtmlContentElement : null;
            if (onlyChild != null && onlyChild.htmlTag == "a")
            {
                sentenceFlags |= contentSentenceFlag.navigationContainer;
            }

            // The element itself is an anchor.
            if (htmlTag == "a")
            {
                sentenceFlags |= contentSentenceFlag.navigationLink;
            }

            // The element sits directly inside an anchor.
            IHtmlContentElement htmlParent = parent as IHtmlContentElement;
            if (htmlParent != null && htmlParent.htmlTag == "a")
            {
                sentenceFlags |= contentSentenceFlag.titleForLink;
            }
        }
        /// <summary>
        /// Gathers the <c>items</c> of every element in <paramref name="input"/>, repeats that for the gathered
        /// elements while depth remains, and returns everything together with <paramref name="input"/> itself.
        /// </summary>
        /// <remarks>
        /// All appends go through <c>AddMulti</c>, so duplicate handling is whatever that helper does.
        /// NOTE(review): <paramref name="item"/> is only consulted for <c>items.Count</c> and as the receiver of
        /// the recursive call; its own items are never appended directly - confirm that callers seed
        /// <paramref name="input"/> accordingly.
        /// </remarks>
        /// <param name="item">Element the extension is invoked on; recursion is skipped when it has no items.</param>
        /// <param name="input">Elements whose items are gathered; <c>null</c> is treated as an empty list.</param>
        /// <param name="index">Remaining recursion depth; no further recursion happens once it is zero or less.</param>
        /// <returns>A new list; <paramref name="input"/> is not modified.</returns>
        public static List <IHtmlContentElement> getChildern(this IHtmlContentElement item, List <IHtmlContentElement> input, int index = 5)
        {
            List <IHtmlContentElement> source    = input ?? new List <IHtmlContentElement>();
            List <IHtmlContentElement> collected = new List <IHtmlContentElement>();

            // One level down: the items of each supplied element.
            foreach (IHtmlContentElement element in source)
            {
                collected.AddMulti(element.items);
            }

            // Deeper levels: feed what was collected so far back in as the next input.
            bool mayDescend = index > 0 && item.items.Count > 0;
            if (mayDescend)
            {
                List <IHtmlContentElement> deeper = item.getChildern(collected, index - 1);
                collected.AddMulti(deeper);
            }

            // The supplied elements themselves are appended last.
            collected.AddMulti(source);

            return collected;
        }
        /// <summary>
        /// Wraps <paramref name="rootLink"/> in a new <c>htmlLinkNode</c> and registers that node with this instance.
        /// </summary>
        /// <remarks>
        /// The node is passed to <c>Add</c>, its <c>scoped</c> entries are merged into this instance's
        /// <c>scoped</c> via <c>AddMulti</c>, and it is stored in <c>byUrl</c> unless that URL key is already present.
        /// </remarks>
        /// <param name="rootLink">Element handed to the <c>htmlLinkNode</c> constructor.</param>
        /// <returns>The newly created node, whether or not it was stored in <c>byUrl</c>.</returns>
        public new htmlLinkNode Add(IHtmlContentElement rootLink)
        {
            htmlLinkNode node = new htmlLinkNode(rootLink);

            Add(node);
            scoped.AddMulti(node.scoped);

            // The first node registered for a URL keeps the byUrl slot; later ones are not indexed.
            bool urlAlreadyIndexed = byUrl.ContainsKey(node.url);
            if (urlAlreadyIndexed)
            {
                return node;
            }

            byUrl.Add(node.url, node);
            return node;
        }
 /// <summary>
 /// Finds the nearest ancestor of <paramref name="item"/> that is an anchor (<c>a</c>) element.
 /// </summary>
 /// <remarks>
 /// The ancestor chain is followed through <c>parent</c> for as long as each parent is an
 /// <see cref="IHtmlContentElement"/>. <paramref name="item"/> itself is never tested.
 /// </remarks>
 /// <param name="item">The element whose ancestors are searched.</param>
 /// <returns>
 /// The closest ancestor whose <c>htmlTag</c> equals <c>htmlTagName.a</c> (ordinal, case-insensitive),
 /// or <c>null</c> when the chain ends, or reaches a parent that is not an
 /// <see cref="IHtmlContentElement"/>, before such an ancestor is found.
 /// </returns>
 public static IHtmlContentElement linkRootParent(this IHtmlContentElement item)
 {
     string anchorTag = htmlTagName.a.ToString();

     // Walk upwards instead of recursing: same result, no dependence on stack depth.
     IHtmlContentElement ancestor = item.parent as IHtmlContentElement;
     while (ancestor != null)
     {
         // Ordinal, case-insensitive and null-safe: an ancestor without a tag is simply skipped.
         if (string.Equals(ancestor.htmlTag, anchorTag, System.StringComparison.OrdinalIgnoreCase))
         {
             return(ancestor);
         }

         ancestor = ancestor.parent as IHtmlContentElement;
     }

     return(null);
 }
 /// <summary>
 /// Creates a link node for the given element: stores it in <c>linkRootParent</c> and then calls <c>evaluate()</c>.
 /// </summary>
 /// <param name="__linkRoot">
 /// Element assigned to <c>linkRootParent</c>.
 /// NOTE(review): presumably the anchor element this node is built around - confirm against callers.
 /// </param>
 public htmlLinkNode(IHtmlContentElement __linkRoot)
 {
     linkRootParent = __linkRoot;
     // NOTE(review): evaluate() is not visible here; presumably it reads linkRootParent,
     // so the assignment above has to come first - confirm.
     evaluate();
 }
示例#6
0
        //public static contentElementList tokenizeUrlAndTitle(String url, String title, String description="")
        //{
        //    contentElementList output = new contentStructure.collections.contentElementList();



        //}



        /// <summary>
        /// Builds sentences from an <c>HtmlNode</c> and returns them as a collection -- used for top-level
        /// sentences as well as for sub-sentences.
        /// </summary>
        /// <remarks>
        /// The nodes processed are the acceptable child nodes of <paramref name="htmlNode"/> (per
        /// <c>isNodeAcceptable()</c>), or <paramref name="htmlNode"/> itself when it has no child nodes.
        /// A node that has child nodes becomes a container sentence with empty content, and its
        /// sub-sentences are produced by a recursive call that uses the container as parent. A node without
        /// child nodes has its trimmed <c>InnerText</c> split by <c>splitContentToSentences</c>, and every
        /// non-empty part becomes a sentence. Each sentence created at this level is added to
        /// <paramref name="output"/> and attached to <paramref name="parent"/> through <c>setItem</c>.
        /// </remarks>
        /// <param name="htmlNode">The HTML node to read sentences from.</param>
        /// <param name="parent">Element that receives every sentence created at this level; must not be <c>null</c>.</param>
        /// <param name="output">Collection to append to; a new one is created when <c>null</c>.</param>
        /// <param name="preprocessFlags">Passed to <c>preprocess.process</c> when preprocessing is enabled by <paramref name="flags"/>.</param>
        /// <param name="flags">Detection flags; only <c>preprocessParagraphContent</c> is examined in this method.</param>
        /// <returns><paramref name="output"/>, or the newly created collection, holding the sub-sentences and sentences that were added.</returns>
        public static contentTokenCollection createSentencesFromNode(this HtmlNode htmlNode, IHtmlContentElement parent,
                                                                     contentTokenCollection output         = null,
                                                                     contentPreprocessFlag preprocessFlags = contentPreprocessFlag.none,
                                                                     sentenceDetectionFlag flags           = sentenceDetectionFlag.none)
        {
            if (output == null)
            {
                output = new contentTokenCollection();
            }

            // Decide which nodes to process: acceptable children, or the node itself when it is a leaf.
            List <HtmlNode> nodes = new List <HtmlNode>();
            if (htmlNode.HasChildNodes)
            {
                foreach (HtmlNode candidate in htmlNode.ChildNodes)
                {
                    if (candidate.isNodeAcceptable())
                    {
                        nodes.Add(candidate);
                    }
                }
            }
            else
            {
                nodes.Add(htmlNode);
            }

            foreach (HtmlNode node in nodes)
            {
                if (node.ChildNodes.Count > 0)
                {
                    // Container node: recurse with a fresh collection (output: null) and the container as parent,
                    // then list the sub-sentences before the container itself.
                    htmlContentSentence    containerSentence = new htmlContentSentence(node, "");
                    contentTokenCollection subSentences      = node.createSentencesFromNode(containerSentence, null,
                                                                                            preprocessFlags, flags);
                    output.AddRange(subSentences);
                    output.Add(containerSentence);
                    parent.setItem(containerSentence);
                    continue;
                }

                // Leaf node: split its text into sentences.
                string input = node.InnerText.Trim();
                if (flags.HasFlag(sentenceDetectionFlag.preprocessParagraphContent))
                {
                    input = preprocess.process(input, preprocessFlags);
                }

                foreach (string sentenceText in splitContentToSentences(input))
                {
                    if (string.IsNullOrEmpty(sentenceText))
                    {
                        continue;
                    }

                    htmlContentSentence newSentence = new htmlContentSentence(node, sentenceText);

                    // A single Match call both detects the terminator and yields its text.
                    Match terminator = _select_sentenceTerminator.Match(sentenceText);
                    if (terminator.Success)
                    {
                        newSentence.sentenceFlags |= contentSentenceFlag.regular;
                        newSentence.spliter        = terminator.Value;

                        // Cuts as many characters off the end as the terminator is long.
                        // NOTE(review): assumes the terminator match sits at the very end of the text -
                        // confirm against the pattern of _select_sentenceTerminator.
                        newSentence.content = sentenceText.Substring(0,
                                                                     sentenceText.Length -
                                                                     newSentence.spliter.Length);
                        newSentence.content = newSentence.content.Trim();
                    }
                    else
                    {
                        newSentence.sentenceFlags |= contentSentenceFlag.inregular;
                    }

                    output.Add(newSentence);
                    parent.setItem(newSentence);
                }
            }

            return(output);
        }