Ejemplo n.º 1
0
        /// <summary>
        /// Runs the imbBasic template detection method over a set of crawled pages.
        /// </summary>
        /// <remarks>
        /// Steps, in order: common-tree detection, common-content check,
        /// <c>templateExtensions.prepareContent</c>, optional recording of the page URLs,
        /// and scoring (the score is the number of pages in <paramref name="source"/>).
        /// When the resulting xPath structure is empty a notification is logged, but the
        /// definition is still returned.
        /// </remarks>
        /// <param name="source">Loaded pages to analyse.</param>
        /// <param name="settings">Template detection settings.</param>
        /// <returns>The populated template definition.</returns>
        public static templateDefinition detectTemplate_imbBasic(crawledPage[] source, imbWebTemplateSettings settings)
        {
            templateDefinition output = new templateDefinition();

            // COMMON TREE DETECTION
            // The default label shares the imbEndNodePathFrequency case, so every value
            // of the setting is routed to the same implementation here.
            switch (settings.commonTreeDetection)
            {
                default:
                case commonTreeMethod.imbEndNodePathFrequency:
                    output.xPathStruktura = templateOperations.commonTree_imbENPF(source, settings);
                    break;
            }

            // The result of this call used to be stored in a local that was never read.
            // The call itself is kept because its exception is the only emptiness check
            // in this method: presumably this is LINQ's Enumerable.First, which throws
            // InvalidOperationException for an empty array - TODO confirm before removing.
            source.First();

            //imbNamespaceSetup nsSetup = new imbNamespaceSetup(c.xmlDocument);

            // COMMON CONTENT CHECK
            output.xPathStruktura = templateOperations.commonContentCheck(source, output.xPathStruktura, settings);
            templateExtensions.prepareContent(output, settings);

            // PAGE TRACK
            // Remember which pages contributed to this template, if requested.
            if (settings.doSavePageUrls)
            {
                foreach (crawledPage page in source)
                {
                    output.relatedPages.Add(page.url);
                }
            }

            // The score is simply the number of pages the template was derived from.
            output.score = source.Length;

            // An empty structure is reported but not treated as an error.
            if (output.xPathStruktura.Count == 0)
            {
                logSystem.log("Template detection failed!", logType.Notification);
            }

            return output;
        }
Ejemplo n.º 2
0
 /// <summary>
 /// Part of the imbBasic methodology: prepares the content of a template definition.
 /// Currently a no-op - the whole implementation below is commented out.
 /// </summary>
 /// <param name="template">Template definition whose content should be set up</param>
 /// <param name="settings">Settings</param>
 public static void prepareContent(templateDefinition template, imbWebTemplateSettings settings)
 {
     /*
      * Disabled implementation, kept for reference.
      * NOTE(review): it relies on "nsSetup", which is neither a parameter nor a
      * local of this method, so it cannot be re-enabled as written.
      *
      * StringBuilder textMaker = new StringBuilder();
      * template.templateXML = new XmlDocument(nsSetup.namespaceManager.NameTable);
      *
      * String tmp = template.templateXML.OuterXml;
      *
      * template.templateXML.Prefix = nsSetup.nsPrefix;
      *
      *
      *
      * String basicXML = "<?xml version=\"1.0\" encoding=\"utf-16\"?>" + Environment.NewLine;
      * basicXML += "<span>" + Environment.NewLine;
      * basicXML += "<html xmlns=\"" + nsSetup.nsSourceUrl + "\" >" + Environment.NewLine;
      * basicXML += "</html>" + Environment.NewLine;
      * basicXML += "</span>" + Environment.NewLine;
      *
      * template.templateXML.LoadXml(basicXML);
      *
      *
      *
      *
      * foreach (KeyValuePair<String, templateElement> el in template.xPathStruktura)
      * {
      *  if (settings.doMakeTEXT)
      *  {
      *
      *      textMaker.AppendLine(el.Value.content);
      *  }
      *  if (settings.doMakeXML)
      *  {
      *     // imbXmlExtendedTools.makeNodeByxPath(template.templateXML, template.templateXML.DocumentElement, el.Key, el.Value.source, nsSetup);
      *  }
      * }
      *
      * if (settings.doMakeHTML)
      * {
      *  template.templateHTML = template.templateXML.OuterXml;
      * }
      * */
 }
Ejemplo n.º 3
0
        /// <summary>
        /// Builds the unique key for a template. Currently a stub: the key-building
        /// logic is commented out and an empty string is always returned.
        /// </summary>
        /// <param name="input">Crawled pages (not read by the current implementation).</param>
        /// <param name="settings">Template settings (not read by the current implementation).</param>
        /// <param name="template">Template definition (not read by the current implementation).</param>
        /// <returns>Always the empty string.</returns>
        public static string makeUniKey(crawledPage[] input, imbWebTemplateSettings settings,
                                        templateDefinition template)
        {
            // Disabled implementation, kept for reference. It chose a strategy from
            // settings.uniKeyMode and fell back to the first entry of a sorted keyword
            // list, trimmed to 10 characters, when no key had been produced.
            //
            //imbKeywordScoreList nameList = new imbKeywordScoreList();
            //
            //switch (settings.uniKeyMode)
            //{
            //    case uniKeyCreation.idToString:
            //        output = template.id.ToString("D3");
            //        break;
            //    case uniKeyCreation.tokenizePageTitles:
            //        foreach (crawledPage p in input)
            //        {
            //            nameList.addText(p.pageCaption, false, imbNLPengine.imbBasic);
            //        }
            //        break;
            //    case uniKeyCreation.tokenizeTemplateContent:
            //        foreach (crawledPage p in input)
            //        {
            //            nameList.addText(template.templateHTML, false, imbNLPengine.imbBasic);
            //        }
            //        break;
            //}
            //if (output == "")
            //{
            //   nameList.sort();
            //   output = imbCollectionHelpers.imbGetFirstValue<String>(nameList.getStringList(), "", false, 0);
            //   output = output.TrimToMaxLength(10, "");
            //}
            //
            //logSystem.log("UniKey created (" + settings.uniKeyMode + ") = " + output, logType.Execution);

            // With the logic above disabled there is nothing to compute.
            return "";
        }