/// <summary>
/// Runs the imbBasic template detection method over a set of crawled pages.
/// </summary>
/// <remarks>
/// Steps, in order: common tree detection, common content check, content preparation,
/// optional recording of the page URLs, and scoring. The score is set to the number of
/// pages in <paramref name="source"/>. If the resulting xPath structure is empty, a
/// "Template detection failed!" notification is logged and the (empty) definition is
/// still returned.
/// </remarks>
/// <param name="source">Array of loaded (crawled) pages</param>
/// <param name="settings">Template detection settings</param>
/// <returns>The assembled template definition</returns>
public static templateDefinition detectTemplate_imbBasic(crawledPage[] source, imbWebTemplateSettings settings)
{
    templateDefinition output = new templateDefinition();

    // COMMON TREE DETECTION
    // Only one method is handled here; every other enum value falls through to it as well.
    switch (settings.commonTreeDetection)
    {
        default:
        case commonTreeMethod.imbEndNodePathFrequency:
            output.xPathStruktura = templateOperations.commonTree_imbENPF(source, settings);
            break;
    }

    // NOTE: an unused "source.First()" lookup used to live here; it was removed because
    // it threw on an empty page array although its result was never read.

    // COMMON CONTENT CHECK
    output.xPathStruktura = templateOperations.commonContentCheck(source, output.xPathStruktura, settings);

    templateExtensions.prepareContent(output, settings);

    // Page track: remember which pages this template was derived from.
    if (settings.doSavePageUrls)
    {
        foreach (crawledPage p in source)
        {
            output.relatedPages.Add(p.url);
        }
    }

    output.score = source.Length;

    if (output.xPathStruktura.Count == 0)
    {
        logSystem.log("Template detection failed!", logType.Notification);
    }

    return output;
}
/// <summary>
/// Part of the imbBasic methodology: content preparation step for a template definition.
/// Currently a no-op; the method body contains no executable statements.
/// </summary>
/// <param name="template">Template definition whose content should be prepared</param>
/// <param name="settings">Settings</param>
public static void prepareContent(templateDefinition template, imbWebTemplateSettings settings)
{
    // Intentionally empty.
    //
    // The earlier implementation is disabled. It built a skeleton XML document for
    // template.templateXML, walked template.xPathStruktura and, depending on
    // settings.doMakeTEXT / settings.doMakeXML / settings.doMakeHTML, appended each
    // element's content to a text buffer, created XML nodes by xPath, and copied the
    // document's OuterXml into template.templateHTML. It relied on a namespace setup
    // object (nsSetup) that is not available in this method.
}
/// <summary>
/// Creates the unique key (uniKey) for a template. Currently always returns an empty
/// string; none of the arguments are read.
/// </summary>
/// <param name="input">Crawled pages (currently unused)</param>
/// <param name="settings">Settings (currently unused)</param>
/// <param name="template">Template definition (currently unused)</param>
/// <returns>An empty string</returns>
public static string makeUniKey(crawledPage[] input, imbWebTemplateSettings settings, templateDefinition template)
{
    // The key-generation logic is disabled. It used to switch on settings.uniKeyMode
    // (idToString, tokenizePageTitles, tokenizeTemplateContent), falling back to the
    // first entry of a sorted keyword score list trimmed to 10 characters, and logged
    // the created key.
    return string.Empty;
}