Beispiel #1
0
 // Processes one element for RDFa attributes and then, unless an XML
 // literal was generated from it, processes its child elements.
 // Triples are added to outputGraph; the evaluation state is read from
 // (and temporarily replaced in) the "context" field.
 //   node - the element to process.
 //   root - true for the top-level call; recursive calls made by this
 //          method always pass false.
 // The "Step N" markers below presumably refer to the RDFa processing
 // sequence - TODO confirm which specification version is followed.
 private void process(IElement node, bool root)
 {
     // Per-element working state.  The namespace and IRI maps are copies of
     // the ones in the current context, so additions made for this element
     // are only handed on to its children (see Step 13).
     IList<RDFa.IncompleteTriple> incompleteTriplesLocal=new List<RDFa.IncompleteTriple>();
     string localLanguage=context.language;
     RDFTerm newSubject=null;
     bool recurse=true;
     bool skipElement=false;
     RDFTerm currentObject=null;
     IDictionary<string,string> namespacesLocal=
     new PeterO.Support.LenientDictionary<string,string>(context.namespaces);
     IDictionary<string,string> iriMapLocal=
     new PeterO.Support.LenientDictionary<string,string>(context.iriMap);
     string attr=null;
     // xml:base is only honored when the "xhtml" flag is off.  Note that this
     // writes to the current context object directly rather than to a copy.
     if(!xhtml){
       attr=node.getAttribute("xml:base");
       if(attr!=null){
     context.baseURI=URIUtility.relativeResolve(attr, context.baseURI);
       }
     }
     // Support XML namespaces
     foreach(var attrib in node.getAttributes()){
       string name=StringUtility.toLowerCaseAscii(attrib.getName());
       //Console.WriteLine(attrib);
       if(name.Equals("xmlns")){
     //Console.WriteLine("xmlns %s",attrib.getValue());
     iriMapLocal.Add("", attrib.getValue());
     namespacesLocal.Add("", attrib.getValue());
       } else if(name.StartsWith("xmlns:",StringComparison.Ordinal) && name.Length>6){
     string prefix=name.Substring(6);
     //Console.WriteLine("xmlns %s %s",prefix,attrib.getValue());
     // The "_" prefix is kept out of the IRI map but is still recorded
     // in the namespace map.
     if(!"_".Equals(prefix)){
       iriMapLocal.Add(prefix, attrib.getValue());
     }
     namespacesLocal.Add(prefix, attrib.getValue());
       }
     }
     // xml:lang on this element overrides the inherited language
     attr=node.getAttribute("xml:lang");
     if(attr!=null){
       localLanguage=attr;
     }
     // Support RDF/XML metadata
     // (an RDF element in the RDF namespace is handed to miniRdfXml and
     // receives no RDFa processing)
     if(node.getLocalName().Equals("RDF") &&
     RDF_NAMESPACE.Equals(node.getNamespaceURI())){
       miniRdfXml(node,context);
       return;
     }
     string rel=node.getAttribute("rel");
     string rev=node.getAttribute("rev");
     string property=node.getAttribute("property");
     string content=node.getAttribute("content");
     string datatype=node.getAttribute("datatype");
     if(rel==null && rev==null){
       // Step 4
       // No rel/rev: the subject candidate is taken from the first of
       // about, resource, href, src that yields a term.
       RDFTerm resource=getSafeCurieOrCurieOrIri(
       node.getAttribute("about"),iriMapLocal);
       if(resource==null){
     resource=getSafeCurieOrCurieOrIri(
     node.getAttribute("resource"),iriMapLocal);
       }
       if(resource==null){
     resource=relativeResolve(node.getAttribute("href"));
       }
       if(resource==null){
     resource=relativeResolve(node.getAttribute("src"));
       }
       // Fallbacks when no IRI subject was found: head/body elements, the
       // root element in non-XHTML mode, a blank node when typeof resolves,
       // and finally the parent object.
       if((resource==null || resource.getKind()!=RDFTerm.IRI)){
     string rdfTypeof=getCurie(node.getAttribute("typeof"),iriMapLocal);
     if(isHtmlElement(node, "head") ||
     isHtmlElement(node, "body")){
       resource=getSafeCurieOrCurieOrIri("",iriMapLocal);
     }
     if(resource==null && !xhtml && root){
       resource=getSafeCurieOrCurieOrIri("",iriMapLocal);
     }
     if(resource==null && rdfTypeof!=null){
       resource=generateBlankNode();
     }
     if(resource==null){
       if(context.parentObject!=null) {
     resource=context.parentObject;
       }
       // Without a property attribute the element is marked as skipped:
       // Step 10 is bypassed and children get a copy of the current
       // context (see Step 13).
       if(node.getAttribute("property")==null){
     skipElement=true;
       }
     }
     newSubject=resource;
       } else {
     newSubject=resource;
       }
     } else {
       // Step 5
       // rel or rev is present: the subject comes from about/src (with the
       // same fallbacks as Step 4, minus the skip flag), while
       // resource/href supply the object.
       RDFTerm resource=getSafeCurieOrCurieOrIri(
       node.getAttribute("about"),iriMapLocal);
       if(resource==null){
     resource=relativeResolve(node.getAttribute("src"));
       }
       if((resource==null || resource.getKind()!=RDFTerm.IRI)){
     string rdfTypeof=getCurie(node.getAttribute("typeof"),iriMapLocal);
     if(isHtmlElement(node, "head") ||
     isHtmlElement(node, "body")){
       resource=getSafeCurieOrCurieOrIri("",iriMapLocal);
     }
     if(resource==null && !xhtml && root){
       resource=getSafeCurieOrCurieOrIri("",iriMapLocal);
     }
     if(resource==null && rdfTypeof!=null){
       resource=generateBlankNode();
     }
     if(resource==null){
       if(context.parentObject!=null) {
     resource=context.parentObject;
       }
     }
     newSubject=resource;
       } else {
     newSubject=resource;
       }
       resource=getSafeCurieOrCurieOrIri(
       node.getAttribute("resource"),iriMapLocal);
       if(resource==null){
     resource=relativeResolve(node.getAttribute("href"));
       }
       currentObject=resource;
     }
     // Step 6
     // One (subject, RDFTerm.A, type) triple per typeof token that resolves
     if(newSubject!=null){
       string[] types=StringUtility.splitAtNonFFSpaces(node.getAttribute("typeof"));
       foreach(var type in types){
     string iri=getCurie(type,iriMapLocal);
     if(iri!=null){
       outputGraph.Add(new RDFTriple(
       newSubject,RDFTerm.A,
       RDFTerm.fromIRI(iri)
       ));
     }
       }
     }
     // Step 7
     // An object is known: rel tokens give subject->object triples and rev
     // tokens give object->subject triples.
     if(currentObject!=null){
       string[] types=StringUtility.splitAtNonFFSpaces(rel);
       foreach(var type in types){
     string iri=getRelTermOrCurie(type,
     iriMapLocal);
     #if DEBUG
     if(!(newSubject!=null))throw new InvalidOperationException("doesn't satisfy newSubject!=null");
     #endif
     if(iri!=null){
       outputGraph.Add(new RDFTriple(
       newSubject,
       RDFTerm.fromIRI(iri),currentObject
       ));
     }
       }
       types=StringUtility.splitAtNonFFSpaces(rev);
       foreach(var type in types){
     string iri=getRelTermOrCurie(type,
     iriMapLocal);
     if(iri!=null){
       outputGraph.Add(new RDFTriple(
       currentObject,
       RDFTerm.fromIRI(iri),
       newSubject
       ));
     }
       }
     } else {
       // Step 8
       // No object yet: each resolvable rel/rev token is stored as an
       // incomplete triple for the children to complete (Step 10 of the
       // child call).  A single blank node becomes the current object as
       // soon as the first predicate resolves.
       string[] types=StringUtility.splitAtNonFFSpaces(rel);
       bool hasPredicates=false;
       // Defines predicates
       foreach(var type in types){
     string iri=getRelTermOrCurie(type,
     iriMapLocal);
     if(iri!=null){
       if(!hasPredicates){
     hasPredicates=true;
     currentObject=generateBlankNode();
       }
       RDFa.IncompleteTriple inc=new RDFa.IncompleteTriple();
       inc.predicate=RDFTerm.fromIRI(iri);
       inc.direction=RDFa.ChainingDirection.Forward;
       incompleteTriplesLocal.Add(inc);
     }
       }
       types=StringUtility.splitAtNonFFSpaces(rev);
       foreach(var type in types){
     string iri=getRelTermOrCurie(type,
     iriMapLocal);
     if(iri!=null){
       if(!hasPredicates){
     hasPredicates=true;
     currentObject=generateBlankNode();
       }
       RDFa.IncompleteTriple inc=new RDFa.IncompleteTriple();
       inc.predicate=RDFTerm.fromIRI(iri);
       inc.direction=RDFa.ChainingDirection.Reverse;
       incompleteTriplesLocal.Add(inc);
     }
       }
     }
     // Step 9
     // Literal values for each property token
     string[] preds=StringUtility.splitAtNonFFSpaces(property);
     string datatypeValue=getCurie(datatype,
     iriMapLocal);
     // A datatype attribute that is present but does not resolve is treated
     // as an empty datatype, which selects the plain-literal branch below.
     if(datatype!=null && datatypeValue==null) {
       datatypeValue="";
     }
     //Console.WriteLine("datatype=[%s] prop=%s vocab=%s",
     //  datatype,property,localDefaultVocab);
     //Console.WriteLine("datatypeValue=[%s]",datatypeValue);
     RDFTerm currentProperty=null;
     foreach(var pred in preds){
       string iri=getCurie(pred,
       iriMapLocal);
       if(iri!=null){
     //Console.WriteLine("iri=[%s]",iri);
     currentProperty=null;
     if(datatypeValue!=null && datatypeValue.Length>0 &&
     !datatypeValue.Equals(RDF_XMLLITERAL)){
       // Typed literal: content attribute, else the element's text
       string literal=content;
       if(literal==null) {
     literal=getTextNodeText(node);
       }
       currentProperty=RDFTerm.fromTypedString(literal,datatypeValue);
     } else if(node.getAttribute("content")!=null ||
     !hasNonTextChildNodes(node) ||
     (datatypeValue!=null && datatypeValue.Length==0)){
       // Plain literal, language-tagged when a language is in effect
       string literal=node.getAttribute("content");
       if(literal==null) {
     literal=getTextNodeText(node);
       }
       currentProperty=(!string.IsNullOrEmpty(localLanguage)) ?
       RDFTerm.fromLangString(literal, localLanguage) :
         RDFTerm.fromTypedString(literal);
     } else if(hasNonTextChildNodes(node) &&
     (datatypeValue==null || datatypeValue.Equals(RDF_XMLLITERAL))){
       // XML literal
       // (children are serialized into the literal, so they are not
       // processed for RDFa afterwards)
       recurse=false;
       if(datatypeValue==null) {
     datatypeValue=RDF_XMLLITERAL;
       }
       try {
     string literal=ExclusiveCanonicalXML.canonicalize(node,
         false, namespacesLocal);
     currentProperty=RDFTerm.fromTypedString(literal,datatypeValue);
       } catch(ArgumentException){
     // failure to canonicalize
     // NOTE(review): currentProperty stays null here, yet the triple
     // below is still added with that null object - confirm this is
     // intended.
       }
     }
     // NOTE(review): the null check on newSubject is compiled only in DEBUG
     // builds; otherwise a null subject is passed to RDFTriple as is.
     #if DEBUG
     if(!(newSubject!=null))throw new InvalidOperationException("doesn't satisfy newSubject!=null");
     #endif
     outputGraph.Add(new RDFTriple(
     newSubject,
     RDFTerm.fromIRI(iri),currentProperty
     ));
       }
     }
     // Step 10
     // Complete the triples left pending by the parent, using this
     // element's subject as the missing end.
     if(!skipElement && newSubject!=null){
       foreach(var triple in context.incompleteTriples){
     if(triple.direction==RDFa.ChainingDirection.Forward){
       outputGraph.Add(new RDFTriple(
       context.parentSubject,
       triple.predicate,
       newSubject));
     } else {
       outputGraph.Add(new RDFTriple(
       newSubject,triple.predicate,
       context.parentSubject));
     }
       }
     }
     // Step 13
     // Recurse into child elements.  The "context" field is swapped for a
     // child context during each recursive call and restored afterwards.
     if(recurse){
       foreach(var childNode in node.getChildNodes()){
     IElement childElement;
     RDFa.EvalContext oldContext=context;
     if(childNode is IElement){
       childElement=((IElement)childNode);
       //Console.WriteLine("skip=%s vocab=%s local=%s",
       //  skipElement,context.defaultVocab,
       //localDefaultVocab);
       if(skipElement){
     // Skipped element: children get a copy of the current context with
     // only the language and the two maps replaced.
     RDFa.EvalContext ec=oldContext.copy();
     ec.language=localLanguage;
     ec.iriMap=iriMapLocal;
     ec.namespaces=namespacesLocal;
     context=ec;
     process(childElement,false);
       } else {
     // Otherwise a fresh context is built from this element's results,
     // falling back to the old parent subject where nothing new was set.
     RDFa.EvalContext ec=new RDFa.EvalContext();
     ec.baseURI=oldContext.baseURI;
     ec.iriMap=iriMapLocal;
     ec.namespaces=namespacesLocal;
     ec.incompleteTriples=incompleteTriplesLocal;
     ec.parentSubject=((newSubject==null) ? oldContext.parentSubject :
       newSubject);
     ec.parentObject=((currentObject==null) ?
         ((newSubject==null) ? oldContext.parentSubject :
           newSubject) : currentObject);
     ec.language=localLanguage;
     context=ec;
     process(childElement,false);
       }
     }
     context=oldContext;
       }
     }
 }
Beispiel #2
0
 // Processes a subset of RDF/XML metadata.
 // This is not a complete RDF/XML implementation: only Description
 // elements in the RDF namespace are handled, and any other child element
 // in that namespace raises NotSupportedException.
 //   node    - the element whose child elements are scanned.
 //   context - supplies the language used when no xml:lang is in effect.
 //   subject - subject used for a Description that has no resolvable
 //             "about" attribute; when this is also null the Description
 //             is skipped.
 // Each child element of a Description is passed to miniRdfXmlChild.
 private void miniRdfXml(IElement node, RDFa.EvalContext context, RDFTerm subject)
 {
     // The container's language is the same for every child, so resolve it
     // once: its own xml:lang if present, otherwise the context language.
     string containerLanguage=node.getAttribute("xml:lang");
     if(containerLanguage==null){
       containerLanguage=context.language;
     }
     foreach(var child in node.getChildNodes()){
       IElement childElement=child as IElement;
       if(childElement==null) {
     continue;
       }
       // xml:lang applies to the element carrying it and to its content, so
       // a value on the child element overrides the container's language.
       string language=childElement.getAttribute("xml:lang");
       if(language==null){
     language=containerLanguage;
       }
       if(childElement.getLocalName().Equals("Description") &&
       RDF_NAMESPACE.Equals(childElement.getNamespaceURI())){
     RDFTerm about=relativeResolve(childElement.getAttributeNS(RDF_NAMESPACE,"about"));
     if(about==null){
       // Fall back to the subject supplied by the caller
       about=subject;
       if(about==null) {
         continue;
       }
     }
     foreach(var child2 in childElement.getChildNodes()){
       IElement childElement2=child2 as IElement;
       if(childElement2==null) {
         continue;
       }
       miniRdfXmlChild(childElement2,about,language);
     }
       } else if(RDF_NAMESPACE.Equals(childElement.getNamespaceURI())){
     // Other RDF-namespace constructs are outside the supported subset
     throw new NotSupportedException();
       }
     }
 }
Beispiel #3
0
 // Emits the triple for one property element of the RDF/XML subset.
 //   node     - the property element; its namespace URI and local name are
 //              concatenated to form the predicate.
 //   subject  - subject of the emitted triple.
 //   language - language in effect; replaced by the element's own xml:lang
 //              when that attribute is present.
 // An element without non-text children yields a literal (language-tagged
 // when a language is in effect).  Otherwise the content is processed by
 // miniRdfXml with a new blank node as its subject, and that blank node
 // becomes the object.  Throws NotSupportedException for
 // parseType="Literal".
 private void miniRdfXmlChild(IElement node, RDFTerm subject, string language)
 {
     string nsname=node.getNamespaceURI();
     string ownLanguage=node.getAttribute("xml:lang");
     if(ownLanguage!=null){
       language=ownLanguage;
     }
     string localname=node.getLocalName();
     RDFTerm predicate=relativeResolve(nsname+localname);
     if(!hasNonTextChildNodes(node)){
       string content=getTextNodeText(node);
       RDFTerm literal;
       if(!string.IsNullOrEmpty(language)){
     literal=RDFTerm.fromLangString(content, language);
       } else {
     literal=RDFTerm.fromTypedString(content);
       }
       outputGraph.Add(new RDFTriple(subject,predicate,literal));
     } else {
       string parseType=node.getAttributeNS(RDF_NAMESPACE, "parseType");
       if("Literal".Equals(parseType)){
     throw new NotSupportedException();
       }
       RDFTerm blank=generateBlankNode();
       // The nested content is processed with this element's language, but
       // the shared context must not keep it afterwards; otherwise later
       // siblings would inherit a language that was never declared for them.
       string savedLanguage=context.language;
       context.language=language;
       try {
     miniRdfXml(node,context,blank);
       } finally {
     context.language=savedLanguage;
       }
       outputGraph.Add(new RDFTriple(subject,predicate,blank));
     }
 }
 // Implies "url", "name" and (for a link wrapping a single image) "photo"
 // from a hyperlink element.  Values are only set where subProperties has
 // none yet.  Returns true when root is an "a" element with an href
 // attribute, and false (leaving subProperties untouched) otherwise.
 private static bool implyForLink(IElement root, JSONObject subProperties)
 {
     bool isAnchor=StringUtility.toLowerCaseAscii(root.getLocalName()).Equals("a");
     if(!isAnchor || root.getAttribute("href")==null){
       return false;
     }
     // The link target
     setValueIfAbsent(subProperties,"url", getUValue(root));
     IList<IElement> kids=getChildElements(root);
     bool wrapsSingleImage=kids.Count==1 &&
       StringUtility.toLowerCaseAscii(kids[0].getLocalName()).Equals("img");
     if(!wrapsSingleImage){
       // Ordinary link: the name is its text, if it has any
       string linkText=getPValue(root);
       if(!StringUtility.isNullOrSpaces(linkText)){
         setValueIfAbsent(subProperties,"name", linkText);
       }
       return true;
     }
     // Image link: prefer the image's own label, then the link's text
     IElement image=kids[0];
     string label=getPValue(image);
     if(StringUtility.isNullOrSpaces(label)){
       label=getPValue(root);
     }
     setValueIfAbsent(subProperties,"name", label);
     // The image source doubles as the photo
     setValueIfAbsent(subProperties,"photo", getUValue(image));
     return true;
 }
   // Walks an element tree collecting microformat data.
   //   root       - the element to examine.
   //   properties - receives property values found on or below root; may be
   //                null, in which case property class names are ignored.
   //   children   - receives nested microformats that carry no property
   //                class name.
   // An element with an "h-" class name starts a nested microformat: its
   // descendants are collected into a new object and the walk does not
   // continue past it in the caller's scope.  Any other element contributes
   // its "p-", "u-", "dt-" and "e-" values and is then descended into.
   private static void propertyWalk(IElement root,
 JSONObject properties, JSONArray children)
   {
       string[] classes=getClassNames(root);
       IList<string> rootTypes=new List<string>();
       bool definesProperty=false;
       foreach(var name in classes){
         bool propertyPrefix=
           name.StartsWith("p-",StringComparison.Ordinal) ||
           name.StartsWith("u-",StringComparison.Ordinal) ||
           name.StartsWith("dt-",StringComparison.Ordinal) ||
           name.StartsWith("e-",StringComparison.Ordinal);
         if(propertyPrefix){
           // Property names only count when there is somewhere to put them
           if(properties!=null){
             definesProperty=true;
           }
         } else if(name.StartsWith("h-",StringComparison.Ordinal)){
           rootTypes.Add(name);
         }
       }
       if(rootTypes.Count>0){
         // A nested microformat
         JSONObject item=new JSONObject();
         item.put("type", new JSONArray(rootTypes));
         // Properties found on descendant elements
         JSONObject nestedProperties=new JSONObject();
         // Descendant microformats that have no property class
         JSONArray nestedChildren=new JSONArray();
         foreach(var descendant in root.getChildNodes()){
           if(descendant is IElement){
             propertyWalk((IElement)descendant,nestedProperties,nestedChildren);
           }
         }
         if(nestedChildren.Length>0){
           item.put("children", nestedChildren);
         }
         // Imply the name and URL: from the element itself if it is a
         // link, else from its only child link, else from its text
         if(!implyForLink(root,nestedProperties)){
           if(hasSingleChildElementNamed(root,"a")){
             implyForLink(getFirstChildElement(root),nestedProperties);
           } else {
             string impliedName=getPValue(root);
             if(!StringUtility.isNullOrSpaces(impliedName)){
               setValueIfAbsent(nestedProperties,"name", impliedName);
             }
           }
         }
         // Imply the photo from the element itself when it is an image
         bool rootIsImage=
           StringUtility.toLowerCaseAscii(root.getLocalName()).Equals("img") &&
           root.getAttribute("src")!=null;
         if(rootIsImage){
           setValueIfAbsent(nestedProperties,"photo", getUValue(root));
         }
         // ...or from the one and only descendant image
         if(!nestedProperties.has("photo")){
           IList<IElement> images=root.getElementsByTagName("img");
           if(images.Count==1){
             setValueIfAbsent(nestedProperties,"photo", getUValue(images[0]));
           }
         }
         item.put("properties", nestedProperties);
         if(!definesProperty){
           children.put(item);
           return;
         }
         // The microformat is also a property value of its parent: store a
         // copy with the matching "value" under every property name
         foreach(var name in classes){
           if(name.StartsWith("p-",StringComparison.Ordinal)){
             // plain text
             JSONObject copy=copyJson(item);
             copy.put("value",getPValue(root));
             accumulateValue(properties,name.Substring(2),copy);
           } else if(name.StartsWith("u-",StringComparison.Ordinal)){
             // URL
             JSONObject copy=copyJson(item);
             copy.put("value",getUValue(root));
             accumulateValue(properties,name.Substring(2),copy);
           } else if(name.StartsWith("dt-",StringComparison.Ordinal)){
             // date/time
             JSONObject copy=copyJson(item);
             copy.put("value",getDTValue(root,getLastKnownTime(properties)));
             accumulateValue(properties,name.Substring(3),copy);
           } else if(name.StartsWith("e-",StringComparison.Ordinal)){
             // value produced by getEValue
             JSONObject copy=copyJson(item);
             copy.put("value",getEValue(root));
             accumulateValue(properties,name.Substring(2),copy);
           }
         }
         return;
       }
       if(definesProperty){
         // Plain property element (not a microformat root)
         foreach(var name in classes){
           if(name.StartsWith("p-",StringComparison.Ordinal)){
             string text=getPValue(root);
             if(!StringUtility.isNullOrSpaces(text)){
               accumulateValue(properties,name.Substring(2),text);
             }
           } else if(name.StartsWith("u-",StringComparison.Ordinal)){
             accumulateValue(properties,name.Substring(2),getUValue(root));
           } else if(name.StartsWith("dt-",StringComparison.Ordinal)){
             accumulateValue(properties,name.Substring(3),
               getDTValue(root,getLastKnownTime(properties)));
           } else if(name.StartsWith("e-",StringComparison.Ordinal)){
             accumulateValue(properties,name.Substring(2),getEValue(root));
           }
         }
       }
       // Keep walking below this element in the caller's scope
       foreach(var descendant in root.getChildNodes()){
         if(descendant is IElement){
           propertyWalk((IElement)descendant,properties,children);
         }
       }
   }
 // Returns the text contributed by a "value" element.
 // A "value-title" element contributes its title attribute (or the empty
 // string); img/area contribute alt (or the empty string); data
 // contributes its value attribute and abbr its title attribute, each
 // falling back to the trimmed text content; any other element
 // contributes its trimmed text content.
 private static string getValueElementContent(IElement valueElement)
 {
     if(hasClassName(valueElement,"value-title")){
       return valueOrEmpty(valueElement.getAttribute("title"));
     }
     string tag=elementName(valueElement);
     if(tag.Equals("img") || tag.Equals("area")){
       string alt=valueElement.getAttribute("alt");
       return (alt!=null) ? alt : "";
     }
     // Attribute that takes precedence over the text content, if any
     string preferredAttribute=null;
     if(tag.Equals("data")){
       preferredAttribute="value";
     } else if(tag.Equals("abbr")){
       preferredAttribute="title";
     }
     if(preferredAttribute!=null){
       string attrValue=valueElement.getAttribute(preferredAttribute);
       if(attrValue!=null){
         return attrValue;
       }
     }
     return getTrimmedTextContent(valueElement);
 }
 // Returns true when the element's class attribute, split at spaces,
 // contains className as one of its tokens (exact match).
 private static bool hasClassName(IElement e, string className)
 {
     string classAttr=e.getAttribute("class");
     // An attribute shorter than the name cannot contain it
     if(classAttr!=null && classAttr.Length>=className.Length){
       string[] tokens=StringUtility.splitAtSpaces(classAttr);
       for(int i=0;i<tokens.Length;i++){
         if(tokens[i].Equals(className)){
           return true;
         }
       }
     }
     return false;
 }
 // Returns the tokens of the element's rel attribute, converted to
 // lower case (ASCII) and split at spaces.  When there are two or more
 // tokens they are passed through a set, which removes duplicates.
 private static string[] getRelNames(IElement element)
 {
     string[] tokens=StringUtility.splitAtSpaces(
       StringUtility.toLowerCaseAscii(element.getAttribute("rel")));
     if(tokens.Length==0){
       return tokens;
     }
     if(tokens.Length==1){
       // A single token needs no de-duplication; hand back a fresh array
       return new string[]{ tokens[0] };
     }
     ISet<string> unique=new HashSet<string>(tokens);
     return PeterO.Support.Collections.ToArray(unique);
 }
 // Returns the plain-text value of an element: its title attribute when
 // present; otherwise, for an img element, its alt attribute when that is
 // not null or spaces; otherwise the result of getValueContent(root,false).
 private static string getPValue(IElement root)
 {
     string title=root.getAttribute("title");
     if(title!=null){
       return title;
     }
     if(StringUtility.toLowerCaseAscii(root.getLocalName()).Equals("img")){
       string alt=root.getAttribute("alt");
       if(!StringUtility.isNullOrSpaces(alt)){
         return alt;
       }
     }
     return getValueContent(root,false);
 }
 // Returns the resolved URL an element points to, taken from href (a,
 // link, area), data (object) or src (img, source, track, iframe, audio,
 // video, embed).  Returns null for any other element, and the empty
 // string when the attribute is missing or empty or when resolving it
 // gives nothing.
 private static string getHref(IElement node)
 {
     string tag=StringUtility.toLowerCaseAscii(node.getLocalName());
     string urlAttribute;
     switch(tag){
       case "a":
       case "link":
       case "area":
         urlAttribute="href";
         break;
       case "object":
         urlAttribute="data";
         break;
       case "img":
       case "source":
       case "track":
       case "iframe":
       case "audio":
       case "video":
       case "embed":
         urlAttribute="src";
         break;
       default:
         // Not an element that carries a URL
         return null;
     }
     string raw=node.getAttribute(urlAttribute);
     if(string.IsNullOrEmpty(raw)){
       return "";
     }
     string resolved=HtmlDocument.resolveURL(node,raw,null);
     return string.IsNullOrEmpty(resolved) ? "" : resolved;
 }
 // Returns the text contributed by a "value" element of a date/time
 // property.  Same rules as for other value elements (value-title, img/area
 // alt, data value, abbr title), plus: del, ins and time contribute
 // their datetime attribute, or their title attribute when datetime is
 // null or spaces.  Wherever the chosen attribute is null the trimmed
 // text content is used instead (img/area use the empty string).
 private static string getDTValueContent(IElement valueElement)
 {
     string tag=elementName(valueElement);
     if(hasClassName(valueElement,"value-title")){
       return valueOrEmpty(valueElement.getAttribute("title"));
     }
     if(tag.Equals("img") || tag.Equals("area")){
       string alt=valueElement.getAttribute("alt");
       return (alt!=null) ? alt : "";
     }
     // Attribute value that takes precedence over the text content
     string preferred=null;
     if(tag.Equals("data")){
       preferred=valueElement.getAttribute("value");
     } else if(tag.Equals("abbr")){
       preferred=valueElement.getAttribute("title");
     } else if(tag.Equals("del") || tag.Equals("ins") || tag.Equals("time")){
       preferred=valueElement.getAttribute("datetime");
       if(StringUtility.isNullOrSpaces(preferred)){
         preferred=valueElement.getAttribute("title");
       }
     }
     return (preferred!=null) ? preferred : getTrimmedTextContent(valueElement);
 }
 // Returns the class names that apply to an element: the names produced
 // by parseLegacyRel for its rel attribute, followed by its class tokens
 // with legacy names replaced.  A token found in complexLegacyMap is
 // replaced by all of that entry's items; otherwise a token with an entry
 // in legacyLabelsMap is replaced by that entry; otherwise it is kept.
 // When two or more names result they are passed through a set, which
 // removes duplicates.
 private static string[] getClassNames(IElement element)
 {
     string[] classTokens=StringUtility.splitAtSpaces(element.getAttribute("class"));
     string[] relTokens=parseLegacyRel(element.getAttribute("rel"));
     if(classTokens.Length==0 && relTokens.Length==0){
       return classTokens;
     }
     // Names derived from rel come first
     List<string> names=new List<string>(relTokens);
     foreach(var token in classTokens){
       string modernName=legacyLabelsMap[token];
       if(complexLegacyMap.ContainsKey(token)){
         // One legacy name that stands for several modern ones
         foreach(var item in complexLegacyMap[token]){
           names.Add(item);
         }
       } else {
         names.Add((modernName!=null) ? modernName : token);
       }
     }
     if(names.Count<2){
       return names.ToArray();
     }
     ISet<string> unique=new HashSet<string>(names);
     return PeterO.Support.Collections.ToArray(unique);
 }