Example #1
0
 /// <summary>
 /// Processes a single element following the numbered RDFa processing steps
 /// marked in the comments below: it determines the subject and object for
 /// the element, adds the resulting triples to <c>outputGraph</c>, and then
 /// recurses into the element's child elements with an updated context.
 /// </summary>
 /// <param name="node">The element to process.</param>
 /// <param name="root">Whether <paramref name="node"/> is treated as the
 /// root element. Outside XHTML mode, a root element with no other subject
 /// uses the term produced from an empty CURIE/IRI string.</param>
 private void process(IElement node, bool root)
 {
     IList<RDFa.IncompleteTriple> incompleteTriplesLocal=new List<RDFa.IncompleteTriple>();
     string localLanguage=context.language;
     RDFTerm newSubject=null;
     bool recurse=true;
     bool skipElement=false;
     RDFTerm currentObject=null;
     // New maps seeded from the inherited context; declarations on this
     // element are added to these local maps.
     // NOTE(review): presumably the constructor copies the source map -
     // confirm against LenientDictionary.
     IDictionary<string,string> namespacesLocal=
       new PeterO.Support.LenientDictionary<string,string>(context.namespaces);
     IDictionary<string,string> iriMapLocal=
       new PeterO.Support.LenientDictionary<string,string>(context.iriMap);
     string attr=null;
     if(!xhtml){
       // xml:base is only honored outside XHTML mode. Note that this
       // writes to the current context's base URI directly.
       attr=node.getAttribute("xml:base");
       if(attr!=null){
         context.baseURI=URIUtility.relativeResolve(attr, context.baseURI);
       }
     }
     // Support XML namespaces
     foreach(var attrib in node.getAttributes()){
       string name=StringUtility.toLowerCaseAscii(attrib.getName());
       if(name.Equals("xmlns")){
         iriMapLocal.Add("", attrib.getValue());
         namespacesLocal.Add("", attrib.getValue());
       } else if(name.StartsWith("xmlns:",StringComparison.Ordinal) && name.Length>6){
         string prefix=name.Substring(6);
         // The "_" prefix is kept out of the CURIE map but is still
         // recorded as an XML namespace.
         if(!"_".Equals(prefix)){
           iriMapLocal.Add(prefix, attrib.getValue());
         }
         namespacesLocal.Add(prefix, attrib.getValue());
       }
     }
     attr=node.getAttribute("xml:lang");
     if(attr!=null){
       localLanguage=attr;
     }
     // Support RDF/XML metadata: an rdf:RDF element is handed to the
     // RDF/XML reader and is not processed as RDFa.
     if(node.getLocalName().Equals("RDF") &&
        RDF_NAMESPACE.Equals(node.getNamespaceURI())){
       miniRdfXml(node,context);
       return;
     }
     string rel=node.getAttribute("rel");
     string rev=node.getAttribute("rev");
     string property=node.getAttribute("property");
     string content=node.getAttribute("content");
     string datatype=node.getAttribute("datatype");
     if(rel==null && rev==null){
       // Step 4: no rel/rev, so about/resource/href/src (in that order)
       // may establish the new subject.
       RDFTerm resource=getSafeCurieOrCurieOrIri(
         node.getAttribute("about"),iriMapLocal);
       if(resource==null){
         resource=getSafeCurieOrCurieOrIri(
           node.getAttribute("resource"),iriMapLocal);
       }
       if(resource==null){
         resource=relativeResolve(node.getAttribute("href"));
       }
       if(resource==null){
         resource=relativeResolve(node.getAttribute("src"));
       }
       if((resource==null || resource.getKind()!=RDFTerm.IRI)){
         string rdfTypeof=getCurie(node.getAttribute("typeof"),iriMapLocal);
         if(isHtmlElement(node, "head") ||
            isHtmlElement(node, "body")){
           resource=getSafeCurieOrCurieOrIri("",iriMapLocal);
         }
         if(resource==null && !xhtml && root){
           resource=getSafeCurieOrCurieOrIri("",iriMapLocal);
         }
         if(resource==null && rdfTypeof!=null){
           resource=generateBlankNode();
         }
         if(resource==null){
           if(context.parentObject!=null) {
             resource=context.parentObject;
           }
           // Nothing on this element names a subject; without a
           // property attribute the element is skipped (see steps
           // 10 and 13).
           if(node.getAttribute("property")==null){
             skipElement=true;
           }
         }
         newSubject=resource;
       } else {
         newSubject=resource;
       }
     } else {
       // Step 5: rel/rev present; about/src give the subject while
       // resource/href give the object.
       RDFTerm resource=getSafeCurieOrCurieOrIri(
         node.getAttribute("about"),iriMapLocal);
       if(resource==null){
         resource=relativeResolve(node.getAttribute("src"));
       }
       if((resource==null || resource.getKind()!=RDFTerm.IRI)){
         string rdfTypeof=getCurie(node.getAttribute("typeof"),iriMapLocal);
         if(isHtmlElement(node, "head") ||
            isHtmlElement(node, "body")){
           resource=getSafeCurieOrCurieOrIri("",iriMapLocal);
         }
         if(resource==null && !xhtml && root){
           resource=getSafeCurieOrCurieOrIri("",iriMapLocal);
         }
         if(resource==null && rdfTypeof!=null){
           resource=generateBlankNode();
         }
         if(resource==null){
           if(context.parentObject!=null) {
             resource=context.parentObject;
           }
         }
         newSubject=resource;
       } else {
         newSubject=resource;
       }
       resource=getSafeCurieOrCurieOrIri(
         node.getAttribute("resource"),iriMapLocal);
       if(resource==null){
         resource=relativeResolve(node.getAttribute("href"));
       }
       currentObject=resource;
     }
     // Step 6: emit one type triple per resolvable typeof value.
     if(newSubject!=null){
       string[] types=StringUtility.splitAtNonFFSpaces(node.getAttribute("typeof"));
       foreach(var type in types){
         string iri=getCurie(type,iriMapLocal);
         if(iri!=null){
           outputGraph.Add(new RDFTriple(
             newSubject,RDFTerm.A,
             RDFTerm.fromIRI(iri)
           ));
         }
       }
     }
     // Step 7: the object is known, so rel/rev triples are complete.
     if(currentObject!=null){
       string[] types=StringUtility.splitAtNonFFSpaces(rel);
       foreach(var type in types){
         string iri=getRelTermOrCurie(type,
           iriMapLocal);
         #if DEBUG
         if(!(newSubject!=null))throw new InvalidOperationException("doesn't satisfy newSubject!=null");
         #endif
         if(iri!=null){
           outputGraph.Add(new RDFTriple(
             newSubject,
             RDFTerm.fromIRI(iri),currentObject
           ));
         }
       }
       types=StringUtility.splitAtNonFFSpaces(rev);
       foreach(var type in types){
         string iri=getRelTermOrCurie(type,
           iriMapLocal);
         if(iri!=null){
           // rev swaps the roles of subject and object
           outputGraph.Add(new RDFTriple(
             currentObject,
             RDFTerm.fromIRI(iri),
             newSubject
           ));
         }
       }
     } else {
       // Step 8: no object yet; record incomplete triples for the
       // child elements to complete. A blank node is generated once,
       // on the first usable predicate, to serve as the current object.
       string[] types=StringUtility.splitAtNonFFSpaces(rel);
       bool hasPredicates=false;
       // Defines predicates
       foreach(var type in types){
         string iri=getRelTermOrCurie(type,
           iriMapLocal);
         if(iri!=null){
           if(!hasPredicates){
             hasPredicates=true;
             currentObject=generateBlankNode();
           }
           RDFa.IncompleteTriple inc=new RDFa.IncompleteTriple();
           inc.predicate=RDFTerm.fromIRI(iri);
           inc.direction=RDFa.ChainingDirection.Forward;
           incompleteTriplesLocal.Add(inc);
         }
       }
       types=StringUtility.splitAtNonFFSpaces(rev);
       foreach(var type in types){
         string iri=getRelTermOrCurie(type,
           iriMapLocal);
         if(iri!=null){
           if(!hasPredicates){
             hasPredicates=true;
             currentObject=generateBlankNode();
           }
           RDFa.IncompleteTriple inc=new RDFa.IncompleteTriple();
           inc.predicate=RDFTerm.fromIRI(iri);
           inc.direction=RDFa.ChainingDirection.Reverse;
           incompleteTriplesLocal.Add(inc);
         }
       }
     }
     // Step 9: literal values from the property attribute.
     string[] preds=StringUtility.splitAtNonFFSpaces(property);
     string datatypeValue=getCurie(datatype,
       iriMapLocal);
     // A datatype attribute that is present but does not resolve is
     // represented by the empty string, distinct from "no datatype" (null).
     if(datatype!=null && datatypeValue==null) {
       datatypeValue="";
     }
     RDFTerm currentProperty=null;
     foreach(var pred in preds){
       string iri=getCurie(pred,
         iriMapLocal);
       if(iri!=null){
         currentProperty=null;
         if(datatypeValue!=null && datatypeValue.Length>0 &&
            !datatypeValue.Equals(RDF_XMLLITERAL)){
           // Typed literal (any datatype other than XMLLiteral)
           string literal=content;
           if(literal==null) {
             literal=getTextNodeText(node);
           }
           currentProperty=RDFTerm.fromTypedString(literal,datatypeValue);
         } else if(node.getAttribute("content")!=null ||
                   !hasNonTextChildNodes(node) ||
                   (datatypeValue!=null && datatypeValue.Length==0)){
           // Plain literal, language-tagged when a language is in effect
           string literal=node.getAttribute("content");
           if(literal==null) {
             literal=getTextNodeText(node);
           }
           currentProperty=(!string.IsNullOrEmpty(localLanguage)) ?
             RDFTerm.fromLangString(literal, localLanguage) :
             RDFTerm.fromTypedString(literal);
         } else if(hasNonTextChildNodes(node) &&
                   (datatypeValue==null || datatypeValue.Equals(RDF_XMLLITERAL))){
           // XML literal; the children become the literal's value, so
           // they are not processed as RDFa afterwards.
           recurse=false;
           if(datatypeValue==null) {
             datatypeValue=RDF_XMLLITERAL;
           }
           try {
             string literal=ExclusiveCanonicalXML.canonicalize(node,
               false, namespacesLocal);
             currentProperty=RDFTerm.fromTypedString(literal,datatypeValue);
           } catch(ArgumentException){
             // failure to canonicalize; currentProperty stays null
           }
         }
         #if DEBUG
         if(!(newSubject!=null))throw new InvalidOperationException("doesn't satisfy newSubject!=null");
         #endif
         if(currentProperty==null){
           // Only reachable when canonicalization failed above: there is
           // no object value, so skip this predicate instead of adding
           // a triple whose object is null.
           continue;
         }
         outputGraph.Add(new RDFTriple(
           newSubject,
           RDFTerm.fromIRI(iri),currentProperty
         ));
       }
     }
     // Step 10: complete the incomplete triples inherited from the parent.
     if(!skipElement && newSubject!=null){
       foreach(var triple in context.incompleteTriples){
         if(triple.direction==RDFa.ChainingDirection.Forward){
           outputGraph.Add(new RDFTriple(
             context.parentSubject,
             triple.predicate,
             newSubject));
         } else {
           outputGraph.Add(new RDFTriple(
             newSubject,triple.predicate,
             context.parentSubject));
         }
       }
     }
     // Step 13: process child elements. The context field is swapped for
     // the duration of each recursive call and restored afterwards.
     if(recurse){
       foreach(var childNode in node.getChildNodes()){
         IElement childElement;
         RDFa.EvalContext oldContext=context;
         if(childNode is IElement){
           childElement=((IElement)childNode);
           if(skipElement){
             // A skipped element passes its parent's context through,
             // updating only the language and the prefix maps.
             RDFa.EvalContext ec=oldContext.copy();
             ec.language=localLanguage;
             ec.iriMap=iriMapLocal;
             ec.namespaces=namespacesLocal;
             context=ec;
             process(childElement,false);
           } else {
             RDFa.EvalContext ec=new RDFa.EvalContext();
             ec.baseURI=oldContext.baseURI;
             ec.iriMap=iriMapLocal;
             ec.namespaces=namespacesLocal;
             ec.incompleteTriples=incompleteTriplesLocal;
             ec.parentSubject=((newSubject==null) ? oldContext.parentSubject :
               newSubject);
             ec.parentObject=((currentObject==null) ?
               ((newSubject==null) ? oldContext.parentSubject :
                 newSubject) : currentObject);
             ec.language=localLanguage;
             context=ec;
             process(childElement,false);
           }
         }
         context=oldContext;
       }
     }
 }
Example #2
0
 /// <summary>
 /// Tells whether an element is in the XHTML namespace and has the given
 /// local name.
 /// </summary>
 /// <param name="element">The element to test; null yields false.</param>
 /// <param name="name">The local name to compare against (compared
 /// exactly, without case folding).</param>
 /// <returns>True only if the element is non-null, its namespace URI is
 /// "http://www.w3.org/1999/xhtml", and its local name equals
 /// <paramref name="name"/>.</returns>
 private static bool isHtmlElement(IElement element, string name)
 {
     if(element==null){
       return false;
     }
     if(!"http://www.w3.org/1999/xhtml".Equals(element.getNamespaceURI())){
       return false;
     }
     return name.Equals(element.getLocalName());
 }
Example #3
0
 /// <summary>
 /// Handles one property element of an embedded RDF/XML description: the
 /// predicate is resolved from the element's namespace URI followed by its
 /// local name, and a triple for <paramref name="subject"/> is added to
 /// <c>outputGraph</c>.
 /// </summary>
 /// <param name="node">The property element.</param>
 /// <param name="subject">The subject of the triple to add.</param>
 /// <param name="language">The inherited language; replaced by the
 /// element's own xml:lang attribute when it has one.</param>
 /// <exception cref="NotSupportedException">The element has non-text
 /// children and its rdf:parseType attribute is "Literal".</exception>
 private void miniRdfXmlChild(IElement node, RDFTerm subject, string language)
 {
     string namespaceUri=node.getNamespaceURI();
     string langOverride=node.getAttribute("xml:lang");
     if(langOverride!=null){
       language=langOverride;
     }
     RDFTerm predicate=relativeResolve(namespaceUri+node.getLocalName());
     if(hasNonTextChildNodes(node)){
       // Nested content: describe it with a fresh blank node and make that
       // blank node the object of this property.
       string parseType=node.getAttributeNS(RDF_NAMESPACE, "parseType");
       if("Literal".Equals(parseType)){
         throw new NotSupportedException();
       }
       RDFTerm blank=generateBlankNode();
       context.language=language;
       miniRdfXml(node,context,blank);
       outputGraph.Add(new RDFTriple(subject,predicate,blank));
       return;
     }
     // Text-only content: the object is a literal, language-tagged when a
     // language is in effect.
     string text=getTextNodeText(node);
     RDFTerm literal=string.IsNullOrEmpty(language) ?
       RDFTerm.fromTypedString(text) :
       RDFTerm.fromLangString(text, language);
     outputGraph.Add(new RDFTriple(subject,predicate,literal));
 }
   /// <summary>
   /// Walks an element and its descendants, collecting microformat data
   /// from class names. Classes starting with "h-" make the element a
   /// microformat object; classes starting with "p-", "u-", "dt-" or "e-"
   /// make it a property, which is recorded in
   /// <paramref name="properties"/>.
   /// </summary>
   /// <param name="root">The element to examine.</param>
   /// <param name="properties">Receives property values; when null, the
   /// property prefixes are ignored.</param>
   /// <param name="children">Receives microformat objects that carry no
   /// property class.</param>
   private static void propertyWalk(IElement root,
 JSONObject properties, JSONArray children)
   {
       string[] classNames=getClassNames(root);
       IList<string> formatTypes=new List<string>();
       bool isProperty=false;
       // Classify the element's classes: "h-" names are microformat types,
       // the other prefixes only count when there is somewhere to store them.
       foreach(var cls in classNames){
         if(cls.StartsWith("h-",StringComparison.Ordinal)){
           formatTypes.Add(cls);
         } else if(properties!=null && (
             cls.StartsWith("p-",StringComparison.Ordinal) ||
             cls.StartsWith("u-",StringComparison.Ordinal) ||
             cls.StartsWith("dt-",StringComparison.Ordinal) ||
             cls.StartsWith("e-",StringComparison.Ordinal))){
           isProperty=true;
         }
       }
       if(formatTypes.Count>0){
         // This element is a microformat object; its descendants are
         // collected into the object's own property and child holders.
         JSONObject obj=new JSONObject();
         obj.put("type", new JSONArray(formatTypes));
         JSONObject subProperties=new JSONObject();
         JSONArray subChildren=new JSONArray();
         foreach(var descendant in root.getChildNodes()){
           if(descendant is IElement){
             propertyWalk((IElement)descendant,
                 subProperties,subChildren);
           }
         }
         if(subChildren.Length>0){
           obj.put("children", subChildren);
         }
         // Imply the missing "name" and "url" properties
         if(!implyForLink(root,subProperties)){
           if(hasSingleChildElementNamed(root,"a")){
             implyForLink(getFirstChildElement(root),subProperties);
           } else {
             string impliedName=getPValue(root);
             if(!StringUtility.isNullOrSpaces(impliedName)){
               setValueIfAbsent(subProperties,"name", impliedName);
             }
           }
         }
         // Imply "photo" from the element itself when it is an image
         // with a src attribute
         bool isImage=StringUtility.toLowerCaseAscii(root.getLocalName()).Equals("img");
         if(isImage && root.getAttribute("src")!=null){
           setValueIfAbsent(subProperties,"photo", getUValue(root));
         }
         // Otherwise imply it from the only descendant image, if there
         // is exactly one
         if(!subProperties.has("photo")){
           IList<IElement> images=root.getElementsByTagName("img");
           if(images.Count==1){
             setValueIfAbsent(subProperties,"photo",
                 getUValue(images[0]));
           }
         }
         obj.put("properties", subProperties);
         if(!isProperty){
           // A microformat with no property class is a plain child
           children.put(obj);
           return;
         }
         // The object is also a property of its parent: store a copy,
         // with the matching "value", under every property class.
         foreach(var cls in classNames){
           if(cls.StartsWith("p-",StringComparison.Ordinal)){
             // plain-text property
             JSONObject clone=copyJson(obj);
             clone.put("value",getPValue(root));
             accumulateValue(properties,cls.Substring(2),clone);
           } else if(cls.StartsWith("u-",StringComparison.Ordinal)){
             // URL property
             JSONObject clone=copyJson(obj);
             clone.put("value",getUValue(root));
             accumulateValue(properties,cls.Substring(2),clone);
           } else if(cls.StartsWith("dt-",StringComparison.Ordinal)){
             // date/time property
             JSONObject clone=copyJson(obj);
             clone.put("value",getDTValue(root,getLastKnownTime(properties)));
             accumulateValue(properties,cls.Substring(3),clone);
           } else if(cls.StartsWith("e-",StringComparison.Ordinal)){
             // "e-" property, valued by getEValue
             JSONObject clone=copyJson(obj);
             clone.put("value",getEValue(root));
             accumulateValue(properties,cls.Substring(2),clone);
           }
         }
         return;
       }
       if(isProperty){
         // A property that is not itself a microformat: record its
         // value, then keep walking its children below.
         foreach(var cls in classNames){
           if(cls.StartsWith("p-",StringComparison.Ordinal)){
             string text=getPValue(root);
             if(!StringUtility.isNullOrSpaces(text)){
               accumulateValue(properties,cls.Substring(2),text);
             }
           } else if(cls.StartsWith("u-",StringComparison.Ordinal)){
             accumulateValue(properties,cls.Substring(2),
                 getUValue(root));
           } else if(cls.StartsWith("dt-",StringComparison.Ordinal)){
             accumulateValue(properties,cls.Substring(3),
                 getDTValue(root,getLastKnownTime(properties)));
           } else if(cls.StartsWith("e-",StringComparison.Ordinal)){
             accumulateValue(properties,cls.Substring(2),
                 getEValue(root));
           }
         }
       }
       foreach(var descendant in root.getChildNodes()){
         if(descendant is IElement){
           propertyWalk((IElement)descendant,properties,children);
         }
       }
   }
 /// <summary>
 /// Implies "url", "name" and possibly "photo" properties from a link
 /// element. Existing values are kept, since every value is stored with
 /// <c>setValueIfAbsent</c>.
 /// </summary>
 /// <param name="root">The candidate element.</param>
 /// <param name="subProperties">The property holder to fill in.</param>
 /// <returns>True if <paramref name="root"/> is an "a" element with an href
 /// attribute (so implication was attempted); otherwise false.</returns>
 private static bool implyForLink(IElement root, JSONObject subProperties)
 {
     bool isLink=StringUtility.toLowerCaseAscii(root.getLocalName()).Equals("a");
     if(!isLink || root.getAttribute("href")==null){
       return false;
     }
     // the link's URL
     setValueIfAbsent(subProperties,"url", getUValue(root));
     IList<IElement> kids=getChildElements(root);
     bool wrapsSingleImage=kids.Count==1 &&
       StringUtility.toLowerCaseAscii(kids[0].getLocalName()).Equals("img");
     if(wrapsSingleImage){
       IElement image=kids[0];
       // prefer the image's own text value, falling back to the link's
       // text when that is blank
       string name=getPValue(image);
       if(StringUtility.isNullOrSpaces(name)){
         name=getPValue(root);
       }
       setValueIfAbsent(subProperties,"name", name);
       // the image's URL value becomes the photo
       setValueIfAbsent(subProperties,"photo", getUValue(image));
     } else {
       // use the link's text content, unless it is blank
       string text=getPValue(root);
       if(!StringUtility.isNullOrSpaces(text)){
         setValueIfAbsent(subProperties,"name", text);
       }
     }
     return true;
 }
 /// <summary>
 /// Gets the plain-text value of an element: its title attribute if
 /// present; otherwise, for an "img" element, its alt attribute unless
 /// that is blank; otherwise the result of <c>getValueContent</c>.
 /// </summary>
 /// <param name="root">The element to read.</param>
 /// <returns>The text value chosen by the rules above.</returns>
 private static string getPValue(IElement root)
 {
     string title=root.getAttribute("title");
     if(title!=null){
       return title;
     }
     if(StringUtility.toLowerCaseAscii(root.getLocalName()).Equals("img")){
       string alt=root.getAttribute("alt");
       if(!StringUtility.isNullOrSpaces(alt)){
         return alt;
       }
     }
     return getValueContent(root,false);
 }
 /// <summary>
 /// Gets the resolved URL an element points to, reading the attribute
 /// appropriate for the element's (ASCII lower-cased) local name.
 /// </summary>
 /// <param name="node">The element to read.</param>
 /// <returns>Null if the element is not one of the recognized names; an
 /// empty string if the attribute is missing or empty, or if resolution
 /// yields null or an empty string; otherwise the resolved URL.</returns>
 private static string getHref(IElement node)
 {
     string urlAttribute;
     switch(StringUtility.toLowerCaseAscii(node.getLocalName())){
       case "a":
       case "link":
       case "area":
         urlAttribute="href";
         break;
       case "object":
         urlAttribute="data";
         break;
       case "img":
       case "source":
       case "track":
       case "iframe":
       case "audio":
       case "video":
       case "embed":
         urlAttribute="src";
         break;
       default:
         // not an element that carries a URL
         return null;
     }
     string href=node.getAttribute(urlAttribute);
     if(string.IsNullOrEmpty(href)){
       return "";
     }
     href=HtmlDocument.resolveURL(node,href,null);
     return string.IsNullOrEmpty(href) ? "" : href;
 }
 /// <summary>
 /// Gets an element's local name converted to lower case with
 /// <c>StringUtility.toLowerCaseAscii</c>.
 /// </summary>
 /// <param name="element">The element whose name is wanted.</param>
 /// <returns>The lower-cased local name.</returns>
 private static string elementName(IElement element)
 {
     string localName=element.getLocalName();
     return StringUtility.toLowerCaseAscii(localName);
 }