示例#1
0
        /// <summary>
        /// Collects every node in the tree rooted at <paramref name="node"/> whose
        /// <c>ElementName</c> is neither null nor empty. Nodes are returned in pre-order:
        /// a node comes before its children, and children keep their stored order.
        /// </summary>
        /// <param name="node">Root of the tree to walk. A null root yields an empty list.</param>
        /// <returns>A newly allocated list of the named nodes; never null.</returns>
        private static List<DtdNode> GetAllDtdElements(DtdNode node)
        {
            // One shared accumulator avoids allocating and copying a list per recursion level.
            List<DtdNode> list = new List<DtdNode>();
            CollectDtdElements(node, list);
            return list;
        }

        /// <summary>
        /// Appends the named nodes of the subtree rooted at <paramref name="node"/> to
        /// <paramref name="result"/>, visiting the node itself before its children.
        /// </summary>
        private static void CollectDtdElements(DtdNode node, List<DtdNode> result)
        {
            if (node == null)
            {
                // Treeify returns null when the expression produced no node at all.
                return;
            }

            // Nodes without a name are pure groupings; only their descendants are of interest.
            if (!string.IsNullOrEmpty(node.ElementName))
            {
                result.Add(node);
            }

            foreach (DtdNode child in node.Children)
            {
                CollectDtdElements(child, result);
            }
        }
示例#2
0
        //child element names must be formatted like "a#b#c#"

        /*    private static void RevalidateChildren(Regex regex, string childrenNames, ContentDocumentValidationError error)
         *  {
         *      Match match = regex.Match(childrenNames);
         *
         *      if (match.ToString() == childrenNames)
         *      {
         *          //we can say that the element after the last child element in the sequence
         *          //is where the problems start
         *          //and childrenNames is known to start at the beginning of the target node's children
         *          ArrayList childrenArr = StringToArrayList(childrenNames, '#');
         *          if (childrenArr.Count < error.Target.Children.Count)
         *          {
         *              //error.BeginningOfError = error.Target.Children.Get(childrenArr.Count);
         *          }
         *      }
         *      else
         *      {
         *          //test subsets of children -- for a#b#c# test a#b#
         *          ArrayList childrenArr = StringToArrayList(childrenNames, '#');
         *          if (childrenArr.Count >= 2)
         *          {
         *              string subchildren = ArrayListToString(childrenArr.GetRange(0, childrenArr.Count - 1), '#');
         *              RevalidateChildren(regex, subchildren, error);
         *          }
         *          else
         *          {
         *              //there are no smaller subsets to test, so the error could be either with the first child
         *              //or just a general error with the overall sequence
         *              //better not to be specific if we aren't sure
         *              //error.BeginningOfError = null;
         *          }
         *      }
         *  }
         */
        //private static ArrayList StringToArrayList(string input, char delim)
        //{
        //    ArrayList arr = new ArrayList(input.Split(delim));
        //    //trim the null item at the end of the array list
        //    if (string.IsNullOrEmpty((string)arr[arr.Count - 1]))
        //        arr.RemoveAt(arr.Count - 1);
        //    return arr;
        //}
        //private static string ArrayListToString(ArrayList arr, char delim)
        //{
        //    string str = "";
        //    for (int i = 0; i < arr.Count; i++)
        //    {
        //        str += arr[i].ToString();
        //        str += delim;
        //    }
        //    return str;
        //}

        //list the allowed elements, given a regex representing DTD rules
        //it would be easier to just construct this from DtdSharp objects, but there's a chance
        //that DtdSharp was never used in the scenario where Tobi caches a parsed DTD and uses that instead of
        //parsing with DtdSharp.  our cache contains a series of pairs, each representing
        //an element name and a regex of its allowed content model.
        /// <summary>
        /// Builds a human-readable list of the distinct element names that occur in a
        /// content-model regex, in order of first appearance.
        /// </summary>
        /// <param name="regex">The content-model regex for one element; may be null or empty.</param>
        /// <returns>
        /// The unique element names separated by ", ", or an empty string when
        /// <paramref name="regex"/> is null/empty or contains nothing that can be parsed into a tree.
        /// </returns>
        public static string GetElementsListFromDtdRegex(string regex)
        {
            if (string.IsNullOrEmpty(regex))
            {
                return "";
            }

            // Drop the non-capturing-group markers and the name delimiter so that only
            // parentheses, quantifiers, '|' and names are left for Treeify to interpret.
            string cleaner = regex.Replace("?:", "").Replace("" + DtdSharpToRegex.DELIMITER, "");

            //this tree structure could also be used to tell the user what the proper sequence(s) should be
            //it's not the most efficient way to only retrieve a unique list of element names
            //however, we are dealing with small datasets, so it's not really an issue
            DtdNode dtdExpressionAsTree = Treeify(cleaner);
            if (dtdExpressionAsTree == null)
            {
                // Nothing was left to parse (for instance the input consisted only of
                // stripped tokens), so there are no element names to report.
                return "";
            }

            // Names already written to the output; Add() returns false for a repeat.
            var alreadySeen = new HashSet<string>();
            var strBuilder = new StringBuilder();

            //only the dtd items that are elements (not just groupings of other elements) are listed
            foreach (DtdNode node in GetAllDtdElements(dtdExpressionAsTree))
            {
                if (!alreadySeen.Add(node.ElementName))
                {
                    continue;
                }

                // GetAllDtdElements only returns nodes with a non-empty name, so a non-empty
                // builder means at least one name has already been written.
                if (strBuilder.Length > 0)
                {
                    strBuilder.Append(", ");
                }
                strBuilder.Append(node.ElementName);
            }

            return strBuilder.ToString();
        }
示例#3
0
        /// <summary>
        /// Parses a cleaned content-model expression into a tree of <c>DtdNode</c> objects.
        /// </summary>
        /// <remarks>
        /// The expression is scanned one character at a time:
        /// '(' opens a new node, attached as a child of the innermost group that is still open;
        /// ')' closes the innermost open group, which becomes the current node again;
        /// '?', '+' and '*' are appended to the current node's <c>AdditionalInfo</c>;
        /// '|' sets <c>ChildRelationship</c> to "or" on the current node's parent;
        /// the <c>DtdSharpToRegex.PCDATA</c> token becomes a node named "TEXT";
        /// any other character extends the current node's <c>ElementName</c>.
        /// Malformed input (an unbalanced ')', or a quantifier or '|' before any node exists)
        /// is tolerated rather than causing an exception.
        /// </remarks>
        /// <param name="regex">
        /// The expression to parse. The visible caller strips "?:" and
        /// <c>DtdSharpToRegex.DELIMITER</c> before passing it in.
        /// </param>
        /// <returns>The first node created, or null when the expression produced no node.</returns>
        private static DtdNode Treeify(string regex)
        {
            // Groups that have been opened but not yet closed; the innermost one is last.
            List<DtdNode> parentQ = new List<DtdNode>();
            string temp = "";       // characters of the element name currently being read
            DtdNode node = null;    // node that names, quantifiers and '|' apply to
            DtdNode root = null;    // first node ever created
            string pcdata = DtdSharpToRegex.PCDATA;

            for (int i = 0; i < regex.Length; i++)
            {
                char c = regex[i];

                if (c == '(')
                {
                    //for each open paren, start a new node
                    temp = "";
                    node = new DtdNode();
                    if (root == null)
                    {
                        root = node;
                    }
                    if (parentQ.Count > 0)
                    {
                        DtdNode parent = parentQ[parentQ.Count - 1];
                        parent.Children.Add(node);
                        node.Parent = parent;
                    }
                    parentQ.Add(node);
                }
                else if (c == ')')
                {
                    temp = "";
                    // An unbalanced ')' is ignored instead of indexing an empty stack.
                    if (parentQ.Count > 0)
                    {
                        // Make the group that was just closed current again, so that a
                        // quantifier or '|' following ')' refers to that group and not to
                        // whichever node happened to be created last inside it.
                        node = parentQ[parentQ.Count - 1];
                        parentQ.RemoveAt(parentQ.Count - 1);
                    }
                }
                else if (c == '?' || c == '+' || c == '*')
                {
                    if (node != null)
                    {
                        node.AdditionalInfo += c;
                    }
                    temp = "";
                }
                else if (c == '|')
                {
                    if (node != null && node.Parent != null)
                    {
                        node.Parent.ChildRelationship = "or";
                    }
                }
                else if (c == pcdata[0])
                {
                    //look ahead for PCDATA
                    // CompareOrdinal compares at most pcdata.Length characters and simply
                    // reports "different" when fewer remain, whereas Substring would throw
                    // if the token's first character appeared near the end of the input.
                    if (string.CompareOrdinal(regex, i, pcdata, 0, pcdata.Length) == 0)
                    {
                        node = new DtdNode();
                        node.ElementName = "TEXT";
                        if (parentQ.Count > 0)
                        {
                            DtdNode parent = parentQ[parentQ.Count - 1];
                            parent.Children.Add(node);
                            node.Parent = parent;
                        }
                        else if (root == null)
                        {
                            root = node;
                        }

                        // Skip the rest of the token; the loop increment consumes its last character.
                        i += (pcdata.Length - 1);
                    }
                    // NOTE(review): a character equal to the first character of PCDATA that does
                    // not start the full token is skipped, as before. Confirm whether it should
                    // instead be treated as part of an element name.
                }
                else
                {
                    if (node == null)
                    {
                        // A name that is not wrapped in parentheses: give it a node of its own
                        // instead of dereferencing null. No node exists yet, so it is the root.
                        node = new DtdNode();
                        root = node;
                    }
                    temp += c;
                    node.ElementName = temp.Replace(DtdSharpToRegex.NAMESPACE_PREFIX_SEPARATOR, ':');
                }
            }

            return root;
        }