/// <summary>
/// Collects every node in the subtree rooted at <paramref name="node"/> that has a
/// non-empty <c>ElementName</c>. Nodes without a name (pure groupings) are skipped,
/// but their children are still visited.
/// </summary>
/// <param name="node">Root of the subtree to scan. May be null, in which case the result is empty.</param>
/// <returns>
/// The named nodes in depth-first order, each parent before its children and children
/// in the order they appear in <c>Children</c>. Never null.
/// </returns>
private static List<DtdNode> GetAllDtdElements(DtdNode node)
{
    List<DtdNode> list = new List<DtdNode>();
    CollectDtdElements(node, list);
    return list;
}

/// <summary>
/// Recursive worker for <c>GetAllDtdElements</c>: appends the named nodes of the
/// subtree to <paramref name="found"/> instead of building a new list per level.
/// </summary>
private static void CollectDtdElements(DtdNode node, List<DtdNode> found)
{
    // A missing node (e.g. a tree that has no root at all) contributes nothing.
    if (node == null)
    {
        return;
    }

    if (!string.IsNullOrEmpty(node.ElementName))
    {
        found.Add(node);
    }

    foreach (DtdNode child in node.Children)
    {
        CollectDtdElements(child, found);
    }
}
//child element names must be formatted like "a#b#c#"
/* private static void RevalidateChildren(Regex regex, string childrenNames, ContentDocumentValidationError error)
 * {
 *     Match match = regex.Match(childrenNames);
 *
 *     if (match.ToString() == childrenNames)
 *     {
 *         //we can say that the element after the last child element in the sequence
 *         //is where the problems start
 *         //and childrenNames is known to start at the beginning of the target node's children
 *         ArrayList childrenArr = StringToArrayList(childrenNames, '#');
 *         if (childrenArr.Count < error.Target.Children.Count)
 *         {
 *             //error.BeginningOfError = error.Target.Children.Get(childrenArr.Count);
 *         }
 *     }
 *     else
 *     {
 *         //test subsets of children -- for a#b#c# test a#b#
 *         ArrayList childrenArr = StringToArrayList(childrenNames, '#');
 *         if (childrenArr.Count >= 2)
 *         {
 *             string subchildren = ArrayListToString(childrenArr.GetRange(0, childrenArr.Count - 1), '#');
 *             RevalidateChildren(regex, subchildren, error);
 *         }
 *         else
 *         {
 *             //there are no smaller subsets to test, so the error could be either with the first child
 *             //or just a general error with the overall sequence
 *             //better not to be specific if we aren't sure
 *             //error.BeginningOfError = null;
 *         }
 *     }
 * }
 */
//private static ArrayList StringToArrayList(string input, char delim)
//{
//    ArrayList arr = new ArrayList(input.Split(delim));
//    //trim the null item at the end of the array list
//    if (string.IsNullOrEmpty((string)arr[arr.Count - 1]))
//        arr.RemoveAt(arr.Count - 1);
//    return arr;
//}
//private static string ArrayListToString(ArrayList arr, char delim)
//{
//    string str = "";
//    for (int i = 0; i < arr.Count; i++)
//    {
//        str += arr[i].ToString();
//        str += delim;
//    }
//    return str;
//}
//list the allowed elements, given a regex representing DTD rules
//it would be easier to just construct this from DtdSharp objects, but there's a chance
//that DtdSharp was never used in the scenario where Tobi caches a parsed DTD and uses that instead of
//parsing
//with DtdSharp. our cache contains a series of pairs, each representing
//an element name and a regex of its allowed content model.

/// <summary>
/// Builds a human-readable, comma-separated list of the distinct element names that
/// appear in a content-model regex.
/// </summary>
/// <param name="regex">
/// The content-model regex for one element. Every <c>"?:"</c> and every
/// <c>DtdSharpToRegex.DELIMITER</c> is removed before the expression is parsed.
/// </param>
/// <returns>
/// The element names separated by <c>", "</c>, each listed once in order of first
/// appearance. Returns an empty string when <paramref name="regex"/> is null or empty,
/// or when nothing parseable is left after the clean-up.
/// </returns>
public static string GetElementsListFromDtdRegex(string regex)
{
    if (string.IsNullOrEmpty(regex))
    {
        return "";
    }

    //string cleaner = regex.Replace("?:", "").Replace(DELIMITER, "").Replace("((", "( (").Replace("))", ") )").Replace(")?(", ")? (");
    string cleaner = regex.Replace("?:", "").Replace("" + DtdSharpToRegex.DELIMITER, "");

    //this tree structure could also be used to tell the user what the proper sequence(s) should be
    //it's not the most efficient way to only retrieve a unique list of element names
    //however, we are dealing with small datasets, so it's not really an issue
    DtdNode dtdExpressionAsTree = Treeify(cleaner);

    // Treeify yields no root when the cleaned expression contains nothing to parse
    // (for instance the input consisted only of "?:"); there are no names to report then.
    if (dtdExpressionAsTree == null)
    {
        return "";
    }

    //get a list of the dtd items that are elements (not just groupings of other elements)
    List<DtdNode> elements = GetAllDtdElements(dtdExpressionAsTree);

    // The set answers "seen before?" cheaply; the list keeps first-appearance order for output.
    HashSet<string> alreadySeen = new HashSet<string>();
    List<string> uniqueNames = new List<string>();
    foreach (DtdNode element in elements)
    {
        if (alreadySeen.Add(element.ElementName))
        {
            uniqueNames.Add(element.ElementName);
        }
    }

    return string.Join(", ", uniqueNames.ToArray());
}
/// <summary>
/// Parses a cleaned-up content-model expression into a tree of <c>DtdNode</c> objects.
/// </summary>
/// <remarks>
/// The expression is scanned one character at a time:
/// <list type="bullet">
/// <item><description><c>(</c> opens a new node, which becomes a child of the innermost open group.</description></item>
/// <item><description><c>)</c> closes the innermost open group; an unmatched <c>)</c> is ignored.</description></item>
/// <item><description><c>?</c>, <c>+</c> and <c>*</c> are appended to <c>AdditionalInfo</c> of the current node.</description></item>
/// <item><description><c>|</c> sets <c>ChildRelationship</c> of the current node's parent to <c>"or"</c>.</description></item>
/// <item><description>The <c>DtdSharpToRegex.PCDATA</c> token adds a leaf node named <c>"TEXT"</c>.</description></item>
/// <item><description>Any other character extends the current node's element name, with
/// <c>DtdSharpToRegex.NAMESPACE_PREFIX_SEPARATOR</c> replaced by <c>':'</c>.</description></item>
/// </list>
/// Only the first top-level node becomes the root; a node created later while no group
/// is open is not linked into the returned tree.
/// </remarks>
/// <param name="regex">The expression to parse. May be null or empty.</param>
/// <returns>The root node, or null when the expression produced no node at all.</returns>
private static DtdNode Treeify(string regex)
{
    if (string.IsNullOrEmpty(regex))
    {
        return null;
    }

    // Stack of groups that are currently open; the last entry is the innermost one.
    List<DtdNode> openGroups = new List<DtdNode>();
    string nameSoFar = "";
    DtdNode node = null;   // node that quantifiers, '|' and name characters apply to
    DtdNode root = null;

    for (int i = 0; i < regex.Length; i++)
    {
        char c = regex[i];

        if (c == '(')
        {
            //for each open paren, start a new node
            nameSoFar = "";
            node = new DtdNode();
            if (root == null)
            {
                root = node;
            }
            AttachToInnermostGroup(openGroups, node);
            openGroups.Add(node);
        }
        else if (c == ')')
        {
            nameSoFar = "";
            if (openGroups.Count > 0)
            {
                // Whatever follows the paren (a quantifier or '|') refers to the group that
                // was just closed, which is not necessarily the most recently created node.
                node = openGroups[openGroups.Count - 1];
                openGroups.RemoveAt(openGroups.Count - 1);
            }
        }
        else if (c == '?' || c == '+' || c == '*')
        {
            // A quantifier that appears before any node exists has nothing to annotate.
            if (node != null)
            {
                node.AdditionalInfo += c;
            }
            nameSoFar = "";
        }
        else if (c == '|')
        {
            if (node != null && node.Parent != null)
            {
                node.Parent.ChildRelationship = "or";
            }
        }
        else if (MatchesTokenAt(regex, i, DtdSharpToRegex.PCDATA))
        {
            node = new DtdNode();
            node.ElementName = "TEXT";
            AttachToInnermostGroup(openGroups, node);
            if (root == null)
            {
                root = node;
            }

            // Skip the rest of the token; the loop increment consumes its last character.
            i += (DtdSharpToRegex.PCDATA.Length - 1);
        }
        else
        {
            // Part of an element name. This also covers a character that merely equals the
            // first character of the PCDATA token without starting the full token.
            if (node == null)
            {
                // A name that is not wrapped in parentheses still needs a node to live in.
                node = new DtdNode();
                root = node;
            }

            nameSoFar += c;
            node.ElementName = nameSoFar.Replace(DtdSharpToRegex.NAMESPACE_PREFIX_SEPARATOR, ':');
        }
    }

    return root;
}

/// <summary>
/// Links <paramref name="child"/> under the innermost open group, if there is one.
/// </summary>
private static void AttachToInnermostGroup(List<DtdNode> openGroups, DtdNode child)
{
    if (openGroups.Count > 0)
    {
        DtdNode parent = openGroups[openGroups.Count - 1];
        parent.Children.Add(child);
        child.Parent = parent;
    }
}

/// <summary>
/// Reports whether <paramref name="token"/> occurs in <paramref name="text"/> exactly at
/// <paramref name="index"/> (ordinal comparison), without reading past the end of the text.
/// </summary>
private static bool MatchesTokenAt(string text, int index, string token)
{
    if (string.IsNullOrEmpty(token) || index + token.Length > text.Length)
    {
        return false;
    }

    return string.CompareOrdinal(text, index, token, 0, token.Length) == 0;
}