/// <summary>
/// Parses the HTML held in <c>_source</c> and fills in <c>Name</c>, <c>Description</c>
/// and <c>Fields</c> for this type. Enum-like fields ("one of ...") additionally register
/// an inferred enum <c>TypeInfo</c> in <c>Docs.Types</c>.
/// </summary>
/// <remarks>
/// Does nothing when <c>Inferred</c> is set. The type named "response_wrapper" is delegated
/// to <c>ParseOldStyle</c>; every other type is read from the "type-field-list" table.
/// </remarks>
/// <exception cref="Exception">
/// Thrown when the page does not have the expected shape (missing summary node, missing
/// table, malformed row, unexpected descriptor text, unexpected type link, ...).
/// </exception>
public void ParseSource()
{
    if (this.Inferred)
    {
        return;
    }

    // get description
    this.Name = this.Type.Replace(" ", "_").Replace("-", "_");

    HtmlDocument sourceDoc = new HtmlDocument();
    sourceDoc.LoadHtml(_source);

    // <div id="discussion">
    var summaryNode = sourceDoc.DocumentNode.SelectSingleNode("//div[@id='discussion']");
    if (summaryNode == null)
    {
        throw new Exception("could not find summary node for " + Type);
    }

    Description = summaryNode.InnerText.Trim();

    if (this.Name == "response_wrapper")
    {
        // #EDGE CASE
        ParseOldStyle(sourceDoc);
        return;
    }

    // fields are now a table. grr....
    //<table class="type-field-list">
    HtmlNode fieldsList = sourceDoc.DocumentNode.SelectSingleNode("//table[@class='type-field-list']");
    if (fieldsList == null)
    {
        throw new Exception("could not find fields list for " + Type);
    }

    // SelectNodes yields null (not an empty collection) when nothing matches, so fail
    // with a descriptive message instead of a NullReferenceException in the foreach.
    var rows = fieldsList.SelectNodes("tr");
    if (rows == null)
    {
        throw new Exception("could not find field rows for " + Type);
    }

    // built once; the pattern is identical for every row
    Regex doubleWordPattern = new Regex("\\b(?<word>\\w+)\\s+(\\k<word>)\\b");

    FieldInfo fieldInfo = null;
    foreach (var row in rows)
    {
        // both branches below read cell 0 and cell 1
        var cells = row.SelectNodes("td");
        if (cells == null || cells.Count < 2)
        {
            throw new Exception("unexpected field row for " + Type);
        }

        HtmlNode td = cells[0];
        if (string.IsNullOrEmpty(td.InnerHtml.Trim()))
        {
            // this is additional descriptor for current fields
            //unchanged in unsafe filters
            if (fieldInfo == null)
            {
                throw new Exception("field descriptor row found before any field for " + Type);
            }

            td = cells[1];

            // td is now the field type descriptor
            if (td.InnerText.Trim() != "unchanged in unsafe filters")
            {
                throw new Exception("unexpected field type descriptor:" + td.InnerText.Trim());
            }

            fieldInfo.UnchangedInUnsafeFilters = true;
            continue;
        }

        // this is a new field
        //<td><span class='included' title='this field is included in the default filter'>✔</span> question_id</td>
        // <td>integer, refers to a <a href="/docs/types/question">question</a></td>
        fieldInfo = new FieldInfo();
        this.Fields.Add(fieldInfo);

        var span = td.SelectSingleNode("span");
        if (span == null)
        {
            throw new Exception("could not find include/exclude span");
        }

        var classAttr = span.Attributes["class"];
        if (classAttr == null || (classAttr.Value != "included" && classAttr.Value != "excluded"))
        {
            throw new Exception("unexpected class in field row");
        }

        fieldInfo.IncludedInDefaultFilter = classAttr.Value == "included";

        // drop the marker span so only the field name remains in the cell text
        td.ChildNodes.Remove(span);
        var fieldName = td.InnerText.Trim();
        fieldInfo.Name = fieldName;

        td = cells[1];

        // td is now the field type descriptor
        // look for "xxx, refers to a question"
        string fieldType;
        string cellText = td.InnerText;
        int refersAtText = cellText.IndexOf(", refers to");
        if (refersAtText > -1)
        {
            // +1 skips the comma so the description starts at "refers to"
            fieldInfo.Description = cellText.Substring(refersAtText + 1).Trim();
            fieldType = cellText.Substring(0, refersAtText).Trim();
        }
        else
        {
            fieldType = cellText;
        }

        // only the markup before ", refers to" decides whether the field is a documented type
        string tdInnerHtml = td.InnerHtml;
        int refersAtHtml = tdInnerHtml.IndexOf(", refers to");
        if (refersAtHtml > -1)
        {
            tdInnerHtml = tdInnerHtml.Substring(0, refersAtHtml).Trim();
        }

        if (!tdInnerHtml.Contains("href=\"/docs/types/"))
        {
            fieldInfo.IsPrimitive = true;
        }
        else
        {
            // a missing direct-child link or href is reported the same way as a wrong target
            var typeLink = td.SelectSingleNode("a");
            var href = typeLink == null ? null : typeLink.Attributes["href"];
            if (href == null || !href.Value.StartsWith("/docs/types"))
            {
                throw new Exception("unexpected type");
            }

            fieldType = typeLink.InnerText;
        }

        if (td.InnerHtml.Contains("an array of"))
        {
            fieldInfo.IsArray = true;
            if (fieldInfo.IsPrimitive)
            {
                // the only primitive array accepted is "strings"
                fieldType = fieldType.Replace("an array of", "").Trim();
                if (fieldType != "strings")
                {
                    throw new Exception("unexpected field array type: " + fieldType);
                }

                fieldType = "string";
            }
        }

        if (fieldType.Contains("one of"))
        {
            fieldInfo.IsEnum = true;
            fieldType = fieldType.Replace(", or ", ", ");
            fieldType = fieldType.Replace("one of", "");
        }

        //
        if (fieldInfo.IsEnum)
        {
            // keep the comma separated values; the enum type is named after type + field
            fieldInfo.EnumValues = fieldType;
            fieldType = Type + " " + fieldName;
        }

        //// clean up repeated words ?
        fieldType = fieldType.Replace("-", " ");
        fieldType = fieldType.Replace("_", " ");
        fieldType = fieldType.Trim();

        // "${word}" collapses each doubled word to itself; substituting the first match's
        // word everywhere would corrupt a second, different doubled word.
        fieldType = doubleWordPattern.Replace(fieldType, "${word}");

        fieldType = fieldType.Replace(" ", "_").Replace("-", "_");
        fieldInfo.Type = fieldType;

        if (fieldInfo.IsEnum)
        {
            var type = new TypeInfo(Docs);
            type.Type = fieldType;
            type.Name = fieldType;
            type.IsEnum = true;
            type.Inferred = true;
            foreach (string item in fieldInfo.EnumValues.Split(new char[] { ',' }, StringSplitOptions.RemoveEmptyEntries))
            {
                var f = new FieldInfo();
                f.Type = "string";
                f.Name = item.Trim();
                type.Fields.Add(f);
            }

            this.Docs.Types.Add(type);
        }

        // #EDGE CASE
        if (fieldType == "the_type_found_in_type")
        {
            GenericType = fieldType;
            fieldInfo.IsPrimitive = false;
        }
    }
}
/// <summary>
/// Extracts the inline "var parameters = ..." script from <c>Source</c> and uses it to set
/// <c>Script</c>, <c>ScriptMethodUri</c>, <c>UriTemplate</c>, <c>ScriptMethodName</c> and
/// <c>ScriptFilter</c>, then translates each entry of the script's <c>parameters</c> object
/// into a <c>Parameter</c> (or into an authentication / scope requirement).
/// </summary>
/// <remarks>
/// The "order", "period" and "sort" parameters are enum-like; an inferred <c>TypeInfo</c>
/// is added to <c>Docs.Types</c> for them when one with the same <c>Type</c> is not
/// already present.
/// </remarks>
/// <exception cref="Exception">
/// Thrown when an expected script variable is missing or a parameter has an unexpected shape.
/// </exception>
private void ParseScript()
{
    Match scriptMatch = Regex.Match(
        Source,
        "<script type=\"text/javascript\">\\s*(?<script>var\\s+parameters\\s+=.*?)</script>",
        RegexOptions.ExplicitCapture | RegexOptions.Singleline);
    if (!scriptMatch.Success)
    {
        throw new Exception("could not find script in method source " + UriTemplate);
    }

    string script = scriptMatch.Groups["script"].Value;
    Script = script;

    // every lookup inside the script body uses the same option set
    RegexOptions scriptOptions = RegexOptions.ExplicitCapture | RegexOptions.Singleline | RegexOptions.IgnoreCase;

    var methodNameMatch = Regex.Match(script, "var method = \"(?<uri>/2.0/(?<name>.*?))\";", scriptOptions);
    if (!methodNameMatch.Success)
    {
        throw new Exception("could not find script methodname " + UriTemplate);
    }

    ScriptMethodUri = methodNameMatch.Groups["uri"].Value;

    // the template is whatever follows "/2.0/" + Target; report a short uri explicitly
    // rather than letting Substring throw an ArgumentOutOfRangeException
    string uriPrefix = "/2.0/" + Target;
    if (uriPrefix.Length > ScriptMethodUri.Length)
    {
        throw new Exception("script method uri '" + ScriptMethodUri + "' is shorter than expected prefix '" + uriPrefix + "'");
    }

    UriTemplate = ScriptMethodUri.Substring(uriPrefix.Length);
    ScriptMethodName = methodNameMatch.Groups["name"].Value;

    var filterNameMatch = Regex.Match(script, "var filterName = \"(?<filter>.*?)\";", scriptOptions);
    if (!filterNameMatch.Success)
    {
        throw new Exception("could not find script filterName " + UriTemplate);
    }

    ScriptFilter = filterNameMatch.Groups["filter"].Value;

    var dependantTypesMatch = Regex.Match(script, "var dependentTypes = (?<types>\\[.*?\\]);", scriptOptions);
    if (!dependantTypesMatch.Success)
    {
        throw new Exception("could not find script dependant types " + UriTemplate);
    }

    // parsed only to confirm the array is well formed; the values are not consumed here
    JArray.Parse(dependantTypesMatch.Groups["types"].Value);

    var paramMatch = Regex.Match(script, "var parameters = (?<params>{.*?});", scriptOptions);
    if (!paramMatch.Success)
    {
        throw new Exception("could not find script params " + UriTemplate);
    }

    string paramJson = paramMatch.Groups["params"].Value;
    var paramsObj = JObject.Parse(paramJson);

    // determine parameter name and type.
    foreach (var prop in paramsObj)
    {
        bool addParam = true;
        var paramName = prop.Key;
        var parameter = new Parameter { Name = paramName, IsPrimitive = true };

        switch (prop.Value.Type)
        {
            case JTokenType.Array:
                // indicates a simple 'enum' type.
                // these should be same for every method that calls them
                switch (paramName)
                {
                    case "order":
                        parameter.IsPrimitive = false;
                        parameter.Type = "order";

                        // verify that the shape has not changed
                        VerifyScriptEnumShape(paramName, prop.Value, "asc", "desc");
                        EnsureInferredScriptEnum("order", prop.Value);
                        break;

                    case "period":
                        parameter.IsPrimitive = false;
                        parameter.Type = "period";

                        // verify that the shape has not changed
                        VerifyScriptEnumShape(paramName, prop.Value, "all_time", "month");
                        EnsureInferredScriptEnum("period", prop.Value);
                        break;

                    case "unsafe":
                        // a two valued false/true list is surfaced as a plain boolean
                        parameter.Type = "boolean";

                        // verify that the shape has not changed
                        VerifyScriptEnumShape(paramName, prop.Value, "false", "true");
                        break;

                    default:
                        throw new Exception("unexpected array: " + paramName);
                }

                break;

            case JTokenType.Object:
                switch (paramName)
                {
                    case "sort":
                        parameter.IsPrimitive = false;
                        parameter.Type = "sort_" + this.Name;

                        // this is an enum type with added meta to help inform use of min/max.
                        // this enum can and will be different for every method so we can either try to identify
                        // like instances and provide a name for it or simply name it after the method, e.g. AnswersSort
                        // i like this but some of the longer method names will result in unwieldy type names but oh well.
                        // e.g. GetUsersByIdAssociatedSort. Maybe reverse the composition to SortGetUsersByIdAssociated.
                        // this will make autocomplete and intellisense work better as well as grouping the sort enums.
                        // this will also help us determine what type of values are
                        // appropriate for min/max);
                        if (this.Docs.Types.FirstOrDefault(t => t.Type == parameter.Type) == null)
                        {
                            var sortType = new TypeInfo(Docs);
                            sortType.Type = parameter.Type;
                            sortType.Name = parameter.Type;
                            sortType.IsEnum = true;
                            sortType.Inferred = true;
                            foreach (JProperty item in prop.Value.Children())
                            {
                                var field = new FieldInfo();
                                field.Type = "string";
                                field.Name = item.Name;
                                field.Description = "min/max are " + ((JValue)item.Value).Value;
                                sortType.Fields.Add(field);
                            }

                            this.Docs.Types.Add(sortType);
                        }

                        break;

                    default:
                        throw new Exception("unexpected object: " + paramName);
                }

                break;

            case JTokenType.String:
                // simple
                string propType = prop.Value.Value<string>();
                switch (propType)
                {
                    case "access_token":
                        // not a real parameter: it marks the method as requiring authentication
                        RequiresAuthentication = true;
                        addParam = false;
                        break;

                    case "read_inbox":
                        // this is part of access token scope
                        RequiredScopes.Add(propType);
                        addParam = false;
                        break;

                    case "depends":
                        parameter.Type = "string";
                        parameter.Format = "sort-dependant";
                        break;

                    case "date":
                        parameter.Type = "number";
                        parameter.Format = "utc-millisec";
                        break;

                    case "guid list":
                        parameter.Type = "string";
                        parameter.Format = "guid-list";
                        break;

                    case "number":
                        parameter.Type = "number";
                        break;

                    case "number list":
                        parameter.Type = "string";
                        parameter.Format = "number-list";
                        break;

                    case "string":
                        parameter.Type = "string";
                        break;

                    case "string list":
                        parameter.Type = "string";
                        parameter.Format = "string-list";
                        break;

                    default:
                        throw new Exception("unexpected property type: " + propType + " (parameter " + paramName + ")");
                }

                break;

            default:
                throw new Exception("unexpected property type: " + prop.Value.Type + " (parameter " + paramName + ")");
        }

        if (addParam)
        {
            Parameters.Add(parameter);
        }
    }
}

/// <summary>
/// Throws unless <paramref name="values"/> has exactly as many children as there are
/// <paramref name="expected"/> strings and each expected string occurs exactly once.
/// </summary>
private static void VerifyScriptEnumShape(string enumName, JToken values, params string[] expected)
{
    var items = values.Children().ToList();

    // the count is checked first so the children are only converted when the size is right
    if (items.Count != expected.Length
        || expected.Any(e => items.Count(c => c.Value<string>() == e) != 1))
    {
        throw new Exception("badly formed '" + enumName + "' enum");
    }
}

/// <summary>
/// Adds an inferred string enum named <paramref name="typeName"/> to <c>Docs.Types</c>,
/// with one field per child of <paramref name="values"/>, unless a type with that
/// <c>Type</c> is already registered.
/// </summary>
private void EnsureInferredScriptEnum(string typeName, JToken values)
{
    if (Docs.Types.FirstOrDefault(t => t.Type == typeName) != null)
    {
        return;
    }

    var enumType = new TypeInfo(Docs);
    enumType.Type = typeName;
    enumType.IsEnum = true;
    enumType.Inferred = true;
    enumType.Name = typeName.Replace(" ", "_").Replace("-", "_").Trim();
    foreach (JToken item in values.Children())
    {
        var field = new FieldInfo();
        field.Type = "string";
        field.Name = item.Value<string>();
        enumType.Fields.Add(field);
    }

    this.Docs.Types.Add(enumType);
}
/// <summary>
/// Reads fields from the older page layout, where they are a nested list under
/// the "discussion" div: each outer <c>li</c> is a field and its single inner <c>ul</c>
/// holds the type descriptor(s). Adds one <c>FieldInfo</c> per field to <c>Fields</c> and
/// registers an inferred enum <c>TypeInfo</c> in <c>Docs.Types</c> for "one of ..." fields.
/// </summary>
/// <param name="sourceDoc">The already loaded documentation page.</param>
/// <exception cref="Exception">Thrown when the list does not have the expected shape.</exception>
private void ParseOldStyle(HtmlDocument sourceDoc)
{
    HtmlNode fieldsList = sourceDoc.DocumentNode.SelectSingleNode("//div[@id='discussion']/ul");
    if (fieldsList == null)
    {
        throw new Exception("could not find fields list for " + Type);
    }

    var fields = fieldsList.SelectNodes("li");
    if (fields == null || fields.Count == 0)
    {
        throw new Exception("error reading fields list for " + Type);
    }

    // built once; the pattern is identical for every field
    Regex doubleWordPattern = new Regex("\\b(?<word>\\w+)\\s+(\\k<word>)\\b");

    foreach (HtmlNode field in fields)
    {
        var typeList = field.SelectNodes("ul");
        if (typeList == null || typeList.Count != 1)
        {
            // the field name cannot be isolated until the nested list has been removed
            throw new Exception("error reading fields list for " + Type + ".[TODO field name]");
        }

        // detach the nested type list so it does not leak into the field name text
        HtmlNode typeNode = typeList[0];
        field.RemoveChild(typeNode);

        HtmlNode filterNode = field.SelectSingleNode("span");
        if (filterNode == null)
        {
            // fail with a descriptive message instead of passing null to RemoveChild
            throw new Exception("could not find include/exclude span for " + Type + "." + field.InnerText.Trim());
        }

        // #TODO: set default filter flag
        field.RemoveChild(filterNode);

        var fieldInfo = new FieldInfo();
        string fieldName = field.InnerText.Trim();
        fieldInfo.Name = fieldName;
        Fields.Add(fieldInfo);

        var typeDescriptors = typeNode.SelectNodes("li");
        if (typeDescriptors == null)
        {
            throw new Exception("error reading type descriptors list for " + Type + "." + fieldName);
        }

        // a second descriptor, when present, must be the unsafe-filter note
        if (typeDescriptors.Count == 2)
        {
            if (!typeDescriptors[1].InnerText.Contains("unchanged in unsafe filters"))
            {
                throw new Exception("expected unsafe filters");
            }

            fieldInfo.UnchangedInUnsafeFilters = true;
        }

        if (!typeDescriptors[0].InnerHtml.Contains("href=\"/docs/types/"))
        {
            fieldInfo.IsPrimitive = true;
        }

        string fieldType = typeDescriptors[0].InnerText;
        if (fieldType.Contains("an array of"))
        {
            fieldInfo.IsArray = true;
            fieldType = fieldType.Replace("an array of", "");
        }

        if (fieldType.Contains("one of"))
        {
            fieldInfo.IsEnum = true;
            fieldType = fieldType.Replace(", or ", ", ");
            fieldType = fieldType.Replace("one of", "");
        }

        //
        if (fieldInfo.IsEnum)
        {
            // keep the comma separated values; the enum type is named after type + field
            fieldInfo.EnumValues = fieldType;
            fieldType = Type + " " + fieldName;
        }

        //// clean up repeated words ?
        fieldType = fieldType.Replace("-", " ");
        fieldType = fieldType.Replace("_", " ");
        fieldType = fieldType.Trim();

        // "${word}" collapses each doubled word to itself; substituting the first match's
        // word everywhere would corrupt a second, different doubled word.
        fieldType = doubleWordPattern.Replace(fieldType, "${word}");

        fieldType = fieldType.Replace(" ", "_").Replace("-", "_");
        fieldInfo.Type = fieldType;

        if (fieldInfo.IsEnum)
        {
            var type = new TypeInfo(Docs);
            type.Type = fieldType;
            type.Name = fieldType;
            type.IsEnum = true;
            type.Inferred = true;
            foreach (string item in fieldInfo.EnumValues.Split(new char[] { ',' }, StringSplitOptions.RemoveEmptyEntries))
            {
                var f = new FieldInfo();
                f.Type = "string";
                f.Name = item.Trim();
                type.Fields.Add(f);
            }

            this.Docs.Types.Add(type);
        }

        // #EDGE CASE
        if (fieldType == "the_type_found_in_type")
        {
            GenericType = fieldType;
            fieldInfo.IsPrimitive = false;
        }
    }
}