/// <summary>
/// Builds a <see cref="ParseResult"/> from the contents of a <see cref="DataTable"/>.
/// </summary>
/// <param name="dataTable">Table to process. May be null or contain no rows.</param>
/// <returns>
/// A result whose Sql section is always initialized. When the table is null or has
/// no rows, only the end timestamp is stamped; flattened nodes, schema, tokens,
/// row/column counts and the Success flag are populated only for non-empty tables.
/// </returns>
private ParseResult ProcessSourceContent(DataTable dataTable)
{
    ParseResult result = new ParseResult();
    result.Sql = new ParseResult.SqlParseResult();

    bool hasRows = dataTable != null && dataTable.Rows.Count >= 1;
    if (hasRows)
    {
        List<Dictionary<string, object>> records = Common.DataTableToListDictionary(dataTable);

        // Each key/value pair of each dictionary becomes one flattened data node.
        foreach (Dictionary<string, object> record in records)
        {
            foreach (KeyValuePair<string, object> field in record)
            {
                DataNode node = new DataNode(field.Key, field.Value, DataNode.TypeFromValue(field.Value));
                result.Flattened.Add(node);
            }
        }

        result.Schema = ParserCommon.BuildSchema(result.Flattened);
        result.Tokens = ParserCommon.GetTokens(result.Flattened, _TextParser);
        result.Sql.Rows = dataTable.Rows.Count;
        result.Sql.Columns = dataTable.Columns.Count;
        result.Success = true;
    }

    // The end time is recorded on both the empty and the populated path.
    result.Time.End = DateTime.UtcNow;
    return result;
}
/// <summary>
/// Parses XML content into a <see cref="ParseResult"/>.
/// </summary>
/// <param name="content">
/// XML text. It is first passed through <c>XmlTools.Convert</c> and the output of
/// that call is what gets parsed into an <see cref="XElement"/>.
/// </param>
/// <returns>
/// A result carrying the flattened nodes, the depth/node/array statistics reported
/// by <c>Flatten</c>, the derived schema and tokens, with Success set to true.
/// </returns>
private ParseResult ProcessSourceContent(string content)
{
    ParseResult result = new ParseResult();
    result.Xml = new ParseResult.XmlParseResult();

    XElement root = XElement.Parse(XmlTools.Convert(content));

    // Flatten reports its statistics through out parameters, in this order.
    int depth;
    int nodeCount;
    int arrayCount;
    result.Flattened = Flatten(root, out depth, out nodeCount, out arrayCount);

    result.Xml.MaxDepth = depth;
    result.Xml.Nodes = nodeCount;
    result.Xml.Arrays = arrayCount;

    result.Schema = ParserCommon.BuildSchema(result.Flattened);
    result.Tokens = ParserCommon.GetTokens(result.Flattened, _TextParser);

    result.Success = true;
    result.Time.End = DateTime.UtcNow;
    return result;
}
/// <summary>
/// Tokenizes plain text into a <see cref="ParseResult"/>.
/// </summary>
/// <param name="data">Text handed to <c>ParserCommon.GetTokens</c> together with the configured text options.</param>
/// <returns>A result holding the tokens, with Success set to true and the end time stamped.</returns>
private ParseResult ProcessSourceContent(string data)
{
    // Initializer members are assigned in order after construction: Tokens first, then Success.
    ParseResult result = new ParseResult
    {
        Tokens = ParserCommon.GetTokens(data, _ParseOptions.Text),
        Success = true
    };

    result.Time.End = DateTime.UtcNow;
    return result;
}
/// <summary>
/// Parses JSON content into a <see cref="ParseResult"/>.
/// </summary>
/// <param name="content">JSON text, parsed with <c>JToken.Parse</c>.</param>
/// <returns>
/// A result carrying the flattened nodes, the depth/array/node statistics reported
/// by <c>Flatten</c>, the derived schema and tokens, with Success set to true.
/// </returns>
private ParseResult ProcessSourceContent(string content)
{
    ParseResult result = new ParseResult
    {
        Json = new ParseResult.JsonParseResult()
    };

    JToken root = JToken.Parse(content);

    // Flatten reports its statistics through out parameters: depth, arrays, then nodes.
    int depth;
    int arrays;
    int nodes;
    result.Flattened = Flatten(root, out depth, out arrays, out nodes);

    result.Json.MaxDepth = depth;
    result.Json.Arrays = arrays;
    result.Json.Nodes = nodes;

    result.Schema = ParserCommon.BuildSchema(result.Flattened);
    result.Tokens = ParserCommon.GetTokens(result.Flattened, _TextParser);

    result.Success = true;
    result.Time.End = DateTime.UtcNow;
    return result;
}
/// <summary>
/// Parses CSV bytes into a <see cref="ParseResult"/>.
/// </summary>
/// <param name="data">Raw CSV bytes; the first non-empty record is treated as the header row.</param>
/// <returns>
/// A result with one flattened node per cell of every data record, the derived schema
/// and tokens, and the Csv section filled in. <c>Csv.Rows</c> is the number of records
/// read including the header record; <c>Csv.Columns</c> is the widest record seen;
/// <c>Csv.Irregular</c> is set when a data record's width differs from the running
/// column count.
/// </returns>
/// <exception cref="DuplicateNameException">The header record contains repeated names.</exception>
private ParseResult ProcessSourceContent(byte[] data)
{
    ParseResult result = new ParseResult();
    result.Csv = new ParseResult.CsvParseResult();

    string[] headerNames = null;
    List<Dictionary<string, object>> records = new List<Dictionary<string, object>>();
    int rows = 0;
    int columns = 0;

    // Note: this writes to the shared configuration field, not a local copy.
    _CsvConfiguration.Delimiter = _ParseOptions.Csv.ColumnDelimiter.ToString();
    _CsvConfiguration.TrimOptions = TrimOptions.Trim;

    using (MemoryStream buffer = new MemoryStream())
    {
        buffer.Write(data, 0, data.Length);
        buffer.Seek(0, SeekOrigin.Begin);

        using (TextReader reader = new StreamReader(buffer))
        using (CsvHelper.CsvParser parser = new CsvHelper.CsvParser(reader, _CsvConfiguration))
        {
            while (parser.Read())
            {
                string[] record = parser.Record;

                // A null or empty record ends parsing altogether.
                if (record == null || record.Length < 1)
                {
                    break;
                }

                if (rows == 0)
                {
                    // First record: header names, which must be unique.
                    headerNames = record;
                    int uniqueNames = headerNames.Distinct().ToList().Count;
                    if (uniqueNames != headerNames.Length)
                    {
                        throw new DuplicateNameException("Supplied CSV contains headers that would create duplicate columns.");
                    }

                    columns = headerNames.Length;
                }
                else
                {
                    Dictionary<string, object> row = new Dictionary<string, object>();

                    for (int col = 0; col < record.Length; col++)
                    {
                        // Cells beyond the header, or under a blank header, get a synthesized name.
                        bool hasHeader = col < headerNames.Length && !String.IsNullOrEmpty(headerNames[col]);
                        if (hasHeader)
                        {
                            row.Add(headerNames[col], record[col]);
                        }
                        else
                        {
                            row.Add(_ParseOptions.Csv.UnknownColumnPrefix + col.ToString(), record[col]);
                        }
                    }

                    if (record.Length != columns)
                    {
                        result.Csv.Irregular = true;
                    }

                    if (record.Length > columns)
                    {
                        columns = record.Length;
                    }

                    records.Add(row);
                }

                rows++;
            }
        }
    }

    // Every cell of every data record becomes one flattened node.
    foreach (Dictionary<string, object> row in records)
    {
        foreach (KeyValuePair<string, object> cell in row)
        {
            result.Flattened.Add(new DataNode(cell.Key, cell.Value, DataNode.TypeFromValue(cell.Value)));
        }
    }

    result.Schema = ParserCommon.BuildSchema(result.Flattened);
    result.Tokens = ParserCommon.GetTokens(result.Flattened, _TextParser);
    result.Csv.Rows = rows;
    result.Csv.Columns = columns;

    result.Success = true;
    result.Time.End = DateTime.UtcNow;
    return result;
}
/// <summary>
/// Parses an HTML document into a <see cref="ParseResult"/>: head metadata, body
/// content, per-section tokens, and a combined token list.
/// </summary>
/// <param name="data">
/// Raw HTML markup. It is loaded into an <c>HtmlDocument</c>; the raw string is also
/// passed directly to <c>GetTitle</c> and <c>GetImageUrls</c>.
/// </param>
/// <returns>
/// A result whose Html.Head and Html.Body sections are populated, whose Tokens list
/// holds the head tokens followed by the body tokens merged in through
/// <c>ParserCommon.AddToken</c>, with Success set to true. Body token positions are
/// offset so that they start after the highest head token position.
/// </returns>
private ParseResult ProcessSourceContent(string data)
{
    #region Load-Document

    HtmlDocument doc = new HtmlDocument();
    doc.LoadHtml(data);

    ParseResult ret = new ParseResult();
    ret.Html = new ParseResult.HtmlParseResult();

    #endregion

    #region Head

    ret.Html.Head.Title = GetTitle(data);
    ret.Html.Head.MetaDescription = GetMetaDescription(doc);
    ret.Html.Head.MetaKeywords = GetMetaKeywords(doc);
    ret.Html.Head.MetaImageOpengraph = GetMetaImageOpengraph(doc);
    ret.Html.Head.MetaDescriptionOpengraph = GetMetaDescriptionOpengraph(doc);
    ret.Html.Head.MetaVideoTagsOpengraph = GetMetaVideoTagsOpengraph(doc);

    // Head content is the space-separated concatenation of whichever head fields are
    // present. The builder is seeded with a single space, exactly as before.
    StringBuilder head = new StringBuilder(" ");

    if (!String.IsNullOrEmpty(ret.Html.Head.Title))
    {
        head.Append(" ").Append(ret.Html.Head.Title);
    }

    if (!String.IsNullOrEmpty(ret.Html.Head.MetaDescription))
    {
        head.Append(" ").Append(ret.Html.Head.MetaDescription);
    }

    if (ret.Html.Head.MetaKeywords != null && ret.Html.Head.MetaKeywords.Count > 0)
    {
        head.Append(" ").Append(String.Join(" ", ret.Html.Head.MetaKeywords));
    }

    if (!String.IsNullOrEmpty(ret.Html.Head.MetaDescriptionOpengraph))
    {
        head.Append(" ").Append(ret.Html.Head.MetaDescriptionOpengraph);
    }

    if (ret.Html.Head.MetaVideoTagsOpengraph != null && ret.Html.Head.MetaVideoTagsOpengraph.Count > 0)
    {
        head.Append(" ").Append(String.Join(" ", ret.Html.Head.MetaVideoTagsOpengraph));
    }

    ret.Html.Head.Content = head.ToString();
    ret.Html.Head.Tokens = ParserCommon.GetTokens(ret.Html.Head.Content, _ParseOptions.Text);

    #endregion

    #region Body

    ret.Html.Body.ImageUrls = GetImageUrls(doc, data);
    ret.Html.Body.Links = GetLinks(doc);
    ret.Html.Body.Content = GetHtmlBody(doc);
    ret.Html.Body.Tokens = ParserCommon.GetTokens(ret.Html.Body.Content, _ParseOptions.Text);

    #endregion

    #region Data

    ret.Tokens = new List<Token>();
    long bodyStartingPosition = 0;

    if (ret.Html.Head.Tokens != null && ret.Html.Head.Tokens.Count > 0)
    {
        ret.Tokens.AddRange(ret.Html.Head.Tokens);

        // The body starts one past the highest position used by any head token.
        foreach (Token token in ret.Html.Head.Tokens)
        {
            if (token.Positions != null && token.Positions.Count > 0)
            {
                long maxPos = token.Positions.Max();
                if (maxPos >= bodyStartingPosition)
                {
                    bodyStartingPosition = maxPos + 1;
                }
            }
        }
    }

    // bodyStartingPosition + [body token position] will yield the correct position
    // across the entire set of tokens
    if (ret.Html.Body.Tokens != null && ret.Html.Body.Tokens.Count > 0)
    {
        List<Token> updatedTokens = new List<Token>();

        foreach (Token token in ret.Html.Body.Tokens)
        {
            Token updated = new Token();
            updated.Value = token.Value;
            updated.Count = token.Count;
            updated.Positions = new List<long>();

            if (token.Positions != null && token.Positions.Count > 0)
            {
                foreach (long tokenPos in token.Positions)
                {
                    updated.Positions.Add(bodyStartingPosition + tokenPos);
                }
            }

            updatedTokens.Add(updated);
        }

        // Swap in the re-based list once, after the loop. The previous code reassigned
        // the property on every iteration while still enumerating the old list.
        ret.Html.Body.Tokens = updatedTokens;

        foreach (Token token in ret.Html.Body.Tokens)
        {
            ret.Tokens = ParserCommon.AddToken(ret.Tokens, token);
        }
    }

    ret.Schema = BuildSchema();

    #endregion

    ret.Success = true;
    ret.Time.End = DateTime.UtcNow;
    return ret;
}