/// <summary>
/// Runs the feature-extraction pipeline for one Plotly table and hands the results
/// (source features, table, header embeddings, index info, CSV and every chart) to
/// <c>Helpers.DumpJson</c> / <c>Helpers.DumpCsv</c>. A timestamp is written to the console
/// after each stage.
/// </summary>
/// <param name="fid">Identifier forwarded to <c>PlotlyTable.GetInstance</c> and <c>PlotlyChart.GetInstance</c>.</param>
public static void ExtractForPlotlyTables(string fid)
{
    Console.WriteLine(DateTime.Now);

    DataFeatureExtractor.InitializeMetadata("en");
    Console.WriteLine($"After InitializeMetadata: {DateTime.Now}");

    PlotlyTable table = PlotlyTable.GetInstance(fid);
    List<PlotlyChart> charts = PlotlyChart.GetInstance(fid, table);
    Console.WriteLine($"After GetInstance: {DateTime.Now}");

    Common.FileInfo tableInfo = DataFeatureExtractor.ExtractTableFeatures(
        table,
        out List<Dictionary<int, Dictionary<string, float[]>>> embeddings,
        out SourceFeatures features);
    Console.WriteLine($"After FE: {DateTime.Now}");

    // The embedding and index dumps use the table UID without its last three characters
    // (presumably a ".tN" table suffix -- confirm against how TUid is built).
    string tableUid = table.TUid;
    string baseUid = tableUid.Substring(0, tableUid.Length - 3);

    Helpers.DumpJson($"{tableUid}.DF.json", features, DataSerializer.Instance);
    Helpers.DumpJson($"{tableUid}.table.json", table, DataSerializer.Instance);
    Helpers.DumpJson($"{baseUid}.EMB.json", embeddings, DataSerializer.Instance);
    Helpers.DumpJson($"{baseUid}.index.json", tableInfo, DataSerializer.Instance);
    Helpers.DumpCsv($"{tableUid}.csv", table);

    foreach (PlotlyChart chart in charts)
    {
        Helpers.DumpJson($"{chart.CUid}.json", chart, DataSerializer.Instance);
    }

    Console.WriteLine($"After Dump: {DateTime.Now}");
}
/// <summary>
/// Loads every Plotly table and chart found under <paramref name="plotlyTableFolder"/>,
/// extracts features for each table and hands the results, plus every chart, to
/// <c>Helpers.DumpJson</c> / <c>Helpers.DumpCsv</c>.
/// </summary>
/// <param name="plotlyTableFolder">Folder forwarded to <c>LoadPlotlyTablesAll</c>.</param>
public static void ExtractForPlotlyTablesAll(string plotlyTableFolder)
{
    Console.WriteLine($"Extracting PlotlyTable features from {plotlyTableFolder}.");

    PlotlyList loaded = LoadPlotlyTablesAll(plotlyTableFolder);
    List<PlotlyTable> tables = loaded.PlotlyTables;
    List<PlotlyChart> charts = loaded.PlotlyCharts;

    // Initialize some models from MetadataRecoSvr to support the two Bayesian features.
    DataFeatureExtractor.InitializeMetadata("en");

    // Extract features for each table and dump them together with the header embeddings.
    foreach (PlotlyTable table in tables)
    {
        Common.FileInfo tableInfo = DataFeatureExtractor.ExtractTableFeatures(
            table,
            out List<Dictionary<int, Dictionary<string, float[]>>> embeddings,
            out SourceFeatures features);

        // The embedding and index dumps use the table UID without its last three characters.
        string tableUid = table.TUid;
        string baseUid = tableUid.Substring(0, tableUid.Length - 3);

        Helpers.DumpJson($"{tableUid}.DF.json", features, DataSerializer.Instance);
        Helpers.DumpJson($"{tableUid}.table.json", table, DataSerializer.Instance);
        Helpers.DumpJson($"{baseUid}.EMB.json", embeddings, DataSerializer.Instance);
        Helpers.DumpJson($"{baseUid}.index.json", tableInfo, DataSerializer.Instance);
        Helpers.DumpCsv($"{tableUid}.csv", table);
    }

    // Charts are dumped only after all tables have been processed.
    foreach (PlotlyChart chart in charts)
    {
        Helpers.DumpJson($"{chart.CUid}.json", chart, DataSerializer.Instance);
    }
}
/// <summary>
/// Finds the field of <paramref name="plotlyTable"/> whose <c>Uid</c> equals the part of
/// <paramref name="src"/> that follows its last ':' (the whole string when it has no ':').
/// </summary>
/// <param name="src">Source reference to resolve; may be null.</param>
/// <param name="plotlyTable">Table whose <c>Fields</c> are searched.</param>
/// <returns>The first matching field, or null when <paramref name="src"/> is null or nothing matches.</returns>
public static SourceField GetField(string src, PlotlyTable plotlyTable)
{
    if (src == null)
    {
        return null;
    }

    // Keep only the text after the last ':'.
    int separator = src.LastIndexOf(':');
    string fieldUid = separator < 0 ? src : src.Substring(separator + 1);

    SourceField match = null;
    foreach (SourceField candidate in plotlyTable.Fields)
    {
        if (candidate.Uid == fieldUid)
        {
            match = candidate;
            break;
        }
    }

    return match;
}
/// <summary>
/// Reads <c>plotly_data_dedup.tsv</c> from <paramref name="dataFolder"/> and, for every row
/// after the header, loads the table and charts named by the row's first tab-separated column
/// from the <c>data_origin</c> sub-folder. Tables that yield no charts are dropped.
/// </summary>
/// <param name="dataFolder">Folder containing the TSV index and the <c>data_origin</c> sub-folder.</param>
/// <returns>A <c>PlotlyList</c> holding the kept tables and all of their charts.</returns>
public static PlotlyList LoadPlotlyTablesAll(string dataFolder)
{
    Console.WriteLine($"Start loading Plotly tables from {dataFolder}.");

    var processedLines = 0;
    List<PlotlyTable> plotlyTables = new List<PlotlyTable>();
    List<PlotlyChart> plotlyCharts = new List<PlotlyChart>();

    // Dispose the reader deterministically so the file handle is released even when
    // loading one of the rows throws.
    using (TextReader tr = new StreamReader(dataFolder + @"\plotly_data_dedup.tsv"))
    {
        // The first line is the header; skip it.
        tr.ReadLine();

        string line;
        while ((line = tr.ReadLine()) != null)
        {
            if (processedLines % 100 == 0)
            {
                var logLine = string.Format("[Info][{0}] ================ Processed Lines: {1} ================", DateTime.Now.ToString("HH:mm:ss"), processedLines);
                Console.WriteLine(logLine);
            }

            processedLines++;

            var items = line.Split('\t');
            PlotlyTable plotlyTable = PlotlyTable.GetInstance(items[0], dataFolder + @"\data_origin\");
            List<PlotlyChart> plotlyChart = PlotlyChart.GetInstance(items[0], plotlyTable, dataFolder + @"\data_origin\");

            // Only keep tables that produced at least one chart.
            if (plotlyChart.Count != 0)
            {
                plotlyTables.Add(plotlyTable);
                // Append in place instead of rebuilding the whole list for every row.
                plotlyCharts.AddRange(plotlyChart);
            }
        }
    }

    // Note: this reports the number of rows read, including rows whose table was dropped.
    Console.WriteLine($"{processedLines} plotly tables loaded.");

    return new PlotlyList
    {
        PlotlyTables = plotlyTables,
        PlotlyCharts = plotlyCharts
    };
}
/// <summary>
/// Loads the raw chart traces from <c>{filePath}{fid}_chartdata.json</c> and groups them
/// into <c>PlotlyChart</c> objects bound to the fields of <paramref name="plotlyTable"/>.
/// </summary>
/// <remarks>
/// Only scatter (not scatter3d), pie, line and bar traces are kept; a trace without a type
/// is treated as a line chart when its mode contains "line" and is skipped otherwise.
/// Line, bar and line-drawing scatter traces sharing x source, x axis, y axis and type are
/// merged into one chart. When a pie trace and another trace share x source, x axis and
/// y axis, the earlier chart is removed and every later trace with that combination is dropped.
/// Charts that end up with neither an x field nor a y field are removed.
/// </remarks>
/// <param name="fid">File identifier; also used as the prefix of each chart's <c>CUid</c>.</param>
/// <param name="plotlyTable">Table whose fields are matched against each trace's x/y source.</param>
/// <param name="filePath">Prefix prepended to the JSON file name (no separator is added).</param>
/// <returns>The resulting charts; empty when the file holds no usable traces.</returns>
public static List<PlotlyChart> GetInstance(string fid, PlotlyTable plotlyTable, string filePath = "")
{
    List<OriginalPlotlyChart> chartDatas = Helpers.LoadJson<List<OriginalPlotlyChart>>(filePath + fid + @"_chartdata.json", DataSerializer.Instance);
    List<PlotlyChart> charts = new List<PlotlyChart>();

    // NOTE(review): guards against the loader yielding null (e.g. a JSON "null" document);
    // whether Helpers.LoadJson can actually return null is not visible here.
    if (chartDatas == null)
    {
        return charts;
    }

    // typeKeys and axisKeys are parallel to charts: entry i describes charts[i].
    List<string> typeKeys = new List<string>();
    List<string> axisKeys = new List<string>();
    // Axis combinations removed because of a pie conflict; later traces with them are ignored.
    HashSet<string> deletedAxisKeys = new HashSet<string>();
    int cUID = 0;

    foreach (OriginalPlotlyChart chartData in chartDatas)
    {
        // A null entry used to pass the null-conditional orientation check and then crash
        // on the next dereference; skip it instead.
        if (chartData == null)
        {
            continue;
        }

        // If it is horizontal, switch x and y.
        if (chartData.Orientation == "h")
        {
            string axis = chartData.Xaxis;
            string src = chartData.Xsrc;
            chartData.Xaxis = chartData.Yaxis;
            chartData.Yaxis = axis;
            chartData.Xsrc = chartData.Ysrc;
            chartData.Ysrc = src;
        }

        // Pie charts only have valuessrc, which is used as ysrc.
        if (chartData.Valuessrc != null)
        {
            chartData.Ysrc = chartData.Valuessrc;
        }

        bool line = chartData.Mode != null && chartData.Mode.Contains("line");

        // Normalize the chart type; unsupported or incomplete traces are skipped.
        string rawType = chartData.Type;
        string cType;
        if (rawType == null)
        {
            if (!line)
            {
                continue; // A trace without type and line mode is incomplete.
            }

            cType = "line";
        }
        else if (rawType.Contains("scatter") && !rawType.Contains("scatter3d"))
        {
            cType = "scatter";
        }
        else if (rawType.Contains("pie"))
        {
            cType = "pie";
        }
        else if (rawType.Contains("line"))
        {
            cType = "line";
        }
        else if (rawType.Contains("bar"))
        {
            cType = "bar";
        }
        else
        {
            continue;
        }

        string axisKey = chartData.Xsrc + '-' + chartData.Xaxis + '-' + chartData.Yaxis;
        string typeKey = axisKey + '-' + cType;

        if (deletedAxisKeys.Contains(axisKey))
        {
            // Same axes as a chart already removed because of a pie conflict.
            continue;
        }

        int chartIndex;
        int sameTypeIndex = typeKeys.IndexOf(typeKey);
        bool mergeable = cType == "line" || cType == "bar" || (cType == "scatter" && line);

        if (sameTypeIndex >= 0 && mergeable)
        {
            // Same xsrc, xaxis, yaxis and type: this trace belongs to an existing chart.
            chartIndex = sameTypeIndex;
        }
        else
        {
            int sameAxesIndex = axisKeys.IndexOf(axisKey);
            bool pieConflict = sameAxesIndex >= 0
                && (cType == "pie" || typeKeys[sameAxesIndex].Split('-').Last() == "pie");

            if (pieConflict)
            {
                // Same axes but a different type, one of them pie: drop the earlier chart
                // and ignore this trace and all later ones with the same axes.
                charts.RemoveAt(sameAxesIndex);
                typeKeys.RemoveAt(sameAxesIndex);
                axisKeys.RemoveAt(sameAxesIndex);
                deletedAxisKeys.Add(axisKey);
                continue;
            }

            PlotlyChart chart = new PlotlyChart
            {
                CUid = $"{fid}.t0.c{cUID}",
                SheetName = chartData.Name,
                CType = cType,
                XFields = new List<Field>(),
                YFields = new List<Field>(),
                ValueDrawsLine = new List<bool>()
            };

            SourceField xfield = GetField(chartData.Xsrc, plotlyTable);
            if (xfield != null)
            {
                chart.XFields.Add(new Field { Index = xfield.Index, Name = xfield.Name });
            }

            axisKeys.Add(axisKey);
            typeKeys.Add(typeKey);
            charts.Add(chart);
            chartIndex = charts.Count - 1;
            cUID += 1;
        }

        PlotlyChart target = charts[chartIndex];

        SourceField yfield = GetField(chartData.Ysrc, plotlyTable);
        if (yfield != null)
        {
            target.YFields.Add(new Field { Index = yfield.Index, Name = yfield.Name });
        }

        target.ValueDrawsLine.Add(line);

        // A chart bound to no table field at all is useless; remove it and its keys.
        // The UID counter is intentionally not rewound, matching the existing numbering.
        if (target.XFields.Count == 0 && target.YFields.Count == 0)
        {
            charts.RemoveAt(chartIndex);
            typeKeys.RemoveAt(chartIndex);
            axisKeys.RemoveAt(chartIndex);
        }
    }

    return charts;
}