/// <summary>
/// Runs feature extraction for the single Plotly table identified by <paramref name="fid"/>
/// and hands the table, its features, header embeddings, index info and every chart to
/// <c>Helpers.DumpJson</c> / <c>Helpers.DumpCsv</c>. A timestamp is written to the console
/// after each stage.
/// </summary>
/// <param name="fid">Identifier forwarded to <c>PlotlyTable.GetInstance</c> and <c>PlotlyChart.GetInstance</c>.</param>
public static void ExtractForPlotlyTables(string fid)
        {
            Console.WriteLine(DateTime.Now);
            DataFeatureExtractor.InitializeMetadata("en");
            Console.WriteLine($"After InitializeMetadata: {DateTime.Now}");

            PlotlyTable table = PlotlyTable.GetInstance(fid);
            List<PlotlyChart> charts = PlotlyChart.GetInstance(fid, table);
            Console.WriteLine($"After GetInstance: {DateTime.Now}");

            List<Dictionary<int, Dictionary<string, float[]>>> embeddings;
            SourceFeatures features;
            Common.FileInfo tableInfo = DataFeatureExtractor.ExtractTableFeatures(table, out embeddings, out features);
            Console.WriteLine($"After FE: {DateTime.Now}");

            // The embedding and index dumps are named after TUid with its last three characters removed;
            // the remaining table-level dumps use the full TUid.
            string baseUid = table.TUid.Substring(0, table.TUid.Length - 3);

            Helpers.DumpJson($"{table.TUid}.DF.json", features, DataSerializer.Instance);
            Helpers.DumpJson($"{table.TUid}.table.json", table, DataSerializer.Instance);
            Helpers.DumpJson($"{baseUid}.EMB.json", embeddings, DataSerializer.Instance);
            Helpers.DumpJson($"{baseUid}.index.json", tableInfo, DataSerializer.Instance);
            Helpers.DumpCsv($"{table.TUid}.csv", table);

            // One JSON dump per chart, named after the chart's CUid.
            foreach (PlotlyChart chart in charts)
            {
                Helpers.DumpJson($"{chart.CUid}.json", chart, DataSerializer.Instance);
            }

            Console.WriteLine($"After Dump: {DateTime.Now}");
        }
        /// <summary>
        /// Loads all Plotly tables and charts from <paramref name="plotlyTableFolder"/> via
        /// <c>LoadPlotlyTablesAll</c>, extracts features for each table and hands the results to
        /// <c>Helpers.DumpJson</c> / <c>Helpers.DumpCsv</c>; afterwards every chart is dumped as JSON.
        /// </summary>
        /// <param name="plotlyTableFolder">Folder forwarded to <c>LoadPlotlyTablesAll</c>.</param>
        public static void ExtractForPlotlyTablesAll(string plotlyTableFolder)
        {
            Console.WriteLine($"Extracting PlotlyTable features from {plotlyTableFolder}.");

            PlotlyList loaded = LoadPlotlyTablesAll(plotlyTableFolder);
            List<PlotlyTable> tables = loaded.PlotlyTables;
            List<PlotlyChart> charts = loaded.PlotlyCharts;

            // Initialize some models from MetadataRecoSvr to support the two Bayesian features.
            DataFeatureExtractor.InitializeMetadata("en");

            // Extract features table by table and dump each table's artifacts right away.
            foreach (PlotlyTable table in tables)
            {
                List<Dictionary<int, Dictionary<string, float[]>>> embeddings;
                SourceFeatures features;
                Common.FileInfo tableInfo = DataFeatureExtractor.ExtractTableFeatures(table, out embeddings, out features);

                // The embedding and index dumps are named after TUid with its last three characters removed.
                string baseUid = table.TUid.Substring(0, table.TUid.Length - 3);

                Helpers.DumpJson($"{table.TUid}.DF.json", features, DataSerializer.Instance);
                Helpers.DumpJson($"{table.TUid}.table.json", table, DataSerializer.Instance);
                Helpers.DumpJson($"{baseUid}.EMB.json", embeddings, DataSerializer.Instance);
                Helpers.DumpJson($"{baseUid}.index.json", tableInfo, DataSerializer.Instance);
                Helpers.DumpCsv($"{table.TUid}.csv", table);
            }

            // Charts are dumped only after all tables have been processed.
            foreach (PlotlyChart chart in charts)
            {
                Helpers.DumpJson($"{chart.CUid}.json", chart, DataSerializer.Instance);
            }
        }
示例#3
0
        /// <summary>
        /// Finds the field of <paramref name="plotlyTable"/> referenced by <paramref name="src"/>.
        /// </summary>
        /// <param name="src">
        /// Source reference; only the text after its last ':' is compared against each field's Uid.
        /// May be null.
        /// </param>
        /// <param name="plotlyTable">Table whose <c>Fields</c> are searched.</param>
        /// <returns>The first field whose Uid matches, or null when <paramref name="src"/> is null or nothing matches.</returns>
        public static SourceField GetField(string src, PlotlyTable plotlyTable)
        {
            if (src == null)
            {
                return null;
            }

            // Split always yields at least one segment, so taking the final element is safe.
            string[] segments = src.Split(':');
            string wantedUid = segments[segments.Length - 1];

            SourceField match = null;
            foreach (SourceField candidate in plotlyTable.Fields)
            {
                if (candidate.Uid == wantedUid)
                {
                    match = candidate;
                    break;
                }
            }

            return match;
        }
        /// <summary>
        /// Loads every Plotly table listed in <c>plotly_data_dedup.tsv</c> inside
        /// <paramref name="dataFolder"/>, together with the charts built for it from the
        /// <c>data_origin</c> sub-folder. Tables for which no chart is produced are left out.
        /// </summary>
        /// <param name="dataFolder">Folder containing <c>plotly_data_dedup.tsv</c> and the <c>data_origin</c> sub-folder.</param>
        /// <returns>A <c>PlotlyList</c> holding the kept tables and all of their charts.</returns>
        public static PlotlyList LoadPlotlyTablesAll(string dataFolder)
        {
            Console.WriteLine($"Start loading Plotly tables from {dataFolder}.");

            string originFolder = dataFolder + @"\data_origin\";
            var processedLines = 0;
            List<PlotlyTable> plotlyTables = new List<PlotlyTable>();
            List<PlotlyChart> plotlyCharts = new List<PlotlyChart>();

            // The using statement releases the file handle even if loading a table throws.
            using (TextReader tr = new StreamReader(dataFolder + @"\plotly_data_dedup.tsv"))
            {
                // The first line is read and discarded (presumably a header row).
                tr.ReadLine();

                string line;
                while ((line = tr.ReadLine()) != null)
                {
                    if (processedLines % 100 == 0)
                    {
                        var logLine = string.Format("[Info][{0}] ================ Processed Lines: {1} ================", DateTime.Now.ToString("HH:mm:ss"), processedLines);
                        Console.WriteLine(logLine);
                    }
                    processedLines++;

                    // Only the first tab-separated column is used; it is passed as the id to both GetInstance calls.
                    string fid = line.Split('\t')[0];
                    PlotlyTable plotlyTable = PlotlyTable.GetInstance(fid, originFolder);
                    List<PlotlyChart> tableCharts = PlotlyChart.GetInstance(fid, plotlyTable, originFolder);

                    // A table without any chart is not kept.
                    if (tableCharts.Count != 0)
                    {
                        plotlyTables.Add(plotlyTable);
                        // Append in place instead of rebuilding the whole accumulated list for every row.
                        plotlyCharts.AddRange(tableCharts);
                    }
                }
            }

            // Note: this counts the data lines read, including tables that were dropped above.
            Console.WriteLine($"{processedLines} plotly tables loaded.");

            return new PlotlyList
            {
                PlotlyTables = plotlyTables,
                PlotlyCharts = plotlyCharts
            };
        }
示例#5
0
        /// <summary>
        /// Builds the charts described by the traces stored in
        /// <c>{filePath}{fid}_chartdata.json</c>.
        /// <para>
        /// Each trace is classified as scatter, pie, line or bar; traces of any other type, and
        /// traces with neither a type nor a "line" mode, are skipped. Line, bar and line-drawing
        /// scatter traces that share x source, x axis, y axis and type are merged into one chart
        /// (each contributing a Y field). When a pie trace and a non-pie chart share the same
        /// x source and axes, the earlier chart is removed and every later trace with that
        /// combination is skipped.
        /// </para>
        /// </summary>
        /// <param name="fid">File id; used in the JSON file name and as the prefix of every chart's CUid.</param>
        /// <param name="plotlyTable">Table whose fields the traces' x/y sources are resolved against.</param>
        /// <param name="filePath">Prefix prepended to the JSON file name; defaults to the empty string.</param>
        /// <returns>The list of charts that ended up with at least one X or Y field.</returns>
        public static List<PlotlyChart> GetInstance(string fid, PlotlyTable plotlyTable, string filePath = "")
        {
            List<OriginalPlotlyChart> chartDatas = Helpers.LoadJson<List<OriginalPlotlyChart>>(filePath + fid + @"_chartdata.json", DataSerializer.Instance);
            List<PlotlyChart> charts = new List<PlotlyChart>();
            int cUID = 0;
            int chart_num = 0;

            // XsrcXaxisYaxisType and XsrcXaxisYaxis are kept parallel to charts: entry i holds the keys of charts[i].
            List<string> XsrcXaxisYaxisType = new List<string>();
            List<string> XsrcXaxisYaxis = new List<string>();
            // Keys whose chart was removed because of a pie / non-pie conflict.
            List<string> delXsrcXaxisYaxis = new List<string>();

            foreach (OriginalPlotlyChart chartData in chartDatas)
            {
                // A null entry carries no trace information; skip it instead of failing on the
                // unconditional member accesses below.
                if (chartData == null)
                {
                    continue;
                }

                //If it is horizontal, switch x and y
                if (chartData.Orientation == "h")
                {
                    string axis = chartData.Xaxis;
                    string src = chartData.Xsrc;
                    chartData.Xaxis = chartData.Yaxis;
                    chartData.Yaxis = axis;
                    chartData.Xsrc = chartData.Ysrc;
                    chartData.Ysrc = src;
                }

                //Pie charts only have valuessrc as ysrc
                if (chartData.Valuessrc != null)
                {
                    chartData.Ysrc = chartData.Valuessrc;
                }

                //Draw line
                bool line = chartData.Mode != null && chartData.Mode.Contains("line");

                //Type
                string chartType;
                if (chartData.Type != null)
                {
                    if (chartData.Type.Contains("scatter") && !chartData.Type.Contains("scatter3d"))
                    {
                        chartType = "scatter";
                    }
                    else if (chartData.Type.Contains("pie"))
                    {
                        chartType = "pie";
                    }
                    else if (chartData.Type.Contains("line"))
                    {
                        chartType = "line";
                    }
                    else if (chartData.Type.Contains("bar"))
                    {
                        chartType = "bar";
                    }
                    else
                    {
                        continue;
                    }
                }
                else if (line)
                {
                    chartType = "line";
                }
                else
                {
                    continue; //The chart without type and mode is incomplete
                }

                //Add new chart or find chart_num
                string chartXsrcXaxisYaxis = chartData.Xsrc + '-' + chartData.Xaxis + '-' + chartData.Yaxis;
                string chartXsrcXaxisYaxisType = chartXsrcXaxisYaxis + '-' + chartType;

                if (delXsrcXaxisYaxis.Contains(chartXsrcXaxisYaxis))
                {
                    //Delete same chart, different type with pie
                    continue;
                }

                // Index of the first chart with the same key, or -1 when there is none.
                int sameTypeIndex = XsrcXaxisYaxisType.IndexOf(chartXsrcXaxisYaxisType);
                int sameAxesIndex = XsrcXaxisYaxis.IndexOf(chartXsrcXaxisYaxis);
                bool mergeable = chartType == "line" || chartType == "bar" || (chartType == "scatter" && line);

                if (sameTypeIndex >= 0 && mergeable)
                {
                    //If xsrc, xaxis, yaxis and type are same, they are the same chart
                    chart_num = sameTypeIndex;
                }
                else if (sameAxesIndex >= 0 && (chartType == "pie" || XsrcXaxisYaxisType[sameAxesIndex].Split('-').Last() == "pie"))
                {
                    //Delete the first same chart, different type with pie
                    charts.RemoveAt(sameAxesIndex);
                    XsrcXaxisYaxisType.RemoveAt(sameAxesIndex);
                    XsrcXaxisYaxis.RemoveAt(sameAxesIndex);
                    delXsrcXaxisYaxis.Add(chartXsrcXaxisYaxis);
                    continue;
                }
                else
                {
                    PlotlyChart chart = new PlotlyChart
                    {
                        CUid = $"{fid}.t0.c{cUID}",
                        SheetName = chartData.Name,
                        CType = chartType,
                        XFields = new List<Field>(),
                        YFields = new List<Field>(),
                        ValueDrawsLine = new List<bool>()
                    };
                    SourceField xfield = GetField(chartData.Xsrc, plotlyTable);
                    if (xfield != null)
                    {
                        chart.XFields.Add(new Field
                        {
                            Index = xfield.Index,
                            Name = xfield.Name
                        });
                    }
                    XsrcXaxisYaxis.Add(chartXsrcXaxisYaxis);
                    XsrcXaxisYaxisType.Add(chartXsrcXaxisYaxisType);
                    charts.Add(chart);
                    chart_num = charts.Count - 1;
                    // The counter advances even if the chart is removed below, so CUids may have gaps.
                    cUID += 1;
                }

                // Every surviving trace contributes its y source (when resolvable) and its line flag
                // to the chart selected or created above.
                SourceField yfield = GetField(chartData.Ysrc, plotlyTable);
                if (yfield != null)
                {
                    charts[chart_num].YFields.Add(new Field
                    {
                        Index = yfield.Index,
                        Name = yfield.Name
                    });
                }

                charts[chart_num].ValueDrawsLine.Add(line);

                // A chart that resolved neither an X nor a Y field is dropped, along with its keys.
                if (charts[chart_num].XFields.Count == 0 && charts[chart_num].YFields.Count == 0)
                {
                    charts.RemoveAt(chart_num);
                    XsrcXaxisYaxisType.RemoveAt(chart_num);
                    XsrcXaxisYaxis.RemoveAt(chart_num);
                }
            }

            return charts;
        }