/// <summary>
/// Writes an ARFF file (relation 'hpProb') with one data row per bar of the
/// first symbol/period series: bar timestamp, hp date, spread, close and the
/// hp label taken from <c>HpData.Instance.GetHpSum</c>.
/// </summary>
/// <param name="arffFileName">
/// Path of the ARFF file to create. An existing file is deleted first.
/// The path is used verbatim (it is NOT treated as a format string).
/// </param>
public void GenerateHpProbArff(string arffFileName)
{
    int preLength = TestParameters2.PreLength;
    var cp = TestParameters2.CandidateParameter;
    var dataDates = GetDataDateRange();

    // ARFF is a machine-readable, comma-separated format: numbers and dates
    // must not depend on the culture of the thread that happens to run this.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    // Load every symbol/period series in the candidate range.
    // NOTE(review): only hpdvs[0, 0] is consumed below; the other loads are
    // kept because GetDbData may populate a cache - confirm before removing.
    ForexDataRows[,] hpdvs = new ForexDataRows[cp.SymbolCount, cp.PeriodCount];
    for (int s = 0; s < cp.SymbolCount; ++s)
    {
        for (int p = 0; p < cp.PeriodCount; ++p)
        {
            string symbol = cp.AllSymbols[s + cp.SymbolStart];
            string period = cp.AllPeriods[p + cp.PeriodStart];
            string symbolPeriod = string.Format("{0}_{1}", symbol, period);
            hpdvs[s, p] = DbData.Instance.GetDbData(dataDates[0], dataDates[1], symbolPeriod, 0, true, cp);
        }
    }

    if (File.Exists(arffFileName))
    {
        System.IO.File.Delete(arffFileName);
    }

    // The path is passed straight to the writer. Routing it through
    // string.Format would throw on (or mangle) paths containing '{' or '}'.
    using (StreamWriter sw = new StreamWriter(arffFileName))
    {
        sw.WriteLine("@relation 'hpProb'");
        sw.WriteLine("@attribute timestamp date \"yyyy-MM-dd\'T\'HH:mm:ss\"");
        sw.WriteLine("@attribute hpdate date \"yyyy-MM-dd\'T\'HH:mm:ss\"");
        sw.WriteLine("@attribute spread numeric");
        sw.WriteLine("@attribute mainClose numeric");
        sw.WriteLine("@attribute prop " + " {0,1,2,3}");
        sw.WriteLine("@data");
        sw.WriteLine();

        var hps = HpData.Instance.GetHpSum(cp.MainSymbol, cp.MainPeriod);
        var hpdv = hpdvs[0, 0];

        for (int i = preLength - 1; i < hpdv.Length; ++i)
        {
            DateTime nowDate = WekaUtils.GetDateFromTime((long)hpdv[i].Time);

            // In real-time mode the newest bar is always emitted, with a
            // placeholder label, even though it has no hp entry.
            bool isRealTimeLastRow = TestParameters2.RealTimeMode && i == hpdv.Length - 1;
            if (!isRealTimeLastRow && !hps.ContainsKey(nowDate))
            {
                continue;
            }

            long hpTime = WekaUtils.GetTimeFromDate(Parameters.MaxDate);
            int hp = 0;
            if (!isRealTimeLastRow)
            {
                var hpEntry = hps[nowDate];
                hpTime = hpEntry.Item2;
                hp = hpEntry.Item1;
            }

            sw.Write(nowDate.ToString(Parameters.DateTimeFormat, invariant));
            sw.Write(",");

            // hp date
            sw.Write(WekaUtils.GetDateFromTime(hpTime).ToString(Parameters.DateTimeFormat, invariant));
            sw.Write(",");

            sw.Write(Convert.ToInt32(hpdv[i]["spread"]).ToString(invariant));
            sw.Write(",");

            // A decimal comma here would add a column to the row.
            sw.Write(((double)hpdv[i]["close"]).ToString(invariant));
            sw.Write(",");

            sw.WriteLine(hp.ToString(invariant));
        }
    }
}
/// <summary>
/// Ensures the per-candidate train/test <c>Instances</c> templates exist
/// (created once from a header-only ARFF file and cached by
/// <c>cp.Name</c>), then appends one instance per eligible row of the main
/// data series to the train and/or test template.
/// </summary>
/// <param name="generateTrainData">When true, the train template is (re)filled.</param>
/// <param name="generateTestData">When true, the test template is (re)filled.</param>
/// <param name="cp">Candidate parameter describing symbols, periods and indicators.</param>
/// <remarks>
/// Rows for which previous or aligned data cannot be found are skipped: any
/// <see cref="ArgumentException"/> raised while collecting indicator values
/// is swallowed and the row is dropped.
/// </remarks>
public static void GenerateArffTemplate(bool generateTrainData, bool generateTestData, CandidateParameter cp)
{
    if (!m_trainInstancesTemplates.ContainsKey(cp.Name))
    {
        string arffFileName = string.Format("{0}\\mlea_header_{1}.arff", TestParameters.CommonDir, cp.Name);
        if (System.IO.File.Exists(arffFileName))
        {
            System.IO.File.Delete(arffFileName);
        }

        using (StreamWriter sw = new StreamWriter(arffFileName))
        {
            sw.WriteLine(GetArffHeader(System.DateTime.Today, System.DateTime.Today, cp));
        }

        // Loaded twice so that train and test get independent Instances objects.
        m_trainInstancesTemplates[cp.Name] = WekaUtils.LoadInstances(arffFileName);
        m_testInstancesTemplates[cp.Name] = WekaUtils.LoadInstances(arffFileName);
    }

    Instances trainInstancesTemplate = m_trainInstancesTemplates[cp.Name];
    Instances testInstancesTemplate = m_testInstancesTemplates[cp.Name];

    // k == 0: train set, k == 1: test set.
    for (int k = 0; k < 2; ++k)
    {
        if (k == 0 && !generateTrainData)
        {
            continue;
        }
        if (k == 1 && !generateTestData)
        {
            continue;
        }

        bool isTrain = (k == 0);
        DateTime dt1 = isTrain ? m_trainTimeStart : m_testTimeStart;
        DateTime dt2 = isTrain ? m_trainTimeEnd : m_testTimeEnd;
        Instances hereInstances = isTrain ? trainInstancesTemplate : testInstancesTemplate;

        if (!TestParameters.EnablePerhourTrain)
        {
            // Incremental mode: drop leading instances that fall before the
            // new window start and keep the rest.
            while (hereInstances.numInstances() > 0
                && WekaUtils.GetDateValueFromInstances(hereInstances, 0, 0) < dt1)
            {
                hereInstances.delete(0);
            }
        }
        else
        {
            // Per-hour mode: start from an empty set every time.
            hereInstances.delete();
        }

        hereInstances.setRelationName(string.Format("{0}_{1}",
            dt1.ToString(Parameters.DateTimeFormat),
            dt2.ToString(Parameters.DateTimeFormat)));

        // dvs[s, i, 0] is the base table of symbol s / period i;
        // dvs[s, i, j + 1] is its j-th period-time sub-table.
        ForexDataRows[, ,] dvs = new ForexDataRows[cp.SymbolCount, cp.PeriodCount, cp.PeriodTimeCount + 1];
        int[, ,] dvsIdx = new int[cp.SymbolCount, cp.PeriodCount, cp.PeriodTimeCount + 1];
        for (int s = 0; s < cp.SymbolCount; ++s)
        {
            for (int i = 0; i < cp.PeriodCount; ++i)
            {
                // Fix: the bound used PeriodTimeNames[i] while the element
                // access below uses PeriodTimeNames[i + cp.PeriodStart]; with a
                // non-zero PeriodStart the two disagreed.
                int periodTimeLimit = Math.Max(0, Math.Min(cp.PeriodTimeCount - i,
                    Parameters.PeriodTimeNames[i + cp.PeriodStart].Length));

                for (int j = -1; j < periodTimeLimit; ++j)
                {
                    string tableName = cp.AllSymbols[s + cp.SymbolStart] + "_" + cp.AllPeriods[i + cp.PeriodStart]
                        + (j < 0 ? string.Empty : "_" + Parameters.PeriodTimeNames[i + cp.PeriodStart][j]);

                    // Only the main table (first symbol, first period, base)
                    // is requested with mode 0; all others use mode 1.
                    int mode = (s == 0 && i == 0 && j == -1) ? 0 : 1;
                    dvs[s, i, j + 1] = DbData.Instance.GetDbData(dt1, dt2, tableName, mode, isTrain, cp);
                    dvsIdx[s, i, j + 1] = 0;
                }
            }
        }

        ForexDataRows mainDv = dvs[0, 0, 0];
        if (mainDv.Length == 0)
        {
            continue;
        }
        if (dvs[0, cp.PeriodCount - 1, 0].Length <= cp.PrevTimeCount - 1)
        {
            // The last period has too few rows to supply PrevTimeCount bars.
            continue;
        }

        // Newest date already present in the set; older rows are not re-added.
        DateTime nowInstanceMaxDate = DateTime.MinValue;
        if (!TestParameters.EnablePerhourTrain && hereInstances.numInstances() > 0)
        {
            nowInstanceMaxDate = WekaUtils.GetDateValueFromInstances(hereInstances, 0, hereInstances.numInstances() - 1);
        }

        for (int rowIdx = 0; rowIdx < mainDv.Length; ++rowIdx)
        {
            ForexData mainDrv = mainDv[rowIdx];
            long mainTime = mainDrv.Time;
            DateTime row_date = WekaUtils.GetDateFromTime(mainTime);

            if (row_date <= nowInstanceMaxDate)
            {
                continue;
            }
            if (row_date < dt1)
            {
                continue;
            }
            if (row_date >= dt2)
            {
                break;
            }
            if (TestParameters.EnablePerhourTrain && m_currentTestHour != row_date.Hour)
            {
                continue;
            }

            // Placeholder class value written for every generated instance.
            int hp = 1;
            double[] instanceValue = new double[hereInstances.numAttributes()];

            // If not set to GMT, this should be shifted by -8 * 60 * 60 * 1000 (UTC+8).
            instanceValue[0] = (row_date - Parameters.MtStartTime).TotalMilliseconds;
            instanceValue[1] = (Parameters.MaxDate - Parameters.MtStartTime).TotalMilliseconds;
            instanceValue[2] = Convert.ToDouble(mainDrv["spread"]);
            instanceValue[3] = (double)mainDrv["mainClose"];
            instanceValue[4] = (int)mainDrv["hour"] / 24.0;
            instanceValue[5] = (int)mainDrv["dayofweek"] / 5.0;
            instanceValue[6] = 0;

            // Indicator values are appended from slot 7 onwards.
            int start = 7;
            try
            {
                for (int s = 0; s < cp.SymbolCount; ++s)
                {
                    int nowRowIdx_s = FindRowByTime(dvs[s, 0, 0], mainTime, ref dvsIdx[s, 0, 0]);
                    WekaUtils.DebugAssert((long)dvs[s, 0, 0][nowRowIdx_s].Time == mainTime, "(long)dvs[s, 0, 0][nowRowIdx_s].Time == mainTime");

                    double mainClose = 0;
                    if (cp.AllIndNames2.ContainsKey("close"))
                    {
                        mainClose = (double)dvs[s, 0, 0][nowRowIdx_s]["close"];
                    }

                    for (int i = 0; i < cp.PeriodCount; ++i)
                    {
                        int periodSeconds = 60 * WekaUtils.GetMinuteofPeriod(cp.AllPeriods[i + cp.PeriodStart]);

                        // Integer division truncates mainTime to the start of
                        // the enclosing bar of this period.
                        long periodTime = mainTime / periodSeconds * periodSeconds;
                        int nowRowIdx = FindRowByTime(dvs[s, i, 0], periodTime, ref dvsIdx[s, i, 0]);

                        // Same corrected bound as in the loading loop above.
                        int periodTimeLimit = Math.Max(0, Math.Min(cp.PeriodTimeCount - i,
                            Parameters.PeriodTimeNames[i + cp.PeriodStart].Length));

                        for (int p = 0; p < cp.PrevTimeCount; ++p)
                        {
                            if (nowRowIdx - p < 0)
                            {
                                throw new ArgumentException("No prev data!");
                            }

                            ForexData nowDrv = dvs[s, i, 0][nowRowIdx - p];
                            foreach (var kvp in cp.AllIndNames2)
                            {
                                double v = Convert.ToDouble(nowDrv[kvp.Key]);
                                // Fix: the symbol lookup now honours cp.SymbolStart
                                // like every other AllSymbols access in this method.
                                double ind = WekaUtils.NormalizeValue(kvp.Key, kvp.Value, v, mainClose,
                                    WekaUtils.GetSymbolPoint(cp.AllSymbols[s + cp.SymbolStart]));
                                instanceValue[start] = ind;
                                start++;
                            }

                            for (int j = -1; j < periodTimeLimit; ++j)
                            {
                                int nowRowIdx2;
                                if (j == -1)
                                {
                                    nowRowIdx2 = nowRowIdx;
                                }
                                else
                                {
                                    nowRowIdx2 = FindRowByTime(dvs[s, i, j + 1], periodTime, ref dvsIdx[s, i, j + 1]);
                                }

                                if (nowRowIdx2 - p < 0)
                                {
                                    throw new ArgumentException("No prev data!");
                                }

                                ForexData nowDrv2 = dvs[s, i, j + 1][nowRowIdx2 - p];
                                foreach (var kvp in cp.AllIndNames)
                                {
                                    double v = (double)nowDrv2[kvp.Key];
                                    double ind = WekaUtils.NormalizeValue(kvp.Key, kvp.Value, v, mainClose,
                                        WekaUtils.GetSymbolPoint(cp.AllSymbols[s + cp.SymbolStart]));
                                    instanceValue[start] = ind;
                                    start++;
                                }
                            }
                        }
                    }
                }
            }
            catch (ArgumentException)
            {
                // Missing aligned/previous data for this row: skip it.
                continue;
            }

            if (m_useClassAsAttribute)
            {
                instanceValue[hereInstances.numAttributes() - 2] = hp;
            }
            instanceValue[hereInstances.numAttributes() - 1] = hp;

            Instance instance = new weka.core.DenseInstance(1, instanceValue);
            hereInstances.add(instance);
        }
    }
}
/// <summary>
/// Copies the first <paramref name="outNBElement"/> entries of
/// <paramref name="outDouble"/> into <paramref name="indValues"/>, keyed by
/// the <c>Time</c> of the corresponding row of <paramref name="hpdv"/>
/// (row index = output index + <paramref name="outBegIdx"/>).
/// Existing entries with the same key are overwritten.
/// </summary>
/// <param name="hpdv">Rows that supply the time keys.</param>
/// <param name="indValues">Destination map from row time to indicator value.</param>
/// <param name="ret">Return code of the indicator call; asserted to be <c>Success</c>.</param>
/// <param name="outBegIdx">Row index that output element 0 belongs to.</param>
/// <param name="outNBElement">Number of output elements to copy.</param>
/// <param name="outDouble">Indicator output values.</param>
private void AddIndicatorValues(ForexDataRows hpdv, Dictionary<long, double> indValues, Core.RetCode ret, int outBegIdx, int outNBElement, double[] outDouble)
{
    WekaUtils.DebugAssert(ret == Core.RetCode.Success, "TaRet should Success.");

    int offset = 0;
    while (offset < outNBElement)
    {
        long barTime = hpdv[outBegIdx + offset].Time;
        indValues[barTime] = outDouble[offset];
        offset++;
    }
}
/// <summary>
/// Locates the row of <paramref name="dv"/> for <paramref name="time"/>,
/// searching from <paramref name="prevIdx"/> to the end of the series.
/// </summary>
/// <param name="dv">Rows to search.</param>
/// <param name="time">Time value to look up.</param>
/// <param name="prevIdx">
/// Index where the search starts. Set to the found index on success and
/// reset to 0 on failure.
/// </param>
/// <returns>The index of the matching row (same value as <paramref name="prevIdx"/> on return).</returns>
/// <exception cref="ArgumentException">
/// <c>dv.BinarySearch</c> returned a negative or out-of-range index.
/// </exception>
private static int FindRowByTime(ForexDataRows dv, long time, ref int prevIdx)
{
    // Fast path: the cursor already points at the requested time.
    if ((long)dv[prevIdx].Time == time)
    {
        return prevIdx;
    }

    // NOTE(review): presumably follows Array.BinarySearch conventions
    // (negative result when the time is absent) - confirm in ForexDataRows.
    int found = dv.BinarySearch(prevIdx, dv.Length - prevIdx, time);

    bool isValidIndex = found >= 0 && found < dv.Length;
    if (!isValidIndex)
    {
        // Rewind the cursor so the next lookup scans the whole series.
        prevIdx = 0;
        throw new ArgumentException("There is no data");
    }

    WekaUtils.DebugAssert((long)dv[found].Time <= time, "(long)dv[idx].Time <= time");
    prevIdx = found;
    return prevIdx;
}
/// <summary>
/// Writes an ARFF file (relation 'candlePatterns'). Each data row holds the
/// bar timestamp, hp date, spread and close of the first symbol/period
/// series, followed by the candle-pattern codes of the last
/// <c>TestParameters2.PreLength</c> bars for every symbol/period, and
/// finally the hp label from <c>HpData.Instance.GetHpSum</c>.
/// </summary>
/// <param name="arffFileName">
/// Path of the ARFF file to create. An existing file is deleted first.
/// The path is used verbatim (it is NOT treated as a format string).
/// </param>
/// <param name="candlePatternFile">
/// Comma-separated text file of integer pattern codes. Line n supplies
/// pattern n; column i supplies the code for output element i.
/// </param>
public void GenerateArff(string arffFileName, string candlePatternFile)
{
    // Number of pattern columns written per symbol/period/bar.
    // NOTE(review): presumably the number of candlestick pattern functions - confirm.
    const int patternCount = 61;

    int preLength = TestParameters2.PreLength;
    var dataDates = TestManager.GetDataDateRange();
    int minOutBegIdx = TaLibTest.minOutBegIdx;
    var cp = TestParameters2.CandidateParameter;

    // ARFF is a machine-readable, comma-separated format: numbers and dates
    // must not depend on the culture of the thread that happens to run this.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    int[, , ,] cds = null;
    ForexDataRows[,] hpdvs = new ForexDataRows[cp.SymbolCount, cp.PeriodCount];
    for (int s = 0; s < cp.SymbolCount; ++s)
    {
        for (int p = 0; p < cp.PeriodCount; ++p)
        {
            string symbol = cp.AllSymbols[s + cp.SymbolStart];
            string period = cp.AllPeriods[p + cp.PeriodStart];
            string symbolPeriod = string.Format("{0}_{1}", symbol, period);
            hpdvs[s, p] = DbData.Instance.GetDbData(dataDates[0], dataDates[1], symbolPeriod, 0, true, cp);
            m_currentPeriod = period;

            int minOutNBElement = hpdvs[s, p].Length - minOutBegIdx;
            if (cds == null)
            {
                // Allocated once, sized from the first series plus 100 spare slots.
                CreateCds(ref cds, cp, minOutNBElement + 100);
            }

            // NOTE(review): the same pattern file is read for every
            // symbol/period combination - confirm this is intended.
            using (StreamReader sr = new StreamReader(candlePatternFile))
            {
                int n = 0;
                while (!sr.EndOfStream)
                {
                    string line = sr.ReadLine();
                    string[] ss = line.Split(new char[] { ',' }, StringSplitOptions.RemoveEmptyEntries);
                    WekaUtils.DebugAssert(ss.Length == minOutNBElement, "ss.Length == minOutNBElement");
                    for (int i = 0; i < minOutNBElement; ++i)
                    {
                        cds[s, p, i, n] = Convert.ToInt32(ss[i], invariant);
                    }
                    n++;
                }
            }
        }
    }

    if (File.Exists(arffFileName))
    {
        System.IO.File.Delete(arffFileName);
    }

    // The path is passed straight to the writer. Routing it through
    // string.Format would throw on (or mangle) paths containing '{' or '}'.
    using (StreamWriter sw = new StreamWriter(arffFileName))
    {
        sw.WriteLine("@relation 'candlePatterns'");
        sw.WriteLine("@attribute timestamp date \"yyyy-MM-dd\'T\'HH:mm:ss\"");
        sw.WriteLine("@attribute hpdate date \"yyyy-MM-dd\'T\'HH:mm:ss\"");
        sw.WriteLine("@attribute spread numeric");
        sw.WriteLine("@attribute mainClose numeric");

        // One nominal attribute per (previous bar, symbol, period, pattern),
        // in exactly the order the data rows are written below.
        for (int pre = 0; pre < preLength; ++pre)
        {
            for (int s = 0; s < cp.SymbolCount; ++s)
            {
                for (int p = 0; p < cp.PeriodCount; ++p)
                {
                    for (int i = 0; i < patternCount; ++i)
                    {
                        sw.WriteLine(string.Format("@attribute {0}_{1}_{2}_p{3} {4}",
                            cp.AllSymbols[s + cp.SymbolStart],
                            cp.AllPeriods[p + cp.PeriodStart],
                            i.ToString(),
                            pre.ToString(),
                            "{0,100,200,-100,-200}"));
                    }
                }
            }
        }

        sw.WriteLine("@attribute prop " + " {0,1,2,3}");
        sw.WriteLine("@data");
        sw.WriteLine();

        var hps = HpData.Instance.GetHpSum(cp.MainSymbol, cp.MainPeriod);
        var hpdv = hpdvs[0, 0];

        for (int i = minOutBegIdx + preLength - 1; i < hpdv.Length; ++i)
        {
            DateTime nowDate = WekaUtils.GetDateFromTime((long)hpdv[i].Time);

            // In real-time mode the newest bar is always emitted, with a
            // placeholder label, even though it has no hp entry.
            bool isRealTimeLastRow = TestParameters2.RealTimeMode && i == hpdv.Length - 1;
            if (!isRealTimeLastRow && !hps.ContainsKey(nowDate))
            {
                continue;
            }

            long hpTime = WekaUtils.GetTimeFromDate(Parameters.MaxDate);
            int hp = 0;
            if (!isRealTimeLastRow)
            {
                var hpEntry = hps[nowDate];
                hpTime = hpEntry.Item2;
                hp = hpEntry.Item1;
            }

            sw.Write(nowDate.ToString(Parameters.DateTimeFormat, invariant));
            sw.Write(",");

            // hp date
            sw.Write(WekaUtils.GetDateFromTime(hpTime).ToString(Parameters.DateTimeFormat, invariant));
            sw.Write(",");

            // Convert.ToInt32 accepts any numeric boxed value, whereas the
            // previous (int) unboxing cast required the value to be exactly int.
            sw.Write(Convert.ToInt32(hpdv[i]["spread"]).ToString(invariant));
            sw.Write(",");

            // A decimal comma here would add a column to the row.
            sw.Write(((double)hpdv[i]["close"]).ToString(invariant));
            sw.Write(",");

            for (int pre = 0; pre < preLength; ++pre)
            {
                // Index into the pattern output for the bar that lies
                // (preLength - 1 - pre) bars before bar i.
                int patternRow = i - minOutBegIdx - preLength + pre + 1;

                for (int s = 0; s < cp.SymbolCount; ++s)
                {
                    for (int p = 0; p < cp.PeriodCount; ++p)
                    {
                        for (int j = 0; j < patternCount; ++j)
                        {
                            int candlePattern = cds[s, p, patternRow, j];
                            if (candlePattern == -1)
                            {
                                throw new AssertException(string.Format("candle pattern should not be -1.idx={0}", patternRow));
                            }

                            sw.Write(candlePattern.ToString(invariant));
                            sw.Write(",");
                        }
                    }
                }
            }

            sw.WriteLine(hp.ToString(invariant));
        }
    }
}