/// <summary>
/// Divides this sample collection into two sub-collections.
/// </summary>
/// <param name="ratioOfFirstGroup">Proportion assigned to the first sub-collection.</param>
/// <param name="ratioOfSecondGroup">Proportion assigned to the second sub-collection.</param>
/// <param name="method">The dividing method to use.</param>
/// <param name="firstGroup">Receives the first sub-collection.</param>
/// <param name="secondGroup">Receives the second sub-collection.</param>
public void DivideIntoTwo(
    int ratioOfFirstGroup,
    int ratioOfSecondGroup,
    CollectionDividingMethod method,
    out SampleCollection firstGroup,
    out SampleCollection secondGroup)
{
    // The actual partitioning is delegated; this method only wraps the two
    // resulting sample lists into SampleCollection instances.
    List<Sample> firstSamples;
    List<Sample> secondSamples;
    this.DivideByRatioAndMethod(
        ratioOfFirstGroup,
        ratioOfSecondGroup,
        method,
        out firstSamples,
        out secondSamples);

    firstGroup = new SampleCollection(firstSamples);
    secondGroup = new SampleCollection(secondSamples);
}
/// <summary>
/// Arithmetic-mean filter: values are averaged within the window length.
/// </summary>
/// <param name="windowLengths">
/// Window length for each dimension. Its length is checked against
/// <c>TotalDimensionCount</c>.
/// </param>
/// <returns>A new sample collection; this instance is cloned first and the clone is returned.</returns>
public SampleCollection MeanFilter(int[] windowLengths)
{
    // Per-element check: each window length is validated against 0 and against
    // the current sample count (re-evaluated for every element).
    windowLengths.ShouldEachSatisfy(
        (length) => length.ShouldLargerThan(0) && length.ShouldNotLargerThan(this.Count()));

    // One window length is expected per dimension.
    windowLengths.Length.ShouldEquals(this.TotalDimensionCount);

    SampleCollection filtered = this.Clone();

    // NOTE(review): the per-column worker invoked here is LimitingAmplifyFilter,
    // not a mean-filter routine, and the int window length is passed where the
    // sibling public method passes a double diff limit. This looks like a
    // copy-paste slip -- confirm whether a per-column mean filter was intended.
    CollectionHelper.ForEachThree(
        this.Columns,
        filtered.Columns,
        windowLengths,
        (sourceColumn, targetColumn, length) =>
        {
            LimitingAmplifyFilter(sourceColumn, targetColumn, length);
        });

    return filtered;
}
/// <summary>
/// Limiting (amplitude) filter. Samples that fluctuate too much are discarded
/// and replaced by the previous sample.
/// </summary>
/// <param name="diffLimitations">
/// Vector of maximum absolute fluctuations, one per dimension. Every element
/// must be greater than 0, and the element count is checked against
/// <c>TotalDimensionCount</c>. The sequence is enumerated exactly once.
/// </param>
/// <returns>A new sample collection; this instance is cloned first and the clone is returned.</returns>
public SampleCollection LimitingAmplifyFilter(IEnumerable<double> diffLimitations)
{
    // Materialize once. The sequence is consumed by the count check, the
    // per-element check and the column loop below; with a deferred or one-shot
    // enumerable those three passes could otherwise observe different values
    // (or repeat expensive work). The local stays typed as IEnumerable<double>
    // so the same helper overloads are selected as before.
    IEnumerable<double> limits = diffLimitations.ToList();

    limits.Count().ShouldEquals(this.TotalDimensionCount);
    limits.ShouldEachSatisfy((item) => item > 0);

    SampleCollection result = this.Clone();

    // Walk source columns, result columns and limits in lockstep, applying the
    // per-column overload to each triple.
    CollectionHelper.ForEachThree(
        this.Columns,
        result.Columns,
        limits,
        (srcCol, resultCol, diff) =>
        {
            LimitingAmplifyFilter(srcCol, resultCol, diff);
        });

    return result;
}
/// <summary>
/// Builds a sample collection from comma-separated text supplied by a reader.
/// All space characters are stripped from each line before it is split on ','.
/// Numeric fields are parsed with the invariant culture ('.' as the decimal
/// separator), independent of the current thread culture.
/// </summary>
/// <param name="reader">Reader that supplies the CSV lines.</param>
/// <param name="outputDimension">Passed through unchanged to every <c>Sample</c> that is created.</param>
/// <param name="ignoreInvalidLine">
/// When true, a line whose field count differs from the first line, or that
/// contains a field that is not a valid double, is skipped. When false, such a
/// line raises an exception.
/// </param>
/// <param name="firstLineFieldName">
/// When true, the first line is taken as the field names and is not parsed as data.
/// </param>
/// <returns>The collection holding one sample per accepted data line.</returns>
/// <exception cref="DimensionNotMatchException">
/// A line has a different field count than the first line and
/// <paramref name="ignoreInvalidLine"/> is false.
/// </exception>
/// <exception cref="Exception">
/// A field cannot be parsed as a double and <paramref name="ignoreInvalidLine"/> is false.
/// </exception>
public static SampleCollection LoadFromCSVFile(StreamReader reader, int outputDimension = 0, bool ignoreInvalidLine = false, bool firstLineFieldName = true)
{
    SampleCollection sc = new SampleCollection();
    int lineNum = 0;
    string[] fieldNames = null;

    // -1 means "not known yet"; the first line read fixes the expected field count.
    int totalDimension = -1;

    foreach (var line in reader.ReadLines())
    {
        lineNum++;

        // Note: spaces are removed everywhere, including inside field names.
        var strValues = line.Replace(" ", "").Split(',');

        if (totalDimension == -1)
        {
            totalDimension = strValues.Length;
        }
        else if (strValues.Length != totalDimension)
        {
            if (ignoreInvalidLine)
            {
                continue;
            }

            throw new DimensionNotMatchException("Dimension not match in line:" + lineNum.ToString());
        }

        if (lineNum == 1 && firstLineFieldName)
        {
            fieldNames = strValues;
            continue;
        }

        var values = new double[totalDimension];
        bool validLine = true;
        for (int i = 0; i < strValues.Length; i++)
        {
            // Same number styles as the default double.TryParse overload, but
            // pinned to the invariant culture: CSV is machine-readable data and
            // must not be interpreted differently depending on the user's locale.
            bool parsed = double.TryParse(
                strValues[i],
                System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands,
                System.Globalization.CultureInfo.InvariantCulture,
                out double value);

            if (!parsed)
            {
                if (!ignoreInvalidLine)
                {
                    throw new Exception("Has non-double value in line:" + lineNum.ToString());
                }

                // Drop the whole line as soon as one field is bad.
                validLine = false;
                break;
            }

            values[i] = value;
        }

        if (validLine)
        {
            sc.Add(new Sample(values, outputDimension, fieldNames));
        }
    }

    return sc;
}