/**
 * <summary>Estimates smoothed counts of counts for Good-Turing smoothing by fitting a straight line in log-log
 * space. Every non-zero entry of <paramref name="countsOfCounts"/> at index 1 or above contributes one sample:
 * x = log(r), where r is the index, and t = log(Z), where Z is derived from the count c at that index.
 * Z = c for the first retained entry, Z = c / (r - r_prev) for the last one, and
 * Z = 2c / (r_next - r_prev) for every entry in between. The samples are summed into the least-squares
 * normal equations, which are solved for w_0 and w_1 in log(N[i]) = w_1 * log(i) + w_0.</summary>
 * <param name="countsOfCounts">Counts of counts. countsOfCounts[i] is the number of words that occurred i times
 * in the corpus, so countsOfCounts[1] is the number of words that occurred once. Index 0 is never read.</param>
 * <returns>Array with the same length as the input. For i >= 1, entry i is exp(w_1 * log(i) + w_0); entry 0 keeps
 * its default value of 0. N[1] is the estimated count for out of vocabulary words.</returns>
 */
private double[] LinearRegressionOnCountsOfCounts(int[] countsOfCounts)
{
    // Keep only the frequencies that actually occur, together with how many words have that frequency.
    var frequencies = new List<int>();
    var wordsPerFrequency = new List<int>();
    for (var frequency = 1; frequency < countsOfCounts.Length; frequency++)
    {
        if (countsOfCounts[frequency] == 0)
        {
            continue;
        }

        frequencies.Add(frequency);
        wordsPerFrequency.Add(countsOfCounts[frequency]);
    }

    // Normal equations of the fit: [[n, sum x], [sum x, sum x^2]] * w = [sum t, sum x*t].
    // NOTE(review): AddValue is assumed to accumulate into the addressed cell - confirm against Matrix/Vector.
    var normalMatrix = new Matrix(2, 2);
    var rightHandSide = new Vector(2, 0);
    var lastSample = frequencies.Count - 1;
    for (var sample = 0; sample <= lastSample; sample++)
    {
        var logFrequency = System.Math.Log(frequencies[sample]);

        double logZ;
        if (sample == 0)
        {
            // First retained frequency: the raw count is used as-is.
            logZ = System.Math.Log(wordsPerFrequency[sample]);
        }
        else if (sample == lastSample)
        {
            // Last retained frequency: only the gap to the previous frequency is available.
            var gapToPrevious = frequencies[sample] - frequencies[sample - 1];
            logZ = System.Math.Log(1.0 * wordsPerFrequency[sample] / gapToPrevious);
        }
        else
        {
            // Interior frequency: average the count over the span between its two neighbours.
            var neighbourSpan = frequencies[sample + 1] - frequencies[sample - 1];
            logZ = System.Math.Log(2.0 * wordsPerFrequency[sample] / neighbourSpan);
        }

        normalMatrix.AddValue(0, 0, 1.0);
        normalMatrix.AddValue(0, 1, logFrequency);
        normalMatrix.AddValue(1, 0, logFrequency);
        normalMatrix.AddValue(1, 1, logFrequency * logFrequency);
        rightHandSide.AddValue(0, logZ);
        rightHandSide.AddValue(1, logZ * logFrequency);
    }

    // NOTE(review): Inverse() is called only for its effect on normalMatrix - presumably an in-place inversion.
    normalMatrix.Inverse();
    var weights = normalMatrix.MultiplyWithVectorFromRight(rightHandSide);
    var intercept = weights.GetValue(0);
    var slope = weights.GetValue(1);

    // Evaluate the fitted line for every frequency; index 0 is left at its default of 0.
    var estimates = new double[countsOfCounts.Length];
    for (var frequency = 1; frequency < countsOfCounts.Length; frequency++)
    {
        estimates[frequency] = System.Math.Exp(System.Math.Log(frequency) * slope + intercept);
    }

    return estimates;
}
/**
 * <summary>Builds a new vector by reading this vector's stored values from the last index down to index 0 and
 * appending each one, in that order, to a freshly created vector.</summary>
 * <returns>A new vector whose elements are this vector's elements in reverse order.</returns>
 */
public Vector<T> Reverse()
{
    var reversed = new Vector<T>();

    // Start one past the last element and step down before each read.
    var position = Count();
    while (position > 0)
    {
        position--;
        reversed.AddValue(m_values[position]);
    }

    return reversed;
}
/**
 * <summary>Decrement operator. Produces a new vector in which every element of <paramref name="vec"/> has had the
 * <c>--</c> operator applied. Each element is handled as <c>dynamic</c>, so the decrement is resolved at run time
 * for the actual element type.</summary>
 * <param name="vec">Vector whose elements are read; results are appended to a separate, newly created vector.</param>
 * <returns>A new vector holding the decremented elements in the same order.</returns>
 */
public static Vector<T> operator --(Vector<T> vec)
{
    var decremented = new Vector<T>();

    var position = 0;
    while (position < vec.Count())
    {
        // Route through dynamic so that '--' binds against the runtime element type.
        dynamic element = vec.GetValue(position);
        decremented.AddValue(--element);
        position++;
    }

    return decremented;
}
/**
 * <summary>CalculatePi rebuilds the prior probability vector (initial probability of each state) from a set of
 * observations. The vector is reset to StateCount zeros; then, for every observation, the entry belonging to the
 * observation's first state is increased by 1. Finally the vector is L1-normalized, turning the counts into the
 * prior probabilities.</summary>
 *
 * <param name="observations">A set of observations used to calculate the prior probabilities. Element 0 of every
 * observation is read, so each observation must contain at least one state.</param>
 */
protected override void CalculatePi(List<TState>[] observations)
{
    _pi = new Vector(StateCount, 0.0);

    for (var sequence = 0; sequence < observations.Length; sequence++)
    {
        // Only the first state of each observation contributes to the prior.
        var firstState = observations[sequence][0];
        _pi.AddValue(StateIndexes[firstState], 1.0);
    }

    _pi.L1Normalize();
}
/**
 * <summary>Element-wise subtraction. Produces a new vector whose i-th element is the i-th element of
 * <paramref name="vec1"/> minus the i-th element of <paramref name="vec2"/>. Both operands are handled as
 * <c>dynamic</c>, so the subtraction is resolved at run time for the actual element type.</summary>
 * <param name="vec1">Left operand. Its element count alone determines how many positions are processed.</param>
 * <param name="vec2">Right operand. It is read at the same positions as <paramref name="vec1"/>; its own length
 * is not checked here.</param>
 * <returns>A new vector holding the element-wise differences.</returns>
 */
public static Vector<T> operator -(Vector<T> vec1, Vector<T> vec2)
{
    var difference = new Vector<T>();

    for (var position = 0; position < vec1.Count(); position++)
    {
        // Route through dynamic so that '-' binds against the runtime element type.
        dynamic minuend = vec1.GetValue(position);
        dynamic subtrahend = vec2.GetValue(position);
        difference.AddValue(minuend - subtrahend);
    }

    return difference;
}