/// <summary>
/// Computes, for every field name, the mean of the lengths recorded in
/// <c>FieldLengths</c> and publishes the result in <c>AverageFieldLength</c>.
/// </summary>
/// <remarks>
/// Only field names listed in <c>_fields</c> are averaged. A field name that
/// appears in <c>FieldLengths</c> but not in <c>_fields</c> keeps its summed
/// length. A field listed in <c>_fields</c> that no entry of
/// <c>FieldLengths</c> refers to is skipped and gets no entry in the result,
/// rather than failing with a <see cref="KeyNotFoundException"/>.
/// </remarks>
private void CalculateAverageFieldLengths()
{
    var totalLengths = new Dictionary<string, double>();
    var documentsWithField = new Dictionary<string, double>();

    // Each entry already carries its own length, so a single pass is enough;
    // there is no need to search FieldLengths again for every reference.
    foreach (var entry in FieldLengths)
    {
        // The name is taken from the parsed string form of the reference so
        // that the keys written here are exactly the names that
        // CreateFieldVectors later uses to look up AverageFieldLength.
        var fieldName = FieldRef.FromString(entry.Key.ToString()).FieldName;

        // TryGetValue leaves the out value at 0 when the name is new.
        double count;
        documentsWithField.TryGetValue(fieldName, out count);
        documentsWithField[fieldName] = count + 1;

        double total;
        totalLengths.TryGetValue(fieldName, out total);
        totalLengths[fieldName] = total + entry.Value;
    }

    foreach (var fieldName in _fields.Keys)
    {
        // A registered field with no recorded lengths has nothing to average.
        double count;
        if (documentsWithField.TryGetValue(fieldName, out count))
        {
            totalLengths[fieldName] /= count;
        }
    }

    AverageFieldLength = totalLengths;
}
/// <summary>
/// Builds one term vector per entry of <c>FieldTermFrequencies</c> and
/// publishes the result in <c>FieldVectors</c>.
/// </summary>
/// <remarks>
/// Every term of a field is scored with the BM25 formula (parameters
/// <c>_k1</c> and <c>_b</c>), using the field's length relative to
/// <c>AverageFieldLength</c>. The score is then multiplied by the field boost
/// and the document boost (each defaulting to 1), rounded to three decimal
/// places and inserted into the vector at the term's <c>"_index"</c> position
/// from <c>InvertedIndex</c>.
/// </remarks>
/// <exception cref="InvalidOperationException">
/// A field reference in <c>FieldTermFrequencies</c> has no matching entry in
/// <c>FieldLengths</c>.
/// </exception>
private void CreateFieldVectors()
{
    var fieldVectors = new Dictionary<FieldRef, Vector>();
    var termIdfCache = new Dictionary<Token, double>();

    // Index the field lengths by the string form of their reference once, so
    // that each lookup below is a hash probe instead of a linear scan.
    // Grouping keeps the first entry per string, as a First(...) scan would.
    var lengthsByRef = FieldLengths
        .GroupBy(pair => pair.Key.ToString())
        .ToDictionary(group => group.Key, group => group.First().Value);

    foreach (var entry in FieldTermFrequencies)
    {
        // The vector is stored under the reference parsed back from its
        // string form, not under the original key object.
        var fieldRef = FieldRef.FromString(entry.Key.ToString());
        var fieldName = fieldRef.FieldName;

        var refKey = fieldRef.ToString();
        if (!lengthsByRef.ContainsKey(refKey))
        {
            throw new InvalidOperationException(
                $"No field length recorded for field reference '{refKey}'.");
        }

        var fieldLength = lengthsByRef[refKey];
        var fieldVector = new Vector();
        var termFrequencies = entry.Value;
        var fieldBoost = _fields[fieldName]?.Boost ?? 1;
        var docBoost = _documents[fieldRef.DocRef]?.Boost ?? 1;

        foreach (var term in termFrequencies.Keys)
        {
            var tf = termFrequencies[term];
            var termIndex = (int)InvertedIndex[term]["_index"];

            // The IDF of a term is the same for every field, so compute it
            // once and reuse it.
            double idf;
            if (!termIdfCache.TryGetValue(term, out idf))
            {
                idf = Utils.InverseDocumentFrequency(InvertedIndex[term], DocumentCount);
                termIdfCache[term] = idf;
            }

            var score = idf * ((_k1 + 1) * tf) / (_k1 * (1 - _b + _b * (fieldLength / AverageFieldLength[fieldName])) + tf);
            score *= fieldBoost;
            score *= docBoost;

            // Keep three decimal places (1.23456789 becomes 1.235) so that
            // the vectors take up less space when serialised. Rounding now,
            // rather than at serialisation time, makes the vectors behave
            // the same before and after serialisation.
            var scoreWithPrecision = Math.Round(score * 1000) / 1000;

            fieldVector.Insert(termIndex, scoreWithPrecision);
        }

        fieldVectors[fieldRef] = fieldVector;
    }

    FieldVectors = fieldVectors;
}