/// <summary>
/// Builds a <c>Document</c> from the given record, adding one field per
/// non-empty property value, and hands it to the index writer.
/// </summary>
/// <param name="record">The record whose properties and values are indexed.</param>
/// <exception cref="Exception">
/// Thrown when the record carries a property that has no entry in the configuration.
/// </exception>
/// <remarks>
/// A failure while adding the document to the writer is logged and not rethrown.
/// </remarks>
public void Index(IRecord record)
{
    var doc = new Document();

    foreach (string propname in record.GetProperties())
    {
        Property prop = _config.GetPropertyByName(propname);
        if (prop == null)
        {
            throw new Exception(String.Format("Record has property {0} for which there is no configuration.", propname));
        }

        // ID properties are indexed as-is (NOT_ANALYZED); everything else is ANALYZED.
        //TODO: could cache this. or get it from property
        Field.Index ix = prop.IsIdProperty ? Field.Index.NOT_ANALYZED : Field.Index.ANALYZED;

        foreach (string v in record.GetValues(propname))
        {
            // Skip null as well as empty values: calling v.Equals("") on a null
            // value would throw a NullReferenceException.
            //FIXME: not sure if skipping empty values is necessary
            if (String.IsNullOrEmpty(v))
            {
                continue;
            }

            doc.Add(new Field(propname, v, Field.Store.YES, ix));
        }
    }

    try
    {
        _iwriter.AddDocument(doc);
    }
    catch (Exception ex)
    {
        // Best effort: report the failure but do not propagate it to the caller.
        logger.Error("Error adding document to index writer: {0}", ex.Message);
    }
}
/// <summary>
/// Compares two records property by property and combines the per-property
/// scores into a single value via <c>StandardUtils.ComputeBayes</c>.
/// </summary>
/// <param name="r1">First record; its property names drive the comparison.</param>
/// <param name="r2">Second record; values are looked up by the same property names.</param>
/// <returns>
/// The combined score, starting from 0.5 and updated once for every property
/// that is compared.
/// </returns>
/// <exception cref="DukeException">
/// Thrown when comparing a pair of values fails; the message includes both
/// values and the underlying error message.
/// </exception>
public double Compare(IRecord r1, IRecord r2)
{
    double prob = 0.5;

    foreach (string propname in r1.GetProperties())
    {
        Property prop = _config.GetPropertyByName(propname);

        // GetPropertyByName can return null for a property without configuration;
        // such a property cannot be compared, so skip it instead of dereferencing null.
        if (prop == null)
        {
            continue;
        }

        if (prop.IsIdProperty || prop.IsIgnoreProperty())
        {
            continue;
        }

        List<string> vs1 = r1.GetValues(propname);
        List<string> vs2 = r2.GetValues(propname);

        // No values on either side (null or empty list): nothing to compare, so skip.
        if (vs1 == null || vs2 == null || vs1.Count == 0 || vs2.Count == 0)
        {
            continue;
        }

        // Best score over all value pairs for this property.
        double high = 0.0;
        foreach (string v1 in vs1)
        {
            //TODO: These values shouldn't be here at all.
            if (String.IsNullOrEmpty(v1))
            {
                continue;
            }

            foreach (string v2 in vs2)
            {
                //TODO: These values shouldn't be here at all.
                if (String.IsNullOrEmpty(v2))
                {
                    continue;
                }

                try
                {
                    double p = prop.Compare(v1, v2);
                    high = Math.Max(high, p);
                }
                catch (Exception e)
                {
                    throw new DukeException(String.Format("Comparison of values {0} and {1} failed. {2}", v1, v2, e.Message));
                }
            }
        }

        prob = StandardUtils.ComputeBayes(prob, high);
    }

    return prob;
}