/// <summary>
/// Creates a processor for the given configuration and database and
/// precomputes the comparison order of the non-id properties together with
/// the accumulated probabilities derived from them.
/// </summary>
/// <param name="config">Configuration supplying the properties to compare.</param>
/// <param name="database">Database stored for later candidate lookups.</param>
public Processor(Configuration config, IDatabase database)
{
    _config = config;
    _database = database;
    _listeners = new List<IMatchListener>();
    _passthrough = new PassThroughFilter();
    _choosebest = new ChooseBestFilter();

    // Precomputation, part 1: keep every property that is not an id
    // property and order the result with PropertyComparator.
    _proporder = new List<Property>();
    foreach (Property property in _config.GetProperties())
    {
        if (property.IsIdProperty)
        {
            continue;
        }

        _proporder.Add(property);
    }

    _proporder.Sort(new PropertyComparator());

    // Precomputation, part 2: walk the ordered properties from last to
    // first, folding each HighProbability into a running value (seeded with
    // 0.5) via StandardUtils.ComputeBayes. Slot i therefore holds the
    // combination of the high probabilities of properties i..end.
    double running = 0.5;
    _accprob = new double[_proporder.Count];
    int index = _proporder.Count - 1;
    while (index >= 0)
    {
        running = StandardUtils.ComputeBayes(running, _proporder[index].HighProbability);
        _accprob[index] = running;
        index--;
    }
}
/// <summary>
/// Initializes the processor: stores the configuration and database, creates
/// the listener list and the two built-in filters, and precomputes the
/// property order and accumulated probabilities.
/// </summary>
/// <param name="config">Configuration whose properties are read here.</param>
/// <param name="database">Database kept for later use by this processor.</param>
public Processor(Configuration config, IDatabase database)
{
    _config = config;
    _database = database;
    _listeners = new List<IMatchListener>();
    _passthrough = new PassThroughFilter();
    _choosebest = new ChooseBestFilter();

    // Precomputing for later optimizations: only non-id properties take
    // part, sorted by PropertyComparator.
    _proporder = new List<Property>();
    foreach (Property candidate in _config.GetProperties())
    {
        bool isComparable = !candidate.IsIdProperty;
        if (isComparable)
        {
            _proporder.Add(candidate);
        }
    }

    _proporder.Sort(new PropertyComparator());

    // Still precomputing: _accprob[pos] is the result of folding the
    // HighProbability of every property from the end of the list down to
    // pos into a starting value of 0.5 using StandardUtils.ComputeBayes.
    double accumulated = 0.5;
    int propertyCount = _proporder.Count;
    _accprob = new double[propertyCount];
    for (int pos = propertyCount - 1; pos >= 0; pos--)
    {
        Property current = _proporder[pos];
        accumulated = StandardUtils.ComputeBayes(accumulated, current.HighProbability);
        _accprob[pos] = accumulated;
    }
}
/// <summary>
/// Asks the database for candidate matches of <paramref name="record"/>,
/// logs how many were found, and passes them to
/// <see cref="CompareCandidates"/> for comparison.
/// </summary>
/// <param name="record">The record to find matches for.</param>
/// <param name="filter">Listener that receives the comparison results.</param>
private void Match(IRecord record, IMatchListener filter)
{
    List<IRecord> candidates = _database.FindCandidateMatches(record);

    // The second placeholder must be {1}: with {0} used twice the candidate
    // count was never printed and the record text appeared twice instead.
    logger.Debug(
        "Match record {0} found {1} candidates",
        PrintMatchListener.RecordToString(record),
        candidates.Count);

    CompareCandidates(record, candidates, filter);
}
/// <summary>
/// Walks through every record of every data source and then tells each
/// registered listener that processing has ended.
/// </summary>
/// <param name="sources">Data sources whose records are enumerated.</param>
/// <param name="filter">
/// Listener intended for the per-record match call; currently unused because
/// that call is commented out.
/// </param>
private void LinkRecords(List<IDataSource> sources, IMatchListener filter)
{
    foreach (IDataSource dataSource in sources)
    {
        dataSource.SetLogger();

        // foreach (rather than a hand-driven IEnumerator) guarantees the
        // enumerator is disposed, even if enumeration throws.
        foreach (IRecord record in dataSource.GetRecords())
        {
            // NOTE(review): matching is disabled here, so records are read
            // but never compared. Confirm whether this is intentional.
            //Match(record, filter);
        }
    }

    foreach (IMatchListener matchListener in _listeners)
    {
        matchListener.EndProcessing();
    }
}
/// <summary>
/// Compares <paramref name="record"/> against each candidate and reports the
/// outcome to <paramref name="filter"/>, bracketed by StartRecord/EndRecord.
/// </summary>
/// <param name="record">The record being matched.</param>
/// <param name="candidates">Candidate records to compare against.</param>
/// <param name="filter">Listener notified of matches and possible matches.</param>
protected void CompareCandidates(IRecord record, List<IRecord> candidates, IMatchListener filter)
{
    filter.StartRecord(record);

    foreach (IRecord other in candidates)
    {
        // Candidates that IsSameAs identifies with the record are skipped.
        if (IsSameAs(record, other))
        {
            continue;
        }

        double score = Compare(record, other);

        // Above the main threshold: a definite match.
        if (score > _config.Threshold)
        {
            filter.Matches(record, other, score);
            continue;
        }

        // A ThresholdMaybe of exactly 0.0 switches "perhaps" reporting off.
        bool isPossibleMatch = (_config.ThresholdMaybe != 0.0) && (score > _config.ThresholdMaybe);
        if (isPossibleMatch)
        {
            filter.MatchesPerhaps(record, other, score);
        }
    }

    filter.EndRecord();
}
/// <summary>
/// Adds a match listener to this processor's listener list.
/// </summary>
/// <param name="listener">The listener to append to the list.</param>
public void AddMatchListener(IMatchListener listener)
{
    this._listeners.Add(listener);
}
/// <summary>
/// Appends the given match listener to the listener collection.
/// </summary>
/// <param name="listener">The listener to add.</param>
public void AddListener(IMatchListener listener)
{
    listeners.Add(listener);
}
/// <summary>
/// Retrieves the candidate matches for <paramref name="record"/> from the
/// database, logs the record together with the number of candidates, and
/// forwards both to <see cref="CompareCandidates"/>.
/// </summary>
/// <param name="record">The record to find matches for.</param>
/// <param name="filter">Listener that receives the comparison results.</param>
private void Match(IRecord record, IMatchListener filter)
{
    List<IRecord> candidates = _database.FindCandidateMatches(record);

    // Fixed placeholder: the count is argument {1}. The previous message
    // used {0} for both slots and so repeated the record text instead of
    // reporting the number of candidates.
    string recordText = PrintMatchListener.RecordToString(record);
    logger.Debug("Match record {0} found {1} candidates", recordText, candidates.Count);

    CompareCandidates(record, candidates, filter);
}
/// <summary>
/// Enumerates all records of each data source in <paramref name="sources"/>
/// and afterwards calls EndProcessing on every registered listener.
/// </summary>
/// <param name="sources">Data sources to read records from.</param>
/// <param name="filter">
/// Listener meant to be handed to the per-record match step; not used while
/// that step remains commented out.
/// </param>
private void LinkRecords(List<IDataSource> sources, IMatchListener filter)
{
    foreach (IDataSource dataSource in sources)
    {
        dataSource.SetLogger();

        // Using foreach instead of manual MoveNext/Current so that the
        // enumerator returned by GetRecords() is always disposed.
        foreach (IRecord record in dataSource.GetRecords())
        {
            // NOTE(review): the match call below is disabled in the source;
            // records are consumed without being compared. Verify intent.
            //Match(record, filter);
        }
    }

    foreach (IMatchListener matchListener in _listeners)
    {
        matchListener.EndProcessing();
    }
}
/// <summary>
/// Scores every candidate against <paramref name="record"/> and notifies
/// <paramref name="filter"/> of definite and possible matches. StartRecord is
/// called before the first comparison and EndRecord after the last.
/// </summary>
/// <param name="record">The record being matched.</param>
/// <param name="candidates">Candidate records to compare against.</param>
/// <param name="filter">Listener notified of the comparison outcome.</param>
protected void CompareCandidates(IRecord record, List<IRecord> candidates, IMatchListener filter)
{
    filter.StartRecord(record);

    foreach (IRecord candidate in candidates)
    {
        // Only candidates that IsSameAs does not identify with the record
        // are compared.
        if (!IsSameAs(record, candidate))
        {
            double probability = Compare(record, candidate);

            if (probability > _config.Threshold)
            {
                filter.Matches(record, candidate, probability);
            }
            else if (_config.ThresholdMaybe != 0.0 && probability > _config.ThresholdMaybe)
            {
                // Reached only when ThresholdMaybe is non-zero; 0.0 disables
                // the "perhaps" notification entirely.
                filter.MatchesPerhaps(record, candidate, probability);
            }
        }
    }

    filter.EndRecord();
}