/// <summary>
/// Builds a document from every property value of <paramref name="record"/> and
/// hands it to the index writer.
/// </summary>
/// <param name="record">The record whose properties and values are indexed.</param>
/// <exception cref="Exception">
/// Thrown when the record carries a property that has no entry in the configuration.
/// </exception>
/// <remarks>
/// A failure while adding the document to the writer is logged and not rethrown.
/// </remarks>
public void Index(IRecord record)
        {
            var doc = new Document();

            foreach (string propname in record.GetProperties())
            {
                Property prop = _config.GetPropertyByName(propname);
                if (prop == null)
                {
                    throw new Exception(String.Format("Record has property {0} for which there is no configuration.",
                                                      propname));
                }

                // Id properties are stored as a single un-analyzed term; everything else is analyzed.
                //TODO: could cache this. or get it from property
                Field.Index ix = prop.IsIdProperty ? Field.Index.NOT_ANALYZED : Field.Index.ANALYZED;

                var values = record.GetValues(propname);
                if (values == null)
                {
                    continue; // property is declared on the record but carries no value list
                }

                foreach (string v in values)
                {
                    // Null values used to crash on v.Equals(""); treat them like empty values.
                    if (string.IsNullOrEmpty(v))
                    {
                        continue; //FIXME: not sure if this is necessary
                    }
                    doc.Add(new Field(propname, v, Field.Store.YES, ix));
                }
            }

            try
            {
                _iwriter.AddDocument(doc);
            }
            catch (Exception ex)
            {
                // Best effort: the failure is reported through the logger only.
                logger.Error("Error adding document to index writer: {0}", ex.Message);
            }
        }
Example #2
0
        /// <summary>
        /// Compares two records property by property and combines the per-property
        /// scores into a single probability.
        /// </summary>
        /// <remarks>
        /// Starts from 0.5. For each property of <paramref name="r1"/> that is neither an
        /// id property nor an ignored property, the highest score returned by
        /// <c>prop.Compare</c> over all pairs of non-empty values is folded into the
        /// running probability with <c>StandardUtils.ComputeBayes</c>. Properties for
        /// which either record has no values are skipped.
        /// </remarks>
        /// <param name="r1">First record; its property names drive the comparison.</param>
        /// <param name="r2">Second record; values are looked up by the property names of <paramref name="r1"/>.</param>
        /// <returns>The combined probability after all compared properties.</returns>
        /// <exception cref="DukeException">
        /// Thrown when a property of <paramref name="r1"/> has no configuration, or when
        /// comparing a pair of values fails.
        /// </exception>
        public double Compare(IRecord r1, IRecord r2)
        {
            double prob = 0.5;

            foreach (string propname in r1.GetProperties())
            {
                Property prop = _config.GetPropertyByName(propname);
                if (prop == null)
                {
                    // Fail with a descriptive error instead of a NullReferenceException below.
                    throw new DukeException(String.Format("Record has property {0} for which there is no configuration.",
                                                          propname));
                }

                if (prop.IsIdProperty || prop.IsIgnoreProperty())
                {
                    continue;
                }

                List <string> vs1 = r1.GetValues(propname);
                List <string> vs2 = r2.GetValues(propname);
                if ((vs1 == null) || (vs2 == null) || (vs1.Count == 0) || (vs2.Count == 0))
                {
                    continue; // no values to compare, so skip
                }

                // NOTE(review): if every value on one side is empty, no pair is compared and
                // high stays 0.0 but is still folded in below -- confirm this is intended.
                double high = 0.0;
                foreach (string v1 in vs1)
                {
                    if (string.IsNullOrEmpty(v1)) //TODO: These values shouldn't be here at all.
                    {
                        continue;
                    }

                    foreach (string v2 in vs2)
                    {
                        if (string.IsNullOrEmpty(v2)) //TODO: These values shouldn't be here at all.
                        {
                            continue;
                        }

                        try
                        {
                            double p = prop.Compare(v1, v2);
                            high = Math.Max(high, p);
                        }
                        catch (Exception e)
                        {
                            throw new DukeException(String.Format("Comparison of values {0} and {1} failed. {2}", v1, v2,
                                                                  e.Message));
                        }
                    }
                }

                prob = StandardUtils.ComputeBayes(prob, high);
            }

            return(prob);
        }