/// <summary>
/// Clears the triplet text when its predicate id is listed in <c>ids</c>.
/// </summary>
/// <param name="result">Triplet to inspect; its <c>Text</c> is set to null on a match.</param>
public void Preprocess(TripletTrain result)
 {
     // Predicates outside the list are left untouched.
     var listed = ids.Contains(result.PredicateId);
     if (!listed)
     {
         return;
     }

     result.Text = null;
 }
 /// <summary>
 /// Clears the triplet text when it is longer than 1000 characters.
 /// </summary>
 /// <param name="result">Triplet to inspect; its <c>Text</c> is set to null when too long.</param>
 public void Preprocess(TripletTrain result)
 {
     const int longestAllowed = 1000;

     var length = result.Text.Length;
     if (length <= longestAllowed)
     {
         return;
     }

     result.Text = null;
 }
 /// <summary>
 /// Clears the triplet text when the <c>symbols</c> pattern matches it.
 /// </summary>
 /// <param name="result">Triplet to inspect; its <c>Text</c> is set to null on a match.</param>
 public void Preprocess(TripletTrain result)
 {
     var hasSymbols = symbols.IsMatch(result.Text);
     if (!hasSymbols)
     {
         return;
     }

     result.Text = null;
 }
示例#4
0
        /// <summary>
        /// Clears the triplet text when it contains at least one line-feed character.
        /// </summary>
        /// <param name="result">Triplet to inspect; its <c>Text</c> is set to null when it spans several lines.</param>
        public void Preprocess(TripletTrain result)
        {
            // Only the presence of a '\n' matters, so stop at the first one
            // instead of counting every occurrence in the text.
            if (result.Text.Any(c => c == '\n'))
            {
                result.Text = null;
            }
        }
        /// <summary>
        /// Clears the triplet text unless it contains both the subject anchor and the object anchor.
        /// </summary>
        /// <param name="result">Triplet to inspect; its <c>Text</c> is set to null when an anchor is missing.</param>
        public void Preprocess(TripletTrain result)
        {
            // Both lookups are performed up front, before either result is examined.
            var hasSubjectAnchor = result.Text.Contains(result.SubjectAnchor);
            var hasObjectAnchor  = result.Text.Contains(result.ObjectAnchor);

            if (hasSubjectAnchor && hasObjectAnchor)
            {
                return;
            }

            result.Text = null;
        }
        /// <summary>
        /// Clears the triplet text when its length is below <c>_min</c> or above <c>_max</c>.
        /// </summary>
        /// <param name="result">Triplet to inspect; its <c>Text</c> is set to null when out of range.</param>
        public void Preprocess(TripletTrain result)
        {
            var length = result.Text.Length;

            // Lengths exactly equal to _min or _max are accepted.
            var outOfRange = length < _min || length > _max;
            if (!outOfRange)
            {
                return;
            }

            result.Text = null;
        }
        /// <summary>
        /// Clears the triplet text when nothing is left after removing every <c>r</c> match,
        /// or when <c>r2</c> matches the original text more than six times.
        /// </summary>
        /// <param name="result">Triplet to inspect; its <c>Text</c> is set to null when either condition holds.</param>
        public void Preprocess(TripletTrain result)
        {
            var remainder = r.Replace(result.Text, "");

            // Short-circuit: r2 is only consulted when something remains after stripping r.
            var reject = string.IsNullOrEmpty(remainder)
                         || r2.Matches(result.Text).Count > 6;

            if (reject)
            {
                result.Text = null;
            }
        }
示例#8
0
        /// <summary>
        /// Clears the triplet text when a triplet with the same hash has already been seen;
        /// otherwise records the hash in <c>tripletHashes</c>.
        /// </summary>
        /// <param name="result">Triplet to inspect; its <c>Text</c> is set to null for a repeated hash.</param>
        public void Preprocess(TripletTrain result)
        {
            // The key is the plain concatenation of object, subject, predicate id and text.
            var signature = result.Object + result.Subject + result.PredicateId + result.Text;
            var hash      = signature.GetHashCode();

            // NOTE(review): only the hash code is compared, so two different triplets
            // with colliding hashes would be treated as duplicates — confirm this is acceptable.
            if (!tripletHashes.Contains(hash))
            {
                tripletHashes.Add(hash);
                return;
            }

            result.Text = null;
        }
        /// <summary>
        /// Runs <paramref name="text"/> through every rule in <c>rules</c>, stopping as soon as
        /// a rule leaves the text null or empty.
        /// </summary>
        /// <param name="text">Text to wrap in a fresh <see cref="TripletTrain"/> and process.</param>
        /// <returns>The text after the last rule that ran; null or empty when a rule discarded it.</returns>
        private string GoThroughRules(string text)
        {
            var carrier = new TripletTrain();
            carrier.Text = text;

            foreach (var step in rules)
            {
                step.Preprocess(carrier);

                // A null or empty text ends the pipeline; later rules are not run.
                if (string.IsNullOrEmpty(carrier.Text))
                {
                    return carrier.Text;
                }
            }

            return carrier.Text;
        }
示例#10
0
        /// <summary>
        /// Builds a <see cref="TripletTrain"/> for every article position of every stored triplet
        /// that has article positions, runs it through <c>_rules</c>, and inserts the ones whose
        /// text survives into <c>_tripletsTrain</c>.
        /// </summary>
        /// <param name="stats">
        /// Rejection counters keyed by rule type name. The counter of the rule that emptied a
        /// triplet's text is incremented; a missing key is created starting from zero.
        /// </param>
        /// <returns>The number of training triplets inserted.</returns>
        public async Task <int> PreprocessTrain(Dictionary <string, int> stats)
        {
            var counter  = 0;
            var triplets = _triplets.Find(t => t.ArticlePositions != null).ToCursor();
            await triplets.ForEachAsync(async t =>
            {
                foreach (var p in t.ArticlePositions)
                {
                    // NOTE(review): Object* is filled from the Subject* source fields and
                    // Subject* from the Object* ones — confirm this swap is intentional.
                    var tr = new TripletTrain
                    {
                        Object       = t.SubjectWikiName,
                        ObjectAnchor = p.SubjectPosition.Anchor,

                        Subject       = t.ObjectWikiName,
                        SubjectAnchor = p.ObjectPosition.Anchor,

                        Predicate   = _properties.First(pr => pr.WikidataId == t.Property).ReadTitleUk,
                        PredicateId = t.Property,

                        Text           = p.Text,
                        WikipediaLink  = "https://uk.wikipedia.org/wiki/" + p.ArticleTitle,
                        WikipediaTitle = p.ArticleTitle
                    };

                    foreach (var rule in _rules)
                    {
                        rule.Preprocess(tr);
                        if (string.IsNullOrEmpty(tr.Text))
                        {
                            // Credit the rejection to the rule that emptied the text.
                            // TryGetValue leaves the count at 0 for a rule not yet in the
                            // dictionary, so an unseeded key no longer throws.
                            var ruleName = rule.GetType().Name;
                            int rejected;
                            stats.TryGetValue(ruleName, out rejected);
                            stats[ruleName] = rejected + 1;
                            break;
                        }
                    }

                    // Rejected triplets are neither counted nor stored.
                    if (string.IsNullOrEmpty(tr.Text))
                    {
                        continue;
                    }

                    counter++;
                    await _tripletsTrain.InsertOneAsync(tr);
                }
            });

            return(counter);
        }
示例#11
0
 /// <summary>
 /// Rewrites the triplet text by replacing every <c>link</c> match with its first three
 /// capture groups concatenated.
 /// </summary>
 /// <param name="result">Triplet whose <c>Text</c> is rewritten in place.</param>
 public void Preprocess(TripletTrain result)
 {
     var original = result.Text;
     var rewritten = link.Replace(original, "$1$2$3");

     result.Text = rewritten;
 }
 /// <summary>
 /// Removes every <c>r</c> match from the triplet text.
 /// </summary>
 /// <param name="result">Triplet whose <c>Text</c> is rewritten in place.</param>
 public void Preprocess(TripletTrain result)
 {
     var original = result.Text;
     var stripped = r.Replace(original, "");

     result.Text = stripped;
 }
示例#13
0
 /// <summary>
 /// Applies three replacements to the triplet text in order: <c>regex</c> matches are
 /// removed, <c>regex2</c> matches become "(", and <c>regex3</c> matches become ")".
 /// </summary>
 /// <param name="result">Triplet whose <c>Text</c> is rewritten in place.</param>
 public void Preprocess(TripletTrain result)
 {
     // Each step works on the output of the previous one.
     var text = result.Text;
     text = regex.Replace(text, "");
     text = regex2.Replace(text, "(");
     text = regex3.Replace(text, ")");

     result.Text = text;
 }
示例#14
0
 /// <summary>
 /// Strips leading and trailing whitespace from the triplet text.
 /// </summary>
 /// <param name="result">Triplet whose <c>Text</c> is trimmed in place.</param>
 public void Preprocess(TripletTrain result)
 {
     var trimmed = result.Text.Trim();

     result.Text = trimmed;
 }
示例#15
0
 /// <summary>
 /// Assigns a newly generated GUID, in its default string form, as the triplet id.
 /// </summary>
 /// <param name="result">Triplet whose <c>Id</c> is overwritten.</param>
 public void Preprocess(TripletTrain result)
 {
     var freshId = Guid.NewGuid();

     result.Id = freshId.ToString();
 }
 /// <summary>
 /// Calls <c>LevenshteinDistanceNormalized</c> for the object/object-anchor pair and for
 /// the subject/subject-anchor pair.
 /// </summary>
 /// <param name="result">Triplet whose names and anchors are compared; it is not modified here.</param>
 public void Preprocess(TripletTrain result)
 {
     // NOTE(review): neither distance is used after it is computed, so this rule
     // currently has no visible effect on the triplet — confirm whether a threshold
     // check is missing.
     var objectGap = LevenshteinDistanceNormalized(result.Object, result.ObjectAnchor);

     var subjectGap = LevenshteinDistanceNormalized(result.Subject, result.SubjectAnchor);
 }