/// <summary>
/// Builds an <see cref="Example"/> from a single line of separator-delimited text.
/// </summary>
/// <param name="data">The raw text holding one value per attribute.</param>
/// <param name="dataSetSpec">
/// Specification used to validate the split values, to supply the attribute names,
/// to create each attribute from its raw value, and to name the target attribute.
/// </param>
/// <param name="separator">Regular-expression pattern used to split <paramref name="data"/>.</param>
/// <returns>
/// An example whose attributes are inserted in the order returned by
/// <c>dataSetSpec.getAttributeNames()</c>, with the attribute named by
/// <c>dataSetSpec.getTarget()</c> passed as the target.
/// </returns>
/// <exception cref="RuntimeException">
/// Thrown when <c>dataSetSpec.isValid</c> rejects the split values.
/// </exception>
public static Example exampleFromString(string data, DataSetSpecification dataSetSpec, string separator)
{
    IRegularExpression splitter = TextFactory.CreateRegularExpression(separator);
    ICollection<string> attributeValues = CollectionFactory.CreateQueue<string>(splitter.Split(data));

    if (!dataSetSpec.isValid(attributeValues))
    {
        throw new RuntimeException("Unable to construct Example from " + data);
    }

    IMap<string, IAttribute> attributes = CollectionFactory.CreateInsertionOrderedMap<string, IAttribute>();
    ICollection<string> names = dataSetSpec.getAttributeNames();

    // Pair names with values only while BOTH are available. The previous bound
    // compared names.Size() with the size of the (still empty) result map, so it
    // always equalled names.Size() and could index past the end of the values.
    int pairCount = names.Size() < attributeValues.Size() ? names.Size() : attributeValues.Size();

    for (int i = 0; i < pairCount; ++i)
    {
        string name = names.Get(i);
        IAttributeSpecification attributeSpec = dataSetSpec.getAttributeSpecFor(name);
        IAttribute attribute = attributeSpec.CreateAttribute(attributeValues.Get(i));
        attributes.Put(name, attribute);
    }

    string targetAttributeName = dataSetSpec.getTarget();
    return new Example(attributes, attributes.Get(targetAttributeName));
}
/// <summary>
/// String split constructor: builds a rule from two comma-separated symbol lists.
/// </summary>
/// <param name="lhs">
/// Comma-separated left-hand-side symbols; whitespace around commas is ignored.
/// A null or empty string yields an empty left-hand side.
/// </param>
/// <param name="rhs">
/// Comma-separated right-hand-side symbols; whitespace around commas is ignored.
/// A null or empty string yields an empty right-hand side.
/// </param>
/// <param name="probability">Value passed through <c>validateProb</c> before being stored in <c>PROB</c>.</param>
public Rule(string lhs, string rhs, float probability)
{
    // Each side is allocated exactly once; the earlier version created the
    // left-hand-side queue twice and threw the first instance away.
    this.lhs = CollectionFactory.CreateQueue<string>();
    this.rhs = CollectionFactory.CreateQueue<string>();

    IRegularExpression regex = TextFactory.CreateRegularExpression("\\s*,\\s*");

    if (!string.IsNullOrEmpty(lhs))
    {
        foreach (string input in regex.Split(lhs))
        {
            // Skip empty tokens (e.g. produced by leading or doubled commas).
            if (!string.IsNullOrEmpty(input))
            {
                this.lhs.Add(input);
            }
        }
    }

    if (!string.IsNullOrEmpty(rhs))
    {
        foreach (string input in regex.Split(rhs))
        {
            if (!string.IsNullOrEmpty(input))
            {
                this.rhs.Add(input);
            }
        }
    }

    this.PROB = validateProb(probability);
}
// TODO: Make more intelligent link search
/// <summary>
/// Collects the <c>/wiki/...</c> link targets found in <c>href="..."</c> attributes
/// of a page's content.
/// </summary>
/// <param name="page">The page whose content (from <c>page.getContent()</c>) is scanned.</param>
/// <returns>
/// The lower-cased link locations, one entry per match and in match order;
/// duplicates are not removed.
/// </returns>
public ICollection<string> getOutlinks(Page page)
{
    string content = page.getContent();
    ICollection<string> outLinks = CollectionFactory.CreateQueue<string>();

    // search content for all href="x" outlinks
    IRegularExpression hrefPattern = TextFactory.CreateRegularExpression("href=\"(/wiki/.*?)\"");

    // Built once up front instead of once per match.
    IRegularExpression quoteSplitter = TextFactory.CreateRegularExpression("\"");

    foreach (string match in hrefPattern.Matches(content))
    {
        // tokens[0] is the text before the first quote, tokens[1] the link
        // location between the quotes, and tokens[2] the text after the
        // second quote.
        string[] tokens = quoteSplitter.Split(match);

        // NOTE(review): ToLower() applies the current culture's casing rules;
        // kept as-is so results stay consistent with any keys lower-cased the
        // same way elsewhere.
        string location = tokens[1].ToLower();
        outLinks.Add(location);
    }

    return outLinks;
}
/// <summary>
/// Splits a line of text and parses every resulting field as a double.
/// </summary>
/// <param name="line">The raw text holding the values.</param>
/// <param name="separator">Regular-expression pattern used to split <paramref name="line"/>.</param>
/// <returns>The parsed values, in the order they appear in <paramref name="line"/>.</returns>
private ICollection<double> exampleFromString(string line, string separator)
{
    // assumes all values for input and target are doubles
    ICollection<double> parsedValues = CollectionFactory.CreateQueue<double>();

    IRegularExpression fieldSplitter = TextFactory.CreateRegularExpression(separator);
    ICollection<string> fields = CollectionFactory.CreateQueue<string>(fieldSplitter.Split(line));

    // Parse with the invariant culture so the decimal separator does not
    // depend on the machine's regional settings.
    System.Globalization.NumberStyles styles = System.Globalization.NumberStyles.Any;
    System.Globalization.CultureInfo culture = System.Globalization.CultureInfo.InvariantCulture;

    foreach (string field in fields)
    {
        double value = double.Parse(field, styles, culture);
        parsedValues.Add(value);
    }

    return parsedValues;
}