// Invoked when the user supplies only a data file, with no loader/term and value columns.
// Produces the TextLoader component factory for a two-column file: "Term" (text, column 0)
// and "Value" (column 1). The type of the Value column is chosen as follows:
//   * keyValues == false: Value is loaded as DataKind.Num.
//   * keyValues == true:  the file is scanned once to find the smallest and largest key value,
//     and Value is loaded as a U4 or U8 key column whose KeyRange is derived from them.
// Any exception raised while scanning the file is rethrown through host.Except with a message
// that names the file.
private static IComponentFactory<IMultiStreamSource, IDataLoader> GetLoaderFactory(string filename, bool keyValues, IHost host)
{
    Contracts.AssertValue(host);

    if (!keyValues)
    {
        // Non-key values: the value column is simply numeric, no scan of the file is needed.
        return ComponentFactoryUtils.CreateFromFunction<IMultiStreamSource, IDataLoader>(
            (env, files) =>
                new TextLoader(
                    env,
                    new TextLoader.Arguments()
                    {
                        Column = new[]
                        {
                            new TextLoader.Column("Term", DataKind.TX, 0),
                            new TextLoader.Column("Value", DataKind.Num, 1)
                        }
                    },
                    files));
    }

    // Key values: read both columns as text and scan the values to find the key range.
    // The bounds start inverted (lowest > highest) so that "no valid key seen" is detectable afterwards.
    ulong lowest = ulong.MaxValue;
    ulong highest = ulong.MinValue;
    try
    {
        var scanArgs = new TextLoader.Arguments();
        // The parse result is stored in a local before being asserted, so the parse call
        // itself is a plain statement and does not live inside the Assert argument.
        bool argsParsed = CmdParser.ParseArguments(host, "col=Term:TX:0 col=Value:TX:1", scanArgs);
        host.Assert(argsParsed);

        var scanLoader = new TextLoader(host, scanArgs, new MultiFileSource(filename));
        using (var rows = scanLoader.GetRowCursor(col => true))
        {
            var termGetter = rows.GetGetter<DvText>(0);
            var valueGetter = rows.GetGetter<DvText>(1);
            DvText valueText = default(DvText);

            using (var ch = host.Start("Creating Text Lookup Loader"))
            {
                long nonKeyCount = 0;
                while (rows.MoveNext())
                {
                    valueGetter(ref valueText);

                    ulong key;

                    // First attempt: parse the text as a key between 1 and ulong.MaxValue.
                    // A successful parse that yields 0 denotes a missing value; it never lowers
                    // the minimum (and, being 0, cannot raise the maximum either).
                    if (Conversions.Instance.TryParseKey(ref valueText, 1, ulong.MaxValue, out key))
                    {
                        if (key != 0 && key < lowest)
                        {
                            lowest = key;
                        }

                        if (highest < key)
                        {
                            highest = key;
                        }

                        continue;
                    }

                    // Second attempt: the text may still be a literal 0, which is not a valid key in
                    // the 1-based range above but does parse as a ulong. In that case the range starts at 0.
                    if (Conversions.Instance.TryParse(ref valueText, out key))
                    {
                        ch.Assert(key == 0);
                        lowest = 0;
                        continue;
                    }

                    // Neither parse worked: count this row as a non-key value and report
                    // only the first five offending terms individually.
                    var termText = default(DvText);
                    termGetter(ref termText);
                    if (nonKeyCount < 5)
                    {
                        ch.Warning("Term '{0}' in mapping file is mapped to non key value '{1}'", termText, valueText);
                    }

                    nonKeyCount++;
                }

                if (nonKeyCount > 0)
                {
                    ch.Warning("Found {0} non key values in the file '{1}'", nonKeyCount, filename);
                }

                if (lowest <= highest)
                {
                    ch.Info("Found key values in the range {0} to {1} in the file '{2}'", lowest, highest, filename);
                }
                else
                {
                    // The bounds are still inverted, so no valid key was seen: fall back to a default range.
                    lowest = 0;
                    highest = uint.MaxValue - 1;
                    ch.Warning("did not find any valid key values in the file '{0}'", filename);
                }

                ch.Done();
            }
        }
    }
    catch (Exception e)
    {
        throw host.Except(e, "Failed to parse the lookup file '{0}' in TermLookupTransform", filename);
    }

    // Choose the representation of the key column from the width of the observed range:
    //   span <  int.MaxValue   -> U4 key with both bounds (min and max) specified
    //   span <  uint.MaxValue  -> U4 key with only the minimum specified
    //   otherwise              -> U8 key with only the minimum specified
    ulong span = highest - lowest;
    TextLoader.Column keyColumn = new TextLoader.Column("Value", DataKind.U4, 1);
    if (span >= (ulong)uint.MaxValue)
    {
        keyColumn.Type = DataKind.U8;
        keyColumn.KeyRange = new KeyRange(lowest);
    }
    else if (span >= (ulong)int.MaxValue)
    {
        keyColumn.KeyRange = new KeyRange(lowest);
    }
    else
    {
        keyColumn.KeyRange = new KeyRange(lowest, highest);
    }

    return ComponentFactoryUtils.CreateFromFunction<IMultiStreamSource, IDataLoader>(
        (env, files) =>
            new TextLoader(
                env,
                new TextLoader.Arguments()
                {
                    Column = new[]
                    {
                        new TextLoader.Column("Term", DataKind.TX, 0),
                        keyColumn
                    }
                },
                files));
}