Home Reference Source Test

src/DataSet.mjs

import { ml, } from './ml';
import { util as utils, } from './util';

// Maps a transform strategy name (and each of its accepted spellings) to the
// option key that carries that strategy's settings. DataSet.getTransforms
// looks the strategy up here to decide where the second element of a
// shorthand `[strategy, settings]` array is stored on the `options` object.
const transformConfigMap = {
  scale: 'scaleOptions',
  descale: 'descaleOptions',
  label: 'labelOptions',
  labelEncoder: 'labelOptions',
  labeldecode: 'labelOptions',
  labelDecode: 'labelOptions',
  labelDecoder: 'labelOptions',
  onehot: 'oneHotOptions',
  oneHot: 'oneHotOptions',
  oneHotEncode: 'oneHotOptions',
  oneHotEncoder: 'oneHotOptions',
  reducer: 'reducerOptions',
  reduce: 'reducerOptions',
  merge: 'mergeData',
};

/**
 * class for manipulating an array of objects, typically from CSV data
 * @class DataSet
 * @memberOf preprocessing
 */
export class DataSet {
  /**
   * Allows for fit transform short hand notation
   * @example
DataSet.getTransforms({
  Age: ['scale',],
  Rating: ['label',],  }); //=> [
//   {
//    name: 'Age', options: { strategy: 'scale', },
//   },
//   { 
//    name: 'Rating', options: { strategy: 'label', }, 
//   },
// ];
   * @param {Object} transforms 
   * @returns {Array<Object>} returns fit columns, columns property
   */
  static getTransforms(transforms = {}) {
    return Object.keys(transforms).reduce((result, columnName) => {
      const transformColumnObject = transforms[ columnName ];
      const transformObject = {
        name: columnName,
        options: {
          strategy: (Array.isArray(transformColumnObject))
            ? transformColumnObject[ 0 ]
            : transformColumnObject,
        },
      };
      if (Array.isArray(transformColumnObject) && transformColumnObject.length > 1) {
        transformObject.options[ transformConfigMap[ transformColumnObject[ 0 ] ] ] = transformColumnObject[ 1 ];
      }
      result.push(transformObject);
      return result;
    }, []);
  }
  /**
   * returns an array of objects by applying labels to matrix of columns
   * @example
const data = [{ Age: '44', Salary: '72000' },
{ Age: '27', Salary: '48000' }]
const AgeDataSet = new MS.DataSet(data);
const dependentVariables = [ [ 'Age', ], [ 'Salary', ], ];
const AgeSalMatrix = AgeDataSet.columnMatrix(dependentVariables); // =>
//  [ [ '44', '72000' ],
//  [ '27', '48000' ] ];
MS.DataSet.reverseColumnMatrix({vectors:AgeSalMatrix,labels:dependentVariables}); // => [{ Age: '44', Salary: '72000' },
// { Age: '27', Salary: '48000' }]
   * 
   * @param {*} options 
   * @param {Array[]} options.vectors - array of vectors
   * @param {String[]} options.labels - array of labels
   * @returns {Object[]} an array of objects with properties derived from options.labels
   */
  static reverseColumnMatrix(options = {}) {
    const { vectors, labels, } = options;
    const features = (Array.isArray(labels) && Array.isArray(labels[ 0 ]))
      ? labels
      : labels.map(label => [label, ]);
    return vectors.reduce((result, val) => {
      result.push(val.reduce((prop, value, index) => {
        prop[ features[ index ][ 0 ] ] = val[ index ];
        return prop;
      }, {}));
      return result;
    }, []);
  }
  static reverseColumnVector(options = {}) {
    const { vector, labels, } = options;
    const features = (Array.isArray(labels) && Array.isArray(labels[ 0 ]))
      ? labels
      : labels.map(label => [label, ]);
    return vector.reduce((result, val) => {
      result.push(
        { [ features[ 0 ][ 0 ] ]: val, }
      );
      return result;
    }, []);
  }
  /**
   * Converts an object into a one hot encoded object
   * @example
const labels = ['apple', 'orange', 'banana',];
const prefix = 'fruit_';
const name = 'fruit';
const options = { labels, prefix, name, };
const data = {
  fruit: 'apple',
};
EncodedCSVDataSet.encodeObject(data, options); // => { fruit_apple: 1, fruit_orange: 0, fruit_banana: 0, }
   * @param {Object} data - object to encode 
   * @param {{labels:Array<String>,prefix:String,name:String}} options - encoded object options
   * @returns {Object} one hot encoded object
   */
  static encodeObject(data, options) {
    const { labels, prefix, name, } = options;
    const encodedData = labels.reduce((encodedObj, label) => {
      const oneHotLabelArrayName = `${prefix}${label}`;
      encodedObj[ oneHotLabelArrayName ] = (label && data[ name ] && data[ name ].toString() === label.toString()) ? 1 : 0;
      return encodedObj;
    }, {});
    return encodedData;
  }
  /**
 * returns a new object of one hot encoded values
 * @example
 * // [ 'Brazil','Mexico','Ghana','Mexico','Ghana','Brazil','Mexico','Brazil','Ghana', 'Brazil' ]
const originalCountry = dataset.columnArray('Country'); 

// { originalCountry:
//    { Country_Brazil: [ 1, 0, 0, 0, 0, 1, 0, 1, 0, 1 ],
//      Country_Mexico: [ 0, 1, 0, 1, 0, 0, 1, 0, 0, 0 ],
//      Country_Ghana: [ 0, 0, 1, 0, 1, 0, 0, 0, 1, 0 ] },
//     }
const oneHotCountryColumn = dataset.oneHotEncoder('Country'); 
  * @param {string} name - csv column header, or JSON object property name 
  * @param options 
  * @see {@link http://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.OneHotEncoder.html}
  * @return {Object}
  */
  static oneHotEncoder(name, options) {
    const config = Object.assign({
      merge: true,
    }, options);
    const labelData = config.data || this.columnArray(name, config.columnArrayOptions);
    const labels = Array.from(new Set(labelData).values());
    const prefix = config.prefix || `${name}_`;
    const encodedData = labelData.reduce(
      (result, val, index, arr) => {
        labels.forEach(encodedLabel => {
          const oneHotLabelArrayName = `${prefix}${encodedLabel}`;
          const oneHotVal = (val === encodedLabel) ? 1 : 0;
          if (Array.isArray(result[ oneHotLabelArrayName ])) {
            result[ oneHotLabelArrayName ].push(oneHotVal);
          } else {
            result[ oneHotLabelArrayName ] = [oneHotVal, ];
          }
        });
        return result;
      }, {});
    if (this.encoders.has(name) && config.merge) {
      this.encoders.get(name).labels = Array.from(new Set(labels.concat(this.encoders.get(name).labels)));
      // this.encoders.get(name);
    } else {
      this.encoders.set(name, {
        name,
        labels,
        prefix,
      });
    }
    return encodedData;
  }
  /**
   * Return one hot encoded data
   * @example
const csvData = [{
    'Country': 'Brazil',
    'Age': '44',
    'Salary': '72000',
    'Purchased': 'N',
  },
  {
    'Country': 'Mexico',
    'Age': '27',
    'Salary': '48000',
    'Purchased': 'Yes',
  },
  ...
];
const EncodedCSVDataSet = new ms.preprocessing.DataSet(csvData);
EncodedCSVDataSet.fitColumns({
  columns: [
    {
      name: 'Country',
      options: { strategy: 'onehot', },
    },
  ],
});

EncodedCSVDataSet.oneHotDecoder('Country');// =>
// [ { Country: 'Brazil' },
//  { Country: 'Mexico' },
//  { Country: 'Ghana' },
//  { Country: 'Mexico' },
//   ...]
   * @param {string} name - column name 
   * @param options 
   * @returns {Array<Object>} returns an array of objects from an one hot encoded column
   */
  static oneHotDecoder(name, options) {
    const config = Object.assign({
      // handle_unknown: 'error'
    }, options);
    const encoderMap = config.encoders || this.encoders;
    const prefix = config.prefix || encoderMap.get(name).prefix;
    const labels = config.labels || encoderMap.get(name).labels;
    const encodedData = config.data || this.oneHotColumnArray(name, config.oneHotColumnArrayOptions);
    // console.log({ encodedData, encoderMap, prefix });
    return encodedData.reduce((result, val) => {
      const columnNames = Object.keys(val).filter(prop => val[ prop ] === 1 && (labels.indexOf(prop.replace(prefix, '')) !== -1 || labels.map(label => String(label)).indexOf(prop.replace(prefix, '')) !== -1));
      const columnName = columnNames[ 0 ] || '';
      // console.log({ columnName, columnNames, labels, val},Object.keys(val));
      const datum = {
        [ name ]: columnName.replace(prefix, ''),
      };
      result.push(datum);
      return result;
    }, []);
  }
  /**
   * returns a list of objects with only selected columns as properties
 * @example
const data = [{ Age: '44', Salary: '44' , Height: '34' },
{ Age: '27', Salary: '44' , Height: '50'  }]
const AgeDataSet = new MS.DataSet(data);
const cols = [ 'Age', 'Salary' ];
const selectedCols = AgeDataSet.selectColumns(cols); // => [{ Age: '44', Salary: '44' },
// { Age: '27', Salary: '44' }]
   * 
   * @param {String[]} names - array of selected columns
   * @param {*} options 
   * @returns {Object[]} an array of objects with properties derived from names
   */
  static selectColumns(names, options = {}) {
    const config = Object.assign({}, options);
    const data = config.data || this.data;
    return data.reduce((result, val) => {
      const selectedData = {};
      names.forEach(name => {
        selectedData[ name ] = val[ name ];
      });
      result.push(selectedData);
      return result;
    }, []);
  }
  /**
   * returns a new array of a selected column from an array of objects, can filter, scale and replace values
   * @example 
   * //column Array returns column of data by name
// [ '44','27','30','38','40','35','','48','50', '37' ]
const OriginalAgeColumn = dataset.columnArray('Age'); 
  * @param {string} name - csv column header, or JSON object property name 
  * @param options 
  * @param {function} [options.prefilter=(arr[val])=>true] - prefilter values to return
  * @param {function} [options.filter=(arr[val])=>true] - filter values to return
  * @param {function} [options.replace.test=undefined] - test function for replacing values (arr[val])
  * @param {(string|number|function)} [options.replace.value=undefined] - value to replace (arr[val]) if replace test is true, if a function (result,val,index,arr,name)=>your custom value
  * @param {number} [options.parseIntBase=10] - radix value for parseInt
  * @param {boolean} [options.parseFloat=false] - convert values to floats 
  * @param {boolean} [options.parseInt=false] - converts values to ints 
  * @param {boolean} [options.scale=false] - standard or minmax feature scale values 
  * @returns {array}
  */
  static columnArray(name, options = {}) {
    const config = Object.assign({
      prefilter: () => true,
      filter: () => true,
      replace: {
        test: undefined,
        value: undefined,
      },
      parseInt: false,
      parseIntBase: 10,
      parseFloat: (options.scale) ? true : false,
      scale: false,
    }, options);
    const data = config.data || this.data;
    const modifiedColumn = data
      .filter(config.prefilter)
      .reduce((result, val, index, arr) => {
        let objVal = val[ name ];
        let returnVal = (typeof config.replace.test === 'function') ?
          config.replace.test(objVal) ?
            typeof config.replace.value === 'function' ?
              config.replace.value(result, val, index, arr, name) :
              config.replace.value :
            objVal :
          objVal;
        if (config.filter(returnVal)) {
          if (config.parseInt) result.push(parseInt(returnVal, config.parseIntBase));
          else if (config.parseFloat) result.push(parseFloat(returnVal));
          else result.push(returnVal);
        }
        return result;
      }, []);
    if (typeof config.scale === 'function') {
      return modifiedColumn.map(config.scale);
    } else if (config.scale) {
      switch (config.scale) {
      case 'standard':
        return utils.StandardScaler(modifiedColumn);
      case 'log':
        return utils.LogScaler(modifiedColumn);
      case 'exp':
        return utils.ExpScaler(modifiedColumn);
      case 'normalize':
      default:
        return utils.MinMaxScaler(modifiedColumn);
      }
    } else {
      return modifiedColumn;
    }
  }
  
  /**
   * returns a matrix of values by combining column arrays into a matrix
   * @example const csvObj = new DataSet([{col1:1,col2:5},{col1:2,col2:6}]);
csvObj.columnMatrix([['col1',{parseInt:true}],['col2']]); // =>
//[ 
//  [1,5], 
//  [2,6], 
//]
  * @param {Array} [vectors=[]] - array of arguments for columnArray to merge columns into a matrix
  * @param {Array} [data=[]] - array of data to convert to matrix
  * @returns {Array} a matrix of column values 
  */
  static columnMatrix(vectors = [], data = []) {
    const options = (data.length) ? { data, } : {};
    const columnVectors = (Array.isArray(vectors) && Array.isArray(vectors[ 0 ]))
      ? vectors
      : vectors.map(vector => [vector, options, ]);
    const vectorArrays = columnVectors
      .map(vec => DataSet.columnArray.call(this, ...vec));
        
    return utils.pivotArrays(vectorArrays);
  }
  /**
   * returns a JavaScript Object from a Map (supports nested Map Objects)
   * @example const nested = new Map([['a', 1], ['b', new Map([['c', 2]])]]);
DataSet.mapToObject(nested); // => { a: 1, b: { c: 2 } }
  * @param {Map} mapObj - Map to convert into JavaScript Object
  * @returns {Object} JavaScript Object converted from a Map
  */
  static mapToObject(mapObj = new Map()){
    return Array.from(mapObj.keys())
      .reduce((result, val) => {
        const mapVal = mapObj.get(val);
        if (mapVal instanceof Map) {
          result[ val ] = DataSet.mapToObject(mapVal);
        } else if (typeof mapVal === 'function') {
          result[ val ] = `[Function ${mapVal.name}]`;
        } else {
          result[ val ] = JSON.parse(JSON.stringify(mapVal));
        }
        return result;
      }, {});
  }
  /**
   * returns 0 or 1 depending on the input value
   * @example DataSet.getBinaryValue('true') // => 1
DataSet.getBinaryValue('False') // => 0
DataSet.getBinaryValue('No') // => 0
DataSet.getBinaryValue(false) // => 0
  * @param {String|Number} [value=''] - value to convert to a 1 or a 0
  * @returns {Number} 0 or 1 depending on truthiness of value
  */
  static getBinaryValue(value='') {
    if (!value) return 0;
    switch (value) {
    case false:
    case 'N':
    case 'n':
    case 'NO':
    case 'No':
    case 'no':
    case 'False':
    case 'F':
    case 'f':
      return 0;
    default:
      return 1;
    }
  }
  /**
   * creates a new raw data instance for preprocessing data for machine learning
   * @example
   * const dataset = new ms.DataSet(csvData);
   * @param {Object[]} dataset
   * @returns {this} 
   */
  constructor(data = [], options = {}) {
    this.config = Object.assign({
      debug: true,
    }, options);
    this.data = [...data, ];
    this.labels = new Map();
    this.encoders = new Map();
    this.scalers = new Map();
    this.selectColumns = DataSet.selectColumns;
    this.columnArray = DataSet.columnArray;
    this.encodeObject = DataSet.encodeObject;
    this.oneHotEncoder = DataSet.oneHotEncoder;
    this.oneHotDecoder = DataSet.oneHotDecoder;
    this.columnMatrix = DataSet.columnMatrix;
    this.reverseColumnMatrix = DataSet.reverseColumnMatrix;
    this.reverseColumnVector = DataSet.reverseColumnVector;
    this.getTransforms = DataSet.getTransforms;
    if (this.config.labels || this.config.encoders || this.config.scalers) {
      this.importFeatures({
        labels: this.config.labels,
        encoders: this.config.encoders,
        scalers: this.config.scalers,
      });
    }
    return this;
  }
  /**
   * returns Object of all encoders and scalers 
   * @example const csvObj = new DataSet([{col1:1,col2:5},{col1:false,col2:6}]);
DataSet.fitColumns({col1:['label',{binary:true}]}); 
Dataset.data // => [{col1:true,col2:5},{col1:false,col2:6}]
Dataset.exportFeatures() //=> { labels: { col1: { "0": false, "1": true, "N": 0, "Yes": 1, "No": 0, "f": 0, "false": 1, } } }
  * @param {Object} [options={}] - options object (not currently used)
  * @returns {{labels:Map,encoders:Map,scalers:map}} JavaScript Object of transforms encoders and scalers(labels, encoders, scalers) 
  */
  exportFeatures(options = {}) {
    const config = Object.assign({
    }, options);
    return {
      encoders: DataSet.mapToObject(this.encoders),
      labels: DataSet.mapToObject(this.labels),
      scalers: DataSet.mapToObject(this.scalers),
    };
  }
  /**
   * set encoders, labels and scalers 
   * @example const csvObj = new DataSet([{col1:1,col2:5},{col1:false,col2:6}]);
DataSet.fitColumns({col1:['label',{binary:true}]}); 
Dataset.data // => [{col1:true,col2:5},{col1:false,col2:6}]
Dataset.exportFeatures() //=> { labels: { col1: { "0": false, "1": true, "N": 0, "Yes": 1, "No": 0, "f": 0, "false": 1, } } }
  * @param {{labels:Map,encoders:Map,scalers:map}} [features={}] - JavaScript Object of transforms encoders and scalers(labels, encoders, scalers) 
  */
  importFeatures(features = {}) {
    Object.keys(features.encoders || {}).forEach(encoderName => { 
      const encoder = features.encoders[ encoderName ];
      this.encoders.set(encoderName, encoder);
    });
    Object.keys(features.labels || {}).forEach(labelName => {
      const labelData = features.labels[labelName];
      const labels = Object.keys(labelData)
        .map(labelProp => [labelProp, labelData[ labelProp ], ]);
      if (typeof labelData[ '0' ] !== 'undefined' && typeof labelData[ '1' ] !== 'undefined') {
        labels.push(...[
          [ 0, false, ],
          [ false, 0, ],
          [ null, 0, ],
          [ 'no', 0, ],
          [ 'No', 0, ],
          [ 'NO', 0, ],
          [ 'F', 0, ],
          [ 'f', 0, ],
          [ 'null', 0, ],
          [ '', 0, ],
          [ undefined, 0, ],
          [ 'undefined', 0, ],
          [ 1, true, ],
          [ true, 1, ],
        ]);
      }
      this.labels.set(labelName, new Map(labels));
    });
    Object.keys(features.scalers || {}).forEach(scalerName => {
      let transforms;
      const scalerData = features.scalers[ scalerName ];
      const { config, } = scalerData;
      switch (config.strategy) {
      case 'standard':
        transforms = utils.StandardScalerTransforms(...[undefined, config.nan_value, config.return_nan, scalerData.components,]);
        scalerData.scale = transforms.scale;  
        scalerData.descale = transforms.descale;  
        break;
      case 'normalize':
      case 'minmax':
        transforms = utils.MinMaxScalerTransforms(...[undefined, config.nan_value, config.return_nan, scalerData.components,]);
        scalerData.scale = transforms.scale;  
        scalerData.descale = transforms.descale;   
        break;
      case 'log':
      default:
        scalerData.scale = Math.log;  
        scalerData.descale = Math.exp;  
        break;
      }
      this.scalers.set(scalerName, scalerData);
    });
  }
  /**
   * returns filtered rows of data 
   * @example const csvObj = new DataSet([{col1:1,col2:5},{col1:2,col2:6}]);
csvObj.filterColumn((row)=>row.col1>=2); // =>
//[
//  { col1: 2, col2: 6 },
//]
  * @param {Function} [filter=()=>true] - filter function
  * @returns {Array} filtered array of data 
  */
  filterColumn(filter = () => true) {
    return this.data.filter(filter);
  }
  /**
   * Returns a new array of scaled values which can be reverse (descaled). The scaling transformations are stored on the DataSet
   * @example
//dataset.columnArray('Age') => [ '44','27','30','38','40','35',38.77777777777778,'48','50','37' ]
dataset.columnScale('Age',{strategy:'log'}) // => [ 3.784189633918261,
  3.295836866004329, 3.4011973816621555, 3.6375861597263857, 3.6888794541139363, 3.5553480614894135, 3.657847344866208, 3.8712010109078907, 3.912023005428146, 3.6109179126442243 ]
dataset.scalers.get('Age').scale(45) // => 3.8066624897703196
dataset.scalers.get('Age').descale(3.8066624897703196) // => 45
//this supports, log/exponent, minmax/normalization and standardscaling
   * @param {string} name - name - csv column header, or JSON object property name 
   * @param {string} [options.strategy="log"] - strategy for scaling values 
   * @returns {number[]} returns an array of scaled values
   */
  columnScale(name, options = {}) {
    const input = (typeof options === 'string')
      ? { strategy: options, }
      : options;
    const config = Object.assign({
      strategy: 'log',
      forced_coercion: false,
    }, input);
    let scaleData = config.data || this.columnArray(name, config.columnArrayOptions);
    let scaledData;
    let transforms;
      
    scaleData = scaleData.filter(datum => typeof datum !== 'undefined')
      .map((datum, i) => {
        if (typeof datum !== 'number') {
          if (this.config.debug && config.forced_coercion===false) {
            console.error(TypeError(`Each value must be a number, error at index [${name}][${i}]: <${typeof datum}>${datum}`));
          }
          const num = Number(datum);
          if (isNaN(num) && config.forced_coercion) {
            return 0;
          } else if (isNaN(num)) {
            throw TypeError(`Only numerical values in (${name}) can be scaled i: ${i} datum: ${datum}`);
          }
          return num;
        } else return datum;
      });
    switch (config.strategy) {
    case 'standard':
      transforms = utils.StandardScalerTransforms(...[scaleData, config.nan_value, config.return_nan,]);
      this.scalers.set(name, {
        name,
        scale: transforms.scale,
        descale: transforms.descale,
        components: transforms.components,
      });
      scaledData = transforms.values;
      break;
    case 'normalize':
    case 'minmax':
      transforms = utils.MinMaxScalerTransforms(...[scaleData, config.nan_value, config.return_nan,]);     
      this.scalers.set(name, {
        name,
        scale: transforms.scale,
        descale: transforms.descale,
        components: transforms.components,
      });
      scaledData = transforms.values;
      break;
    case 'log':
    default:
      this.scalers.set(name, {
        name,
        scale: Math.log,
        descale: Math.exp,
        components: {
          average : utils.avg(scaleData),
          standard_dev : utils.sd(scaleData),
          maximum : utils.max(scaleData),
          minimum : utils.min(scaleData),
        },
      });
      scaledData = utils.LogScaler(scaleData);
      break;
    }
    this.scalers.get(name).config = config;
    return scaledData;
  }
  /**
   * Returns a new array of descaled values
   * @example
//dataset.columnArray('Age') => [ '44','27','30','38','40','35',38.77777777777778,'48','50','37' ]
const scaledData = [ 3.784189633918261,
  3.295836866004329, 3.4011973816621555, 3.6375861597263857, 3.6888794541139363, 3.5553480614894135, 3.657847344866208, 3.8712010109078907, 3.912023005428146, 3.6109179126442243 ]
dataset.columnDescale('Age') // => [ '44','27','30','38','40','35',38.77777777777778,'48','50','37' ]
   * @param {string} name - name - csv column header, or JSON object property name 
   * @param {string} [options.strategy="log"] - strategy for scaling values 
   * @returns {number[]} returns an array of descaled values
   */
  columnDescale(name, options) {
    const config = Object.assign({ }, options);
    const scaledData = config.data || this.columnArray(name, config.columnArrayOptions);
    const descaleFunction = this.scalers.get(name).descale;
    return scaledData.map(descaleFunction);
  }
  /**
   * returns a new array and label encodes a selected column
   * @example
   * const oneHotCountryColumn = dataset.oneHotEncoder('Country'); 

// [ 'N', 'Yes', 'No', 'f', 'Yes', 'Yes', 'false', 'Yes', 'No', 'Yes' ] 
const originalPurchasedColumn = dataset.labelEncoder('Purchased');
// [ 0, 1, 0, 0, 1, 1, 1, 1, 0, 1 ]
const encodedBinaryPurchasedColumn = dataset.labelEncoder('Purchased',{ binary:true });
// [ 0, 1, 2, 3, 1, 1, 4, 1, 2, 1 ]
const encodedPurchasedColumn = dataset.labelEncoder('Purchased'); 
  * @param {string} name - csv column header, or JSON object property name 
  * @param options
  * @param {boolean} [options.binary=false] - only replace with (0,1) with binary values 
  * @param {function} options.sortFunction - custom label encoding value sort function
  * @see {@link http://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.LabelEncoder.html} 
  * @returns {array}
  */
  labelEncoder(name, options) {
    const config = Object.assign({
      binary: false,
      merge: true,
    }, options);
    const labelData = config.data || this.columnArray(name, config.columnArrayOptions);
    let labelDataUniqueValues = Array.from(new Set(labelData).values()).sort(config.sortFunction);
    const labels = new Map(
      labelDataUniqueValues
        .reduce((result, val, i) => {
          if (config.binary) {
            if (i === 0) {
              result.push(...[
                [0, false,],
                ['0', false,],
                [1, true,],
                ['1', true,],
              ]);
            }
            result.push([val, DataSet.getBinaryValue(val),]);
          } else {
            result.push([val, i,]);
            result.push([i, val,]);
          }
          return result;
        }, [])
    );
    if (this.labels.has(name) && config.merge) {
      this.labels.set(name, new Map([...this.labels.get(name), ...labels,]));
    } else this.labels.set(name, labels);
    const labeledData = (config.binary) ?
      labelData.map(DataSet.getBinaryValue) :
      labelData.map(label => labels.get(label));
    return labeledData;
  }
  /**
     * returns a new array and decodes an encoded column back to the original array values
     * @param {string} name - csv column header, or JSON object property name 
     * @param options
     * @returns {array}
     */
  labelDecode(name, options) {
    const config = Object.assign({}, options);
    const labelData = config.data || this.columnArray(name, config.columnArrayOptions);
    return labelData.map(val => this.labels.get(name).get(val));
  }
  /**
   * Return one hot encoded data
   * @example
const csvData = [{
    'Country': 'Brazil',
    'Age': '44',
    'Salary': '72000',
    'Purchased': 'N',
  },
  {
    'Country': 'Mexico',
    'Age': '27',
    'Salary': '48000',
    'Purchased': 'Yes',
  },
  ...
];
const EncodedCSVDataSet = new ms.preprocessing.DataSet(csvData);
EncodedCSVDataSet.fitColumns({
  columns: [
    {
      name: 'Country',
      options: { strategy: 'onehot', },
    },
  ],
});

EncodedCSVDataSet.oneHotColumnArray('Country');// =>
// [ { Country_Brazil: 1, Country_Mexico: 0, Country_Ghana: 0 },
//   { Country_Brazil: 0, Country_Mexico: 1, Country_Ghana: 0 },
//   { Country_Brazil: 0, Country_Mexico: 0, Country_Ghana: 1 },
//   ...]
   * @param {string} name - column name 
   * @param options 
   * @returns {Array<Object>} returns an array of objects from an one hot encoded column
   */
  oneHotColumnArray(name, options) {
    const config = Object.assign({
      // handle_unknown: 'error'
    }, options);
    const labels = config.labels || this.encoders.get(name).labels;
    const prefix = config.prefix || this.encoders.get(name).prefix;
    return this.selectColumns(labels.map(label => `${prefix}${label}`));
  }
  /**
 * it returns a new column that reduces a column into a new column object, this is used in data prep to create new calculated columns for aggregate statistics
 * @example 
const reducer = (result, value, index, arr) => {
result.push(value * 2);
return result;
};
CSVDataSet.columnReducer('DoubleAge', {
columnName: 'Age',
reducer,
}); //=> { DoubleAge: [ 88, 54, 60, 76, 80, 70, 0, 96, 100, 74 ] }
  * @param {String} name - name of new Column 
  * @param {Object} options 
  * @param {String} options.columnName - name property for columnArray selection 
  * @param {Object} options.columnOptions - options property for columnArray  
  * @param {Function} options.reducer - reducer function to reduce into new array, it should push values into the resulting array  
  * @returns {Object} a new object that has reduced array as the value
  */
  columnReducer(name, options) {
    const newColumn = {
      [ name ]: this.columnArray(options.columnName, options.columnOptions).reduce(options.reducer, []),
    };
    return newColumn;
  }
  /**
   * it returns a new column that is merged onto the data set
   * @example 
CSVDataSet.columnMerge('DoubleAge', [ 88, 54, 60, 76, 80, 70, 0, 96, 100, 74 ]); //=> { DoubleAge: [ 88, 54, 60, 76, 80, 70, 0, 96, 100, 74 ] }
    * @param {String} name - name of new Column 
    * @param {Array} data - new dataset data  
    * @returns {Object} 
    */
  columnMerge(name, data=[]) {
    if (this.data.length !== data.length) throw new RangeError(`Merged data column must have the same length(${data.length}) as the DataSet's length (${this.data.length})`);
    return {
      [name]: data,
    };
  }
  /**
   * Inverses transform on an object
   * @example
DataSet.data; //[{
//   Age: 0.6387122698222066,
//   Salary: 72000,
//   Purchased: 0,
//   Country_Brazil: 1,
//   Country_Mexico: 0,
//   Country_Ghana: 0,
// }, ...] 
DataSet.inverseTransformObject(DataSet.data[0]); // => {
//  Country: 'Brazil', 
//  Age: 44, 
//  Salary: 72000, 
//  Purchased: 'N', 
// };
   * @param data 
   * @param options 
   * @returns {Object} returns object with inverse transformed data
   */
  inverseTransformObject(data, options) {
    const config = Object.assign({
      removeValues: false,
    }, options);
    const removedColumns = [];
    let transformedObject = Object.assign({}, data);
    const columnNames = Object.keys(this.data[ 0 ]);
    const scaledData = columnNames.reduce((scaleObject, columnName) => {
      if (this.scalers.has(columnName)){
        scaleObject[ columnName ] = this.scalers.get(columnName).descale(data[ columnName ]);
      }
      return scaleObject;
    }, {});
    const labeledData = columnNames.reduce((labelObject, columnName) => {
      if (this.labels.has(columnName)){
        labelObject[ columnName ] = this.labels.get(columnName).get(data[ columnName ]);
      }
      return labelObject;
    }, {});
    const encodedData = columnNames.reduce((encodedObject, columnName) => {
      if (this.encoders.has(columnName)) {
        const encoded = this.oneHotDecoder(columnName, {
          data: [data,],
        });
        // console.log({encoded})
        encodedObject = Object.assign({}, encodedObject, encoded[ 0 ]);
        if (config.removeValues) {
          removedColumns.push(...this.encoders.get(columnName).labels.map(label=>`${this.encoders.get(columnName).prefix}${label}`));
        }
      }
      return encodedObject;
    }, {});
    transformedObject = Object.assign(transformedObject, scaledData, labeledData, encodedData);
    if (config.removeValues && removedColumns.length) {
      transformedObject = Object.keys(transformedObject).reduce((removedObject, propertyName) => {
        if (removedColumns.indexOf(propertyName) === -1) {
          removedObject[ propertyName ] = transformedObject[ propertyName ];
        }
        return removedObject;
      }, {});
    }
    return transformedObject;
  }
  /**
   * transforms an object and replaces values that have been scaled or encoded
   * @example
DataSet.transformObject({
  'Country': 'Brazil',
  'Age': '44',
  'Salary': '72000',
  'Purchased': 'N',
}); // =>
// { 
//  Country: 'Brazil',
//  Age: 3.784189633918261,
//  Salary: '72000',
//  Purchased: 'N',
//  Country_Brazil: 1,
//  Country_Mexico: 0,
//  Country_Ghana: 0
// }
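   * @param {Object} data - object whose values are scaled, labeled and one-hot encoded with the scalers, labels and encoders stored on this DataSet
   * @param {Object} [options]
   * @param {boolean} [options.removeValues=false] - remove the source columns that were one-hot encoded from the returned object
   * @param {boolean} [options.checkColumnLength=true] - throw when the keys of data do not match the columns of the DataSet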
   * @returns {Object} 
   */
  transformObject(data, options) {
    const config = Object.assign({
      removeValues: false,
      checkColumnLength: true,
    }, options);
    const removedColumns = [];
    // if (Array.isArray(data)) return data.map(datum => this.transformObject);
    const encodedColumns = [].concat(...Array.from(this.encoders.keys())
      .map(encodedColumn => this.encoders.get(encodedColumn).labels
        .map(label=>`${this.encoders.get(encodedColumn).prefix}${label}`)
      )
    );
    const currentColumns = (this.data.length)
      ? Object.keys(this.data[ 0 ])
      : Object.keys(data);
    const objectColumns = Object.keys(data).concat(encodedColumns);
    // console.log({ encodedColumns,currentColumns,objectColumns });
    const differentKeys = objectColumns.reduce((diffKeys, val) => {
      if (currentColumns.indexOf(val) === -1 && encodedColumns.indexOf(val) === -1) diffKeys.push(val);
      return diffKeys;
    }, []);
    let transformedObject = Object.assign({}, data);
    if (config.checkColumnLength && currentColumns.length !== objectColumns.length && currentColumns.length+encodedColumns.length !== objectColumns.length ) {
      throw new RangeError(`Object must have the same number of keys (${objectColumns.length}) as data in your dataset(${currentColumns.length})`);
    } else if (config.checkColumnLength && differentKeys.length) {
      throw new ReferenceError(`Object must have identical keys as data in your DataSet. Invalid keys: ${differentKeys.join(',')}`);
    } else {
      const scaledData = objectColumns.reduce((scaleObject, columnName) => {
        if (this.scalers.has(columnName)){
          scaleObject[ columnName ] = this.scalers.get(columnName).scale(data[ columnName ]);
        }
        return scaleObject;
      }, {});
      const labeledData = objectColumns.reduce((labelObject, columnName) => {
        if (this.labels.has(columnName)){
          labelObject[ columnName ] = this.labels.get(columnName).get(data[ columnName ]);
        }
        return labelObject;
      }, {});
      const encodedData = objectColumns.reduce((encodedObject, columnName) => {
        if (this.encoders.has(columnName)) {
          encodedObject = Object.assign({}, encodedObject, this.encodeObject(data, this.encoders.get(columnName)));
          if (config.removeValues) {
            removedColumns.push(columnName);
          }
        }
        return encodedObject;
      }, {});
      transformedObject = Object.assign(transformedObject, scaledData, labeledData, encodedData);
      if (config.removeValues && removedColumns.length) {
        transformedObject = Object.keys(transformedObject).reduce((removedObject, propertyName) => {
          if (removedColumns.indexOf(propertyName) === -1) removedObject[ propertyName ] = transformedObject[ propertyName ];
          return removedObject;
        }, {});
      }
    }
    return transformedObject;
  } 
  /**
   * returns a new array of a selected column from an array of objects and replaces empty values, encodes values and scales values
   * @example
   * //column Replace returns new Array with replaced missing data
//[ '44','27','30','38','40','35',38.77777777777778,'48','50','37' ]
const ReplacedAgeMeanColumn = dataset.columnReplace('Age',{strategy:'mean'});
  * @param {string} name - csv column header, or JSON object property name 
  * @param options 
  * @param {boolean} [options.empty=true] - replace empty values 
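  * @param {string} [options.strategy="mean"] - strategy for replacing values: any array stat method from ml.js (mean, standardDeviation, median) or a transform strategy (scale, descale, label, labelEncoder, labelDecode, labelDecoder, onehot, oneHotEncoder, reduce, reducer, merge, parseNumber)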
  * @returns {array|Object[]}
  */
  columnReplace(name, options = {}) {
    const config = Object.assign({
      strategy: 'mean',
      empty: true,
      arrayOptions: {
        parseFloat: true,
        filter: val => val,
      },
      labelOptions: {},
    }, options);
    let replaceVal;
    let replace = {
      test: val => !val,
      value: replaceVal,
    };
    switch (config.strategy) {
    case 'scale':
      replaceVal = this.columnScale(name, config.scaleOptions);
      replace = {
        test: val => true,
        value: (result, val, index, arr) => replaceVal[index],
      };
      break;
    case 'descale':
      replaceVal = this.columnDescale(name, config.descaleOptions);
      replace = {
        test: val => true,
        value: (result, val, index, arr) => replaceVal[index],
      };
      break;
    case 'label':
    case 'labelEncoder':
      replaceVal = this.labelEncoder(name, config.labelOptions);
      replace = {
        test: val => true,
        value: (result, val, index, arr) => replaceVal[index],
      };
      break;
    case 'labeldecode':
    case 'labelDecode':
    case 'labelDecoder':
      replaceVal = this.labelDecode(name, config.labelOptions);
      replace = {
        test: val => true,
        value: (result, val, index, arr) => replaceVal[index],
      };
      break;
    case 'onehot':
    case 'oneHot':
    case 'oneHotEncode':
    case 'oneHotEncoder':
      replaceVal = this.oneHotEncoder(name, config.oneHotOptions);
      replace = {
        test: val => true,
        value: (result, val, index, arr) => replaceVal[index],
      };
      return replaceVal;
      // break;
    case 'reducer':
    case 'reduce':
      replaceVal = this.columnReducer(name, config.reducerOptions); 
      return replaceVal;  
    case 'merge':
      replaceVal = this.columnMerge(name, config.mergeData); 
      return replaceVal; 
    case 'parseNumber':
      replaceVal = this.columnArray(name).map(num => Number(num)); 
      return replaceVal; 
    default:
      replaceVal = ml.ArrayStat[config.strategy](this.columnArray(name, config.arrayOptions));
      replace.value = replaceVal;
      break;
    }
    return this.columnArray(name,
      Object.assign({}, {
        replace,
        scale: options.scale,
      }, options.columnOptions));
  }
  /**
     * mutates data property of DataSet by replacing multiple columns in a single command
     * @example
     * //fit Columns, mutates dataset
dataset.fitColumns({
  columns:[{name:'Age',options:{ strategy:'mean'} }]
});
// dataset
// class DataSet
//   data:[
//     {
//       'Country': 'Brazil',
//       'Age': '38.77777777777778',
//       'Salary': '72000',
//       'Purchased': 'N',
//     }
//     ...
//   ]
  * @param {Boolean} options.returnData - return updated DataSet data property 
  * @param {Object[]} options.columns - {name:'columnName',options:{strategy:'mean',labelOptions:{}},}
  * @returns {Object[]}
  */
  fitColumns(options = {}, mockDataOptions = {}) {
    const config = Object.assign({
      returnData:true,
      columns: [],
    }, options);
    if ( !options.columns || Array.isArray(options.columns) ===false) {
      config.columns = (options.columns)
        ? DataSet.getTransforms(options.columns)
        : DataSet.getTransforms(options);
    }
    const fittedColumns = config.columns
      .reduce((result, val, index, arr) => {
        let replacedColumn = this.columnReplace(val.name, val.options);
        if (Array.isArray(replacedColumn)) {
          replacedColumn = replacedColumn.map(columnVal => ({
            [val.name]: columnVal,
          }));
          result[val.name] = replacedColumn;
        } else {
          Object.keys(replacedColumn).forEach(repColName => {
            result[repColName] = replacedColumn[repColName].map(columnVal => ({
              [repColName]: columnVal,
            }));
          });
        }
        return result;
      }, {});
    if (Object.keys(fittedColumns) && Object.keys(fittedColumns).length) {
      const columnNames = Object.keys(fittedColumns);
      const fittedData = fittedColumns[columnNames[0]]
        .reduce((result, val, index, arr) => {
          const returnObj = {};
          columnNames.forEach(colName => {
            returnObj[colName] = fittedColumns[colName][index][colName];
          });
          result.push(returnObj);
          return result;
        }, []);
      this.data = this.data.map((val, index) => Object.assign({}, val, fittedData[index]));
    }
    return config.returnData ? this.data : this;
  }
  /**
   * Mutate dataset data by inversing all transforms
   * @example
DataSet.data;
// [{ 
//  Country: 'Brazil',
//  Age: 3.784189633918261,
//  Salary: '72000',
//  Purchased: 'N',
//  Country_Brazil: 1,
//  Country_Mexico: 0,
//  Country_Ghana: 0
// },
// ...
// ]
DataSet.fitInverseTransforms(); // =>
// [{
//   'Country': 'Brazil',
//   'Age': '44',
//   'Salary': '72000',
//   'Purchased': 'N',
// },
// ...
// ]
   * @param options 
   */
  fitInverseTransforms(options = {}) {
    const config = Object.assign({
      returnData: true,
    }, options);
    this.data = this.data.map(val => {
      return (options.removeValues)
        ? this.inverseTransformObject(val, options)
        : Object.assign({}, val, this.inverseTransformObject(val, options));
    });
    return config.returnData ? this.data : this;
  }
  /**
   * Mutate dataset data with all transforms
   * @param options
   * @example
DataSet.data;
// [{
//   'Country': 'Brazil',
//   'Age': '44',
//   'Salary': '72000',
//   'Purchased': 'N',
// },
// ...
// ]
DataSet.fitTransforms(); // =>
// [{ 
//  Country: 'Brazil',
//  Age: 3.784189633918261,
//  Salary: '72000',
//  Purchased: 'N',
//  Country_Brazil: 1,
//  Country_Mexico: 0,
//  Country_Ghana: 0
// },
// ...
// ] 
   */
  fitTransforms(options = {}) {
    const config = Object.assign({
      returnData: true,
    }, options);
    this.data = this.data.map(val => {
      return (options.removeValues)
        ? this.transformObject(val, options)
        : Object.assign({}, val, this.transformObject(val, options));
    });
    return config.returnData ? this.data : this;
  }
}