Class: Kiba::Extend::Transforms::Deduplicate::Fields

Inherits:
Object
  • Object
show all
Includes:
MultivalPlusDelimDeprecatable, SepDeprecatable
Defined in:
lib/kiba/extend/transforms/deduplicate/fields.rb

Overview

Removes the value(s) found in the source field from each of the target fields

Examples:

Multival, case sensitive, without sep param

# Used in pipeline as:
# transform Deduplicate::Fields,
#   source: :x,
#   targets: %i[y z],
#   multival: true
#
# Neither sep nor delim is passed here, so the example sets the
# application-wide delimiter that the transform falls back on.
Kiba::Extend.config.delim = ';'
xform = Deduplicate::Fields.new(
  source: :x,
  targets: %i[y z],
  multival: true
)
input = [
  {x: "a", y: "a", z: "b"},
  {x: "a", y: "a", z: "a"},
  {x: "a", y: "b;a", z: "a;c"},
  {x: "a;b", y: "b;a", z: "a;c"},
  {x: "a", y: "aa", z: "bat"},
  {x: nil, y: "a", z: nil},
  {x: "", y: ";a", z: "b;"},
  {x: "a", y: nil, z: nil},
  {x: "a", y: "A", z: "a"}
]
result = Kiba::StreamingRunner.transform_stream(input, xform)
  .map{ |row| row }
# Undo the delimiter change made above so it does not affect other examples.
Kiba::Extend.reset_config
expected = [
  {x: "a", y: nil, z: "b"}, # y equals the source value, so it is emptied
  {x: "a", y: nil, z: nil}, # both targets equal the source value
  {x: "a", y: "b", z: "c"}, # only the matching segment of each target is removed
  {x: "a;b", y: nil, z: "c"}, # every source segment ("a" and "b") is removed
  {x: "a", y: "aa", z: "bat"}, # whole segments are compared, not substrings
  {x: nil, y: "a", z: nil}, # nil source: row is returned unchanged
  {x: "", y: "a", z: "b"}, # empty source: only empty target segments disappear
  {x: "a", y: nil, z: nil}, # nil targets stay nil
  {x: "a", y: "A", z: nil} # case sensitive: "A" does not match "a"
]
expect(result).to eq(expected)

No multival param, case sensitive, with delim

# Used in pipeline as:
# transform Deduplicate::Fields,
#   source: :x,
#   targets: %i[y z],
#   delim: ";"
#
# Only delim is given; multival is left out, and values are still treated as
# multivalued and split on the delimiter.
xform = Deduplicate::Fields.new(
  source: :x,
  targets: %i[y z],
  delim: ";"
)
input = [
  {x: "a", y: "a", z: "b"},
  {x: "a", y: "a", z: "a"},
  {x: "a", y: "b;a", z: "a;c"},
  {x: "a;b", y: "b;a", z: "a;c"},
  {x: "a", y: "aa", z: "bat"},
  {x: nil, y: "a", z: nil},
  {x: "", y: ";a", z: "b;"},
  {x: "a", y: nil, z: nil},
  {x: "a", y: "A", z: "a"}
]
result = Kiba::StreamingRunner.transform_stream(input, xform)
  .map{ |row| row }
expected = [
  {x: "a", y: nil, z: "b"}, # y equals the source value, so it is emptied
  {x: "a", y: nil, z: nil}, # both targets equal the source value
  {x: "a", y: "b", z: "c"}, # only the matching segment of each target is removed
  {x: "a;b", y: nil, z: "c"}, # every source segment ("a" and "b") is removed
  {x: "a", y: "aa", z: "bat"}, # whole segments are compared, not substrings
  {x: nil, y: "a", z: nil}, # nil source: row is returned unchanged
  {x: "", y: "a", z: "b"}, # empty source: only empty target segments disappear
  {x: "a", y: nil, z: nil}, # nil targets stay nil
  {x: "a", y: "A", z: nil} # case sensitive: "A" does not match "a"
]
expect(result).to eq(expected)

Multival, case insensitive, with sep

# Used in pipeline as:
# transform Deduplicate::Fields,
#   source: :x,
#   targets: %i[y z],
#   multival: true,
#   sep: ";",
#   casesensitive: false
#
# NOTE: multival and sep are both documented as deprecated in the parameter
# list; this example shows the legacy call style.
xform = Deduplicate::Fields.new(
  source: :x,
  targets: %i[y z],
  multival: true,
  sep: ";",
  casesensitive: false
)
input = [
  {x: "a", y: "A;a", z: "a"},
  {x: "a", y: "a", z: "B;A"}
]
result = Kiba::StreamingRunner.transform_stream(input, xform)
  .map{ |row| row }
expected = [
  {x: "a", y: nil, z: nil}, # "A" and "a" both match the source when case is ignored
  {x: "a", y: nil, z: "B"} # the surviving segment keeps its original case
]
expect(result).to eq(expected)

Single val, case insensitive

# Used in pipeline as:
# transform Deduplicate::Fields,
#   source: :x,
#   targets: %i[y z],
#   casesensitive: false
#
# No multival, sep, or delim is given, so each field holds a single value.
xform = Deduplicate::Fields.new(
  source: :x,
  targets: %i[y z],
  casesensitive: false
)
input = [
  {x: "a", y: "A", z: "a"},
  {x: "a", y: "a", z: "B"}
]
result = Kiba::StreamingRunner.transform_stream(input, xform)
  .map{ |row| row }
expected = [
  {x: "a", y: nil, z: nil}, # "A" matches "a" when case is ignored
  {x: "a", y: nil, z: "B"} # "B" does not match the source and is kept as-is
]
expect(result).to eq(expected)

Instance Method Summary collapse

Methods included from SepDeprecatable

#usedelim

Methods included from MultivalPlusDelimDeprecatable

#set_multival

Constructor Details

#initialize(source:, targets:, casesensitive: true, multival: omitted = true, sep: nil, delim: nil) ⇒ Fields

Returns a new instance of Fields.

Parameters:

  • source (Symbol)

    name of field containing value to remove from target fields

  • targets (Array<Symbol>)

    names of fields to remove source value(s) from

  • casesensitive (Boolean) (defaults to: true)

    whether matching should be case sensitive

  • multival (Boolean) (defaults to: true)

    DEPRECATED - Do not use

  • sep (String) (defaults to: nil)

    DEPRECATED - Do not use

  • delim (nil, String) (defaults to: nil)

    if non-nil, used as the delimiter for splitting the source and target field values into multiple values



145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
# File 'lib/kiba/extend/transforms/deduplicate/fields.rb', line 145

def initialize(source:, targets:, casesensitive: true,
  multival: omitted = true, sep: nil, delim: nil)
  @source = source
  @casesensitive = casesensitive
  @multival = if omitted && delim
    true
  else
    set_multival(multival, omitted, self)
  end
  if sep.nil? && delim.nil? && multival && !omitted
    msg = "If you are expecting Kiba::Extend.delim to be used as "\
      "default `sep` value, please pass it as explicit `delim` "\
      "argument. In a future release of kiba-extend, the `delim` "\
      "value will no longer default to Kiba::Extend.delim."
    warn("#{Kiba::Extend.warning_label}:\n  #{self.class}: #{msg}")
    sep = Kiba::Extend.delim
  end
  @delim = usedelim(sepval: sep, delimval: delim, calledby: self,
    default: nil)
  getter_params = if @delim
    {fields: targets, delim: @delim}
  else
    {fields: targets}
  end
  @getter = Helpers::FieldValueGetter.new(**getter_params)
end

Instance Method Details

#process(row) ⇒ Object

Parameters:

  • row (Hash{ Symbol => String, nil })


173
174
175
176
177
178
179
180
181
182
183
184
185
# File 'lib/kiba/extend/transforms/deduplicate/fields.rb', line 173

# Removes the source field's value(s) from the target fields of one row.
#
# The row is returned as-is when the source field is nil/false or when the
# getter yields no target values. Otherwise the source value and each target
# value are passed through `split_value` and handed to `deduplicate`.
#
# @param row [Hash{ Symbol => String, nil }]
# @return [Hash] the same row object that was passed in
def process(row)
  raw_source = row[source]
  return row unless raw_source

  values_by_target = getter.call(row)
  unless values_by_target.empty?
    source_parts = split_value(raw_source)
    values_by_target.transform_values! { |target_val| split_value(target_val) }
    deduplicate(row, source_parts, values_by_target)
  end

  row
end