Module: Kiba::Extend

Extended by:
Dry::Configurable
Defined in:
lib/kiba/extend.rb,
lib/kiba/extend/job.rb,
lib/kiba/extend/jobs.rb,
lib/kiba/extend/marc.rb,
lib/kiba/extend/error.rb,
lib/kiba/extend/command.rb,
lib/kiba/extend/sources.rb,
lib/kiba/extend/version.rb,
lib/kiba/extend/jobs/job.rb,
lib/kiba/extend/registry.rb,
lib/kiba/extend/transforms.rb,
lib/kiba/extend/command/reg.rb,
lib/kiba/extend/command/run.rb,
lib/kiba/extend/jobs/parser.rb,
lib/kiba/extend/jobs/runner.rb,
lib/kiba/extend/sources/csv.rb,
lib/kiba/extend/destinations.rb,
lib/kiba/extend/sources/marc.rb,
lib/kiba/extend/utils/lookup.rb,
lib/kiba/extend/jobs/base_job.rb,
lib/kiba/extend/jobs/marc_job.rb,
lib/kiba/extend/jobs/reporter.rb,
lib/kiba/extend/utils/fieldset.rb,
lib/kiba/extend/transforms/copy.rb,
lib/kiba/extend/transforms/marc.rb,
lib/kiba/extend/transforms/name.rb,
lib/kiba/extend/transforms/sort.rb,
lib/kiba/extend/transforms/take.rb,
lib/kiba/extend/transforms/warn.rb,
lib/kiba/extend/command/reg/list.rb,
lib/kiba/extend/command/runnable.rb,
lib/kiba/extend/destinations/csv.rb,
lib/kiba/extend/jobs/show_me_job.rb,
lib/kiba/extend/jobs/tell_me_job.rb,
lib/kiba/extend/registry/creator.rb,
lib/kiba/extend/sources/json_dir.rb,
lib/kiba/extend/transforms/clean.rb,
lib/kiba/extend/transforms/count.rb,
lib/kiba/extend/transforms/merge.rb,
lib/kiba/extend/transforms/split.rb,
lib/kiba/extend/destinations/marc.rb,
lib/kiba/extend/registry/fileable.rb,
lib/kiba/extend/transforms/append.rb,
lib/kiba/extend/transforms/cspace.rb,
lib/kiba/extend/transforms/delete.rb,
lib/kiba/extend/transforms/rename.rb,
lib/kiba/extend/utils/lookup_hash.rb,
lib/kiba/extend/jobs/job_segmenter.rb,
lib/kiba/extend/sources/enumerable.rb,
lib/kiba/extend/sources/lookupable.rb,
lib/kiba/extend/sources/sourceable.rb,
lib/kiba/extend/transforms/allable.rb,
lib/kiba/extend/transforms/compare.rb,
lib/kiba/extend/transforms/explode.rb,
lib/kiba/extend/transforms/extract.rb,
lib/kiba/extend/transforms/helpers.rb,
lib/kiba/extend/transforms/prepend.rb,
lib/kiba/extend/transforms/replace.rb,
lib/kiba/extend/transforms/reshape.rb,
lib/kiba/extend/utils/pre_job_task.rb,
lib/kiba/extend/destinations/lambda.rb,
lib/kiba/extend/jobs/dependency_job.rb,
lib/kiba/extend/transforms/collapse.rb,
lib/kiba/extend/transforms/fraction.rb,
lib/kiba/extend/jobs/json_to_csv_job.rb,
lib/kiba/extend/transforms/ms_access.rb,
lib/kiba/extend/transforms/copy/field.rb,
lib/kiba/extend/registry/file_registry.rb,
lib/kiba/extend/registry/registry_list.rb,
lib/kiba/extend/transforms/deduplicate.rb,
lib/kiba/extend/transforms/filter_rows.rb,
lib/kiba/extend/transforms/fingerprint.rb,
lib/kiba/extend/destinations/json_array.rb,
lib/kiba/extend/transforms/rename/field.rb,
lib/kiba/extend/transforms/string_value.rb,
lib/kiba/extend/utils/extract_fractions.rb,
lib/kiba/extend/utils/lookup/row_sorter.rb,
lib/kiba/extend/utils/marc_id_extractor.rb,
lib/kiba/extend/utils/marc_name_cleaner.rb,
lib/kiba/extend/utils/pre_job_nuke_task.rb,
lib/kiba/extend/utils/string_normalizer.rb,
lib/kiba/extend/mixins/iterative_cleanup.rb,
lib/kiba/extend/registry/registered_file.rb,
lib/kiba/extend/registry/requirable_file.rb,
lib/kiba/extend/transforms/delete/fields.rb,
lib/kiba/extend/transforms/rename/fields.rb,
lib/kiba/extend/utils/lookup/set_checker.rb,
lib/kiba/extend/data/convertible_fraction.rb,
lib/kiba/extend/transforms/combine_values.rb,
lib/kiba/extend/utils/field_value_matcher.rb,
lib/kiba/extend/utils/fingerprint_creator.rb,
lib/kiba/extend/utils/lookup/row_selector.rb,
lib/kiba/extend/utils/pre_job_backup_task.rb,
lib/kiba/extend/jobs/multi_source_prep_job.rb,
lib/kiba/extend/registry/registered_lookup.rb,
lib/kiba/extend/registry/registered_source.rb,
lib/kiba/extend/transforms/fingerprint/add.rb,
lib/kiba/extend/transforms/single_warnable.rb,
lib/kiba/extend/utils/lookup/pair_equality.rb,
lib/kiba/extend/registry/creator/type_error.rb,
lib/kiba/extend/registry/registry_validator.rb,
lib/kiba/extend/transforms/deduplicate/flag.rb,
lib/kiba/extend/transforms/marc/extract_ids.rb,
lib/kiba/extend/transforms/sep_deprecatable.rb,
lib/kiba/extend/utils/lookup/multival_pairs.rb,
lib/kiba/extend/utils/lookup/pair_inclusion.rb,
lib/kiba/extend/destinations/destinationable.rb,
lib/kiba/extend/registry/file_registry_entry.rb,
lib/kiba/extend/transforms/append/nil_fields.rb,
lib/kiba/extend/transforms/deduplicate/table.rb,
lib/kiba/extend/utils/lookup/field_emptiness.rb,
lib/kiba/extend/utils/marc_role_term_cleaner.rb,
lib/kiba/extend/mixins/iterative_cleanup/jobs.rb,
lib/kiba/extend/transforms/clean/strip_fields.rb,
lib/kiba/extend/transforms/count/field_values.rb,
lib/kiba/extend/transforms/deduplicate/fields.rb,
lib/kiba/extend/transforms/fingerprint/decode.rb,
lib/kiba/extend/transforms/warn/uneven_fields.rb,
lib/kiba/extend/utils/lookup/criteria_checker.rb,
lib/kiba/extend/utils/multi_source_normalizer.rb,
lib/kiba/extend/transforms/action_argumentable.rb,
lib/kiba/extend/transforms/delete/empty_fields.rb,
lib/kiba/extend/transforms/fraction/to_decimal.rb,
lib/kiba/extend/transforms/marc/field_linkable.rb,
lib/kiba/extend/transforms/marc/filter_records.rb,
lib/kiba/extend/transforms/name/split_inverted.rb,
lib/kiba/extend/transforms/sort/by_field_value.rb,
lib/kiba/extend/registry/registered_destination.rb,
lib/kiba/extend/transforms/compare/field_values.rb,
lib/kiba/extend/transforms/cspace/convert_to_id.rb,
lib/kiba/extend/transforms/deduplicate/flag_all.rb,
lib/kiba/extend/transforms/delete/fields_except.rb,
lib/kiba/extend/transforms/merge/constant_value.rb,
lib/kiba/extend/transforms/reshape/simple_pivot.rb,
lib/kiba/extend/registry/registry_entry_selector.rb,
lib/kiba/extend/transforms/append/to_field_value.rb,
lib/kiba/extend/transforms/merge/constant_values.rb,
lib/kiba/extend/transforms/string_value/to_array.rb,
lib/kiba/extend/transforms/cspace/address_country.rb,
lib/kiba/extend/transforms/marc/extract_245_title.rb,
lib/kiba/extend/transforms/marc/extract_name_data.rb,
lib/kiba/extend/transforms/merge/multi_row_lookup.rb,
lib/kiba/extend/transforms/prepend/to_field_value.rb,
lib/kiba/extend/utils/lookup/row_selector_by_hash.rb,
lib/kiba/extend/transforms/clean/even_field_values.rb,
lib/kiba/extend/transforms/cspace/normalize_for_id.rb,
lib/kiba/extend/transforms/filter_rows/with_lambda.rb,
lib/kiba/extend/mixins/iterative_cleanup/jobs/final.rb,
lib/kiba/extend/transforms/deduplicate/field_values.rb,
lib/kiba/extend/transforms/fingerprint/flag_changed.rb,
lib/kiba/extend/transforms/helpers/org_name_checker.rb,
lib/kiba/extend/utils/lookup/row_selector_by_lambda.rb,
lib/kiba/extend/transforms/delete/empty_field_groups.rb,
lib/kiba/extend/transforms/delete/empty_field_values.rb,
lib/kiba/extend/transforms/marc/filter_records/by_id.rb,
lib/kiba/extend/transforms/marc/language_code_lookup.rb,
lib/kiba/extend/transforms/merge/compare_fields_flag.rb,
lib/kiba/extend/transforms/merge/multivalue_constant.rb,
lib/kiba/extend/transforms/combine_values/full_record.rb,
lib/kiba/extend/transforms/filter_rows/field_equal_to.rb,
lib/kiba/extend/transforms/helpers/delim_only_checker.rb,
lib/kiba/extend/transforms/helpers/field_value_getter.rb,
lib/kiba/extend/transforms/marc/extract_org_name_data.rb,
lib/kiba/extend/transforms/replace/empty_field_values.rb,
lib/kiba/extend/utils/iterative_cleanup_job_registrar.rb,
lib/kiba/extend/transforms/filter_rows/field_populated.rb,
lib/kiba/extend/transforms/fingerprint/merge_corrected.rb,
lib/kiba/extend/transforms/helpers/person_name_checker.rb,
lib/kiba/extend/transforms/marc/extract_base_name_data.rb,
lib/kiba/extend/transforms/split/into_multiple_columns.rb,
lib/kiba/extend/transforms/split/publication_statement.rb,
lib/kiba/extend/transforms/warn/if_field_value_matches.rb,
lib/kiba/extend/utils/delim_in_value_fingerprint_error.rb,
lib/kiba/extend/mixins/iterative_cleanup/jobs/worksheet.rb,
lib/kiba/extend/registry/creator/hash_creator_key_error.rb,
lib/kiba/extend/transforms/prepend/field_to_field_value.rb,
lib/kiba/extend/transforms/count/matching_rows_in_lookup.rb,
lib/kiba/extend/transforms/marc/extract_person_name_data.rb,
lib/kiba/extend/mixins/iterative_cleanup/jobs/corrections.rb,
lib/kiba/extend/transforms/clean/ensure_consistent_fields.rb,
lib/kiba/extend/transforms/cspace/flag_invalid_characters.rb,
lib/kiba/extend/transforms/filter_rows/field_match_regexp.rb,
lib/kiba/extend/transforms/helpers/field_evenness_checker.rb,
lib/kiba/extend/transforms/marc/extract_meeting_name_data.rb,
lib/kiba/extend/mixins/iterative_cleanup/jobs/cleaned_uniq.rb,
lib/kiba/extend/registry/creator/hash_creator_callee_error.rb,
lib/kiba/extend/transforms/append/converted_value_and_unit.rb,
lib/kiba/extend/transforms/delete/fieldnames_starting_with.rb,
lib/kiba/extend/transforms/marc/filter_records/with_lambda.rb,
lib/kiba/extend/transforms/warn/unless_field_value_matches.rb,
lib/kiba/extend/transforms/deduplicate/grouped_field_values.rb,
lib/kiba/extend/transforms/explode/rows_from_multival_field.rb,
lib/kiba/extend/transforms/filter_rows/all_fields_populated.rb,
lib/kiba/extend/transforms/filter_rows/any_fields_populated.rb,
lib/kiba/extend/transforms/merge/constant_value_conditional.rb,
lib/kiba/extend/transforms/multival_plus_delim_deprecatable.rb,
lib/kiba/extend/transforms/marc/extract_subfields_from_field.rb,
lib/kiba/extend/registry/creator/hash_creator_args_type_error.rb,
lib/kiba/extend/registry/creator/jobless_module_creator_error.rb,
lib/kiba/extend/transforms/delete/delimiter_only_field_values.rb,
lib/kiba/extend/transforms/delete/field_value_matching_regexp.rb,
lib/kiba/extend/mixins/iterative_cleanup/jobs/base_job_cleaned.rb,
lib/kiba/extend/transforms/collapse/fields_to_typed_field_pair.rb,
lib/kiba/extend/mixins/iterative_cleanup/jobs/returned_compiled.rb,
lib/kiba/extend/mixins/iterative_cleanup/known_worksheet_values.rb,
lib/kiba/extend/transforms/clean/regexp_find_replace_field_vals.rb,
lib/kiba/extend/transforms/collapse/fields_with_custom_fieldmap.rb,
lib/kiba/extend/transforms/delete/field_value_containing_string.rb,
lib/kiba/extend/transforms/fingerprint/delimiter_in_value_error.rb,
lib/kiba/extend/transforms/name/convert_inverted_to_direct_form.rb,
lib/kiba/extend/transforms/fingerprint/delimiter_collision_error.rb,
lib/kiba/extend/transforms/replace/field_value_with_static_mapping.rb,
lib/kiba/extend/transforms/delete/field_value_if_equals_other_field.rb,
lib/kiba/extend/transforms/collapse/fields_to_repeatable_field_group.rb,
lib/kiba/extend/transforms/combine_values/from_fields_with_delimiter.rb,
lib/kiba/extend/transforms/explode/rows_from_grouped_multival_fields.rb,
lib/kiba/extend/transforms/replace/norm_with_most_frequently_used_form.rb,
lib/kiba/extend/transforms/reshape/fields_to_field_group_with_constant.rb

Overview

Handles:

  • auto-loading of the code
  • extending Kiba with Kiba::Extend::Jobs::JobSegmenter so we can call Kiba.job_segment
  • defining config settings, all of which can be overridden by project applications using kiba-extend

Also defines some CSV converters:

  • :stripextra – strips leading/trailing spaces, collapses multiple spaces, removes terminal commas, strips again
  • :nulltonil – replaces any values that are a literal string NULL with a nil value
  • :stripplus – strips leading/trailing spaces, collapses multiple spaces, removes terminal commas, strips again, removes “NULL” (i.e. literal string “NULL” becomes a nil value)

Note that :stripplus combines the functionality of :stripextra and :nulltonil

About pre-job task settings

If configured properly, the pre-job task is run when a job is run via Thor invocation. This includes run:job, run:jobs, and jobs:tagged -r tagvalue. The task is run once when the Thor task is invoked.

Defined Under Namespace

Modules: Command, Data, Destinations, ErrMod, Job, Jobs, Marc, Mixins, Registry, Sources, Transforms, Utils

Classes: BooleanReturningLambdaError, Error, InvalidActionError, IterativeCleanupSettingUndefinedError, JobCannotBeUsedAsLookupError, NoLookupOnError, NonSymbolLookupOnError, PathRequiredError, ProjectSettingUndefinedError

Constant Summary collapse

VERSION =
"4.0.1"

Class Method Summary collapse

Class Method Details

.config_namespaces ⇒ Array<Module>

Note:

You must set this from an individual project if you wish to use the Kiba::Extend::Mixins::IterativeCleanup mixin.

Ruby modules that serve as namespaces under which config modules for a project are nested.

Returns:

  • (Array<Module>)

Since:

  • 4.0.0



95
# File 'lib/kiba/extend.rb', line 95

setting :config_namespaces, default: [], reader: true

.csvopts ⇒ Hash

Default options used for CSV sources/destinations

Returns:

  • (Hash)


100
101
102
# File 'lib/kiba/extend.rb', line 100

setting :csvopts,
default: {headers: true, header_converters: %i[symbol downcase]},
reader: true

.default_job_method_name ⇒ Symbol

The job definition module method expected to be present if you define a registry entry hash creator as a Module

Returns:

  • (Symbol)


181
# File 'lib/kiba/extend.rb', line 181

setting :default_job_method_name, default: :job, reader: true

.delim ⇒ String

Default delimiter for splitting/joining values in multi-valued fields.

'a|b'.split(Kiba::Extend.delim) => ['a', 'b']

Returns:

  • (String)


116
# File 'lib/kiba/extend.rb', line 116

setting :delim, default: "|", reader: true

.destination ⇒ Class

Default destination class for jobs. Must meet implementation criteria in Kiba wiki

Returns:

  • (Class)


158
159
160
# File 'lib/kiba/extend.rb', line 158

setting :destination, constructor: proc {
  Kiba::Extend::Destinations::CSV
}, reader: true

.job_show_me ⇒ Boolean

Whether to output results to STDOUT for debugging

Returns:

  • (Boolean)


230
# File 'lib/kiba/extend.rb', line 230

setting :job_show_me, default: false, reader: true

.job_tell_me ⇒ Boolean

Whether to have computer audibly say something when job is complete

Returns:

  • (Boolean)


235
# File 'lib/kiba/extend.rb', line 235

setting :job_tell_me, default: false, reader: true

.job_verbosity ⇒ :debug, ...

How much information about jobs to print to STDOUT

  • :debug - tells you A LOT - helpful when developing pipelines and debugging
  • :normal - reports what is running, from where, and the results
  • :minimal - bare minimum

Returns:

  • (:debug, :normal, :minimal)


245
# File 'lib/kiba/extend.rb', line 245

setting :job_verbosity, default: :normal, reader: true

.ke_dir ⇒ String

Returns the path to this application’s data directory, which is used internally by transforms and utils and is not specific to any project.

Returns:

  • (String)

    the path to this application’s data directory, which is used internally by transforms and utils and is not specific to any project



56
57
58
59
60
# File 'lib/kiba/extend.rb', line 56

setting :ke_dir,
reader: true,
constructor: ->(value) do
  Gem.loaded_specs["kiba-extend"].full_gem_path
end

.lambdaopts ⇒ Hash

Default settings for Lambda destination

Returns:

  • (Hash)


106
107
108
# File 'lib/kiba/extend.rb', line 106

setting :lambdaopts, default: {on_write: ->(r) {
  accumulator << r
}}, reader: true

.loader ⇒ Object



62
63
64
# File 'lib/kiba/extend.rb', line 62

def loader
  @loader ||= setup_loader
end

.nullvalue ⇒ String

Default string to be treated as though it were a null/empty value.

Returns:

  • (String)


134
# File 'lib/kiba/extend.rb', line 134

setting :nullvalue, default: "%NULLVALUE%", reader: true

.pre_job_task_action ⇒ :backup, :nuke

Controls what happens when pre-job task is run

  • :backup - Moves all existing files in specified directories to backup directory created in your :datadir
  • :nuke - Deletes all existing files in specified directories when a job is run. Make sure you only specify directories that contain derived/generated files!

Returns:

  • (:backup, :nuke)


211
# File 'lib/kiba/extend.rb', line 211

setting :pre_job_task_action, default: :backup, reader: true

.pre_job_task_backup_dir ⇒ String

Full path to directory to which files will be moved if pre_job_task_action == :backup. The directory will be created if it does not exist.

Returns:

  • (String)


195
# File 'lib/kiba/extend.rb', line 195

setting :pre_job_task_backup_dir, default: nil, reader: true

.pre_job_task_directories ⇒ Array<String>

Full paths to directories that will be affected by the specified pre-task action

Returns:

  • (Array<String>)


200
# File 'lib/kiba/extend.rb', line 200

setting :pre_job_task_directories, default: [], reader: true

.pre_job_task_mode ⇒ :job, ...

Controls whether pre-job task is run

  • :job - runs pre-job task specified above whenever you invoke thor run:job .... All dependency jobs required for the invoked job will be run. This mode is recommended during development when you want any change in the dependency chain to get picked up.
  • any other value - only regenerates missing dependency files. Useful when your data is really big and/or your jobs are more stable

Returns:

  • (:job, nil, anyValue)


225
# File 'lib/kiba/extend.rb', line 225

setting :pre_job_task_mode, default: :job, reader: true

.pre_job_task_run ⇒ Boolean

Whether to use Kiba::Extend’s pre-job task functionality. The default is false for backward compatibility, as existing projects may not have the required settings configured.

Returns:

  • (Boolean)


188
# File 'lib/kiba/extend.rb', line 188

setting :pre_job_task_run, default: false, reader: true

.project_configs ⇒ Array<Module>

List of config modules in project namespaces set in config_namespaces setting

Returns:

  • (Array<Module>)

Since:

  • 4.0.0



252
253
254
255
256
# File 'lib/kiba/extend.rb', line 252

def project_configs
  config_namespaces.map { |ns| get_config_mods(ns, ns.constants) }
    .flatten
    .select { |obj| obj.is_a?(Module) && obj.respond_to?(:config) }
end

.registry ⇒ Kiba::Extend::Registry::FileRegistry

A customized dry-container for registering and resolving jobs



172
173
174
# File 'lib/kiba/extend.rb', line 172

setting :registry,
constructor: proc { Kiba::Extend::Registry::FileRegistry.new },
reader: true

.registry_namespace_separator ⇒ String

Used to join nested namespaces and registered keys in FileRegistry. With namespace ‘ns’ and registered key ‘foo’: ‘ns__foo’. With parent namespace ‘ns’, child namespace ‘child’, and registered key ‘foo’: ‘ns__child__foo’

Returns:

  • (String)


142
# File 'lib/kiba/extend.rb', line 142

setting :registry_namespace_separator, default: "__", reader: true

.reload! ⇒ Object



84
85
86
# File 'lib/kiba/extend.rb', line 84

def reload!
  @loader.reload
end

.sgdelim ⇒ String

Default subgrouping delimiter for splitting/joining values in multi-valued fields

orig = 'a^^y|b^^z'
delim_split = orig.split(delim)
sgdelim_split = delim_split.map{ |val| val.split(sgdelim) }
sgdelim_split => [['a', 'y'], ['b', 'z']]

Returns:

  • (String)


129
# File 'lib/kiba/extend.rb', line 129

setting :sgdelim, default: "^^", reader: true

.source ⇒ Class

Default source class for jobs. Must meet implementation criteria in Kiba wiki

Returns:

  • (Class)


149
150
151
# File 'lib/kiba/extend.rb', line 149

setting :source, constructor: proc {
  Kiba::Extend::Sources::CSV
}, reader: true

.warning_label ⇒ String

Prefix for warnings from the ETL

Returns:

  • (String)


165
# File 'lib/kiba/extend.rb', line 165

setting :warning_label, default: "KIBA WARNING", reader: true