Module: Kiba::Extend
- Extended by:
- Dry::Configurable
- Defined in:
- lib/kiba/extend.rb,
lib/kiba/extend/job.rb,
lib/kiba/extend/jobs.rb,
lib/kiba/extend/marc.rb,
lib/kiba/extend/error.rb,
lib/kiba/extend/command.rb,
lib/kiba/extend/sources.rb,
lib/kiba/extend/version.rb,
lib/kiba/extend/jobs/job.rb,
lib/kiba/extend/registry.rb,
lib/kiba/extend/transforms.rb,
lib/kiba/extend/command/reg.rb,
lib/kiba/extend/command/run.rb,
lib/kiba/extend/jobs/parser.rb,
lib/kiba/extend/jobs/runner.rb,
lib/kiba/extend/sources/csv.rb,
lib/kiba/extend/destinations.rb,
lib/kiba/extend/sources/marc.rb,
lib/kiba/extend/utils/lookup.rb,
lib/kiba/extend/jobs/base_job.rb,
lib/kiba/extend/jobs/marc_job.rb,
lib/kiba/extend/jobs/reporter.rb,
lib/kiba/extend/utils/fieldset.rb,
lib/kiba/extend/transforms/copy.rb,
lib/kiba/extend/transforms/marc.rb,
lib/kiba/extend/transforms/name.rb,
lib/kiba/extend/transforms/sort.rb,
lib/kiba/extend/transforms/take.rb,
lib/kiba/extend/transforms/warn.rb,
lib/kiba/extend/command/reg/list.rb,
lib/kiba/extend/command/runnable.rb,
lib/kiba/extend/destinations/csv.rb,
lib/kiba/extend/jobs/show_me_job.rb,
lib/kiba/extend/jobs/tell_me_job.rb,
lib/kiba/extend/registry/creator.rb,
lib/kiba/extend/sources/json_dir.rb,
lib/kiba/extend/transforms/clean.rb,
lib/kiba/extend/transforms/count.rb,
lib/kiba/extend/transforms/merge.rb,
lib/kiba/extend/transforms/split.rb,
lib/kiba/extend/destinations/marc.rb,
lib/kiba/extend/registry/fileable.rb,
lib/kiba/extend/transforms/append.rb,
lib/kiba/extend/transforms/cspace.rb,
lib/kiba/extend/transforms/delete.rb,
lib/kiba/extend/transforms/rename.rb,
lib/kiba/extend/transforms/report.rb,
lib/kiba/extend/utils/lookup_hash.rb,
lib/kiba/extend/jobs/job_segmenter.rb,
lib/kiba/extend/sources/enumerable.rb,
lib/kiba/extend/sources/lookupable.rb,
lib/kiba/extend/sources/sourceable.rb,
lib/kiba/extend/transforms/allable.rb,
lib/kiba/extend/transforms/compare.rb,
lib/kiba/extend/transforms/explode.rb,
lib/kiba/extend/transforms/extract.rb,
lib/kiba/extend/transforms/helpers.rb,
lib/kiba/extend/transforms/prepend.rb,
lib/kiba/extend/transforms/replace.rb,
lib/kiba/extend/transforms/reshape.rb,
lib/kiba/extend/utils/pre_job_task.rb,
lib/kiba/extend/destinations/lambda.rb,
lib/kiba/extend/jobs/dependency_job.rb,
lib/kiba/extend/transforms/collapse.rb,
lib/kiba/extend/transforms/fraction.rb,
lib/kiba/extend/jobs/json_to_csv_job.rb,
lib/kiba/extend/transforms/ms_access.rb,
lib/kiba/extend/transforms/copy/field.rb,
lib/kiba/extend/registry/file_registry.rb,
lib/kiba/extend/registry/registry_list.rb,
lib/kiba/extend/transforms/deduplicate.rb,
lib/kiba/extend/transforms/filter_rows.rb,
lib/kiba/extend/transforms/fingerprint.rb,
lib/kiba/extend/destinations/json_array.rb,
lib/kiba/extend/transforms/rename/field.rb,
lib/kiba/extend/transforms/string_value.rb,
lib/kiba/extend/utils/extract_fractions.rb,
lib/kiba/extend/utils/lookup/row_sorter.rb,
lib/kiba/extend/utils/marc_id_extractor.rb,
lib/kiba/extend/utils/marc_name_cleaner.rb,
lib/kiba/extend/utils/pre_job_nuke_task.rb,
lib/kiba/extend/utils/string_normalizer.rb,
lib/kiba/extend/mixins/iterative_cleanup.rb,
lib/kiba/extend/registry/registered_file.rb,
lib/kiba/extend/registry/requirable_file.rb,
lib/kiba/extend/transforms/delete/fields.rb,
lib/kiba/extend/transforms/rename/fields.rb,
lib/kiba/extend/transforms/report/fields.rb,
lib/kiba/extend/utils/lookup/set_checker.rb,
lib/kiba/extend/data/convertible_fraction.rb,
lib/kiba/extend/transforms/combine_values.rb,
lib/kiba/extend/utils/field_value_matcher.rb,
lib/kiba/extend/utils/fingerprint_creator.rb,
lib/kiba/extend/utils/lookup/row_selector.rb,
lib/kiba/extend/utils/pre_job_backup_task.rb,
lib/kiba/extend/jobs/multi_source_prep_job.rb,
lib/kiba/extend/registry/registered_lookup.rb,
lib/kiba/extend/registry/registered_source.rb,
lib/kiba/extend/transforms/fingerprint/add.rb,
lib/kiba/extend/transforms/single_warnable.rb,
lib/kiba/extend/utils/lookup/pair_equality.rb,
lib/kiba/extend/registry/creator/type_error.rb,
lib/kiba/extend/registry/registry_validator.rb,
lib/kiba/extend/transforms/deduplicate/flag.rb,
lib/kiba/extend/transforms/marc/extract_ids.rb,
lib/kiba/extend/transforms/sep_deprecatable.rb,
lib/kiba/extend/utils/lookup/multival_pairs.rb,
lib/kiba/extend/utils/lookup/pair_inclusion.rb,
lib/kiba/extend/destinations/destinationable.rb,
lib/kiba/extend/registry/file_registry_entry.rb,
lib/kiba/extend/transforms/append/nil_fields.rb,
lib/kiba/extend/transforms/count/unique_vals.rb,
lib/kiba/extend/transforms/deduplicate/table.rb,
lib/kiba/extend/utils/lookup/field_emptiness.rb,
lib/kiba/extend/utils/marc_role_term_cleaner.rb,
lib/kiba/extend/mixins/iterative_cleanup/jobs.rb,
lib/kiba/extend/transforms/clean/strip_fields.rb,
lib/kiba/extend/transforms/count/field_values.rb,
lib/kiba/extend/transforms/deduplicate/fields.rb,
lib/kiba/extend/transforms/fingerprint/decode.rb,
lib/kiba/extend/transforms/warn/uneven_fields.rb,
lib/kiba/extend/utils/lookup/criteria_checker.rb,
lib/kiba/extend/utils/multi_source_normalizer.rb,
lib/kiba/extend/transforms/action_argumentable.rb,
lib/kiba/extend/transforms/delete/empty_fields.rb,
lib/kiba/extend/transforms/fraction/to_decimal.rb,
lib/kiba/extend/transforms/marc/field_linkable.rb,
lib/kiba/extend/transforms/marc/filter_records.rb,
lib/kiba/extend/transforms/name/split_inverted.rb,
lib/kiba/extend/transforms/sort/by_field_value.rb,
lib/kiba/extend/registry/registered_destination.rb,
lib/kiba/extend/transforms/compare/field_values.rb,
lib/kiba/extend/transforms/cspace/convert_to_id.rb,
lib/kiba/extend/transforms/deduplicate/flag_all.rb,
lib/kiba/extend/transforms/delete/fields_except.rb,
lib/kiba/extend/transforms/merge/constant_value.rb,
lib/kiba/extend/transforms/reshape/simple_pivot.rb,
lib/kiba/extend/registry/registry_entry_selector.rb,
lib/kiba/extend/transforms/append/to_field_value.rb,
lib/kiba/extend/transforms/merge/constant_values.rb,
lib/kiba/extend/transforms/string_value/to_array.rb,
lib/kiba/extend/transforms/cspace/address_country.rb,
lib/kiba/extend/transforms/marc/extract_245_title.rb,
lib/kiba/extend/transforms/marc/extract_name_data.rb,
lib/kiba/extend/transforms/merge/multi_row_lookup.rb,
lib/kiba/extend/transforms/prepend/to_field_value.rb,
lib/kiba/extend/utils/lookup/row_selector_by_hash.rb,
lib/kiba/extend/transforms/clean/even_field_values.rb,
lib/kiba/extend/transforms/cspace/normalize_for_id.rb,
lib/kiba/extend/transforms/filter_rows/with_lambda.rb,
lib/kiba/extend/mixins/iterative_cleanup/jobs/final.rb,
lib/kiba/extend/transforms/boolean_lambda_paramable.rb,
lib/kiba/extend/transforms/deduplicate/field_values.rb,
lib/kiba/extend/transforms/fingerprint/flag_changed.rb,
lib/kiba/extend/transforms/helpers/org_name_checker.rb,
lib/kiba/extend/transforms/merge/incrementing_field.rb,
lib/kiba/extend/utils/lookup/row_selector_by_lambda.rb,
lib/kiba/extend/transforms/delete/empty_field_groups.rb,
lib/kiba/extend/transforms/delete/empty_field_values.rb,
lib/kiba/extend/transforms/marc/filter_records/by_id.rb,
lib/kiba/extend/transforms/marc/language_code_lookup.rb,
lib/kiba/extend/transforms/merge/compare_fields_flag.rb,
lib/kiba/extend/transforms/merge/multivalue_constant.rb,
lib/kiba/extend/transforms/combine_values/full_record.rb,
lib/kiba/extend/transforms/filter_rows/field_equal_to.rb,
lib/kiba/extend/transforms/helpers/delim_only_checker.rb,
lib/kiba/extend/transforms/helpers/field_value_getter.rb,
lib/kiba/extend/transforms/marc/extract_org_name_data.rb,
lib/kiba/extend/transforms/replace/empty_field_values.rb,
lib/kiba/extend/utils/iterative_cleanup_job_registrar.rb,
lib/kiba/extend/transforms/filter_rows/field_populated.rb,
lib/kiba/extend/transforms/fingerprint/merge_corrected.rb,
lib/kiba/extend/transforms/helpers/person_name_checker.rb,
lib/kiba/extend/transforms/marc/extract_base_name_data.rb,
lib/kiba/extend/transforms/split/into_multiple_columns.rb,
lib/kiba/extend/transforms/split/publication_statement.rb,
lib/kiba/extend/transforms/warn/if_field_value_matches.rb,
lib/kiba/extend/utils/delim_in_value_fingerprint_error.rb,
lib/kiba/extend/mixins/iterative_cleanup/jobs/worksheet.rb,
lib/kiba/extend/registry/creator/hash_creator_key_error.rb,
lib/kiba/extend/transforms/prepend/field_to_field_value.rb,
lib/kiba/extend/transforms/count/matching_rows_in_lookup.rb,
lib/kiba/extend/transforms/marc/extract_person_name_data.rb,
lib/kiba/extend/mixins/iterative_cleanup/jobs/corrections.rb,
lib/kiba/extend/transforms/clean/ensure_consistent_fields.rb,
lib/kiba/extend/transforms/cspace/flag_invalid_characters.rb,
lib/kiba/extend/transforms/delete/field_value_conditional.rb,
lib/kiba/extend/transforms/filter_rows/field_match_regexp.rb,
lib/kiba/extend/transforms/helpers/field_evenness_checker.rb,
lib/kiba/extend/transforms/marc/extract_meeting_name_data.rb,
lib/kiba/extend/mixins/iterative_cleanup/jobs/cleaned_uniq.rb,
lib/kiba/extend/registry/creator/hash_creator_callee_error.rb,
lib/kiba/extend/transforms/append/converted_value_and_unit.rb,
lib/kiba/extend/transforms/delete/fieldnames_starting_with.rb,
lib/kiba/extend/transforms/marc/filter_records/with_lambda.rb,
lib/kiba/extend/transforms/warn/unless_field_value_matches.rb,
lib/kiba/extend/transforms/deduplicate/grouped_field_values.rb,
lib/kiba/extend/transforms/explode/field_values_to_new_rows.rb,
lib/kiba/extend/transforms/explode/rows_from_multival_field.rb,
lib/kiba/extend/transforms/filter_rows/all_fields_populated.rb,
lib/kiba/extend/transforms/filter_rows/any_fields_populated.rb,
lib/kiba/extend/transforms/merge/constant_value_conditional.rb,
lib/kiba/extend/transforms/multival_plus_delim_deprecatable.rb,
lib/kiba/extend/transforms/marc/extract_subfields_from_field.rb,
lib/kiba/extend/registry/creator/hash_creator_args_type_error.rb,
lib/kiba/extend/registry/creator/jobless_module_creator_error.rb,
lib/kiba/extend/transforms/delete/delimiter_only_field_values.rb,
lib/kiba/extend/transforms/delete/field_value_matching_regexp.rb,
lib/kiba/extend/mixins/iterative_cleanup/jobs/base_job_cleaned.rb,
lib/kiba/extend/transforms/collapse/fields_to_typed_field_pair.rb,
lib/kiba/extend/mixins/iterative_cleanup/jobs/returned_compiled.rb,
lib/kiba/extend/mixins/iterative_cleanup/known_worksheet_values.rb,
lib/kiba/extend/transforms/clean/regexp_find_replace_field_vals.rb,
lib/kiba/extend/transforms/collapse/fields_with_custom_fieldmap.rb,
lib/kiba/extend/transforms/delete/field_value_containing_string.rb,
lib/kiba/extend/transforms/explode/columns_remapped_in_new_rows.rb,
lib/kiba/extend/transforms/fingerprint/delimiter_in_value_error.rb,
lib/kiba/extend/transforms/name/convert_inverted_to_direct_form.rb,
lib/kiba/extend/transforms/fingerprint/delimiter_collision_error.rb,
lib/kiba/extend/transforms/replace/field_value_with_static_mapping.rb,
lib/kiba/extend/transforms/delete/field_value_if_equals_other_field.rb,
lib/kiba/extend/transforms/collapse/fields_to_repeatable_field_group.rb,
lib/kiba/extend/transforms/combine_values/from_fields_with_delimiter.rb,
lib/kiba/extend/transforms/explode/rows_from_grouped_multival_fields.rb,
lib/kiba/extend/transforms/replace/norm_with_most_frequently_used_form.rb,
lib/kiba/extend/transforms/reshape/fields_to_field_group_with_constant.rb
Overview
Handles:
- auto-loading of the code
- extending Kiba with Kiba::Extend::Jobs::JobSegmenter so we can call Kiba.job_segment
- defining config settings, all of which can be overridden by project applications using kiba-extend
Also defines some CSV converters:
- :stripextra – strips leading/trailing spaces, collapses multiple spaces, removes terminal commas, strips again
- :nulltonil – replaces any values that are a literal string NULL with a nil value
- :stripplus – strips leading/trailing spaces, collapses multiple spaces, removes terminal commas, strips again, removes “NULL” (i.e. literal string “NULL” becomes a nil value)
Note that :stripplus combines the functionality of :stripextra
and :nulltonil
About pre-job task settings
If configured properly, the pre-job task is run when a job is run
via Thor invocation. This includes run:job, run:jobs, and
jobs:tagged -r tagvalue. The task is run once when the Thor
task is invoked.
Defined Under Namespace
Modules: Command, Data, Destinations, ErrMod, Job, Jobs, Marc, Mixins, Registry, Sources, Transforms, Utils
Classes: BooleanReturningLambdaError, Error, InvalidActionError, IterativeCleanupSettingUndefinedError, JobCannotBeUsedAsLookupError, NoLookupOnError, NonSymbolLookupOnError, PathRequiredError, ProjectSettingUndefinedError, UnsafeParameterComboError
Constant Summary collapse
- VERSION =
"5.0.0"
Class Method Summary collapse
-
.config_namespaces ⇒ Array<Module>
Ruby modules that serve as namespaces under which config modules for a project are nested.
-
.csvopts ⇒ Hash
Default options used for CSV sources/destinations.
-
.default_job_method_name ⇒ Symbol
The job definition module method expected to be present if you define a registry entry hash creator as a Module.
-
.delim ⇒ String
Default delimiter for splitting/joining values in multi-valued fields.
-
.destination ⇒ Class
Default destination class for jobs.
-
.job_show_me ⇒ Boolean
Whether to output results to STDOUT for debugging.
-
.job_tell_me ⇒ Boolean
Whether to have computer audibly say something when job is complete.
-
.job_verbosity ⇒ :debug, ...
How much output about jobs to output to STDOUT.
-
.ke_dir ⇒ String
Path to this application’s data directory (used internally by transforms and utils), and not specific to a project.
-
.lambdaopts ⇒ Hash
Default settings for Lambda destination.
-
.loader ⇒ Object
-
.nullvalue ⇒ String
Default string to be treated as though it were a null/empty value.
-
.pre_job_task_action ⇒ :backup, :nuke
Controls what happens when pre-job task is run.
-
.pre_job_task_backup_dir ⇒ String
Full path to directory to which files will be moved if
pre_job_task_action == :backup.
-
.pre_job_task_directories ⇒ Array<String>
Full paths to directories that will be affected by the specified pre-task action.
-
.pre_job_task_mode ⇒ :job, ...
Controls whether pre-job task is run.
-
.pre_job_task_run ⇒ Boolean
Whether to use Kiba::Extend’s pre-job task functionality.
-
.project_configs ⇒ Array<Module>
List of config modules in project namespaces set in Extend.config_namespaces setting.
-
.registry ⇒ Kiba::Extend::Registry::FileRegistry
A customized dry-container for registering and resolving jobs.
-
.registry_namespace_separator ⇒ String
Used to join nested namespaces and registered keys in FileRegistry.
-
.reload! ⇒ Object
-
.sgdelim ⇒ String
Default subgrouping delimiter for splitting/joining values in multi-valued fields.
-
.source ⇒ Class
Default source class for jobs.
-
.warning_label ⇒ String
Prefix for warnings from the ETL.
Class Method Details
.config_namespaces ⇒ Array<Module>
You must set this from an individual project if you wish to use the Kiba::Extend::Mixins::IterativeCleanup mixin.
Ruby modules that serve as namespaces under which config modules for a project are nested.
95 |
# File 'lib/kiba/extend.rb', line 95 setting :config_namespaces, default: [], reader: true |
.csvopts ⇒ Hash
Default options used for CSV sources/destinations
100 101 102 |
# File 'lib/kiba/extend.rb', line 100 setting :csvopts, default: {headers: true, header_converters: %i[symbol downcase]}, reader: true |
.default_job_method_name ⇒ Symbol
The job definition module method expected to be present if you define a registry entry hash creator as a Module
181 |
# File 'lib/kiba/extend.rb', line 181 setting :default_job_method_name, default: :job, reader: true |
.delim ⇒ String
116 |
# File 'lib/kiba/extend.rb', line 116 setting :delim, default: "|", reader: true |
.destination ⇒ Class
Default destination class for jobs. Must meet implementation criteria in Kiba wiki
158 159 160 |
# File 'lib/kiba/extend.rb', line 158 setting :destination, constructor: proc { Kiba::Extend::Destinations::CSV }, reader: true |
.job_show_me ⇒ Boolean
Whether to output results to STDOUT for debugging
230 |
# File 'lib/kiba/extend.rb', line 230 setting :job_show_me, default: false, reader: true |
.job_tell_me ⇒ Boolean
Whether to have computer audibly say something when job is complete
235 |
# File 'lib/kiba/extend.rb', line 235 setting :job_tell_me, default: false, reader: true |
.job_verbosity ⇒ :debug, ...
How much output about jobs to output to STDOUT
- :debug - everything from verbose, plus what is being registered, transformed, etc
- :verbose - reports what is running, from where, dependencies, and results
- :normal - reports what is running, from where, and the results
- :minimal - bare minimum
247 |
# File 'lib/kiba/extend.rb', line 247 setting :job_verbosity, default: :normal, reader: true |
.ke_dir ⇒ String
Returns path to this application’s data directory (used internally by transforms and utils), and not specific to a project.
56 57 58 59 60 |
# File 'lib/kiba/extend.rb', line 56 setting :ke_dir, reader: true, constructor: ->(value) do Gem.loaded_specs["kiba-extend"].full_gem_path end |
.lambdaopts ⇒ Hash
Default settings for Lambda destination
106 107 108 |
# File 'lib/kiba/extend.rb', line 106 setting :lambdaopts, default: {on_write: ->(r) { accumulator << r }}, reader: true |
.loader ⇒ Object
62 63 64 |
# File 'lib/kiba/extend.rb', line 62 def loader @loader ||= setup_loader end |
.nullvalue ⇒ String
Default string to be treated as though it were a null/empty value.
134 |
# File 'lib/kiba/extend.rb', line 134 setting :nullvalue, default: "%NULLVALUE%", reader: true |
.pre_job_task_action ⇒ :backup, :nuke
Controls what happens when pre-job task is run
- :backup - Moves all existing files in specified directories to backup directory created in your :datadir
- :nuke - Deletes all existing files in specified directories when a job is run. Make sure you only specify directories that contain derived/generated files!
211 |
# File 'lib/kiba/extend.rb', line 211 setting :pre_job_task_action, default: :backup, reader: true |
.pre_job_task_backup_dir ⇒ String
Full path to directory to which files will be moved if
pre_job_task_action == :backup. The directory will be
created if it does not exist.
195 |
# File 'lib/kiba/extend.rb', line 195 setting :pre_job_task_backup_dir, default: nil, reader: true |
.pre_job_task_directories ⇒ Array<String>
Full paths to directories that will be affected by the specified pre-task action
200 |
# File 'lib/kiba/extend.rb', line 200 setting :pre_job_task_directories, default: [], reader: true |
.pre_job_task_mode ⇒ :job, ...
Controls whether pre-job task is run
- :job - runs pre-job task specified above whenever you invoke thor run:job .... All dependency jobs required for the invoked job will be run. This mode is recommended during development when you want any change in the dependency chain to get picked up.
- any other value - only regenerates missing dependency files. Useful when your data is really big and/or your jobs are more stable
225 |
# File 'lib/kiba/extend.rb', line 225 setting :pre_job_task_mode, default: :job, reader: true |
.pre_job_task_run ⇒ Boolean
Whether to use Kiba::Extend’s pre-job task functionality. The
default is false for backward compatibility, as existing
projects may not have the required settings configured.
188 |
# File 'lib/kiba/extend.rb', line 188 setting :pre_job_task_run, default: false, reader: true |
.project_configs ⇒ Array<Module>
List of config modules in project namespaces set in config_namespaces setting
254 255 256 257 258 |
# File 'lib/kiba/extend.rb', line 254 def project_configs config_namespaces.map { |ns| get_config_mods(ns, ns.constants) } .flatten .select { |obj| obj.is_a?(Module) && obj.respond_to?(:config) } end |
.registry ⇒ Kiba::Extend::Registry::FileRegistry
A customized dry-container for registering and resolving jobs
172 173 174 |
# File 'lib/kiba/extend.rb', line 172 setting :registry, constructor: proc { Kiba::Extend::Registry::FileRegistry.new }, reader: true |
.registry_namespace_separator ⇒ String
Used to join nested namespaces and registered keys in FileRegistry. With namespace ‘ns’ and registered key ‘foo’: ‘ns__foo’. With parent namespace ‘ns’, child namespace ‘child’, and registered key ‘foo’: ‘ns__child__foo’
142 |
# File 'lib/kiba/extend.rb', line 142 setting :registry_namespace_separator, default: "__", reader: true |
.reload! ⇒ Object
84 85 86 |
# File 'lib/kiba/extend.rb', line 84 def reload! @loader.reload end |
.sgdelim ⇒ String
Default subgrouping delimiter for splitting/joining values in multi-valued fields
orig = 'a^^y|b^^z'
delim_split = orig.split(delim)
sgdelim_split = delim_split.map{ |val| val.split(sgdelim) }
sgdelim_split => [['a', 'y'], ['b', 'z']]
129 |
# File 'lib/kiba/extend.rb', line 129 setting :sgdelim, default: "^^", reader: true |
.source ⇒ Class
Default source class for jobs. Must meet implementation criteria in Kiba wiki
149 150 151 |
# File 'lib/kiba/extend.rb', line 149 setting :source, constructor: proc { Kiba::Extend::Sources::CSV }, reader: true |
.warning_label ⇒ String
Prefix for warnings from the ETL
165 |
# File 'lib/kiba/extend.rb', line 165 setting :warning_label, default: "KIBA WARNING", reader: true |