Module: Kiba::Extend
- Extended by:
- Dry::Configurable
- Defined in:
- lib/kiba/extend.rb,
lib/kiba/extend/job.rb,
lib/kiba/extend/jobs.rb,
lib/kiba/extend/marc.rb,
lib/kiba/extend/error.rb,
lib/kiba/extend/command.rb,
lib/kiba/extend/sources.rb,
lib/kiba/extend/version.rb,
lib/kiba/extend/jobs/job.rb,
lib/kiba/extend/registry.rb,
lib/kiba/extend/transforms.rb,
lib/kiba/extend/command/reg.rb,
lib/kiba/extend/command/run.rb,
lib/kiba/extend/jobs/parser.rb,
lib/kiba/extend/jobs/runner.rb,
lib/kiba/extend/sources/csv.rb,
lib/kiba/extend/destinations.rb,
lib/kiba/extend/sources/marc.rb,
lib/kiba/extend/utils/lookup.rb,
lib/kiba/extend/jobs/base_job.rb,
lib/kiba/extend/jobs/marc_job.rb,
lib/kiba/extend/jobs/reporter.rb,
lib/kiba/extend/utils/fieldset.rb,
lib/kiba/extend/transforms/copy.rb,
lib/kiba/extend/transforms/marc.rb,
lib/kiba/extend/transforms/name.rb,
lib/kiba/extend/transforms/sort.rb,
lib/kiba/extend/transforms/take.rb,
lib/kiba/extend/transforms/warn.rb,
lib/kiba/extend/command/reg/list.rb,
lib/kiba/extend/command/runnable.rb,
lib/kiba/extend/destinations/csv.rb,
lib/kiba/extend/jobs/show_me_job.rb,
lib/kiba/extend/jobs/tell_me_job.rb,
lib/kiba/extend/registry/creator.rb,
lib/kiba/extend/sources/json_dir.rb,
lib/kiba/extend/transforms/clean.rb,
lib/kiba/extend/transforms/count.rb,
lib/kiba/extend/transforms/merge.rb,
lib/kiba/extend/transforms/split.rb,
lib/kiba/extend/destinations/marc.rb,
lib/kiba/extend/registry/fileable.rb,
lib/kiba/extend/transforms/append.rb,
lib/kiba/extend/transforms/cspace.rb,
lib/kiba/extend/transforms/delete.rb,
lib/kiba/extend/transforms/rename.rb,
lib/kiba/extend/utils/lookup_hash.rb,
lib/kiba/extend/jobs/job_segmenter.rb,
lib/kiba/extend/sources/enumerable.rb,
lib/kiba/extend/sources/lookupable.rb,
lib/kiba/extend/sources/sourceable.rb,
lib/kiba/extend/transforms/allable.rb,
lib/kiba/extend/transforms/compare.rb,
lib/kiba/extend/transforms/explode.rb,
lib/kiba/extend/transforms/extract.rb,
lib/kiba/extend/transforms/helpers.rb,
lib/kiba/extend/transforms/prepend.rb,
lib/kiba/extend/transforms/replace.rb,
lib/kiba/extend/transforms/reshape.rb,
lib/kiba/extend/utils/pre_job_task.rb,
lib/kiba/extend/destinations/lambda.rb,
lib/kiba/extend/jobs/dependency_job.rb,
lib/kiba/extend/transforms/collapse.rb,
lib/kiba/extend/transforms/fraction.rb,
lib/kiba/extend/jobs/json_to_csv_job.rb,
lib/kiba/extend/transforms/ms_access.rb,
lib/kiba/extend/transforms/copy/field.rb,
lib/kiba/extend/registry/file_registry.rb,
lib/kiba/extend/registry/registry_list.rb,
lib/kiba/extend/transforms/deduplicate.rb,
lib/kiba/extend/transforms/filter_rows.rb,
lib/kiba/extend/transforms/fingerprint.rb,
lib/kiba/extend/destinations/json_array.rb,
lib/kiba/extend/transforms/rename/field.rb,
lib/kiba/extend/transforms/string_value.rb,
lib/kiba/extend/utils/extract_fractions.rb,
lib/kiba/extend/utils/lookup/row_sorter.rb,
lib/kiba/extend/utils/marc_id_extractor.rb,
lib/kiba/extend/utils/marc_name_cleaner.rb,
lib/kiba/extend/utils/pre_job_nuke_task.rb,
lib/kiba/extend/utils/string_normalizer.rb,
lib/kiba/extend/mixins/iterative_cleanup.rb,
lib/kiba/extend/registry/registered_file.rb,
lib/kiba/extend/registry/requirable_file.rb,
lib/kiba/extend/transforms/delete/fields.rb,
lib/kiba/extend/transforms/rename/fields.rb,
lib/kiba/extend/utils/lookup/set_checker.rb,
lib/kiba/extend/data/convertible_fraction.rb,
lib/kiba/extend/transforms/combine_values.rb,
lib/kiba/extend/utils/field_value_matcher.rb,
lib/kiba/extend/utils/fingerprint_creator.rb,
lib/kiba/extend/utils/lookup/row_selector.rb,
lib/kiba/extend/utils/pre_job_backup_task.rb,
lib/kiba/extend/jobs/multi_source_prep_job.rb,
lib/kiba/extend/registry/registered_lookup.rb,
lib/kiba/extend/registry/registered_source.rb,
lib/kiba/extend/transforms/fingerprint/add.rb,
lib/kiba/extend/transforms/single_warnable.rb,
lib/kiba/extend/utils/lookup/pair_equality.rb,
lib/kiba/extend/registry/creator/type_error.rb,
lib/kiba/extend/registry/registry_validator.rb,
lib/kiba/extend/transforms/deduplicate/flag.rb,
lib/kiba/extend/transforms/marc/extract_ids.rb,
lib/kiba/extend/transforms/sep_deprecatable.rb,
lib/kiba/extend/utils/lookup/multival_pairs.rb,
lib/kiba/extend/utils/lookup/pair_inclusion.rb,
lib/kiba/extend/destinations/destinationable.rb,
lib/kiba/extend/registry/file_registry_entry.rb,
lib/kiba/extend/transforms/append/nil_fields.rb,
lib/kiba/extend/transforms/deduplicate/table.rb,
lib/kiba/extend/utils/lookup/field_emptiness.rb,
lib/kiba/extend/utils/marc_role_term_cleaner.rb,
lib/kiba/extend/mixins/iterative_cleanup/jobs.rb,
lib/kiba/extend/transforms/clean/strip_fields.rb,
lib/kiba/extend/transforms/count/field_values.rb,
lib/kiba/extend/transforms/deduplicate/fields.rb,
lib/kiba/extend/transforms/fingerprint/decode.rb,
lib/kiba/extend/transforms/warn/uneven_fields.rb,
lib/kiba/extend/utils/lookup/criteria_checker.rb,
lib/kiba/extend/utils/multi_source_normalizer.rb,
lib/kiba/extend/transforms/action_argumentable.rb,
lib/kiba/extend/transforms/delete/empty_fields.rb,
lib/kiba/extend/transforms/fraction/to_decimal.rb,
lib/kiba/extend/transforms/marc/field_linkable.rb,
lib/kiba/extend/transforms/marc/filter_records.rb,
lib/kiba/extend/transforms/name/split_inverted.rb,
lib/kiba/extend/transforms/sort/by_field_value.rb,
lib/kiba/extend/registry/registered_destination.rb,
lib/kiba/extend/transforms/compare/field_values.rb,
lib/kiba/extend/transforms/cspace/convert_to_id.rb,
lib/kiba/extend/transforms/deduplicate/flag_all.rb,
lib/kiba/extend/transforms/delete/fields_except.rb,
lib/kiba/extend/transforms/merge/constant_value.rb,
lib/kiba/extend/transforms/reshape/simple_pivot.rb,
lib/kiba/extend/registry/registry_entry_selector.rb,
lib/kiba/extend/transforms/append/to_field_value.rb,
lib/kiba/extend/transforms/merge/constant_values.rb,
lib/kiba/extend/transforms/string_value/to_array.rb,
lib/kiba/extend/transforms/cspace/address_country.rb,
lib/kiba/extend/transforms/marc/extract_245_title.rb,
lib/kiba/extend/transforms/marc/extract_name_data.rb,
lib/kiba/extend/transforms/merge/multi_row_lookup.rb,
lib/kiba/extend/transforms/prepend/to_field_value.rb,
lib/kiba/extend/utils/lookup/row_selector_by_hash.rb,
lib/kiba/extend/transforms/clean/even_field_values.rb,
lib/kiba/extend/transforms/cspace/normalize_for_id.rb,
lib/kiba/extend/transforms/filter_rows/with_lambda.rb,
lib/kiba/extend/mixins/iterative_cleanup/jobs/final.rb,
lib/kiba/extend/transforms/deduplicate/field_values.rb,
lib/kiba/extend/transforms/fingerprint/flag_changed.rb,
lib/kiba/extend/transforms/helpers/org_name_checker.rb,
lib/kiba/extend/utils/lookup/row_selector_by_lambda.rb,
lib/kiba/extend/transforms/delete/empty_field_groups.rb,
lib/kiba/extend/transforms/delete/empty_field_values.rb,
lib/kiba/extend/transforms/marc/filter_records/by_id.rb,
lib/kiba/extend/transforms/marc/language_code_lookup.rb,
lib/kiba/extend/transforms/merge/compare_fields_flag.rb,
lib/kiba/extend/transforms/merge/multivalue_constant.rb,
lib/kiba/extend/transforms/combine_values/full_record.rb,
lib/kiba/extend/transforms/filter_rows/field_equal_to.rb,
lib/kiba/extend/transforms/helpers/delim_only_checker.rb,
lib/kiba/extend/transforms/helpers/field_value_getter.rb,
lib/kiba/extend/transforms/marc/extract_org_name_data.rb,
lib/kiba/extend/transforms/replace/empty_field_values.rb,
lib/kiba/extend/utils/iterative_cleanup_job_registrar.rb,
lib/kiba/extend/transforms/filter_rows/field_populated.rb,
lib/kiba/extend/transforms/fingerprint/merge_corrected.rb,
lib/kiba/extend/transforms/helpers/person_name_checker.rb,
lib/kiba/extend/transforms/marc/extract_base_name_data.rb,
lib/kiba/extend/transforms/split/into_multiple_columns.rb,
lib/kiba/extend/transforms/split/publication_statement.rb,
lib/kiba/extend/transforms/warn/if_field_value_matches.rb,
lib/kiba/extend/utils/delim_in_value_fingerprint_error.rb,
lib/kiba/extend/mixins/iterative_cleanup/jobs/worksheet.rb,
lib/kiba/extend/registry/creator/hash_creator_key_error.rb,
lib/kiba/extend/transforms/prepend/field_to_field_value.rb,
lib/kiba/extend/transforms/count/matching_rows_in_lookup.rb,
lib/kiba/extend/transforms/marc/extract_person_name_data.rb,
lib/kiba/extend/mixins/iterative_cleanup/jobs/corrections.rb,
lib/kiba/extend/transforms/clean/ensure_consistent_fields.rb,
lib/kiba/extend/transforms/cspace/flag_invalid_characters.rb,
lib/kiba/extend/transforms/filter_rows/field_match_regexp.rb,
lib/kiba/extend/transforms/helpers/field_evenness_checker.rb,
lib/kiba/extend/transforms/marc/extract_meeting_name_data.rb,
lib/kiba/extend/mixins/iterative_cleanup/jobs/cleaned_uniq.rb,
lib/kiba/extend/registry/creator/hash_creator_callee_error.rb,
lib/kiba/extend/transforms/append/converted_value_and_unit.rb,
lib/kiba/extend/transforms/delete/fieldnames_starting_with.rb,
lib/kiba/extend/transforms/marc/filter_records/with_lambda.rb,
lib/kiba/extend/transforms/warn/unless_field_value_matches.rb,
lib/kiba/extend/transforms/deduplicate/grouped_field_values.rb,
lib/kiba/extend/transforms/explode/rows_from_multival_field.rb,
lib/kiba/extend/transforms/filter_rows/all_fields_populated.rb,
lib/kiba/extend/transforms/filter_rows/any_fields_populated.rb,
lib/kiba/extend/transforms/merge/constant_value_conditional.rb,
lib/kiba/extend/transforms/multival_plus_delim_deprecatable.rb,
lib/kiba/extend/transforms/marc/extract_subfields_from_field.rb,
lib/kiba/extend/registry/creator/hash_creator_args_type_error.rb,
lib/kiba/extend/registry/creator/jobless_module_creator_error.rb,
lib/kiba/extend/transforms/delete/delimiter_only_field_values.rb,
lib/kiba/extend/transforms/delete/field_value_matching_regexp.rb,
lib/kiba/extend/mixins/iterative_cleanup/jobs/base_job_cleaned.rb,
lib/kiba/extend/transforms/collapse/fields_to_typed_field_pair.rb,
lib/kiba/extend/mixins/iterative_cleanup/jobs/returned_compiled.rb,
lib/kiba/extend/mixins/iterative_cleanup/known_worksheet_values.rb,
lib/kiba/extend/transforms/clean/regexp_find_replace_field_vals.rb,
lib/kiba/extend/transforms/collapse/fields_with_custom_fieldmap.rb,
lib/kiba/extend/transforms/delete/field_value_containing_string.rb,
lib/kiba/extend/transforms/fingerprint/delimiter_in_value_error.rb,
lib/kiba/extend/transforms/name/convert_inverted_to_direct_form.rb,
lib/kiba/extend/transforms/fingerprint/delimiter_collision_error.rb,
lib/kiba/extend/transforms/replace/field_value_with_static_mapping.rb,
lib/kiba/extend/transforms/delete/field_value_if_equals_other_field.rb,
lib/kiba/extend/transforms/collapse/fields_to_repeatable_field_group.rb,
lib/kiba/extend/transforms/combine_values/from_fields_with_delimiter.rb,
lib/kiba/extend/transforms/explode/rows_from_grouped_multival_fields.rb,
lib/kiba/extend/transforms/replace/norm_with_most_frequently_used_form.rb,
lib/kiba/extend/transforms/reshape/fields_to_field_group_with_constant.rb
Overview
Handles:
- auto-loading of the code
- extending
Kiba
with Kiba::Extend::Jobs::JobSegmenter
so we can call Kiba.job_segment
- defining config settings, all of which can be overridden by
project applications using
kiba-extend
Also defines some CSV converters:
:stripextra
– strips leading/trailing spaces, collapses multiple spaces, removes terminal commas, strips again
:nulltonil
– replaces any values that are a literal string NULL with a nil value
:stripplus
– strips leading/trailing spaces, collapses multiple spaces, removes terminal commas, strips again, removes “NULL” (i.e. literal string “NULL” becomes a nil value)
Note that :stripplus
combines the functionality of :stripextra
and :nulltonil
About pre-job task settings
If configured properly, the pre-job task is run when a job is run
via Thor invocation. This includes run:job, run:jobs, and
jobs:tagged -r tagvalue. The task is run once when the Thor
task is invoked.
Defined Under Namespace
Modules: Command, Data, Destinations, ErrMod, Job, Jobs, Marc, Mixins, Registry, Sources, Transforms, Utils
Classes: BooleanReturningLambdaError, Error, InvalidActionError, IterativeCleanupSettingUndefinedError, JobCannotBeUsedAsLookupError, NoLookupOnError, NonSymbolLookupOnError, PathRequiredError, ProjectSettingUndefinedError
Constant Summary collapse
- VERSION =
"4.0.1"
Class Method Summary collapse
-
.config_namespaces ⇒ Array<Module>
Ruby modules that serve as namespaces under which config modules for a project are nested.
-
.csvopts ⇒ Hash
Default options used for CSV sources/destinations.
-
.default_job_method_name ⇒ Symbol
The job definition module method expected to be present if you define a registry entry hash creator as a Module.
-
.delim ⇒ String
Default delimiter for splitting/joining values in multi-valued fields.
-
.destination ⇒ Class
Default destination class for jobs.
-
.job_show_me ⇒ Boolean
Whether to output results to STDOUT for debugging.
-
.job_tell_me ⇒ Boolean
Whether to have computer audibly say something when job is complete.
-
.job_verbosity ⇒ :debug, ...
How much output about jobs to output to STDOUT.
-
.ke_dir ⇒ String
Path to this application’s data directory (used internally by transforms and utils), and not specific to a project.
-
.lambdaopts ⇒ Hash
Default settings for Lambda destination.
-
.loader ⇒ Object
-
.nullvalue ⇒ String
Default string to be treated as though it were a null/empty value.
-
.pre_job_task_action ⇒ :backup, :nuke
Controls what happens when pre-job task is run.
-
.pre_job_task_backup_dir ⇒ String
Full path to directory to which files will be moved if
pre_job_task_action == :backup.
-
.pre_job_task_directories ⇒ Array<String>
Full paths to directories that will be affected by the specified pre-task action.
-
.pre_job_task_mode ⇒ :job, ...
Controls whether pre-job task is run.
-
.pre_job_task_run ⇒ Boolean
Whether to use Kiba::Extend’s pre-job task functionality.
-
.project_configs ⇒ Array<Module>
List of config modules in project namespaces set in Extend.config_namespaces setting.
-
.registry ⇒ Kiba::Extend::Registry::FileRegistry
A customized dry-container for registering and resolving jobs.
-
.registry_namespace_separator ⇒ String
Used to join nested namespaces and registered keys in FileRegistry.
-
.reload! ⇒ Object
-
.sgdelim ⇒ String
Default subgrouping delimiter for splitting/joining values in multi-valued fields.
-
.source ⇒ Class
Default source class for jobs.
-
.warning_label ⇒ String
Prefix for warnings from the ETL.
Class Method Details
.config_namespaces ⇒ Array<Module>
You must set this from an individual project if you wish to use the Kiba::Extend::Mixins::IterativeCleanup mixin.
Ruby modules that serve as namespaces under which config modules for a project are nested.
95 |
# File 'lib/kiba/extend.rb', line 95 setting :config_namespaces, default: [], reader: true |
.csvopts ⇒ Hash
Default options used for CSV sources/destinations
100 101 102 |
# File 'lib/kiba/extend.rb', line 100 setting :csvopts, default: {headers: true, header_converters: %i[symbol downcase]}, reader: true |
.default_job_method_name ⇒ Symbol
The job definition module method expected to be present if you define a registry entry hash creator as a Module
181 |
# File 'lib/kiba/extend.rb', line 181 setting :default_job_method_name, default: :job, reader: true |
.delim ⇒ String
116 |
# File 'lib/kiba/extend.rb', line 116 setting :delim, default: "|", reader: true |
.destination ⇒ Class
Default destination class for jobs. Must meet implementation criteria in Kiba wiki
158 159 160 |
# File 'lib/kiba/extend.rb', line 158 setting :destination, constructor: proc { Kiba::Extend::Destinations::CSV }, reader: true |
.job_show_me ⇒ Boolean
Whether to output results to STDOUT for debugging
230 |
# File 'lib/kiba/extend.rb', line 230 setting :job_show_me, default: false, reader: true |
.job_tell_me ⇒ Boolean
Whether to have computer audibly say something when job is complete
235 |
# File 'lib/kiba/extend.rb', line 235 setting :job_tell_me, default: false, reader: true |
.job_verbosity ⇒ :debug, ...
How much output about jobs to output to STDOUT
- :debug - tells you A LOT - helpful when developing pipelines and debugging
- :normal - reports what is running, from where, and the results
- :minimal - bare minimum
245 |
# File 'lib/kiba/extend.rb', line 245 setting :job_verbosity, default: :normal, reader: true |
.ke_dir ⇒ String
Returns path to this application’s data directory (used internally by transforms and utils), and not specific to a project.
56 57 58 59 60 |
# File 'lib/kiba/extend.rb', line 56 setting :ke_dir, reader: true, constructor: ->(value) do Gem.loaded_specs["kiba-extend"].full_gem_path end |
.lambdaopts ⇒ Hash
Default settings for Lambda destination
106 107 108 |
# File 'lib/kiba/extend.rb', line 106 setting :lambdaopts, default: {on_write: ->(r) { accumulator << r }}, reader: true |
.loader ⇒ Object
62 63 64 |
# File 'lib/kiba/extend.rb', line 62 def loader @loader ||= setup_loader end |
.nullvalue ⇒ String
Default string to be treated as though it were a null/empty value.
134 |
# File 'lib/kiba/extend.rb', line 134 setting :nullvalue, default: "%NULLVALUE%", reader: true |
.pre_job_task_action ⇒ :backup, :nuke
Controls what happens when pre-job task is run
- :backup - Moves all existing files in specified directories to backup
directory created in your
:datadir
- :nuke - Deletes all existing files in specified directories when a job is run. Make sure you only specify directories that contain derived/generated files!
211 |
# File 'lib/kiba/extend.rb', line 211 setting :pre_job_task_action, default: :backup, reader: true |
.pre_job_task_backup_dir ⇒ String
Full path to directory to which files will be moved if
pre_job_task_action == :backup. The directory will be
created if it does not exist.
195 |
# File 'lib/kiba/extend.rb', line 195 setting :pre_job_task_backup_dir, default: nil, reader: true |
.pre_job_task_directories ⇒ Array<String>
Full paths to directories that will be affected by the specified pre-task action
200 |
# File 'lib/kiba/extend.rb', line 200 setting :pre_job_task_directories, default: [], reader: true |
.pre_job_task_mode ⇒ :job, ...
Controls whether pre-job task is run
- :job - runs pre-job task specified above whenever you invoke
thor run:job ...
. All dependency jobs required for the invoked job will be run. This mode is recommended during development when you want any change in the dependency chain to get picked up.
- any other value - only regenerates missing dependency files. Useful when your data is really big and/or your jobs are more stable.
225 |
# File 'lib/kiba/extend.rb', line 225 setting :pre_job_task_mode, default: :job, reader: true |
.pre_job_task_run ⇒ Boolean
Whether to use Kiba::Extend’s pre-job task functionality. The
default is false
for backward compatibility, as existing
projects may not have the required settings configured.
188 |
# File 'lib/kiba/extend.rb', line 188 setting :pre_job_task_run, default: false, reader: true |
.project_configs ⇒ Array<Module>
List of config modules in project namespaces set in config_namespaces setting
252 253 254 255 256 |
# File 'lib/kiba/extend.rb', line 252 def project_configs config_namespaces.map { |ns| get_config_mods(ns, ns.constants) } .flatten .select { |obj| obj.is_a?(Module) && obj.respond_to?(:config) } end |
.registry ⇒ Kiba::Extend::Registry::FileRegistry
A customized dry-container for registering and resolving jobs
172 173 174 |
# File 'lib/kiba/extend.rb', line 172 setting :registry, constructor: proc { Kiba::Extend::Registry::FileRegistry.new }, reader: true |
.registry_namespace_separator ⇒ String
Used to join nested namespaces and registered keys in FileRegistry. With namespace ‘ns’ and registered key ‘foo’: ‘ns__foo’. With parent namespace ‘ns’, child namespace ‘child’, and registered key ‘foo’: ‘ns__child__foo’
142 |
# File 'lib/kiba/extend.rb', line 142 setting :registry_namespace_separator, default: "__", reader: true |
.reload! ⇒ Object
84 85 86 |
# File 'lib/kiba/extend.rb', line 84 def reload! @loader.reload end |
.sgdelim ⇒ String
Default subgrouping delimiter for splitting/joining values in multi-valued fields
orig = 'a^^y|b^^z'
delim_split = orig.split(delim)
sgdelim_split = delim_split.map{ |val| val.split(sgdelim) }
sgdelim_split => [['a', 'y'], ['b', 'z']]
129 |
# File 'lib/kiba/extend.rb', line 129 setting :sgdelim, default: "^^", reader: true |
.source ⇒ Class
Default source class for jobs. Must meet implementation criteria in Kiba wiki
149 150 151 |
# File 'lib/kiba/extend.rb', line 149 setting :source, constructor: proc { Kiba::Extend::Sources::CSV }, reader: true |
.warning_label ⇒ String
Prefix for warnings from the ETL
165 |
# File 'lib/kiba/extend.rb', line 165 setting :warning_label, default: "KIBA WARNING", reader: true |