diff --git a/.rspec b/.rspec new file mode 100644 index 00000000..c99d2e73 --- /dev/null +++ b/.rspec @@ -0,0 +1 @@ +--require spec_helper diff --git a/.rubocop.yml b/.rubocop.yml index 38f58068..fcaa9edf 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -88,12 +88,18 @@ Style/IfInsideElse: Style/IfUnlessModifier: Enabled: false +Style/InverseMethods: + Enabled: false + Style/NestedTernaryOperator: Enabled: false Style/PreferredHashMethods: Enabled: false +Style/Proc: + Enabled: false + Style/NumericPredicate: Enabled: false @@ -129,6 +135,9 @@ Style/SymbolProc: # old Ruby versions can't do this Style/TrailingCommaInHashLiteral: Enabled: false +Style/TrailingCommaInArrayLiteral: + Enabled: false + Style/TrailingUnderscoreVariable: Enabled: false @@ -138,6 +147,9 @@ Style/TrivialAccessors: # Style/UnlessModifier: # Enabled: false +Style/WordArray: + Enabled: false + Style/ZeroLengthPredicate: Enabled: false diff --git a/CHANGELOG.md b/CHANGELOG.md index ada0b079..3517be23 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,39 @@ # SmarterCSV 1.x Change Log +## T.B.D. + + * code refactor + + * NEW BEHAVIOR: + - hidden `:v2_mode` options (incomplete!) + - pre-processing for v2 options + - implemented v2 `:header_transformations` (DO NOT USE YET!) + + -> check if all v1 transformations are correctly done + How are we going to + * disambiguate headers? + + + * do key_mapping? -> seems to work + - remove_unmapped_keys ? + - silence missing keys ... a missing mapped key should raise an exception, except when silenced + - required_keys needs to be a header-validation + + + * keep original headers? -> :none + * do strings_as_* ? -> either :keys_as_symbols, :keys_as_strings + * remove quote_chars? -> included in keys_as_* + * strip whitespace? 
-> included in keys_as_* + + TODO: + + - add tests for header_validations + + - modify options to handle v1 and v2 options + - add v1 defaults in v2 processing + - add tests for all options processing + - 100% backwards compatibility when working in v1 mode + ## 1.10.2 (2024-02-11) * improve error message for missing keys diff --git a/lib/smarter_csv.rb b/lib/smarter_csv.rb index 26b8914d..25ee4ddf 100644 --- a/lib/smarter_csv.rb +++ b/lib/smarter_csv.rb @@ -1,5 +1,7 @@ # frozen_string_literal: true +require 'set' + require "smarter_csv/version" require "smarter_csv/file_io" require "smarter_csv/options_processing" diff --git a/lib/smarter_csv/hash_transformations.rb b/lib/smarter_csv/hash_transformations.rb index f6b4d538..c87d3961 100644 --- a/lib/smarter_csv/hash_transformations.rb +++ b/lib/smarter_csv/hash_transformations.rb @@ -2,7 +2,16 @@ module SmarterCSV class << self + # this is processing the headers from the input file def hash_transformations(hash, options) + if options[:v2_mode] + hash_transformations_v2(hash, options) + else + hash_transformations_v1(hash, options) + end + end + + def hash_transformations_v1(hash, options) # there may be unmapped keys, or keys purposedly mapped to nil or an empty key.. # make sure we delete any key/value pairs from the hash, which the user wanted to delete: remove_empty_values = options[:remove_empty_values] == true @@ -33,46 +42,117 @@ def hash_transformations(hash, options) end end - # def hash_transformations(hash, options) - # # there may be unmapped keys, or keys purposedly mapped to nil or an empty key.. - # # make sure we delete any key/value pairs from the hash, which the user wanted to delete: - # hash.delete(nil) - # hash.delete('') - # hash.delete(:"") - - # if options[:remove_empty_values] == true - # hash.delete_if{|_k, v| has_rails ? v.blank? : blank?(v)} - # end - - # hash.delete_if{|_k, v| !v.nil? 
module SmarterCSV
  class << self
    # Applies the transformations listed in options[:hash_transformations], in order, to one row hash.
    #
    # Each entry may be:
    #   * a callable (Proc / lambda / Method) -> called with (hash, options)
    #   * a Symbol -> name of a public SmarterCSV method, called with (hash, options)
    #   * a Hash   -> { name_or_callable => args }; only the first pair is used
    #   * an Array -> [name_or_callable, *args]
    #
    # Every step has to return the hash, because its result is fed into the next step.
    # Returns the hash untouched when no transformations are configured.
    # Raises SmarterCSV::IncorrectOption for any other entry type.
    def hash_transformations_v2(hash, options)
      transformations = options[:hash_transformations]
      return hash if transformations.nil? || transformations.empty?

      transformations.each do |transformation|
        hash =
          if transformation.respond_to?(:call) # user-provided Proc / lambda
            transformation.call(hash, options)
          else
            case transformation
            when Symbol # pre-defined transformation defined in the SmarterCSV module
              public_send(transformation, hash, options)
            when Hash # .first turns the first key/value pair into [name, args]
              trans, args = transformation.first
              apply_transformation(trans, hash, args, options)
            when Array # additional arguments passed in array form
              trans, *args = transformation
              apply_transformation(trans, hash, args, options)
            else
              raise SmarterCSV::IncorrectOption, "Invalid transformation type: #{transformation.class}"
            end
          end
      end

      hash
    end

    #
    # To handle v1-backward-compatible behavior, it is faster to roll all behavior into one method.
    #
    # Builds a NEW hash that
    #   * drops nil / empty keys,
    #   * drops blank values (via Rails' blank? when has_rails is truthy, else the local blank? helper),
    #   * converts integer / float looking Strings to numbers, unless
    #     limit_execution_for_only_or_except excludes the key.
    #
    def v1_backwards_compatibility(hash, options)
      hash.each_with_object({}) do |(k, v), new_hash|
        next if k.nil? || k == '' || k == :"" # remove_empty_keys
        next if has_rails ? v.blank? : blank?(v) # remove_empty_values

        # convert_values_to_numeric:
        # deal with the :only / :except options to :convert_values_to_numeric
        unless limit_execution_for_only_or_except(options, :convert_values_to_numeric, k)
          if v =~ /^[+-]?\d+\.\d+$/
            v = v.to_f
          elsif v =~ /^[+-]?\d+$/
            v = v.to_i
          end
        end

        new_hash[k] = v
      end
    end

    #
    # Building Blocks in case you want to build your own flow.
    # Each building block returns the hash, so that it can be chained in hash_transformations_v2.
    #

    # TO BE IMPLEMENTED.
    # Returns the hash unchanged, so that listing this transformation does not break the chain.
    def value_converters(hash, _options)
      hash
    end

    # Strips leading / trailing whitespace from all String values; other values are left alone.
    # The stripped copy is stored back, so frozen Strings do not raise.
    def strip_spaces(hash, _options)
      hash.each_key do |key|
        value = hash[key]
        hash[key] = value.strip if value.is_a?(String)
      end
    end

    # Removes entries whose value is nil or a String consisting only of whitespace.
    def remove_blank_values(hash, _options)
      hash.delete_if { |_key, value| value.nil? || (value.is_a?(String) && value !~ /[^[:space:]]/) }
    end

    # Removes entries whose value is a Numeric zero.
    # String values such as "0" are not touched - convert the values to numeric first.
    def remove_zero_values(hash, _options)
      hash.delete_if { |_key, value| value.is_a?(Numeric) && value.zero? }
    end

    # Removes entries with a nil or empty key.
    # delete_if is used on purpose: reject! returns nil when nothing was removed.
    def remove_empty_keys(hash, _options)
      hash.delete_if { |key, _value| key.nil? || (key.respond_to?(:empty?) && key.empty?) }
    end

    # Converts String values which look like integers or floats into Integer / Float.
    # \A and \z anchor the whole value, so that multi-line values are never converted by accident.
    def convert_values_to_numeric(hash, _options)
      hash.each_key do |k|
        case hash[k]
        when /\A[+-]?\d+\.\d+\z/
          hash[k] = hash[k].to_f
        when /\A[+-]?\d+\z/
          hash[k] = hash[k].to_i
        end
      end
    end

    # Same as convert_values_to_numeric, but values with leading zeroes (e.g. "007", "00.5")
    # stay Strings. A plain "0" or a value like "0.5" has no leading zero and is converted.
    def convert_values_to_numeric_unless_leading_zeroes(hash, _options)
      hash.each_key do |k|
        case hash[k]
        when /\A[+-]?(0|[1-9]\d*)\.\d+\z/
          hash[k] = hash[k].to_f
        when /\A[+-]?(0|[1-9]\d*)\z/
          hash[k] = hash[k].to_i
        end
      end
    end

    # IMPORTANT NOTE:
    #      this can lead to cases where a nil or empty value gets converted into 0 or 0.0,
    #      and can then not be properly removed!
    #
    # you should first try to use convert_values_to_numeric or convert_values_to_numeric_unless_leading_zeroes
    #
    # Unconditionally calls to_i on every value.
    def convert_to_integer(hash, _options)
      hash.each_key { |key| hash[key] = hash[key].to_i }
    end

    # Unconditionally calls to_f on every value (see the note above convert_to_integer).
    def convert_to_float(hash, _options)
      hash.each_key { |key| hash[key] = hash[key].to_f }
    end
  end
end
module SmarterCSV
  class << self
    # ---- V2.x Version: transform the headers that were in the file: ------------------------------------------
    #
    # Applies the transformations listed in options[:header_transformations], in order.
    #
    # Each entry may be:
    #   * a callable (Proc / lambda / Method) -> called with (header_array, options)
    #   * a Symbol -> name of a public SmarterCSV method, called with (header_array, options)
    #   * a Hash   -> { name_or_callable => args }; only the first pair is used
    #   * an Array -> [name_or_callable, *args]
    #
    # Every step has to return the header array, which is fed into the next step.
    # Raises SmarterCSV::IncorrectOption for any other entry type.
    def header_transformations_v2(header_array, options)
      transformations = options[:header_transformations]
      return header_array if transformations.nil? || transformations.empty?

      transformations.each do |transformation|
        header_array =
          if transformation.respond_to?(:call) # user-provided Proc / lambda
            transformation.call(header_array, options)
          else
            case transformation
            when Symbol # pre-defined transformation defined in the SmarterCSV module
              public_send(transformation, header_array, options)
            when Hash # .first turns the first key/value pair into [name, args]
              trans, args = transformation.first
              apply_transformation(trans, header_array, args, options)
            when Array # additional arguments passed in array form
              trans, *args = transformation
              apply_transformation(trans, header_array, args, options)
            else
              raise SmarterCSV::IncorrectOption, "Invalid transformation type: #{transformation.class}"
            end
          end
      end

      header_array
    end

    # Runs one transformation which takes extra arguments.
    # A callable is called directly, anything else is treated as the name of a public SmarterCSV method.
    # In both cases the arguments are (header_array, args, options).
    def apply_transformation(transformation, header_array, args, options)
      if transformation.respond_to?(:call)
        transformation.call(header_array, args, options)
      else
        public_send(transformation, header_array, args, options)
      end
    end

    # pre-defined v2 header transformations:
    #
    # all these take the headers array as the input and return a new array

    # Same normalization as keys_as_strings, with every header converted to a Symbol.
    def keys_as_symbols(headers, options)
      keys_as_strings(headers, options).map { |header| header.to_sym }
    end

    # Normalizes headers into lower-case, underscore-separated Strings.
    #
    # The quote character is removed as a literal String (not interpolated into a Regexp, so
    # characters like '|' or '.' are safe), and it is removed BEFORE stripping, so that
    # padding inside the quotes ('" name "') is stripped as well.
    def keys_as_strings(headers, options)
      quote_char = options[:quote_char].to_s

      headers.map do |header|
        header = header.gsub(quote_char, '') unless quote_char.empty?
        header.strip.downcase.gsub(/[\s-]+/, '_')
      end
    end

    # Strips and downcases every header.
    # downcase (not downcase!) is used on purpose: the bang version returns nil
    # when the header is already lower-case.
    def downcase_headers(headers, _options)
      headers.map { |header| header.strip.downcase }
    end

    # Renames headers according to mapping ({ from => to }).
    #
    # * headers mapped to nil stay nil in the result
    # * unmapped headers are kept, or replaced by nil when options[:remove_unmapped_keys] is set
    # * mapping keys which are not present in the headers raise SmarterCSV::KeyMappingError, unless
    #   options[:silence_missing_keys] is true (silences all) or a list containing those keys
    #
    # Raises SmarterCSV::IncorrectOption when mapping is not a non-empty Hash.
    def key_mapping(headers, mapping = {}, options)
      unless mapping.is_a?(Hash) && !mapping.empty?
        raise(SmarterCSV::IncorrectOption, "ERROR: incorrect format for key_mapping! Expecting hash with from -> to mappings")
      end

      silence = options[:silence_missing_keys]
      unless silence == true
        silenced_keys = silence ? Array(silence) : []
        missing_keys = mapping.keys - headers - silenced_keys
        raise SmarterCSV::KeyMappingError, "ERROR: cannot map headers: #{missing_keys.join(', ')}" unless missing_keys.empty?
      end

      # Apply key mapping, retaining nils for explicitly mapped headers
      headers.map do |header|
        if mapping.key?(header)
          mapping[header] # includes explicit mappings to nil
        elsif options[:remove_unmapped_keys]
          nil
        else
          header
        end
      end
    end
  end
end
options) + check_required_headers_v1(headers, options) end - def check_duplicate_headers(headers, _options) + def check_duplicate_headers_v1(headers, _options) header_counts = Hash.new(0) headers.each { |header| header_counts[header] += 1 unless header.nil? } @@ -18,9 +28,7 @@ def check_duplicate_headers(headers, _options) end end - require 'set' - - def check_required_headers(headers, options) + def check_required_headers_v1(headers, options) if options[:required_keys] && options[:required_keys].is_a?(Array) headers_set = headers.to_set missing_keys = options[:required_keys].select { |k| !headers_set.include?(k) } @@ -30,5 +38,99 @@ def check_required_headers(headers, options) end end end + + # ---- V2.x Version: validate the headers ----------------------------------------------------------------- + + # def header_validations_v2(headers, options) + # return unless options[:header_validations] + + # options[:header_validations].each do |validation| + # if validation.respond_to?(:call) + # # Directly call if it's a Proc or lambda + # validation.call(headers) + # else + # binding.pry + # # Handle Symbol, Hash, or Array + # method_name, args = validation.is_a?(Symbol) ? 
module SmarterCSV
  class << self
    # ---- V2.x Version: validate the headers -----------------------------------------------------------------
    #
    # Runs the validations listed in options[:header_validations], in order.
    # Header validations typically raise errors directly.
    #
    # Each entry may be:
    #   * a callable (Proc / lambda / Method) -> called with (headers)
    #   * a Symbol -> name of a public SmarterCSV method, called with (headers)
    #   * a Hash   -> { name => args, ... }; EVERY pair is run, called with (headers, args)
    #   * an Array -> [name, *args]; called with (headers, args) where args is an Array
    #
    # Does nothing when no validations are configured.
    # Raises SmarterCSV::IncorrectOption for any other entry type.
    def header_validations_v2(headers, options)
      validations = options[:header_validations]
      return unless validations

      validations.each do |validation|
        if validation.respond_to?(:call)
          # Directly call if it's a Proc or lambda
          validation.call(headers)
        else
          case validation
          when Symbol
            public_send(validation, headers)
          when Hash
            # run all pairs, so that { a: x, b: y } does not silently skip :b
            validation.each { |val, args| public_send(val, headers, args) }
          when Array
            val, *args = validation
            public_send(val, headers, args)
          else
            raise SmarterCSV::IncorrectOption, "Invalid validation type: #{validation.class}"
          end
        end
      end
    end

    # these are some pre-defined header validations which can be used
    # all these take the headers array as the input

    # Raises SmarterCSV::DuplicateHeaders when a non-nil header occurs more than once.
    # nil headers are ignored.
    def unique_headers(headers)
      header_counts = Hash.new(0)
      headers.each { |header| header_counts[header] += 1 unless header.nil? }

      duplicates = header_counts.select { |_, count| count > 1 }
      return if duplicates.empty?

      raise(SmarterCSV::DuplicateHeaders, "Duplicate Headers in CSV: #{duplicates.inspect}")
    end

    # Raises SmarterCSV::MissingKeys when any of the required headers is not present.
    # Raises SmarterCSV::IncorrectOption when required is not an Array.
    def required_headers(headers, required = [])
      raise(SmarterCSV::IncorrectOption, "ERROR: required_headers validation needs an array argument") unless required.is_a?(Array)

      # Array difference avoids depending on 'set' being loaded elsewhere
      missing = required - headers
      return if missing.empty?

      raise(SmarterCSV::MissingKeys, "Missing Headers in CSV: #{missing.inspect}")
    end
  end
end
module SmarterCSV
  # v1 options which are deprecated; see handle_deprecations for how they are reported.
  # The names have to match the option keys exactly, otherwise the option is never detected.
  DEPRECATED_OPTIONS = [
    :convert_values_to_numeric,
    :downcase_headers,
    :keep_original_headers,
    :key_mapping,
    :remove_empty_hashes,
    :remove_empty_values,
    :remove_values_matching,
    :remove_zero_values,
    :required_headers,
    :required_keys,
    :strings_as_keys,
    :strip_chars_from_headers,
    :strip_whitespace,
    :value_converters,
  ].freeze

  class << self
    # ONLY FOR BACKWARDS-COMPATIBILITY
    # Returns the merged v1 defaults (common options + v1 options).
    def default_options
      COMMON_OPTIONS.merge(V1_DEFAULT_OPTIONS)
    end

    private

    # Computes the default options for the requested mode. Always returns a new, mutable Hash.
    #
    #   * without options[:v2_mode]: common options + v1 defaults
    #   * v2_mode, defaults: 'none': an empty Hash (no defaults at all)
    #   * v2_mode, defaults: 'v1'  : common + v2 defaults + V1_TRANSFORMATIONS
    #   * v2_mode, otherwise       : common + v2 defaults + V2_TRANSFORMATIONS
    #
    # NOTE(review): V1_TRANSFORMATIONS / V2_TRANSFORMATIONS look like they might be swapped
    #               (V2_TRANSFORMATIONS contains :v1_backwards_compatibility) - confirm the intent.
    def compute_default_options(options = {})
      return COMMON_OPTIONS.merge(V1_DEFAULT_OPTIONS) unless options[:v2_mode]

      requested_defaults = options[:defaults].to_s
      return {} if requested_defaults == 'none'

      transformations = requested_defaults == 'v1' ? V1_TRANSFORMATIONS : V2_TRANSFORMATIONS
      COMMON_OPTIONS.merge(V2_DEFAULT_OPTIONS).merge(transformations)
    end

    # Reports deprecated options which were passed in by the user:
    # raises SmarterCSV::DeprecatedOptions in v2 mode, prints a warning otherwise.
    # Nothing happens when no deprecated option is used or options[:silence_deprecations] is set.
    def handle_deprecations(options)
      used_deprecated_options = DEPRECATED_OPTIONS & options.keys
      return if used_deprecated_options.empty? || options[:silence_deprecations]

      message = "SmarterCSV #{VERSION} DEPRECATED OPTIONS: #{pp(used_deprecated_options)}"
      raise(SmarterCSV::DeprecatedOptions, "ERROR: #{message}") if options[:v2_mode]

      puts "DEPRECATION WARNING: #{message}"
    end

    # Pretty-prints a value via AwesomePrint when it is loaded, via inspect otherwise.
    def pp(value)
      defined?(AwesomePrint) ? value.awesome_inspect(index: nil) : value.inspect
    end

    # ---- V2 code ----------------------------------------------------------------------------------------

    V2_DEFAULT_OPTIONS = {
      # These need to go to the COMMON_OPTIONS:
      remove_empty_hashes: true, # this might need a transformation or move to common options
      # ------------
      header_transformations: [:keys_as_symbols],
      header_validations: [:unique_headers],
      # data_transformations: [:replace_blank_with_nil],
      # data_validations: [],
      hash_transformations: [:strip_spaces, :remove_blank_values],
      hash_validations: [],
      v2_mode: true,
    }.freeze

    V2_TRANSFORMATIONS = {
      header_transformations: [:keys_as_symbols],
      header_validations: [:unique_headers],
      # data_transformations: [:replace_blank_with_nil],
      # data_validations: [],
      hash_transformations: [:v1_backwards_compatibility],
      # hash_transformations: [:remove_empty_keys, :strip_spaces, :remove_blank_values, :convert_values_to_numeric], # ??? :convert_values_to_numeric]
      hash_validations: [],
    }.freeze

    V1_TRANSFORMATIONS = {
      header_transformations: [:keys_as_symbols],
      header_validations: [:unique_headers],
      # data_transformations: [:replace_blank_with_nil],
      # data_validations: [],
      hash_transformations: [:strip_spaces, :remove_blank_values, :convert_values_to_numeric],
      hash_validations: [],
    }.freeze

    # Normalizes options[:header_transformations] into an Array for v2 processing.
    #
    # We want to provide safe defaults for easy processing, that is why we have a special keyword :none
    # to not do any header transformations - it is removed here, so an empty list remains.
    #
    # A single transformation (Symbol, Proc, Hash) is wrapped into an Array.
    # The given options are not modified; a new Hash is returned. When neither :v2_mode nor
    # :header_transformations is set, the given options are returned as they are.
    def preprocess_v2_options(options)
      return options unless options[:v2_mode] || options[:header_transformations]

      requested = options[:header_transformations]
      requested = [requested] unless requested.nil? || requested.is_a?(Array)
      requested = (requested || []).reject { |transformation| transformation.to_s == 'none' }

      options.merge(header_transformations: requested)
    end
  end
end
- count = 0 - escaped = false + # Escaped quote character (e.g., if quote_char is ", then escaped is \") + escaped_quote = Regexp.escape(quote_char) - line.each_char do |char| - if char == '\\' && !escaped - escaped = true - else - count += 1 if char == quote_char && !escaped - escaped = false - end - end + # Pattern to match a quote character not preceded by a backslash + pattern = /(? required_keys' do @@ -31,7 +29,12 @@ it 'prints a deprecation warning when required_headers is used' do options[:required_headers] = [:first_name] - expect(SmarterCSV).to receive(:puts).with a_string_matching(/DEPRECATION WARNING/) + expect(SmarterCSV).to receive(:puts).with( + "DEPRECATION WARNING: please use 'required_keys' instead of 'required_headers'" + ) + expect(SmarterCSV).to receive(:puts).with( + "DEPRECATION WARNING: SmarterCSV #{SmarterCSV::VERSION} DEPRECATED OPTIONS: [:key_mapping, :required_headers]" + ) data end end @@ -54,7 +57,9 @@ it 'does not print a deprecation warning when required_keys is used' do options[:required_keys] = [:first_name] - expect(SmarterCSV).not_to receive(:puts).with a_string_matching(/DEPRECATION WARNING/) + expect(SmarterCSV).to receive(:puts).with( + "DEPRECATION WARNING: SmarterCSV #{SmarterCSV::VERSION} DEPRECATED OPTIONS: [:key_mapping, :required_keys]" + ) data end end diff --git a/spec/features/header_handling/silence_missing_keys_spec.rb b/spec/features/header_handling/silence_missing_keys_spec.rb index 03319f1c..898c42b3 100644 --- a/spec/features/header_handling/silence_missing_keys_spec.rb +++ b/spec/features/header_handling/silence_missing_keys_spec.rb @@ -1,7 +1,5 @@ # frozen_string_literal: true -require 'spec_helper' - fixture_path = 'spec/fixtures' describe 'silence missing keys' do @@ -10,7 +8,7 @@ subject(:read_csv) { SmarterCSV.process("#{fixture_path}/silence_missing_keys.csv", options) } it 'prints warning message for missing keys by default' do - expect(SmarterCSV).not_to receive(:puts) + expect(SmarterCSV).to 
receive(:puts).with(/DEPRECATED OPTION.*key_mapping/) expect{ read_csv }.to raise_exception( SmarterCSV::KeyMappingError, "ERROR: can not map headers: THIS, missing_key" ) @@ -18,7 +16,7 @@ it 'maps the keys from the CSV file correctly' do options[:silence_missing_keys] = true - expect(SmarterCSV).not_to receive(:puts) + expect(SmarterCSV).to receive(:puts).with(/DEPRECATED OPTION.*key_mapping/) data = SmarterCSV.process("#{fixture_path}/silence_missing_keys.csv", options) expect(data.size).to eq 1 expect(data[0].keys).to eq %i[this that] diff --git a/spec/features/header_handling/strings_as_keys_spec.rb b/spec/features/header_handling/strings_as_keys_spec.rb index 5bf1266d..b31629ff 100644 --- a/spec/features/header_handling/strings_as_keys_spec.rb +++ b/spec/features/header_handling/strings_as_keys_spec.rb @@ -1,7 +1,5 @@ # frozen_string_literal: true -require 'spec_helper' - fixture_path = 'spec/fixtures' describe ':strings_as_keys option' do diff --git a/spec/features/header_handling/strip_chars_from_headers_spec.rb b/spec/features/header_handling/strip_chars_from_headers_spec.rb index db904d73..0e529c72 100644 --- a/spec/features/header_handling/strip_chars_from_headers_spec.rb +++ b/spec/features/header_handling/strip_chars_from_headers_spec.rb @@ -1,7 +1,5 @@ # frozen_string_literal: true -require 'spec_helper' - fixture_path = 'spec/fixtures' describe ':strip_chars_from_headers option' do diff --git a/spec/features/ignore_comments_spec.rb b/spec/features/ignore_comments_spec.rb index 956774a5..f28b8797 100644 --- a/spec/features/ignore_comments_spec.rb +++ b/spec/features/ignore_comments_spec.rb @@ -1,7 +1,5 @@ # frozen_string_literal: true -require 'spec_helper' - fixture_path = 'spec/fixtures' describe ':comment_regexp option' do diff --git a/spec/features/quotes/escaped_quote_chars_spec.rb b/spec/features/quotes/escaped_quote_chars_spec.rb index 6f9bd31c..d1eb3a5b 100644 --- a/spec/features/quotes/escaped_quote_chars_spec.rb +++ 
b/spec/features/quotes/escaped_quote_chars_spec.rb @@ -1,98 +1,153 @@ # frozen_string_literal: true -require 'spec_helper' - fixture_path = 'spec/fixtures' [true, false].each do |bool| describe "handling files with escaped quote chars with#{bool ? ' C-' : 'out '}acceleration" do let(:options) { { acceleration: bool } } - subject(:data) { SmarterCSV.process(file, options) } - describe ".count_quote_chars" do it "handles escaped characters and regular characters" do expect(SmarterCSV.count_quote_chars("\"No\" \"Escaping\"", "\"")).to eq 4 expect(SmarterCSV.count_quote_chars("\"D\\\"Angelos\"", "\"")).to eq 2 expect(SmarterCSV.count_quote_chars("\!D\\\!Angelos\!", "\!")).to eq 2 end - end - context 'when it is a strangely delimited file' do - let(:file) { "#{fixture_path}/escaped_quote_char.csv" } - - it 'loads the csv file without issues' do - expect(data[0]).to eq( - content: 'Some content', - escapedname: "D\\\"Angelos", - othercontent: "Some More Content" - ) - expect(data[1]).to eq( - content: 'Some content', - escapedname: "O\\\"heard", - othercontent: "Some More Content\\\\" - ) - expect(data.size).to eq 2 + # Test with different quote characters: ", ', and ! 
+ ['"', "'", '!'].each do |quote_char| + context "with quote character '#{quote_char}'" do + it "counts unescaped #{quote_char} characters" do + expect(SmarterCSV.count_quote_chars("a#{quote_char}bc#{quote_char}d", quote_char)).to eq(2) + end + + it "does not count escaped #{quote_char} characters" do + expect(SmarterCSV.count_quote_chars("a\\#{quote_char}bc#{quote_char}d", quote_char)).to eq(1) + end + + it "handles strings with only escaped #{quote_char} characters" do + expect(SmarterCSV.count_quote_chars("\\#{quote_char}\\#{quote_char}\\#{quote_char}", quote_char)).to eq(0) + end + + it "handles strings with mixed escaped and unescaped #{quote_char} characters" do + expect(SmarterCSV.count_quote_chars("#{quote_char}\\#{quote_char}#{quote_char}\\#{quote_char}#{quote_char}", quote_char)).to eq(3) + end + end end - end - context 'when it is a strangely delimited file' do - let(:file) { "#{fixture_path}/escaped_quote_char_2.csv" } - let(:options) do - { quote_char: "!" } + # Edge cases + context 'with edge cases' do + it 'returns 0 for nil line' do + expect(SmarterCSV.count_quote_chars(nil, '"')).to eq(0) + end + + it 'returns 0 for nil quote character' do + expect(SmarterCSV.count_quote_chars('some text', nil)).to eq(0) + end + + it 'returns 0 for empty quote character' do + expect(SmarterCSV.count_quote_chars('some text', '')).to eq(0) + end + + it 'returns 0 for empty line' do + expect(SmarterCSV.count_quote_chars('', '"')).to eq(0) + end + + it 'returns 0 when the line does not contain the quote character' do + expect(SmarterCSV.count_quote_chars('some text', '"')).to eq(0) + end end - it 'loads the csv file without issues' do - expect(data[0]).to eq( - content: 'Some content', - escapedname: "D\\\!Angelos", - othercontent: "Some More Content" - ) - expect(data[1]).to eq( - content: 'Some content', - escapedname: "O\\\!heard", - othercontent: "Some More Content\\\\" - ) - expect(data.size).to eq 2 + # Additional cases + context 'with additional cases' do + it 
'handles escape characters not followed by a quote character' do + expect(SmarterCSV.count_quote_chars("abc\\ndef", '"')).to eq(0) + end + + it 'correctly processes consecutive escape characters' do + expect(SmarterCSV.count_quote_chars("a\\\\\"bc\"", '"')).to eq(2) + end end end - context 'when it is a strangely delimited file' do - let(:file) { "#{fixture_path}/escaped_quote_char_3.csv" } - - it 'loads the csv file without issues' do - expect(data[0]).to eq( - content: '\\"Some content\\"', - escapedname: "D\\\"Angelos", - othercontent: '\\"Some More Content\\"' - ) - expect(data[1]).to eq( - content: '\\"Some content\\"', - escapedname: "O\\\"heard", - othercontent: '\\"Some More Content\\"' - ) - expect(data.size).to eq 2 + context 'with fixture files' do + subject(:data) { SmarterCSV.process(file, options) } + + context 'when it is a strangely delimited file' do + let(:file) { "#{fixture_path}/escaped_quote_char.csv" } + + it 'loads the csv file without issues' do + expect(data[0]).to eq( + content: 'Some content', + escapedname: "D\\\"Angelos", + othercontent: "Some More Content" + ) + expect(data[1]).to eq( + content: 'Some content', + escapedname: "O\\\"heard", + othercontent: "Some More Content\\\\" + ) + expect(data.size).to eq 2 + end end - end - context 'when it is a strangely delimited file' do - let(:file) { "#{fixture_path}/escaped_quote_char_4.csv" } - let(:options) do - { quote_char: "'" } + context 'when it is a strangely delimited file' do + let(:file) { "#{fixture_path}/escaped_quote_char_2.csv" } + let(:options) do + { quote_char: "!" 
} + end + + it 'loads the csv file without issues' do + expect(data[0]).to eq( + content: 'Some content', + escapedname: "D\\\!Angelos", + othercontent: "Some More Content" + ) + expect(data[1]).to eq( + content: 'Some content', + escapedname: "O\\\!heard", + othercontent: "Some More Content\\\\" + ) + expect(data.size).to eq 2 + end end - it 'loads the csv file without issues' do - expect(data[0]).to eq( - content: "\\'Some content\\'", - escapedname: "D\\\'Angelos", - othercontent: "\\'Some More Content\\'" - ) - expect(data[1]).to eq( - content: "\\'Some content\\'", - escapedname: "O\\\'heard", - othercontent: "Some \\\\ More \\\\ Content\\\\" - ) - expect(data.size).to eq 2 + context 'when it is a strangely delimited file' do + let(:file) { "#{fixture_path}/escaped_quote_char_3.csv" } + + it 'loads the csv file without issues' do + expect(data[0]).to eq( + content: '\\"Some content\\"', + escapedname: "D\\\"Angelos", + othercontent: '\\"Some More Content\\"' + ) + expect(data[1]).to eq( + content: '\\"Some content\\"', + escapedname: "O\\\"heard", + othercontent: '\\"Some More Content\\"' + ) + expect(data.size).to eq 2 + end + end + + context 'when it is a strangely delimited file' do + let(:file) { "#{fixture_path}/escaped_quote_char_4.csv" } + let(:options) do + { quote_char: "'" } + end + + it 'loads the csv file without issues' do + expect(data[0]).to eq( + content: "\\'Some content\\'", + escapedname: "D\\\'Angelos", + othercontent: "\\'Some More Content\\'" + ) + expect(data[1]).to eq( + content: "\\'Some content\\'", + escapedname: "O\\\'heard", + othercontent: "Some \\\\ More \\\\ Content\\\\" + ) + expect(data.size).to eq 2 + end end end end diff --git a/spec/features/quotes/quoted_spec.rb b/spec/features/quotes/quoted_spec.rb index 4ee627a3..0d598bc8 100644 --- a/spec/features/quotes/quoted_spec.rb +++ b/spec/features/quotes/quoted_spec.rb @@ -1,7 +1,5 @@ # frozen_string_literal: true -require 'spec_helper' - fixture_path = 'spec/fixtures' describe 
'loading file with quoted fields' do diff --git a/spec/features/skip_lines_spec.rb b/spec/features/skip_lines_spec.rb index 6b19e380..7dfa632c 100644 --- a/spec/features/skip_lines_spec.rb +++ b/spec/features/skip_lines_spec.rb @@ -1,7 +1,5 @@ # frozen_string_literal: true -require 'spec_helper' - fixture_path = 'spec/fixtures' describe ':skip_lines option' do diff --git a/spec/features/special_cases/hard_sample_spec.rb b/spec/features/special_cases/hard_sample_spec.rb index fbaf1c0a..e43579a3 100644 --- a/spec/features/special_cases/hard_sample_spec.rb +++ b/spec/features/special_cases/hard_sample_spec.rb @@ -1,7 +1,5 @@ # frozen_string_literal: true -require 'spec_helper' - fixture_path = 'spec/fixtures' describe 'can handle the difficult CSV file' do diff --git a/spec/features/special_cases/malformed_spec.rb b/spec/features/special_cases/malformed_spec.rb index 5436bcf5..dc949890 100644 --- a/spec/features/special_cases/malformed_spec.rb +++ b/spec/features/special_cases/malformed_spec.rb @@ -1,7 +1,5 @@ # frozen_string_literal: true -require 'spec_helper' - fixture_path = 'spec/fixtures' # according to RFC-4180 quotes inside of "words" shouldbe doubled, but our parser is robust against that. 
diff --git a/spec/features/special_cases/problematic_spec.rb b/spec/features/special_cases/problematic_spec.rb index 06417686..f8696dc9 100644 --- a/spec/features/special_cases/problematic_spec.rb +++ b/spec/features/special_cases/problematic_spec.rb @@ -1,7 +1,5 @@ # frozen_string_literal: true -require 'spec_helper' - fixture_path = 'spec/fixtures' describe 'loading file with UTF-8 characters in the header' do diff --git a/spec/features/special_cases/trading_spec.rb b/spec/features/special_cases/trading_spec.rb index c6d99ac9..b88da4e2 100644 --- a/spec/features/special_cases/trading_spec.rb +++ b/spec/features/special_cases/trading_spec.rb @@ -1,7 +1,5 @@ # frozen_string_literal: true -require 'spec_helper' - fixture_path = 'spec/fixtures' # somebody reported that a column called 'options_trader' would be truncated to 'trader' diff --git a/spec/smarter_csv/blank_spec.rb b/spec/smarter_csv/blank_spec.rb index 07dcf8ac..d6447908 100644 --- a/spec/smarter_csv/blank_spec.rb +++ b/spec/smarter_csv/blank_spec.rb @@ -1,7 +1,5 @@ # frozen_string_literal: true -require 'spec_helper' - describe 'blank?' 
do it 'is true for nil' do expect(SmarterCSV.send(:blank?, nil)).to eq true diff --git a/spec/smarter_csv/close_file_spec.rb b/spec/smarter_csv/close_file_spec.rb index cf6b6eb8..a1541b62 100644 --- a/spec/smarter_csv/close_file_spec.rb +++ b/spec/smarter_csv/close_file_spec.rb @@ -1,7 +1,5 @@ # frozen_string_literal: true -require 'spec_helper' - fixture_path = 'spec/fixtures' describe 'file operations' do diff --git a/spec/smarter_csv/file_encoding_spec.rb b/spec/smarter_csv/file_encoding_spec.rb index dce39fff..8737a8fc 100644 --- a/spec/smarter_csv/file_encoding_spec.rb +++ b/spec/smarter_csv/file_encoding_spec.rb @@ -1,7 +1,5 @@ # frozen_string_literal: true -require 'spec_helper' - RSpec.describe SmarterCSV do describe 'encoding warning message' do let(:file_path) { 'path/to/csvfile.csv' } @@ -61,4 +59,3 @@ end end end - diff --git a/spec/smarter_csv/header_validations_spec.rb b/spec/smarter_csv/header_validations_spec.rb new file mode 100644 index 00000000..6ded184d --- /dev/null +++ b/spec/smarter_csv/header_validations_spec.rb @@ -0,0 +1,137 @@ +# frozen_string_literal: true + +require 'smarter_csv' +require 'smarter_csv/header_validations' + +RSpec.describe SmarterCSV do + describe '.header_validations' do + let(:headers) { ['header1', 'header2', 'header3'] } + + context 'when in V1 mode' do + let(:options) { { v2_mode: false } } + + it 'passes with no duplicate or missing headers' do + expect { described_class.header_validations(headers, options) }.not_to raise_error + end + + context 'with duplicate headers' do + let(:headers) { ['header1', 'header1', 'header3'] } + + it 'raises a DuplicateHeaders error' do + expect { described_class.header_validations(headers, options) }.to raise_error(SmarterCSV::DuplicateHeaders) + end + end + + context 'with missing required headers' do + let(:options) { { v2_mode: false, required_keys: ['header1', 'header4'] } } + + it 'raises a MissingKeys error' do + expect { described_class.header_validations(headers, options) 
}.to raise_error(SmarterCSV::MissingKeys) + end + end + end + + context 'when in V2 mode' do + let(:options) { { v2_mode: true, header_validations: [:unique_headers, { required_headers: ['header1', 'header4'] }] } } + + context 'with no duplicate or missing headers' do + let(:headers) { ['header1', 'header2', 'header3', 'header4'] } + + it 'passes validation' do + expect { described_class.header_validations(headers, options) }.not_to raise_error + end + end + + context 'with missing required headers' do + let(:headers) { ['header1', 'header2', 'header3'] } # 'header4' is missing + + it 'raises a MissingHeaders error' do + expect { described_class.header_validations(headers, options) }.to raise_error(SmarterCSV::MissingKeys) + end + end + + context 'with duplicate headers' do + let(:headers) { ['header1', 'header1', 'header3'] } + + it 'raises a DuplicateHeaders error' do + expect { described_class.header_validations(headers, options) }.to raise_error(SmarterCSV::DuplicateHeaders) + end + end + + context 'with missing required headers' do + it 'raises a MissingHeaders error' do + expect { described_class.header_validations(headers, options) }.to raise_error(SmarterCSV::MissingKeys) + end + end + + context 'with custom validation function' do + let(:custom_validation) { ->(headers) { raise StandardError, 'Custom validation error' if headers.include?('error_header') } } + let(:options) { { v2_mode: true, header_validations: [custom_validation] } } + + context 'when custom validation fails' do + let(:headers) { ['error_header', 'header2', 'header3'] } + + it 'raises a custom error' do + expect { described_class.header_validations(headers, options) }.to raise_error(StandardError, 'Custom validation error') + end + end + + context 'when custom validation passes' do + it 'passes without errors' do + expect { described_class.header_validations(headers, options) }.not_to raise_error + end + end + end + + context 'when in V2 mode with Array-based validation' do + 
let(:headers) { ['header1', 'header2', 'header3'] } + + context 'with custom Array-based validation' do + let(:options) do + { + v2_mode: true, + header_validations: [[:custom_validation, 'arg1', 'arg2']] + } + end + + before do + allow(described_class).to receive(:custom_validation).and_return(true) + end + + it 'calls the specified method with arguments' do + expect(described_class).to receive(:custom_validation).with(headers, ['arg1', 'arg2']) + described_class.header_validations_v2(headers, options) + end + end + end + + context 'when in V2 mode with a custom object validation' do + let(:headers) { ['header1', 'header2', 'header3'] } + + context 'with a custom object that responds to call' do + let(:custom_validator) { double("CustomValidator") } + let(:options) do + { + v2_mode: true, + header_validations: [custom_validator] + } + end + + it 'calls the call method on the custom object' do + expect(custom_validator).to receive(:call).with(headers) + described_class.header_validations_v2(headers, options) + end + end + end + + context 'with an invalid validation type' do + let(:invalid_validation) { 123 } # Using an integer as an invalid type + let(:options) { { v2_mode: true, header_validations: [invalid_validation] } } + + it 'raises an IncorrectOption error' do + expect { described_class.header_validations_v2(headers, options) }.to raise_error(SmarterCSV::IncorrectOption, /Invalid validation type/) + end + end + end + end +end diff --git a/spec/smarter_csv/option_validations_spec.rb b/spec/smarter_csv/option_validations_spec.rb index 9f070944..f497b1d3 100644 --- a/spec/smarter_csv/option_validations_spec.rb +++ b/spec/smarter_csv/option_validations_spec.rb @@ -1,7 +1,5 @@ # frozen_string_literal: true -require 'spec_helper' - fixture_path = 'spec/fixtures' describe 'option validations' do diff --git a/spec/smarter_csv/options_processing_spec.rb b/spec/smarter_csv/options_processing_spec.rb index 61b65732..2deffe61 100644 --- 
a/spec/smarter_csv/options_processing_spec.rb +++ b/spec/smarter_csv/options_processing_spec.rb @@ -1,6 +1,8 @@ # frozen_string_literal: true -require 'spec_helper' +def computed_default_options(options) + SmarterCSV.send(:compute_default_options, options) +end describe 'options processing' do describe '#process_options' do @@ -15,19 +17,68 @@ it 'it has the correct default options, when no input is given' do generated_options = SmarterCSV.process_options({}) - expect(generated_options).to eq SmarterCSV::DEFAULT_OPTIONS + expect(generated_options).to eq computed_default_options({}) end it 'lets the user clear out all default options' do options = {defaults: :none} generated_options = SmarterCSV.process_options(options) - expect(generated_options).to eq options.merge(SmarterCSV::DEFAULT_OPTIONS) + expect(generated_options).to eq options.merge(computed_default_options(options)) + end + + it 'works with frozen options hash' do + options = {chunk_size: 1}.freeze + generated_options = SmarterCSV.process_options(options) + expect(generated_options[:chunk_size]).to eq 1 end it 'corrects :invalid_byte_sequence if nil is given' do generated_options = SmarterCSV.process_options(invalid_byte_sequence: nil) expect(generated_options[:invalid_byte_sequence]).to eq '' end + + context 'when verbose option is true' do + it 'outputs the given options' do + options = { verbose: true } + expect { SmarterCSV.process_options(options) }.to output(/User provided options:/).to_stdout + end + end + + context 'when deprecated options are used in non-v2 mode' do + SmarterCSV::DEPRECATED_OPTIONS.each do |deprecated_option| + context "with deprecated option #{deprecated_option}" do + it 'outputs a warning' do + options = { deprecated_option => true, v2_mode: false } + expect { SmarterCSV.process_options(options) }.to output(/WARNING: SmarterCSV/).to_stdout + end + + it 'does not output warning when deprecations are silenced' do + options = { convert_values_to_numeric: true, v2_mode: false, 
silence_deprecations: true } + expect { SmarterCSV.process_options(options) }.not_to raise_error + expect { SmarterCSV.process_options(options) }.not_to output.to_stdout + end + end + end + end + + context 'when deprecated options are used in v2 mode' do + SmarterCSV::DEPRECATED_OPTIONS.each do |deprecated_option| + context "with deprecated option #{deprecated_option}" do + it 'raises a DeprecatedOptions error' do + options = { deprecated_option => true, v2_mode: true } + expect { SmarterCSV.process_options(options) }.to raise_error(SmarterCSV::DeprecatedOptions) + end + end + end + end + + context 'when silence_deprecations option is true' do + it 'does not raise a DeprecatedOptions error nor output a warning even with deprecated options' do + options = { convert_values_to_numeric: true, v2_mode: true, silence_deprecations: true } + expect { SmarterCSV.process_options(options) }.not_to raise_error + expect { SmarterCSV.process_options(options) }.not_to output.to_stdout + end + end end describe '#validate_options!' do @@ -59,7 +110,18 @@ describe '#default_options' do it 'surfaces the DEFAULT_OPTIONS hash' do - expect(SmarterCSV.default_options).to eq SmarterCSV::DEFAULT_OPTIONS + expect(SmarterCSV.default_options).to eq computed_default_options({}) + end + end + + describe 'v2_mode' do + it 'defaults to false, aka v1' do + expect(SmarterCSV.default_options[:v2_mode]).to eq false + end + + it 'can be switched to v2_mode' do + parsed_options = SmarterCSV.process_options(v2_mode: true) + expect(parsed_options[:v2_mode]).to eq true end end end diff --git a/spec/smarter_csv/parse/column_separator_spec.rb b/spec/smarter_csv/parse/column_separator_spec.rb index eb8ecaf4..74ed7de3 100644 --- a/spec/smarter_csv/parse/column_separator_spec.rb +++ b/spec/smarter_csv/parse/column_separator_spec.rb @@ -12,8 +12,6 @@ # code paths are behaving identically. 
# ------------------------------------------------------------------------------------------ -require 'spec_helper' - [true, false].each do |bool| describe "fulfills RFC-4180 and more with#{bool ? ' C-' : 'out '}acceleration" do describe 'parse with col_sep' do diff --git a/spec/smarter_csv/parse/max_size_spec.rb b/spec/smarter_csv/parse/max_size_spec.rb index fa2f7e44..8bbadc72 100644 --- a/spec/smarter_csv/parse/max_size_spec.rb +++ b/spec/smarter_csv/parse/max_size_spec.rb @@ -12,8 +12,6 @@ # code paths are behaving identically. # ------------------------------------------------------------------------------------------ -require 'spec_helper' - # the purpose of the max_size parameter is to handle a corner case where # CSV lines contain more fields than the header. # In which case the remaining fields in the line are ignored diff --git a/spec/smarter_csv/parse/old_csv_library_spec.rb b/spec/smarter_csv/parse/old_csv_library_spec.rb index 9231f7d4..c106bc80 100644 --- a/spec/smarter_csv/parse/old_csv_library_spec.rb +++ b/spec/smarter_csv/parse/old_csv_library_spec.rb @@ -12,8 +12,6 @@ # code paths are behaving identically. # ------------------------------------------------------------------------------------------ -require 'spec_helper' - [true, false].each do |bool| describe "fulfills RFC-4180 and more with#{bool ? ' C-' : 'out '}acceleration" do describe 'old CSV library parsing tests' do diff --git a/spec/smarter_csv/parse/rfc4180_and_more_spec.rb b/spec/smarter_csv/parse/rfc4180_and_more_spec.rb index e315cf2a..b47894d1 100644 --- a/spec/smarter_csv/parse/rfc4180_and_more_spec.rb +++ b/spec/smarter_csv/parse/rfc4180_and_more_spec.rb @@ -12,8 +12,6 @@ # code paths are behaving identically. # ------------------------------------------------------------------------------------------ -require 'spec_helper' - [true, false].each do |bool| describe "fulfills RFC-4180 and more with#{bool ? 
' C-' : 'out '}acceleration" do let(:options) { {col_sep: ',', row_sep: $INPUT_RECORD_SEPARATOR, quote_char: '"', acceleration: bool } } diff --git a/spec/smarter_csv/v2_header_transformations_spec.rb b/spec/smarter_csv/v2_header_transformations_spec.rb new file mode 100644 index 00000000..2dfeeab3 --- /dev/null +++ b/spec/smarter_csv/v2_header_transformations_spec.rb @@ -0,0 +1,287 @@ +# frozen_string_literal: true + +fixture_path = 'spec/fixtures' +RSpec.describe SmarterCSV do + describe 'something .header_transformations_v2' do + # it 'with dashes in header fields as symbols when using v1 defaults' do + # options = { + # defaults: 'v1' + # } + # data = SmarterCSV.process("#{fixture_path}/with_dashes.csv", options) + + # expect(data.size).to eq 5 + # expect(data[0][:first_name]).to eq 'Dan' + # expect(data[0][:last_name]).to eq 'McAllister' + # end + + # it 'with dashes in header fields as symbols when using safe defaults' do + # options = { + # defaults: 'safe' + # } + # data = SmarterCSV.process("#{fixture_path}/with_dashes.csv", options) + + # expect(data.size).to eq 5 + # expect(data[0][:first_name]).to eq 'Dan' + # expect(data[0][:last_name]).to eq 'McAllister' + # end + + # Unit Tests + + context 'when transformation is an invalid type' do + it 'raises an ArgumentError' do + header_array = ['header1', 'header2'] + invalid_transformation = 'invalid' + options = { v2_mode: true, header_transformations: [invalid_transformation] } + + expect { SmarterCSV.header_transformations_v2(header_array, options) }.to raise_error(SmarterCSV::IncorrectOption, "Invalid transformation type: String") + end + end + + context 'when transformation is a symbol / pre-defined in SmarterCSV module' do + it 'applies the predefined :keys_as_strings transformation method' do + header_array = ['Header1', 'Header2'] + options = { v2_mode: true, header_transformations: [:keys_as_strings] } + + expect(SmarterCSV).to receive(:keys_as_strings).with(header_array, options).and_call_original + 
result = SmarterCSV.header_transformations_v2(header_array, options) + + expect(result).to eq(['header1', 'header2']) + end + + it 'applies the predefined :downcase_headers transformation method' do + header_array = ['Header1', 'Header2'] + options = { v2_mode: true, header_transformations: [:downcase_headers] } + + expect(SmarterCSV).to receive(:downcase_headers).with(header_array, options).and_call_original + result = SmarterCSV.header_transformations_v2(header_array, options) + + expect(result).to eq(['header1', 'header2']) + end + + it 'applies the predefined :keys_as_symbols transformation method' do + header_array = ['Header1', 'Header2'] + options = { v2_mode: true, header_transformations: [:keys_as_symbols] } + + expect(SmarterCSV).to receive(:keys_as_symbols).with(header_array, options).and_call_original + result = SmarterCSV.header_transformations_v2(header_array, options) + + expect(result).to eq([:header1, :header2]) + end + end + + context 'when transformation with arguments is passed-in as a top-level hash' do + context 'when using a Proc' do + let(:custom_transformation) do + Proc.new do |headers, args, _options| + suffix = args.first + headers.map { |header| "#{header}_#{suffix}" } + end + end + + it 'applies the transformation method with arguments, processed as an array' do + header_array = ['header1', 'header2'] + options = { v2_mode: true, header_transformations: { custom_transformation => 'arg' } } + + expect(SmarterCSV).to receive(:apply_transformation).with(custom_transformation, header_array, ['arg'], options).and_call_original + result = SmarterCSV.header_transformations_v2(header_array, options) + + expect(result).to eq(['header1_arg', 'header2_arg']) + end + end + end + + context 'when transformation with arguments is passed-in via hash' do + context 'when using a Proc' do + let(:custom_transformation) do + Proc.new do |headers, args, _options| + headers.map { |header| "#{header}_#{args}" } # no .first ! 
+ end + end + + it 'applies the transformation method with arguments, processed as a hash' do + header_array = ['header1', 'header2'] + options = { v2_mode: true, header_transformations: [{ custom_transformation => 'arg' }] } + + expect(SmarterCSV).to receive(:apply_transformation).with(custom_transformation, header_array, 'arg', options).and_call_original + result = SmarterCSV.header_transformations_v2(header_array, options) + + expect(result).to eq(['header1_arg', 'header2_arg']) + end + end + end + + context 'when transformation with arguments is passed-in an array' do + context 'when using a Proc' do + let(:apply_suffix) do + Proc.new do |headers, arg, _options| + suffix = arg.first + headers.map { |header| "#{header}_#{suffix}" } + end + end + + it 'applies the transformation method with array arguments' do + header_array = ['header1', 'header2'] + options = { v2_mode: true, header_transformations: [[apply_suffix, 'sfx']] } + + expect(SmarterCSV).to receive(:apply_transformation).with(apply_suffix, header_array, ['sfx'], options).and_call_original + result = SmarterCSV.header_transformations_v2(header_array, options) + + expect(result).to eq(['header1_sfx', 'header2_sfx']) + end + end + end + + context 'when using :key_mapping transformation' do + let(:header_array) { ['header1', 'header2'] } + it 'raises an exception if invalid mapping is passed-in' do + options = { v2_mode: true, header_transformations: [{key_mapping: []}] } + + expect { SmarterCSV.header_transformations_v2(header_array, options) }.to raise_error(SmarterCSV::IncorrectOption, "ERROR: incorrect format for key_mapping! 
Expecting hash with from -> to mappings") + end + + it 'applies the transformation method with array arguments' do + mapping = {'header1' => 'mapped1', 'header2' => 'mapped2'} + options = { v2_mode: true, header_transformations: [{key_mapping: mapping}] } + + expect(SmarterCSV).to receive(:apply_transformation).with(:key_mapping, header_array, mapping, options).and_call_original + + result = SmarterCSV.header_transformations_v2(header_array, options) + + expect(result).to eq(['mapped1', 'mapped2']) + end + + it 'can delete a column when mapped to nil, and map other columns' do + mapping = {'header1' => nil, 'header2' => 'mapped2'} + options = { v2_mode: true, header_transformations: [{key_mapping: mapping}] } + + expect(SmarterCSV).to receive(:apply_transformation).with(:key_mapping, header_array, mapping, options).and_call_original + + result = SmarterCSV.header_transformations_v2(header_array, options) + + expect(result).to eq([nil, 'mapped2']) + end + + context 'when mapping only one key' do + let(:mapping) { {'header2' => 'mapped2'} } + let(:options) { { v2_mode: true, header_transformations: [{key_mapping: mapping}] } } + + it 'removes unmapped keys' do + expect(SmarterCSV).to receive(:apply_transformation).with(:key_mapping, header_array, mapping, options).and_call_original + + result = SmarterCSV.header_transformations_v2(header_array, options) + + expect(result).to eq(['header1', 'mapped2']) + end + end + end + + # Functional tests + + context 'using built-in transformations' do + it 'with dashes in header fields as strings' do + options = { + v2_mode: true, + header_transformations: [:none, :keys_as_strings], + } + data = SmarterCSV.process("#{fixture_path}/with_dashes.csv", options) + expect(data.size).to eq 5 + expect(data[0]['first_name']).to eq 'Dan' + expect(data[0]['last_name']).to eq 'McAllister' + end + + it 'no transformations: with dashes in header fields as is' do + options = { + v2_mode: true, + header_transformations: [:none] + } + data = 
SmarterCSV.process("#{fixture_path}/with_dashes.csv", options) + expect(data.size).to eq 5 + expect(data[0]['First-Name']).to eq 'Dan' + expect(data[0]['Last-Name']).to eq 'McAllister' + end + + it 'with dashes in header fields as symbols' do + options = { + v2_mode: true, + header_transformations: [:none, :keys_as_symbols] + } + data = SmarterCSV.process("#{fixture_path}/with_dashes.csv", options) + + expect(data.size).to eq 5 + expect(data[0][:first_name]).to eq 'Dan' + expect(data[0][:last_name]).to eq 'McAllister' + end + + context "key_mapping" do + it 'can delete a column with :key_mapping when mapped to nil' do + mapping = {'Dogs' => :wuff, 'Cats' => :miau} + options = { v2_mode: true, header_transformations: [{key_mapping: mapping}] } + + data = SmarterCSV.process("#{fixture_path}/with_dashes.csv", options) + + expect(data.size).to eq 5 + expect(data.first.keys).to eq ["First-Name", "Last-Name", :wuff, :miau] + expect(data[0]["First-Name"]).to eq "Dan" + expect(data[0]["Last-Name"]).to eq 'McAllister' + expect(data[0][:wuff]).to eq 2 + expect(data[0][:miau]).to eq 0 + end + + it 'can delete a column with :key_mapping when mapped to nil' do + mapping = {'Dogs' => nil} + options = { v2_mode: true, header_transformations: [{key_mapping: mapping}] } + + data = SmarterCSV.process("#{fixture_path}/with_dashes.csv", options) + + expect(data.size).to eq 5 + expect(data.first.keys).to eq ["First-Name", "Last-Name", "Cats"] + data.each do |hash| + expect(hash.keys.include?("Dogs")).to eq false + end + end + end + end + + context 'with provided transformations' do + # user-provided custom transformation + let(:camelcase) do + Proc.new do |headers, _options| + headers.map do |header| + header.strip.downcase.gsub(/(\s|-)+/, '_').split('_').map(&:capitalize).join + end + end + end + + it 'applies the custom transformation' do + options = { + v2_mode: true, + header_transformations: [:none, camelcase], + } + data = SmarterCSV.process("#{fixture_path}/with_dashes.csv", 
options) + expect(data.size).to eq 5 + expect(data[0]['FirstName']).to eq 'Dan' + expect(data[0]['LastName']).to eq 'McAllister' + end + end + + context 'using transformations that have arguments' do + let(:prefix_proc) do + Proc.new do |headers, args, _options| + headers.map { |header| "#{args.first}_#{header}" } + end + end + + it 'with dashes in header fields as strings' do + options = { + v2_mode: true, + header_transformations: [:none, [prefix_proc, 'pre']], + } + data = SmarterCSV.process("#{fixture_path}/with_dashes.csv", options) + + expect(data.size).to eq 5 + expect(data[0]['pre_First-Name']).to eq 'Dan' + expect(data[0]['pre_Last-Name']).to eq 'McAllister' + end + end + end +end diff --git a/spec/smarter_csv/v2_header_validations_spec.rb b/spec/smarter_csv/v2_header_validations_spec.rb new file mode 100644 index 00000000..68246c81 --- /dev/null +++ b/spec/smarter_csv/v2_header_validations_spec.rb @@ -0,0 +1,187 @@ +# frozen_string_literal: true + +fixture_path = 'spec/fixtures' +RSpec.describe SmarterCSV do + describe 'something .header_validations_v2' do + let(:options) { { v2_mode: true } } + + describe '#unique_headers' do + context 'when all headers are unique' do + it 'does not raise an error' do + headers = ['header1', 'header2', 'header3'] + expect { subject.unique_headers(headers) }.not_to raise_error + end + end + + context 'when there are duplicate headers' do + it 'raises a SmarterCSV::DuplicateHeaders error' do + headers = ['header1', 'header2', 'header1'] + expect { subject.unique_headers(headers) }.to raise_error(SmarterCSV::DuplicateHeaders) + end + + it 'includes the duplicate headers in the error message' do + headers = ['header1', 'header2', 'header1'] + expect { subject.unique_headers(headers) }.to raise_error(/Duplicate Headers in CSV: \{"header1"=>2\}/) + end + end + + context 'when headers array contains nil values' do + it 'ignores nil values and checks for duplicates among non-nil headers' do + headers = ['header1', nil, 
'header1', 'header2', nil] + expect { subject.unique_headers(headers) }.to raise_error( + SmarterCSV::DuplicateHeaders, /Duplicate Headers in CSV: \{"header1"=>2\}/ + ) + end + end + end + + describe 'required_headers: test exceptions for invalid headers' do + let(:options) { {v2_mode: true} } + + it 'does not raise an error if required_headers not provided' do + data = SmarterCSV.process("#{fixture_path}/user_import.csv", options) + expect(data.size).to eq 2 + end + + it 'raises an error if invalid arguments are given' do + options[:header_validations] = [{required_headers: nil}] + expect {SmarterCSV.process("#{fixture_path}/user_import.csv", options)}.to raise_error( + SmarterCSV::IncorrectOption, "ERROR: required_headers validation needs an array argument" + ) + end + + context "required_headers: if keys are missing after mapping" do + it 'does not raise an error if required headers are empty' do + options[:header_validations] = [{required_headers: []}] # order does not matter + data = SmarterCSV.process("#{fixture_path}/user_import.csv", options) + expect(data.size).to eq 2 + end + + it 'does not raise an error if the required headers are present' do + options[:header_validations] = [{required_headers: %w[lastname email firstname manager_email]}] # order does not matter + data = SmarterCSV.process("#{fixture_path}/user_import.csv", options) + expect(data.size).to eq 2 + end + + it 'does not raise an error if the required headers are present (after mapping)' do + options[:header_transformations] = [:none, :keys_as_symbols] + options[:header_validations] = [{required_headers: %i[lastname email firstname manager_email]}] # order does not matter + data = SmarterCSV.process("#{fixture_path}/user_import.csv", options) + expect(data.size).to eq 2 + end + + it 'raises an error if a required header is missing' do + expect do + options[:header_transformations] = [:none, :keys_as_symbols] + options[:header_validations] = [{required_headers: %i[lastname email employee_id 
firstname manager_email]}] # order does not matter + SmarterCSV.process("#{fixture_path}/user_import.csv", options) + end.to raise_exception( + SmarterCSV::MissingKeys, "Missing Headers in CSV: [:employee_id]" + ) + end + + it 'raises error on missing mapped headers' do + options[:header_transformations] = [:keys_as_symbols, {key_mapping: {email: :something_was_mapped}}] + options[:header_validations] = [{ required_headers: [:email]}] + expect do + SmarterCSV.process("#{fixture_path}/user_import.csv", options) + end.to raise_exception( + SmarterCSV::MissingKeys, "Missing Headers in CSV: [:email]" # it was mapped, and is now missing + ) + end + end + + # TO BE FIXED: + # + # this raises: SmarterCSV::MissingKeys: RROR: missing attributes: middle_name + # but instead, the printed WARNING message for missing_keys should raise KeyMappingError + # See: Issue 139 https://github.com/tilo/smarter_csv/issues/139 + # + context 'mapping_keys: exception for missing keys / header names' do + subject(:process_file) { SmarterCSV.process("#{fixture_path}/user_import.csv", options) } + + context 'when one key_mapping key is missing' do + let(:options) do + { + required_headers: [:middle_name], + key_mapping: { missing_key: :middle_name}, + } + end + + it 'raises exception that header for the key mapping is missing in file' do + expect(SmarterCSV).not_to receive(:puts).with a_string_matching(/WARNING.*missing_key/) + # we do not expect version 1.8 behavior: + expect{ process_file }.not_to raise_exception( + SmarterCSV::MissingKeys, "ERROR: missing attributes: middle_name" + ) + # we expect version 1.9 behavior: + expect{ process_file }.to raise_exception( + SmarterCSV::KeyMappingError, "ERROR: can not map headers: missing_key" + ) + end + end + + context "when multiple keys are missing" do + let(:options) do + { key_mapping: { missing_key: :middle_name, other_missing_key: :other } } + end + + it 'raises exception that headers for the key mapping are missing in the file' do + 
expect(SmarterCSV).not_to receive(:puts).with a_string_matching(/WARNING.*missing_key/) + expect{ process_file }.to raise_exception( + SmarterCSV::KeyMappingError, "ERROR: can not map headers: missing_key, other_missing_key" + ) + end + + it "does not raise any exception when :silence_missing_keys is true" do + options[:silence_missing_keys] = true + expect(SmarterCSV).not_to receive(:puts).with a_string_matching(/WARNING.*missing_key/) + expect{ process_file }.not_to raise_exception + end + end + + context "when slience_missing_keys is used" do + let(:options) do + { + required_headers: [:middle_name], + key_mapping: { missing_key: :middle_name, other_optional_key: :other }, + } + end + + context "when invalid key_mapping is given" do + it "does not raise a KeyMappingError exception when :silence_missing_keys is true" do + options[:silence_missing_keys] = true + expect(SmarterCSV).not_to receive(:puts).with a_string_matching(/WARNING.*missing_key/) + expect{ process_file }.not_to raise_exception SmarterCSV::KeyMappingError + # still raises an error because :middle_name is required + expect{ process_file }.to raise_exception( + SmarterCSV::MissingKeys, /ERROR: missing attributes: middle_name/ + ) + end + end + + it "does not raise an exception when :silence_missing_keys is an array containing the missing key" do + options[:silence_missing_keys] = [:missing_key, :other_optional_key] + expect(SmarterCSV).not_to receive(:puts).with a_string_matching(/WARNING.*missing_key/) + expect{ process_file }.not_to raise_exception( + SmarterCSV::KeyMappingError, /ERROR: can not map headers: missing_key/ + ) + # still raises an error because :middle_name is required + expect{ process_file }.to raise_exception( + SmarterCSV::MissingKeys, /ERROR: missing attributes: middle_name/ + ) + end + + it "raises an exception when :silence_missing_keys is an array but does not contain the missing key" do + options[:silence_missing_keys] = [:other_optional_key] + expect(SmarterCSV).not_to 
receive(:puts).with a_string_matching(/WARNING.*missing_key/) + # raises KeyMappingError because :missing_key is required: + expect{ process_file }.to raise_exception( + SmarterCSV::KeyMappingError, "ERROR: can not map headers: missing_key" + ) + end + end + end + end + end +end