|
12 | 12 | from sdv.datasets.demo import ( |
13 | 13 | _download, |
14 | 14 | _find_data_zip_key, |
| 15 | + _find_text_key, |
15 | 16 | _get_data_from_bucket, |
16 | 17 | _get_first_v1_metadata_bytes, |
| 18 | + _get_text_file_content, |
17 | 19 | _iter_metainfo_yaml_entries, |
18 | 20 | download_demo, |
19 | 21 | get_available_demos, |
| 22 | + get_readme, |
| 23 | + get_source, |
20 | 24 | ) |
21 | 25 | from sdv.errors import DemoResourceNotFoundError |
22 | 26 |
|
@@ -579,3 +583,189 @@ def test_download_demo_no_v1_metadata_raises(mock_list, mock_get): |
579 | 583 | # Run and Assert |
580 | 584 | with pytest.raises(DemoResourceNotFoundError, match='METADATA_SPEC_VERSION'): |
581 | 585 | download_demo('single_table', 'word') |
| 586 | + |
| 587 | + |
def test__find_text_key_returns_none_when_missing():
    """Test ``_find_text_key`` gives ``None`` if no listed key matches the filename."""
    # Setup
    prefix = 'single_table/dataset/'
    listing = [
        {'Key': 'single_table/dataset/metadata.json'},
        {'Key': 'single_table/dataset/data.zip'},
    ]

    # Run
    result = _find_text_key(listing, prefix, 'README.txt')

    # Assert
    assert result is None
| 602 | + |
| 603 | + |
def test__find_text_key_ignores_nested_paths():
    """Test a matching filename inside a sub-folder of the dataset prefix is not returned."""
    # Setup
    prefix = 'single_table/dataset1/'
    nested_only = [{'Key': 'single_table/dataset1/bad_folder/SOURCE.txt'}]

    # Run
    result = _find_text_key(nested_only, prefix, 'SOURCE.txt')

    # Assert
    assert result is None
| 617 | + |
| 618 | + |
@patch('sdv.datasets.demo._get_data_from_bucket')
@patch('sdv.datasets.demo._list_objects')
def test__get_text_file_content_happy_path(mock_list, mock_get):
    """Test it returns the decoded text when the file is listed and fetched.

    The bucket listing and the download are both mocked, so no temporary
    directory is needed by this test.
    """
    # Setup
    mock_list.return_value = [
        {'Key': 'single_table/dataset1/README.txt'},
    ]
    mock_get.return_value = 'Hello README'.encode()

    # Run
    text = _get_text_file_content('single_table', 'dataset1', 'README.txt')

    # Assert
    assert text == 'Hello README'
| 634 | + |
| 635 | + |
@patch('sdv.datasets.demo._list_objects')
def test__get_text_file_content_missing_key_returns_none(mock_list):
    """Test ``None`` comes back when the listing does not contain the requested file."""
    # Setup
    mock_list.return_value = [{'Key': 'single_table/dataset1/metadata.json'}]

    # Run
    content = _get_text_file_content('single_table', 'dataset1', 'README.txt')

    # Assert
    assert content is None
| 649 | + |
| 650 | + |
@patch('sdv.datasets.demo._list_objects')
def test__get_text_file_content_logs_when_missing_key(mock_list, caplog):
    """Test a 'not found' message is logged when the file is absent from the listing."""
    # Setup
    mock_list.return_value = [{'Key': 'single_table/dataset1/metadata.json'}]

    # Run
    with caplog.at_level(logging.INFO, logger='sdv.datasets.demo'):
        content = _get_text_file_content('single_table', 'dataset1', 'README.txt')

    # Assert
    assert content is None
    assert 'No README.txt found for dataset dataset1.' in caplog.text
| 666 | + |
| 667 | + |
@patch('sdv.datasets.demo._get_data_from_bucket')
@patch('sdv.datasets.demo._list_objects')
def test__get_text_file_content_fetch_error_returns_none(mock_list, mock_get):
    """Test ``None`` comes back when downloading the listed file raises."""
    # Setup
    mock_get.side_effect = Exception('boom')
    mock_list.return_value = [{'Key': 'single_table/dataset1/SOURCE.txt'}]

    # Run
    content = _get_text_file_content('single_table', 'dataset1', 'SOURCE.txt')

    # Assert
    assert content is None
| 683 | + |
| 684 | + |
@patch('sdv.datasets.demo._get_data_from_bucket')
@patch('sdv.datasets.demo._list_objects')
def test__get_text_file_content_logs_on_fetch_error(mock_list, mock_get, caplog):
    """Test a fetch-error message is logged when downloading the listed file raises."""
    # Setup
    mock_get.side_effect = Exception('boom')
    mock_list.return_value = [{'Key': 'single_table/dataset1/SOURCE.txt'}]

    # Run
    with caplog.at_level(logging.INFO, logger='sdv.datasets.demo'):
        content = _get_text_file_content('single_table', 'dataset1', 'SOURCE.txt')

    # Assert
    assert content is None
    assert 'Error fetching SOURCE.txt for dataset dataset1.' in caplog.text
| 702 | + |
| 703 | + |
@patch('sdv.datasets.demo._get_data_from_bucket')
@patch('sdv.datasets.demo._list_objects')
def test__get_text_file_content_writes_file_when_output_filepath_given(
    mock_list, mock_get, tmp_path
):
    """Test the fetched text is both returned and saved at the given output path."""
    # Setup
    destination = tmp_path / 'subdir' / 'readme.txt'
    mock_list.return_value = [{'Key': 'single_table/dataset1/README.txt'}]
    mock_get.return_value = b'Write me'

    # Run
    content = _get_text_file_content(
        'single_table', 'dataset1', 'README.txt', str(destination)
    )

    # Assert
    assert content == 'Write me'
    saved = destination.read_text(encoding='utf-8')
    assert saved == 'Write me'
| 724 | + |
| 725 | + |
@patch('sdv.datasets.demo._get_data_from_bucket')
@patch('sdv.datasets.demo._list_objects')
def test__get_text_file_content_logs_on_save_error(
    mock_list, mock_get, tmp_path, caplog, monkeypatch
):
    """Test the text is still returned and a message is logged when saving fails.

    ``builtins.open`` is swapped for a function that raises ``OSError`` only for
    the duration of the call under test. Keeping the patch scoped means the
    assertions and pytest's own reporting run with the real ``open``.
    """
    # Setup
    mock_list.return_value = [
        {'Key': 'single_table/dataset1/README.txt'},
    ]
    mock_get.return_value = 'Write me'.encode()
    out = tmp_path / 'subdir' / 'readme.txt'

    def _fail_open(*args, **kwargs):
        raise OSError('fail-open')

    # Run
    caplog.set_level(logging.INFO, logger='sdv.datasets.demo')
    # Patching a builtin globally can break pytest internals, so undo it right after the call.
    with monkeypatch.context() as scoped_patch:
        scoped_patch.setattr('builtins.open', _fail_open)
        text = _get_text_file_content('single_table', 'dataset1', 'README.txt', str(out))

    # Assert
    assert text == 'Write me'
    assert 'Error saving README.txt for dataset dataset1.' in caplog.text
| 751 | + |
| 752 | + |
def test_get_readme_and_get_source_call_wrapper(monkeypatch):
    """Test ``get_readme`` and ``get_source`` delegate to ``_get_text_file_content``.

    The helper is replaced by a recording fake. The test checks that each public
    function returns the fake's value and forwards its arguments together with
    the expected filename (``README.txt`` / ``SOURCE.txt``), exactly once each.
    """
    # Setup
    calls = []

    def fake(modality, dataset_name, filename, output_filepath=None):
        calls.append((modality, dataset_name, filename, output_filepath))
        return 'X'

    monkeypatch.setattr('sdv.datasets.demo._get_text_file_content', fake)

    # Run
    readme = get_readme('single_table', 'dataset1', '/tmp/readme')
    source = get_source('single_table', 'dataset1', '/tmp/source')

    # Assert
    # Separate asserts so a failure report shows which of the two calls went wrong.
    assert readme == 'X'
    assert source == 'X'
    # Comparing the whole list also catches unexpected extra calls to the helper.
    assert calls == [
        ('single_table', 'dataset1', 'README.txt', '/tmp/readme'),
        ('single_table', 'dataset1', 'SOURCE.txt', '/tmp/source'),
    ]
0 commit comments