diff --git a/.circleci/config.yml b/.circleci/config.yml index 2361409c4c9..c1e870b8fd7 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -32,7 +32,6 @@ jobs: - run: pip install pyarrow==1.0.0 - run: HF_SCRIPTS_VERSION=master python -m pytest -sv ./tests/ - run_dataset_script_tests_pyarrow_latest_WIN: working_directory: ~/datasets executor: @@ -82,6 +81,7 @@ jobs: - run: isort --check-only tests src benchmarks datasets metrics - run: flake8 tests src benchmarks datasets metrics - run: ./scripts/datasets_metadata_validator.py + - run: ./scripts/datasets_readme_validator.py build_doc: working_directory: ~/datasets @@ -100,8 +100,8 @@ jobs: - image: circleci/python:3.6 steps: - add_ssh_keys: - fingerprints: - - "5b:7a:95:18:07:8c:aa:76:4c:60:35:88:ad:60:56:71" + fingerprints: + - "5b:7a:95:18:07:8c:aa:76:4c:60:35:88:ad:60:56:71" - checkout - run: sudo pip install .[docs] - run: ./.circleci/deploy.sh diff --git a/scripts/datasets_readme_validator.py b/scripts/datasets_readme_validator.py new file mode 100755 index 00000000000..af0cc05445d --- /dev/null +++ b/scripts/datasets_readme_validator.py @@ -0,0 +1,57 @@ +#!/usr/bin/env python + +""" This script will run in CI and make sure all new changes to datasets readme files have valid readme content.""" + +from pathlib import Path +from subprocess import check_output +from typing import List + +from datasets.utils.readme import ReadMe + + +def get_changed_files(repo_path: Path) -> List[Path]: + diff_output = check_output(["git", "diff", "--name-only", "HEAD..origin/master"], cwd=repo_path) + changed_files = [Path(repo_path, f) for f in diff_output.decode().splitlines()] + return changed_files + + +if __name__ == "__main__": + import logging + from argparse import ArgumentParser + + logging.basicConfig(level=logging.DEBUG) + + ap = ArgumentParser() + ap.add_argument("--repo_path", type=Path, default=Path.cwd()) + ap.add_argument("--check_all", action="store_true") + args = ap.parse_args() + + repo_path: Path 
= args.repo_path + if args.check_all: + readmes = [dd / "README.md" for dd in (repo_path / "datasets").iterdir()] + else: + changed_files = get_changed_files(repo_path) + readmes = [ + f + for f in changed_files + if f.exists() and f.name.lower() == "readme.md" and f.parent.parent.name == "datasets" + ] + + failed: List[Path] = [] + for readme in sorted(readmes): + try: + ReadMe.from_readme(readme) + logging.debug(f"✅️ Validated '{readme.relative_to(repo_path)}'") + except ValueError as e: + failed.append(readme) + logging.warning(f"❌ Validation failed for '{readme.relative_to(repo_path)}':\n{e}") + except Exception as e: + failed.append(readme) + logging.warning(f"⁉️ Something unexpected happened on '{readme.relative_to(repo_path)}':\n{e}") + + if len(failed) > 0: + logging.info(f"❌ Failed on {len(failed)} files.") + exit(1) + else: + logging.info("All is well, keep up the good work 🤗!") + exit(0) diff --git a/setup.py b/setup.py index c1d7542119d..0a87810e8d9 100644 --- a/setup.py +++ b/setup.py @@ -216,7 +216,7 @@ license="Apache 2.0", package_dir={"": "src"}, packages=find_packages("src"), - package_data={"datasets": ["scripts/templates/*"], "datasets.utils.resources": ["*.json"]}, + package_data={"datasets": ["scripts/templates/*"], "datasets.utils.resources": ["*.json", "*.yaml"]}, entry_points={"console_scripts": ["datasets-cli=datasets.commands.datasets_cli:main"]}, install_requires=REQUIRED_PKGS, extras_require=EXTRAS_REQUIRE, diff --git a/src/datasets/utils/readme.py b/src/datasets/utils/readme.py new file mode 100644 index 00000000000..45b8713f085 --- /dev/null +++ b/src/datasets/utils/readme.py @@ -0,0 +1,279 @@ +import logging +from dataclasses import dataclass +from pathlib import Path +from typing import Any, List, Tuple + +import yaml + + +# loading package files: https://stackoverflow.com/a/20885799 +try: + import importlib.resources as pkg_resources +except ImportError: + # Try backported to PY<37 `importlib_resources`. 
+ import importlib_resources as pkg_resources + +from . import resources + + +BASE_REF_URL = "https://github.com/huggingface/datasets/tree/master/src/datasets/utils" +this_url = f"{BASE_REF_URL}/{__file__}" +logger = logging.getLogger(__name__) + + +def load_yaml_resource(resource: str) -> Tuple[Any, str]: + content = pkg_resources.read_text(resources, resource) + return yaml.safe_load(content), f"{BASE_REF_URL}/resources/{resource}" + + +readme_structure, known_readme_structure_url = load_yaml_resource("readme_structure.yaml") + +FILLER_TEXT = [ + "[Needs More Information]", + "[More Information Needed]", + "(https://github.com/huggingface/datasets/blob/master/CONTRIBUTING.md#how-to-contribute-to-the-dataset-cards)", +] + +# Dictionary representation of section/readme, error_list, warning_list +ReadmeValidatorOutput = Tuple[dict, List[str], List[str]] + + +@dataclass +class Section: + name: str + level: str + lines: List[str] = None + + def __post_init__(self): + self.text = "" + self.is_empty_text = True + self.content = {} + self.parsing_error_list = [] + self.parsing_warning_list = [] + if self.lines is not None: + self.parse() + + def parse(self): + current_sub_level = "" + current_lines = [] + code_start = False + for line in self.lines: + if line.strip(" \n") == "": + continue + elif line.strip(" \n")[:3] == "```": + code_start = not code_start + elif line.split()[0] == self.level + "#" and not code_start: + if current_sub_level != "": + self.content[current_sub_level] = Section(current_sub_level, self.level + "#", current_lines) + current_lines = [] + else: + if current_lines != []: + self.text += "".join(current_lines).strip() + if self.text != "" and self.text not in FILLER_TEXT: + self.is_empty_text = False + current_lines = [] + + current_sub_level = " ".join(line.split()[1:]).strip(" \n") + else: + current_lines.append(line) + else: + if current_sub_level != "": + if current_sub_level in self.content: + 
self.parsing_error_list.append( + f"Multiple sections with the same heading `{current_sub_level}` have been found. Please keep only one of these sections." + ) + self.content[current_sub_level] = Section(current_sub_level, self.level + "#", current_lines) + else: + if current_lines != []: + self.text += "".join(current_lines).strip() + if self.text != "" and self.text not in FILLER_TEXT: + self.is_empty_text = False + + def validate(self, structure: dict) -> ReadmeValidatorOutput: + """Validates a Section class object recursively using the structure provided as a dictionary. + + Args: + structure (:obj: `dict`): The dictionary representing expected structure. + + Returns: + :obj: `ReadmeValidatorOutput`: The dictionary representation of the section, and the errors. + """ + # Header text validation + error_list = [] + warning_list = [] + if structure["allow_empty"] is False: + # If content is expected + if self.is_empty_text and self.content == {}: + # If no content is found, mention it in the error_list + error_list.append(f"Expected some content in section `{self.name}` but it is empty.") + + if structure["allow_empty_text"] is False: + # If some text is expected + if self.is_empty_text: + # If no text is found, mention it in the error_list + error_list.append( + f"Expected some text in section `{self.name}` but it is empty (text in subsections are ignored)." + ) + # Subsections Validation + if structure["subsections"] is not None: + # If subsections are expected + if self.content == {}: + # If no subsections are present + values = [subsection["name"] for subsection in structure["subsections"]] + # Mention the expected values in the error_list + error_list.append( + f"Section `{self.name}` expected the following subsections: {', '.join(['`'+x+'`' for x in values])}. Found 'None'."
+ ) + else: + # If some subsections are present + structure_names = [subsection["name"] for subsection in structure["subsections"]] + for idx, name in enumerate(structure_names): + if name not in self.content: + # If the expected subsection is not present + error_list.append(f"Section `{self.name}` is missing subsection: `{name}`.") + else: + # If the subsection is present, validate subsection, return the result + # and concat the errors from subsection to section error_list + + # Skip sublevel validation if current level is `###` + if self.level == "###": + continue + else: + _, subsec_error_list, subsec_warning_list = self.content[name].validate( + structure["subsections"][idx] + ) + error_list += subsec_error_list + warning_list += subsec_warning_list + + for name in self.content: + if name not in structure_names: + # If an extra subsection is present + warning_list.append( + f"`{self.name}` has an extra subsection: `{name}`. Skipping further validation checks for this subsection as expected structure is unknown." 
+ ) + error_list = self.parsing_error_list + error_list + warning_list = self.parsing_warning_list + warning_list + if error_list: + # If there are errors, do not return the dictionary as it is invalid + return {}, error_list, warning_list + else: + return self.to_dict(), error_list, warning_list + + def to_dict(self) -> dict: + """Returns the dictionary representation of a section.""" + return { + "name": self.name, + "text": self.text, + "is_empty_text": self.is_empty_text, + "subsections": [value.to_dict() for value in self.content.values()], + } + + +class ReadMe(Section): # Level 0 + def __init__(self, name: str, lines: List[str], structure: dict = None): + super().__init__(name=name, level="") # Not using lines here as we need to use a child class parse + self.structure = structure + self.yaml_tags_line_count = -2 + self.tag_count = 0 + self.lines = lines + if self.lines is not None: + self.parse() + + # Validation + if self.structure is None: + content, error_list, warning_list = self.validate(readme_structure) + else: + content, error_list, warning_list = self.validate(self.structure) + + error_list = self.parsing_error_list + error_list + warning_list = self.parsing_warning_list + warning_list + if error_list != [] or warning_list != []: + errors = "\n".join(list(map(lambda x: "-\t" + x, error_list + warning_list))) + error_string = f"The following issues were found for the README at `{self.name}`:\n" + errors + raise ValueError(error_string) + + @classmethod + def from_readme(cls, path: Path, structure: dict = None): + with open(path) as f: + lines = f.readlines() + return cls(path, lines, structure) + + @classmethod + def from_string(cls, string: str, structure: dict = None, root_name: str = "root"): + lines = string.split("\n") + return cls(root_name, lines, structure) + + def parse(self): + # Skip Tags + line_count = 0 + + for line in self.lines: + self.yaml_tags_line_count += 1 + if line.strip(" \n") == "---": + self.tag_count += 1 + if self.tag_count 
== 2: + break + line_count += 1 + if self.tag_count == 2: + self.lines = self.lines[line_count + 1 :] # Get the last + 1 th item. + else: + self.lines = self.lines[self.tag_count :] + super().parse() + + def __str__(self): + """Returns the string of dictionary representation of the ReadMe.""" + return str(self.to_dict()) + + def validate(self, readme_structure): + error_list = [] + warning_list = [] + if self.yaml_tags_line_count == 0: + warning_list.append("Empty YAML markers are present in the README.") + elif self.tag_count == 0: + warning_list.append("No YAML markers are present in the README.") + elif self.tag_count == 1: + warning_list.append("Only the start of YAML tags present in the README.") + # Check how many first level sections are present. + num_first_level_keys = len(self.content.keys()) + if num_first_level_keys > 1: + # If more than one, add to the error list, continue + error_list.append( + f"The README has several first-level headings: {', '.join(['`'+x+'`' for x in list(self.content.keys())])}. Only one heading is expected. Skipping further validation for this README." + ) + elif num_first_level_keys < 1: + # If less than one, append error. + error_list.append( + f"The README has no first-level headings. One heading is expected. Skipping further validation for this README." + ) + + else: + # If one exactly + start_key = list(self.content.keys())[0] # Get the key + if start_key.startswith("Dataset Card for"): # Check correct start + + # If the starting is correct, validate all the sections + _, sec_error_list, sec_warning_list = self.content[start_key].validate( + readme_structure["subsections"][0] + ) + error_list += sec_error_list + warning_list += sec_warning_list + else: + # If not found, append error + error_list.append( + f"No first-level heading starting with `Dataset Card for` found in README. Skipping further validation for this README." 
+ ) + if error_list: + # If there are errors, do not return the dictionary as it is invalid + return {}, error_list, warning_list + else: + return self.to_dict(), error_list, warning_list + + +if __name__ == "__main__": + from argparse import ArgumentParser + + ap = ArgumentParser(usage="Validate the content (excluding YAML tags) of a README.md file.") + ap.add_argument("readme_filepath") + args = ap.parse_args() + readme_filepath = Path(args.readme_filepath) + readme = ReadMe.from_readme(readme_filepath) diff --git a/src/datasets/utils/resources/readme_structure.yaml b/src/datasets/utils/resources/readme_structure.yaml new file mode 100644 index 00000000000..755483d1d4f --- /dev/null +++ b/src/datasets/utils/resources/readme_structure.yaml @@ -0,0 +1,116 @@ +name: "" # Filename comes here +allow_empty: false +allow_empty_text: true +subsections: + - name: "Dataset Card for X" # First-level markdown heading + allow_empty: false + allow_empty_text: true + subsections: + - name: "Table of Contents" + allow_empty: false + allow_empty_text: false + subsections: null # meaning it should not be checked. 
+ - name: "Dataset Description" + allow_empty: false + allow_empty_text: false + subsections: + - name: "Dataset Summary" + allow_empty: false + allow_empty_text: false + subsections: null + - name: "Supported Tasks and Leaderboards" + allow_empty: true + allow_empty_text: true + subsections: null + - name: Languages + allow_empty: true + allow_empty_text: true + subsections: null + - name: "Dataset Structure" + allow_empty: false + allow_empty_text: true + subsections: + - name: "Data Instances" + allow_empty: false + allow_empty_text: false + subsections: null + - name: "Data Fields" + allow_empty: false + allow_empty_text: false + subsections: null + - name: "Data Splits" + allow_empty: false + allow_empty_text: false + subsections: null + - name: "Dataset Creation" + allow_empty: false + allow_empty_text: true + subsections: + - name: "Curation Rationale" + allow_empty: true + allow_empty_text: true + subsections: null + - name: "Source Data" + allow_empty: false + allow_empty_text: true + subsections: + - name: "Initial Data Collection and Normalization" + allow_empty: true + allow_empty_text: true + subsections: null + - name: "Who are the source language producers?" + allow_empty: true + allow_empty_text: true + subsections: null + - name: "Annotations" + allow_empty: false + allow_empty_text: true + subsections: + - name: "Annotation process" + allow_empty: true + allow_empty_text: true + subsections: null + - name: "Who are the annotators?" 
+ allow_empty: true + allow_empty_text: true + subsections: null + - name: "Personal and Sensitive Information" + allow_empty: true + allow_empty_text: true + subsections: null + - name: "Considerations for Using the Data" + allow_empty: true + allow_empty_text: true + subsections: + - name: "Social Impact of Dataset" + allow_empty: true + allow_empty_text: true + subsections: null + - name: "Discussion of Biases" + allow_empty: true + allow_empty_text: true + subsections: null + - name: "Other Known Limitations" + allow_empty: true + allow_empty_text: true + subsections: null + - name: "Additional Information" + allow_empty: true + allow_empty_text: true + subsections: + - name: "Dataset Curators" + allow_empty: true + allow_empty_text: true + subsections: null + - name: "Licensing Information" + allow_empty: true + allow_empty_text: true + subsections: null + - name: "Citation Information" + allow_empty: false + allow_empty_text: true + subsections: null + - name: "Contributions" + allow_empty: false + allow_empty_text: false + subsections: null diff --git a/tests/test_readme_util.py b/tests/test_readme_util.py new file mode 100644 index 00000000000..711c6ea55c5 --- /dev/null +++ b/tests/test_readme_util.py @@ -0,0 +1,457 @@ +import re +import tempfile +from pathlib import Path + +import pytest +import yaml + +from datasets.utils.readme import ReadMe + + +# @pytest.fixture +# def example_yaml_structure(): + +example_yaml_structure = yaml.safe_load( + """\ +name: "" +allow_empty: false +allow_empty_text: true +subsections: + - name: "Dataset Card for X" # First-level markdown heading + allow_empty: false + allow_empty_text: true + subsections: + - name: "Table of Contents" + allow_empty: false + allow_empty_text: false + subsections: null + - name: "Dataset Description" + allow_empty: false + allow_empty_text: false + subsections: + - name: "Dataset Summary" + allow_empty: false + allow_empty_text: false + subsections: null + - name: "Supported Tasks and 
Leaderboards" + allow_empty: true + allow_empty_text: true + subsections: null + - name: Languages + allow_empty: false + allow_empty_text: true + subsections: null +""" +) + + +CORRECT_DICT = { + "name": "root", + "text": "", + "is_empty_text": True, + "subsections": [ + { + "name": "Dataset Card for My Dataset", + "text": "", + "is_empty_text": True, + "subsections": [ + {"name": "Table of Contents", "text": "Some text here.", "is_empty_text": False, "subsections": []}, + { + "name": "Dataset Description", + "text": "Some text here.", + "is_empty_text": False, + "subsections": [ + { + "name": "Dataset Summary", + "text": "Some text here.", + "is_empty_text": False, + "subsections": [], + }, + { + "name": "Supported Tasks and Leaderboards", + "text": "", + "is_empty_text": True, + "subsections": [], + }, + {"name": "Languages", "text": "Language Text", "is_empty_text": False, "subsections": []}, + ], + }, + ], + } + ], +} + + +README_CORRECT = """\ +--- +languages: +- zh +- en +--- + +# Dataset Card for My Dataset +## Table of Contents +Some text here. +## Dataset Description +Some text here. +### Dataset Summary +Some text here. +### Supported Tasks and Leaderboards +### Languages +Language Text +""" + + +README_CORRECT_FOUR_LEVEL = """\ +--- +languages: +- zh +- en +--- + +# Dataset Card for My Dataset +## Table of Contents +Some text here. +## Dataset Description +Some text here. +### Dataset Summary +Some text here. 
+#### Extra Ignored Subsection +### Supported Tasks and Leaderboards +### Languages +Language Text +""" + +CORRECT_DICT_FOUR_LEVEL = { + "name": "root", + "text": "", + "is_empty_text": True, + "subsections": [ + { + "name": "Dataset Card for My Dataset", + "text": "", + "is_empty_text": True, + "subsections": [ + {"name": "Table of Contents", "text": "Some text here.", "is_empty_text": False, "subsections": []}, + { + "name": "Dataset Description", + "text": "Some text here.", + "is_empty_text": False, + "subsections": [ + { + "name": "Dataset Summary", + "text": "Some text here.", + "is_empty_text": False, + "subsections": [ + { + "name": "Extra Ignored Subsection", + "text": "", + "is_empty_text": True, + "subsections": [], + } + ], + }, + { + "name": "Supported Tasks and Leaderboards", + "text": "", + "is_empty_text": True, + "subsections": [], + }, + {"name": "Languages", "text": "Language Text", "is_empty_text": False, "subsections": []}, + ], + }, + ], + } + ], +} + +README_EMPTY_YAML = """\ +--- +--- +# Dataset Card for My Dataset +## Table of Contents +Some text here. +## Dataset Description +Some text here. +### Dataset Summary +Some text here. +### Supported Tasks and Leaderboards +### Languages +Language Text +""" + +EXPECTED_ERROR_README_EMPTY_YAML = ( + "The following issues were found for the README at `{path}`:\n-\tEmpty YAML markers are present in the README." +) + +README_NO_YAML = """\ +# Dataset Card for My Dataset +## Table of Contents +Some text here. +## Dataset Description +Some text here. +### Dataset Summary +Some text here. +### Supported Tasks and Leaderboards +### Languages +Language Text +""" + +EXPECTED_ERROR_README_NO_YAML = ( + "The following issues were found for the README at `{path}`:\n-\tNo YAML markers are present in the README." +) + +README_INCORRECT_YAML = """\ +--- +# Dataset Card for My Dataset +## Table of Contents +Some text here. +## Dataset Description +Some text here. +### Dataset Summary +Some text here. 
+### Supported Tasks and Leaderboards +### Languages +Language Text +""" + +EXPECTED_ERROR_README_INCORRECT_YAML = "The following issues were found for the README at `{path}`:\n-\tOnly the start of YAML tags present in the README." + +README_MISSING_TEXT = """\ +--- +languages: +- zh +- en +--- + +# Dataset Card for My Dataset +## Table of Contents +Some text here. +## Dataset Description +Some text here. +### Dataset Summary +### Supported Tasks and Leaderboards +### Languages +Language Text +""" +EXPECTED_ERROR_README_MISSING_TEXT = "The following issues were found for the README at `{path}`:\n-\tExpected some content in section `Dataset Summary` but it is empty.\n-\tExpected some text in section `Dataset Summary` but it is empty (text in subsections are ignored)." + + +README_NONE_SUBSECTION = """\ +--- +languages: +- zh +- en +--- + +# Dataset Card for My Dataset +""" +EXPECTED_ERROR_README_NONE_SUBSECTION = "The following issues were found for the README at `{path}`:\n-\tExpected some content in section `Dataset Card for My Dataset` but it is empty.\n-\tSection `Dataset Card for My Dataset` expected the following subsections: `Table of Contents`, `Dataset Description`. Found 'None'." + +README_MISSING_SUBSECTION = """\ +--- +languages: +- zh +- en +--- + +# Dataset Card for My Dataset +## Table of Contents +Some text here. +## Dataset Description +Some text here. +### Dataset Summary +Some text here. +### Languages +Language Text +""" + +EXPECTED_ERROR_README_MISSING_SUBSECTION = "The following issues were found for the README at `{path}`:\n-\tSection `Dataset Description` is missing subsection: `Supported Tasks and Leaderboards`." + + +README_MISSING_CONTENT = """\ +--- +languages: +- zh +- en +--- + +# Dataset Card for My Dataset +## Table of Contents +Some text here. +## Dataset Description +Some text here. +### Dataset Summary +Some text here. 
+### Supported Tasks and Leaderboards +### Languages +""" + +EXPECTED_ERROR_README_MISSING_CONTENT = "The following issues were found for the README at `{path}`:\n-\tExpected some content in section `Languages` but it is empty." + +README_MISSING_FIRST_LEVEL = """\ +--- +languages: +- zh +- en +--- + +## Table of Contents +Some text here. +## Dataset Description +Some text here. +### Dataset Summary +Some text here. +### Supported Tasks and Leaderboards +### Languages +Language Text +""" +EXPECTED_ERROR_README_MISSING_FIRST_LEVEL = "The following issues were found for the README at `{path}`:\n-\tThe README has no first-level headings. One heading is expected. Skipping further validation for this README." + +README_MULTIPLE_WRONG_FIRST_LEVEL = """\ +--- +languages: +- zh +- en +--- + +# Dataset Card for My Dataset +## Table of Contents +Some text here. +## Dataset Description +Some text here. +### Dataset Summary +Some text here. +### Supported Tasks and Leaderboards +### Languages +Language Text +# Dataset Card My Dataset +""" + +EXPECTED_ERROR_README_MULTIPLE_WRONG_FIRST_LEVEL = "The following issues were found for the README at `{path}`:\n-\tThe README has several first-level headings: `Dataset Card for My Dataset`, `Dataset Card My Dataset`. Only one heading is expected. Skipping further validation for this README." + +README_WRONG_FIRST_LEVEL = """\ +--- +languages: +- zh +- en +--- + +# Dataset Card My Dataset +## Table of Contents +Some text here. +## Dataset Description +Some text here. +### Dataset Summary +Some text here. +### Supported Tasks and Leaderboards +### Languages +Language Text +""" + +EXPECTED_ERROR_README_WRONG_FIRST_LEVEL = "The following issues were found for the README at `{path}`:\n-\tNo first-level heading starting with `Dataset Card for` found in README. Skipping further validation for this README." 
+ +README_EMPTY = "" + +EXPECTED_ERROR_README_EMPTY = "The following issues were found for the README at `{path}`:\n-\tThe README has no first-level headings. One heading is expected. Skipping further validation for this README.\n-\tNo YAML markers are present in the README." + +README_MULTIPLE_SAME_HEADING_1 = """\ +--- +languages: +- zh +- en +--- + +# Dataset Card for My Dataset +# Dataset Card for My Dataset +## Table of Contents +Some text here. +## Dataset Description +Some text here. +### Dataset Summary +Some text here. +### Supported Tasks and Leaderboards +### Languages +Language Text +""" + +EXPECTED_ERROR_README_MULTIPLE_SAME_HEADING_1 = "The following issues were found for the README at `{path}`:\n-\tMultiple sections with the same heading `Dataset Card for My Dataset` have been found. Please keep only one of these sections." + + +@pytest.mark.parametrize( + "readme_md, expected_dict", + [ + (README_CORRECT, CORRECT_DICT), + (README_CORRECT_FOUR_LEVEL, CORRECT_DICT_FOUR_LEVEL), + ], +) +def test_readme_from_string_correct(readme_md, expected_dict): + assert ReadMe.from_string(readme_md, example_yaml_structure).to_dict() == expected_dict + + +@pytest.mark.parametrize( + "readme_md, expected_error", + [ + (README_NO_YAML, EXPECTED_ERROR_README_NO_YAML), + (README_EMPTY_YAML, EXPECTED_ERROR_README_EMPTY_YAML), + (README_INCORRECT_YAML, EXPECTED_ERROR_README_INCORRECT_YAML), + (README_EMPTY, EXPECTED_ERROR_README_EMPTY), + (README_NONE_SUBSECTION, EXPECTED_ERROR_README_NONE_SUBSECTION), + (README_MISSING_FIRST_LEVEL, EXPECTED_ERROR_README_MISSING_FIRST_LEVEL), + (README_MISSING_SUBSECTION, EXPECTED_ERROR_README_MISSING_SUBSECTION), + (README_MISSING_TEXT, EXPECTED_ERROR_README_MISSING_TEXT), + (README_MULTIPLE_SAME_HEADING_1, EXPECTED_ERROR_README_MULTIPLE_SAME_HEADING_1), + (README_WRONG_FIRST_LEVEL, EXPECTED_ERROR_README_WRONG_FIRST_LEVEL), + (README_MULTIPLE_WRONG_FIRST_LEVEL, EXPECTED_ERROR_README_MULTIPLE_WRONG_FIRST_LEVEL), + (README_MISSING_CONTENT, 
EXPECTED_ERROR_README_MISSING_CONTENT), + ], +) +def test_readme_from_string_errors(readme_md, expected_error): + with pytest.raises(ValueError, match=re.escape(expected_error.format(path="root"))): + ReadMe.from_string(readme_md, example_yaml_structure) + + +@pytest.mark.parametrize( + "readme_md, expected_dict", + [ + (README_CORRECT, CORRECT_DICT), + (README_CORRECT_FOUR_LEVEL, CORRECT_DICT_FOUR_LEVEL), + ], +) +def test_readme_from_readme_correct(readme_md, expected_dict): + with tempfile.TemporaryDirectory() as tmp_dir: + path = Path(tmp_dir) / "README.md" + with open(path, "w+") as readme_file: + readme_file.write(readme_md) + out = ReadMe.from_readme(path, example_yaml_structure).to_dict() + assert out["name"] == path + assert out["text"] == "" + assert out["is_empty_text"] + assert out["subsections"] == expected_dict["subsections"] + + +@pytest.mark.parametrize( + "readme_md, expected_error", + [ + (README_NO_YAML, EXPECTED_ERROR_README_NO_YAML), + (README_EMPTY_YAML, EXPECTED_ERROR_README_EMPTY_YAML), + (README_INCORRECT_YAML, EXPECTED_ERROR_README_INCORRECT_YAML), + (README_EMPTY, EXPECTED_ERROR_README_EMPTY), + (README_NONE_SUBSECTION, EXPECTED_ERROR_README_NONE_SUBSECTION), + (README_MISSING_FIRST_LEVEL, EXPECTED_ERROR_README_MISSING_FIRST_LEVEL), + (README_MISSING_SUBSECTION, EXPECTED_ERROR_README_MISSING_SUBSECTION), + (README_MISSING_TEXT, EXPECTED_ERROR_README_MISSING_TEXT), + (README_MULTIPLE_SAME_HEADING_1, EXPECTED_ERROR_README_MULTIPLE_SAME_HEADING_1), + (README_WRONG_FIRST_LEVEL, EXPECTED_ERROR_README_WRONG_FIRST_LEVEL), + (README_MULTIPLE_WRONG_FIRST_LEVEL, EXPECTED_ERROR_README_MULTIPLE_WRONG_FIRST_LEVEL), + (README_MISSING_CONTENT, EXPECTED_ERROR_README_MISSING_CONTENT), + ], +) +def test_readme_from_readme_error(readme_md, expected_error): + with tempfile.TemporaryDirectory() as tmp_dir: + path = Path(tmp_dir) / "README.md" + with open(path, "w+") as readme_file: + readme_file.write(readme_md) + expected_error = 
expected_error.format(path=path) + with pytest.raises(ValueError, match=re.escape(expected_error)): + ReadMe.from_readme(path, example_yaml_structure)