# Return 0 when the pull request title asks for type checking (contains
# "typing", case-insensitive), 1 otherwise.
#
# Only the <title> line of the PR page is inspected, the same way
# exec_type_checking does it; matching against the whole page would enable the
# check whenever the word "typing" appears anywhere in the PR discussion.
function need_type_checking() {
    set +x

    # check pr title
    TITLE_CHECK=$(curl -s https://github.com/PaddlePaddle/Paddle/pull/${GIT_PR_ID} | grep "<title>" | grep -i "typing" || true)

    if [[ -n "${TITLE_CHECK}" ]]; then
        set -x
        return 0
    else
        set -x
        return 1
    fi
}

# Install the wheel under test and run tools/type_checking.py on the example
# code. Exits with status 5 when type_checking.py reports a failure.
# A PR title containing "typing all" switches to `--full-test`.
function exec_type_checking() {
    if [ -d "${PADDLE_ROOT}/build/pr_whl" ];then
        pip install "${PADDLE_ROOT}"/build/pr_whl/*.whl
    else
        echo "WARNING: PR wheel is not found. Use develop wheel !!!"
        pip install "${PADDLE_ROOT}"/build/python/dist/*.whl
    fi

    python -c "import paddle;print(paddle.__version__);paddle.version.show()"

    cd "${PADDLE_ROOT}/tools"

    # check all sample code
    TITLE_CHECK_ALL=$(curl -s https://github.com/PaddlePaddle/Paddle/pull/${GIT_PR_ID} | grep "<title>" | grep -i "typing all" || true)

    if [[ -n "${TITLE_CHECK_ALL}" ]]; then
        python type_checking.py --full-test; type_checking_error=$?
    else
        python type_checking.py; type_checking_error=$?
    fi

    if [ "$type_checking_error" != "0" ];then
        echo "Example code type checking failed" >&2
        exit 5
    fi
}


# Run the sample-code tests (GPU first when WITH_GPU=ON, then CPU), optionally
# run the type checking, and print the summaries.
#
# The `{ var=$(cmd 2>&1 1>&3 3>/dev/null); } 3>&1` construct captures only the
# command's stderr in `var` while its stdout still goes to the caller's stdout.
function exec_samplecode_checking() {
    example_info_gpu=""
    example_code_gpu=0
    if [ "${WITH_GPU}" == "ON" ] ; then
        { example_info_gpu=$(exec_samplecode_test gpu 2>&1 1>&3 3>/dev/null); } 3>&1
        example_code_gpu=$?
    fi
    { example_info=$(exec_samplecode_test cpu 2>&1 1>&3 3>/dev/null); } 3>&1
    example_code=$?

    # Start from a known state so a value left over in the environment can
    # never be reported as this run's type checking result.
    type_checking_info=""
    type_checking_code=0

    # TODO(megemini): type_checking should be default after type annotation been done.
    need_type_checking
    type_checking_status=$?

    if [[ ${type_checking_status} -eq 0 ]]; then
        { type_checking_info=$(exec_type_checking 2>&1 1>&3 3>/dev/null); } 3>&1
        type_checking_code=$?
    fi

    summary_check_example_code_problems $((example_code_gpu + example_code)) "${example_info_gpu}\n${example_info}"

    if [[ ${type_checking_status} -eq 0 ]]; then
        summary_type_checking_problems $type_checking_code "$type_checking_info"
    fi
}


# Print the type checking report.
#   $1: exit status of the type checking step
#   $2: captured output of the type checking step
# Exits the script with status $1 when it is non-zero.
function summary_type_checking_problems() {
    set +x
    local type_checking_code=$1
    local type_checking_info=$2

    if [ $type_checking_code -ne 0 ];then
        echo "==============================================================================="
        echo "*****Example code type checking error***** Please fix the error listed in the information:"
        echo "==============================================================================="
        echo "$type_checking_info"
        echo "==============================================================================="
        echo "*****Example code type checking FAIL*****"
        echo "==============================================================================="
        exit $type_checking_code
    else
        echo "==============================================================================="
        echo "*****Example code type checking info*****"
        echo "==============================================================================="
        echo "$type_checking_info"
        echo "==============================================================================="
        echo "*****Example code type checking PASS*****"
        echo "==============================================================================="
    fi
    set -x
}
- summary_check_problems $[${example_code_gpu} + ${example_code}] "${example_info_gpu}\n${example_info}" + exec_samplecode_checking assert_api_spec_approvals ;; check_whl_size) @@ -4533,11 +4613,6 @@ function main() { build ${parallel_number} build_document_preview ;; - api_example) - { example_info=$(exec_samplecode_test cpu 2>&1 1>&3 3>/dev/null); } 3>&1 - example_code=$? - summary_check_problems $example_code "$example_info" - ;; test_op_benchmark) test_op_benchmark ;; diff --git a/pyproject.toml b/pyproject.toml index 4a4a5a73c5fdae..0391f1bf823f45 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -131,3 +131,32 @@ known-first-party = ["paddle"] "test/dygraph_to_static/test_loop.py" = ["C416", "F821"] # Ignore unnecessary lambda in dy2st unittest test_lambda "test/dygraph_to_static/test_lambda.py" = ["PLC3002"] + +[tool.mypy] +python_version = "3.8" +cache_dir = ".mypy_cache" +# Miscellaneous strictness flags +allow_redefinition = true +local_partial_types = true +strict = false +# Untyped definitions and calls +check_untyped_defs = true +# Import discovery +follow_imports = "normal" +# Miscellaneous +warn_unused_configs = true +# Configuring warnings +warn_redundant_casts = true +warn_unused_ignores = true +warn_no_return = true +# Configuring error messages +show_column_numbers = true + +[[tool.mypy.overrides]] +module = [ + "astor", + "cv2", + "scipy", + "xlsxwriter" +] +ignore_missing_imports = true diff --git a/python/unittest_py/requirements.txt b/python/unittest_py/requirements.txt index 15cf679177709c..40f16161ab71e3 100644 --- a/python/unittest_py/requirements.txt +++ b/python/unittest_py/requirements.txt @@ -19,3 +19,4 @@ wandb>=0.13 ; python_version<"3.12" xlsxwriter==3.0.9 xdoctest==1.1.1 ubelt==1.3.3 # just for xdoctest +mypy==1.10.0 diff --git a/tools/sampcd_processor_utils.py b/tools/sampcd_processor_utils.py index ff6de2b5983266..aaf61fcd88dc07 100644 --- a/tools/sampcd_processor_utils.py +++ b/tools/sampcd_processor_utils.py @@ -12,6 +12,8 @@ 
# Matches a member record such as
#   paddle.x (paddle.x, ('document', '<32-char md5>'))
# group(1) is the api name, group(2) the md5.
PAT_API_SPEC_MEMBER = re.compile(r'\((paddle[^,]+)\W*document\W*([0-9a-z]{32})')
# insert ArgSpec for changing the API's type annotation can trigger the CI
# Matches a signature record such as
#   paddle.x (ArgSpec(...), ('document', '<32-char md5>'))
# group(1) is the api name, group(2) the ArgSpec text, group(3) the md5.
PAT_API_SPEC_SIGNATURE = re.compile(
    r'^(paddle[^,]+)\s+\((ArgSpec.*),.*document\W*([0-9a-z]{32})'
)


def get_api_md5(path):
    """
    Read an api spec file and collect a digest string for every api in it.

    Args:
        path: the api spec file. It is joined onto ``<cwd>/..``, so a relative
            path is resolved against the parent of the working directory.

    Returns:
        dict: api full name -> digest. For member records the digest is the
        md5; for ArgSpec records it is ``"<ArgSpec text>, <md5>"`` so that a
        signature change alters the value too. Empty when the file is missing.
    """
    api_md5 = {}
    API_spec = os.path.abspath(os.path.join(os.getcwd(), "..", path))
    if not os.path.isfile(API_spec):
        return api_md5

    with open(API_spec) as f:
        for line in f:
            mo = PAT_API_SPEC_MEMBER.search(line)
            if mo:
                api_md5[mo.group(1)] = mo.group(2)
                continue

            mo = PAT_API_SPEC_SIGNATURE.search(line)
            # Lines matching neither pattern (blank lines, comments, ...) are
            # skipped instead of failing on `None.group`.
            if mo:
                api_md5[mo.group(1)] = f'{mo.group(2)}, {mo.group(3)}'

    return api_md5
''' import paddle import paddle.static.quantization # noqa: F401 @@ -616,6 +619,9 @@ def get_docstring(full_test=False): with open(API_DIFF_SPEC_FN) as f: for line in f.readlines(): api = line.replace('\n', '') + if filter_api is not None and filter_api(api.strip()): + continue + try: api_obj = eval(api) except AttributeError: @@ -637,7 +643,7 @@ def get_docstring(full_test=False): return docstrings_to_test, whl_error -def check_old_style(docstrings_to_test: typing.Dict[str, str]): +def check_old_style(docstrings_to_test: dict[str, str]): old_style_apis = [] for api_name, raw_docstring in docstrings_to_test.items(): for codeblock in extract_code_blocks_from_docstr( @@ -715,8 +721,8 @@ def exec_gen_doc(): def get_test_results( - doctester: DocTester, docstrings_to_test: typing.Dict[str, str] -) -> typing.List[TestResult]: + doctester: DocTester, docstrings_to_test: dict[str, str] +) -> list[TestResult]: """Get test results from doctester with docstrings to test.""" _test_style = ( doctester.style diff --git a/tools/test_sampcd_processor.py b/tools/test_sampcd_processor.py index 62c51a73ba8a7f..c61c7e610f98c5 100644 --- a/tools/test_sampcd_processor.py +++ b/tools/test_sampcd_processor.py @@ -103,19 +103,23 @@ def tearDown(self): def test_get_api_md5(self): res = get_api_md5('paddle/fluid/API_PR.spec') self.assertEqual( - "ff0f188c95030158cc6398d2a6c55one", res['paddle.one_plus_one'] + "ArgSpec(args=[], varargs=None, keywords=None, defaults=(,)), ff0f188c95030158cc6398d2a6c55one", + res['paddle.one_plus_one'], ) self.assertEqual( - "ff0f188c95030158cc6398d2a6c55two", res['paddle.two_plus_two'] + "ArgSpec(args=[], varargs=None, keywords=None, defaults=(,)), ff0f188c95030158cc6398d2a6c55two", + res['paddle.two_plus_two'], ) self.assertEqual( - "ff0f188c95030158cc6398d2a6cthree", res['paddle.three_plus_three'] + "ArgSpec(args=[], varargs=None, keywords=None, defaults=(,)), ff0f188c95030158cc6398d2a6cthree", + res['paddle.three_plus_three'], ) self.assertEqual( 
"ff0f188c95030158cc6398d2a6c5four", res['paddle.four_plus_four'] ) self.assertEqual( - "ff0f188c95030158cc6398d2a6c5five", res['paddle.five_plus_five'] + "ArgSpec(), ff0f188c95030158cc6398d2a6c5five", + res['paddle.five_plus_five'], ) @@ -302,8 +306,8 @@ def test_global_exec(self): >>> import paddle >>> a = paddle.to_tensor(.2) >>> print(a) - Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, - [0.20000000]) + Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, + 0.20000000) """, 'set_default': """ placeholder @@ -319,8 +323,8 @@ def test_global_exec(self): >>> paddle.set_default_dtype('float64') >>> a = paddle.to_tensor(.2) >>> print(a) - Tensor(shape=[1], dtype=float64, place=Place(cpu), stop_gradient=True, - [0.20000000]) + Tensor(shape=[], dtype=float64, place=Place(cpu), stop_gradient=True, + 0.20000000) """, 'after_set_default': """ placeholder @@ -335,8 +339,8 @@ def test_global_exec(self): >>> import paddle >>> a = paddle.to_tensor(.2) >>> print(a) - Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, - [0.20000000]) + Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, + 0.20000000) """, } @@ -509,10 +513,10 @@ def test_patch_xdoctest(self): >>> import paddle >>> paddle.device.set_device('gpu') >>> a = paddle.to_tensor(.2) - >>> # Tensor(shape=[1], dtype=float32, place=Place(gpu:0), stop_gradient=True, [0.20000000]) + >>> # Tensor(shape=[], dtype=float32, place=Place(gpu:0), stop_gradient=True, 0.20000000) >>> print(a) - Tensor(shape=[1], dtype=float32, place=Place(gpu:0), stop_gradient=True, - [0.20000000]) + Tensor(shape=[], dtype=float32, place=Place(gpu:0), stop_gradient=True, + 0.20000000) """, 'cpu_to_cpu': """ @@ -528,10 +532,10 @@ def test_patch_xdoctest(self): >>> import paddle >>> paddle.device.set_device('cpu') >>> a = paddle.to_tensor(.2) - >>> # Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, [0.20000000]) + >>> # Tensor(shape=[], dtype=float32, 
place=Place(cpu), stop_gradient=True, 0.20000000) >>> print(a) - Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, - [0.20000000]) + Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, + 0.20000000) """, 'gpu_to_cpu': """ @@ -547,10 +551,10 @@ def test_patch_xdoctest(self): >>> import paddle >>> paddle.device.set_device('gpu') >>> a = paddle.to_tensor(.2) - >>> # Tensor(shape=[1], dtype=float32, place=Place(gpu:0), stop_gradient=True, [0.20000000]) + >>> # Tensor(shape=[], dtype=float32, place=Place(gpu:0), stop_gradient=True, 0.20000000) >>> print(a) - Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, - [0.20000000]) + Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, + 0.20000000) """, 'cpu_to_gpu': """ @@ -566,10 +570,10 @@ def test_patch_xdoctest(self): >>> import paddle >>> paddle.device.set_device('cpu') >>> a = paddle.to_tensor(.2) - >>> # Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, [0.20000000]) + >>> # Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, 0.20000000) >>> print(a) - Tensor(shape=[1], dtype=float32, place=Place(gpu:0), stop_gradient=True, - [0.20000000]) + Tensor(shape=[], dtype=float32, place=Place(gpu:0), stop_gradient=True, + 0.20000000) """, 'gpu_to_cpu_array': """ placeholder @@ -701,8 +705,8 @@ def test_patch_xdoctest(self): >>> paddle.device.set_device('gpu') >>> a = paddle.to_tensor(.123456789) >>> print(a) - Tensor(shape=[1], dtype=float32, place=Place(gpu:0), stop_gradient=True, - [0.123456780]) + Tensor(shape=[], dtype=float32, place=Place(gpu:0), stop_gradient=True, + 0.123456780) """, 'cpu_to_cpu': """ @@ -719,8 +723,8 @@ def test_patch_xdoctest(self): >>> paddle.device.set_device('cpu') >>> a = paddle.to_tensor(.123456789) >>> print(a) - Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, - [0.123456780]) + Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, + 
0.123456780) """, 'gpu_to_cpu': """ @@ -737,8 +741,8 @@ def test_patch_xdoctest(self): >>> paddle.device.set_device('gpu') >>> a = paddle.to_tensor(.123456789) >>> print(a) - Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, - [0.123456780]) + Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, + 0.123456780) """, 'cpu_to_gpu': """ @@ -755,8 +759,8 @@ def test_patch_xdoctest(self): >>> paddle.device.set_device('cpu') >>> a = paddle.to_tensor(.123456789) >>> print(a) - Tensor(shape=[1], dtype=float32, place=Place(gpu:0), stop_gradient=True, - [0.123456780]) + Tensor(shape=[], dtype=float32, place=Place(gpu:0), stop_gradient=True, + 0.123456780) """, 'gpu_to_cpu_array': """ placeholder @@ -2046,7 +2050,7 @@ def test_timeout(self): def test_bad_statements(self): docstrings_to_test = { - 'bad_fluid': """ + 'good_fluid': """ this is docstring... Examples: @@ -2191,9 +2195,9 @@ def test_bad_statements(self): tr_10, ) = test_results - self.assertIn('bad_fluid', tr_0.name) - self.assertTrue(tr_0.badstatement) - self.assertFalse(tr_0.passed) + self.assertIn('good_fluid', tr_0.name) + self.assertFalse(tr_0.badstatement) + self.assertTrue(tr_0.passed) self.assertIn('bad_fluid_from', tr_1.name) self.assertTrue(tr_1.badstatement) diff --git a/tools/test_type_checking.py b/tools/test_type_checking.py new file mode 100644 index 00000000000000..714be765ca9b55 --- /dev/null +++ b/tools/test_type_checking.py @@ -0,0 +1,630 @@ +# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
import unittest

from tools.type_checking import MypyChecker, get_test_results


class TestMypyChecker(unittest.TestCase):
    """Tests for `MypyChecker` run against example code embedded in docstrings.

    Each fixture maps an api name to a docstring; `get_test_results` yields one
    result per extracted code block, and `fail` tells whether mypy rejected it.
    """

    # Paths handed to `MypyChecker`. The checker takes both a config file and a
    # cache dir, so both are supplied explicitly.
    CONFIG_FILE = '../pyproject.toml'
    CACHE_DIR = '../.mypy_cache'

    def _new_checker(self):
        """Build a fresh checker for one group of assertions."""
        return MypyChecker(self.CONFIG_FILE, self.CACHE_DIR)

    def test_mypy_pass(self):
        docstrings_pass = {
            'simple': """
            placeholder

            Examples:

                .. code-block:: python
                    :name: code-example-1

                    this is some blabla...

                    >>> import abc
                    >>> print(1)
                    1
            """,
            'multi': """
            placeholder

            .. code-block:: python
                :name: code-example-0

                this is some blabla...

                >>> # doctest: +SKIP('skip')
                >>> print(1+1)
                2

            Examples:

                .. code-block:: python
                    :name: code-example-1

                    this is some blabla...

                    >>> # doctest: -REQUIRES(env:GPU)
                    >>> print(1-1)
                    0

                .. code-block:: python
                    :name: code-example-2

                    this is some blabla...

                    >>> # doctest: +REQUIRES(env:GPU, env:XPU, env: DISTRIBUTED)
                    >>> print(1-1)
                    0
            """,
        }
        docstrings_from_sampcd = {
            'gpu_to_gpu': """
            placeholder

            Examples:

                .. code-block:: python
                    :name: code-example-1

                    this is some blabla...

                    >>> import paddle
                    >>> paddle.device.set_device('gpu')
                    >>> a = paddle.to_tensor(.123456789)
                    >>> print(a)
                    Tensor(shape=[1], dtype=float32, place=Place(gpu:0), stop_gradient=True,
                           [0.123456780])

            """,
            'cpu_to_cpu': """
            placeholder

            Examples:

                .. code-block:: python
                    :name: code-example-1

                    this is some blabla...

                    >>> import paddle
                    >>> paddle.device.set_device('cpu')
                    >>> a = paddle.to_tensor(.123456789)
                    >>> print(a)
                    Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True,
                           [0.123456780])

            """,
            'gpu_to_cpu': """
            placeholder

            Examples:

                .. code-block:: python
                    :name: code-example-1

                    this is some blabla...

                    >>> import paddle
                    >>> paddle.device.set_device('gpu')
                    >>> a = paddle.to_tensor(.123456789)
                    >>> print(a)
                    Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True,
                           [0.123456780])

            """,
            'cpu_to_gpu': """
            placeholder

            Examples:

                .. code-block:: python
                    :name: code-example-1

                    this is some blabla...

                    >>> import paddle
                    >>> paddle.device.set_device('cpu')
                    >>> a = paddle.to_tensor(.123456789)
                    >>> print(a)
                    Tensor(shape=[1], dtype=float32, place=Place(gpu:0), stop_gradient=True,
                           [0.123456780])
            """,
            'gpu_to_cpu_array': """
            placeholder

            Examples:

                .. code-block:: python
                    :name: code-example-1

                    this is some blabla...

                    >>> import paddle
                    >>> paddle.device.set_device('gpu')
                    >>> a = paddle.to_tensor([[1.123456789 ,2,3], [2,3,4], [3,4,5]])
                    >>> print(a)
                    Tensor(shape=[3, 3], dtype=float32, place=Place(cpu), stop_gradient=True,
                           [[1.123456780, 2., 3.],
                            [2., 3., 4.],
                            [3., 4., 5.]])
            """,
            'cpu_to_gpu_array': """
            placeholder

            Examples:

                .. code-block:: python
                    :name: code-example-1

                    this is some blabla...

                    >>> import paddle
                    >>> paddle.device.set_device('cpu')
                    >>> a = paddle.to_tensor([[1.123456789,2,3], [2,3,4], [3,4,5]])
                    >>> print(a)
                    Tensor(shape=[3, 3], dtype=float32, place=Place(gpu:0), stop_gradient=True,
                           [[1.123456780, 2., 3.],
                            [2., 3., 4.],
                            [3., 4., 5.]])
            """,
            'mass_array': """
            placeholder

            Examples:

                .. code-block:: python
                    :name: code-example-1

                    this is some blabla...

                    >>> import paddle
                    >>> paddle.device.set_device('gpu')
                    >>> a = paddle.to_tensor(
                    ...     [[1.123456780, 2., -3, .3],
                    ...     [2, 3, +4., 1.2+10.34e-5j],
                    ...     [3, 5.e-3, 1e2, 3e-8]]
                    ... )
                    >>> # Tensor(shape=[3, 4], dtype=complex64, place=Place(gpu:0), stop_gradient=True,
                    >>> #       [[ (1.1234568357467651+0j)                    ,
                    >>> #          (2+0j)                                     ,
                    >>> #         (-3+0j)                                     ,
                    >>> #          (0.30000001192092896+0j)                   ],
                    >>> #        [ (2+0j)                                     ,
                    >>> #          (3+0j)                                     ,
                    >>> #          (4+0j)                                     ,
                    >>> #          (1.2000000476837158+0.00010340000153519213j)],
                    >>> #        [ (3+0j)                                     ,
                    >>> #          (0.004999999888241291+0j)                  ,
                    >>> #          (100+0j)                                   ,
                    >>> #          (2.999999892949745e-08+0j)                 ]])
                    >>> print(a)
                    Tensor(shape=[3, 4], dtype=complex64, place=Place(AAA), stop_gradient=True,
                           [[ (1.123456+0j),
                              (2+0j),
                             (-3+0j),
                              (0.3+0j)],
                            [ (2+0j),
                              (3+0j),
                              (4+0j),
                              (1.2+0.00010340j)],
                            [ (3+0j),
                              (0.00499999+0j),
                              (100+0j),
                              (2.999999e-08+0j)]])
            """,
            'float_array': """
            placeholder

            Examples:

                .. code-block:: python
                    :name: code-example-1

                    this is some blabla...

                    >>> import paddle
                    >>> paddle.device.set_device('cpu')
                    >>> x = [[2, 3, 4], [7, 8, 9]]
                    >>> x = paddle.to_tensor(x, dtype='float32')
                    >>> print(paddle.log(x))
                    Tensor(shape=[2, 3], dtype=float32, place=Place(cpu), stop_gradient=True,
                           [[0.69314718, 1.09861231, 1.38629436],
                            [1.94591010, 2.07944155, 2.19722462]])

            """,
            'float_array_diff': """
            placeholder

            Examples:

                .. code-block:: python
                    :name: code-example-1

                    this is some blabla...

                    >>> import paddle
                    >>> paddle.device.set_device('cpu')
                    >>> x = [[2, 3, 4], [7, 8, 9]]
                    >>> x = paddle.to_tensor(x, dtype='float32')
                    >>> print(paddle.log(x))
                    Tensor(shape=[2, 3], dtype=float32, place=Place(cpu), stop_gradient=True,
                           [[0.69314712, 1.09861221, 1.386294],
                            [1.94591032, 2.07944156, 2.1972246]])

            """,
            'float_begin': """
            placeholder

            Examples:

                .. code-block:: python
                    :name: code-example-1

                    this is some blabla...

                    >>> print(7.0)
                    7.

            """,
            'float_begin_long': """
            placeholder

            Examples:

                .. code-block:: python
                    :name: code-example-1

                    this is some blabla...

                    >>> print(7.0000023)
                    7.0000024

            """,
            'float_begin_more': """
            placeholder

            Examples:

                .. code-block:: python
                    :name: code-example-1

                    this is some blabla...

                    >>> print(7.0, 5., 6.123456)
                    7.0 5.0 6.123457

            """,
            'float_begin_more_diff': """
            placeholder

            Examples:

                .. code-block:: python
                    :name: code-example-1

                    this is some blabla...

                    >>> print(7.0, 5., 6.123456)
                    7.0 5.0 6.123457

            """,
            'float_begin_more_brief': """
            placeholder

            Examples:

                .. code-block:: python
                    :name: code-example-1

                    this is some blabla...

                    >>> print(7.0, 5., 6.123456)
                    7. 5. 6.123457

            """,
            'float_begin_fail': """
            placeholder

            Examples:

                .. code-block:: python
                    :name: code-example-1

                    this is some blabla...

                    >>> print(7.0100023)
                    7.0000024

            """,
        }
        doctester = self._new_checker()

        test_results = get_test_results(doctester, docstrings_pass)
        self.assertEqual(len(test_results), 3)

        for tr in test_results:
            self.assertFalse(tr.fail)

        # Only the statements are type-checked, so fixtures whose printed
        # output differs from the expected text are still asserted to pass.
        test_results = get_test_results(doctester, docstrings_from_sampcd)
        self.assertEqual(len(test_results), 15)

        for tr in test_results:
            print(tr.msg)
            self.assertFalse(tr.fail)

    def test_mypy_fail(self):
        docstrings_fail = {
            'fail_simple': """
            placeholder

            Examples:

                .. code-block:: python
                    :name: code-example-1

                    this is some blabla...

                    >>> import blabla
            """,
            'multi': """
            placeholder

            .. code-block:: python
                :name: code-example-0

                this is some blabla...

                >>> # doctest: +SKIP('skip')
                >>> print(1+1)
                2

            Examples:

                .. code-block:: python
                    :name: code-example-1

                    this is some blabla...

                    >>> # doctest: -REQUIRES(env:GPU)
                    >>> blabla
                    >>> print(1-1)
                    0

                .. code-block:: python
                    :name: code-example-2

                    this is some blabla...

                    >>> # doctest: +REQUIRES(env:GPU, env:XPU, env: DISTRIBUTED)
                    >>> blabla
                    >>> print(1-1)
                    0
            """,
        }

        doctester = self._new_checker()

        test_results = get_test_results(doctester, docstrings_fail)
        self.assertEqual(len(test_results), 3)

        for tr in test_results:
            self.assertTrue(tr.fail)

    def test_mypy_partial_fail(self):
        docstrings_fail = {
            'multi': """
            placeholder

            .. code-block:: python
                :name: code-example-0

                this is some blabla...

                >>> # doctest: +SKIP('skip')
                >>> print(1+1)
                2

            Examples:

                .. code-block:: python
                    :name: code-example-1

                    this is some blabla...

                    >>> # doctest: -REQUIRES(env:GPU)
                    >>> blabla
                    >>> print(1-1)
                    0

                .. code-block:: python
                    :name: code-example-2

                    this is some blabla...

                    >>> # doctest: +REQUIRES(env:GPU, env:XPU, env: DISTRIBUTED)
                    >>> print(1-1)
                    0
            """
        }

        doctester = self._new_checker()

        test_results = get_test_results(doctester, docstrings_fail)
        self.assertEqual(len(test_results), 2)

        tr_0, tr_1 = test_results
        self.assertTrue(tr_0.fail)
        self.assertFalse(tr_1.fail)

    def test_mypy_ignore(self):
        # `# type: ignore` as the first statement: nothing is left to check.
        docstrings_ignore = {
            'fail_simple': """
            placeholder

            Examples:

                .. code-block:: python
                    :name: code-example-1

                    this is some blabla...

                    >>> # type: ignore
                    >>> import blabla
            """,
            'multi': """
            placeholder

            .. code-block:: python
                :name: code-example-0

                this is some blabla...

                >>> # doctest: +SKIP('skip')
                >>> print(1+1)
                2

            Examples:

                .. code-block:: python
                    :name: code-example-1

                    this is some blabla...

                    >>> # type: ignore
                    >>> # doctest: -REQUIRES(env:GPU)
                    >>> blabla
                    >>> print(1-1)
                    0

                .. code-block:: python
                    :name: code-example-2

                    this is some blabla...

                    >>> # type: ignore
                    >>> # doctest: +REQUIRES(env:GPU, env:XPU, env: DISTRIBUTED)
                    >>> blabla
                    >>> print(1-1)
                    0
            """,
        }

        doctester = self._new_checker()

        test_results = get_test_results(doctester, docstrings_ignore)
        self.assertEqual(len(test_results), 3)

        for tr in test_results:
            print(tr.msg)
            self.assertFalse(tr.fail)

        # Valid statements before the `# type: ignore` marker are still checked
        # and pass.
        docstrings_pass = {
            'pass': """
            placeholder

            .. code-block:: python
                :name: code-example-0

                this is some blabla...

                >>> # doctest: +SKIP('skip')
                >>> print(1+1)
                2

            Examples:

                .. code-block:: python
                    :name: code-example-1

                    this is some blabla...

                    >>> a = 1
                    >>> # type: ignore
                    >>> # doctest: -REQUIRES(env:GPU)
                    >>> blabla
                    >>> print(1-1)
                    0

                .. code-block:: python
                    :name: code-example-2

                    this is some blabla...

                    >>> b = 2
                    >>> # type: ignore
                    >>> # doctest: +REQUIRES(env:GPU, env:XPU, env: DISTRIBUTED)
                    >>> blabla
                    >>> print(1-1)
                    0
            """,
        }

        doctester = self._new_checker()

        test_results = get_test_results(doctester, docstrings_pass)
        self.assertEqual(len(test_results), 2)

        for tr in test_results:
            print(tr.msg)
            self.assertFalse(tr.fail)

        # Invalid statements before the marker are still checked and fail.
        docstrings_fail = {
            'fail': """
            placeholder

            .. code-block:: python
                :name: code-example-0

                this is some blabla...

                >>> # doctest: +SKIP('skip')
                >>> print(1+1)
                2

            Examples:

                .. code-block:: python
                    :name: code-example-1

                    this is some blabla...

                    >>> import blabla
                    >>> a = 1
                    >>> # type: ignore
                    >>> # doctest: -REQUIRES(env:GPU)
                    >>> blabla
                    >>> print(1-1)
                    0

                .. code-block:: python
                    :name: code-example-2

                    this is some blabla...

                    >>> import blabla
                    >>> # type: ignore
                    >>> # doctest: +REQUIRES(env:GPU, env:XPU, env: DISTRIBUTED)
                    >>> blabla
                    >>> print(1-1)
                    0
            """,
        }

        doctester = self._new_checker()

        test_results = get_test_results(doctester, docstrings_fail)
        self.assertEqual(len(test_results), 2)

        for tr in test_results:
            print(tr.msg)
            self.assertTrue(tr.fail)
from __future__ import annotations

import argparse
import doctest
import pathlib
import re
from abc import abstractmethod
from concurrent.futures import ProcessPoolExecutor
from dataclasses import dataclass, field
from typing import Any

from mypy import api as mypy_api
from sampcd_processor_utils import (
    extract_code_blocks_from_docstr,
    get_docstring,
    init_logger,
    log_exit,
    logger,
)


class TypeChecker:
    """Interface of a checker that type-checks code blocks from docstrings.

    Subclasses are expected to override `run` and `print_summary`.

    NOTE(review): this class does not derive from `abc.ABC`, so the
    `@abstractmethod` markers below document intent but are not enforced at
    instantiation time.
    """

    # Docstring style used when extracting code blocks; `get_test_results`
    # treats anything other than 'freeform' as 'google'.
    style: str = 'google'

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        pass

    @abstractmethod
    def run(self, api_name: str, codeblock: str) -> TestResult:
        """Type-check one code block and return its result.

        Args:
            api_name: Label identifying the API / code block being checked.
            codeblock: Raw code block text extracted from a docstring.
        """

    @abstractmethod
    def print_summary(
        self, test_results: list[TestResult], whl_error: list[str]
    ) -> None:
        """Report the collected results.

        Args:
            test_results: Results returned by `run`, one per code block.
            whl_error: Second value returned by `get_docstring`
                (presumably names of APIs missing from the wheel).
        """


@dataclass
class TestResult:
    """Outcome of type-checking a single code block."""

    # Label of the checked code block (`<api>:<codeblock name or id>`).
    api_name: str
    # Human readable report produced by the checker.
    msg: str
    # True when the checker reported a problem.
    fail: bool = False
    # Checker specific raw data (for mypy: both reports and the exit status).
    extra_info: dict[str, Any] = field(default_factory=dict)


class MypyChecker(TypeChecker):
    """`TypeChecker` implementation backed by `mypy.api.run`."""

    def __init__(
        self,
        config_file: str | pathlib.Path,
        cache_dir: str | pathlib.Path | None = None,
        *args: Any,
        **kwargs: Any,
    ) -> None:
        """Store the mypy configuration.

        Args:
            config_file: Path passed to mypy as `--config-file`.
            cache_dir: Path passed to mypy as `--cache-dir`. Optional, so
                that callers which only supply a config file keep working;
                when omitted the flag is not passed to mypy at all.
            *args: Forwarded to `TypeChecker.__init__`.
            **kwargs: Forwarded to `TypeChecker.__init__`.
        """
        self.config_file = config_file
        self.cache_dir = cache_dir
        super().__init__(*args, **kwargs)

    def run(self, api_name: str, codeblock: str) -> TestResult:
        """Run mypy on the example code contained in `codeblock`.

        Args:
            api_name: Label used in log messages and in the result.
            codeblock: Doctest-style code block (`>>>` / `...` prompts).

        Returns:
            A `TestResult` whose `fail` is True when mypy exits non-zero.
        """
        # Only the lines *before* the first `>>> # type: ignore` marker are
        # checked: the marker itself and everything after it are dropped.
        lines_for_checking = []
        for line in codeblock.splitlines():
            if line.strip().startswith('>>> # type: ignore'):
                break
            lines_for_checking.append(line)
        codeblock_for_checking = '\n'.join(lines_for_checking)

        # Strip `# doctest: ...` / `# xdoctest: ...` directives before the
        # block is handed to the `doctest` parser.
        codeblock_for_checking = re.sub(
            r'#\s*x?doctest\s*:.*', '', codeblock_for_checking
        )

        # `get_examples` yields the sources with `>>>` and `...` stripped.
        examples = doctest.DocTestParser().get_examples(codeblock_for_checking)
        example_code = '\n'.join(
            source_line
            for example in examples
            for source_line in example.source.splitlines()
        )

        mypy_args = [f'--config-file={self.config_file}']
        if self.cache_dir is not None:
            mypy_args.append(f'--cache-dir={self.cache_dir}')
        mypy_args.extend(['-c', example_code])

        normal_report, error_report, exit_status = mypy_api.run(mypy_args)

        logger.debug('-' * 20)
        logger.debug(f'>>> Type hints with api {api_name} start ...')
        logger.debug(example_code)
        logger.debug('>>> Results ...')
        logger.debug('>>> mypy normal_report is ...')
        logger.debug(normal_report)
        logger.debug('>>> mypy error_report is ...')
        logger.debug(error_report)
        logger.debug('>>> mypy exit_status is ...')
        logger.debug(exit_status)
        logger.debug(f'>>> Type hints with api {api_name} end...')

        return TestResult(
            api_name=api_name,
            msg='\n'.join([normal_report, error_report]),
            fail=exit_status != 0,
            extra_info={
                'normal_report': normal_report,
                'error_report': error_report,
                'exit_status': exit_status,
            },
        )

    def print_summary(
        self, test_results: list[TestResult], whl_error: list[str]
    ) -> None:
        """Log every result and call `log_exit(1)` when something failed.

        Args:
            test_results: Results produced by `run`.
            whl_error: When non-empty, it is reported together with any
                failed results and `log_exit(1)` is called unconditionally.

        NOTE(review): the code after each `log_exit(1)` call assumes that
        `log_exit` terminates the process -- confirm in
        `sampcd_processor_utils`.
        """
        logger.warning("----------------Check results--------------------")

        if whl_error:
            logger.warning("%s is not in whl.", whl_error)
            logger.warning("")
            logger.warning("Please check the whl package and API_PR.spec!")
            logger.warning(
                "You can follow these steps in order to generate API.spec:"
            )
            logger.warning("1. cd ${paddle_path}, compile paddle;")
            logger.warning(
                "2. pip install build/python/dist/(build whl package);"
            )
            logger.warning(
                "3. run 'python tools/print_signatures.py paddle > paddle/fluid/API.spec'."
            )
            for test_result in test_results:
                if test_result.fail:
                    logger.error(
                        ">>> In addition, mistakes found in type checking: %s",
                        test_result.api_name,
                    )
                    logger.error(test_result.msg)
            log_exit(1)

        else:
            is_fail = False
            for test_result in test_results:
                if test_result.fail:
                    is_fail = True

                    logger.error(test_result.api_name)
                    logger.error(test_result.msg)

                else:
                    logger.debug(test_result.api_name)
                    logger.debug(test_result.msg)

            if is_fail:
                logger.error(">>> Mistakes found in type checking!")
                logger.error(">>> Please recheck the type annotations.")
                log_exit(1)

        logger.warning(">>> Type checking is successful!")
        logger.warning("----------------End of the Check--------------------")


def parse_args() -> argparse.Namespace:
    """Parse the command line arguments of the type checking script.

    Returns:
        Namespace with `debug`, `logf`, `config_file`, `cache_dir` and
        `full_test`.
    """
    parser = argparse.ArgumentParser(
        description='run Sample Code Type Checking'
    )
    parser.add_argument('--debug', dest='debug', action="store_true")
    parser.add_argument(
        '--logf', dest='logf', type=str, default=None, help='file for logging'
    )
    parser.add_argument(
        '--config-file',
        dest='config_file',
        type=str,
        default=None,
        help='config file for type checker',
    )
    parser.add_argument(
        '--cache-dir',
        dest='cache_dir',
        type=str,
        default=None,
        help='cache dir for mypy',
    )
    parser.add_argument('--full-test', dest='full_test', action="store_true")

    return parser.parse_args()


def get_test_results(
    type_checker: TypeChecker, docstrings_to_test: dict[str, str]
) -> list[TestResult]:
    """Type-check every code block found in the given docstrings.

    Args:
        type_checker: Checker whose `run` is executed for each code block
            in a process pool.
        docstrings_to_test: Mapping of API name to its raw docstring.

    Returns:
        One `TestResult` per extracted code block, in extraction order.
    """
    # Only 'freeform' switches off google style; any other value (including
    # unknown ones) falls back to 'google'.
    google_style = type_checker.style != 'freeform'

    api_names = []
    codeblocks = []
    for api_name, raw_docstring in docstrings_to_test.items():
        # A single docstring may contain more than one code block.
        for codeblock in extract_code_blocks_from_docstr(
            raw_docstring, google_style=google_style
        ):
            codeblock_name = codeblock['name']
            codeblock_id = codeblock['id']

            api_names.append(f'{api_name}:{codeblock_name or codeblock_id}')
            codeblocks.append(codeblock['codes'])

    # NOTE(review): the iterator returned by `map` is consumed only after the
    # `with` block has shut the pool down (which waits for every task), so
    # `timeout=600` can never fire here. Left unchanged on purpose: enforcing
    # an overall 600s limit could break long `--full-test` runs -- confirm
    # the intended behavior before moving `list(...)` inside the `with`.
    with ProcessPoolExecutor() as exe:
        test_results = exe.map(
            type_checker.run, api_names, codeblocks, timeout=600
        )

    return list(test_results)


def run_type_checker(
    args: argparse.Namespace, type_checker: TypeChecker
) -> None:
    """Collect docstrings, type-check their code blocks and print a summary.

    Args:
        args: Parsed command line arguments (see `parse_args`).
        type_checker: Checker used to run and to summarize the checks.
    """
    # init logger
    init_logger(debug=args.debug, log_file=args.logf)

    logger.info(
        "----------------Codeblock Type Checking Start--------------------"
    )

    logger.info(">>> Get docstring from api ...")

    def filter_api(api_name: str) -> bool:
        # Predicate handed to `get_docstring`; true for `libpaddle` APIs.
        return 'libpaddle' in api_name

    docstrings_to_test, whl_error = get_docstring(
        full_test=args.full_test, filter_api=filter_api
    )

    logger.info(">>> Running type checker ...")
    test_results = get_test_results(type_checker, docstrings_to_test)

    logger.info(">>> Print summary ...")
    type_checker.print_summary(test_results, whl_error)


if __name__ == '__main__':
    # Repository root: this file lives in `<root>/tools/`.
    base_path = pathlib.Path(__file__).resolve().parent.parent

    args = parse_args()
    mypy_checker = MypyChecker(
        config_file=args.config_file or (base_path / 'pyproject.toml'),
        cache_dir=args.cache_dir or (base_path / '.mypy_cache'),
    )
    run_type_checker(args, mypy_checker)