PaddlePaddle / Paddle

PArallel Distributed Deep LEarning: Machine Learning Framework from Industrial Practice (『飞桨』核心框架,深度学习&机器学习高性能单机、分布式训练和跨平台部署)
http://www.paddlepaddle.org/
Apache License 2.0
21.66k stars 5.44k forks source link

[CINN]Support Dynamic Shape for CINN subgraph UT #64076

Closed Aurelius84 closed 1 week ago

Aurelius84 commented 1 week ago

PR Category

CINN

PR Types

Not User Facing

Description

Pcard-67164 CINN子图单测支持了动态Shape功能测试。

一、如何测试

对于一个子图单测已经验证过「静态Shape」开启CINN的功能,如何测试其「动态Shape」下是否跑得通CINN,额外执行一个FLAGS即可: FLAGS_prim_enable_dynamic=true ctest -R test_sub_graph_0 -VV

二、InputSpec如何确定

提供如下自动化检索脚本。

  1. 将如下代码复制到auto_support_dy_shape.py 文件,并放到test/ir/pir/cinn/auto_support_dy_shape.py 路径下
  2. 创建test/ir/pir/cinn/sub_graphs_tmp 目录(与脚本中 FileModifier 使用的输出目录名保持一致)
  3. 执行python auto_support_dy_shape.py
  4. 在sub_graphs_tmp中会包含同名文件,内部会在对应位置插入「最动态」的self.input_specs,且其是经过test_dy_shape_spec测试验证的。
import re
import os
import glob
import paddle 
import itertools
import logging
import subprocess

# Lower the root logger threshold to INFO so the logging.info() progress
# messages emitted by this script are not filtered out.
logging.getLogger().setLevel(logging.INFO)

class SpecInfoMeta:
    """Static description of one network input, used to derive candidate specs.

    Attributes:
        shape: concrete (static) dimensions of the input, as a list of ints.
        dtype: dtype object forwarded unchanged to ``InputSpec``.
        stop_gradient: flag forwarded unchanged to ``InputSpec``.
    """

    # Marker used for a dynamic dimension in the generated shapes.
    DYNAMIC_DIM = -1

    def __init__(self, shape, dtype, stop_gradient):
        self.shape = shape
        self.dtype = dtype
        self.stop_gradient = stop_gradient

    def as_spec(self, shape):
        """Build an ``InputSpec`` for *shape* with this input's dtype and stop_gradient."""
        return paddle.static.InputSpec(shape=shape, dtype=self.dtype, stop_gradient=self.stop_gradient)

    def maybe_shapes(self):
        """Return every candidate shape, ordered from most to least dynamic.

        Each dimension is either made dynamic (``-1``) or kept at its static
        value, so an input of rank ``r`` produces up to ``2 ** r`` tuples. The
        dynamic choice is listed first for every dimension, which makes the
        all-dynamic shape the first element.

        Heuristic: a 4-D input whose second dimension is 3 is treated as having
        a channel axis, and candidates that would make that axis dynamic are
        dropped.
        """
        dim_choices = [(self.DYNAMIC_DIM, dim) for dim in self.shape]
        has_channel = len(self.shape) == 4 and self.shape[1] == 3
        # The original check compared against None, which never occurs because
        # dynamic dims are encoded as -1; compare against the real marker.
        return [
            shape
            for shape in itertools.product(*dim_choices)
            if not (has_channel and shape[1] == self.DYNAMIC_DIM)
        ]

    def maybe_specs(self):
        """Return one ``InputSpec`` per candidate shape, in ``maybe_shapes`` order."""
        return [self.as_spec(shape) for shape in self.maybe_shapes()]

class SpecStrategy:
    """Enumerates joint spec assignments across all inputs of a network."""

    def __init__(self, inputs_info):
        # One entry per network input; each must provide maybe_specs().
        self.inputs_info = inputs_info

    def next(self):
        """Lazily yield one tuple per combination, holding one spec for each input.

        Combinations follow ``itertools.product`` order over the per-input
        candidate lists returned by ``parse_strategy``.
        """
        yield from itertools.product(*self.parse_strategy())

    def parse_strategy(self):
        """Return the candidate spec list of every input, in input order."""
        return [info.maybe_specs() for info in self.inputs_info]

class FileModifier:
    def __init__(self):
        self.dst_file_dir = "paddle-fork/test/ir/pir/cinn/sub_graphs_tmp"

    def save_dy_shape_file(self, source_file, specs):
        # To Find self.input_specs initialzation insert point.
        keywork = "def setUp(self):"
        template = "        self.input_specs = [{input_specs}]\n"
        # To Find specify input_spec insert point.
        modify_kw = "full_graph=True"
        # To insert dy_shape unittet point.
        ut_kw = "def test"
        ut_template = """
    def test_dy_shape_spec(self):
        self.train(self.net, to_static=True)
        """
        target_file = os.path.join(self.dst_file_dir, os.path.basename(source_file))
        is_success = False
        with open(source_file, "r") as f:
            lines = f.readlines()
            lines.insert(0, "from paddle.static import InputSpec\n")
            for i, line in enumerate(lines):
                if keywork in line:
                    new_line = template.format(input_specs='\n, '.join([str(spec) for spec in specs]))
                    lines[i] = ut_template + "\n" + line + "\n" + new_line
                if modify_kw in line:
                    line = line.strip("\n")  
                    if line.endswith(")"):
                        line = line[:-1] + ", input_spec=self.input_specs)\n"
                    else:
                        line = line + ", input_spec=self.input_specs"
                    lines[i] = line
                    is_success = True
            with open(target_file, "w") as f:
                f.writelines(lines)

        if not is_success:
            logging.error(f"Can't find {modify_kw} in file: {os.path.basename(self.source_file)}")
        return target_file

class RunState:
    """Result record for one trial execution of a generated test file."""

    def __init__(self, is_success: bool, tmp_file):
        # Path of the file that was executed for this trial.
        self.tmp_file = tmp_file
        # True when the trial execution succeeded.
        self.is_success = is_success

class ThreadTask:
    """One trial: patch a test file with candidate specs and execute it.

    Despite the name, no thread is started here; the generated file is run
    synchronously in a child process.
    """

    def __init__(self, source_file, specs):
        self.source_file = source_file
        self.specs = specs

    def run(self):
        """Generate the patched file for ``self.specs`` and return its ``RunState``."""
        target_file = FileModifier().save_dy_shape_file(self.source_file, self.specs)
        return self.thread_run(target_file)

    def thread_run(self, script_file):
        """Run ``TestLayer.test_dy_shape_spec`` from *script_file* in a child process.

        Returns:
            ``RunState(True, script_file)`` when the child exits with status 0,
            otherwise ``RunState(False, script_file)`` after logging the failure.
        """
        import sys

        # Use the interpreter running this script rather than whatever "python"
        # resolves to on PATH, so the child sees the same installed packages.
        command = [sys.executable or "python", script_file, "TestLayer.test_dy_shape_spec"]
        try:
            subprocess.check_output(command)
        except (subprocess.SubprocessError, OSError) as e:
            # Non-zero exit status or failure to launch the interpreter.
            logging.error(f"Try Run {script_file} failed with specs: {', '.join([str(spec) for spec in self.specs])}, errors: {e}")
            return RunState(False, script_file)
        return RunState(True, script_file)

class GreedyDyShapeSeacher:
    """Finds the first candidate spec combination for which a test file passes.

    Candidates are tried in the order produced by ``SpecStrategy``; the search
    stops at the first successful run.
    """

    def __init__(self, source_file):
        self.source_file = source_file

    def run(self):
        """Parse the input descriptions of the source file and search for valid specs.

        Returns:
            ``(specs, run_state)`` for the first successful combination, or
            ``None`` when no combination succeeds.
        """
        inputs_info = self.get_inputs_info()
        return self.get_target_spec(inputs_info)

    def get_inputs_info(self):
        """Collect a ``SpecInfoMeta`` for every ``# (shape: [...], dtype: ..., stop_gradient: ...)`` comment.

        Lines that contain the ``# (shape:`` marker but do not match the full
        pattern are reported with a warning; lines whose captured values cannot
        be converted are reported with an error. Both kinds are skipped.
        """
        pattern = re.compile(r"# \(shape: \[(\d+(?:,\s*\d+)*)\],\s*dtype:\s*(\w+\.\w+),\s*stop_gradient:\s*(\w+)\)")
        keyword = "# (shape:"
        inputs_info = []
        with open(self.source_file, "r") as f:
            for line in f:
                if keyword not in line:
                    continue
                match = pattern.search(line)
                if not match:
                    logging.warning(f"Can't find shape info in line: {line}")
                    continue
                try:
                    # Split on the bare comma: the pattern allows any amount of
                    # whitespace after it and int() tolerates the leftovers.
                    shape = [int(dim) for dim in match.group(1).split(',')]
                    # NOTE(review): eval() runs text taken from the scanned file.
                    # The pattern limits it to a single `name.attr` expression,
                    # but it should only be used on trusted test sources.
                    dtype = eval(match.group(2))
                    stop_gradient = match.group(3) == 'True'
                    inputs_info.append(SpecInfoMeta(shape, dtype, stop_gradient))
                except Exception as e:
                    logging.error(f"Can't parse shape info in line: {line}, errors: {e}")
                    continue
        if not inputs_info:
            logging.error(f"Can't find any shape info in file {os.path.basename(self.source_file)}")
        return inputs_info

    def get_target_spec(self, inputs_info):
        """Try spec combinations in order and return the first one that runs successfully.

        The generated file of every failed attempt is deleted; the file of the
        successful attempt is kept.

        Returns:
            ``(specs, run_state)`` on success, ``None`` (after logging an error)
            when every combination fails.
        """
        for specs in SpecStrategy(inputs_info).next():
            run_state = ThreadTask(self.source_file, specs).run()
            if run_state.is_success:
                return specs, run_state
            os.remove(run_state.tmp_file)
        logging.error(f"Can't find any valid specs in file {os.path.basename(self.source_file)}")
        return None

class DyShapeAutoGenerator:
    """Runs the dynamic-shape spec search for every ``*.py`` file in a directory."""

    def __init__(self, source_file_dir: str):
        self.source_file_dir = source_file_dir

    def run(self):
        """Search specs for each file found in ``source_file_dir``, in sorted order."""
        for source_file in self.get_all_files(self.source_file_dir):
            self.generate_dy_shape(source_file)

    def get_all_files(self, dir_path):
        """Return the sorted ``*.py`` files directly inside *dir_path*.

        Each returned path is ``os.path.join(dir_path, file_name)``. The current
        working directory is left untouched: changing it (as the previous
        implementation did) made relative ``dir_path`` values, and every other
        relative path used by the script, stop resolving.
        """
        # glob.escape keeps special characters in dir_path from being treated
        # as wildcards.
        pattern = os.path.join(glob.escape(dir_path), "*.py")
        file_names = sorted(os.path.basename(path) for path in glob.glob(pattern))
        logging.info(f"Found unittest files: {len(file_names)}")
        return [os.path.join(dir_path, file_name) for file_name in file_names]

    def generate_dy_shape(self, source_file):
        """Run the greedy search for *source_file* and log its result."""
        target_specs = GreedyDyShapeSeacher(source_file).run()
        logging.info(f"Target specs: {target_specs} for file: {os.path.basename(source_file)}")

if __name__ == "__main__":
    # Script entry point: process every unit-test file found in this
    # hard-coded directory.
    source_file_dir = "paddle-fork/test/ir/pir/cinn/sub_graphs"
    # The output directory used by FileModifier is not created here.
    DyShapeAutoGenerator(source_file_dir).run()
paddle-bot[bot] commented 1 week ago

你的PR提交成功,感谢你对开源项目的贡献! 请关注后续CI自动化测试结果,详情请参考Paddle-CI手册。 Your PR has been submitted. Thanks for your contribution! Please wait for the result of CI firstly. See Paddle CI Manual for details.