Open brando90 opened 1 year ago
Passing an empty string finally stores False (an empty string is falsy):
brando9@ampere1:~/data/evaporate$ python ~/evaporate/evaporate/run_profiler.py \
> --data_lake small_debug_lin_alg_textbook \
> --do_end_to_end ''\
> --num_attr_to_cascade 15 \
> --num_top_k_scripts 5 \
> --train_size 10 \
> --combiner_mode mv \
> --use_dynamic_backoff True \
> --KEYS keys
Running main: main=<function main at 0x7f40486b4ca0>
os.getenv('CONDA_DEFAULT_ENV')='maf'
sys.argv=['/lfs/ampere1/0/brando9/evaporate/evaporate/run_profiler.py', '--data_lake', 'small_debug_lin_alg_textbook', '--do_end_to_end', '', '--num_attr_to_cascade', '15', '--num_top_k_scripts', '5', '--train_size', '10', '--combiner_mode', 'mv', '--use_dynamic_backoff', 'True', '--KEYS', 'keys']
experiment_args=Namespace(data_lake='small_debug_lin_alg_textbook', do_end_to_end=False, num_attr_to_cascade=15, num_top_k_scripts=5, train_size=10, combiner_mode='mv', use_dynamic_backoff=True, KEYS=['keys'])
experiment_args.do_end_to_end=False
Passing the string 'False' stores True, because any non-empty string is truthy:
brando9@ampere1:~/data/evaporate$ python ~/evaporate/evaporate/run_profiler.py \
> --data_lake small_debug_lin_alg_textbook \
> --do_end_to_end False\
> --num_attr_to_cascade 15 \
> --num_top_k_scripts 5 \
> --train_size 10 \
> --combiner_mode mv \
> --use_dynamic_backoff True \
> --KEYS keys
Running main: main=<function main at 0x7ff7f94b8ca0>
os.getenv('CONDA_DEFAULT_ENV')='maf'
sys.argv=['/lfs/ampere1/0/brando9/evaporate/evaporate/run_profiler.py', '--data_lake', 'small_debug_lin_alg_textbook', '--do_end_to_end', 'False', '--num_attr_to_cascade', '15', '--num_top_k_scripts', '5', '--train_size', '10', '--combiner_mode', 'mv', '--use_dynamic_backoff', 'True', '--KEYS', 'keys']
experiment_args=Namespace(data_lake='small_debug_lin_alg_textbook', do_end_to_end=True, num_attr_to_cascade=15, num_top_k_scripts=5, train_size=10, combiner_mode='mv', use_dynamic_backoff=True, KEYS=['keys'])
experiment_args.do_end_to_end=True
def get_experiment_args(args=None):
    """Parse the profiler's command-line options.

    The two boolean options, ``--do_end_to_end`` and
    ``--use_dynamic_backoff``, are presence flags (``action='store_true'``):
    they are False unless the flag appears on the command line, and they
    take no value.

    Args:
        args: Optional list of argument strings to parse. When None
            (the default), ``sys.argv[1:]`` is parsed.

    Returns:
        The ``argparse.Namespace`` holding the parsed options.
    """
    if args is None:
        args = sys.argv[1:]

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--data_lake",
        type=str,
        help="Name of the data lake to operate over. Must be in configs.py"
    )
    parser.add_argument(
        "--do_end_to_end",
        action='store_true',
        default=False,  # not end to end so do ClosedIE
        help="True for OpenIE, False for ClosedIE. Default is False so it does ClosedIE."
    )
    parser.add_argument(
        "--num_attr_to_cascade",
        type=int,
        default=35,
        help="Number of attributes to generate functions for"
    )
    parser.add_argument(
        "--num_top_k_scripts",
        type=int,
        default=10,
        help="Number of generated functions to combine over for each attribute"
    )
    parser.add_argument(
        "--train_size",
        type=int,
        default=10,
        help="Number of files to prompt on"
    )
    parser.add_argument(
        "--combiner_mode",
        type=str,
        default='ws',
        help="Combiner mode for combining the outputs of the generated functions",
        choices=['ws', 'mv', 'top_k']
    )
    parser.add_argument(
        "--use_dynamic_backoff",
        action='store_true',  # flag --> gen functions
        default=False,  # default no flag --> evaporate direct
        help="True (flag set) for using generate functions, False (default) for using evaporate direct."
    )
    parser.add_argument(
        "--KEYS",
        type=str,
        default=[],
        help="List of keys to use the model api",
        nargs='*'
    )
    # Echo the raw invocation and the parsed result to ease debugging.
    print(f'{sys.argv=}')
    experiment_args = parser.parse_args(args=args)
    print(f'{experiment_args=}')
    return experiment_args
If you really want to keep the original method (passing an explicit True/False value on the command line), do:
def _parse_bool_flag(value):
    """Return True only when *value* spells "true" (case-insensitive).

    Used as an argparse ``type`` callable. ``bool`` itself is unsuitable
    because ``bool('False')`` is True: any non-empty string is truthy.
    Every other string, including the empty string, maps to False.
    """
    if isinstance(value, bool):
        return value
    return str(value).strip().lower() == 'true'


def get_experiment_args(args=None):
    """Parse the profiler's command-line options.

    The two boolean options, ``--do_end_to_end`` and
    ``--use_dynamic_backoff``, take an explicit value
    (e.g. ``--do_end_to_end False``). The value is True only when it spells
    "true" case-insensitively; both options default to True.

    Args:
        args: Optional list of argument strings to parse. When None
            (the default), ``sys.argv[1:]`` is parsed.

    Returns:
        The ``argparse.Namespace`` holding the parsed options, with
        ``do_end_to_end`` and ``use_dynamic_backoff`` as real booleans.
    """
    if args is None:
        args = sys.argv[1:]

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--data_lake",
        type=str,
        help="Name of the data lake to operate over. Must be in configs.py"
    )
    parser.add_argument(
        "--do_end_to_end",
        # argparse runs string defaults through `type`, so "True" -> True.
        type=_parse_bool_flag,
        default="True",
        help="True for generating schema from data/OpenIE, False for ClosedIE/given schema. Default is True genererate schema/OpenIE.",
    )
    parser.add_argument(
        "--num_attr_to_cascade",
        type=int,
        default=35,
        help="Number of attributes to generate functions for. "
    )
    parser.add_argument(
        "--num_top_k_scripts",
        type=int,
        default=10,
        help="Number of generated functions to combine over for each attribute"
    )
    parser.add_argument(
        "--train_size",
        type=int,
        default=10,
        help="Number of files to prompt on"
    )
    parser.add_argument(
        "--combiner_mode",
        type=str,
        default='ws',
        help="Combiner mode for combining the outputs of the generated functions",
        choices=['ws', 'mv', 'top_k']
    )
    parser.add_argument(
        "--use_dynamic_backoff",
        type=_parse_bool_flag,
        default="True",
        help="True (default) uses generated functions for extraction. Else, False uses evaporate direct/LLM cfor extraction."
    )
    parser.add_argument(
        "--KEYS",
        type=str,
        default=[],
        help="List of keys to use the model api",
        nargs='*'
    )
    # Echo the raw invocation and the parsed result to ease debugging.
    print(f'{sys.argv=}')
    experiment_args = parser.parse_args(args=args)
    print(f'{experiment_args=}')
    return experiment_args
The issue is due to how the argparse module handles boolean command-line arguments. When you pass a string to an argument declared with type=bool (as you are doing on the command line), argparse does not convert the string 'False' to the boolean False; it simply calls bool() on the string. In Python, bool() treats any non-empty string as True, so bool('False') is True.
For boolean arguments, instead of using type=bool in add_argument, a better practice would be to use action='store_true' or action='store_false'.
These will store True if the argument is present and False otherwise (for store_true), or vice versa for store_false. You then use the flag on the command line if you want the value to be True.
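For example, with `parser.add_argument("--do_end_to_end", action="store_false")` the value is True unless the flag is given.
And on the command line, if you want do_end_to_end to be False, you would include the flag: `python run_profiler.py --do_end_to_end`.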
If you want the default to be False and to set it to True from the command line, use action='store_true' and include the flag when you want it to be True.
This way of handling boolean arguments is generally more intuitive for command-line users.