Add requirements file and run config script

ncoop57 2021-07-11 22:38:22 +00:00
parent 8cb24cc0ea
commit a3b9f56a02
2 changed files with 132 additions and 0 deletions

requirements.txt Normal file

@@ -0,0 +1,95 @@
absl-py==0.13.0
aiohttp==3.7.4.post0
appdirs==1.4.4
astunparse==1.6.3
async-timeout==3.0.1
attrs==21.2.0
black==21.6b0
cachetools==4.2.2
certifi==2021.5.30
chardet==4.0.0
chex==0.0.8
click==8.0.1
cycler==0.10.0
-e git+https://github.com/huggingface/datasets.git@d7a72234436673d6394bc67cccb5489aacf5f981#egg=datasets
dill==0.3.4
dm-tree==0.1.6
fastcore==1.3.20
filelock==3.0.12
fire==0.4.0
flatbuffers==1.12
flax==0.3.4
fsspec==2021.6.1
gast==0.4.0
gdown==3.13.0
google-auth==1.32.1
google-auth-oauthlib==0.4.4
google-pasta==0.2.0
grpcio==1.38.1
gviz-api==1.9.0
h5py==3.1.0
huggingface-hub==0.0.12
idna==2.10
jax==0.2.16
jaxlib==0.1.68
joblib==1.0.1
jsonlines==2.0.0
keras-nightly==2.7.0.dev2021071100
Keras-Preprocessing==1.1.2
kiwisolver==1.3.1
libclang==11.1.0
libtpu-nightly==0.1.dev20210615
lm-dataformat==0.0.19
Markdown==3.3.4
matplotlib==3.4.2
msgpack==1.0.2
multidict==5.1.0
multiprocess==0.70.12.2
mypy-extensions==0.4.3
numpy==1.19.5
oauthlib==3.1.1
opt-einsum==3.3.0
optax==0.0.9
packaging==21.0
pandas==1.3.0
pathspec==0.8.1
Pillow==8.3.1
protobuf==3.17.3
pyarrow==4.0.1
pyasn1==0.4.8
pyasn1-modules==0.2.8
pyparsing==2.4.7
python-dateutil==2.8.1
python-magic==0.4.24
pytz==2021.1
PyYAML==5.4.1
regex==2021.7.6
requests==2.25.1
requests-oauthlib==1.3.0
rsa==4.7.2
sacremoses==0.0.45
scipy==1.7.0
six==1.15.0
tb-nightly==2.6.0a20210711
tbp-nightly==2.5.0a20210511
tensorboard-data-server==0.6.1
tensorboard-plugin-wit==1.8.0
termcolor==1.1.0
tf-estimator-nightly==2.7.0.dev2021071101
tf-nightly==2.7.0.dev20210711
tokenizers==0.10.3
toml==0.10.2
toolz==0.11.1
torch==1.9.0+cpu
torchaudio==0.9.0
torchvision==0.10.0+cpu
tqdm==4.61.2
-e git+https://github.com/ncoop57/transformers.git@d7e156bd1ae2467e9ea1dbc44f31da0ed2296aee#egg=transformers
typing-extensions==3.7.4.3
ujson==4.0.2
urllib3==1.26.6
Werkzeug==2.0.1
wrapt==1.12.1
xxhash==2.0.2
yarl==1.6.3
zstandard==0.15.2
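
A minimal sketch of installing this pinned environment (an assumption, not part of the commit): the editable -e git+... entries need git on PATH, and the +cpu torch/torchvision wheels are typically resolved from the PyTorch wheel index.

# Sketch only: install the pinned dependencies, pulling the +cpu torch wheels from the PyTorch index
pip install --upgrade pip
pip install -r requirements.txt -f https://download.pytorch.org/whl/torch_stable.html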

@@ -0,0 +1,37 @@
#! /bin/bash
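# Launch a streaming causal-LM training run with Flax: trains the
# gpt-neo-125M-code-clippy model on the code_clippy dataset with the
# hyperparameters below, evaluating periodically and logging to wandb.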
./run_clm_streaming_flax_v2.py \
--output_dir $HOME/gpt-neo-125M-code-clippy-test \
--model_name_or_path="flax-community-gpt-neo-125M-code-clippy" \
--dataset_name $HOME/gpt-code-clippy/code_clippy.py \
--data_dir /home/shared/code_clippy_data \
--text_column_name="text" \
--do_train --do_eval \
--block_size="2048" \
--per_device_train_batch_size="8" \
--per_device_eval_batch_size="16" \
--preprocessing_num_workers="8" \
--learning_rate="1e-4" \
--max_steps 2500 \
--warmup_steps 3000 \
--decay_steps 250 \
--adam_beta1="0.9" \
--adam_beta2="0.95" \
--weight_decay="0.01" \
--overwrite_output_dir \
--logging_steps="10" \
--eval_steps="500" \
--push_to_hub="False" \
--report_to="all" \
--dtype="bfloat16" \
--skip_memory_metrics="True" \
--save_steps="500" \
--save_total_limit 2 \
--gradient_accumulation_steps 16 \
--report_to="wandb" \
--run_name="poodle-long-run" \
--max_eval_samples 2000 \
--save_optimizer true \
# --adafactor \
# --resume_from_checkpoint $HOME/gpt-neo-125M-code-clippy/ \
# --max_train_samples="10000" \
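
A hedged usage sketch: the script invokes the trainer directly as ./run_clm_streaming_flax_v2.py, so that file must be executable and carry a Python shebang; the wrapper's own filename is not visible in this diff, so run_config.sh below is a hypothetical placeholder. The trailing commented-out flags show how the same command could be switched to Adafactor, resumed from a checkpoint, or capped to a smaller training sample.

chmod +x run_clm_streaming_flax_v2.py   # the trainer is called directly, so it needs the execute bit
bash run_config.sh                      # hypothetical filename; the actual name is not shown in this diff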