gpt-code-clippy/gpt-neo-test.ipynb
2021-07-06 23:44:48 +03:00

488 lines
16 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": 5,
"id": "84b1a438-cf1d-402e-a56f-2c4f9dd5ad51",
"metadata": {},
"outputs": [],
"source": [
"from transformers import AutoTokenizer, FlaxGPTNeoForCausalLM, AutoModelForMaskedLM"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "7d50cd18-33ed-4b67-82ad-5c48eb9a9b36",
"metadata": {},
"outputs": [],
"source": [
"from pathlib import Path\n",
"\n",
"# Checkpoint to load: a local directory under the current user's home folder.\n",
"# Hub alternative, left disabled: model_ckpt = 'EleutherAI/gpt-neo-125M'\n",
"model_ckpt = Path.home().joinpath('gpt-neo-125M-code-clippy').as_posix()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2ec0c4cc-a1bc-4dda-bd0b-72891b519b39",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 10,
"id": "065c03c3-2e4a-4f20-a30d-25ada1418b18",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:absl:Starting the local TPU driver.\n",
"INFO:absl:Unable to initialize backend 'tpu_driver': Not found: Unable to find driver in registry given worker: local://\n",
"INFO:absl:Unable to initialize backend 'gpu': Not found: Could not find registered platform with name: \"cuda\". Available platform names are: TPU Interpreter Host\n"
]
}
],
"source": [
"tokenizer = AutoTokenizer.from_pretrained(model_ckpt)\n",
"model = FlaxGPTNeoForCausalLM.from_pretrained(model_ckpt)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "e2f9fb26-2e26-4f57-aa93-e349475203f3",
"metadata": {},
"outputs": [],
"source": [
"tokenizer.pad_token = tokenizer.eos_token"
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "75c0c2f6-47ad-41c3-8c66-a1ceeecde061",
"metadata": {},
"outputs": [],
"source": [
"prompt = \"\"\"\n",
"import numpy as np\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"\n",
"x = np.random.randn(10, 10)\n",
"\"\"\""
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "666977a1-de0d-4900-bf61-ae2b672e51bc",
"metadata": {},
"outputs": [],
"source": [
"# Tokenize the prompt into JAX arrays and keep the token ids for the cells below.\n",
"inputs = tokenizer(prompt, return_tensors='jax')\n",
"input_ids = inputs['input_ids']"
]
},
{
"cell_type": "code",
"execution_count": 29,
"id": "249e4a8a-7a7e-4e8b-83be-7184a4c0dd0b",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(1, 40, 50257)"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"outputs = model(**inputs)\n",
"outputs.logits.shape"
]
},
{
"cell_type": "code",
"execution_count": 30,
"id": "eee873f5-073c-4cbe-8b15-114ea18b2de8",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"DeviceArray([[ 198, 11748, 299, 32152, 355, 45941, 198, 11748,\n",
" 19798, 292, 355, 279, 67, 198, 11748, 2603,\n",
" 29487, 8019, 13, 9078, 29487, 355, 458, 83,\n",
" 198, 198, 87, 796, 45941, 13, 25120, 13,\n",
" 25192, 77, 7, 940, 11, 838, 8, 198]], dtype=int32)"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"input_ids"
]
},
{
"cell_type": "code",
"execution_count": 37,
"id": "82666225-3ab7-405f-9536-4e9e3085be24",
"metadata": {},
"outputs": [],
"source": [
"# Single-beam decoding with a total length cap of 200 tokens;\n",
"# padding uses the tokenizer's pad token id.\n",
"out = model.generate(\n",
"    input_ids,\n",
"    max_length=200,\n",
"    num_beams=1,\n",
"    pad_token_id=tokenizer.pad_token_id,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 38,
"id": "c6cc862b-23ef-417d-ae83-1b2eafb0460f",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"FlaxGreedySearchOutput(sequences=DeviceArray([[ 198, 11748, 299, 32152, 355, 45941, 198, 11748,\n",
" 19798, 292, 355, 279, 67, 198, 11748, 2603,\n",
" 29487, 8019, 13, 9078, 29487, 355, 458, 83,\n",
" 198, 198, 87, 796, 45941, 13, 25120, 13,\n",
" 25192, 77, 7, 940, 11, 838, 8, 198,\n",
" 88, 796, 45941, 13, 25120, 13, 25192, 77,\n",
" 7, 940, 11, 838, 8, 198, 198, 2,\n",
" 220, 220, 220, 220, 220, 220, 220, 220,\n",
" 220, 220, 220, 220, 220, 220, 220, 220,\n",
" 220, 220, 220, 220, 220, 220, 220, 220,\n",
" 220, 220, 220, 220, 220, 220, 220, 220,\n",
" 220, 220, 220, 220, 220, 220, 220, 220,\n",
" 220, 220, 220, 220, 220, 220, 220, 220,\n",
" 220, 220, 220, 220, 220, 220, 220, 220,\n",
" 220, 220, 220, 220, 220, 220, 220, 220,\n",
" 220, 220, 220, 220, 220, 220, 220, 220,\n",
" 220, 220, 220, 220, 220, 220, 220, 220,\n",
" 220, 220, 220, 220, 220, 220, 220, 220,\n",
" 220, 220, 220, 220, 220, 220, 220, 220,\n",
" 220, 220, 220, 220, 220, 220, 220, 220,\n",
" 220, 220, 220, 220, 220, 220, 220, 220,\n",
" 220, 220, 220, 220, 220, 220, 220, 220,\n",
" 220, 220, 220, 220, 220, 220, 220, 220,\n",
" 220, 220, 220, 220, 220, 220, 220, 220,\n",
" 220, 220, 220, 220, 220, 220, 220, 220]], dtype=int32))"
]
},
"execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"out"
]
},
{
"cell_type": "code",
"execution_count": 39,
"id": "8f6c746a-2d56-4da4-acb5-e066a6a230f2",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"\n",
"x = np.random.randn(10, 10)\n",
"y = np.random.randn(10, 10)\n",
"\n",
"# \n"
]
}
],
"source": [
"# `out` is the Flax generation output; its `sequences` field holds the token ids.\n",
"# Decode the first (and only) sequence in the batch back to text.\n",
"generated_ids = out.sequences[0]\n",
"print(tokenizer.decode(generated_ids))"
]
},
{
"cell_type": "code",
"execution_count": 42,
"id": "b6effeaa-2237-47bc-b0f6-940c4e274c38",
"metadata": {},
"outputs": [],
"source": [
"from transformers import GPTNeoForCausalLM, AutoModelForCausalLM"
]
},
{
"cell_type": "code",
"execution_count": 41,
"id": "3665a3fd-5d92-45e8-8fde-393ec803383a",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/arto/transformers/src/transformers/modeling_flax_pytorch_utils.py:201: UserWarning: The given NumPy array is not writeable, and PyTorch does not support non-writeable tensors. This means you can write to the underlying (supposedly non-writeable) NumPy array using the tensor. You may want to copy the array to protect its data or make it writeable before converting it to a tensor. This type of warning will be suppressed for the rest of this program. (Triggered internally at /pytorch/torch/csrc/utils/tensor_numpy.cpp:180.)\n",
" pt_model_dict[flax_key] = torch.from_numpy(flax_tensor)\n",
"All Flax model weights were used when initializing GPTNeoForCausalLM.\n",
"\n",
"Some weights of GPTNeoForCausalLM were not initialized from the Flax model and are newly initialized: ['lm_head.weight', 'transformer.h.1.attn.attention.masked_bias', 'transformer.h.6.attn.attention.bias', 'transformer.h.7.attn.attention.masked_bias', 'transformer.h.10.attn.attention.masked_bias', 'transformer.h.4.attn.attention.bias', 'transformer.h.2.attn.attention.bias', 'transformer.h.6.attn.attention.masked_bias', 'transformer.h.2.attn.attention.masked_bias', 'transformer.h.0.attn.attention.bias', 'transformer.h.3.attn.attention.masked_bias', 'transformer.h.5.attn.attention.masked_bias', 'transformer.h.4.attn.attention.masked_bias', 'transformer.h.8.attn.attention.masked_bias', 'transformer.h.11.attn.attention.masked_bias', 'transformer.h.9.attn.attention.masked_bias', 'transformer.h.0.attn.attention.masked_bias', 'transformer.h.8.attn.attention.bias', 'transformer.h.10.attn.attention.bias']\n",
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
]
}
],
"source": [
"model = GPTNeoForCausalLM.from_pretrained(model_ckpt, from_flax=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5fa23301-6f5d-40d5-b614-f14330df894a",
"metadata": {},
"outputs": [],
"source": [
"# Convert the Flax checkpoint to a PyTorch model and write the weights back\n",
"# into the same checkpoint directory (the existing config file is left alone).\n",
"# GPT-Neo is a causal language model, so it is loaded via AutoModelForCausalLM.\n",
"from transformers import AutoModelForCausalLM\n",
"\n",
"model = AutoModelForCausalLM.from_pretrained(model_ckpt, from_flax=True)\n",
"model.save_pretrained(model_ckpt, save_config=False)"
]
},
{
"cell_type": "code",
"execution_count": 43,
"id": "35114633-bb5f-4c00-ae16-540a7fabb126",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"All Flax model weights were used when initializing GPTNeoForCausalLM.\n",
"\n",
"Some weights of GPTNeoForCausalLM were not initialized from the Flax model and are newly initialized: ['lm_head.weight', 'transformer.h.1.attn.attention.masked_bias', 'transformer.h.6.attn.attention.bias', 'transformer.h.7.attn.attention.masked_bias', 'transformer.h.10.attn.attention.masked_bias', 'transformer.h.4.attn.attention.bias', 'transformer.h.2.attn.attention.bias', 'transformer.h.6.attn.attention.masked_bias', 'transformer.h.2.attn.attention.masked_bias', 'transformer.h.0.attn.attention.bias', 'transformer.h.3.attn.attention.masked_bias', 'transformer.h.5.attn.attention.masked_bias', 'transformer.h.4.attn.attention.masked_bias', 'transformer.h.8.attn.attention.masked_bias', 'transformer.h.11.attn.attention.masked_bias', 'transformer.h.9.attn.attention.masked_bias', 'transformer.h.0.attn.attention.masked_bias', 'transformer.h.8.attn.attention.bias', 'transformer.h.10.attn.attention.bias']\n",
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
]
}
],
"source": [
"model = AutoModelForCausalLM.from_pretrained(model_ckpt, from_flax=True)"
]
},
{
"cell_type": "code",
"execution_count": 64,
"id": "15cd3853-1308-46e1-90c1-52b3af0fcac4",
"metadata": {},
"outputs": [],
"source": [
"prompt = \"\"\"\n",
"my_list = ['banana', 'apple', 'orange', 'pineapple']\n",
"\n",
"#Using brute force method\n",
"last_element = my_list[len(my_list) - 1]\n",
"\n",
"#Using negative indeces\n",
"last_element = my_list[-1]\n",
"\n",
"#Using pop method\n",
"\"\"\""
]
},
{
"cell_type": "code",
"execution_count": 72,
"id": "2f2fc69a-f8f5-4859-bb2e-5c33e63f064a",
"metadata": {},
"outputs": [],
"source": [
"prompt = \"\"\"\n",
"def get_vowels(string):\n",
" return [vowel for vowel in string if vowel in 'aeiou'] \n",
"\n",
"print(\"Vowels are:\",\"\"\""
]
},
{
"cell_type": "code",
"execution_count": 77,
"id": "517aa451-3316-45fc-97ab-1a9a52ba55b6",
"metadata": {},
"outputs": [],
"source": [
"prompt = \"\"\"import time\n",
"\n",
"start_time = time.time()\n",
"\n",
"total = 0\n",
"for i in range(10):\n",
" total += i\n",
"print(\"Sum:\", total)\n",
"\n",
"end_time = time.time()\n",
"time_taken = \"\"\""
]
},
{
"cell_type": "code",
"execution_count": 78,
"id": "749f6c3d-e1a4-4df7-be81-086024345766",
"metadata": {},
"outputs": [],
"source": [
"# Tokenize the current prompt into PyTorch tensors for the PyTorch model.\n",
"inputs = tokenizer(prompt, return_tensors='pt')\n",
"input_ids = inputs['input_ids']"
]
},
{
"cell_type": "code",
"execution_count": 81,
"id": "6f60e3f0-d051-4df1-8258-7bc479486603",
"metadata": {},
"outputs": [],
"source": [
"# Single-beam decoding with a total length cap of 200 tokens.\n",
"# The pad token was set to the EOS token, so generate() cannot derive the\n",
"# attention mask from the ids alone; pass the tokenizer's mask explicitly.\n",
"out = model.generate(\n",
"    input_ids,\n",
"    attention_mask=inputs.attention_mask,\n",
"    max_length=200,\n",
"    num_beams=1,\n",
"    pad_token_id=tokenizer.pad_token_id,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 82,
"id": "9d17aec3-e42a-43d6-a535-2eeaad2a9c78",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"import time\n",
"\n",
"start_time = time.time()\n",
"\n",
"total = 0\n",
"for i in range(10):\n",
" total += i\n",
"print(\"Sum:\", total)\n",
"\n",
"end_time = time.time()\n",
"time_taken = time.time()\n",
"\n",
"# \n"
]
}
],
"source": [
"print(tokenizer.decode(out[0]))"
]
},
{
"cell_type": "code",
"execution_count": 76,
"id": "57574549-bd1d-46b0-98ca-352662f735d2",
"metadata": {},
"outputs": [],
"source": [
"model.save_pretrained(model_ckpt, save_config=False)"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "0578148d-497d-422f-b7fb-b644d2a7c62f",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2021-07-06 15:02:08.590730: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory\n",
"2021-07-06 15:02:08.590769: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.\n"
]
}
],
"source": [
"from transformers import TrainingArguments"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "63ffd8ff-8e95-4aad-9068-b27fd8c129bb",
"metadata": {},
"outputs": [],
"source": [
"from dataclasses import fields"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "a9a89020-e7b0-4826-88e6-8ac4f4c6f89e",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Field(name='skip_memory_metrics',type=<class 'bool'>,default=True,default_factory=<dataclasses._MISSING_TYPE object at 0x7f9a12926af0>,init=True,repr=True,hash=None,compare=True,metadata=mappingproxy({'help': 'Whether or not to skip adding of memory profiler reports to metrics.'}),_field_type=_FIELD)\n"
]
}
],
"source": [
"# Print the dataclass field definition (type, default, help text) of the\n",
"# `skip_memory_metrics` option of TrainingArguments.\n",
"for f in fields(TrainingArguments):\n",
"    if f.name == \"skip_memory_metrics\":\n",
"        print(f)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "beda87b7-c461-4f92-8988-4255a8e79cf9",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.10"
}
},
"nbformat": 4,
"nbformat_minor": 5
}