gpt-code-clippy/gpt-neo-test.ipynb

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "84b1a438-cf1d-402e-a56f-2c4f9dd5ad51",
   "metadata": {},
   "outputs": [],
   "source": [
    "from transformers import AutoTokenizer, FlaxGPTNeoForCausalLM, AutoModelForMaskedLM"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "7d50cd18-33ed-4b67-82ad-5c48eb9a9b36",
   "metadata": {},
   "outputs": [],
   "source": [
    "from pathlib import Path\n",
    "\n",
    "# Checkpoint directory under the user's home folder, as a POSIX-style string.\n",
    "# Use 'EleutherAI/gpt-neo-125M' here instead to load the hub model.\n",
    "model_ckpt = Path.home().joinpath('gpt-neo-125M-code-clippy').as_posix()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2ec0c4cc-a1bc-4dda-bd0b-72891b519b39",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "065c03c3-2e4a-4f20-a30d-25ada1418b18",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "INFO:absl:Starting the local TPU driver.\n",
      "INFO:absl:Unable to initialize backend 'tpu_driver': Not found: Unable to find driver in registry given worker: local://\n",
      "INFO:absl:Unable to initialize backend 'gpu': Not found: Could not find registered platform with name: \"cuda\". Available platform names are: TPU Interpreter Host\n"
     ]
    }
   ],
   "source": [
    "tokenizer = AutoTokenizer.from_pretrained(model_ckpt)\n",
    "model = FlaxGPTNeoForCausalLM.from_pretrained(model_ckpt)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "e2f9fb26-2e26-4f57-aa93-e349475203f3",
   "metadata": {},
   "outputs": [],
   "source": [
    "tokenizer.pad_token = tokenizer.eos_token"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "75c0c2f6-47ad-41c3-8c66-a1ceeecde061",
   "metadata": {},
   "outputs": [],
   "source": [
    "prompt = \"\"\"\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "x = np.random.randn(10, 10)\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "666977a1-de0d-4900-bf61-ae2b672e51bc",
   "metadata": {},
   "outputs": [],
   "source": [
    "inputs = tokenizer(prompt, return_tensors='jax')\n",
    "input_ids = inputs.input_ids"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "id": "249e4a8a-7a7e-4e8b-83be-7184a4c0dd0b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(1, 40, 50257)"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "outputs = model(**inputs)\n",
    "outputs.logits.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "id": "eee873f5-073c-4cbe-8b15-114ea18b2de8",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "DeviceArray([[  198, 11748,   299, 32152,   355, 45941,   198, 11748,\n",
       "              19798,   292,   355,   279,    67,   198, 11748,  2603,\n",
       "              29487,  8019,    13,  9078, 29487,   355,   458,    83,\n",
       "                198,   198,    87,   796, 45941,    13, 25120,    13,\n",
       "              25192,    77,     7,   940,    11,   838,     8,   198]],            dtype=int32)"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "input_ids"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "id": "82666225-3ab7-405f-9536-4e9e3085be24",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Single-beam generation capped at max_length=200; the pad id comes from the tokenizer.\n",
    "out = model.generate(\n",
    "    input_ids,\n",
    "    max_length=200,\n",
    "    num_beams=1,\n",
    "    pad_token_id=tokenizer.pad_token_id,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "id": "c6cc862b-23ef-417d-ae83-1b2eafb0460f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "FlaxGreedySearchOutput(sequences=DeviceArray([[  198, 11748,   299, 32152,   355, 45941,   198, 11748,\n",
       "              19798,   292,   355,   279,    67,   198, 11748,  2603,\n",
       "              29487,  8019,    13,  9078, 29487,   355,   458,    83,\n",
       "                198,   198,    87,   796, 45941,    13, 25120,    13,\n",
       "              25192,    77,     7,   940,    11,   838,     8,   198,\n",
       "                 88,   796, 45941,    13, 25120,    13, 25192,    77,\n",
       "                  7,   940,    11,   838,     8,   198,   198,     2,\n",
       "                220,   220,   220,   220,   220,   220,   220,   220,\n",
       "                220,   220,   220,   220,   220,   220,   220,   220,\n",
       "                220,   220,   220,   220,   220,   220,   220,   220,\n",
       "                220,   220,   220,   220,   220,   220,   220,   220,\n",
       "                220,   220,   220,   220,   220,   220,   220,   220,\n",
       "                220,   220,   220,   220,   220,   220,   220,   220,\n",
       "                220,   220,   220,   220,   220,   220,   220,   220,\n",
       "                220,   220,   220,   220,   220,   220,   220,   220,\n",
       "                220,   220,   220,   220,   220,   220,   220,   220,\n",
       "                220,   220,   220,   220,   220,   220,   220,   220,\n",
       "                220,   220,   220,   220,   220,   220,   220,   220,\n",
       "                220,   220,   220,   220,   220,   220,   220,   220,\n",
       "                220,   220,   220,   220,   220,   220,   220,   220,\n",
       "                220,   220,   220,   220,   220,   220,   220,   220,\n",
       "                220,   220,   220,   220,   220,   220,   220,   220,\n",
       "                220,   220,   220,   220,   220,   220,   220,   220,\n",
       "                220,   220,   220,   220,   220,   220,   220,   220,\n",
       "                220,   220,   220,   220,   220,   220,   220,   220]],            dtype=int32))"
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "out"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "id": "8f6c746a-2d56-4da4-acb5-e066a6a230f2",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "import numpy as np\n",
      "import pandas as pd\n",
      "import matplotlib.pyplot as plt\n",
      "\n",
      "x = np.random.randn(10, 10)\n",
      "y = np.random.randn(10, 10)\n",
      "\n",
      "#                                                                                                                                                \n"
     ]
    }
   ],
   "source": [
    "print(tokenizer.decode(out[0][0]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "id": "b6effeaa-2237-47bc-b0f6-940c4e274c38",
   "metadata": {},
   "outputs": [],
   "source": [
    "from transformers import GPTNeoForCausalLM, AutoModelForCausalLM"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "id": "3665a3fd-5d92-45e8-8fde-393ec803383a",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/arto/transformers/src/transformers/modeling_flax_pytorch_utils.py:201: UserWarning: The given NumPy array is not writeable, and PyTorch does not support non-writeable tensors. This means you can write to the underlying (supposedly non-writeable) NumPy array using the tensor. You may want to copy the array to protect its data or make it writeable before converting it to a tensor. This type of warning will be suppressed for the rest of this program. (Triggered internally at  /pytorch/torch/csrc/utils/tensor_numpy.cpp:180.)\n",
      "  pt_model_dict[flax_key] = torch.from_numpy(flax_tensor)\n",
      "All Flax model weights were used when initializing GPTNeoForCausalLM.\n",
      "\n",
      "Some weights of GPTNeoForCausalLM were not initialized from the Flax model and are newly initialized: ['lm_head.weight', 'transformer.h.1.attn.attention.masked_bias', 'transformer.h.6.attn.attention.bias', 'transformer.h.7.attn.attention.masked_bias', 'transformer.h.10.attn.attention.masked_bias', 'transformer.h.4.attn.attention.bias', 'transformer.h.2.attn.attention.bias', 'transformer.h.6.attn.attention.masked_bias', 'transformer.h.2.attn.attention.masked_bias', 'transformer.h.0.attn.attention.bias', 'transformer.h.3.attn.attention.masked_bias', 'transformer.h.5.attn.attention.masked_bias', 'transformer.h.4.attn.attention.masked_bias', 'transformer.h.8.attn.attention.masked_bias', 'transformer.h.11.attn.attention.masked_bias', 'transformer.h.9.attn.attention.masked_bias', 'transformer.h.0.attn.attention.masked_bias', 'transformer.h.8.attn.attention.bias', 'transformer.h.10.attn.attention.bias']\n",
      "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
     ]
    }
   ],
   "source": [
    "model = GPTNeoForCausalLM.from_pretrained(model_ckpt, from_flax=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5fa23301-6f5d-40d5-b614-f14330df894a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Convert the Flax checkpoint to PyTorch and save the weights back into the\n",
    "# same directory. GPT-Neo is a causal LM (it is loaded above with\n",
    "# FlaxGPTNeoForCausalLM), so the causal-LM auto class is the one to use;\n",
    "# the masked-LM auto class has no GPT-Neo mapping.\n",
    "from transformers import AutoModelForCausalLM\n",
    "model = AutoModelForCausalLM.from_pretrained(model_ckpt, from_flax=True)\n",
    "model.save_pretrained(model_ckpt, save_config=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "id": "35114633-bb5f-4c00-ae16-540a7fabb126",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "All Flax model weights were used when initializing GPTNeoForCausalLM.\n",
      "\n",
      "Some weights of GPTNeoForCausalLM were not initialized from the Flax model and are newly initialized: ['lm_head.weight', 'transformer.h.1.attn.attention.masked_bias', 'transformer.h.6.attn.attention.bias', 'transformer.h.7.attn.attention.masked_bias', 'transformer.h.10.attn.attention.masked_bias', 'transformer.h.4.attn.attention.bias', 'transformer.h.2.attn.attention.bias', 'transformer.h.6.attn.attention.masked_bias', 'transformer.h.2.attn.attention.masked_bias', 'transformer.h.0.attn.attention.bias', 'transformer.h.3.attn.attention.masked_bias', 'transformer.h.5.attn.attention.masked_bias', 'transformer.h.4.attn.attention.masked_bias', 'transformer.h.8.attn.attention.masked_bias', 'transformer.h.11.attn.attention.masked_bias', 'transformer.h.9.attn.attention.masked_bias', 'transformer.h.0.attn.attention.masked_bias', 'transformer.h.8.attn.attention.bias', 'transformer.h.10.attn.attention.bias']\n",
      "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
     ]
    }
   ],
   "source": [
    "model = AutoModelForCausalLM.from_pretrained(model_ckpt, from_flax=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "id": "15cd3853-1308-46e1-90c1-52b3af0fcac4",
   "metadata": {},
   "outputs": [],
   "source": [
    "prompt = \"\"\"\n",
    "my_list = ['banana', 'apple', 'orange', 'pineapple']\n",
    "\n",
    "#Using brute force method\n",
    "last_element = my_list[len(my_list) - 1]\n",
    "\n",
    "#Using negative indeces\n",
    "last_element = my_list[-1]\n",
    "\n",
    "#Using pop method\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "id": "2f2fc69a-f8f5-4859-bb2e-5c33e63f064a",
   "metadata": {},
   "outputs": [],
   "source": [
    "prompt = \"\"\"\n",
    "def get_vowels(string):\n",
    "    return [vowel for vowel in string if vowel in 'aeiou'] \n",
    "\n",
    "print(\"Vowels are:\",\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "id": "517aa451-3316-45fc-97ab-1a9a52ba55b6",
   "metadata": {},
   "outputs": [],
   "source": [
    "prompt = \"\"\"import time\n",
    "\n",
    "start_time = time.time()\n",
    "\n",
    "total = 0\n",
    "for i in range(10):\n",
    "  total += i\n",
    "print(\"Sum:\", total)\n",
    "\n",
    "end_time = time.time()\n",
    "time_taken = \"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "id": "749f6c3d-e1a4-4df7-be81-086024345766",
   "metadata": {},
   "outputs": [],
   "source": [
    "inputs = tokenizer(prompt, return_tensors='pt')\n",
    "input_ids = inputs.input_ids"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 81,
   "id": "6f60e3f0-d051-4df1-8258-7bc479486603",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Single-beam generation capped at max_length=200; the pad id comes from the tokenizer.\n",
    "out = model.generate(\n",
    "    input_ids,\n",
    "    max_length=200,\n",
    "    num_beams=1,\n",
    "    pad_token_id=tokenizer.pad_token_id,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "id": "9d17aec3-e42a-43d6-a535-2eeaad2a9c78",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "import time\n",
      "\n",
      "start_time = time.time()\n",
      "\n",
      "total = 0\n",
      "for i in range(10):\n",
      "  total += i\n",
      "print(\"Sum:\", total)\n",
      "\n",
      "end_time = time.time()\n",
      "time_taken =  time.time()\n",
      "\n",
      "#                                                                                                                                          \n"
     ]
    }
   ],
   "source": [
    "print(tokenizer.decode(out[0]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "id": "57574549-bd1d-46b0-98ca-352662f735d2",
   "metadata": {},
   "outputs": [],
   "source": [
    "model.save_pretrained(model_ckpt, save_config=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "0578148d-497d-422f-b7fb-b644d2a7c62f",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2021-07-06 15:02:08.590730: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory\n",
      "2021-07-06 15:02:08.590769: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.\n"
     ]
    }
   ],
   "source": [
    "from transformers import TrainingArguments"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "63ffd8ff-8e95-4aad-9068-b27fd8c129bb",
   "metadata": {},
   "outputs": [],
   "source": [
    "from dataclasses import fields"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "a9a89020-e7b0-4826-88e6-8ac4f4c6f89e",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Field(name='skip_memory_metrics',type=<class 'bool'>,default=True,default_factory=<dataclasses._MISSING_TYPE object at 0x7f9a12926af0>,init=True,repr=True,hash=None,compare=True,metadata=mappingproxy({'help': 'Whether or not to skip adding of memory profiler reports to metrics.'}),_field_type=_FIELD)\n"
     ]
    }
   ],
   "source": [
    "# Print the dataclass Field describing TrainingArguments.skip_memory_metrics.\n",
    "matching_fields = [\n",
    "    field for field in fields(TrainingArguments)\n",
    "    if field.name == \"skip_memory_metrics\"\n",
    "]\n",
    "for field in matching_fields:\n",
    "    print(field)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "beda87b7-c461-4f92-8988-4255a8e79cf9",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}