mirror of
https://github.com/CodedotAl/gpt-code-clippy.git
synced 2024-10-26 09:17:45 +03:00
522 lines
16 KiB
Plaintext
522 lines
16 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"id": "7b529017-a33e-4b2d-be5b-387ae7dd51e1",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"2021-07-18 19:07:02.959520: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory\n",
|
|
"2021-07-18 19:07:02.959564: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"from transformers import AutoModelForCausalLM, AutoTokenizer\n",
|
|
"from datasets import load_dataset"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"id": "53610604-dff1-4a65-83b5-0795d5b3b473",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Using custom data configuration formatted-d9019fd8ed858445\n",
|
|
"Reusing dataset apps (/home/shared/.cache/hf/datasets/apps/formatted-d9019fd8ed858445/0.1.0/5987476458cc986e36654364319c6fe798b880d64a35518cbc00dc04f3c41e4d)\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"dataset = load_dataset(\"/home/arto/datasets/datasets/apps/apps.py\", \"formatted\", split=\"test\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 3,
|
|
"id": "3811070c-c6a0-4a84-9362-cf7de0d5bd75",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"/home/arto/transformers/src/transformers/modeling_flax_pytorch_utils.py:201: UserWarning: The given NumPy array is not writeable, and PyTorch does not support non-writeable tensors. This means you can write to the underlying (supposedly non-writeable) NumPy array using the tensor. You may want to copy the array to protect its data or make it writeable before converting it to a tensor. This type of warning will be suppressed for the rest of this program. (Triggered internally at /pytorch/torch/csrc/utils/tensor_numpy.cpp:180.)\n",
|
|
" pt_model_dict[flax_key] = torch.from_numpy(flax_tensor)\n",
|
|
"All Flax model weights were used when initializing GPTNeoForCausalLM.\n",
|
|
"\n",
|
|
"Some weights of GPTNeoForCausalLM were not initialized from the Flax model and are newly initialized: ['transformer.h.0.attn.attention.bias', 'transformer.h.6.attn.attention.masked_bias', 'transformer.h.3.attn.attention.masked_bias', 'transformer.h.2.attn.attention.bias', 'transformer.h.10.attn.attention.masked_bias', 'transformer.h.9.attn.attention.masked_bias', 'transformer.h.1.attn.attention.masked_bias', 'transformer.h.7.attn.attention.masked_bias', 'transformer.h.2.attn.attention.masked_bias', 'lm_head.weight', 'transformer.h.10.attn.attention.bias', 'transformer.h.11.attn.attention.masked_bias', 'transformer.h.4.attn.attention.masked_bias', 'transformer.h.8.attn.attention.masked_bias', 'transformer.h.0.attn.attention.masked_bias', 'transformer.h.5.attn.attention.masked_bias', 'transformer.h.4.attn.attention.bias', 'transformer.h.6.attn.attention.bias', 'transformer.h.8.attn.attention.bias']\n",
|
|
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# Locations of the weights and of the tokenizer paired with them.\n",
"CHECKPOINT_DIR = \"/home/shared/models/gpt-code-clippy-125M-apps-lr-5e-5/ckpt-8169/\"\n",
"TOKENIZER_NAME = \"EleutherAI/gpt-neo-125M\"\n",
"\n",
"# The checkpoint is stored in Flax format; from_flax=True converts it into the PyTorch model class.\n",
"# Alternative kept from the original cell (load the stock EleutherAI weights instead):\n",
"# model = AutoModelForCausalLM.from_pretrained(\"EleutherAI/gpt-neo-125M\")\n",
"model = AutoModelForCausalLM.from_pretrained(CHECKPOINT_DIR, from_flax=True)\n",
"\n",
"tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_NAME)\n",
"# Use the end-of-sequence token for padding as well.\n",
"tokenizer.pad_token = tokenizer.eos_token"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 36,
|
|
"id": "830a374f-a70d-466d-a766-3671e5c11765",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"id :\n",
|
|
"\n",
|
|
"33905\n",
|
|
"==========\n",
|
|
"question :\n",
|
|
"\n",
|
|
"\n",
|
|
"QUESTION:\n",
|
|
"There are N empty boxes arranged in a row from left to right.\n",
|
|
"The integer i is written on the i-th box from the left (1 \\leq i \\leq N).\n",
|
|
"For each of these boxes, Snuke can choose either to put a ball in it or to put nothing in it.\n",
|
|
"We say a set of choices to put a ball or not in the boxes is good when the following condition is satisfied:\n",
|
|
" - For every integer i between 1 and N (inclusive), the total number of balls contained in the boxes with multiples of i written on them is congruent to a_i modulo 2.\n",
|
|
"Does there exist a good set of choices? If the answer is yes, find one good set of choices.\n",
|
|
"\n",
|
|
"-----Constraints-----\n",
|
|
" - All values in input are integers.\n",
|
|
" - 1 \\leq N \\leq 2 \\times 10^5\n",
|
|
" - a_i is 0 or 1.\n",
|
|
"\n",
|
|
"-----Input-----\n",
|
|
"Input is given from Standard Input in the following format:\n",
|
|
"N\n",
|
|
"a_1 a_2 ... a_N\n",
|
|
"\n",
|
|
"-----Output-----\n",
|
|
"If a good set of choices does not exist, print -1.\n",
|
|
"If a good set of choices exists, print one such set of choices in the following format:\n",
|
|
"M\n",
|
|
"b_1 b_2 ... b_M\n",
|
|
"\n",
|
|
"where M denotes the number of boxes that will contain a ball, and b_1,\\ b_2,\\ ...,\\ b_M are the integers written on these boxes, in any order.\n",
|
|
"\n",
|
|
"-----Sample Input-----\n",
|
|
"3\n",
|
|
"1 0 0\n",
|
|
"\n",
|
|
"-----Sample Output-----\n",
|
|
"1\n",
|
|
"1\n",
|
|
"\n",
|
|
"Consider putting a ball only in the box with 1 written on it.\n",
|
|
" - There are three boxes with multiples of 1 written on them: the boxes with 1, 2, and 3. The total number of balls contained in these boxes is 1.\n",
|
|
" - There is only one box with a multiple of 2 written on it: the box with 2. The total number of balls contained in these boxes is 0.\n",
|
|
" - There is only one box with a multiple of 3 written on it: the box with 3. The total number of balls contained in these boxes is 0.\n",
|
|
"Thus, the condition is satisfied, so this set of choices is good.\n",
|
|
"\n",
|
|
"\n",
|
|
"Use Standard Input format\n",
|
|
"\n",
|
|
"ANSWER:\n",
|
|
"\n",
|
|
"==========\n",
|
|
"answer :\n",
|
|
"\n",
|
|
"N = int(input())\n",
|
|
"A = list(map(int,input().split()))\n",
|
|
"B = (N+1)*[0]\n",
|
|
"\n",
|
|
"for n in range(N,0,-1):\n",
|
|
"\tB[n] = (sum(B[n::n])+A[n-1])%2\n",
|
|
"\n",
|
|
"print(B.count(1))\n",
|
|
"for n in range(N+1):\n",
|
|
"\tif B[n]:\n",
|
|
"\t\tprint(n,end=\" \")\n",
|
|
"\n",
|
|
"==========\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"import random\n",
"\n",
"# Pick one row uniformly at random (randint includes both endpoints).\n",
"# NOTE: `id` shadows the builtin; the name is kept because later cells index with it.\n",
"id = random.randint(0, len(dataset) - 1)\n",
"sample = dataset[id]\n",
"\n",
"# Print every field of the row, each followed by a ten-character rule.\n",
"separator = \"=\" * 10\n",
"for field, value in sample.items():\n",
"    print(field, \":\\n\")\n",
"    print(value)\n",
"    print(separator)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 11,
|
|
"id": "1b109cec-c721-410f-b3df-38a50ae8607b",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# prompt = dataset[82239][\"question\"]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 37,
|
|
"id": "8fb6f26e-bc50-40e6-b373-3d9e50993b22",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Using custom data configuration formatted-d9019fd8ed858445\n",
|
|
"Reusing dataset apps (/home/shared/.cache/hf/datasets/apps/formatted-d9019fd8ed858445/0.1.0/5987476458cc986e36654364319c6fe798b880d64a35518cbc00dc04f3c41e4d)\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"train_dataset = load_dataset(\"/home/arto/datasets/datasets/apps/apps.py\", \"formatted\", split=\"train\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 40,
|
|
"id": "0eb58b5c-4a3b-48cb-82e1-e68b792f806e",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"\n",
|
|
"QUESTION:\n",
|
|
"~~~if:csharp,javascript,cfml,php\n",
|
|
"Given a 2D array of size `m * n`. Your task is to find the sum of minimum value in each row.\n",
|
|
"~~~\n",
|
|
"~~~if:cpp\n",
|
|
"Given a 2D vector of size `m * n`. Your task is to find the sum of minimum value in each row.\n",
|
|
"~~~\n",
|
|
"~~~if:python,ruby\n",
|
|
"Given a 2D list of size `m * n`. Your task is to find the sum of minimum value in each row.\n",
|
|
"~~~\n",
|
|
"\n",
|
|
"For Example:\n",
|
|
"```python\n",
|
|
"[\n",
|
|
" [1, 2, 3, 4, 5], # minimum value of row is 1\n",
|
|
" [5, 6, 7, 8, 9], # minimum value of row is 5\n",
|
|
" [20, 21, 34, 56, 100] # minimum value of row is 20\n",
|
|
"]\n",
|
|
"```\n",
|
|
"So, the function should return `26` because sum of minimums is as `1 + 5 + 20 = 26`\n",
|
|
"\n",
|
|
"~~~if:javascript,php\n",
|
|
"Note: You will be always given non-empty array containing Positive values.\n",
|
|
"~~~\n",
|
|
"~~~if:python\n",
|
|
"Note: You will be always given non-empty list containing Positive values.\n",
|
|
"~~~\n",
|
|
"~~~if:cpp\n",
|
|
"Note: You will be always given non-empty vector containing Positive values.\n",
|
|
"~~~\n",
|
|
"~~~if:c#\n",
|
|
"Note: You will be always given non-empty vector containing Positive values.\n",
|
|
"~~~\n",
|
|
"~~~if:cfml\n",
|
|
"Note: You will be always given non-empty array containing Positive values.\n",
|
|
"~~~\n",
|
|
"\n",
|
|
"ENJOY CODING :)\n",
|
|
"def sum_of_minimums(numbers):\n",
|
|
"\t\n",
|
|
"\n",
|
|
"Use Call-Based format\n",
|
|
"\n",
|
|
"ANSWER:\n",
|
|
"\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"print(train_dataset[82239][\"question\"])"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 44,
|
|
"id": "4decd052-f96b-4469-abfa-25beb9ab805c",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"id :\n",
|
|
"\n",
|
|
"78360\n",
|
|
"==========\n",
|
|
"question :\n",
|
|
"\n",
|
|
"\n",
|
|
"QUESTION:\n",
|
|
"# Personalized greeting\n",
|
|
"\n",
|
|
"Create a function that gives a personalized greeting. This function takes two parameters: `name` and `owner`.\n",
|
|
"\n",
|
|
"Use conditionals to return the proper message:\n",
|
|
"\n",
|
|
"case | return\n",
|
|
"--- | ---\n",
|
|
"name equals owner | 'Hello boss'\n",
|
|
"otherwise | 'Hello guest'\n",
|
|
"def greet(name, owner):\n",
|
|
"\t\n",
|
|
"\n",
|
|
"Use Call-Based format\n",
|
|
"\n",
|
|
"ANSWER:\n",
|
|
"\n",
|
|
"==========\n",
|
|
"answer :\n",
|
|
"\n",
|
|
"def greet(name, owner):\n",
|
|
"\tgreet = 'guest'\n",
|
|
"\tif name == owner: \n",
|
|
"\t\tgreet = 'boss'\n",
|
|
"\treturn 'Hello ' + greet\n",
|
|
"\n",
|
|
"==========\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"import random\n",
"\n",
"# Pick one training row uniformly at random (randint includes both endpoints).\n",
"# NOTE: `id` shadows the builtin; the name is kept because later cells index with it.\n",
"id = random.randint(0, len(train_dataset) - 1)\n",
"sample = train_dataset[id]\n",
"\n",
"# Print every field of the row, each followed by a ten-character rule.\n",
"separator = \"=\" * 10\n",
"for field, value in sample.items():\n",
"    print(field, \":\\n\")\n",
"    print(value)\n",
"    print(separator)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 19,
|
|
"id": "d3be9853-0ce7-40a5-976e-6ff082a7ed2f",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"115212"
|
|
]
|
|
},
|
|
"execution_count": 19,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"len(dataset)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "eea91645-fbbc-4495-a829-357cd2833a6b",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Tenth row from the end, derived from the dataset size rather than a hand-copied length\n",
"# so the index stays valid if the dataset changes.\n",
"id = len(dataset) - 10"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 20,
|
|
"id": "2b72f491-8a44-4206-ba06-bc1bc6ba011c",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"prompt = dataset[id][\"question\"]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 21,
|
|
"id": "54768ef7-3752-45da-b5c8-721893c9b254",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"\n",
|
|
"QUESTION:\n",
|
|
"Given an array of integers, return indices of the two numbers such that they add up to a specific target.\n",
|
|
"\n",
|
|
"You may assume that each input would have exactly one solution, and you may not use the same element twice.\n",
|
|
"\n",
|
|
"Example:\n",
|
|
"\n",
|
|
"\n",
|
|
"Given nums = [2, 7, 11, 15], target = 9,\n",
|
|
"\n",
|
|
"Because nums[0] + nums[1] = 2 + 7 = 9,\n",
|
|
"return [0, 1].\n",
|
|
"class Solution:\n",
|
|
" def twoSum(self, nums: List[int], target: int) -> List[int]:\n",
|
|
" \n",
|
|
"\n",
|
|
"Use Call-Based format\n",
|
|
"\n",
|
|
"ANSWER:\n",
|
|
"\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"print(prompt)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 30,
|
|
"id": "820f8729-a368-4b41-91ff-56601d21aaaf",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"class Solution:\n",
|
|
"\tdef twoSum(self, nums, target):\n",
|
|
"\t \"\"\"\n",
|
|
"\t :type nums: List[int]\n",
|
|
"\t :type target: int\n",
|
|
"\t :rtype: List[int]\n",
|
|
"\t \"\"\"\n",
|
|
"\t a = {}\n",
|
|
"\t b = {}\n",
|
|
"\t for i in range(1, target+1):\n",
|
|
"\t\t a[i] += nums[i]\n",
|
|
"\t \n",
|
|
"\t a.clear()\n",
|
|
"\t for i in range(1, target+1):\n",
|
|
"\t\t a.get(i, 0)\n",
|
|
"\t return sorted(a)\n",
|
|
"\t\t\t\n",
|
|
"\t\t\n",
|
|
"\tdef helper(n):\n",
|
|
"\t s = 1\n",
|
|
"\t q ='sum'\n",
|
|
"\t while n!= s:\n",
|
|
"\t\t n += 1\n",
|
|
"\t\t q +='sum'\n",
|
|
"\t\t s += q\n",
|
|
"\t\t n += 1\n",
|
|
"\t return q\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# Tokenize once and keep the attention mask: the pad token is the same as the EOS token,\n",
"# so generate() cannot reliably infer the mask from input_ids alone.\n",
"encoded = tokenizer(prompt, return_tensors=\"pt\")\n",
"input_ids = encoded.input_ids\n",
"start = input_ids.shape[1]  # prompt length in tokens; used below to slice off the completion\n",
"\n",
"# Allow up to 400 new tokens, but never request more positions than the model supports.\n",
"max_length = min(start + 400, model.config.max_position_embeddings)\n",
"\n",
"# Pure nucleus sampling (no beam search), so beam-only options such as early_stopping are omitted.\n",
"output = model.generate(\n",
"    input_ids,\n",
"    attention_mask=encoded.attention_mask,\n",
"    max_length=max_length,\n",
"    do_sample=True,\n",
"    top_p=0.95,\n",
"    temperature=1.,\n",
"    pad_token_id=tokenizer.pad_token_id,\n",
")\n",
"\n",
"# Show only the generated continuation, not the echoed prompt.\n",
"print(tokenizer.decode(output[0][start:]).strip())"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 28,
|
|
"id": "e8d5ad26-1cd6-49bb-8f4c-550091bfcc9d",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"import numpy as np\n",
|
|
"\n",
|
|
"H, N = list(map(int, input().split()))\n",
|
|
"\n",
|
|
"ab = []\n",
|
|
"for i in range(N):\n",
|
|
"\ta, b = list(map(int, input().split()))\n",
|
|
"\tab.append([a, b])\n",
|
|
"\n",
|
|
"ab = np.array(ab)\n",
|
|
"a_list = ab[:, 0]\n",
|
|
"b_list = ab[:, 1]\n",
|
|
"max_a = ab.max()\n",
|
|
"\n",
|
|
"inf = float('inf')\n",
|
|
"dp = np.array([inf for _ in range(H + max_a)])\n",
|
|
"dp[0] = 0\n",
|
|
"\n",
|
|
"for i in range(1, len(dp)):\n",
|
|
"\tdp[i] = np.amin(dp[np.maximum(i - a_list, 0)] + b_list)\n",
|
|
"\n",
|
|
"print((int(min(dp[H:]))))\n",
|
|
"\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"print(dataset[id][\"answer\"])"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "ee80ebdb-163c-43b8-b1cb-0585d013bba1",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3 (ipykernel)",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.8.10"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|