{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Setup" ] },
 { "cell_type": "code", "execution_count": 1, "metadata": { "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [
  "\n",
  "RangeIndex: 49182 entries, 0 to 49181\n",
  "Data columns (total 27 columns):\n",
  "buffer_size 49182 non-null int64\n",
  "failure 49182 non-null object\n",
  "func_type 49182 non-null object\n",
  "function 49182 non-null object\n",
  "importable_prefix 1669 non-null object\n",
  "index_file_count 49182 non-null int64\n",
  "index_value_count 49182 non-null int64\n",
  "inspect 49182 non-null object\n",
  "message_id 49182 non-null object\n",
  "offset 49182 non-null int64\n",
  "outside_parens 49182 non-null bool\n",
  "platform 49182 non-null object\n",
  "prefix_dist 2441 non-null object\n",
  "region 49182 non-null object\n",
  "resolved_dist 7152 non-null object\n",
  "resolved_symbol 49182 non-null object\n",
  "unresolved.in_bad_node 6 non-null object\n",
  "unresolved.not_applicable 24361 non-null object\n",
  "unresolved.other 1370 non-null object\n",
  "unresolved.prefix_resolves.global 4008 non-null object\n",
  "unresolved.prefix_resolves.prefix 4008 non-null object\n",
  "unresolved.prefix_resolves.to 4008 non-null object\n",
  "unresolved.resolves 63 non-null object\n",
  "unresolved.unresolved_name.importable 19374 non-null object\n",
  "unresolved.unresolved_name.name 19374 non-null object\n",
  "unresolved_category 49182 non-null object\n",
  "user_id 49182 non-null int64\n",
  "dtypes: bool(1), int64(5), object(21)\n",
  "memory usage: 9.8+ MB\n" ] } ],
  "source": [
  "import html\n",
  "import json\n",
  "import os\n",
  "import urllib.parse\n",
  "\n",
  "import matplotlib.pyplot as plt\n",
  "import numpy as np\n",
  "import pandas as pd\n",
  "\n",
  "# Default report location; set the REPORT_FILENAME environment variable to override it.\n",
  "REPORT_FILENAME = os.environ.get(\"REPORT_FILENAME\", \"/Users/damian/report/report.json\")\n",
  "\n",
  "# don't truncate cell contents when displaying\n",
  "try:\n",
  "    pd.set_option('display.max_colwidth', None)\n",
  "except ValueError:\n",
  "    # pandas releases that only accept an int here use -1 to mean \"no limit\"\n",
  "    pd.set_option('display.max_colwidth', -1)\n",
  "\n",
  "\n",
  "def inspect_url(message_id):\n",
  "    \"\"\"Build the inspector URL for a message id of the form '<uri>::<id>'.\n",
  "\n",
  "    Raises ValueError if message_id does not split into exactly two parts on '::'.\n",
  "    \"\"\"\n",
  "    uri, mid = message_id.split(\"::\")\n",
  "    return \"http://test-6.kite.com:3031/event?uri=%s&message_id=%s\" % (\n",
  "        urllib.parse.quote(uri), urllib.parse.quote(mid))\n",
  "\n",
  "\n",
  "def _distribution(symbol):\n",
  "    \"\"\"Return the text before the first '@' in symbol, or None if symbol is empty or has no '@'.\"\"\"\n",
  "    if symbol and '@' in symbol:\n",
  "        return symbol.split('@')[0]\n",
  "    return None\n",
  "\n",
  "\n",
  "def preprocess(obj):\n",
  "    \"\"\"Add the derived columns to one decoded report event, in place, and return it.\n",
  "\n",
  "    Reads 'failure', 'resolved_symbol', 'message_id' and the optional 'category'\n",
  "    sub-object; writes 'prefix_dist', 'resolved_dist', 'importable_prefix',\n",
  "    'unresolved', 'unresolved_category' and 'inspect', and removes 'category'.\n",
  "    \"\"\"\n",
  "    # If a sub-object is not seen in every object, panda's json_normalize() won't flatten it\n",
  "    if not obj.get('category'):\n",
  "        if obj['failure'] == 'unresolved_value':\n",
  "            obj['category'] = {'other': True}\n",
  "        else:\n",
  "            obj['category'] = {'not_applicable': True}\n",
  "    category = obj['category']\n",
  "\n",
  "    # If the unresolved function was an attribute, and some prefix of this was resolved\n",
  "    # to a global symbol, prefix_dist holds the name of the distribution\n",
  "    obj['prefix_dist'] = _distribution(category.get('prefix_resolves', {}).get('to'))\n",
  "\n",
  "    # If the function was resolved but the signature still failed (e.g. no signatures\n",
  "    # were returned), resolved_dist holds the name of the distribution\n",
  "    obj['resolved_dist'] = _distribution(obj['resolved_symbol'])\n",
  "\n",
  "    # If the top-level prefix of the function wasn't resolved, but could have been if a\n",
  "    # module with that name was imported\n",
  "    unresolved_name = category.get('unresolved_name', {})\n",
  "    obj['importable_prefix'] = (\n",
  "        unresolved_name['name'] if unresolved_name.get('importable', False) else None)\n",
  "\n",
  "    # Bring out the category name (the first key of the category object) as a column\n",
  "    obj['unresolved'] = category\n",
  "    obj['unresolved_category'] = next(iter(category))\n",
  "    del obj['category']\n",
  "\n",
  "    # Inspector URL for each event\n",
  "    obj['inspect'] = inspect_url(obj['message_id'])\n",
  "\n",
  "    return obj\n",
  "\n",
  "\n",
  "def get_signature_data(filename):\n",
  "    \"\"\"Load a report holding one JSON event per line into a flattened DataFrame.\n",
  "\n",
  "    Blank lines are skipped; any other line that is not valid JSON raises.\n",
  "    \"\"\"\n",
  "    # The context manager closes the report even if a line fails to decode\n",
  "    with open(filename, 'r') as report:\n",
  "        events = [preprocess(json.loads(line)) for line in report if line.strip()]\n",
  "    # Prefer the top-level pd.json_normalize when this pandas has it; otherwise use the older location\n",
  "    normalize = getattr(pd, 'json_normalize', None) or pd.io.json.json_normalize\n",
  "    return normalize(events)\n",
  "\n",
  "\n",
  "def render(df):\n",
  "    \"\"\"Return a Styler for df whose 'inspect' column is shown as clickable links labelled 'inspect'.\"\"\"\n",
  "    def make_clickable(val):\n",
  "        # Escape the URL so '&' and quotes cannot break out of the href attribute\n",
  "        return '<a href=\"{}\" target=\"_blank\">{}</a>'.format(html.escape(val), \"inspect\")\n",
  "    return df.style.format({'inspect': make_clickable})\n",
  "\n",
  "\n",
  "df = get_signature_data(REPORT_FILENAME)\n",
  "N = len(df.index)\n",
  "\n",
  "\n",
  "def with_percent(series):\n",
  "    \"\"\"Given a series of counts, return a dataframe with a column containing percentages of global count.\n",
  "\n",
  "    The denominator is the row count of the module-level df at call time.\n",
  "    \"\"\"\n",
  "    return pd.DataFrame({'count': series, 'percent': series / len(df.index) * 100})\n",
  "\n",
  "\n",
  "unresolved = df[df['failure'] == 'unresolved_value']\n",
  "df.info()" ] },
 { "cell_type": "markdown", "metadata": {}, "source": [ "# Types of failures" ] },
 { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
failure
unresolved_value50.467651
no_signatures20.283844
call_expr_outside_parens12.488309
call_expr_not_found12.160953
invalid_callee_kind4.471148
no_context0.065064
nil_reference0.063031
\n", "
" ], "text/plain": [ " failure\n", "unresolved_value 50.467651\n", "no_signatures 20.283844\n", "call_expr_outside_parens 12.488309\n", "call_expr_not_found 12.160953\n", "invalid_callee_kind 4.471148 \n", "no_context 0.065064 \n", "nil_reference 0.063031 " ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "(df['failure'].value_counts(normalize=True) * 100).to_frame()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## no_signatures\n", "\n", "In this case, the symbol was resolved, but not signatures were returned." ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
functionresolved_symbolinspect
26429lenbuiltin-stdlib@2.7:__builtin__.intinspect
26391pygame.display.set_modeinspect
8823MockSubscriptioninspect
22063json.loadsbuiltin-stdlib@2.7:__builtin__.listinspect
10212subset.addinspect
1739typeinspect
41136e_path.gridinspect
20992button.gridinspect
31177SumDjango@2.0.1:django.db.models.aggregates.Suminspect
26893mac_to_ip.getinspect
30555printinspect
30257np.arraynumpy@1.14.0:numpy.ndarrayinspect
24616np.cosinspect
12072'{}.{}@email.com'.formatbuiltin-stdlib@2.7:__builtin__.strinspect
11012canvas.create_arcinspect
2862rangebuiltin-stdlib@2.7:__builtin__.listinspect
13055Pruebainspect
24186self.reseller_companyinspect
25582np.repeatnumpy@1.14.0:numpy.ndarrayinspect
14980pygame.draw.lineinspect
17331printinspect
10756lenbuiltin-stdlib@2.7:__builtin__.intinspect
3065np.squareinspect
17630dictbuiltin-stdlib@2.7:__builtin__.dictinspect
41158e_path.gridinspect
10076maininspect
22605lenbuiltin-stdlib@2.7:__builtin__.intinspect
44705np.multiplyinspect
23982shutil.copyfilebuiltin-stdlib@2.7:types.NoneTypeinspect
20008pd.read_sql_queryinspect
" ], "text/plain": [ "" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "render(df[df['failure'] == 'no_signatures'][['function', 'resolved_symbol', 'inspect']].sample(n=30))" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
countpercent
builtin-stdlib44479.041926
numpy6731.368387
Django1430.290757
matplotlib880.178927
pandas880.178927
requests380.077264
peewee190.038632
Pygame190.038632
Flask130.026432
mrjob90.018299
tensorflow80.016266
tornado70.014233
SQLAlchemy70.014233
scikit-learn50.010166
beautifulsoup450.010166
WTForms50.010166
Pillow30.006100
pexpect30.006100
django-model-utils30.006100
mock20.004067
\n", "
" ], "text/plain": [ " count percent\n", "builtin-stdlib 4447 9.041926\n", "numpy 673 1.368387\n", "Django 143 0.290757\n", "matplotlib 88 0.178927\n", "pandas 88 0.178927\n", "requests 38 0.077264\n", "peewee 19 0.038632\n", "Pygame 19 0.038632\n", "Flask 13 0.026432\n", "mrjob 9 0.018299\n", "tensorflow 8 0.016266\n", "tornado 7 0.014233\n", "SQLAlchemy 7 0.014233\n", "scikit-learn 5 0.010166\n", "beautifulsoup4 5 0.010166\n", "WTForms 5 0.010166\n", "Pillow 3 0.006100\n", "pexpect 3 0.006100\n", "django-model-utils 3 0.006100\n", "mock 2 0.004067" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Most common distributions that result in no-signatures cases\n", "\n", "with_percent(df[df['failure'] == 'no_signatures']['resolved_dist'].value_counts().nlargest(20))" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", "
functionresolved_symbolinspect
29010rangebuiltin-stdlib@2.7:__builtin__.listinspect
21433intbuiltin-stdlib@2.7:__builtin__.intinspect
2954zipbuiltin-stdlib@2.7:__builtin__.listinspect
26126plt.plotbuiltin-stdlib@2.7:__builtin__.listinspect
26482strbuiltin-stdlib@2.7:__builtin__.strinspect
24847os.systembuiltin-stdlib@2.7:__builtin__.intinspect
32757ValueErrorbuiltin-stdlib@2.7:exceptions.ValueErrorinspect
1584zipbuiltin-stdlib@2.7:__builtin__.listinspect
2029pontoPlotar.appendbuiltin-stdlib@2.7:types.NoneTypeinspect
42712NANDbuiltin-stdlib@2.7:__builtin__.intinspect
32354listbuiltin-stdlib@2.7:__builtin__.listinspect
21942math.powbuiltin-stdlib@2.7:__builtin__.floatinspect
32330subprocess.callbuiltin-stdlib@2.7:__builtin__.intinspect
34377zipbuiltin-stdlib@2.7:__builtin__.listinspect
27617floatbuiltin-stdlib@2.7:__builtin__.floatinspect
1592zipbuiltin-stdlib@2.7:__builtin__.listinspect
14280lenbuiltin-stdlib@2.7:__builtin__.intinspect
25998rangebuiltin-stdlib@2.7:__builtin__.listinspect
45789Entrybuiltin-stdlib@3.5:tkinter.Entryinspect
23178openbuiltin-stdlib@2.7:__builtin__.fileinspect
680arrayX.appendbuiltin-stdlib@2.7:types.NoneTypeinspect
27319intbuiltin-stdlib@2.7:__builtin__.intinspect
28936datas.appendbuiltin-stdlib@2.7:types.NoneTypeinspect
2973lenbuiltin-stdlib@2.7:__builtin__.intinspect
4838rangebuiltin-stdlib@2.7:__builtin__.listinspect
12116minbuiltin-stdlib@2.7:__builtin__.intinspect
19751iterbuiltin-stdlib@2.7:__builtin__.listinspect
22881matrix.appendbuiltin-stdlib@2.7:types.NoneTypeinspect
278setbuiltin-stdlib@2.7:__builtin__.setinspect
3181rangebuiltin-stdlib@2.7:__builtin__.listinspect
" ], "text/plain": [ "" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Some builtin no-signature cases\n", "\n", "render(df[(df['failure'] == 'no_signatures') & (df['resolved_dist'] == 'builtin-stdlib')][\n", " ['function', 'resolved_symbol', 'inspect']].sample(n=30))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## unresolved_value\n", "\n", "In this case, the relevant function expression was found but not present in the resolved AST." ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
countpercent
not_applicable2436149.532349
unresolved_name1937439.392461
prefix_resolves40088.149323
other13702.785572
resolves630.128096
in_bad_node60.012200
\n", "
" ], "text/plain": [ " count percent\n", "not_applicable 24361 49.532349\n", "unresolved_name 19374 39.392461\n", "prefix_resolves 4008 8.149323 \n", "other 1370 2.785572 \n", "resolves 63 0.128096 \n", "in_bad_node 6 0.012200 " ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Unresolved value categorization\n", "with_percent(df['unresolved_category'].value_counts())" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
functionunresolved_categoryinspect
39314pritunresolved_nameinspect
19484UserFormunresolved_nameinspect
36375gsw.distanceunresolved_nameinspect
18090file.saveunresolved_nameinspect
32962db.Columnprefix_resolvesinspect
27147st.encodeunresolved_nameinspect
38369MailSenderunresolved_nameinspect
4420Article.objects.filterunresolved_nameinspect
46605root.focusunresolved_nameinspect
19582auth.get_account_infounresolved_nameinspect
38020metrics.observe_histogramunresolved_nameinspect
37179pg.image.load(path.join(img_dir, \"ship.png\")).convertotherinspect
31513self.makeClientUnitprefix_resolvesinspect
18841self.client.getprefix_resolvesinspect
8734_re.subunresolved_nameinspect
" ], "text/plain": [ "" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Some unresolved value events\n", "render(unresolved[['function', 'unresolved_category', 'inspect']].sample(n=15))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## prefix_resolves\n", "\n", "In this case, the function does not resolve but some prefix does." ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/html": [ " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
functionunresolved.prefix_resolves.prefixunresolved.prefix_resolves.toinspect
7070models.CharfieldmodelsDjango@2.0.1:django.db.modelsinspect
21544self.assertTrueselfinspect
33628db.relationshipdbFlask-SQLAlchemy@2.3.2:flask_sqlalchemy.SQLAlchemyinspect
33465db.ColumndbFlask-SQLAlchemy@2.3.2:flask_sqlalchemy.SQLAlchemyinspect
40549app.config.from_objectappFlask@0.12.2:flask.app.Flaskinspect
14775self._load_csv_infileselfinspect
5712BookletClient.objects.getBookletClientBookletClientinspect
13857type.__new__typebuiltin-stdlib@2.7:__builtin__.typeinspect
46297np.random.choicenp.randomnumpy@1.14.0:numpy.randominspect
47640player.initplayerplayerinspect
22282Question.objects.order_byQuestionQuestioninspect
842pygame.set_modepygamePygame@1.9.3:pygameinspect
40088t&(list(x)[0])).get_distancet&(list(x)[0]builtin-stdlib@2.7:__builtin__.intinspect
29667line.set_xdatalinebuiltin-stdlib@2.7:__builtin__.listinspect
22313self.assrtIsselfinspect
" ], "text/plain": [ "" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "render(unresolved[unresolved['unresolved_category'] == 'prefix_resolves'][\n", " ['function','unresolved.prefix_resolves.prefix', 'unresolved.prefix_resolves.to', 'inspect']].sample(n=15))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Most common resolved distributions that lead to unresolved values" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "resolved prefix distribution: 4.963198%\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
countpercent
Flask-SQLAlchemy8431.714042
builtin-stdlib7421.508682
numpy3490.709609
Django1620.329389
pandas1300.264324
Flask570.115896
tensorflow480.097597
Pygame380.077264
matplotlib240.048798
Scrapy130.026432
requests90.018299
scipy80.016266
PyYAML50.010166
statsmodels40.008133
lxml30.006100
elasticsearch20.004067
python-dateutil20.004067
oauth210.002033
scikit-learn10.002033
\n", "
" ], "text/plain": [ " count percent\n", "Flask-SQLAlchemy 843 1.714042\n", "builtin-stdlib 742 1.508682\n", "numpy 349 0.709609\n", "Django 162 0.329389\n", "pandas 130 0.264324\n", "Flask 57 0.115896\n", "tensorflow 48 0.097597\n", "Pygame 38 0.077264\n", "matplotlib 24 0.048798\n", "Scrapy 13 0.026432\n", "requests 9 0.018299\n", "scipy 8 0.016266\n", "PyYAML 5 0.010166\n", "statsmodels 4 0.008133\n", "lxml 3 0.006100\n", "elasticsearch 2 0.004067\n", "python-dateutil 2 0.004067\n", "oauth2 1 0.002033\n", "scikit-learn 1 0.002033" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "\n", "\n", "print(\"resolved prefix distribution: %f%%\" % (\n", " 100 * len(unresolved[unresolved['prefix_dist'].notnull()])/len(df.index)))\n", "with_percent(unresolved['prefix_dist'].value_counts())" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Numpy" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "np.random: 0.449351%\n" ] }, { "data": { "text/html": [ " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
functionunresolved.prefix_resolves.prefixunresolved.prefix_resolves.toinspect
41844random.randragerandombuiltin-stdlib@2.7:randominspect
20134data.to_stringdatabuiltin-stdlib@2.7:__builtin__.dictinspect
42430hashlib.md5().updatehashlib.md5()builtin-stdlib@2.7:__builtin__.strinspect
10883food.appendfoodbuiltin-stdlib@2.7:__builtin__.strinspect
39471dic[a].appenddic[a]builtin-stdlib@2.7:__builtin__.intinspect
13657type.__new__typebuiltin-stdlib@2.7:__builtin__.typeinspect
8656re.complierebuiltin-stdlib@2.7:reinspect
15536df_doubt[[1,2,3]].astypedf_doubt[[1,2,3]]builtin-stdlib@2.7:__builtin__.strinspect
37344outdir.mkdiroutdirbuiltin-stdlib@2.7:__builtin__.strinspect
43274logger.INFOloggerbuiltin-stdlib@2.7:logging.getLoggerinspect
273str1.lower().substr1.lower()builtin-stdlib@2.7:__builtin__.strinspect
35166item_to_purchase.get_costitem_to_purchasebuiltin-stdlib@2.7:types.NoneTypeinspect
19766all_categories.dropall_categoriesbuiltin-stdlib@2.7:__builtin__.dictinspect
25271re.search('(CRISPR\\s+\\d+)', line).groupre.search('(CRISPR\\s+\\d+)', line)builtin-stdlib@2.7:__builtin__.strinspect
8317re.find_allrebuiltin-stdlib@2.7:reinspect
13673type.__new__typebuiltin-stdlib@2.7:__builtin__.typeinspect
43252datetime.dateimte.strptimedatetimebuiltin-stdlib@2.7:datetimeinspect
39211e_path.girde_pathbuiltin-stdlib@3.5:tkinter.Entryinspect
11881settlements.applysettlementsbuiltin-stdlib@2.7:__builtin__.dictinspect
25238re.search('(\\d+)\\s+\\-\\s+(\\d+)', line).groupre.search('(\\d+)\\s+\\-\\s+(\\d+)', line)builtin-stdlib@2.7:__builtin__.strinspect
35743os.joinosbuiltin-stdlib@2.7:osinspect
11481upload_data.getvalueupload_databuiltin-stdlib@2.7:__builtin__.dictinspect
43154datetime.dateimte.strptimedatetimebuiltin-stdlib@2.7:datetimeinspect
40514os.environ.etos.environbuiltin-stdlib@2.7:os.environinspect
46846f.openfbuiltin-stdlib@2.7:__builtin__.fileinspect
39357e_path.girde_pathbuiltin-stdlib@3.5:tkinter.Entryinspect
24244df1.to_csvdf1builtin-stdlib@2.7:__builtin__.dictinspect
20120all_categories.joinall_categoriesbuiltin-stdlib@2.7:__builtin__.dictinspect
35757os.joinosbuiltin-stdlib@2.7:osinspect
13516type.__new__typebuiltin-stdlib@2.7:__builtin__.typeinspect
" ], "text/plain": [ "" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "print(\"np.random: %f%%\" % (100 * len(df[df['unresolved.prefix_resolves.prefix'] == 'np.random'])/len(df.index)))\n", "\n", "def module_prefix_examples(module_name):\n", " return render(unresolved[unresolved['prefix_dist'] == module_name][\n", " ['function', 'unresolved.prefix_resolves.prefix', 'unresolved.prefix_resolves.to', 'inspect']].sample(n=30))\n", "\n", "module_prefix_examples('builtin-stdlib')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Django" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### prefix resolves, but not to a global distribution" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/html": [ " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
functionunresolved.prefix_resolves.prefixunresolved.prefix_resolves.toinspect
46429self.plyer.killselfinspect
38443self.model.objects.createselfinspect
47890self.game.spritesheet.get_imageselfinspect
47761self.game.spritesheet.get_imageselfinspect
47743self.game.spritesheet.get_imageselfinspect
27131self.screen.blitselfinspect
22514self.client.getselfinspect
31304Client.objects.filterClientClientinspect
17922Log.objects.filterLogLoginspect
31641self.comments.filterselfinspect
38768self.LOG.infoselfinspect
40456self.popselfinspect
38998Trait.objects.get_or_createTraitTraitinspect
47965self.screen.blitselfinspect
32067self.assertIsEqualselfinspect
" ], "text/plain": [ "" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "render(unresolved[(unresolved['unresolved_category'] == 'prefix_resolves') & (unresolved['prefix_dist'].isna())][\n", " ['function','unresolved.prefix_resolves.prefix', 'unresolved.prefix_resolves.to', 'inspect']].sample(n=15))" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/html": [ " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
functionunresolved.prefix_resolves.prefixunresolved.prefix_resolves.toinspect
45248models.IntergerFieldmodelsDjango@2.0.1:django.db.modelsinspect
10938admin.site.registeadmin.siteDjango@2.0.1:django.contrib.admin.sites.siteinspect
33650models.CharfieldmodelsDjango@2.0.1:django.db.modelsinspect
7612models.CHarFieldmodelsDjango@2.0.1:django.db.modelsinspect
30989admin.site.register_viewadmin.siteDjango@2.0.1:django.contrib.admin.sites.siteinspect
33703models.CharfieldmodelsDjango@2.0.1:django.db.modelsinspect
45065models.IntergerFieldmodelsDjango@2.0.1:django.db.modelsinspect
37332models.CharfieldmodelsDjango@2.0.1:django.db.modelsinspect
7499self.date.__str__self.dateDjango@2.0.1:django.db.models.fields.DateFieldinspect
7216models.CharfieldmodelsDjango@2.0.1:django.db.modelsinspect
45276models.IntergerFieldmodelsDjango@2.0.1:django.db.modelsinspect
10575admin.site.registeadmin.siteDjango@2.0.1:django.contrib.admin.sites.siteinspect
22314data.decodedataDjango@2.0.1:django.core.handlers.wsgi.WSGIRequest.bodyinspect
33716models.CharfieldmodelsDjango@2.0.1:django.db.modelsinspect
13630request.GET.getrequestDjango@2.0.1:django.http.request.HttpRequestinspect
10650admin.site.registeadmin.siteDjango@2.0.1:django.contrib.admin.sites.siteinspect
42449cache.getcacheDjango@2.0.1:django.core.cache.cacheinspect
45231models.IntergerFieldmodelsDjango@2.0.1:django.db.modelsinspect
6876models.CharfieldmodelsDjango@2.0.1:django.db.modelsinspect
19803request.POST.getrequestDjango@2.0.1:django.http.request.HttpRequestinspect
45205models.IntergerFieldmodelsDjango@2.0.1:django.db.modelsinspect
10726admin.site.registeadmin.siteDjango@2.0.1:django.contrib.admin.sites.siteinspect
10809admin.site.registeadmin.siteDjango@2.0.1:django.contrib.admin.sites.siteinspect
33479models.CharfieldmodelsDjango@2.0.1:django.db.modelsinspect
17572request.sessionrequestDjango@2.0.1:django.http.request.HttpRequestinspect
34276models.CharfieldmodelsDjango@2.0.1:django.db.modelsinspect
31202admin.site.register_viewadmin.siteDjango@2.0.1:django.contrib.admin.sites.siteinspect
11086admin.site.registeadmin.siteDjango@2.0.1:django.contrib.admin.sites.siteinspect
31151admin.site.register_viewadmin.siteDjango@2.0.1:django.contrib.admin.sites.siteinspect
34213models.CharfieldmodelsDjango@2.0.1:django.db.modelsinspect
" ], "text/plain": [ "" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "module_prefix_examples('Django')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## unresolved_name\n", "\n", "In this case, there is no prefix of the unresolved function that resolves." ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/html": [ " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
functionunresolved.unresolved_name.nameinspect
36733pathpathinspect
38479np.arraynpinspect
44086client.delete_bot_versionclientinspect
6499printmeprintmeinspect
21138canvas.get_tk_widget.gridcanvasinspect
28056DotAccessable.buildDotAccessableinspect
38337MailSenderMailSenderinspect
26505screen.fillscreeninspect
7121app.add_routeappinspect
16870dummy_minimizedummy_minimizeinspect
27698reponse.replacereponseinspect
40844classclassinspect
47233canvas.bindcanvasinspect
4699qMaxqMaxinspect
1162run_with_lockrun_with_lockinspect
42691s.entersinspect
37213gsw.distancegswinspect
40903pyautogui.typewritepyautoguiinspect
46221log.msgloginspect
7933ClientClientinspect
" ], "text/plain": [ "" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "render(unresolved[unresolved['unresolved_category'] == 'unresolved_name'][\n", " ['function', 'unresolved.unresolved_name.name', 'inspect']].sample(n=20))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Most common unresolved-name cases" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
countpercent
canvas6381.297223
path3990.811272
np2660.540848
client2400.487983
torch2260.459518
Facility1980.402586
BSEHandler1890.384287
reverse1780.361921
m1680.341588
writer1620.329389
DotAccessable1610.327356
metrics1580.321256
cv21560.317189
gameDisplay1560.317189
self1490.302956
app1420.288724
dummy_minimize1420.288724
session1360.276524
request1350.274491
Canvas1340.272457
\n", "
" ], "text/plain": [ " count percent\n", "canvas 638 1.297223\n", "path 399 0.811272\n", "np 266 0.540848\n", "client 240 0.487983\n", "torch 226 0.459518\n", "Facility 198 0.402586\n", "BSEHandler 189 0.384287\n", "reverse 178 0.361921\n", "m 168 0.341588\n", "writer 162 0.329389\n", "DotAccessable 161 0.327356\n", "metrics 158 0.321256\n", "cv2 156 0.317189\n", "gameDisplay 156 0.317189\n", "self 149 0.302956\n", "app 142 0.288724\n", "dummy_minimize 142 0.288724\n", "session 136 0.276524\n", "request 135 0.274491\n", "Canvas 134 0.272457" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "with_percent(unresolved['unresolved.unresolved_name.name'].value_counts().nlargest(20))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Some are most likely missed imports" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
countpercent
False1770535.998943
True16693.393518
\n", "
" ], "text/plain": [ " count percent\n", "False 17705 35.998943\n", "True 1669 3.393518 " ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "with_percent(unresolved['unresolved.unresolved_name.importable'].value_counts())" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Digging into some specific examples" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/html": [ " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
user_idfunctionunresolved.unresolved_name.nameinspect
4680245958canvas.bindcanvasinspect
4717445958canvas.bindcanvasinspect
4723545958canvas.movecanvasinspect
4700445958canvas.coordscanvasinspect
4710845958canvas.bindcanvasinspect
4716845958canvas.bindcanvasinspect
4741945958canvas.create_rectanglecanvasinspect
4735845958canvas.coordscanvasinspect
4708645958canvas.bindcanvasinspect
4733245958canvas.create_rectanglecanvasinspect
4715545958canvas.bindcanvasinspect
4713845958canvas.bindcanvasinspect
4684745958canvas.bindcanvasinspect
4731745958canvas.create_rectanglecanvasinspect
4690845958canvas.bindcanvasinspect
4689945958canvas.create_rectanglecanvasinspect
4705445958canvas.movecanvasinspect
4689445958canvas.create_rectanglecanvasinspect
4692545958canvas.movecanvasinspect
4681145958canvas.bindcanvasinspect
" ], "text/plain": [ "" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "def unresolved_examples_for_name(name):\n", " return render(unresolved[(unresolved['unresolved_category'] == 'unresolved_name') \n", " & (unresolved['unresolved.unresolved_name.name'] == name)\n", " & (unresolved['importable_prefix'].isna())][\n", " ['user_id', 'function', 'unresolved.unresolved_name.name', 'inspect']].sample(n=20))\n", "\n", "unresolved_examples_for_name('canvas')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Some missed-import cases" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/html": [ " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
functionunresolved.unresolved_name.nameinspect
39807path.joinpathinspect
36758pathpathinspect
40909pathpathinspect
25108os.makedirsosinspect
6613pathpathinspect
34983csv.readercsvinspect
36826pathpathinspect
36691pathpathinspect
7163app.add_routeappinspect
37200pathpathinspect
9600pathpathinspect
9647pathpathinspect
11445nltk.downloadnltkinspect
37048pathpathinspect
46005arrowarrowinspect
36719pathpathinspect
40893pathpathinspect
23777hand.add_cardhandinspect
37222pathpathinspect
9563pathpathinspect
" ], "text/plain": [ "" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "render(unresolved[unresolved['importable_prefix'].notnull()][\n", " ['function', 'unresolved.unresolved_name.name', 'inspect']].sample(n=20))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Most common missed-import modules" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
countpercent
path3990.811272
Canvas1340.272457
os1290.262291
app1240.252125
parser1010.205360
csv830.168761
arrow640.130129
models500.101663
random470.095563
time470.095563
math370.075231
json330.067098
auto210.042699
datetime210.042699
re200.040665
pint200.040665
user190.038632
pprint190.038632
api180.036599
workspace180.036599
\n", "
" ], "text/plain": [ " count percent\n", "path 399 0.811272\n", "Canvas 134 0.272457\n", "os 129 0.262291\n", "app 124 0.252125\n", "parser 101 0.205360\n", "csv 83 0.168761\n", "arrow 64 0.130129\n", "models 50 0.101663\n", "random 47 0.095563\n", "time 47 0.095563\n", "math 37 0.075231\n", "json 33 0.067098\n", "auto 21 0.042699\n", "datetime 21 0.042699\n", "re 20 0.040665\n", "pint 20 0.040665\n", "user 19 0.038632\n", "pprint 19 0.038632\n", "api 18 0.036599\n", "workspace 18 0.036599" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "with_percent(unresolved['importable_prefix'].value_counts().nlargest(20))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Unresolved categorization falls through\n", "\n", "For many of these, the function expr is not a name or attribute expression." ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/html": [ " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
functioninspect
25290self.__discrepancy_set.valuation_discrepancies.filter_by(wmc_trade_id=wmc_trade_id).firstinspect
42732schedule.every().sunday.at(\"01:00\").doinspect
9259file_string.getvalue().stripinspect
33884conn.cursor().executeinspect
8529re.findall(a, response.text)[0].splitinspect
41393pyautogui.screenshot().yourImage.cropinspect
18853session.query(BuildingInfo).order_byinspect
20103data.to_csv(float_format='$%.0f', index=False, sep='/').replace(os.linesep, '\\n').replaceinspect
22437self.players[0].shuffle_deckinspect
38282Student.select().order_byinspect
12695np.square(matrix).suminspect
34361Facility.query.filter_by(acct_id=current_user.acct_id).allinspect
19265session.query(BuildingInfo).order_byinspect
17781item.get('data', {}).getinspect
32983self._sql_context.read.format(self.output_format).\\\n", " option('hbase.table', table_name).\\\n", " option('hbase.columns.mapping', cm).\\\n", " option('hbase.use.hbase.context', False).\\\n", " option('hbase.config.resources', HBASE_SITE).\\\n", " optioninspect
38294Student.select().order_byinspect
26498df_cl[\"free\"].applyinspect
25882re.search('(CRISPR\\s+\\d+)', line).group(1,2).replaceinspect
38416train[\"Age\"].fillnainspect
37407details1['プロジェクトコード'].applyinspect
24913session.query(User).filter_byinspect
18874session.query(BuildingInfo).filter_byinspect
41845boto3_client('lex-models', profile_name,\r\n", " bot_name)inspect
159data_p[\"CLose Amount\"].dropnainspect
41927Entry.select().order_byinspect
26883Invoice.objects.filter(object_option_id__in=channel_ids).filterinspect
7042dtrades['Symbol'].value_counts().sorted_valuesinspect
24184Transaction.objects.filter(donation__association=association.id).valuesinspect
38446train[\"Age\"].fillnainspect
38310Student.select().order_byinspect
" ], "text/plain": [ "" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "render(unresolved[unresolved['unresolved_category'] == 'other'][\n", " ['function', 'inspect']].sample(n=30))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# call_expr_not_found cases\n", "\n", "For many of these:\n", "* the cursor is in square brackets and not parens\n", "* the file does not parse\n", "* the cursor is inside a tuple" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/html": [ " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
inspect
10003inspect
29907inspect
27675inspect
7668inspect
34596inspect
30285inspect
4882inspect
28493inspect
20974inspect
8099inspect
837inspect
34568inspect
26783inspect
40993inspect
14661inspect
18575inspect
8190inspect
491inspect
29273inspect
39337inspect
" ], "text/plain": [ "" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "render(df[df['failure'] == 'call_expr_not_found'][['inspect']].sample(n=20))" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAABI4AAAF1CAYAAACOBqHkAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAIABJREFUeJzt3X+0ZldZJ/jvY8KP8EOTgFSHJG0FzTgGM0AsQxxcPSXRJCS2oXtgdZiMFBinbDvY0KanKXBWB/nRK3QPoqCigUSCHQkxoEmTKNZE7nIxLSEgkBAipgwlKRKJmBApGcHCPX+cXfKmOLfurdR7733fez+ftc665+yzzzn7PPete3c995y9q7UWAAAAADjQt6x1AwAAAACYTRJHAAAAAIySOAIAAABglMQRAAAAAKMkjgAAAAAYJXEEAAAAwCiJI+CbVNXuqvrhR3jsUVX136rqoar67V72+qr6YlX95XRb+k3XfnVVvWMlrwEAsNKq6o6q2voIj31nVb1+yk062PX0v2CdO3KtGwCsOy9IsinJk1pr+6rqxCSXJPmO1tr9K3nh1tp/WsnzAwCshtba09e6Dcul/wXrnyeOgGn7jiR/1lrbN7H9148kaVQDP6cAAADWiP+QAYv5/qr6dFU9WFW/UVWPTZKqeklVfWiyYlW1qvquqvr5JP8xyb+qqr1V9VNJdiZ5at9+Z69/RlX996r6UlV9cvJR7KpaqKo3VNX/m+QrSZ52YMOq6pVV9fmq+nJVfaaqzuzlr6mq/9rXf7lfc/+yr6pe0/c9tareW1V/VVWfrap/O/XoAQA8QpPDBvT+zbVV9a7e97mjqrZM1H1WVf1J3/eeJI894Fw/WlWf6P2u/15V/1Mv/86qeqCqTuvbT+1DC2xdpE36X7BBSRwBi7kwydlJvjPJ/5Dk/1rqgNbapUn+U5L3tNae0Fr79STPS3Jv335JVR2f5MYkr09ybJJ/n+S9VfXtE6f68STbkzwxyV9MXqOqvjvJy5J8f2vtib2Nu0fa8rJ+zSck+cEkDya5vj/B9N+SfDLJ8UnOTPKKqjp7eWEBAFh1P5bkmiRHJ7khyS8nSVU9OsnvJvnNDP2q307yv+4/qCeFrkzyU0melOTXk9xQVY9prf15klcmubqqHpfkN5K8s7W2cODF9b9gY5M4Ahbzy621e1prDyR5Q5IXTem8/3uSm1prN7XW/qG1tjPJR5OcO1Hnna21O1pr+1prf3/A8V9P8pgkp1TVo1pru3vHZ1RPSP1ukp9prX08yfcn+fbW2mtba19rrd2d5O1JLpjS/QEATNuHet/p6xmSRM/o5WckeVSSX2yt/X1r7bokt04c938k+fXW2i2tta+31q5K8tV+XFprb09yV5JbkhyX5OcWub7+F2xgEkfAYu6ZWP+LJE+d0nm/I8kL++PSX6qqL2X4i9Rxi1z7YVpru5K8IslrktxfVddU1WjbqupRSa5L8luttWsmrv/UA67/6gwDegMAzKLJmWm/kuSxVXVkhv7Z51trbWL/5NPa35HkkgP6PSfm4f26tyf53iRvba19dezi+l+wsUkc
AYs5cWL9nya5t6//bZLH7d9RVf/kEM97T5LfbK0dPbE8vrV22USdttjBSdJa+63W2g9m6IS0JG9cpOpbk3w5D3/N7p4knz3g+k9srZ07fgoAgJl1X5Ljq6omyv7pxPo9Sd5wQL/nca21dydJVT0hyS8muSLJa6rq2MUupP8FG5fEEbCYi6vqhN6BeHWS9/TyTyZ5elU9sw+Y/ZpDPO9/TfLPq+rsqjqiqh5bVVur6oTlHFxV311Vz62qxyT5uyT/X4bHpw+s91NJ/pck/1tr7R8mdn0kyd/0AR6P6m343qr6/kO8DwCAtfbHSfYl+bdVdWRV/cskp0/sf3uSf11Vz67B46vqvKp6Yt//S0k+1lr7yQxjUP7a2EX0v2BjkzgCFvNbSf4gyd19eX2StNb+LMlrk/w/Gd6J/9BiJxjTWrsnyfkZklF/leEvUP9nlv/z6DFJLkvyxQyPbT+ln+tAL8owI9u9EzN7vLqPDfDPkzwzyWf7ed6R5NsO5T4AANZaa+1rSf5lkpdkGIj6XyV538T+j2YY5+iX+/5dvW6q6vwk5yT51736zyY5raouHLmU/hdsYPXw12EBAAAAYOCJIwAAAABGSRwBAAAAMEriCAAAAIBREkcAAAAAjJI4AgAAAGDUkWvdgIN58pOf3DZv3rwi5/7bv/3bPP7xj1+Rc88TcRiIw0AcBuIwEIeBOAxWMg4f+9jHvtha+/YVOTmPiD7YyhIDMUjEIBGDRAwSMUjWJgaH0v+a6cTR5s2b89GPfnRFzr2wsJCtW7euyLnniTgMxGEgDgNxGIjDQBwGKxmHqvqLFTkxj5g+2MoSAzFIxCARg0QMEjFI1iYGh9L/8qoaAAAAAKMkjgAAAAAYJXEEAAAAwCiJIwAAAABGSRwBAAAAMEriCAAAAIBREkcAAAAAjJI4AgAAAGCUxBEAAAAAoySOAAAAABglcQQAAADAKIkjAAAAAEZJHAEAAAAw6si1bsBauf3zD+UlO25cst7uy85bhdYAAGwMy+mD6X8BwOzwxBEAAAAAoySOAAAAABglcQQAAADAKIkjAAAAAEZJHAEAAAAwSuIIAAAAgFESRwAAAACMkjgCAAAAYJTEEQDAGqmqE6vqg1V1Z1XdUVUv7+WvqarPV9Un+nLuxDGvqqpdVfWZqjp7ovycXrarqnZMlJ9UVbdU1V1V9Z6qevTq3iUAMM8kjgAA1s6+JJe01r4nyRlJLq6qU/q+N7fWntmXm5Kk77sgydOTnJPkV6vqiKo6IsmvJHleklOSvGjiPG/s5zo5yYNJLlqtmwMA5p/EEQDAGmmt3dda+5O+/uUkdyY5/iCHnJ/kmtbaV1trn02yK8npfdnVWru7tfa1JNckOb+qKslzk1zXj78qyfNX5m4AgPVI4ggAYAZU1eYkz0pySy96WVXdVlVXVtUxvez4JPdMHLanly1W/qQkX2qt7TugHABgWY5c6wYAAGx0VfWEJO9N8orW2t9U1duSvC5J61/flOQnktTI4S3jfwxsB6k/1obtSbYnyaZNm7KwsHCId7E8m45KLjl130HrrNS1Z8XevXvX/T0uRQzEIBGDRAwSMUhmPwYSR0vYvOPGJevsvuy8VWgJALAeVdWjMiSNrm6tvS9JWmtfmNj/9iTv75t7kpw4cfgJSe7t62PlX0xydFUd2Z86mqz/MK21y5NcniRbtmxpW7duPbwbW8Rbr74+b7r94F3Q3ReuzLVnxcLCQlYqvvNCDMQgEYNEDBIxSGY/Bst6Va2qjq6q66rqT/usHz9QVcdW1c4+Q8fO/Y9Q1+AtfUaP26rqtInzbOv176qqbSt1UwAA86CPQXRFkjtba78wUX7cRLV/keRTff2GJBdU1WOq6qQkJyf5SJJbk5zcZ1B7dIYBtG9orbUkH0zygn78tiTXr+Q9AQDry3LHOPqlJL/fWvsfkzwjw8CNO5Lc3GfouLlvJ8NsHif3ZXuStyVJVR2b5NIkz84wgOOlE+/rAwBsRM9J
8uNJnltVn+jLuUn+c1XdXlW3JfmhJP8uSVprdyS5Nsmnk/x+kotba1/vTxO9LMkHMvTTru11k+SVSX62qnZlGPPoilW8PwBgzi35qlpVfWuSf5bkJUnSZ+r4WlWdn2Rrr3ZVkoUMHZPzk7yr/4Xrw/1ppeN63Z2ttQf6eXdmmEb23dO7HQCA+dFa+1DGxyG66SDHvCHJG0bKbxo7rrV2d4Y/2gEAHLLljHH0tCR/leQ3quoZST6W5OVJNrXW7kuGqWSr6im9/qHO9vEwszQw43LN8iBWS5n1QbhWizgMxGEgDgNxGIjDQBwAADam5SSOjkxyWpKfaa3dUlW/lG+8ljZmsdk7ljWrxywNzLhc8zyA46wPwrVaxGEgDgNxGIjDQBwG4gAAsDEtZ4yjPUn2tNZu6dvXZUgkfWH/wI396/0T9cdm9TjYLCAAAAAAzJglE0ettb9Mck9VfXcvOjPDgIw3ZJiZI3n4DB03JHlxn13tjCQP9VfaPpDkrKo6pg+KfVYvAwAAAGAGLfddrZ9JcnWf3vXuJC/NkHS6tqouSvK5JC/sdW9Kcm6SXUm+0uumtfZAVb0uw3SxSfLa/QNlAwAAADB7lpU4aq19IsmWkV1njtRtSS5e5DxXJrnyUBoIAAAAwNpYzhhHAAAAAGxAEkcAAAAAjJI4AgAAAGCUxBEAAAAAoySOAAAAABglcQQAAADAKIkjAAAAAEZJHAEAAAAwSuIIAAAAgFESRwAAAACMkjgCAAAAYJTEEQAAAACjJI4AAAAAGCVxBAAAAMAoiSMAAAAARkkcAQAAADBK4ggAAACAURJHAAAAAIySOAIAAABglMQRAAAAAKMkjgAAAAAYJXEEAAAAwCiJIwAAAABGSRwBAAAAMEriCAAAAIBREkcAAAAAjJI4AgAAAGCUxBEAAAAAoySOAAAAABglcQQAAADAKIkjAAAAAEZJHAEAAAAwSuIIAAAAgFESRwAAAACMkjgCAAAAYJTEEQAAAACjJI4AAAAAGCVxBAAAAMAoiSMAAAAARi0rcVRVu6vq9qr6RFV9tJcdW1U7q+qu/vWYXl5V9Zaq2lVVt1XVaRPn2dbr31VV21bmlgAAAACYhkN54uiHWmvPbK1t6ds7ktzcWjs5yc19O0mel+TkvmxP8rZkSDQluTTJs5OcnuTS/ckmAAAAAGbP4byqdn6Sq/r6VUmeP1H+rjb4cJKjq+q4JGcn2dlae6C19mCSnUnOOYzrAwAAALCClps4akn+oKo+VlXbe9mm1tp9SdK/PqWXH5/knolj9/SyxcoBAAAAmEFHLrPec1pr91bVU5LsrKo/PUjdGilrByl/+MFDYmp7kmzatCkLCwvLbOKh2XRUcsmp+6ZyrpVq42rYu3fvXLd/WsRhIA4DcRiIw0AcBuIAALAxLStx1Fq7t3+9v6p+J8MYRV+oquNaa/f1V9Hu79X3JDlx4vATktzby7ceUL4wcq3Lk1yeJFu2bGlbt249sMpUvPXq6/Om25ebNzu43Rduncp51sLCwkJWKsbzRBwG4jAQh4E4DMRhIA4AABvTkq+qVdXjq+qJ+9eTnJXkU0luSLJ/ZrRtSa7v6zckeXGfXe2MJA/1V9k+kOSsqjqmD4p9Vi8DANiQqurEqvpgVd1ZVXdU1ct7+dRmr62q7+uz4+7qx449BQ4AMGo5YxxtSvKhqvpkko8kubG19vtJLkvyI1V1V5If6dtJclOSu5PsSvL2JP8mSVprDyR5XZJb+/LaXgYAsFHtS3JJa+17kpyR5OKqOiXTnb32bb3u/uNMTgIALNuS72q11u5O8oyR8r9OcuZIeUty8SLnujLJlYfeTACA9ac/lb1/spEvV9WdGSYPOT/feMX/qgyv978yE7PXJvlwVe2fvXZr+uy1SVJVO5OcU1ULSb61tfbHvfxdGWbC/b3VuD8AYP5NZ5AfAAAOS1VtTvKsJLfkgNlr+wQlyaHPXnt8Xz+wfOz6MzNByXofiN1g82KQiEEi
BokYJGKQzH4MJI6mYPOOG5ess/uy81ahJQDAPKqqJyR5b5JXtNb+5iDDEB3q7LXLmtU2ma0JSuZ54pHlMNi8GCRikIhBIgaJGCSzH4PljHEEAMAKqapHZUgaXd1ae18v/kJ/BS2HMHvtYuUnjJQDACyLxBEAwBrpM5xdkeTO1tovTOyayuy1fd+Xq+qMfq0XT5wLAGBJXlUDAFg7z0ny40lur6pP9LJXZ5it9tqquijJ55K8sO+7Kcm5GWav/UqSlybD7LVVtX/22uThs9f+dJJ3Jjkqw6DYBsYGAJZN4ggAYI201j6U8XGIkinNXtta+2iS7z2MZgIAG5hX1QAAAAAYJXEEAAAAwCiJIwAAAABGSRwBAAAAMEriCAAAAIBREkcAAAAAjJI4AgAAAGCUxBEAAAAAoySOAAAAABglcQQAAADAKIkjAAAAAEZJHAEAAAAwSuIIAAAAgFESRwAAAACMkjgCAAAAYJTEEQAAAACjJI4AAAAAGCVxBAAAAMAoiSMAAAAARkkcAQAAADBK4ggAAACAURJHAAAAAIySOAIAAABglMQRAAAAAKMkjgAAAAAYJXEEAAAAwCiJIwAAAABGSRwBAAAAMEriCAAAAIBREkcAAAAAjJI4AgAAAGCUxBEAAAAAo45c6wZsFJt33Lhknd2XnbcKLQEAAABYHk8cAQAAADBq2Ymjqjqiqj5eVe/v2ydV1S1VdVdVvaeqHt3LH9O3d/X9myfO8ape/pmqOnvaNwMAAADA9BzKE0cvT3LnxPYbk7y5tXZykgeTXNTLL0ryYGvtu5K8uddLVZ2S5IIkT09yTpJfraojDq/5AAAAAKyUZSWOquqEJOcleUffriTPTXJdr3JVkuf39fP7dvr+M3v985Nc01r7amvts0l2JTl9GjcBAAAAwPQt94mjX0zyH5L8Q99+UpIvtdb29e09SY7v68cnuSdJ+v6Hev1/LB85BgAAAIAZs+SsalX1o0nub619rKq27i8eqdqW2HewYyavtz3J9iTZtGlTFhYWlmriI7LpqOSSU/ctXXEVrdS9HszevXvX5LqzRhwG4jAQh4E4DMRhIA4AABvTkomjJM9J8mNVdW6Sxyb51gxPIB1dVUf2p4pOSHJvr78nyYlJ9lTVkUm+LckDE+X7TR7zj1prlye5PEm2bNnStm7d+ghua2lvvfr6vOn25dz+6tl94dZVv+bCwkJWKsbzRBwG4jAQh4E4DMRhIA4AABvTkq+qtdZe1Vo7obW2OcPg1n/YWrswyQeTvKBX25bk+r5+Q99O3/+HrbXWyy/os66dlOTkJB+Z2p0AAAAAMFWH88jNK5NcU1WvT/LxJFf08iuS/GZV7crwpNEFSdJau6Oqrk3y6ST7klzcWvv6YVwfAAAAgBV0SImj1tpCkoW+fndGZkVrrf1dkhcucvwbkrzhUBsJAAAAwOpb7qxqAAAAAGwwEkcAAAAAjJI4AgAAAGCUxBEAAAAAoySOAAAAABglcQQAAADAKIkjAAAAAEZJHAEArKGqurKq7q+qT02UvaaqPl9Vn+jLuRP7XlVVu6rqM1V19kT5Ob1sV1XtmCg/qapuqaq7quo9VfXo1bs7AGDeSRwBAKytdyY5Z6T8za21Z/blpiSpqlOSXJDk6f2YX62qI6rqiCS/kuR5SU5J8qJeN0ne2M91cpIHk1y0oncDAKwrEkcAAGuotfZHSR5YZvXzk1zTWvtqa+2zSXYlOb0vu1prd7fWvpbkmiTnV1UleW6S6/rxVyV5/lRvAABY1ySOAABm08uq6rb+Ktsxvez4JPdM1NnTyxYrf1KSL7XW9h1QDgCwLEeudQMAAPgmb0vyuiStf31Tkp9IUiN1W8b/GNgOUv+bVNX2JNuTZNOmTVlYWDjkRi/HpqOSS07dd9A6K3XtWbF37951f49LEQMxSMQgEYNEDJLZj4HEEQDAjGmtfWH/elW9Pcn7++aeJCdOVD0hyb19faz8i0mOrqoj+1NHk/UPvObl
SS5Pki1btrStW7ce/o2MeOvV1+dNtx+8C7r7wpW59qxYWFjISsV3XoiBGCRikIhBIgbJ7MfAq2oAADOmqo6b2PwXSfbPuHZDkguq6jFVdVKSk5N8JMmtSU7uM6g9OsMA2je01lqSDyZ5QT9+W5LrV+MeAID1wRNHAABrqKrenWRrkidX1Z4klybZWlXPzPBa2e4kP5UkrbU7quraJJ9Osi/Jxa21r/fzvCzJB5IckeTK1tod/RKvTHJNVb0+yceTXLFKtwYArAMSRwAAa6i19qKR4kWTO621NyR5w0j5TUluGim/O8OsawAAh8yragAAAACMkjgCAAAAYJTEEQAAAACjJI4AAAAAGCVxBAAAAMAoiSMAAAAARkkcAQAAADBK4ggAAACAURJHAAAAAIySOAIAAABglMQRAAAAAKMkjgAAAAAYJXEEAAAAwCiJIwAAAABGSRwBAAAAMEriCAAAAIBREkcAAAAAjDpyrRvAN2zeceOSdXZfdt4qtAQAAADAE0cAAAAALELiCAAAAIBREkcAAAAAjJI4AgAAAGCUxBEAAAAAoySOAAAAABi1ZOKoqh5bVR+pqk9W1R1V9fO9/KSquqWq7qqq91TVo3v5Y/r2rr5/88S5XtXLP1NVZ6/UTQEAAABw+JbzxNFXkzy3tfaMJM9Mck5VnZHkjUne3Fo7OcmDSS7q9S9K8mBr7buSvLnXS1WdkuSCJE9Pck6SX62qI6Z5MwAAAABMz5KJozbY2zcf1ZeW5LlJruvlVyV5fl8/v2+n7z+zqqqXX9Na+2pr7bNJdiU5fSp3AQAAAMDUHbmcSv3JoI8l+a4kv5Lkz5N8qbW2r1fZk+T4vn58knuSpLW2r6oeSvKkXv7hidNOHjN5re1JtifJpk2bsrCwcGh3tEybjkouOXXf0hVnzLTjsXfv3hWL8TwRh4E4DMRhIA4DcRiIAwDAxrSsxFFr7etJnllVRyf5nSTfM1atf61F9i1WfuC1Lk9yeZJs2bKlbd26dTlNPGRvvfr6vOn2Zd3+TNl94dapnm9hYSErFeN5Ig4DcRiIw0AcBuIwEAcAgI3pkGZVa619KclCkjOSHF1V+zMvJyS5t6/vSXJikvT935bkgcnykWMAAAAAmDHLmVXt2/uTRqmqo5L8cJI7k3wwyQt6tW1Jru/rN/Tt9P1/2FprvfyCPuvaSUlOTvKRad0IAAAAANO1nHe1jktyVR/n6FuSXNtae39VfTrJNVX1+iQfT3JFr39Fkt+sql0ZnjS6IElaa3dU1bVJPp1kX5KL+ytwAAAAAMygJRNHrbXbkjxrpPzujMyK1lr7uyQvXORcb0jyhkNvJgAAAACr7ZDGOAIAAABg45A4AgAAAGCUxBEAAAAAoySOAAAAABglcQQAAADAKIkjAAAAAEZJHAEAAAAwSuIIAAAAgFESRwAAAACMkjgCAAAAYJTEEQAAAACjJI4AAAAAGCVxBAAAAMAoiSMAAAAARkkcAQAAADBK4ggAAACAURJHAAAAAIw6cq0bwKHZvOPGZdXbfdl5K9wSAAAAYL3zxBEAwBqqqiur6v6q+tRE2bFVtbOq7upfj+nlVVVvqapdVXVbVZ02ccy2Xv+uqto2Uf59VXV7P+YtVVWre4cAwDyTOAIAWFvvTHLOAWU7ktzcWjs5yc19O0mel+TkvmxP8rZkSDQluTTJs5OcnuTS/cmmXmf7xHEHXgsAYFESRwAAa6i19kdJHjig+PwkV/X1q5I8f6L8XW3w4SRHV9VxSc5OsrO19kBr7cEkO5Oc0/d9a2vtj1trLcm7Js4FALAkiSMAgNmzqbV2X5L0r0/p5ccnuWei3p5edrDyPSPlAADLYnBsAID5MTY+UXsE5d984qrtGV5py6ZNm7KwsPAIm3hwm45KLjl130HrrNS1Z8XevXvX/T0uRQzEIBGDRAwSMUhmPwYSRwAAs+cLVXVca+2+/rrZ/b18T5ITJ+qdkOTeXr71gPKFXn7CSP1v0lq7PMnl
SbJly5a2devWsWqH7a1XX5833X7wLujuC1fm2rNiYWEhKxXfeSEGYpCIQSIGiRgksx8Dr6oBAMyeG5LsnxltW5LrJ8pf3GdXOyPJQ/1Vtg8kOauqjumDYp+V5AN935er6ow+m9qLJ84FALAkTxwBAKyhqnp3hqeFnlxVezLMjnZZkmur6qIkn0vywl79piTnJtmV5CtJXpokrbUHqup1SW7t9V7bWts/4PZPZ5i57agkv9cXAIBlkTgCAFhDrbUXLbLrzJG6LcnFi5znyiRXjpR/NMn3Hk4bAYCNS+Jondq848Yl6+y+7LxVaAkAAAAwr4xxBAAAAMAoiSMAAAAARkkcAQAAADBK4ggAAACAURJHAAAAAIySOAIAAABglMQRAAAAAKMkjgAAAAAYJXEEAAAAwCiJIwAAAABGHbnWDWDtbN5xYy45dV9esuPGRevsvuy8VWwRAAAAMEs8cQQAAADAqCUTR1V1YlV9sKrurKo7qurlvfzYqtpZVXf1r8f08qqqt1TVrqq6rapOmzjXtl7/rqratnK3BQAAAMDhWs4TR/uSXNJa+54kZyS5uKpOSbIjyc2ttZOT3Ny3k+R5SU7uy/Ykb0uGRFOSS5M8O8npSS7dn2wCAAAAYPYsmThqrd3XWvuTvv7lJHcmOT7J+Umu6tWuSvL8vn5+kne1wYeTHF1VxyU5O8nO1toDrbUHk+xMcs5U7wYAAACAqTmkwbGranOSZyW5Jcmm1tp9yZBcqqqn9GrHJ7ln4rA9vWyx8gOvsT3Dk0rZtGlTFhYWDqWJy7bpqOSSU/etyLnnyVJxWKn4z5q9e/dumHs9GHEYiMNAHAbiMBAHAICNadmJo6p6QpL3JnlFa+1vqmrRqiNl7SDlDy9o7fIklyfJli1b2tatW5fbxEPy1quvz5tuN6ncJafuO2gcdl+4dfUas4YWFhayUp+1eSIOA3EYiMNAHAbiAACwMS1rVrWqelSGpNHVrbX39eIv9FfQ0r/e38v3JDlx4vATktx7kHIAAAAAZtByZlWrJFckubO19gsTu25Isn9mtG1Jrp8of3GfXe2MJA/1V9o+kOSsqjqmD4p9Vi8DAAAAYAYt512t5yT58SS3V9Unetmrk1yW5NqquijJ55K8sO+7Kcm5SXYl+UqSlyZJa+2Bqnpdklt7vde21h6Yyl0AAAAAMHVLJo5aax/K+PhESXLmSP2W5OJFznVlkisPpYEAAAAArI1ljXEEAAAAwMYjcQQAAADAKIkjAAAAAEZJHAEAAAAwSuIIAAAAgFESRwAAAACMkjgCAAAAYJTEEQAAAACjJI4AAAAAGCVxBAAAAMAoiSMAAAAARkkcAQAAADBK4ggAAACAURJHAAAAAIySOAIAAABglMQRAAAAAKOOXOsGMNs277hxyTq7LztvFVoCAAAArDZPHAEAAAAwSuIIAAAAgFFeVWPVeO0NAFiO5fQZEv0GAFgNnjgCAAAAYJQnjjhsy/2rIAAAADBfPHEEAAAAwCiJIwAAAABGSRwBAAAAMEriCABG/v4CAAAQI0lEQVQAAIBREkcAAAAAjJI4AgAAAGCUxBEAAAAAoySOAABmVFXtrqrbq+oTVfXRXnZsVe2sqrv612N6eVXVW6pqV1XdVlWnTZxnW69/V1VtW6v7AQDmj8QRAMBs+6HW2jNba1v69o4kN7fWTk5yc99OkuclObkv25O8LRkSTUkuTfLsJKcnuXR/sgkAYCkSRwAA8+X8JFf19auSPH+i/F1t8OEkR1fVcUnOTrKztfZAa+3BJDuTnLPajQYA5tORa90AAAAW1ZL8QVW1JL/eWrs8yabW2n1J0lq7r6qe0usen+SeiWP39LLFyh+mqrZneFIpmzZtysLCwpRvZbDpqOSSU/dN5Vwr1caVtnfv3rlt+7SIgRgkYpCIQSIGyezHQOIIAGB2Pae1dm9PDu2sqj89SN0aKWsHKX94wZCUujxJtmzZ0rZu3foImru0t159fd50+3S6oLsv
3DqV86y2hYWFrFR854UYiEEiBokYJGKQzH4MvKoGADCjWmv39q/3J/mdDGMUfaG/gpb+9f5efU+SEycOPyHJvQcpBwBYksQRAMAMqqrHV9UT968nOSvJp5LckGT/zGjbklzf129I8uI+u9oZSR7qr7R9IMlZVXVMHxT7rF4GALAkr6oBAMymTUl+p6qSoc/2W62136+qW5NcW1UXJflckhf2+jclOTfJriRfSfLSJGmtPVBVr0tya6/32tbaA6t3GwDAPJM4AgCYQa21u5M8Y6T8r5OcOVLekly8yLmuTHLltNsIAKx/XlUDAAAAYJQnjpgpm3fcuGSd3ZedtwotAQAAAJZ84qiqrqyq+6vqUxNlx1bVzqq6q389ppdXVb2lqnZV1W1VddrEMdt6/buqatvYtQAAAACYHct5Ve2dSc45oGxHkptbaycnublvJ8nzkpzcl+1J3pYMiaYklyZ5doZpZC/dn2wCAAAAYDYtmThqrf1RkgNn3jg/yVV9/aokz58of1cbfDjJ0VV1XJKzk+xsrT3QWnswyc58czIKAAAAgBnySMc42tRauy9JWmv3VdVTevnxSe6ZqLenly1W/k2qanuGp5WyadOmLCwsPMImHtymo5JLTt23IueeJ/MYh5X4TOzdu3fFPmvzRBwG4jAQh4E4DMQBAGBjmvbg2DVS1g5S/s2FrV2e5PIk2bJlS9u6devUGjfprVdfnzfdbmzwS07dN3dx2H3h1qmfc2FhISv1WZsn4jAQh4E4DMRhIA4AABvTcsY4GvOF/gpa+tf7e/meJCdO1Dshyb0HKQcAAABgRj3SxNENSfbPjLYtyfUT5S/us6udkeSh/krbB5KcVVXH9EGxz+plAAAAAMyoJd9Rqqp3J9ma5MlVtSfD7GiXJbm2qi5K8rkkL+zVb0pybpJdSb6S5KVJ0lp7oKpel+TWXu+1rbUDB9wGAAAAYIYsmThqrb1okV1njtRtSS5e5DxXJrnykFoHM2DzjhuXrLP7svNWoSUAAACwuh7pq2oAAAAArHMSRwAAAACMmq952CHTfXVs/7kuOXVfXrKM86605dxb4tU4AAAAVofEEevSchMwAAAAwOK8qgYAAADAKIkjAAAAAEZJHAEAAAAwyhhHMAXTHLAbAAAAZoXEEQAAc8kfbgBg5XlVDQAAAIBREkcAAAAAjJI4AgAAAGCUxBEAAAAAoySOAAAAABglcQQAAADAKIkjAAAAAEYdudYNAACAtbR5x41L1tl92Xmr0BIAmD2eOAIAAABglCeOYJUs56+ZAAAAMEskjmAOeaQeAACA1eBVNQAAAABGSRwBAAAAMMqrarBOTet1tuWOzeTVOAAAgPVH4gg2sAOTQpecui8vMYg3AOuIySkA4PB4VQ0AAACAURJHAAAAAIzyqhrAjJnW+FSrbV7bDQAALM4TRwAAAACM8sQRAACsU2ZHBeBwSRwBbGAH+w/F/ln2pvmfCa+zAQDAfJE4AuCgTGUNsP5J7AOwGIkjYEMb6yjvf9JmPx1lAJZjuYn2d57z+FW9HgAcDokjYNWs5l8zdaYBmCa/VwDYqCSOgJkyi4/KT+s/C55cAoDlMag3wOyQOAJYp/x1fDYTkQAAME8kjoCpWM0kxbwmRKbZ7nmNwXJI9gDMJj+fATYmiSMAWCVevQCWa17/QCC5BLD+SBwBwAa22rNAAQAwXySOAJg78/qX+OXyF3sAAGbFqieOquqcJL+U5Igk72itXbbabQCAQ7HeE1Wsf/pfs+X2zz+Ul2zgnyubd9yYS07dN5UYmPkUYOWtauKoqo5I8itJfiTJniS3VtUNrbVPr2Y7AGC/pf7Tccmp+zKLD+h6Konl0v+CpfmZCrC41e4Jn55kV2vt7iSpqmuSnJ9ExwUApsyTUnT6XzAFszjBwTR/zkuMAYtZ7cTR8Unumdjek+TZq9wGAICNRP8LWNIsPnU1i22aNfOaPJxs98FeXV3P39/lxGBW7r9aa6t3saoXJjm7
tfaTffvHk5zeWvuZiTrbk2zvm9+d5DMr1JwnJ/niCp17nojDQBwG4jAQh4E4DMRhsJJx+I7W2rev0Lk3vOX0v3q5PtjqEQMxSMQgEYNEDBIxSNYmBsvuf632E0d7kpw4sX1CknsnK7TWLk9y+Uo3pKo+2lrbstLXmXXiMBCHgTgMxGEgDgNxGIjDXFuy/5Xog60mMRCDRAwSMUjEIBGDZPZj8C2rfL1bk5xcVSdV1aOTXJDkhlVuAwDARqL/BQA8Yqv6xFFrbV9VvSzJBzJMB3tla+2O1WwDAMBGov8FAByOVZ9fuLV2U5KbVvu6I1b8Uew5IQ4DcRiIw0AcBuIwEIeBOMyxGep/JT5LiRgkYpCIQSIGiRgkYpDMeAxWdXBsAAAAAObHao9xBAAAAMCc2JCJo6o6p6o+U1W7qmrHWrdnmqrqxKr6YFXdWVV3VNXLe/mxVbWzqu7qX4/p5VVVb+mxuK2qTps417Ze/66q2rZW93Q4quqIqvp4Vb2/b59UVbf0e3pPHyQ0VfWYvr2r7988cY5X9fLPVNXZa3Mnj1xVHV1V11XVn/bPxQ9sxM9DVf27/m/iU1X17qp67Eb4PFTVlVV1f1V9aqJsat//qvq+qrq9H/OWqqrVvcPlWSQO/6X/u7itqn6nqo6e2Df6fV7s98din6VZMxaHiX3/vqpaVT25b6/bzwNrZ7F/Q+tJTbEvNu9qCv2weVZT6oPNs5pS/2ueLNLn2FB970ViMLV+1zwYi8HEvsPuc6261tqGWjIMCvnnSZ6W5NFJPpnklLVu1xTv77gkp/X1Jyb5sySnJPnPSXb08h1J3tjXz03ye0kqyRlJbunlxya5u389pq8fs9b39wji8bNJfivJ+/v2tUku6Ou/luSn+/q/SfJrff2CJO/p66f0z8hjkpzUPztHrPV9HWIMrkryk3390UmO3mifhyTHJ/lskqMmPgcv2QifhyT/LMlpST41UTa173+SjyT5gX7M7yV53lrf8yHE4awkR/b1N07EYfT7nIP8/ljsszRry1gcevmJGQZO/oskT17vnwfL2iwH+ze0npZMqS+2HpYcZj9s3pdMoQ82z0um1P+at2Xsd+2hft8P9rt2HpZFYjC1ftc8LGMx6OVT6XOt9rIRnzg6Pcmu1trdrbWvJbkmyflr3Kapaa3d11r7k77+5SR3ZvihfX6GX17pX5/f189P8q42+HCSo6vquCRnJ9nZWnugtfZgkp1JzlnFWzlsVXVCkvOSvKNvV5LnJrmuVzkwDvvjc12SM3v985Nc01r7amvts0l2ZfgMzYWq+tYMP7SuSJLW2tdaa1/KBvw8ZJgM4KiqOjLJ45Lclw3weWit/VGSBw4onsr3v+/71tbaH7fht9u7Js41U8bi0Fr7g9bavr754SQn9PXFvs+jvz+W+NkyUxb5PCTJm5P8hySTAx+u288Da2Zd98H2m2JfbK5NqR82t6bYB5t30+h/zZWV7HutfOunYyX7XatyA1Owkn2uFW76qI2YODo+yT0T23t62brTH+98VpJbkmxqrd2XDB2aJE/p1RaLx3qI0y9m+Ef5D337SUm+NPEDa/Ke/vF++/6Hev15j8PTkvxVkt+o4VHxd1TV47PBPg+ttc8n+b+TfC5Dh+WhJB/Lxvs87Det7//xff3A8nn0Exn+0pMcehwO9rNl5lXVjyX5fGvtkwfs2sifB1bGevkZumyH2Rebd9Poh82zafXB5tYU+1/rwYbqey/D4fS75tYU+1yrbiMmjsay1utuarmqekKS9yZ5RWvtbw5WdaSsHaR8LlTVjya5v7X2scnikaptiX1zHYcMf+U5LcnbWmvPSvK3GR6PXcy6jEN/j/z8DI+/PjXJ45M8b6Tqev88LOVQ73tdxKOqfi7JviRX7y8aqbYu41BVj0vyc0n+49jukbJ1GQdWzYb6jEyhLza3ptgPm2fT6oPNrSn2v9azDfc7dQr9rrk05T7XqtuIiaM9Gd4r3O+EJPeuUVtW
RFU9KkNH5erW2vt68Rf2P+7av97fyxeLx7zH6TlJfqyqdmd4rPG5Gf7ydXR/VDZ5+D394/32/d+W4dHCeY/DniR7Wmu39O3rMnRiNtrn4YeTfLa19lettb9P8r4k/3M23udhv2l9//fkG48ZT5bPjT7I4I8mubC/XpUcehy+mMU/S7PuOzN06D/Zf16ekORPquqfZAN+Hlhx6+Vn6JKm1BebZ9Pqh82zafXB5tm0+l/rwUbre4+aUr9rXk2zz7XqNmLi6NYkJ9cwmv+jMwy8dsMat2lq+nvAVyS5s7X2CxO7bkiyfxT2bUmunyh/cR/J/YwkD/XHJz+Q5KyqOqb/teCsXjYXWmuvaq2d0FrbnOF7/IettQuTfDDJC3q1A+OwPz4v6PVbL7+ghlkeTkpycobBX+dCa+0vk9xTVd/di85M8ulssM9Dhkekz6iqx/V/I/vjsKE+DxOm8v3v+75cVWf0uL544lwzr6rOSfLKJD/WWvvKxK7Fvs+jvz/6Z2Oxz9JMa63d3lp7Smttc/95uSfDoL5/mQ32eWBVrOs+2H5T7IvNrSn2w+bWFPtg82xa/a/1YKP1vb/JtPpdq93uaZlmn2utbmDDLRlGLf+zDKO0/9xat2fK9/aDGR5fuy3JJ/pybob3g29Oclf/emyvX0l+pcfi9iRbJs71ExkGJ9uV5KVrfW+HEZOt+cZsHk/L8INoV5LfTvKYXv7Yvr2r73/axPE/1+PzmczhDEFJnpnko/0z8bsZRuTfcJ+HJD+f5E+TfCrJb2aYuWHdfx6SvDvDuAJ/n+EX1EXT/P4n2dJj+udJfjlJrfU9H0IcdmV4b3z/z8pfW+r7nEV+fyz2WZq1ZSwOB+zfnW/M8LFuPw+WtVsW+ze0npZMsS+2HpYcZj9snpdMqQ82z0um1P+ap2WRPseG6nsvEoOp9bvmYRmLwQH7d+cw+lyrvVRvDAAAAAA8zEZ8VQ0AAACAZZA4AgAAAGCUxBEAAAAAoySOAAAAABglcQQAAADAKIkjAAAAAEZJHAEAAAAwSuIIAAAAgFH/P7ewFHMzwC+7AAAAAElFTkSuQmCC", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Histograms of buffer size and index size\n", "\n", "plt.figure(figsize=(20,6))\n", "plt.subplot(121)\n", "df['buffer_size'].hist(bins=50)\n", "plt.title('buffer size')\n", "plt.subplot(122)\n", "plt.title('index size')\n", "df['index_value_count'].hist(bins=50)" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Unique users: 548\n" ] }, { "data": { "image/png": "", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Number of failures per user\n", "\n", "plt.figure(figsize=(8,4))\n", "failures_by_user = df.groupby('user_id').size()\n", "print(\"Unique users: %d\" % len(failures_by_user))\n", "ax = failures_by_user.hist(bins=50)\n", "plt.title('failures per user')\n", "None" ] }, { "cell_type": 
"code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.4" } }, "nbformat": 4, "nbformat_minor": 2 }