#!/usr/bin/env python
# coding: utf-8

from __future__ import unicode_literals, print_function

from collections import OrderedDict
import io
import os
import platform
from random import choice
import re
import sqlite3
from subprocess import check_output
from time import time

os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'settings')

import django
django.setup()

from django.conf import settings
from django.contrib.auth.models import User, Group
from django.core.cache import caches
from django.db import connections, connection
from django.test.utils import CaptureQueriesContext, override_settings
from django.utils.encoding import force_text
import matplotlib.pyplot as plt
import _mysql
import pandas as pd
import psycopg2

import cachalot
from cachalot.api import invalidate
from cachalot.tests.models import Test


RESULTS_PATH = 'benchmark/'
DATA_PATH = '/var/lib/'
CONTEXTS = ('Control', 'Cold cache', 'Hot cache')
DIVIDER = 'divider'

DISK_DATA_RE = re.compile(r'^MODEL="(.*)" MOUNTPOINT="(.*)"$')

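# Resolves which physical disk a path lives on, for the conditions table
# below. Parses `lsblk -Po MODEL,MOUNTPOINT` output (requires util-linux),
# carrying the last seen MODEL over to subsequent mount points, and tries
# the longest mount point first so the most specific mount wins.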
def get_disk_model_for_path(path):
    out = force_text(check_output(['lsblk', '-Po', 'MODEL,MOUNTPOINT']))
    mount_points = []
    previous_model = None
    for model, mount_point in [DISK_DATA_RE.match(line).groups()
                               for line in out.split('\n') if line]:
        if model:
            previous_model = model.strip()
        if mount_point:
            mount_points.append((previous_model, mount_point))
    mount_points = sorted(mount_points, key=lambda t: -len(t[1]))
    for model, mount_point in mount_points:
        if path.startswith(mount_point):
            return model

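# Collects the hardware and software versions the benchmark runs under
# (CPU, RAM, disk, OS, Python, Django, cachalot, and each database and
# cache backend) and writes them as a reST table to conditions.rst.
# Relies on /proc, and on redis-cli and memcached being on the PATH.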
def write_conditions():
    versions = OrderedDict()

    # CPU
    with open('/proc/cpuinfo') as f:
        versions['CPU'] = re.search(r'^model name\s+: (.+)$', f.read(),
                                    flags=re.MULTILINE).group(1)
    # RAM
    with open('/proc/meminfo') as f:
        versions['RAM'] = re.search(r'^MemTotal:\s+(.+)$', f.read(),
                                    flags=re.MULTILINE).group(1)
    versions.update((
        ('Disk', get_disk_model_for_path(DATA_PATH)),
    ))
    # OS
    linux_dist = ' '.join(platform.linux_distribution()).strip()
    if linux_dist:
        versions['Linux distribution'] = linux_dist
    else:
        versions['OS'] = platform.system() + ' ' + platform.release()

    versions.update((
        ('Python', platform.python_version()),
        ('Django', django.__version__),
        ('cachalot', cachalot.__version__),
        ('sqlite', sqlite3.sqlite_version),
    ))

    # PostgreSQL
    with connections['postgresql'].cursor() as cursor:
        cursor.execute('SELECT version();')
        versions['PostgreSQL'] = re.match(r'^PostgreSQL ([\d\.]+) on .+$',
                                          cursor.fetchone()[0]).group(1)
    # MySQL
    with connections['mysql'].cursor() as cursor:
        cursor.execute('SELECT version();')
        versions['MySQL'] = cursor.fetchone()[0].split('-')[0]
    # Redis
    out = force_text(
        check_output(['redis-cli', 'INFO', 'server'])).replace('\r', '')
    versions['Redis'] = re.search(r'^redis_version:([\d\.]+)$', out,
                                  flags=re.MULTILINE).group(1)
    # memcached
    out = force_text(check_output(['memcached', '-h']))
    versions['memcached'] = re.match(r'^memcached ([\d\.]+)$', out,
                                     flags=re.MULTILINE).group(1)

    versions.update((
        ('psycopg2', psycopg2.__version__.split()[0]),
        ('mysqlclient', _mysql.__version__),
    ))

    with io.open(os.path.join(RESULTS_PATH, 'conditions.rst'), 'w') as f:
        f.write('In this benchmark, a small database is generated, '
                'and each test is executed %s times '
                'under the following conditions:\n\n' % Benchmark.n)

        def write_table_sep(char='='):
            f.write((char * 20) + ' ' + (char * 50) + '\n')
        write_table_sep()
        for k, v in versions.items():
            f.write(k.ljust(20) + ' ' + v + '\n')
        write_table_sep()

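# A lenient counterpart to Django's assertNumQueries: instead of raising
# when the captured query count is unexpected, it only prints a warning,
# so a miscounted query does not abort the whole benchmark run.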
class AssertNumQueries(CaptureQueriesContext):
    def __init__(self, n, using=None):
        self.n = n
        self.using = using
        super(AssertNumQueries, self).__init__(self.get_connection())

    def get_connection(self):
        if self.using is None:
            return connection
        return connections[self.using]

    def __exit__(self, exc_type, exc_val, exc_tb):
        super(AssertNumQueries, self).__exit__(exc_type, exc_val, exc_tb)
        if len(self) != self.n:
            print('The number of queries should be %s, but %s were captured.'
                  % (self.n, len(self)))

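# Runs every query in the three CONTEXTS:
# - Control: cachalot disabled, plain Django queries;
# - Cold cache: cachalot enabled, cache invalidated before each run;
# - Hot cache: cachalot enabled, result already cached (0 SQL queries).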
class Benchmark(object):
    n = 20

    def __init__(self):
        self.data = []

    def bench_once(self, context, num_queries, invalidate_before=False):
        for _ in range(self.n):
            if invalidate_before:
                invalidate(db_alias=self.db_alias)
            with AssertNumQueries(num_queries, using=self.db_alias):
                start = time()
                self.query_function(self.db_alias)
                end = time()
            self.data.append(
                {'query': self.query_name,
                 'time': end - start,
                 'context': context,
                 'db': self.db_vendor,
                 'cache': self.cache_name})
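    # The query function is built with eval() so that the human-readable
    # `query_str` doubles as the label stored in the results. `to_list`
    # forces evaluation of lazy querysets; `num_queries` is the SQL query
    # count expected on a cache miss.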
    def benchmark(self, query_str, to_list=True, num_queries=1):
        # Clear the cache before each benchmark so that every benchmark
        # starts under the same conditions.
        caches[settings.CACHALOT_CACHE].clear()

        self.query_name = query_str
        query_str = 'Test.objects.using(using)' + query_str
        if to_list:
            query_str = 'list(%s)' % query_str
        self.query_function = eval('lambda using: ' + query_str)

        with override_settings(CACHALOT_ENABLED=False):
            self.bench_once(CONTEXTS[0], num_queries)
        self.bench_once(CONTEXTS[1], num_queries, invalidate_before=True)
        self.bench_once(CONTEXTS[2], 0)
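    # The benchmarked queries: cheap aggregates, slices near the start and
    # the middle of the 10 000-row table, filters, ordering, and joins.
    # num_queries=3 for prefetch_related because it issues one query per
    # table involved (Test, User, Group).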
    def execute_benchmark(self):
        self.benchmark('.count()', to_list=False)
        self.benchmark('.first()', to_list=False)
        self.benchmark('[:10]')
        self.benchmark('[5000:5010]')
        self.benchmark(".filter(name__icontains='e')[0:10]")
        self.benchmark(".filter(name__icontains='e')[5000:5010]")
        self.benchmark(".order_by('owner')[0:10]")
        self.benchmark(".order_by('owner')[5000:5010]")
        self.benchmark(".select_related('owner')[0:10]")
        self.benchmark(".select_related('owner')[5000:5010]")
        self.benchmark(".prefetch_related('owner__groups')[0:10]",
                       num_queries=3)
        self.benchmark(".prefetch_related('owner__groups')[5000:5010]",
                       num_queries=3)
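    # Main loop: benchmarks every (database, cache) pair declared in the
    # Django settings, then dumps the raw timings to CSV and renders the
    # plots. `cache_name` strips the trailing "Cache" from the backend
    # class name (e.g. LocMemCache -> locmem).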
    def run(self):
        for db_alias in settings.DATABASES:
            self.db_alias = db_alias
            self.db_vendor = connections[self.db_alias].vendor
            print('Benchmarking %s…' % self.db_vendor)
            for cache_alias in settings.CACHES:
                cache = caches[cache_alias]
                self.cache_name = cache.__class__.__name__[:-5].lower()
                with override_settings(CACHALOT_CACHE=cache_alias):
                    self.execute_benchmark()

        self.df = pd.DataFrame.from_records(self.data)
        if not os.path.exists(RESULTS_PATH):
            os.mkdir(RESULTS_PATH)
        self.df.to_csv(os.path.join(RESULTS_PATH, 'data.csv'))

        self.xlim = (0, self.df['time'].max() * 1.01)
        self.output('db')
        self.output('cache')
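    # Aggregates timings twice: first per (context, query, db-or-cache)
    # for the detail plots, then per (context, db-or-cache) for the
    # summary plot. Error bars are asymmetric: the distance from the mean
    # down to the fastest run and up to the slowest run.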
    def output(self, param):
        gp = self.df.groupby(['context', 'query', param])['time']
        self.means = gp.mean().unstack().unstack().reindex(CONTEXTS)
        los = self.means - gp.min().unstack().unstack().reindex(CONTEXTS)
        ups = gp.max().unstack().unstack().reindex(CONTEXTS) - self.means
        self.errors = dict(
            (key, dict(
                (subkey,
                 [[los[key][subkey][context] for context in self.means.index],
                  [ups[key][subkey][context] for context in self.means.index]])
                for subkey in self.means.columns.levels[1]))
            for key in self.means.columns.levels[0])
        self.get_perfs(param)
        self.plot_detail(param)

        gp = self.df.groupby(['context', param])['time']
        self.means = gp.mean().unstack().reindex(CONTEXTS)
        los = self.means - gp.min().unstack().reindex(CONTEXTS)
        ups = gp.max().unstack().reindex(CONTEXTS) - self.means
        self.errors = [
            [[los[key][context] for context in self.means.index],
             [ups[key][context] for context in self.means.index]]
            for key in self.means]
        self.plot_general(param)
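    # Summarizes each backend with two ratios: how much slower a cold
    # cache is than the control run, and how much faster a hot cache is
    # than the control run.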
    def get_perfs(self, param):
        with io.open(os.path.join(RESULTS_PATH, param + '_results.rst'),
                     'w') as f:
            for v in self.means.columns.levels[0]:
                g = self.means[v].mean(axis=1)
                perf = ('%s is %.1f× slower, then %.1f× faster'
                        % (v.ljust(10), g[CONTEXTS[1]] / g[CONTEXTS[0]],
                           g[CONTEXTS[0]] / g[CONTEXTS[2]]))
                print(perf)
                f.write('- %s\n' % perf)
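    # One horizontal-bar subplot per query, laid out on a 6×2 grid with a
    # common x limit and shared y labels, saved as SVG under RESULTS_PATH.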
    def plot_detail(self, param):
        for v in self.means.columns.levels[0]:
            plt.figure()
            axes = self.means[v].plot(
                kind='barh', xerr=self.errors[v],
                xlim=self.xlim, figsize=(15, 15), subplots=True,
                layout=(6, 2), sharey=True, legend=False)
            plt.gca().invert_yaxis()
            for row in axes:
                for ax in row:
                    ax.xaxis.grid(True)
                    ax.set_ylabel('')
                    ax.set_xlabel('Time (s)')
            plt.savefig(os.path.join(RESULTS_PATH, '%s_%s.svg' % (param, v)))
    def plot_general(self, param):
        plt.figure()
        ax = self.means.plot(kind='barh', xerr=self.errors, xlim=self.xlim)
        ax.invert_yaxis()
        ax.xaxis.grid(True)
        ax.set_ylabel('')
        ax.set_xlabel('Time (s)')
        plt.savefig(os.path.join(RESULTS_PATH, '%s.svg' % param))

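# Builds the test dataset: 50 users, 10 groups (each user joining up to
# two random groups), and 10 000 Test rows with random owners.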
def create_data(using):
    User.objects.using(using).bulk_create(
        [User(username='user%d' % i) for i in range(50)])
    Group.objects.using(using).bulk_create(
        [Group(name='test%d' % i) for i in range(10)])
    groups = list(Group.objects.using(using))
    for u in User.objects.using(using):
        u.groups.add(choice(groups), choice(groups))
    users = list(User.objects.using(using))
    Test.objects.using(using).bulk_create(
        [Test(name='test%d' % i, owner=choice(users)) for i in range(10000)])

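# Entry point: writes the conditions table, creates a throwaway test
# database per alias (autoclobber drops any leftover one without
# prompting), populates it, runs the benchmark, then destroys the test
# databases and restores the original names.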
if __name__ == '__main__':
    if not os.path.exists(RESULTS_PATH):
        os.mkdir(RESULTS_PATH)

    write_conditions()

    old_db_names = {}
    for alias in connections:
        conn = connections[alias]
        old_db_names[alias] = conn.settings_dict['NAME']
        conn.creation.create_test_db(autoclobber=True)

        print("Populating %s…" % connections[alias].vendor)
        create_data(alias)

    Benchmark().run()

    for alias in connections:
        connections[alias].creation.destroy_test_db(old_db_names[alias])