WIP: first draft of stats rewrite (#81734) #777

Closed
pducroquet wants to merge 6 commits from wip/81734-rewrite-stats-in-sql into main
4 changed files with 62 additions and 41 deletions

View File

@ -1290,7 +1290,7 @@ def test_statistics_resolution_time_median(pub, freezer):
resp = get_app(pub).get(sign_uri('/api/statistics/resolution-time/?form=test'))
assert get_humanized_duration_serie(resp.json) == [
'1 day(s) and 0 hour(s)', # min
'89 day(s) and 22 hour(s)', # max
'89 day(s) and 23 hour(s)', # max
'13 day(s) and 23 hour(s)', # mean
'5 day(s) and 0 hour(s)', # median
]

BIN
wcs/.sql_criterias.py.swp Normal file

Binary file not shown.

View File

@ -2213,6 +2213,10 @@ class SqlDataMixin(SqlMixin):
_evolution = None
@classmethod
def evolutions_table_name(cls):
    """Return the evolutions table name for this class.

    The name is the class's ``_table_name`` with ``_evolutions`` appended.
    """
    base_table = cls._table_name
    return '%s_evolutions' % base_table
@guard_postgres
def get_evolution(self):
if self._evolution is not None:

View File

@ -15,7 +15,6 @@
# along with this program; if not, see <http://www.gnu.org/licenses/>.
import collections
import time
from django.http import HttpResponseBadRequest, HttpResponseForbidden, JsonResponse
from django.urls import reverse
@ -713,16 +712,17 @@ class ResolutionTimeView(RestrictedView):
]
def get_statistics(self, formdef):
criterias = [StrictNotEqual('status', 'draft')]
if self.request.GET.get('start'):
criterias.append(GreaterOrEqual('receipt_time', self.request.GET['start']))
if self.request.GET.get('end'):
criterias.append(Less('receipt_time', self.request.GET['end']))
__, cursor = sql.get_connection_and_cursor()
values = formdef.data_class().select(criterias)
# load all evolutions in a single batch, to avoid as many query as
# there are formdata when computing resolution times statistics.
formdef.data_class().load_all_evolutions(values)
formdata_class = formdef.data_class()
formdata_tbl = formdata_class._table_name
formdata_evo_tbl = formdata_class.evolutions_table_name()
criterias = [StrictNotEqual('f.status', 'draft')]
if self.request.GET.get('start'):
criterias.append(GreaterOrEqual('f.receipt_time', self.request.GET['start']))
if self.request.GET.get('end'):
criterias.append(Less('f.receipt_time', self.request.GET['end']))
start_status = self.request.GET.get('start_status', formdef.workflow.possible_status[0].id)
end_status = self.request.GET.get('end_status', 'done')
@ -749,39 +749,56 @@ class ResolutionTimeView(RestrictedView):
'end_status': _('"%s"') % end_status.name if end_status != 'done' else _('any final status'),
}
res_time_forms = []
for filled in values:
start_time = None
for evo in filled.evolution or []:
if start_status and evo.status == 'wf-%s' % start_status.id:
start_time = time.mktime(evo.time)
elif evo.status in end_statuses:
if start_status and not start_time:
break
start_time = start_time or time.mktime(filled.receipt_time)
res_time_forms.append(time.mktime(evo.time) - start_time)
break
# query building
parameters = {'start_status': 'wf-%s' % start_status.id}
end_statuses_placeholders = []
for i, status in zip(range(len(end_statuses)), end_statuses):
end_statuses_placeholders.append('(%%(end_status_%s)s)' % i)
parameters['end_status_%s' % i] = status
end_statuses_placeholder = ', '.join(end_statuses_placeholders)
if not res_time_forms:
# transform criterias in sql clause
where_clauses, where_parameters, __ = sql.parse_clause(criterias)
where_clause = ' AND '.join(where_clauses)
parameters.update(where_parameters)
query = f"""
WITH
start_status AS (SELECT (VALUES(%(start_status)s)) status),

Il y a le module psycopg2.sql (https://www.psycopg.org/docs/sql.html) pour rendre ce genre de gros template de requête un poil plus digeste.
end_statuses AS (SELECT * FROM (VALUES {end_statuses_placeholder}) status),
form_datas AS (
SELECT
f.id,
f.status,
f.receipt_time,
array_agg(start_evo.time) as starts,
(array_agg(end_evo.time))[1] as end_time
FROM {formdata_tbl} f
JOIN {formdata_evo_tbl} start_evo ON start_evo.formdata_id = f.id AND start_evo.status IN (SELECT * FROM start_status)
JOIN {formdata_evo_tbl} end_evo ON end_evo.formdata_id = f.id AND end_evo.status IN (SELECT * FROM end_statuses)
WHERE {where_clause}
GROUP BY 1
),
form_times AS (
SELECT id, COALESCE((select * from unnest(starts) x where x <= form_datas.end_time order by x desc limit 1), receipt_time) as start_time, end_time FROM form_datas
),
form_durations AS (
SELECT end_time - start_time as d FROM form_times WHERE end_time IS NOT NULL AND start_time IS NOT NULL
)
SELECT min(d), max(d), avg(d), percentile_cont(0.5) WITHIN GROUP (ORDER BY d) from form_durations;
"""
cursor.execute(query, parameters)
r = cursor.fetchone()
if r is None or r[0] is None:
return label, []
res_time_forms.sort()
sum_times = sum(res_time_forms)
len_times = len(res_time_forms)
mean = sum_times // len_times
if len_times % 2:
median = res_time_forms[len_times // 2]
else:
midpt = len_times // 2
median = (res_time_forms[midpt - 1] + res_time_forms[midpt]) // 2
return label, [
(_('Minimum time'), res_time_forms[0]),
(_('Maximum time'), res_time_forms[-1]),
(_('Mean'), mean),
(_('Median'), median),
]
return label, [
(_('Minimum time'), int(r[0].total_seconds())),
(_('Maximum time'), int(r[1].total_seconds())),
(_('Mean'), int(r[2].total_seconds())),
(_('Median'), int(r[3].total_seconds())),
]
class CardsResolutionTimeView(ResolutionTimeView):