Regexp backreference substitution in MellonCond

git-svn-id: https://modmellon.googlecode.com/svn/trunk/mod_mellon2@116 a716ebb1-153a-0410-b759-cfb97c6a1b53
This commit is contained in:
manu@netbsd.org 2011-03-22 17:19:24 +00:00
parent adc2367c38
commit 738cde54f8
4 changed files with 313 additions and 54 deletions

25
README
View File

@ -236,6 +236,24 @@ MellonPostCount 100
# compatibility). The syntax is
# 'MellonCond <attribute name> <value> [<options>]'
#
# <value> is an attribute value to match. Unlike with MellonRequire,
# multiple values are not allowed.
#
# If the [REG] flag is specified (see below), <value> is a regular
# expression. The syntax for backslash escape is the same as in
# Apache's <LocationMatch>'s directives.
#
# Format strings are substituted into <value> prior to evaluation.
# Here are the supported syntaxes:
# %n With n being a digit between 0 and 9. If [REG,REF]
# flags (see below) were used in an earlier matching
# MellonCond, then regular expression back references
# are substituted.
# %{num} Same as %n, with num being a number that may be
# greater than 9.
# %{ENV:x} Substitute Apache environment variable x.
# %% Escape substitution to get a literal %.
#
# <options> is an optional, comma-separated list of options
# enclosed in brackets. Here is an example: [NOT,NC]
# The valid options are:
@ -244,11 +262,16 @@ MellonPostCount 100
# then the overall check succeeds.
# NOT This MellonCond evaluates to true if the attribute
# does not match the value.
# SUB Substring match, evaluates to true if value is
# included in attribute.
# REG Value to check is a regular expression.
# NC Perform case insensitive match.
# NC Perform case insensitive match.
# MAP Attempt to search for an attribute with its name remapped by
# MellonSetEnv. Fall back to the non-remapped name if it is not
# found.
# REF Used with REG, track regular expression back references,
# so that they can be substituted in an upcoming
# MellonCond directive.
#
# It is allowed to have multiple MellonCond on the same
# attribute, and to mix MellonCond and MellonRequire.

View File

@ -125,18 +125,20 @@ typedef enum {
} am_decoder_t;
typedef enum {
AM_COND_FLAG_NULL = 0x00, /* No flags */
AM_COND_FLAG_OR = 0x01, /* Or with next condition */
AM_COND_FLAG_NOT = 0x02, /* Negate this condition */
AM_COND_FLAG_REG = 0x04, /* Condition is regex */
AM_COND_FLAG_NC = 0x08, /* Case insensitive match */
AM_COND_FLAG_MAP = 0x10, /* Try to use attribute name from MellonSetEnv */
AM_COND_FLAG_IGN = 0x20, /* Condition is to be ignored */
AM_COND_FLAG_REQ = 0x40, /* Condition was configure using MellonRequire */
} am_cond_flag_t;
AM_COND_FLAG_NULL = 0x000, /* No flags */
AM_COND_FLAG_OR = 0x001, /* Or with next condition */
AM_COND_FLAG_NOT = 0x002, /* Negate this condition */
AM_COND_FLAG_REG = 0x004, /* Condition is regex */
AM_COND_FLAG_NC = 0x008, /* Case insensitive match */
AM_COND_FLAG_MAP = 0x010, /* Try to use attribute name from MellonSetEnv */
AM_COND_FLAG_REF = 0x020, /* Set regex backreferences */
AM_COND_FLAG_SUB = 0x040, /* Substring match */
/* Not counting AM_COND_FLAG_NULL */
#define AM_COND_FLAG_COUNT 7
/* The other options are internally used */
AM_COND_FLAG_IGN = 0x1000, /* Condition is to be ignored */
AM_COND_FLAG_REQ = 0x2000, /* Condition was set using MellonRequire */
AM_COND_FLAG_FSTR = 0x4000, /* Value contains a format string */
} am_cond_flag_t;
extern const char *am_cond_options[];

View File

@ -422,6 +422,7 @@ static const char *am_set_setenv_slot(cmd_parms *cmd,
return NULL;
}
/* This function decodes MellonCond flags, such as [NOT,REG]
*
* Parameters:
@ -430,19 +431,20 @@ static const char *am_set_setenv_slot(cmd_parms *cmd,
* Returns:
* flags, or -1 on error
*/
const char *am_cond_options[] = {
"OR", /* AM_EXPIRE_FLAG_OR */
"NOT", /* AM_EXPIRE_FLAG_NOT */
"REG", /* AM_EXPIRE_FLAG_REG */
"NC", /* AM_EXPIRE_FLAG_NC */
"MAP", /* AM_EXPIRE_FLAG_MAP */
"IGN", /* AM_EXPIRE_FLAG_IGN */
"REQ", /* AM_EXPIRE_FLAG_REQ */
};
static int am_cond_flags(const char *arg)
{
int flags = AM_COND_FLAG_NULL;
static const char const *options[] = {
"OR", /* AM_EXPIRE_FLAG_OR */
"NOT", /* AM_EXPIRE_FLAG_NOT */
"REG", /* AM_EXPIRE_FLAG_REG */
"NC", /* AM_EXPIRE_FLAG_NC */
"MAP", /* AM_EXPIRE_FLAG_MAP */
"REF", /* AM_EXPIRE_FLAG_REF */
"SUB", /* AM_EXPIRE_FLAG_SUB */
/* The other options (IGN, REQ, FSTR, ...) are only internally used */
};
apr_size_t options_count = sizeof(options) / sizeof(*options);
/* Skip inital [ */
if (arg[0] == '[')
@ -453,10 +455,10 @@ static int am_cond_flags(const char *arg)
do {
apr_size_t i;
for (i = 0; i < AM_COND_FLAG_COUNT; i++) {
apr_size_t optlen = strlen(am_cond_options[i]);
for (i = 0; i < options_count; i++) {
apr_size_t optlen = strlen(options[i]);
if (strncmp(arg, am_cond_options[i], optlen) == 0) {
if (strncmp(arg, options[i], optlen) == 0) {
/* Make sure we have a separator next */
if (arg[optlen] && !strchr("]\t ,", (int)arg[optlen]))
return -1;
@ -466,16 +468,21 @@ static int am_cond_flags(const char *arg)
break;
}
/* no match */
if (i == AM_COND_FLAG_COUNT)
return -1;
/* skip spaces, tabs and commas */
arg += strspn(arg, " \t,");
/* Garbage after ] is ignored */
if (*arg == ']')
return flags;
/* no match */
if (i == options_count)
return -1;
/* skip spaces, tabs and commas */
arg += strspn(arg, " \t,");
/*
* End of option, but we fire an error if
* there is trailing garbage
*/
if (*arg == ']') {
arg++;
return (*arg == '\0') ? flags : -1;
}
}
} while (*arg);
@ -505,32 +512,28 @@ static const char *am_set_cond_slot(cmd_parms *cmd,
const char *options)
{
am_dir_cfg_rec *d = struct_ptr;
int flags = AM_COND_FLAG_NULL;
am_cond_t *element;
if (*attribute == '\0' || *value == '\0')
if (attribute == NULL || *attribute == '\0' ||
value == NULL || *value == '\0')
return apr_pstrcat(cmd->pool, cmd->cmd->name,
" takes two or three arguments", NULL);
" takes at least two arguments", NULL);
if (options != NULL && *options != '\0')
flags = am_cond_flags(options);
if (flags == -1)
return apr_psprintf(cmd->pool, "%s - invalid flags %s",
cmd->cmd->name, options);
element = (am_cond_t *)apr_array_push(d->cond);
element->varname = attribute;
element->flags = AM_COND_FLAG_NULL;
element->flags = flags;
element->str = NULL;
element->regex = NULL;
element->directive = apr_pstrcat(cmd->pool, cmd->directive->directive,
" ", cmd->directive->args, NULL);
/* Handle optional flags */
if (*options != '\0') {
int flags;
flags = am_cond_flags(options);
if (flags == -1)
return apr_psprintf(cmd->pool, "%s - invalid flags %s",
cmd->cmd->name, options);
element->flags = flags;
}
if (element->flags & AM_COND_FLAG_REG) {
int regex_flags = AP_REG_EXTENDED|AP_REG_NOSUB;
@ -543,9 +546,16 @@ static const char *am_set_cond_slot(cmd_parms *cmd,
cmd->cmd->name, value);
}
/*
* Flag values containing format strings to that we do
* not have to process the others at runtime.
*/
if (strchr(value, '%') != NULL)
element->flags |= AM_COND_FLAG_FSTR;
/*
* We keep the string also for regex, so that we can
* print it for debug purpose.
* print it for debug purpose and perform substitutions on it.
*/
element->str = value;

View File

@ -19,6 +19,8 @@
*
*/
#include <assert.h>
#include <openssl/err.h>
#include <openssl/rand.h>
@ -49,6 +51,201 @@ char *am_reconstruct_url(request_rec *r)
return url;
}
/* This function collects the substrings captured by a regexp match
 * into an array, so that they can later be substituted as %0, %1, ...
 *
 * Slot 0 holds the whole match and slot n holds the n-th parenthesized
 * sub-expression. A sub-expression that did not take part in the match
 * (offset of -1) is stored as an empty string.
 *
 * Parameters:
 *  request_rec *r                  The current request; its pool owns
 *                                  the array and the copied strings.
 *  const am_cond_t *ce             The condition whose regex was matched.
 *  const char *value               Attribute value the regex was run on.
 *  const ap_regmatch_t *regmatch   Match offsets filled by ap_regexec(),
 *                                  with re_nsub + 1 entries.
 *
 * Returns:
 *  An array of re_nsub + 1 backreference strings.
 */
const apr_array_header_t *am_cond_backrefs(request_rec *r,
                                           const am_cond_t *ce,
                                           const char *value,
                                           const ap_regmatch_t *regmatch)
{
    /* One slot per sub-expression, plus one for the whole match (%0) */
    int count = ce->regex->re_nsub + 1;
    apr_array_header_t *result;
    const char **slots;
    int idx;

    result = apr_array_make(r->pool, count, sizeof(const char *));

    /* The array is filled in place below, so mark every slot as used */
    result->nelts = count;
    slots = (const char **)(result->elts);

    for (idx = 0; idx < count; idx++) {
        const ap_regmatch_t *m = &regmatch[idx];

        if ((m->rm_so != -1) && (m->rm_eo != -1)) {
            int off = m->rm_so;
            int len = m->rm_eo - m->rm_so;

            slots[idx] = apr_pstrndup(r->pool, value + off, len);
        } else {
            /* Group did not participate in the match */
            slots[idx] = "";
        }
    }

    return (const apr_array_header_t *)result;
}
/* This function clones an am_cond_t and substitutes the format
 * strings found in its value (plain string or regexp source) before
 * the condition is evaluated.
 *
 * Supported format strings:
 *   %n         n is a single digit: regexp backreference n
 *   %{num}     regexp backreference num (may be greater than 9)
 *   %{ENV:x}   Apache environment variable x (r->subprocess_env),
 *              falling back to the process environment
 *   %%         a literal %
 * Any other format string, or one that yields no value (unknown
 * namespace, backreference out of range, unset variable), is replaced
 * by an empty string. Text that cannot be parsed as a format string
 * (an unterminated %{ or a trailing lone %) is copied verbatim.
 *
 * Parameters:
 *  request_rec *r                        The current request.
 *  const am_cond_t *ce                   The am_cond_t to clone and
 *                                        substitute
 *  const apr_array_header_t *backrefs    Collected backreferences
 *
 * Returns:
 *  The cloned am_cond_t, with str set to the substituted value and,
 *  for regexp conditions, regex recompiled from it. If the substituted
 *  regexp does not compile, a warning is logged and the original ce
 *  is returned.
 */
const am_cond_t *am_cond_substitue(request_rec *r, const am_cond_t *ce,
                                   const apr_array_header_t *backrefs)
{
    am_cond_t *c;
    const char *instr = ce->str;
    apr_size_t inlen = strlen(instr);
    const char *outstr = "";
    size_t last;
    size_t i;

    c = (am_cond_t *)apr_pmemdup(r->pool, ce, sizeof(*ce));
    last = 0;

    for (i = strcspn(instr, "%"); i < inlen; i += strcspn(instr + i, "%")) {
        const char *fstr;
        const char *ns;
        const char *name;
        const char *value;
        apr_size_t flen;
        apr_size_t pad;
        apr_size_t nslen;

        /*
         * Make sure we got a %
         */
        assert(instr[i] == '%');

        /*
         * Locate the format string. It can be a single character
         * (e.g.: %1), or a curly-brace enclosed text (e.g.: %{12}).
         * pad counts the delimiters around the flen payload bytes.
         */
        fstr = instr + i + 1;
        if (*fstr == '{') {          /* Curly-brace enclosed text */
            pad = 3;                 /* 3 for %{} */
            fstr++;
            flen = strcspn(fstr, "}");

            /*
             * If there is no closing }, we do not substitute: the
             * rest of the input is copied verbatim after the loop.
             */
            if (fstr[flen] == '\0') {
                i = inlen;
                break;
            }
        } else if (*fstr == '\0') {  /* String ending by a % */
            /*
             * Keep the lone trailing % as a literal, the same way an
             * unterminated %{ is kept, instead of silently dropping it.
             */
            i = inlen;
            break;
        } else {                     /* Single character */
            pad = 1;                 /* 1 for % */
            flen = 1;
        }

        /*
         * Try to extract a namespace (ns) and a name, e.g: %{ENV:foo}
         */
        fstr = apr_pstrndup(r->pool, fstr, flen);
        if ((nslen = strcspn(fstr, ":")) != flen) {
            ns = apr_pstrndup(r->pool, fstr, nslen);
            name = fstr + nslen + 1; /* +1 for : */
        } else {
            nslen = 0;
            ns = "";
            name = fstr;
        }

        value = NULL;
        if ((*ns == '\0') && (strspn(fstr, "0123456789") == flen)) {
            /*
             * If fstr has only digits, this is a regexp backreference.
             * The range check is done on the 64-bit value so that a
             * huge number cannot wrap around into a valid index.
             */
            apr_int64_t d = apr_atoi64(fstr);

            if ((d >= 0) && (d < (apr_int64_t)backrefs->nelts))
                value = ((const char **)(backrefs->elts))[d];

        } else if ((*ns == '\0') && (strcmp(fstr, "%") == 0)) {
            /*
             * %-escape
             */
            value = fstr;

        } else if (strcmp(ns, "ENV") == 0) {
            /*
             * ENV namespace. Look the variable up in the Apache
             * per-request environment first, then fall back to the
             * environment of the server process.
             */
            value = apr_table_get(r->subprocess_env, name);
            if (value == NULL)
                value = getenv(name);
        }

        /*
         * If we did not find a value, substitute the
         * format string with an empty string.
         */
        if (value == NULL)
            value = "";

        /*
         * Concatenate the value with leading text, and keep track
         * of the last location we copied in source string
         */
        outstr = apr_pstrcat(r->pool, outstr,
                             apr_pstrndup(r->pool, instr + last, i - last),
                             value, NULL);
        last = i + flen + pad;

        /*
         * Move index to the end of the format string
         */
        i = last;
    }

    /*
     * Copy text remaining after the last format string.
     */
    outstr = apr_pstrcat(r->pool, outstr,
                         apr_pstrndup(r->pool, instr + last, i - last),
                         NULL);

    /*
     * Store the result in the clone only now that it is complete, so
     * that string comparisons and debug output see the substituted
     * value.
     */
    c->str = outstr;

    ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r,
                  "Directive %s, \"%s\" substituted into \"%s\"",
                  ce->directive, instr, outstr);

    /*
     * If this was a regexp, recompile it.
     */
    if (ce->flags & AM_COND_FLAG_REG) {
        int regex_flags = AP_REG_EXTENDED|AP_REG_NOSUB;

        if (ce->flags & AM_COND_FLAG_NC)
            regex_flags |= AP_REG_ICASE;

        c->regex = ap_pregcomp(r->pool, outstr, regex_flags);
        if (c->regex == NULL) {
            ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, r,
                          "Invalid regular expression \"%s\"", outstr);
            /* Fall back to the unsubstituted condition */
            return ce;
        }
    }

    return (const am_cond_t *)c;
}
/* This function checks if the user has access according
* to the MellonRequire and MellonCond directives.
@ -65,12 +262,13 @@ int am_check_permissions(request_rec *r, am_cache_entry_t *session)
am_dir_cfg_rec *dir_cfg;
int i, j;
int skip_or = 0;
const apr_array_header_t *backrefs = NULL;
dir_cfg = am_get_dir_cfg(r);
/* Iterate over all cond-directives */
for (i = 0; i < dir_cfg->cond->nelts; i++) {
am_cond_t *ce;
const am_cond_t *ce;
const char *value = NULL;
int match = 0;
@ -124,16 +322,42 @@ int am_check_permissions(request_rec *r, am_cache_entry_t *session)
value = session->env[j].value;
/*
* Substiture backrefs if available
*/
if ((ce->flags & AM_COND_FLAG_FSTR) && (backrefs != NULL))
ce = am_cond_substitue(r, ce, backrefs);
ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r,
"Evaluate %s vs \"%s\"",
ce->directive, value);
if (value == NULL) {
match = 0; /* can not happen */
} else if (ce->flags & (AM_COND_FLAG_REG|AM_COND_FLAG_REF)) {
int nsub = ce->regex->re_nsub + 1;
ap_regmatch_t *regmatch;
regmatch = (ap_regmatch_t *)apr_palloc(r->pool,
nsub * sizeof(*regmatch));
match = !ap_regexec(ce->regex, value, nsub, regmatch, 0);
if (match)
backrefs = am_cond_backrefs(r, ce, value, regmatch);
} else if (ce->flags & AM_COND_FLAG_REG) {
match = !ap_regexec(ce->regex, value, 0, NULL, 0);
} else if (ce->flags & (AM_COND_FLAG_SUB|AM_COND_FLAG_NC)) {
match = (strcasestr(ce->str, value) != NULL);
} else if (ce->flags & AM_COND_FLAG_SUB) {
match = (strstr(ce->str, value) != NULL);
} else if (ce->flags & AM_COND_FLAG_NC) {
match = !strcasecmp(ce->str, value);
} else {
match = !strcmp(ce->str, value);
}