997 lines
32 KiB
Python
997 lines
32 KiB
Python
import pytest
|
|
|
|
from whoosh import analysis, fields, query
|
|
from whoosh.compat import u, text_type
|
|
from whoosh.qparser import default
|
|
from whoosh.qparser import plugins
|
|
|
|
|
|
def test_whitespace():
|
|
p = default.QueryParser("t", None, [plugins.WhitespacePlugin()])
|
|
assert repr(p.tag("hello there amiga")) == "<AndGroup <None:'hello'>, < >, <None:'there'>, < >, <None:'amiga'>>"
|
|
|
|
|
|
def test_singlequotes():
|
|
p = default.QueryParser("t", None, [plugins.WhitespacePlugin(),
|
|
plugins.SingleQuotePlugin()])
|
|
assert repr(p.process("a 'b c' d")) == "<AndGroup <None:'a'>, <None:'b c'>, <None:'d'>>"
|
|
|
|
|
|
def test_prefix():
|
|
p = default.QueryParser("t", None, [plugins.WhitespacePlugin(),
|
|
plugins.PrefixPlugin()])
|
|
assert repr(p.process("a b* c")) == "<AndGroup <None:'a'>, <None:'b'*>, <None:'c'>>"
|
|
|
|
|
|
def test_range():
|
|
p = default.QueryParser("t", None, [plugins.WhitespacePlugin(),
|
|
plugins.RangePlugin()])
|
|
ns = p.tag("a [b to c} d")
|
|
assert repr(ns) == "<AndGroup <None:'a'>, < >, <None:['b' 'c'}>, < >, <None:'d'>>"
|
|
|
|
assert repr(p.process("a {b to]")) == "<AndGroup <None:'a'>, <None:{'b' None]>>"
|
|
assert repr(p.process("[to c] d")) == "<AndGroup <None:[None 'c']>, <None:'d'>>"
|
|
assert repr(p.process("[to]")) == "<AndGroup <None:[None None]>>"
|
|
|
|
|
|
def test_sq_range():
|
|
p = default.QueryParser("t", None, [plugins.WhitespacePlugin(),
|
|
plugins.SingleQuotePlugin(),
|
|
plugins.RangePlugin()])
|
|
assert repr(p.process("['a b' to ']']")) == "<AndGroup <None:['a b' ']']>>"
|
|
|
|
|
|
def test_phrase():
|
|
p = default.QueryParser("t", None, [plugins.WhitespacePlugin(),
|
|
plugins.PhrasePlugin()])
|
|
assert repr(p.process('a "b c"')) == "<AndGroup <None:'a'>, <None:PhraseNode 'b c'~1>>"
|
|
assert repr(p.process('"b c" d')) == "<AndGroup <None:PhraseNode 'b c'~1>, <None:'d'>>"
|
|
assert repr(p.process('"b c"')) == "<AndGroup <None:PhraseNode 'b c'~1>>"
|
|
|
|
q = p.parse('alfa "bravo charlie"~2 delta')
|
|
assert q[1].__class__ == query.Phrase
|
|
assert q[1].words == ["bravo", "charlie"]
|
|
assert q[1].slop == 2
|
|
|
|
|
|
def test_groups():
|
|
p = default.QueryParser("t", None, [plugins.WhitespacePlugin(),
|
|
plugins.GroupPlugin()])
|
|
|
|
ns = p.process("a ((b c) d) e")
|
|
assert repr(ns) == "<AndGroup <None:'a'>, <AndGroup <AndGroup <None:'b'>, <None:'c'>>, <None:'d'>>, <None:'e'>>"
|
|
|
|
|
|
def test_fieldnames():
|
|
p = default.QueryParser("t", None, [plugins.WhitespacePlugin(),
|
|
plugins.FieldsPlugin(),
|
|
plugins.GroupPlugin()])
|
|
ns = p.process("a:b c d:(e f:(g h)) i j:")
|
|
assert repr(ns) == "<AndGroup <'a':'b'>, <None:'c'>, <AndGroup <'d':'e'>, <AndGroup <'f':'g'>, <'f':'h'>>>, <None:'i'>, <None:'j:'>>"
|
|
assert repr(p.process("a:b:")) == "<AndGroup <'a':'b:'>>"
|
|
|
|
|
|
def test_operators():
|
|
p = default.QueryParser("t", None, [plugins.WhitespacePlugin(),
|
|
plugins.OperatorsPlugin()])
|
|
ns = p.process("a OR b")
|
|
assert repr(ns) == "<AndGroup <OrGroup <None:'a'>, <None:'b'>>>"
|
|
|
|
|
|
def test_boost():
|
|
p = default.QueryParser("t", None, [plugins.WhitespacePlugin(),
|
|
plugins.GroupPlugin(),
|
|
plugins.BoostPlugin()])
|
|
ns = p.tag("a^3")
|
|
assert repr(ns) == "<AndGroup <None:'a'>, <^ 3.0>>"
|
|
ns = p.filterize(ns)
|
|
assert repr(ns) == "<AndGroup <None:'a' ^3.0>>"
|
|
|
|
assert repr(p.process("a (b c)^2.5")) == "<AndGroup <None:'a'>, <AndGroup <None:'b'>, <None:'c'> ^2.5>>"
|
|
assert repr(p.process("a (b c)^.5 d")) == "<AndGroup <None:'a'>, <AndGroup <None:'b'>, <None:'c'> ^0.5>, <None:'d'>>"
|
|
assert repr(p.process("^2 a")) == "<AndGroup <None:'^2'>, <None:'a'>>"
|
|
assert repr(p.process("a^2^3")) == "<AndGroup <None:'a^2' ^3.0>>"
|
|
|
|
|
|
#
|
|
|
|
def test_empty_querystring():
|
|
s = fields.Schema(content=fields.TEXT, title=fields.TEXT, id=fields.ID)
|
|
qp = default.QueryParser("content", s)
|
|
q = qp.parse(u(""))
|
|
assert q == query.NullQuery
|
|
|
|
|
|
def test_fields():
|
|
s = fields.Schema(content=fields.TEXT, title=fields.TEXT, id=fields.ID)
|
|
qp = default.QueryParser("content", s)
|
|
q = qp.parse(u("test"))
|
|
assert q.__class__ == query.Term
|
|
assert q.fieldname == "content"
|
|
assert q.text == "test"
|
|
|
|
mq = default.MultifieldParser(("title", "content"), s)
|
|
q = mq.parse(u("test"))
|
|
assert q.__class__ == query.Or
|
|
assert q[0].__class__ == query.Term
|
|
assert q[1].__class__ == query.Term
|
|
assert q[0].fieldname == "title"
|
|
assert q[1].fieldname == "content"
|
|
assert q[0].text == "test"
|
|
assert q[1].text == "test"
|
|
|
|
q = mq.parse(u("title:test"))
|
|
assert q.__class__ == query.Term
|
|
assert q.fieldname == "title"
|
|
assert q.text == "test"
|
|
|
|
|
|
def test_multifield():
|
|
schema = fields.Schema(content=fields.TEXT, title=fields.TEXT,
|
|
cat=fields.KEYWORD, date=fields.DATETIME)
|
|
|
|
qs = u("a (b c cat:d) OR (b c cat:e)")
|
|
qp = default.MultifieldParser(['x', 'y'], schema)
|
|
|
|
q = qp.parse(qs)
|
|
assert text_type(q) == "((x:a OR y:a) AND (((x:b OR y:b) AND (x:c OR y:c) AND cat:d) OR ((x:b OR y:b) AND (x:c OR y:c) AND cat:e)))"
|
|
|
|
|
|
def test_fieldname_chars():
|
|
s = fields.Schema(abc123=fields.TEXT, nisbah=fields.KEYWORD)
|
|
qp = default.QueryParser("content", s)
|
|
fieldmap = {'nisbah': [u('\u0646\u0633\u0628\u0629')],
|
|
'abc123': ['xyz']}
|
|
qp.add_plugin(plugins.FieldAliasPlugin(fieldmap))
|
|
|
|
q = qp.parse(u("abc123:456"))
|
|
assert q.__class__ == query.Term
|
|
assert q.fieldname == u('abc123')
|
|
assert q.text == u('456')
|
|
|
|
q = qp.parse(u("abc123:456 def"))
|
|
assert text_type(q) == u("(abc123:456 AND content:def)")
|
|
|
|
q = qp.parse(u('\u0646\u0633\u0628\u0629:\u0627\u0644\u0641\u0644\u0633'
|
|
'\u0637\u064a\u0646\u064a'))
|
|
assert q.__class__ == query.Term
|
|
assert q.fieldname == u('nisbah')
|
|
assert q.text == u('\u0627\u0644\u0641\u0644\u0633\u0637\u064a\u0646\u064a')
|
|
|
|
q = qp.parse(u("abc123 (xyz:123 OR qrs)"))
|
|
assert text_type(q) == "(content:abc123 AND (abc123:123 OR content:qrs))"
|
|
|
|
|
|
def test_colonspace():
|
|
s = fields.Schema(content=fields.TEXT, url=fields.ID)
|
|
qp = default.QueryParser("content", s)
|
|
q = qp.parse(u("url:test"))
|
|
assert q.__class__ == query.Term
|
|
assert q.fieldname == "url"
|
|
assert q.text == "test"
|
|
|
|
q = qp.parse(u("url: test"))
|
|
assert q.__class__ == query.And
|
|
assert q[0].__class__ == query.Term
|
|
assert q[1].__class__ == query.Term
|
|
assert q[0].fieldname == "content"
|
|
assert q[1].fieldname == "content"
|
|
assert q[0].text == "url"
|
|
assert q[1].text == "test"
|
|
|
|
q = qp.parse(u("url:"))
|
|
assert q.__class__ == query.Term
|
|
assert q.fieldname == "content"
|
|
assert q.text == "url"
|
|
|
|
s = fields.Schema(foo=fields.KEYWORD)
|
|
qp = default.QueryParser("foo", s)
|
|
q = qp.parse(u("blah:"))
|
|
assert q.__class__ == query.Term
|
|
assert q.fieldname == "foo"
|
|
assert q.text == "blah:"
|
|
|
|
|
|
def test_andor():
|
|
qp = default.QueryParser("a", None)
|
|
q = qp.parse("a AND b OR c AND d OR e AND f")
|
|
assert text_type(q) == "((a:a AND a:b) OR (a:c AND a:d) OR (a:e AND a:f))"
|
|
|
|
q = qp.parse("aORb")
|
|
assert q == query.Term("a", "aORb")
|
|
|
|
q = qp.parse("aOR b")
|
|
assert q == query.And([query.Term("a", "aOR"), query.Term("a", "b")])
|
|
|
|
q = qp.parse("a ORb")
|
|
assert q == query.And([query.Term("a", "a"), query.Term("a", "ORb")])
|
|
|
|
assert qp.parse("OR") == query.Term("a", "OR")
|
|
|
|
|
|
def test_andnot():
|
|
qp = default.QueryParser("content", None)
|
|
q = qp.parse(u("this ANDNOT that"))
|
|
assert q.__class__ == query.AndNot
|
|
assert q.a.__class__ == query.Term
|
|
assert q.b.__class__ == query.Term
|
|
assert q.a.text == "this"
|
|
assert q.b.text == "that"
|
|
|
|
q = qp.parse(u("foo ANDNOT bar baz"))
|
|
assert q.__class__ == query.And
|
|
assert len(q) == 2
|
|
assert q[0].__class__ == query.AndNot
|
|
assert q[1].__class__ == query.Term
|
|
|
|
q = qp.parse(u("foo fie ANDNOT bar baz"))
|
|
assert q.__class__ == query.And
|
|
assert len(q) == 3
|
|
assert q[0].__class__ == query.Term
|
|
assert q[1].__class__ == query.AndNot
|
|
assert q[2].__class__ == query.Term
|
|
|
|
q = qp.parse(u("a AND b ANDNOT c"))
|
|
assert q.__class__ == query.AndNot
|
|
assert text_type(q) == "((content:a AND content:b) ANDNOT content:c)"
|
|
|
|
|
|
def test_boost_query():
|
|
qp = default.QueryParser("content", None)
|
|
q = qp.parse(u("this^3 fn:that^0.5 5.67 hi^5x"))
|
|
assert q[0].boost == 3.0
|
|
assert q[1].boost == 0.5
|
|
assert q[1].fieldname == "fn"
|
|
assert q[2].text == "5.67"
|
|
assert q[3].text == "hi^5x"
|
|
|
|
q = qp.parse("alfa (bravo OR charlie)^2.5 ^3")
|
|
assert len(q) == 3
|
|
assert q[0].boost == 1.0
|
|
assert q[1].boost == 2.5
|
|
assert q[2].text == "^3"
|
|
|
|
|
|
def test_boosts():
|
|
qp = default.QueryParser("t", None)
|
|
q = qp.parse("alfa ((bravo^2)^3)^4 charlie")
|
|
assert q.__unicode__() == "(t:alfa AND t:bravo^24.0 AND t:charlie)"
|
|
|
|
|
|
def test_wild():
|
|
qp = default.QueryParser("t", None, [plugins.WhitespacePlugin(),
|
|
plugins.WildcardPlugin()])
|
|
assert repr(qp.process("a b*c? d")) == "<AndGroup <None:'a'>, <None:Wild 'b*c?'>, <None:'d'>>"
|
|
assert repr(qp.process("a * ? d")) == "<AndGroup <None:'a'>, <None:Wild '*'>, <None:Wild '?'>, <None:'d'>>"
|
|
|
|
#
|
|
qp = default.QueryParser("content", None)
|
|
q = qp.parse(u("hello *the?e* ?star*s? test"))
|
|
assert len(q) == 4
|
|
assert q[0].__class__ == query.Term
|
|
assert q[0].text == "hello"
|
|
assert q[1].__class__ == query.Wildcard
|
|
assert q[1].text == "*the?e*"
|
|
assert q[2].__class__ == query.Wildcard
|
|
assert q[2].text == "?star*s?"
|
|
assert q[3].__class__ == query.Term
|
|
assert q[3].text == "test"
|
|
|
|
#
|
|
qp = default.QueryParser("content", None)
|
|
q = qp.parse(u("*the?e*"))
|
|
assert q.__class__ == query.Wildcard
|
|
assert q.text == "*the?e*"
|
|
|
|
|
|
def test_parse_fieldname_underscores():
|
|
s = fields.Schema(my_name=fields.ID(stored=True), my_value=fields.TEXT)
|
|
qp = default.QueryParser("my_value", schema=s)
|
|
q = qp.parse(u("my_name:Green"))
|
|
assert q.__class__ == query.Term
|
|
assert q.fieldname == "my_name"
|
|
assert q.text == "Green"
|
|
|
|
|
|
def test_endstar():
|
|
qp = default.QueryParser("text", None)
|
|
q = qp.parse(u("word*"))
|
|
assert q.__class__ == query.Prefix
|
|
assert q.text == "word"
|
|
|
|
q = qp.parse(u("first* second"))
|
|
assert q[0].__class__ == query.Prefix
|
|
assert q[0].text == "first"
|
|
|
|
|
|
def test_singlequotes_query():
|
|
qp = default.QueryParser("text", None)
|
|
q = qp.parse("hell's hot 'i stab at thee'")
|
|
assert q.__class__.__name__ == 'And'
|
|
assert len(q) == 3
|
|
assert q[0].__class__ == query.Term
|
|
assert q[1].__class__ == query.Term
|
|
assert q[2].__class__ == query.Term
|
|
assert q[0].text == "hell's"
|
|
assert q[1].text == "hot"
|
|
assert q[2].text == "i stab at thee"
|
|
|
|
q = qp.parse("alfa zulu:'bravo charlie' delta")
|
|
assert q.__class__.__name__ == 'And'
|
|
assert len(q) == 3
|
|
assert q[0].__class__ == query.Term
|
|
assert q[1].__class__ == query.Term
|
|
assert q[2].__class__ == query.Term
|
|
assert (q[0].fieldname, q[0].text) == ("text", "alfa")
|
|
assert (q[1].fieldname, q[1].text) == ("zulu", "bravo charlie")
|
|
assert (q[2].fieldname, q[2].text) == ("text", "delta")
|
|
|
|
q = qp.parse("The rest 'is silence")
|
|
assert q.__class__ == query.And
|
|
assert len(q) == 4
|
|
assert [t.text for t in q.subqueries] == ["The", "rest", "'is", "silence"]
|
|
|
|
q = qp.parse("I don't like W's stupid face")
|
|
assert q.__class__ == query.And
|
|
assert len(q) == 6
|
|
assert [t.text for t in q.subqueries] == ["I", "don't", "like", "W's",
|
|
"stupid", "face"]
|
|
|
|
q = qp.parse("I forgot the drinkin' in '98")
|
|
assert q.__class__ == query.And
|
|
assert len(q) == 6
|
|
assert [t.text for t in q.subqueries] == ["I", "forgot", "the", "drinkin'",
|
|
"in", "'98"]
|
|
|
|
# def test_escaping():
|
|
# qp = default.QueryParser("text", None)
|
|
#
|
|
# q = qp.parse(r'big\small')
|
|
# assert q.__class__, query.Term, q)
|
|
# assert q.text == "bigsmall"
|
|
#
|
|
# q = qp.parse(r'big\\small')
|
|
# assert q.__class__ == query.Term
|
|
# assert q.text == r'big\small'
|
|
#
|
|
# q = qp.parse(r'http\:example')
|
|
# assert q.__class__ == query.Term
|
|
# assert q.fieldname == "text"
|
|
# assert q.text == "http:example"
|
|
#
|
|
# q = qp.parse(r'hello\ there')
|
|
# assert q.__class__ == query.Term
|
|
# assert q.text == "hello there"
|
|
#
|
|
# q = qp.parse(r'\[start\ TO\ end\]')
|
|
# assert q.__class__ == query.Term
|
|
# assert q.text == "[start TO end]"
|
|
#
|
|
# schema = fields.Schema(text=fields.TEXT)
|
|
# qp = default.QueryParser("text", None)
|
|
# q = qp.parse(r"http\:\/\/www\.example\.com")
|
|
# assert q.__class__ == query.Term
|
|
# assert q.text == "http://www.example.com"
|
|
#
|
|
# q = qp.parse(u("\u005c\u005c"))
|
|
# assert q.__class__ == query.Term
|
|
# assert q.text == "\\"
|
|
|
|
# def test_escaping_wildcards():
|
|
# qp = default.QueryParser("text", None)
|
|
#
|
|
# q = qp.parse(u("a*b*c?d"))
|
|
# assert q.__class__ == query.Wildcard
|
|
# assert q.text == "a*b*c?d"
|
|
#
|
|
# q = qp.parse(u("a*b\u005c*c?d"))
|
|
# assert q.__class__ == query.Wildcard
|
|
# assert q.text == "a*b*c?d"
|
|
#
|
|
# q = qp.parse(u("a*b\u005c\u005c*c?d"))
|
|
# assert q.__class__ == query.Wildcard
|
|
# assert q.text, u('a*b\u005c*c?d'))
|
|
#
|
|
# q = qp.parse(u("ab*"))
|
|
# assert q.__class__ == query.Prefix
|
|
# assert q.text, u("ab"))
|
|
#
|
|
# q = qp.parse(u("ab\u005c\u005c*"))
|
|
# assert q.__class__ == query.Wildcard
|
|
# assert q.text, u("ab\u005c*"))
|
|
|
|
|
|
def test_phrase_phrase():
|
|
qp = default.QueryParser("content", None)
|
|
q = qp.parse('"alfa bravo" "charlie delta echo"^2.2 test:"foxtrot golf"')
|
|
assert q[0].__class__ == query.Phrase
|
|
assert q[0].words == ["alfa", "bravo"]
|
|
assert q[1].__class__ == query.Phrase
|
|
assert q[1].words == ["charlie", "delta", "echo"]
|
|
assert q[1].boost == 2.2
|
|
assert q[2].__class__ == query.Phrase
|
|
assert q[2].words == ["foxtrot", "golf"]
|
|
assert q[2].fieldname == "test"
|
|
|
|
|
|
def test_weird_characters():
|
|
qp = default.QueryParser("content", None)
|
|
q = qp.parse(u(".abcd@gmail.com"))
|
|
assert q.__class__ == query.Term
|
|
assert q.text == ".abcd@gmail.com"
|
|
q = qp.parse(u("r*"))
|
|
assert q.__class__ == query.Prefix
|
|
assert q.text == "r"
|
|
q = qp.parse(u("."))
|
|
assert q.__class__ == query.Term
|
|
assert q.text == "."
|
|
q = qp.parse(u("?"))
|
|
assert q.__class__ == query.Wildcard
|
|
assert q.text == "?"
|
|
|
|
|
|
def test_euro_chars():
|
|
schema = fields.Schema(text=fields.TEXT)
|
|
qp = default.QueryParser("text", schema)
|
|
q = qp.parse(u("stra\xdfe"))
|
|
assert q.__class__ == query.Term
|
|
assert q.text == u("stra\xdfe")
|
|
|
|
|
|
def test_star():
|
|
schema = fields.Schema(text=fields.TEXT(stored=True))
|
|
qp = default.QueryParser("text", schema)
|
|
q = qp.parse(u("*"))
|
|
assert q.__class__ == query.Every
|
|
assert q.fieldname == "text"
|
|
|
|
q = qp.parse(u("*h?ll*"))
|
|
assert q.__class__ == query.Wildcard
|
|
assert q.text == "*h?ll*"
|
|
|
|
q = qp.parse(u("h?pe"))
|
|
assert q.__class__ == query.Wildcard
|
|
assert q.text == "h?pe"
|
|
|
|
q = qp.parse(u("*? blah"))
|
|
assert q.__class__ == query.And
|
|
assert q[0].__class__ == query.Wildcard
|
|
assert q[0].text == "*?"
|
|
assert q[1].__class__ == query.Term
|
|
assert q[1].text == "blah"
|
|
|
|
q = qp.parse(u("*ending"))
|
|
assert q.__class__ == query.Wildcard
|
|
assert q.text == "*ending"
|
|
|
|
q = qp.parse(u("*q"))
|
|
assert q.__class__ == query.Wildcard
|
|
assert q.text == "*q"
|
|
|
|
|
|
def test_star_field():
|
|
schema = fields.Schema(text=fields.TEXT)
|
|
qp = default.QueryParser("text", schema)
|
|
|
|
q = qp.parse(u("*:*"))
|
|
assert q.__class__ == query.Every
|
|
assert q.fieldname is None
|
|
|
|
# This gets parsed to a term with text="*:test" which is then analyzed down
|
|
# to just "test"
|
|
q = qp.parse(u("*:test"))
|
|
assert q.__class__ == query.Term
|
|
assert q.fieldname == "text"
|
|
assert q.text == "test"
|
|
|
|
|
|
def test_range_query():
|
|
schema = fields.Schema(name=fields.ID(stored=True),
|
|
text=fields.TEXT(stored=True))
|
|
qp = default.QueryParser("text", schema)
|
|
|
|
q = qp.parse(u("[alfa to bravo}"))
|
|
assert q.__class__ == query.TermRange
|
|
assert q.start == "alfa"
|
|
assert q.end == "bravo"
|
|
assert q.startexcl is False
|
|
assert q.endexcl is True
|
|
|
|
q = qp.parse(u("['hello there' to 'what ever']"))
|
|
assert q.__class__ == query.TermRange
|
|
assert q.start == "hello there"
|
|
assert q.end == "what ever"
|
|
assert q.startexcl is False
|
|
assert q.endexcl is False
|
|
|
|
q = qp.parse(u("name:{'to' to 'b'}"))
|
|
assert q.__class__ == query.TermRange
|
|
assert q.start == "to"
|
|
assert q.end == "b"
|
|
assert q.startexcl is True
|
|
assert q.endexcl is True
|
|
|
|
q = qp.parse(u("name:{'a' to 'to']"))
|
|
assert q.__class__ == query.TermRange
|
|
assert q.start == "a"
|
|
assert q.end == "to"
|
|
assert q.startexcl is True
|
|
assert q.endexcl is False
|
|
|
|
q = qp.parse(u("name:[a to to]"))
|
|
assert q.__class__ == query.TermRange
|
|
assert q.start == "a"
|
|
assert q.end == "to"
|
|
|
|
q = qp.parse(u("name:[to to b]"))
|
|
assert q.__class__ == query.TermRange
|
|
assert q.start == "to"
|
|
assert q.end == "b"
|
|
|
|
q = qp.parse(u("[alfa to alfa]"))
|
|
assert q.__class__ == query.Term
|
|
assert q.text == "alfa"
|
|
|
|
q = qp.parse(u("Ind* AND name:[d TO]"))
|
|
assert q.__class__ == query.And
|
|
assert q[0].__class__ == query.Prefix
|
|
assert q[1].__class__ == query.TermRange
|
|
assert q[0].text == "ind"
|
|
assert q[1].start == "d"
|
|
assert q[1].fieldname == "name"
|
|
|
|
q = qp.parse(u("name:[d TO]"))
|
|
assert q.__class__ == query.TermRange
|
|
assert q.start == "d"
|
|
assert q.fieldname == "name"
|
|
|
|
|
|
def test_numeric_range():
|
|
schema = fields.Schema(id=fields.STORED, number=fields.NUMERIC)
|
|
qp = default.QueryParser("number", schema)
|
|
|
|
teststart = 40
|
|
testend = 100
|
|
|
|
q = qp.parse("[%s to *]" % teststart)
|
|
assert q == query.NullQuery
|
|
|
|
q = qp.parse("[%s to]" % teststart)
|
|
assert q.__class__ == query.NumericRange
|
|
assert q.start == teststart
|
|
assert q.end is None
|
|
|
|
q = qp.parse("[to %s]" % testend)
|
|
assert q.__class__ == query.NumericRange
|
|
assert q.start is None
|
|
assert q.end == testend
|
|
|
|
q = qp.parse("[%s to %s]" % (teststart, testend))
|
|
assert q.__class__ == query.NumericRange
|
|
assert q.start == teststart
|
|
assert q.end == testend
|
|
|
|
|
|
def test_regressions():
|
|
qp = default.QueryParser("f", None)
|
|
|
|
# From 0.3.18, these used to require escaping. Mostly good for
|
|
# regression testing.
|
|
assert qp.parse(u("re-inker")) == query.Term("f", "re-inker")
|
|
assert qp.parse(u("0.7 wire")) == query.And([query.Term("f", "0.7"),
|
|
query.Term("f", "wire")])
|
|
assert (qp.parse(u("daler-rowney pearl 'bell bronze'"))
|
|
== query.And([query.Term("f", "daler-rowney"),
|
|
query.Term("f", "pearl"),
|
|
query.Term("f", "bell bronze")]))
|
|
|
|
q = qp.parse(u('22" BX'))
|
|
assert q, query.And([query.Term("f", '22"') == query.Term("f", "BX")])
|
|
|
|
|
|
def test_empty_ranges():
|
|
schema = fields.Schema(name=fields.TEXT, num=fields.NUMERIC,
|
|
date=fields.DATETIME)
|
|
qp = default.QueryParser("text", schema)
|
|
|
|
for fname in ("name", "date"):
|
|
q = qp.parse(u("%s:[to]") % fname)
|
|
assert q.__class__ == query.Every
|
|
|
|
|
|
def test_empty_numeric_range():
|
|
schema = fields.Schema(id=fields.ID, num=fields.NUMERIC)
|
|
qp = default.QueryParser("num", schema)
|
|
q = qp.parse("num:[to]")
|
|
assert q.__class__ == query.NumericRange
|
|
assert q.start is None
|
|
assert q.end is None
|
|
|
|
|
|
def test_numrange_multi():
|
|
schema = fields.Schema(text=fields.TEXT, start=fields.NUMERIC,
|
|
end=fields.NUMERIC)
|
|
qp = default.QueryParser("text", schema)
|
|
|
|
q = qp.parse("start:[2008 to]")
|
|
assert q.__class__ == query.NumericRange
|
|
assert q.fieldname == "start"
|
|
assert q.start == 2008
|
|
assert q.end is None
|
|
|
|
q = qp.parse("start:[2011 to 2012]")
|
|
assert q.__class__.__name__ == "NumericRange"
|
|
assert q.fieldname == "start"
|
|
assert q.start == 2011
|
|
assert q.end == 2012
|
|
|
|
q = qp.parse("start:[2008 to] AND end:[2011 to 2012]")
|
|
assert q.__class__ == query.And
|
|
assert q[0].__class__ == query.NumericRange
|
|
assert q[1].__class__ == query.NumericRange
|
|
assert q[0].start == 2008
|
|
assert q[0].end is None
|
|
assert q[1].start == 2011
|
|
assert q[1].end == 2012
|
|
|
|
|
|
def test_nonexistant_fieldnames():
|
|
# Need an analyzer that won't mangle a URL
|
|
a = analysis.SimpleAnalyzer("\\S+")
|
|
schema = fields.Schema(id=fields.ID, text=fields.TEXT(analyzer=a))
|
|
|
|
qp = default.QueryParser("text", schema)
|
|
q = qp.parse(u("id:/code http://localhost/"))
|
|
assert q.__class__ == query.And
|
|
assert q[0].__class__ == query.Term
|
|
assert q[0].fieldname == "id"
|
|
assert q[0].text == "/code"
|
|
assert q[1].__class__ == query.Term
|
|
assert q[1].fieldname == "text"
|
|
assert q[1].text == "http://localhost/"
|
|
|
|
|
|
def test_stopped():
|
|
schema = fields.Schema(text=fields.TEXT)
|
|
qp = default.QueryParser("text", schema)
|
|
q = qp.parse(u("a b"))
|
|
assert q == query.NullQuery
|
|
|
|
|
|
def test_analyzing_terms():
|
|
ana = analysis.StemmingAnalyzer()
|
|
schema = fields.Schema(text=fields.TEXT(analyzer=ana))
|
|
qp = default.QueryParser("text", schema)
|
|
q = qp.parse(u("Indexed!"))
|
|
assert q.__class__ == query.Term
|
|
assert q.text == "index"
|
|
|
|
|
|
def test_simple_parsing():
|
|
parser = default.SimpleParser("x", None)
|
|
q = parser.parse(u("alfa bravo charlie delta"))
|
|
assert text_type(q) == "(x:alfa OR x:bravo OR x:charlie OR x:delta)"
|
|
|
|
q = parser.parse(u("alfa +bravo charlie delta"))
|
|
assert text_type(q) == "(x:bravo ANDMAYBE (x:alfa OR x:charlie OR x:delta))"
|
|
|
|
q = parser.parse(u("alfa +bravo -charlie delta"))
|
|
assert (text_type(q)
|
|
== "((x:bravo ANDMAYBE (x:alfa OR x:delta)) ANDNOT x:charlie)")
|
|
|
|
q = parser.parse(u("- alfa +bravo + delta"))
|
|
assert text_type(q) == "((x:bravo AND x:delta) ANDNOT x:alfa)"
|
|
|
|
|
|
def test_dismax():
|
|
parser = default.DisMaxParser({"body": 0.8, "title": 2.5}, None)
|
|
q = parser.parse(u("alfa bravo charlie"))
|
|
assert text_type(q) == "(DisMax(body:alfa^0.8 title:alfa^2.5) OR DisMax(body:bravo^0.8 title:bravo^2.5) OR DisMax(body:charlie^0.8 title:charlie^2.5))"
|
|
|
|
q = parser.parse(u("alfa +bravo charlie"))
|
|
assert text_type(q) == "(DisMax(body:bravo^0.8 title:bravo^2.5) ANDMAYBE (DisMax(body:alfa^0.8 title:alfa^2.5) OR DisMax(body:charlie^0.8 title:charlie^2.5)))"
|
|
|
|
q = parser.parse(u("alfa -bravo charlie"))
|
|
assert text_type(q) == "((DisMax(body:alfa^0.8 title:alfa^2.5) OR DisMax(body:charlie^0.8 title:charlie^2.5)) ANDNOT DisMax(body:bravo^0.8 title:bravo^2.5))"
|
|
|
|
q = parser.parse(u("alfa -bravo +charlie"))
|
|
assert text_type(q) == "((DisMax(body:charlie^0.8 title:charlie^2.5) ANDMAYBE DisMax(body:alfa^0.8 title:alfa^2.5)) ANDNOT DisMax(body:bravo^0.8 title:bravo^2.5))"
|
|
|
|
|
|
def test_many_clauses():
|
|
qs = "1" + (" OR 1" * 1000)
|
|
|
|
parser = default.QueryParser("content", None)
|
|
parser.parse(qs)
|
|
|
|
|
|
def test_roundtrip():
|
|
parser = default.QueryParser("a", None)
|
|
q = parser.parse(u("a OR ((b AND c AND d AND e) OR f OR g) ANDNOT h"))
|
|
assert text_type(q) == "((a:a OR (a:b AND a:c AND a:d AND a:e) OR a:f OR a:g) ANDNOT a:h)"
|
|
|
|
|
|
def test_ngrams():
|
|
schema = fields.Schema(grams=fields.NGRAM)
|
|
parser = default.QueryParser('grams', schema)
|
|
parser.remove_plugin_class(plugins.WhitespacePlugin)
|
|
|
|
q = parser.parse(u("Hello There"))
|
|
assert q.__class__ == query.And
|
|
assert len(q) == 8
|
|
assert [sq.text for sq in q] == ["hell", "ello", "llo ", "lo t", "o th",
|
|
" the", "ther", "here"]
|
|
|
|
|
|
def test_ngramwords():
|
|
schema = fields.Schema(grams=fields.NGRAMWORDS(queryor=True))
|
|
parser = default.QueryParser('grams', schema)
|
|
|
|
q = parser.parse(u("Hello Tom"))
|
|
assert q.__class__ == query.And
|
|
assert q[0].__class__ == query.Or
|
|
assert q[1].__class__ == query.Term
|
|
assert q[0][0].text == "hell"
|
|
assert q[0][1].text == "ello"
|
|
assert q[1].text == "tom"
|
|
|
|
|
|
def test_multitoken_default():
|
|
textfield = fields.TEXT()
|
|
assert textfield.multitoken_query == "default"
|
|
schema = fields.Schema(text=textfield)
|
|
parser = default.QueryParser('text', schema)
|
|
qstring = u("chaw-bacon")
|
|
|
|
texts = list(schema["text"].process_text(qstring))
|
|
assert texts == ["chaw", "bacon"]
|
|
|
|
q = parser.parse(qstring)
|
|
assert q.__class__ == query.And
|
|
assert len(q) == 2
|
|
assert q[0].__class__ == query.Term
|
|
assert q[0].text == "chaw"
|
|
assert q[1].__class__ == query.Term
|
|
assert q[1].text == "bacon"
|
|
|
|
|
|
def test_multitoken_or():
|
|
textfield = fields.TEXT()
|
|
textfield.multitoken_query = "or"
|
|
schema = fields.Schema(text=textfield)
|
|
parser = default.QueryParser('text', schema)
|
|
qstring = u("chaw-bacon")
|
|
|
|
texts = list(schema["text"].process_text(qstring))
|
|
assert texts == ["chaw", "bacon"]
|
|
|
|
q = parser.parse(qstring)
|
|
assert q.__class__ == query.Or
|
|
assert len(q) == 2
|
|
assert q[0].__class__ == query.Term
|
|
assert q[0].text == "chaw"
|
|
assert q[1].__class__ == query.Term
|
|
assert q[1].text == "bacon"
|
|
|
|
|
|
def test_multitoken_phrase():
|
|
textfield = fields.TEXT()
|
|
textfield.multitoken_query = "phrase"
|
|
schema = fields.Schema(text=textfield)
|
|
parser = default.QueryParser("text", schema)
|
|
qstring = u("chaw-bacon")
|
|
|
|
texts = list(schema["text"].process_text(qstring))
|
|
assert texts == ["chaw", "bacon"]
|
|
|
|
q = parser.parse(qstring)
|
|
assert q.__class__ == query.Phrase
|
|
|
|
|
|
def test_singlequote_multitoken():
|
|
schema = fields.Schema(text=fields.TEXT(multitoken_query="or"))
|
|
parser = default.QueryParser("text", schema)
|
|
q = parser.parse(u("foo bar"))
|
|
assert q.__unicode__() == "(text:foo AND text:bar)"
|
|
|
|
q = parser.parse(u("'foo bar'")) # single quotes
|
|
assert q.__unicode__() == "(text:foo OR text:bar)"
|
|
|
|
|
|
def test_operator_queries():
|
|
qp = default.QueryParser("f", None)
|
|
|
|
q = qp.parse("a AND b OR c AND d")
|
|
assert text_type(q) == "((f:a AND f:b) OR (f:c AND f:d))"
|
|
|
|
q = qp.parse("a OR b OR c OR d")
|
|
assert text_type(q) == "(f:a OR f:b OR f:c OR f:d)"
|
|
|
|
q = qp.parse("a ANDMAYBE b ANDNOT c REQUIRE d")
|
|
assert text_type(q) == "((f:a ANDMAYBE (f:b ANDNOT f:c)) REQUIRE f:d)"
|
|
|
|
|
|
#def test_associativity():
|
|
# left_andmaybe = (syntax.InfixOperator("ANDMAYBE", syntax.AndMaybeGroup, True), 0)
|
|
# right_andmaybe = (syntax.InfixOperator("ANDMAYBE", syntax.AndMaybeGroup, False), 0)
|
|
# not_ = (syntax.PrefixOperator("NOT", syntax.NotGroup), 0)
|
|
#
|
|
# def make_parser(*ops):
|
|
# parser = default.QueryParser("f", None)
|
|
# parser.replace_plugin(plugins.CompoundsPlugin(ops, clean=True))
|
|
# return parser
|
|
#
|
|
# p = make_parser(left_andmaybe)
|
|
# q = p.parse("a ANDMAYBE b ANDMAYBE c ANDMAYBE d")
|
|
# assert text_type(q), "(((f:a ANDMAYBE f:b) ANDMAYBE f:c) ANDMAYBE f:d)")
|
|
#
|
|
# p = make_parser(right_andmaybe)
|
|
# q = p.parse("a ANDMAYBE b ANDMAYBE c ANDMAYBE d")
|
|
# assert text_type(q), "(f:a ANDMAYBE (f:b ANDMAYBE (f:c ANDMAYBE f:d)))")
|
|
#
|
|
# p = make_parser(not_)
|
|
# q = p.parse("a NOT b NOT c NOT d", normalize=False)
|
|
# assert text_type(q), "(f:a AND NOT f:b AND NOT f:c AND NOT f:d)")
|
|
#
|
|
# p = make_parser(left_andmaybe)
|
|
# q = p.parse("(a ANDMAYBE b) ANDMAYBE (c ANDMAYBE d)")
|
|
# assert text_type(q), "((f:a ANDMAYBE f:b) ANDMAYBE (f:c ANDMAYBE f:d))")
|
|
#
|
|
# p = make_parser(right_andmaybe)
|
|
# q = p.parse("(a ANDMAYBE b) ANDMAYBE (c ANDMAYBE d)")
|
|
# assert text_type(q), "((f:a ANDMAYBE f:b) ANDMAYBE (f:c ANDMAYBE f:d))")
|
|
|
|
|
|
def test_not_assoc():
|
|
qp = default.QueryParser("text", None)
|
|
q = qp.parse(u("a AND NOT b OR c"))
|
|
assert text_type(q) == "((text:a AND NOT text:b) OR text:c)"
|
|
|
|
qp = default.QueryParser("text", None)
|
|
q = qp.parse(u("a NOT (b OR c)"))
|
|
assert text_type(q) == "(text:a AND NOT (text:b OR text:c))"
|
|
|
|
|
|
def test_fieldname_space():
|
|
qp = default.QueryParser("a", None)
|
|
q = qp.parse("Man Ray: a retrospective")
|
|
assert text_type(q) == "(a:Man AND a:Ray: AND a:a AND a:retrospective)"
|
|
|
|
|
|
def test_fieldname_fieldname():
|
|
qp = default.QueryParser("a", None)
|
|
q = qp.parse("a:b:")
|
|
assert q == query.Term("a", "b:")
|
|
|
|
|
|
def test_paren_fieldname():
|
|
schema = fields.Schema(kind=fields.ID, content=fields.TEXT)
|
|
|
|
qp = default.QueryParser("content", schema)
|
|
q = qp.parse(u("(kind:1d565 OR kind:7c584) AND (stuff)"))
|
|
assert text_type(q) == "((kind:1d565 OR kind:7c584) AND content:stuff)"
|
|
|
|
q = qp.parse(u("kind:(1d565 OR 7c584) AND (stuff)"))
|
|
assert text_type(q) == "((kind:1d565 OR kind:7c584) AND content:stuff)"
|
|
|
|
|
|
def test_star_paren():
|
|
qp = default.QueryParser("content", None)
|
|
q = qp.parse(u("(*john*) AND (title:blog)"))
|
|
|
|
assert q.__class__ == query.And
|
|
assert q[0].__class__ == query.Wildcard
|
|
assert q[1].__class__ == query.Term
|
|
assert q[0].fieldname == "content"
|
|
assert q[1].fieldname == "title"
|
|
assert q[0].text == "*john*"
|
|
assert q[1].text == "blog"
|
|
|
|
|
|
def test_dash():
|
|
ana = analysis.StandardAnalyzer("[^ \t\r\n()*?]+")
|
|
schema = fields.Schema(title=fields.TEXT(analyzer=ana),
|
|
text=fields.TEXT(analyzer=ana),
|
|
time=fields.ID)
|
|
qtext = u("*Ben-Hayden*")
|
|
|
|
qp = default.QueryParser("text", schema)
|
|
q = qp.parse(qtext)
|
|
assert q.__class__ == query.Wildcard
|
|
assert q.fieldname == "text"
|
|
assert q.text == "*ben-hayden*"
|
|
|
|
qp = default.MultifieldParser(["title", "text", "time"], schema)
|
|
q = qp.parse(qtext)
|
|
assert q.__unicode__() == "(title:*ben-hayden* OR text:*ben-hayden* OR time:*Ben-Hayden*)"
|
|
|
|
|
|
def test_bool_True():
|
|
schema = fields.Schema(text=fields.TEXT, bool=fields.BOOLEAN)
|
|
qp = default.QueryParser("text", schema)
|
|
q = qp.parse("bool:True")
|
|
assert q.__class__ == query.Term
|
|
assert q.fieldname == "bool"
|
|
assert q.text is True
|
|
|
|
|
|
def test_not_order():
|
|
schema = fields.Schema(id=fields.STORED,
|
|
count=fields.KEYWORD(lowercase=True),
|
|
cats=fields.KEYWORD(lowercase=True))
|
|
qp = default.QueryParser("count", schema)
|
|
|
|
q1 = qp.parse(u("(NOT (count:0) AND cats:1)"))
|
|
assert q1.__class__ == query.And
|
|
assert q1[0].__class__ == query.Not
|
|
assert q1[1].__class__ == query.Term
|
|
assert q1.__unicode__() == '(NOT count:0 AND cats:1)'
|
|
|
|
q2 = qp.parse(u("(cats:1 AND NOT (count:0))"))
|
|
assert q2.__class__ == query.And
|
|
assert q2[0].__class__ == query.Term
|
|
assert q2[1].__class__ == query.Not
|
|
assert q2.__unicode__() == '(cats:1 AND NOT count:0)'
|
|
|
|
|
|
def test_spacespace_and():
|
|
qp = default.QueryParser("f", None)
|
|
# one blank before/after AND
|
|
q = qp.parse("A AND B")
|
|
assert q.__class__ == query.And
|
|
assert len(q) == 2
|
|
assert q[0] == query.Term("f", "A")
|
|
assert q[1] == query.Term("f", "B")
|
|
|
|
# two blanks before AND
|
|
q = qp.parse("A AND B")
|
|
assert q.__class__ == query.And
|
|
assert len(q) == 2
|
|
assert q[0] == query.Term("f", "A")
|
|
assert q[1] == query.Term("f", "B")
|
|
|
|
|
|
def test_unicode_num():
|
|
schema = fields.Schema(num=fields.NUMERIC)
|
|
parser = default.QueryParser(u("num"), schema=schema)
|
|
q = parser.parse(u("num:1"))
|
|
|
|
_ = text_type(q)
|
|
|
|
|
|
def test_phrase_andmaybe():
|
|
qp = default.QueryParser("f", None)
|
|
|
|
q = qp.parse(u('Dahmen ANDMAYBE "Besov Spaces"'))
|
|
assert isinstance(q, query.AndMaybe)
|
|
assert q[0] == query.Term("f", u("Dahmen"))
|
|
assert q[1] == query.Phrase("f", [u("Besov"), u("Spaces")])
|
|
|
|
|
|
def test_phrase_boost():
|
|
qp = default.QueryParser("f", None)
|
|
q = qp.parse(u('Dahmen ANDMAYBE "Besov Spaces"^9'))
|
|
assert isinstance(q, query.AndMaybe)
|
|
assert q[0] == query.Term("f", u("Dahmen"))
|
|
assert q[1] == query.Phrase("f", [u("Besov"), u("Spaces")], boost=9)
|
|
|
|
|
|
def test_andmaybe_none():
|
|
schema = fields.Schema(f=fields.TEXT, year=fields.NUMERIC)
|
|
qp = default.QueryParser("f", schema)
|
|
_ = qp.parse(u("Dahmen ANDMAYBE @year:[2000 TO]"))
|
|
|
|
|
|
def test_quoted_prefix():
|
|
qp = default.QueryParser("f", None)
|
|
|
|
expr = r"(^|(?<=[ (]))(?P<text>\w+|[*]):"
|
|
qp.replace_plugin(plugins.FieldsPlugin(expr))
|
|
|
|
q = qp.parse(u('foo url:http://apple.com:8080/bar* baz'))
|
|
assert isinstance(q, query.And)
|
|
assert q[0] == query.Term("f", "foo")
|
|
assert q[1] == query.Prefix("url", "http://apple.com:8080/bar")
|
|
assert q[2] == query.Term("f", "baz")
|
|
assert len(q) == 3
|