Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make bool values lowercase in solr query url - fixes #401 #435

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 25 additions & 6 deletions pysolr.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,9 @@ def safe_urlencode(params, doseq=0):
which can't fail down to ascii.
"""
if IS_PY3:
for key, val in params.items():
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In general, pysolr has avoided type coercion so that it does not need to know which type(s) Solr supports for a given field (e.g. a parameter like group is documented as accepting true and will also accept on or yes, but not 1 or True). I think this change is relatively safe, though, because Solr doesn't accept that value natively. The only case where it could cause a problem is if someone was passing a Solr string which they, for some reason, wanted to have processed literally — for example, if I had a Python app which used a Solr StringField for something and expected the literal value True or False because that field type isn't case-insensitive.

if isinstance(val, bool):
params[key] = str(val).lower()
return urlencode(params, doseq)

if hasattr(params, "items"):
Expand Down Expand Up @@ -300,6 +303,16 @@ def __iter__(self):
result = result._next_page_query and result._next_page_query()


def get_nested(obj, keys, default=None):
    """
    Look up a chain of nested keys in a dict-like object.

    Each entry of ``keys`` is applied in order with ``obj[key]``, descending
    one level per key.

    :param obj: The outermost container to start the lookup from.
    :param keys: An iterable of keys/indexes, outermost first.
    :param default: The value returned when the path cannot be followed.
    :return: The value found at the end of the path, or ``default`` when a
        key is missing, an index is out of range, or an intermediate value
        is not subscriptable (for example ``None``).
    """
    # NOTE(review): a reviewer observed that this helper is only used by a
    # debug logging call and could be replaced by a standard dictionary
    # get-with-default call there.
    for key in keys:
        try:
            obj = obj[key]
        except (KeyError, IndexError, TypeError):
            # KeyError: missing mapping key; IndexError: sequence index out
            # of range; TypeError: the intermediate value (e.g. None) does
            # not support subscripting at all.
            return default
    return obj


class Solr(object):
"""
The main object for working with Solr.
Expand Down Expand Up @@ -528,7 +541,7 @@ def _update(
path_handler = handler
if self.use_qt_param:
path_handler = "select"
query_vars.append("qt=%s" % safe_urlencode(handler, True))
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This appears to be unrelated to the proposed change.

query_vars.append("qt=%s" % handler)

path = "%s/" % path_handler

Expand Down Expand Up @@ -830,11 +843,17 @@ def search(self, q, search_handler=None, **kwargs):
response = self._select(params, handler=search_handler)
decoded = self.decoder.decode(response)

self.log.debug(
"Found '%s' search results.",
# cover both cases: there is no response key or value is None
(decoded.get("response", {}) or {}).get("numFound", 0),
)
if decoded.get("grouped"):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would defer support for grouped responses to a larger task which would provide more help for them - just changing a log message doesn't seem to add much value.

group_key = next(iter(decoded["grouped"]))
self.log.debug(
"Found results grouped by '%s' with %d matches",
group_key,
decoded["grouped"][group_key]["matches"],
)
else:
self.log.debug(
"Found %d docs", get_nested(decoded, ["response", "numFound"], 0)
)

cursorMark = params.get("cursorMark", None)
if cursorMark != decoded.get("nextCursorMark", cursorMark):
Expand Down
12 changes: 11 additions & 1 deletion tests/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from __future__ import absolute_import, unicode_literals

import datetime
import json
import random
import time
import unittest
Expand All @@ -17,7 +18,7 @@
clean_xml_string,
force_bytes,
force_unicode,
json,
get_nested,
safe_urlencode,
sanitize,
unescape_html,
Expand Down Expand Up @@ -72,6 +73,9 @@ def test_safe_urlencode(self):
"test=Hello \u2603!&test=Helllo world!",
)

# Boolean options for Solr should be in lowercase.
self.assertTrue("True" not in safe_urlencode({"group": True}))
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This assertion would still pass with a bug that encoded the value as group=None or group="false".

Suggested change
self.assertTrue("True" not in safe_urlencode({"group": True}))
self.assertEqual("group=true", safe_urlencode({"group": True}))


def test_sanitize(self):
self.assertEqual(
sanitize(
Expand Down Expand Up @@ -101,6 +105,12 @@ def test_force_bytes(self):
def test_clean_xml_string(self):
    """Check clean_xml_string against a fixed four-character sample."""
    raw = "\x00\x0b\x0d\uffff"
    # Of the four input characters, only the carriage return is expected
    # to come back.
    expected = "\x0d"
    self.assertEqual(clean_xml_string(raw), expected)

def test_get_nested(self):
    """get_nested follows key paths and falls back to the default."""
    doc = {"a": {"b": {"c": 2023}}}

    # A missing key without an explicit default yields None.
    self.assertIsNone(get_nested(doc, ["a", "e"]))

    # A missing key with an explicit default yields that default; this
    # mirrors the way the helper is called with a default of 0.
    self.assertEqual(get_nested(doc, ["a", "e"], 0), 0)
    self.assertEqual(get_nested(doc, ["x", "y"], "fallback"), "fallback")

    # Existing paths return the stored value, at any depth.
    self.assertEqual(get_nested(doc, ["a"]), doc["a"])
    self.assertEqual(get_nested(doc, ["a", "b", "c"]), doc["a"]["b"]["c"])


class ResultsTestCase(unittest.TestCase):
def test_init(self):
Expand Down