Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

review: small fixes #6541

Open
wants to merge 4 commits into
base: 7.8.x
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 52 additions & 5 deletions lib/cylc/review.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
from cylc.review_dao import CylcReviewDAO
from cylc.task_state import (
TASK_STATUSES_ORDERED, TASK_STATUS_GROUPS)
from cylc.url import quote
from cylc.version import CYLC_VERSION
from cylc.ws import get_util_home
from cylc.suite_srv_files_mgr import SuiteSrvFilesManager
Expand Down Expand Up @@ -496,7 +497,7 @@ def suites(self, user, names=None, page=1, order=None, per_page=None,
continue
try:
data["entries"].append({
"name": item,
"name": unicode(item, 'UTF-8', errors='ignore'),
"info": {},
"last_activity_time": (
self.get_last_activity_time(user, item))})
Expand Down Expand Up @@ -548,6 +549,7 @@ def get_file(self, user, suite, path, path_in_tar=None, mode=None):
suite = suite.replace('%2F', '/')
f_name = self._get_user_suite_dir(user, suite, path)
self._check_file_path(path)
self._check_link_path(f_name, suite)
view_size_max = self.VIEW_SIZE_MAX
if path_in_tar:
tar_f = tarfile.open(f_name, "r:gz")
Expand All @@ -561,7 +563,7 @@ def get_file(self, user, suite, path, path_in_tar=None, mode=None):
mime = self.MIME_TEXT_PLAIN
else:
mime = mimetypes.guess_type(
urllib.pathname2url(path_in_tar))[0]
quote(path_in_tar))[0]
handle.seek(0)
if (mode == "download" or
f_size > view_size_max or
Expand All @@ -585,7 +587,7 @@ def get_file(self, user, suite, path, path_in_tar=None, mode=None):
if open(f_name).read(2) == "#!":
mime = self.MIME_TEXT_PLAIN
else:
mime = mimetypes.guess_type(urllib.pathname2url(f_name))[0]
mime = mimetypes.guess_type(quote(f_name))[0]
if not mime:
mime = self.MIME_TEXT_PLAIN
if (mode == "download" or
Expand All @@ -596,9 +598,15 @@ def get_file(self, user, suite, path, path_in_tar=None, mode=None):
return cherrypy.lib.static.serve_file(f_name, mime)
text = open(f_name).read()
try:
text = unicode(text, encoding='UTF-8', errors='replace')
if mode in [None, "text"]:
text = jinja2.escape(text)
lines = [unicode(line) for line in text.splitlines()]
# escape HTML tags
# NOTE: jinja2.escape returns a Markup object which will also
# escape future modifications to this string. In order to
# allow log file syntax highlighting (DEBUG, INFO, etc) we
# must cast this back to a unicode to remove this functionality.
text = unicode(jinja2.escape(text))
lines = text.splitlines()
except UnicodeDecodeError:
if path_in_tar:
handle.seek(0)
Expand Down Expand Up @@ -912,6 +920,45 @@ def _check_string_for_path(string):
if os.path.split(string)[0] != '':
raise cherrypy.HTTPError(403)

@classmethod
def _check_link_path(cls, path, suite):
"""Raise HTTP 403 error if the path is not under cylc-run/<suite>.

The files "cylc review" is intended to serve may be symlinked to
other parts of the filesystem, however, the linked files should
always reside in directories managed by Cylc which should always
sit under the path "cylc-run/<suite>".

Examples:
# OK
>>> CylcReviewService._check_link_path(
... os.path.expanduser('~/cylc-run/elephant'), 'elephant'
... )

# BAD
>>> CylcReviewService._check_link_path(
... os.path.expanduser('~/cylc-run/elephant'), 'shrew'
... )
Traceback (most recent call last):
...
HTTPError: (403, None)

# BAD
>>> CylcReviewService._check_link_path(
... os.path.expanduser('~/anything-else'), 'anything-else'
... )
Traceback (most recent call last):
...
HTTPError: (403, None)

Raises:
cherrypy.HTTPError(403)

"""
rel = os.path.join('cylc-run', suite)
if rel not in os.path.realpath(path):
raise cherrypy.HTTPError(403)

@classmethod
def _check_file_path(cls, path):
"""Raise HTTP 403 error if the path is not intended to be served.
Expand Down
52 changes: 52 additions & 0 deletions lib/cylc/url.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# This code is derived from the cPython "urllib" module and retains its
# original license.
#
# Python software and documentation are licensed under the
# Python Software Foundation License Version 2.
Comment on lines +1 to +5
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@dpmatthews, can you ok this.

Sadly I couldn't achieve the required changes via hot patching.

The original code can be found here (2.7) https://github.com/python/cpython/blob/8d21aa21f2cbc6d50aab3f420bb23be1d081dac4/Lib/urllib.py#L1269

And the license here: https://github.com/python/cpython/blob/v2.7.18/LICENSE#L62


always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
'abcdefghijklmnopqrstuvwxyz'
'0123456789' '_.-')
_safe_map = {}
for i, c in zip(xrange(256), str(bytearray(xrange(256)))):
_safe_map[c] = c if (i < 128 and c in always_safe) else '%{:02X}'.format(i)
_safe_quoters = {}

def quote(s, safe='/'):
"""quote('abc def') -> 'abc%20def'

Each part of a URL, e.g. the path info, the query, etc., has a
different set of reserved characters that must be quoted.

RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
the following reserved characters.

reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
"$" | ","

Each of these characters is reserved in some component of a URL,
but not necessarily in all of them.

By default, the quote function is intended for quoting the path
section of a URL. Thus, it will not encode '/'. This character
is reserved, but in typical usage the quote function is being
called on a path where the existing slash characters are used as
reserved characters.
"""
# fastpath
if not s:
if s is None:
raise TypeError('None object cannot be quoted')
return s
cachekey = (safe, always_safe)
try:
(quoter, safe) = _safe_quoters[cachekey]
except KeyError:
safe_map = _safe_map.copy()
safe_map.update([(c, c) for c in safe])
quoter = safe_map.get
oliver-sanders marked this conversation as resolved.
Show resolved Hide resolved
safe = always_safe + safe
_safe_quoters[cachekey] = (quoter, safe)
if not s.rstrip(safe):
return s
return ''.join([quoter(c, c) for c in s])
oliver-sanders marked this conversation as resolved.
Show resolved Hide resolved