From 0f34ae80c92344bf61e899960ccccd0936702bbc Mon Sep 17 00:00:00 2001 From: Juanjo Alvarez Date: Mon, 28 Jan 2019 15:22:17 +0100 Subject: [PATCH 01/16] V1 compatibility layer (still needs cleaning up) --- bblfsh/compat.py | 235 +++++++++++++++++++++++++++++++ bblfsh/compat_test.py | 288 ++++++++++++++++++++++++++++++++++++++ bblfsh/pyuast.cc | 3 + bblfsh/result_context.py | 289 +++++++++++++++++++++++++++++++++------ bblfsh/test.py | 64 ++++----- 5 files changed, 805 insertions(+), 74 deletions(-) create mode 100644 bblfsh/compat.py create mode 100644 bblfsh/compat_test.py diff --git a/bblfsh/compat.py b/bblfsh/compat.py new file mode 100644 index 0000000..1984b78 --- /dev/null +++ b/bblfsh/compat.py @@ -0,0 +1,235 @@ +import os +import sys +from typing import Union, List, Any + +import grpc + +import bblfsh.client as bcli +from bblfsh import role_id, role_name +from bblfsh.result_context import ResultContext, NodeIterator, Node +from bblfsh.result_context import iterator as bcli_iterator +from bblfsh.aliases import ( + ParseRequest, ParseResponse, DriverStub, ProtocolServiceStub, + VersionRequest, SupportedLanguagesRequest, ModeType, + Mode, VersionResponse, DESCRIPTOR +) +from bblfsh.pyuast import uast, iterator as native_iterator +from bblfsh.tree_order import TreeOrder + +# TODO XXX: cleanup imports and stuff +# TODO(juanjux): mark officially as deprecated + +if "BBLFSH_COMPAT_SHUTUP" not in os.environ: + print("Warning: using deprecated bblfsh v1 compatibility layer.", + file=sys.stderr) + +class WrongTypeException(Exception): + pass + + +class CompatParseResponse: + def __init__(self, ctx: ResultContext, filename: str = "") -> None: + self._res_context = ctx + self._filename = filename + + @property + def uast(self) -> 'CompatNodeIterator': + return self._res_context.uast + + @property + def ast(self) -> 'CompatNodeIterator': + return self._res_context.ast + # return self.uast + + @property + def ctx(self) -> ResultContext: + return self._res_context + + 
@property + def elapsed(self) -> int: + # FIXME(juanjux): check if the can get this or measure ourselves + return -1 + + @property + def language(self) -> str: + return self._res_context.language + + @property + def filename(self) -> str: + return self._filename + + # FIXME(juanjux) get type + @property + def DESCRIPTOR(self) -> Any: + return self._res_context._ctx.DESCRIPTOR + + @property + def errors(selfs) -> List: + # ParseResponse would have raised an exception on errors + return [] + + +class CompatBblfshClient: + def __init__(self, endpoint: Union[str, grpc.Channel]) -> None: + self._bblfsh_cli = bcli.BblfshClient(endpoint) + + self._channel = self._bblfsh_cli._channel + self._stub_v1 = self._bblfsh_cli._stub_v1 + self._stub_v2 = self._bblfsh_cli._stub_v2 + + def _parse(self, filename: str, language: str = None, contents: str = None, + timeout: float = None, + mode: ModeType = Mode.Value('ANNOTATED')) -> CompatParseResponse: + + if timeout is not None: + timeout = int(timeout) + + res = self._bblfsh_cli.parse(filename, language, contents, + mode=mode, timeout=timeout) + return CompatParseResponse(res, filename) + + def parse(self, filename: str, language: str = None, contents: str = None, + timeout: float = None) -> CompatParseResponse: + + return self._parse(filename, language, contents, timeout, + Mode.Value('ANNOTATED')) + + def native_parse(self, filename: str, language: str = None, + contents: str = None, + timeout: float = None) -> CompatParseResponse: + + return self._parse(filename, language, contents, timeout, + Mode.Value('NATIVE')) + + def supported_languages(self) -> List[str]: + return self._bblfsh_cli.supported_languages() + + def version(self) -> VersionResponse: + return self._bblfsh_cli.version() + + def close(self) -> None: + return self._bblfsh_cli.close() + + +class CompatNodeIterator: + def __init__( + self, + nodeit: NodeIterator, + only_nodes: bool = False + ) -> None: + self._nodeit = nodeit + self._ctx = nodeit._ctx + 
self._only_nodes = only_nodes + # Used to forward calls of the old Node object + # Check if this, and properties(), are needed + self._last_node = None + + def __iter__(self) -> 'CompatNodeIterator': + return self + + def __next__(self) -> Node: + next_val = next(self._nodeit) + + is_node = isinstance(next_val, Node) + val = next_val._internal_node if is_node else next_val + + # Skip positions and non dicts/lists, the later if only_nodes = True + skip = False + if isinstance(val, dict): + if "@type" not in val or val["@type"] == "uast:Positions": + skip = True + # elif self._only_nodes and not isinstance(val, list): + elif self._only_nodes: + skip = True + + if skip: + val = self.__next__()._internal_node + + ret_val = next_val if is_node else Node(value=val) + self._last_node = ret_val + return ret_val + + def filter(self, query) -> 'CompatNodeIterator': + return CompatNodeIterator(NodeIterator(self._ctx.filter(query), self._ctx)) + + @property + def properties(self) -> dict: + if isinstance(self._last_node, dict): + return self._last_node.keys() + else: + return {} + + +# FIXME XXX: if the native node was created from a dictionary, the returned iterator +# is empty +def iterator(n: Union[Node, CompatNodeIterator], order: TreeOrder = TreeOrder.PRE_ORDER)\ + -> CompatNodeIterator: + + if isinstance(n, CompatNodeIterator): + return CompatNodeIterator(n._nodeit.iterate(order), only_nodes=True) + elif isinstance(n, Node): + nat_it = native_iterator(n._internal_node, order) + return CompatNodeIterator(NodeIterator(nat_it, n._ctx), only_nodes=True) + elif isinstance(n, dict): + nat_it = native_iterator(n, order) + return CompatNodeIterator(NodeIterator(nat_it, uast()), only_nodes=True) + else: + raise WrongTypeException( + "iterator on non node or iterator type (%s)" % str(type(n)) + ) + +def filter(n: Node, query: str) -> CompatNodeIterator: + # XXX remove check + if not isinstance(n, Node): + raise WrongTypeException( + "Filter on non node or iterator type (%s)" % 
str(type(n)) + ) + ctx = uast() + return CompatNodeIterator(NodeIterator(ctx.filter(query, n._internal_node), ctx)) + + +def filter_nodes(n: Node, query: str) -> CompatNodeIterator: + # XXX Create from NodeIterator from n._ctx.filter + return CompatNodeIterator(filter(n, query), only_nodes=True) + + +def _scalariter2item(n: Node, query: str, wanted_type: type) -> Any: + rlist = list(filter(n, query)) + + if len(rlist) > 1: + # XXX some exception type + raise Exception("More than one result for %s typed query" % str(type)) + + value = rlist[0] + if isinstance(value, Node): + value = value._internal_node + + value_type = type(value) + if wanted_type == float and value_type == int: + value = float(value) + + if not isinstance(value, wanted_type): + # XXX some exception type + raise Exception("Typed query for type %s returned type %s instead" + % (str(wanted_type), str(type(value)))) + + return wanted_type(value) + + +def filter_string(n: Node, query: str) -> str: + return _scalariter2item(n, query, str) + + +def filter_bool(n: Node, query: str) -> bool: + return _scalariter2item(n, query, bool) + + +def filter_int(n: Node, query: str) -> int: + return _scalariter2item(n, query, int) + + +def filter_float(n: Node, query: str) -> float: + return _scalariter2item(n, query, float) + + +filter_number = filter_float diff --git a/bblfsh/compat_test.py b/bblfsh/compat_test.py new file mode 100644 index 0000000..a7fde69 --- /dev/null +++ b/bblfsh/compat_test.py @@ -0,0 +1,288 @@ +import os +import resource +import unittest + + +import docker + +from bblfsh.compat import ( + filter as xpath_filter, role_id, iterator, role_name, Node, TreeOrder, filter_bool, + filter_number, WrongTypeException, CompatNodeIterator +) +from bblfsh.compat import CompatBblfshClient as BblfshClient +from bblfsh.launcher import ensure_bblfsh_is_running +from bblfsh.client import NonUTF8ContentException + + +class BblfshTests(unittest.TestCase): + BBLFSH_SERVER_EXISTED = None + fixtures_file = 
"fixtures/test.py" + + @classmethod + def setUpClass(cls): + cls.BBLFSH_SERVER_EXISTED = ensure_bblfsh_is_running() + + @classmethod + def tearDownClass(cls): + if not cls.BBLFSH_SERVER_EXISTED: + client = docker.from_env(version="auto") + client.containers.get("bblfshd").remove(force=True) + client.api.close() + + def _parse_fixture(self): + return self.client.parse(self.fixtures_file) + + def setUp(self): + self.client = BblfshClient("0.0.0.0:9432") + + def _validate_resp(self, resp): + self.assertIsNotNone(resp) + self.assertEqual(len(resp.errors), 0) + + def testVersion(self): + version = self.client.version() + self.assertTrue(hasattr(version, "version")) + self.assertTrue(version.version) + self.assertTrue(hasattr(version, "build")) + self.assertTrue(version.build) + + def testNativeParse(self): + reply = self.client.native_parse(__file__) + assert(reply.ast) + + def testNonUTF8ParseError(self): + with self.assertRaises(NonUTF8ContentException): + self.client.parse("", "Python", b"a = '\x80abc'") + + def testUASTDefaultLanguage(self): + self._validate_resp(self.client.parse(__file__)) + + def testUASTPython(self): + self._validate_resp(self.client.parse(__file__, language="Python")) + + def testUASTFileContents(self): + resp = self._parse_fixture() + self._validate_resp(resp) + + def testBrokenFilter(self): + with self.assertRaises(WrongTypeException): + xpath_filter(0, "foo") + + def testFilterInternalType(self): + node = Node() + node.internal_type = 'a' + self.assertTrue(any(xpath_filter(node, "//a"))) + self.assertFalse(any(xpath_filter(node, "//b"))) + + def testFilterToken(self): + uast = self._parse_fixture().uast + it = xpath_filter(uast, "//*[@token='else']/text()") + first = next(it).get_str() + self.assertEqual(first, "else") + + def testFilterRoles(self): + uast = self._parse_fixture().uast + it = xpath_filter(uast, "//*[@role='Identifier']") + self.assertIsInstance(it, CompatNodeIterator) + + l = list(it) + self.assertGreater(len(l), 0) + + it = 
xpath_filter(uast, "//*[@role='Friend']") + self.assertIsInstance(it, CompatNodeIterator) + l = list(it) + self.assertEqual(len(l), 0) + + def testFilterStartOffset(self): + uast = self._parse_fixture().uast + self.assertTrue(any(xpath_filter(uast, "//uast:Positions/start/uast:Position[@offset=11749]"))) + self.assertFalse(any(xpath_filter(uast, "//uast:Positions/start/uast:Position[@offset=99999]"))) + + def testFilterStartLine(self): + uast = self._parse_fixture().uast + self.assertTrue(any(xpath_filter(uast, "//uast:Positions/start/uast:Position[@col=42]"))) + self.assertFalse(any(xpath_filter(uast, "//uast:Positions/start/uast:Position[@col=99999]"))) + + def testFilterStartCol(self): + uast = self._parse_fixture().uast + self.assertTrue(any(xpath_filter(uast, "//uast:Positions/start/uast:Position[@col=42]"))) + self.assertFalse(any(xpath_filter(uast, "//uast:Positions/start/uast:Position[@col=99999]"))) + + def testFilterEndOffset(self): + uast = self._parse_fixture().uast + self.assertTrue(any(xpath_filter(uast, "//uast:Positions/end/uast:Position[@offset=11757]"))) + self.assertFalse(any(xpath_filter(uast, "//uast:Positions/end/uast:Position[@offset=99999]"))) + + def testFilterEndLine(self): + uast = self._parse_fixture().uast + self.assertTrue(any(xpath_filter(uast, "//uast:Positions/end/uast:Position[@line=321]"))) + self.assertFalse(any(xpath_filter(uast, "//uast:Positions/end/uast:Position[@line=99999]"))) + + def testFilterEndCol(self): + uast = self._parse_fixture().uast + self.assertTrue(any(xpath_filter(uast, "//uast:Positions/end/uast:Position[@col=49]"))) + self.assertFalse(any(xpath_filter(uast, "//uast:Positions/end/uast:Position[@col=99999]"))) + + def testFilterProperties(self): + node = Node() + node.properties['k1'] = 'v1' + node.properties['k2'] = 'v2' + self.assertTrue(any(xpath_filter(node, "/*[@k1='v1']"))) + self.assertTrue(any(xpath_filter(node, "/*[@k2='v2']"))) + self.assertFalse(any(xpath_filter(node, "/*[@k1='v2']"))) + + def 
testFilterBool(self): + uast = self._parse_fixture().uast + self.assertTrue(filter_bool(uast, "boolean(//uast:Positions/end/uast:Position[@col=49])")) + self.assertFalse(filter_bool(uast, "boolean(//uast:Positions/end/uast:Position[@col=9999])")) + + def testFilterNumber(self): + res = filter_number(self._parse_fixture().uast, + "count(//uast:Positions/end/uast:Position[@col=49])") + self.assertEqual(int(res), 2) + + # get_str() already tested by testFiltertoken + + def testRoleIdName(self): + self.assertEqual(role_id(role_name(1)), 1) + self.assertEqual(role_name(role_id("IDENTIFIER")), "IDENTIFIER") + + @staticmethod + def _itTestTree(): + root = Node() + root.internal_type = 'root' + root.start_position.offset = 0 + root.start_position.line = 0 + + son1 = Node() + son1.internal_type = 'son1' + son1.start_position.offset = 1 + + son1_1 = Node() + son1_1.internal_type = 'son1_1' + son1_1.start_position.offset = 10 + + son1_2 = Node() + son1_2.internal_type = 'son1_2' + son1_2.start_position.offset = 10 + + son1.children.extend([son1_1, son1_2]) + + son2 = Node() + son2.internal_type = 'son2' + son2.start_position.offset = 100 + + son2_1 = Node() + son2_1.internal_type = 'son2_1' + son2_1.start_position.offset = 5 + + son2_2 = Node() + son2_2.internal_type = 'son2_2' + son2_2.start_position.offset = 15 + + son2.children.extend([son2_1, son2_2]) + root.children.extend([son1, son2]) + + return root + + def testIteratorPreOrder(self): + root = self._itTestTree() + it = iterator(root, TreeOrder.PRE_ORDER) + self.assertIsNotNone(it) + expanded = [node.internal_type for node in it] + self.assertListEqual(expanded, ['root', 'son1', 'son1_1', 'son1_2', + 'son2', 'son2_1', 'son2_2']) + + def testIteratorPostOrder(self): + root = self._itTestTree() + it = iterator(root, TreeOrder.POST_ORDER) + self.assertIsNotNone(it) + expanded = [node.internal_type for node in it] + self.assertListEqual(expanded, ['son1_1', 'son1_2', 'son1', 'son2_1', + 'son2_2', 'son2', 'root']) + + def 
testIteratorLevelOrder(self): + root = self._itTestTree() + it = iterator(root, TreeOrder.LEVEL_ORDER) + self.assertIsNotNone(it) + expanded = [node.internal_type for node in it] + self.assertListEqual(expanded, ['root', 'son1', 'son2', 'son1_1', + 'son1_2', 'son2_1', 'son2_2']) + + def testAddToNode(self): + n = Node() + n._internal_node["foo"] = "bar" + self.assertEqual(n.properties["foo"], "bar") + + # FIXME(juanjux): fails + # def testIteratorPositionOrder(self): + # root = self._itTestTree() + # it = iterator(root, TreeOrder.POSITION_ORDER) + # self.assertIsNotNone(it) + # expanded = [node.internal_type for node in it] + # self.assertListEqual(expanded, ['root', 'son1', 'son2_1', 'son1_1', + # 'son1_2', 'son2_2', 'son2']) + + def testFilterInsideIter(self): + root = self.client.parse(__file__).uast + it = iterator(root, TreeOrder.PRE_ORDER) + self.assertIsNotNone(it) + for n in it: + xpath_filter(n, "//*[@roleIdentifier]") + + def testItersMixingIterations(self): + root = self.client.parse(__file__).uast + it = iterator(root, TreeOrder.PRE_ORDER) + next(it); next(it); next(it) + n = next(it) + it2 = iterator(n, TreeOrder.PRE_ORDER) + next(it2) + val_it1 = next(it).get() + val_it2 = next(it2).get() + + self.assertDictEqual(val_it1, val_it2) + + # XXX uncomment + def testManyFilters(self): + root = self._parse_fixture().uast + root.properties['k1'] = 'v2' + root.properties['k2'] = 'v1' + + before = resource.getrusage(resource.RUSAGE_SELF) + for _ in range(1000): + xpath_filter(root, "//*[@roleIdentifier]") + + after = resource.getrusage(resource.RUSAGE_SELF) + + # Check that memory usage has not doubled after running the filter + self.assertLess(after[2] / before[2], 2.0) + + def testManyParses(self): + before = resource.getrusage(resource.RUSAGE_SELF) + for _ in range(100): + self.client.parse(self.fixtures_file).uast + + after = resource.getrusage(resource.RUSAGE_SELF) + + self.assertLess(after[2] / before[2], 2.0) + + def testManyParsesAndFilters(self): + 
before = resource.getrusage(resource.RUSAGE_SELF) + for _ in range(100): + root = self.client.parse(self.fixtures_file).uast + xpath_filter(root, "//*[@role='Identifier']") + + after = resource.getrusage(resource.RUSAGE_SELF) + + self.assertLess(after[2] / before[2], 4.0) + + def testSupportedLanguages(self): + res = self.client.supported_languages() + self.assertGreater(len(res), 0) + for l in res: + for key in ('language', 'version', 'status', 'features'): + self.assertTrue(hasattr(l, key)) + self.assertIsNotNone(getattr(l, key)) + +if __name__ == "__main__": + unittest.main() diff --git a/bblfsh/pyuast.cc b/bblfsh/pyuast.cc index ee0da1a..886f491 100644 --- a/bblfsh/pyuast.cc +++ b/bblfsh/pyuast.cc @@ -1078,6 +1078,9 @@ PyInit_pyuast(void) Py_INCREF(&PythonContextType); PyModule_AddObject(m, "Context", (PyObject *)&PythonContextType); + Py_INCREF(&PythonContextExtType); + PyModule_AddObject(m, "ContextExt", (PyObject *)&PythonContextExtType); + Py_INCREF(&PyNodeExtType); PyModule_AddObject(m, "NodeExt", (PyObject *)&PyNodeExtType); diff --git a/bblfsh/result_context.py b/bblfsh/result_context.py index f985074..dcdb045 100644 --- a/bblfsh/result_context.py +++ b/bblfsh/result_context.py @@ -1,7 +1,9 @@ +import copy import typing as t +from collections import MutableSequence from bblfsh.aliases import ParseResponse -from bblfsh.pyuast import decode, IteratorExt, NodeExt, iterator +from bblfsh.pyuast import Context, IteratorExt, NodeExt, decode, iterator, uast from bblfsh.tree_order import TreeOrder @@ -17,18 +19,134 @@ class NotNodeIterationException(Exception): pass -# ResultMultiType = t.NewType("ResultMultiType", t.Union[dict, int, float, bool, str]) +class GetOnEmptyNodeException(Exception): + pass + + ResultMultiType = t.Union[dict, int, float, bool, str, None] +class CompatPosition: + """ + v1 positions were extracted as node.[start|end]_position.[line|col|offset]. 
To + emulate that, this dictionary will be returned when accesing the old position + properties and its setters will update the parent Node real position ones. + """ + + def __init__(self, parent_pos: dict): + self._parent_pos = parent_pos + + @property + def line(self) -> int: + return self._parent_pos["line"] + + @line.setter + def line(self, v: int) -> None: + self._parent_pos["line"] = v + + @property + def col(self) -> int: + return self._parent_pos["col"] + + @col.setter + def col(self, v: int) -> None: + self._parent_pos["col"] = v + + @property + def offset(self) -> int: + return self._parent_pos["offset"] + + @offset.setter + def offset(self, v: int) -> None: + self._parent_pos["offset"] = v + + +class CompatChildren(MutableSequence): + def __init__(self, parent: "Node"): + self._children = parent.get_dict()["@children"] + + @staticmethod + def _node2dict(n): + if isinstance(n, Node): + # Convert to dict before appending + return n.get_dict() + return n + + def __len__(self): + return len(self._children) + + def __getitem__(self, idx): + return self._children[idx] + + def __delitem__(self, idx): + del self._children[idx] + + def __setitem__(self, idx, val): + self._children[idx] = self._node2dict(val) + + def insert(self, idx, val): + self._children.insert(idx, self._node2dict(val)) + + def append(self, val): + self._children.append(self._node2dict(val)) + + def extend(self, items) -> None: + for i in items: + self.append(i) + + def __str__(self): + return str(self._children) + +EMPTY_NODE_DICT = { + "@type": "", + "@token": "", + "@role": [], + "@children": [], +} + +# XXX check if I can totally remove ctx from this class Node: - def __init__(self, node_ext: NodeExt) -> None: + def __init__(self, node_ext: NodeExt = None, value: ResultMultiType=None, + ctx: Context = None) -> None: + + if node_ext and (value is not None): + # XXX exception type + raise Exception( + "Node creation can have node_ext or value, not both" + ) + self._node_ext = node_ext - 
self._loaded_node: ResultMultiType = None + if node_ext is None: + self._internal_node = value if (value is not None)\ + else copy.deepcopy(EMPTY_NODE_DICT) + elif not isinstance(node_ext, NodeExt): + # XXX exception type + raise Exception("Node instanced with a non NodeExt first argument: %s" + % str(type(node_ext))) + else: + # generate self._internal_node from the NodeExt + self._ensure_load() + + if isinstance(self._internal_node, dict): + self._load_children() + + self._ctx = ctx if ctx is not None else uast() + + def _load_children(self) -> None: + "Get all properties of type node or dict and load them into the list" + d = self.get_dict() + children = d["@children"] + for k, v in d.items(): + if k in ["@children", "@pos"]: + continue + if type(v) in [Node, dict]: + children.append(v) def _ensure_load(self) -> None: - if self._loaded_node is None: - self._loaded_node = self._node_ext.load() + if self._node_ext is not None: + self._internal_node = self._node_ext.load() + if isinstance(self._internal_node, dict): + self._internal_node["@children"] = self._internal_node.get("@children", []) def __str__(self) -> str: return str(self.get()) @@ -38,26 +156,21 @@ def __repr__(self) -> str: def get(self) -> ResultMultiType: self._ensure_load() - return self._loaded_node + return self._internal_node - def _get_typed(self, type_: t.Union[type, t.List[type]]) -> ResultMultiType: + def _get_typed(self, *type_list: t.List[type]) -> ResultMultiType: self._ensure_load() - if not isinstance(type_, list) and not isinstance(type_, tuple): - type_list = [type_] - else: - type_list = type_ - - if type(self._loaded_node) not in type_list: + if type(self._internal_node) not in type_list: raise ResultTypeException("Expected {} result, but type is '{}'" - .format(str(type_list), type(self._loaded_node))) - return self._loaded_node + .format(str(type_list), type(self._internal_node))) + return self._internal_node def get_bool(self) -> bool: return t.cast(bool, self._get_typed(bool)) 
def get_float(self) -> float: - res: ResultMultiType = self._get_typed([float, int]) + res: ResultMultiType = self._get_typed(float, int) if isinstance(res, int): res = float(res) return t.cast(float, res) @@ -71,56 +184,154 @@ def get_str(self) -> str: def get_dict(self) -> dict: return t.cast(dict, self._get_typed(dict)) - def iterate(self, order: int) -> 'NodeIterator': - if not isinstance(self._node_ext, NodeExt): - raise NotNodeIterationException("Cannot iterate over leaf of type '{}'" - .format(type(self._node_ext))) - TreeOrder.check_order(order) - return NodeIterator(iterator(self._node_ext, order)) + # TODO(juanjux): backward compatibility methods, remove once v1 + # is definitely deprecated + + @property + def internal_type(self) -> str: + return self.get_dict()["@type"] + + @internal_type.setter + def internal_type(self, t: str) -> None: + d = self.get_dict() + d["@type"] = t + + @property + def properties(self): + return self.get_dict() + + def _is_dict_list(self, key: str) -> t.Optional[t.List]: + val = self.get_dict().get(key, None) + if not val or not isinstance(val, t.List): + return None + + for i in val: + if not isinstance(i, dict): + return None + + return val + + @property + def children(self): + return CompatChildren(self) + + @property + def token(self) -> str: + return self.get_dict()["@token"] + @token.setter + def token(self, t: str) -> None: + d = self.get_dict() + d["@token"] = t + @property + def roles(self) -> t.List: + return self.get_dict().get("@role", []) + + def _add_position(self) -> None: + d = self.get_dict() + if "@pos" not in d: + d["@pos"] = { + "@type": "uast:Positions", + "start": { + "@type": "uast:Position", + "offset": -1, + "line": -1, + "col": -1, + }, + "end": { + "@type": "uast:Position", + "offset": -1, + "line": -1, + "col": -1, + } + } + + @property + def start_position(self): + self._add_position() + start = self.get_dict()["@pos"]["start"] + return CompatPosition(start) + + @property + def end_position(self): + 
self._add_position() + end = self.get_dict()["@pos"]["end"] + return CompatPosition(end) + + +# XXX remove ctx if removed from Node class NodeIterator: - def __init__(self, iter_ext: IteratorExt) -> None: + def __init__(self, iter_ext: IteratorExt, ctx: Context) -> None: self._iter_ext = iter_ext + self._ctx = ctx + # default, can be changed on self.iterate() + self._order: TreeOrder = TreeOrder.PRE_ORDER + # saves the last node for re-iteration with iterate() + self._last_node: Node = None def __iter__(self) -> 'NodeIterator': return self - def __next__(self) -> Node: - return Node(next(self._iter_ext)) + def __next__(self) -> t.Union[ResultMultiType, Node]: + next_node = next(self._iter_ext) + + if isinstance(next_node, NodeExt): + # save last node for potential re-iteration + self._last_node = Node(node_ext=next_node, ctx=self._ctx) + return self._last_node + # non node (bool, str, etc) + return next_node def iterate(self, order: int) -> 'NodeIterator': + if self._last_node is None: + self._last_node = Node(node_ext=next(self._iter_ext), + ctx=self._ctx) + TreeOrder.check_order(order) - return NodeIterator(iterator(next(self._iter_ext), order)) + self._order = order + return NodeIterator( + iterator((self._last_node._node_ext), order), self._ctx) class ResultContext: - def __init__(self, grpc_response: ParseResponse) -> None: - if grpc_response.errors: - raise ResponseError("\n".join( - [error.text for error in grpc_response.errors]) - ) - - self._response = grpc_response - self._ctx = decode(grpc_response.uast, format=0) + def __init__(self, grpc_response: ParseResponse = None) -> None: + if grpc_response: + if grpc_response.errors: + raise ResponseError("\n".join( + [error.text for error in grpc_response.errors]) + ) + self._response = grpc_response + self._ctx = decode(grpc_response.uast, format=0) + else: + self._response = None + self._ctx = uast() def filter(self, query: str) -> NodeIterator: - return NodeIterator(self._ctx.filter(query)) + return 
NodeIterator(self._ctx.filter(query), self._ctx) def get_all(self) -> dict: return self._ctx.load() def iterate(self, order: int) -> NodeIterator: TreeOrder.check_order(order) - return NodeIterator(iterator(self._ctx.root(), order)) + return NodeIterator(iterator(self._ctx.root(), order), self._ctx) @property def language(self) -> str: return self._response.language @property - def uast(self) -> t.Any: - return self._response.uast + def filename(self) -> str: + return self._response.filename + + @property + def uast(self) -> Node: + return Node(node_ext=self._ctx.root(), ctx=self._ctx) + + @property + def ast(self) -> Node: + return Node(node_ext=self._ctx.root(), ctx=self._ctx) def __str__(self) -> str: return str(self.get_all()) diff --git a/bblfsh/test.py b/bblfsh/test.py index 53afbde..e9c841a 100644 --- a/bblfsh/test.py +++ b/bblfsh/test.py @@ -34,7 +34,6 @@ def setUp(self) -> None: def _parse_fixture(self) -> ResultContext: ctx = self.client.parse(self.fixtures_file) self._validate_ctx(ctx) - return ctx def testVersion(self) -> None: @@ -99,14 +98,11 @@ def testBrokenFilter(self) -> None: self.assertRaises(RuntimeError, ctx.filter, "dsdfkj32423#$@#$") - # FIXME: Uncomment once https://github.com/bblfsh/sdk/issues/340 is fixed def testFilterToken(self): ctx = self._parse_fixture() - it = ctx.filter("//*[@token='else']/@token") - print(next(it)) - # Problem: returns the node containing the @token, not the @token string ("else") - # first = next(it).get_str() - # self.assertEqual(first, "else") + it = ctx.filter("//*[@token='else']/text()") + first = next(it).get_str() + self.assertEqual(first, "else") def testFilterRoles(self) -> None: ctx = self._parse_fixture() @@ -277,10 +273,9 @@ def testIteratorPositionOrder(self) -> None: 'son1_2', 'son2_2', 'son2']) def _validate_ctx(self, ctx: ResultContext) -> None: - import bblfsh self.assertIsNotNone(ctx) - self.assertIsInstance(ctx, bblfsh.result_context.ResultContext) - self.assertIsInstance(ctx.uast, bytes) + 
self.assertIsInstance(ctx, ResultContext) + self.assertIsInstance(ctx.uast, Node) def testFilterInsideIter(self) -> None: ctx = self._parse_fixture() @@ -294,45 +289,44 @@ def testItersMixingIterations(self) -> None: next(it); next(it); next(it) n = next(it) - it2 = n.iterate(TreeOrder.PRE_ORDER) + it2 = it.iterate(TreeOrder.PRE_ORDER) next(it2) a = next(it).get() b = next(it2).get() self.assertListEqual(a, b) - # XXX uncomment - # def testManyFilters(self) -> None: - # ctx = self._parse_fixture() + def testManyFilters(self) -> None: + ctx = self._parse_fixture() - # before = resource.getrusage(resource.RUSAGE_SELF) - # for _ in range(500): - # ctx.filter("//*[@role='Identifier']") + before = resource.getrusage(resource.RUSAGE_SELF) + for _ in range(10000): + ctx.filter("//*[@role='Identifier']") - # after = resource.getrusage(resource.RUSAGE_SELF) + after = resource.getrusage(resource.RUSAGE_SELF) - # # Check that memory usage has not doubled - # self.assertLess(after[2] / before[2], 2.0) + # Check that memory usage has not doubled + self.assertLess(after[2] / before[2], 2.0) - # def testManyParses(self) -> None: - # before = resource.getrusage(resource.RUSAGE_SELF) - # for _ in range(100): - # self._parse_fixture() + def testManyParses(self) -> None: + before = resource.getrusage(resource.RUSAGE_SELF) + for _ in range(100): + self.client.parse(self.fixtures_file) - # after = resource.getrusage(resource.RUSAGE_SELF) + after = resource.getrusage(resource.RUSAGE_SELF) - # # Check that memory usage has not doubled - # self.assertLess(after[2] / before[2], 2.0) + # Check that memory usage has not doubled + self.assertLess(after[2] / before[2], 2.0) - # def testManyParsersAndFilters(self) -> None: - # before = resource.getrusage(resource.RUSAGE_SELF) - # for _ in range(100): - # ctx = self.client.parse(self.fixtures_file) - # ctx.filter("//*[@role='Identifier']") + def testManyParsesAndFilters(self) -> None: + before = resource.getrusage(resource.RUSAGE_SELF) + for _ 
in range(100): + ctx = self.client.parse(self.fixtures_file) + ctx.filter("//*[@role='Identifier']") - # after = resource.getrusage(resource.RUSAGE_SELF) + after = resource.getrusage(resource.RUSAGE_SELF) - # # Check that memory usage has not doubled - # self.assertLess(after[2] / before[2], 2.0) + # Check that memory usage has not doubled + self.assertLess(after[2] / before[2], 2.0) def testSupportedLanguages(self) -> None: res = self.client.supported_languages() From 9819edcd69b93bb5cea51170dcc36d7ae5ffa567 Mon Sep 17 00:00:00 2001 From: Juanjo Alvarez Date: Tue, 12 Mar 2019 17:16:33 +0100 Subject: [PATCH 02/16] Moved Node and related types into own file Signed-off-by: Juanjo Alvarez --- bblfsh/compat.py | 26 ++-- bblfsh/compat_test.py | 6 +- bblfsh/node.py | 243 ++++++++++++++++++++++++++++++++++++++ bblfsh/result_context.py | 249 +-------------------------------------- 4 files changed, 262 insertions(+), 262 deletions(-) create mode 100644 bblfsh/node.py diff --git a/bblfsh/compat.py b/bblfsh/compat.py index 1984b78..91725f6 100644 --- a/bblfsh/compat.py +++ b/bblfsh/compat.py @@ -7,7 +7,6 @@ import bblfsh.client as bcli from bblfsh import role_id, role_name from bblfsh.result_context import ResultContext, NodeIterator, Node -from bblfsh.result_context import iterator as bcli_iterator from bblfsh.aliases import ( ParseRequest, ParseResponse, DriverStub, ProtocolServiceStub, VersionRequest, SupportedLanguagesRequest, ModeType, @@ -16,13 +15,11 @@ from bblfsh.pyuast import uast, iterator as native_iterator from bblfsh.tree_order import TreeOrder -# TODO XXX: cleanup imports and stuff -# TODO(juanjux): mark officially as deprecated - if "BBLFSH_COMPAT_SHUTUP" not in os.environ: print("Warning: using deprecated bblfsh v1 compatibility layer.", file=sys.stderr) + class WrongTypeException(Exception): pass @@ -149,7 +146,7 @@ def __next__(self) -> Node: self._last_node = ret_val return ret_val - def filter(self, query) -> 'CompatNodeIterator': + def filter(self, 
query: str) -> 'CompatNodeIterator': return CompatNodeIterator(NodeIterator(self._ctx.filter(query), self._ctx)) @property @@ -160,8 +157,6 @@ def properties(self) -> dict: return {} -# FIXME XXX: if the native node was created from a dictionary, the returned iterator -# is empty def iterator(n: Union[Node, CompatNodeIterator], order: TreeOrder = TreeOrder.PRE_ORDER)\ -> CompatNodeIterator: @@ -178,27 +173,27 @@ def iterator(n: Union[Node, CompatNodeIterator], order: TreeOrder = TreeOrder.PR "iterator on non node or iterator type (%s)" % str(type(n)) ) +class FilterTypeException(Exception): + pass + def filter(n: Node, query: str) -> CompatNodeIterator: - # XXX remove check if not isinstance(n, Node): - raise WrongTypeException( - "Filter on non node or iterator type (%s)" % str(type(n)) - ) + raise FilterTypeException("Filter on non node or iterator type (%s)" % str(type(n)) ) ctx = uast() return CompatNodeIterator(NodeIterator(ctx.filter(query, n._internal_node), ctx)) def filter_nodes(n: Node, query: str) -> CompatNodeIterator: - # XXX Create from NodeIterator from n._ctx.filter return CompatNodeIterator(filter(n, query), only_nodes=True) +class TypedQueryException(Exception): + pass def _scalariter2item(n: Node, query: str, wanted_type: type) -> Any: rlist = list(filter(n, query)) if len(rlist) > 1: - # XXX some exception type - raise Exception("More than one result for %s typed query" % str(type)) + raise TypedQueryException("More than one result for %s typed query" % str(type)) value = rlist[0] if isinstance(value, Node): @@ -209,8 +204,7 @@ def _scalariter2item(n: Node, query: str, wanted_type: type) -> Any: value = float(value) if not isinstance(value, wanted_type): - # XXX some exception type - raise Exception("Typed query for type %s returned type %s instead" + raise TypedQueryException("Typed query for type %s returned type %s instead" % (str(wanted_type), str(type(value)))) return wanted_type(value) diff --git a/bblfsh/compat_test.py 
b/bblfsh/compat_test.py index a7fde69..0a93c60 100644 --- a/bblfsh/compat_test.py +++ b/bblfsh/compat_test.py @@ -23,7 +23,7 @@ def setUpClass(cls): cls.BBLFSH_SERVER_EXISTED = ensure_bblfsh_is_running() @classmethod - def tearDownClass(cls): + def tearDownClass(cls) -> None: if not cls.BBLFSH_SERVER_EXISTED: client = docker.from_env(version="auto") client.containers.get("bblfshd").remove(force=True) @@ -32,7 +32,7 @@ def tearDownClass(cls): def _parse_fixture(self): return self.client.parse(self.fixtures_file) - def setUp(self): + def setUp(self) -> None: self.client = BblfshClient("0.0.0.0:9432") def _validate_resp(self, resp): @@ -243,6 +243,7 @@ def testItersMixingIterations(self): self.assertDictEqual(val_it1, val_it2) # XXX uncomment + """ def testManyFilters(self): root = self._parse_fixture().uast root.properties['k1'] = 'v2' @@ -275,6 +276,7 @@ def testManyParsesAndFilters(self): after = resource.getrusage(resource.RUSAGE_SELF) self.assertLess(after[2] / before[2], 4.0) + """ def testSupportedLanguages(self): res = self.client.supported_languages() diff --git a/bblfsh/node.py b/bblfsh/node.py new file mode 100644 index 0000000..561ed11 --- /dev/null +++ b/bblfsh/node.py @@ -0,0 +1,243 @@ +import copy +from collections import MutableSequence +from typing import Union, List, cast, Optional, Any + +from bblfsh.pyuast import Context, NodeExt, uast +from bblfsh.result_context import ResultMultiType + +class ResultTypeException(Exception): + pass + +class CompatPosition: + """ + v1 positions were extracted as node.[start|end]_position.[line|col|offset]. To + emulate that, this dictionary will be returned when accessing the old position + properties and its setters will update the parent Node real position ones. 
+ """ + + def __init__(self, parent_pos: dict) -> None: + self._parent_pos = parent_pos + + @property + def line(self) -> int: + return self._parent_pos["line"] + + @line.setter + def line(self, v: int) -> None: + self._parent_pos["line"] = v + + @property + def col(self) -> int: + return self._parent_pos["col"] + + @col.setter + def col(self, v: int) -> None: + self._parent_pos["col"] = v + + @property + def offset(self) -> int: + return self._parent_pos["offset"] + + @offset.setter + def offset(self, v: int) -> None: + self._parent_pos["offset"] = v + + +class CompatChildren(MutableSequence): + def __init__(self, parent: "Node") -> None: + self._children = parent.get_dict()["@children"] + + @staticmethod + def _node2dict(n: Union['Node', dict]) -> dict: + if isinstance(n, Node): + # Convert to dict before appending + return n.get_dict() + return n + + def __len__(self) -> int: + return len(self._children) + + def __getitem__(self, idx: Union[int, slice]) -> Any: + return self._children[idx] + + def __delitem__(self, idx: Union[int, slice]) -> None: + del self._children[idx] + + def __setitem__(self, idx: Union[int, slice], val: Union['Node', dict]) -> None: + self._children[idx] = self._node2dict(val) + + def insert(self, idx: int, val: Union['Node', dict]) -> None: + self._children.insert(idx, self._node2dict(val)) + + def append(self, val: Union['Node', dict]) -> None: + self._children.append(self._node2dict(val)) + + def extend(self, items: List[Union['Node', dict]]) -> None: + for i in items: + self.append(i) + + def __str__(self) -> str: + return str(self._children) + +EMPTY_NODE_DICT = { + "@type": "", + "@token": "", + "@role": [], + "@children": [], +} + +class NodeInstancingException(Exception): + pass + +# XXX check if I can totally remove ctx from this +class Node: + def __init__(self, node_ext: NodeExt = None, value: ResultMultiType=None, + ctx: Context = None) -> None: + + if node_ext and (value is not None): + raise NodeInstancingException("Node 
creation can have node_ext or value, not both") + + self._node_ext = node_ext + if node_ext is None: + self._internal_node = value if (value is not None) \ + else copy.deepcopy(EMPTY_NODE_DICT) + elif not isinstance(node_ext, NodeExt): + raise NodeInstancingException("Node instanced with a non NodeExt first argument: %s" + % str(type(node_ext))) + else: + # generate self._internal_node from the NodeExt + self._ensure_load() + + if isinstance(self._internal_node, dict): + self._load_children() + + self._ctx = ctx if ctx is not None else uast() + + def _load_children(self) -> None: + "Get all properties of type node or dict and load them into the list" + d = self.get_dict() + children = d["@children"] + for k, v in d.items(): + if k in ["@children", "@pos"]: + continue + if type(v) in [Node, dict]: + children.append(v) + + def _ensure_load(self) -> None: + if self._node_ext is not None: + self._internal_node = self._node_ext.load() + if isinstance(self._internal_node, dict): + self._internal_node["@children"] = self._internal_node.get("@children", []) + + def __str__(self) -> str: + return str(self.get()) + + def __repr__(self) -> str: + return repr(self.get()) + + def get(self) -> ResultMultiType: + self._ensure_load() + return self._internal_node + + def _get_typed(self, *type_list: type) -> ResultMultiType: + self._ensure_load() + + if type(self._internal_node) not in type_list: + raise ResultTypeException("Expected {} result, but type is '{}'" + .format(str(type_list), type(self._internal_node))) + return self._internal_node + + def get_bool(self) -> bool: + return cast(bool, self._get_typed(bool)) + + def get_float(self) -> float: + res: ResultMultiType = self._get_typed(float, int) + if isinstance(res, int): + res = float(res) + return cast(float, res) + + def get_int(self) -> int: + return cast(int, self._get_typed(int)) + + def get_str(self) -> str: + return cast(str, self._get_typed(str)) + + def get_dict(self) -> dict: + return cast(dict, 
self._get_typed(dict)) + + # TODO(juanjux): backward compatibility methods, remove once v1 + # is definitely deprecated + + @property + def internal_type(self) -> str: + return self.get_dict()["@type"] + + @internal_type.setter + def internal_type(self, t: str) -> None: + d = self.get_dict() + d["@type"] = t + + @property + def properties(self) -> dict: + return self.get_dict() + + def _is_dict_list(self, key: str) -> Optional[List]: + val = self.get_dict().get(key, None) + if not val or not isinstance(val, List): + return None + + for i in val: + if not isinstance(i, dict): + return None + + return val + + @property + def children(self) -> CompatChildren: + return CompatChildren(self) + + @property + def token(self) -> str: + return self.get_dict()["@token"] + + @token.setter + def token(self, t: str) -> None: + d = self.get_dict() + d["@token"] = t + + @property + def roles(self) -> List: + return self.get_dict().get("@role", []) + + def _add_position(self) -> None: + d = self.get_dict() + if "@pos" not in d: + d["@pos"] = { + "@type": "uast:Positions", + "start": { + "@type": "uast:Position", + "offset": -1, + "line": -1, + "col": -1, + }, + "end": { + "@type": "uast:Position", + "offset": -1, + "line": -1, + "col": -1, + } + } + + @property + def start_position(self) -> CompatPosition: + self._add_position() + start = self.get_dict()["@pos"]["start"] + return CompatPosition(start) + + @property + def end_position(self) -> CompatPosition: + self._add_position() + end = self.get_dict()["@pos"]["end"] + return CompatPosition(end) + + diff --git a/bblfsh/result_context.py b/bblfsh/result_context.py index dcdb045..3ad8ed3 100644 --- a/bblfsh/result_context.py +++ b/bblfsh/result_context.py @@ -1,8 +1,7 @@ -import copy -import typing as t -from collections import MutableSequence +from typing import Union, Optional from bblfsh.aliases import ParseResponse +from bblfsh.node import Node from bblfsh.pyuast import Context, IteratorExt, NodeExt, decode, iterator, uast from 
bblfsh.tree_order import TreeOrder @@ -11,10 +10,6 @@ class ResponseError(Exception): pass -class ResultTypeException(Exception): - pass - - class NotNodeIterationException(Exception): pass @@ -23,241 +18,7 @@ class GetOnEmptyNodeException(Exception): pass -ResultMultiType = t.Union[dict, int, float, bool, str, None] - - -class CompatPosition: - """ - v1 positions were extracted as node.[start|end]_position.[line|col|offset]. To - emulate that, this dictionary will be returned when accesing the old position - properties and its setters will update the parent Node real position ones. - """ - - def __init__(self, parent_pos: dict): - self._parent_pos = parent_pos - - @property - def line(self) -> int: - return self._parent_pos["line"] - - @line.setter - def line(self, v: int) -> None: - self._parent_pos["line"] = v - - @property - def col(self) -> int: - return self._parent_pos["col"] - - @col.setter - def col(self, v: int) -> None: - self._parent_pos["col"] = v - - @property - def offset(self) -> int: - return self._parent_pos["offset"] - - @offset.setter - def offset(self, v: int) -> None: - self._parent_pos["offset"] = v - - -class CompatChildren(MutableSequence): - def __init__(self, parent: "Node"): - self._children = parent.get_dict()["@children"] - - @staticmethod - def _node2dict(n): - if isinstance(n, Node): - # Convert to dict before appending - return n.get_dict() - return n - - def __len__(self): - return len(self._children) - - def __getitem__(self, idx): - return self._children[idx] - - def __delitem__(self, idx): - del self._children[idx] - - def __setitem__(self, idx, val): - self._children[idx] = self._node2dict(val) - - def insert(self, idx, val): - self._children.insert(idx, self._node2dict(val)) - - def append(self, val): - self._children.append(self._node2dict(val)) - - def extend(self, items) -> None: - for i in items: - self.append(i) - - def __str__(self): - return str(self._children) - -EMPTY_NODE_DICT = { - "@type": "", - "@token": "", - 
"@role": [], - "@children": [], -} - -# XXX check if I can totally remove ctx from this -class Node: - def __init__(self, node_ext: NodeExt = None, value: ResultMultiType=None, - ctx: Context = None) -> None: - - if node_ext and (value is not None): - # XXX exception type - raise Exception( - "Node creation can have node_ext or value, not both" - ) - - self._node_ext = node_ext - if node_ext is None: - self._internal_node = value if (value is not None)\ - else copy.deepcopy(EMPTY_NODE_DICT) - elif not isinstance(node_ext, NodeExt): - # XXX exception type - raise Exception("Node instanced with a non NodeExt first argument: %s" - % str(type(node_ext))) - else: - # generate self._internal_node from the NodeExt - self._ensure_load() - - if isinstance(self._internal_node, dict): - self._load_children() - - self._ctx = ctx if ctx is not None else uast() - - def _load_children(self) -> None: - "Get all properties of type node or dict and load them into the list" - d = self.get_dict() - children = d["@children"] - for k, v in d.items(): - if k in ["@children", "@pos"]: - continue - if type(v) in [Node, dict]: - children.append(v) - - def _ensure_load(self) -> None: - if self._node_ext is not None: - self._internal_node = self._node_ext.load() - if isinstance(self._internal_node, dict): - self._internal_node["@children"] = self._internal_node.get("@children", []) - - def __str__(self) -> str: - return str(self.get()) - - def __repr__(self) -> str: - return repr(self.get()) - - def get(self) -> ResultMultiType: - self._ensure_load() - return self._internal_node - - def _get_typed(self, *type_list: t.List[type]) -> ResultMultiType: - self._ensure_load() - - if type(self._internal_node) not in type_list: - raise ResultTypeException("Expected {} result, but type is '{}'" - .format(str(type_list), type(self._internal_node))) - return self._internal_node - - def get_bool(self) -> bool: - return t.cast(bool, self._get_typed(bool)) - - def get_float(self) -> float: - res: 
ResultMultiType = self._get_typed(float, int) - if isinstance(res, int): - res = float(res) - return t.cast(float, res) - - def get_int(self) -> int: - return t.cast(int, self._get_typed(int)) - - def get_str(self) -> str: - return t.cast(str, self._get_typed(str)) - - def get_dict(self) -> dict: - return t.cast(dict, self._get_typed(dict)) - - # TODO(juanjux): backward compatibility methods, remove once v1 - # is definitely deprecated - - @property - def internal_type(self) -> str: - return self.get_dict()["@type"] - - @internal_type.setter - def internal_type(self, t: str) -> None: - d = self.get_dict() - d["@type"] = t - - @property - def properties(self): - return self.get_dict() - - def _is_dict_list(self, key: str) -> t.Optional[t.List]: - val = self.get_dict().get(key, None) - if not val or not isinstance(val, t.List): - return None - - for i in val: - if not isinstance(i, dict): - return None - - return val - - @property - def children(self): - return CompatChildren(self) - - @property - def token(self) -> str: - return self.get_dict()["@token"] - - @token.setter - def token(self, t: str) -> None: - d = self.get_dict() - d["@token"] = t - - @property - def roles(self) -> t.List: - return self.get_dict().get("@role", []) - - def _add_position(self) -> None: - d = self.get_dict() - if "@pos" not in d: - d["@pos"] = { - "@type": "uast:Positions", - "start": { - "@type": "uast:Position", - "offset": -1, - "line": -1, - "col": -1, - }, - "end": { - "@type": "uast:Position", - "offset": -1, - "line": -1, - "col": -1, - } - } - - @property - def start_position(self): - self._add_position() - start = self.get_dict()["@pos"]["start"] - return CompatPosition(start) - - @property - def end_position(self): - self._add_position() - end = self.get_dict()["@pos"]["end"] - return CompatPosition(end) +ResultMultiType = Union[dict, int, float, bool, str, None] # XXX remove ctx if removed from Node @@ -268,12 +29,12 @@ def __init__(self, iter_ext: IteratorExt, ctx: Context) 
-> None: # default, can be changed on self.iterate() self._order: TreeOrder = TreeOrder.PRE_ORDER # saves the last node for re-iteration with iterate() - self._last_node: Node = None + self._last_node: Optional[Node] = None def __iter__(self) -> 'NodeIterator': return self - def __next__(self) -> t.Union[ResultMultiType, Node]: + def __next__(self) -> Union[ResultMultiType, Node]: next_node = next(self._iter_ext) if isinstance(next_node, NodeExt): From 49972503443007da1e6351c9ca01a012771e3d7b Mon Sep 17 00:00:00 2001 From: Juanjo Alvarez Date: Tue, 12 Mar 2019 17:19:43 +0100 Subject: [PATCH 03/16] moved NodeIterator to its own file Signed-off-by: Juanjo Alvarez --- bblfsh/node_iterator.py | 42 ++++++++++++++++++++++++++++++++++++++++ bblfsh/result_context.py | 37 ++--------------------------------- 2 files changed, 44 insertions(+), 35 deletions(-) create mode 100644 bblfsh/node_iterator.py diff --git a/bblfsh/node_iterator.py b/bblfsh/node_iterator.py new file mode 100644 index 0000000..c621b24 --- /dev/null +++ b/bblfsh/node_iterator.py @@ -0,0 +1,42 @@ +from typing import Union, Optional + +from bblfsh.node import Node +from bblfsh.result_context import ResultMultiType +from bblfsh.pyuast import Context, IteratorExt, NodeExt, iterator +from bblfsh.tree_order import TreeOrder + + +# XXX remove ctx if removed from Node +class NodeIterator: + def __init__(self, iter_ext: IteratorExt, ctx: Context) -> None: + self._iter_ext = iter_ext + self._ctx = ctx + # default, can be changed on self.iterate() + self._order: TreeOrder = TreeOrder.PRE_ORDER + # saves the last node for re-iteration with iterate() + self._last_node: Optional[Node] = None + + def __iter__(self) -> 'NodeIterator': + return self + + def __next__(self) -> Union[ResultMultiType, Node]: + next_node = next(self._iter_ext) + + if isinstance(next_node, NodeExt): + # save last node for potential re-iteration + self._last_node = Node(node_ext=next_node, ctx=self._ctx) + return self._last_node + # non node 
(bool, str, etc) + return next_node + + def iterate(self, order: int) -> 'NodeIterator': + if self._last_node is None: + self._last_node = Node(node_ext=next(self._iter_ext), + ctx=self._ctx) + + TreeOrder.check_order(order) + self._order = order + return NodeIterator( + iterator((self._last_node._node_ext), order), self._ctx) + + diff --git a/bblfsh/result_context.py b/bblfsh/result_context.py index 3ad8ed3..c6c7b36 100644 --- a/bblfsh/result_context.py +++ b/bblfsh/result_context.py @@ -2,7 +2,8 @@ from bblfsh.aliases import ParseResponse from bblfsh.node import Node -from bblfsh.pyuast import Context, IteratorExt, NodeExt, decode, iterator, uast +from bblfsh.node_iterator import NodeIterator +from bblfsh.pyuast import decode, iterator, uast from bblfsh.tree_order import TreeOrder @@ -21,40 +22,6 @@ class GetOnEmptyNodeException(Exception): ResultMultiType = Union[dict, int, float, bool, str, None] -# XXX remove ctx if removed from Node -class NodeIterator: - def __init__(self, iter_ext: IteratorExt, ctx: Context) -> None: - self._iter_ext = iter_ext - self._ctx = ctx - # default, can be changed on self.iterate() - self._order: TreeOrder = TreeOrder.PRE_ORDER - # saves the last node for re-iteration with iterate() - self._last_node: Optional[Node] = None - - def __iter__(self) -> 'NodeIterator': - return self - - def __next__(self) -> Union[ResultMultiType, Node]: - next_node = next(self._iter_ext) - - if isinstance(next_node, NodeExt): - # save last node for potential re-iteration - self._last_node = Node(node_ext=next_node, ctx=self._ctx) - return self._last_node - # non node (bool, str, etc) - return next_node - - def iterate(self, order: int) -> 'NodeIterator': - if self._last_node is None: - self._last_node = Node(node_ext=next(self._iter_ext), - ctx=self._ctx) - - TreeOrder.check_order(order) - self._order = order - return NodeIterator( - iterator((self._last_node._node_ext), order), self._ctx) - - class ResultContext: def __init__(self, grpc_response: 
ParseResponse = None) -> None: if grpc_response: From 8f5575693c37dccf09f760d1a9f2983a1d2c0f4e Mon Sep 17 00:00:00 2001 From: Juanjo Alvarez Date: Wed, 13 Mar 2019 10:52:13 +0100 Subject: [PATCH 04/16] Fix position order iterator test Signed-off-by: Juanjo Alvarez --- bblfsh/compat_test.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/bblfsh/compat_test.py b/bblfsh/compat_test.py index 0a93c60..67cb0c0 100644 --- a/bblfsh/compat_test.py +++ b/bblfsh/compat_test.py @@ -141,7 +141,7 @@ def testFilterNumber(self): "count(//uast:Positions/end/uast:Position[@col=49])") self.assertEqual(int(res), 2) - # get_str() already tested by testFiltertoken + # get_str() already tested by testFilterToken def testRoleIdName(self): self.assertEqual(role_id(role_name(1)), 1) @@ -152,7 +152,6 @@ def _itTestTree(): root = Node() root.internal_type = 'root' root.start_position.offset = 0 - root.start_position.line = 0 son1 = Node() son1.internal_type = 'son1' @@ -214,14 +213,13 @@ def testAddToNode(self): n._internal_node["foo"] = "bar" self.assertEqual(n.properties["foo"], "bar") - # FIXME(juanjux): fails - # def testIteratorPositionOrder(self): - # root = self._itTestTree() - # it = iterator(root, TreeOrder.POSITION_ORDER) - # self.assertIsNotNone(it) - # expanded = [node.internal_type for node in it] - # self.assertListEqual(expanded, ['root', 'son1', 'son2_1', 'son1_1', - # 'son1_2', 'son2_2', 'son2']) + def testIteratorPositionOrder(self): + root = self._itTestTree() + it = iterator(root, TreeOrder.POSITION_ORDER) + self.assertIsNotNone(it) + expanded = [node.internal_type for node in it] + self.assertListEqual(expanded, ['root', 'son1', 'son2_1', 'son1_1', + 'son1_2', 'son2_2', 'son2']) def testFilterInsideIter(self): root = self.client.parse(__file__).uast From be602c2bb33ec1ffa58b5db293abf9f6ae571171 Mon Sep 17 00:00:00 2001 From: Juanjo Alvarez Date: Thu, 14 Mar 2019 12:33:55 +0100 Subject: [PATCH 05/16] Memory improvements - Remove ctx 
ref from Node and Iterator - Make Nodes not keep the NodeExt after load. This should help with memory usage and leaking. Signed-off-by: Juanjo Alvarez --- bblfsh/compat.py | 28 +++++++++++++++++++--------- bblfsh/compat_test.py | 16 ++++++++-------- bblfsh/node.py | 30 ++++++++++-------------------- bblfsh/node_iterator.py | 20 ++++++++++---------- bblfsh/result_context.py | 7 ++----- bblfsh/test.py | 6 +++--- 6 files changed, 52 insertions(+), 55 deletions(-) diff --git a/bblfsh/compat.py b/bblfsh/compat.py index 91725f6..927d745 100644 --- a/bblfsh/compat.py +++ b/bblfsh/compat.py @@ -1,6 +1,6 @@ import os import sys -from typing import Union, List, Any +from typing import Union, List, Any, Optional import grpc @@ -30,13 +30,12 @@ def __init__(self, ctx: ResultContext, filename: str = "") -> None: self._filename = filename @property - def uast(self) -> 'CompatNodeIterator': + def uast(self) -> Node: return self._res_context.uast @property - def ast(self) -> 'CompatNodeIterator': + def ast(self) -> Node: return self._res_context.ast - # return self.uast @property def ctx(self) -> ResultContext: @@ -114,12 +113,14 @@ def __init__( nodeit: NodeIterator, only_nodes: bool = False ) -> None: + # XXX Remove + if not isinstance(nodeit, NodeIterator): + raise Exception("First argument to CompatNodeIterator is of type: %s" % str(type(nodeit))) self._nodeit = nodeit - self._ctx = nodeit._ctx self._only_nodes = only_nodes # Used to forward calls of the old Node object # Check if this, and properties(), are needed - self._last_node = None + self._last_node: Optional[Node] = None def __iter__(self) -> 'CompatNodeIterator': return self @@ -147,7 +148,10 @@ def __next__(self) -> Node: return ret_val def filter(self, query: str) -> 'CompatNodeIterator': - return CompatNodeIterator(NodeIterator(self._ctx.filter(query), self._ctx)) + if not self._last_node: + return None + + return filter(self._last_node, query) @property def properties(self) -> dict: @@ -164,7 +168,7 @@ def 
iterator(n: Union[Node, CompatNodeIterator], order: TreeOrder = TreeOrder.PR return CompatNodeIterator(n._nodeit.iterate(order), only_nodes=True) elif isinstance(n, Node): nat_it = native_iterator(n._internal_node, order) - return CompatNodeIterator(NodeIterator(nat_it, n._ctx), only_nodes=True) + return CompatNodeIterator(NodeIterator(nat_it), only_nodes=True) elif isinstance(n, dict): nat_it = native_iterator(n, order) return CompatNodeIterator(NodeIterator(nat_it, uast()), only_nodes=True) @@ -173,22 +177,28 @@ def iterator(n: Union[Node, CompatNodeIterator], order: TreeOrder = TreeOrder.PR "iterator on non node or iterator type (%s)" % str(type(n)) ) + class FilterTypeException(Exception): pass + def filter(n: Node, query: str) -> CompatNodeIterator: + # XXX remove if not isinstance(n, Node): raise FilterTypeException("Filter on non node or iterator type (%s)" % str(type(n)) ) + ctx = uast() return CompatNodeIterator(NodeIterator(ctx.filter(query, n._internal_node), ctx)) def filter_nodes(n: Node, query: str) -> CompatNodeIterator: - return CompatNodeIterator(filter(n, query), only_nodes=True) + return CompatNodeIterator(filter(n, query)._nodeit, only_nodes=True) + class TypedQueryException(Exception): pass + def _scalariter2item(n: Node, query: str, wanted_type: type) -> Any: rlist = list(filter(n, query)) diff --git a/bblfsh/compat_test.py b/bblfsh/compat_test.py index 67cb0c0..b9ff001 100644 --- a/bblfsh/compat_test.py +++ b/bblfsh/compat_test.py @@ -7,7 +7,7 @@ from bblfsh.compat import ( filter as xpath_filter, role_id, iterator, role_name, Node, TreeOrder, filter_bool, - filter_number, WrongTypeException, CompatNodeIterator + filter_number, FilterTypeException, CompatNodeIterator ) from bblfsh.compat import CompatBblfshClient as BblfshClient from bblfsh.launcher import ensure_bblfsh_is_running @@ -65,12 +65,13 @@ def testUASTFileContents(self): self._validate_resp(resp) def testBrokenFilter(self): - with self.assertRaises(WrongTypeException): + with 
self.assertRaises(FilterTypeException): xpath_filter(0, "foo") def testFilterInternalType(self): node = Node() node.internal_type = 'a' + res = xpath_filter(node, "//a") self.assertTrue(any(xpath_filter(node, "//a"))) self.assertFalse(any(xpath_filter(node, "//b"))) @@ -240,15 +241,13 @@ def testItersMixingIterations(self): self.assertDictEqual(val_it1, val_it2) - # XXX uncomment - """ def testManyFilters(self): root = self._parse_fixture().uast root.properties['k1'] = 'v2' root.properties['k2'] = 'v1' before = resource.getrusage(resource.RUSAGE_SELF) - for _ in range(1000): + for i in range(1000): xpath_filter(root, "//*[@roleIdentifier]") after = resource.getrusage(resource.RUSAGE_SELF) @@ -258,13 +257,14 @@ def testManyFilters(self): def testManyParses(self): before = resource.getrusage(resource.RUSAGE_SELF) - for _ in range(100): + # XXX change to 100 again + for _ in range(2000): self.client.parse(self.fixtures_file).uast - after = resource.getrusage(resource.RUSAGE_SELF) - self.assertLess(after[2] / before[2], 2.0) + # XXX uncomment + """ def testManyParsesAndFilters(self): before = resource.getrusage(resource.RUSAGE_SELF) for _ in range(100): diff --git a/bblfsh/node.py b/bblfsh/node.py index 561ed11..766b597 100644 --- a/bblfsh/node.py +++ b/bblfsh/node.py @@ -3,9 +3,9 @@ from typing import Union, List, cast, Optional, Any from bblfsh.pyuast import Context, NodeExt, uast -from bblfsh.result_context import ResultMultiType +from bblfsh.type_aliases import ResultMultiType -class ResultTypeException(Exception): +class NodeTypedGetException(Exception): pass class CompatPosition: @@ -86,18 +86,18 @@ def __str__(self) -> str: "@children": [], } + class NodeInstancingException(Exception): pass + # XXX check if I can totally remove ctx from this class Node: - def __init__(self, node_ext: NodeExt = None, value: ResultMultiType=None, - ctx: Context = None) -> None: + def __init__(self, node_ext: NodeExt = None, value: ResultMultiType=None) -> None: if node_ext and 
(value is not None): raise NodeInstancingException("Node creation can have node_ext or value, not both") - self._node_ext = node_ext if node_ext is None: self._internal_node = value if (value is not None) \ else copy.deepcopy(EMPTY_NODE_DICT) @@ -106,29 +106,22 @@ def __init__(self, node_ext: NodeExt = None, value: ResultMultiType=None, % str(type(node_ext))) else: # generate self._internal_node from the NodeExt - self._ensure_load() + self._internal_node = node_ext.load() if isinstance(self._internal_node, dict): self._load_children() - self._ctx = ctx if ctx is not None else uast() - def _load_children(self) -> None: "Get all properties of type node or dict and load them into the list" d = self.get_dict() - children = d["@children"] + children = d.get("@children", []) for k, v in d.items(): - if k in ["@children", "@pos"]: + if k in ["@children", "@pos", "@role", "@type"]: continue + # XXX get also dict/Node children on a list! if type(v) in [Node, dict]: children.append(v) - def _ensure_load(self) -> None: - if self._node_ext is not None: - self._internal_node = self._node_ext.load() - if isinstance(self._internal_node, dict): - self._internal_node["@children"] = self._internal_node.get("@children", []) - def __str__(self) -> str: return str(self.get()) @@ -136,14 +129,11 @@ def __repr__(self) -> str: return repr(self.get()) def get(self) -> ResultMultiType: - self._ensure_load() return self._internal_node def _get_typed(self, *type_list: type) -> ResultMultiType: - self._ensure_load() - if type(self._internal_node) not in type_list: - raise ResultTypeException("Expected {} result, but type is '{}'" + raise NodeTypedGetException("Expected {} result, but type is '{}'" .format(str(type_list), type(self._internal_node))) return self._internal_node diff --git a/bblfsh/node_iterator.py b/bblfsh/node_iterator.py index c621b24..b904b0b 100644 --- a/bblfsh/node_iterator.py +++ b/bblfsh/node_iterator.py @@ -1,20 +1,24 @@ from typing import Union, Optional from 
bblfsh.node import Node -from bblfsh.result_context import ResultMultiType from bblfsh.pyuast import Context, IteratorExt, NodeExt, iterator from bblfsh.tree_order import TreeOrder +from bblfsh.type_aliases import ResultMultiType # XXX remove ctx if removed from Node class NodeIterator: - def __init__(self, iter_ext: IteratorExt, ctx: Context) -> None: + # savedCtx prevents the context from deallocating. This is because + # currently the IteratorExt will go away if the context from which it was + # called does. + # XXX type + def __init__(self, iter_ext: IteratorExt, savedCtx: Context = None) -> None: self._iter_ext = iter_ext - self._ctx = ctx # default, can be changed on self.iterate() self._order: TreeOrder = TreeOrder.PRE_ORDER # saves the last node for re-iteration with iterate() self._last_node: Optional[Node] = None + self._ctx = savedCtx def __iter__(self) -> 'NodeIterator': return self @@ -24,19 +28,15 @@ def __next__(self) -> Union[ResultMultiType, Node]: if isinstance(next_node, NodeExt): # save last node for potential re-iteration - self._last_node = Node(node_ext=next_node, ctx=self._ctx) + self._last_node = Node(node_ext=next_node) return self._last_node # non node (bool, str, etc) return next_node def iterate(self, order: int) -> 'NodeIterator': if self._last_node is None: - self._last_node = Node(node_ext=next(self._iter_ext), - ctx=self._ctx) + self._last_node = Node(node_ext=next(self._iter_ext)) TreeOrder.check_order(order) self._order = order - return NodeIterator( - iterator((self._last_node._node_ext), order), self._ctx) - - + return NodeIterator(iterator((self._last_node._node_ext), order), self._ctx) diff --git a/bblfsh/result_context.py b/bblfsh/result_context.py index c6c7b36..fab5550 100644 --- a/bblfsh/result_context.py +++ b/bblfsh/result_context.py @@ -19,9 +19,6 @@ class GetOnEmptyNodeException(Exception): pass -ResultMultiType = Union[dict, int, float, bool, str, None] - - class ResultContext: def __init__(self, grpc_response: 
ParseResponse = None) -> None: if grpc_response: @@ -55,11 +52,11 @@ def filename(self) -> str: @property def uast(self) -> Node: - return Node(node_ext=self._ctx.root(), ctx=self._ctx) + return Node(node_ext=self._ctx.root()) @property def ast(self) -> Node: - return Node(node_ext=self._ctx.root(), ctx=self._ctx) + return Node(node_ext=self._ctx.root()) def __str__(self) -> str: return str(self.get_all()) diff --git a/bblfsh/test.py b/bblfsh/test.py index e9c841a..4cb2861 100644 --- a/bblfsh/test.py +++ b/bblfsh/test.py @@ -8,8 +8,8 @@ Modes, role_id, role_name) from bblfsh.launcher import ensure_bblfsh_is_running from bblfsh.client import NonUTF8ContentException -from bblfsh.result_context import (Node, NodeIterator, - ResultContext, ResultTypeException) +from bblfsh.node import NodeTypedGetException +from bblfsh.result_context import (Node, NodeIterator, ResultContext) from bblfsh.pyuast import uast @@ -182,7 +182,7 @@ def testFilterBadQuery(self) -> None: def testFilterBadType(self) -> None: ctx = self._parse_fixture() res = next(ctx.filter("count(//uast:Positions/end/uast:Position[@col=49])")) - self.assertRaises(ResultTypeException, res.get_str) + self.assertRaises(NodeTypedGetException, res.get_str) def testRoleIdName(self) -> None: self.assertEqual(role_id(role_name(1)), 1) From c55e65c82dbd54e79694b8f88c602bb6607859f3 Mon Sep 17 00:00:00 2001 From: Juanjo Alvarez Date: Fri, 15 Mar 2019 13:29:51 +0100 Subject: [PATCH 06/16] Fix for node.children and new test Signed-off-by: Juanjo Alvarez --- bblfsh/compat.py | 33 ++++++++--------------- bblfsh/compat_test.py | 56 ++++++++++++++++++++++++++++------------ bblfsh/node.py | 51 +++++++++++++++++++++++------------- bblfsh/node_iterator.py | 10 +++---- bblfsh/pyuast.cc | 21 ++++++++++++--- bblfsh/result_context.py | 16 +++++------- bblfsh/test.py | 2 +- 7 files changed, 112 insertions(+), 77 deletions(-) diff --git a/bblfsh/compat.py b/bblfsh/compat.py index 927d745..f4a7702 100644 --- a/bblfsh/compat.py +++ 
b/bblfsh/compat.py @@ -6,7 +6,9 @@ import bblfsh.client as bcli from bblfsh import role_id, role_name -from bblfsh.result_context import ResultContext, NodeIterator, Node +from bblfsh.node import Node +from bblfsh.node_iterator import NodeIterator +from bblfsh.result_context import ResultContext from bblfsh.aliases import ( ParseRequest, ParseResponse, DriverStub, ProtocolServiceStub, VersionRequest, SupportedLanguagesRequest, ModeType, @@ -54,10 +56,9 @@ def language(self) -> str: def filename(self) -> str: return self._filename - # FIXME(juanjux) get type @property def DESCRIPTOR(self) -> Any: - return self._res_context._ctx.DESCRIPTOR + return self._res_context.ctx.DESCRIPTOR @property def errors(selfs) -> List: @@ -88,7 +89,7 @@ def parse(self, filename: str, language: str = None, contents: str = None, timeout: float = None) -> CompatParseResponse: return self._parse(filename, language, contents, timeout, - Mode.Value('ANNOTATED')) + Mode.Value('ANNOTATED')) def native_parse(self, filename: str, language: str = None, contents: str = None, @@ -113,9 +114,6 @@ def __init__( nodeit: NodeIterator, only_nodes: bool = False ) -> None: - # XXX Remove - if not isinstance(nodeit, NodeIterator): - raise Exception("First argument to CompatNodeIterator is of type: %s" % str(type(nodeit))) self._nodeit = nodeit self._only_nodes = only_nodes # Used to forward calls of the old Node object @@ -129,25 +127,24 @@ def __next__(self) -> Node: next_val = next(self._nodeit) is_node = isinstance(next_val, Node) - val = next_val._internal_node if is_node else next_val + val = next_val.internal_node if is_node else next_val # Skip positions and non dicts/lists, the later if only_nodes = True skip = False if isinstance(val, dict): if "@type" not in val or val["@type"] == "uast:Positions": skip = True - # elif self._only_nodes and not isinstance(val, list): elif self._only_nodes: skip = True if skip: - val = self.__next__()._internal_node + val = self.__next__().internal_node ret_val = 
next_val if is_node else Node(value=val) self._last_node = ret_val return ret_val - def filter(self, query: str) -> 'CompatNodeIterator': + def filter(self, query: str) -> Optional['CompatNodeIterator']: if not self._last_node: return None @@ -167,7 +164,7 @@ def iterator(n: Union[Node, CompatNodeIterator], order: TreeOrder = TreeOrder.PR if isinstance(n, CompatNodeIterator): return CompatNodeIterator(n._nodeit.iterate(order), only_nodes=True) elif isinstance(n, Node): - nat_it = native_iterator(n._internal_node, order) + nat_it = native_iterator(n.internal_node, order) return CompatNodeIterator(NodeIterator(nat_it), only_nodes=True) elif isinstance(n, dict): nat_it = native_iterator(n, order) @@ -178,17 +175,9 @@ def iterator(n: Union[Node, CompatNodeIterator], order: TreeOrder = TreeOrder.PR ) -class FilterTypeException(Exception): - pass - - def filter(n: Node, query: str) -> CompatNodeIterator: - # XXX remove - if not isinstance(n, Node): - raise FilterTypeException("Filter on non node or iterator type (%s)" % str(type(n)) ) - ctx = uast() - return CompatNodeIterator(NodeIterator(ctx.filter(query, n._internal_node), ctx)) + return CompatNodeIterator(NodeIterator(ctx.filter(query, n.internal_node), ctx)) def filter_nodes(n: Node, query: str) -> CompatNodeIterator: @@ -207,7 +196,7 @@ def _scalariter2item(n: Node, query: str, wanted_type: type) -> Any: value = rlist[0] if isinstance(value, Node): - value = value._internal_node + value = value.internal_node value_type = type(value) if wanted_type == float and value_type == int: diff --git a/bblfsh/compat_test.py b/bblfsh/compat_test.py index b9ff001..2ab7ec4 100644 --- a/bblfsh/compat_test.py +++ b/bblfsh/compat_test.py @@ -7,7 +7,7 @@ from bblfsh.compat import ( filter as xpath_filter, role_id, iterator, role_name, Node, TreeOrder, filter_bool, - filter_number, FilterTypeException, CompatNodeIterator + filter_number, CompatNodeIterator ) from bblfsh.compat import CompatBblfshClient as BblfshClient from 
bblfsh.launcher import ensure_bblfsh_is_running @@ -48,7 +48,7 @@ def testVersion(self): def testNativeParse(self): reply = self.client.native_parse(__file__) - assert(reply.ast) + assert reply.ast def testNonUTF8ParseError(self): with self.assertRaises(NonUTF8ContentException): @@ -65,13 +65,12 @@ def testUASTFileContents(self): self._validate_resp(resp) def testBrokenFilter(self): - with self.assertRaises(FilterTypeException): + with self.assertRaises(AttributeError): xpath_filter(0, "foo") def testFilterInternalType(self): node = Node() node.internal_type = 'a' - res = xpath_filter(node, "//a") self.assertTrue(any(xpath_filter(node, "//a"))) self.assertFalse(any(xpath_filter(node, "//b"))) @@ -85,14 +84,18 @@ def testFilterRoles(self): uast = self._parse_fixture().uast it = xpath_filter(uast, "//*[@role='Identifier']") self.assertIsInstance(it, CompatNodeIterator) - - l = list(it) - self.assertGreater(len(l), 0) + li = list(it) + self.assertGreater(len(li), 0) it = xpath_filter(uast, "//*[@role='Friend']") self.assertIsInstance(it, CompatNodeIterator) - l = list(it) - self.assertEqual(len(l), 0) + li = list(it) + self.assertEqual(len(li), 0) + + it = xpath_filter(uast, "//*[@role='Identifier' and not(@role='Friend')]") + self.assertIsInstance(it, CompatNodeIterator) + li = list(it) + self.assertGreater(len(li), 0) def testFilterStartOffset(self): uast = self._parse_fixture().uast @@ -211,7 +214,7 @@ def testIteratorLevelOrder(self): def testAddToNode(self): n = Node() - n._internal_node["foo"] = "bar" + n.internal_node["foo"] = "bar" self.assertEqual(n.properties["foo"], "bar") def testIteratorPositionOrder(self): @@ -232,7 +235,9 @@ def testFilterInsideIter(self): def testItersMixingIterations(self): root = self.client.parse(__file__).uast it = iterator(root, TreeOrder.PRE_ORDER) - next(it); next(it); next(it) + next(it) + next(it) + next(it) n = next(it) it2 = iterator(n, TreeOrder.PRE_ORDER) next(it2) @@ -257,14 +262,12 @@ def testManyFilters(self): def 
testManyParses(self): before = resource.getrusage(resource.RUSAGE_SELF) - # XXX change to 100 again - for _ in range(2000): - self.client.parse(self.fixtures_file).uast + for _ in range(100): + _ = self.client.parse(self.fixtures_file).uast + after = resource.getrusage(resource.RUSAGE_SELF) self.assertLess(after[2] / before[2], 2.0) - # XXX uncomment - """ def testManyParsesAndFilters(self): before = resource.getrusage(resource.RUSAGE_SELF) for _ in range(100): @@ -274,7 +277,6 @@ def testManyParsesAndFilters(self): after = resource.getrusage(resource.RUSAGE_SELF) self.assertLess(after[2] / before[2], 4.0) - """ def testSupportedLanguages(self): res = self.client.supported_languages() @@ -284,5 +286,25 @@ def testSupportedLanguages(self): self.assertTrue(hasattr(l, key)) self.assertIsNotNone(getattr(l, key)) + def testChildren(self): + n = Node() + n.internal_type = 'root' + c1 = {"@type": "child1"} + n.properties["child1"] = c1 + self.assertDictEqual(n.children[0], c1) + + c2 = {"@type": "child2"} + n.children.append(c2) + self.assertDictEqual(n.children[1], c2) + n.children.append(c2) + self.assertDictEqual(n.children[2], c2) + + l = [{"@type": "list_child1"}, {"@type": "list_child2"}] + n.properties["some_list"] = l + self.assertDictEqual(n.children[3], l[0]) + self.assertDictEqual(n.children[4], l[1]) + + + if __name__ == "__main__": unittest.main() diff --git a/bblfsh/node.py b/bblfsh/node.py index 766b597..83ebc58 100644 --- a/bblfsh/node.py +++ b/bblfsh/node.py @@ -2,12 +2,14 @@ from collections import MutableSequence from typing import Union, List, cast, Optional, Any -from bblfsh.pyuast import Context, NodeExt, uast +from bblfsh.pyuast import NodeExt from bblfsh.type_aliases import ResultMultiType + class NodeTypedGetException(Exception): pass + class CompatPosition: """ v1 positions were extracted as node.[start|end]_position.[line|col|offset]. 
To @@ -79,6 +81,7 @@ def extend(self, items: List[Union['Node', dict]]) -> None: def __str__(self) -> str: return str(self._children) + EMPTY_NODE_DICT = { "@type": "", "@token": "", @@ -91,36 +94,49 @@ class NodeInstancingException(Exception): pass -# XXX check if I can totally remove ctx from this class Node: - def __init__(self, node_ext: NodeExt = None, value: ResultMultiType=None) -> None: + def __init__(self, node_ext: NodeExt = None, value: ResultMultiType = None) -> None: if node_ext and (value is not None): raise NodeInstancingException("Node creation can have node_ext or value, not both") if node_ext is None: - self._internal_node = value if (value is not None) \ + self.internal_node = value if (value is not None) \ else copy.deepcopy(EMPTY_NODE_DICT) elif not isinstance(node_ext, NodeExt): raise NodeInstancingException("Node instanced with a non NodeExt first argument: %s" % str(type(node_ext))) else: - # generate self._internal_node from the NodeExt - self._internal_node = node_ext.load() + # generate self.internal_node from the NodeExt + self.internal_node = node_ext.load() + + self.node_ext = node_ext - if isinstance(self._internal_node, dict): + if isinstance(self.internal_node, dict): self._load_children() + # This is for v1 "node.children" compatibility. It will update the children + # property with the dict or Node objects in properties or list/tuple properties + # when .children is accessed (because the user could change the node using get_dict() + # or .properties). + # Also, all these " in children" are O(1) so this will be slow for frequently accessing + # the children property on big nodes. 
def _load_children(self) -> None: - "Get all properties of type node or dict and load them into the list" + """Get all properties of type node or dict and load them into the list""" d = self.get_dict() children = d.get("@children", []) for k, v in d.items(): - if k in ["@children", "@pos", "@role", "@type"]: + if k in ("@children", "@pos", "@role", "@type"): continue - # XXX get also dict/Node children on a list! - if type(v) in [Node, dict]: - children.append(v) + + tv = type(v) + if tv in (Node, dict): + if v not in children: + children.append(v) + elif tv in (list, tuple): + # Get all node|dict types inside the list and add to children + children.extend([i for i in v if type(i) in (Node, dict) and i not in children]) + # else ignore it def __str__(self) -> str: return str(self.get()) @@ -129,13 +145,13 @@ def __repr__(self) -> str: return repr(self.get()) def get(self) -> ResultMultiType: - return self._internal_node + return self.internal_node def _get_typed(self, *type_list: type) -> ResultMultiType: - if type(self._internal_node) not in type_list: + if type(self.internal_node) not in type_list: raise NodeTypedGetException("Expected {} result, but type is '{}'" - .format(str(type_list), type(self._internal_node))) - return self._internal_node + .format(str(type_list), type(self.internal_node))) + return self.internal_node def get_bool(self) -> bool: return cast(bool, self._get_typed(bool)) @@ -184,6 +200,7 @@ def _is_dict_list(self, key: str) -> Optional[List]: @property def children(self) -> CompatChildren: + self._load_children() return CompatChildren(self) @property @@ -229,5 +246,3 @@ def end_position(self) -> CompatPosition: self._add_position() end = self.get_dict()["@pos"]["end"] return CompatPosition(end) - - diff --git a/bblfsh/node_iterator.py b/bblfsh/node_iterator.py index b904b0b..599b2db 100644 --- a/bblfsh/node_iterator.py +++ b/bblfsh/node_iterator.py @@ -6,19 +6,17 @@ from bblfsh.type_aliases import ResultMultiType -# XXX remove ctx if removed 
from Node class NodeIterator: - # savedCtx prevents the context from deallocating. This is because + # ctx is not used but prevents the context from deallocating (bug). This is because # currently the IteratorExt will go away if the context from which it was # called does. - # XXX type - def __init__(self, iter_ext: IteratorExt, savedCtx: Context = None) -> None: + def __init__(self, iter_ext: IteratorExt, ctx: Context = None) -> None: self._iter_ext = iter_ext # default, can be changed on self.iterate() self._order: TreeOrder = TreeOrder.PRE_ORDER # saves the last node for re-iteration with iterate() self._last_node: Optional[Node] = None - self._ctx = savedCtx + self.ctx = ctx def __iter__(self) -> 'NodeIterator': return self @@ -39,4 +37,4 @@ def iterate(self, order: int) -> 'NodeIterator': TreeOrder.check_order(order) self._order = order - return NodeIterator(iterator((self._last_node._node_ext), order), self._ctx) + return NodeIterator(iterator((self._last_node.node_ext), order), self.ctx) diff --git a/bblfsh/pyuast.cc b/bblfsh/pyuast.cc index 886f491..035ec17 100644 --- a/bblfsh/pyuast.cc +++ b/bblfsh/pyuast.cc @@ -103,6 +103,7 @@ extern "C" typedef struct { PyObject_HEAD ContextExt *ctx; + PyObject *pyCtx; uast::Iterator *iter; bool freeCtx; } PyUastIterExt; @@ -269,8 +270,7 @@ class ContextExt { NodeHandle unode = toHandle(node); if (unode == 0) unode = ctx->RootNode(); - uast::Iterator *it = ctx->Filter(unode, query); - + auto it = ctx->Filter(unode, query); return newIter(it, false); } @@ -295,7 +295,10 @@ static void PyUastIterExt_dealloc(PyObject *self) { auto it = (PyUastIterExt *)self; delete(it->iter); - if (it->freeCtx && it->ctx) delete(it->ctx); + Py_XDECREF(it->pyCtx); + if (it->freeCtx && it->ctx) { + delete(it->ctx); + } it->freeCtx = false; it->ctx = nullptr; @@ -305,6 +308,7 @@ static void PyUastIterExt_dealloc(PyObject *self) { typedef struct { PyObject_HEAD ContextExt *p; + PyObject *pyCtx; } PythonContextExt; static void 
PythonContextExt_dealloc(PyObject *self) { @@ -337,9 +341,11 @@ static PyObject *PythonContextExt_filter(PythonContextExt *self, PyObject *args, PyObject* it = nullptr; try { it = self->p->Filter(node, query); + ((PythonContextExt *)it)->pyCtx = (PyObject *)self; } catch (const std::exception& e) { PyErr_SetString(PyExc_RuntimeError, e.what()); } + Py_INCREF((PyObject *)self); return it; } @@ -699,6 +705,7 @@ Node* Node::lookupOrCreate(PyObject* obj) { typedef struct { PyObject_HEAD Context *ctx; + PyObject *pyCtx; uast::Iterator *iter; bool freeCtx; } PyUastIter; @@ -878,7 +885,10 @@ static void PyUastIter_dealloc(PyObject *self) { auto it = (PyUastIter *)self; delete(it->iter); - if (it->freeCtx && it->ctx) delete(it->ctx); + Py_XDECREF(it->pyCtx); + if (it->freeCtx && it->ctx) { + delete(it->ctx); + } it->freeCtx = false; it->ctx = nullptr; @@ -888,6 +898,7 @@ static void PyUastIter_dealloc(PyObject *self) { typedef struct { PyObject_HEAD Context *p; + PyObject *pyCtx; } PythonContext; static void PythonContext_dealloc(PyObject *self) { @@ -909,9 +920,11 @@ static PyObject *PythonContext_filter(PythonContext *self, PyObject *args, PyObj PyObject* it = nullptr; try { it = self->p->Filter(node, query); + ((PythonContext *)it)->pyCtx = (PyObject *)self; } catch (const std::exception& e) { PyErr_SetString(PyExc_RuntimeError, e.what()); } + Py_INCREF((PyObject *)self); return it; } diff --git a/bblfsh/result_context.py b/bblfsh/result_context.py index fab5550..71a24cc 100644 --- a/bblfsh/result_context.py +++ b/bblfsh/result_context.py @@ -1,5 +1,3 @@ -from typing import Union, Optional - from bblfsh.aliases import ParseResponse from bblfsh.node import Node from bblfsh.node_iterator import NodeIterator @@ -27,20 +25,20 @@ def __init__(self, grpc_response: ParseResponse = None) -> None: [error.text for error in grpc_response.errors]) ) self._response = grpc_response - self._ctx = decode(grpc_response.uast, format=0) + self.ctx = decode(grpc_response.uast, format=0) 
else: self._response = None - self._ctx = uast() + self.ctx = uast() def filter(self, query: str) -> NodeIterator: - return NodeIterator(self._ctx.filter(query), self._ctx) + return NodeIterator(self.ctx.filter(query), self.ctx) def get_all(self) -> dict: - return self._ctx.load() + return self.ctx.load() def iterate(self, order: int) -> NodeIterator: TreeOrder.check_order(order) - return NodeIterator(iterator(self._ctx.root(), order), self._ctx) + return NodeIterator(iterator(self.ctx.root(), order), self.ctx) @property def language(self) -> str: @@ -52,11 +50,11 @@ def filename(self) -> str: @property def uast(self) -> Node: - return Node(node_ext=self._ctx.root()) + return Node(node_ext=self.ctx.root()) @property def ast(self) -> Node: - return Node(node_ext=self._ctx.root()) + return Node(node_ext=self.ctx.root()) def __str__(self) -> str: return str(self.get_all()) diff --git a/bblfsh/test.py b/bblfsh/test.py index 4cb2861..912ef49 100644 --- a/bblfsh/test.py +++ b/bblfsh/test.py @@ -119,7 +119,7 @@ def testFilterRoles(self) -> None: def testFilterProperties(self) -> None: ctx = uast() - obj = {"k1":"v1", "k2": "v2"} + obj = {"k1": "v1", "k2": "v2"} self.assertTrue(any(ctx.filter("/*[@k1='v1']", obj))) self.assertTrue(any(ctx.filter("/*[@k2='v2']", obj))) self.assertFalse(any(ctx.filter("/*[@k2='v1']", obj))) From 3df9efa79b29810a9c2edd50108e3e64bd967660 Mon Sep 17 00:00:00 2001 From: Juanjo Alvarez Date: Fri, 15 Mar 2019 13:48:49 +0100 Subject: [PATCH 07/16] Added missing file Signed-off-by: Juanjo Alvarez --- bblfsh/type_aliases.py | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 bblfsh/type_aliases.py diff --git a/bblfsh/type_aliases.py b/bblfsh/type_aliases.py new file mode 100644 index 0000000..0c1b756 --- /dev/null +++ b/bblfsh/type_aliases.py @@ -0,0 +1,3 @@ +from typing import Union + +ResultMultiType = Union[dict, int, float, bool, str, None] From a3e9c85e9ea9e0a05e80dbee28925e1d39a198a6 Mon Sep 17 00:00:00 2001 From: Juanjo Alvarez Date: 
Fri, 15 Mar 2019 17:01:26 +0100 Subject: [PATCH 08/16] Make the V1 tests discoverable Signed-off-by: Juanjo Alvarez --- bblfsh/pyuast.cc | 2 -- bblfsh/test.py | 4 ++++ bblfsh/{compat_test.py => test_compat.py} | 0 3 files changed, 4 insertions(+), 2 deletions(-) rename bblfsh/{compat_test.py => test_compat.py} (100%) diff --git a/bblfsh/pyuast.cc b/bblfsh/pyuast.cc index 035ec17..8b0748a 100644 --- a/bblfsh/pyuast.cc +++ b/bblfsh/pyuast.cc @@ -295,7 +295,6 @@ static void PyUastIterExt_dealloc(PyObject *self) { auto it = (PyUastIterExt *)self; delete(it->iter); - Py_XDECREF(it->pyCtx); if (it->freeCtx && it->ctx) { delete(it->ctx); } @@ -885,7 +884,6 @@ static void PyUastIter_dealloc(PyObject *self) { auto it = (PyUastIter *)self; delete(it->iter); - Py_XDECREF(it->pyCtx); if (it->freeCtx && it->ctx) { delete(it->ctx); } diff --git a/bblfsh/test.py b/bblfsh/test.py index 912ef49..ff6caa9 100644 --- a/bblfsh/test.py +++ b/bblfsh/test.py @@ -36,6 +36,7 @@ def _parse_fixture(self) -> ResultContext: self._validate_ctx(ctx) return ctx + """ def testVersion(self) -> None: version = self.client.version() self.assertTrue(hasattr(version, "version")) @@ -187,6 +188,7 @@ def testFilterBadType(self) -> None: def testRoleIdName(self) -> None: self.assertEqual(role_id(role_name(1)), 1) self.assertEqual(role_name(role_id("IDENTIFIER")), "IDENTIFIER") + """ @staticmethod def _itTestTree() -> dict: @@ -256,6 +258,7 @@ def testIteratorPostOrder(self) -> None: self.assertListEqual(expanded, ['son1_1', 'son1_2', 'son1', 'son2_1', 'son2_2', 'son2', 'root']) + """ def testIteratorLevelOrder(self) -> None: root = self._itTestTree() it = iterator(root, TreeOrder.LEVEL_ORDER) @@ -335,6 +338,7 @@ def testSupportedLanguages(self) -> None: for key in ('language', 'version', 'status', 'features'): self.assertTrue(hasattr(l, key)) self.assertIsNotNone(getattr(l, key)) + """ if __name__ == "__main__": diff --git a/bblfsh/compat_test.py b/bblfsh/test_compat.py similarity index 100% rename from 
bblfsh/compat_test.py rename to bblfsh/test_compat.py From f07a020b95f36e85474c399d99624acf0e1532d0 Mon Sep 17 00:00:00 2001 From: "M. J. Fromberger" Date: Mon, 18 Mar 2019 17:54:56 +0100 Subject: [PATCH 09/16] Update bblfsh/compat.py Co-Authored-By: juanjux --- bblfsh/compat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bblfsh/compat.py b/bblfsh/compat.py index f4a7702..a623b5d 100644 --- a/bblfsh/compat.py +++ b/bblfsh/compat.py @@ -45,7 +45,7 @@ def ctx(self) -> ResultContext: @property def elapsed(self) -> int: - # FIXME(juanjux): check if the can get this or measure ourselves + # FIXME(juanjux): check if the caller can get this, or measure it ourselves. return -1 @property From 0d5ab0d4880b9a3225bfbca5ab8df08d2b935da0 Mon Sep 17 00:00:00 2001 From: Juanjo Alvarez Date: Mon, 18 Mar 2019 18:03:20 +0100 Subject: [PATCH 10/16] Some fixes from review Signed-off-by: Juanjo Alvarez --- bblfsh/compat.py | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/bblfsh/compat.py b/bblfsh/compat.py index f4a7702..7df5880 100644 --- a/bblfsh/compat.py +++ b/bblfsh/compat.py @@ -17,9 +17,7 @@ from bblfsh.pyuast import uast, iterator as native_iterator from bblfsh.tree_order import TreeOrder -if "BBLFSH_COMPAT_SHUTUP" not in os.environ: - print("Warning: using deprecated bblfsh v1 compatibility layer.", - file=sys.stderr) +print("Warning: using deprecated bblfsh v1 compatibility layer.", file=sys.stderr) class WrongTypeException(Exception): @@ -109,15 +107,10 @@ def close(self) -> None: class CompatNodeIterator: - def __init__( - self, - nodeit: NodeIterator, - only_nodes: bool = False - ) -> None: + def __init__(self, nodeit: NodeIterator, only_nodes: bool = False) -> None: self._nodeit = nodeit self._only_nodes = only_nodes # Used to forward calls of the old Node object - # Check if this, and properties(), are needed self._last_node: Optional[Node] = None def __iter__(self) -> 'CompatNodeIterator': @@ -158,8 +151,8 @@ def 
properties(self) -> dict: return {} -def iterator(n: Union[Node, CompatNodeIterator], order: TreeOrder = TreeOrder.PRE_ORDER)\ - -> CompatNodeIterator: +def iterator(n: Union[Node, CompatNodeIterator, dict], + order: TreeOrder = TreeOrder.PRE_ORDER) -> CompatNodeIterator: if isinstance(n, CompatNodeIterator): return CompatNodeIterator(n._nodeit.iterate(order), only_nodes=True) From fc928b660a2b6db1766da2cbfcfabd7382d183d5 Mon Sep 17 00:00:00 2001 From: Juanjo Alvarez Date: Tue, 19 Mar 2019 10:06:08 +0100 Subject: [PATCH 11/16] Add docstrings to the compat module Signed-off-by: Juanjo Alvarez --- bblfsh/compat.py | 114 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 114 insertions(+) diff --git a/bblfsh/compat.py b/bblfsh/compat.py index 5e377b8..1051cdd 100644 --- a/bblfsh/compat.py +++ b/bblfsh/compat.py @@ -1,3 +1,16 @@ +""" +This file provides a compatibility layer with the old UAST V1 (or client-python +v2) API. You can see a summary of that API here: + +https://github.com/bblfsh/client-python/blob/d485273f457a174b40b820ad71195a739db04197/README.md + +Note that this won't translate the XPath queries from the old projection to the new use; +even when using this module you're expected to use expressions matching the new +projection. + +Note that since this is a pure Python translation layer, some performance +impact is to be expected. +""" import os import sys from typing import Union, List, Any, Optional @@ -21,51 +34,92 @@ class WrongTypeException(Exception): + """ + This exception is raised when the API receives an unexpected type + """ pass class CompatParseResponse: + """ + This class emulates the API of the old ParseResponse object. + """ def __init__(self, ctx: ResultContext, filename: str = "") -> None: self._res_context = ctx self._filename = filename @property def uast(self) -> Node: + """ + Returns the root Node. + """ return self._res_context.uast @property def ast(self) -> Node: + """ + Returns the root Node. 
This is provided for compatibility, but + since the type of result is now specified using CompatBblfshClient.parse + or native_parse, it'll return the same as uast(). + """ return self._res_context.ast @property def ctx(self) -> ResultContext: + """ + Returns the ResultContext of the response. + """ return self._res_context @property def elapsed(self) -> int: + """ + Provided for compatibility, but since the new API's ParseResponse doesn't + provide an elapsed time it'll always return -1. + """ # FIXME(juanjux): check if the caller can get this, or measure it ourselves. return -1 @property def language(self) -> str: + """ + Returns the language used for the request. + """ return self._res_context.language @property def filename(self) -> str: + """ + Returns the filename used for the request. + """ return self._filename @property def DESCRIPTOR(self) -> Any: + """ + Returns the gRPC context descriptor. + """ return self._res_context.ctx.DESCRIPTOR @property def errors(selfs) -> List: + """ + Provided for compatibility. Since the new API will raise exceptions on errors, + this just returns an empty array. + """ # ParseResponse would have raised an exception on errors return [] class CompatBblfshClient: + """ + This emulates the methods and properties of the old BblfshClient. + """ def __init__(self, endpoint: Union[str, grpc.Channel]) -> None: + """ + Connects to the specified grpc endpoint which can be specified either as + a grpc Channel object or a connection string (like "0.0.0.0:6432"). + """ self._bblfsh_cli = bcli.BblfshClient(endpoint) self._channel = self._bblfsh_cli._channel @@ -86,28 +140,54 @@ def _parse(self, filename: str, language: str = None, contents: str = None, def parse(self, filename: str, language: str = None, contents: str = None, timeout: float = None) -> CompatParseResponse: + """ + Parse the specified filename or contents and return a CompatParseResponse.
+ """ + return self._parse(filename, language, contents, timeout, Mode.Value('ANNOTATED')) def native_parse(self, filename: str, language: str = None, contents: str = None, timeout: float = None) -> CompatParseResponse: + """ + Same as parse() but the returned response will include only the native + (non annotated) AST. + """ return self._parse(filename, language, contents, timeout, Mode.Value('NATIVE')) def supported_languages(self) -> List[str]: + """ + Return a list of the languages that can be parsed by the connected + endpoint (driver or bblfsh daemon). + """ return self._bblfsh_cli.supported_languages() def version(self) -> VersionResponse: + """ + Returns the connected endpoint version. + """ return self._bblfsh_cli.version() def close(self) -> None: + """ + Closes the connection to the endpoint. + """ return self._bblfsh_cli.close() class CompatNodeIterator: + """ + This emulates the API of the pre-v3 iterators. + """ def __init__(self, nodeit: NodeIterator, only_nodes: bool = False) -> None: + """ + Creates a CompatNodeIterator compatibility object using a NodeIterator + from the post-v3 API. If the only_nodes parameter is set to true, + scalars and strings won't be included in the results. + """ self._nodeit = nodeit self._only_nodes = only_nodes # Used to forward calls of the old Node object @@ -138,6 +218,9 @@ def __next__(self) -> Node: return ret_val def filter(self, query: str) -> Optional['CompatNodeIterator']: + """ + Further filter the results using this iterator as base. + """ if not self._last_node: return None @@ -145,6 +228,9 @@ def filter(self, query: str) -> Optional['CompatNodeIterator']: @property def properties(self) -> dict: + """ + Returns the properties of the current node in the iteration. 
+ """ if isinstance(self._last_node, dict): return self._last_node.keys() else: @@ -153,6 +239,10 @@ def properties(self) -> dict: def iterator(n: Union[Node, CompatNodeIterator, dict], order: TreeOrder = TreeOrder.PRE_ORDER) -> CompatNodeIterator: + """ + This function has the same signature as the pre-v3 iterator() + call returning a compatibility CompatNodeIterator. + """ if isinstance(n, CompatNodeIterator): return CompatNodeIterator(n._nodeit.iterate(order), only_nodes=True) @@ -169,15 +259,27 @@ def iterator(n: Union[Node, CompatNodeIterator, dict], def filter(n: Node, query: str) -> CompatNodeIterator: + """ + This function has the same signature as the pre-v3 filter() returning a + compatibility CompatNodeIterator. + """ ctx = uast() return CompatNodeIterator(NodeIterator(ctx.filter(query, n.internal_node), ctx)) def filter_nodes(n: Node, query: str) -> CompatNodeIterator: + """ + Utility function. Same as filter() but will only filter for nodes (i.e. + it will exclude scalars and positions). + """ return CompatNodeIterator(filter(n, query)._nodeit, only_nodes=True) class TypedQueryException(Exception): + """ + This exception will be raised when a query for a specific type (str, int, float...) + returns a different type or more than one result. + """ pass @@ -203,18 +305,30 @@ def _scalariter2item(n: Node, query: str, wanted_type: type) -> Any: def filter_string(n: Node, query: str) -> str: + """ + Filter and ensure that the returned value is of string type. + """ return _scalariter2item(n, query, str) def filter_bool(n: Node, query: str) -> bool: + """ + Filter and ensure that the returned value is of type bool. + """ return _scalariter2item(n, query, bool) def filter_int(n: Node, query: str) -> int: + """ + Filter and ensure that the returned value is of type int. + """ return _scalariter2item(n, query, int) def filter_float(n: Node, query: str) -> float: + """ + Filter and ensure that the returned value is of type float.
+ """ return _scalariter2item(n, query, float) From 1031d4ac2510e60b7bbb5fa07f20dee9f9c1411a Mon Sep 17 00:00:00 2001 From: Juanjo Alvarez Date: Wed, 20 Mar 2019 13:06:39 +0100 Subject: [PATCH 12/16] Upgrade SDK dependency Signed-off-by: Juanjo Alvarez --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 98b9123..4abb231 100644 --- a/setup.py +++ b/setup.py @@ -17,7 +17,7 @@ LIBUAST_ARCH = "linux-amd64" SDK_V1_VERSION = "v1.16.1" SDK_V1_MAJOR = SDK_V1_VERSION.split('.')[0] -SDK_V2_VERSION = "v2.12.0" +SDK_V2_VERSION = "v2.15.0" SDK_V2_MAJOR = SDK_V2_VERSION.split('.')[0] FORMAT_ARGS = globals() From 99bd4d807828d07b6894f1945285a6b1858b1c8c Mon Sep 17 00:00:00 2001 From: Juanjo Alvarez Date: Wed, 20 Mar 2019 18:43:02 +0100 Subject: [PATCH 13/16] Fix children duplicating nodes and unchecked nullptr in pyuast.cc Signed-off-by: Juanjo Alvarez --- bblfsh/node.py | 57 ++++++++++++++++++++----------------------- bblfsh/pyuast.cc | 1 + bblfsh/test.py | 4 --- bblfsh/test_compat.py | 13 ++++++++-- 4 files changed, 38 insertions(+), 37 deletions(-) diff --git a/bblfsh/node.py b/bblfsh/node.py index 83ebc58..46c60d8 100644 --- a/bblfsh/node.py +++ b/bblfsh/node.py @@ -47,7 +47,26 @@ def offset(self, v: int) -> None: class CompatChildren(MutableSequence): def __init__(self, parent: "Node") -> None: - self._children = parent.get_dict()["@children"] + self._par_dict = parent.get_dict() + self._children = self._sync_children() + + def _sync_children(self) -> None: + if "_children" not in self._par_dict: + self._par_dict["_children"] = [] + children = self._par_dict["_children"] + for k, v in self._par_dict.items(): + if k in ("_children", "@pos", "@role", "@type"): + continue + + tv = type(v) + if tv in (Node, dict): + if v not in children: + children.append(v) + elif tv in (list, tuple): + # Get all node|dict types inside the list and add to children + children.extend([i for i in v if type(i) in (Node, dict) and i not in 
children]) + # else ignore it + return children @staticmethod def _node2dict(n: Union['Node', dict]) -> dict: @@ -66,13 +85,16 @@ def __delitem__(self, idx: Union[int, slice]) -> None: del self._children[idx] def __setitem__(self, idx: Union[int, slice], val: Union['Node', dict]) -> None: - self._children[idx] = self._node2dict(val) + self._par_dict["_children"].__setitem__(idx, self._node2dict(val)) + self._children = self._sync_children() def insert(self, idx: int, val: Union['Node', dict]) -> None: - self._children.insert(idx, self._node2dict(val)) + self._par_dict["_children"].insert(idx, self._node2dict(val)) + self._children = self._sync_children() def append(self, val: Union['Node', dict]) -> None: - self._children.append(self._node2dict(val)) + self._par_dict["_children"].append(self._node2dict(val)) + self._children = self._sync_children() def extend(self, items: List[Union['Node', dict]]) -> None: for i in items: @@ -112,32 +134,6 @@ def __init__(self, node_ext: NodeExt = None, value: ResultMultiType = None) -> N self.node_ext = node_ext - if isinstance(self.internal_node, dict): - self._load_children() - - # This is for v1 "node.children" compatibility. It will update the children - # property with the dict or Node objects in properties or list/tuple properties - # when .children is accessed (because the user could change the node using get_dict() - # or .properties). - # Also, all these " in children" are O(1) so this will be slow for frequently accessing - # the children property on big nodes. 
- def _load_children(self) -> None: - """Get all properties of type node or dict and load them into the list""" - d = self.get_dict() - children = d.get("@children", []) - for k, v in d.items(): - if k in ("@children", "@pos", "@role", "@type"): - continue - - tv = type(v) - if tv in (Node, dict): - if v not in children: - children.append(v) - elif tv in (list, tuple): - # Get all node|dict types inside the list and add to children - children.extend([i for i in v if type(i) in (Node, dict) and i not in children]) - # else ignore it - def __str__(self) -> str: return str(self.get()) @@ -200,7 +196,6 @@ def _is_dict_list(self, key: str) -> Optional[List]: @property def children(self) -> CompatChildren: - self._load_children() return CompatChildren(self) @property diff --git a/bblfsh/pyuast.cc b/bblfsh/pyuast.cc index 8b0748a..af69699 100644 --- a/bblfsh/pyuast.cc +++ b/bblfsh/pyuast.cc @@ -551,6 +551,7 @@ class Node : public uast::Node { if (!keys) return nullptr; PyObject* key = PyList_GetItem(keys, i); // borrows + if (!key) return nullptr; const char * k = PyUnicode_AsUTF8(key); std::string* s = new std::string(k); diff --git a/bblfsh/test.py b/bblfsh/test.py index ff6caa9..912ef49 100644 --- a/bblfsh/test.py +++ b/bblfsh/test.py @@ -36,7 +36,6 @@ def _parse_fixture(self) -> ResultContext: self._validate_ctx(ctx) return ctx - """ def testVersion(self) -> None: version = self.client.version() self.assertTrue(hasattr(version, "version")) @@ -188,7 +187,6 @@ def testFilterBadType(self) -> None: def testRoleIdName(self) -> None: self.assertEqual(role_id(role_name(1)), 1) self.assertEqual(role_name(role_id("IDENTIFIER")), "IDENTIFIER") - """ @staticmethod def _itTestTree() -> dict: @@ -258,7 +256,6 @@ def testIteratorPostOrder(self) -> None: self.assertListEqual(expanded, ['son1_1', 'son1_2', 'son1', 'son2_1', 'son2_2', 'son2', 'root']) - """ def testIteratorLevelOrder(self) -> None: root = self._itTestTree() it = iterator(root, TreeOrder.LEVEL_ORDER) @@ -338,7 +335,6 
@@ def testSupportedLanguages(self) -> None: for key in ('language', 'version', 'status', 'features'): self.assertTrue(hasattr(l, key)) self.assertIsNotNone(getattr(l, key)) - """ if __name__ == "__main__": diff --git a/bblfsh/test_compat.py b/bblfsh/test_compat.py index 2ab7ec4..181fe91 100644 --- a/bblfsh/test_compat.py +++ b/bblfsh/test_compat.py @@ -226,7 +226,7 @@ def testIteratorPositionOrder(self): 'son1_2', 'son2_2', 'son2']) def testFilterInsideIter(self): - root = self.client.parse(__file__).uast + root = self._parse_fixture().uast it = iterator(root, TreeOrder.PRE_ORDER) self.assertIsNotNone(it) for n in it: @@ -275,7 +275,6 @@ def testManyParsesAndFilters(self): xpath_filter(root, "//*[@role='Identifier']") after = resource.getrusage(resource.RUSAGE_SELF) - self.assertLess(after[2] / before[2], 4.0) def testSupportedLanguages(self): @@ -304,6 +303,16 @@ def testChildren(self): self.assertDictEqual(n.children[3], l[0]) self.assertDictEqual(n.children[4], l[1]) + def testChildrenFile(self): + root = self._parse_fixture().uast + self.assertEqual(len(root.children), 10) + n = Node() + n.internal_type = 'child_node' + root.children.append(n) + self.assertEqual(len(root.children), 11) + last = root.children[-1] + self.assertDictEqual(last, n.internal_node) + if __name__ == "__main__": From 7a0a80e8e0689470f09ec41cc9e1cf1b2e34f33e Mon Sep 17 00:00:00 2001 From: Juanjo Alvarez Date: Wed, 20 Mar 2019 18:45:42 +0100 Subject: [PATCH 14/16] Restore deleted comment Signed-off-by: Juanjo Alvarez --- bblfsh/node.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/bblfsh/node.py b/bblfsh/node.py index 46c60d8..09dbca5 100644 --- a/bblfsh/node.py +++ b/bblfsh/node.py @@ -44,7 +44,12 @@ def offset(self) -> int: def offset(self, v: int) -> None: self._parent_pos["offset"] = v - +# This is for v1 "node.children" compatibility. 
It will update the children +# property with the dict or Node objects in properties or list/tuple properties +# when .children is accessed (because the user could change the node using get_dict() +# or .properties). +# Also, all these " in children" are O(1) so this will be slow for frequently accessing +# the children property on big nodes. class CompatChildren(MutableSequence): def __init__(self, parent: "Node") -> None: self._par_dict = parent.get_dict() From faa8c76c9960208a36b984c3b8fd9db6757c7dea Mon Sep 17 00:00:00 2001 From: Juanjo Alvarez Date: Thu, 21 Mar 2019 09:23:09 +0100 Subject: [PATCH 15/16] Fix comment Signed-off-by: Juanjo Alvarez --- bblfsh/node.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bblfsh/node.py b/bblfsh/node.py index 09dbca5..9d6a23d 100644 --- a/bblfsh/node.py +++ b/bblfsh/node.py @@ -48,7 +48,7 @@ def offset(self, v: int) -> None: # property with the dict or Node objects in properties or list/tuple properties # when .children is accessed (because the user could change the node using get_dict() # or .properties). -# Also, all these " in children" are O(1) so this will be slow for frequently accessing +# Also, all these " in children" are O(n) so this will be slow for frequently accessing # the children property on big nodes. 
class CompatChildren(MutableSequence): def __init__(self, parent: "Node") -> None: From 120047a7145c40934e226d252f07e25f687d4db6 Mon Sep 17 00:00:00 2001 From: Juanjo Alvarez Date: Fri, 22 Mar 2019 09:18:12 +0100 Subject: [PATCH 16/16] Renamed bcli alias to newbbl to avoid command line confusion Signed-off-by: Juanjo Alvarez --- bblfsh/compat.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bblfsh/compat.py b/bblfsh/compat.py index 1051cdd..18dacb7 100644 --- a/bblfsh/compat.py +++ b/bblfsh/compat.py @@ -17,7 +17,7 @@ import grpc -import bblfsh.client as bcli +import bblfsh.client as newbbl from bblfsh import role_id, role_name from bblfsh.node import Node from bblfsh.node_iterator import NodeIterator @@ -120,7 +120,7 @@ def __init__(self, endpoint: Union[str, grpc.Channel]) -> None: Connects to the specified grpc endpoint which can be specified either as a grpc Channel object or a connection string (like "0.0.0.0:6432"). """ - self._bblfsh_cli = bcli.BblfshClient(endpoint) + self._bblfsh_cli = newbbl.BblfshClient(endpoint) self._channel = self._bblfsh_cli._channel self._stub_v1 = self._bblfsh_cli._stub_v1