
Commit

Merge pull request #51 from goodmami/v0.9.1
V0.9.1
  • Loading branch information
2 parents 64effe0 + eba3884 commit eb4d7c4
Showing 8 changed files with 97 additions and 12 deletions.
23 changes: 23 additions & 0 deletions CHANGELOG.md
@@ -5,6 +5,28 @@
(no unreleased changes yet)


## [v0.9.1][]

**Release date: 2020-01-16**

Partially revert some changes regarding the parsing of surface
alignments, as they caused issues, particularly when strings contain
`~` characters.

### Added

* `ALIGNMENT` production in `penman.lexer.PATTERNS` (this reverts a
change in v0.9.0) ([#50][])

### Changed

* Disallow tildes in the `ROLE` and `SYMBOL` patterns (this reverts a
change in v0.9.0) ([#50][])
* `ALIGNMENT` tokens are joined to the preceding `ROLE`, `SYMBOL`, or
`STRING` token, which means an alignment may now be separated from it
by a space (for example, `(a / alpha ~1)`)


## [v0.9.0][]

**Release date: 2020-01-07**
@@ -550,3 +572,4 @@ First release with very basic functionality.
[#45]: https://github.com/goodmami/penman/issues/45
[#47]: https://github.com/goodmami/penman/issues/47
[#48]: https://github.com/goodmami/penman/issues/48
[#50]: https://github.com/goodmami/penman/issues/50
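
The behavior described in this changelog entry can be checked from the public API. A minimal sketch, assuming the released `penman` 0.9.1 package and mirroring the tests added in this commit:

```python
import penman
from penman import surface

# An alignment written after a space now joins onto the preceding token,
# so both spellings decode to the same graph and the same alignment.
g1 = penman.decode('(a / alpha~1)')
g2 = penman.decode('(a / alpha ~1)')
assert g1.triples == g2.triples == [('a', ':instance', 'alpha')]
assert surface.alignments(g1) == {
    ('a', ':instance', 'alpha'): surface.Alignment((1,)),
}
```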
2 changes: 1 addition & 1 deletion docs/api/penman.lexer.rst
@@ -12,7 +12,7 @@ penman.lexer
A dictionary mapping token names to regular expressions. For
instance::

'ROLE': r':[^\s()\/:]*'
'ROLE': r':[^\s()\/:~]*'

The token names are used later by the :class:`TokenIterator` to
help with parsing.
2 changes: 1 addition & 1 deletion penman/__about__.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-

__version__ = '0.9.0'
__version__ = '0.9.1'
__version_info__ = tuple(
int(x) if x.isdigit() else x
for x in __version__.replace('.', ' ').replace('-', ' ').split()
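For reference, the version-info expression in this file turns the bumped version string into a tuple; a quick standalone check of that computation:

```python
# same expression as in penman/__about__.py, evaluated for the new version
__version__ = '0.9.1'
__version_info__ = tuple(
    int(x) if x.isdigit() else x
    for x in __version__.replace('.', ' ').replace('-', ' ').split()
)
assert __version_info__ == (0, 9, 1)
```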
15 changes: 11 additions & 4 deletions penman/codec.py
@@ -137,6 +137,8 @@ def _parse_node(self, tokens: TokenIterator):
# for robustness, don't assume next token is the concept
if tokens.peek().type in ('SYMBOL', 'STRING'):
concept = tokens.next().text
if tokens.peek().type == 'ALIGNMENT':
concept += tokens.next().text
else:
concept = None
logger.warning('Missing concept: %s', slash.line)
@@ -156,13 +158,18 @@ def _parse_edge(self, tokens: TokenIterator):
Edge := Role (Constant | Node)
"""
role = tokens.expect('ROLE')
target = None
role_token = tokens.expect('ROLE')
role = role_token.text
if tokens.peek().type == 'ALIGNMENT':
role += tokens.next().text

target = None
_next = tokens.peek()
next_type = _next.type
if next_type in ('SYMBOL', 'STRING'):
target = tokens.next().text
if tokens.peek().type == 'ALIGNMENT':
target += tokens.next().text
elif next_type == 'LPAREN':
target = self._parse_node(tokens)
# for robustness in parsing, allow edges with no target:
@@ -171,9 +178,9 @@
elif next_type not in ('ROLE', 'RPAREN'):
raise tokens.error('Expected: SYMBOL, STRING, LPAREN', token=_next)
else:
logger.warning('Missing target: %s', role.line)
logger.warning('Missing target: %s', role_token.line)

return (role.text, target)
return (role, target)

def parse_triples(self, s: str) -> List[BasicTriple]:
""" Parse a triple conjunction from *s*."""
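At the tree level, the joined `ALIGNMENT` text stays attached to the role or target it annotates. A small usage sketch, assuming `penman` 0.9.1 and mirroring the parser tests added below:

```python
from penman.codec import PENMANCodec

codec = PENMANCodec()

# an alignment on a role stays with the role text
assert codec.parse('(a :ARG~1 b~2)') == ('a', [(':ARG~1', 'b~2')])
# an alignment after a quoted string is appended to the string target
assert codec.parse('(a :ARG "str~ing"~1)') == (
    'a', [(':ARG', '"str~ing"~1')])
```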
13 changes: 11 additions & 2 deletions penman/layout.py
@@ -161,9 +161,18 @@ def _interpret_node(t: Node, variables: Set[Variable], model: Model):

# atomic targets
if is_atomic(target):
# remove any alignments
if target and '~' in target:
target, _, alignment = target.partition('~')
epis.append(Alignment.from_string(alignment))
if target.startswith('"'):
# need to handle alignments on strings differently
# because strings may contain ~ inside the quotes
pivot = target.rindex('"') + 1
if pivot < len(target):
epis.append(Alignment.from_string(target[pivot:]))
target = target[:pivot]
else:
target, _, alignment = target.partition('~')
epis.append(Alignment.from_string(alignment))
triple = (var, role, target)
if model.is_role_inverted(role):
if target in variables:
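The branch added above treats quoted strings specially when stripping a trailing alignment, since a `~` may legitimately appear inside the quotes. A self-contained sketch of that logic (the helper name `split_alignment` is hypothetical, not part of the library):

```python
from penman.surface import Alignment

def split_alignment(target):
    """Hypothetical helper mirroring the alignment handling above."""
    if target and '~' in target:
        if target.startswith('"'):
            # a string may contain '~' inside the quotes, so only text
            # after the closing quote counts as an alignment marker
            pivot = target.rindex('"') + 1
            if pivot < len(target):
                return target[:pivot], Alignment.from_string(target[pivot:])
        else:
            value, _, alignment = target.partition('~')
            return value, Alignment.from_string(alignment)
    return target, None

assert split_alignment('"str~ing"~1') == ('"str~ing"', Alignment((1,)))
assert split_alignment('alpha~2') == ('alpha', Alignment((2,)))
```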
7 changes: 4 additions & 3 deletions penman/lexer.py
@@ -20,10 +20,11 @@
PATTERNS = {
'COMMENT': r'\#.*$',
'STRING': r'"[^"\\]*(?:\\.[^"\\]*)*"',
'ALIGNMENT': r'~(?:[a-z]\.?)?[0-9]+(?:,[0-9]+)*',
# ROLE cannot be made up of COLON + SYMBOL because it then becomes
# difficult to detect anonymous roles: (a : b) vs (a :b c)
'ROLE': r':[^\s()\/:]*',
'SYMBOL': r'[^\s()\/:]+',
'ROLE': r':[^\s()\/:~]*',
'SYMBOL': r'[^\s()\/:~]+',
'LPAREN': r'\(',
'RPAREN': r'\)',
'SLASH': r'\/', # concept (node label) role
@@ -41,7 +42,7 @@ def _compile(*names: str) -> Pattern[str]:
PENMAN_RE = _compile('COMMENT',
'STRING',
'LPAREN', 'RPAREN', 'SLASH',
'ROLE', 'SYMBOL',
'ROLE', 'SYMBOL', 'ALIGNMENT',
'UNEXPECTED')
TRIPLE_RE = _compile('COMMENT',
'STRING',
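In isolation, the reinstated `ALIGNMENT` pattern and the tightened `SYMBOL` pattern split a trailing alignment off the symbol it follows. A quick check, assuming `penman` 0.9.1:

```python
import re

from penman import lexer

# the ALIGNMENT regex matches bare and prefixed index lists
assert re.fullmatch(lexer.PATTERNS['ALIGNMENT'], '~1')
assert re.fullmatch(lexer.PATTERNS['ALIGNMENT'], '~e.1,2')

# SYMBOL no longer consumes '~', so the alignment becomes its own token
assert [t.type for t in lexer.lex('(a / alpha~1)')] == [
    'LPAREN', 'SYMBOL', 'SLASH', 'SYMBOL', 'ALIGNMENT', 'RPAREN']
```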
39 changes: 39 additions & 0 deletions tests/test_codec.py
@@ -36,6 +36,11 @@ def test_parse(self):
'a', [(':ARG-of', 'b')])
assert codec.parse('(a :ARG~1 b~2)') == (
'a', [(':ARG~1', 'b~2')])
# https://github.com/goodmami/penman/issues/50
assert codec.parse('(a :ARG "str~ing")') == (
'a', [(':ARG', '"str~ing"')])
assert codec.parse('(a :ARG "str~ing"~1)') == (
'a', [(':ARG', '"str~ing"~1')])

def test_format(self):
assert codec.format(
@@ -248,6 +253,40 @@ def test_decode_atoms(self):
('x20', ':instance', '876-9'),
]

def test_decode_alignments(self):
g = decode('(a / alpha~1)')
assert g.triples == [
('a', ':instance', 'alpha'),
]
assert surface.alignments(g) == {
('a', ':instance', 'alpha'): surface.Alignment((1,)),
}
assert surface.role_alignments(g) == {}

assert decode('(a / alpha~1)') == decode('(a / alpha ~1)')

g = decode('(a :ARG~e.1,2 b)')
assert g.triples == [
('a', ':instance', None),
('a', ':ARG', 'b'),
]
assert surface.alignments(g) == {}
assert surface.role_alignments(g) == {
('a', ':ARG', 'b'): surface.RoleAlignment((1, 2), prefix='e.'),
}

# https://github.com/goodmami/penman/issues/50
g = decode('(a :ARG1 "str~ing" :ARG2 "str~ing"~1)')
assert g.triples == [
('a', ':instance', None),
('a', ':ARG1', '"str~ing"'),
('a', ':ARG2', '"str~ing"'),
]
assert surface.alignments(g) == {
('a', ':ARG2', '"str~ing"'): surface.Alignment((1,)),
}
assert surface.role_alignments(g) == {}

def test_decode_invalid_graphs(self):
# some robustness
g = decode('(g / )')
8 changes: 7 additions & 1 deletion tests/test_lexer.py
@@ -20,12 +20,18 @@ def _lex(s):
'RPAREN']
assert _lex('(a :ROLE~e.1,2 b~3)') == [
'LPAREN', 'SYMBOL',
'ROLE', 'SYMBOL',
'ROLE', 'ALIGNMENT', 'SYMBOL', 'ALIGNMENT',
'RPAREN']
assert _lex('# comment\n# (n / nope)\n(a / alpha)') == [
'COMMENT', 'COMMENT', 'LPAREN', 'SYMBOL', 'SLASH', 'SYMBOL', 'RPAREN']


def test_lexing_issue_50():
# https://github.com/goodmami/penman/issues/50
assert [tok.type for tok in lexer.lex('(a :ROLE "a~b"~1)')] == [
'LPAREN', 'SYMBOL', 'ROLE', 'STRING', 'ALIGNMENT', 'RPAREN']


def test_lex_triples():
def _lex(s):
return [tok.type for tok in lexer.lex(s, pattern=lexer.TRIPLE_RE)]
