-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathom-to-xml.el
446 lines (393 loc) · 15.5 KB
/
om-to-xml.el
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
;;; om-to-xml.el --- Converts an org-mode file to XML. -*- lexical-binding: t -*-
;; Copyright © 2020 Norman Walsh
;; Author: Norman Walsh <ndw at nwalsh dot com>
;; Keywords: org-mode, org-ml, XML
;; This file is not part of GNU Emacs.
;; This program is Free Software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;; This program is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
;; for more details.
;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
;;; Prerequisites:
;; org-ml: https://github.com/ndwarshuis/org-ml
;;; Commentary:
;; This file converts an org-mode file to XML. It is a significant
;; improvement over `org-to-xml.el`, previously package, in that it
;; uses the excellent `org-ml` library by Nate Dwarshuis to parse the
;; Org data structures. I more-or-less abandoned attempting to improve
;; `org-to-xml.el` at the point where I realized I needed a better
;; parsing library for Org.
;; In addition to better parsing, `om-to-xml.el` provides a few
;; extensibility options:
;; `om-to-xml-plist-skip-keys':
;; The `om' library represents the properties of Org nodes with a
;; plist of properties. The properties named in this list will be
;; excluded from the XML output.
;; `om-to-xml-newline-before-start' and
;; `om-to-xml-newline-before-end':
;; The org node types named in these lists will have an extra
;; newline before their start and/or end tag. This makes the
;; XML output a little easier to read.
;; `om-to-xml-element-handlers':
;; This is an alist that maps node types to special functions for XML
;; conversion. By default, all of the Org nodes are converted to XML
;; in the same way: they get a start tag, the plist values become
;; attributes (or children), and any content is placed inside the
;; element. This list is an opportunity to insert special processing.
;;
;; If you define your own block types, for example, you may want to
;; define functions for converting them to XML. Four node types are
;; treated specially by default:
;;
;; + src-block: In the Org data structure, the content of the
;; src-block is a value in the plist. For XML, I turn it into
;; element content.
;; + macro: Org macros values are delimited by {{{ and }}}. There's no
;; reason to leave those delimiters in the XML.
;; + subscript and superscript: This library isn't really intended
;; as an export library exactly. But I treat sub and superscript
;; specially for convenience. If a _ or ^ wouldn't be repesented as
;; a sub or superscript on export, I ignore the Org data structure
;; and output a literal _ or ^ instead.
;;
;; More special functions may be added in the future.
;; `om-to-xml-post-process'
;; If this is set to a function, that function will be run on the
;; output XML buffer before it is saved.
;; Installation: put this to your load path and add
;;
;; (require 'om-to-xml)
;;
;; to your .emacs.
;;
;; I have 'om-to-xml bound to a key, but there's nothing remotely
;; standard about my binding.
;;
;; Change log:
;; v0.0.7: Updated to use org-ml (the om.el refactored)
;; ...
;; v0.0.1: Initial release
;;; Code:
(require 'org)
(require 'org-ml)
(defconst om-to-xml--om-to-xml-version "0.0.7")
(defconst om-to-xml--om-to-xml-uri "https://github.com/ndw/org-to-xml")
(defconst om-to-xml--namespace "https://nwalsh.com/ns/org-to-xml")
(defvar om-to-xml-plist-skip-keys
'(:begin
:end
:pre-blank
:post-blank
:contents-begin
:contents-end
:post-affiliated
:use-brackets-p)
"List of om node keys that should be ignored.
All key values that aren't ignored are turned into attributes." )
;; From an XML perspective, we can assume that whitespace
;; between "block" elements is irrelevant. These lists add
;; newlines to make the resulting XML a little more readable.
(defvar om-to-xml-newline-before-start
'(keyword
headline
paragraph
section
property-drawer
node-property)
"List of elements that should be preceded by a newline.")
(defvar om-to-xml-newline-before-end
'(property-drawer
headline
section)
"List of elements whose end tag should be preceded by a newline.")
(defvar om-to-xml-element-handlers
'((src-block . om-to-xml--om-src-block-to-xml)
(macro . om-to-xml--om-macro-to-xml)
(superscript . om-to-xml--om-sub-superscript-to-xml)
(subscript . om-to-xml--om-sub-superscript-to-xml))
"Special element handlers.")
(defvar om-to-xml-post-process nil
"Post-processing hook.
If set to a function, that function will run after the XML is
generated.")
;;;###autoload
(defun om-to-xml (&optional filename)
"Convert an 'org-mode' buffer to XML.
If FILENAME is provided, then that filename is used to store the
document. Otherwise, the filename is derived from the name of the
Org file."
(interactive)
(if (eq major-mode 'org-mode)
(om-to-xml--org-to-xml (current-buffer) filename)
(message "Error: om-to-xml can only be applied to org-mode buffers")))
(defun om-to-xml--org-to-xml (buffer &optional filename)
"Convert the 'org-mode' BUFFER to XML; save the result in FILENAME.
If no FILENAME is given, the buffer filename will be used, with
.org removed and .xml added."
(let* ((buffn (buffer-file-name buffer))
(xmlfn (if filename
filename
(if (string-suffix-p ".org" buffn)
(concat (substring buffn 0 (- (length buffn) 4)) ".xml")
(concat buffn ".xml"))))
(om-list (om-to-xml--parse-buffer buffer)))
(om-to-xml--kill-xml-buffer xmlfn)
(with-temp-buffer
(delete-region (point-min) (point-max))
(insert "<?xml version=\"1.0\"?>\n")
(insert "<!-- Converted from org-mode to XML by om-to-xml version ")
(insert om-to-xml--om-to-xml-version)
(insert " -->\n<!-- See ")
(insert om-to-xml--om-to-xml-uri)
(insert " -->\n")
(om-to-xml--om-to-xml om-list)
(if (functionp om-to-xml-post-process)
(funcall om-to-xml-post-process))
(write-region (point-min) (point-max) xmlfn))
(message (concat "Converted org-mode: " xmlfn))))
(defun om-to-xml--kill-xml-buffer (filename)
"Kill the buffer associated with FILENAME."
(mapc
(lambda (buf)
(if (string= filename (buffer-file-name buf))
(kill-buffer buf)))
(buffer-list)))
(defun om-to-xml--parse-buffer (&optional buffer)
"Use om library to parse entire document in BUFFER."
(save-current-buffer
(if buffer
(set-buffer buffer))
(save-excursion
(let ((om-point (point-min))
(last-point -1)
(om-list '())
(elem nil))
(while (< om-point (point-max))
(setq elem (org-ml-parse-element-at om-point))
(setq parent (plist-get (cadr elem) :parent))
(setq om-point (plist-get (cadr elem) :end))
;; There are places where parsing seems to get stuck.
;; For example, if a paragraph precedes the first heading
;; or if the file ends with several blank lines. The
;; :end doesn't advance the cursor position and this
;; while loop never ends. To avoid that, we use last-point
;; to force the cursor to advance.
;; https://github.com/ndwarshuis/org-ml/issues/8
(if (<= om-point last-point)
(setq om-point (1+ last-point))
(setq om-list (append om-list (list elem))))
(setq last-point om-point)
(goto-char om-point))
om-list))))
(defun om-to-xml--plist-children (plist &optional exclude)
"Return the names of keys in PLIST that must be child elements.
Any keys listed in EXCLUDE are ignored."
(let ((list plist)
(children nil))
(while list
(setq key (car list))
(cond
((eq key :parent)
nil)
((member key exclude)
nil)
((and (cadr list) (listp (cadr list)))
(if children
(setq children (append children (list key)))
(setq children (list key))))
(t
nil))
(setq list (cddr list)))
children))
(defun om-to-xml--plist-attributes (plist &optional exclude)
"Insert atomic PLIST values as XML attributes.
Any keys whose names are listed in EXCLUDE are ignored.
The comparison is done by name because symbols are unintered."
(let ((list plist) key value)
(while list
(setq key (car list))
(setq value
(cond
((member (symbol-name key) exclude)
nil)
((eq (cadr list) nil)
nil)
((integerp (cadr list))
(number-to-string (cadr list)))
((symbolp (cadr list))
(symbol-name (cadr list)))
((stringp (cadr list))
(ndw/o2xml--om-xml-attribute-escape (cadr list)))
(t nil)))
(setq list (cddr list))
(if value
(let ((name (substring (symbol-name key) 1)))
(if (not (member key om-to-xml-plist-skip-keys))
(progn
(insert (concat " " name "=\"" value "\"")))))))))
(defun om-to-xml--property-drawer-properties (element)
"If ELEMENT is a headline, return the names of its properties.
Properties in this case meaning any properties defined in a
property-drawer. The properties are returned as upper-case symbols
because that's how they appear in the headline plist."
(if (and (eq (car element) 'headline)
(eq (caaddr element) 'section))
(let ((section (caddr element)))
(if (eq (caaddr section) 'property-drawer)
(let ((props (cddr (caddr section)))
(names '())
key name)
(while props
(setq key (plist-get (cadr (car props)) :key))
(setq name (concat ":" (upcase key)))
(setq names (append names (list name)))
(setq props (cdr props)))
names)))))
(defun om-to-xml--om-to-xml (om-list)
"Convert OM-LIST to XML."
(insert (concat "<document xmlns=\""
om-to-xml--namespace
"\">"))
(om-to-xml--om-to-xml-list om-list)
(insert "</document>\n"))
(defun om-to-xml--om-to-xml-list (om-list)
"Convert OM-LIST to XML."
(let ((list om-list)
symbol)
(while list
(om-to-xml--om-element-to-xml (car list))
(setq list (cdr list)))))
(defun om-to-xml--om-insert-blank (elem)
"Insert blank line if ELEM has a non-zero :post-blank property."
(let* ((plist (cadr elem))
(pblank (plist-get plist :post-blank))
(blank (and (integerp pblank) (> pblank 0))))
(if blank
(insert " "))))
(defun om-to-xml--om-src-block-to-xml (elem)
"Convert src-block ELEM to XML.
In the Org data model, the contents of the src-block is in a
value in the plist. In the XML, it makes more sense to put the
value in the content of the element."
(let ((symbol (car elem))
(plist (cadr elem)))
(if (member symbol om-to-xml-newline-before-start)
(insert "\n"))
(insert (concat "<" (symbol-name symbol)))
(om-to-xml--plist-attributes plist '(":value"))
(let ((value (plist-get plist :value)))
(insert ">")
(insert (om-to-xml--om-xml-content-escape (substring value 0 (1- (length value)))))
(insert "</src-block>"))))
(defun om-to-xml--om-element-to-xml (elem)
"Convert ELEM to XML.
Lists are recursively processed, perhaps by the appropriate
'om-to-xml-element-handler. Other values are inserted
directly."
(cond
((and (listp elem) (symbolp (car elem)))
(let* ((symbol (car elem))
(handler (alist-get symbol om-to-xml-element-handlers)))
(if (and handler (functionp handler))
(funcall handler elem)
(om-to-xml--om-base-element-to-xml elem))
(om-to-xml--om-insert-blank elem)))
((listp elem)
(let ((children elem))
(while children
(om-to-xml--om-element-to-xml (car children))
(setq children (cdr children)))))
(t
(insert (om-to-xml--om-xml-content-escape elem)))))
(defun om-to-xml--om-xml-content-escape (value)
"Escape the string VALUE appropriately for XML content."
(let ((str
(cond
((integerp value)
(number-to-string value))
(t
value))))
(replace-regexp-in-string "<" "<"
(replace-regexp-in-string ">" ">"
(replace-regexp-in-string "&" "&" str)))))
(defun ndw/o2xml--om-xml-attribute-escape (str)
"Escape the STR value appropriately for an XML attribute."
(replace-regexp-in-string "<" "<"
(replace-regexp-in-string ">" ">"
(replace-regexp-in-string "\"" """
(replace-regexp-in-string "&" "&" str)))))
(defun om-to-xml--om-macro-to-xml (elem)
"Convert a macro ELEM to XML.
All this really does is trim off the leading and trailing
macro indicators '{{{' and '}}}'."
(let* ((symbol (car elem))
(plist (cadr elem))
(value (plist-get plist :value)))
(if (member symbol om-to-xml-newline-before-start)
(insert "\n"))
(insert (concat "<" (symbol-name symbol)))
(om-to-xml--plist-attributes plist '(":value"))
;;; This is probably redundant, but safety first.
(if (string= (substring value 0 3) "{{{")
(setq value (substring value 3)))
(if (string= (substring value (- (length value) 3)) "}}}")
(setq value (substring value 0 (- (length value) 3))))
(insert (concat
" value=\""
(ndw/o2xml--om-xml-attribute-escape value)
"\""))
(insert "/>")))
(defun om-to-xml--om-sub-superscript-to-xml (elem)
"Convert subscript or superscript ELEM to XML.
Mostly, converting form Org to XML is a direct translation of the
data model, but superscripts and subscripts are interpreted a
little bit more like export. In particular, they are turned back
into `_` and `^` according to the setting of
`org-export-with-sub-superscripts'."
(let* ((symbol (car elem))
(plist (cadr elem))
(rest (cddr elem))
(brackets (plist-get plist :use-brackets-p)))
(if (or (eq org-export-with-sub-superscripts t)
(and (eq org-export-with-sub-superscripts '{})
brackets))
(om-to-xml--om-base-element-to-xml elem)
(progn
(if (eq symbol 'superscript)
(insert "^")
(insert "_"))
(insert (car rest))))))
(defun om-to-xml--om-base-element-to-xml (elem)
"Convert ELEM to XML."
(let* ((symbol (car elem))
(plist (cadr elem))
(rest (cddr elem))
(exclude (om-to-xml--property-drawer-properties elem)))
(if (member symbol om-to-xml-newline-before-start)
(insert "\n"))
(insert (concat "<" (symbol-name symbol)))
(om-to-xml--plist-attributes plist exclude)
(if rest
(progn
(insert ">")
(let ((list (om-to-xml--plist-children plist))
name)
(while list
(setq name (substring (symbol-name (car list)) 1))
(insert (concat "<" name ">"))
(om-to-xml--om-element-to-xml (plist-get plist (car list)))
(insert (concat "</" name ">"))
(setq list (cdr list))))
(om-to-xml--om-to-xml-list rest)
(if (member symbol om-to-xml-newline-before-end)
(insert "\n"))
(insert (concat "</" (symbol-name symbol) ">")))
(progn
(insert "/>")))))
(provide 'om-to-xml)
;;; om-to-xml.el ends here