This repository was archived by the owner on Feb 3, 2018. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 19
Sample JSON parser
kanishka-azimi edited this page Jan 28, 2012
·
21 revisions
The following code is for FnParse 2. It’s also included in the source.
Things to note:
- In this case, the tokens are single characters. This means that Strings can be used directly as sequences of tokens.
- Also, the state data structure used is a struct-map called state-s, which contains the remainder, the current line, and the current column.
- Only the remainder is required to be contained in a state data structure in any parser. The line and column are useful for giving intelligent errors.
- Most of the terminal rules—such as rules matching single characters or line breaks—increment the line or column in the state they are given. That’s how it works.
- This parser throws a couple of error-kit errors, such as when it detects an invalid Unicode sequence. This is nowhere near complete—a real parser would probably raise errors in a lot more instances—but here it’s just to show one way to implement errors.
(ns name.choi.joshua.fnparse.json
(:use name.choi.joshua.fnparse clojure.contrib.error-kit
[clojure.contrib.seq-utils :only [flatten]]))
;; These are some functions that the rules will use. A lot of these are
;; optional.
; A JSON node, which what the parsing will return in the end.
(defstruct node-s :kind :content)
; The parsing state data structure. The remaining tokens are stored
; in :remainder, and the current column and line are stored in their
; respective fields.
(defstruct state-s :remainder :column :line)
(def remainder-a
(accessor state-s :remainder))
(def make-node
(partial struct node-s))
(def make-scalar-node
(partial make-node :scalar))
(def make-array-node
(partial make-node :array))
(def make-object-node
(partial make-node :object))
(def apply-str
(partial apply str))
;; These two functions are given a rule and make it so that it
;; increments the current column (or the current line).
(defn- nb-char [subrule]
(invisi-conc subrule (update-info :column inc))) ;very terse. run all parse matches,
;only use parsed value from subrule
(def nb-char-lit
(comp nb-char lit)) ; lit is a FnParse function that creates a literal
; rule.
(defn- b-char [subrule]
(invisi-conc subrule (update-info :line inc)))
;; A couple of parse errors have been put here and there. It's nowhere
;; near complete, but rather it's to show examples of how to implement
;; errors.
(deferror parse-error [] [state message message-args]
{:msg (str (format "JSON error at line %s, column %s: "
(:line state) (:column state))
(apply format message message-args))
:unhandled (throw-msg Exception)})
(defn- expectation-error-fn [expectation]
(fn [remainder state]
(raise parse-error state "%s expected where \"%s\" is"
[expectation (or (first remainder) "the end of the file")])))
;; And here are where this parser's rules are defined.
(def string-delimiter
(nb-char-lit \"))
(def escape-indicator
(nb-char-lit \\))
(def false-lit
(constant-semantics (lit-conc-seq "false" nb-char-lit) ;use nb-char-lit as
(make-scalar-node false))) ;parse-builder on ['f' 'a' ... ]
;then make the node as
;the corresponding parsed value
(def true-lit
(constant-semantics (lit-conc-seq "true" nb-char-lit)
(make-scalar-node true)))
(def null-lit
(constant-semantics (lit-conc-seq "null" nb-char-lit)
(make-scalar-node nil)))
(def keyword-lit (alt false-lit true-lit null-lit))
(def space (nb-char-lit \space))
(def tab (nb-char-lit \tab))
(def newline-lit (lit \newline))
(def return-lit (lit \return))
(def line-break (b-char (rep+ (alt newline-lit return-lit)))) ;;consecutive blank lines only adds 1 line?
(def json-char (alt line-break (nb-char anything)))
(def ws (constant-semantics (rep* (alt space tab line-break)) :ws))
(def begin-array
(constant-semantics (conc ws (nb-char-lit \ [) ws) :begin-array)) ;;markup going crazy, ignore space
(def end-array
(constant-semantics (conc ws (nb-char-lit \]) ws) :end-array))
(def begin-object
(constant-semantics (conc ws (nb-char-lit \{) ws) :begin-object))
(def end-object
(constant-semantics (conc ws (nb-char-lit \}) ws) :end-object))
(def name-separator
(constant-semantics (conc ws (nb-char-lit \:) ws) :name-separator))
(def value-separator
(constant-semantics (conc ws (nb-char-lit \,) ws) :value-separator))
(def minus-sign (nb-char-lit \-))
(def plus-sign (nb-char-lit \+))
(def decimal-point (nb-char-lit \.))
(def exponential-sign (lit-alt-seq "eE" nb-char-lit))
(def zero-digit (nb-char-lit \0))
(def nonzero-decimal-digit (lit-alt-seq "123456789" nb-char-lit))
(def decimal-digit (alt zero-digit nonzero-decimal-digit))
(def fractional-part (conc decimal-point (rep* decimal-digit)))
(def exponential-part
(conc exponential-sign (opt (alt plus-sign minus-sign))
(failpoint (rep+ decimal-digit)
(expectation-error-fn
(str "in number literal, after an exponent sign, decimal"
"digit")))))
(def number-lit
(complex [minus (opt minus-sign)
above-one (alt zero-digit (rep+ nonzero-decimal-digit))
below-one (opt fractional-part)
power (opt exponential-part)]
(-> [minus above-one below-one power] flatten apply-str
Double/parseDouble
((if (or below-one power) identity int))
make-scalar-node)))
(def hexadecimal-digit
(alt decimal-digit (lit-alt-seq "ABCDEF" nb-char-lit)))
(def unescaped-char
(except json-char (alt escape-indicator string-delimiter)))
(def unicode-char-sequence
(complex [_ (nb-char-lit \u)
digits (factor= 4
(failpoint hexadecimal-digit
(expectation-error-fn "hexadecimal digit")))]
(-> digits apply-str (Integer/parseInt 16) char)))
(def escaped-characters
{\\ \\, \/ \/, \b \backspace, \f \formfeed, \n \newline, \r \return,
\t \tab})
(def normal-escape-sequence
(semantics (lit-alt-seq (keys escaped-characters) nb-char-lit)
escaped-characters))
(def escape-sequence
(complex [_ escape-indicator
character (alt unicode-char-sequence
normal-escape-sequence)]
character))
(def string-char
(alt escape-sequence unescaped-char))
(def string-lit
(complex [_ string-delimiter
contents (rep* string-char)
_ string-delimiter]
(-> contents apply-str make-scalar-node)))
(declare array)
(declare object)
(def value (alt string-lit number-lit keyword-lit array object))
(def additional-value
(complex [_ value-separator, content value] content))
(def array-contents
(complex [first-value value, rest-values (rep* additional-value)]
(cons first-value rest-values)))
(def array
(complex [_ begin-array
contents (opt array-contents)
_ (failpoint end-array
(expectation-error-fn "an array is unclosed; \"]\""))]
(-> contents vec make-array-node)))
(def entry
(complex [entry-key string-lit, _ name-separator, entry-val value]
[entry-key entry-val]))
(def additional-entry
(complex [_ value-separator, content entry]
content))
(def object-contents
(complex [first-entry entry, rest-entries (rep* additional-entry)]
(cons first-entry rest-entries)))
(def object
(complex [_ begin-object
contents object-contents
_ (failpoint end-object
(expectation-error-fn
(str "either \"}\" or another object entry (which "
"always starts with a string)")))]
(struct node-s :object (into {} contents)))) ;;forgot to use make-object-node ?
(def text (alt object array)) ; The root rule
;; The functions below uses the rules to parse strings.
(defn parse [tokens]
(binding [*remainder-accessor* remainder-a] ; this is completely
; optional
(rule-match text
#(raise parse-error "invalid document \"%s\""
(apply-str (remainder-a %)))
#(raise parse-error "leftover data after a valid node \"%s\""
(apply-str (remainder-a %2)))
(struct state-s tokens 0 0))))
; The call to rule-match above is equivalent to the stuff below:
; (let [[product state :as result]
; (text (struct state-s tokens 0 0))]
; (if (nil? result)
; (raise parse-error "invalid document \"%s\""
; (apply-str tokens))
; (if-let [remainder (seq (remainder-a state))]
; product
; (raise parse-error "leftover data after a valid node \"%s\""
; (apply-str remainder)))))
;; The functions below just convert JSON nodes into Clojure strings,
;; vectors, and maps.
(defmulti represent :kind)
(defmethod represent :object [node]
(into {}
(map #(vector (represent (key %)) (represent (val %)))
(:content node))))
(defmethod represent :array [node]
(vec (map #(represent %) (:content node))))
(defmethod represent :scalar [node]
(:content node))
(def load-stream (comp represent parse))