Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Changes.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ Unreleased
([#1644](https://github.com/melange-re/melange/pull/1644))
- ppx: disallow empty `%raw` expressions, which would cause syntax errors in
code generation ([#1651](https://github.com/melange-re/melange/pull/1651))
- ppx: support all valid `Longident`s in string interpolation
([#1653](https://github.com/melange-re/melange/pull/1653))

5.1.0-53 2025-03-23
---------------
Expand Down
89 changes: 43 additions & 46 deletions ppx/string_interp.ml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,15 @@ type error =
| Unmatched_paren
| Invalid_syntax_of_var of string

type kind = String | Var of int * int
type content =
| String of string
| Var of {
lident : Longident.t;
content : string;
loffset : int;
roffset : int;
}

(* [Var (loffset, roffset)]
For parens it used to be (2,-1)
for non-parens it used to be (1,0) *)
Expand All @@ -44,7 +52,7 @@ type pos = {
(* Note it actually needs to be in sync with OCaml's lexing semantics *)
}

type segment = { start : pos; finish : pos; kind : kind; content : string }
type segment = { start : pos; finish : pos; content : content }

type cxt = {
mutable segment_start : pos;
Expand All @@ -58,43 +66,34 @@ type cxt = {

exception Error of pos * pos * error

let pp_error fmt err =
Format.pp_print_string fmt
@@
match err with
| Invalid_code_point -> "Invalid code point"
| Unterminated_backslash -> "\\ ended unexpectedly"
| Unterminated_variable -> "$ unterminated"
| Unmatched_paren -> "Unmatched paren"
| Invalid_syntax_of_var s ->
"`" ^ s ^ "' is not a valid syntax of interpolated identifer"
let pp_error =
let to_string = function
| Invalid_code_point -> "Invalid code point"
| Unterminated_backslash -> "\\ ended unexpectedly"
| Unterminated_variable -> "$ unterminated"
| Unmatched_paren -> "Unmatched paren"
| Invalid_syntax_of_var s ->
"`" ^ s ^ "' is not a valid syntax of interpolated identifer"
in
fun fmt err -> Format.pp_print_string fmt (to_string err)

let valid_lead_identifier_char = function
| 'a' .. 'z' | '_' -> true
| _ -> false

let valid_identifier_char = function
| 'a' .. 'z' | 'A' .. 'Z' | '0' .. '9' | '_' | '\'' -> true
| 'a' .. 'z' | 'A' .. 'Z' | '0' .. '9' | '_' | '\'' | '.' -> true
| _ -> false

(* Invariant: [valid_lead_identifier] has to be [valid_identifier] *)
let valid_identifier =
let for_all_from =
let rec unsafe_for_all_range s ~start ~finish p =
start > finish
|| p (String.unsafe_get s start)
&& unsafe_for_all_range s ~start:(start + 1) ~finish p
in
fun s start p ->
let len = String.length s in
if start < 0 then invalid_arg "for_all_from"
else unsafe_for_all_range s ~start ~finish:(len - 1) p
in
fun s ->
let s_len = String.length s in
if s_len = 0 then false
else
valid_lead_identifier_char s.[0] && for_all_from s 1 valid_identifier_char
let valid_identifier s =
match String.length (String.trim s) with
| 0 -> None
| _n -> (
match String.for_all s ~f:valid_identifier_char with
| true -> (
match Longident.parse s with li -> Some li | exception _ -> None)
| false -> None)

(* FIXME: multiple line offset
if there is no line offset. Note {|{j||} border will never trigger a new
Expand Down Expand Up @@ -130,24 +129,23 @@ let pos_error cxt ~loc error =
},
error ))

let add_var_segment cxt loc loffset roffset =
let add_var_segment cxt ~loc ~loffset ~roffset =
let content = Buffer.contents cxt.buf in
Buffer.clear cxt.buf;
let next_loc =
{ lnum = cxt.pos_lnum; offset = loc - cxt.pos_bol; byte_bol = cxt.byte_bol }
in
match valid_identifier content with
| true ->
| Some lident ->
cxt.segments <-
{
start = cxt.segment_start;
finish = next_loc;
kind = Var (loffset, roffset);
content;
content = Var { content; loffset; roffset; lident };
}
:: cxt.segments;
cxt.segment_start <- next_loc
| false ->
| None ->
let cxt =
match String.trim content with
| "" ->
Expand Down Expand Up @@ -179,7 +177,7 @@ let add_str_segment cxt loc =
{ lnum = cxt.pos_lnum; offset = loc - cxt.pos_bol; byte_bol = cxt.byte_bol }
in
cxt.segments <-
{ start = cxt.segment_start; finish = next_loc; kind = String; content }
{ start = cxt.segment_start; finish = next_loc; content = String content }
:: cxt.segments;
cxt.segment_start <- next_loc

Expand Down Expand Up @@ -227,7 +225,7 @@ and expect_simple_var loc s offset ({ buf; s_len; _ } as cxt) =
done;
let added_length = !v - offset in
let loc = added_length + loc in
add_var_segment cxt loc 1 0;
add_var_segment cxt ~loc ~loffset:1 ~roffset:0;
check_and_transform loc s (added_length + offset) cxt

and expect_var_paren loc s offset ({ buf; s_len; _ } as cxt) =
Expand All @@ -241,7 +239,7 @@ and expect_var_paren loc s offset ({ buf; s_len; _ } as cxt) =
let loc = added_length + 1 + loc in
match !v < s_len && s.[!v] = ')' with
| true ->
add_var_segment cxt loc 2 (-1);
add_var_segment cxt ~loc ~loffset:2 ~roffset:(-1);
check_and_transform loc s (added_length + 1 + offset) cxt
| false -> pos_error cxt ~loc Unmatched_paren

Expand All @@ -261,20 +259,20 @@ let rec handle_segments =
else if r.loc_ghost then l
else { loc_start = l.loc_start; loc_end = r.loc_end; loc_ghost = false }
in
let aux loc { start; finish; kind; content } =
match kind with
| String ->
let aux loc { start; finish; content } =
match content with
| String content ->
let loc = update border start finish loc in
Exp.constant (Pconst_string (content, loc, escaped_js_delimiter))
| Var (soffset, foffset) ->
| Var { loffset = soffset; roffset = foffset; content = _; lident } ->
let loc =
{
loc with
loc_start = update_position (soffset + border) start loc.loc_start;
loc_end = update_position (foffset + border) finish loc.loc_start;
}
in
Exp.ident ~loc { loc; txt = Lident content }
Exp.ident ~loc { loc; txt = lident }
in
let concat_exp a_loc x ~(lhs : expression) =
let loc = merge_loc a_loc lhs.pexp_loc in
Expand All @@ -286,7 +284,7 @@ let rec handle_segments =
match rev_segments with
| [] -> Exp.constant (Pconst_string ("", loc, escaped_js_delimiter))
| [ segment ] -> aux loc segment (* string literal *)
| { content = ""; _ } :: rest -> handle_segments loc rest
| { content = String ""; _ } :: rest -> handle_segments loc rest
| a :: rest -> concat_exp loc a ~lhs:(handle_segments loc rest)

let transform =
Expand Down Expand Up @@ -320,8 +318,7 @@ module Private = struct
type nonrec segment = segment = {
start : pos;
finish : pos;
kind : kind;
content : string;
content : content;
}

let transform_test s =
Expand Down
13 changes: 11 additions & 2 deletions ppx/string_interp.mli
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,14 @@ type error =
| Unmatched_paren
| Invalid_syntax_of_var of string

type kind = String | Var of int * int
type content =
| String of string
| Var of {
lident : Longident.t;
content : string;
loffset : int;
roffset : int;
}

(* Note the position is about code point *)
type pos = {
Expand All @@ -47,7 +54,9 @@ val transform :
loc:Location.t -> delim:string -> expression -> string -> expression

module Private : sig
type segment = { start : pos; finish : pos; kind : kind; content : string }
type segment = { start : pos; finish : pos; content : content }

val transform_test : string -> segment list
end


1 change: 0 additions & 1 deletion test/blackbox-tests/quoted-strings.t
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ Showcases how quoted strings work without melange.ppx
$ cat > x.ml <<EOF
> Js.log {js|🪄|js}
> EOF

$ melc x.ml
// Generated by Melange
'use strict';
Expand Down
48 changes: 32 additions & 16 deletions test/blackbox-tests/utf8-string-interp.t
Original file line number Diff line number Diff line change
Expand Up @@ -52,16 +52,23 @@ Test edge cases on unicode string interpolation
> let x = {j|Hello, $(M.world)|j}
> EOF
$ melc -ppx melppx x.ml
File "x.ml", line 4, characters 8-31:
4 | let x = {j|Hello, $(M.world)|j}
^^^^^^^^^^^^^^^^^^^^^^^
Warning 108 [melange-uninterpreted-delimiters]: Uninterpreted delimiters j
// Generated by Melange
'use strict';

File "x.ml", line 4, characters 21-31:
4 | let x = {j|Hello, $(M.world)|j}
^^^^^^^^^^
Error: `M.world' is not a valid syntax of interpolated identifer
[2]

const world = "world";

const M = {
world: world
};

const x = "Hello, " + world;

module.exports = {
M,
x,
}
/* No side effect */

This error is not super great, because `valid_lead_identifier_char` doesn't
take uppercase variables into account
Expand Down Expand Up @@ -94,13 +101,22 @@ record field access not yet supported
> let x = {j|Hello, $(t.x)|j}
> EOF
$ melc -ppx melppx x.ml
File "x.ml", line 5, characters 8-27:
5 | let x = {j|Hello, $(t.x)|j}
^^^^^^^^^^^^^^^^^^^
File "x.ml", line 1:
Error: Unbound module t
[2]

$ cat > x.ml <<'EOF'
> let x = {j|$(hello world)|j}
> EOF
$ melc -ppx melppx x.ml
File "x.ml", line 1, characters 8-28:
1 | let x = {j|$(hello world)|j}
^^^^^^^^^^^^^^^^^^^^
Warning 108 [melange-uninterpreted-delimiters]: Uninterpreted delimiters j

File "x.ml", line 5, characters 21-27:
5 | let x = {j|Hello, $(t.x)|j}
^^^^^^
Error: `t.x' is not a valid syntax of interpolated identifer
File "x.ml", line 1, characters 14-28:
1 | let x = {j|$(hello world)|j}
^^^^^^^^^^^^^^
Error: `hello world' is not a valid syntax of interpolated identifer
[2]

Loading
Loading