Files
yaak-mountain-loop/crates/yaak-templates/src/format_json.rs
Gregory Schier 267508e533 Support comments in JSON body (#419)
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-05 15:05:09 -08:00

534 lines
13 KiB
Rust

/// Lexer mode of [`format_json`]: the kind of token the scanner is currently inside.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum FormatState {
    /// Inside a `${[ ... ]}` template tag; everything up to `]}` is copied verbatim.
    TemplateTag,
    /// Inside a double-quoted string; everything up to the closing quote is copied verbatim.
    String,
    /// Between tokens: structural characters, comments and whitespace are interpreted.
    None,
}

/// Appends a line break followed by `depth` copies of `tab` to `out`.
fn push_line_break(out: &mut String, tab: &str, depth: usize) {
    out.push('\n');
    for _ in 0..depth {
        out.push_str(tab);
    }
}

/// Matches the characters skipped when looking back past trailing indentation in the output.
fn is_space_or_tab(c: char) -> bool {
    matches!(c, ' ' | '\t')
}

/// Formats JSON that might contain template tags (skipped entirely)
///
/// The input is re-indented with `tab` repeated once per nesting level. It does not have to be
/// valid JSON: the formatter is a single forward scan that never fails and copies anything it
/// does not understand straight through.
///
/// * Strings and `${[ ... ]}` template tags are emitted verbatim.
/// * `// line` comments stay on the line of the preceding value when they followed it on the
///   same input line; otherwise they keep a line of their own.
/// * `/* block */` comments are always placed on their own line(s), text untouched.
/// * Whitespace-only output lines are dropped, except blank lines inside a block comment.
/// * Trailing whitespace is trimmed from every output line.
///
/// # Arguments
///
/// * `text` - the JSON-like source to format.
/// * `tab` - the string used for one level of indentation.
///
/// # Returns
///
/// The formatted text, with lines joined by `\n` and no trailing newline.
pub fn format_json(text: &str, tab: &str) -> String {
    let mut chars = text.char_indices().peekable();
    let mut out = String::with_capacity(text.len());
    let mut depth: usize = 0;
    let mut state = FormatState::None;
    // True when the whitespace skipped since the previous token contained a newline.
    let mut saw_newline_in_whitespace = false;
    // Byte ranges (start, end) of block comments inside `out`, in emission order. They let the
    // final pass recognise blank lines that belong to a comment without re-parsing the text.
    let mut block_comment_spans: Vec<(usize, usize)> = Vec::new();

    while let Some((idx, current_char)) = chars.next() {
        match state {
            FormatState::String => {
                out.push(current_char);
                match current_char {
                    '"' => state = FormatState::None,
                    '\\' => {
                        // Copy the escaped character too, so `\"` does not end the string.
                        if let Some((_, escaped)) = chars.next() {
                            out.push(escaped);
                        }
                    }
                    _ => {}
                }
                continue;
            }
            FormatState::TemplateTag => {
                // `idx` comes from `char_indices`, so it is always a valid slice boundary.
                if text[idx..].starts_with("]}") {
                    state = FormatState::None;
                    out.push_str("]}");
                    chars.next(); // Skip the second closing bracket
                } else {
                    out.push(current_char);
                }
                continue;
            }
            FormatState::None => {}
        }

        // Insignificant whitespace is never copied; only remember whether it held a newline.
        if matches!(current_char, ' ' | '\n' | '\t' | '\r') {
            if current_char == '\n' {
                saw_newline_in_whitespace = true;
            }
            continue;
        }

        // Every real token consumes the flag. Leaving it set across strings or punctuation
        // would make a later comment believe it started on a fresh input line.
        let newline_before = std::mem::replace(&mut saw_newline_in_whitespace, false);

        if text[idx..].starts_with("${[") {
            state = FormatState::TemplateTag;
            out.push_str("${[");
            chars.next(); // Skip {
            chars.next(); // Skip [
            continue;
        }

        // Handle line comments (//)
        if current_char == '/' && chars.next_if(|&(_, c)| c == '/').is_some() {
            // Collect the rest of the comment, leaving the newline for the whitespace handler.
            let mut comment = String::from("//");
            while let Some((_, c)) = chars.next_if(|&(_, c)| c != '\n') {
                comment.push(c);
            }

            let trimmed = out.trim_end_matches(is_space_or_tab);
            let trimmed_len = trimmed.len();
            let follows_comma_break = trimmed.ends_with(",\n");
            let on_fresh_line = trimmed.ends_with('\n');
            if follows_comma_break && !newline_before {
                // Trailing comment on the same line as comma (e.g. "foo",// comment):
                // undo the line break the comma handler added and stay on that line.
                out.truncate(trimmed_len - 1);
                out.push(' ');
            } else if !on_fresh_line && !out.is_empty() {
                // Trailing comment after a value (output is still on that value's line)
                out.push(' ');
            }
            out.push_str(&comment);
            push_line_break(&mut out, tab, depth);
            continue;
        }

        // Handle block comments (/* ... */)
        if current_char == '/' && chars.next_if(|&(_, c)| c == '*').is_some() {
            // An unterminated comment simply runs to the end of the input.
            let mut comment = String::from("/*");
            while let Some((_, c)) = chars.next() {
                if c == '*' && chars.next_if(|&(_, next)| next == '/').is_some() {
                    comment.push_str("*/");
                    break;
                }
                comment.push(c);
            }

            // If we're not already on a fresh line, add newline + indent before the comment
            let trimmed = out.trim_end_matches(is_space_or_tab);
            if !trimmed.is_empty() && !trimmed.ends_with('\n') {
                push_line_break(&mut out, tab, depth);
            }
            let span_start = out.len();
            out.push_str(&comment);
            block_comment_spans.push((span_start, out.len()));
            // After the block comment, add newline + indent for the next content
            push_line_break(&mut out, tab, depth);
            continue;
        }

        match current_char {
            ',' => {
                out.push(',');
                push_line_break(&mut out, tab, depth);
            }
            '{' | '[' => {
                let closer = if current_char == '{' { '}' } else { ']' };
                out.push(current_char);
                if chars.next_if(|&(_, c)| c == closer).is_some() {
                    // Immediately closed container stays compact: `{}` / `[]`
                    out.push(closer);
                } else {
                    depth += 1;
                    push_line_break(&mut out, tab, depth);
                }
            }
            '}' | ']' => {
                // Saturate just in case invalid JSON has more closes than opens
                depth = depth.saturating_sub(1);
                push_line_break(&mut out, tab, depth);
                out.push(current_char);
            }
            ':' => out.push_str(": "), // Pad with space
            '"' => {
                state = FormatState::String;
                out.push('"');
            }
            other => out.push(other),
        }
    }

    // Filter out whitespace-only lines, but preserve empty lines inside block comments.
    // A line is inside a block comment when the newline that starts it is part of a recorded
    // comment span, i.e. `start < line_start <= end`.
    let mut kept: Vec<&str> = Vec::new();
    let mut span_idx = 0;
    let mut line_start = 0;
    for line in out.split('\n') {
        while block_comment_spans.get(span_idx).map_or(false, |&(_, end)| end < line_start) {
            span_idx += 1;
        }
        let inside_block_comment = block_comment_spans
            .get(span_idx)
            .map_or(false, |&(start, end)| start < line_start && line_start <= end);
        if inside_block_comment || !line.trim().is_empty() {
            kept.push(line.trim_end());
        }
        line_start += line.len() + 1;
    }
    kept.join("\n")
}
/// Unit tests for `format_json`.
///
/// Expected values are raw strings compared byte-for-byte (after trimming the outer blank
/// lines), so the indentation inside each literal is significant: every nesting level must be
/// indented by exactly one copy of the `tab` argument passed to `format_json`.
#[cfg(test)]
mod tests {
    use crate::format_json::format_json;

    #[test]
    fn test_simple_object() {
        assert_eq!(
            format_json(r#"{"foo":"bar","baz":"qux"}"#, "  "),
            r#"
{
  "foo": "bar",
  "baz": "qux"
}
"#
            .trim()
        );
    }

    #[test]
    fn test_escaped() {
        assert_eq!(
            format_json(r#"{"foo":"Hi \"world!\""}"#, "  "),
            r#"
{
  "foo": "Hi \"world!\""
}
"#
            .trim()
        );
    }

    #[test]
    fn test_simple_array() {
        assert_eq!(
            format_json(r#"["foo","bar","baz","qux"]"#, "  "),
            r#"
[
  "foo",
  "bar",
  "baz",
  "qux"
]
"#
            .trim()
        );
    }

    #[test]
    fn test_extra_whitespace() {
        // Whitespace between tokens in the input must not leak into the output.
        assert_eq!(
            format_json(
                r#"["foo",   "bar", "baz","qux"

            ]"#,
                "  "
            ),
            r#"
[
  "foo",
  "bar",
  "baz",
  "qux"
]
"#
            .trim()
        );
    }

    #[test]
    fn test_invalid_json() {
        // Invalid input is formatted on a best-effort basis rather than rejected.
        assert_eq!(
            format_json(r#"["foo", {"bar", }"baz",["qux" ]]"#, "  "),
            r#"
[
  "foo",
  {
    "bar",
  }"baz",
  [
    "qux"
  ]
]
"#
            .trim()
        );
    }

    #[test]
    fn test_skip_template_tags() {
        assert_eq!(
            format_json(r#"{"foo":${[ fn("hello", "world") ]} }"#, "  "),
            r#"
{
  "foo": ${[ fn("hello", "world") ]}
}
"#
            .trim()
        );
    }

    #[test]
    fn test_graphql_response() {
        assert_eq!(
            format_json(
                r#"{"data":{"capsules":[{"landings":null,"original_launch":null,"reuse_count":0,"status":"retired","type":"Dragon 1.0","missions":null},{"id":"5e9e2c5bf3591882af3b2665","landings":null,"original_launch":null,"reuse_count":0,"status":"retired","type":"Dragon 1.0","missions":null}]}}"#,
                "  "
            ),
            r#"
{
  "data": {
    "capsules": [
      {
        "landings": null,
        "original_launch": null,
        "reuse_count": 0,
        "status": "retired",
        "type": "Dragon 1.0",
        "missions": null
      },
      {
        "id": "5e9e2c5bf3591882af3b2665",
        "landings": null,
        "original_launch": null,
        "reuse_count": 0,
        "status": "retired",
        "type": "Dragon 1.0",
        "missions": null
      }
    ]
  }
}
"#
            .trim()
        );
    }

    #[test]
    fn test_immediate_close() {
        assert_eq!(
            format_json(r#"{"bar":[]}"#, "  "),
            r#"
{
  "bar": []
}
"#
            .trim()
        );
    }

    #[test]
    fn test_more_closes() {
        // More closing than opening brackets must not underflow the depth counter.
        assert_eq!(
            format_json(r#"{}}"#, "  "),
            r#"
{}
}
"#
            .trim()
        );
    }

    #[test]
    fn test_line_comment_between_keys() {
        assert_eq!(
            format_json(
                r#"{"foo":"bar",// a comment
"baz":"qux"}"#,
                "  "
            ),
            r#"
{
  "foo": "bar", // a comment
  "baz": "qux"
}
"#
            .trim()
        );
    }

    #[test]
    fn test_line_comment_at_end() {
        assert_eq!(
            format_json(
                r#"{"foo":"bar" // trailing
}"#,
                "  "
            ),
            r#"
{
  "foo": "bar" // trailing
}
"#
            .trim()
        );
    }

    #[test]
    fn test_block_comment() {
        assert_eq!(
            format_json(r#"{"foo":"bar",/* comment */"baz":"qux"}"#, "  "),
            r#"
{
  "foo": "bar",
  /* comment */
  "baz": "qux"
}
"#
            .trim()
        );
    }

    #[test]
    fn test_comment_in_array() {
        assert_eq!(
            format_json(
                r#"[1,// item comment
2,3]"#,
                "  "
            ),
            r#"
[
  1, // item comment
  2,
  3
]
"#
            .trim()
        );
    }

    #[test]
    fn test_comment_only_line() {
        assert_eq!(
            format_json(
                r#"{
  // this is a standalone comment
  "foo": "bar"
}"#,
                "  "
            ),
            r#"
{
  // this is a standalone comment
  "foo": "bar"
}
"#
            .trim()
        );
    }

    #[test]
    fn test_multiline_block_comment() {
        // The comment body, including its blank line, is copied through verbatim.
        assert_eq!(
            format_json(
                r#"{
  "foo": "bar"
  /**
  Hello World!

  Hi there
  */
}"#,
                "  "
            ),
            r#"
{
  "foo": "bar"
  /**
  Hello World!

  Hi there
  */
}
"#
            .trim()
        );
    }

    // NOTE: trailing whitespace on output lines is trimmed by the formatter.
    // We can't easily add a test for this because raw string literals get
    // trailing whitespace stripped by the editor/linter.

    #[test]
    fn test_comment_inside_string_ignored() {
        assert_eq!(
            format_json(r#"{"foo":"// not a comment","bar":"/* also not */"}"#, "  "),
            r#"
{
  "foo": "// not a comment",
  "bar": "/* also not */"
}
"#
            .trim()
        );
    }

    #[test]
    fn test_comment_on_line_after_comma() {
        assert_eq!(
            format_json(
                r#"{
  "a": "aaa",
  // "b": "bbb"
}"#,
                "  "
            ),
            r#"
{
  "a": "aaa",
  // "b": "bbb"
}
"#
            .trim()
        );
    }
}