Switch to vkbeautify for XML

https://feedback.yaak.app/p/xml-pretty-formatter-not-rendering-correctly
This commit is contained in:
Gregory Schier
2025-10-28 14:03:49 -07:00
parent a71fb8ed6c
commit 484b5b2fd8
9 changed files with 15 additions and 362 deletions

7
package-lock.json generated
View File

@@ -18131,6 +18131,12 @@
"url": "https://github.com/sponsors/jonschlinkert"
}
},
"node_modules/vkbeautify": {
"version": "0.99.3",
"resolved": "https://registry.npmjs.org/vkbeautify/-/vkbeautify-0.99.3.tgz",
"integrity": "sha512-2ozZEFfmVvQcHWoHLNuiKlUfDKlhh4KGsy54U0UrlLMR1SO+XKAIDqBxtBwHgNrekurlJwE8A9K6L49T78ZQ9Q==",
"license": "MIT"
},
"node_modules/vscode-languageserver-types": {
"version": "3.17.5",
"resolved": "https://registry.npmjs.org/vscode-languageserver-types/-/vscode-languageserver-types-3.17.5.tgz",
@@ -19088,6 +19094,7 @@
"remark-gfm": "^4.0.1",
"slugify": "^1.6.6",
"uuid": "^11.1.0",
"vkbeautify": "^0.99.3",
"whatwg-mimetype": "^4.0.0",
"xml-beautify": "^1.2.3",
"yaml": "^2.6.1"

View File

@@ -50,8 +50,7 @@ use yaak_plugins::manager::PluginManager;
use yaak_plugins::plugin_meta::PluginMetadata;
use yaak_plugins::template_callback::PluginTemplateCallback;
use yaak_sse::sse::ServerSentEvent;
use yaak_templates::format::format_json;
use yaak_templates::format_xml::format_xml;
use yaak_templates::format_json::format_json;
use yaak_templates::{RenderErrorBehavior, RenderOptions, Tokens, transform_args};
mod commands;
@@ -747,11 +746,6 @@ async fn cmd_format_json(text: &str) -> YaakResult<String> {
Ok(format_json(text, " "))
}
/// Tauri command that pretty-prints `text` as XML.
///
/// Delegates to `format_xml`, passing `" "` as the per-level indent string.
/// `format_xml` returns a plain `String`, so this command always yields `Ok`.
#[tauri::command]
async fn cmd_format_xml(text: &str) -> YaakResult<String> {
Ok(format_xml(text, " "))
}
#[tauri::command]
async fn cmd_http_response_body<R: Runtime>(
window: WebviewWindow<R>,
@@ -1432,7 +1426,6 @@ pub fn run() {
cmd_export_data,
cmd_http_response_body,
cmd_format_json,
cmd_format_xml,
cmd_get_http_authentication_summaries,
cmd_get_http_authentication_config,
cmd_get_sse_events,

View File

@@ -143,7 +143,7 @@ pub fn format_json(text: &str, tab: &str) -> String {
#[cfg(test)]
mod tests {
use crate::format::format_json;
use crate::format_json::format_json;
#[test]
fn test_simple_object() {

View File

@@ -1,345 +0,0 @@
/// One lexical token emitted by `tokenize_with_templates`.
///
/// Every variant only holds `&'a str` slices, which the tokenizer takes
/// directly from its input, so tokens are `Copy` and own no data. Tag
/// variants keep the complete original tag text in `raw` so the formatter can
/// re-emit it verbatim.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum XmlTok<'a> {
/// Opening tag; `name` is the element name, compared against the closing
/// tag's name when deciding whether to inline a text child.
OpenTag { raw: &'a str, name: &'a str }, // "<tag ...>"
/// Closing tag; `name` is the text between `</` and `>`.
CloseTag { raw: &'a str, name: &'a str }, // "</tag>"
/// Tag whose raw text has `/` as its second-to-last byte.
SelfCloseTag(&'a str), // "<tag .../>"
Comment(&'a str), // "<!-- ... -->"
CData(&'a str), // "<![CDATA[ ... ]]>"
ProcInst(&'a str), // "<?xml ...?>"
/// Also used for any other `<!...>` construct that is not a comment or CDATA.
Doctype(&'a str), // "<!DOCTYPE ...>"
/// Raw character data, whitespace included; trimming happens in the formatter.
Text(&'a str), // "text between tags"
/// Template block found outside of a tag, kept verbatim.
Template(&'a str), // "${[ ... ]}"
}
/// Appends one output line to `out`: `indent` repeated `depth` times,
/// followed by `s` and a trailing `'\n'`.
fn writeln_indented(out: &mut String, depth: usize, indent: &str, s: &str) {
    out.extend(std::iter::repeat(indent).take(depth));
    out.push_str(s);
    out.push('\n');
}
pub fn format_xml(input: &str, indent: &str) -> String {
use XmlTok::*;
let tokens = tokenize_with_templates(input);
let mut out = String::new();
let mut depth = 0usize;
let mut i = 0usize;
while i < tokens.len() {
match tokens[i] {
OpenTag {
raw: open_raw,
name: open_name,
} => {
if i + 2 < tokens.len() {
if let Text(text_raw) = tokens[i + 1] {
let trimmed = text_raw.trim();
let no_newlines = !trimmed.contains('\n');
if no_newlines && !trimmed.is_empty() {
if let CloseTag {
raw: close_raw,
name: close_name,
} = tokens[i + 2]
{
if open_name == close_name {
for _ in 0..depth {
out.push_str(indent);
}
out.push_str(open_raw);
out.push_str(trimmed);
out.push_str(close_raw);
out.push('\n');
i += 3;
continue;
}
}
}
}
}
writeln_indented(&mut out, depth, indent, open_raw);
depth = depth.saturating_add(1);
i += 1;
}
CloseTag { raw, .. } => {
depth = depth.saturating_sub(1);
writeln_indented(&mut out, depth, indent, raw);
i += 1;
}
SelfCloseTag(raw) | Comment(raw) | ProcInst(raw) | Doctype(raw) | CData(raw)
| Template(raw) => {
writeln_indented(&mut out, depth, indent, raw);
i += 1;
}
Text(text_raw) => {
if text_raw.chars().any(|c| !c.is_whitespace()) {
let trimmed = text_raw.trim();
writeln_indented(&mut out, depth, indent, trimmed);
}
i += 1;
}
}
}
if out.ends_with('\n') {
out.pop();
}
out
}
/// Splits `input` into a flat sequence of [`XmlTok`]s borrowing from `input`.
///
/// The scanner is lenient and never fails: a construct whose closing
/// delimiter is missing simply runs to the end of the input.
///
/// Recognised, in priority order:
/// 1. template blocks `${[ ... ]}` outside of tags,
/// 2. comments `<!-- ... -->`,
/// 3. CDATA sections `<![CDATA[ ... ]]>`,
/// 4. processing instructions `<? ... ?>`,
/// 5. any other `<! ... >` (e.g. DOCTYPE),
/// 6. ordinary open / close / self-closing tags,
/// 7. text up to the next `<` or template start.
///
/// Inside an ordinary tag, quoted attribute values may contain `>`. A
/// *terminated* template block inside a tag is skipped as opaque text, so a
/// `>` or quote character within the template can neither end the tag nor
/// change the quote state. An unterminated `${[` inside a tag is treated as
/// plain characters.
fn tokenize_with_templates(input: &str) -> Vec<XmlTok<'_>> {
    use XmlTok::*;

    const TEMPLATE_OPEN: &[u8] = b"${[";
    const TEMPLATE_CLOSE: &[u8] = b"]}";

    /// Index just past the first occurrence of `close` at or after `from`.
    fn end_after(bytes: &[u8], from: usize, close: &[u8]) -> Option<usize> {
        bytes
            .get(from..)?
            .windows(close.len())
            .position(|window| window == close)
            .map(|offset| from + offset + close.len())
    }

    let bytes = input.as_bytes();
    let mut toks = Vec::new();
    let mut i = 0usize;
    // Once a search for "]}" fails, no later position can succeed either;
    // remembering that keeps the in-tag template check linear overall.
    let mut template_may_close = true;

    while i < bytes.len() {
        let start = i;
        let rest = &bytes[i..];

        // Template block: ${[ ... ]}
        if rest.starts_with(TEMPLATE_OPEN) {
            i = end_after(bytes, i + TEMPLATE_OPEN.len(), TEMPLATE_CLOSE).unwrap_or(bytes.len());
            toks.push(Template(&input[start..i]));
            continue;
        }

        if bytes[i] == b'<' {
            // Delimited constructs. Order matters: "<!--" and "<![CDATA[" must
            // be tested before the generic "<!" fallback.
            if rest.starts_with(b"<!--") {
                i = end_after(bytes, i + 4, b"-->").unwrap_or(bytes.len());
                toks.push(Comment(&input[start..i]));
                continue;
            }
            if rest.starts_with(b"<![CDATA[") {
                i = end_after(bytes, i + 9, b"]]>").unwrap_or(bytes.len());
                toks.push(CData(&input[start..i]));
                continue;
            }
            if rest.starts_with(b"<?") {
                i = end_after(bytes, i + 2, b"?>").unwrap_or(bytes.len());
                toks.push(ProcInst(&input[start..i]));
                continue;
            }
            if rest.starts_with(b"<!") {
                i = end_after(bytes, i + 2, b">").unwrap_or(bytes.len());
                toks.push(Doctype(&input[start..i]));
                continue;
            }

            // Normal tag (open / close / self-closing).
            i += 1; // '<'
            let is_close = bytes.get(i) == Some(&b'/');
            if is_close {
                i += 1;
            }

            // Read until the terminating '>', honouring quotes and templates.
            let mut quote: Option<u8> = None;
            while i < bytes.len() {
                if template_may_close && bytes[i..].starts_with(TEMPLATE_OPEN) {
                    match end_after(bytes, i + TEMPLATE_OPEN.len(), TEMPLATE_CLOSE) {
                        Some(end) => {
                            i = end;
                            continue;
                        }
                        None => template_may_close = false,
                    }
                }
                let c = bytes[i];
                i += 1;
                match quote {
                    Some(q) if c == q => quote = None,
                    Some(_) => {}
                    None if c == b'"' || c == b'\'' => quote = Some(c),
                    None if c == b'>' => break,
                    None => {}
                }
            }

            // Every stop position is just past an ASCII byte or at the end of
            // the input, so slicing `input` here cannot split a UTF-8 sequence.
            let raw = &input[start..i];
            if is_close {
                let name = parse_close_name(raw);
                toks.push(CloseTag { raw, name });
            } else if raw.ends_with("/>") {
                toks.push(SelfCloseTag(raw));
            } else {
                let name = parse_open_name(raw);
                toks.push(OpenTag { raw, name });
            }
            continue;
        }

        // Text node until the next '<' or template start. The current byte is
        // neither, so this always advances by at least one byte.
        while i < bytes.len() && bytes[i] != b'<' && !bytes[i..].starts_with(TEMPLATE_OPEN) {
            i += 1;
        }
        toks.push(Text(&input[start..i]));
    }

    toks
}
/// Extracts the element name from the raw text of an opening tag such as
/// `<name ...>` or `<name>`.
///
/// The name is everything after the leading `<` up to the first whitespace,
/// `>` or `/`, or the rest of the string when none of those occurs.
fn parse_open_name(raw: &str) -> &str {
    let after_bracket = &raw[1..];
    after_bracket
        .split(|ch: char| ch == '>' || ch == '/' || ch.is_whitespace())
        .next()
        .unwrap_or(after_bracket)
}
/// Extracts the element name from the raw text of a closing tag such as
/// `</name>`.
///
/// XML permits whitespace between the name and the final `>` (`</name >`),
/// so trailing whitespace is stripped; otherwise the result would never
/// compare equal to the matching opening tag's name. Input lacking the `</`
/// prefix is scanned from its start instead of panicking on a short slice.
fn parse_close_name(raw: &str) -> &str {
    let body = raw.strip_prefix("</").unwrap_or(raw);
    let end = body.find('>').unwrap_or(body.len());
    body[..end].trim_end()
}
// Behavioural tests for `format_xml`: each case feeds compact XML through the
// formatter and compares the result with the expected multi-line text.
#[cfg(test)]
mod tests {
use super::format_xml;
// An element whose only child is single-line text stays on one line.
#[test]
fn inline_text_child() {
let src = r#"<root><foo>this might be a string</foo><bar attr="x">ok</bar></root>"#;
let want = r#"<root>
<foo>this might be a string</foo>
<bar attr="x">ok</bar>
</root>"#;
assert_eq!(format_xml(src, " "), want);
}
// Inlining applies at any depth: only the innermost text-only element is
// kept on one line, its ancestors are expanded.
#[test]
fn works_when_nested() {
let src = r#"<root><foo><b>bold</b></foo></root>"#;
let want = r#"<root>
<foo>
<b>bold</b>
</foo>
</root>"#;
assert_eq!(format_xml(src, " "), want);
}
// Surrounding whitespace in an inlined text child is trimmed away.
#[test]
fn trims_and_keeps_nonempty() {
let src = "<root><foo> hi </foo></root>";
let want = "<root>\n <foo>hi</foo>\n</root>";
assert_eq!(format_xml(src, " "), want);
}
#[test]
fn attributes_inline_text_child() {
// Keeps attributes verbatim and inlines simple text children
let src = r#"<root><item id="42" class='a b'>value</item></root>"#;
let want = r#"<root>
<item id="42" class='a b'>value</item>
</root>"#;
assert_eq!(format_xml(src, " "), want);
}
#[test]
fn attributes_with_irregular_spacing_preserved() {
// We don't normalize spaces inside the tag; raw is preserved
let src = r#"<root><a x = "1" y='2' >t</a></root>"#;
let want = r#"<root>
<a x = "1" y='2' >t</a>
</root>"#;
assert_eq!(format_xml(src, " "), want);
}
// A self-closing tag is emitted verbatim on its own line, entities untouched.
#[test]
fn self_closing_with_attributes() {
let src =
r#"<root><img src="x" alt='hello &quot;world&quot;' width="10" height="20"/></root>"#;
let want = r#"<root>
<img src="x" alt='hello &quot;world&quot;' width="10" height="20"/>
</root>"#;
assert_eq!(format_xml(src, " "), want);
}
// An unquoted template block used as an attribute value stays part of its tag.
#[test]
fn template_in_attribute_self_closing() {
let src = r#"<root><x attr=${[ compute(1, "two") ]}/></root>"#;
let want = r#"<root>
<x attr=${[ compute(1, "two") ]}/>
</root>"#;
assert_eq!(format_xml(src, " "), want);
}
#[test]
fn attributes_and_nested_children_expand() {
// Not inlined because child is an element, not plain text
let src = r#"<root><box kind="card"><b>bold</b></box></root>"#;
let want = r#"<root>
<box kind="card">
<b>bold</b>
</box>
</root>"#;
assert_eq!(format_xml(src, " "), want);
}
// Prefixed names (`ns:el`, `xml:lang`) are handled like any other name.
#[test]
fn namespace_and_xml_attrs() {
let src = r#"<root><ns:el xml:lang="en">ok</ns:el></root>"#;
let want = r#"<root>
<ns:el xml:lang="en">ok</ns:el>
</root>"#;
assert_eq!(format_xml(src, " "), want);
}
#[test]
fn mixed_quote_styles_in_attributes() {
// Single-quoted attr containing double quotes is fine; we don't re-quote
let src = r#"<root><a title='He said "hi"'>hello</a></root>"#;
let want = r#"<root>
<a title='He said "hi"'>hello</a>
</root>"#;
assert_eq!(format_xml(src, " "), want);
}
}

View File

@@ -1,10 +1,9 @@
pub mod error;
pub mod escape;
pub mod format;
pub mod format_json;
pub mod parser;
pub mod renderer;
pub mod wasm;
pub mod format_xml;
pub use parser::*;
pub use renderer::*;

View File

@@ -1,3 +1,4 @@
import vkBeautify from 'vkbeautify';
import { invokeCmd } from './tauri';
export async function tryFormatJson(text: string): Promise<string> {
@@ -23,8 +24,7 @@ export async function tryFormatXml(text: string): Promise<string> {
if (text === '') return text;
try {
const result = await invokeCmd<string>('cmd_format_xml', { text });
return result;
return vkBeautify.xml(text, ' ');
} catch (err) {
console.warn('Failed to format XML', err);
}

View File

@@ -15,7 +15,6 @@ type TauriCmd =
| 'cmd_dismiss_notification'
| 'cmd_export_data'
| 'cmd_format_json'
| 'cmd_format_xml'
| 'cmd_get_http_authentication_config'
| 'cmd_get_http_authentication_summaries'
| 'cmd_get_sse_events'

View File

@@ -1,2 +1,2 @@
declare module 'format-graphql';
declare module 'xml-beautify';
declare module 'vkbeautify';

View File

@@ -65,12 +65,13 @@
"remark-gfm": "^4.0.1",
"slugify": "^1.6.6",
"uuid": "^11.1.0",
"vkbeautify": "^0.99.3",
"whatwg-mimetype": "^4.0.0",
"xml-beautify": "^1.2.3",
"yaml": "^2.6.1"
},
"devDependencies": {
"@lezer/generator": "^1.8.0",
"@tailwindcss/container-queries": "^0.1.1",
"@tailwindcss/nesting": "^0.0.0-insiders.565cd3e",
"@tanstack/router-plugin": "^1.127.5",
"@types/node": "^24.0.13",
@@ -83,7 +84,6 @@
"@types/whatwg-mimetype": "^3.0.2",
"@vitejs/plugin-react": "^4.6.0",
"autoprefixer": "^10.4.21",
"@tailwindcss/container-queries": "^0.1.1",
"decompress": "^4.2.1",
"eslint-plugin-react-refresh": "^0.4.20",
"internal-ip": "^8.0.0",