From ef71d306432af8e97802005bb9affa135e609dec Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Fri, 6 Mar 2026 23:06:44 +0000
Subject: [PATCH 1/6] Initial plan


From 3450b7650f99af203448313fc849b3ad42a21bbe Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Fri, 6 Mar 2026 23:12:06 +0000
Subject: [PATCH 2/6] chore: initial plan for HTML selection preservation

Co-authored-by: kraftbj <88897+kraftbj@users.noreply.github.com>
---
 package-lock.json | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/package-lock.json b/package-lock.json
index fcada17..df79f71 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -34,6 +34,10 @@
         "grunt-checktextdomain": "^1.0.1",
         "grunt-wp-i18n": "^1.0.4",
         "terser": "^5.44.1"
+      },
+      "engines": {
+        "node": ">=20.10.0",
+        "npm": ">=10.2.3"
       }
     },
     "node_modules/@ampproject/remapping": {

From 818d998488001b809f1cde846e56615a8f57c112 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Fri, 6 Mar 2026 23:21:39 +0000
Subject: [PATCH 3/6] fix: preserve HTML formatting when capturing bookmarklet
 text selection

- Bookmarklet now captures selected HTML using getRangeAt()/cloneContents()
  and sends it as `sel_html` alongside the plain-text `s` field
- Added htmlToBlocks() utility that converts HTML to Gutenberg block markup
  (handles headings h1-h6, paragraphs, ul/ol lists with nesting, blockquotes,
  and pre/code blocks; strips scripts/event handlers for safety)
- buildSuggestedContent() now renders HTML selections as formatted blocks
  instead of plain-text in a quote block
- App.js postMessage handler passes sel_html through to content builder
- Added 20 new unit tests: 16 for htmlToBlocks and 4 for the selectionHtml
  path of buildSuggestedContent
- Added bookmarklet test for HTML selection capture
- Rebuilt minified bookmarklet

Co-authored-by: kraftbj <88897+kraftbj@users.noreply.github.com>
---
 assets/bookmarklet.js                     |  18 +-
 assets/bookmarklet.min.js                 |   2 +-
 src/App.js                                |  18 +-
 src/components/BlockTransformShortcuts.js |   6 +-
 src/utils/html-parser.js                  | 240 +++++++++++++++++++++-
 src/utils/index.js                        |   1 +
 tests/bookmarklet/bookmarklet.test.js     |  24 +++
 tests/utils/html-parser.test.js           | 224 ++++++++++++++++++++
 8 files changed, 520 insertions(+), 13 deletions(-)
 create mode 100644 tests/utils/html-parser.test.js

diff --git a/assets/bookmarklet.js b/assets/bookmarklet.js
index 234cda1..3380029 100644
--- a/assets/bookmarklet.js
+++ b/assets/bookmarklet.js
@@ -15,7 +15,7 @@
 		encURI = window.encodeURIComponent,
 		head = document.getElementsByTagName( 'head' )[0],
 		target = '_press_this_app',
-		windowWidth, windowHeight, selection,
+		windowWidth, windowHeight, selection, selectionHtml,
 		metas, links, content, images, iframes, img, scripts,
 		scrapedData = {},
 		popup;
@@ -32,7 +32,16 @@
 	}
 
 	if ( window.getSelection ) {
-		selection = window.getSelection() + '';
+		var sel = window.getSelection();
+		if ( sel && sel.rangeCount > 0 ) {
+			selection = sel.toString();
+			// Capture HTML to preserve formatting (bold, lists, headings, etc.).
+			var range = sel.getRangeAt( 0 );
+			var fragment = range.cloneContents();
+			var tempDiv = document.createElement( 'div' );
+			tempDiv.appendChild( fragment );
+			selectionHtml = tempDiv.innerHTML;
+		}
 	} else if ( document.getSelection ) {
 		selection = document.getSelection() + '';
 	} else if ( document.selection ) {
@@ -299,6 +308,11 @@
 		add( 's', selection );
 	}
 
+	// Add HTML selection to preserve formatting (bold, lists, headings, etc.).
+	if ( selectionHtml && selectionHtml !== selection ) {
+		add( 'sel_html', selectionHtml );
+	}
+
 	/**
 	 * Send scraped data to the Press This popup via postMessage.
 	 * Uses polling to wait for the popup to be ready.
diff --git a/assets/bookmarklet.min.js b/assets/bookmarklet.min.js
index 00bb55c..c43d75b 100644
--- a/assets/bookmarklet.min.js
+++ b/assets/bookmarklet.min.js
@@ -1 +1 @@
-!function(e,t,i,a){var n,o,r,l,c,s,g,m,d,f,h,u=e.encodeURIComponent,p=t.getElementsByTagName("head")[0],y={};if(a)if(i.match(/^https?:/)){a+="&u="+u(i),e.getSelection?r=e.getSelection()+"":t.getSelection?r=t.getSelection()+"":t.selection&&(r=t.selection.createRange().text||""),a+="&buster="+(new Date).getTime(),a+="&pm=1",n=(n=e.outerWidth||t.documentElement.clientWidth||600)<800||n>5e3?600:.7*n,o=(o=e.outerHeight||t.documentElement.clientHeight||700)<800||o>3e3?700:.9*o,T("pt_version",11),l=p.getElementsByTagName("meta")||[];for(var v=0;v<l.length&&!(v>200);v++){var b=l[v],O=b.getAttribute("name"),_=b.getAttribute("property"),x=b.getAttribute("content");x&&(O?T("_meta["+O+"]",x):_&&(T("_meta["+_+"]",x),"og:video"!==_&&"og:video:url"!==_&&"og:video:secure_url"!==_||T("_og_video[]",x)))}c=p.getElementsByTagName("link")||[];for(var E=0;E<c.length&&!(E>=50);E++){var A=c[E],w=A.getAttribute("rel");"canonical"!==w&&"icon"!==w&&"shortlink"!==w||T("_links["+w+"]",A.getAttribute("href")),"alternate"===w&&"x-default"===A.getAttribute("hreflang")&&T("_links[alternate_canonical]",A.getAttribute("href"))}!function(){f=t.querySelectorAll('script[type="application/ld+json"]');for(var e=0;e<f.length&&e<10;e++)try{var i=JSON.parse(f[e].textContent);i["@graph"]&&Array.isArray(i["@graph"])?i["@graph"].forEach(S):S(i)}catch(e){}}(),t.body.getElementsByClassName&&(s=t.body.getElementsByClassName("hfeed")[0]),g=(s=t.getElementById("content")||s||t.body).getElementsByTagName("img")||[];for(var N=0;N<g.length&&!(N>=100);N++)(d=g[N]).src.indexOf("avatar")>-1||d.className.indexOf("avatar")>-1||d.width&&d.width<256||d.height&&d.height<128||d.src&&0!==d.src.indexOf("data:")&&T("_images[]",d.src);m=t.body.getElementsByTagName("iframe")||[];for(var j=0;j<m.length&&!(j>=50);j++){var 
B=m[j].src;B&&"about:blank"!==B&&(B.indexOf("jetpack-comment")>-1||B.indexOf("disqus.com")>-1||B.indexOf("facebook.com/plugins")>-1||B.indexOf("platform.twitter.com/widgets")>-1||B.indexOf("google.com/recaptcha")>-1||B.indexOf("googletagmanager.com")>-1||B.indexOf("doubleclick.net")>-1||B.indexOf("googlesyndication.com")>-1||B.indexOf("amazon-adsystem.com")>-1||B.indexOf("quantserve.com")>-1||B.indexOf("scorecardresearch.com")>-1||B.indexOf("addthis.com")>-1||B.indexOf("sharethis.com")>-1||B.indexOf("addtoany.com")>-1||T("_embeds[]",B))}var k,P;t.title&&T("t",t.title),r&&T("s",r),h=e.open(a,"_press_this_app","location,resizable,scrollbars,width="+n+",height="+o),k=0,P=a.match(/^https?:\/\/[^\/]+/)[0],setTimeout(function e(){if(k++,h&&!h.closed){try{h.postMessage({type:"press-this-data",version:11,data:y},P)}catch(e){}k<50&&setTimeout(e,100)}},200)}else top.location.href=a;function T(e,t){if(null!=t&&""!==t){var i=e.match(/^(.+)\[\]$/);if(i){var a=i[1];return y[a]||(y[a]=[]),void y[a].push(t)}var n=e.match(/^(.+)\[(.+)\]$/);if(n){var o=n[1],r=n[2];return y[o]||(y[o]={}),void(y[o][r]=t)}y[e]=t}}function S(e){if(e&&"object"==typeof e){var t=e["@type"];if("VideoObject"===t&&(e.embedUrl&&T("_embeds[]",e.embedUrl),e.contentUrl&&!e.embedUrl&&T("_embeds[]",e.contentUrl)),"Article"!==t&&"WebPage"!==t&&"NewsArticle"!==t&&"BlogPosting"!==t||(e.mainEntityOfPage&&"string"==typeof e.mainEntityOfPage?T("_jsonld[canonical]",e.mainEntityOfPage):e.mainEntityOfPage&&e.mainEntityOfPage["@id"]&&T("_jsonld[canonical]",e.mainEntityOfPage["@id"]),e.headline&&T("_jsonld[headline]",e.headline),e.description&&T("_jsonld[description]",e.description)),e.image){var i="";"string"==typeof e.image?i=e.image:e.image.url?i=e.image.url:Array.isArray(e.image)&&e.image[0]&&(i="string"==typeof e.image[0]?e.image[0]:e.image[0].url),i&&T("_jsonld[image]",i)}}}}(window,document,top.location.href,window.pt_url);
\ No newline at end of file
+!function(e,t,i,a){var n,o,r,l,c,s,g,d,m,f,h,p,u=e.encodeURIComponent,y=t.getElementsByTagName("head")[0],v={};if(a)if(i.match(/^https?:/)){if(a+="&u="+u(i),e.getSelection){var b=e.getSelection();if(b&&b.rangeCount>0){r=b.toString();var O=b.getRangeAt(0).cloneContents(),_=t.createElement("div");_.appendChild(O),l=_.innerHTML}}else t.getSelection?r=t.getSelection()+"":t.selection&&(r=t.selection.createRange().text||"");a+="&buster="+(new Date).getTime(),a+="&pm=1",n=(n=e.outerWidth||t.documentElement.clientWidth||600)<800||n>5e3?600:.7*n,o=(o=e.outerHeight||t.documentElement.clientHeight||700)<800||o>3e3?700:.9*o,q("pt_version",11),c=y.getElementsByTagName("meta")||[];for(var x=0;x<c.length&&!(x>200);x++){var E=c[x],A=E.getAttribute("name"),w=E.getAttribute("property"),N=E.getAttribute("content");N&&(A?q("_meta["+A+"]",N):w&&(q("_meta["+w+"]",N),"og:video"!==w&&"og:video:url"!==w&&"og:video:secure_url"!==w||q("_og_video[]",N)))}s=y.getElementsByTagName("link")||[];for(var j=0;j<s.length&&!(j>=50);j++){var B=s[j],T=B.getAttribute("rel");"canonical"!==T&&"icon"!==T&&"shortlink"!==T||q("_links["+T+"]",B.getAttribute("href")),"alternate"===T&&"x-default"===B.getAttribute("hreflang")&&q("_links[alternate_canonical]",B.getAttribute("href"))}!function(){h=t.querySelectorAll('script[type="application/ld+json"]');for(var e=0;e<h.length&&e<10;e++)try{var i=JSON.parse(h[e].textContent);i["@graph"]&&Array.isArray(i["@graph"])?i["@graph"].forEach(H):H(i)}catch(e){}}(),t.body.getElementsByClassName&&(g=t.body.getElementsByClassName("hfeed")[0]),d=(g=t.getElementById("content")||g||t.body).getElementsByTagName("img")||[];for(var k=0;k<d.length&&!(k>=100);k++)(f=d[k]).src.indexOf("avatar")>-1||f.className.indexOf("avatar")>-1||f.width&&f.width<256||f.height&&f.height<128||f.src&&0!==f.src.indexOf("data:")&&q("_images[]",f.src);m=t.body.getElementsByTagName("iframe")||[];for(var P=0;P<m.length&&!(P>=50);P++){var 
C=m[P].src;C&&"about:blank"!==C&&(C.indexOf("jetpack-comment")>-1||C.indexOf("disqus.com")>-1||C.indexOf("facebook.com/plugins")>-1||C.indexOf("platform.twitter.com/widgets")>-1||C.indexOf("google.com/recaptcha")>-1||C.indexOf("googletagmanager.com")>-1||C.indexOf("doubleclick.net")>-1||C.indexOf("googlesyndication.com")>-1||C.indexOf("amazon-adsystem.com")>-1||C.indexOf("quantserve.com")>-1||C.indexOf("scorecardresearch.com")>-1||C.indexOf("addthis.com")>-1||C.indexOf("sharethis.com")>-1||C.indexOf("addtoany.com")>-1||q("_embeds[]",C))}var S,U;t.title&&q("t",t.title),r&&q("s",r),l&&l!==r&&q("sel_html",l),p=e.open(a,"_press_this_app","location,resizable,scrollbars,width="+n+",height="+o),S=0,U=a.match(/^https?:\/\/[^\/]+/)[0],setTimeout(function e(){if(S++,p&&!p.closed){try{p.postMessage({type:"press-this-data",version:11,data:v},U)}catch(e){}S<50&&setTimeout(e,100)}},200)}else top.location.href=a;function q(e,t){if(null!=t&&""!==t){var i=e.match(/^(.+)\[\]$/);if(i){var a=i[1];return v[a]||(v[a]=[]),void v[a].push(t)}var n=e.match(/^(.+)\[(.+)\]$/);if(n){var o=n[1],r=n[2];return v[o]||(v[o]={}),void(v[o][r]=t)}v[e]=t}}function H(e){if(e&&"object"==typeof e){var t=e["@type"];if("VideoObject"===t&&(e.embedUrl&&q("_embeds[]",e.embedUrl),e.contentUrl&&!e.embedUrl&&q("_embeds[]",e.contentUrl)),"Article"!==t&&"WebPage"!==t&&"NewsArticle"!==t&&"BlogPosting"!==t||(e.mainEntityOfPage&&"string"==typeof e.mainEntityOfPage?q("_jsonld[canonical]",e.mainEntityOfPage):e.mainEntityOfPage&&e.mainEntityOfPage["@id"]&&q("_jsonld[canonical]",e.mainEntityOfPage["@id"]),e.headline&&q("_jsonld[headline]",e.headline),e.description&&q("_jsonld[description]",e.description)),e.image){var i="";"string"==typeof e.image?i=e.image:e.image.url?i=e.image.url:Array.isArray(e.image)&&e.image[0]&&(i="string"==typeof e.image[0]?e.image[0]:e.image[0].url),i&&q("_jsonld[image]",i)}}}}(window,document,top.location.href,window.pt_url);
\ No newline at end of file
diff --git a/src/App.js b/src/App.js
index 32ba56f..9b9bb6a 100644
--- a/src/App.js
+++ b/src/App.js
@@ -221,12 +221,17 @@ export default function App() {
 			// Build suggested content from bookmarklet metadata.
 			// Extract description from meta tags.
 			const meta = messageData._meta || {};
-			const description =
-				messageData.s || // User selection takes priority.
-				meta[ 'twitter:description' ] ||
-				meta[ 'og:description' ] ||
-				meta.description ||
-				'';
+
+			// HTML selection takes highest priority (preserves formatting).
+			// Plain-text selection is a fallback, then meta descriptions.
+			const selectionHtml = messageData.sel_html || '';
+			const description = selectionHtml
+				? '' // HTML selection is used directly; skip plain-text fallback.
+				: messageData.s || // Plain-text user selection.
+				  meta[ 'twitter:description' ] ||
+				  meta[ 'og:description' ] ||
+				  meta.description ||
+				  '';
 
 			const title =
 				messageData.t ||
@@ -243,6 +248,7 @@ export default function App() {
 			const suggestedContent = buildSuggestedContentFromMetadata( {
 				title,
 				description,
+				selectionHtml,
 				siteName: meta[ 'og:site_name' ] || '',
 				canonical,
 				url: receivedSourceUrl,
diff --git a/src/components/BlockTransformShortcuts.js b/src/components/BlockTransformShortcuts.js
index a61a39b..3f3cf20 100644
--- a/src/components/BlockTransformShortcuts.js
+++ b/src/components/BlockTransformShortcuts.js
@@ -137,7 +137,11 @@ export default function BlockTransformShortcuts() {
 			if ( innerBlocks.length > 0 ) {
 				// Replace the quote with its inner blocks directly.
 				const replacementBlocks = innerBlocks.map( ( inner ) =>
-					createBlock( inner.name, { ...inner.attributes }, inner.innerBlocks )
+					createBlock(
+						inner.name,
+						{ ...inner.attributes },
+						inner.innerBlocks
+					)
 				);
 				replaceBlocks( currentClientId, replacementBlocks );
 			} else {
diff --git a/src/utils/html-parser.js b/src/utils/html-parser.js
index 2b1f82d..2f3033a 100644
--- a/src/utils/html-parser.js
+++ b/src/utils/html-parser.js
@@ -447,6 +447,234 @@ function getCanonical( doc, meta ) {
 	return linkCanonical?.getAttribute( 'href' ) || meta[ 'og:url' ] || '';
 }
 
+/**
+ * Sanitize inline HTML, keeping only safe formatting elements.
+ *
+ * Strips script/style elements and event handler attributes.
+ * Preserves safe inline elements: strong, em, b, i, u, s, a (href only),
+ * code, mark, sub, sup, span, br.
+ *
+ * @param {Element} element DOM element to sanitize (in-place).
+ */
+function sanitizeInlineContent( element ) {
+	// Remove script and style elements.
+	const dangerous = element.querySelectorAll(
+		'script, style, object, embed, iframe'
+	);
+	dangerous.forEach( ( el ) => el.remove() );
+
+	// Strip event handlers and javascript: hrefs from all elements.
+	const allEls = element.querySelectorAll( '*' );
+	allEls.forEach( ( el ) => {
+		// Remove all event handler attributes.
+		Array.from( el.attributes ).forEach( ( attr ) => {
+			if ( attr.name.startsWith( 'on' ) ) {
+				el.removeAttribute( attr.name );
+			}
+		} );
+
+		// Strip javascript: URLs from href and src.
+		const href = el.getAttribute( 'href' );
+		if ( href && /^\s*javascript:/i.test( href ) ) {
+			el.removeAttribute( 'href' );
+		}
+		const src = el.getAttribute( 'src' );
+		if ( src && /^\s*javascript:/i.test( src ) ) {
+			el.removeAttribute( 'src' );
+		}
+	} );
+}
+
+/**
+ * Convert a list element (ul/ol) to Gutenberg list block markup.
+ *
+ * @param {Element} listEl  The list element (ul or ol).
+ * @param {boolean} ordered Whether this is an ordered list.
+ * @return {string} Gutenberg list block markup.
+ */
+function listElementToBlock( listEl, ordered ) {
+	const tag = ordered ? 'ol' : 'ul';
+	const attr = ordered ? ' {"ordered":true}' : '';
+	let items = '';
+
+	listEl.childNodes.forEach( ( child ) => {
+		if (
+			child.nodeType !== 1 /* ELEMENT_NODE */ ||
+			child.tagName.toLowerCase() !== 'li'
+		) {
+			return;
+		}
+
+		// Clone to work with it non-destructively.
+		const li = child.cloneNode( true );
+
+		// Handle only direct-child nested lists within the li to avoid double-processing.
+		let nestedBlocks = '';
+		Array.from( li.children ).forEach( ( childEl ) => {
+			const childTag = childEl.tagName.toLowerCase();
+			if ( childTag === 'ul' || childTag === 'ol' ) {
+				const isOrdered = childTag === 'ol';
+				nestedBlocks += '\n' + listElementToBlock( childEl, isOrdered );
+				childEl.remove();
+			}
+		} );
+
+		sanitizeInlineContent( li );
+		const liContent = li.innerHTML.trim();
+
+		items += `<!-- wp:list-item -->\n<li>${ liContent }${ nestedBlocks }</li>\n<!-- /wp:list-item -->\n`;
+	} );
+
+	return `<!-- wp:list${ attr } -->\n<${ tag } class="wp-block-list">\n${ items }</${ tag }>\n<!-- /wp:list -->\n\n`;
+}
+
+/**
+ * Convert an HTML string to Gutenberg block markup.
+ *
+ * Handles block-level elements: paragraphs, headings (h1-h6), unordered and
+ * ordered lists with nesting, blockquotes, and preformatted/code blocks.
+ * Inline elements (strong, em, a, code, etc.) are preserved within blocks.
+ * Script/style elements and event handlers are stripped for safety.
+ *
+ * Falls back to a paragraph block for unrecognised or purely inline content.
+ *
+ * @param {string} html HTML string to convert.
+ * @return {string} Gutenberg block markup string, or empty string if no content.
+ */
+export function htmlToBlocks( html ) {
+	if ( ! html || typeof html !== 'string' ) {
+		return '';
+	}
+
+	const parser = new DOMParser();
+	const doc = parser.parseFromString( `<body>${ html }</body>`, 'text/html' );
+	const body = doc.body;
+
+	let blocks = '';
+	let inlineBuffer = '';
+
+	/**
+	 * Flush any accumulated inline/text content as a paragraph block.
+	 */
+	function flushInlineBuffer() {
+		const trimmed = inlineBuffer.trim();
+		if ( trimmed ) {
+			blocks += `<!-- wp:paragraph -->\n<p>${ trimmed }</p>\n<!-- /wp:paragraph -->\n\n`;
+		}
+		inlineBuffer = '';
+	}
+
+	/**
+	 * Set of block-level tag names that start a new block.
+	 */
+	const BLOCK_TAGS = new Set( [
+		'p',
+		'ul',
+		'ol',
+		'h1',
+		'h2',
+		'h3',
+		'h4',
+		'h5',
+		'h6',
+		'blockquote',
+		'pre',
+		'figure',
+		'div',
+	] );
+
+	body.childNodes.forEach( ( node ) => {
+		if ( node.nodeType === 3 /* TEXT_NODE */ ) {
+			const text = node.textContent;
+			// Accumulate non-empty text into the inline buffer.
+			if ( text.trim() ) {
+				inlineBuffer += escapeHtml( text );
+			}
+			return;
+		}
+
+		if ( node.nodeType !== 1 /* ELEMENT_NODE */ ) {
+			return;
+		}
+
+		const tag = node.tagName.toLowerCase();
+
+		// Skip dangerous elements.
+		if ( tag === 'script' || tag === 'style' ) {
+			return;
+		}
+
+		if ( ! BLOCK_TAGS.has( tag ) ) {
+			// Inline element – add to buffer.
+			const clone = node.cloneNode( true );
+			const tempEl = doc.createElement( 'div' );
+			tempEl.appendChild( clone );
+			sanitizeInlineContent( tempEl );
+			inlineBuffer += tempEl.innerHTML;
+			return;
+		}
+
+		// We're about to emit a block – flush any pending inline content first.
+		flushInlineBuffer();
+
+		// Headings.
+		if ( /^h[1-6]$/.test( tag ) ) {
+			const level = tag[ 1 ];
+			const clone = node.cloneNode( true );
+			const tempEl = doc.createElement( 'div' );
+			tempEl.appendChild( clone );
+			sanitizeInlineContent( tempEl );
+			blocks += `<!-- wp:heading {"level":${ level }} -->\n<${ tag } class="wp-block-heading">${ tempEl.firstChild.innerHTML }</${ tag }>\n<!-- /wp:heading -->\n\n`;
+			return;
+		}
+
+		// Lists.
+		if ( tag === 'ul' || tag === 'ol' ) {
+			blocks += listElementToBlock( node, tag === 'ol' );
+			return;
+		}
+
+		// Blockquotes.
+		if ( tag === 'blockquote' ) {
+			const clone = node.cloneNode( true );
+			const tempEl = doc.createElement( 'div' );
+			tempEl.appendChild( clone );
+			sanitizeInlineContent( tempEl );
+			const inner = htmlToBlocks( tempEl.firstChild.innerHTML );
+			const innerBlocks =
+				inner ||
+				`<!-- wp:paragraph -->\n<p>${ escapeHtml(
+					node.textContent.trim()
+				) }</p>\n<!-- /wp:paragraph -->\n`;
+			blocks += `<!-- wp:quote -->\n<blockquote class="wp-block-quote">${ innerBlocks }</blockquote>\n<!-- /wp:quote -->\n\n`;
+			return;
+		}
+
+		// Preformatted / code blocks.
+		if ( tag === 'pre' ) {
+			const codeEl = node.querySelector( 'code' );
+			const codeContent = escapeHtml( ( codeEl || node ).textContent );
+			blocks += `<!-- wp:code -->\n<pre class="wp-block-code"><code>${ codeContent }</code></pre>\n<!-- /wp:code -->\n\n`;
+			return;
+		}
+
+		// Paragraphs and generic block elements (div, figure, etc.).
+		const clone = node.cloneNode( true );
+		const tempEl = doc.createElement( 'div' );
+		tempEl.appendChild( clone );
+		sanitizeInlineContent( tempEl );
+		const innerHtml = tempEl.firstChild.innerHTML.trim();
+		if ( innerHtml ) {
+			blocks += `<!-- wp:paragraph -->\n<p>${ innerHtml }</p>\n<!-- /wp:paragraph -->\n\n`;
+		}
+	} );
+
+	// Flush any remaining inline content.
+	flushInlineBuffer();
+
+	return blocks.trim();
+}
+
 /**
  * Build suggested content from server-returned metadata.
  *
@@ -478,9 +706,15 @@ ${ escapeHtml( sourceUrl ) }
 `;
 	}
 
-	// Add quote block with description if available.
-	// Escape description.
-	if ( data.description ) {
+	// Add HTML selection as formatted blocks when available.
+	// Falls back to a plain-text quote block for meta descriptions.
+	if ( data.selectionHtml ) {
+		const selectionBlocks = htmlToBlocks( data.selectionHtml );
+		if ( selectionBlocks ) {
+			content += selectionBlocks + '\n\n';
+		}
+	} else if ( data.description ) {
+		// Escape description.
 		content += `<!-- wp:quote -->
 <blockquote class="wp-block-quote"><!-- wp:paragraph -->
 <p>${ escapeHtml( data.description ) }</p>
diff --git a/src/utils/index.js b/src/utils/index.js
index 4843855..e2650ac 100644
--- a/src/utils/index.js
+++ b/src/utils/index.js
@@ -8,6 +8,7 @@ export {
 	parseHtmlMetadata,
 	buildSuggestedContent,
 	buildSuggestedContentFromMetadata,
+	htmlToBlocks,
 	escapeHtml,
 	escapeAttr,
 } from './html-parser';
diff --git a/tests/bookmarklet/bookmarklet.test.js b/tests/bookmarklet/bookmarklet.test.js
index 2e0263e..c1353eb 100644
--- a/tests/bookmarklet/bookmarklet.test.js
+++ b/tests/bookmarklet/bookmarklet.test.js
@@ -207,6 +207,30 @@ describe( 'Bookmarklet Functionality', () => {
 		).toBe( true );
 	} );
 
+	test( 'HTML selection capture preserves formatting', () => {
+		// Check for getRangeAt usage to capture the selection range.
+		expect( bookmarkletSource ).toContain( 'getRangeAt' );
+
+		// Check for cloneContents to extract selected DOM fragment.
+		expect( bookmarkletSource ).toContain( 'cloneContents' );
+
+		// Check for innerHTML to serialise the selection as HTML.
+		expect( bookmarkletSource ).toContain( 'innerHTML' );
+
+		// Check that sel_html is sent alongside plain-text selection.
+		expect(
+			bookmarkletSource.includes( "'sel_html'" ) ||
+			bookmarkletSource.includes( '"sel_html"' ) ||
+			bookmarkletSource.includes( 'sel_html' )
+		).toBe( true );
+
+		// sel_html is added via the add() helper.
+		expect(
+			bookmarkletSource.includes( "add( 'sel_html'" ) ||
+			bookmarkletSource.includes( "add('sel_html'" )
+		).toBe( true );
+	} );
+
 	test( 'Enhanced data extraction - Open Graph video', () => {
 		// Check for og:video detection.
 		expect( bookmarkletSource ).toContain( 'og:video' );
diff --git a/tests/utils/html-parser.test.js b/tests/utils/html-parser.test.js
new file mode 100644
index 0000000..25434ee
--- /dev/null
+++ b/tests/utils/html-parser.test.js
@@ -0,0 +1,224 @@
+/**
+ * Tests for HTML parser utilities, especially htmlToBlocks.
+ *
+ * @package press-this
+ */
+
+import { htmlToBlocks, buildSuggestedContent } from '../../src/utils/html-parser';
+
+describe( 'htmlToBlocks', () => {
+	test( 'returns empty string for empty input', () => {
+		expect( htmlToBlocks( '' ) ).toBe( '' );
+		expect( htmlToBlocks( null ) ).toBe( '' );
+		expect( htmlToBlocks( undefined ) ).toBe( '' );
+	} );
+
+	test( 'converts a paragraph to a paragraph block', () => {
+		const result = htmlToBlocks( '<p>Hello world</p>' );
+		expect( result ).toContain( '<!-- wp:paragraph -->' );
+		expect( result ).toContain( '<p>Hello world</p>' );
+		expect( result ).toContain( '<!-- /wp:paragraph -->' );
+	} );
+
+	test( 'converts plain text to a paragraph block', () => {
+		const result = htmlToBlocks( 'Plain text content' );
+		expect( result ).toContain( '<!-- wp:paragraph -->' );
+		expect( result ).toContain( 'Plain text content' );
+	} );
+
+	test( 'converts h1-h6 headings to heading blocks', () => {
+		[ 1, 2, 3, 4, 5, 6 ].forEach( ( level ) => {
+			const result = htmlToBlocks( `<h${ level }>Heading ${ level }</h${ level }>` );
+			expect( result ).toContain( `<!-- wp:heading {"level":${ level }} -->` );
+			expect( result ).toContain( `<h${ level } class="wp-block-heading">` );
+			expect( result ).toContain( `Heading ${ level }` );
+			expect( result ).toContain( '<!-- /wp:heading -->' );
+		} );
+	} );
+
+	test( 'converts unordered list to list block', () => {
+		const result = htmlToBlocks( '<ul><li>Item 1</li><li>Item 2</li></ul>' );
+		expect( result ).toContain( '<!-- wp:list -->' );
+		expect( result ).toContain( '<ul class="wp-block-list">' );
+		expect( result ).toContain( '<!-- wp:list-item -->' );
+		expect( result ).toContain( '<li>Item 1</li>' );
+		expect( result ).toContain( '<li>Item 2</li>' );
+		expect( result ).toContain( '<!-- /wp:list-item -->' );
+		expect( result ).toContain( '<!-- /wp:list -->' );
+		// Should NOT use ordered list attribute.
+		expect( result ).not.toContain( '"ordered":true' );
+	} );
+
+	test( 'converts ordered list to list block with ordered attribute', () => {
+		const result = htmlToBlocks( '<ol><li>First</li><li>Second</li></ol>' );
+		expect( result ).toContain( '<!-- wp:list {"ordered":true} -->' );
+		expect( result ).toContain( '<ol class="wp-block-list">' );
+		expect( result ).toContain( '<!-- wp:list-item -->' );
+		expect( result ).toContain( '<li>First</li>' );
+		expect( result ).toContain( '<!-- /wp:list -->' );
+	} );
+
+	test( 'converts nested lists correctly', () => {
+		const html = '<ul><li>Parent<ul><li>Child item</li></ul></li></ul>';
+		const result = htmlToBlocks( html );
+		// Outer list.
+		expect( result ).toContain( '<!-- wp:list -->' );
+		// Nested list should also be wrapped.
+		const listCount = ( result.match( /<!-- wp:list -->/g ) || [] ).length;
+		expect( listCount ).toBeGreaterThanOrEqual( 2 );
+		expect( result ).toContain( 'Child item' );
+		expect( result ).toContain( 'Parent' );
+	} );
+
+	test( 'preserves inline formatting in list items', () => {
+		const html = '<ul><li><strong>Bold item</strong></li><li><em>Italic item</em></li></ul>';
+		const result = htmlToBlocks( html );
+		expect( result ).toContain( '<strong>Bold item</strong>' );
+		expect( result ).toContain( '<em>Italic item</em>' );
+	} );
+
+	test( 'converts blockquote to quote block', () => {
+		const result = htmlToBlocks( '<blockquote><p>Quote text</p></blockquote>' );
+		expect( result ).toContain( '<!-- wp:quote -->' );
+		expect( result ).toContain( '<blockquote class="wp-block-quote">' );
+		expect( result ).toContain( 'Quote text' );
+		expect( result ).toContain( '<!-- /wp:quote -->' );
+	} );
+
+	test( 'converts pre/code to code block', () => {
+		const result = htmlToBlocks( '<pre><code>const x = 1;</code></pre>' );
+		expect( result ).toContain( '<!-- wp:code -->' );
+		expect( result ).toContain( '<pre class="wp-block-code">' );
+		expect( result ).toContain( 'const x = 1;' );
+		expect( result ).toContain( '<!-- /wp:code -->' );
+	} );
+
+	test( 'handles mix of headings and lists (issue example)', () => {
+		const html = `<h3><strong>In Unreal Engine: How to Choose</strong></h3>
+<ul>
+  <li>Unreal Engine allows developers to switch:
+    <ul>
+      <li><strong>Deferred Rendering</strong>: Default for most projects.</li>
+      <li><strong>Forward Rendering</strong>: For VR projects.</li>
+    </ul>
+  </li>
+</ul>
+<p>Each technique aligns with different requirements.</p>`;
+
+		const result = htmlToBlocks( html );
+
+		// Heading should be converted.
+		expect( result ).toContain( '<!-- wp:heading {"level":3} -->' );
+		expect( result ).toContain( 'In Unreal Engine: How to Choose' );
+
+		// List should be converted.
+		expect( result ).toContain( '<!-- wp:list -->' );
+		expect( result ).toContain( 'Deferred Rendering' );
+		expect( result ).toContain( 'Forward Rendering' );
+
+		// Paragraph should be converted.
+		expect( result ).toContain( '<!-- wp:paragraph -->' );
+		expect( result ).toContain( 'Each technique aligns' );
+	} );
+
+	test( 'strips script elements for safety', () => {
+		const html = '<p>Safe text</p><script>alert("xss")</script><p>More text</p>';
+		const result = htmlToBlocks( html );
+		expect( result ).toContain( 'Safe text' );
+		expect( result ).not.toContain( 'alert' );
+		expect( result ).not.toContain( '<script>' );
+	} );
+
+	test( 'strips event handler attributes for safety', () => {
+		const html = '<p onclick="alert(1)">Click me</p>';
+		const result = htmlToBlocks( html );
+		expect( result ).toContain( 'Click me' );
+		expect( result ).not.toContain( 'onclick' );
+	} );
+
+	test( 'strips javascript: href for safety', () => {
+		const html = '<p><a href="javascript:alert(1)">Link</a></p>';
+		const result = htmlToBlocks( html );
+		expect( result ).toContain( 'Link' );
+		expect( result ).not.toContain( 'javascript:' );
+	} );
+
+	test( 'preserves safe inline elements', () => {
+		const html = '<p><strong>bold</strong> and <em>italic</em> and <a href="https://example.com">link</a></p>';
+		const result = htmlToBlocks( html );
+		expect( result ).toContain( '<strong>bold</strong>' );
+		expect( result ).toContain( '<em>italic</em>' );
+		expect( result ).toContain( '<a href="https://example.com">link</a>' );
+	} );
+
+	test( 'escapes special characters in plain text nodes', () => {
+		// DOMParser treats the input as HTML, so & is a real entity to test.
+		const result = htmlToBlocks( '<p>Tom &amp; Jerry</p>' );
+		expect( result ).toContain( '<!-- wp:paragraph -->' );
+		// & should remain as & (or &amp;) in output, not double-escaped.
+		expect( result ).toMatch( /Tom (&amp;|&) Jerry/ );
+	} );
+} );
+
+describe( 'buildSuggestedContent with selectionHtml', () => {
+	const sourceUrl = 'https://example.com/article';
+
+	test( 'uses selectionHtml as formatted blocks when provided', () => {
+		const result = buildSuggestedContent(
+			{
+				selectionHtml: '<h2>Title</h2><p>Content</p>',
+				description: 'Plain description',
+				title: 'Article',
+			},
+			sourceUrl
+		);
+
+		// Should use heading block from selectionHtml.
+		expect( result ).toContain( '<!-- wp:heading' );
+		expect( result ).toContain( 'Title' );
+		// Should NOT put content in a quote block.
+		expect( result ).not.toContain( '<!-- wp:quote -->' );
+		// Should still add the source attribution.
+		expect( result ).toContain( 'Source:' );
+		expect( result ).toContain( sourceUrl );
+	} );
+
+	test( 'falls back to description in quote block when no selectionHtml', () => {
+		const result = buildSuggestedContent(
+			{
+				description: 'Plain text description',
+				title: 'Article',
+			},
+			sourceUrl
+		);
+
+		expect( result ).toContain( '<!-- wp:quote -->' );
+		expect( result ).toContain( 'Plain text description' );
+		expect( result ).toContain( 'Source:' );
+	} );
+
+	test( 'renders only source attribution when both selectionHtml and description are absent', () => {
+		const result = buildSuggestedContent(
+			{ title: 'Article' },
+			sourceUrl
+		);
+
+		expect( result ).not.toContain( '<!-- wp:quote -->' );
+		expect( result ).not.toContain( '<!-- wp:heading' );
+		expect( result ).toContain( 'Source:' );
+	} );
+
+	test( 'selectionHtml takes priority over description', () => {
+		const result = buildSuggestedContent(
+			{
+				selectionHtml: '<ul><li>List item</li></ul>',
+				description: 'Should be ignored',
+			},
+			sourceUrl
+		);
+
+		expect( result ).toContain( '<!-- wp:list -->' );
+		expect( result ).toContain( 'List item' );
+		expect( result ).not.toContain( 'Should be ignored' );
+	} );
+} );

From 8d3ae6cf4b39979582fa80976a88c3f1e5dd73ce Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Fri, 6 Mar 2026 23:48:52 +0000
Subject: [PATCH 4/6] fix: address security and robustness review comments
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Critical fixes:
- Switch sanitizeInlineContent to allowlist (strong,em,b,i,u,s,a,code,
  mark,sub,sup,span,br); all other elements unwrapped, all attributes
  stripped except href on <a>
- Block data: and vbscript: URI schemes alongside javascript: in href
- Fix silent content loss: buildSuggestedContent falls back to
  description quote when selectionHtml produces empty blocks; App.js
  always computes description so the fallback is available

Important fixes:
- Wrap bookmarklet cloneContents() DOM calls in try-catch
- Add depth limits: listElementToBlock is capped at 10, and htmlToBlocks
  blockquote recursion is capped at 5
- Remove unreliable selectionHtml !== selection comparison (innerHTML
  entity-encodes & to &amp;); always send sel_html when capture succeeded

Also fix tempEl.firstChild.innerHTML → tempEl.innerHTML (the heading, paragraph
and blockquote handlers were broken by the allowlist unwrapping block containers)
Update JSDoc for buildSuggestedContent to document selectionHtml
Add tests: data:/vbscript: blocking, non-allowlist unwrapping, attribute
stripping, depth-limit safety, selectionHtml→empty→description fallback

Co-authored-by: kraftbj <88897+kraftbj@users.noreply.github.com>
---
 assets/bookmarklet.js           |  18 +++--
 assets/bookmarklet.min.js       |   2 +-
 src/App.js                      |  16 ++---
 src/utils/html-parser.js        | 124 ++++++++++++++++++++++----------
 tests/utils/html-parser.test.js |  72 ++++++++++++++++++-
 5 files changed, 179 insertions(+), 53 deletions(-)

diff --git a/assets/bookmarklet.js b/assets/bookmarklet.js
index 3380029..145e91a 100644
--- a/assets/bookmarklet.js
+++ b/assets/bookmarklet.js
@@ -36,11 +36,17 @@
 		if ( sel && sel.rangeCount > 0 ) {
 			selection = sel.toString();
 			// Capture HTML to preserve formatting (bold, lists, headings, etc.).
-			var range = sel.getRangeAt( 0 );
-			var fragment = range.cloneContents();
-			var tempDiv = document.createElement( 'div' );
-			tempDiv.appendChild( fragment );
-			selectionHtml = tempDiv.innerHTML;
+			// Wrapped in try-catch: cloneContents() can throw a DOMException (e.g.
+			// HierarchyRequestError when the selected range contains a doctype node).
+			try {
+				var range = sel.getRangeAt( 0 );
+				var fragment = range.cloneContents();
+				var tempDiv = document.createElement( 'div' );
+				tempDiv.appendChild( fragment );
+				selectionHtml = tempDiv.innerHTML;
+			} catch ( e ) {
+				// HTML capture failed; plain-text selection is still available.
+			}
 		}
 	} else if ( document.getSelection ) {
 		selection = document.getSelection() + '';
@@ -309,7 +315,7 @@
 	}
 
 	// Add HTML selection to preserve formatting (bold, lists, headings, etc.).
-	if ( selectionHtml && selectionHtml !== selection ) {
+	if ( selectionHtml ) {
 		add( 'sel_html', selectionHtml );
 	}
 
diff --git a/assets/bookmarklet.min.js b/assets/bookmarklet.min.js
index c43d75b..515f14e 100644
--- a/assets/bookmarklet.min.js
+++ b/assets/bookmarklet.min.js
@@ -1 +1 @@
-!function(e,t,i,a){var n,o,r,l,c,s,g,d,m,f,h,p,u=e.encodeURIComponent,y=t.getElementsByTagName("head")[0],v={};if(a)if(i.match(/^https?:/)){if(a+="&u="+u(i),e.getSelection){var b=e.getSelection();if(b&&b.rangeCount>0){r=b.toString();var O=b.getRangeAt(0).cloneContents(),_=t.createElement("div");_.appendChild(O),l=_.innerHTML}}else t.getSelection?r=t.getSelection()+"":t.selection&&(r=t.selection.createRange().text||"");a+="&buster="+(new Date).getTime(),a+="&pm=1",n=(n=e.outerWidth||t.documentElement.clientWidth||600)<800||n>5e3?600:.7*n,o=(o=e.outerHeight||t.documentElement.clientHeight||700)<800||o>3e3?700:.9*o,q("pt_version",11),c=y.getElementsByTagName("meta")||[];for(var x=0;x<c.length&&!(x>200);x++){var E=c[x],A=E.getAttribute("name"),w=E.getAttribute("property"),N=E.getAttribute("content");N&&(A?q("_meta["+A+"]",N):w&&(q("_meta["+w+"]",N),"og:video"!==w&&"og:video:url"!==w&&"og:video:secure_url"!==w||q("_og_video[]",N)))}s=y.getElementsByTagName("link")||[];for(var j=0;j<s.length&&!(j>=50);j++){var B=s[j],T=B.getAttribute("rel");"canonical"!==T&&"icon"!==T&&"shortlink"!==T||q("_links["+T+"]",B.getAttribute("href")),"alternate"===T&&"x-default"===B.getAttribute("hreflang")&&q("_links[alternate_canonical]",B.getAttribute("href"))}!function(){h=t.querySelectorAll('script[type="application/ld+json"]');for(var e=0;e<h.length&&e<10;e++)try{var i=JSON.parse(h[e].textContent);i["@graph"]&&Array.isArray(i["@graph"])?i["@graph"].forEach(H):H(i)}catch(e){}}(),t.body.getElementsByClassName&&(g=t.body.getElementsByClassName("hfeed")[0]),d=(g=t.getElementById("content")||g||t.body).getElementsByTagName("img")||[];for(var k=0;k<d.length&&!(k>=100);k++)(f=d[k]).src.indexOf("avatar")>-1||f.className.indexOf("avatar")>-1||f.width&&f.width<256||f.height&&f.height<128||f.src&&0!==f.src.indexOf("data:")&&q("_images[]",f.src);m=t.body.getElementsByTagName("iframe")||[];for(var P=0;P<m.length&&!(P>=50);P++){var 
C=m[P].src;C&&"about:blank"!==C&&(C.indexOf("jetpack-comment")>-1||C.indexOf("disqus.com")>-1||C.indexOf("facebook.com/plugins")>-1||C.indexOf("platform.twitter.com/widgets")>-1||C.indexOf("google.com/recaptcha")>-1||C.indexOf("googletagmanager.com")>-1||C.indexOf("doubleclick.net")>-1||C.indexOf("googlesyndication.com")>-1||C.indexOf("amazon-adsystem.com")>-1||C.indexOf("quantserve.com")>-1||C.indexOf("scorecardresearch.com")>-1||C.indexOf("addthis.com")>-1||C.indexOf("sharethis.com")>-1||C.indexOf("addtoany.com")>-1||q("_embeds[]",C))}var S,U;t.title&&q("t",t.title),r&&q("s",r),l&&l!==r&&q("sel_html",l),p=e.open(a,"_press_this_app","location,resizable,scrollbars,width="+n+",height="+o),S=0,U=a.match(/^https?:\/\/[^\/]+/)[0],setTimeout(function e(){if(S++,p&&!p.closed){try{p.postMessage({type:"press-this-data",version:11,data:v},U)}catch(e){}S<50&&setTimeout(e,100)}},200)}else top.location.href=a;function q(e,t){if(null!=t&&""!==t){var i=e.match(/^(.+)\[\]$/);if(i){var a=i[1];return v[a]||(v[a]=[]),void v[a].push(t)}var n=e.match(/^(.+)\[(.+)\]$/);if(n){var o=n[1],r=n[2];return v[o]||(v[o]={}),void(v[o][r]=t)}v[e]=t}}function H(e){if(e&&"object"==typeof e){var t=e["@type"];if("VideoObject"===t&&(e.embedUrl&&q("_embeds[]",e.embedUrl),e.contentUrl&&!e.embedUrl&&q("_embeds[]",e.contentUrl)),"Article"!==t&&"WebPage"!==t&&"NewsArticle"!==t&&"BlogPosting"!==t||(e.mainEntityOfPage&&"string"==typeof e.mainEntityOfPage?q("_jsonld[canonical]",e.mainEntityOfPage):e.mainEntityOfPage&&e.mainEntityOfPage["@id"]&&q("_jsonld[canonical]",e.mainEntityOfPage["@id"]),e.headline&&q("_jsonld[headline]",e.headline),e.description&&q("_jsonld[description]",e.description)),e.image){var i="";"string"==typeof e.image?i=e.image:e.image.url?i=e.image.url:Array.isArray(e.image)&&e.image[0]&&(i="string"==typeof e.image[0]?e.image[0]:e.image[0].url),i&&q("_jsonld[image]",i)}}}}(window,document,top.location.href,window.pt_url);
\ No newline at end of file
+!function(e,t,i,a){var n,o,r,l,c,s,g,d,m,f,h,p,u=e.encodeURIComponent,y=t.getElementsByTagName("head")[0],v={};if(a)if(i.match(/^https?:/)){if(a+="&u="+u(i),e.getSelection){var b=e.getSelection();if(b&&b.rangeCount>0){r=b.toString();try{var O=b.getRangeAt(0).cloneContents(),_=t.createElement("div");_.appendChild(O),l=_.innerHTML}catch(e){}}}else t.getSelection?r=t.getSelection()+"":t.selection&&(r=t.selection.createRange().text||"");a+="&buster="+(new Date).getTime(),a+="&pm=1",n=(n=e.outerWidth||t.documentElement.clientWidth||600)<800||n>5e3?600:.7*n,o=(o=e.outerHeight||t.documentElement.clientHeight||700)<800||o>3e3?700:.9*o,q("pt_version",11),c=y.getElementsByTagName("meta")||[];for(var x=0;x<c.length&&!(x>200);x++){var E=c[x],A=E.getAttribute("name"),w=E.getAttribute("property"),N=E.getAttribute("content");N&&(A?q("_meta["+A+"]",N):w&&(q("_meta["+w+"]",N),"og:video"!==w&&"og:video:url"!==w&&"og:video:secure_url"!==w||q("_og_video[]",N)))}s=y.getElementsByTagName("link")||[];for(var j=0;j<s.length&&!(j>=50);j++){var B=s[j],T=B.getAttribute("rel");"canonical"!==T&&"icon"!==T&&"shortlink"!==T||q("_links["+T+"]",B.getAttribute("href")),"alternate"===T&&"x-default"===B.getAttribute("hreflang")&&q("_links[alternate_canonical]",B.getAttribute("href"))}!function(){h=t.querySelectorAll('script[type="application/ld+json"]');for(var e=0;e<h.length&&e<10;e++)try{var i=JSON.parse(h[e].textContent);i["@graph"]&&Array.isArray(i["@graph"])?i["@graph"].forEach(H):H(i)}catch(e){}}(),t.body.getElementsByClassName&&(g=t.body.getElementsByClassName("hfeed")[0]),d=(g=t.getElementById("content")||g||t.body).getElementsByTagName("img")||[];for(var k=0;k<d.length&&!(k>=100);k++)(f=d[k]).src.indexOf("avatar")>-1||f.className.indexOf("avatar")>-1||f.width&&f.width<256||f.height&&f.height<128||f.src&&0!==f.src.indexOf("data:")&&q("_images[]",f.src);m=t.body.getElementsByTagName("iframe")||[];for(var P=0;P<m.length&&!(P>=50);P++){var 
C=m[P].src;C&&"about:blank"!==C&&(C.indexOf("jetpack-comment")>-1||C.indexOf("disqus.com")>-1||C.indexOf("facebook.com/plugins")>-1||C.indexOf("platform.twitter.com/widgets")>-1||C.indexOf("google.com/recaptcha")>-1||C.indexOf("googletagmanager.com")>-1||C.indexOf("doubleclick.net")>-1||C.indexOf("googlesyndication.com")>-1||C.indexOf("amazon-adsystem.com")>-1||C.indexOf("quantserve.com")>-1||C.indexOf("scorecardresearch.com")>-1||C.indexOf("addthis.com")>-1||C.indexOf("sharethis.com")>-1||C.indexOf("addtoany.com")>-1||q("_embeds[]",C))}var S,U;t.title&&q("t",t.title),r&&q("s",r),l&&q("sel_html",l),p=e.open(a,"_press_this_app","location,resizable,scrollbars,width="+n+",height="+o),S=0,U=a.match(/^https?:\/\/[^\/]+/)[0],setTimeout(function e(){if(S++,p&&!p.closed){try{p.postMessage({type:"press-this-data",version:11,data:v},U)}catch(e){}S<50&&setTimeout(e,100)}},200)}else top.location.href=a;function q(e,t){if(null!=t&&""!==t){var i=e.match(/^(.+)\[\]$/);if(i){var a=i[1];return v[a]||(v[a]=[]),void v[a].push(t)}var n=e.match(/^(.+)\[(.+)\]$/);if(n){var o=n[1],r=n[2];return v[o]||(v[o]={}),void(v[o][r]=t)}v[e]=t}}function H(e){if(e&&"object"==typeof e){var t=e["@type"];if("VideoObject"===t&&(e.embedUrl&&q("_embeds[]",e.embedUrl),e.contentUrl&&!e.embedUrl&&q("_embeds[]",e.contentUrl)),"Article"!==t&&"WebPage"!==t&&"NewsArticle"!==t&&"BlogPosting"!==t||(e.mainEntityOfPage&&"string"==typeof e.mainEntityOfPage?q("_jsonld[canonical]",e.mainEntityOfPage):e.mainEntityOfPage&&e.mainEntityOfPage["@id"]&&q("_jsonld[canonical]",e.mainEntityOfPage["@id"]),e.headline&&q("_jsonld[headline]",e.headline),e.description&&q("_jsonld[description]",e.description)),e.image){var i="";"string"==typeof e.image?i=e.image:e.image.url?i=e.image.url:Array.isArray(e.image)&&e.image[0]&&(i="string"==typeof e.image[0]?e.image[0]:e.image[0].url),i&&q("_jsonld[image]",i)}}}}(window,document,top.location.href,window.pt_url);
\ No newline at end of file
diff --git a/src/App.js b/src/App.js
index 9b9bb6a..85f4a2d 100644
--- a/src/App.js
+++ b/src/App.js
@@ -223,15 +223,15 @@ export default function App() {
 			const meta = messageData._meta || {};
 
 			// HTML selection takes highest priority (preserves formatting).
-			// Plain-text selection is a fallback, then meta descriptions.
+			// Always compute plain-text description as a fallback; buildSuggestedContent
+			// will use it if htmlToBlocks() produces no blocks from selectionHtml.
 			const selectionHtml = messageData.sel_html || '';
-			const description = selectionHtml
-				? '' // HTML selection is used directly; skip plain-text fallback.
-				: messageData.s || // Plain-text user selection.
-				  meta[ 'twitter:description' ] ||
-				  meta[ 'og:description' ] ||
-				  meta.description ||
-				  '';
+			const description =
+				messageData.s || // Plain-text user selection.
+				meta[ 'twitter:description' ] ||
+				meta[ 'og:description' ] ||
+				meta.description ||
+				'';
 
 			const title =
 				messageData.t ||
diff --git a/src/utils/html-parser.js b/src/utils/html-parser.js
index 2f3033a..d4b83ed 100644
--- a/src/utils/html-parser.js
+++ b/src/utils/html-parser.js
@@ -448,40 +448,61 @@ function getCanonical( doc, meta ) {
 }
 
 /**
- * Sanitize inline HTML, keeping only safe formatting elements.
+ * Sanitize inline HTML, keeping only a safe allowlist of formatting elements.
  *
- * Strips script/style elements and event handler attributes.
- * Preserves safe inline elements: strong, em, b, i, u, s, a (href only),
+ * Allowlisted inline elements: strong, em, b, i, u, s, a (href only),
  * code, mark, sub, sup, span, br.
+ * Dangerous elements (script, style, object, embed, iframe) are removed entirely.
+ * Any other non-allowlisted element is unwrapped (its children are preserved).
+ * All attributes are stripped except `href` on `<a>` elements.
+ * URI schemes `javascript:`, `data:`, and `vbscript:` are blocked in `href`.
  *
  * @param {Element} element DOM element to sanitize (in-place).
  */
 function sanitizeInlineContent( element ) {
-	// Remove script and style elements.
+	// Completely remove dangerous elements (do not preserve children).
 	const dangerous = element.querySelectorAll(
 		'script, style, object, embed, iframe'
 	);
 	dangerous.forEach( ( el ) => el.remove() );
 
-	// Strip event handlers and javascript: hrefs from all elements.
-	const allEls = element.querySelectorAll( '*' );
-	allEls.forEach( ( el ) => {
-		// Remove all event handler attributes.
+	const ALLOWED_INLINE = new Set( [
+		'strong',
+		'em',
+		'b',
+		'i',
+		'u',
+		's',
+		'a',
+		'code',
+		'mark',
+		'sub',
+		'sup',
+		'span',
+		'br',
+	] );
+
+	// Iterate over a static snapshot — we may mutate the DOM while iterating.
+	Array.from( element.querySelectorAll( '*' ) ).forEach( ( el ) => {
+		const tagName = el.tagName.toLowerCase();
+
+		if ( ! ALLOWED_INLINE.has( tagName ) ) {
+			// Unwrap: replace the element with its child nodes.
+			el.replaceWith( ...el.childNodes );
+			return;
+		}
+
+		// Strip all attributes except allowed ones per element.
 		Array.from( el.attributes ).forEach( ( attr ) => {
-			if ( attr.name.startsWith( 'on' ) ) {
+			if ( tagName === 'a' && attr.name === 'href' ) {
+				// Block dangerous URI schemes.
+				if ( /^\s*(javascript|data|vbscript):/i.test( attr.value ) ) {
+					el.removeAttribute( attr.name );
+				}
+			} else {
 				el.removeAttribute( attr.name );
 			}
 		} );
-
-		// Strip javascript: URLs from href and src.
-		const href = el.getAttribute( 'href' );
-		if ( href && /^\s*javascript:/i.test( href ) ) {
-			el.removeAttribute( 'href' );
-		}
-		const src = el.getAttribute( 'src' );
-		if ( src && /^\s*javascript:/i.test( src ) ) {
-			el.removeAttribute( 'src' );
-		}
 	} );
 }
 
@@ -490,9 +511,14 @@ function sanitizeInlineContent( element ) {
  *
  * @param {Element} listEl  The list element (ul or ol).
  * @param {boolean} ordered Whether this is an ordered list.
+ * @param {number}  depth   Current recursion depth (default 0; max 10).
  * @return {string} Gutenberg list block markup.
  */
-function listElementToBlock( listEl, ordered ) {
+function listElementToBlock( listEl, ordered, depth = 0 ) {
+	if ( depth > 10 ) {
+		return '';
+	}
+
 	const tag = ordered ? 'ol' : 'ul';
 	const attr = ordered ? ' {"ordered":true}' : '';
 	let items = '';
@@ -514,7 +540,8 @@ function listElementToBlock( listEl, ordered ) {
 			const childTag = childEl.tagName.toLowerCase();
 			if ( childTag === 'ul' || childTag === 'ol' ) {
 				const isOrdered = childTag === 'ol';
-				nestedBlocks += '\n' + listElementToBlock( childEl, isOrdered );
+				nestedBlocks +=
+					'\n' + listElementToBlock( childEl, isOrdered, depth + 1 );
 				childEl.remove();
 			}
 		} );
@@ -533,19 +560,27 @@ function listElementToBlock( listEl, ordered ) {
  *
  * Handles block-level elements: paragraphs, headings (h1-h6), unordered and
  * ordered lists with nesting, blockquotes, and preformatted/code blocks.
- * Inline elements (strong, em, a, code, etc.) are preserved within blocks.
- * Script/style elements and event handlers are stripped for safety.
+ * Inline elements are filtered through an allowlist (strong, em, b, i, u, s,
+ * a[href], code, mark, sub, sup, span, br). All other attributes are stripped.
  *
  * Falls back to a paragraph block for unrecognised or purely inline content.
  *
- * @param {string} html HTML string to convert.
+ * @param {string} html  HTML string to convert.
+ * @param {number} depth Internal recursion depth; callers should omit this.
  * @return {string} Gutenberg block markup string, or empty string if no content.
  */
-export function htmlToBlocks( html ) {
+export function htmlToBlocks( html, depth = 0 ) {
 	if ( ! html || typeof html !== 'string' ) {
 		return '';
 	}
 
+	// Guard against deeply nested blockquote structures.
+	if ( depth > 5 ) {
+		return `<!-- wp:paragraph -->\n<p>${ escapeHtml(
+			html
+		) }</p>\n<!-- /wp:paragraph -->\n`;
+	}
+
 	const parser = new DOMParser();
 	const doc = parser.parseFromString( `<body>${ html }</body>`, 'text/html' );
 	const body = doc.body;
@@ -624,7 +659,9 @@ export function htmlToBlocks( html ) {
 			const tempEl = doc.createElement( 'div' );
 			tempEl.appendChild( clone );
 			sanitizeInlineContent( tempEl );
-			blocks += `<!-- wp:heading {"level":${ level }} -->\n<${ tag } class="wp-block-heading">${ tempEl.firstChild.innerHTML }</${ tag }>\n<!-- /wp:heading -->\n\n`;
+			// After sanitization, block-level wrappers (h-tags) are unwrapped by
+			// the allowlist; tempEl.innerHTML holds the sanitized inner content.
+			blocks += `<!-- wp:heading {"level":${ level }} -->\n<${ tag } class="wp-block-heading">${ tempEl.innerHTML }</${ tag }>\n<!-- /wp:heading -->\n\n`;
 			return;
 		}
 
@@ -640,7 +677,7 @@ export function htmlToBlocks( html ) {
 			const tempEl = doc.createElement( 'div' );
 			tempEl.appendChild( clone );
 			sanitizeInlineContent( tempEl );
-			const inner = htmlToBlocks( tempEl.firstChild.innerHTML );
+			const inner = htmlToBlocks( tempEl.innerHTML, depth + 1 );
 			const innerBlocks =
 				inner ||
 				`<!-- wp:paragraph -->\n<p>${ escapeHtml(
@@ -663,7 +700,9 @@ export function htmlToBlocks( html ) {
 		const tempEl = doc.createElement( 'div' );
 		tempEl.appendChild( clone );
 		sanitizeInlineContent( tempEl );
-		const innerHtml = tempEl.firstChild.innerHTML.trim();
+		// tempEl.innerHTML holds the sanitized inner content after any outer
+		// block-level wrapper has been unwrapped by the allowlist sanitizer.
+		const innerHtml = tempEl.innerHTML.trim();
 		if ( innerHtml ) {
 			blocks += `<!-- wp:paragraph -->\n<p>${ innerHtml }</p>\n<!-- /wp:paragraph -->\n\n`;
 		}
@@ -681,8 +720,17 @@ export function htmlToBlocks( html ) {
  * All dynamic content is escaped.
  * Uses pre-sanitized server data instead of client-side parsing.
  *
- * @param {Object} data      Server-returned metadata object.
- * @param {string} sourceUrl Original source URL.
+ * When `data.selectionHtml` is provided the selected HTML is converted to
+ * formatted Gutenberg blocks via `htmlToBlocks`. If the conversion produces
+ * no blocks (e.g. the selection contained only script tags), the function
+ * falls back to `data.description` in a plain-text quote block.
+ *
+ * @param {Object} data               Server-returned metadata object.
+ * @param {string} data.selectionHtml Optional HTML string of the user's selection.
+ * @param {string} data.description   Optional plain-text description / meta excerpt.
+ * @param {string} data.title         Optional page title.
+ * @param {string} data.siteName      Optional site name.
+ * @param {string} sourceUrl          Original source URL.
  * @return {string} Gutenberg block content.
  */
 export function buildSuggestedContent( data, sourceUrl ) {
@@ -706,15 +754,17 @@ ${ escapeHtml( sourceUrl ) }
 `;
 	}
 
-	// Add HTML selection as formatted blocks when available.
-	// Falls back to a plain-text quote block for meta descriptions.
+	// Prefer formatted HTML selection; fall back to plain-text description.
+	let selectionBlocks = '';
 	if ( data.selectionHtml ) {
-		const selectionBlocks = htmlToBlocks( data.selectionHtml );
-		if ( selectionBlocks ) {
-			content += selectionBlocks + '\n\n';
-		}
+		selectionBlocks = htmlToBlocks( data.selectionHtml );
+	}
+
+	if ( selectionBlocks ) {
+		// HTML selection converted successfully to blocks.
+		content += selectionBlocks + '\n\n';
 	} else if ( data.description ) {
-		// Escape description.
+		// No HTML selection (or it produced no blocks) – use plain-text quote.
 		content += `<!-- wp:quote -->
 <blockquote class="wp-block-quote"><!-- wp:paragraph -->
 <p>${ escapeHtml( data.description ) }</p>
diff --git a/tests/utils/html-parser.test.js b/tests/utils/html-parser.test.js
index 25434ee..1db60c9 100644
--- a/tests/utils/html-parser.test.js
+++ b/tests/utils/html-parser.test.js
@@ -143,6 +143,60 @@ describe( 'htmlToBlocks', () => {
 		expect( result ).not.toContain( 'javascript:' );
 	} );
 
+	test( 'strips data: href for safety', () => {
+		const html = '<p><a href="data:text/html,<script>alert(1)</script>">Link</a></p>';
+		const result = htmlToBlocks( html );
+		expect( result ).toContain( 'Link' );
+		expect( result ).not.toContain( 'data:' );
+	} );
+
+	test( 'strips vbscript: href for safety', () => {
+		const html = '<p><a href="vbscript:MsgBox(1)">Link</a></p>';
+		const result = htmlToBlocks( html );
+		expect( result ).toContain( 'Link' );
+		expect( result ).not.toContain( 'vbscript:' );
+	} );
+
+	test( 'unwraps non-allowlist elements but keeps their text', () => {
+		// <form> is not in the allowlist, so it is unwrapped and its text is kept.
+		const html = '<p>Before <form>form content</form> after</p>';
+		const result = htmlToBlocks( html );
+		expect( result ).toContain( 'Before' );
+		expect( result ).toContain( 'form content' );
+		expect( result ).toContain( 'after' );
+		expect( result ).not.toContain( '<form>' );
+	} );
+
+	test( 'strips all attributes except href on <a>', () => {
+		const html =
+			'<p><a href="https://example.com" class="btn" style="color:red">link</a></p>';
+		const result = htmlToBlocks( html );
+		expect( result ).toContain( '<a href="https://example.com">' );
+		expect( result ).not.toContain( 'class=' );
+		expect( result ).not.toContain( 'style=' );
+	} );
+
+	test( 'does not recurse infinitely on deeply nested blockquotes', () => {
+		// Build deeply nested blockquotes beyond the depth limit.
+		let html = '<p>Deep</p>';
+		for ( let i = 0; i < 8; i++ ) {
+			html = `<blockquote>${ html }</blockquote>`;
+		}
+		// Should not throw or hang; should return some content.
+		expect( () => htmlToBlocks( html ) ).not.toThrow();
+		const result = htmlToBlocks( html );
+		expect( result ).toBeTruthy();
+	} );
+
+	test( 'does not recurse infinitely on deeply nested lists', () => {
+		// Build a list nested beyond the depth limit (depth > 10).
+		let html = '<li>Item</li>';
+		for ( let i = 0; i < 15; i++ ) {
+			html = `<ul><li>Level ${ i }<ul>${ html }</ul></li></ul>`;
+		}
+		expect( () => htmlToBlocks( html ) ).not.toThrow();
+	} );
+
 	test( 'preserves safe inline elements', () => {
 		const html = '<p><strong>bold</strong> and <em>italic</em> and <a href="https://example.com">link</a></p>';
 		const result = htmlToBlocks( html );
@@ -197,6 +251,22 @@ describe( 'buildSuggestedContent with selectionHtml', () => {
 		expect( result ).toContain( 'Source:' );
 	} );
 
+	test( 'falls back to description when selectionHtml produces no blocks', () => {
+		// A selection with only script tags produces empty blocks.
+		const result = buildSuggestedContent(
+			{
+				selectionHtml: '<script>alert(1)</script>',
+				description: 'Plain fallback description',
+				title: 'Article',
+			},
+			sourceUrl
+		);
+
+		// htmlToBlocks returns '' for script-only input, so description is used.
+		expect( result ).toContain( '<!-- wp:quote -->' );
+		expect( result ).toContain( 'Plain fallback description' );
+	} );
+
 	test( 'renders only source attribution when both selectionHtml and description are absent', () => {
 		const result = buildSuggestedContent(
 			{ title: 'Article' },
@@ -212,7 +282,7 @@ describe( 'buildSuggestedContent with selectionHtml', () => {
 		const result = buildSuggestedContent(
 			{
 				selectionHtml: '<ul><li>List item</li></ul>',
-				description: 'Should be ignored',
+				description: 'Should be ignored when blocks are produced',
 			},
 			sourceUrl
 		);

From 01ef9cf14e742ed03fc9be151fab4efa1b69cfeb Mon Sep 17 00:00:00 2001
From: Brandon Kraft <public@brandonkraft.com>
Date: Mon, 16 Mar 2026 17:40:47 -0500
Subject: [PATCH 5/6] test: add e2e tests for HTML selection preservation

---
 tests/e2e/html-selection.spec.js | 383 +++++++++++++++++++++++++++++++
 1 file changed, 383 insertions(+)
 create mode 100644 tests/e2e/html-selection.spec.js

diff --git a/tests/e2e/html-selection.spec.js b/tests/e2e/html-selection.spec.js
new file mode 100644
index 0000000..e906e48
--- /dev/null
+++ b/tests/e2e/html-selection.spec.js
@@ -0,0 +1,383 @@
+/**
+ * HTML Selection Preservation E2E Tests
+ *
+ * Verifies that HTML-formatted text selections sent via postMessage
+ * are correctly converted to Gutenberg blocks in the editor.
+ *
+ * These tests bypass the bookmarklet and send postMessage directly,
+ * testing the real code path: App.js handler → htmlToBlocks() → block rendering.
+ */
+const { test, expect } = require( './utils/auth' );
+
+/**
+ * Send a postMessage to the Press This editor with test data.
+ *
+ * @param {import('@playwright/test').Page} page      Playwright page.
+ * @param {Object}                          overrides Fields to override in the message data.
+ */
+async function sendPostMessage( page, overrides = {} ) {
+	const defaults = {
+		t: 'Test Page Title',
+		s: '',
+		sel_html: '',
+		u: 'https://example.com/test',
+		_meta: {},
+		_links: {},
+		_images: [],
+		_embeds: [],
+	};
+	await page.evaluate( ( data ) => {
+		window.postMessage(
+			{
+				type: 'press-this-data',
+				version: '1.0.0',
+				data,
+			},
+			'*'
+		);
+	}, { ...defaults, ...overrides } );
+}
+
+/**
+ * Wait for Gutenberg blocks to appear in the editor content area.
+ *
+ * @param {import('@playwright/test').Page} page Playwright page.
+ */
+async function waitForBlocks( page ) {
+	await page
+		.locator( '.press-this-editor__content [data-type]' )
+		.first()
+		.waitFor( { timeout: 10000 } );
+}
+
+/**
+ * Navigate to Press This in postMessage mode and wait for the editor.
+ *
+ * @param {import('@playwright/test').Page} page Playwright page.
+ */
+async function loadEditor( page ) {
+	await page.goto( '/wp-admin/press-this.php?pm=1' );
+	await page
+		.locator( '.press-this-editor__content' )
+		.waitFor( { timeout: 10000 } );
+}
+
+const editorContent = '.press-this-editor__content';
+
+test.describe( 'HTML Selection Preservation', () => {
+	test.describe( 'Core Formatting', () => {
+		test( 'bold and italic inline formatting', async ( {
+			loggedInPage: page,
+		} ) => {
+			await loadEditor( page );
+			await sendPostMessage( page, {
+				sel_html:
+					'<p>This has <strong>bold</strong> and <em>italic</em> text.</p>',
+			} );
+			await waitForBlocks( page );
+
+			const paragraph = page.locator(
+				`${ editorContent } [data-type="core/paragraph"]`
+			);
+			await expect( paragraph.first() ).toContainText( 'bold' );
+			await expect( paragraph.first() ).toContainText( 'italic' );
+
+			// Verify the formatting tags are preserved in the rendered HTML.
+			const html = await paragraph.first().innerHTML();
+			expect( html ).toContain( '<strong>' );
+			expect( html ).toContain( '<em>' );
+		} );
+
+		test( 'headings at correct levels', async ( {
+			loggedInPage: page,
+		} ) => {
+			await loadEditor( page );
+			await sendPostMessage( page, {
+				sel_html: '<h2>Heading Two</h2><h3>Heading Three</h3>',
+			} );
+			await waitForBlocks( page );
+
+			await expect(
+				page.locator(
+					`${ editorContent } [data-type="core/heading"] h2`
+				)
+			).toHaveText( 'Heading Two' );
+			await expect(
+				page.locator(
+					`${ editorContent } [data-type="core/heading"] h3`
+				)
+			).toHaveText( 'Heading Three' );
+		} );
+
+		test( 'unordered list', async ( { loggedInPage: page } ) => {
+			await loadEditor( page );
+			await sendPostMessage( page, {
+				sel_html:
+					'<ul><li>Item A</li><li>Item B</li></ul>',
+			} );
+			await waitForBlocks( page );
+
+			const list = page.locator(
+				`${ editorContent } [data-type="core/list"]`
+			);
+			await expect( list.first() ).toBeVisible();
+			await expect( list.first().locator( 'ul' ) ).toBeVisible();
+			await expect( list.first() ).toContainText( 'Item A' );
+			await expect( list.first() ).toContainText( 'Item B' );
+		} );
+
+		test( 'ordered list', async ( { loggedInPage: page } ) => {
+			await loadEditor( page );
+			await sendPostMessage( page, {
+				sel_html:
+					'<ol><li>First</li><li>Second</li></ol>',
+			} );
+			await waitForBlocks( page );
+
+			const list = page.locator(
+				`${ editorContent } [data-type="core/list"]`
+			);
+			await expect( list.first() ).toBeVisible();
+			await expect( list.first().locator( 'ol' ) ).toBeVisible();
+			await expect( list.first() ).toContainText( 'First' );
+			await expect( list.first() ).toContainText( 'Second' );
+		} );
+
+		test( 'blockquote', async ( { loggedInPage: page } ) => {
+			await loadEditor( page );
+			await sendPostMessage( page, {
+				sel_html:
+					'<blockquote><p>A quoted passage.</p></blockquote>',
+			} );
+			await waitForBlocks( page );
+
+			const quote = page.locator(
+				`${ editorContent } [data-type="core/quote"]`
+			);
+			await expect( quote ).toBeVisible();
+			await expect( quote ).toContainText( 'A quoted passage.' );
+		} );
+
+		test( 'code block', async ( { loggedInPage: page } ) => {
+			await loadEditor( page );
+			await sendPostMessage( page, {
+				sel_html: '<pre><code>const x = 42;</code></pre>',
+			} );
+			await waitForBlocks( page );
+
+			const code = page.locator(
+				`${ editorContent } [data-type="core/code"]`
+			);
+			await expect( code ).toBeVisible();
+			await expect( code ).toContainText( 'const x = 42;' );
+		} );
+
+		test( 'mixed content preserves all block types', async ( {
+			loggedInPage: page,
+		} ) => {
+			await loadEditor( page );
+			await sendPostMessage( page, {
+				sel_html: [
+					'<h2>Getting Started</h2>',
+					'<p>Follow these <strong>steps</strong>:</p>',
+					'<ul><li>Install dependencies</li><li>Run the server</li></ul>',
+					'<pre><code>npm start</code></pre>',
+				].join( '' ),
+			} );
+			await waitForBlocks( page );
+
+			await expect(
+				page.locator(
+					`${ editorContent } [data-type="core/heading"]`
+				)
+			).toBeVisible();
+			await expect(
+				page.locator(
+					`${ editorContent } [data-type="core/paragraph"]`
+				).first()
+			).toContainText( 'steps' );
+			await expect(
+				page.locator(
+					`${ editorContent } [data-type="core/list"]`
+				)
+			).toBeVisible();
+			await expect(
+				page.locator(
+					`${ editorContent } [data-type="core/code"]`
+				)
+			).toBeVisible();
+		} );
+	} );
+
+	test.describe( 'Backward Compatibility', () => {
+		test( 'plain text selection without sel_html produces quote block', async ( {
+			loggedInPage: page,
+		} ) => {
+			await loadEditor( page );
+			await sendPostMessage( page, {
+				s: 'This is a plain text selection that should appear in a quote block.',
+				sel_html: '',
+			} );
+			await waitForBlocks( page );
+
+			const quote = page.locator(
+				`${ editorContent } [data-type="core/quote"]`
+			);
+			await expect( quote ).toBeVisible();
+			await expect( quote ).toContainText(
+				'plain text selection'
+			);
+		} );
+
+		test( 'no selection populates title and source only', async ( {
+			loggedInPage: page,
+		} ) => {
+			await loadEditor( page );
+			await sendPostMessage( page, {
+				t: 'Just a Title',
+				s: '',
+				sel_html: '',
+			} );
+			await waitForBlocks( page );
+
+			// Title should be populated.
+			const titleInput = page.getByLabel( 'Post title' );
+			await expect( titleInput ).toHaveValue( 'Just a Title', {
+				timeout: 10000,
+			} );
+
+			// Source attribution should exist.
+			await expect(
+				page
+					.locator( `${ editorContent }` )
+					.getByText( 'Source:' )
+			).toBeVisible();
+
+			// No quote block should appear.
+			await expect(
+				page.locator(
+					`${ editorContent } [data-type="core/quote"]`
+				)
+			).toHaveCount( 0 );
+		} );
+	} );
+
+	test.describe( 'Security', () => {
+		test( 'script tags are stripped', async ( {
+			loggedInPage: page,
+		} ) => {
+			await loadEditor( page );
+			await sendPostMessage( page, {
+				sel_html:
+					'<p>Safe text</p><script>alert("xss")</script>',
+			} );
+			await waitForBlocks( page );
+
+			await expect(
+				page.locator( `${ editorContent }` )
+			).toContainText( 'Safe text' );
+			await expect(
+				page.locator( `${ editorContent }` )
+			).not.toContainText( 'alert' );
+		} );
+
+		test( 'dangerous attributes are stripped', async ( {
+			loggedInPage: page,
+		} ) => {
+			await loadEditor( page );
+			await sendPostMessage( page, {
+				sel_html:
+					'<p onclick="alert(1)" style="color:red" class="danger">Styled text</p>',
+			} );
+			await waitForBlocks( page );
+
+			const paragraph = page.locator(
+				`${ editorContent } [data-type="core/paragraph"]`
+			);
+			await expect( paragraph.first() ).toContainText(
+				'Styled text'
+			);
+
+			// Verify no dangerous attributes leaked into the rich text content.
+			const hasUnsafeAttrs = await paragraph.first().evaluate( ( el ) => {
+				const p = el.querySelector( 'p, [role="document"]' );
+				if ( ! p ) {
+					return false;
+				}
+				return (
+					p.hasAttribute( 'onclick' ) ||
+					p.hasAttribute( 'style' ) ||
+					p.getAttribute( 'class' )?.includes( 'danger' )
+				);
+			} );
+			expect( hasUnsafeAttrs ).toBe( false );
+		} );
+
+		test( 'javascript: and data: URI schemes are blocked', async ( {
+			loggedInPage: page,
+		} ) => {
+			await loadEditor( page );
+			await sendPostMessage( page, {
+				sel_html:
+					'<p><a href="javascript:alert(1)">Bad link</a> and <a href="https://safe.example">Good link</a></p>',
+			} );
+			await waitForBlocks( page );
+
+			// The safe link should be present.
+			await expect(
+				page.locator(
+					`${ editorContent } a[href="https://safe.example"]`
+				)
+			).toBeVisible();
+
+			// No javascript: href should exist in the editor content.
+			const hasJsHref = await page
+				.locator( `${ editorContent }` )
+				.evaluate( ( el ) => {
+					return el.querySelector( 'a[href^="javascript:"]' ) !== null;
+				} );
+			expect( hasJsHref ).toBe( false );
+		} );
+	} );
+
+	test.describe( 'Edge Cases', () => {
+		test( 'empty sel_html falls back to plain-text description', async ( {
+			loggedInPage: page,
+		} ) => {
+			await loadEditor( page );
+			await sendPostMessage( page, {
+				sel_html: '<script>alert(1)</script>',
+				s: 'Fallback description text.',
+			} );
+			await waitForBlocks( page );
+
+			// htmlToBlocks returns empty for script-only input,
+			// so the plain-text fallback should be used in a quote block.
+			const quote = page.locator(
+				`${ editorContent } [data-type="core/quote"]`
+			);
+			await expect( quote ).toBeVisible();
+			await expect( quote ).toContainText(
+				'Fallback description text.'
+			);
+		} );
+
+		test( 'special characters are not double-escaped', async ( {
+			loggedInPage: page,
+		} ) => {
+			await loadEditor( page );
+			await sendPostMessage( page, {
+				sel_html: '<p>Tom &amp; Jerry</p>',
+			} );
+			await waitForBlocks( page );
+
+			const paragraph = page.locator(
+				`${ editorContent } [data-type="core/paragraph"]`
+			);
+			// Should render as "Tom & Jerry", not "Tom &amp; Jerry".
+			await expect( paragraph.first() ).toContainText(
+				'Tom & Jerry'
+			);
+		} );
+	} );
+} );

From 2565715f7998c4255bd7af2c2e2662b5acf7d7d3 Mon Sep 17 00:00:00 2001
From: Brandon Kraft <public@brandonkraft.com>
Date: Mon, 16 Mar 2026 17:54:31 -0500
Subject: [PATCH 6/6] test: fix e2e selectors for Gutenberg block rendering

---
 tests/e2e/html-selection.spec.js | 22 ++++++++++------------
 1 file changed, 10 insertions(+), 12 deletions(-)

diff --git a/tests/e2e/html-selection.spec.js b/tests/e2e/html-selection.spec.js
index e906e48..0978396 100644
--- a/tests/e2e/html-selection.spec.js
+++ b/tests/e2e/html-selection.spec.js
@@ -97,16 +97,16 @@ test.describe( 'HTML Selection Preservation', () => {
 			} );
 			await waitForBlocks( page );
 
-			await expect(
-				page.locator(
-					`${ editorContent } [data-type="core/heading"] h2`
-				)
-			).toHaveText( 'Heading Two' );
-			await expect(
-				page.locator(
-					`${ editorContent } [data-type="core/heading"] h3`
-				)
-			).toHaveText( 'Heading Three' );
+			const headings = page.locator(
+				`${ editorContent } [data-type="core/heading"]`
+			);
+			await expect( headings ).toHaveCount( 2 );
+			await expect( headings.nth( 0 ) ).toContainText(
+				'Heading Two'
+			);
+			await expect( headings.nth( 1 ) ).toContainText(
+				'Heading Three'
+			);
 		} );
 
 		test( 'unordered list', async ( { loggedInPage: page } ) => {
@@ -121,7 +121,6 @@ test.describe( 'HTML Selection Preservation', () => {
 				`${ editorContent } [data-type="core/list"]`
 			);
 			await expect( list.first() ).toBeVisible();
-			await expect( list.first().locator( 'ul' ) ).toBeVisible();
 			await expect( list.first() ).toContainText( 'Item A' );
 			await expect( list.first() ).toContainText( 'Item B' );
 		} );
@@ -138,7 +137,6 @@ test.describe( 'HTML Selection Preservation', () => {
 				`${ editorContent } [data-type="core/list"]`
 			);
 			await expect( list.first() ).toBeVisible();
-			await expect( list.first().locator( 'ol' ) ).toBeVisible();
 			await expect( list.first() ).toContainText( 'First' );
 			await expect( list.first() ).toContainText( 'Second' );
 		} );