HTML Cleanup and Simplifier Tool

Strip bloat, fix errors, and format your markup into cleaner, standards-compliant HTML.

Cleanup Options

Input HTML (Paste Messy Code Here)

Cleaned & Formatted HTML

`, 'text/html'); const root = doc.body;// 2. Traverse and clean the DOM cleanNode(root, options);// 3. Get the cleaned HTML fragment (excluding the transient wrapper) let cleanedHtml = Array.from(root.childNodes) .map(node => node.outerHTML || node.textContent) .join(''); // 3b. Remove Non-breaking Spaces (  and Unicode \u00A0) if (options.removeNbsp) { cleanedHtml = cleanedHtml.replace(/ /g, ' '); cleanedHtml = cleanedHtml.replace(/\u00A0/g, ' '); } // 3c. Normalize Whitespace (Collapse sequences of spaces, tabs, and newlines) // This is a necessary pre-step before minification or regular formatting if (options.normalizeWhitespace || options.minifyHtml) { cleanedHtml = cleanedHtml.replace(/\s{2,}/g, ' '); }// 4. Final Formatting (Indentation or Minification) if (options.minifyHtml) { // Minification Logic (Override formatting) // 1. Remove all newlines/carriage returns/tabs cleanedHtml = cleanedHtml.replace(/[\n\r\t]/g, ''); // 2. Remove spaces immediately before/after tags (e.g., `> <` becomes `><`) cleanedHtml = cleanedHtml.replace(/\s*\s*/g, '>'); // 3. Final trim cleanedHtml = cleanedHtml.trim();outputHtmlArea.value = cleanedHtml; } else { // Use standard formatting with indentation cleanedHtml = formatHtml(cleanedHtml); outputHtmlArea.value = cleanedHtml; }} catch (error) { console.error("HTML Cleanup Error:", error); outputHtmlArea.value = `An error occurred during cleanup: ${error.message}. Please check your input HTML for extremely severe malformation.`; } }/** * Recursively cleans a DOM node and its children. 
*/ function cleanNode(node, options) { // List of obsolete tags to unwrap (remove tag, keep content) const OBSOLETE_TAGS = ['font', 'center', 'dir', 'menu', 'strike', 's']; // Remove comments if the option is checked if (options.removeComments && node.nodeType === 8 /* COMMENT_NODE */) { node.parentNode.removeChild(node); return; }if (node.nodeType === 1 /* ELEMENT_NODE */) { const tagName = node.tagName.toLowerCase(); let shouldUnwrap = false;// Mark tags for unwrapping if the option is checked if (options.removeSpans && tagName === 'span') { shouldUnwrap = true; } // Mark obsolete tags for unwrapping if the option is checked if (options.removeObsoleteTags && OBSOLETE_TAGS.includes(tagName)) { shouldUnwrap = true; }// Apply attribute cleanup to the element if (node.hasAttributes()) { const attributesToRemove = []; for (let i = 0; i < node.attributes.length; i++) { const attr = node.attributes[i]; const name = attr.name.toLowerCase();if (options.removeStyles && name === 'style') { attributesToRemove.push(name); } if (options.removeClassesIds) { if (name === 'class' || name === 'id') { attributesToRemove.push(name); } } if (options.removeEvents && name.startsWith('on')) { attributesToRemove.push(name); }// Remove common editor bloat attributes like data-pm-* if (name.startsWith('data-pm-')) { attributesToRemove.push(name); } // Remove all data-* attributes if selected (excluding data-src/srcset) if (options.removeAllDataAttributes && name.startsWith('data-')) { if (name !== 'data-src' && name !== 'data-srcset') { attributesToRemove.push(name); } }} attributesToRemove.forEach(name => node.removeAttribute(name)); }// Recursively clean children // Iterate backwards because we might remove children for (let i = node.childNodes.length - 1; i >= 0; i--) { cleanNode(node.childNodes[i], options); }// POST-RECURSION CLEANUP// 1. 
Unwrap the node if it's marked for unwrapping ( or Obsolete Tags) if (shouldUnwrap && node.parentNode) { // Move all children to the parent before removing the node itself while (node.firstChild) { node.parentNode.insertBefore(node.firstChild, node); } node.parentNode.removeChild(node); return; // Stop processing this node as it's been removed }// 2. Remove empty tags if (options.removeEmptyTags) { // Check if the element is empty (no children or only empty text nodes) const isEmpty = Array.from(node.childNodes).every(child => child.nodeType === 3 && child.textContent.trim() === '' // Only whitespace text nodes ); // Do not remove essential structural elements or self-closing tags (e.g. img, br) const safeTags = ['body', 'html', 'br', 'hr', 'img', 'input', 'area', 'link', 'meta'];if (isEmpty && node.textContent.trim() === '' && !safeTags.includes(tagName)) { node.parentNode.removeChild(node); } } } }/** * Simple, regex-based formatter/beautifier for readability. * Indentation unit is 2 spaces. */ function formatHtml(html) { // Define the indentation unit as 2 spaces for compact, standard formatting. const INDENT_UNIT = ' '; // 2 spaces // 1. Pre-process: Collapse content within common text/inline tags onto a single line. 
const singleLineTags = ['p', 'strong', 'em', 'a', 'span', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'li', 'td', 'th'];for (const tag of singleLineTags) { const regex = new RegExp(`(<${tag}[^>]*>)([\\s\\S]*?)()`, 'gi'); html = html.replace(regex, function(match, openTag, content, closeTag) { content = content.replace(/\s+/g, ' ').trim(); return `${openTag}${content}${closeTag}`; }); }let indent = ''; const output = []; // Aggressively clean up unnecessary whitespace around tags and split into lines html = html.replace(//g, '>\n') .replace(/\n\s*\n/g, '\n') .trim();const lines = html.split('\n').filter(line => line.trim().length > 0);for (const line of lines) { const trimmedLine = line.trim();// Skip text nodes that are only whitespace or empty after trimming if (!trimmedLine.startsWith('<') && !trimmedLine.endsWith('>')) { output.push(indent + trimmedLine); continue; }// 1. Handle closing tags and self-closing tags if (trimmedLine.match(/^<\//) || trimmedLine.match(/\/>$/) || trimmedLine.match(/
/i) || trimmedLine.match(/ 0) { indent = indent.slice(0, -INDENT_UNIT.length); } } output.push(indent + trimmedLine); } // 2. Handle single-line tags that contain content (already merged in step 1) else if (trimmedLine.match(/^<[^/]+>.*<\/[^>]+>$/)) { output.push(indent + trimmedLine); } // 3. Handle standard opening tags (block elements that need content indented) else if (trimmedLine.match(/^ { copyButton.textContent = originalText; }, 1500); } catch (err) { console.error('Could not copy text: ', err); // Fallback message in the output area outputHtmlArea.value = 'Failed to copy. Please select the text and copy manually.'; } }/** * Clears both input and output areas. */ function clearAll() { document.getElementById('inputHtml').value = ''; document.getElementById('outputHtml').value = ''; document.getElementById('inputHtml').focus(); }// Add some default messy HTML to make testing easier window.onload = () => { document.getElementById('inputHtml').value = `
This text is old style and should be deleted!

Nội dung

Link Text
`; // Run the cleanup automatically on load cleanAndFormatHTML(); }

The Ultimate Guide to Clean & Simple HTML

Transform bloated, inefficient markup into lightweight, SEO-optimized source code. This interactive guide explains why clean code is critical and shows you how to achieve it.

The Problem: Why "Dirty" HTML is an SEO Killer

Bloated code isn't just ugly; it actively harms your site's performance and search engine rankings. This section explores the common causes and the severe consequences for your Core Web Vitals and SEO.

The WYSIWYG Catastrophe

Content editors from platforms like Word or Google Docs are notorious for injecting "bloat code"—unnecessary inline styles, meaningless `<span>` tags, and proprietary markup that inflates file size and complicates your DOM.

Bloated Code Example

<p class="MsoNormal">
  <span style="color:red;">
    Some text.
  </span>
</p>

Clean Code Result

<p>Some text.</p>

Impact on Core Web Vitals

The Solution: Key Cleanup Features

A powerful HTML cleaner uses advanced techniques to surgically remove bloat without breaking your content's structure. Explore the core features below to understand how each one contributes to a cleaner, faster website. Click on a card to see details.

In-Depth Benefits of Clean HTML

Beyond surface-level tidiness, clean code provides critical advantages for performance, SEO, and long-term maintainability. This section explains how a simplified DOM and proper standards compliance directly impact your bottom line.

Boosting Rendering Performance

By removing inline styles and empty tags, you optimize the Critical Rendering Path, reduce memory/CPU load, and accelerate content display (First Contentful Paint, FCP), leading to a smoother user experience.

Creating "Machine-Friendly" Markup

Clean code increases your Text-to-HTML ratio, helping Googlebot quickly identify core keywords. It also promotes Semantic HTML, which clarifies content structure for search engines.

Ensuring Web Standards Compliance

Stripping inline styles and events enforces "Separation of Concerns," making your code easier to maintain and scale. Clean HTML is also the perfect input for minification and other build tools.

How-To Guide: 3 Simple Steps to Clean HTML

Achieving perfectly clean code is a frictionless process. Follow this simple guide to transform your markup.

1

Paste Your HTML

Copy the chaotic code from your editor, document, or email and paste it into the cleaner's input area.

2

Select Cleanup Options

Customize the cleaning process. For maximum SEO benefit, enable options like removing styles, classes, and unwrapping `<span>` tags.

3

Clean, Format & Copy

Click the button to process the code. The clean, indented output is ready to be copied and integrated into your project.

Advanced Optimization Techniques

After cleaning your HTML, apply these professional techniques for peak performance and accessibility.

File Size Optimization

Minification: Use a minifier to remove all whitespace and newlines from your final code, which can shave off an additional 10-20% of file size.

Server Compression: Ensure your web server is configured to use Gzip or Brotli compression, the most important step for minimizing transfer size.

Semantic CSS & Optimization

CSS Tree Shaking: Use tools like PurgeCSS to remove unused CSS rules from frameworks like Bootstrap or Tailwind, ensuring only the necessary styles are loaded.

Lazy Load CSS: Load critical, above-the-fold CSS first, and defer the rest to improve initial page render speed.

Accessibility (A11y) Compliance

Alt Attributes: Ensure all `<img>` tags have descriptive `alt` attributes for screen readers and SEO.

Heading Structure: Use `<h1>` only once per page and maintain a logical `<h2>` to `<h6>` hierarchy to structure content clearly.