6

I am building an Angular 7 app. In this app I let users edit HTML, which I then want to convert into JSON so that it is stored in a way that makes sense.

In short, I want to take any HTML code and process it into a JSON object. How can I do this?

3
  • 2
    You could maybe use something like mckamey/jsonml for the conversion between JSON and HTML/XML. Commented Feb 29, 2020 at 18:37
  • JSON and HTML are completely different structures. How do you even envision this working? Can you give an example: a basic HTML snippet and the expected output? What do you key by? Tag, style/script/element? How do you preserve the order? Honestly, it is much easier to just store the entire HTML as a string. That's how all the website builders work, afaik. Commented Feb 29, 2020 at 18:39
  • 1
    @sinanspd - Well, they're both capable of being tree structures, and JSON has arrays for order, so... Commented Feb 29, 2020 at 18:50

4 Answers 4

9

I'd parse the HTML into a DOM (you can do that client-side or server-side) and then serialize the aspects of the DOM that I cared about to an object, which I'd then use JSON.stringify on (if you really want JSON).

For instance:

/**
 * Turn a DOM node into a plain value that JSON.stringify can serialize.
 *
 * - Text nodes become their raw string content.
 * - A document node is represented by its root element.
 * - Elements become { nodeType, tagName, attributes, children }, where
 *   attributes is a list of { name, value } pairs and children holds the
 *   converted child nodes in document order.
 * - Any other node (e.g. a comment) becomes { nodeType, nodeValue }.
 *
 * @param {Node} dom - The node to convert.
 * @returns {string|Object} The serializable representation of the node.
 */
function converter(dom) {
    if (dom.nodeType === Node.TEXT_NODE) {
        return dom.nodeValue;
    }

    // A document stands in for its root element.
    const node = dom.nodeType === Node.DOCUMENT_NODE ? dom.documentElement : dom;

    if (node.nodeType !== Node.ELEMENT_NODE) {
        return { nodeType: node.nodeType, nodeValue: node.nodeValue };
    }

    // Copy only name/value; Array.from(node.attributes) alone would keep the
    // full Attr objects, which carry a lot of things we don't want.
    const attributes = Array.from(node.attributes, ({ name, value }) => ({ name, value }));

    const children = [];
    let current = node.firstChild;
    while (current) {
        children.push(converter(current));
        current = current.nextSibling;
    }

    return { nodeType: node.nodeType, tagName: node.tagName, attributes, children };
}
// Convert the element with id "example" and print the result as JSON
// indented by four spaces.
const exampleElement = document.getElementById("example");
const json = JSON.stringify(converter(exampleElement), null, 4);
console.log(json);
/* Allow elements with this class to grow to the full available height;
   !important makes this win over any competing max-height rule.
   NOTE(review): presumably this targets the snippet runner's console
   output panel — confirm. */
.as-console-wrapper {
    max-height: 100% !important;
}
<!-- Sample markup for the script: the element with id "example" is the one passed to converter(). It mixes attributes, text, a comment node and nested elements. -->
<div id="example" class="ex">
  <span>Span 1</span>
  <span>Span 2</span>
  <!-- comment -->
  <span>
    Span 3
    <span>Inner span</span>
  </span>
</div>

Obviously that's just a rough sketch, not a completely baked solution.

Sign up to request clarification or add additional context in comments.

Comments

0

/**
 * Build a JSON-friendly snapshot of a DOM node.
 *
 * Text nodes are returned as plain strings. A document node is replaced by
 * its root element. Elements yield an object holding nodeType, tagName, a
 * list of { name, value } attribute pairs and the recursively converted
 * children; every other node kind yields { nodeType, nodeValue }.
 *
 * @param {Node} dom - Node to snapshot.
 * @returns {string|Object} String for text nodes, plain object otherwise.
 */
function converter(dom) {
    const { TEXT_NODE, DOCUMENT_NODE, ELEMENT_NODE } = Node;

    if (dom.nodeType === TEXT_NODE) {
        return dom.nodeValue;
    }

    let source = dom;
    if (source.nodeType === DOCUMENT_NODE) {
        source = source.documentElement;
    }

    const serialized = { nodeType: source.nodeType };

    if (source.nodeType !== ELEMENT_NODE) {
        serialized.nodeValue = source.nodeValue;
        return serialized;
    }

    serialized.tagName = source.tagName;

    // Keep just the name/value pair of each attribute; the Attr objects in
    // source.attributes expose far more than we want to serialize.
    serialized.attributes = [];
    for (let index = 0; index < source.attributes.length; index += 1) {
        const { name, value } = source.attributes[index];
        serialized.attributes.push({ name, value });
    }

    // Walk the sibling chain so children stay in document order.
    serialized.children = [];
    let cursor = source.firstChild;
    while (cursor) {
        serialized.children.push(converter(cursor));
        cursor = cursor.nextSibling;
    }

    return serialized;
}
// Look up the sample element, convert it, and log the result as
// pretty-printed JSON (4-space indentation).
const sampleRoot = document.getElementById("example");
const converted = converter(sampleRoot);
const json = JSON.stringify(converted, null, 4);
console.log(json);
/* Raise the height cap to 100% for elements with this class; !important
   overrides any other max-height declaration.
   NOTE(review): presumably the snippet runner's console panel — confirm. */
.as-console-wrapper {
    max-height: 100% !important;
}
<!-- Input for the script: converter() is called on the element with id "example". The tree contains attributes, whitespace text, a comment and nested spans. -->
<div id="example" class="ex">
  <span>Span 1</span>
  <span>Span 2</span>
  <!-- comment -->
  <span>
    Span 3
    <span>Inner span</span>
  </span>
</div>

1 Comment

Please always describe what you are doing in your answer. It should be updated or removed. Read How to answer before you provide more answers ^^
0

This is only a copy of T.J. Crowder's already-marked answer, with one small change: I have removed empty strings, empty attribute lists, and empty children lists from the JSON result. The code changes are commented within the code.

 

/**
 * Convert a DOM node into a compact, JSON-serializable value, leaving out
 * everything that carries no information.
 *
 * - Text nodes become their raw string, or undefined when blank.
 * - A document node is represented by its root element.
 * - Elements become { nodeType, tagName } plus `attributes` (a list of
 *   { name, value } pairs) and `children` (converted child nodes in document
 *   order); each of those two keys is present only when non-empty.
 * - Any other node (e.g. a comment) becomes { nodeType, nodeValue }, or
 *   undefined when its value is blank.
 *
 * @param {Node} dom - The node to convert.
 * @returns {string|Object|undefined} The compact representation, or
 *     undefined when the node has nothing worth keeping.
 */
function converter(dom) {
    // A value counts as content only when it is a non-blank string.
    const hasContent = (value) => Boolean(value) && value.trim() !== '';

    if (dom.nodeType === Node.TEXT_NODE) {
        return hasContent(dom.nodeValue) ? dom.nodeValue : undefined;
    }

    // A document stands in for its root element.
    const node = dom.nodeType === Node.DOCUMENT_NODE ? dom.documentElement : dom;

    if (node.nodeType !== Node.ELEMENT_NODE) {
        if (!hasContent(node.nodeValue)) {
            return undefined;
        }
        return { nodeType: node.nodeType, nodeValue: node.nodeValue };
    }

    // nodeType is recorded unconditionally for elements: an element's
    // nodeValue is null, so gating this on nodeValue would drop the type
    // from every element in the output.
    const obj = { nodeType: node.nodeType, tagName: node.tagName };

    // Copy only name/value; the Attr objects themselves carry a lot of
    // things we don't want.
    const attributes = [];
    for (let i = 0, len = node.attributes.length; i < len; ++i) {
        const attr = node.attributes[i];
        attributes.push({ name: attr.name, value: attr.value });
    }
    if (attributes.length > 0) {
        obj.attributes = attributes;
    }

    const children = [];
    for (let child = node.firstChild; child; child = child.nextSibling) {
        const childVal = converter(child);
        // Skip children that converted to nothing (blank text, empty comments).
        if (childVal !== undefined) {
            children.push(childVal);
        }
    }
    if (children.length > 0) {
        obj.children = children;
    }

    return obj;
}

// Convert the element with id "example" and print the pruned result as
// JSON with four-space indentation.
const exampleNode = document.getElementById("example");
const json = JSON.stringify(converter(exampleNode), null, 4);
console.log(json);
/* Set max-height to 100% on elements with this class, using !important to
   override any other max-height declaration.
   NOTE(review): presumably the snippet runner's console panel — confirm. */
.as-console-wrapper {
    max-height: 100% !important;
}
<!-- Input for the script: converter() is called on the element with id "example". The whitespace-only text between the tags is what the blank-value checks in converter() skip. -->
<div id="example" class="ex">
  <span>Span 1</span>
  <span>Span 2</span>
  <!-- comment -->
  <span>
    Span 3
    <span>Inner span</span>
  </span>
</div>

1 Comment

for some reason, my change also removes the "NodeType" information - I will fix this later
-1

If you just want to add it to a JSON request and send it to an external server, you could do something like this:

{
    "html": "<html>...</html>"
}

And send it to the server for further processing.

Comments

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.