Javascript to pull attributes from shortcode string

I have a JavaScript application which retrieves shortcode strings from a WordPress database. So I may end up with a variable like this:

var shortcode = '[wp-form id="1946" title="My Test Form"]';

I am looking to use pure JavaScript to access the attributes so I can extract the title, etc. I imagine this will involve some form of regex and split(), but so far my efforts have been frustrated because splitting on whitespace also breaks up quoted values such as "My Test Form".

Read More

Any ideas greatly appreciated.

Related posts

Leave a Reply

4 comments

  1. Try to use this code:

    const shortcode = '[wp-form id="1946" title="My Test Form" empty=""]';

    // Collect every name="value" pair in a single pass. `[\w-]+` allows
    // hyphenated attribute names; the lazy `.*?` stops at the first closing
    // quote so that empty values (empty="") are captured as ''.
    // matchAll yields nothing when there are no attributes, so a shortcode
    // without any name="value" pairs simply produces an empty object.
    const attributes = {};
    for (const [, name, value] of shortcode.matchAll(/([\w-]+)="(.*?)"/g)) {
        attributes[name] = value;
    }
    console.log(attributes);
    

    Output:

    Object {id: "1946", title: "My Test Form", empty: ''}
    
  2. Okay, even though I’m late to the party, I’m going to throw an answer in. I’m surprised nobody complained “you can’t parse with just a regular expression!” I guess that used to be a much more fashionable comment to make. Anyway, I think it’s perfectly reasonable to use just a regex, and I see some reasonable attempts already given.

    However, if you want to really parse the tag, here’s a quick parser I whipped up.

    /**
     * Parses a single shortcode tag such as `[wp-form id="1946" title="My Test Form"]`
     * with a small token-driven state machine.
     *
     * The input is split into tokens that are either a run of whitespace, a single
     * non-word character, or a run of word characters. Everything before the first
     * `[` is ignored, and parsing stops at the first `]` found outside a quoted value.
     *
     * Supported attribute forms: `name="value"`, `name='value'`, `name=value`
     * (a single word-character token) and a bare `name` directly before `]`.
     * Inside a quoted value a backslash escapes the following quote character;
     * any other backslash sequence is kept verbatim.
     *
     * @param {string} shortCode - Text containing the shortcode tag.
     * @returns {{name: string, attributes: Object<string, (string|null)>}}
     *   The tag name and a map of attribute names to their values.
     * @throws {string} A message of the form `invalid token: "<token>" encountered at <index>`
     *   when `=` appears without an attribute name, or when an attribute name that is
     *   followed by whitespace is followed by anything other than `=`.
     */
    function parseShortCode(shortCode) {
      // Group 1: whitespace run or one non-word character; group 2: word-character run.
      const re = /(\s+|\W)|(\w+)/g;
      const hasWhitespace = (text) => /\s/.test(text);
      // Errors are thrown as plain strings (as before) so callers that display
      // the thrown value directly keep working.
      const invalidToken = (text, index) => `invalid token: "${text}" encountered at ${index}`;

      let match;
      let token;
      let curAttribute = '';
      let quoteChar;
      let mode = 'NOT STARTED';
      const parsedValue = {
        name: '',
        attributes: {}
      };

      while ((match = re.exec(shortCode)) !== null) {
        token = match[0];
        switch (mode) {
          case 'NOT STARTED':
            // Skip everything until the opening bracket.
            if (token === '[') {
              mode = 'GETNAME';
            }
            break;
          case 'GETNAME':
            if (token === ']') {
              // Shortcode without attributes, e.g. "[foo]": do not absorb "]" into the name.
              mode = 'COMPLETE';
            } else if (!hasWhitespace(token)) {
              // Names may span several tokens ("wp", "-", "form").
              parsedValue.name += token;
            } else if (parsedValue.name) {
              mode = 'PARSING';
            }
            break;
          case 'PARSING':
            if (token === ']') {
              mode = 'COMPLETE';
            } else if (token === '=') {
              if (!curAttribute) {
                throw invalidToken(token, match.index);
              }
              mode = 'GET ATTRIBUTE VALUE';
            } else if (!hasWhitespace(token)) {
              // Attribute names may also span several tokens.
              curAttribute += token;
            } else if (curAttribute) {
              // Whitespace after a name: the next token decides what happens.
              mode = 'SET ATTRIBUTE';
            }
            break;
          case 'SET ATTRIBUTE':
            // Only "=" is accepted after "name<whitespace>".
            if (hasWhitespace(token)) {
              parsedValue.attributes[curAttribute] = null;
            } else if (token === '=') {
              mode = 'GET ATTRIBUTE VALUE';
            } else {
              throw invalidToken(token, match.index);
            }
            break;
          case 'GET ATTRIBUTE VALUE':
            // Whitespace between "=" and the value is skipped.
            if (!hasWhitespace(token)) {
              if (/["']/.test(token)) {
                // Remember which quote opened the value so only the same one closes it.
                quoteChar = token;
                parsedValue.attributes[curAttribute] = '';
                mode = 'GET QUOTED ATTRIBUTE VALUE';
              } else {
                // Unquoted value: exactly one token.
                parsedValue.attributes[curAttribute] = token;
                curAttribute = '';
                mode = 'PARSING';
              }
            }
            break;
          case 'GET QUOTED ATTRIBUTE VALUE':
            if (/\\/.test(token)) {
              mode = 'ESCAPE VALUE';
            } else if (token === quoteChar) {
              mode = 'PARSING';
              curAttribute = '';
            } else {
              parsedValue.attributes[curAttribute] += token;
            }
            break;
          case 'ESCAPE VALUE':
            if (/["']/.test(token)) {
              // Escaped quote: keep the quote, drop the backslash.
              parsedValue.attributes[curAttribute] += token;
            } else {
              // Not a recognised escape: keep the backslash as written.
              parsedValue.attributes[curAttribute] += '\\' + token;
            }
            mode = 'GET QUOTED ATTRIBUTE VALUE';
            break;
          // 'COMPLETE' has no case: tokens after the closing "]" are ignored.
        }
      }

      // A name still pending at the end (e.g. "[foo bar]") becomes an empty-string attribute.
      if (curAttribute && !parsedValue.attributes[curAttribute]) {
        parsedValue.attributes[curAttribute] = '';
      }
      return parsedValue;
    }
    
    /**
     * Reads the shortcode text from the #shortcode field, parses it with
     * parseShortCode, and shows the JSON-serialised result (or the parser's
     * error) in the #result element.
     */
    function doUpdate() {
      const text = document.getElementById('shortcode').value;
      let output;
      try {
        output = parseShortCode(text);
      } catch (err) {
        // The parser throws plain strings; an Error object would serialise as
        // "{}", so show its message instead.
        output = err instanceof Error ? err.message : err;
      }

      // textContent, not innerHTML: the output echoes user-typed text and must
      // never be interpreted as markup.
      document.getElementById('result').textContent = JSON.stringify(output);
    }
    
    // Re-run the parser whenever the Update button is clicked...
    const updateButton = document.getElementById('updateBtn');
    updateButton.addEventListener("click", doUpdate);
    // ...and once immediately so the initial textarea content is shown parsed.
    doUpdate();
    <!-- Demo form: the script reads #shortcode, is triggered by #updateBtn, and writes to #result. -->
    <label for="shortcode">Short Code:</label>
    <textarea id="shortcode" style="width:100%; height:60px">[wp-form id="1946" title="My Test Form"]</textarea>
    <div>
      <button type="button" id="updateBtn">Update</button>
    </div>
    <div>
      <pre id="result"></pre>
    </div>

    I’m sure this has bugs, but I got it to work with your case, and some cases the other answers couldn’t handle. Unless shortcodes get really intense, I’d just stick with a regex. But if you encounter stuff like unquoted attribute values and empty attributes this might work for you.

  3. Don’t try to use String.prototype.split in this case. Instead, describe an attribute together with its value as a pattern, and collect the matches with RegExp.prototype.exec:

    // Group 1: attribute name (word characters or hyphens); group 2: the quoted
    // value, i.e. everything up to the next double quote.
    const re = /([\w-]+)="([^"]*)"/g;
    const str = '[wp-form id="1946" title="My Test Form"]';
    let m;

    // With the g flag, each exec() call resumes after the previous match and
    // returns null once there are no more attributes.
    while ((m = re.exec(str)) !== null) {
        console.log(m[1] + "\n" + m[2] + "\n");
    }
    
  4. This can be done simply with a regex:

    const shortcode = '[wp-form id="1946" title="My Test Form"]';
    // One pattern captures both values: group 1 is the id, group 2 the title.
    // It expects id to appear before title in the shortcode.
    const idAndTitlePattern = /id="(.*?)".*title="(.*?)"/;
    const arr = idAndTitlePattern.exec(shortcode);
    const [, id, title] = arr;