§ string-strip-html examples

§ Table of Contents

§ Quick Take

import { strict as assert } from "assert";
import stripHtml from "string-strip-html";

// Inline tags are removed; the text around them is kept.
const boldStripped = stripHtml(`Some text <b>and</b> text.`).result;
assert.equal(boldStripped, `Some text and text.`);

// A space is left where the <div> tags were, so neighbouring words do not
// get glued together into "aaabbbccc".
const divStripped = stripHtml(`aaa<div>bbb</div>ccc`).result;
assert.equal(divStripped, `aaa bbb ccc`);

// Tag pairs listed here are removed along with everything between them.
const tagsStrippedWithContents = [
  "script", // default
  "style", // default
  "xml", // default
  "pre", // <-- custom-added
];
const preStripped = stripHtml(`a <pre><code>void a;</code></pre> b`, {
  stripTogetherWithTheirContents: tagsStrippedWithContents,
}).result;
assert.equal(preStripped, `a b`);

// Raw brackets that are not part of a tag come through untouched.
const rawBracketsStripped = stripHtml(`a < b and c > d`).result;
assert.equal(rawBracketsStripped, `a < b and c > d`);

§ Extract HTML Head Contents

import { strict as assert } from "assert";
import stripHtml from "string-strip-html";

const someHtml = `<!DOCTYPE html>
<html lang="en" dir="ltr">
  <head>
    <meta charset="utf-8">
    <title>the title</title>
  </head>
  <body>
    the content
  </body>
</html>`;

// Goal: get what is inside <head>, without the <head...> and </head> tags.
// Step one: ask only for the "head" pair (tags plus contents) and cut the
// reported locations out of the source string.
const { filteredTagLocations: headLocations } = stripHtml(someHtml, {
  onlyStripTags: ["head"],
  stripTogetherWithTheirContents: ["head"],
});
const headWithHeadTags = headLocations
  .map(([from, to]) => someHtml.slice(from, to))
  .join("")
  .trim();

assert.equal(
  headWithHeadTags,
  `<head>
    <meta charset="utf-8">
    <title>the title</title>
  </head>`
);

// Drop the wrapping head tags and keep only what sits between them.
// The pattern accepts an opening tag with attributes (<head ...>) and any
// letter case; requiring whitespace right after "head" keeps tags such as
// <header> from matching.
const headContents = headWithHeadTags
  .replace(/<\/?head(?:\s[^>]*)?>/gi, "")
  .trim();
assert.equal(
  headContents,
  `<meta charset="utf-8">
    <title>the title</title>`
);

§ Leave Only HTML

import { strict as assert } from "assert";
import stripHtml from "string-strip-html";

const someHtml = `<!DOCTYPE html>
<html lang="en" dir="ltr">
  <head>
    <meta charset="utf-8">
    <title></title>
  </head>
  <body>
    <h1>Title</h1>
    Some text.
  </body>
</html>`;

// Cut every reported tag location out of the source and glue the pieces
// together: what remains is the markup with all text removed.
const tagsOnly = stripHtml(someHtml)
  .allTagLocations.map(([from, to]) => someHtml.slice(from, to))
  .join("");

assert.equal(
  tagsOnly,
  `<!DOCTYPE html><html lang="en" dir="ltr"><head><meta charset="utf-8"><title></title></head><body><h1></h1></body></html>`
);

§ Leave Only Opening Td Tags

import { strict as assert } from "assert";
import stripHtml from "string-strip-html";

const someHtml = `<table width="100" border="0" cellpadding="0" cellspacing="0">
  <tr>
    <td class="col1">
      cell1
    </td>
    <td class="col2">
      cell2
    </td>
  </tr>
  <tr>
    <td class="col3">
      cell3
    </td>
    <td class="col4">
      cell4
    </td>
  </tr>
</table>`;

// the first way
// -----------------------------------------------------------------------------

// There is no `onlyStripTags: ["td"]` here - the selection is done purely in
// the callback, which decides what gets pushed into the resulting ranges.
// Besides the fields used below, the callback argument also carries
// `deleteFrom`, `deleteTo` and `insert`.
const { ranges: openingTdRanges } = stripHtml(someHtml, {
  cb: ({ tag, rangesArr, proposedReturn }) => {
    // anything that is not an opening <td> is ignored
    if (tag.name !== "td" || tag.slashPresent) {
      return;
    }
    rangesArr.push(proposedReturn);
  },
});

// each slice is trimmed before the pieces are joined
const openingTdsViaRanges = openingTdRanges
  .map(([from, to]) => someHtml.slice(from, to).trim())
  .join("");

assert.equal(
  openingTdsViaRanges,
  `<td class="col1"><td class="col2"><td class="col3"><td class="col4">`
);

// the second way:
// -----------------------------------------------------------------------------

// The return value of stripHtml() is not used at all here: the callback
// collects every opening <td> into an array in the outer scope.
const openingTds = [];
stripHtml(someHtml, {
  // again, there is no `onlyStripTags: ["td"]` - the callback does the
  // filtering; it also receives `insert`, `rangesArr` and `proposedReturn`,
  // which are not needed for this approach
  cb: ({ tag, deleteFrom, deleteTo }) => {
    if (tag.name !== "td" || tag.slashPresent) {
      return;
    }
    openingTds.push(someHtml.slice(deleteFrom, deleteTo).trim());
  },
});

const resultStr = openingTds.join("");
assert.equal(
  resultStr,
  `<td class="col1"><td class="col2"><td class="col3"><td class="col4">`
);

§ Leave Only Td Tags

import { strict as assert } from "assert";
import stripHtml from "string-strip-html";

const someHtml = `<table width="100" border="0" cellpadding="0" cellspacing="0">
  <tr>
    <td class="col1">
      cell1
    </td>
    <td class="col2">
      cell2
    </td>
  </tr>
  <tr>
    <td class="col3">
      cell3
    </td>
    <td class="col4">
      cell4
    </td>
  </tr>
</table>`;

// Restrict processing to <td> tags, then cut the reported locations (both
// opening and closing tags) out of the source and join them.
const { filteredTagLocations: tdLocations } = stripHtml(someHtml, {
  onlyStripTags: ["td"],
});
const tdTagsOnly = tdLocations
  .map(([from, to]) => someHtml.slice(from, to))
  .join("");

assert.equal(
  tdTagsOnly,
  `<td class="col1"></td><td class="col2"></td><td class="col3"></td><td class="col4"></td>`
);

§ Remove All HTML from a String

import { strict as assert } from "assert";
import stripHtml from "string-strip-html";

const someHtml = `<!DOCTYPE html>
<html lang="en" dir="ltr">
  <head>
    <meta charset="utf-8">
    <title></title>
  </head>
  <body>
    <h1>Title</h1>
    Some text.
  </body>
</html>`;

// With default options only the text survives; the heading and the
// paragraph text end up on separate lines.
const { result: textOnly } = stripHtml(someHtml);
assert.equal(textOnly, `Title\nSome text.`);

§ Strip HTML from a Raw JSON String

import { strict as assert } from "assert";
import stripHtml from "string-strip-html";
import traverse from "ast-monkey-traverse";

/**
 * Parses a JSON string and strips HTML from every string value that sits
 * under an object key.
 *
 * @param {string} str - raw JSON source
 * @returns {*} whatever the traversal returns for the parsed data, with
 *   HTML removed from string values of object keys
 */
const stripFromJsonStr = (str) =>
  traverse(JSON.parse(str), (key, val) => {
    // The traversal callback works like Array.map - the returned value is
    // what gets written. When an object is traversed both "key" and "val"
    // are given; for array elements only "key" is set and "val" is
    // undefined. Checking "val" for a string therefore picks object values
    // only, and leaves booleans, numbers and everything else alone.
    if (typeof val === "string") {
      return stripHtml(val).result;
    }
    // nothing to strip - hand back the current node unchanged
    return val !== undefined ? val : key;
  });

// A lone "<" is not a tag, so the JSON comes back exactly as it went in:
const untouchedJson = JSON.stringify(
  stripFromJsonStr(`{"Operator":"<","IsValid":true}`)
);
assert.equal(untouchedJson, `{"Operator":"<","IsValid":true}`);

// HTML inside a string value is stripped; the boolean is passed through:
const strippedJson = JSON.stringify(
  stripFromJsonStr(`{"Operator":"a <div>b</div> c","IsValid":true}`)
);
assert.equal(strippedJson, `{"Operator":"a b c","IsValid":true}`);

§ Set the Title Case Using title Package

// https://www.npmjs.com/package/title

// This program does not title-case tags: single tags (<br class="z"/>, for
// example) are left as they are and, for paired tags, so is everything
// between the opening and the closing tag.

import { strict as assert } from "assert";
import title from "title";
import invertRanges from "ranges-invert";
import applyRanges from "ranges-apply";
import stripHtml from "string-strip-html";

// ES modules have no `require`; load the package with `import`, like the
// other dependencies of this example.
import rangesRegex from "ranges-regex";

/**
 * Title-cases a string while leaving HTML and whitelisted words as they are.
 *
 * The locations reported by stripHtml() and every case-insensitive match of
 * a whitelisted word are collected as protected ranges. Those ranges are
 * inverted, and only the remaining stretches of the string are run through
 * title().
 *
 * @param {string} str - source text, possibly containing HTML
 * @returns {string} the text with the unprotected parts title-cased
 */
function tagAwareTitle(str) {
  const whitelist = ["eslint", "readme", "npm"];
  const { filteredTagLocations } = stripHtml(str, {
    stripTogetherWithTheirContents: ["*"],
  });

  // protected ranges: tag locations first, then whitelist matches
  const protectedRanges = [...filteredTagLocations];
  for (const word of whitelist) {
    const matches = rangesRegex(new RegExp(word, "gi"), str);
    // a falsy result means there is nothing to add for this word
    if (matches) {
      protectedRanges.push(...matches);
    }
  }

  // everything outside the protected ranges, up to the end of the string
  const inverted = invertRanges(protectedRanges, str.length);

  // nothing to process piecewise - run title() over the whole string
  if (!Array.isArray(inverted) || !inverted.length) {
    return title(str);
  }

  // turn each inverted range, e.g. [3, 4], into [from, to, replacement],
  // where the replacement is the same slice processed through title()
  const replacements = inverted.map(([from, to]) => [
    from,
    to,
    title(str.slice(from, to)),
  ]);
  return applyRanges(str, replacements);
}

// [input, expected output] pairs
const titleCases = [
  // middle: tags sit in the middle of the string
  [
    `This is a title with some <code>code</code> in it`,
    `This Is a Title with Some <code>code</code> In It`,
  ],
  // leading: the string starts with a tag
  [
    `<span class="xyz">abc<span> defgh ESLint`,
    `<span class="xyz">abc<span> Defgh ESLint`,
  ],
];

for (const [input, expected] of titleCases) {
  assert.equal(tagAwareTitle(input), expected);
}