string-fix-broken-named-entities examples

Table of Contents

Quick Take

import { strict as assert } from "assert";
import { fixEnt } from "string-fix-broken-named-entities";
import { rApply } from "ranges-apply";

// Input with three broken "&nsp;" character references (five characters each).
const source = "&nsp;x&nsp;y&nsp;";

// fixEnt() returns Ranges notation, see codsen.com/ranges/
// Each range is [from index, to index, replacement string]; every broken
// five-character "&nsp;" span is replaced with the literal entity "&nbsp;".
assert.deepEqual(fixEnt(source), [
  [0, 5, "&nbsp;"],
  [6, 11, "&nbsp;"],
  [12, 17, "&nbsp;"],
]);

// render result from ranges using "ranges-apply":
assert.equal(
  rApply(source, fixEnt(source)),
  "&nbsp;x&nbsp;y&nbsp;"
);

Sift raw ampersands in a string from broken character references

// encode those raw ampersands and fix broken character references

import { strict as assert } from "assert";
import { fixEnt } from "string-fix-broken-named-entities";
import { rApply } from "ranges-apply";

// Input mixing raw ampersands (indexes 0, 6 and 12) with two broken
// "&nsp;" character references (indexes 1-5 and 7-11).
const source = "&&nsp;&&nsp;&";

// Every amendment (entity fixes and ampersand encodings) is gathered in
// "finalRanges" and applied to the string in one go at the very end.
const finalRanges = [];
const indexesOfRawAmpersands = [];

// fixEnt() returns Ranges (see codsen.com/ranges/); the callback below
// records every index it is called with.
const resultRanges = fixEnt(source, {
  textAmpersandCatcherCb: (idx) => {
    indexesOfRawAmpersands.push(idx);
  },
});

// check the ranges - all broken NBSP's were fixed:
assert.deepEqual(resultRanges, [
  [1, 6, "&nbsp;"],
  [7, 12, "&nbsp;"],
]);

// Don't apply the ranges yet. Applying them onto the string now,
// rApply(source, resultRanges);
// would shift the index positions and the ampersand indexes would have
// to be calculated again. The whole point of Ranges is they're COMPOSABLE,
// so copy them into "finalRanges" instead:
finalRanges.push(...resultRanges);

// check the positions of reported raw ampersands:
assert.deepEqual(indexesOfRawAmpersands, [0, 6, 12]);

// replace each character at these positions: 0, 6 and 12
// with string "&amp;" - in terms of Ranges, it's a matter
// of building a Ranges array:
const replacementRanges = indexesOfRawAmpersands.map(
  (idx) => [idx, idx + 1, "&amp;"]
);
// this is Ranges notation, array of arrays: [from index, to index, what-to-replace]
assert.deepEqual(replacementRanges, [
  [0, 1, "&amp;"], // we're saying, replace indexes from 0 to 1 with &amp;
  [6, 7, "&amp;"],
  [12, 13, "&amp;"],
]);

// push them into finalRanges as well (the array returned by fixEnt()
// is left untouched):
finalRanges.push(...replacementRanges);

// check what's been gathered so far:
assert.deepEqual(finalRanges, [
  [1, 6, "&nbsp;"],
  [7, 12, "&nbsp;"],
  [0, 1, "&amp;"],
  [6, 7, "&amp;"],
  [12, 13, "&amp;"],
]);

// apply Ranges onto a string - all amendments at once!
const finalResultStr = rApply(source, finalRanges);

// check result
assert.equal(
  finalResultStr,
  "&amp;&nbsp;&amp;&nbsp;&amp;"
);

// Voilà! We fixed broken entities and encoded raw ampersands