Skip to content

Commit

Permalink
✨ fix extra whitespace in lists
Browse files Browse the repository at this point in the history
  • Loading branch information
ctcpip committed Oct 19, 2023
1 parent afc1eac commit 14a6f5c
Show file tree
Hide file tree
Showing 4 changed files with 73 additions and 14 deletions.
8 changes: 4 additions & 4 deletions scripts/bad-linebreaks-test.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ const afterMD = './scripts/test-samples/bad-linebreaks-sample-after.md';
const beforeMD = './scripts/test-samples/bad-linebreaks-sample-before.md';

// verify hash values to detect file tampering
const knownAfterHash = 'c2b5b7cc30cf5d4ce28274848eeba743';
const knownBeforeHash = 'c9cf57714ec19de2aeea68d45536b119';
const knownAfterHash = '5f29c1fb4abd747c2dd801e12c600ee3';
const knownBeforeHash = '406e900af5cd9af66abbe5b3ab6bcf3e';
const afterHash = await getHashSlingingSlasher(afterMD);
const beforeHash = await getHashSlingingSlasher(beforeMD);
assert.strictEqual(afterHash, knownAfterHash);
Expand All @@ -18,7 +18,7 @@ let fixed, totalMatches;

({ fixed, totalMatches } = findBadStuff(beforeMD, true));
assert.strictEqual(totalMatches.badLinebreaks, 12);
assert.strictEqual(totalMatches.extraWhitespace, 28);
assert.strictEqual(totalMatches.extraWhitespace, 114);
assert.strictEqual(fixed, fs.readFileSync(afterMD, 'utf8').toString());

({ fixed, totalMatches } = findBadStuff(afterMD, true));
Expand All @@ -28,7 +28,7 @@ assert.strictEqual(fixed, fs.readFileSync(afterMD, 'utf8').toString());

({ fixed, totalMatches } = findBadStuff(beforeMD));
assert.strictEqual(totalMatches.badLinebreaks, 12);
assert.strictEqual(totalMatches.extraWhitespace, 28);
assert.strictEqual(totalMatches.extraWhitespace, 114);

function getHashSlingingSlasher(file) { // 💀
return new Promise((res, rej) => {
Expand Down
49 changes: 39 additions & 10 deletions scripts/bad-linebreaks.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,16 @@ import { glob } from 'glob';
// import attributes when?
const mdlintConfig = JSON.parse(fs.readFileSync('.markdownlint-cli2.jsonc', 'utf8').toString());

// Token `type` values this script acts on. Deliberately partial: only the
// types that get checked are listed here.
const tokenTypeEnum = Object.freeze({ LIST: 'list', PARAGRAPH: 'paragraph', SPACE: 'space' });

// a newline whose immediate neighbours on both sides are each a word character, digit, or space
// (`\d` is already covered by `\w`, so listing it is redundant but harmless)
const reBadLinebreaks = /(?<=[\w\d ])\n(?=[\w\d ])/g;
const reExtraWhitespace = /^ +| (?= )| +$/gm;
// per line (`m` flag): leading spaces | any space that is followed by another space | trailing spaces
// net effect when replaced with '': no indentation, runs of spaces collapse to one, no trailing spaces
const reExtraWhitespaceParagraph = /^ +| (?= )| +$/gm;
// list variant, three alternatives per line (`m` flag):
//   1. extra spaces after a list marker and its one separating space — a `-`, `*` or `+` bullet at line start
//      (optionally indented), or `<digits>. `
//      NOTE(review): `|` binds looser than `^ {0,}`, so the `\d+\. ` alternative is NOT anchored to line start
//      and also fires after a number + period in the middle of a line — confirm that is intended
//   2. extra spaces after a word character followed by one space
//   3. trailing spaces
// unlike the paragraph pattern there is no `^ +` alternative, so leading indentation is never matched
const reExtraWhitespaceList = /(?<=^ {0,}[-*+] |\d+\. ) +|(?<=\w+ ) +| +$/gm;

export function findBadStuff(file, fix = false) {

Expand All @@ -24,9 +32,16 @@ export function findBadStuff(file, fix = false) {
const t = tokens[i];
let tokenContent = t.raw;

if (t.type === 'paragraph') {
tokenContent = findBadLinebreaks(tokenContent, totalMatches, fix, file);
tokenContent = findExtraWhitespace(tokenContent, totalMatches, fix, file);
switch (t.type) {
case tokenTypeEnum.PARAGRAPH:
tokenContent = findBadLinebreaks(tokenContent, totalMatches, fix, file, t.type);
// falls through
case tokenTypeEnum.LIST:
case tokenTypeEnum.SPACE:
tokenContent = findExtraWhitespace(tokenContent, totalMatches, fix, file, t.type);
break;
default:
// do nothing
}

// we don't need to build this array if `fix` is `false`, but this keeps complexity down
Expand All @@ -41,7 +56,7 @@ export function findBadStuff(file, fix = false) {

}

function findBadLinebreaks(tokenContent, totalMatches, fix, file) {
function findBadLinebreaks(tokenContent, totalMatches, fix, file, tokenType) {

const matches = Array.from(tokenContent.matchAll(reBadLinebreaks));
totalMatches.badLinebreaks += matches.length;
Expand All @@ -61,27 +76,41 @@ function findBadLinebreaks(tokenContent, totalMatches, fix, file) {
}

} else if (matches.length > 0) {
console.error(`${file}\nfound paragraph with ${matches.length} erroneous linebreak(s):\n${tokenContent}\n`);
console.error(`${file}\nfound ${tokenType} with ${matches.length} erroneous linebreak(s):\n${tokenContent}\n`);
}

return tokenContent;

}

function findExtraWhitespace(tokenContent, totalMatches, fix, file) {
function findExtraWhitespace(tokenContent, totalMatches, fix, file, tokenType) {

let re;

switch (tokenType) {
case tokenTypeEnum.PARAGRAPH:
case tokenTypeEnum.SPACE:
re = reExtraWhitespaceParagraph;
break;
case tokenTypeEnum.LIST:
re = reExtraWhitespaceList;
break;
default:
throw new TypeError(`unsupported token type: ${tokenType}`);
}

const matches = Array.from(tokenContent.matchAll(reExtraWhitespace));
const matches = Array.from(tokenContent.matchAll(re));
const extraWhitespaceCharacters = matches.join('').length;
totalMatches.extraWhitespace += extraWhitespaceCharacters;

if (fix) {

if (matches.length > 0) {
return tokenContent.replace(reExtraWhitespace, '');
return tokenContent.replace(re, '');
}

} else if (matches.length > 0) {
console.error(`${file}\nfound paragraph with ${extraWhitespaceCharacters} extra whitespace character(s):\n${tokenContent}\n`);
console.error(`${file}\nfound ${tokenType} with ${extraWhitespaceCharacters} extra whitespace character(s):\n${tokenContent}\n`);
}

return tokenContent;
Expand Down
15 changes: 15 additions & 0 deletions scripts/test-samples/bad-linebreaks-sample-after.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,21 @@ let biscuits = "delicious";
let biscuits = "delicious";
```

100. First list item
- First nested list item
- Second nested list item

1. biscuits are
1. extremely delicious
1. indeed

- lists
* are
+ fun
- and
* one more
+ time

## story time!

True! nervous, very, very dreadfully nervous I had been and am; but why will you say that I am mad? The disease had sharpened my senses, not destroyed, not dulled them. Above all was the sense of hearing acute. I heard all things in the heaven and in the earth. I heard many things in hell. How then am I mad? Hearken! and observe how healthily, how calmly I can tell you the whole story.
Expand Down
15 changes: 15 additions & 0 deletions scripts/test-samples/bad-linebreaks-sample-before.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,21 @@ let biscuits = "delicious";
let biscuits = "delicious";
```

100. First list item
- First nested list item
- Second nested list item

1. biscuits are
1. extremely delicious
1. indeed

- lists
* are
+ fun
- and
* one more
+ time

## story time!

True! nervous, very, very dreadfully nervous I had been and am; but why will you say that I am mad? The disease had sharpened my senses, not destroyed,
Expand Down

0 comments on commit 14a6f5c

Please sign in to comment.