Iterating a vector in C++

Such a simple topic - iterating over a vector, is it even worth discussing?

Interestingly enough, there is a difference in how exactly you iterate - be it using iterators, for(:) sugar or plain old for(i=0; i<vec.size(); ++i).

Let us see what output a compiler produces in each of these cases.

sample1:

std::vector<int> data{ 5, 3, 2, 1, 4 };

for (auto i = 0; i < data.size(); ++i) {
  moo(data[i]);
}
        mov     DWORD PTR [rbp-20], 0
        jmp     .L3
.L4:
        mov     eax, DWORD PTR [rbp-20]
        movsx   rdx, eax
        lea     rax, [rbp-64]
        mov     rsi, rdx
        mov     rdi, rax
        call    std::vector<int, std::allocator<int> >::operator[](unsigned long)
        mov     eax, DWORD PTR [rax]
        mov     edi, eax
        call    moo(int)
        add     DWORD PTR [rbp-20], 1
.L3:
        mov     eax, DWORD PTR [rbp-20]
        movsx   rbx, eax
        lea     rax, [rbp-64]
        mov     rdi, rax
        call    std::vector<int, std::allocator<int> >::size() const
        cmp     rbx, rax
        setb    al
        test    al, al
        jne     .L4

but

sample2:

for (auto i = data.size() - 1; i >= 0; --i) {
  moo(data[i]);
}
        lea     rax, [rbp-64]
        mov     rdi, rax
        call    std::vector<int, std::allocator<int> >::size() const
        sub     rax, 1
        mov     QWORD PTR [rbp-24], rax
.L3:
        mov     rdx, QWORD PTR [rbp-24]
        lea     rax, [rbp-64]
        mov     rsi, rdx
        mov     rdi, rax
        call    std::vector<int, std::allocator<int> >::operator[](unsigned long)
        mov     eax, DWORD PTR [rax]
        mov     edi, eax
        call    moo(int)
        sub     QWORD PTR [rbp-24], 1
        jmp     .L3

also

sample3:

// Range-based for: each element is copied into `i` and passed to moo().
for (auto i : data) {
  moo(i);
}
        lea     rax, [rbp-80]
        mov     QWORD PTR [rbp-24], rax
        mov     rax, QWORD PTR [rbp-24]
        mov     rdi, rax
        call    std::vector<int, std::allocator<int> >::begin()
        mov     QWORD PTR [rbp-88], rax
        mov     rax, QWORD PTR [rbp-24]
        mov     rdi, rax
        call    std::vector<int, std::allocator<int> >::end()
        mov     QWORD PTR [rbp-96], rax
        jmp     .L3
.L4:
        lea     rax, [rbp-88]
        mov     rdi, rax
        call    __gnu_cxx::__normal_iterator<int*, std::vector<int, std::allocator<int> > >::operator*() const
        mov     eax, DWORD PTR [rax]
        mov     DWORD PTR [rbp-28], eax
        mov     eax, DWORD PTR [rbp-28]
        mov     edi, eax
        call    moo(int)
        lea     rax, [rbp-88]
        mov     rdi, rax
        call    __gnu_cxx::__normal_iterator<int*, std::vector<int, std::allocator<int> > >::operator++()
.L3:
        lea     rdx, [rbp-96]
        lea     rax, [rbp-88]
        mov     rsi, rdx
        mov     rdi, rax
        call    bool __gnu_cxx::operator!=<int*, std::vector<int, std::allocator<int> > >(__gnu_cxx::__normal_iterator<int*, std::vector<int, std::allocator<int> > > const&, __gnu_cxx::__normal_iterator<int*, std::vector<int, std::allocator<int> > > const&)
        test    al, al
        jne     .L4

but with -O1

sample1:

        movabs  rax, 12884901893
        movabs  rdx, 4294967298
        mov     QWORD PTR [r12], rax
        mov     QWORD PTR [r12+8], rdx
        mov     DWORD PTR [r12+16], 4
        mov     QWORD PTR [rsp+8], rbp
        mov     rbx, r12
        jmp     .L10
.L20:
        add     rbx, 4
        cmp     rbp, rbx
        je      .L19
.L10:
        mov     edi, DWORD PTR [rbx]
        call    moo(int)
        jmp     .L20

sample2:

        movabs  rax, 12884901893
        movabs  rdx, 4294967298
        mov     QWORD PTR [rbp+0], rax
        mov     QWORD PTR [rbp+8], rdx
        mov     DWORD PTR [rbp+16], 4
        lea     rbx, [rbp+16]
        jmp     .L4
.L8:
        sub     rbx, 4
.L4:
        mov     edi, DWORD PTR [rbx]
        call    moo(int)
        jmp     .L8

sample3:

        mov     r12, rax
        lea     rbp, [rax+20]
        mov     QWORD PTR [rsp+16], rbp
        movabs  rax, 12884901893
        movabs  rdx, 4294967298
        mov     QWORD PTR [r12], rax
        mov     QWORD PTR [r12+8], rdx
        mov     DWORD PTR [r12+16], 4
        mov     QWORD PTR [rsp+8], rbp
        mov     rbx, r12
        jmp     .L10
.L20:
        add     rbx, 4
        cmp     rbx, rbp
        je      .L19
.L10:
        mov     edi, DWORD PTR [rbx]
        call    moo(int)
        jmp     .L20

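Interesting how iterating forwards adds an extra boundary check and a jump (cmp rbx, rbp and je .L19 in samples #1 and #3), whereas iterating backwards does not. Note, however, that the check is missing in sample #2 for a reason: data.size() returns an unsigned type, so i is unsigned and i >= 0 is always true - the compiler drops the comparison because the loop, as written, never terminates (and eventually reads out of bounds). A correct backwards loop (e.g. for (auto i = data.size(); i-- > 0;)) needs a boundary check just like the forward one.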

But this finding must come with one pretty big caveat: the cache lines. The number of assembly instructions is a pretty poor measure of performance - after all, CPU instructions are ridiculously fast compared to any form of IO - specifically memory access.

Iterating over a vector backwards affects the memory caching in a pretty poor manner (benchmarking TBD).

Gantt chart. Part 4

Contents

Seems like every two years or so I hop on my Gantt chart implementation and rework it completely.

Last few attempts (rev. 1, rev. 2, rev. 3) were alright, but I was never quite satisfied with the implementation - be it SVG, which has a toll on a browser and has quite limited customization functionality or Canvas API, with same limited customization but being fast.

With the recent introduction of grid layouts in CSS, now supported in all browsers, now seems like a perfect time to revisit the old implementations once again:

Gantt chart, revision 4

CodeSandbox / live demo

This revision now has proper horizontal scrolling on the panel with bars - meaning the labels on the left panel stay in place whilst the right panel is scrollable. Moreover, the chart now relies on pure HTML and CSS (being rendered with React though), making it possible to use rich markup inside the bars and labels.

Implementation steps

The data for the tests is going to look like this:

export const data = [
  {
    id: 1,
    name: "epic 1"
  },
  {
    id: 2,
    name: "epic 2"
  },
  {
    id: 3,
    name: "epic 3"
  },
  {
    id: 4,
    name: "story 1",
    parent: 1
  },
  {
    id: 5,
    name: "story 2",
    parent: 1
  },
  {
    id: 6,
    name: "story 3",
    parent: 1
  },
  {
    id: 7,
    name: "story 4",
    parent: 2
  },
  {
    id: 8,
    name: "story 5",
    parent: 2
  },
  {
    id: 9,
    name: "lorem ipsum dolor atata",
    parent: 5
  },
  {
    id: 10,
    name: "task 2",
    parent: 5
  }
];

The main component, <Gantt>, initially was implemented as follows:

import React, { useMemo } from "react";

import style from "./gantt.module.css";

const LeftPaneRow = ({ id, name }) => {
  return <div className={style.row}>{name}</div>;
};

const LeftPane = ({ items }) => {
  return (
    <div className={style.left_pane}>
      <div className={style.left_pane_header}>/</div>

      <div className={style.left_pane_rows}>
        {items.map((item) => (
          <LeftPaneRow key={item.id} {...item} />
        ))}
      </div>
    </div>
  );
};

const RightPaneRow = ({ id, name }) => {
  return (
    <div className={style.row}>
      <div className={style.entry} style={{ left: 0 }}>
        {id}
      </div>
    </div>
  );
};

const RightPane = ({ items }) => {
  return (
    <div className={style.right_pane}>
      <div className={style.right_pane_header}>...scale...</div>
      <div className={style.right_pane_rows}>
        {items.map((item) => (
          <RightPaneRow key={item.id} {...item} />
        ))}
      </div>
    </div>
  );
};

/**
 * Flattens a parent-linked item list into depth-first display order.
 *
 * Items without a truthy `parent` are roots and keep their relative order.
 * The children of an item follow it directly, last-listed child first.
 * Items whose id was already emitted are skipped.
 */
export const flattenTree = (items) => {
  // Explicit stack (top = end of the array). Roots are reversed so that the
  // first root ends up on top and is emitted first.
  const pending = items.filter(({ parent }) => !parent).reverse();

  const seen = new Set();
  const flattened = [];

  while (pending.length > 0) {
    const current = pending.pop();

    if (seen.has(current.id)) {
      continue;
    }

    seen.add(current.id);
    flattened.push(current);

    // Children are pushed in list order, so the last one is popped first.
    items.forEach((candidate) => {
      if (candidate.parent === current.id) {
        pending.push(candidate);
      }
    });
  }

  return flattened;
};

export const Gantt = ({ items }) => {
  const itemList = useMemo(() => flattenTree(items), [items]);

  return (
    <div className={style.gantt}>
      <LeftPane items={itemList} />
      <RightPane items={itemList} />
    </div>
  );
};

The core of the proper representation of this diagram is the CSS:

.gantt {
  display: grid;
  grid-template: 1fr / auto 1fr;
  grid-template-areas: "left right";
  width: 100%;
}

.gantt .left_pane {
  display: grid;
  grid-area: left;
  border-right: 1px solid #bbb;
  grid-template: auto 1fr / 1fr;
  grid-template-areas: "corner" "rows";
}

.gantt .left_pane .left_pane_rows {
  display: grid;
  grid-area: rows;
}

.gantt .left_pane .left_pane_header {
  display: grid;
  grid-area: corner;
}

.gantt .right_pane {
  display: grid;
  grid-template: auto 1fr / 1fr;
  grid-template-areas: "scale" "rows";
  grid-area: right;
  overflow: auto;
}

.gantt .right_pane .right_pane_rows {
  width: 10000px; /*temp*/
  display: grid;
  grid-area: rows;
}

.gantt .right_pane .right_pane_header {
  display: flex;
  grid-area: scale;
}

.gantt .row {
  height: 40px;
  align-items: center;
  display: flex;
}

.gantt .right_pane .row {
  position: relative;
}

.gantt .right_pane .row .entry {
  position: absolute;
  background: #eeeeee;
  padding: 0.1rem 0.5rem;
  border-radius: 0.4rem;
}
Split into two panels, right is scrollable

Good, we now have two panels with items aligned in rows and the right panel being scrollable if it gets really long. Next thing, position: absolute is absolutely disgusting - we use grid layout already! Instead, split each row into the same number of columns using grid and position the elements in there:

const RightPaneRow = ({ id, name, columns, start, end }) => {
  const gridTemplate = `auto / repeat(${columns}, 1fr)`;
  const gridArea = `1 / ${start} / 1 / ${end}`;

  return (
    <div
      className={style.row}
      style={{
        gridTemplate,
      }}
    >
      <div
        className={style.entry}
        style={{
          gridArea,
        }}
      >
        {id}
      </div>
    </div>
  );
};

and clean up the CSS a bit (like removing the position: absolute and reducing the width from 10000px down to 1000px):

.gantt .right_pane .right_pane_rows {
  width: 1000px; /*temp*/
  display: grid;
  grid-area: rows;
}

.gantt .row {
  height: 40px;
  align-items: center;
  display: grid;
}

.gantt .right_pane .row {
  position: relative;
}

.gantt .right_pane .row .entry {
  background: #eeeeee;
  padding: 0.1rem 0.5rem;
  border-radius: 0.4rem;
}

Now, let’s position the elements in each row using the column index:

const RightPanelRowEntry = ({ id, start, end, children }) => {
  const gridArea = `1 / ${start} / 1 / ${end}`;

  return (
    <div
      className={style.entry}
      style={{
        gridArea,
      }}
    >
      {children}
    </div>
  );
};

const RightPaneRow = ({ id, name, columns, start, end }) => {
  const gridTemplate = `auto / repeat(${columns}, 1fr)`;
  const gridArea = `1 / ${start} / 1 / ${end}`;

  return (
    <div
      className={style.row}
      style={{
        gridTemplate,
      }}
    >
      <div
        className={style.entry}
        style={{
          gridArea,
        }}
      >
        {id}
      </div>
    </div>
  );
};

const RightPaneHeaderRow = ({ columns, children }) => {
  const gridTemplate = `auto / repeat(${columns}, 1fr)`;

  return (
    <div
      className={style.right_pane_header_row}
      style={{
        gridTemplate,
      }}
    >
      {children}
    </div>
  );
};

const RightPaneHeader = ({ children }) => {
  return <div className={style.right_pane_header}>{children}</div>;
};

const RightPane = ({ items, columns }) => {
  const columnHeaders = [...Array(columns)].map((_, idx) => (
    <RightPaneHeader>{idx + 1}</RightPaneHeader>
  ));

  const rows = items.map((item) => (
    <RightPaneRow key={item.id} columns={columns}>
      <RightPanelRowEntry {...item}>{item.id}</RightPanelRowEntry>
    </RightPaneRow>
  ));

  return (
    <div className={style.right_pane}>
      <RightPaneHeaderRow columns={columns}>{columnHeaders}</RightPaneHeaderRow>
      <div className={style.right_pane_rows}>{rows}</div>
    </div>
  );
};

And add corresponding new CSS styles:

.gantt .right_pane .right_pane_header_row {
  display: grid;
  grid-area: scale;
}

.gantt .right_pane .right_pane_header_row .right_pane_header {
  display: grid;
  align-items: center;
  text-align: center;
}

This requires start and end defined for each entry:

// Chart items; `start` and `end` are used as grid column lines for each bar.
export const data = [
  {
    id: 1,
    name: "epic 1",
    start: 1,
    end: 12,
  },
  {
    id: 2,
    name: "epic 2",
    start: 2,
    end: 4,
  },
  {
    id: 3,
    name: "epic 3",
    start: 9,
    end: 11,
  },
  {
    id: 4,
    name: "story 1",
    parent: 1,
    start: 6,
    end: 7,
  },
  // ...
];
Aligning items in a grid

And, to avoid repeating a dozen inline CSS styles, we can utilize CSS variables:

const RightPaneRow = ({ id, columns, children }) => {
  return (
    <div className={style.row}>
      {children}
    </div>
  );
};

const RightPanelRowEntry = ({ id, start, end, children }) => {
  return (
    <div
      className={style.entry}
      style={{
        "--col-start": start,
        "--col-end": end,
      }}
    >
      {children}
    </div>
  );
};

const RightPane = ({ items, columns }) => {
  const columnHeaders = [...Array(columns)].map((_, idx) => (
    <RightPaneHeader>{idx + 1}</RightPaneHeader>
  ));

  const rows = items.map((item) => (
    <RightPaneRow key={item.id} columns={columns}>
      <RightPanelRowEntry {...item}>{item.id}</RightPanelRowEntry>
    </RightPaneRow>
  ));

  return (
    <div className={style.right_pane} style={{ "--columns": columns }}>
      <RightPaneHeaderRow>{columnHeaders}</RightPaneHeaderRow>
      <div className={style.right_pane_rows}>{rows}</div>
    </div>
  );
};

We can also re-use the same row for header:

const RightPaneHeaderRow = ({ children }) => {
  return <div className={style.right_pane_header_row}>{children}</div>;
};

And corresponding CSS:

.gantt .right_pane .right_pane_header_row {
  display: grid;
  grid-area: scale;

  grid-template: auto / repeat(var(--columns, 1), 1fr);
}

.gantt .right_pane .row {
  position: relative;

  grid-template: auto / repeat(var(--columns, 1), 1fr);
}

.gantt .right_pane .row .entry {
  background: #eeeeee;
  padding: 0.1rem 0.5rem;
  border-radius: 0.5rem;
  align-items: center;
  text-align: center;

  grid-area: 1 / var(--col-start, 1) / 1 / var(--col-end, 1);
}

I like to also change the fonts, since the default sans-serif just looks terrible:

@import url("https://fonts.googleapis.com/css2?family=Assistant:wght@200..800&display=swap");

:root {
  font-family: "Assistant", sans-serif;
  font-optical-sizing: auto;
  font-weight: 300;
  font-style: normal;
  font-variation-settings: "wdth" 100;
}
Changing the font

And maybe add some grid lines for the rows:

.gantt .row:first-child {
  border-top: 1px solid var(--border-color, #eee);
}

.gantt .row {
  padding: 0 0.75rem;
  border-bottom: 1px solid var(--border-color, #eee);
}
Adding grid lines

Now let’s add some padding to separate parent and child items of a chart:

const LeftPaneRow = ({ level, id, name }) => {
  const nestingPadding = `${level}rem`;

  return (
    <div className={style.row} style={{ "--label-padding": nestingPadding }}>
      {name}
    </div>
  );
};
.gantt .left_pane .row {
  padding-left: var(--label-padding, 0);
}

and fill out the level property when flattening the item tree:

/**
 * Flattens a parent-linked item list into depth-first display order and
 * annotates every emitted item with its nesting `level` (roots are level 0).
 *
 * The children of an item follow it directly, last-listed child first.
 * Items whose id was already emitted are skipped.
 */
export const flattenTree = (items) => {
  // Explicit stack (top = end of the array). Roots are reversed so that the
  // first root ends up on top and is emitted first.
  const pending = items
    .filter(({ parent }) => !parent)
    .map((item) => ({ item, level: 0 }))
    .reverse();

  const seen = new Set();
  const flattened = [];

  while (pending.length > 0) {
    const { item, level } = pending.pop();

    if (seen.has(item.id)) {
      continue;
    }

    seen.add(item.id);
    flattened.push({ ...item, level });

    // Children sit one level deeper; the last one pushed is popped first.
    items.forEach((child) => {
      if (child.parent === item.id) {
        pending.push({ item: child, level: level + 1 });
      }
    });
  }

  return flattened;
};
Shrinking right panel

And automate the number of columns calculation:

export const Gantt = ({ items }) => {
  const itemList = flattenTree(items);

  const startsAndEnds = items.flatMap(({ start, end }) => [start, end]);
  const columns = Math.max(...startsAndEnds) - Math.min(...startsAndEnds);

  return (
    <div className={style.gantt}>
      <LeftPane items={itemList} />
      <RightPane items={itemList} columns={columns} />
    </div>
  );
};

In order to make chart panel scrollable, one can set a width CSS property for the .right_pane_rows and .right_pane_header_row:

.gantt .right_pane .right_pane_rows {
  width: 2000px;
}

.gantt .right_pane .right_pane_header_row {
  width: 2000px;
}

The last bit for this prototype would be to have a scale for the columns.

Assume a chart item has abstract start and end fields - these could be dates or some domain-specific numbers (like a week in a quarter or a sprint, etc.). Those will then need to be mapped onto a column index. Then the chart width (in columns) would be the difference between the smallest start value and the biggest end value:

export const Gantt = ({ items, scale }) => {
  const itemList = flattenTree(items).map((item) => ({
    ...item,
    ...scale(item), // assuming `scale` function returns an object { start: number; end: number }
  }));

  const minStartItem = minBy(itemList, (item) => item.start);
  const maxEndItem = maxBy(itemList, (item) => item.end);

  const columns = maxEndItem.end - minStartItem.start;

  return (
    <div className={style.gantt}>
      <LeftPane items={itemList} />
      <RightPane items={itemList} columns={columns} />
    </div>
  );
};

The minBy and maxBy helper functions could be either taken from lodash or manually defined like this (maxBy is symmetrical, with the comparison flipped):

/**
 * Returns the item for which `selector` yields the smallest value, or
 * `undefined` for an empty list. On ties the earliest item wins.
 */
const minBy = (items, selector) => {
  if (items.length === 0) {
    return undefined;
  }

  // Strict `<` keeps the current best on equal values.
  return items.reduce((best, candidate) =>
    selector(candidate) < selector(best) ? candidate : best
  );
};

For better navigation around this code we can add some types:

// A single chart entry, matching the shape of the sample `data` objects.
interface GanttChartItem {
  // The sample data uses numeric ids; strings remain accepted.
  id: number | string;
  name: string;
  // Id of the parent item; absent for top-level items.
  parent?: number | string;
  // Raw start/end values, read by the `scale` callback.
  start: number;
  end: number;
}

interface GanttChartProps {
  items: GanttChartItem[];
  scale: (item: GanttChartItem) => { start: number; end: number };
}

function minBy<T>(items: T[], selector: (item: T) => number): T | undefined {
  // ...
}

export const Gantt = ({ items, scale }: GanttChartProps) => {
  // ...
};

export default function App() {
  const scale = ({ start, end }) => {
    return { start: start * 2, end: end * 2 };
  };

  return <Gantt items={data} scale={scale} />;
}

We can extend this even further by adding an API to provide labels for columns:

interface GanttChartProps {
  // ...
  // Renders the header label for a column index. ReactNode (rather than an
  // element type) so that plain strings are valid return values too.
  scaleLabel: (column: number) => React.ReactNode;
}

export const Gantt = ({ items, scale, scaleLabel }: GanttChartProps) => {
  // ...

  return (
    <div className={style.gantt}>
      <LeftPane items={itemList} />
      <RightPane items={itemList} columns={columns} scaleLabel={scaleLabel} />
    </div>
  );
};


const RightPane = ({ items, columns, scaleLabel }) => {
  const columnHeaders = [...Array(columns)].map((_, idx) => (
    <RightPaneHeader>{scaleLabel(idx)}</RightPaneHeader>
  ));

  // ...
};

export default function App() {
  const scale = ({ start, end }) => ({ start, end });
  };

  const scaleLabel = (col) => `${col}`;

  return <Gantt items={data} scale={scale} scaleLabel={scaleLabel} />;
}

This new API can then be utilized to show month names, for instance:

export default function App() {
  const scale = ({ start, end }) => {
    return { start, end };
  };

  const months = [
    "Jan",
    "Feb",
    "Mar",
    "Apr",
    "May",
    "Jun",
    "Jul",
    "Aug",
    "Sep",
    "Oct",
    "Nov",
    "Dec",
  ];

  const scaleLabel = (col) => months[col % 12];

  return <Gantt items={data} scale={scale} scaleLabel={scaleLabel} />;
}
Adding scale with labels

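Moreover, it is now possible to inline HTML and CSS in the name of each chart item (only do this with trusted data - dangerouslySetInnerHTML bypasses React's escaping, so untrusted names would open the door to XSS):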

export const LeftPaneRow = ({ level, name }) => {
  const nestingPadding = `${level}rem`;

  return (
    <div className={style.row} style={{ "--label-padding": nestingPadding }}>
      <span dangerouslySetInnerHTML={{__html: name}}></span>
    </div>
  );
};

And then in data.json (note that FontAwesome requires its CSS on a page in order to work):

[
  {
    id: 7,
    name: '<i style="font-family: \'FontAwesome\';" class="fa fa-car"></i>&nbsp;story with FontAwesome',
    parent: 2,
    start: 4,
    end: 6,
  },
  {
    id: 9,
    name: 'inline <em><b style="color: #5ebebe">CSS</b> color</em> <u style="border: 1px dashed #bebefe; padding: 2px; border-radius: 2px">works</u>',
    parent: 5,
    start: 5,
    end: 6,
  },
]

The API can be further improved by providing the render function for the bars’ labels:

export const RightPane = ({ items, columns, scaleLabel, barLabel }) => {
  const rows = items.map((item) => (
    <RightPaneRow key={item.id} columns={columns}>
      <RightPaneRowEntry {...item}>{barLabel ? barLabel(item) : <>{item.id}</>}</RightPaneRowEntry>
    </RightPaneRow>
  ));

  // ...
};

// Props of the chart component.
export interface GanttChartProps {
  items: GanttChartItem[];
  // Maps an item onto its start/end column positions.
  scale: (item: GanttChartItem) => { start: number; end: number };
  // Renders the header label for a column index. ReactNode so that plain
  // strings are valid return values as well as elements.
  scaleLabel: (column: number) => React.ReactNode;
  // Renders the content of an item's bar.
  barLabel: (item: GanttChartItem) => React.ReactNode;
}

export const Chart = ({
  items,
  barLabel,
  scale,
  scaleLabel,
}: GanttChartProps) => {
  // ...
  return (
    <div className={style.gantt}>
      <LeftPane items={itemList} />
      <RightPane
        items={itemList}
        columns={columns}
        scaleLabel={scaleLabel}
        barLabel={barLabel}
      />
    </div>
  );
};

and then in App component:

import pluralize from "pluralize";

export default function App() {
  const barLabel = ({ start, end }) => (
    <>
      {end - start} {pluralize("month", end - start)}
    </>
  );

  // ...

  return (
    <Chart
      items={data}
      scale={scale}
      scaleLabel={scaleLabel}
      barLabel={barLabel}
    />
  );
};
More customized labels

So why is Redux bad?

Redux is generally considered a bad choice for state management on front-end. But check out an average React application (or a few fragments of it):

import { Provider, useAtom } from 'jotai';
import { useQuery } from 'react-query';

const useInfo = () => {
  const { data, error, isLoading } = useQuery({
    queryKey: [ 'info' ],
    queryFn: () => fetch('/info').then(r => r.json()),
    staleTime: Infinity,
  });

  return {
    info: data,
    isLoading,
    error,
  };
};

const HomeWithoutProvider = () => {
  const { raiseToast } = useToast();

  const [ initialRender, setInitialRender ] = useState(false);

  const [ pageType, setPageType ] = useAtom(pageTypeAtom);

  const { info } = useInfo();

  useEffect(() => {
    if (info.isNewVersionAvailable) {
      raiseToast({
        // ...
      });
    }
  }, [info]);

  return (
    <div>...</div>
  );
};

const Home = () => (
  <Provider>
    <HomeWithoutProvider />
  </Provider>
);

const Routes = () => (
  <BrowserRouter>
    <Suspense fallback={<Loader />}>
      <Route element={<Home />} path="/" />
    </Suspense>
  </BrowserRouter>
);

const root = React.createRoot(document.getElementById('root'));

root.render(
  <StrictMode>
    <StyleThemeProvider>
      <ToastProvider>
        <GlobalErrorHandler>
          <ReactQueryProvider>
            <Routes />
          </ReactQueryProvider>
        </GlobalErrorHandler>
      </ToastProvider>
    </StyleThemeProvider>
  </StrictMode>
);

Usually most of the components (Home, Routes) and hooks (useInfo) are in separate files, but for the sake of simplicity I combined them all into one code block.

What I find suboptimal with this code is that it has at least three obvious different state management systems:

  • jotai for shared atoms (pieces of global state)
  • React.useState for internal component state
  • various React.Contexts (StyleThemeProvider, ToastProvider, GlobalErrorHandler, etc.)

On top of those, there are less obvious state management systems:

  • react-router uses internal router state, which could be treated as global application state
  • react-query uses its internal cache for each query
  • react-hook-form uses the form state of a component’s ancestor (which could be declared on any level above the current component)

In the pursuit of encapsulation and reducing the boilerplate, front-end developers came up with all of these solutions aiming to solve the problem of managing application state.

So what exactly is this problem? And what are the issues all of the aforementioned solutions try to address?

As I see it, there are two competing camps:

  • containing the logic in small reusable chunks (hooks, components)
  • sharing chunks of state between different parts of the application

There are some side-tracks like dealing with asynchronous actions (like fetching the data from server), changing the state of external components (like showing a toast message), reducing the unnecessary re-renders.

Back in the day, Redux seemingly addressed these areas to a degree. Redux implements Flux architecture, which was compared to MVC (Model-View-Controller) architecture back in the day:

Data flow in MVC architecture Data flow in Flux architecture

It became especially popular after Angular.JS’ MVVM (Model-View-ViewModel) architecture implementation was considered slow with its dirty checks and constant re-rendering.

It could be said that Redux is being shipped in recent versions of React itself - with the use of the useReducer() hook. One would rarely use Redux on its own, often sticking to a somewhat opinionated stack of reselect (for derived state), react-redux (to connect() components to the store) and redux-thunk or redux-saga (for asynchronous dispatch() calls).

The aforementioned component could be implemented with the “conventional” (old) Redux approach like so:

import { createStore, combineReducers, applyMiddleware } from 'redux';
import { connect, Provider } from 'react-redux';
import { createSelector } from 'reselect';
import { thunk } from 'redux-thunk';

const infoReducer = (state = { isNewVersionAvailable: false }, action) => {
  switch (action.type) {
    case 'INFO_LOADED':
      return { ...state, ...action.payload };

    default:
      return state;
  }
};

const pageDataReducer = (state = { pageType: undefined }, action) => {
  switch (action.type) {
    case 'SET_PAGE_TYPE':
      return { ...state, pageType: action.pageType };

    default:
      return state;
  }
};

const toastReducer = (state = { isOpen: false, content: undefined }, action) => {
  switch (action.type) {
    case 'SHOW_TOAST':
      return { ...state, isOpen: true, content: action.payload };

    default:
      return state;
  }
};

const rootReducer = combineReducers({
  info: infoReducer,
  pageData: pageDataReducer,
  toast: toastReducer,
});

const store = createStore(rootReducer, applyMiddleware(thunk));

// Meet thunks.
// A thunk in this context is a function that can be dispatched to perform async
// activity and can dispatch actions and read state.
// This is an action creator that returns a thunk:
// Action creator returning a thunk: fetches /info, parses the JSON body and
// dispatches INFO_LOADED with the parsed payload.
const loadInfoAction = () =>
  (dispatch) =>
    fetch('/info')
      // json() must be called - it returns a promise of the parsed body.
      .then(r => r.json())
      .then(payload => dispatch({ type: 'INFO_LOADED', payload }));

const raiseToastAction = (content) =>
  (dispatch) =>
    dispatch({ type: 'SHOW_TOAST', payload: content });

const setPageTypeAction = (pageType) =>
  (dispatch) =>
    dispatch({ type: 'SET_PAGE_TYPE', pageType });

const Home = ({ info, pageType, loadInfo, raiseToast, setPageType }) => {
  useEffect(() => {
    loadInfo();
  }, []);

  useEffect(() => {
    if (info.isNewVersionAvailable) {
      raiseToast({
        // ...
      });
    }
  }, [info]);

  return (
    <div>...</div>
  );
};

const mapStateToProps = ({ info, pageData: { pageType } }) => ({ info, pageType });

// Maps the action creators onto props that dispatch them to the store.
const mapDispatchToProps = (dispatch) => ({
  raiseToast: (content) => dispatch(raiseToastAction(content)),

  // Forward the handler's own argument to the action creator.
  setPageType: (pageType) => dispatch(setPageTypeAction(pageType)),

  loadInfo: () => dispatch(loadInfoAction()),
});

const HomeContainer = connect(mapStateToProps, mapDispatchToProps)(Home);

const Routes = () => (
  <BrowserRouter>
    <Suspense fallback={<Loader />}>
      <Route element={<HomeContainer />} path="/" />
    </Suspense>
  </BrowserRouter>
);

const root = React.createRoot(document.getElementById('root'));

root.render(
  <Routes />
);

In my opinion, Redux is not suitable for complex projects for a few reasons:

  • it combines all states (both local and global) in one big messy furball; managing it is quite a hurdle
    • as the project complexity grows, one can not just change a piece of state or selectors without affecting the entirety of the project (and teams)
    • combining reducers into one supermassive function makes any state update an unreasonably long and complex process (remember: each reducer returns a new state instance; now imagine having even a hundred reducers, each of which returns a new state)
  • it is easy for a component to be re-rendered on any change to the state; a lot of effort goes into making sure selectors are well memoized and not re-calculated
  • asynchronous actions are a big unsolved mystery (are you going to use thunks, flux, sagas or something else?)

On the flip side, the idea itself could actually bring a lot of positives if cooked properly:

  • there is only one possible flow of data: via dispatch() call, through the reducers and back to the components connected to the store via component props
    • this is supposed to make following the data (e.g. debugging the application) easy
  • components are pretty much stateless at this point, encapsulated and not having side effects leaking everywhere
  • logic is nicely separated from the representation and is encapsulated in the reducers (and maybe, to a small extent, in selectors)

Elm utilizes the language features and its own runtime combined with Redux-like architecture to improve some aspects of the more traditional pure JS way of things, where there are only opinionated libraries and no one way of doing things.

Consider Elm architecture and how it compares to Redux:

  • all the states are still combined into one big cauldron of chaos
  • by default, any component is just a function returning an array; the entire application will be rerendered on each state change, which is still suboptimal, since literally all components are connected to the store
  • asynchronous actions are handled separately by the runtime in a similar way to synchronous actions; each action returns a new state and a command (triggering the asynchronous processing)
    • since commands are handled by the runtime and there’s a handful of commands, all of them will (eventually) circle back to dispatching messages just like components do, following the same one-way data flow
  • reducers are a lot faster, since they are essentially a big switch..case statement (which is cheap)

The above component could be re-implemented in Elm as follows:

import Browser
import Html exposing (..)
import Html.Attributes exposing (style)
import Html.Events exposing (..)
import Http
import Json.Decode exposing (Decoder, map, field, bool)

main =
  Browser.element
    { init = init
    , update = update
    , subscriptions = subscriptions
    , view = view
    }

type alias Model =
  { info : Maybe (Result Http.Error Info)
  , toast : Maybe Toast
  , pageType : PageType
  }

{-| Payload of the `/info` endpoint. The single field matches what
`infoDecoder` reads, so `map Info (field "isNewVersionAvailable" bool)`
type-checks.
-}
type alias Info =
  { isNewVersionAvailable : Bool
  }

type alias Toast =
  { content : String }

type PageType = Page1 | Page2

init : () -> (Model, Cmd Msg)
init _ =
  ({ info = Nothing, toast = Nothing, pageType = Page1 }, loadInfo)

type Msg
  = GotInfo (Result Http.Error Info)
  | ShowToast Toast
  | SetPageType PageType

update : Msg -> Model -> (Model, Cmd Msg)
update msg model =
  case msg of
    GotInfo result ->
      case result of
        Ok info ->
          ({ model | info = Just (Ok info) }, Cmd.none)

        Err e ->
          ({ model | info = Just (Err e) }, Cmd.none)

    ShowToast t ->
      ({ model | toast = Just t }, Cmd.none)

    SetPageType p ->
      ({ model | pageType = p }, Cmd.none)

subscriptions : Model -> Sub Msg
subscriptions model =
  Sub.none

view : Model -> Html Msg
view model =
  div []
    [ h2 [] [ text "Demo App" ]
    , viewInfo model.info
    , viewToast model.toast
    ]

viewInfo : Maybe (Result Http.Error Info) -> Html Msg
viewInfo mbInfoResult =
  case mbInfoResult of
    Nothing ->
      text "Loading..."

    Just infoResult ->
      case infoResult of
        Err _ ->
          div []
            [ text "Could not load info" ]

        Ok info ->
          div []
            [ text "App loaded" ]

viewToast : Maybe Toast -> Html Msg
viewToast mbToast =
  case mbToast of
    Nothing ->
      div [] []

    Just toast ->
      div [] [ text toast.content ]

loadInfo : Cmd Msg
loadInfo =
  Http.get
    { url = "/info"
    , expect = Http.expectJson GotInfo infoDecoder
    }

infoDecoder : Decoder Info
infoDecoder =
  map Info
    (field "isNewVersionAvailable" bool)

The good bits are:

  • forcing to handle all possible actions (messages) and results (HTTP success and error scenarios)
  • expressive language features (union types, strong typing, records, switch-case expressions) ensure robust code (as in this code does not leave room for mistakes like null/undefined/unhandled exceptions/unhandled code path/wrong value type)
  • no leeway for various ways to get things done (as in there is only one way to handle HTTP requests, only one way to handle asynchronous message dispatches, only one way to parse HTTP responses)

But if Redux spreads things apart compared to modern React, Elm feels like it spreads things further apart by handling effect results separately (like sending HTTP request, parsing HTTP response and processing the result by dispatching another message).

One other example would be PureScript (or rather Halogen). PureScript itself elevates the complexity to the skies and beyond, by making you run around with monads like a headless chicken. Consider a “simple” example of sending an HTTP request:

module Main where

import Prelude

import Affjax.Web as AX
import Affjax.ResponseFormat as AXRF
import Data.Either (hush)
import Data.Maybe (Maybe(..))
import Effect (Effect)
import Effect.Aff.Class (class MonadAff)
import Halogen as H
import Halogen.Aff (awaitBody, runHalogenAff)
import Halogen.HTML as HH
import Halogen.HTML.Events as HE
import Halogen.HTML.Properties as HP
import Halogen.VDom.Driver (runUI)
import Web.Event.Event (Event)
import Web.Event.Event as Event

main :: Effect Unit
main = runHalogenAff do
  body <- awaitBody
  runUI component unit body

type State =
  { loading :: Boolean
  , username :: String
  , result :: Maybe String
  }

data Action
  = SetUsername String
  | MakeRequest Event

component :: forall query input output m. MonadAff m => H.Component query input output m
component =
  H.mkComponent
    { initialState
    , render
    , eval: H.mkEval $ H.defaultEval { handleAction = handleAction }
    }

initialState :: forall input. input -> State
initialState _ = { loading: false, username: "", result: Nothing }

render :: forall m. State -> H.ComponentHTML Action () m
render st =
  HH.form
    [ HE.onSubmit \ev -> MakeRequest ev ]
    [ HH.h1_ [ HH.text "Look up GitHub user" ]
    , HH.label_
        [ HH.div_ [ HH.text "Enter username:" ]
        , HH.input
            [ HP.value st.username
            , HE.onValueInput \str -> SetUsername str
            ]
        ]
    , HH.button
        [ HP.disabled st.loading
        , HP.type_ HP.ButtonSubmit
        ]
        [ HH.text "Fetch info" ]
    , HH.p_
        [ HH.text $ if st.loading then "Working..." else "" ]
    , HH.div_
        case st.result of
          Nothing -> []
          Just res ->
            [ HH.h2_
                [ HH.text "Response:" ]
            , HH.pre_
                [ HH.code_ [ HH.text res ] ]
            ]
    ]

handleAction :: forall output m. MonadAff m => Action -> H.HalogenM State Action () output m Unit
handleAction = case _ of
  SetUsername username -> do
    H.modify_ _ { username = username, result = Nothing }

  MakeRequest event -> do
    H.liftEffect $ Event.preventDefault event
    username <- H.gets _.username
    H.modify_ _ { loading = true }
    response <- H.liftAff $ AX.get AXRF.string ("https://api.github.com/users/" <> username)
    H.modify_ _ { loading = false, result = map _.body (hush response) }

Now add the halogen-store package to the mix to make use of Redux-like state management:

module Main where

import Prelude

import Affjax.Web as AX
import Affjax.ResponseFormat as AXRF
import Data.Either (hush)
import Data.Maybe (Maybe(..))
import Effect (Effect)
import Effect.Aff.Class (class MonadAff)
import Halogen as H
import Halogen.Aff as HA
import Halogen.HTML as HH
import Halogen.HTML.Events as HE
import Halogen.HTML.Properties as HP
import Halogen.VDom.Driver (runUI)
import Web.Event.Event (Event)
import Web.Event.Event as Event
import Halogen.Store.Monad (class MonadStore, updateStore, runStoreT)
import Halogen.Store.Connect (Connected, connect)
import Halogen.Store.Select (selectAll)
import Effect.Aff (launchAff_)

data StoreAction
  = StoreSetUsername String
  | StoreMakeRequest
  | StoreReceiveResponse (Maybe String)

reduce :: State -> StoreAction -> State
reduce store = case _ of
  StoreSetUsername username ->
    store { username = username, result = Nothing }
  StoreMakeRequest ->
    store { loading = true }
  StoreReceiveResponse response ->
    store { loading = false, result = response }

initialStore :: State
initialStore = { username: "", loading: false, result: Nothing }

main :: Effect Unit
main = launchAff_ do
  body <- HA.awaitBody
  root <- runStoreT initialStore reduce component
  void $ runUI root unit body

type State =
  { loading :: Boolean
  , username :: String
  , result :: Maybe String
  }

data Action
  = SetUsername String
  | MakeRequest Event
  | ReceiveState (Connected State Unit)

deriveState :: Connected State Unit -> State
deriveState { context: { username, loading, result }, input: _ } =
  { username: username
  , loading: loading
  , result: result
  }

component :: forall query output m. MonadAff m => MonadStore StoreAction State m => H.Component query Unit output m
component =
  connect selectAll $ H.mkComponent
    { initialState: deriveState
    , render
    , eval: H.mkEval $ H.defaultEval
      { handleAction = handleAction
      , receive = Just <<< ReceiveState
      }
    }

render :: forall m. State -> H.ComponentHTML Action () m
render st =
  HH.form
    [ HE.onSubmit \ev -> MakeRequest ev ]
    [ HH.h1_ [ HH.text "Look up GitHub user" ]
    , HH.label_
        [ HH.div_ [ HH.text "Enter username:" ]
        , HH.input
            [ HP.value st.username
            , HE.onValueInput \str -> SetUsername str
            ]
        ]
    , HH.button
        [ HP.disabled st.loading
        , HP.type_ HP.ButtonSubmit
        ]
        [ HH.text "Fetch info" ]
    , HH.p_
        [ HH.text $ if st.loading then "Working..." else "" ]
    , HH.div_
        case st.result of
          Nothing -> []
          Just res ->
            [ HH.h2_
                [ HH.text "Response:" ]
            , HH.pre_
                [ HH.code_ [ HH.text res ] ]
            ]
    ]

handleAction :: forall output m. MonadAff m => MonadStore StoreAction State m => Action -> H.HalogenM State Action () output m Unit
handleAction = case _ of
  SetUsername username -> do
    updateStore $ StoreSetUsername username

  MakeRequest event -> do
    H.liftEffect $ Event.preventDefault event
    username <- H.gets _.username
    updateStore $ StoreMakeRequest
    response <- H.liftAff $ AX.get AXRF.string ("https://api.github.com/users/" <> username)
    updateStore $ StoreReceiveResponse (map _.body (hush response))

  ReceiveState input ->
    H.put $ deriveState input

The really nice things about this approach are:

  • components could be self-sufficient, as opposed to Elm:
    • they can have both internal state and communicate with the external application via Aff
    • they can be extracted into separate modules, making them actually reusable components
  • state selectors and connecting to a store are seamlessly implemented based on existing Halogen tools (subscriptions)
  • it feels like you do not have to worry about state growing big, since each component explicitly declares which parts of that messy furball it needs (derives)

The bad news is that everything relies on monads and transformers - lifting, mapping, flat-mapping are just the very tip of the iceberg. Once you hit some mysterious error, it is quite tricky to understand what is going on - unlike Elm, which presents the error message, its location and the ways to fix it in a nicely structured and formatted manner.

Just looking at the type definitions is nauseating at best:

handleAction :: forall output m. MonadAff m => MonadStore StoreAction State m => Action -> H.HalogenM State Action () output m Unit

And then there is this bit, lifting everything to the same monad and then mapping and flat-mapping it to get the response body:

MakeRequest event -> do
  H.liftEffect $ Event.preventDefault event
  username <- H.gets _.username
  updateStore $ StoreMakeRequest
  response <- H.liftAff $ AX.get AXRF.string ("https://api.github.com/users/" <> username)
  updateStore $ StoreReceiveResponse (map _.body (hush response))

And do not forget that this monad is merely describing the computation, you will have to run it at some point:

-- Runs the Aff computation that mounts the store-backed root component into <body>.
-- `runHalogenAff` is referenced through the `HA` alias (import Halogen.Aff as HA),
-- the same alias used for `HA.awaitBody` on the next line.
main = HA.runHalogenAff do
  body <- HA.awaitBody
  root <- runStoreT initialStore reduce component
  let ui = runUI root unit body
  ui

The example on halogen-store suggests using launchAff_, but then you will have to cast the return value type to match the monad of the main function (Effect Unit) or lift runStoreT to the Effect Unit monad - whichever you find suitable:

main = launchAff_ do
  body <- HA.awaitBody
  root <- runStoreT initialStore reduce component
  let ui = runUI root unit body
  void ui

But having to worry about all these intricacies actually strengthens the point that PureScript is not for the faint-hearted - Elm prevails here.

The other drawback is that mixing logic in both the component and the reducers is weird - it is not clear from the Flux architecture where the side effects should live - like network calls, asynchronous actions, actions triggering other actions. Redux is known for suffering in all of these areas.

Elm solves this nicely with commands.

Halogen kind of takes a step backwards from Elm - it does have subscriptions, but it does not prevent you from issuing side effects from handleAction. And halogen-store does not have a recipe for complex chained actions.

Ultimately, I don’t think Redux is bad - the idea of having full visibility into all possible application interactions is scary in a complex project and it is hard to come up with a clean way to work around it, but one-way data flow is actually nice.

Interesting how developers went from “we don’t want Angular.js dirty checks - it is not clear where the data is flowing” to “we don’t want a single point of contention for all application interactions”.

In my eyes, the four technologies (modern-day React, Redux, Elm and PureScript) all come with their own pros and massive cons and there is no good or one-size-fits-all solution among them. And none of them ultimately solves the problem of managing application state and interactions in a non-bloated way. Maybe Angular or React 19 have an answer?

Bun is still undercooked

My skunkworks project was trying out Bun. It was not a successful project, but there are some learnings:

TL;DR: I think Bun is still undercooked and despite being super cool and competitive on paper, it is a bit too early to use it in big projects. But hey, it works for my blog!

What is Bun

Bun is a combined alternative to Node.js, a package manager (npm / yarn / pnpm), a bundler (vite / webpack / esbuild) and a test runner (vitest / jest).

Bun is ridiculously fast

Command Yarn Bun
yarn install 49 sec 7.5 sec
vite build 14 sec 0.9 sec
vite test forever 4.5 sec

This most likely has to do with what happens in those tools - Bun went with parsing files as ASTs, applying transformations and running them in memory (to the best of my knowledge, digging through the Bun code)

Some things work out of the box

Dependency management works like a charm. No questions asked. Bun is just 7x faster. Comparing the node_modules directories:

Only in node_modules_yarn:
    .yarn-state.yml
    @aashutoshrathi
    @isaacs
    @npmcli
    @pkgjs
    @tootallnate
    abbrev
    agentkeepalive
    aggregate-error
    aproba
    are-we-there-yet
    asynciterator.prototype
    cacache
    chownr
    clean-stack
    color-support
    console-control-strings
    deep-equal
    delegates
    depd
    eastasianwidth
    encoding
    env-paths
    err-code
    es-get-iterator
    exponential-backoff
    foreground-child
    fs-minipass
    gauge
    graceful-fs
    has
    has-unicode
    humanize-ms
    ip
    is-lambda
    jackspeak
    jsonc-parser
    make-fetch-happen
    minipass-collect
    minipass-fetch
    minipass-flush
    minipass-pipeline
    minipass-sized
    minizlib
    mkdirp
    negotiator
    node-gyp
    nopt
    npmlog
    object-is
    p-map
    promise-retry
    retry
    set-blocking
    smart-buffer
    socks
    socks-proxy-agent
    ssri
    stop-iteration-iterator
    string-width-cjs
    strip-ansi-cjs
    tar
    unique-filename
    unique-slug
    wide-align
    wrap-ansi-cjs

Only in node_modules_bun:
    confbox
    es-object-atoms
    word-wrap

Curious to see if those packages missing in bun’s node_modules are actually used anywhere.

Plugins

In Relational Migrator we use a few plugins with vite, namely svgr, vanilla-extract and sentry. Bun only supports a limited set of esbuild plugins and does not have the aforementioned plugins. Some of them work with minimal changes, some of them do not work at all.

svgr plugin

svgr worked with minimal alterations:

import svgrEsbuildPlugin from 'esbuild-plugin-svgr';

Bun.build({
    plugins: [
        svgrEsbuildPlugin() as unknown as BunPlugin,
    ]
})

But it required changing the imports from

import { ReactComponent as DatabaseAccessImage } from './assets/database-access-image.svg';

to

import DatabaseAccessImage from './assets/database-access-image.svg';

vanilla-extract plugin

This one loads vite server to compile the CSS and does not work no matter what I tried, throwing the following errors all over the place:

error: Styles were unable to be assigned to a file. This is generally caused by one of the following:

- You may have created styles outside of a '.css.ts' context
- You may have incorrect configuration. See https://vanilla-extract.style/documentation/getting-started
      at getFileScope (.../frontend/node_modules/@vanilla-extract/css/fileScope/dist/vanilla-extract-css-fileScope.cjs.dev.js:35:11)
      at generateIdentifier (.../frontend/node_modules/@vanilla-extract/css/dist/vanilla-extract-css.cjs.dev.js:175:7)
      at style (.../frontend/node_modules/@vanilla-extract/css/dist/vanilla-extract-css.cjs.dev.js:374:19)
      at .../frontend/src/shared/leafygreen-ui/badge/badge.css.ts:4:28

Followed by

error: Module._load is not a function. (In 'Module._load(file, parentModule)', 'Module._load' is undefined)
    at .../frontend/src/components/mapping-banner.css.ts:1:0

sentry plugin

This one was trivial and did not complain (I did not check if it actually works):

Bun.build({
    plugins: [
        sentryEsbuildPlugin({
                disable: !process.env.SENTRY_AUTH_TOKEN,
                org: 'mongodb-org',
                project: 'relational-migrator-frontend',
                telemetry: false,
                sourcemaps: {
                        filesToDeleteAfterUpload: '**/*.map',
                },
        }) as unknown as BunPlugin,
    ]
})

Bundling

Bun is great to run bundling, testing or manage packages from CLI when things are relatively simple. When you need plugins (for instance), the interactions become tricky. For specifying and configuring bundle-time plugins one needs to use Bun’s JS/TS API and make a custom build script:

await Bun.build({ ... });

By default, Bun does not log anything, which is actually quite inconvenient - not even build failures are logged. One has to get the result of Bun.build() and manually process its logs, which is a bit of a bummer:

const result = await Bun.build(...);

if (!result.success) {
  console.error('Build failed');

  for (const message of result.logs) {
    console.error(message);
  }
} else {
  console.info('Build succeeded');
}

Configuration

Another inconvenient interaction - some actions require entire scripts (like build configuration, serving files, etc.). Then there is a config file, bunfig.toml where users can specify some configurations for Bun.

Running tests

This one had the most issues on my end.

react-testing-library

Bun declares support for react-testing-library, which worked as expected.

Browser APIs

Had to use happy-dom and configure it in the bunfig.toml to enable some of the UI testing features (such as access to the window object). Yet, happy-dom still lacks support for Canvas API, for instance.

test.each

It is one example of Bun’s partial compatibility with Jest - with Jest one can use nice-ish string interpolation to generate the test name:

// Jest table-driven test: each row of the tagged template becomes one test case,
// and the `$name` placeholders in the title are filled in from the row's columns.
// The callback receives the row as an object keyed by the column headings.
test.each`
    currentStep | lastCompletedStep | progressType
    ${0}        | ${0}              | ${'active'}
    ${1}        | ${0}              | ${'inactive'}
    ${1}        | ${2}              | ${'checked'}
  `(
    'returns $progressType for $currentStep and $lastCompletedStep',
    ({ currentStep, lastCompletedStep, progressType }) => { }
);

With bun:test it is slightly different - you can’t use arguments out of order, nor do you have access to their names. Neither can you use this nice syntactic sugar for defining test cases in a table manner.

const cases = [
    // currentStep | lastCompletedStep | progressType
    [ 0, 0, 'active' ],
    [ 1, 0, 'inactive' ],
    [ 1, 2, 'checked' ],
  ];

  test.each(cases)(
    'For %p and %p returns %p',
    (currentStep, lastCompletedStep, progressType) => {
      expect(getProgressType(currentStep, lastCompletedStep)).toEqual(
        progressType
      );
    }
  );

Oh, and there is no describe.each() functionality at all, which makes defining suites of tests more tedious.

Mocks

Mocks work as expected, out of the box. There are mocks for system clock, which is nice. Had to replace vi.fn() with mock() and a corresponding import { mock } from 'bun:test';.

ObjectContaining matchers

When using nested matchers in the ObjectContaining, some of them are missing in Jest compatibility (like expect.toBeNumber):

expect(nodes).toEqual([
        expect.objectContaining({
          id: 'node-1',
          position: { x: expect.toBeNumber(), y: expect.toBeNumber() },
        }),
]);

Had to use expect.any(Number) instead:

expect(nodes).toEqual([
        expect.objectContaining({
          id: 'node-1',
          position: { x: expect.any(Number), y: expect.any(Number) },
        }),
]);

Using Fragment import alongside <>

If a component contains both import { Fragment } from 'react' and uses a shorthand <>, Bun will yell at test time (but not at build time, interestingly enough):

SyntaxError: Cannot declare an imported binding name twice: 'Fragment'.

If you specify a different jsxFragmentFactory in tsconfig.json and set "jsx": "react" (and not "react-jsx" or anything), you will get further.

After meddling with the Bun source code itself, I figured out a few things (for instance, it parses each file into an AST and modifies it to add missing imports, like Fragment, but ends up with duplicates), but even after applying some crude hacks to prevent it from adding those duplicate statements, I could not fix the issue. I left a comment on Bun’s GitHub issue, but in my experience developers do not pay enough attention to those.

Ended up manually changing sources for the libraries in question in node_modules folder directly (just for the test), which did actually help. Might be worth changing it in the libraries directly, but that won’t work with everything.

Ace editor

It is still kinda impossible to use UMD/AMD modules in conjunction with TS in Bun tests - the nature of UMD is that once the file is imported, it uses an IIFE (immediately invoked function expression) to define stuff, but Bun does not tolerate this (I presume it only parses the AST of the imported file but does not actually execute it in the right order).

Hence Ace editor, which uses UMDs, can not really be used as intended.

Bun’s meat and potatoes

I did a bit of digging in Bun’s source code and it seems… immature - commented out code, ignored tests, thousand-line-functions and files (js_parser.zig has 23.3k LOC). And this is on top of using Zig, which is still at version 0.12 (as of writing of this post, 10 May 2024) and has quite limited standard library (no remove and find methods in lists, no hash sets, etc.).

Bottom line

My experience shows that Bun might be fine to use in new and low-risk projects, but it is not ready to be a drop-in replacement in existing or more or less complex projects.

Strongly-typed front-end: experiment 2, simple application, in PureScript / Halogen

Contents

  1. Introduction
  2. Experiment 1, darken_color
  3. Experiment 2, simple application

A more “conventional” way to implement the front-end application in PureScript would be using a framework called Halogen.

Starting off with a “hello world” example:

module Main where

import Prelude

import Effect (Effect)
import Halogen as H
import Halogen.Aff as HA
import Halogen.HTML as HH
import Halogen.HTML.Events as HE
import Halogen.VDom.Driver (runUI)

main :: Effect Unit
main = HA.runHalogenAff do
  body <- HA.awaitBody
  runUI component unit body

data Action = Increment | Decrement

component =
  H.mkComponent
    { initialState
    , render
    , eval: H.mkEval $ H.defaultEval { handleAction = handleAction }
    }
  where
  initialState _ = 0

  render state =
    HH.div_
      [ HH.button [ HE.onClick \_ -> Decrement ] [ HH.text "-" ]
      , HH.div_ [ HH.text $ show state ]
      , HH.button [ HE.onClick \_ -> Increment ] [ HH.text "+" ]
      ]

  handleAction = case _ of
    Increment -> H.modify_ \state -> state + 1
    Decrement -> H.modify_ \state -> state - 1

Adding the utility code akin to the other technologies:

data Shape = Circle | Square

calculateArea :: Maybe Shape -> Float -> Float
calculateArea Nothing _ = 0
calculateArea (Just Circle) value = pi * value * value
calculateArea (Just Square) value = value * value

getShape :: String -> Maybe Shape
getShape "circle" = Just Circle
getShape "square" = Just Square
getShape _ = Nothing

This surfaces a few differences from Haskell, Elm and others:

  • there is no pi constant in the Prelude, so need to import one of the available definitions, I went with Data.Number
  • Float is not a type; there is Number, however
  • 0 is not a Number, it is Int, confusing the audience

These are all minor differences, however. But this code is not conventional PureScript either - it is working against the good practices of functional programming and thus defeats the purpose of these experiments. An example of this is the heavy reliance on String instead of using the available type system.

Let us change that a bit:

import Data.String.Read (class Read)

data Shape = Circle | Square

calculateArea :: Shape -> Number -> Number
calculateArea Circle value = pi * value * value
calculateArea Square value = value * value

instance Read Shape where
  read = case _ of
    "square" -> Just Square
    "circle" -> Just Circle
    _ -> Nothing

instance Show Shape where
  show = case _ of
    Square -> "square"
    Circle -> "circle"

Now, to the UI:

import Halogen.HTML.Properties as HP

render state =
  HH.div_
    [
      HH.select [] [
        HH.option [ HP.value "" ] [ HH.text "Select shape" ],
        HH.option [ HP.value (show Circle) ] [ HH.text (show Circle) ],
        HH.option [ HP.value (show Square) ] [ HH.text (show Square) ]
      ],
      HH.input [],
      HH.div_ [ HH.text "<area>" ]
    ]

In the application state we need to store the selected shape and the value, so we can utilize records for that:

initialState _ = { shape: Nothing, value: Nothing }

Then we need to modify the possible actions. Let’s stick to the same approach of utilizing the type system:

data Action = ChangeValue (Maybe Number) | ChangeShape (Maybe Shape)

The thing glueing the two together is the handleAction function:

handleAction = case _ of
  ChangeValue value ->
    H.modify_ \state -> state { value = value }
  ChangeShape shape ->
    H.modify_ \state -> state { shape = shape }

Here, unlike Haskell (to my best knowledge), the placeholder variable is being used for pattern matching against the only function argument. So instead of a little verbose

handleAction action = case action of
  -- ...

you can use this placeholder variable and just provide the branches for each of its possible values:

handleAction = case _ of
  -- ...

Modifying the state is done using the H.modify_ function (the MonadState modify_ from Control.Monad.State.Class, re-exported by the Halogen module), which allows us to only use the previous state value and provide a new state value, without the need to mess with monads. In turn, we modify the state record using the record update syntax:

state { shape = newShapeValue }

Now the only bit left is tying the UI with the actions:

import Halogen.HTML.Events as HE
import Data.String.Read (read)
import Data.Number as N
import Data.Tuple (Tuple(..))

render state =
  HH.div_
    [
      HH.select [ HE.onValueChange onShapeChanged ] [
        HH.option [ HP.value "" ] [ HH.text "Select shape" ],
        HH.option [ HP.value (show Circle) ] [ HH.text (show Circle) ],
        HH.option [ HP.value (show Square) ] [ HH.text (show Square) ]
      ],
      HH.input [ HE.onValueChange onValueChanged ],
      HH.div_ [ HH.text "<area>" ]
    ]

onShapeChanged v = ChangeShape (read v)

onValueChanged v = ChangeValue (N.fromString v)

showArea state =
  case res of
    Nothing ->
      HH.text "Choose shape and provide its parameter"

    Just (Tuple shape area) ->
      HH.text $ "Area of " <> (show shape) <> " is " <> (show area)

  where
    res = do
      shape <- state.shape
      value <- state.value
      let area = calculateArea shape value
      pure (Tuple shape area)

Here is where most of the fun and benefit of using PureScript comes into play.

First of all, the HE.onValueChange event handler (the onShapeChanged and onValueChanged functions) - it will be called with the new value for the input instead of an entire event object. This allows us to skip unpacking the raw value from that object.

Then, the action dispatchers take the value from the input and try to parse it, returning a Maybe a:

-- | Builds the action for a changed <select> value: `read v` yields a
-- | `Maybe Shape` (`Nothing` for an unknown or empty string), which is then
-- | wrapped in `ChangeShape`, so the handler itself returns an `Action`.
onShapeChanged :: String -> Action
onShapeChanged v = ChangeShape (read v)

-- | Builds the action for a changed <input> value: `N.fromString v` yields a
-- | `Maybe Number` (`Nothing` when the text is not a valid number), which is
-- | then wrapped in `ChangeValue`, so the handler itself returns an `Action`.
onValueChanged :: String -> Action
onValueChanged v = ChangeValue (N.fromString v)

It is actually quite an important part, since the shape might not be selected (making the <select> value an empty string) and the value might be either a blank string or not a valid number string. PureScript does not allow us to not handle these cases, so whenever we parse the user input, we get a Maybe a value and we have to handle both scenarios: when the value is valid and when it is not.

The function showArea is where this neatness comes together - we handle both values as one, using the Data.Tuple type to pair them together:

res = do
  shape <- state.shape -- unpacks `Shape` from `Maybe Shape`
  value <- state.value -- unpacks `Number` from `Maybe Number`
  let area = calculateArea shape value -- always returns a Number, since both `shape` and `value` are always provided
  pure (Tuple shape area) -- returns a tuple of shape and area, packed in a `Maybe`

The above code will short-circuit whenever it tries to unpack a value from a Nothing, and the whole do block will return Nothing.

Putting it all together:

module Main where

import Prelude

import Data.Maybe (Maybe(..))
import Data.Number as N
import Data.String.Read (class Read, read)
import Data.Tuple (Tuple(..))
import Effect (Effect)
import Halogen as H
import Halogen.Aff as HA
import Halogen.HTML as HH
import Halogen.HTML.Events as HE
import Halogen.HTML.Properties as HP
import Halogen.VDom.Driver (runUI)

data Shape = Circle | Square

calculateArea :: Shape -> Number -> Number
calculateArea Circle value = N.pi * value * value
calculateArea Square value = value * value

instance Read Shape where
  read = case _ of
    "square" -> Just Square
    "circle" -> Just Circle
    _ -> Nothing

instance Show Shape where
  show = case _ of
    Square -> "square"
    Circle -> "circle"

data Action = ChangeValue (Maybe Number) | ChangeShape (Maybe Shape)

component =
  H.mkComponent
    { initialState
    , render
    , eval: H.mkEval $ H.defaultEval { handleAction = handleAction }
    }
  where
  initialState _ = { shape: Nothing, value: Nothing }

  render state =
    HH.div_
      [
        HH.select [ HE.onValueChange onShapeChanged ] [
          HH.option [ HP.value "" ] [ HH.text "Select shape" ],
          HH.option [ HP.value (show Circle) ] [ HH.text (show Circle) ],
          HH.option [ HP.value (show Square) ] [ HH.text (show Square) ]
        ],
        HH.input [ HE.onValueChange onValueChanged ],
        HH.div_ [ showArea state ]
      ]

  onShapeChanged v = ChangeShape (read v)

  onValueChanged v = ChangeValue (N.fromString v)

  showArea state =
    case res of
      Nothing ->
        HH.text "Select shape and provide its value"

      Just (Tuple shape area) ->
        HH.text $ "Area of " <> (show shape) <> " is " <> (show area)
    where
      res = do
        shape <- state.shape
        value <- state.value
        let area = calculateArea shape value
        pure (Tuple shape area)

  handleAction = case _ of
    ChangeValue value ->
      H.modify_ \state -> state { value = value }
    ChangeShape shape ->
      H.modify_ \state -> state { shape = shape }

main :: Effect Unit
main = HA.runHalogenAff do
  body <- HA.awaitBody
  runUI component unit body

More than TypeScript

Back in 2011 frontend was a very different thing - JavaScript had no class, Object.entries / Object.keys, promises were a proof of concept idea (unless you used 3rd party library bluebird) and Node was v0.10.

Then came CoffeeScript, which added nice helper features to JavaScript - list comprehensions, classes, string interpolation and if statements (meaning you could use them for variable assignment):

# if statements
text = if happy and knowsIt
  chaChaCha()
else if sexy
  knowsIt()
else if tooSexy
  removeShirt()
else
  showIt()

# list comprehensions
courses = [ 'greens', 'caviar', 'truffles', 'roast', 'cake' ]
menu = (i, dish) -> "Menu Item #{i}: #{dish}"
menu i + 1, dish for dish, i in courses

# ranges and list comprehensions
countdown = (num for num in [10..1])

# iterating over object entries
yearsOld = max: 10, ida: 9, tim: 11

ages = for child, age of yearsOld
  "#{child} is #{age}"

Whilst it was still compiled to an inferior ES5 JavaScript, it helped to organise the code and make it substantially cleaner. The one drawback is that it did not provide any type safety (only added recently, via @flow and you would still have to duplicate your classes if you wanted to use it - once in coffeescript and once in @flow annotations). Anyhow, CoffeeScript was a good tool for the task, if you ask me.

Then came Dart, Flow and TypeScript, which were also compiled to ES5 JavaScript, but instead of adding new syntax features, they aimed to solve a different problem - by introducing types they sought to reduce the number of runtime errors with type checks at compile time.

This sounded so good that many developers and companies immediately got on board. Alas, the new tech still suffered from the same issue as JavaScript itself, the most common cause of runtime errors - null and undefined were still a thing, causing the exact same runtime errors.

The one true benefit offered by TypeScript over the others was that it allowed seamless use of existing JavaScript code. And, provided you have the type signatures for that JavaScript code, it could even perform type checking on it too, effectively reducing the requirements for using TypeScript in the existing codebase. No wonder it was an easy buy-in for many projects.

Fast-forward to 2024 (twelve years since its first release) and TypeScript dominates the frontend world. Over the years it seems to have been focused on improving the type system in terms of what you can do with types - union types, partial types, etc. It still suffers from the original issues though and there still are not too many new syntactic features to pair with its powerful type system (like pattern matching).

With the new EcmaScript standards, classes and promises becoming the first-class citizens in all browsers (even Internet Explorer / Edge, when it was still around), the APIs and syntax became more mature (Object.entries, async / await to reduce callback hell, for .. of, const / let, string interpolation and many others). There are still no list comprehensions or conditional expressions / pattern matching though.

TypeScript still does help in what I think is a small subset of highly-specific scenarios like navigating the code (jump to definition / declaration / find usages) in the IDE and refactoring the code. But IDEs matured as well, code navigation is not as much of a feature unique to TypeScript anymore as it used to be (in the era of Sublime Text). TypeScript can prevent some really naive errors at compile time like using a number instead of an object, but I don’t think developers run into them these days - because, again, IDEs are really powerful these days, even VSCode and they help eliminate such mistakes to a large degree.

Let’s consider a real-world scenario (from my work project): server returns a list of LocationType objects. Each one of the objects can be either a Collection, a Document (in a specific collection) or a Table, each with its own subset of fields, specific to the type. We need to handle each case differently (display them on the UI differently).

The OpenAPI spec:

tableLocation:
  type: object
  required:
    - table
  properties:
    table:
      type: string

collectionLocation:
  type: object
  required:
    - collection
  properties:
    collection:
      type: string

documentLocation:
  type: object
  required:
    - collection
    - document
  properties:
    collection:
      type: string
    document:
      type: string

location:
  oneOf:
    - $ref: "#/components/tableLocation"
    - $ref: "#/components/collectionLocation"
    - $ref: "#/components/documentLocation"

And the TypeScript code generated by OpenAPI for the above spec looks like this:

interface CollectionLocation {
    collection: string;
}

interface DocumentLocation {
    collection: string;
    document: string;
}

interface TableLocation {
    table: string;
}

function instanceOfCollectionLocation(value: object): boolean {
    let isInstance = true;
    isInstance = isInstance && "collection" in value;

    return isInstance;
}

function instanceOfDocumentLocation(value: object): boolean {
    let isInstance = true;
    isInstance = isInstance && "collection" in value;
    isInstance = isInstance && "document" in value;

    return isInstance;
}

function instanceOfTableLocation(value: object): boolean {
    let isInstance = true;
    isInstance = isInstance && "table" in value;

    return isInstance;
}

function CollectionLocationFromJSONTyped(json: any, ignoreDiscriminator: boolean): CollectionLocation {
    if ((json === undefined) || (json === null)) {
        return json;
    }
    return {
        'collection': json['collection'],
    };
}

function DocumentLocationFromJSONTyped(json: any, ignoreDiscriminator: boolean): DocumentLocation {
    if ((json === undefined) || (json === null)) {
        return json;
    }
    return {
        'collection': json['collection'],
        'document': json['document'],
    };
}

function TableLocationFromJSONTyped(json: any, ignoreDiscriminator: boolean): TableLocation {
    if ((json === undefined) || (json === null)) {
        return json;
    }
    return {
        'table': json['table'],
    };
}

type JobLocation = CollectionLocation | DocumentLocation | TableLocation;

function JobLocationFromJSONTyped(json: any, ignoreDiscriminator: boolean): JobLocation {
    if ((json === undefined) || (json === null)) {
        return json;
    }
    return { ...CollectionLocationFromJSONTyped(json, true), ...DocumentLocationFromJSONTyped(json, true), ...TableLocationFromJSONTyped(json, true) };
}

This might be a bit too harsh on TypeScript as a language, considering this is not the best implementation (in my opinion), but this highlights one of the issues with TypeScript: this very same code caused a number of runtime errors caught by users - not exceptions, not compilation errors, but wrong UI behaviour - on the UI all locations were treated as a table location.

The reason why this was happening is the code itself - it does not really handle the choice type JobLocation correctly: instead of picking exactly one of the alternatives, it merges the fields of all of them into a single object. To put it roughly - instead of a oneOf it essentially returns an allOf object.

Now, even if we were to rewrite it by hand (which would defeat the purpose of using OpenAPI and could be harder to keep in sync between client and server code), we would end up with something like this:

type CollectionLocation = {
    collection: string;
}

type DocumentLocation = {
    collection: string;
    document: string;
}

type TableLocation = {
    table: string;
}

function instanceOfCollectionLocation(value: object): boolean {
    return ("collection" in value) && !("document" in value);
}

function instanceOfDocumentLocation(value: object): boolean {
    return ("collection" in value) && ("document" in value);
}

function instanceOfTableLocation(value: object): boolean {
    return ("table" in value);
}

function CollectionLocationFromJSONTyped(json: any): CollectionLocation | undefined {
    if ((json === undefined) || (json === null)) {
        return undefined;
    }
    return {
        'collection': json['collection'],
    };
}

function DocumentLocationFromJSONTyped(json: any): DocumentLocation | undefined {
    if ((json === undefined) || (json === null)) {
        return undefined;
    }
    return {
        'collection': json['collection'],
        'document': json['document'],
    };
}

function TableLocationFromJSONTyped(json: any): TableLocation | undefined {
    if ((json === undefined) || (json === null)) {
        return undefined;
    }
    return {
        'table': json['table'],
    };
}

type JobLocation = CollectionLocation | DocumentLocation | TableLocation;

function JobLocationFromJSONTyped(json: any): JobLocation | undefined {
    if ((json === undefined) || (json === null)) {
        return undefined;
    }
    if (instanceOfCollectionLocation(json) && !instanceOfDocumentLocation(json) && !instanceOfTableLocation(json)) {
      return CollectionLocationFromJSONTyped(json);
    }
    if (instanceOfDocumentLocation(json) && !instanceOfCollectionLocation(json) && !instanceOfTableLocation(json)) {
      return DocumentLocationFromJSONTyped(json);
    }
    if (instanceOfTableLocation(json) && !instanceOfCollectionLocation(json) && !instanceOfDocumentLocation(json)) {
      return TableLocationFromJSONTyped(json);
    }
    return undefined;
}

This is quite verbose code, with a few bad practices in place (the use of any and object, the need to always be conscious of undefined), but this is literally what we ended up doing (the helpers, not redefining the types).

The types in TypeScript (or rather classes and interfaces) also come with a few confusing tricks you have to keep in mind - if we were to use class instead of type, as follows

class CollectionLocation {
    constructor(public collection: string) {}
}

class DocumentLocation {
    constructor(public collection: string, document: string) {}
}

class TableLocation {
    constructor(public table: string) {}
}

we would eventually run into a similar bug at runtime, which is perfectly valid behaviour from the perspective of TypeScript, because of type compatibility, meaning DocumentLocation and CollectionLocation could be used interchangeably, since they have a compatible subset of fields:

const a: CollectionLocation = new DocumentLocation('col', 'doc'); // ok
const b: DocumentLocation = new CollectionLocation('col'); // also ok
const c: TableLocation = a; // not ok

This would not work if CollectionLocation, DocumentLocation and TableLocation were types instead:

const a: CollectionLocation = { collection: 'col', document: 'doc' }; // not ok
const b: DocumentLocation = { collection: 'col' }; // not ok
const c: TableLocation = b; // not ok

And the code to parse location from JSON is actually quite ugly. It would benefit so much from switch expressions or pattern matching!

And we still have to remember to handle those potentially undefined values whenever we use the helper functions. And here’s an example from literally a few days ago:

interface Job {
    projectId: string;
}

const jobsInProgress: Job[];

const message = useProjectStatus(
    jobsInProgress[0]?.projectId ?? ''
);

const useProjectStatus = (projectId: string) {
    const { data } = useQuery({ queryFn: () => fetch(`/project/${projectId}`) });

    return data;
};

The above code started throwing an error, since the server responded with 500 Server Error. The reason was quite simple - the projectId, which we recently started to validate on the server (expecting it to be a valid ID), was a blank string. The interesting thing: no one had questioned the very line causing the “default value” for projectId to become an empty string:

jobsInProgress[0]?.projectId ?? ''

And issues like these are unbelievably common in the front-end world, while being quite tricky to detect and resolve. To fix this particular issue we added client-side validation (to an extent) to run the query only when the projectId value is provided:

useQuery({
  enabled: !!projectId,
  queryFn: () => fetch(`/project/${projectId}`)
})

The problem remains: it is really easy to miss this rather small fallback to an empty string.

There is a solution in the Scala world that somewhat addresses this issue - refined types, which allow you to have something along the lines of:

import eu.timepit.refined.*
import eu.timepit.refined.api.Refined
import eu.timepit.refined.string.*

type NonBlankString = MatchesRegex[".+"]

def useProjectStatus(projectId: String Refined NonBlankString) = ???

This would require wrapping all the values passed to fetchData in a refineV[NonBlankString]() call and handling the case when the validation fails:

def generateId(): String = List("some", "").last

def fetchData(id: String Refined NonBlankString) = println(s"fetching '$id'...")

def main(args: Array[String]) = {
  for {
    id <- refineV[NonBlankString](generateId())
  } yield fetchData(id)
}

But in the land of TypeScript, there is only so much you can do - TypeScript only works at compile time.

The above examples might sound like very far-fetched edge case scenarios, but keep in mind: this is the code generated automatically by one of the most popular tools from a trivial schema. This is not as far-fetched as it might seem.

Can we do better in TypeScript? Something like refined in Scala? What if we had a powerful type system and syntax to support it? And, if possible, get rid of the null and undefined along the way?

The problem of null and undefined can be mitigated to an extent by using some concepts of functional programming, similarly to how I showcased some time ago. It would be quite hard to achieve, though, given the problem remains imbued in the language itself. Moreover, targeting the issues described above, it would take an entire standard library to really reduce the possibility of the issue:

const jobInProgressMaybe: Maybe<Job> = new List<Job>(jobsInProgress).first();
const projectIdMaybe: Maybe<string> = jobInProgress.map(j => j.projectId);

useQuery({
  enabled: projectIdMaybe.isSome(),
  queryFn: () =>
    projectIdMaybe
      .flatMap(projectId => fetch(`/project/${projectId}`))
      .orElse(Promise.reject('no projectId'))
})

We could try to address the empty string issue with the extensive type system:

type NonEmptyString<T extends string> = '' extends T ? never : T;
type MyString<T extends string> = T;

function fetchData<T extends string>(id: NonEmptyString<T>) {
  console.log(`Fetching ${id}`);
}

But it would only work if all the values are known at compile time, which is easily broken with the simplest test:

function generateId(): NonEmptyString<string> {
  return ['moo', ''][1] as NonEmptyString<string>;
}

fetchData(''); // not ok
fetchData(generateId()); // ok, but guaranteed undefined behaviour at runtime

And it would take even more effort to make all types uniquely identifiable (to solve the choice type problem).

The way most developers would approach solving similar issues in a real-world project would be (at best) adding some linters, checkers and relying on automated tests and high-quality code reviews. In my experience, this is a flimsy excuse rather than a real solution, and it does not work most of the time - especially in edge case scenarios.

This is where I’d suggest using another language altogether, one which, similarly to CoffeeScript and TypeScript back in the day, solves some problems at compile time. And suggest I will.

There is a big warning before I proceed though: adopting another technology is a rather big decision, not to be taken lightly. Some might see it as an impossible switch, one that only a few small or indie developers on rather unimportant projects would make. But remember it was the same story with ES6, TypeScript, bundlers and pretty much any significant upgrade in the past.

A balance bike is a good tool to get you going - it gets you from walking to moving fast. But if you want to get faster and further, you have to drop it at some stage in favour of a more advanced bike.

Similarly, TypeScript and ESNext got you from plain callback-hell-infested JavaScript code to a better place - you can refactor code faster, it saves you from a few errors at compile time, the code is much cleaner and more concise now. But if you really want to get even further, you will have to make a leap of faith, make an investment into the future.

Here is my big controversial suggestion: a pure functional language, one with a strong type system, which does not have a concept of null and undefined in the first place, and with a nice sweet syntax.

Before landing on a specific choice, check out Elm (dead, but a good starting point) and PureScript, in that order. Let me explain.

Elm is like a very simplified Haskell - it is a pure functional language with a subset of Haskell syntax. It has a nice compiler with really good error messages. It enforces a structure for your application (redux-like). It gives you a gentle introduction to the functional programming concepts and it targets browsers (web applications). With its architecture, you can look at the message (action from redux) type and see exactly what are all possible operations in the application (which makes reading code and getting to know new codebases much easier).

On a bad note, it has not been developed since 2019, it comes with an entire runtime (saves you from runtime errors, but blows up the bundle size) and it is an all-or-nothing commitment for the project - it is an all-in-one platform and if you want to gradually update your application from React - sorry, you will have to rewrite entire parts of your application entirely in Elm. The good point turned bad: having all possible actions defined in one message type makes complex applications really complex (with one massive type definition, an issue very familiar to developers who had to deal with Redux).

here could have been an Elm code sample

The next step on this journey would be PureScript. It is an actively developed language, it has a minimal footprint after being compiled to JS (much smaller than Elm), it has a very rich ecosystem and, best of all, it has a very simple interop with JS and it can compile just one module. Top it up with the Halogen framework and you effectively got yourself Elm on steroids. The downside is that it is a slightly more complex platform (language and framework) compared to Elm, so the learning curve is a bit steeper.

The above example of CoffeeScript code could be written in plain PureScript like this:

foreign import happy :: Boolean
foreign import knowsIt :: Boolean
foreign import sexy :: Boolean
foreign import tooSexy :: Boolean
foreign import chaChaCha :: String
foreign import knowsItStr :: String
foreign import removeShirt :: String
foreign import showIt :: String

import Data.Array ((..), mapWithIndex)
import Data.Map as M
import Data.Tuple (Tuple(..))

-- if statements with multiple branches become pattern matching
text
  | happy && knowsIt = chaChaCha
  | sexy = knowsItStr
  | tooSexy = removeShirt
  | otherwise = showIt

-- list comprehensions become function application
courses = [ "greens", "caviar", "truffles", "roast", "cake" ]

-- string interpolation is possible via an external packages
-- https://pursuit.purescript.org/packages/purescript-interpolate
import Data.Interpolate as I
menu' i dish = I.i "Menu Item " i ": " dish

-- https://pursuit.purescript.org/packages/purescript-template-strings
import Data.TemplateString.Unsafe ((<~>))
menu'1 i dish = "Menu Item ${i}: ${dish}" <~> { i: i, dish: dish }

import Data.TemplateString ((<->))
import Data.Tuple.Nested ((/\))
menu'2 i dish = "Menu Item ${i}: ${dish}" <-> [ "i" /\ i, "dish" /\ dish ]

-- pure PureScript string interpolation
menu i dish = "Menu Item " <> (show i) <> ": " <> dish

x i dish = menu (i + 1) dish

-- can not just call a function and ignore its result
x' = mapWithIndex x courses

-- ranges become Array monad
-- countdown :: Array Int
countdown = do
  num <- 10 .. 1
  pure num

-- JavaScript objects exist in a separate package
import Foreign.Object as FO
yearsOld' = FO.fromHomogeneous { max: 10, ida: 9, tim: 11 }

-- object as Map
yearsOld = M.fromFoldable [Tuple "max" 10, Tuple "ida" 9, Tuple "tim" 11]

y child age = (show child) <> " is " <> (show age)
ages = map y yearsOld
ages = map y yearsOld'

The real deal with this approach is how to migrate from an existing (most likely) React/TypeScript/(webpack | vite) ecosystem to PureScript?

Expanding on Scott Wlaschin’s talk, you can (and probably should) separate the pure application logic from IO, potentially utilising the foreign imported functions to interact with the existing JS code (libraries). This way you keep your application logic error-free, and all the errors that can happen are shifted towards the presentation layer (MVC/MVP, remember this concept?).

This would be the best strategy for most projects, migrating one bit at a time and making the application less and less error prone whilst not wreaking havoc by rewriting everything from scratch (very few businesses will buy into that).

The bigger issue is that most modern frontend apps I have seen are so mangled in mixing the business logic and the presentation layer, it would be challenging (to say the least) to unmangle it back to a reasonable code. Check how we handle a UI action, triggering an HTTP request and updating both the UI (to display the request progress/status) and the application state (for other parts of the UI) at the same time.

here could have been a real-world application interaction handling code sample

Calling PureScript code from JavaScript (based on FFI example in PureScript book):

module Test where

import Prelude

gcd :: Int -> Int -> Int
gcd 0 m = m
gcd n 0 = n
gcd n m
  | n > m     = gcd (n - m) m
  | otherwise = gcd (m - n) n

data ZeroOrOne a = Zero | One a

inc :: ZeroOrOne Int -> ZeroOrOne Int
inc Zero = Zero
inc (One n) = One (n + 1)

_zero = Zero
_one = One 1
_two = One 2

and then in JS:

import Test from 'Test.js';

Test.gcd(15)(20);

const _zero = new Test.Zero();
const _one = new Test.One(1);
const _two = new Test.One(2);

console.log(Test.inc(_zero));
console.log(Test.inc(_one));
console.log(Test.inc(_two));

In the other direction (calling JS code from PureScript):

export const setItem = key => value => () =>
  window.localStorage.setItem(key, value);

export const getItem = key => () =>
  window.localStorage.getItem(key);

and then in PureScript:

foreign import setItem :: String -> String -> Effect Unit

foreign import getItem :: String -> Effect Json

import Data.Argonaut (class DecodeJson, class EncodeJson)
import Data.Argonaut.Decode.Generic (genericDecodeJson)
import Data.Argonaut.Encode.Generic (genericEncodeJson)
import Data.Generic.Rep (class Generic)

-- define PhoneType

derive instance Generic PhoneType _

instance EncodeJson PhoneType where encodeJson = genericEncodeJson
instance DecodeJson PhoneType where decodeJson = genericDecodeJson

processItem :: Json -> Either String Person
processItem item = do
  jsonString <- decodeJson item
  j          <- jsonParser jsonString
  decodeJson j

main = do
  item <- getItem "person"
  initialPerson <- case processItem item of
    Left  err -> do
      log $ "Error: " <> err <> ". Loading examplePerson"
      pure examplePerson
    Right p   -> pure p

Just to reiterate, I do understand that converting the application (and developers) to this new weird technology is an almost impossible task, especially in a large long-lived project. One way to reason about it and justify the transition is the resilience requirements of a project (the need for actually error-free code) and the amount of time and effort spent to date on finding and fixing those nasty bugs and undefined behaviours in an application.

IO impact

At MongoDB I work on the Relational Migrator project - a tool which helps people migrate their relational database to MongoDB. And recently we grew interested in the performance of our tool. Due to the nature of the migrations, they are usually extremely long (potentially even never ending, for some scenarios). It is rather valuable information to know where we can speed things up.

Hence we ran a profiler on a relatively big database of 1M rows. And this was what we saw:

The handleBatch method is where the meat and potatoes of our migration logic reside. It lasts for approx. 6.5 sec. We could have debated on which parts of this flame graph we could optimize (and we actually did), but we first decided to take a quick look at the same graph from the higher level - not the CPU time (when CPU is actually doing the active work) but the total time:

The entire application run took 4,937 sec (1hr 22min 17sec). Of which, the migration itself took only 130 sec:

The biggest chunk of it was writing to MongoDB database at 120 sec:

The actual migration logic is really just 3.5 sec:

So out of 130 sec of the actual migration run, the actual logic took 3.5 sec or mere 2.69%. The rest was just IO (input/output). Which we also saw on the thread timeline:

Most of the time, all the threads were sleeping.

This is not new information, just a reminder that the slowest part of pretty much any application is input-output.

Simulating network outages in Docker

Recently, for my work project I needed to simulate a system of ours going offline for a period of time (similar to having a network outage on a customer side).

I figured there are two ways to do it:

  • using Docker’s networks
  • using the host OS’s firewall

With Docker network it is as easy as

docker network disconnect <network-name> <container-name>

This way you disconnect a container from a network (even if it is a bridge network, exposing the container to the host OS).

You can find a list of networks with docker network ls and list of containers with docker ps.

To roll it back (simulate recovery from an outage), simply

$ docker network connect <network-name> <container-name>

Disconnecting a container from a network is okay, but sometimes you might want to have fine-grained control over the outage, like forbidding a specific IP or port from being accessed by your container.

With the firewall it is totally possible, but the steps differ for Linux and OSX.

In Linux you use iptables and control a rule group specific to Docker only:

$ iptables -I DOCKER -p tcp --dport 27017 -j DROP

The above command will block all TCP traffic on port 27017 for all Docker containers. To revert this, run

$ iptables -I DOCKER -p tcp --dport 27017 -j ACCEPT

To control a specific IP address use the -s parameter:

$ iptables -I DOCKER -p tcp -s 192.168.0.10 --dport 27017 -j DROP
$ iptables -I DOCKER -p tcp -s 192.168.0.10 --dport 27017 -j ACCEPT

On the other hand, OSX uses a tool with BSD roots, pf. You can use the /etc/pf.conf file to mess with firewall rules.

Blocking traffic is achieved by adding a rule like below to the /etc/pf.conf file:

block drop out quick proto tcp from any to any port 27017

followed by reloading the rule list with

$ pfctl -f /etc/pf.conf

In case pf is disabled, it can be enabled by running

$ pfctl -e

Re-enabling the traffic is as easy as removing (or commenting out) the rule line in the /etc/pf.conf file and reloading it with pfctl -f /etc/pf.conf.

How unique are your bundles?

In modern front-end development we are all used to package managers, transpilation and bundling. These concepts are the byproduct of the tools which we hoped would simplify the developer’s burden and speed up the development.

However, how confident are you that these tools are actually doing a good job?

Developers seem comfortable off-loading the processing power to users’ machine (browser, predominantly). We are not surprised by seeing slow websites anymore. A simple blog is downloading 55MB of JavaScript? Seems fine nowadays.

I currently work on a fairly small tool (MongoDB Relational Migrator), which also utilizes TypeScript, React and, of course, bundling. We use Vite for that. Our bundles are split into chunks (but I have accounted for that, too).

I went ahead and wrote a rather simple script which parses the bundles (using TypeScript compiler API, because why not), extracting the function definitions (both arrow functions and plain old function) and counts how many times they occur in the file. For this last bit, to make sure I am not counting a => true and x => true as different occurrences, I am minimizing the function definition with uglifyjs and counting the SHA256 hashes of the minimized functions (just to have a reasonable key in my hashmap instead of entire function code).

These are my findings.

Out of 54 chunks, 47 are not css-in-js chunks. Out of the 47 remaining, 7 have any significant duplication (over 5%). But when they do, they do it hard: duplication varies between 18% and a whopping 42% of sheer file size. Absolute numbers are also astonishing: 33% to 59% of functions are duplicates.

Found 15192 functions, 8963 are unique (59%)
Duplicates length: 1518418 bytes out of 3537579 bytes are duplicate code (42.92%)

Found 1202 functions, 494 are unique (41.1%)
Duplicates length: 130649 bytes out of 340227 bytes are duplicate code (38.4%)

Found 513 functions, 231 are unique (45.03%)
Duplicates length: 50160 bytes out of 136057 bytes are duplicate code (36.87%)

Found 598 functions, 267 are unique (44.65%)
Duplicates length: 57607 bytes out of 164737 bytes are duplicate code (34.97%)

Found 17 functions, 10 are unique (58.82%)
Duplicates length: 1932 bytes out of 6532 bytes are duplicate code (29.58%)

Found 154 functions, 98 are unique (63.64%)
Duplicates length: 11140 bytes out of 45135 bytes are duplicate code (24.68%)

Found 968 functions, 651 are unique (67.25%)
Duplicates length: 52616 bytes out of 281406 bytes are duplicate code (18.7%)

I thought my code might be wrong, so I looked into the bundle code itself. Here’s a short excerpt:

Object.assign.bind():function(e){for(var t=1;t<arguments.length;t++){var n=arguments[t];for(var r in n)Object.prototype.hasOwnProperty.call(n,r)&&(e[r]=n[r])}return e},yR.apply(this,arguments)}var Zce;function bR(){return bR=Object.assign?Object.assign.bind():function(e){for(var t=1;t<arguments.length;t++){var n=arguments[t];for(var r in n)Object.prototype.hasOwnProperty.call(n,r)&&(e[r]=n[r])}return e},bR.apply(this,arguments)}var Wce;function wR(){return wR=Object.assign?Object.assign.bind():function(e){for(var t=1;t<arguments.length;t++){var n=arguments[t];for(var r in n)Object.prototype.hasOwnProperty.call(n,r)&&(e[r]=n[r])}return e},wR.apply(this,arguments)}var Uce;function $R(){return $R=Object.assign?Object.assign.bind():function(e){for(var t=1;t<arguments.length;t++){var n=arguments[t];for(var r in n)Object.prototype.hasOwnProperty.call(n,r)&&(e[r]=n[r])}return e},$R.apply(this,arguments)}var Gce;function OR(){return OR=Object.assign?Object.assign.bind():function(e){for(var t=1;t<arguments.length;t++){var n=arguments[t];for(var r in n)Object.prototype.hasOwnProperty.call(n,r)&&(e[r]=n[r])}return e},OR.apply(this,arguments)}var Kce;function xR(){return xR=Object.assign?Object.assign.bind():function(e){for(var t=1;t<arguments.length;t++){var n=arguments[t];for(var r in n)Object.prototype.hasOwnProperty.call(n,r)&&(e[r]=n[r])}return e},xR.apply(this,arguments)}

See how the following fragment of code repeats multiple times:

function(e){for(var t=1;t<arguments.length;t++){var n=arguments[t];for(var r in n)Object.prototype.hasOwnProperty.call(n,r)&&(e[r]=n[r])}return e}

In fact, this exact same fragment of code repeats 137 times in the same piece of bundle chunk (same file):

Repeated function definition in a single chunk of code

By the way, this is a production build of our front-end, built using Vite, with minification enabled.

The raw length of this function code is 146 characters. So in a single place, in a single file, you have 136 * 146 = 19_856 bytes of waste (137 copies minus the one that is actually needed). Meaning, the browser has to load these 20KB of code, parse it and create 136 duplicate functions.

Looking at the overall size of 3.5MB of code in this chunk and its insane 43% of duplicated code (in sheer bytes, not occurrences, so 1.5MB wasted), imagine how much faster this single file could have been loaded in a browser.

I was keen on seeing which functions get duplicated most often and ran my script on the entire build output directory. Here are the top 80-ish offenders:

Function code (minimized) Copies
function(r){for(var t=1;t<arguments.length;t++){var n,o=arguments[t];for(n in o)Object.prototype.hasOwnProperty.call(o,n)&&(r[n]=o[n])}return r} 2205
function n(){return Object.assign&&Object.assign.bind(),n.apply(this,arguments)} 1197
function n(){return Object.assign,n.apply(this,arguments)} 1008
function(){} 753
function(i){this.a=i} 250
function(n,e){if(null==n)return{};for(var r,t={},f=Object.keys(n),u=0;u<f.length;u++)r=f[u],0<=e.indexOf(r)||(t[r]=n[r]);return t} 191
function(e,r){if(null==e)return{};var t,n=function(e,r){if(null==e)return{};for(var t,n={},l=Object.keys(e),o=0;o<l.length;o++)t=l[o],0<=r.indexOf(t)||(n[t]=e[t]);return n}(e,r);if(Object.getOwnPropertySymbols)for(var l=Object.getOwnPropertySymbols(e),o=0;o<l.length;o++)t=l[o],0<=r.indexOf(t)||Object.prototype.propertyIsEnumerable.call(e,t)&&(n[t]=e[t]);return n} 187
function(e,r){return r=r||e.slice(0),Object.freeze(Object.defineProperties(e,{raw:{value:Object.freeze(r)}}))} 159
function(n){return this===n} 119
function(r,t){if("object"!=typeof r||null===r)return r;var e=r[Symbol.toPrimitive];if(void 0===e)return("string"===t?String:Number)(r);e=e.call(r,t||"default");if("object"!=typeof e)return e;throw new TypeError("@@toPrimitive must return a primitive value.")} 113
function(r,e,t){return i=function(r,e){if("object"!=typeof r||null===r)return r;var t=r[Symbol.toPrimitive];if(void 0===t)return String(r);t=t.call(r,e);if("object"!=typeof t)return t;throw new TypeError("@@toPrimitive must return a primitive value.")}(e,"string"),(e="symbol"==typeof i?i:String(i))in r?Object.defineProperty(r,e,{value:t,enumerable:!0,configurable:!0,writable:!0}):r[e]=t,r;var i} 111
function(t){t=function(t,r){if("object"!=typeof t||null===t)return t;var i=t[Symbol.toPrimitive];if(void 0===i)return String(t);i=i.call(t,r);if("object"!=typeof i)return i;throw new TypeError("@@toPrimitive must return a primitive value.")}(t,"string");return"symbol"==typeof t?t:String(t)} 111
function(e,n,r){return n in e?Object.defineProperty(e,n,{value:r,enumerable:!0,configurable:!0,writable:!0}):e[n]=r,e} 104
function(c,i){He.call(this,c,i)} 94
function(n,r){(null==r||r>n.length)&&(r=n.length);for(var e=0,l=new Array(r);e<r;e++)l[e]=n[e];return l} 93
function(){return!0} 92
function(){return!1} 78
function(){return new gt(this)} 77
function(r){if(Array.isArray(r))return r} 77
function(){throw new TypeError(`Invalid attempt to destructure non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method.`)} 77
function(t){return t&&"object"==typeof t&&"default"in t?t:{default:t}} 76
function(i,t){this.a=i,this.b=t} 58
function(){return this.a} 49
function(t,e){var r,n=Object.keys(t);return Object.getOwnPropertySymbols&&(r=Object.getOwnPropertySymbols(t),e&&(r=r.filter(function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable})),n.push.apply(n,r)),n} 49
function(e){var t;"default"!==e&&(t=Object.getOwnPropertyDescriptor(c,e),Object.defineProperty(y,e,t.get?t:{enumerable:!0,get:function(){return c[e]}}))} 49
function(){return c[b]} 49
function(a,e,i){var r,t=i["aria-label"],n=i["aria-labelledby"],c=i.title;switch(a){case"img":return t||n||c?(l(r={},"aria-labelledby",n),l(r,"aria-label",t),l(r,"title",c),r):{"aria-label":"".concat(e.replace(/([a-z])([A-Z])/g,"$1 $2")," Icon")};case"presentation":return{"aria-hidden":!0,alt:""}}} 49
function(i){Di(this,i)} 48
function(r){return Object.getOwnPropertyDescriptor(e,r).enumerable} 48
function(){throw M(new De)} 46
function(l,r){var t=null==l?null:typeof Symbol<"u"&&l[Symbol.iterator]||l["@@iterator"];if(null!=t){var n,u,e=[],a=!0,o=!1;try{for(t=t.call(l);!(a=(n=t.next()).done)&&(e.push(n.value),!r||e.length!==r);a=!0);}catch(l){o=!0,u=l}finally{try{a||null==t.return||t.return()}finally{if(o)throw u}}return e}} 44
function(n){throw M(new De)} 39
function(r){var n;return r&&"object"==typeof r&&"default"in r?r:(n=Object.create(null),r&&Object.keys(r).forEach(function(e){var t;"default"!==e&&(t=Object.getOwnPropertyDescriptor(r,e),Object.defineProperty(n,e,t.get?t:{enumerable:!0,get:function(){return r[e]}}))}),n.default=r,Object.freeze(n))} 39
function n(r){return n(r)} 38
function(n){return typeof n} 38
function(o){return o&&"function"==typeof Symbol&&o.constructor===Symbol&&o!==Symbol.prototype?"symbol":typeof o} 38
function(r){var n;return r&&r.__esModule?r:(n=Object.create(null),r&&Object.keys(r).forEach(function(e){var t;"default"!==e&&(t=Object.getOwnPropertyDescriptor(r,e),Object.defineProperty(n,e,t.get?t:{enumerable:!0,get:function(){return r[e]}}))}),n.default=r,Object.freeze(n))} 37
function(){return this.b} 33
function(l,r){var t=null==l?null:typeof Symbol<"u"&&l[Symbol.iterator]||l["@@iterator"];if(null!=t){var e,n,u,a,f=[],i=!0,o=!1;try{if(u=(t=t.call(l)).next,0===r){if(Object(t)!==t)return;i=!1}else for(;!(i=(e=u.call(t)).done)&&(f.push(e.value),f.length!==r);i=!0);}catch(l){o=!0,n=l}finally{try{if(!i&&null!=t.return&&(a=t.return(),Object(a)!==a))return}finally{if(o)throw n}}return f}} 33
function(t){Object.defineProperty(e,t,Object.getOwnPropertyDescriptor(n,t))} 32
function(n){return Ei(n)} 30
function(n){return L(fn,X,2,n,6,1)} 30
function(n){} 29
function(r){if(typeof Symbol<"u"&&null!=r[Symbol.iterator]||null!=r["@@iterator"])return Array.from(r)} 28
function(){throw new TypeError(`Invalid attempt to spread non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method.`)} 28
()=>{} 27
function(){return null} 27
function(n,o){e.exports=o(m,on(),dn)} 27
function(i){Fi(this,i)} 23
function(n,o){e.exports=o(dn,on(),m)} 22
function(){throw M(new Fe(Re((xe(),Ds))))} 21
function(){return this} 20
function(i,t){this.b=i,this.a=t} 19
function(n,e){throw M(new De)} 18
function(){return 0} 17
function(r){Object.defineProperty(e,r,Object.getOwnPropertyDescriptor(t,r))} 16
()=>{var l;return null!=(l=e.options.debugAll)?l:e.options.debugHeaders} 16
function(n){return n} 15
function(c){Kr.call(this,c)} 15
function(){return this.c} 15
function(){return this.d} 15
function(n,r){return n} 14
d=>d.id 14
function(){var r=function(t,o){return(r=Object.setPrototypeOf||({__proto__:[]}instanceof Array?function(t,o){t.__proto__=o}:function(t,o){for(var n in o)Object.prototype.hasOwnProperty.call(o,n)&&(t[n]=o[n])}))(t,o)};return function(t,o){if("function"!=typeof o&&null!==o)throw new TypeError("Class extends value "+String(o)+" is not a constructor or null");function n(){this.constructor=t}r(t,o),t.prototype=null===o?Object.create(o):(n.prototype=o.prototype,new n)}} 14
function(_,o){_.__proto__=o} 14
function(o,r){for(var t in r)Object.prototype.hasOwnProperty.call(r,t)&&(o[t]=r[t])} 14
function(n){return!1} 13
function(t){var l=-1,n=null==t?0:t.length;for(this.clear();++l<n;){var r=t[l];this.set(r[0],r[1])}} 12
function(a,e,l){var i,r=l["aria-label"],t=l["aria-labelledby"],n=l.title;switch(a){case"img":return r||t||n?(f(i={},"aria-labelledby",t),f(i,"aria-label",r),f(i,"title",n),i):{"aria-label":"".concat(e.replace(/([a-z])([A-Z])/g,"$1 $2")," Icon")};case"presentation":return{"aria-hidden":!0,alt:""}}} 12
a=>a 12
a=>a() 12
function(n,a){n.a=a} 11
function(){ia.call(this)} 11
function(i,t,h){this.a=i,this.b=t,this.c=h} 11
function(n,r){return r} 11
function(){return this.a.gc()} 11
function(e){return e&&e.__esModule?e:{default:e}} 11
()=>n(!1) 11
function(i){this.b=i} 10
function(c){this.c=c} 10
function(){return this.f} 10
function(n){return n||"div"} 10
function(n,r,i){var l;return i=null!=(l=i)?l:"div",n||("string"==typeof(null==r?void 0:r.href)?"a":i)} 10

Let’s dive deeper, shall we?

Imagine for a second that we could define the duplicated functions once and then just reuse the short name instead (sounds reasonable, does it not?).

But not all of those functions could be de-duplicated in such a way (at least not so easily). Some of these functions use functions and variables from the outer closure (something defined outside of the function itself), so we can skip these. For instance, function(i){Di(this,i)} and function(){throw M(new De)} can be ignored.

Then, there are functions using this. These might be tricky (“this is not what you think” is a famous JavaScript mantra).

Lastly, some (if not most) of the functions could be replaced with either arrow functions or standard library functions. But that is uncertain - one must understand what a function does first.

With those points in mind, let’s look at the offenders once again:

Function code (minimized) Copies Notes
function(r){for(var t=1;t<arguments.length;t++){var n,o=arguments[t];for(n in o)Object.prototype.hasOwnProperty.call(o,n)&&(r[n]=o[n])}return r} 2205 spread operator?
function n(){return Object.assign&&Object.assign.bind(),n.apply(this,arguments)} 1197 Object.assign methods?
function n(){return Object.assign,n.apply(this,arguments)} 1008 Object.assign properties?
function(){} 753 no-op
function(n,e){if(null==n)return{};for(var r,t={},f=Object.keys(n),u=0;u<f.length;u++)r=f[u],0<=e.indexOf(r)||(t[r]=n[r]);return t} 191 ?
function(e,r){if(null==e)return{};var t,n=function(e,r){if(null==e)return{};for(var t,n={},l=Object.keys(e),o=0;o<l.length;o++)t=l[o],0<=r.indexOf(t)||(n[t]=e[t]);return n}(e,r);if(Object.getOwnPropertySymbols)for(var l=Object.getOwnPropertySymbols(e),o=0;o<l.length;o++)t=l[o],0<=r.indexOf(t)||Object.prototype.propertyIsEnumerable.call(e,t)&&(n[t]=e[t]);return n} 187 ?
function(e,r){return r=r||e.slice(0),Object.freeze(Object.defineProperties(e,{raw:{value:Object.freeze(r)}}))} 159 ?
function(r,t){if("object"!=typeof r||null===r)return r;var e=r[Symbol.toPrimitive];if(void 0===e)return("string"===t?String:Number)(r);e=e.call(r,t||"default");if("object"!=typeof e)return e;throw new TypeError("@@toPrimitive must return a primitive value.")} 113 ?
function(r,e,t){return i=function(r,e){if("object"!=typeof r||null===r)return r;var t=r[Symbol.toPrimitive];if(void 0===t)return String(r);t=t.call(r,e);if("object"!=typeof t)return t;throw new TypeError("@@toPrimitive must return a primitive value.")}(e,"string"),(e="symbol"==typeof i?i:String(i))in r?Object.defineProperty(r,e,{value:t,enumerable:!0,configurable:!0,writable:!0}):r[e]=t,r;var i} 111 ?
function(t){t=function(t,r){if("object"!=typeof t||null===t)return t;var i=t[Symbol.toPrimitive];if(void 0===i)return String(t);i=i.call(t,r);if("object"!=typeof i)return i;throw new TypeError("@@toPrimitive must return a primitive value.")}(t,"string");return"symbol"==typeof t?t:String(t)} 111 ?
function(e,n,r){return n in e?Object.defineProperty(e,n,{value:r,enumerable:!0,configurable:!0,writable:!0}):e[n]=r,e} 104 ?
function(n,r){(null==r||r>n.length)&&(r=n.length);for(var e=0,l=new Array(r);e<r;e++)l[e]=n[e];return l} 93 array spread?
function(){return!0} 92 always-true
function(){return!1} 78 always-false
function(r){if(Array.isArray(r))return r} 77 self explanatory
function(){throw new TypeError('Invalid attempt to destructure non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method.')} 77 isSymbol?
function(t){return t&&"object"==typeof t&&"default"in t?t:{default:t}} 76 ?
function(t,e){var r,n=Object.keys(t);return Object.getOwnPropertySymbols&&(r=Object.getOwnPropertySymbols(t),e&&(r=r.filter(function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable})),n.push.apply(n,r)),n} 49 ?
function(e){var t;"default"!==e&&(t=Object.getOwnPropertyDescriptor(c,e),Object.defineProperty(y,e,t.get?t:{enumerable:!0,get:function(){return c[e]}}))} 49 ?
function(l,r){var t=null==l?null:typeof Symbol<"u"&&l[Symbol.iterator]||l["@@iterator"];if(null!=t){var n,u,e=[],a=!0,o=!1;try{for(t=t.call(l);!(a=(n=t.next()).done)&&(e.push(n.value),!r||e.length!==r);a=!0);}catch(l){o=!0,u=l}finally{try{a||null==t.return||t.return()}finally{if(o)throw u}}return e}} 44 ?
function(r){var n;return r&&"object"==typeof r&&"default"in r?r:(n=Object.create(null),r&&Object.keys(r).forEach(function(e){var t;"default"!==e&&(t=Object.getOwnPropertyDescriptor(r,e),Object.defineProperty(n,e,t.get?t:{enumerable:!0,get:function(){return r[e]}}))}),n.default=r,Object.freeze(n))} 39 ?
function n(r){return n(r)} 38 Function.apply?
function(n){return typeof n} 38 typeof
function(o){return o&&"function"==typeof Symbol&&o.constructor===Symbol&&o!==Symbol.prototype?"symbol":typeof o} 38 ?
function(r){var n;return r&&r.__esModule?r:(n=Object.create(null),r&&Object.keys(r).forEach(function(e){var t;"default"!==e&&(t=Object.getOwnPropertyDescriptor(r,e),Object.defineProperty(n,e,t.get?t:{enumerable:!0,get:function(){return r[e]}}))}),n.default=r,Object.freeze(n))} 37 import?
function(l,r){var t=null==l?null:typeof Symbol<"u"&&l[Symbol.iterator]||l["@@iterator"];if(null!=t){var e,n,u,a,f=[],i=!0,o=!1;try{if(u=(t=t.call(l)).next,0===r){if(Object(t)!==t)return;i=!1}else for(;!(i=(e=u.call(t)).done)&&(f.push(e.value),f.length!==r);i=!0);}catch(l){o=!0,n=l}finally{try{if(!i&&null!=t.return&&(a=t.return(),Object(a)!==a))return}finally{if(o)throw n}}return f}} 33 ?
function(n){} 29 no-op
function(r){if(typeof Symbol<"u"&&null!=r[Symbol.iterator]||null!=r["@@iterator"])return Array.from(r)} 28 ?
function(){throw new TypeError('Invalid attempt to spread non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method.')} 28 ?
()=>{} 27 no-op
function(){return null} 27 always-null
function(){return this} 20 always-this
function(){return 0} 17 always-zero
function(n){return n} 15 identity
function(n,r){return n} 14 always-first-argument
d=>d.id 14 .id
function(){var r=function(t,o){return(r=Object.setPrototypeOf||({__proto__:[]}instanceof Array?function(t,o){t.__proto__=o}:function(t,o){for(var n in o)Object.prototype.hasOwnProperty.call(o,n)&&(t[n]=o[n])}))(t,o)};return function(t,o){if("function"!=typeof o&&null!==o)throw new TypeError("Class extends value "+String(o)+" is not a constructor or null");function n(){this.constructor=t}r(t,o),t.prototype=null===o?Object.create(o):(n.prototype=o.prototype,new n)}} 14 ?
function(_,o){_.__proto__=o} 14 Object.is_a?
function(o,r){for(var t in r)Object.prototype.hasOwnProperty.call(r,t)&&(o[t]=r[t])} 14 ?
function(n){return!1} 13 always-false
a=>a 12 identity
a=>a() 12 call first argument
function(n,a){n.a=a} 11 enum/const definition?
function(n,r){return r} 11 always-second-argument
function(e){return e&&e.__esModule?e:{default:e}} 11 import default
function(c){this.c=c} 10 enum/const definition?

As a matter of fact, someone on the internet did very similar research a few years ago. So I hoped to see some improvement in the build tools over the years.

As I mentioned above, our front-end is bundled with Vite. Let’s see if esbuild or bun (since both are fairly new and stand out in terms of architecture and performance) does a better job.

With a few small adjustments to make things fair (e.g. building the same thing in the same way), like disabling the plugins for Vite and setting up the svgr loader, here are some build time stats:

yarn install:

➤ YN0000: Done with warnings in 17s 798ms
yarn  12.11s user 19.69s system 175% cpu 18.122 total

bun install:

warn: esbuild's postinstall script took 748.9ms

 1028 packages installed [1.82s]
  Removed: 2
bun install  0.22s user 0.65s system 47% cpu 1.849 total
Bundler Build time
bun 0.43s
esbuild 2.57s
vite 85.04s
webpack 138.64s

And the analysis of the built bundles:

vite:

Found 968 functions, 651 are unique (67.25%)
Found 598 functions, 267 are unique (44.65%)
Found 154 functions, 98 are unique (63.64%)
Found 17 functions, 10 are unique (58.82%)
Found 15192 functions, 8963 are unique (59%)
Found 1202 functions, 494 are unique (41.1%)
Found 513 functions, 231 are unique (45.03%)
= Total 18644 functions, 10714 are unique (57.4%)

Duplicates length: 52616 bytes out of 281406 bytes are duplicate code (18.7%)
Duplicates length: 57607 bytes out of 164737 bytes are duplicate code (34.97%)
Duplicates length: 11140 bytes out of 45135 bytes are duplicate code (24.68%)
Duplicates length: 1932 bytes out of 6532 bytes are duplicate code (29.58%)
Duplicates length: 1518418 bytes out of 3537579 bytes are duplicate code (42.92%)
Duplicates length: 130649 bytes out of 340227 bytes are duplicate code (38.4%)
Duplicates length: 50160 bytes out of 136057 bytes are duplicate code (36.87%)
= Total 1822522 out of 4511673 bytes are duplicate code (40.3%)

esbuild:

Found 46654 functions, 28952 are unique (62.06%)
Duplicates length: 6905599 bytes out of 9645594 bytes are duplicate code (71.59%)

bun:

Found 31113 functions, 25755 are unique (82.78%)
Duplicates length: 446020 bytes out of 5696964 bytes are duplicate code (7.83%)

webpack:

Found 2898 functions, 1434 are unique (49.48%)
Duplicates length: 320940 bytes out of 4645589 bytes are duplicate code (6.91%)

And a deeper analysis of the duplicated functions:

esbuild:

Function Copies
function(r){for(var t=1;t<arguments.length;t++){var n,o=arguments[t];for(n in o)Object.prototype.hasOwnProperty.call(o,n)&&(r[n]=o[n])}return r} 2216
function n(){return Object.assign&&Object.assign.bind(),n.apply(this,arguments)} 1204
function n(){return Object.assign,n.apply(this,arguments)} 1010
function(){} 844
function(t){return t&&"object"==typeof t&&"default"in t?t:{default:t}} 260
function(i){this.a=i} 250
function(n,e){if(null==n)return{};for(var r,t={},f=Object.keys(n),u=0;u<f.length;u++)r=f[u],0<=e.indexOf(r)||(t[r]=n[r]);return t} 203
function(e,r){if(null==e)return{};var t,n=function(e,r){if(null==e)return{};for(var t,n={},l=Object.keys(e),o=0;o<l.length;o++)t=l[o],0<=r.indexOf(t)||(n[t]=e[t]);return n}(e,r);if(Object.getOwnPropertySymbols)for(var l=Object.getOwnPropertySymbols(e),o=0;o<l.length;o++)t=l[o],0<=r.indexOf(t)||Object.prototype.propertyIsEnumerable.call(e,t)&&(n[t]=e[t]);return n} 194
function(e,r){return r=r||e.slice(0),Object.freeze(Object.defineProperties(e,{raw:{value:Object.freeze(r)}}))} 160
function(r,t){if("object"!=typeof r||null===r)return r;var e=r[Symbol.toPrimitive];if(void 0===e)return("string"===t?String:Number)(r);e=e.call(r,t||"default");if("object"!=typeof e)return e;throw new TypeError("@@toPrimitive must return a primitive value.")} 137
function(r,e,t){return i=function(r,e){if("object"!=typeof r||null===r)return r;var t=r[Symbol.toPrimitive];if(void 0===t)return String(r);t=t.call(r,e);if("object"!=typeof t)return t;throw new TypeError("@@toPrimitive must return a primitive value.")}(e,"string"),(e="symbol"==typeof i?i:String(i))in r?Object.defineProperty(r,e,{value:t,enumerable:!0,configurable:!0,writable:!0}):r[e]=t,r;var i} 134
function(t){t=function(t,r){if("object"!=typeof t||null===t)return t;var i=t[Symbol.toPrimitive];if(void 0===i)return String(t);i=i.call(t,r);if("object"!=typeof i)return i;throw new TypeError("@@toPrimitive must return a primitive value.")}(t,"string");return"symbol"==typeof t?t:String(t)} 134
()=>{} 129
function(n){return this===n} 119
function(e,n,r){return n in e?Object.defineProperty(e,n,{value:r,enumerable:!0,configurable:!0,writable:!0}):e[n]=r,e} 115
function(n,r){(null==r||r>n.length)&&(r=n.length);for(var e=0,l=new Array(r);e<r;e++)l[e]=n[e];return l} 106
function(c,i){Bs.call(this,c,i)} 94
function(){return!0} 93
function(r){if(Array.isArray(r))return r} 83
function(){throw new TypeError(`Invalid attempt to destructure non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method.`)} 83
function(){return!1} 79
function(){return new cu(this)} 77
function(e){var t;"default"!==e&&(t=Object.getOwnPropertyDescriptor(b,e),Object.defineProperty(E,e,t.get?t:{enumerable:!0,get:function(){return b[e]}}))} 76
function(){return b[P]} 76
function(a,e,l){var i,r=l["aria-label"],t=l["aria-labelledby"],n=l.title;switch(a){case"img":return r||t||n?(s(i={},"aria-labelledby",t),s(i,"aria-label",r),s(i,"title",n),i):{"aria-label":"".concat(e.replace(/([a-z])([A-Z])/g,"$1 $2")," Icon")};case"presentation":return{"aria-hidden":!0,alt:""}}} 76
function(r){var n;return r&&r.__esModule?r:(n=Object.create(null),r&&Object.keys(r).forEach(function(e){var t;"default"!==e&&(t=Object.getOwnPropertyDescriptor(r,e),Object.defineProperty(n,e,t.get?t:{enumerable:!0,get:function(){return r[e]}}))}),n.default=r,Object.freeze(n))} 67
function(n){return typeof n} 64
function(o){return o&&"function"==typeof Symbol&&o.constructor===Symbol&&o!==Symbol.prototype?"symbol":typeof o} 64
function n(r){return n(r)} 63
function(t,e){var r,n=Object.keys(t);return Object.getOwnPropertySymbols&&(r=Object.getOwnPropertySymbols(t),e&&(r=r.filter(function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable})),n.push.apply(n,r)),n} 61
function(i,t){this.a=i,this.b=t} 58
function(r){var n;return r&&"object"==typeof r&&"default"in r?r:(n=Object.create(null),r&&Object.keys(r).forEach(function(e){var t;"default"!==e&&(t=Object.getOwnPropertyDescriptor(r,e),Object.defineProperty(n,e,t.get?t:{enumerable:!0,get:function(){return r[e]}}))}),n.default=r,Object.freeze(n))} 50
function(r){if(typeof Symbol<"u"&&null!=r[Symbol.iterator]||null!=r["@@iterator"])return Array.from(r)} 49
function(){throw new TypeError(`Invalid attempt to spread non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method.`)} 49
function(){return this.a} 49
function(i){p0(this,i)} 48
function(l,r){var t=null==l?null:typeof Symbol<"u"&&l[Symbol.iterator]||l["@@iterator"];if(null!=t){var n,u,e=[],a=!0,o=!1;try{for(t=t.call(l);!(a=(n=t.next()).done)&&(e.push(n.value),!r||e.length!==r);a=!0);}catch(l){o=!0,u=l}finally{try{a||null==t.return||t.return()}finally{if(o)throw u}}return e}} 46
function(){throw St(new Ss)} 46
function(n){throw St(new Ss)} 39
()=>{"use strict";Vu(),Du()} 38
function(l,r){var t=null==l?null:typeof Symbol<"u"&&l[Symbol.iterator]||l["@@iterator"];if(null!=t){var e,n,u,a,f=[],i=!0,o=!1;try{if(u=(t=t.call(l)).next,0===r){if(Object(t)!==t)return;i=!1}else for(;!(i=(e=u.call(t)).done)&&(f.push(e.value),f.length!==r);i=!0);}catch(l){o=!0,n=l}finally{try{if(!i&&null!=t.return&&(a=t.return(),Object(a)!==a))return}finally{if(o)throw n}}return f}} 37
function(){return this.b} 33
function(){X(x)} 32
function(n){} 31
a=>a() 30
function(n){return V1(n)} 30
function(n){return dn(oi,mr,2,n,6,1)} 30
function(){return null} 29
function(){return this} 24
()=>{"use strict";Du()} 23
function(i){g0(this,i)} 23
function(t,r){var e;if(t)return"string"==typeof t?k(t,r):"Map"===(e="Object"===(e=Object.prototype.toString.call(t).slice(8,-1))&&t.constructor?t.constructor.name:e)||"Set"===e?Array.from(t):"Arguments"===e||/^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(e)?k(t,r):void 0} 22
function(t,r){var e;if(t)return"string"==typeof t?P(t,r):"Map"===(e="Object"===(e=Object.prototype.toString.call(t).slice(8,-1))&&t.constructor?t.constructor.name:e)||"Set"===e?Array.from(t):"Arguments"===e||/^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(e)?P(t,r):void 0} 22
function(n){return null!=n&&n instanceof Array} 21
function(r){if(Array.isArray(r))return P(r)} 21
function(e){return e&&e.__esModule?e:{default:e}} 21
function(t,r){var e;if(t)return"string"==typeof t?Q(t,r):"Map"===(e="Object"===(e=Object.prototype.toString.call(t).slice(8,-1))&&t.constructor?t.constructor.name:e)||"Set"===e?Array.from(t):"Arguments"===e||/^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(e)?Q(t,r):void 0} 21
function(){throw St(new Os(il((qs(),_g))))} 21
function(n){return n} 20
function(n){return null!=n&&n.nodeType===Node.ELEMENT_NODE} 20
function(e){throw Error("Received unhandled value: ".concat(e))} 20
function(r,n){return Array.isArray(r)?r.concat(n):"string"==typeof r?r:void 0} 19

bun:

Function Copies
function(){} 739
function(i){this.a=i} 250
function(r){for(var t=1;t<arguments.length;t++){var n,o=arguments[t];for(n in o)Object.prototype.hasOwnProperty.call(o,n)&&(r[n]=o[n])}return r} 197
()=>{} 141
function(n){return this===n} 119
function(c,f){f7.call(this,c,f)} 94
function(){return!0} 91
function(){return new p9(this)} 77
function(){return!1} 76
function(i,t){this.a=i,this.b=t} 58
function(n,e){if(null==n)return{};for(var r,t={},f=Object.keys(n),u=0;u<f.length;u++)r=f[u],0<=e.indexOf(r)||(t[r]=n[r]);return t} 53
function(r,t){if("object"!=typeof r||null===r)return r;var e=r[Symbol.toPrimitive];if(void 0===e)return("string"===t?String:Number)(r);e=e.call(r,t||"default");if("object"!=typeof e)return e;throw new TypeError("@@toPrimitive must return a primitive value.")} 51
function(){return this.a} 49
function(e,r){if(null==e)return{};var t,n=function(e,r){if(null==e)return{};for(var t,n={},l=Object.keys(e),o=0;o<l.length;o++)t=l[o],0<=r.indexOf(t)||(n[t]=e[t]);return n}(e,r);if(Object.getOwnPropertySymbols)for(var l=Object.getOwnPropertySymbols(e),o=0;o<l.length;o++)t=l[o],0<=r.indexOf(t)||Object.prototype.propertyIsEnumerable.call(e,t)&&(n[t]=e[t]);return n} 48
function(r,e,t){return i=function(r,e){if("object"!=typeof r||null===r)return r;var t=r[Symbol.toPrimitive];if(void 0===t)return String(r);t=t.call(r,e);if("object"!=typeof t)return t;throw new TypeError("@@toPrimitive must return a primitive value.")}(e,"string"),(e="symbol"==typeof i?i:String(i))in r?Object.defineProperty(r,e,{value:t,enumerable:!0,configurable:!0,writable:!0}):r[e]=t,r;var i} 48
function(t){t=function(t,r){if("object"!=typeof t||null===t)return t;var i=t[Symbol.toPrimitive];if(void 0===i)return String(t);i=i.call(t,r);if("object"!=typeof i)return i;throw new TypeError("@@toPrimitive must return a primitive value.")}(t,"string");return"symbol"==typeof t?t:String(t)} 48
function(i){T6(this,i)} 48
()=>{R3(),G3()} 46
function(){throw x0(new w7)} 46
function(e,r){return r=r||e.slice(0),Object.freeze(Object.defineProperties(e,{raw:{value:Object.freeze(r)}}))} 45
function(n,r){(null==r||r>n.length)&&(r=n.length);for(var e=0,l=new Array(r);e<r;e++)l[e]=n[e];return l} 41
function(n){throw x0(new w7)} 39
function(r){if(Array.isArray(r))return r} 36
function(){throw new TypeError("Invalid attempt to destructure non-iterable instance.\\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method.")} 36
function(){return this.b} 33
function(n){return n2(n)} 30
function(n){return J1($5,p1,2,n,6,1)} 30
function(t,e){var r,n=Object.keys(t);return Object.getOwnPropertySymbols&&(r=Object.getOwnPropertySymbols(t),e&&(r=r.filter(function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable})),n.push.apply(n,r)),n} 29
function(l,r){var t=null==l?null:"undefined"!=typeof Symbol&&l[Symbol.iterator]||l["@@iterator"];if(null!=t){var e,n,u,f,i=[],a=!0,o=!1;try{if(u=(t=t.call(l)).next,0===r){if(Object(t)!==t)return;a=!1}else for(;!(a=(e=u.call(t)).done)&&(i.push(e.value),i.length!==r);a=!0);}catch(l){o=!0,n=l}finally{try{if(!a&&null!=t.return&&(f=t.return(),Object(f)!==f))return}finally{if(o)throw n}}return i}} 29
function(n){} 29
function(){return null} 28
function(e){return Object.getOwnPropertyDescriptor(Z,e).enumerable} 25
function(e){Object.defineProperty(Z,e,Object.getOwnPropertyDescriptor(W,e))} 25
function(n){return n} 23
function(i){C6(this,i)} 23
()=>{G3()} 22
function(){throw x0(new C7(n7((y7(),KG))))} 21
function(){return this} 19
function(i,t){this.b=i,this.a=t} 19
function(n){return!1} 18
function(n,w){throw x0(new w7)} 18
function(){return 0} 17
function(n){return typeof n} 17
function(o){return o&&"function"==typeof Symbol&&o.constructor===Symbol&&o!==Symbol.prototype?"symbol":typeof o} 17
()=>{R3()} 16
()=>{var e;return null!=(e=Z.options.debugAll)?e:Z.options.debugHeaders} 16
function(n,r){return n} 15
function(c){hX.call(this,c)} 15
function(){return this.c} 15
function(){return this.d} 15
function(e,n,r){return n in e?Object.defineProperty(e,n,{value:r,enumerable:!0,configurable:!0,writable:!0}):e[n]=r,e} 14
function(r){if("undefined"!=typeof Symbol&&null!=r[Symbol.iterator]||null!=r["@@iterator"])return Array.from(r)} 14
function(){throw new TypeError("Invalid attempt to spread non-iterable instance.\\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method.")} 14
d=>d.id 14
()=>W(!1) 13
a=>a 12
function(t){var l=-1,n=null==t?0:t.length;for(this.clear();++l<n;){var r=t[l];this.set(r[0],r[1])}} 12
function(n,c){} 12
function(n,r){return r} 11
function(n,a){n.a=a} 11
function(){_V.call(this)} 11
function(i,t,h){this.a=i,this.b=t,this.c=h} 11
function(){return this.a.gc()} 11
function(i){this.b=i} 10
function(c){this.c=c} 10
function(){return this.f} 10

Interestingly enough, all three tools handled the job badly in different aspects:

  • vite was the slowest and produced the second biggest bundle
  • esbuild was the fastest and produced the largest bundle
  • bun was slower than esbuild by a hair’s breadth, but produced the smallest bundle with the fewest duplicates

Bonus points to bun for installing node modules in the blink of an eye.

In terms of duplicates, however, all three failed miserably (in my opinion), with the best result being the bundle produced by bun with 18% duplicates and the rest having almost half the bundle wasted.

For the most part, bundlers seem to be doing a pretty bad job at tree shaking and keep a lot of duplicates of those utility functions. One can estimate how much space these waste by multiplying the function code length by the number of duplicates minus one (for the one definition that has to stay).

Let’s imagine some of the above functions could be de-duplicated. What is the benefit of that? For the most part, the front-end can load faster for users - simply because there are fewer bytes to transfer. On top of that, there are fewer functions to be created in memory. So technically, the front-end can act faster. Although, on modern machines the difference between having one function and a few thousand copies of the same function is negligible.

Here is a shortened list of top abusers from different bundlers for our tool:

Function vite esbuild bun
# Bytes # Bytes # Bytes
function(){} 753 9036 844 10128 739 8868
function(){return!0} 92 1840 93 1860 91 1820
function(){return!1} 78 1560 79 1580 76 1520
function(){return null} 27 621 29 667 28 644
function(){return this} 20 460 24 552 19 437
function(){return 0} 17 340 n/a 0 n/a 0
function(n){return!1} 13 273 n/a 0 18 378
function(n){} 29 377 31 403 29 377
function(n){return n} 15 315 20 420 23 483
function(n){return typeof n} n/a 0 64 1792 17 476
function(n){return this===n} n/a 0 119 3332 119 3332
function(n,r){return n} 14 322 n/a 0 15 345
function(n,r){return r} 11 253 n/a 0 11 253
function(n,c){} n/a 0 n/a 0 12 180
()=>{} 27 162 129 774 141 846
a=>a 12 48 n/a 0 12 48
a=>a() 12 72 n/a 0 n/a 0
function(r){for(var t=1;t<arguments.length;t++){var n,o=arguments[t];for(n in o)Object.prototype.hasOwnProperty.call(o,n)&&(r[n]=o[n])}return r} 2205 317520 2216 319104 197 28368
function n(){return Object.assign&&Object.assign.bind(),n.apply(this,arguments)} 1197 95760 1204 96320 n/a 0
function n(){return Object.assign,n.apply(this,arguments)} 1008 58464 1010 58580 n/a 0
function(n,r){(null==r||r>n.length)&&(r=n.length);for(var e=0,l=new Array(r);e<r;e++)l[e]=n[e];return l} 93 9672 106 11024 n/a 0
function(r){if(Array.isArray(r))return r} 77 3157 83 3403 36 1476
function(){throw new TypeError('Invalid attempt to destructure non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method.')} 77 13244 83 14276 36 6192
function(t){return t&&"object"==typeof t&&"default"in t?t:{default:t}} 76 5624 260 19240 n/a 0
function(_,o){_.__proto__=o} 14 392 n/a 0 n/a 0
function(o,r){for(var t in r)Object.prototype.hasOwnProperty.call(r,t)&&(o[t]=r[t])} 14 1176 n/a 0 n/a 0
function(e){return e&&e.__esModule?e:{default:e}} 11 539 21 1029 n/a 0
function(e,n,r){return n in e?Object.defineProperty(e,n,{value:r,enumerable:!0,configurable:!0,writable:!0}):e[n]=r,e} n/a 0 115 13570 n/a 0
function(r,n){return Array.isArray(r)?r.concat(n):"string"==typeof r?r:void 0} n/a 0 19 1520 n/a 0
function(n){return null!=n&&n instanceof Array} n/a 0 21 987 n/a 0
function(n,e){if(null==n)return{};for(var r,t={},f=Object.keys(n),u=0;u<f.length;u++)r=f[u],0<=e.indexOf(r)||(t[r]=n[r]);return t} n/a 0 n/a 0 53 6890
function(r,t){if("object"!=typeof r||null===r)return r;var e=r[Symbol.toPrimitive];if(void 0===e)return("string"===t?String:Number)(r);e=e.call(r,t||"default");if("object"!=typeof e)return e;throw new TypeError("@@toPrimitive must return a primitive value.")} n/a 0 137 36853 51 13719
function(e,r){return r=r||e.slice(0),Object.freeze(Object.defineProperties(e,{raw:{value:Object.freeze(r)}}))} n/a 0 160 17600 n/a 0
function(o){return o&&"function"==typeof Symbol&&o.constructor===Symbol&&o!==Symbol.prototype?"symbol":typeof o} n/a 0 64 7424 n/a 0
function(r){if("undefined"!=typeof Symbol&&null!=r[Symbol.iterator]||null!=r["@@iterator"])return Array.from(r)} n/a 0 n/a 0 14 1624

Interestingly enough, aside from a lot of () => {} and (a, b) => a and () => true (as I call them, utility) functions, there are a lot of ES6 / TypeScript helpers such as class definition and spread operator variants, presumably made to be compatible with ES5-only browsers. Maybe if we had targeted only platforms supporting the latest ES features we would get better results?

Well, not by much:

bundle sizes:

Bundler Bundle size
bun 5.4M
esbuild 9.2M
esbuild (tuned) 8.0M
vite 7.1M
vite (tuned) 3.8M
webpack 4.4M

vite:

Found 15983 functions, 9581 are unique (59.94%)
Duplicates length: 1495985 bytes out of 4019326 bytes are duplicate code (37.22%)

esbuild:

Found 41736 functions, 29224 are unique (70.02%)
Duplicates length: 3406606 bytes out of 8347230 bytes are duplicate code (40.81%)

webpack is (should be) already using the target config option from tsconfig.json (which is set to ESNext in our case) and bun does not really have a whole lot of customization in this regard.

One unlikely case in which these duplicates could be faster than having just one function is that running code defined nearby (as a single contiguous block of code) might be slightly faster than making code jumps. This is a super far-fetched idea from low-level programming, where the CPU does not have to jump thousands or millions of (assembly) instructions back and forth but literally a few instead. This won’t justify using verbose ES5-compatible code on ESNext browsers, however. How about we run a very synthetic benchmark to check just this one theory?

function f(){return!1}

console.time('1');
for(var i=0;i<100000;i++){var a=[];for(var t=0;t<20000;t++)a.push(Math.random()*1000000);var x=a.filter(f).length}
console.timeEnd('1');
console.time('2');
for(var i=0;i<100000;i++){var a=[];for(var t=0;t<20000;t++)a.push(Math.random()*1000000);var f=function(){return!1},x=a.filter(f).length}
console.timeEnd('2');

The results are actually quite stable:

1: 17029ms
1: 16998ms
1: 16903ms

2: 21877ms
2: 21811ms
2: 21821ms

Having just one function instance is approx. 23% faster. But what happens on consecutive runs?

1: 9194ms
1: 9159ms
1: 14044ms
1: 13882ms
1: 13975ms
1: 9205ms
1: 14026ms

2: 21821ms
2: 13843ms
2: 13866ms
2: 13854ms
2: 13961ms

2: 21718ms
2: 13952ms
2: 13925ms
2: 13923ms

Seems like the CPU does indeed do a little bit of instruction caching and branch prediction (the first run is visibly slower than the subsequent runs). But the observation still holds: having one function definition instead of many copies (even “near” copies) has a much bigger impact.

With that being said, there is one interesting thing to try here: what if we actually replace some of those bulky duplicates with one-time declarations, within the same bundle?

Without performing an in-depth code analysis and optimization, I came up with the following naive implementation:

  1. analyze the code and pick a few functions (biggest abusers) to fix them up
  2. extract all function names, function parameter names (including potential destructuring objects in function params) and all variable and constant names
  3. for each function to be de-duplicated, create a unique name (using the variable and function parameter and function names to avoid any clashes)
  4. from the end of the file to the beginning, remove all occurrences of the function declaration and replace all function references with the unique name
  5. add the function declarations to the beginning of the file

This approach is rather naive, since it does not account for a number of edge cases. For instance, if there are two functions to be replaced:

function(e){for(var r=1;r<arguments.length;r++){var t,l=arguments[r];for(t in l)Object.prototype.hasOwnProperty.call(l,t)&&(e[t]=l[t])}return e}

function p(){return(p=Object.assign||function(e){for(var r=1;r<arguments.length;r++){var t,l=arguments[r];for(t in l)Object.prototype.hasOwnProperty.call(l,t)&&(e[t]=l[t])}return e}).apply(this,arguments)}

i.e. one includes the other, the algorithm could potentially have evicted cases like this.

Before proceeding further, it is a good idea to test if the cleaned up bundle can safely replace the original one. Hence I just stuffed it in the static assets folder of our project and ran it with the modified bundle.

This way I figured out a few issues with the naive approach:

  • two function definitions are causing stack overflow:
    • $z=function n(){return Object.assign,n.apply(this,arguments)}
    • $q=function n(){return n=Object.assign&&Object.assign.bind(),n.apply(this,arguments)}
  • some of the empty functions are actually used as constructors (ES5-compatible OOP model), which is only discovered by finding expressions like $FnName.prototype.something = somethingElse;
  • some functions are named and then referenced later in the code
  • some functions are not used at all: Unused aliases

For the shorthand functions I first tried manually fixing them up - I had to replace them with the likes of $z=function(){return $z=Object.assign.bind(),$z.apply(this,arguments)}. This worked, so I created an AST transformer to handle these one-line return-only functions:

/**
 * Rewrites a self-referencing, named function declaration so that it works under an alias.
 *
 * Two passes over the parsed `code`:
 *   1. find the name of the first named function declaration (the "root function");
 *   2. rename every identifier carrying that name to `fname`, and turn every function
 *      declaration whose whole body is `return <left> <op> <right>` into an anonymous,
 *      parameterless function whose body is `return fname = <left>, <right>`.
 *
 * @param {string} code - source text to transform
 * @param {string} fname - alias that replaces the root function name
 * @returns {string} the printed, transformed source; `code` itself (untouched) when it
 *     contains no named function declaration
 */
const simplifyFunction = (code, fname) => {
    // Only a label for the in-memory source file: the compiler API parses the `code`
    // string directly, so nothing has to be written to (or cleaned up from) disk.
    const virtualFileName = '_tmp';

    // Parsed once and shared by both passes.
    const sourceFile = ts.createSourceFile(virtualFileName, code, ts.ScriptTarget.ESNext);

    // Pass 1: name of the FIRST named function declaration. `ts.forEachChild` stops at,
    // and returns, the first truthy callback result, so later declarations cannot
    // overwrite the match. The parser gives a nameless `function(){}` statement an
    // empty identifier, hence the `!== ''` check; `export default function(){}` has
    // no name node at all, hence the `node.name` guard.
    const findRootFnName = (node) => {
        if (
            ts.isFunctionDeclaration(node) &&
            node.name &&
            ts.isIdentifier(node.name) &&
            node.name.escapedText !== ''
        ) {
            return node.name.escapedText;
        }

        return ts.forEachChild(node, findRootFnName);
    };

    const rootFnName = findRootFnName(sourceFile);

    if (!rootFnName) {
        return code;
    }

    // Pass 2: rename the root function and rewrite `return a <op> b` bodies.
    const transformer = (ctx) => (file) => {
        const visit = (node) => {
            if (ts.isIdentifier(node) && node.escapedText === rootFnName) {
                return ts.factory.createIdentifier(fname);
            }

            // The only statement of the function body, if the node is a function
            // declaration that has a body with exactly one statement.
            const onlyStatement =
                ts.isFunctionDeclaration(node) &&
                node.body &&
                ts.isBlock(node.body) &&
                node.body.statements.length === 1
                    ? node.body.statements[0]
                    : undefined;

            if (
                onlyStatement &&
                ts.isReturnStatement(onlyStatement) &&
                onlyStatement.expression && // a bare `return;` has no expression
                ts.isBinaryExpression(onlyStatement.expression)
            ) {
                const { left, right } = onlyStatement.expression;

                // NOTE(review): positional arguments follow the 7-parameter factory
                // signature (modifiers, asteriskToken, name, typeParameters, parameters,
                // type, body) - confirm against the installed TypeScript version.
                const next = ts.factory.createFunctionDeclaration(
                    [],
                    undefined,
                    undefined,
                    [],
                    [],
                    undefined,

                    ts.factory.createBlock([
                        ts.factory.createReturnStatement(
                            ts.factory.createComma(
                                ts.factory.createAssignment(
                                    ts.factory.createIdentifier(fname),
                                    left
                                ),

                                right
                            )
                        )
                    ])
                );

                // Visit the replacement too, so references to the root function inside
                // the reused `left` / `right` sub-trees get renamed as well.
                return ts.visitEachChild(next, visit, ctx);
            }

            return ts.visitEachChild(node, visit, ctx);
        };

        return ts.visitNode(file, visit);
    };

    const result = ts.transform(sourceFile, [ transformer ]);

    try {
        return ts.createPrinter({ omitTrailingSemicolon: true })
            .printFile(result.transformed.find(({ fileName }) => fileName === virtualFileName));
    } finally {
        // Release the emit state the transformation attached to the parse tree.
        result.dispose();
    }
};

The transformer is essentially a two-pass processor: it first parses the code and identifies the first function declaration. If none was found - it just returns the original code. If there was a so-called “root function” defined, it then replaces all identifiers with that “root function” name with the alias provided as fname. It also replaces the return statements in the form of return something && something() with return alias = something, alias().

This approach is different from simply using uglifyjs to just try and minimize the code - it is way more complex (compared to just one function call). It results in a few extra whitespaces being added. But using uglifyjs messes things up again, and the TypeScript compiler does not have an option for minimizing the output. Still, a few extra whitespaces are totally acceptable given the much bigger savings from this transformation.

With the constructors I simply excluded them from being de-duplicated. This resulted in 155 fewer substitutions (in Vite mode), which is negligible on the overall scale of the problem.

As for the named functions which are in the global scope and are referenced later down the line, I had to create a list of “backwards-compatible aliases”, mapping those old function names onto the new unique names.

I ended up with a three-pass parser-transformer utility (not really a “simple script” anymore). The passes being:

  1. figuring out the duplicate function declarations and replacing them from the end to the start of the code (to minimize the chance of writing over the just-changed code)
  2. removing the duplicate global-scope named functions and replacing them with shorthand aliases to the de-duplicated declarations
  3. replacing the usages of all known aliases and shorthand-named de-duplicated functions with their corresponding new (generated) names

Other than those, replacing the original bundle with the optimized one worked like a charm!

The results? With the threshold of 20 duplicates or more:

Before optimization

Bundler | Bundle size | Total functions | Unique functions | Unique functions, % | Duplicate code, %
bun | 6.2M | 8903 | 7443 | 83.6% | 0.78%
esbuild | 8.7M | 13057 | 10250 | 78.5% | 3.9%
vite | 3.9M | 3502 | 2365 | 67.53% | 6.39%
webpack | 4.4M | 2898 | 1434 | 49.48% | 6.91%

After optimization

Bundler | Bundle size | Total functions | Unique functions | Unique functions, % | Duplicate code, %
bun | 6.2M (same) | 7865 (-1038) | 7355 (-88) | 93.52% (+9.92%) | 0.51% (-0.27%)
esbuild | 8.5M (-0.2M) | 3265 (-9792) | 2990 (-7260) | 91.58% (+13.08%) | 0.62% (-3.28%)
vite | 3.6M (-0.3M) | 2483 (-1019) | 2277 (-88) | 91.7% (+24.17%) | 1.68% (-4.71%)
webpack | 4.1M (-0.3M) | 1484 (-1414) | 1375 (-59) | 92.65% (+43.17%) | 0.43% (-6.48%)

In conclusion, the bundlers do a pretty average job at optimizing the bundles, even in production mode with some extra tuning. And if some brave soul is willing to invest even more time and effort than I did into developing a sophisticated solution (potentially improving the existing tools, like uglifyjs or the bundlers themselves), the numbers can be improved even further. It would be really interesting to see what the results would be when running this optimizer on a bigger bundle.

In my humble opinion, Bun does produce the cleanest bundle. It might not be the smallest one, but it has the least unnecessary stuff. On top of that, it is the fastest tool in the JS world I have ever used. By the way, this very blog is built using Bun and React SSR - on Github Actions it takes around a minute to build and publish:

Github Actions build and publish with Bun Github Actions build breakdown

TypeScript classes are not what you think

There was a talk at CppCon 2017 by Matt Godbolt with an inspirational title: What has my compiler done for me lately? It actually resonates with me quite a bit, especially in a world where we try to push so hard for statically typed compiled languages on the front-end.

Recently I recalled one of the (many) reasons why I think TypeScript is useless (as a way to introduce a strong type system to the JavaScript world) in many cases.

To be realistic and not just throw bare accusations around, this was inspired by the work on contracts for a new microservice, specifically - the request & response types to be used in both client and the back-end of the microservice. The types were similar to the types used on a database layer and one of the developers just returned objects of that DB layer model type on a controller (endpoint) level, which confused me.

Consider the code below:

// Plain class: the `public` constructor parameters declare and assign the fields `moo` and `foo`.
class A {
    constructor(public moo: string, public foo: number) {}
}

// Declared independently of A: the same public `moo` and `foo` fields, plus an optional `zoo`.
class B {
    constructor(public moo: string, public foo: number, public zoo?: string[]) {}
}

const b: B = new B('ololo', -3.14, ['1']);
const a: A = b; // accepted: B has every public member that A declares
const c: B = a; // accepted too: `zoo` is optional, so A has every member that B requires

This works because of a (rather questionable) design decision by the TypeScript team called Type compatibility, where any two classes or interfaces that have overlapping public fields are deemed compatible and can be mutually interchangeable.

However, in most languages with a reasonable type system, you would expect two different classes to be just that - two different classes - in the case of the code above, an object of class B can never be assigned to a variable of type A and vice versa.

There is a way to achieve this in TypeScript, however (a bit cumbersome, though): by hiding the properties and only exposing them through non-getter/non-setter methods, when needed:

// Keeps `moo` and `foo` private and exposes them only through regular methods
// (no `get` / `set` accessors).
class A {
    constructor(private moo: string, private foo: number) {}

    // Returns the private `moo` value.
    getMoo() {
        return this.moo;
    }

    // Returns the private `foo` value.
    getFoo() {
        return this.foo;
    }
}

// Keeps `moo`, `foo` and the optional `zoo` private and exposes them only through
// regular methods (no `get` / `set` accessors).
class B {
    constructor(private moo: string, private foo: number, private zoo?: ReadonlyArray<string>) {}

    // Returns the private `moo` value.
    getMoo() {
        return this.moo;
    }

    // Returns the private `foo` value.
    getFoo() {
        return this.foo;
    }

    // Returns the private `zoo` value (may be undefined, since `zoo` is optional).
    getZoo() {
        return this.zoo;
    }
}

// Neither cross-assignment below compiles; the compiler diagnostics are quoted inline.
const b: B = new B('ololo', -3.14, ['1']);
const a: A = b; // Type 'B' is not assignable to type 'A'. Types have separate declarations of a private property 'moo'.(2322)
const c: B = a; // Property 'getZoo' is missing in type 'A' but required in type 'B'.(2741)

Without knowing this “feature” beforehand, one might end up with inconsistent code or errors down the line (when somebody decides to modify the DB layer model and gets errors on an API layer). And TypeScript does not really help here.