" + $('div[id="ref-' + ref + '"]').html() + ""; }).join("\n"); window.tippy(this, { allowHTML: true, content: refHtml, maxWidth: 500, interactive: true, interactiveBorder: 10, theme: 'light-border', placement: 'bottom-start' }); }); // fix footnotes in tables (#411) // replacing broken distill.pub feature $('table d-footnote').each(function() { // we replace internal showAtNode methode which is triggered when hovering a footnote this.hoverBox.showAtNode = function(node) { // ported from https://github.com/distillpub/template/pull/105/files calcOffset = function(elem) { let x = elem.offsetLeft; let y = elem.offsetTop; // Traverse upwards until an `absolute` element is found or `elem` // becomes null. while (elem = elem.offsetParent && elem.style.position != 'absolute') { x += elem.offsetLeft; y += elem.offsetTop; } return { left: x, top: y }; } // https://developer.mozilla.org/en-US/docs/Web/API/HTMLElement/offsetTop const bbox = node.getBoundingClientRect(); const offset = calcOffset(node); this.show([offset.left + bbox.width, offset.top + bbox.height]); } }) // clear polling timer clearInterval(tid); // show body now that everything is ready on_load_complete(); } var tid = setInterval(distill_post_process, 50); distill_post_process(); } function init_downlevel() { init_common(); // insert hr after d-title $('.d-title').after($('


')); // check if we have authors var front_matter = JSON.parse($("#distill-front-matter").html()); var have_authors = front_matter.authors && front_matter.authors.length > 0; // manage byline/border if (!have_authors) $('.d-byline').remove(); $('.d-byline').after($('
')); $('.d-byline a').remove(); // remove toc $('.d-contents').remove(); // move appendix elements $('h1.appendix, h2.appendix').each(function(i, val) { $(this).changeElementType('h3'); }); $('h3.appendix').each(function(i, val) { $(this).nextUntil($('h1, h2, h3')).addBack().appendTo($('.d-appendix')); }); // inject headers into references and footnotes var refs_header = $('

'); refs_header.text('References'); $('#refs').prepend(refs_header); var footnotes_header = $('

')); // trim code $('pre>code').each(function(i, val) { $(this).html($.trim($(this).html())); }); // move posts-container right before article $('.posts-container').insertBefore($('.d-article')); $('body').addClass('downlevel'); on_load_complete(); } function init_common() { // jquery plugin to change element types (function($) { $.fn.changeElementType = function(newType) { var attrs = {}; $.each(this[0].attributes, function(idx, attr) { attrs[attr.nodeName] = attr.nodeValue; }); this.replaceWith(function() { return $("<" + newType + "/>", attrs).append($(this).contents()); }); }; })(jQuery); // prevent underline for linked images $('a > img').parent().css({'border-bottom' : 'none'}); // mark non-body figures created by knitr chunks as 100% width $('.layout-chunk').each(function(i, val) { var figures = $(this).find('img, .html-widget'); // ignore leaflet img layers (#106) figures = figures.filter(':not(img[class*="leaflet"])') if ($(this).attr('data-layout') !== "l-body") { figures.css('width', '100%'); } else { figures.css('max-width', '100%'); figures.filter("[width]").each(function(i, val) { var fig = $(this); fig.css('width', fig.attr('width') + 'px'); }); } }); // auto-append index.html to post-preview links in file: protocol // and in rstudio ide preview $('.post-preview').each(function(i, val) { if (window.location.protocol === "file:") $(this).attr('href', $(this).attr('href') + "index.html"); }); // get rid of index.html references in header if (window.location.protocol !== "file:") { $('.distill-site-header a[href]').each(function(i,val) { $(this).attr('href', $(this).attr('href').replace(/^index[.]html/, "./")); }); } // add class to pandoc style tables $('tr.header').parent('thead').parent('table').addClass('pandoc-table'); $('.kable-table').children('table').addClass('pandoc-table'); // add figcaption style to table captions $('caption').parent('table').addClass("figcaption"); // initialize posts list if (window.init_posts_list) 
window.init_posts_list(); // implmement disqus comment link $('.disqus-comment-count').click(function() { window.headroom_prevent_pin = true; $('#disqus_thread').toggleClass('hidden'); if (!$('#disqus_thread').hasClass('hidden')) { var offset = $(this).offset(); $(window).resize(); $('html, body').animate({ scrollTop: offset.top - 35 }); } }); } document.addEventListener('DOMContentLoaded', function() { if (is_downlevel_browser()) init_downlevel(); else window.addEventListener('WebComponentsReady', init_distill); });

Why I Indent My Code 8 Spaces

Roger Peng
2018-07-27

Jenny Bryan recently gave a wonderful talk at the useR! 2018 meeting in Brisbane about “Code Smells and Feels” (I recommend you watch a video of that talk). Her talk covers various ways to detect when your code “smells” and how to fix those smells through refactoring. While there is quite a bit of literature on this with respect to other programming languages, it’s not well-covered with respect to R.

In the video version of the talk (not in the slides) Jenny calls out my particular indentation rule, which is to use 8 spaces. In my experience, people tend to find this a rather extreme indentation policy, with maybe 4 spaces being at the outer limit of what they could imagine. But I’ve been using 8 spaces for a long time now and I’ve found that it has a number of benefits.

First off, I did not make up the 8 space indent. I got it from the Linux kernel coding style document. Chapter 1 says:

Tabs are 8 characters, and thus indentations are also 8 characters. There are heretic movements that try to make indentations 4 (or even 2!) characters deep, and that is akin to trying to define the value of PI to be 3.

I’ve found the Linux kernel coding style to be pretty useful for my R programming, but a lot of it is C-specific and so not relevant. Nevertheless, it’s worth a quick peruse.

Personally, I’ve found 8 spaces is good for my aging eyes. I think my rule is that the appropriate number of spaces of indentation is proportional to the square of my age (I’m still working on that model though). At this point, code with a 2 space indent is indistinguishable from flush left.

Before going on, I have to emphasize that the 8 space indent cannot exist in isolation. It has to be coupled with a right-hand side limit of 80 columns. Otherwise, you could just indent yourself off to infinity and there would be no consequences. An 80 column limit forces you to keep your code within reasonable limits. Also, if someone ever needs to read your code on a PDP-11 you’ll be A-okay.

Most importantly though, I’ve found that the 8 space indent serves as a kind of early warning system for “smelly” code. Jenny gave some smelly examples in her talk and I thought I’d reproduce them here. This first example, as Jenny describes, suffers from not using the available class predicate functions to test for “numeric” or “integer”. Here’s the example with a 2 space indent.

bizarro <- function(x) {
  if (class(x)[[1]] == "numeric" || class(x)[[1]] == "integer") {
    -x
  } else if (class(x)[[1]] == "logical") {
    !x
  } else { 
    stop(...) 
  }
}

That first if statement sticks out a little bit because it’s rather long. Better code might use the is.numeric() and is.integer() functions.

Here’s the same example with an 8 space indent.

bizarro <- function(x) {
        if (class(x)[[1]] == "numeric" || class(x)[[1]] == "integer") {
                -x
        } else if (class(x)[[1]] == "logical") {
                !x
        } else { 
                stop(...) 
        }
}

Although it’s not egregious, that first line is pushing up against the 80 column limit on the right-hand side. You might not do anything about it in this case, but the purpose of the indenting system is to at least trigger a reaction.

The next example from Jenny’s talk is a bit more obvious. Here she gives a lesson in excessive if-else statements. The original code with 2 space indent is here.

get_some_data <- function(config, outfile) {
  if (config_ok(config)) {
    if (can_write(outfile)) {
      if (can_open_network_connection(config)) {
        data <- parse_something_from_network()
        if(makes_sense(data)) {
          data <- beautify(data)
          write_it(data, outfile)
          return(TRUE)
        } else {
          return(FALSE)
        }
      } else {
        stop("Can't access network")
      }
    } else {
      ## uhm. What was this else for again?
    }
  } else {
    ## maybe, some bad news about ... the config?
  } 
}

Now, it’s fair to say that this code already looks a bit smelly (fishy?), but it’s maybe passable from a visual standpoint. Let’s take a look at it with 8 space indents.

get_some_data <- function(config, outfile) {
        if (config_ok(config)) {
                if (can_write(outfile)) {
                        if (can_open_network_connection(config)) {
                                data <- parse_something_from_network()
                                if(makes_sense(data)) {
                                        data <- beautify(data)
                                        write_it(data, outfile)
                                        return(TRUE)
                                } else {
                                        return(FALSE)
                                }
                        } else {
                                stop("Can't access network")
                        }
                } else {
                        ## uhm. What was this else for again?
                }
        } else {
                ## maybe, some bad news about ... the config?
        } 
}

Now the code looks downright absurd, practically crying out for refactoring, and rightly so! The five levels of nesting will be unreadable as soon as you blink your eyes.

That’s basically it. I’ve found zero downsides to using an 8 space indent and a number of upsides, including cleaner, more modular code. Because the visual indicator penalizes lots of indenting, you are usually forced to write out separate functions to handle different tasks rather than go one more level in. This not only has the benefit of being modular, but it’s also useful for things like profiling (it can be very uninformative to profile a single monolithic function).