#!/usr/bin/python
#
# Generate monthly web pages with details of commits, builds, and tests.
#
# Copyright (c) 2010-2023 Andreas Gustafsson.  All rights reserved.
# Please refer to the file COPYRIGHT for detailed copyright information.
#

from __future__ import print_function

import os
import sys
from html import escape as html_escape
import itertools
import re
from datetime import date, timedelta
from string import Template
import subprocess
import stat
from collections import defaultdict
import optparse
import traceback

from matplotlib.dates import date2num
from matplotlib.figure import Figure
from matplotlib.backends.backend_agg import FigureCanvasAgg
from matplotlib.backends.backend_svg import FigureCanvasSVG
import matplotlib.dates
import matplotlib.style

from htmlgen import a, body, br, div, head, h1, h2, h3, h4, html, img, object, \
     p, script, span, style, table, td, th, tr, title, tt, escape

from utils import write_file, ts2py, py2ts, adjacent_pairs, rm_f

import bracket
from bracket import *

# Whether the index page titles say "build and test" rather than just "build"
show_test_results = True

# Plot geometry.  The figure size is handed to matplotlib in inches, so
# it is derived from the desired pixel size and the resolution.
plot_dpi = 80.0
plot_size_pixels = (800, 400)
plot_size_inches = tuple(pixels / plot_dpi for pixels in plot_size_pixels)

# Whether to emit CSS and JavaScript that follow the browser's dark mode
# preference (config variable "dark_mode_auto", on by default)
dark_mode_auto = int(config_get('dark_mode_auto', '1')) != 0

# CSS embedded in every generated report page (see make_report_page).
# The class names match those used by the HTML generating functions
# in this file: commit, build, ok, fail, indeterminate, highlight, etc.
stylesheet = '''
.commit, .build, .testname {
    font-family: monospace;
}
.file {
    margin-left: 4ch
}
.summarybuild {
}
table, th, td {
    padding: 5px;
    border: 1px solid black;
    border-collapse: collapse;
}
table.atf_summary, table.atf_summary td {
    border-style: none;
    padding: 2px;
}
.lines {
    text-align: right;
}
a {
    color: #000000;
}
.ok, .ok a {
    color: #00C000;
}
.fail, .fail a {
    color: #C00000;
}
.indeterminate {
    color: #D0D0D0;
}
.build {
    background-color: #e0e0e0;
}
.highlight {
    border-style: solid;
    border-color: purple;
    border-width: 2px;
    border-radius: 4px;
    padding: 2px;
}
'''

# When automatic dark mode is enabled, append color overrides that the
# browser applies only when it prefers a dark color scheme
if dark_mode_auto:
    stylesheet += '''
@media screen and (prefers-color-scheme: dark) {
    body {
        background-color: #000000;
        color: #e0e0e0;
    }
    a {
        color: #e0e0e0;
    }
    .ok, .ok a {
        color: #00E000;
    }
    .fail, .fail a {
        color: #E00000;
    }
    .indeterminate {
        color: #404040;
    }
    .build {
        background-color: #303030;
    }
    .highlight {
        border-color: #FF40FF;
    }
}
'''

# JavaScript for the index pages.  The base script takes a URL fragment
# of the form "YEAR.MONTH...." and redirects the browser to the same
# fragment on the corresponding monthly "commits-YEAR.MONTH.html" page.
index_javascript = '''
// Jump to the build identified by the URL fragment, if any

window.onload = window.onhashchange = function() {
    var fragment = window.location.hash.substr(1);
    if (! fragment)
        return;
    var parts = fragment.split(".");
    var year = parts[0];
    var month = parts[1];
    var path = window.location.pathname;
    path = path.replace(/index\.html$/, "");
    var newurl = path + 'commits-' + year + '.' + month + '.html#' + fragment;
    window.location.href = newurl;
};
'''

# When automatic dark mode is enabled, append a script that, if the
# browser prefers a dark color scheme, rewrites the URLs of the plot
# images (SVG <object> elements and "breaks-" PNG <img> elements) to
# their "-dark" variants.
if dark_mode_auto:
    index_javascript += '''
if (window.matchMedia('screen and (prefers-color-scheme: dark)').matches)
    // Replace images with dark mode ones
    window.addEventListener('DOMContentLoaded', function(e) {
        var i;
        var elts;
        // SVG images
        elts = document.getElementsByTagName('object');
        for (i = 0; i < elts.length; i++) {
            var o = elts[i];
            var d = o['data'];
            parts = d.split('.');
            j = parts.length - 2;
            if (!(parts[j].endsWith('-dark'))) {
                parts[j] += '-dark';
                d = parts.join('.');
                o['data'] = d;
            }
        }
        // PNG images (the non-SVG plots)
        elts = document.getElementsByTagName('img');
        for (i = 0; i < elts.length; i++) {
            var o = elts[i];
            var d = o['src'];
            parts = d.split('.');
            j = parts.length - 2;
            if (d.includes('/breaks-') && !(parts[j].endsWith('-dark'))) {
                parts[j] += '-dark';
                d = parts.join('.');
                o['src'] = d;
            }
        }
    });
'''

# JavaScript for the monthly commits pages.  It adds the CSS class
# "highlight" to the element whose id is "build-" followed by the URL
# fragment; line_items() assigns such ids to the build status lines.
commits_javascript = '''
// Highlight the build identified by the URL fragment, if any

window.onload = window.onhashchange = function() {
    var fragment = window.location.hash.substr(1);
    if (! fragment)
        return;
    var elt = document.getElementById('build-' + fragment);
    if (! elt)
        return;
    elt.classList.add('highlight');
};
'''

# Whether published logs go into the date hierarchy directly below
# htmldir instead of below a single "build" directory (config variable
# "use_subdir_per_year", on by default); see build_html_dir() and
# date_url()
use_subdir_per_year = bool(int(config_get('use_subdir_per_year', '1')))

# The directory where we store files to be published on the web
# pertaining to the build at time "ts"
# XXX code duplication wrt date_url()

def build_html_dir(ts):
    """Return the web publishing directory for the build at time "ts"."""
    if not use_subdir_per_year:
        # Single 'build' directory with one subdirectory per RCS date
        rcs_stamp = ts2rcs(ts)
        return os.path.join(config['htmldir'], 'build', rcs_stamp)
    # Date hierarchy directly below the HTML directory
    return date_hierarchy_dir(config['htmldir'], ts)

# Publish the build/test log file "f" from the build at time "ts" in
# the appropriate directory, calling "method" to link or copy it
# into place, possibly with filtering.

def publish_log(ts, f, method):
    """Publish file "f" from the results of the build at time "ts".

    The publishing directory is created if needed, and "method" is
    called as method(source_path, destination_path) unless the
    destination already exists.
    """
    target_dir = build_html_dir(ts)
    mkdir_p(target_dir)
    source_path = os.path.join(results_dir(ts), f)
    target_path = os.path.join(target_dir, f)
    if os.path.exists(target_path):
        # Already published; nothing to do
        return
    method(source_path, target_path)

def date_url(ts, fn):
    """Return the relative URL of the published file "fn" for the build at "ts"."""
    # With a subdirectory per year, the date hierarchy starts at the
    # top level; otherwise it lives below the single directory 'build'
    leading = [] if use_subdir_per_year else ['build']
    url_parts = leading + date_hierarchy_list(ts) + [fn]
    return '/'.join(url_parts)

# Clean up control characters for publishing on the web (as plain
# text, not HTML).  This used to run "col -b", but that can fail
# with "col: Invalid or incomplete multibyte or wide character"
# on Linux, losing not only all data after the offending character,
# but also some buffered data before it.

def cleanup_controls_gzipped(src, dst):
    """Copy the gzipped file "src" to the gzipped file "dst", stripping
    or replacing bytes that are not suitable for plain text display."""
    # The substitutions are order dependent: NULs must go before the
    # escape sequences are processed, and the remaining control
    # characters must go after.
    substitutions = (
        # NULs
        (br'[\x00]', b''),
        # 8-bit data
        (br'[\x80-\xFF]', b'X'),
        # ANSI CSI sequences
        (br'\x1b\[[\x30-\x3F]*[\x20-\x2F]*[\x40-\x7E]', b''),
        # Other ANSI escapes
        (br'\x1b[\x20-\x2F]*[\x30-\x7E]', b''),
        # Control characters other than tab and LF
        (br'[\x00-\x08\x0B-\x1F]', b''),
    )
    with gzip.open(src, 'rb') as infile:
        content = infile.read()
    for pattern, replacement in substitutions:
        content = re.sub(pattern, replacement, content)
    with gzip.open(dst, 'wb') as outfile:
        outfile.write(content)

def annotate_tail_gzipped(src, dst):
    """Copy the gzipped file "src" to the gzipped file "dst", prepending
    a header line stating the number of lines it contains."""
    # Read all the lines up front so that they can be counted
    with gzip.open(src, 'r') as infile:
        log_lines = infile.readlines()
    header = "Last %d lines of build log:\n" % len(log_lines)
    # Write the header followed by the unmodified lines
    with gzip.open(dst, 'w') as outfile:
        outfile.write(header.encode('ASCII'))
        outfile.writelines(log_lines)

# Color the HTML "text" green if "truth", red otherwise

def green_if(text, truth):
    """Wrap the HTML "text" in a span of class 'ok' if "truth" is true,
    or of class 'fail' otherwise."""
    return span({'class': 'ok' if truth else 'fail'}, text)

# Generate HTML describing the outcome of one stage of the build/test
# process for the build at time "ts"; the stage is identified by "tag".
# If the status is unknown, return None, otherwise return the HTML as
# a string.
#
# Also, when the HTML returned contains a link to a log file, publish
# that file so that the link works.

def fmt_status(ts, tag):
    """Return HTML describing the outcome of stage "tag" of the build
    at time "ts", or None if no status is cached for that stage.

    A failed stage is linked to its log file, which is published as
    a side effect.  If publishing fails, a warning and traceback are
    printed and the text is returned without a link.
    """
    status = get_cached_status_if_any(ts, tag + "_status")
    if status is None:
        return None

    # Build the text to return in "r", and add HTML markup later
    r = tag + ": " + ("OK" if status == 0 else "failed")

    if tag == 'build':
        # The log line count is best-effort: it is simply omitted if
        # it cannot be obtained.  Catch Exception rather than using a
        # bare except so that KeyboardInterrupt and SystemExit still
        # propagate.
        try:
            log_lines = get_cached_status(ts, 'build_log_lines')
            r += " with %i lines of log" % log_lines
        except Exception:
            pass

    if status != 0:
        if tag == 'build':
            # For builds, only the annotated tail of the log is published
            fn = '%s.log.tail' % tag
            pubmethod = annotate_tail_gzipped
        else:
            fn = '%s.log' % tag
            pubmethod = cleanup_controls_gzipped
        # The published file is gzipped, but the URL omits the suffix
        zfn = fn + '.gz'
        try:
            publish_log(ts, zfn, pubmethod)
        except Exception as e:
            print("warning: could not publish %s log %s for %s: %s" % \
                  (tag, zfn, ts2rcs(ts), str(e)))
            traceback.print_exc()
        else:
            url = date_url(ts, fn)
            r = a({'href': url}, r)

    return green_if(r, status == 0)

def test_url(ts, ext, fragment = None):
    """Return the relative URL of the test output file "test.<ext>" for
    the build at time "ts", followed by "fragment" if one is given."""
    suffix = '' if fragment is None else fragment
    return date_url(ts, 'test.' + ext) + suffix

def make_test_output_link(ts, descr, ext, force, pubmethod, fragment = None):
    """Publish "test.<ext>.gz" for the build at time "ts" using
    "pubmethod" and return "descr" as a link to the published file.

    If publishing fails, return the unlinked "descr" when "force" is
    true, or None otherwise.
    """
    try:
        publish_log(ts, 'test.' + ext + '.gz', pubmethod)
    except Exception:
        # This happens all the time, so no warning is printed
        return descr if force else None
    return a({'href': test_url(ts, ext, fragment)}, descr)

# Generate HTML describing the outcome of a full build + test run

def fmt_build_as_list(ts):
    """Return a list of HTML strings describing the build at time "ts":
    one item per build/install/boot stage with a known status, plus one
    item summarizing the ATF test results if the tests were attempted.

    As a side effect, the log and test output files that the returned
    HTML links to are published.
    """
    # Format build/install/boot results (but not test)
    statuses = [fmt_status(ts, tag) for tag in ('build', 'install', 'boot')]
    statuses = [s for s in statuses if s is not None]

    # Format ATF test results
    db = get_db(ts)
    if not (db.get('test_status') or db.get('test_completed_status')):
        # The tests were not attempted
        return statuses

    # The actual value of test_status is not reliable in old tests,
    # so we just test for its presence to see if the tests were attempted,
    # and the presence of passed_tests to see if they completed
    if db.get('passed_tests'):
        test_items = []
        for tag in ('passed', 'skipped', 'expected_failure', 'failed'):
            count = db.get(tag + '_tests')
            if count is not None:
                test_items.append("%s %s" % (count, tag))
    else:
        msg = 'did not complete'
        if int(config_get('report_noncompletion_reason', '0')):
            # Get the reason as a Unicode string (possibly empty)
            def f(ts):
                reason = noncompletion_reason(ts)
                if reason is None:
                    return ''
                return reason
            reason = db_memoize(config, f, ts, 'noncompletion_reason')
            if reason:
                msg += ': ' + reason
        test_items = [escape(msg)]

    tests_ok = (db.get('failed_tests') == '0')
    test_html = "tests: " + ", ".join(test_items)
    test_html = make_test_output_link(ts, test_html, 'log', True, cleanup_controls_gzipped)

    # Add links to ATF output, regardless of whether the tests passed
    atf_log_items = []
    for descr, ext in [('raw', 'tps'),
                       ('xml', 'xml'),
                       ('html', 'html')]:
        item = make_test_output_link(ts, descr, ext, False, os.link)
        if item is None:
            continue
        atf_log_items.append(item)
        # If we made a HTML link, publish the CSS, too
        if ext == 'html':
            # Catch Exception rather than using a bare except so that
            # KeyboardInterrupt and SystemExit are not turned into a
            # "missing css" warning
            try:
                publish_log(ts, 'tests-results.css.gz', os.link)
            except Exception:
                print("warning: missing css for " + ts2rcs(ts))

    if atf_log_items:
        test_html += ", ATF output: " + ', '.join(atf_log_items)

    statuses.append(green_if(test_html, tests_ok))
    return statuses

def fmt_build(ts):
    """Return the build and test status of the build at time "ts" as a
    single comma-separated HTML string."""
    status_items = fmt_build_as_list(ts)
    return ", ".join(status_items)

def publish_nop(src, dst):
    """Publishing method that does nothing; it has the same signature
    as the other methods passed to publish_log()."""
    return None

def test_fragment(test_tuple):
    testname = "%s/%s" % test_tuple
    return '#' + re.sub(r'/', '_', testname)

def fmt_test_name(ts, test_tuple, make_link):
    """Return the name of the test case "test_tuple" as text, or, if
    "make_link" is true, as a link into the HTML test report of the
    build at time "ts"."""
    testname = "%s/%s" % test_tuple
    if make_link:
        # The HTML report is published elsewhere, hence publish_nop
        return make_test_output_link(ts, testname, 'html', False,
                                     publish_nop, test_fragment(test_tuple))
    return testname

def fmt_test_change(ts, class_, label, tests):
    """Return a list containing one HTML span of CSS class "class_" that
    lists the test cases "tests" after "label", or an empty list if
    there are no tests.  More than 200 tests are summarized as a count.
    """
    ordered = sorted(tests)
    # The HTML report lacks anchors for successful tests, so only
    # failures can be linked
    make_link = False
    if class_ == 'fail':
        make_link = bool(int(config_get('have_atf_xml', '1')))
    if not ordered:
        return []
    if len(ordered) > 200:
        listtext = "%d test cases" % len(ordered)
    else:
        listtext = ', '.join(fmt_test_name(ts, t, make_link) for t in ordered)
    return [span({'class': class_}, label + ": " + listtext)]

# Return a sequence of HTML strings summarizing the changes in the
# test results for "ts" compared to the previous ones.  If there are
# no changes, return an empty sequence.

def fmt_test_deltas_as_list(ts):
    """Return a list of HTML strings summarizing how the test results of
    the build at time "ts" differ from the previous test results, or an
    empty list if there are no changes or either result set is missing.
    """
    prev_ts, before = prev_test_results(build_dates, ts)
    if prev_ts is None:
        return []
    after = atf_test_results_or_none_memoize(ts)
    if before is None or after is None:
        return []
    # Only tests present in both runs can be compared
    common = set(before.keys()) & set(after.keys())
    changed = [t for t in common if before[t] != after[t]]
    # A changed test with a true outcome before is a new failure; one
    # with a true outcome now is no longer failing
    newly_failing = [t for t in changed if before[t]]
    newly_passing = [t for t in changed if after[t]]
    return (fmt_test_change(ts, 'ok', 'no longer failing', newly_passing) +
            fmt_test_change(ts, 'fail', 'new failures', newly_failing))

# Generate HTML describing a commit

def fmt_commit(c):
    """Return HTML describing the commit "c", as formatted by the
    version control backend."""
    formatter = bracket.vc.format_commit_html
    return formatter(c)

def line_items(known_builds, commits):
    """Return a list of HTML strings for "commits": anchors and one div
    per commit, plus a build status div after each group of commits
    whose timestamp is in "known_builds"."""
    items = []
    # Group by commit date so that if multiple commits occur within a
    # second, we only show the build after the last one
    for ts, same_ts_commits in itertools.groupby(commits, lambda c: c.timestamp):
        # Add an anchor to support links to a specific date
        items.append(a({'name': ts2rcs(ts)}, ''))
        # Also add an anchor for the commit hash when available
        sha = bracket.ts2sha.get(ts)
        if sha:
            items.append(a({'name': sha}, ''))
        items.extend(div({'class': 'commit'}, fmt_commit(commit))
                     for commit in same_ts_commits)
        if ts not in known_builds:
            continue
        # First a line of build status, then optionally a line of test changes
        build_html = fmt_build(ts)
        if int(config_get('report_test_changes', '1')):
            # This is rather slow
            deltas = fmt_test_deltas_as_list(ts)
            if deltas:
                build_html += div(', '.join(deltas))
        # The id may be used for fragment highlighting
        items.append(div({'class': 'build', 'id': 'build-' + ts2rcs(ts)},
                         build_html))
    return items

# Generate HTML for a list of commits and builds

def fmt_all(known_builds, commits):
    """Return the HTML for "commits" and their builds as one string,
    with a newline after each item."""
    return ''.join(item + '\n' for item in line_items(known_builds, commits))

# Generate a complete HTML page containing a list of commits and builds

def make_report_page(body_str, title_str, script_str = ''):
    """Return a complete HTML page with the title "title_str", the
    shared stylesheet, the JavaScript "script_str", and the body
    "body_str" followed by an anchor named 'end'."""
    page_head = head(style({'type': 'text/css'}, stylesheet),
                     script(script_str),
                     title(title_str))
    end_anchor = a({'name': 'end'}, '')
    page_body = body(body_str + end_anchor)
    return html(page_head, page_body)

# Generate plot data for build break plots.  Return a list of
# tuples, with each tuple containing:
#
#   0   Date as seconds from epoch
#   1   Number of lines in build log
#   2   Number of lines in build log if success, None otherwise
#   3   Number of lines in build log if failure, None otherwise
#
# Columns #2 and #3 are used for plotting the red/green markers;
# None denotes a missing value, and is used as a placeholder
# wherever no marker should be shown.

def generate_build_plot_data(build_dates):
    """Return the build plot data for the builds at times "build_dates"
    as a list of (timestamp, log lines, log lines if the build
    succeeded else None, log lines if it failed else None) tuples.

    Builds without a known build log size are skipped.  Each build
    with a known next commit contributes a second tuple, without
    markers, at the time of that commit.
    """
    r = []
    for ts in build_dates:
        # The build log size applies to the situation
        # _after_ a commit, and we assume it still
        # applies before the next commit
        # XXX this doesn't actually do that; it assumes
        # it applies before the next _build_, which is not
        # the same thing
        # Catch Exception rather than using a bare except so that
        # interrupting this slow loop is not mistaken for missing data
        try:
            logsize = get_cached_status(ts, 'build_log_lines')
        except Exception:
            continue

        success_pt = None
        fail_pt = None
        status = get_cached_status_if_any(ts, 'build_status')
        if status is None:
            pass
        elif status == 0:
            success_pt = logsize
        elif status > 0:
            fail_pt = logsize

        r.append((ts, logsize, success_pt, fail_pt))

        # Try to find the next commit, and if there is one,
        # plot a horizontal line segment indicating that the
        # build log size remains constant until that commit
        try:
            next_commit = ts2cno(ts) + 1
            r.append((cno2ts(next_commit), logsize, None, None))
        except Exception:
            continue
    return r

# As above, but for the test failure plots.  The tuples
# contain just the date and the number of failed tests.

def generate_test_plot_data(build_dates, tag = 'failed_tests'):
    """Return a list of (timestamp, value) tuples for the builds at
    times "build_dates" that have a cached status value for "tag"."""
    lookups = ((ts, get_cached_status_if_any(ts, tag)) for ts in build_dates)
    return [(ts, value) for ts, value in lookups if value is not None]

def this_month():
    """Return the first day of the current month as a date."""
    today = date.today()
    return date(today.year, today.month, 1)

def inc_month(d):
    """Return the date "d" moved forward by one calendar month, keeping
    the day of the month unchanged."""
    if d.month < 12:
        return d.replace(month = d.month + 1)
    # December rolls over into January of the following year
    return d.replace(year = d.year + 1, month = 1)

def month_list(first_month, last_month):
    """Return the list of dates obtained by stepping one month at a
    time from "first_month" for as long as the date does not exceed
    "last_month"."""
    months = []
    current = first_month
    while True:
        if current > last_month:
            return months
        months.append(current)
        current = inc_month(current)

def pydate2ts(d):
    """Return the date "d" as an integer number of seconds since the
    epoch, taking "d" to mean midnight local time.

    This uses time.mktime() instead of strftime("%s"); the "%s"
    directive is a platform-specific extension that is not supported
    everywhere.
    """
    import time
    return int(time.mktime(d.timetuple()))

def pydate2rcs(d):
    """Return the date "d" as a dotted RCS style date string of the
    form YYYY.MM.DD.00.00.00."""
    fields = (d.year, d.month, d.day, 0, 0, 0)
    return ".".join("%02d" % field for field in fields)

def ts_month(ts):
    """Return the first day of the month containing the timestamp "ts",
    as a date."""
    fields = ts2rcs(ts).split('.')
    year = int(fields[0])
    month = int(fields[1])
    return date(year, month, 1)

def gzip_file(fn):
    """Compress the file "fn" into "fn.gz" using the external gzip
    program, then delete "fn".

    Raises subprocess.CalledProcessError if gzip exits with a nonzero
    status, or OSError if it cannot be run.  In either case "fn" is
    left in place and the incomplete "fn.gz" is removed.
    """
    gz_fn = fn + ".gz"
    try:
        # The context managers close both files even if gzip fails
        with open(fn, "rb") as gzip_in, open(gz_fn, "wb") as gzip_out:
            subprocess.check_call(["gzip"], stdin = gzip_in, stdout = gzip_out)
    except Exception:
        # Don't leave a truncated .gz file behind; main() uses the
        # modification time of the .gz file to decide whether a report
        # is up to date
        try:
            os.unlink(gz_fn)
        except OSError:
            pass
        raise
    # Only delete the original once it has been compressed successfully
    os.unlink(fn)

def plot_file_name_rel(prefix, month, dark_mode, ext):
    """Return the file name of a monthly plot, relative to the HTML
    directory: "<prefix>-YYYY.MM[-dark].<ext>"."""
    dark_suffix = "-dark" if dark_mode else ""
    return "%s-%04d.%02d%s.%s" % (prefix, month.year, month.month,
                                  dark_suffix, ext)

# Returns HTML for displaying the plot.

def make_plot_html(month, prefix, ext, htmldir):
    """Return HTML for displaying the (non-dark) plot for "month".

    "ext" selects the plot file type and must be 'svg' or 'png';
    any other value raises ValueError.  This check is an explicit
    raise rather than an assert so that it is still made when Python
    runs with -O.  "htmldir" is accepted but not used.
    """
    if ext not in ('svg', 'png'):
        raise ValueError("unsupported plot file type: %r" % (ext,))
    outputfilerel = plot_file_name_rel(prefix, month, False, ext)
    if ext == 'svg':
        # With an img element instead of object, links don't work
        return object({'type': 'image/svg+xml', 'data': outputfilerel,
            'width': str(plot_size_pixels[0]), 'height': str(plot_size_pixels[1])}, '') + '\n'
    return p(img({'src': outputfilerel})) + '\n'

# Generate descriptive text.  "ext" is "png" or "svg".

def description(ext):
    """Return the descriptive HTML shown on the summary index page:
    an introduction, the latest build results, and an explanation of
    the plots.  "ext" is "png" or "svg" and selects the paragraph that
    points to the other version of the page."""
    update_fq = config.get('update_frequency')
    update_fq_text = ('It is updated ' + update_fq + '.') if update_fq else ''

    arch = config['arch']
    machine, _machine_arch = split_arch(arch)

    intro_template = Template('''
This web page displays the results of periodic automated
builds and tests of
<a href="http://www.netbsd.org/releases/current.html"
>NetBSD-${branch}</a>/<a
href="http://www.netbsd.org/ports/${machine}/">${arch}</a>.
${note}
''')
    intro = intro_template.substitute(
        branch = branch_description(config),
        arch = arch,
        machine = machine,
        note = config_get('variant_description_html', ''))
    r = p(intro + update_fq_text)

    # Summarize the current build status, using the most recent build
    # that has a build status
    for ts in reversed(build_dates):
        if get_cached_status_if_any(ts, 'build_status') is None:
            continue
        rcsdate = ts2rcs(ts)
        r += a({'name': 'latest'}, h2('Latest build results: ' + rcsdate))
        r += p(span({'class': 'summarybuild'}, fmt_build(ts)))

        if branch_name(config) == 'HEAD':
            r += p('To reproduce the source used for this build, use ' + \
                tt('cvs checkout -D %s' % rcsdate))
        # XXX not implemented for branches other than HEAD
        break

    r += a({'name': 'history'}, h2('Build status history'))
    r += p('''
The graphs below visualize the state of the
build by plotting the number of lines in the build log from
<tt>build.sh</tt> as a function of the source date,
and coloring the points red or green depending on
whether the build succeeded or failed.
''')

    r += p('''
The plots are constructed by looking for changes in the success/failure
status of the periodic
builds as well as significant changes in the size of the build log, and
then pinpointing the time when the change occurred using binary
search.  Note that this is not guaranteed to find every build failure,
because it can miss cases where the build is broken and then fixed again
between two consecutive periodic builds. The converse case, when the
build is fixed and then broken again between two consecutive periodic
builds, is usually detected because the size of the build log has
changed.
''')

    # Point to the version of the page using the other image format
    if ext == 'svg':
        r += p('''
Viewing the graphs requires an
<a href="http://www.w3.org/Graphics/SVG/">SVG</a>-capable browser.
If you see no graphs below, please try the
<a href="index-png.html">non-SVG version</a>.
''')
    elif ext == 'png':
        r += p('''
If your browser supports
<a href="http://www.w3.org/Graphics/SVG/">SVG</a>,
you can use the
<a href="index.html">SVG version</a>.
''')
    r += p('''
Clicking on the [details] links will bring up a
list of the month\'s commits and builds in chronological order, as well
as results from automated tests of installing the build,
booting the installed system, and running the ATF tests using <a
href="http://www.gson.org/netbsd/anita/">Anita</a>.
''')
    return r

# Generate the yearly HTML "build and test status" index pages for the
# "date_range", which is the whole time period from the month defined
# by the config variables report_from_year and report_from_month to the
# current time.  The page for the current year is different from the
# others.

def gen_index_pages(htmldir, date_range):
    """Generate the index pages and the monthly plots in "htmldir".

    "date_range" is a (first, last) pair of dates passed to
    month_list() to determine the months covered.  For each of the
    image types 'svg' and 'png', this writes a summary page
    "index-<ext>.html" covering the 12 most recent months and one
    page "index-<year>-<ext>.html" per year; "index.html" is made a
    hard link to the SVG summary page.  It then writes the plot for
    each month in both PNG and (gzipped) SVG format, each in a light
    and a dark variant.

    Reads the global "build_dates" and prints timing information.
    """
    # XXX this includes older data
    t0 = time.time()
    d0 = generate_build_plot_data(build_dates)
    d1 = generate_test_plot_data(build_dates, 'failed_tests')
    d2 = generate_test_plot_data(build_dates, 'expected_failure_tests')
    t1 = time.time()
    print("  getting plot data took %i seconds" % (t1 - t0))

    #report_from_month = date(int(config['report_from_year']), int(config['report_from_month']), 1)
    #months = month_list(ts_month(build_dates[0]), ts_month(build_dates[-1]))
    # List the months newest first
    months = month_list(date_range[0], date_range[1])
    months.reverse()

    if show_test_results:
        what = "build and test"
    else:
        what = "build"

    title = "NetBSD-%s/%s %s status" % \
        (branch_description(config), config['arch'], what)
    default_ext = 'svg'

    this_year = date.today().year

    n_index_pages = 0
    for ext in ('svg', 'png'):
        # Generate a single index page.  Captures ext, d0, etc.
        # When is_summary is false, it also reads "year" from the
        # loop over year_groups below, so it must only be called
        # from within that loop.
        def gen_one_index_page(ymonths, is_summary):
            # ymonths contains a list of months for the current year
            r = ''
            r += h1(title)
            if is_summary:
                r += description(ext)

            for month in ymonths:
                combined_plot = make_plot_html(month, "breaks", ext, htmldir)
                dotted_month_year = "%04d.%02d" % (month.year, month.month)
                # Link to the end of the page for the current month only
                fragment = ['', '#end'][month == this_month()]
                r += h3(month.strftime('%B %Y ') +
                        a({'href': "commits-%s.html%s" % (dotted_month_year, fragment)},
                          "[details]"))
                r += combined_plot

            if is_summary:
                # Link to the per-year pages of all years but the current one
                years = [y for y, g in itertools.groupby
                         (months, lambda m: m.year) if y != this_year]
                if years:
                    r += h2("Earlier years")
                    for y in years:
                        r += h3(a({'href': "index-%d-%s.html" % (y, ext)}, str(y)))

                #r += h2('Recently failed test cases')
                #r += render_punchcard_html(...)

            r += config['footer']
            r += '\n'

            # Determine the file name and write the file
            if is_summary:
                fn = "index-%s.html" % ext
            else:
                fn = "index-%d-%s.html" % (year, ext)
            indexfile = os.path.join(htmldir, fn)
            write_file(indexfile, make_report_page(r, title, index_javascript), 't')

            # Create "index.html" link to the default index
            if is_summary and ext == default_ext:
                default_index = os.path.join(htmldir, 'index.html')
                rm_f(default_index)
                os.link(indexfile, default_index)

        # Generate the top-level index/summary page, with the last N months
        gen_one_index_page(months[0:12], True)

        # Generate the per-year summary pages
        year_groups = itertools.groupby(months, lambda m: m.year)
        for year, ymonths in year_groups:
            gen_one_index_page(ymonths, False)
            n_index_pages += 1

    t2 = time.time()
    print("  generating %d yearly index pages (excluding plots) took %i seconds" % (n_index_pages, t2 - t1))

    # Generate the plots
    for month in months:
        for dark_mode in (False, True):
            if dark_mode:
                matplotlib.style.use('dark_background')
            else:
                matplotlib.style.use('default')

            fig = Figure(figsize = plot_size_inches, dpi = plot_dpi)

            # Set the x axis to span the month, with a labeled major
            # tick per week and a minor tick per day
            def common_ax_setup(ax):
                ax.set_xlim((date2num(month), date2num(inc_month(month))))
                ax.xaxis.set_major_locator(matplotlib.dates.WeekdayLocator())
                ax.xaxis.set_minor_locator(matplotlib.dates.DayLocator())
                ax.xaxis.set_major_formatter(matplotlib.dates.DateFormatter("%Y-%m-%d"))

            # Select the data points for the month, plus a week on
            # either side
            def data_for_month(data, month):
                slop = timedelta(days = 7)
                date0 = py2ts(month - slop)
                date1 = py2ts(inc_month(month) + slop)
                return [d for d in data if d[0] >= date0 and d[0] < date1]

            # Build status plot

            ax = fig.add_subplot(211)
            data = data_for_month(d0, month)
            xdata = [date2num(ts2py(d[0])) for d in data]
            # This is an ugly hack to make hyperlinks in SVG plots open in
            # the main page rather than the plot's frame.
            def retarget_to_parent(url):
                return "javascript:parent.document.location.href='%s'" % url
            urls = [retarget_to_parent(commit_url(d[0])) for d in data]
            s = ax.scatter(xdata, [d[2] for d in data], marker='+', color = '#00c000', label = 'Successful build')
            s.set_urls(urls)
            s = ax.scatter(xdata, [d[3] for d in data], marker='x', color = '#c00000', label = 'Failed build')
            s.set_urls(urls)
            ax.plot(xdata, [d[1] for d in data], color = '#808080', label = 'Lines in build log')
            ax.set_title('Build status')
            ax.set_ylim(bottom = 0, top = max([1.0] + [d[1] * 1.2 for d in data]))
            # ax.grid(True)
            # ax.set_xlabel('time')
            ax.set_ylabel('Lines in build log')
            common_ax_setup(ax)

            # Optionally annotate build breaks with committer names
            # (not yet fully functional)
            if False:
                for datapair in adjacent_pairs(data):
                    tsp = tuple([d[0] for d in datapair])
                    # XXX this does not work: both items of the tuple are the same
                    yp = tuple([d[1] for d in datapair])
                    status_pair = tuple([get_cached_status_if_any(tsp[i], 'build_status') for i in (0, 1)])
                    if status_pair[0] == 0 and \
                       status_pair[1] == 1:
                       commits = get_commits(*tsp)
                       committers = set(c.committer for c in commits)
                       if len(committers) == 1:
                           committer = list(committers)[0]
                           ax.annotate(committer, xy = (date2num(ts2py(tsp[1])), yp[1]),
                               xytext = (20, 20), textcoords = 'offset points',
                               ha = 'left', va = 'bottom',
                               bbox = dict(boxstyle = 'round,pad=0.5', fc = 'yellow', alpha = 0.5),
                               arrowprops = dict(arrowstyle = '->', connectionstyle = 'arc3,rad=0'))

            # Test status plot

            ax = fig.add_subplot(212)
            ax.set_title('Number of failed test cases')

            # Track the largest value plotted to scale the y axis
            ymax = 1.0
            for data, label, color in ((d1, 'Unexpected', '#8080C0'), (d2, 'Expected', '#808040')):
                month_data = data_for_month(data, month)
                xdata = [date2num(ts2py(d[0])) for d in month_data]
                ydata = [d[1] for d in month_data]
                ax.plot(xdata, ydata, marker='x', color = color, label = label)
                ymax = max(ymax, max([0] + ydata))

            ax.set_ylabel('Failed test cases')
            ax.set_ylim(bottom = 0, top = ymax * 1.4)
            #This only works in matplotlib 1.2.1 and newer
            #ax.legend(loc='upper right', ncol=2, fontsize='small') # , frameon=False
            ax.legend(loc='upper right', ncol=2, prop = { 'size': 'small' })
            # ax.set_yscale('log')
            common_ax_setup(ax)

            #fig.tight_layout()
            fig.subplots_adjust(hspace = 0.5)

            prefix = "breaks" # XXX

            # Render the figure as both PNG and SVG; only the SVG is gzipped
            for canvas,ext in ((FigureCanvasAgg(fig), 'png'), (FigureCanvasSVG(fig), 'svg')):
                outputfilerel = plot_file_name_rel(prefix, month, dark_mode, ext)
                outputfileabs = os.path.join(htmldir, outputfilerel)
                canvas.print_figure(outputfileabs, dpi = plot_dpi)
                if ext == 'svg':
                    gzip_file(outputfileabs)

    t3 = time.time()
    print("  generating plots took %i seconds" % (t3 - t2))

# Returns a lock on the updated reports

def main(htmldir, args):
    """Regenerate the index pages, plots, and monthly commit reports
    in "htmldir", and return the lock held on the reports.

    "args" is a list of command line arguments; the only option is
    --full, which regenerates every monthly report.  Without it, a
    month is skipped when its gzipped report is newer than all of
    that month's database files.  Sets the global "build_dates".
    """
    parser = optparse.OptionParser()
    parser.add_option("--full",
                      help="regenerate all reports, not just changed ones",
                      action="store_true")
    (options, args) = parser.parse_args(args)

    use_current_repository()
    global build_dates
    build_dates = existing_build_dates()
    known_builds = set(build_dates)

    # Lock the reports while modifying them
    lockfd = lock(os.path.join(htmldir, 'lock'), verbose = True)

    t0 = time.time()
    # Gather data for optimization
    # month_mtime is a mapping from month -> newest modification time
    # of any db file in that month
    month_mtime = defaultdict(int)
    all_db_files = glob(os.path.join(config['results_root'],
                                     date_hieararchy_dir_pattern, 'bracket.db'))
    for fn in all_db_files:
        # The directory containing the db file is named after the
        # RCS date; its first two fields are the year and the month
        rcsdate = os.path.basename(os.path.dirname(fn))
        month = '.'.join(rcsdate.split('.')[0:2])
        mtime = os.stat(fn)[stat.ST_MTIME]
        month_mtime[month] = max(month_mtime[month], mtime)

    begin_pydate = date(int(config['report_from_year']),
                        int(config['report_from_month']),
                        1)
    end_pydate = ts2py(last_commit_ts() + 1).date()

    t1 = time.time()
    print("examining modification times took %i seconds" % (t1 - t0))

    gen_index_pages(htmldir, (begin_pydate, end_pydate))

    t2 = time.time()
    print("generating index pages took %i seconds" % (t2 - t1))

    # Get all commits for the time period of interest, group them by
    # month, and generate the monthly HTML pages

    # First get an array of commit objects, each representing an
    # individual file revision.
    all_commits = get_commits(pydate2ts(begin_pydate),
                              pydate2ts(end_pydate) + 86400 * 2)

    t3 = time.time()
    print("getting commits took %i seconds" % (t3 - t2))
    sys.stdout.flush()

    # Generate monthly commit/build lists
    for month, group in itertools.groupby(all_commits, commit_month):
        fn = os.path.join(htmldir, "commits-" + month + ".html")

        # Optimize: if the report for a month is newer than all the database
        # files for that month, it does not need to be updated.  Only a
        # failing stat (typically because the report does not exist yet)
        # means "no report"; other exceptions are not swallowed.
        try:
            mtime = os.stat(fn + '.gz')[stat.ST_MTIME]
        except OSError:
            mtime = None
        if mtime is not None and mtime > month_mtime[month] and not options.full:
            # not updating
            continue
        print("updating", fn)

        thismonth = this_month()
        this_month_dotted = "%04d.%02d" % (thismonth.year, thismonth.month)
        if month == this_month_dotted:
            footer = p('For the very latest commits, see the ' + \
                a({'href': 'http://mail-index.netbsd.org/source-changes/'},
                  'source-changes mailing list archives'))
        else:
            footer = ''
        footer += p(a({'href': './'}, 'Back to index page'))
        write_file(fn, make_report_page(fmt_all(known_builds, group) + footer,
                                        month, commits_javascript), 't')
        gzip_file(fn)

        t4 = time.time()
        print("generating %s took %i seconds" % (fn, t4 - t3))
        t3 = t4
    print("done")
    sys.stdout.flush()
    return lockfd

# Link content to url if pred is true, or return it as such if not

def link_if(pred, url, content):
    """Return "content" as a link to "url" if "pred" is true, or
    unchanged otherwise."""
    return a({'href': url}, content) if pred else content

# Generate a HTML "punch card" report

def render_punchcard_html(ts_list, data):
    """Return a HTML table with one row per item in "data" and, in each
    row, one cell per build in "ts_list" followed by the test name.

    Each item in "data" is a mapping with the keys 'outcomes' (indexed
    like "ts_list"), 'test_tuple', and 'test_name'.  An outcome of True
    is shown as a check mark, False as a cross linked to the test
    report, and anything else as an empty cell.
    """
    def outcome_td(ts, test_tuple, outcome):
        if outcome is True:
            cls, text = 'ok', '&check;'
        elif outcome is False:
            # multiplication x; the cross mark (&#x274c;) is too large
            # in Safari
            cls, text = 'fail', '&#x2715;'
        else:
            cls, text = 'indeterminate', ''
        url = test_url(ts, 'html', test_fragment(test_tuple))
        return td({'class': cls}, link_if(outcome is False, url, text))

    rows = []
    for line in data:
        outcomes = line['outcomes']
        cells = ''.join([outcome_td(ts, line['test_tuple'], outcomes[i])
                         for i, ts in enumerate(ts_list)])
        name_cell = td({'class': 'testname'}, line['test_name'])
        rows.append(tr(cells + name_cell + '\n'))
    return div(table({'class': 'atf_summary'}, ''.join(rows)))
