# Source code for pyGenClean.LaTeX.auto_report

#!/usr/bin/env python2.7

# This file is part of pyGenClean.
#
# pyGenClean is free software: you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
# Foundation, either version 3 of the License, or (at your option) any later
# version.
#
# pyGenClean is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along with
# pyGenClean.  If not, see <http://www.gnu.org/licenses/>.


import os
import re
from datetime import datetime

from . import utils as latex
from ..pipeline_error import ProgramError
from .. import __version__ as pygenclean_version


def create_report(outdirname, report_filename, **kwargs):
    """Creates a LaTeX report.

    :param outdirname: the name of the output directory.
    :param report_filename: the name of the file.
    :param kwargs: the content of the report (see below).

    :type outdirname: str
    :type report_filename: str

    :raises AssertionError: if a required keyword argument is missing, or if
                            conflicting keyword arguments are given.
    :raises ProgramError: if an ``IOError`` occurs while rendering or writing
                          the report.

    The method steps are described either by ``steps``, ``descriptions`` and
    ``long_descriptions`` (in which case ``steps_summary.tex`` is written in
    ``outdirname``), or by an already existing ``steps_filename`` (but not
    both). The following keyword arguments are always required:
    ``summaries``, ``background``, ``project_name``, ``summary_fn``,
    ``report_title``, ``report_author``, ``initial_files``,
    ``final_nb_markers``, ``final_nb_samples``, ``final_files``,
    ``plink_version`` and ``graphic_paths_fn`` (which might be ``None``).

    """
    # Checking the required variables
    if "steps" in kwargs:
        assert "descriptions" in kwargs, "missing 'descriptions'"
        assert "long_descriptions" in kwargs, "missing 'long_descriptions'"
        assert "steps_filename" not in kwargs, \
            "'steps' and 'steps_filename' are mutually exclusive"
    else:
        assert "steps_filename" in kwargs, \
            "either 'steps' or 'steps_filename' is required"
        assert "descriptions" not in kwargs, \
            "'descriptions' requires 'steps'"
        assert "long_descriptions" not in kwargs, \
            "'long_descriptions' requires 'steps'"

    required_names = (
        "summaries", "background", "project_name", "summary_fn",
        "report_title", "report_author", "initial_files", "final_nb_markers",
        "final_nb_samples", "final_files", "plink_version",
        "graphic_paths_fn",
    )
    for required_name in required_names:
        assert required_name in kwargs, "missing '" + required_name + "'"

    # Formatting the background section
    background_section = _format_background(kwargs["background"])

    # Writing the method steps to a separate file (for access later)
    if "steps_filename" in kwargs:
        steps_filename = kwargs["steps_filename"]
    else:
        steps_filename = os.path.join(outdirname, "steps_summary.tex")
        _write_steps_summary(
            steps_filename,
            kwargs["steps"],
            kwargs["descriptions"],
            kwargs["long_descriptions"],
        )

    # Adding the content of the results section (only the existing files are
    # kept, relative to the output directory and with forward slashes)
    result_summaries = []
    for name in kwargs["summaries"]:
        full_path = os.path.abspath(name)
        if os.path.isfile(full_path):
            rel_path = os.path.relpath(full_path, outdirname)
            result_summaries.append(rel_path.replace("\\", "/"))

    # Reading the initial_files file
    with open(kwargs["initial_files"], "r") as i_file:
        initial_files = i_file.read().splitlines()

    # Reading the final_files file (only the first column is kept)
    with open(kwargs["final_files"], "r") as i_file:
        final_files = [i.split("\t")[0] for i in i_file.read().splitlines()]

    # Adding the bibliography content
    biblio_entry = "\n\n".join([
        latex.bib_entry(
            name="pyGenClean",
            authors="Lemieux Perreault LP, Provost S, Legault MA, Barhdadi A, "
                    r"Dub\'e MP",
            title="pyGenClean: efficient tool for genetic data clean up "
                  "before association testing",
            journal="Bioinformatics",
            year="2013",
            volume="29",
            number="13",
            pages="1704--1705",
        ),
        latex.bib_entry(
            name="plink",
            authors="Purcell S, Neale B, Todd-Brown K, Thomas L, Ferreira "
                    "MAR, Bender D, Maller J, Sklar P, de Bakker PIW, Daly "
                    "MJ, Sham PC",
            title="PLINK: a tool set for whole-genome association and "
                  "population-based linkage analyses",
            journal="American Journal of Human Genetics",
            year="2007",
            volume="81",
            number="3",
            pages="559--575",
        ),
        latex.bib_entry(
            name="bafRegress",
            authors=r"Goo J, Matthew F, Kurt NH, Jane MR, Kimberly FD, "
                    r"Gon{\c{c}}alo RA, Michael B, Hyun Min K",
            title="Detecting and estimating contamination of human DNA "
                  "samples in sequencing and array-based genotype data",
            journal="The American Journal of Human Genetics",
            year="2012",
            volume="91",
            number="5",
            pages="839--848",
        ),
    ])

    # Getting the template
    main_template = latex.jinja2_env.get_template("main_document.tex")

    # Getting the data
    today = datetime.today()

    # Reading the graphics path
    graphic_paths = _read_graphic_paths(kwargs["graphic_paths_fn"])

    try:
        # Rendering the template first, so that an existing report is not
        # truncated if the rendering fails
        rendered = main_template.render(
            project_name=latex.sanitize_tex(kwargs["project_name"]),
            month=today.strftime("%B"),
            day=today.day,
            year=today.year,
            background_content=background_section,
            result_summaries=result_summaries,
            bibliography_content=biblio_entry,
            pygenclean_version=pygenclean_version,
            plink_version=kwargs["plink_version"],
            steps_filename=os.path.basename(steps_filename),
            final_results=_create_summary_table(
                kwargs["summary_fn"],
                latex.jinja2_env.get_template("summary_table.tex"),
                nb_samples=kwargs["final_nb_samples"],
                nb_markers=kwargs["final_nb_markers"],
            ),
            report_title=latex.sanitize_tex(kwargs["report_title"]),
            report_author=latex.sanitize_tex(kwargs["report_author"]),
            initial_files=initial_files,
            final_files=final_files,
            final_nb_samples=kwargs["final_nb_samples"],
            final_nb_markers=kwargs["final_nb_markers"],
            graphic_paths=graphic_paths,
        )

        with open(report_filename, "w") as o_file:
            o_file.write(rendered + "\n")

    except IOError:
        msg = "{}: could not create report".format(report_filename)
        raise ProgramError(msg)


def _write_steps_summary(fn, steps, descriptions, long_descriptions):
    """Writes the method steps (one LaTeX item per step) to a file.

    :param fn: the name of the file to write.
    :param steps: the name of each step.
    :param descriptions: the short description of each step.
    :param long_descriptions: the long description of each step (``None``
                              for a step without one).

    :type fn: str
    :type steps: list
    :type descriptions: list
    :type long_descriptions: list

    """
    with open(fn, "w") as o_file:
        zipped = zip(steps, descriptions, long_descriptions)
        for step, desc, long_desc in zipped:
            # The final period is added back after the step name
            if desc.endswith("."):
                desc = desc[:-1]
            step = step.replace("_", r"\_")
            to_print = latex.item(desc)
            to_print += " [{}].".format(latex.texttt(step))
            if long_desc is not None:
                to_print += " " + long_desc

            # Each item is followed by an empty line
            o_file.write(latex.wrap_lines(to_print) + "\n\n")


def _read_graphic_paths(fn):
    """Reads the graphic paths (one per line) from a file.

    :param fn: the name of the file containing the paths (or ``None``).

    :type fn: str

    :returns: the paths, using forward slashes and ending with a single
              trailing slash (an empty list if ``fn`` is ``None``).
    :rtype: list

    """
    if fn is None:
        return []

    graphic_paths = []
    with open(fn, "r") as i_file:
        for path in i_file.read().splitlines():
            # A blank line would otherwise become "/" (the root directory)
            if path.strip() == "":
                continue

            # The trailing slash is checked after the conversion, so that a
            # path ending with a backslash doesn't end with two slashes
            path = path.replace("\\", "/")
            if not path.endswith("/"):
                path += "/"
            graphic_paths.append(path)

    return graphic_paths


def _format_background(background):
    """Builds the LaTeX content of the background section.

    :param background: the name of an existing file containing the
                       background, or the background text itself.

    :type background: str

    :returns: the formatted background content.
    :rtype: str

    """
    # An existing file takes precedence over a literal text
    if os.path.isfile(background):
        with open(background, "r") as i_file:
            content = i_file.read()
    else:
        content = background

    # An empty line becomes a LaTeX line break, every other line is sanitized
    # and wrapped
    chunks = []
    for row in content.splitlines():
        if row == "":
            chunks.append(r"\\" + "\n\n")
        else:
            chunks.append(latex.wrap_lines(latex.sanitize_tex(row)))

    return "".join(chunks)


def _create_summary_table(fn, template, nb_samples, nb_markers):
    """Creates the final table.

    :param fn: the name of the file containing the summary.
    :param template: the Jinja2 template.
    :param nb_samples: the final number of samples.
    :param nb_markers: the final number of markers.

    :type fn: str
    :type template: Jinja2.template
    :type nb_samples: str
    :type nb_markers: str

    :returns: the result of rendering ``template`` with the table data.

    :raises ProgramError: if a section header has no name, or if there is
                          content before the first section header.

    The summary file is read line by line:

    * a line starting with ``#`` starts a new section (the section name is
      the second space-separated field);
    * a line starting with ``---`` is a horizontal line;
    * a line starting with ``  -`` is a sub section row (tab-separated);
    * any other line is a regular (multi-column) row (tab-separated).

    An empty summary file produces an empty table.

    """
    # The final data
    table_data = []

    # The section currently being filled (None until the first header)
    section = None

    # Reading the summary file
    with open(fn, "r") as i_file:
        for line in i_file:
            line = line.rstrip("\r\n")

            # This is the 'header' of a section (hence a new section)
            if line.startswith("#"):
                fields = line.split(" ")
                if len(fields) < 2:
                    msg = "{}: invalid section header: {}".format(fn, line)
                    raise ProgramError(msg)

                # The section is saved right away (the following rows are
                # added to the same dictionary)
                section = dict(header=fields[1], data=[])
                table_data.append(section)
                continue

            # Every other kind of line belongs to a section
            if section is None:
                msg = "{}: content before the first section header".format(fn)
                raise ProgramError(msg)

            # If the line starts with '---', then it's a horizontal line
            if line.startswith("---"):
                section["data"].append(dict(hline=True))
                continue

            # If the line starts with '  -', then it's a sub section
            if line.startswith("  -"):
                row = line[4:].split("\t")
                if section["header"].endswith("/subset"):
                    # File paths are typeset with \path (rows of this
                    # section are not sanitized)
                    if row[0].startswith("_file_path:"):
                        row[0] = r"\path{" + row[0][11:] + "}"
                elif section["header"].endswith("/flag_hw"):
                    row[0] = latex.format_numbers(row[0], prefix="p < ")
                else:
                    # A list is required, since the first value is indexed
                    row = [latex.sanitize_tex(value) for value in row]
                    if row[0].startswith("x"):
                        row[0] = latex.inline_math(r"\times " + row[0][1:])

                section["data"].append(dict(
                    hline=False,
                    multicol=False,
                    row_data=row,
                ))
                continue

            # This is a regular line
            section["data"].append(dict(
                hline=False,
                multicol=True,
                row_data=[
                    latex.sanitize_tex(value) for value in line.split("\t")
                ],
            ))

    # Rendering
    return template.render(table_data=table_data, final_nb_markers=nb_markers,
                           final_nb_samples=nb_samples)