Source code for pyGenClean.SexCheck.gender_plot

#!/usr/bin/env python2.7

# This file is part of pyGenClean.
#
# pyGenClean is free software: you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
# Foundation, either version 3 of the License, or (at your option) any later
# version.
#
# pyGenClean is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along with
# pyGenClean.  If not, see <http://www.gnu.org/licenses/>.


import os
import sys
import gzip
import logging
import argparse

import numpy as np

from .. import __version__


logger = logging.getLogger("gender_plot")


[docs]def main(argString=None):
    """The main function of the module.

    :param argString: the options.

    :type argString: list

    These are the steps:

    1. Prints the options.
    2. If there are ``summarized_intensities`` provided, reads the files
       (:py:func:`read_summarized_intensities`) and skips to step 7.
    3. Reads the ``bim`` file to get markers on the sexual chromosomes
       (:py:func:`read_bim`).
    4. Reads the ``fam`` file to get gender (:py:func:`read_fam`).
    5. Reads the file containing samples with sex problems
       (:py:func:`read_sex_problems`).
    6. Reads the intensities and summarizes them (:py:func:`read_intensities`).
    7. Plots the summarized intensities (:py:func:`plot_gender`).

    """
    # Getting and checking the options
    args = parseArgs(argString)
    checkArgs(args)

    data = None
    if args.summarized_intensities is None:
        # Reading the BIM file
        marker_names_chr = read_bim(args.bfile + ".bim")

        # Reading the FAM file
        sample_names_gender = read_fam(args.bfile + ".fam")

        # Reading the sex problem file
        sex_problems = read_sex_problems(args.sex_problems)

        # Reading the intensity file
        data = read_intensities(args.intensities, marker_names_chr,
                                sample_names_gender, sex_problems)

    else:
        data = read_summarized_intensities(args.summarized_intensities)

    # Plot the gender
    plot_gender(data, args)


[docs]def read_sex_problems(file_name):
    """Reads the sex problem file.

    :param file_name: the name of the file containing sex problems.

    :type file_name: str

    :returns: a :py:class:`frozenset` containing samples with sex problem.

    If there is no ``file_name`` (*i.e.* is ``None``), then an empty
    :py:class:`frozenset` is returned.

    """
    if file_name is None:
        return frozenset()

    problems = None
    with open(file_name, 'r') as input_file:
        header_index = dict([
            (col_name, i) for i, col_name in
            enumerate(input_file.readline().rstrip("\r\n").split("\t"))
        ])
        if "IID" not in header_index:
            msg = "{}: no column named IID".format(file_name)
            raise ProgramError(msg)

        problems = frozenset([
            i.rstrip("\r\n").split("\t")[header_index["IID"]]
            for i in input_file.readlines()
        ])
    return problems


[docs]def encode_chr(chromosome):
    """Encodes chromosomes.

    :param chromosome: the chromosome to encode.

    :type chromosome: str

    :returns: the encoded chromosome as :py:class:`int`.

    It changes ``X``, ``Y``, ``XY`` and ``MT`` to ``23``, ``24``, ``25`` and
    ``26``, respectively. It changes everything else as :py:class:`int`.

    If :py:class:`ValueError` is raised, then :py:class:`ProgramError` is
    also raised. If a chromosome as a value below 1 or above 26, a
    :py:class:`ProgramError` is raised.

    .. testsetup::

        from pyGenClean.SexCheck.gender_plot import encode_chr

    .. doctest::

        >>> [encode_chr(str(i)) for i in range(0, 11)]
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
        >>> [encode_chr(str(i)) for i in range(11, 21)]
        [11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
        >>> [encode_chr(str(i)) for i in range(21, 27)]
        [21, 22, 23, 24, 25, 26]
        >>> [encode_chr(i) for i in ["X", "Y", "XY", "MT"]]
        [23, 24, 25, 26]
        >>> encode_chr("27")
        Traceback (most recent call last):
            ...
        ProgramError: 27: invalid chromosome
        >>> encode_chr("XX")
        Traceback (most recent call last):
            ...
        ProgramError: XX: invalid chromosome

    """
    if chromosome == "X":
        return 23
    if chromosome == "Y":
        return 24
    if chromosome == "XY":
        return 25
    if chromosome == "MT":
        return 26
    try:
        new_chromosome = int(chromosome)
        if new_chromosome < 0 or new_chromosome > 26:
            msg = "{}: invalid chromosome".format(chromosome)
            raise ProgramError(msg)
        return new_chromosome
    except ValueError:
        msg = "{}: invalid chromosome".format(chromosome)
        raise ProgramError(msg)


[docs]def encode_gender(gender):
    """Encodes the gender.

    :param gender: the gender to encode.

    :type gender: str

    :returns: the encoded gender.

    It changes ``1`` and ``2`` to ``Male`` and ``Female`` respectively. It
    encodes everything else to ``Unknown``.

    .. testsetup::

        from pyGenClean.SexCheck.gender_plot import encode_gender

    .. doctest::

        >>> encode_gender("1")
        'Male'
        >>> encode_gender("2")
        'Female'
        >>> encode_gender("0")
        'Unknown'
        >>> encode_gender("This is not a gender code")
        'Unknown'

    """
    if gender == "1":
        return "Male"
    if gender == "2":
        return "Female"
    return "Unknown"


[docs]def read_bim(file_name):
    """Reads the BIM file to gather marker names.

    :param file_name: the name of the ``bim`` file.

    :type file_name: str

    :returns: a :py:class:`dict` containing the chromosomal location of each
              marker on the sexual chromosomes.

    It uses the :py:func:`encode_chr` to encode the chromosomes from ``X`` and
    ``Y`` to ``23`` and ``24``, respectively.

    """
    marker_names_chr = None
    with open(file_name, 'r') as input_file:
        marker_names_chr = dict([
            (i[1], encode_chr(i[0]))
            for i in [
                j.rstrip("\r\n").split("\t") for j in input_file.readlines()
            ] if encode_chr(i[0]) in {23, 24}
        ])
    return marker_names_chr


[docs]def read_fam(file_name):
    """Reads the FAM file to gather sample names.

    :param file_name: the ``fam`` file to read.

    :type file_name: str

    :returns: a :py:class:`dict` containing the gender of each samples.

    It uses the :py:func:`encode_gender` to encode the gender from ``1``and
    ``2`` to ``Male`` and ``Female``, respectively.

    """
    sample_names_gender = None
    with open(file_name, 'r') as input_file:
        sample_names_gender = dict([
            (i[1], encode_gender(i[4])) for i in [
                j.rstrip("\r\n").split(" ") for j in input_file.readlines()
            ]
        ])
    return sample_names_gender


[docs]def plot_gender(data, options):
    """Plots the gender.

    :param data: the data to plot.
    :param options: the options.

    :type data: numpy.recarray
    :type options: argparse.Namespace

    Plots the summarized intensities of the markers on the Y chromosomes in
    function of the markers on the X chromosomes, with problematic samples with
    different colors.

    Also uses :py:func:`print_data_to_file` to save the data, so that it is
    faster to rerun the analysis.

    """
    if data is None:
        # there is a problem...
        msg = ("no data: specify either '--bfile' and '--intensities', or "
               "'--summarized-intensities'")
        raise ProgramError(msg)

    import matplotlib as mpl
    if options.format != "X11" and mpl.get_backend() != "agg":
        mpl.use("Agg")
    import matplotlib.pyplot as plt
    if options.format != "X11":
        plt.ioff()

    # The figure and axes
    fig = plt.figure()
    fig.subplots_adjust(top=0.84)
    ax = fig.add_subplot(111)

    # Changing the spines
    ax.xaxis.set_ticks_position("bottom")
    ax.yaxis.set_ticks_position("left")
    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)

    # Setting the title
    ax.set_xlabel(options.xlabel)
    ax.set_ylabel(options.ylabel)

    # For the legend
    plot_object = []
    labels = []

    # Plotting the OK males
    males = np.logical_and(data["gender"] == "Male", data["status"] == "OK")
    tmp, = ax.plot(data["chr23"][males], data["chr24"][males], "o", ms=5,
                   mec="#0099CC", mfc="#0099CC")
    plot_object.append(tmp)
    labels.append("OK Males (n={})".format(sum(males)))
    if options.summarized_intensities is None:
        print_data_to_file(data[males], "{}.ok_males.txt".format(options.out))

    # Plotting the OK females
    females = np.logical_and(data["gender"] == "Female",
                             data["status"] == "OK")
    tmp, = ax.plot(data["chr23"][females], data["chr24"][females], "o", ms=5,
                   mec="#CC0000", mfc="#CC0000")
    plot_object.append(tmp)
    labels.append("OK Females (n={})".format(sum(females)))
    if options.summarized_intensities is None:
        print_data_to_file(data[females],
                           "{}.ok_females.txt".format(options.out))

    # Plotting the OK unknowns
    unknowns = np.logical_and(data["gender"] == "Unknown",
                              data["status"] == "OK")
    tmp, = ax.plot(data["chr23"][unknowns], data["chr24"][unknowns], "o", ms=5,
                   mec="#555555", mfc="#555555")
    plot_object.append(tmp)
    labels.append("OK Unknowns (n={})".format(sum(unknowns)))
    if options.summarized_intensities is None:
        print_data_to_file(data[unknowns],
                           "{}.ok_unknowns.txt".format(options.out))

    # Plotting the Problem males
    males = np.logical_and(data["gender"] == "Male",
                           data["status"] == "Problem")
    tmp, = ax.plot(data["chr23"][males], data["chr24"][males], "^", ms=6,
                   mec="#000000", mfc="#669900")
    plot_object.append(tmp)
    labels.append("Problematic Males (n={})".format(sum(males)))
    if options.summarized_intensities is None:
        print_data_to_file(data[males],
                           "{}.problematic_males.txt".format(options.out))

    # Plotting the Problem females
    females = np.logical_and(data["gender"] == "Female",
                             data["status"] == "Problem")
    tmp, = ax.plot(data["chr23"][females], data["chr24"][females], "v", ms=6,
                   mec="#000000", mfc="#9933CC")
    plot_object.append(tmp)
    labels.append("Problematic Females (n={})".format(sum(females)))
    if options.summarized_intensities is None:
        print_data_to_file(data[females],
                           "{}.problematic_females.txt".format(options.out))

    # Plotting the Problem unknowns
    unknowns = np.logical_and(data["gender"] == "Unknown",
                              data["status"] == "Problem")
    tmp, = ax.plot(data["chr23"][unknowns], data["chr24"][unknowns], ">", ms=6,
                   mec="#000000", mfc="#555555")
    plot_object.append(tmp)
    labels.append("Problematic Unknown (n={})".format(sum(unknowns)))
    if options.summarized_intensities is None:
        print_data_to_file(data[unknowns],
                           "{}.problematic_unknowns.txt".format(options.out))

    # the legend
    prop = mpl.font_manager.FontProperties(size=10)
    leg = ax.legend(plot_object, labels, loc=8, numpoints=1, fancybox=True,
                    prop=prop, ncol=2, bbox_to_anchor=(0., 1.02, 1., .102),
                    borderaxespad=0.)

    # Setting the limit
    xlim = ax.get_xlim()
    ax.set_xlim((xlim[0]-0.01, xlim[1]+0.01))
    ylim = ax.get_ylim()
    ax.set_ylim((ylim[0]-0.01, ylim[1]+0.01))

    if options.format == "X11":
        plt.show()
    else:
        file_name = "{}.{}".format(options.out, options.format)
        try:
            plt.savefig(file_name)
        except IOError:
            msg = "{}: can't write file".format(file_name)
            raise ProgramError(msg)


[docs]def print_data_to_file(data, file_name):
    """Prints data to file.

    :param data: the data to print.
    :param file_name: the name of the output file.

    :type data: numpy.recarray
    :type file_name: str

    """
    try:
        with open(file_name, 'w') as output_file:
            print >>output_file, "\t".join(data.dtype.names)
            for row in data:
                print >>output_file, "\t".join(map(str, row))
    except IOError:
        msg = "{}: can't write file".format(file_name)
        raise ProgramError(msg)


[docs]def read_summarized_intensities(prefix):
    """Reads the summarized intensities from 6 files.

    :param prefix: the prefix of the six files.

    :type prefix: str

    :returns: a :py:class`numpy.recarray` containing the following columns (for
              each of the samples): ``sampleID``, ``chr23``, ``chr24``,
              ``gender`` and ``status``.

    Instead of reading a final report (like :py:func:`read_intensities`), this
    function reads six files previously created by this module to gather sample
    information. Here are the content of the six files:

    * ``prefix.ok_females.txt``: information about females without sex problem.
    * ``prefix.ok_males.txt``: information about males without sex problem.
    * ``prefix.ok_unknowns.txt``: information about unknown gender without sex
                                  problem.
    * ``prefix.problematic_females.txt``: information about females with sex
                                          problem.
    * ``prefix.problematic_males.txt``: information about males with sex
                                        problem.
    * ``prefix.problematic_unknowns.txt``: information about unknown gender
                                           with sex problem.

    Each file contains the following columns: ``sampleID``, ``chr23``,
    ``chr24``, ``gender`` and ``status``.

    The final data set contains the following information for each sample:

    * ``sampleID``: the sample ID.
    * ``chr23``: the summarized intensities for chromosome 23.
    * ``chr24``: the summarized intensities for chromosome 24.
    * ``gender``: the gender of the sample (``Male`` or ``Female``).
    * ``status``: the status of the sample (``OK`` or ``Problem``).

    The summarized intensities for a chromosome (:math:`S_{chr}`) is computed
    using this formula (where :math:`I_{chr}` is the set of all marker
    intensities on chromosome :math:`chr`):

    .. math::
        S_{chr} = \\frac{\\sum{I_{chr}}}{||I_{chr}||}

    """
    data = []
    for suffix in {".ok_females.txt", ".ok_males.txt", ".ok_unknowns.txt",
                   ".problematic_females.txt", ".problematic_males.txt",
                   ".problematic_unknowns.txt"}:
        with open(prefix + suffix, 'r') as input_file:
            header_index = None
            for line_nb, line in enumerate(input_file):
                row = line.rstrip("\r\n").split("\t")

                if line_nb == 0:
                    # This is the header
                    header_index = dict([
                        (col_name, i) for i, col_name in enumerate(row)
                    ])
                    for col_name in {"sampleID", "chr23", "chr24", "gender",
                                     "status"}:
                        if col_name not in header_index:
                            msg = "{}: no column named {}".format(
                                prefix+suffix,
                                col_name,
                            )
                            raise ProgramError(msg)

                else:
                    sampleID = row[header_index["sampleID"]]
                    chr23 = row[header_index["chr23"]]
                    chr24 = row[header_index["chr24"]]
                    gender = row[header_index["gender"]]
                    status = row[header_index["status"]]

                    try:
                        chr23 = float(chr23)
                        chr24 = float(chr24)
                    except ValueError:
                        msg = ("{} and {}: bad summarized intensities for "
                               "sample {}".format(chr23, chr24, sampleID))
                        raise ProgramError(msg)

                    data.append((sampleID, chr23, chr24, gender, status))

    # Creating the data structure
    data = np.array(
        data,
        dtype=[("sampleID", "a{}".format(max([len(i[0]) for i in data]))),
               ("chr23", float), ("chr24", float),
               ("gender", "a{}".format(max([len(i[3]) for i in data]))),
               ("status", "a{}".format(max([len(i[4]) for i in data])))],
    )
    return data


[docs]def read_intensities(file_name, needed_markers_chr, needed_samples_gender,
                     sex_problems):
    """Reads the intensities from a file.

    :param file_name: the name of the input file.
    :param needed_markers_chr: the markers that are needed.
    :param needed_samples_gender: the gender of all the samples.
    :param sex_problems: the sample with sex problem.

    :type file_name: str
    :type needed_markers_chr: dict
    :type needed_samples_gender: dict
    :type sex_problems: frozenset

    :returns: a :py:class`numpy.recarray` containing the following columns (for
              each of the samples): ``sampleID``, ``chr23``, ``chr24``,
              ``gender`` and ``status``.

    Reads the normalized intensities from a final report. The file must contain
    the following columns: ``SNP Name``, ``Sample ID``, ``X``, ``Y`` and
    ``Chr``. It then keeps only the required markers (those that are on
    sexual chromosomes (``23`` or ``24``), encoding `NaN` intensities to zero.

    The final data set contains the following information for each sample:

    * ``sampleID``: the sample ID.
    * ``chr23``: the summarized intensities for chromosome 23.
    * ``chr24``: the summarized intensities for chromosome 24.
    * ``gender``: the gender of the sample (``Male`` or ``Female``).
    * ``status``: the status of the sample (``OK`` or ``Problem``).

    The summarized intensities for a chromosome (:math:`S_{chr}`) is computed
    using this formula (where :math:`I_{chr}` is the set of all marker
    intensities on chromosome :math:`chr`):

    .. math::
        S_{chr} = \\frac{\\sum{I_{chr}}}{||I_{chr}||}

    """
    input_file = None
    if file_name.endswith(".gz"):
        input_file = gzip.open(file_name, 'rb')
    else:
        input_file = open(file_name, 'r')

    # The intensities
    intensities = {}

    header_index = None
    for line_nb, line in enumerate(input_file):
        row = line.rstrip("\r\n").split("\t")

        if line_nb == 0:
            # This is the header
            header_index = dict([
                (col_name, i) for i, col_name in enumerate(row)
            ])
            # Check the column names
            for col_name in {"SNP Name", "Sample ID", "X", "Y", "Chr"}:
                if col_name not in header_index:
                    msg = "{}: no column named {}".format(file_name, col_name)
                    raise ProgramError(msg)
        else:
            # This is the data
            # Check if we want this sample and this marker
            sampleID = row[header_index["Sample ID"]]
            markerID = row[header_index["SNP Name"]]
            chromosome = encode_chr(row[header_index["Chr"]])
            if chromosome not in {23, 24}:
                # Not good chromsoome
                continue
            if sampleID not in needed_samples_gender:
                # Sample not needed
                continue
            if markerID not in needed_markers_chr:
                # Marker not needed
                continue

            if sampleID not in intensities:
                # First time we see this sample
                intensities[sampleID] = [0, 0, 0, 0]

            # We get the intensities
            allele_a = row[header_index["X"]]
            allele_b = row[header_index["Y"]]

            # Check for NaN
            if allele_a == "NaN" or allele_b == "NaN":
                continue
            try:
                allele_a = float(allele_a)
                allele_b = float(allele_b)
            except ValueError:
                msg = "{}: {} and {}: wrong intensities".format(file_name,
                                                                allele_a,
                                                                allele_b)
                raise ProgramError(msg)

            if chromosome == 23:
                # Chromosome 23
                intensities[sampleID][0] += allele_a + allele_b
                intensities[sampleID][1] += 1
            else:
                # Chromosome 24
                intensities[sampleID][2] += allele_a + allele_b
                intensities[sampleID][3] += 1

    # Closing the input file
    input_file.close()

    # Creating the data structure
    data = []
    for sampleID in intensities.iterkeys():
        sum_chr_23, nb_chr_23, sum_chr_24, nb_chr_24 = intensities[sampleID]
        status = "OK"
        if sampleID in sex_problems:
            status = "Problem"
        try:
            data.append((sampleID, sum_chr_23 / nb_chr_23, sum_chr_24 /
                         nb_chr_24, needed_samples_gender[sampleID], status))
        except ZeroDivisionError:
            msg = "0 marker on chr23 or chr24"
            raise ProgramError(msg)

    data = np.array(
        data,
        dtype=[("sampleID", "a{}".format(max([len(i[0]) for i in data]))),
               ("chr23", float),
               ("chr24", float),
               ("gender", "a{}".format(max([len(i[3]) for i in data]))),
               ("status", "a{}".format(max([len(i[4]) for i in data])))],
    )
    return data


[docs]def checkArgs(args):
    """Checks the arguments and options.

    :param args: an object containing the options of the program.

    :type args: argparse.Namespace

    :returns: ``True`` if everything was OK.

    If there is a problem with an option, an exception is raised using the
    :py:class:`ProgramError` class, a message is printed to the
    :class:`sys.stderr` and the program exists with code 1.

    """
    # Checking if there are input files
    if args.summarized_intensities is None:
        if args.bfile is None and args.intensities is None:
            msg = ("need to specify either '--bfile' and '--intensities', or "
                   "'--summarized-intensities'")
            raise ProgramError(msg)

        # Checking the fam file bim
        for suffix in {".bim", ".fam"}:
            if not os.path.isfile(args.bfile + suffix):
                msg = "{}: no such file".format(args.bfile + suffix)
                raise ProgramError(msg)

        # Checking the intensity file
        if not os.path.isfile(args.intensities):
            msg = "{}: no such file".format(args.intensities)
            raise ProgramError(msg)
    else:
        if args.bfile is not None or args.intensities is not None:
            msg = ("need to specify either '--bfile' and '--intensities', or "
                   "'--summarized-intensities'")
            raise ProgramError(msg)

        for suffix in {".ok_females.txt", ".ok_males.txt", ".ok_unknowns.txt",
                       ".problematic_females.txt", ".problematic_males.txt",
                       ".problematic_unknowns.txt"}:
            if not os.path.isfile(args.summarized_intensities + suffix):
                msg = "{}: no such file".format(args.summarized_intensities +
                                                suffix)
                raise ProgramError(msg)

    # Checking the sex problem file
    if args.sex_problems is not None:
        if not os.path.isfile(args.sex_problems):
            msg = "{}: no such file".format(args.sex_problems)
            raise ProgramError(msg)

    return True


[docs]def parseArgs(argString=None):  # pragma: no cover
    """Parses the command line options and arguments.

    :param argString: the options.

    :type argString: list

    :returns: A :py:class:`argparse.Namespace` object created by the
              :py:mod:`argparse` module. It contains the values of the
              different options.

    ============================ ====== =======================================
               Options            Type                    Description
    ============================ ====== =======================================
    ``--bfile``                  string The plink binary file containing
                                        information about markers and
                                        individuals.
    ``--intensities``            string A file containing alleles intensities
                                        for each of the markers located on the
                                        X and Y chromosome.
    ``--summarized-intensities`` string The prefix of six files containing
                                        summarized chr23 and chr24 intensities.
    ``--sex-problems``           string The file containing individuals with
                                        sex problems.
    ``--format``                 string The output file format (png, ps, pdf,
                                        or X11).
    ``--xlabel``                 string The label of the X axis.
    ``--ylabel``                 string The label of the Y axis.
    ``--out``                    string The prefix of the output files.
    ============================ ====== =======================================

    .. note::
        No option check is done here (except for the one automatically done by
        argparse). Those need to be done elsewhere (see :py:func:`checkArgs`).

    """
    args = None
    if argString is None:
        args = parser.parse_args()
    else:
        args = parser.parse_args(argString)

    return args


[docs]class ProgramError(Exception):
    """An :py:class:`Exception` raised in case of a problem.

    :param msg: the message to print to the user before exiting.

    :type msg: str

    """
    def __init__(self, msg):
        """Construction of the :py:class:`ProgramError` class.

        :param msg: the message to print to the user

        :type msg: str

        """
        self.message = str(msg)

    def __str__(self):
        return self.message

# The parser object
desc = "Plots the gender using X and Y chromosomes' intensities"
parser = argparse.ArgumentParser(description=desc)
parser.add_argument("-v", "--version", action="version",
                    version="pyGenClean version {}".format(__version__))

# The INPUT files
group = parser.add_argument_group("Input File")
group.add_argument("--bfile", type=str, metavar="FILE", required=True,
                   help=("The plink binary file containing information "
                         "about markers and individuals. Must be specified "
                         "if '--summarized-intensities' is not."))
group.add_argument("--intensities", type=str, metavar="FILE",
                   help=("A file containing alleles intensities for each "
                         "of the markers located on the X and Y "
                         "chromosome. Must be specified if "
                         "'--summarized-intensities' is not."))
group.add_argument("--summarized-intensities", type=str, metavar="FILE",
                   help=("The prefix of six files (prefix.ok_females.txt, "
                         "prefix.ok_males.txt, prefix.ok_unknowns.txt, "
                         "problematic_females.txt, problematic_males.txt "
                         "and problematic_unknowns.txt) containing "
                         "summarized chr23 and chr24 intensities. Must be "
                         "specified if '--bfile' and '--intensities' are "
                         "not."))
group.add_argument("--sex-problems", type=str, metavar="FILE", required=True,
                   help=("The file containing individuals with sex "
                         "problems. This file is not read if the option "
                         "'summarized-intensities' is used."))
# The options
group = parser.add_argument_group("Options")
group.add_argument("--format", type=str, metavar="FORMAT", default="png",
                   choices=["png", "ps", "pdf", "X11"],
                   help=("The output file format (png, ps, pdf, or X11 "
                         "formats are available). [default: %(default)s]"))
group.add_argument("--xlabel", type=str, metavar="STRING",
                   default="X intensity",
                   help="The label of the X axis. [default: %(default)s]")
group.add_argument("--ylabel", type=str, metavar="STRING",
                   default="Y intensity",
                   help="The label of the Y axis. [default: %(default)s]")
# The OUTPUT files
group = parser.add_argument_group("Output File")
group.add_argument("--out", type=str, metavar="FILE",
                   default="sexcheck",
                   help=("The prefix of the output files (which will be "
                         "a Plink binary file. [default: %(default)s]"))


[docs]def safe_main():
    """A safe version of the main function (that catches ProgramError)."""
    try:
        main()
    except KeyboardInterrupt:
        logger.info("Cancelled by user")
        sys.exit(0)
    except ProgramError as e:
        logger.error(e.message)
        parser.error(e.message)


if __name__ == "__main__":
    safe_main()