#!/usr/bin/env python2.7
# This file is part of pyGenClean.
#
# pyGenClean is free software: you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
# Foundation, either version 3 of the License, or (at your option) any later
# version.
#
# pyGenClean is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE. See the GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along with
# pyGenClean. If not, see <http://www.gnu.org/licenses/>.
import os
import sys
import logging
import argparse
import numpy as np
from . import createRowFromPlinkSpacedOutput
logger = logging.getLogger("plot_MDS")
desc = "Creates a MDS plot"
long_desc = None
parser = argparse.ArgumentParser(description=desc)
[docs]def main():
"""The main function of the module.
These are the steps:
1. Reads the population file (:py:func:`readPopulations`).
2. Extract the MDS data (:py:func:`extractData`).
3. Plots the MDS data (:py:func:`plotMDS`).
"""
# Getting and checking the options
args = parseArgs()
checkArgs(args)
# Reads the population file
populations = readPopulations(args.population_file)
# Acquire the data
theData, theLabels = extractData(args.file, populations)
theOrders = range(len(theLabels))
theColors = [(0.215686275, 0.000494118, 0.721568627),
(0.301960784, 0.68627451, 0.290196078),
(0.596078431, 0.305882353, 0.639215686),
(0.894117647, 0.101960784, 0.109803922),
(0.596078431, 0.305882353, 0.639215686)]
theSizes = [12, 12, 12, 8, 12]
theMarkers = [".", ".", ".", "+", "D"]
# Plot the data
plotMDS(theData, theOrders, theLabels, theColors, theSizes, theMarkers,
args)
[docs]def readPopulations(inputFileName):
"""Reads a population file.
:param inputFileName: the name of the population file.
:type inputFileName: str
:returns: a :py:class:`dict` of population for each of the samples.
"""
populations = {}
with open(inputFileName, "r") as inputFile:
for line in inputFile:
row = line.rstrip("\r\n").split("\t")
# Getting the informations
famID = row[0]
indID = row[1]
pop = row[2]
# Check if we already saw this sample
if (famID, indID) in populations:
if pop != populations[(famID, indID)]:
msg = ("{} {}: sample has multiple population ({} and "
"{})".format(famID, indID, pop,
populations[(famID, indID)]))
raise ProgramError(msg)
# Save the population
populations[(famID, indID)] = pop
return populations
[docs]def plotMDS(data, theOrders, theLabels, theColors, theSizes, theMarkers,
options):
"""Plot the MDS data.
:param data: the data to plot (MDS values).
:param theOrders: the order of the populations to plot.
:param theLabels: the names of populations to plot.
:param theColors: the colors of the populations to plot.
:param theSizes: the sizes of the markers for each population to plot.
:param theMarkers: the type of markers for each population to plot.
:param options: the options.
:type data: list of numpy.array
:type theOrders: list
:type theLabels: list
:type theColors: list
:type theSizes: list
:type theMarkers: list
:type options: argparse.Namespace
"""
# Do the import
import matplotlib as mpl
if options.format != "X11" and mpl.get_backend() != "agg":
mpl.use("Agg")
import matplotlib.pyplot as plt
if options.format != "X11":
plt.ioff()
fig = plt.figure()
ax = fig.add_subplot(111)
# The plot
plotObject = []
labels = []
for i, index in enumerate(theOrders):
tmp, = ax.plot(data[0][index], data[1][index], theMarkers[i],
color=theColors[i], mec=theColors[i],
markersize=theSizes[i])
plotObject.append(tmp)
labels.append(theLabels[index])
# The legend
prop = mpl.font_manager.FontProperties(size=10)
leg = ax.legend(plotObject, labels, loc="upper left", numpoints=1,
fancybox=True, prop=prop)
# The title and XY labels
ax.set_title(options.title)
ax.set_xlabel(options.xlabel)
ax.set_ylabel(options.ylabel)
if options.format == "X11":
# Show the plot
plt.show()
else:
fileName = options.out + "." + options.format
try:
plt.savefig(fileName)
except IOError:
msg = "%(fileName)s: can't write file" % locals()
raise ProgramError(msg)
[docs]def checkArgs(args):
"""Checks the arguments and options.
:param args: an object containing the options of the program.
:type args: argparse.Namespace
:returns: ``True`` if everything was OK.
If there is a problem with an option, an exception is raised using the
:py:class:`ProgramError` class, a message is printed to the
:class:`sys.stderr` and the program exists with code 1.
"""
# Check in input file
if not os.path.isfile(args.file):
msg = "%s: no such file" % args.file
raise ProgramError(msg)
# Check the population file
if args.population_file is None:
msg = "population-file: no population file"
raise ProgramError(msg)
elif not os.path.isfile(args.population_file):
msg = "%s: no such file" % args.population_file
raise ProgramError(msg)
return True
[docs]def parseArgs(): # pragma: no cover
"""Parses the command line options and arguments.
:returns: A :py:class:`argparse.Namespace` object created by the
:py:mod:`argparse` module. It contains the values of the
different options.
===================== ====== =========================================
Options Type Description
===================== ====== =========================================
``--file`` string The MBS file.
``--population-file`` string A file containing population information.
``--format`` string The output file format.
``--title`` string The title of the MDS plot.
``--xlabel`` string The label of the X axis.
``--ylabel`` string The label of the Y axis.
``--out`` string The prefix of the output files.
===================== ====== =========================================
.. note::
No option check is done here (except for the one automatically done by
argparse). Those need to be done elsewhere (see :py:func:`checkArgs`).
"""
# The INPUT files
group = parser.add_argument_group("Input File")
group.add_argument("--file", type=str, metavar="FILE", required=True,
help="The MBS file.")
parser.add_argument("--population-file", type=str, metavar="FORMAT",
required=True,
help="A file containing population information. "
"There must be three columns: famID, indID "
"and population information.")
# The graphical options
group = parser.add_argument_group("Graphical Options")
addCustomOptions(group)
# The OUTPUT files
group = parser.add_argument_group("Output File")
group.add_argument("--out", type=str, metavar="FILE",
default="mds",
help="The prefix of the output files. [default: "
"%(default)s]")
args = parser.parse_args()
return args
[docs]def addCustomOptions(parser):
"""Adds custom options to a parser.
:param parser: the parser.
:type parser: argparse.parser
"""
parser.add_argument("--format", type=str, metavar="FORMAT", default="png",
choices=["png", "ps", "pdf", "X11"],
help="The output file format (png, ps, pdf, or X11 "
"formats are available). [default: %(default)s]")
parser.add_argument("--title", type=str, metavar="STRING",
default="C2 in function of C1 - MDS",
help="The title of the MDS plot. [default: "
"%(default)s]")
parser.add_argument("--xlabel", type=str, metavar="STRING", default="C1",
help="The label of the X axis. [default: %(default)s]")
parser.add_argument("--ylabel", type=str, metavar="STRING", default="C2",
help="The label of the Y axis. [default: %(default)s]")
[docs]class ProgramError(Exception):
"""An :py:class:`Exception` raised in case of a problem.
:param msg: the message to print to the user before exiting.
:type msg: str
"""
def __init__(self, msg):
"""Construction of the :py:class:`ProgramError` class.
:param msg: the message to print to the user
:type msg: str
"""
self.message = str(msg)
def __str__(self):
return self.message
[docs]def safe_main():
"""A safe version of the main function (that catches ProgramError)."""
try:
main()
except KeyboardInterrupt:
logger.info("Cancelled by user")
sys.exit(0)
except ProgramError as e:
logger.error(e.message)
parser.error(e.message)
if __name__ == "__main__":
safe_main()