Source code for src.features.snpchimp

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Feb 16 16:42:36 2021

@author: Paolo Cozzi <paolo.cozzi@ibba.cnr.it>
"""

import csv
import logging
import collections

from src.features.utils import text_or_gzip_open

# Get an instance of a logger
logger = logging.getLogger(__name__)


def clean_chrom(chrom: str) -> str:
    """Return "0" if chrom is 99 (unmapped for snpchimp)

    Args:
        chrom (str): the (SNPchiMp) chromosome

    Returns:
        str: "0" if the string form of chrom is "99", otherwise chrom itself,
        returned unchanged
    """

    # compare on the string form: the value should already be a string by
    # database constraints, but forcing the type also matches an integer 99
    if str(chrom) == "99":
        return "0"

    return chrom
def read_snpChimp(path: str, size=2048):
    """Iterate over the records of a SNPchiMp file as named tuples

    The CSV dialect is guessed from the first ``size`` characters of the
    file, then the file is read again from the beginning. The first row is
    used as header and its column names are lower-cased to become the fields
    of the yielded named tuples.

    For every data row the ``position`` column is converted to ``int``, every
    column equal to the string ``'NULL'`` is replaced with ``None`` and the
    ``chromosome`` column is passed through ``clean_chrom``.

    Args:
        path (str): the file to read, opened with ``text_or_gzip_open``
        size (int): how many characters are read to sniff the CSV dialect

    Yields:
        SnpChimp: a ``collections.namedtuple`` with one field for each
        (lower-cased) header column

    Raises:
        ValueError: if the header has no ``position`` or ``chromosome``
            column (detected before the first record is produced), or if a
            ``position`` value can't be converted to ``int``. The conversion
            happens before the NULL substitution, so a ``'NULL'`` position is
            rejected too
    """

    sniffer = csv.Sniffer()

    with text_or_gzip_open(path) as handle:
        dialect = sniffer.sniff(handle.read(size))

        # the sniffed chunk has been consumed: start again from the top
        handle.seek(0)
        reader = csv.reader(handle, dialect=dialect)

        # get header and sanitize column names
        header = [column.lower() for column in next(reader)]

        logger.info(header)

        # define a datatype for my data
        SnpChimp = collections.namedtuple("SnpChimp", header)

        # column positions don't change between records: resolve them once
        # instead of scanning the header for every row
        position_idx = header.index('position')
        chromosome_idx = header.index('chromosome')

        for row in reader:
            # forcing data types
            row[position_idx] = int(row[position_idx])

            # transform NULL values in None
            values = [None if col == 'NULL' else col for col in row]

            # clean chromosome
            values[chromosome_idx] = clean_chrom(values[chromosome_idx])

            # convert into collection
            yield SnpChimp._make(values)