@comment{
  Five journal-article records; citation keys are numeric record IDs and are
  kept as-is so existing citations keep resolving.

  NOTE(review): in the abstracts of entries 151 and 23, every value written as
  "<mantissa> {\texttimes} 10" has no exponent -- the superscript appears to
  have been lost before this file was written. The exponents cannot be
  recovered from this file; restore them from the publisher's abstract.

  The non-standard "day" field preserves the day-of-month that was previously
  embedded in the "month" value; standard styles ignore it.
}

@article{151,
  author   = {Chen, Lei and Abel, Haley J and Das, Indraniel and Larson, David E
              and Ganel, Liron and Kanchi, Krishna L and Regier, Allison A
              and Young, Erica P and Kang, Chul Joo and Scott, Alexandra J
              and Chiang, Colby and Wang, Xinxin and Lu, Shuangjia
              and Christ, Ryan and Service, Susan K and Chiang, Charleston W K
              and Havulinna, Aki S and Kuusisto, Johanna and Boehnke, Michael
              and Laakso, Markku and Palotie, Aarno and Ripatti, Samuli
              and Freimer, Nelson B and Locke, Adam E and Stitziel, Nathan O
              and Hall, Ira M},
  title    = {Association of structural variation with cardiometabolic traits in {Finns}},
  journal  = {Am J Hum Genet},
  volume   = {108},
  year     = {2021},
  month    = apr,
  day      = {1},
  pages    = {583--596},
  issn     = {1537-6605},
  doi      = {10.1016/j.ajhg.2021.03.008},
  keywords = {Alleles, Cardiovascular Diseases, Cholesterol, DNA Copy Number Variations, Female, Finland, Genome, Human, Genomic Structural Variation, Genotype, High-Throughput Nucleotide Sequencing, Humans, Male, Mitochondrial Proteins, Promoter Regions, Genetic, Pyruvate Dehydrogenase (Lipoamide)-Phosphatase, Pyruvic Acid, Serum Albumin, Human},
  abstract = {The contribution of genome structural variation (SV) to quantitative traits associated with cardiometabolic diseases remains largely unknown. Here, we present the results of a study examining genetic association between SVs and cardiometabolic traits in the Finnish population. We used sensitive methods to identify and genotype 129,166 high-confidence SVs from deep whole-genome sequencing (WGS) data of 4,848 individuals. We tested the 64,572 common and low-frequency SVs for association with 116 quantitative traits and tested candidate associations using exome sequencing and array genotype data from an additional 15,205 individuals. We discovered 31 genome-wide significant associations at 15 loci, including 2 loci at which SVs have strong phenotypic effects: (1) a deletion of the ALB promoter that is greatly enriched in the Finnish population and causes decreased serum albumin level in carriers (p = 1.47~{\texttimes} 10) and is also associated with increased levels of total cholesterol (p = 1.22~{\texttimes} 10) and 14 additional cholesterol-related traits, and (2) a multi-allelic copy number variant (CNV) at PDPR that is strongly associated with pyruvate (p = 4.81~{\texttimes} 10) and alanine (p = 6.14~{\texttimes} 10) levels and resides within a structurally complex genomic region that has accumulated many rearrangements over evolutionary time. We also confirmed six previously reported associations, including five led by stronger signals in single nucleotide variants (SNVs) and one linking recurrent HP gene deletion and cholesterol levels (p = 6.24~{\texttimes} 10), which was also found to be strongly associated with increased glycoprotein level (p = 3.53~{\texttimes} 10). Our study confirms that integrating SVs in trait-mapping studies will expand our knowledge of genetic factors underlying disease risk.},
}

@article{116,
  author   = {Bras{\'o}-Vives, Marina and Povolotskaya, Inna S
              and Hartas{\'a}nchez, Diego A and Farr{\'e}, Xavier
              and Fernandez-Callejo, Marcos and Raveendran, Muthuswamy
              and Harris, R Alan and Rosene, Douglas L
              and Lorente-Galdos, Belen and Navarro, Arcadi
              and Marques-Bonet, Tomas and Rogers, Jeffrey and Juan, David},
  title    = {Copy number variants and fixed duplications among 198 rhesus macaques ({Macaca} mulatta)},
  journal  = {PLoS Genet},
  volume   = {16},
  year     = {2020},
  month    = may,
  pages    = {e1008742},
  issn     = {1553-7404},
  doi      = {10.1371/journal.pgen.1008742},
  keywords = {Animals, Chromosome Mapping, DNA Copy Number Variations, Female, Gene Duplication, Genetics, Population, Genome, High-Throughput Nucleotide Sequencing, Humans, Macaca mulatta, Male, Open Reading Frames, Phylogeny, Sequence Analysis, DNA, Species Specificity},
  abstract = {The rhesus macaque is an abundant species of Old World monkeys and a valuable model organism for biomedical research due to its close phylogenetic relationship to humans. Copy number variation is one of the main sources of genomic diversity within and between species and a widely recognized cause of inter-individual differences in disease risk. However, copy number differences among rhesus macaques and between the human and macaque genomes, as well as the relevance of this diversity to research involving this nonhuman primate, remain understudied. Here we present a high-resolution map of sequence copy number for the rhesus macaque genome constructed from a dataset of 198 individuals. Our results show that about one-eighth of the rhesus macaque reference genome is composed of recently duplicated regions, either copy number variable regions or fixed duplications. Comparison with human genomic copy number maps based on previously published data shows that, despite overall similarities in the genome-wide distribution of these regions, there are specific differences at the chromosome level. Some of these create differences in the copy number profile between human disease genes and their rhesus macaque orthologs. Our results highlight the importance of addressing the number of copies of target genes in the design of experiments and cautions against human-centered assumptions in research conducted with model organisms. Overall, we present a genome-wide copy number map from a large sample of rhesus macaque individuals representing an important novel contribution concerning the evolution of copy number in primate genomes.},
}

@article{126,
  author   = {Niestroj, Lisa-Marie and Perez-Palma, Eduardo
              and Howrigan, Daniel P and Zhou, Yadi and Cheng, Feixiong
              and Saarentaus, Elmo and N{\"u}rnberg, Peter and Stevelink, Remi
              and Daly, Mark J and Palotie, Aarno and Lal, Dennis},
  title    = {Epilepsy subtype-specific copy number burden observed in a genome-wide study of 17 458 subjects},
  journal  = {Brain},
  volume   = {143},
  year     = {2020},
  month    = jul,
  day      = {1},
  pages    = {2106--2118},
  issn     = {1460-2156},
  doi      = {10.1093/brain/awaa171},
  keywords = {DNA Copy Number Variations, Epilepsy, Female, Genetic Predisposition to Disease, Genome-Wide Association Study, Humans, Male},
  abstract = {Cytogenic testing is routinely applied in most neurological centres for severe paediatric epilepsies. However, which characteristics of copy number variants (CNVs) confer most epilepsy risk and which epilepsy subtypes carry the most CNV burden, have not been explored on a genome-wide scale. Here, we present the largest CNV investigation in epilepsy to date with 10 712 European epilepsy cases and 6746 ancestry-matched controls. Patients with genetic generalized epilepsy, lesional focal epilepsy, non-acquired focal epilepsy, and developmental and epileptic encephalopathy were included. All samples were processed with the same technology and analysis pipeline. All investigated epilepsy types, including lesional focal epilepsy patients, showed an increase in CNV burden in at least one tested category compared to controls. However, we observed striking differences in CNV burden across epilepsy types and investigated CNV categories. Genetic generalized epilepsy patients have the highest CNV burden in all categories tested, followed by developmental and epileptic encephalopathy patients. Both epilepsy types also show association for deletions covering genes intolerant for truncating variants. Genome-wide CNV breakpoint association showed not only significant loci for genetic generalized and developmental and epileptic encephalopathy patients but also for lesional focal epilepsy patients. With a 34-fold risk for developing genetic generalized epilepsy, we show for the first time that the established epilepsy-associated 15q13.3 deletion represents the strongest risk CNV for genetic generalized epilepsy across the whole genome. Using the human interactome, we examined the largest connected component of the genes overlapped by CNVs in the four epilepsy types. We observed that genetic generalized epilepsy and non-acquired focal epilepsy formed disease modules. In summary, we show that in all common epilepsy types, 1.5--3\% of patients carry epilepsy-associated CNVs. The characteristics of risk CNVs vary tremendously across and within epilepsy types. Thus, we advocate genome-wide genomic testing to identify all disease-associated types of CNVs.},
}

@article{91,
  author   = {Shen, Feichen and Kidd, Jeffrey M},
  title    = {Rapid, Paralog-Sensitive {CNV} Analysis of 2457 Human Genomes Using {QuicK-mer2}},
  journal  = {Genes (Basel)},
  volume   = {11},
  year     = {2020},
  month    = jan,
  day      = {29},
  issn     = {2073-4425},
  doi      = {10.3390/genes11020141},
  keywords = {Algorithms, Computational Biology, DNA Copy Number Variations, Evolution, Molecular, Gene Duplication, Genome, Human, Humans, Sequence Analysis, DNA},
  abstract = {Gene duplication is a major mechanism for the evolution of gene novelty, and copy-number variation makes a major contribution to inter-individual genetic diversity. However, most approaches for studying copy-number variation rely upon uniquely mapping reads to a genome reference and are unable to distinguish among duplicated sequences. Specialized approaches to interrogate specific paralogs are comparatively slow and have a high degree of computational complexity, limiting their effective application to emerging population-scale data sets. We present QuicK-mer2, a self-contained, mapping-free approach that enables the rapid construction of paralog-specific copy-number maps from short-read sequence data. This approach is based on the tabulation of unique k-mer sequences from short-read data sets, and is able to analyze a 20X coverage human genome in approximately 20 min. We applied our approach to newly released sequence data from the 1000 Genomes Project, constructed paralog-specific copy-number maps from 2457 unrelated individuals, and uncovered copy-number variation of paralogous genes. We identify nine genes where none of the analyzed samples have a copy number of two, 92 genes where the majority of samples have a copy number other than two, and describe rare copy number variation effecting multiple genes at the APOBEC3 locus.},
}

@article{23,
  author   = {Turner, Tychele N and Coe, Bradley P and Dickel, Diane E
              and Hoekzema, Kendra and Nelson, Bradley J and Zody, Michael C
              and Kronenberg, Zev N and Hormozdiari, Fereydoun
              and Raja, Archana and Pennacchio, Len A and Darnell, Robert B
              and Eichler, Evan E},
  title    = {Genomic Patterns of De Novo Mutation in Simplex Autism},
  journal  = {Cell},
  volume   = {171},
  year     = {2017},
  month    = oct,
  day      = {19},
  pages    = {710--722.e12},
  issn     = {1097-4172},
  doi      = {10.1016/j.cell.2017.08.047},
  keywords = {Animals, Autistic Disorder, DNA Copy Number Variations, DNA Mutational Analysis, Female, Genome-Wide Association Study, Humans, INDEL Mutation, Male, Mice, Polymorphism, Single Nucleotide},
  abstract = {To further our understanding of the genetic etiology of autism, we generated and analyzed genome sequence data from 516 idiopathic autism families (2,064 individuals). This resource includes $>$59 million single-nucleotide variants (SNVs) and 9,212 private copy number variants (CNVs), of which 133,992 and 88 are de novo mutations (DNMs), respectively. We estimate a mutation rate of $\sim$1.5~{\texttimes} 10 SNVs per site per generation with a significantly higher mutation rate in repetitive DNA. Comparing probands and unaffected siblings, we observe several DNM trends. Probands carry more gene-disruptive CNVs and SNVs, resulting in severe missense mutations and mapping to predicted fetal brain promoters and embryonic stem cell enhancers. These differences become more pronounced for autism genes (p~= 1.8~{\texttimes} 10, OR~= 2.2). Patients are more likely to carry multiple coding and noncoding DNMs in different genes, which are enriched for expression in striatal neurons (p~= 3~{\texttimes} 10), suggesting a path forward for genetically characterizing more complex cases of autism.},
}