@article {151, title = {Association of structural variation with cardiometabolic traits in Finns.}, journal = {Am J Hum Genet}, volume = {108}, year = {2021}, month = {2021 04 01}, pages = {583--596}, abstract = {

The contribution of genome structural variation (SV) to quantitative traits associated with cardiometabolic diseases remains largely unknown. Here, we present the results of a study examining genetic association between SVs and cardiometabolic traits in the Finnish population. We used sensitive methods to identify and genotype 129,166 high-confidence SVs from deep whole-genome sequencing (WGS) data of 4,848 individuals. We tested the 64,572 common and low-frequency SVs for association with 116 quantitative traits and tested candidate associations using exome sequencing and array genotype data from an additional 15,205 individuals. We discovered 31 genome-wide significant associations at 15 loci, including 2 loci at which SVs have strong phenotypic effects: (1) a deletion of the ALB promoter that is greatly enriched in the Finnish population and causes decreased serum albumin level in carriers (p = 1.47~{\texttimes} 10) and is also associated with increased levels of total cholesterol (p = 1.22~{\texttimes} 10) and 14 additional cholesterol-related traits, and (2) a multi-allelic copy number variant (CNV) at PDPR that is strongly associated with pyruvate (p = 4.81~{\texttimes} 10) and alanine (p = 6.14~{\texttimes} 10) levels and resides within a structurally complex genomic region that has accumulated many rearrangements over evolutionary time. We also confirmed six previously reported associations, including five led by stronger signals in single nucleotide variants (SNVs) and one linking recurrent HP gene deletion and cholesterol levels (p = 6.24~{\texttimes} 10), which was also found to be strongly associated with increased glycoprotein level (p = 3.53~{\texttimes} 10). Our study confirms that integrating SVs in trait-mapping studies will expand our knowledge of genetic factors underlying disease risk.

}, keywords = {Alleles, Cardiovascular Diseases, Cholesterol, DNA Copy Number Variations, Female, Finland, Genome, Human, Genomic Structural Variation, Genotype, High-Throughput Nucleotide Sequencing, Humans, Male, Mitochondrial Proteins, Promoter Regions, Genetic, Pyruvate Dehydrogenase (Lipoamide)-Phosphatase, Pyruvic Acid, Serum Albumin, Human}, issn = {1537-6605}, doi = {10.1016/j.ajhg.2021.03.008}, author = {Chen, Lei and Abel, Haley J and Das, Indraniel and Larson, David E and Ganel, Liron and Kanchi, Krishna L and Regier, Allison A and Young, Erica P and Kang, Chul Joo and Scott, Alexandra J and Chiang, Colby and Wang, Xinxin and Lu, Shuangjia and Christ, Ryan and Service, Susan K and Chiang, Charleston W K and Havulinna, Aki S and Kuusisto, Johanna and Boehnke, Michael and Laakso, Markku and Palotie, Aarno and Ripatti, Samuli and Freimer, Nelson B and Locke, Adam E and Stitziel, Nathan O and Hall, Ira M} } @article {156, title = {Haplotype-resolved diverse human genomes and integrated analysis of structural variation.}, journal = {Science}, volume = {372}, year = {2021}, month = {2021 04 02}, abstract = {

Long-read and strand-specific sequencing technologies together facilitate the de novo assembly of high-quality haplotype-resolved human genomes without parent-child trio data. We present 64 assembled haplotypes from 32 diverse human genomes. These highly contiguous haplotype assemblies (average minimum contig length needed to cover 50\% of the genome: 26 million base pairs) integrate all forms of genetic variation, even across complex loci. We identified 107,590 structural variants (SVs), of which 68\% were not discovered with short-read sequencing, and 278 SV hotspots (spanning megabases of gene-rich sequence). We characterized 130 of the most active mobile element source elements and found that 63\% of all SVs arise through homology-mediated mechanisms. This resource enables reliable graph-based genotyping from short reads of up to 50,340 SVs, resulting in the identification of 1526 expression quantitative trait loci as well as SV candidates for adaptive selection within the human population.

}, keywords = {Female, Genetic Variation, Genome, Human, Genotype, Haplotypes, High-Throughput Nucleotide Sequencing, Humans, INDEL Mutation, Interspersed Repetitive Sequences, Male, Population Groups, Quantitative Trait Loci, Retroelements, Sequence Analysis, DNA, Sequence Inversion, Whole Genome Sequencing}, issn = {1095-9203}, doi = {10.1126/science.abf7117}, author = {Ebert, Peter and Audano, Peter A and Zhu, Qihui and Rodriguez-Martin, Bernardo and Porubsky, David and Bonder, Marc Jan and Sulovari, Arvis and Ebler, Jana and Zhou, Weichen and Serra Mari, Rebecca and Yilmaz, Feyza and Zhao, Xuefang and Hsieh, PingHsun and Lee, Joyce and Kumar, Sushant and Lin, Jiadong and Rausch, Tobias and Chen, Yu and Ren, Jingwen and Santamarina, Martin and H{\"o}ps, Wolfram and Ashraf, Hufsah and Chuang, Nelson T and Yang, Xiaofei and Munson, Katherine M and Lewis, Alexandra P and Fairley, Susan and Tallon, Luke J and Clarke, Wayne E and Basile, Anna O and Byrska-Bishop, Marta and Corvelo, Andr{\'e} and Evani, Uday S and Lu, Tsung-Yu and Chaisson, Mark J P and Chen, Junjie and Li, Chong and Brand, Harrison and Wenger, Aaron M and Ghareghani, Maryam and Harvey, William T and Raeder, Benjamin and Hasenfeld, Patrick and Regier, Allison A and Abel, Haley J and Hall, Ira M and Flicek, Paul and Stegle, Oliver and Gerstein, Mark B and Tubio, Jose M C and Mu, Zepeng and Li, Yang I and Shi, Xinghua and Hastie, Alex R and Ye, Kai and Chong, Zechen and Sanders, Ashley D and Zody, Michael C and Talkowski, Michael E and Mills, Ryan E and Devine, Scott E and Lee, Charles and Korbel, Jan O and Marschall, Tobias and Eichler, Evan E} } @article {155, title = {Whole-genome sequencing of African Americans implicates differential genetic architecture in inflammatory bowel disease.}, journal = {Am J Hum Genet}, volume = {108}, year = {2021}, month = {2021 03 04}, pages = {431--445}, abstract = {

Whether or not populations diverge with respect to the genetic contribution to risk of specific complex diseases is relevant to understanding the evolution of susceptibility and origins of health disparities. Here, we describe a large-scale whole-genome sequencing study of inflammatory bowel disease encompassing 1,774 affected individuals and 1,644 healthy control Americans with African ancestry (African Americans). Although no new loci for inflammatory bowel disease are discovered at genome-wide significance levels, we identify numerous instances of differential effect sizes in combination with divergent allele frequencies. For example, the major effect at PTGER4 fine maps to a single credible interval of 22 SNPs corresponding to one of four independent associations at the locus in European ancestry individuals but with an elevated odds ratio for Crohn disease in African Americans. A rare variant aggregate analysis implicates Ca$^{2+}$-binding neuro-immunomodulator CALB2 in ulcerative colitis. Highly significant overall overlap of common variant risk for inflammatory bowel disease susceptibility between individuals with African and European ancestries was observed, with 41 of 241 previously known lead variants replicated and overall correlations in effect sizes of 0.68 for combined inflammatory bowel disease. Nevertheless, subtle differences influence the performance of polygenic risk scores, and we show that ancestry-appropriate weights significantly improve polygenic prediction in the highest percentiles of risk. The median amount of variance explained per locus remains the same in African and European cohorts, providing evidence for compensation of effect sizes as allele frequencies diverge, as expected under a highly polygenic model of disease.

}, keywords = {African Americans, Aged, Aged, 80 and over, Calbindin 2, Colitis, Ulcerative, Crohn Disease, European Continental Ancestry Group, Female, Gene Frequency, Genetic Predisposition to Disease, Genome-Wide Association Study, Humans, Inflammatory Bowel Diseases, Male, Multifactorial Inheritance, Polymorphism, Single Nucleotide, Receptors, Prostaglandin E, EP4 Subtype, Whole Genome Sequencing}, issn = {1537-6605}, doi = {10.1016/j.ajhg.2021.02.001}, author = {Somineni, Hari K and Nagpal, Sini and Venkateswaran, Suresh and Cutler, David J and Okou, David T and Haritunians, Talin and Simpson, Claire L and Begum, Ferdouse and Datta, Lisa W and Quiros, Antonio J and Seminerio, Jenifer and Mengesha, Emebet and Alexander, Jonathan S and Baldassano, Robert N and Dudley-Brown, Sharon and Cross, Raymond K and Dassopoulos, Themistocles and Denson, Lee A and Dhere, Tanvi A and Iskandar, Heba and Dryden, Gerald W and Hou, Jason K and Hussain, Sunny Z and Hyams, Jeffrey S and Isaacs, Kim L and Kader, Howard and Kappelman, Michael D and Katz, Jeffry and Kellermayer, Richard and Kuemmerle, John F and Lazarev, Mark and Li, Ellen and Mannon, Peter and Moulton, Dedrick E and Newberry, Rodney D and Patel, Ashish S and Pekow, Joel and Saeed, Shehzad A and Valentine, John F and Wang, Ming-Hsi and McCauley, Jacob L and Abreu, Maria T and Jester, Traci and Molle-Rios, Zarela and Palle, Sirish and Scherl, Ellen J and Kwon, John and Rioux, John D and Duerr, Richard H and Silverberg, Mark S and Zwick, Michael E and Stevens, Christine and Daly, Mark J and Cho, Judy H and Gibson, Greg and McGovern, Dermot P B and Brant, Steven R and Kugathasan, Subra} } @article {144, title = {Association of Rare Pathogenic DNA Variants for Familial Hypercholesterolemia, Hereditary Breast and Ovarian Cancer Syndrome, and Lynch Syndrome With Disease Risk in Adults According to Family History.}, journal = {JAMA Netw Open}, volume = {3}, year = {2020}, month = {2020 04 01}, pages = {e203959}, abstract = {

Importance: Pathogenic DNA variants associated with familial hypercholesterolemia, hereditary breast and ovarian cancer syndrome, and Lynch syndrome are widely recognized as clinically important and actionable when identified, leading some clinicians to recommend population-wide genomic screening.

Objectives: To assess the prevalence and clinical importance of pathogenic or likely pathogenic variants associated with each of 3 genomic conditions (familial hypercholesterolemia, hereditary breast and ovarian cancer syndrome, and Lynch syndrome) within the context of contemporary clinical care.

Design, Setting, and Participants: This cohort study used gene-sequencing data from 49 738 participants in the UK Biobank who were recruited from 22 sites across the UK between March 21, 2006, and October 1, 2010. Inpatient hospital data date back to 1977; cancer registry data, to 1957; and death registry data, to 2006. Statistical analysis was performed from July 22, 2019, to November 15, 2019.

Exposures: Pathogenic or likely pathogenic DNA variants classified by a clinical laboratory geneticist.

Main Outcomes and Measures: Composite end point specific to each genomic condition based on atherosclerotic cardiovascular disease events for familial hypercholesterolemia, breast or ovarian cancer for hereditary breast and ovarian cancer syndrome, and colorectal or uterine cancer for Lynch syndrome.

Results: Among 49 738 participants (mean [SD] age, 57 [8] years; 27 144 female [55\%]), 441 (0.9\%) harbored a pathogenic or likely pathogenic variant associated with any of 3 genomic conditions, including 131 (0.3\%) for familial hypercholesterolemia, 235 (0.5\%) for hereditary breast and ovarian cancer syndrome, and 76 (0.2\%) for Lynch syndrome. Presence of these variants was associated with increased risk of disease: for familial hypercholesterolemia, 28 of 131 carriers (21.4\%) vs 4663 of 49 607 noncarriers (9.4\%) developed atherosclerotic cardiovascular disease; for hereditary breast and ovarian cancer syndrome, 32 of 116 female carriers (27.6\%) vs 2080 of 27 028 female noncarriers (7.7\%) developed associated cancers; and for Lynch syndrome, 17 of 76 carriers (22.4\%) vs 929 of 49 662 noncarriers (1.9\%) developed colorectal or uterine cancer. The predicted probability of disease at age 75 years despite contemporary clinical care was 45.3\% for carriers of familial hypercholesterolemia, 41.1\% for hereditary breast and ovarian cancer syndrome, and 38.3\% for Lynch syndrome. Across the 3 conditions, 39.7\% (175 of 441) of the carriers reported a family history of disease vs 23.2\% (34 517 of 148 772) of noncarriers.

Conclusions and Relevance: The findings suggest that approximately 1\% of the middle-aged adult population in the UK Biobank harbored a pathogenic variant associated with any of 3 genomic conditions. These variants were associated with an increased risk of disease despite contemporary clinical care and were not reliably detected by family history.

}, keywords = {Aged, Cohort Studies, Colorectal Neoplasms, Hereditary Nonpolyposis, Female, Genetic Predisposition to Disease, Hereditary Breast and Ovarian Cancer Syndrome, Heterozygote, Humans, Hyperlipoproteinemia Type II, Male, Middle Aged, Pedigree, Proportional Hazards Models, United Kingdom, Whole Exome Sequencing}, issn = {2574-3805}, doi = {10.1001/jamanetworkopen.2020.3959}, author = {Patel, Aniruddh P and Wang, Minxian and Fahed, Akl C and Mason-Suares, Heather and Brockman, Deanna and Pelletier, Renee and Amr, Sami and Machini, Kalotina and Hawley, Megan and Witkowski, Leora and Koch, Christopher and Philippakis, Anthony and Cassa, Christopher A and Ellinor, Patrick T and Kathiresan, Sekar and Ng, Kenney and Lebo, Matthew and Khera, Amit V} } @article {95, title = {Combining Imaging and Genetics to Predict Recurrence of Anticoagulation-Associated Intracerebral Hemorrhage.}, journal = {Stroke}, volume = {51}, year = {2020}, month = {2020 07}, pages = {2153--2160}, abstract = {

BACKGROUND AND PURPOSE: For survivors of oral anticoagulation therapy (OAT)-associated intracerebral hemorrhage (OAT-ICH) who are at high risk for thromboembolism, the benefits of OAT resumption must be weighed against increased risk of recurrent hemorrhagic stroke. The ε2/ε4 alleles of the apolipoprotein E (APOE) gene, MRI-defined cortical superficial siderosis, and cerebral microbleeds are the most potent risk factors for recurrent ICH. We sought to determine whether combining MRI markers and genotype could have clinical impact by identifying ICH survivors in whom the risks of OAT resumption are highest.

METHODS: Joint analysis of data from 2 longitudinal cohort studies of OAT-ICH survivors: (1) MGH-ICH study (Massachusetts General Hospital ICH) and (2) longitudinal component of the ERICH study (Ethnic/Racial Variations of Intracerebral Hemorrhage). We evaluated whether MRI markers and genotype predict ICH recurrence. We then developed and validated a combined APOE-MRI classification scheme to predict ICH recurrence, using Classification and Regression Tree analysis.

RESULTS: Cortical superficial siderosis, cerebral microbleed, and ε2/ε4 variants were independently associated with ICH recurrence after OAT-ICH (all P<0.05). Combining genotype and MRI data resulted in improved prediction of ICH recurrence (Harrell C: 0.79 versus 0.55 for clinical data alone, P=0.033). In the MGH (training) data set, CSS, cerebral microbleed, and ε2/ε4 stratified likelihood of ICH recurrence into high-, medium-, and low-risk categories. In the ERICH (validation) data set, yearly ICH recurrence rates for high-, medium-, and low-risk individuals were 6.6\%, 2.5\%, and 0.9\%, respectively, with overall area under the curve of 0.91 for prediction of recurrent ICH.

CONCLUSIONS: Combining MRI and genotype stratifies likelihood of ICH recurrence into high, medium, and low risk. If confirmed in prospective studies, this combined APOE-MRI classification scheme may prove useful for selecting individuals for OAT resumption after ICH.

}, keywords = {Aged, Anticoagulants, Apolipoprotein E4, Cerebral Hemorrhage, Female, Humans, Magnetic Resonance Imaging, Male, Middle Aged, Neuroimaging, Recurrence}, issn = {1524-4628}, doi = {10.1161/STROKEAHA.120.028310}, author = {Biffi, Alessandro and Urday, Sebastian and Kubiszewski, Patryk and Gilkerson, Lee and Sekar, Padmini and Rodriguez-Torres, Axana and Bettin, Margaret and Charidimou, Andreas and Pasi, Marco and Kourkoulis, Christina and Schwab, Kristin and DiPucchio, Zora and Behymer, Tyler and Osborne, Jennifer and Morgan, Misty and Moomaw, Charles J and James, Michael L and Greenberg, Steven M and Viswanathan, Anand and Gurol, M Edip and Worrall, Bradford B and Testai, Fernando D and McCauley, Jacob L and Falcone, Guido J and Langefeld, Carl D and Anderson, Christopher D and Kamel, Hooman and Woo, Daniel and Sheth, Kevin N and Rosand, Jonathan} } @article {106, title = {Community-based recruitment and exome sequencing indicates high diagnostic yield in adults with intellectual disability.}, journal = {Mol Genet Genomic Med}, volume = {8}, year = {2020}, month = {2020 10}, pages = {e1439}, abstract = {

BACKGROUND: Establishing a genetic diagnosis for individuals with intellectual disability (ID) benefits patients and their families as it may inform the prognosis, lead to appropriate therapy, and facilitate access to medical and supportive services. Exome sequencing has been successfully applied in a diagnostic setting, but most clinical exome referrals are pediatric patients, with many adults with ID lacking a comprehensive genetic evaluation.

METHODS: Our unique recruitment strategy involved partnering with service and education providers for individuals with ID. We performed exome sequencing and analysis, and clinical variant interpretation for each recruited family.

RESULTS: All five families enrolled in the study opted-in for the return of genetic results. In three out of five families exome sequencing analysis identified pathogenic or likely pathogenic variants in KANSL1, TUSC3, and MED13L genes. Families discussed the results and any potential medical follow-up in an appointment with a board certified clinical geneticist.

CONCLUSION: Our study suggests high yield of exome sequencing as a diagnostic tool in adult patients with ID who have not undergone comprehensive sequencing-based genetic testing. Research studies including an option of return of results through a genetic clinic could help minimize the disparity in exome diagnostic testing between pediatric and adult patients with ID.

}, keywords = {Adult, Female, Genetic Testing, Humans, Independent Living, Intellectual Disability, Male, Mediator Complex, Membrane Proteins, Nuclear Proteins, Patient Selection, Sensitivity and Specificity, Tumor Suppressor Proteins, Whole Exome Sequencing}, issn = {2324-9269}, doi = {10.1002/mgg3.1439}, author = {Sabo, Aniko and Murdock, David and Dugan, Shannon and Meng, Qingchang and Gingras, Marie-Claude and Hu, Jianhong and Muzny, Donna and Gibbs, Richard} } @article {116, title = {Copy number variants and fixed duplications among 198 rhesus macaques (Macaca mulatta).}, journal = {PLoS Genet}, volume = {16}, year = {2020}, month = {2020 05}, pages = {e1008742}, abstract = {

The rhesus macaque is an abundant species of Old World monkeys and a valuable model organism for biomedical research due to its close phylogenetic relationship to humans. Copy number variation is one of the main sources of genomic diversity within and between species and a widely recognized cause of inter-individual differences in disease risk. However, copy number differences among rhesus macaques and between the human and macaque genomes, as well as the relevance of this diversity to research involving this nonhuman primate, remain understudied. Here we present a high-resolution map of sequence copy number for the rhesus macaque genome constructed from a dataset of 198 individuals. Our results show that about one-eighth of the rhesus macaque reference genome is composed of recently duplicated regions, either copy number variable regions or fixed duplications. Comparison with human genomic copy number maps based on previously published data shows that, despite overall similarities in the genome-wide distribution of these regions, there are specific differences at the chromosome level. Some of these create differences in the copy number profile between human disease genes and their rhesus macaque orthologs. Our results highlight the importance of addressing the number of copies of target genes in the design of experiments and cautions against human-centered assumptions in research conducted with model organisms. Overall, we present a genome-wide copy number map from a large sample of rhesus macaque individuals representing an important novel contribution concerning the evolution of copy number in primate genomes.

}, keywords = {Animals, Chromosome Mapping, DNA Copy Number Variations, Female, Gene Duplication, Genetics, Population, Genome, High-Throughput Nucleotide Sequencing, Humans, Macaca mulatta, Male, Open Reading Frames, Phylogeny, Sequence Analysis, DNA, Species Specificity}, issn = {1553-7404}, doi = {10.1371/journal.pgen.1008742}, author = {Bras{\'o}-Vives, Marina and Povolotskaya, Inna S and Hartas{\'a}nchez, Diego A and Farr{\'e}, Xavier and Fernandez-Callejo, Marcos and Raveendran, Muthuswamy and Harris, R Alan and Rosene, Douglas L and Lorente-Galdos, Belen and Navarro, Arcadi and Marques-Bonet, Tomas and Rogers, Jeffrey and Juan, David} } @article {126, title = {Epilepsy subtype-specific copy number burden observed in a genome-wide study of 17 458 subjects.}, journal = {Brain}, volume = {143}, year = {2020}, month = {2020 07 01}, pages = {2106--2118}, abstract = {

Cytogenic testing is routinely applied in most neurological centres for severe paediatric epilepsies. However, which characteristics of copy number variants (CNVs) confer most epilepsy risk and which epilepsy subtypes carry the most CNV burden, have not been explored on a genome-wide scale. Here, we present the largest CNV investigation in epilepsy to date with 10 712 European epilepsy cases and 6746 ancestry-matched controls. Patients with genetic generalized epilepsy, lesional focal epilepsy, non-acquired focal epilepsy, and developmental and epileptic encephalopathy were included. All samples were processed with the same technology and analysis pipeline. All investigated epilepsy types, including lesional focal epilepsy patients, showed an increase in CNV burden in at least one tested category compared to controls. However, we observed striking differences in CNV burden across epilepsy types and investigated CNV categories. Genetic generalized epilepsy patients have the highest CNV burden in all categories tested, followed by developmental and epileptic encephalopathy patients. Both epilepsy types also show association for deletions covering genes intolerant for truncating variants. Genome-wide CNV breakpoint association showed not only significant loci for genetic generalized and developmental and epileptic encephalopathy patients but also for lesional focal epilepsy patients. With a 34-fold risk for developing genetic generalized epilepsy, we show for the first time that the established epilepsy-associated 15q13.3 deletion represents the strongest risk CNV for genetic generalized epilepsy across the whole genome. Using the human interactome, we examined the largest connected component of the genes overlapped by CNVs in the four epilepsy types. We observed that genetic generalized epilepsy and non-acquired focal epilepsy formed disease modules. In summary, we show that in all common epilepsy types, 1.5-3\% of patients carry epilepsy-associated CNVs. 
The characteristics of risk CNVs vary tremendously across and within epilepsy types. Thus, we advocate genome-wide genomic testing to identify all disease-associated types of CNVs.

}, keywords = {DNA Copy Number Variations, Epilepsy, Female, Genetic Predisposition to Disease, Genome-Wide Association Study, Humans, Male}, issn = {1460-2156}, doi = {10.1093/brain/awaa171}, author = {Niestroj, Lisa-Marie and Perez-Palma, Eduardo and Howrigan, Daniel P and Zhou, Yadi and Cheng, Feixiong and Saarentaus, Elmo and N{\"u}rnberg, Peter and Stevelink, Remi and Daly, Mark J and Palotie, Aarno and Lal, Dennis} } @article {89, title = {Genetic diagnoses in epilepsy: The impact of dynamic exome analysis in a pediatric cohort.}, journal = {Epilepsia}, volume = {61}, year = {2020}, month = {2020 02}, pages = {249--258}, abstract = {

OBJECTIVE: We evaluated the yield of systematic analysis and/or reanalysis of whole exome sequencing (WES) data from a cohort of well-phenotyped pediatric patients with epilepsy and suspected but previously undetermined genetic etiology.

METHODS: We identified and phenotyped 125 participants with pediatric epilepsy. Etiology was unexplained at the time of enrollment despite clinical testing, which included chromosomal microarray (57 patients), epilepsy gene panel (n = 48), both (n = 28), or WES (n = 8). Clinical epilepsy diagnoses included developmental and epileptic encephalopathy (DEE), febrile infection-related epilepsy syndrome, Rasmussen encephalitis, and other focal and generalized epilepsies. We analyzed WES data and compared the yield in participants with and without prior clinical genetic testing.

RESULTS: Overall, we identified pathogenic or likely pathogenic variants in 40\% (50/125) of our study participants. Nine patients with DEE had genetic variants in recently published genes that had not been recognized as epilepsy-related at the time of clinical testing (FGF12, GABBR1, GABBR2, ITPA, KAT6A, PTPN23, RHOBTB2, SATB2), and eight patients had genetic variants in candidate epilepsy genes (CAMTA1, FAT3, GABRA6, HUWE1, PTCHD1). Ninety participants had concomitant or subsequent clinical genetic testing, which was ultimately explanatory for 26\% (23/90). Of the 67 participants whose molecular diagnoses were "unsolved" through clinical genetic testing, we identified pathogenic or likely pathogenic variants in 17 (25\%).

SIGNIFICANCE: Our data argue for early consideration of WES with iterative reanalysis for patients with epilepsy, particularly those with DEE or epilepsy with intellectual disability. Rigorous analysis of WES data of well-phenotyped patients with epilepsy leads to a broader understanding of gene-specific phenotypic spectra as well as candidate disease gene identification. We illustrate the dynamic nature of genetic diagnosis over time, with analysis and in some cases reanalysis of exome data leading to the identification of disease-associated variants among participants with previously nondiagnostic results from a variety of clinical testing strategies.

}, keywords = {Adolescent, Adult, Age of Onset, Brain Diseases, Child, Child, Preschool, Chromosomes, Human, Cohort Studies, Epilepsy, Epilepsy, Generalized, Exome, Female, Genetic Testing, Genetic Variation, Humans, Infant, Male, Microarray Analysis, Phenotype, Whole Exome Sequencing, Young Adult}, issn = {1528-1167}, doi = {10.1111/epi.16427}, author = {Rochtus, Anne and Olson, Heather E and Smith, Lacey and Keith, Louisa G and El Achkar, Christelle and Taylor, Alan and Mahida, Sonal and Park, Meredith and Kelly, McKenna and Shain, Catherine and Rockowitz, Shira and Rosen Sheidley, Beth and Poduri, Annapurna} } @article {92, title = {Genetics of schizophrenia in the South African Xhosa.}, journal = {Science}, volume = {367}, year = {2020}, month = {2020 01 31}, pages = {569--573}, abstract = {

Africa, the ancestral home of all modern humans, is the most informative continent for understanding the human genome and its contribution to complex disease. To better understand the genetics of schizophrenia, we studied the illness in the Xhosa population of South Africa, recruiting 909 cases and 917 age-, gender-, and residence-matched controls. Individuals with schizophrenia were significantly more likely than controls to harbor private, severely damaging mutations in genes that are critical to synaptic function, including neural circuitry mediated by the neurotransmitters glutamine, γ-aminobutyric acid, and dopamine. Schizophrenia is genetically highly heterogeneous, involving severe ultrarare mutations in genes that are critical to synaptic plasticity. The depth of genetic variation in Africa revealed this relationship with a moderate sample size and informed our understanding of the genetics of schizophrenia worldwide.

}, keywords = {Age Factors, Autistic Disorder, Bipolar Disorder, Dopamine, Female, gamma-Aminobutyric Acid, Genetic Variation, Glutamine, Humans, Male, Mutation, Neural Pathways, Schizophrenia, Sex Factors, South Africa, Synapses, Synaptic Transmission}, issn = {1095-9203}, doi = {10.1126/science.aay8833}, author = {Gulsuner, S and Stein, D J and Susser, E S and Sibeko, G and Pretorius, A and Walsh, T and Majara, L and Mndini, M M and Mqulwana, S G and Ntola, O A and Casadei, S and Ngqengelele, L L and Korchina, V and van der Merwe, C and Malan, M and Fader, K M and Feng, M and Willoughby, E and Muzny, D and Baldinger, A and Andrews, H F and Gur, R C and Gibbs, R A and Zingela, Z and Nagdee, M and Ramesar, R S and King, M-C and McClellan, J M} } @article {129, title = {Genome-Wide Polygenic Score, Clinical Risk Factors, and Long-Term Trajectories of Coronary Artery Disease.}, journal = {Arterioscler Thromb Vasc Biol}, volume = {40}, year = {2020}, month = {2020 11}, pages = {2738--2746}, abstract = {

OBJECTIVE: To determine the relationship of a genome-wide polygenic score for coronary artery disease (GPS) with lifetime trajectories of CAD risk, directly compare its predictive capacity to traditional risk factors, and assess its interplay with the Pooled Cohort Equations (PCE) clinical risk estimator. Approach and Results: We studied GPS in 28 556 middle-aged participants of the Malm{\"o} Diet and Cancer Study, of whom 4122 (14.4\%) developed CAD over a median follow-up of 21.3 years. A pronounced gradient in lifetime risk of CAD was observed---16\% for those in the lowest GPS decile to 48\% in the highest. We evaluated the discriminative capacity of the GPS---as assessed by change in the C-statistic from a baseline model including age and sex---among 5685 individuals with PCE risk estimates available. The increment for the GPS (+0.045, P<0.001) was higher than for any of 11 traditional risk factors (range +0.007 to +0.032). Minimal correlation was observed between GPS and 10-year risk defined by the PCE (r=0.03), and addition of GPS improved the C-statistic of the PCE model by 0.026. A significant gradient in lifetime risk was observed for the GPS, even among individuals within a given PCE clinical risk stratum. We replicated key findings---noting strikingly consistent results---in 325 003 participants of the UK Biobank.

CONCLUSIONS: GPS---a risk estimator available from birth---stratifies individuals into varying trajectories of clinical risk for CAD. Implementation of GPS may enable identification of high-risk individuals early in life, decades in advance of manifest risk factors or disease.

}, keywords = {Adult, Aged, Coronary Artery Disease, Female, Genetic Predisposition to Disease, Genome-Wide Association Study, Heart Disease Risk Factors, Heredity, Humans, Incidence, Male, Middle Aged, Multifactorial Inheritance, Phenotype, Prognosis, Risk Assessment, Sweden, Time Factors, United Kingdom}, issn = {1524-4636}, doi = {10.1161/ATVBAHA.120.314856}, author = {Hindy, George and Aragam, Krishna G and Ng, Kenney and Chaffin, Mark and Lotta, Luca A and Baras, Aris and Drake, Isabel and Orho-Melander, Marju and Melander, Olle and Kathiresan, Sekar and Khera, Amit V} } @article {97, title = {The GTEx Consortium atlas of genetic regulatory effects across human tissues.}, journal = {Science}, volume = {369}, year = {2020}, month = {2020 09 11}, pages = {1318--1330}, abstract = {

The Genotype-Tissue Expression (GTEx) project was established to characterize genetic effects on the transcriptome across human tissues and to link these regulatory mechanisms to trait and disease associations. Here, we present analyses of the version 8 data, examining 15,201 RNA-sequencing samples from 49 tissues of 838 postmortem donors. We comprehensively characterize genetic associations for gene expression and splicing in cis and trans, showing that regulatory associations are found for almost all genes, and describe the underlying molecular mechanisms and their contribution to allelic heterogeneity and pleiotropy of complex traits. Leveraging the large diversity of tissues, we provide insights into the tissue specificity of genetic effects and show that cell type composition is a key factor in understanding gene regulatory mechanisms in human tissues.

},
  keywords = {Datasets as Topic, Disease, Female, Gene Expression Regulation, Genome-Wide Association Study, Humans, Male, Organ Specificity, Quantitative Trait Loci, Sequence Analysis, RNA},
  issn     = {1095-9203},
  doi      = {10.1126/science.aaz1776},
  author   = {{GTEx Consortium}},
}

@article{102,
  title    = {The impact of sex on gene expression across human tissues.},
  journal  = {Science},
  volume   = {369},
  year     = {2020},
  month    = sep,
  day      = {11},
  abstract = {

Many complex human phenotypes exhibit sex-differentiated characteristics. However, the molecular mechanisms underlying these differences remain largely unknown. We generated a catalog of sex differences in gene expression and in the genetic regulation of gene expression across 44 human tissue sources surveyed by the Genotype-Tissue Expression project (GTEx, v8 release). We demonstrate that sex influences gene expression levels and cellular composition of tissue samples across the human body. A total of 37\% of all genes exhibit sex-biased expression in at least one tissue. We identify cis expression quantitative trait loci (eQTLs) with sex-differentiated effects and characterize their cellular origin. By integrating sex-biased eQTLs with genome-wide association study data, we identify 58 gene-trait associations that are driven by genetic regulation of gene expression in a single sex. These findings provide an extensive characterization of sex differences in the human transcriptome and its genetic regulation.

},
  keywords = {Chromosomes, Human, X, Disease, Epigenesis, Genetic, Female, Gene Expression, Gene Expression Regulation, Genetic Variation, Genome-Wide Association Study, Humans, Male, Organ Specificity, Promoter Regions, Genetic, Quantitative Trait Loci, Sex Characteristics, Sex Factors},
  issn     = {1095-9203},
  doi      = {10.1126/science.aba3066},
  author   = {Oliva, Meritxell and Mu{\~n}oz-Aguirre, Manuel and Kim-Hellmuth, Sarah and Wucher, Valentin and Gewirtz, Ariel D H and Cotter, Daniel J and Parsana, Princy and Kasela, Silva and Balliu, Brunilda and Vi{\~n}uela, Ana and Castel, Stephane E and Mohammadi, Pejman and Aguet, Fran{\c c}ois and Zou, Yuxin and Khramtsova, Ekaterina A and Skol, Andrew D and Garrido-Mart{\'\i}n, Diego and Reverter, Ferran and Brown, Andrew and Evans, Patrick and Gamazon, Eric R and Payne, Anthony and Bonazzola, Rodrigo and Barbeira, Alvaro N and Hamel, Andrew R and Martinez-Perez, Angel and Soria, Jos{\'e} Manuel and Pierce, Brandon L and Stephens, Matthew and Eskin, Eleazar and Dermitzakis, Emmanouil T and Segr{\`e}, Ayellet V and Im, Hae Kyung and Engelhardt, Barbara E and Ardlie, Kristin G and Montgomery, Stephen B and Battle, Alexis J and Lappalainen, Tuuli and Guigo, Roderic and Stranger, Barbara E},
}

@article{120,
  title    = {Inherited causes of clonal haematopoiesis in 97,691 whole genomes.},
  journal  = {Nature},
  volume   = {586},
  year     = {2020},
  month    = oct,
  pages    = {763--768},
  abstract = {

Age is the dominant risk factor for most chronic human diseases, but the mechanisms through which ageing confers this risk are largely unknown. The age-related acquisition of somatic mutations that lead to clonal expansion in regenerating haematopoietic stem cell populations has recently been associated with both haematological cancer and coronary heart disease---this phenomenon is termed clonal haematopoiesis of indeterminate potential (CHIP). Simultaneous analyses of germline and somatic whole-genome sequences provide the opportunity to identify root causes of CHIP. Here we analyse high-coverage whole-genome sequences from 97,691 participants of diverse ancestries in the National Heart, Lung, and Blood Institute Trans-omics for Precision Medicine (TOPMed) programme, and identify 4,229 individuals with CHIP. We identify associations with blood cell, lipid and inflammatory traits that are specific to different CHIP driver genes. Association of a genome-wide set of germline genetic variants enabled the identification of three genetic loci associated with CHIP status, including one locus at TET2 that was specific to individuals of African ancestry. In silico-informed in vitro evaluation of the TET2 germline locus enabled the identification of a causal variant that disrupts a TET2 distal enhancer, resulting in increased self-renewal of haematopoietic stem cells. Overall, we observe that germline genetic variation shapes haematopoietic stem cell function, leading to CHIP through mechanisms that are specific to clonal haematopoiesis as well as shared mechanisms that lead to somatic mutations across tissues.

},
  keywords = {Adult, Africa, African Continental Ancestry Group, Aged, Aged, 80 and over, alpha Karyopherins, Cell Self Renewal, Clonal Hematopoiesis, DNA-Binding Proteins, Female, Genetic Predisposition to Disease, Genome, Human, Germ-Line Mutation, Hematopoietic Stem Cells, Humans, Intracellular Signaling Peptides and Proteins, Male, Middle Aged, National Heart, Lung, and Blood Institute (U.S.), Phenotype, Precision Medicine, Proto-Oncogene Proteins, Tripartite Motif Proteins, United States, Whole Genome Sequencing},
  issn     = {1476-4687},
  doi      = {10.1038/s41586-020-2819-2},
  author   = {Bick, Alexander G and Weinstock, Joshua S and Nandakumar, Satish K and Fulco, Charles P and Bao, Erik L and Zekavat, Seyedeh M and Szeto, Mindy D and Liao, Xiaotian and Leventhal, Matthew J and Nasser, Joseph and Chang, Kyle and Laurie, Cecelia and Burugula, Bala Bharathi and Gibson, Christopher J and Lin, Amy E and Taub, Margaret A and Aguet, Fran{\c c}ois and Ardlie, Kristin and Mitchell, Braxton D and Barnes, Kathleen C and Moscati, Arden and Fornage, Myriam and Redline, Susan and Psaty, Bruce M and Silverman, Edwin K and Weiss, Scott T and Palmer, Nicholette D and Vasan, Ramachandran S and Burchard, Esteban G and Kardia, Sharon L R and He, Jiang and Kaplan, Robert C and Smith, Nicholas L and Arnett, Donna K and Schwartz, David A and Correa, Adolfo and de Andrade, Mariza and Guo, Xiuqing and Konkle, Barbara A and Custer, Brian and Peralta, Juan M and Gui, Hongsheng and Meyers, Deborah A and McGarvey, Stephen T and Chen, Ida Yii-Der and Shoemaker, M Benjamin and Peyser, Patricia A and Broome, Jai G and Gogarten, Stephanie M and Wang, Fei Fei and Wong, Quenna and Montasser, May E and Daya, Michelle and Kenny, Eimear E and North, Kari E and Launer, Lenore J and Cade, Brian E and Bis, Joshua C and Cho, Michael H and Lasky-Su, Jessica and Bowden, Donald W and Cupples, L Adrienne and Mak, Angel C Y and Becker, Lewis C and Smith, Jennifer A and Kelly, Tanika N and Aslibekyan, Stella and Heckbert, Susan R and Tiwari, Hemant K and Yang, Ivana V and Heit, John A and Lubitz, Steven A and Johnsen, Jill M and Curran, Joanne E and Wenzel, Sally E and Weeks, Daniel E and Rao, Dabeeru C and Darbar, Dawood and Moon, Jee-Young and Tracy, Russell P and Buth, Erin J and Rafaels, Nicholas and Loos, Ruth J F and Durda, Peter and Liu, Yongmei and Hou, Lifang and Lee, Jiwon and Kachroo, Priyadarshini and Freedman, Barry I and Levy, Daniel and Bielak, Lawrence F and Hixson, James E and Floyd, James S and Whitsel, Eric A and Ellinor, Patrick T and Irvin, Marguerite R and Fingerlin, Tasha E and Raffield, Laura M and Armasu, Sebastian M and Wheeler, Marsha M and Sabino, Ester C and Blangero, John and Williams, L Keoki and Levy, Bruce D and Sheu, Wayne Huey-Herng and Roden, Dan M and Boerwinkle, Eric and Manson, JoAnn E and Mathias, Rasika A and Desai, Pinkal and Taylor, Kent D and Johnson, Andrew D and Auer, Paul L and Kooperberg, Charles and Laurie, Cathy C and Blackwell, Thomas W and Smith, Albert V and Zhao, Hongyu and Lange, Ethan and Lange, Leslie and Rich, Stephen S and Rotter, Jerome I and Wilson, James G and Scheet, Paul and Kitzman, Jacob O and Lander, Eric S and Engreitz, Jesse M and Ebert, Benjamin L and Reiner, Alexander P and Jaiswal, Siddhartha and Abecasis, Gon{\c c}alo and Sankaran, Vijay G and Kathiresan, Sekar and Natarajan, Pradeep},
}

@article{93,
  title    = {Large-Scale Exome Sequencing Study Implicates Both Developmental and Functional Changes in the Neurobiology of Autism.},
  journal  = {Cell},
  volume   = {180},
  year     = {2020},
  month    = feb,
  day      = {6},
  pages    = {568--584.e23},
  abstract = {

We present the largest exome sequencing study of autism spectrum disorder (ASD) to date (n~= 35,584 total samples, 11,986 with ASD). Using an enhanced analytical framework to integrate de novo and case-control rare variation, we identify 102 risk genes at a false discovery rate of 0.1 or less. Of these genes, 49 show higher frequencies of disruptive de novo variants in individuals ascertained to have severe neurodevelopmental delay, whereas 53 show higher frequencies in individuals ascertained to have ASD; comparing ASD cases with mutations in these groups reveals phenotypic differences. Expressed early in brain development, most risk genes have roles in regulation of gene expression or neuronal communication (i.e., mutations effect neurodevelopmental and neurophysiological changes), and 13 fall within loci recurrently hit by copy number variants. In cells from the human cortex, expression of risk genes is enriched in excitatory and inhibitory neuronal lineages, consistent with multiple paths to an excitatory-inhibitory imbalance underlying ASD.

},
  keywords = {Autistic Disorder, Case-Control Studies, Cell Lineage, Cerebral Cortex, Cohort Studies, Exome, Female, Gene Expression Regulation, Developmental, Gene Frequency, Genetic Predisposition to Disease, Humans, Male, Mutation, Missense, Neurobiology, Neurons, Phenotype, Sex Factors, Single-Cell Analysis, Whole Exome Sequencing},
  issn     = {1097-4172},
  doi      = {10.1016/j.cell.2019.12.036},
  author   = {Satterstrom, F Kyle and Kosmicki, Jack A and Wang, Jiebiao and Breen, Michael S and De Rubeis, Silvia and An, Joon-Yong and Peng, Minshi and Collins, Ryan and Grove, Jakob and Klei, Lambertus and Stevens, Christine and Reichert, Jennifer and Mulhern, Maureen S and Artomov, Mykyta and Gerges, Sherif and Sheppard, Brooke and Xu, Xinyi and Bhaduri, Aparna and Norman, Utku and Brand, Harrison and Schwartz, Grace and Nguyen, Rachel and Guerrero, Elizabeth E and Dias, Caroline and Betancur, Catalina and Cook, Edwin H and Gallagher, Louise and Gill, Michael and Sutcliffe, James S and Thurm, Audrey and Zwick, Michael E and B{\o}rglum, Anders D and State, Matthew W and Cicek, A Ercument and Talkowski, Michael E and Cutler, David J and Devlin, Bernie and Sanders, Stephan J and Roeder, Kathryn and Daly, Mark J and Buxbaum, Joseph D},
}

@article{122,
  title    = {Mapping and characterization of structural variation in 17,795 human genomes.},
  journal  = {Nature},
  volume   = {583},
  year     = {2020},
  month    = jul,
  pages    = {83--89},
  abstract = {

A key goal of whole-genome sequencing for studies of human genetics is to interrogate all forms of variation, including single-nucleotide variants, small insertion or deletion (indel) variants and structural variants. However, tools and resources for the study of structural variants have lagged behind those for smaller variants. Here we used a scalable pipeline to map and characterize structural variants in 17,795 deeply sequenced human genomes. We publicly release site-frequency data to create the largest, to our knowledge, whole-genome-sequencing-based structural variant resource so far. On average, individuals carry 2.9 rare structural variants that alter coding regions; these variants affect the dosage or structure of 4.2 genes and account for 4.0-11.2\% of rare high-impact coding alleles. Using a computational model, we estimate that structural variants account for 17.2\% of rare alleles genome-wide, with predicted deleterious effects that are equivalent to loss-of-function coding alleles; approximately 90\% of such structural variants are noncoding deletions (mean 19.1 per genome). We report 158,991 ultra-rare structural variants and show that 2\% of individuals carry ultra-rare megabase-scale structural variants, nearly half of which are balanced or complex rearrangements. Finally, we infer the dosage sensitivity of genes and noncoding elements, and reveal trends that relate to element class and conservation. This work will help to guide the analysis and interpretation of structural variants in the era of whole-genome sequencing.

},
  keywords = {Alleles, Case-Control Studies, Continental Population Groups, Epigenesis, Genetic, Female, Gene Dosage, Genetic Variation, Genetics, Population, Genome, Human, High-Throughput Nucleotide Sequencing, Humans, Male, Molecular Sequence Annotation, Quantitative Trait Loci, Software, Whole Genome Sequencing},
  issn     = {1476-4687},
  doi      = {10.1038/s41586-020-2371-0},
  author   = {Abel, Haley J and Larson, David E and Regier, Allison A and Chiang, Colby and Das, Indraniel and Kanchi, Krishna L and Layer, Ryan M and Neale, Benjamin M and Salerno, William J and Reeves, Catherine and Buyske, Steven and Matise, Tara C and Muzny, Donna M and Zody, Michael C and Lander, Eric S and Dutcher, Susan K and Stitziel, Nathan O and Hall, Ira M},
}

@article{131,
  title    = {A missense variant in {Mitochondrial} {Amidoxime} {Reducing} {Component} 1 gene and protection against liver disease.},
  journal  = {PLoS Genet},
  volume   = {16},
  year     = {2020},
  month    = apr,
  pages    = {e1008629},
  abstract = {

Analyzing 12,361 all-cause cirrhosis cases and 790,095 controls from eight cohorts, we identify a common missense variant in the Mitochondrial Amidoxime Reducing Component 1 gene (MARC1 p.A165T) that associates with protection from all-cause cirrhosis (OR 0.91, p = $2.3 \times 10^{-11}$). This same variant also associates with lower levels of hepatic fat on computed tomographic imaging and lower odds of physician-diagnosed fatty liver as well as lower blood levels of alanine transaminase (-0.025 SD, $3.7 \times 10^{-43}$), alkaline phosphatase (-0.025 SD, $1.2 \times 10^{-37}$), total cholesterol (-0.030 SD, p = $1.9 \times 10^{-36}$) and LDL cholesterol (-0.027 SD, p = $5.1 \times 10^{-30}$) levels. We identified a series of additional MARC1 alleles (low-frequency missense p.M187K and rare protein-truncating p.R200Ter) that also associated with lower cholesterol levels, liver enzyme levels and reduced risk of cirrhosis (0 cirrhosis cases for 238 R200Ter carriers versus 17,046 cases of cirrhosis among 759,027 non-carriers, p = 0.04) suggesting that deficiency of the MARC1 enzyme may lower blood cholesterol levels and protect against cirrhosis.

},
  keywords = {Alleles, Cholesterol, LDL, Coronary Artery Disease, Datasets as Topic, Fatty Liver, Female, Genetic Predisposition to Disease, Homozygote, Humans, Liver, Liver Cirrhosis, Liver Cirrhosis, Alcoholic, Loss of Function Mutation, Male, Middle Aged, Mitochondrial Proteins, Mutation, Missense, Oxidoreductases},
  issn     = {1553-7404},
  doi      = {10.1371/journal.pgen.1008629},
  author   = {Emdin, Connor A and Haas, Mary E and Khera, Amit V and Aragam, Krishna and Chaffin, Mark and Klarin, Derek and Hindy, George and Jiang, Lan and Wei, Wei-Qi and Feng, Qiping and Karjalainen, Juha and Havulinna, Aki and Kiiskinen, Tuomo and Bick, Alexander and Ardissino, Diego and Wilson, James G and Schunkert, Heribert and McPherson, Ruth and Watkins, Hugh and Elosua, Roberto and Bown, Matthew J and Samani, Nilesh J and Baber, Usman and Erdmann, Jeanette and Gupta, Namrata and Danesh, John and Saleheen, Danish and Chang, Kyong-Mi and Vujkovic, Marijana and Voight, Ben and Damrauer, Scott and Lynch, Julie and Kaplan, David and Serper, Marina and Tsao, Philip and Mercader, Josep and Hanis, Craig and Daly, Mark and Denny, Joshua and Gabriel, Stacey and Kathiresan, Sekar},
}

@article{108,
  title    = {Next Generation Sequencing of 134 Children with Autism Spectrum Disorder and Regression.},
  journal  = {Genes (Basel)},
  volume   = {11},
  year     = {2020},
  month    = jul,
  day      = {25},
  abstract = {

Approximately 30\% of individuals with autism spectrum disorder (ASD) experience developmental regression, the etiology of which remains largely unknown. We performed a complete literature search and identified 47 genes that had been implicated in such cases. We sequenced these genes in a preselected cohort of 134 individuals with regressive autism. In total, 16 variants in 12 genes with evidence supportive of pathogenicity were identified. They were classified as variants of uncertain significance based on ACMG standards and guidelines. Among these were recurring variants in and , variants in genes that were linked to syndromic forms of ASD (, , , , , and ), and variants in the form of oligogenic heterozygosity (, , and ).

},
  keywords = {Autism Spectrum Disorder, Child, Child, Preschool, Cohort Studies, Disease Progression, Female, Gene Expression Regulation, Genetic Markers, Genetic Predisposition to Disease, High-Throughput Nucleotide Sequencing, Humans, Infant, Male, Mutation},
  issn     = {2073-4425},
  doi      = {10.3390/genes11080853},
  author   = {Yin, Jiani and Chun, Chun-An and Zavadenko, Nikolay N and Pechatnikova, Natalia L and Naumova, Oxana Yu and Doddapaneni, Harsha V and Hu, Jianhong and Muzny, Donna M and Schaaf, Christian P and Grigorenko, Elena L},
}

@article{141,
  title    = {Non-parametric Polygenic Risk Prediction via Partitioned {GWAS} Summary Statistics.},
  journal  = {Am J Hum Genet},
  volume   = {107},
  year     = {2020},
  month    = jul,
  day      = {2},
  pages    = {46--59},
  abstract = {

In complex trait genetics, the ability to predict phenotype from genotype is the ultimate measure of our understanding of genetic architecture underlying the heritability of a trait. A complete understanding of the genetic basis of a trait should allow for predictive methods with accuracies approaching the trait{\textquoteright}s heritability. The highly polygenic nature of quantitative traits and most common phenotypes has motivated the development of statistical strategies focused on combining myriad individually non-significant genetic effects. Now that predictive accuracies are improving, there is a growing interest in the practical utility of such methods for predicting risk of common diseases responsive to early therapeutic intervention. However, existing methods require individual-level genotypes or depend on accurately specifying the genetic architecture underlying each disease to be predicted. Here, we propose a polygenic risk prediction method that does not require explicitly modeling any underlying genetic architecture. We start with summary statistics in the form of SNP effect sizes from a large GWAS cohort. We then remove the correlation structure across summary statistics arising due to linkage disequilibrium and apply a piecewise linear interpolation on conditional mean effects. In both simulated and real datasets, this new non-parametric shrinkage (NPS) method can reliably allow for linkage disequilibrium in summary statistics of 5 million dense genome-wide markers and consistently improves prediction accuracy. We show that NPS improves the identification of groups at high risk for breast cancer, type 2 diabetes, inflammatory bowel disease, and coronary heart disease, all of which have available early intervention or prevention treatments.

},
  keywords = {Aged, Cohort Studies, Diabetes Mellitus, Type 2, Female, Genome-Wide Association Study, Genotype, Humans, Linkage Disequilibrium, Male, Middle Aged, Models, Genetic, Multifactorial Inheritance, Phenotype, Polymorphism, Single Nucleotide, Quantitative Trait Loci},
  issn     = {1537-6605},
  doi      = {10.1016/j.ajhg.2020.05.004},
  author   = {Chun, Sung and Imakaev, Maxim and Hui, Daniel and Patsopoulos, Nikolaos A and Neale, Benjamin M and Kathiresan, Sekar and Stitziel, Nathan O and Sunyaev, Shamil R},
}

@article{105,
  title    = {Polygenic background modifies penetrance of monogenic variants for tier 1 genomic conditions.},
  journal  = {Nat Commun},
  volume   = {11},
  year     = {2020},
  month    = aug,
  day      = {20},
  pages    = {3635},
  abstract = {

Genetic variation can predispose to disease both through (i) monogenic risk variants that disrupt a physiologic pathway with large effect on disease and (ii) polygenic risk that involves many variants of small effect in different pathways. Few studies have explored the interplay between monogenic and polygenic risk. Here, we study 80,928 individuals to examine whether polygenic background can modify penetrance of disease in tier 1 genomic conditions---familial hypercholesterolemia, hereditary breast and ovarian cancer, and Lynch syndrome. Among carriers of a monogenic risk variant, we estimate substantial gradients in disease risk based on polygenic background---the probability of disease by age 75 years ranged from 17\% to 78\% for coronary artery disease, 13\% to 76\% for breast cancer, and 11\% to 80\% for colon cancer. We propose that accounting for polygenic background is likely to increase accuracy of risk estimation for individuals who inherit a monogenic risk variant.

},
  keywords = {Aged, Breast Neoplasms, Case-Control Studies, Colorectal Neoplasms, Coronary Artery Disease, Female, Genetic Predisposition to Disease, Genome, Human, Humans, Male, Middle Aged, Multifactorial Inheritance, Odds Ratio, Penetrance, Risk Factors},
  issn     = {2041-1723},
  doi      = {10.1038/s41467-020-17374-3},
  author   = {Fahed, Akl C and Wang, Minxian and Homburger, Julian R and Patel, Aniruddh P and Bick, Alexander G and Neben, Cynthia L and Lai, Carmen and Brockman, Deanna and Philippakis, Anthony and Ellinor, Patrick T and Cassa, Christopher A and Lebo, Matthew and Ng, Kenney and Lander, Eric S and Zhou, Alicia Y and Kathiresan, Sekar and Khera, Amit V},
}

@article{109,
  title    = {{RNA} Identification of {PRIME} Cells Predicting Rheumatoid Arthritis Flares.},
  journal  = {N Engl J Med},
  volume   = {383},
  year     = {2020},
  month    = jul,
  day      = {16},
  pages    = {218--228},
  abstract = {

BACKGROUND: Rheumatoid arthritis, like many inflammatory diseases, is characterized by episodes of quiescence and exacerbation (flares). The molecular events leading to flares are unknown.

METHODS: We established a clinical and technical protocol for repeated home collection of blood in patients with rheumatoid arthritis to allow for longitudinal RNA sequencing (RNA-seq). Specimens were obtained from 364 time points during eight flares over a period of 4 years in our index patient, as well as from 235 time points during flares in three additional patients. We identified transcripts that were differentially expressed before flares and compared these with data from synovial single-cell RNA-seq. Flow cytometry and sorted-blood-cell RNA-seq in additional patients were used to validate the findings.

RESULTS: Consistent changes were observed in blood transcriptional profiles 1 to 2 weeks before a rheumatoid arthritis flare. B-cell activation was followed by expansion of circulating CD45$^{-}$CD31$^{-}$PDPN$^{+}$ preinflammatory mesenchymal, or PRIME, cells in the blood from patients with rheumatoid arthritis; these cells shared features of inflammatory synovial fibroblasts. Levels of circulating PRIME cells decreased during flares in all 4 patients, and flow cytometry and sorted-cell RNA-seq confirmed the presence of PRIME cells in 19 additional patients with rheumatoid arthritis.

CONCLUSIONS: Longitudinal genomic analysis of rheumatoid arthritis flares revealed PRIME cells in the blood during the period before a flare and suggested a model in which these cells become activated by B cells in the weeks before a flare and subsequently migrate out of the blood into the synovium. (Funded by the National Institutes of Health and others.).

},
  keywords = {Adult, Arthritis, Rheumatoid, B-Lymphocytes, Female, Fibroblasts, Flow Cytometry, Gene Expression, Humans, Male, Mesenchymal Stem Cells, Middle Aged, Patient Acuity, Sequence Analysis, RNA, Surveys and Questionnaires, Symptom Flare Up, Synovial Fluid},
  issn     = {1533-4406},
  doi      = {10.1056/NEJMoa2004114},
  author   = {Orange, Dana E and Yao, Vicky and Sawicka, Kirsty and Fak, John and Frank, Mayu O and Parveen, Salina and Blach{\`e}re, Nathalie E and Hale, Caryn and Zhang, Fan and Raychaudhuri, Soumya and Troyanskaya, Olga G and Darnell, Robert B},
}

@article{113,
  title    = {Serum sphingolipids and incident diabetes in a {US} population with high diabetes burden: the {Hispanic} {Community} {Health} {Study/Study} of {Latinos} ({HCHS/SOL}).},
  journal  = {Am J Clin Nutr},
  volume   = {112},
  year     = {2020},
  month    = jul,
  day      = {1},
  pages    = {57--65},
  abstract = {

BACKGROUND: Genetic or pharmacological inhibition of de novo sphingolipid synthases prevented diabetes in animal studies.

OBJECTIVES: We sought to evaluate prospective associations of serum sphingolipids with incident diabetes in a population-based cohort.

METHODS: We included 2010 participants of the Hispanic Community Health Study/Study of Latinos (HCHS/SOL) aged 18-74 y who were free of diabetes and other major chronic diseases at baseline (2008-2011). Metabolomic profiling of fasting serum was performed using a global, untargeted approach. A total of 43 sphingolipids were quantified and, considering subclasses and chemical structures of individual species, 6 sphingolipid scores were constructed. Diabetes status was assessed using standard procedures including blood tests. Multivariable survey Poisson regressions were applied to estimate RR and 95\% CI of incident diabetes associated with individual sphingolipids or sphingolipid scores.

RESULTS: There were 224 incident cases of diabetes identified during, on average, 6 y of follow-up. After adjustment for socioeconomic and lifestyle factors, a ceramide score (RR Q4 versus Q1~=~2.40; 95\% CI: 1.24, 4.65; P-trend~=~0.003) and a score of sphingomyelins with fully saturated sphingoid-fatty acid pairs (RR Q4 versus Q1~=~3.15; 95\% CI: 1.75, 5.67; P-trend~<0.001) both were positively associated with risk of diabetes, whereas scores of glycosylceramides, lactosylceramides, or other unsaturated sphingomyelins (even if having an SFA base) were not associated with risk of diabetes. After additional adjustment for numerous traditional risk factors (especially triglycerides), both associations were attenuated and only the saturated-sphingomyelin score remained associated with risk of diabetes (RR Q4 versus Q1~=~1.98; 95\% CI: 1.09, 3.59; P-trend~=~0.031).

CONCLUSIONS: Our findings suggest that a cluster of saturated sphingomyelins may be associated with elevated risk of diabetes beyond traditional risk factors, which needs to be verified in other population studies. This study was registered at clinicaltrials.gov as NCT02060344.

},
  keywords = {Adolescent, Adult, Aged, Diabetes Mellitus, Female, Hispanic Americans, Humans, Male, Middle Aged, Prospective Studies, Risk Factors, Sphingolipids, United States, Young Adult},
  issn     = {1938-3207},
  doi      = {10.1093/ajcn/nqaa114},
  author   = {Chen, Guo-Chong and Chai, Jin Choul and Yu, Bing and Michelotti, Gregory A and Grove, Megan L and Fretts, Amanda M and Daviglus, Martha L and Garcia-Bedoya, Olga L and Thyagarajan, Bharat and Schneiderman, Neil and Cai, Jianwen and Kaplan, Robert C and Boerwinkle, Eric and Qi, Qibin},
}

@article{114,
  title    = {A structural variation reference for medical and population genetics.},
  journal  = {Nature},
  volume   = {581},
  year     = {2020},
  month    = may,
  pages    = {444--451},
  abstract = {

Structural variants (SVs) rearrange large segments of DNA and can have profound consequences in evolution and human disease. As national biobanks, disease-association studies, and clinical genetic testing have grown increasingly reliant on genome sequencing, population references such as the Genome Aggregation Database (gnomAD) have become integral in the interpretation of single-nucleotide variants (SNVs). However, there are no reference maps of SVs from high-coverage genome sequencing comparable to those for SNVs. Here we present a reference of sequence-resolved SVs constructed from 14,891 genomes across diverse global populations (54\% non-European) in gnomAD. We discovered a rich and complex landscape of 433,371 SVs, from which we estimate that SVs are responsible for 25-29\% of all rare protein-truncating events per genome. We found strong correlations between natural selection against damaging SNVs and rare SVs that disrupt or duplicate protein-coding sequence, which suggests that genes that are highly intolerant to loss-of-function are also sensitive to increased dosage. We also uncovered modest selection against noncoding SVs in cis-regulatory elements, although selection against protein-truncating SVs was stronger than all noncoding effects. Finally, we identified very large (over one megabase), rare SVs in 3.9\% of samples, and estimate that 0.13\% of individuals may carry an SV that meets the existing criteria for clinically important incidental findings. This SV resource is freely distributed via the gnomAD browser and will have broad utility in population genetics, disease-association studies, and diagnostic screening.

},
  keywords = {Continental Population Groups, Disease, Female, Genetic Testing, Genetic Variation, Genetics, Medical, Genetics, Population, Genome, Human, Genotyping Techniques, Humans, Male, Middle Aged, Mutation, Polymorphism, Single Nucleotide, Reference Standards, Selection, Genetic, Whole Genome Sequencing},
  issn     = {1476-4687},
  doi      = {10.1038/s41586-020-2287-8},
  author   = {Collins, Ryan L and Brand, Harrison and Karczewski, Konrad J and Zhao, Xuefang and Alf{\"o}ldi, Jessica and Francioli, Laurent C and Khera, Amit V and Lowther, Chelsea and Gauthier, Laura D and Wang, Harold and Watts, Nicholas A and Solomonson, Matthew and O{\textquoteright}Donnell-Luria, Anne and Baumann, Alexander and Munshi, Ruchi and Walker, Mark and Whelan, Christopher W and Huang, Yongqing and Brookings, Ted and Sharpe, Ted and Stone, Matthew R and Valkanas, Elise and Fu, Jack and Tiao, Grace and Laricchia, Kristen M and Ruano-Rubio, Valentin and Stevens, Christine and Gupta, Namrata and Cusick, Caroline and Margolin, Lauren and Taylor, Kent D and Lin, Henry J and Rich, Stephen S and Post, Wendy S and Chen, Yii-Der Ida and Rotter, Jerome I and Nusbaum, Chad and Philippakis, Anthony and Lander, Eric and Gabriel, Stacey and Neale, Benjamin M and Kathiresan, Sekar and Daly, Mark J and Banks, Eric and MacArthur, Daniel G and Talkowski, Michael E},
}

@article{136,
  title    = {Titin Truncating Variants in Adults Without Known Congestive Heart Failure.},
  journal  = {J Am Coll Cardiol},
  volume   = {75},
  year     = {2020},
  month    = mar,
  day      = {17},
  pages    = {1239--1241},
  keywords = {Adult, Aged, Asymptomatic Diseases, Connectin, Female, Genetic Variation, Heart Failure, Humans, Male, Middle Aged},
  issn     = {1558-3597},
  doi      = {10.1016/j.jacc.2020.01.013},
  author   = {Pirruccello, James P and Bick, Alexander and Chaffin, Mark and Aragam, Krishna G and Choi, Seung Hoan and Lubitz, Steven A and Ho, Carolyn Y and Ng, Kenney and Philippakis, Anthony and Ellinor, Patrick T and Kathiresan, Sekar and Khera, Amit V},
}
@article {127, title = {Validation of a Genome-Wide Polygenic~Score for Coronary Artery~Disease in~South Asians.}, journal = {J Am Coll Cardiol}, volume = {76}, year = {2020}, month = {2020 08 11}, pages = {703-714}, abstract = {

BACKGROUND: Genome-wide polygenic scores (GPS) integrate information from many common DNA variants into a single number. Because rates of coronary artery disease (CAD) are substantially higher among South Asians, a GPS to identify high-risk individuals may be particularly useful in this population.

OBJECTIVES: This analysis used summary statistics from a prior genome-wide association study to derive a new GPS for South Asians.

METHODS: This GPS was validated in 7,244 South Asian UK Biobank participants and tested in 491 individuals from a case-control study in Bangladesh. Next, a static ancestry and GPS reference distribution was built using whole-genome sequencing from 1,522 Indian individuals, and a framework was tested for projecting individuals onto this static ancestry and GPS reference distribution using 1,800 CAD cases and 1,163 control subjects newly recruited in India.

RESULTS: The GPS, containing 6,630,150 common DNA variants, had an odds ratio (OR) per SD of 1.58 in South Asian UK Biobank participants and 1.60 in the Bangladeshi study (p~<~0.001 for each). Next, individuals of the Indian case-control study were projected onto static reference distributions, observing an OR/SD of 1.66 (p~<~0.001). Compared with the middle quintile, risk for CAD was most pronounced for those in the top 5\% of the GPS distribution---ORs of 4.16, 2.46, and 3.22 in the South Asian UK Biobank, Bangladeshi, and Indian studies, respectively (p~<~0.05 for each).

CONCLUSIONS: The new GPS has been developed and tested using 3 distinct South Asian studies, and provides a generalizable framework for ancestry-specific GPS assessment.

}, keywords = {Adult, Aged, Bangladesh, Case-Control Studies, Coronary Artery Disease, Female, Genome-Wide Association Study, Humans, India, Male, Middle Aged, Multifactorial Inheritance}, issn = {1558-3597}, doi = {10.1016/j.jacc.2020.06.024}, author = {Wang, Minxian and Menon, Ramesh and Mishra, Sanghamitra and Patel, Aniruddh P and Chaffin, Mark and Tanneeru, Deepak and Deshmukh, Manjari and Mathew, Oshin and Apte, Sanika and Devanboo, Christina S and Sundaram, Sumathi and Lakshmipathy, Praveena and Murugan, Sakthivel and Sharma, Krishna Kumar and Rajendran, Karthikeyan and Santhosh, Sam and Thachathodiyl, Rajesh and Ahamed, Hisham and Balegadde, Aniketh Vijay and Alexander, Thomas and Swaminathan, Krishnan and Gupta, Rajeev and Mullasari, Ajit S and Sigamani, Alben and Kanchi, Muralidhar and Peterson, Andrew S and Butterworth, Adam S and Danesh, John and Di Angelantonio, Emanuele and Naheed, Aliya and Inouye, Michael and Chowdhury, Rajiv and Vedam, Ramprasad L and Kathiresan, Sekar and Gupta, Ravi and Khera, Amit V} } @article {40, title = {Complex rearrangements and oncogene amplifications revealed by long-read DNA and RNA sequencing of a breast cancer cell line.}, journal = {Genome Res}, volume = {28}, year = {2018}, month = {2018 08}, pages = {1126-1135}, abstract = {

The SK-BR-3 cell line is one of the most important models for HER2+ breast cancers, which affect one in five breast cancer patients. SK-BR-3 is known to be highly rearranged, although much of the variation is in complex and repetitive regions that may be underreported. Addressing this, we sequenced SK-BR-3 using long-read single molecule sequencing from Pacific Biosciences and develop one of the most detailed maps of structural variations (SVs) in a cancer genome available, with nearly 20,000 variants present, most of which were missed by short-read sequencing. Surrounding the important ERBB2 oncogene (also known as HER2), we discover a complex sequence of nested duplications and translocations, suggesting a punctuated progression. Full-length transcriptome sequencing further revealed several novel gene fusions within the nested genomic variants. Combining long-read genome and transcriptome sequencing enables an in-depth analysis of how SVs disrupt the genome and sheds new light on the complex mechanisms involved in cancer genome evolution.

}, keywords = {Breast Neoplasms, Female, Gene Amplification, Gene Rearrangement, Genome, Human, Genomic Structural Variation, High-Throughput Nucleotide Sequencing, Humans, MCF-7 Cells, Oncogenes, Receptor, ErbB-2, Repetitive Sequences, Nucleic Acid, Transcriptome}, issn = {1549-5469}, doi = {10.1101/gr.231100.117}, author = {Nattestad, Maria and Goodwin, Sara and Ng, Karen and Baslan, Timour and Sedlazeck, Fritz J and Rescheneder, Philipp and Garvin, Tyler and Fang, Han and Gurtowski, James and Hutton, Elizabeth and Tseng, Elizabeth and Chin, Chen-Shan and Beck, Timothy and Sundaravadanam, Yogi and Kramer, Melissa and Antoniou, Eric and McPherson, John D and Hicks, James and McCombie, W Richard and Schatz, Michael C} } @article {31, title = {Genetic variants in microRNA genes and targets associated with cardiovascular disease risk factors in the African-American population.}, journal = {Hum Genet}, volume = {137}, year = {2018}, month = {2018 Jan}, pages = {85-94}, abstract = {

The purpose of this study is to identify microRNA (miRNA) related polymorphism, including single nucleotide variants (SNVs) in mature miRNA-encoding sequences or in miRNA-target sites, and their association with cardiovascular disease (CVD) risk factors in African-American population. To achieve our objective, we examined 1900 African-Americans from the Atherosclerosis Risk in Communities study using SNVs identified from whole-genome sequencing data. A total of 971 SNVs found in 726 different mature miRNA-encoding sequences and 16,057 SNVs found in the three prime untranslated region (3{\textquoteright}UTR) of 3647 protein-coding genes were identified and interrogated their associations with 17 CVD risk factors. Using single-variant-based approach, we found 5 SNVs in miRNA-encoding sequences to be associated with serum Lipoprotein(a) [Lp(a)], high-density lipoprotein (HDL) or triglycerides, and 2 SNVs in miRNA-target sites to be associated with Lp(a) and HDL, all with false discovery rates of 5\%. Using a gene-based approach, we identified 3 pairs of associations between gene NSD1 and platelet count, gene HSPA4L and cardiac troponin T, and gene AHSA2 and magnesium. We successfully validated the association between a variant specific to African-American population, NR_039880.1:n.18A>C, in mature hsa-miR-4727-5p encoding sequence and serum HDL level in an independent sample of 2135 African-Americans. Our study provided candidate miRNAs and their targets for further investigation of their potential contribution to ethnic disparities in CVD risk factors.

}, keywords = {3{\textquoteright} Untranslated Regions, Adult, African Americans, Cardiovascular Diseases, Female, Genetic Predisposition to Disease, Genotyping Techniques, Humans, Male, MicroRNAs, Middle Aged, Polymorphism, Single Nucleotide, Risk Factors, Whole Genome Sequencing}, issn = {1432-1203}, doi = {10.1007/s00439-017-1858-8}, author = {Li, Chang and Grove, Megan L and Yu, Bing and Jones, Barbara C and Morrison, Alanna and Boerwinkle, Eric and Liu, Xiaoming} } @article {12, title = {ANGPTL3 Deficiency and Protection Against Coronary Artery Disease.}, journal = {J Am Coll Cardiol}, volume = {69}, year = {2017}, month = {2017 Apr 25}, pages = {2054-2063}, abstract = {

BACKGROUND: Familial combined hypolipidemia, a Mendelian condition characterized by substantial reductions in all 3~major lipid fractions, is caused by mutations that inactivate the gene angiopoietin-like 3 (ANGPTL3). Whether ANGPTL3 deficiency reduces risk of coronary artery disease (CAD) is unknown.

OBJECTIVES: The study goal was to leverage 3 distinct lines of evidence---a family that included individuals with complete (compound heterozygote) ANGPTL3 deficiency, a population-based study of humans with partial (heterozygote) ANGPTL3 deficiency, and biomarker levels in patients with myocardial infarction (MI)---to test whether ANGPTL3 deficiency is associated with lower risk for CAD.

METHODS: We assessed coronary atherosclerotic burden in 3 individuals with complete ANGPTL3 deficiency and 3~wild-type first-degree relatives using computed tomography angiography. In the population, ANGPTL3 loss-of-function (LOF) mutations were ascertained in up to 21,980 people with CAD and 158,200 control subjects. LOF mutations were~defined as nonsense, frameshift, and splice-site variants, along with missense variants resulting in~<25\% of wild-type ANGPTL3 activity in a mouse model. In a biomarker study, circulating ANGPTL3 concentration was measured in 1,493 people who presented with MI and 3,232 control subjects.

RESULTS: The 3 individuals with complete ANGPTL3 deficiency showed no evidence of coronary atherosclerotic plaque. ANGPTL3 gene sequencing demonstrated that approximately 1 in 309 people was a heterozygous carrier for an LOF mutation. Compared with those without mutation, heterozygous carriers of ANGPTL3 LOF mutations demonstrated a 17\% reduction in circulating triglycerides and a 12\% reduction in low-density lipoprotein cholesterol. Carrier status was associated with a 34\% reduction in odds of CAD (odds ratio: 0.66; 95\% confidence interval: 0.44 to 0.98; p~= 0.04). Individuals in the lowest tertile of circulating ANGPTL3 concentrations, compared with the highest, had reduced odds of MI (adjusted odds ratio: 0.65; 95\% confidence interval: 0.55 to 0.77; p~< 0.001).

CONCLUSIONS: ANGPTL3 deficiency is associated with protection from CAD.

}, keywords = {Adult, Angiopoietin-Like Protein 3, Angiopoietin-like Proteins, Angiopoietins, Animals, Atherosclerosis, Case-Control Studies, Coronary Artery Disease, Female, Humans, Lipids, Male, Mice, Inbred C57BL, Mice, Knockout, Middle Aged, Mutation, Missense, Myocardial Infarction, Risk Factors}, issn = {1558-3597}, doi = {10.1016/j.jacc.2017.02.030}, author = {Stitziel, Nathan O and Khera, Amit V and Wang, Xiao and Bierhals, Andrew J and Vourakis, A Christina and Sperry, Alexandra E and Natarajan, Pradeep and Klarin, Derek and Emdin, Connor A and Zekavat, Seyedeh M and Nomura, Akihiro and Erdmann, Jeanette and Schunkert, Heribert and Samani, Nilesh J and Kraus, William E and Shah, Svati H and Yu, Bing and Boerwinkle, Eric and Rader, Daniel J and Gupta, Namrata and Frossard, Philippe M and Rasheed, Asif and Danesh, John and Lander, Eric S and Gabriel, Stacey and Saleheen, Danish and Musunuru, Kiran and Kathiresan, Sekar} } @article {24, title = {cTag-PAPERCLIP Reveals Alternative Polyadenylation Promotes Cell-Type Specific Protein Diversity and Shifts Araf Isoforms with Microglia Activation.}, journal = {Neuron}, volume = {95}, year = {2017}, month = {2017 Sep 13}, pages = {1334-1349.e5}, abstract = {

Alternative polyadenylation (APA) is increasingly recognized to regulate gene expression across different cell types, but obtaining APA maps from individual cell types typically requires prior purification, a stressful procedure that can itself alter cellular states. Here, we describe a new platform, cTag-PAPERCLIP,~that generates APA profiles from single cell populations in intact tissues; cTag-PAPERCLIP requires no tissue dissociation and preserves transcripts in native states. Applying cTag-PAPERCLIP to profile four major cell types in the mouse brain revealed common APA preferences between excitatory and inhibitory neurons distinct from astrocytes~and microglia, regulated in part by neuron-specific~RNA-binding proteins NOVA2 and PTBP2. We further~identified a role of APA in switching Araf protein isoforms during microglia activation, impacting production of downstream inflammatory cytokines. Our results demonstrate the broad applicability of cTag-PAPERCLIP and a previously undiscovered role of APA in contributing to protein diversity between different cell types and cellular states within the brain.

}, keywords = {Animals, Antigens, Neoplasm, Astrocytes, Brain, Cells, Cultured, Female, Humans, Male, Mice, Microglia, Nerve Tissue Proteins, Neuro-Oncological Ventral Antigen, Neurons, Organ Specificity, Polyadenylation, Polypyrimidine Tract-Binding Protein, Protein Isoforms, Protein Serine-Threonine Kinases, RNA-Binding Proteins}, issn = {1097-4199}, doi = {10.1016/j.neuron.2017.08.024}, author = {Hwang, Hun-Way and Saito, Yuhki and Park, Christopher Y and Blach{\`e}re, Nathalie E and Tajima, Yoko and Fak, John J and Zucker-Scharff, Ilana and Darnell, Robert B} } @article {14, title = {Disruption of the ATXN1-CIC complex causes a spectrum of neurobehavioral phenotypes in mice and humans.}, journal = {Nat Genet}, volume = {49}, year = {2017}, month = {2017 Apr}, pages = {527-536}, abstract = {

Gain-of-function mutations in some genes underlie neurodegenerative conditions, whereas loss-of-function mutations in the same genes have distinct phenotypes. This appears to be the case with the protein ataxin 1 (ATXN1), which forms a transcriptional repressor complex with capicua (CIC). Gain of function of the complex leads to neurodegeneration, but ATXN1-CIC is also essential for survival. We set out to understand the functions of the ATXN1-CIC complex in the developing forebrain and found that losing this complex results in hyperactivity, impaired learning and memory, and abnormal maturation and maintenance of upper-layer cortical neurons. We also found that CIC activity in the hypothalamus and medial amygdala modulates social interactions. Informed by these neurobehavioral features in mouse mutants, we identified five individuals with de novo heterozygous truncating mutations in CIC who share similar clinical features, including intellectual disability, attention deficit/hyperactivity disorder (ADHD), and autism spectrum disorder. Our study demonstrates that loss of ATXN1-CIC complexes causes a spectrum of neurobehavioral phenotypes.

}, keywords = {Animals, Ataxin-1, Autism Spectrum Disorder, Cerebellum, Female, Humans, Intellectual Disability, Interpersonal Relations, Male, Mice, Nerve Tissue Proteins, Neurodegenerative Diseases, Nuclear Proteins, Phenotype, Repressor Proteins}, issn = {1546-1718}, doi = {10.1038/ng.3808}, author = {Lu, Hsiang-Chih and Tan, Qiumin and Rousseaux, Maxime W C and Wang, Wei and Kim, Ji-Yoen and Richman, Ronald and Wan, Ying-Wooi and Yeh, Szu-Ying and Patel, Jay M and Liu, Xiuyun and Lin, Tao and Lee, Yoontae and Fryer, John D and Han, Jing and Chahrour, Maria and Finnell, Richard H and Lei, Yunping and Zurita-Jimenez, Maria E and Ahimaz, Priyanka and Anyane-Yeboa, Kwame and Van Maldergem, Lionel and Lehalle, Daphne and Jean-Marcais, Nolwenn and Mosca-Boidron, Anne-Laure and Thevenon, Julien and Cousin, Margot A and Bro, Della E and Lanpher, Brendan C and Klee, Eric W and Alexander, Nora and Bainbridge, Matthew N and Orr, Harry T and Sillitoe, Roy V and Ljungberg, M Cecilia and Liu, Zhandong and Schaaf, Christian P and Zoghbi, Huda Y} } @article {32, title = {Genetic effects on gene expression across human tissues.}, journal = {Nature}, volume = {550}, year = {2017}, month = {2017 10 11}, pages = {204-213}, abstract = {

Characterization of the molecular function of the human genome and its variation across individuals is essential for identifying the cellular mechanisms that underlie human genetic traits and diseases. The Genotype-Tissue Expression (GTEx) project aims to characterize variation in gene expression levels across individuals and diverse tissues of the human body, many of which are not easily accessible. Here we describe genetic effects on gene expression levels across 44 human tissues. We find that local genetic variation affects gene expression levels for the majority of genes, and we further identify inter-chromosomal genetic effects for 93 genes and 112 loci. On the basis of the identified genetic effects, we characterize patterns of tissue specificity, compare local and distal effects, and evaluate the functional properties of the genetic effects. We also demonstrate that multi-tissue, multi-individual data can be used to identify genes and pathways affected by human disease-associated variation, enabling a mechanistic interpretation of gene regulation and the genetic basis of disease.

}, keywords = {Alleles, Chromosomes, Human, Disease, Female, Gene Expression Profiling, Gene Expression Regulation, Genetic Variation, Genome, Human, Genotype, Humans, Male, Organ Specificity, Quantitative Trait Loci}, issn = {1476-4687}, doi = {10.1038/nature24277}, author = {Battle, Alexis and Brown, Christopher D and Engelhardt, Barbara E and Montgomery, Stephen B} } @article {23, title = {Genomic Patterns of De Novo Mutation in Simplex Autism.}, journal = {Cell}, volume = {171}, year = {2017}, month = {2017 Oct 19}, pages = {710-722.e12}, abstract = {

To further our understanding of the genetic etiology of autism, we generated and analyzed genome sequence data from 516 idiopathic autism families (2,064 individuals). This resource includes >59 million single-nucleotide variants (SNVs) and 9,212 private copy number variants (CNVs), of which 133,992 and 88 are de novo mutations (DNMs), respectively. We estimate a mutation rate of \~{}1.5~{\texttimes} 10$^{-8}$ SNVs per site per generation with a significantly higher mutation rate in repetitive DNA. Comparing probands and unaffected siblings, we observe several DNM trends. Probands carry more gene-disruptive CNVs and SNVs, resulting in severe~missense mutations and mapping to predicted fetal brain promoters and embryonic stem cell enhancers. These differences become more pronounced for autism genes (p~= 1.8~{\texttimes} 10$^{-3}$, OR~= 2.2). Patients are more likely to carry multiple coding and noncoding DNMs in different genes, which are enriched for expression in striatal neurons (p~= 3~{\texttimes} 10$^{-3}$), suggesting a path forward for genetically characterizing more complex cases of autism.

}, keywords = {Animals, Autistic Disorder, DNA Copy Number Variations, DNA Mutational Analysis, Female, Genome-Wide Association Study, Humans, INDEL Mutation, Male, Mice, Polymorphism, Single Nucleotide}, issn = {1097-4172}, doi = {10.1016/j.cell.2017.08.047}, author = {Turner, Tychele N and Coe, Bradley P and Dickel, Diane E and Hoekzema, Kendra and Nelson, Bradley J and Zody, Michael C and Kronenberg, Zev N and Hormozdiari, Fereydoun and Raja, Archana and Pennacchio, Len A and Darnell, Robert B and Eichler, Evan E} } @article {21, title = {Polygenic Risk Score Identifies Subgroup With Higher Burden of Atherosclerosis and Greater Relative Benefit From Statin Therapy in the Primary Prevention Setting.}, journal = {Circulation}, volume = {135}, year = {2017}, month = {2017 May 30}, pages = {2091-2101}, abstract = {

BACKGROUND: Relative risk reduction with statin therapy has been consistent across nearly all subgroups studied to date. However, in analyses of 2 randomized controlled primary prevention trials (ASCOT [Anglo-Scandinavian Cardiac Outcomes Trial-Lipid-Lowering Arm] and JUPITER [Justification for the Use of Statins in Prevention: An Intervention Trial Evaluating Rosuvastatin]), statin therapy led to a greater relative risk reduction among a subgroup at high genetic risk. Here, we aimed to confirm this observation in a third primary prevention randomized controlled trial. In addition, we assessed whether those at high genetic risk had a greater burden of subclinical coronary atherosclerosis.

METHODS: We studied participants from a randomized controlled trial of primary prevention with statin therapy (WOSCOPS [West of Scotland Coronary Prevention Study]; n=4910) and 2 observational cohort studies (CARDIA [Coronary Artery Risk Development in Young Adults] and BioImage; n=1154 and 4392, respectively). For each participant, we calculated a polygenic risk score derived from up to 57 common DNA sequence variants previously associated with coronary heart disease. We compared the relative efficacy of statin therapy in those at high genetic risk (top quintile of polygenic risk score) versus all others (WOSCOPS), as well as the association between the polygenic risk score and coronary artery calcification (CARDIA) and carotid artery plaque burden (BioImage).

RESULTS: Among WOSCOPS trial participants at high genetic risk, statin therapy was associated with a relative risk reduction of 44\% (95\% confidence interval [CI], 22-60; P<0.001), whereas in all others, the relative risk reduction was 24\% (95\% CI, 8-37; P=0.004) despite similar low-density lipoprotein cholesterol lowering. In a study-level meta-analysis across the WOSCOPS, ASCOT, and JUPITER primary prevention, relative risk reduction in those at high genetic risk was 46\% versus 26\% in all others (P for heterogeneity=0.05). Across all 3 studies, the absolute risk reduction with statin therapy was 3.6\% (95\% CI, 2.0-5.1) among those in the high genetic risk group and 1.3\% (95\% CI, 0.6-1.9) in all others. Each 1-SD increase in the polygenic risk score was associated with 1.32-fold (95\% CI, 1.04-1.68) greater likelihood of having coronary artery calcification and 9.7\% higher (95\% CI, 2.2-17.8) burden of carotid plaque.

CONCLUSIONS: Those at high genetic risk have a greater burden of subclinical atherosclerosis and derive greater relative and absolute benefit from statin therapy to prevent a first coronary heart disease event.

CLINICAL TRIAL REGISTRATION: URL: http://www.clinicaltrials.gov. Unique identifiers: NCT00738725 (BioImage) and NCT00005130 (CARDIA). WOSCOPS was carried out and completed before the requirement for clinical trial registration.

}, keywords = {Adolescent, Adult, Aged, Aged, 80 and over, Atherosclerosis, Cohort Studies, Cost of Illness, Female, Humans, Hydroxymethylglutaryl-CoA Reductase Inhibitors, Male, Middle Aged, Multifactorial Inheritance, Primary Prevention, Risk Factors, Young Adult}, issn = {1524-4539}, doi = {10.1161/CIRCULATIONAHA.116.024436}, author = {Natarajan, Pradeep and Young, Robin and Stitziel, Nathan O and Padmanabhan, Sandosh and Baber, Usman and Mehran, Roxana and Sartori, Samantha and Fuster, Valentin and Reilly, Dermot F and Butterworth, Adam and Rader, Daniel J and Ford, Ian and Sattar, Naveed and Kathiresan, Sekar} } @article {8, title = {Genetic Risk, Adherence to a Healthy Lifestyle, and Coronary Disease.}, journal = {N Engl J Med}, volume = {375}, year = {2016}, month = {2016 Dec 15}, pages = {2349-2358}, abstract = {

BACKGROUND: Both genetic and lifestyle factors contribute to individual-level risk of coronary artery disease. The extent to which increased genetic risk can be offset by a healthy lifestyle is unknown.

METHODS: Using a polygenic score of DNA sequence polymorphisms, we quantified genetic risk for coronary artery disease in three prospective cohorts - 7814 participants in the Atherosclerosis Risk in Communities (ARIC) study, 21,222 in the Women{\textquoteright}s Genome Health Study (WGHS), and 22,389 in the Malm{\"o} Diet and Cancer Study (MDCS) - and in 4260 participants in the cross-sectional BioImage Study for whom genotype and covariate data were available. We also determined adherence to a healthy lifestyle among the participants using a scoring system consisting of four factors: no current smoking, no obesity, regular physical activity, and a healthy diet.

RESULTS: The relative risk of incident coronary events was 91\% higher among participants at high genetic risk (top quintile of polygenic scores) than among those at low genetic risk (bottom quintile of polygenic scores) (hazard ratio, 1.91; 95\% confidence interval [CI], 1.75 to 2.09). A favorable lifestyle (defined as at least three of the four healthy lifestyle factors) was associated with a substantially lower risk of coronary events than an unfavorable lifestyle (defined as no or only one healthy lifestyle factor), regardless of the genetic risk category. Among participants at high genetic risk, a favorable lifestyle was associated with a 46\% lower relative risk of coronary events than an unfavorable lifestyle (hazard ratio, 0.54; 95\% CI, 0.47 to 0.63). This finding corresponded to a reduction in the standardized 10-year incidence of coronary events from 10.7\% for an unfavorable lifestyle to 5.1\% for a favorable lifestyle in ARIC, from 4.6\% to 2.0\% in WGHS, and from 8.2\% to 5.3\% in MDCS. In the BioImage Study, a favorable lifestyle was associated with significantly less coronary-artery calcification within each genetic risk category.

CONCLUSIONS: Across four studies involving 55,685 participants, genetic and lifestyle factors were independently associated with susceptibility to coronary artery disease. Among participants at high genetic risk, a favorable lifestyle was associated with a nearly 50\% lower relative risk of coronary artery disease than was an unfavorable lifestyle. (Funded by the National Institutes of Health and others.).

}, keywords = {Aged, Cohort Studies, Coronary Disease, Cross-Sectional Studies, Female, Genetic Predisposition to Disease, Healthy Lifestyle, Humans, Incidence, Male, Middle Aged, Multifactorial Inheritance, Patient Compliance, Polymorphism, Genetic, Risk}, issn = {1533-4406}, doi = {10.1056/NEJMoa1605086}, author = {Khera, Amit V and Emdin, Connor A and Drake, Isabel and Natarajan, Pradeep and Bick, Alexander G and Cook, Nancy R and Chasman, Daniel I and Baber, Usman and Mehran, Roxana and Rader, Daniel J and Fuster, Valentin and Boerwinkle, Eric and Melander, Olle and Orho-Melander, Marju and Ridker, Paul M and Kathiresan, Sekar} } @article {10, title = {Genome Sequencing of Autism-Affected Families Reveals Disruption of Putative Noncoding Regulatory DNA.}, journal = {Am J Hum Genet}, volume = {98}, year = {2016}, month = {2016 Jan 07}, pages = {58-74}, abstract = {

We performed whole-genome sequencing (WGS) of 208 genomes from 53 families affected by simplex autism. For the majority of these families, no copy-number variant (CNV) or candidate de novo gene-disruptive single-nucleotide variant (SNV) had been detected by microarray or whole-exome sequencing (WES). We integrated multiple CNV and SNV analyses and extensive experimental validation to identify additional candidate mutations in eight families. We report that compared to control individuals, probands showed a significant (p = 0.03) enrichment of de novo and private disruptive mutations within fetal CNS DNase I hypersensitive sites (i.e., putative regulatory regions). This effect was only observed within 50 kb of genes that have been previously associated with autism risk, including genes where dosage sensitivity has already been established by recurrent disruptive de novo protein-coding mutations (ARID1B, SCN2A, NR3C2, PRKCA, and DSCAM). In addition, we provide evidence of gene-disruptive CNVs (in DISC1, WNT7A, RBFOX1, and MBD5), as well as smaller de novo CNVs and exon-specific SNVs missed by exome sequencing in neurodevelopmental genes (e.g., CANX, SAE1, and PIK3CA). Our results suggest that the detection of smaller, often multiple CNVs affecting putative regulatory elements might help explain additional risk of simplex autism.

}, keywords = {Autistic Disorder, DNA, Exome, Female, Genome, Human, Humans, Male, Pedigree, Polymorphism, Single Nucleotide}, issn = {1537-6605}, doi = {10.1016/j.ajhg.2015.11.023}, author = {Turner, Tychele N and Hormozdiari, Fereydoun and Duyzend, Michael H and McClymont, Sarah A and Hook, Paul W and Iossifov, Ivan and Raja, Archana and Baker, Carl and Hoekzema, Kendra and Stessman, Holly A and Zody, Michael C and Nelson, Bradley J and Huddleston, John and Sandstrom, Richard and Smith, Joshua D and Hanna, David and Swanson, James M and Faustman, Elaine M and Bamshad, Michael J and Stamatoyannopoulos, John and Nickerson, Deborah A and McCallion, Andrew S and Darnell, Robert and Eichler, Evan E} }