% Ebert et al. 2021 (Science): haplotype-resolved assemblies of 32 diverse human genomes.
% The original export carried the full date 2021-04-02 in the month field;
% the month is now stored as the standard macro so styles can format it.
@article{156,
  author   = {Ebert, Peter and Audano, Peter A and Zhu, Qihui and
              Rodriguez-Martin, Bernardo and Porubsky, David and
              Bonder, Marc Jan and Sulovari, Arvis and Ebler, Jana and
              Zhou, Weichen and Serra Mari, Rebecca and Yilmaz, Feyza and
              Zhao, Xuefang and Hsieh, PingHsun and Lee, Joyce and
              Kumar, Sushant and Lin, Jiadong and Rausch, Tobias and
              Chen, Yu and Ren, Jingwen and Santamarina, Martin and
              H{\"o}ps, Wolfram and Ashraf, Hufsah and Chuang, Nelson T and
              Yang, Xiaofei and Munson, Katherine M and Lewis, Alexandra P and
              Fairley, Susan and Tallon, Luke J and Clarke, Wayne E and
              Basile, Anna O and Byrska-Bishop, Marta and Corvelo, Andr{\'e} and
              Evani, Uday S and Lu, Tsung-Yu and Chaisson, Mark J P and
              Chen, Junjie and Li, Chong and Brand, Harrison and
              Wenger, Aaron M and Ghareghani, Maryam and Harvey, William T and
              Raeder, Benjamin and Hasenfeld, Patrick and Regier, Allison A and
              Abel, Haley J and Hall, Ira M and Flicek, Paul and
              Stegle, Oliver and Gerstein, Mark B and Tubio, Jose M C and
              Mu, Zepeng and Li, Yang I and Shi, Xinghua and
              Hastie, Alex R and Ye, Kai and Chong, Zechen and
              Sanders, Ashley D and Zody, Michael C and Talkowski, Michael E and
              Mills, Ryan E and Devine, Scott E and Lee, Charles and
              Korbel, Jan O and Marschall, Tobias and Eichler, Evan E},
  title    = {Haplotype-resolved diverse human genomes and integrated analysis of structural variation},
  journal  = {Science},
  volume   = {372},
  year     = {2021},
  month    = apr,
  issn     = {1095-9203},
  doi      = {10.1126/science.abf7117},
  keywords = {Female, Genetic Variation, Genome, Human, Genotype, Haplotypes, High-Throughput Nucleotide Sequencing, Humans, INDEL Mutation, Interspersed Repetitive Sequences, Male, Population Groups, Quantitative Trait Loci, Retroelements, Sequence Analysis, DNA, Sequence Inversion, Whole Genome Sequencing},
  abstract = {Long-read and strand-specific sequencing technologies together facilitate the de novo assembly of high-quality haplotype-resolved human genomes without parent-child trio data. We present 64 assembled haplotypes from 32 diverse human genomes. These highly contiguous haplotype assemblies (average minimum contig length needed to cover 50\% of the genome: 26 million base pairs) integrate all forms of genetic variation, even across complex loci. We identified 107,590 structural variants (SVs), of which 68\% were not discovered with short-read sequencing, and 278 SV hotspots (spanning megabases of gene-rich sequence). We characterized 130 of the most active mobile element source elements and found that 63\% of all SVs arise through homology-mediated mechanisms. This resource enables reliable graph-based genotyping from short reads of up to 50,340 SVs, resulting in the identification of 1526 expression quantitative trait loci as well as SV candidates for adaptive selection within the human population.},
}

% Abel et al. 2020 (Nature): structural-variant map across 17,795 human genomes.
% The original export carried the date 2020-07 in the month field;
% the month is now stored as the standard macro so styles can format it.
@article{122,
  author   = {Abel, Haley J and Larson, David E and Regier, Allison A and
              Chiang, Colby and Das, Indraniel and Kanchi, Krishna L and
              Layer, Ryan M and Neale, Benjamin M and Salerno, William J and
              Reeves, Catherine and Buyske, Steven and Matise, Tara C and
              Muzny, Donna M and Zody, Michael C and Lander, Eric S and
              Dutcher, Susan K and Stitziel, Nathan O and Hall, Ira M},
  title    = {Mapping and characterization of structural variation in 17,795 human genomes},
  journal  = {Nature},
  volume   = {583},
  pages    = {83--89},
  year     = {2020},
  month    = jul,
  issn     = {1476-4687},
  doi      = {10.1038/s41586-020-2371-0},
  keywords = {Alleles, Case-Control Studies, Continental Population Groups, Epigenesis, Genetic, Female, Gene Dosage, Genetic Variation, Genetics, Population, Genome, Human, High-Throughput Nucleotide Sequencing, Humans, Male, Molecular Sequence Annotation, Quantitative Trait Loci, Software, Whole Genome Sequencing},
  abstract = {A key goal of whole-genome sequencing for studies of human genetics is to interrogate all forms of variation, including single-nucleotide variants, small insertion or deletion (indel) variants and structural variants. However, tools and resources for the study of structural variants have lagged behind those for smaller variants. Here we used a scalable pipeline to map and characterize structural variants in 17,795 deeply sequenced human genomes. We publicly release site-frequency data to create the largest, to our knowledge, whole-genome-sequencing-based structural variant resource so far. On average, individuals carry 2.9 rare structural variants that alter coding regions; these variants affect the dosage or structure of 4.2 genes and account for 4.0-11.2\% of rare high-impact coding alleles. Using a computational model, we estimate that structural variants account for 17.2\% of rare alleles genome-wide, with predicted deleterious effects that are equivalent to loss-of-function coding alleles; approximately 90\% of such structural variants are noncoding deletions (mean 19.1 per genome). We report 158,991 ultra-rare structural variants and show that 2\% of individuals carry ultra-rare megabase-scale structural variants, nearly half of which are balanced or complex rearrangements. Finally, we infer the dosage sensitivity of genes and noncoding elements, and reveal trends that relate to element class and conservation. This work will help to guide the analysis and interpretation of structural variants in the era of whole-genome sequencing.},
}