@inproceedings{black-etal-2022-gpt,
title = "{GPT}-{N}eo{X}-20{B}: An Open-Source Autoregressive Language Model",
author = "Black, Sidney and
Biderman, Stella and
Hallahan, Eric and
Anthony, Quentin and
Gao, Leo and
Golding, Laurence and
He, Horace and
Leahy, Connor and
McDonell, Kyle and
Phang, Jason and
Pieler, Michael and
Prashanth, Usvsn Sai and
Purohit, Shivanshu and
Reynolds, Laria and
Tow, Jonathan and
Wang, Ben and
Weinbach, Samuel",
editor = "Fan, Angela and
Ilic, Suzana and
Wolf, Thomas and
Gall{\'e}, Matthias",
booktitle = "Proceedings of BigScience Episode {\#}5 -- Workshop on Challenges {\&} Perspectives in Creating Large Language Models",
month = may,
year = "2022",
address = "virtual+Dublin",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.bigscience-1.9",
doi = "10.18653/v1/2022.bigscience-1.9",
pages = "95--136",
abstract = "We introduce GPT-NeoX-20B, a 20 billion parameter autoregressive language model trained on the Pile, whose weights will be made freely and openly available to the public through a permissive license. It is, to the best of our knowledge, the largest dense autoregressive model that has publicly available weights at the time of submission. In this work, we describe GPT-NeoX-20B{'}s architecture and training, and evaluate its performance. We open-source the training and evaluation code, as well as the model weights, at \url{https://github.com/EleutherAI/gpt-neox}.",
}
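For reference, a minimal LaTeX sketch of citing this entry via the citekey above. The file name references.bib and the plain bibliography style are illustrative assumptions, not part of the record; ACL templates ship their own .bst files.

% Minimal sketch: assumes the BibTeX entry above is saved as references.bib.
\documentclass{article}
\begin{document}
GPT-NeoX-20B \cite{black-etal-2022-gpt} is a 20B-parameter
open-source autoregressive language model.
\bibliographystyle{plain} % illustrative; ACL templates provide their own style
\bibliography{references} % resolves the citekey against references.bib
\end{document}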
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="black-etal-2022-gpt">
<titleInfo>
<title>GPT-NeoX-20B: An Open-Source Autoregressive Language Model</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sidney</namePart>
<namePart type="family">Black</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stella</namePart>
<namePart type="family">Biderman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eric</namePart>
<namePart type="family">Hallahan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Quentin</namePart>
<namePart type="family">Anthony</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Leo</namePart>
<namePart type="family">Gao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Laurence</namePart>
<namePart type="family">Golding</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Horace</namePart>
<namePart type="family">He</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Connor</namePart>
<namePart type="family">Leahy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kyle</namePart>
<namePart type="family">McDonell</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jason</namePart>
<namePart type="family">Phang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Michael</namePart>
<namePart type="family">Pieler</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Usvsn</namePart>
<namePart type="given">Sai</namePart>
<namePart type="family">Prashanth</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shivanshu</namePart>
<namePart type="family">Purohit</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Laria</namePart>
<namePart type="family">Reynolds</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jonathan</namePart>
<namePart type="family">Tow</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ben</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Samuel</namePart>
<namePart type="family">Weinbach</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of BigScience Episode #5 – Workshop on Challenges &amp; Perspectives in Creating Large Language Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Angela</namePart>
<namePart type="family">Fan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Suzana</namePart>
<namePart type="family">Ilic</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thomas</namePart>
<namePart type="family">Wolf</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Matthias</namePart>
<namePart type="family">Gallé</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">virtual+Dublin</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We introduce GPT-NeoX-20B, a 20 billion parameter autoregressive language model trained on the Pile, whose weights will be made freely and openly available to the public through a permissive license. It is, to the best of our knowledge, the largest dense autoregressive model that has publicly available weights at the time of submission. In this work, we describe GPT-NeoX-20B’s architecture and training, and evaluate its performance. We open-source the training and evaluation code, as well as the model weights, at https://github.com/EleutherAI/gpt-neox.</abstract>
<identifier type="citekey">black-etal-2022-gpt</identifier>
<identifier type="doi">10.18653/v1/2022.bigscience-1.9</identifier>
<location>
<url>https://aclanthology.org/2022.bigscience-1.9</url>
</location>
<part>
<date>2022-05</date>
<extent unit="page">
<start>95</start>
<end>136</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T GPT-NeoX-20B: An Open-Source Autoregressive Language Model
%A Black, Sidney
%A Biderman, Stella
%A Hallahan, Eric
%A Anthony, Quentin
%A Gao, Leo
%A Golding, Laurence
%A He, Horace
%A Leahy, Connor
%A McDonell, Kyle
%A Phang, Jason
%A Pieler, Michael
%A Prashanth, Usvsn Sai
%A Purohit, Shivanshu
%A Reynolds, Laria
%A Tow, Jonathan
%A Wang, Ben
%A Weinbach, Samuel
%Y Fan, Angela
%Y Ilic, Suzana
%Y Wolf, Thomas
%Y Gallé, Matthias
%S Proceedings of BigScience Episode #5 – Workshop on Challenges & Perspectives in Creating Large Language Models
%D 2022
%8 May
%I Association for Computational Linguistics
%C virtual+Dublin
%F black-etal-2022-gpt
%X We introduce GPT-NeoX-20B, a 20 billion parameter autoregressive language model trained on the Pile, whose weights will be made freely and openly available to the public through a permissive license. It is, to the best of our knowledge, the largest dense autoregressive model that has publicly available weights at the time of submission. In this work, we describe GPT-NeoX-20B’s architecture and training, and evaluate its performance. We open-source the training and evaluation code, as well as the model weights, at https://github.com/EleutherAI/gpt-neox.
%R 10.18653/v1/2022.bigscience-1.9
%U https://aclanthology.org/2022.bigscience-1.9
%U https://doi.org/10.18653/v1/2022.bigscience-1.9
%P 95-136
Markdown (Informal)
[GPT-NeoX-20B: An Open-Source Autoregressive Language Model](https://aclanthology.org/2022.bigscience-1.9) (Black et al., BigScience 2022)
ACL
Sidney Black, Stella Biderman, Eric Hallahan, Quentin Anthony, Leo Gao, Laurence Golding, Horace He, Connor Leahy, Kyle McDonell, Jason Phang, Michael Pieler, Usvsn Sai Prashanth, Shivanshu Purohit, Laria Reynolds, Jonathan Tow, Ben Wang, and Samuel Weinbach. 2022. GPT-NeoX-20B: An Open-Source Autoregressive Language Model. In Proceedings of BigScience Episode #5 – Workshop on Challenges & Perspectives in Creating Large Language Models, pages 95–136, virtual+Dublin. Association for Computational Linguistics.