<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type='text/xsl' href='/oai/static/oai2.xsl' ?><OAI-PMH xmlns="http://www.openarchives.org/OAI/2.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/ http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd">
  <!-- Timestamp carried by this response. -->
  <responseDate>2026-05-21T23:18:26Z</responseDate>
  <!-- Echo of the GetRecord request: verb, metadata format, record identifier and base URL. -->
  <request verb="GetRecord"
           metadataPrefix="oai_ddi25"
           identifier="c00cabdc8a2df948264bea077e55aa637ab14d90ab85b14b864b363c6e1d06ba">https://datacatalogue.cessda.eu/oai-pmh/v0/oai</request>
  <GetRecord>
    <record>
    <!-- Record header: identifier, datestamp and the sets this record belongs to. -->
    <header>
      <identifier>c00cabdc8a2df948264bea077e55aa637ab14d90ab85b14b864b363c6e1d06ba</identifier>
      <datestamp>2025-06-17T03:16:16Z</datestamp>
      <setSpec>language:en</setSpec>
      <setSpec>openaire_data</setSpec>
    </header>
      <metadata>
        <codeBook xmlns="ddi:codebook:2_5" version="2.5" xsi:schemaLocation="ddi:codebook:2_5 http://www.ddialliance.org/Specification/DDI-Codebook/2.5/XMLSchema/codebook.xsd">
    <!-- Document description: title of this DDI document and per-language landing pages. -->
    <docDscr>
      <citation>
        <titlStmt>
          <titl xml:lang="en">DDI study level documentation for study 10.7802/2470 TweetsCOV19 - A Semantically Annotated Corpus of Tweets About the COVID-19 Pandemic (Part 4, January 2021 - August 2022)</titl>
        </titlStmt>
        <prodStmt/>
        <holdings xml:lang="en" URI="https://search.gesis.org/research_data/SDN-10.7802-2470?lang=en"/>
        <holdings xml:lang="de" URI="https://search.gesis.org/research_data/SDN-10.7802-2470?lang=de"/>
      </citation>
    </docDscr>
  <stdyDscr>
    <!-- Study citation: titles, identifiers, authors, distributor, version and landing pages. -->
    <citation>
      <titlStmt>
        <titl xml:lang="en">TweetsCOV19 - A Semantically Annotated Corpus of Tweets About the COVID-19 Pandemic (Part 4, January 2021 - August 2022)</titl>
        <parTitl xml:lang="de">TweetsCOV19 - A Semantically Annotated Corpus of Tweets About the COVID-19 Pandemic (Part 4, January 2021 - August 2022)</parTitl>
        <IDNo xml:lang="en" agency="GESIS">10.7802/2470</IDNo>
        <IDNo xml:lang="de" agency="GESIS">10.7802/2470</IDNo>
        <IDNo xml:lang="en" agency="DOI">10.7802/2470</IDNo>
        <IDNo xml:lang="de" agency="DOI">10.7802/2470</IDNo>
      </titlStmt>
      <rspStmt>
        <!-- Each author value sits on one line with its tags: whitespace inside AuthEnty is data, so never wrap or indent the name. -->
        <AuthEnty affiliation="GESIS - Leibniz-Institut für Sozialwissenschaften" xml:lang="en">Dimitrov, Dimitar</AuthEnty>
        <AuthEnty affiliation="GESIS - Leibniz-Institut für Sozialwissenschaften" xml:lang="de">Dimitrov, Dimitar</AuthEnty>
        <AuthEnty affiliation="GESIS - Leibniz-Institut für Sozialwissenschaften" xml:lang="en">Baran, Erdal</AuthEnty>
        <AuthEnty affiliation="GESIS - Leibniz-Institut für Sozialwissenschaften" xml:lang="de">Baran, Erdal</AuthEnty>
        <AuthEnty affiliation="Institute of Computer Science, FORTH-ICS, Heraklion, Greece" xml:lang="en">Fafalios, Pavlos</AuthEnty>
        <AuthEnty affiliation="Institute of Computer Science, FORTH-ICS, Heraklion, Greece" xml:lang="de">Fafalios, Pavlos</AuthEnty>
        <AuthEnty affiliation="GESIS - Leibniz-Institut für Sozialwissenschaften" xml:lang="en">Yu, Ran</AuthEnty>
        <AuthEnty affiliation="GESIS - Leibniz-Institut für Sozialwissenschaften" xml:lang="de">Yu, Ran</AuthEnty>
        <AuthEnty affiliation="Chongqing University of Technology, Chongqing, China" xml:lang="en">Zhu, Xiaofei</AuthEnty>
        <AuthEnty affiliation="Chongqing University of Technology, Chongqing, China" xml:lang="de">Zhu, Xiaofei</AuthEnty>
        <AuthEnty affiliation="GESIS - Leibniz-Institut für Sozialwissenschaften" xml:lang="en">Zloch, Matthäus</AuthEnty>
        <AuthEnty affiliation="GESIS - Leibniz-Institut für Sozialwissenschaften" xml:lang="de">Zloch, Matthäus</AuthEnty>
        <AuthEnty affiliation="GESIS - Leibniz-Institut für Sozialwissenschaften &amp; Heinrich-Heine-University Düsseldorf, Germany &amp; L3S Research Center, Hannover, Germany" xml:lang="en">Dietze, Stefan</AuthEnty>
        <AuthEnty affiliation="GESIS - Leibniz-Institut für Sozialwissenschaften &amp; Heinrich-Heine-University Düsseldorf, Germany &amp; L3S Research Center, Hannover, Germany" xml:lang="de">Dietze, Stefan</AuthEnty>
      </rspStmt>
      <prodStmt>
        <!-- NOTE(review): prodDate carries neither a date attribute nor text; confirm upstream whether a production date exists. -->
        <prodDate xml:lang="en"/>
      </prodStmt>
      <distStmt>
        <distrbtr abbr="GESIS" URI="http://www.gesis.org/" xml:lang="en">GESIS Data Archive for the Social Sciences</distrbtr>
        <distrbtr abbr="GESIS" URI="http://www.gesis.org/" xml:lang="de">GESIS Datenarchiv für Sozialwissenschaften</distrbtr>
        <distDate xml:lang="en" date="2022"/>
        <distDate xml:lang="de" date="2022"/>
      </distStmt>
      <verStmt>
        <version date="2022" xml:lang="en"/>
        <version date="2022" xml:lang="de"/>
      </verStmt>
      <holdings xml:lang="en" URI="https://search.gesis.org/research_data/SDN-10.7802-2470?lang=en"/>
      <holdings xml:lang="de" URI="https://search.gesis.org/research_data/SDN-10.7802-2470?lang=de"/>
    </citation>
    <stdyInfo>
      <!-- Subject keywords, listed once per language tag. -->
      <subject>
        <keyword xml:lang="en">twitter</keyword>
        <keyword xml:lang="en">social media</keyword>
        <keyword xml:lang="en">text analysis</keyword>
        <keyword xml:lang="en">discourse</keyword>
        <keyword xml:lang="en">epidemic</keyword>
        <keyword xml:lang="en">contagious disease</keyword>
        <keyword xml:lang="de">twitter</keyword>
        <keyword xml:lang="de">social media</keyword>
        <keyword xml:lang="de">text analysis</keyword>
        <keyword xml:lang="de">discourse</keyword>
        <keyword xml:lang="de">epidemic</keyword>
        <keyword xml:lang="de">contagious disease</keyword>
      </subject>
      <abstract xml:lang="en">TweetsCOV19 is a semantically annotated corpus of Tweets about the COVID-19 pandemic. It is a subset of TweetsKB and aims at capturing online discourse about various aspects of the pandemic and its societal impact. Metadata information about the tweets as well as extracted entities, sentiments, hashtags, user mentions, and resolved URLs are exposed in RDF using established RDF/S vocabularies (for the sake of privacy, we anonymize user IDs and we do not provide the text of the tweets). More information is available through TweetsCOV19's home page: https://data.gesis.org/tweetscov19/.  We also provide a tab-separated values (tsv) version of the dataset. Each line contains features of a tweet instance. Features are separated by a tab character ("\t"). The following list indicates the feature indices:  1. Tweet Id: Long.  2. Username: String. Encrypted for privacy reasons.  3. Timestamp: Format ( "EEE MMM dd HH:mm:ss Z yyyy" ).  4. #Followers: Integer.  5. #Friends: Integer.  6. #Retweets: Integer.  7. #Favorites: Integer.  8. Entities: String. For each entity, we aggregated the original text, the annotated entity and the produced score from the FEL library. Each entity is separated from another entity by char ";". Also, the fields of each entity are separated by char ":" in order to store "original_text:annotated_entity:score;". If FEL did not find any entities, we have stored "null;".  9. Sentiment: String. SentiStrength produces a score for positive (1 to 5) and negative (-1 to -5) sentiment. We split these two numbers by whitespace char " ". Positive sentiment was stored first and then negative sentiment (e.g. "2 -1").  10. Mentions: String. If the tweet contains mentions, we remove the char "@" and concatenate the mentions with whitespace char " ". If no mentions appear, we have stored "null;".  11. Hashtags: String. If the tweet contains hashtags, we remove the char "#" and concatenate the hashtags with whitespace char " ". 
If no hashtags appear, we have stored "null;".  12. URLs: String. If the tweet contains URLs, we concatenate the URLs using ":-: ". If no URLs appear, we have stored "null;".  To extract the dataset from TweetsKB, we compiled a seed list of 268 COVID-19-related keywords.  You can find the previous part 3 at https://doi.org/10.5281/zenodo.4593523 .</abstract><abstract xml:lang="de">TweetsCOV19 is a semantically annotated corpus of Tweets about the COVID-19 pandemic. It is a subset of TweetsKB and aims at capturing online discourse about various aspects of the pandemic and its societal impact. Metadata information about the tweets as well as extracted entities, sentiments, hashtags, user mentions, and resolved URLs are exposed in RDF using established RDF/S vocabularies (for the sake of privacy, we anonymize user IDs and we do not provide the text of the tweets). More information is available through TweetsCOV19's home page: https://data.gesis.org/tweetscov19/.  We also provide a tab-separated values (tsv) version of the dataset. Each line contains features of a tweet instance. Features are separated by a tab character ("\t"). The following list indicates the feature indices:  1. Tweet Id: Long.  2. Username: String. Encrypted for privacy reasons.  3. Timestamp: Format ( "EEE MMM dd HH:mm:ss Z yyyy" ).  4. #Followers: Integer.  5. #Friends: Integer.  6. #Retweets: Integer.  7. #Favorites: Integer.  8. Entities: String. For each entity, we aggregated the original text, the annotated entity and the produced score from the FEL library. Each entity is separated from another entity by char ";". Also, the fields of each entity are separated by char ":" in order to store "original_text:annotated_entity:score;". If FEL did not find any entities, we have stored "null;".  9. Sentiment: String. SentiStrength produces a score for positive (1 to 5) and negative (-1 to -5) sentiment. We split these two numbers by whitespace char " ". 
Positive sentiment was stored first and then negative sentiment (e.g. "2 -1").  10. Mentions: String. If the tweet contains mentions, we remove the char "@" and concatenate the mentions with whitespace char " ". If no mentions appear, we have stored "null;".  11. Hashtags: String. If the tweet contains hashtags, we remove the char "#" and concatenate the hashtags with whitespace char " ". If no hashtags appear, we have stored "null;".  12. URLs: String. If the tweet contains URLs, we concatenate the URLs using ":-: ". If no URLs appear, we have stored "null;".  To extract the dataset from TweetsKB, we compiled a seed list of 268 COVID-19-related keywords.  You can find the previous part 3 at https://doi.org/10.5281/zenodo.4593523 .</abstract>
      <!-- Summary description: collection period and the universe the data was drawn from. -->
      <sumDscr>
        <!-- Dates are zero-padded ISO 8601 (YYYY-MM-DD) so that generic date parsers accept them. -->
        <collDate xml:lang="en" date="2021-01-01" event="start"/>
        <collDate xml:lang="en" date="2022-08-01" event="end"/>
        <universe xml:lang="en" clusion="I">TweetsKB (https://data.gesis.org/tweetskb/)</universe>
      </sumDscr>
    </stdyInfo>
    <!-- Methodology: mode of data collection, given once per language tag. -->
    <method>
      <dataColl>
        <!-- collMode holds mixed content; keep text and child on one line so no whitespace is added to the value. -->
        <collMode xml:lang="en">Web Scraping<concept/></collMode>
        <collMode xml:lang="de">Web Scraping<concept/></collMode>
      </dataColl>
    </method>
    <!-- Data access: usage restrictions, given once per language tag. -->
    <dataAccs>
      <useStmt>
        <!-- The line break inside each restrctn is part of the text value and must stay unindented. -->
        <restrctn xml:lang="en">Free access (without registration) - The research data can be downloaded directly by anyone without further limitations.
Data can only be used for non-commercial research</restrctn>
        <restrctn xml:lang="de">Freier Zugang (ohne Registrierung) - Die Forschungsdaten können von jedem direkt heruntergeladen werden.
Data can only be used for non-commercial research</restrctn>
      </useStmt>
    </dataAccs>
    <!-- No other study materials are listed for this record. -->
    <othrStdyMat/>
  </stdyDscr>
  <!-- No file-level description is provided for this record. -->
  <fileDscr/>
</codeBook>
      </metadata>
      <!-- Provenance: base URL, identifier and datestamp of the record at its origin repository. -->
      <about>
        <provenance xmlns="http://www.openarchives.org/OAI/2.0/provenance"
                    xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/provenance http://www.openarchives.org/OAI/2.0/provenance.xsd">
          <originDescription harvestDate="2025-06-17T03:16:16Z" altered="true">
            <baseURL>http://dbkapps.gesis.org/dbkoai/oai.asp</baseURL>
            <identifier>oai:dbk.gesis.org:SDN/10.7802_2470</identifier>
            <datestamp>2024-10-19</datestamp>
            <metadataNamespace>ddi:codebook:2_5</metadataNamespace>
          </originDescription>
        </provenance>
      </about>
    </record>
  </GetRecord>
</OAI-PMH>