{"id":1127,"name":"SDGmapR","description":"R functions and datasets related to the mapping of text to the United Nations 17 Sustainable Development Goals.","url":"https://github.com/CMUSustainability/SDGmapR","last_synced_at":"2026-05-23T11:30:17.548Z","repository":{"id":148046778,"uuid":"412943532","full_name":"CMUSustainability/SDGmapR","owner":"CMUSustainability","description":"R functions and datasets related to the mapping of text to the United Nations 17 Sustainable Development Goals (SDGs).","archived":false,"fork":false,"pushed_at":"2022-05-12T08:16:11.000Z","size":3221,"stargazers_count":12,"open_issues_count":0,"forks_count":1,"subscribers_count":2,"default_branch":"main","last_synced_at":"2026-05-12T06:04:03.902Z","etag":null,"topics":[],"latest_commit_sha":null,"homepage":null,"language":"R","has_issues":true,"has_wiki":null,"has_pages":null,"mirror_url":null,"source_name":null,"license":"mit","status":null,"scm":"git","pull_requests_enabled":true,"icon_url":"https://github.com/CMUSustainability.png","metadata":{"files":{"readme":"README.Rmd","changelog":null,"contributing":null,"funding":null,"license":"LICENSE","code_of_conduct":null,"threat_model":null,"audit":null,"citation":null,"codeowners":null,"security":null,"support":null,"governance":null}},"created_at":"2021-10-03T00:46:41.000Z","updated_at":"2024-03-23T12:17:14.000Z","dependencies_parsed_at":"2023-04-29T20:54:27.284Z","dependency_job_id":null,"html_url":"https://github.com/CMUSustainability/SDGmapR","commit_stats":{"total_commits":50,"total_committers":2,"mean_commits":25.0,"dds":0.09999999999999998,"last_synced_commit":"18d37fa29fafb7ddba10610dff7607ea4303b0c6"},"previous_names":[],"tags_count":0,"template":false,"template_full_name":null,"purl":"pkg:github/CMUSustainability/SDGmapR","repository_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/CMUSustainability%2FSDGmapR","tags_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/CMUSustainability%2FSDGmapR/tags","releases_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/CMUSustainability%2FSDGmapR/releases","manifests_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/CMUSustainability%2FSDGmapR/manifests","owner_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners/CMUSustainability","download_url":"https://codeload.github.com/CMUSustainability/SDGmapR/tar.gz/refs/heads/main","sbom_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/CMUSustainability%2FSDGmapR/sbom","scorecard":null,"host":{"name":"GitHub","url":"https://github.com","kind":"github","repositories_count":286080680,"owners_count":33254770,"icon_url":"https://github.com/github.png","version":null,"created_at":"2022-05-30T11:31:42.601Z","updated_at":"2026-05-20T04:48:54.280Z","status":"ssl_error","status_checked_at":"2026-05-20T04:48:10.851Z","response_time":356,"last_error":"SSL_read: unexpected eof while reading","robots_txt_status":"success","robots_txt_updated_at":"2025-07-24T06:49:26.215Z","robots_txt_url":"https://github.com/robots.txt","online":false,"can_crawl_api":true,"host_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub","repositories_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories","repository_names_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repository_names","owners_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners"}},"owner":{"login":"CMUSustainability","name":null,"uuid":"91693153","kind":"user","description":null,"email":"","website":null,"location":null,"twitter":null,"company":null,"icon_url":"https://avatars.githubusercontent.com/u/91693153?v=4","repositories_count":1,"last_synced_at":"2023-03-27T11:54:18.531Z","metadata":{"has_sponsors_listing":false},"html_url":"https://github.com/CMUSustainability","funding_links":[],"total_stars":null,"followers":null,"following":null,"created_at":"2023-03-27T11:54:18.533Z","updated_at":"2023-03-27T11:54:18.533Z","owner_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners/CMUSustainability","repositories_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners/CMUSustainability/repositories"},"packages":[],"commits":{"id":1254627,"full_name":"CMUSustainability/SDGmapR","default_branch":"main","total_commits":50,"total_committers":2,"total_bot_commits":0,"total_bot_committers":0,"mean_commits":25.0,"dds":0.09999999999999998,"past_year_total_commits":0,"past_year_total_committers":0,"past_year_total_bot_commits":0,"past_year_total_bot_committers":0,"past_year_mean_commits":0.0,"past_year_dds":0.0,"last_synced_at":"2026-05-20T10:01:31.455Z","last_synced_commit":"18d37fa29fafb7ddba10610dff7607ea4303b0c6","created_at":"2023-03-27T11:50:15.913Z","updated_at":"2026-05-20T10:01:27.911Z","committers":[{"name":"pwu97","email":"43555461+pwu97","login":"pwu97","count":45},{"name":"CMUSustainability","email":"91693153+CMUSustainability","login":"CMUSustainability","count":5}],"past_year_committers":[],"commits_url":"https://commits.ecosyste.ms/api/v1/hosts/GitHub/repositories/CMUSustainability%2FSDGmapR/commits","host":{"name":"GitHub","url":"https://github.com","kind":"github","last_synced_at":"2026-05-22T00:00:12.925Z","repositories_count":6237695,"commits_count":884445214,"contributors_count":34893816,"owners_count":1155887,"icon_url":"https://github.com/github.png","host_url":"https://commits.ecosyste.ms/api/v1/hosts/GitHub","repositories_url":"https://commits.ecosyste.ms/api/v1/hosts/GitHub/repositories"}},"issues_stats":{"full_name":"CMUSustainability/SDGmapR","html_url":"https://github.com/CMUSustainability/SDGmapR","last_synced_at":"2025-09-01T02:31:58.744Z","status":"error","issues_count":0,"pull_requests_count":0,"avg_time_to_close_issue":null,"avg_time_to_close_pull_request":null,"issues_closed_count":0,"pull_requests_closed_count":0,"pull_request_authors_count":0,"issue_authors_count":0,"avg_comments_per_issue":null,"avg_comments_per_pull_request":null,"merged_pull_requests_count":0,"bot_issues_count":0,"bot_pull_requests_count":0,"past_year_issues_count":0,"past_year_pull_requests_count":0,"past_year_avg_time_to_close_issue":null,"past_year_avg_time_to_close_pull_request":null,"past_year_issues_closed_count":0,"past_year_pull_requests_closed_count":0,"past_year_pull_request_authors_count":0,"past_year_issue_authors_count":0,"past_year_avg_comments_per_issue":null,"past_year_avg_comments_per_pull_request":null,"past_year_bot_issues_count":0,"past_year_bot_pull_requests_count":0,"past_year_merged_pull_requests_count":0,"created_at":"2023-05-09T10:39:04.012Z","updated_at":"2025-09-01T02:31:58.744Z","repository_url":"https://issues.ecosyste.ms/api/v1/hosts/GitHub/repositories/CMUSustainability%2FSDGmapR","issues_url":"https://issues.ecosyste.ms/api/v1/hosts/GitHub/repositories/CMUSustainability%2FSDGmapR/issues","issue_labels_count":{},"pull_request_labels_count":{},"issue_author_associations_count":{},"pull_request_author_associations_count":{},"issue_authors":{},"pull_request_authors":{},"host":{"name":"GitHub","url":"https://github.com","kind":"github","last_synced_at":"2026-05-22T00:00:21.018Z","repositories_count":14662835,"issues_count":34135897,"pull_requests_count":111729993,"authors_count":11269356,"icon_url":"https://github.com/github.png","host_url":"https://issues.ecosyste.ms/api/v1/hosts/GitHub","repositories_url":"https://issues.ecosyste.ms/api/v1/hosts/GitHub/repositories","owners_url":"https://issues.ecosyste.ms/api/v1/hosts/GitHub/owners","authors_url":"https://issues.ecosyste.ms/api/v1/hosts/GitHub/authors"},"past_year_issue_labels_count":{},"past_year_pull_request_labels_count":{},"past_year_issue_author_associations_count":{},"past_year_pull_request_author_associations_count":{},"past_year_issue_authors":{},"past_year_pull_request_authors":{},"maintainers":[],"active_maintainers":[]},"events":{"total":{},"last_year":{}},"keywords":[],"dependencies":[{"ecosystem":"cran","filepath":"DESCRIPTION","sha":null,"kind":"manifest","created_at":"2023-04-29T20:54:27.149Z","updated_at":"2023-04-29T20:54:27.149Z","repository_link":"https://github.com/CMUSustainability/SDGmapR/blob/main/DESCRIPTION","dependencies":[{"id":9305935709,"package_name":"R","ecosystem":"cran","requirements":"\u003e= 2.10","direct":true,"kind":"depends","optional":false}]}],"score":3.1780538303479458,"created_at":"2023-09-11T11:54:37.013Z","updated_at":"2026-05-23T11:30:17.568Z","avatar_url":"https://github.com/CMUSustainability.png","language":"R","category":"Sustainable Development","sub_category":"Sustainable Development Goals","monthly_downloads":0,"total_dependent_repos":0,"total_dependent_packages":0,"readme":"---\noutput: github_document\n---\n\n\u003c!-- README.md is generated from README.Rmd. Please edit that file --\u003e\n\n```{r, include = FALSE}\nknitr::opts_chunk$set(\n  collapse = TRUE,\n  comment = \"#\u003e\",\n  fig.path = \"man/figures/README-\",\n  out.width = \"100%\"\n)\n```\n\n# SDGmapR\n\n\u003c!-- badges: start --\u003e\n\u003c!-- badges: end --\u003e\n\nThe goal of `SDGmapR` is to provide an open-source foundation for the systematic mapping\nto the United Nations Sustainable Development Goals (SDGs). In this R package one can find publicly available [SDG keyword datasets](https://github.com/pwu97/SDGmapR/tree/main/datasets) in the `tidy` data format,\nthe [UN Official SDG color scheme](https://www.un.org/sustainabledevelopment/wp-content/uploads/2019/01/SDG_Guidelines_AUG_2019_Final.pdf) and [SDG Descriptions](https://github.com/pwu97/SDGmapR/blob/main/datasets/sdg_desc_cleaned.csv), and several functions related to the\nmapping of text to particular sets of keywords.\n\n## Installation\n\nYou can install the development version from [GitHub](https://github.com/) with:\n\n``` r\n# install.packages(\"devtools\")\ndevtools::install_github(\"CMUSustainability/SDGmapR\")\n```\n\n## Publicly Available SDG Keywords\n\nThe table below lists publicly available SDG keywords that have been published online. Some\nof the lists have weights associated with every keyword, while some do not. For the purposes\nof the `SDGmapR` package, we will assign an equal weight of one to every word if weights are not given. \nNote that the column for `SDG17` will represent whether the dataset has keywords\nrelated to SDG17.\n\n```{r, echo=FALSE, example}\nlibrary(knitr)\nsdg_table \u003c- data.frame(\n  \"Source\" = c(\"[Core Elsevier (Work in Progress)](https://data.mendeley.com/datasets/87txkw7khs/1)\", \n               \"[Improved Elsevier Top 100](https://data.mendeley.com/datasets/9sxdykm8s4/2)\", \n               \"[SDSN](https://ap-unsdsn.org/regional-initiatives/universities-sdgs/)\", \n               \"[CMU Top 250 Words](https://www.cmu.edu/leadership/the-provost/provost-priorities/sustainability-initiative/sdg-definitions.html)\",\n               \"[CMU Top 500 Words](https://www.cmu.edu/leadership/the-provost/provost-priorities/sustainability-initiative/sdg-definitions.html)\",\n               \"[CMU Top 1000 Words](https://www.cmu.edu/leadership/the-provost/provost-priorities/sustainability-initiative/sdg-definitions.html)\",\n               \"[University of Auckland (Work in Progress)](https://www.sdgmapping.auckland.ac.nz/)\", \"[University of Toronto (Work in Progress)](https://data.utoronto.ca/sustainable-development-goals-sdg-report/sdg-report-appendix/)\"),\n  \"Dataset\" = c(\"`elsevier_keywords`\",\n             \"`elsevier100_keywords`\",\n             \"`sdsn_keywords`\",\n             \"`cmu250_keywords`\",\n             \"`cmu500_keywords`\",\n             \"`cmu1000_keywords`\",\n             \"`auckland_keywords`\",\n             \"`toronto_keywords`\"),\n  \"CSV\" = c(\"[Link](https://github.com/pwu97/SDGmapR/blob/main/datasets/elsevier_keywords_cleaned.csv)\", \"[Link](https://github.com/pwu97/SDGmapR/blob/main/datasets/elsevier100_keywords_cleaned.csv)\", \"[Link](https://github.com/pwu97/SDGmapR/blob/main/datasets/sdsn_keywords_cleaned.csv)\", \n\"[Link](https://github.com/pwu97/SDGmapR/blob/main/datasets/cmu250_keywords_cleaned.csv)\",\n\"[Link](https://github.com/pwu97/SDGmapR/blob/main/datasets/cmu500_keywords_cleaned.csv)\",\n\"[Link](https://github.com/pwu97/SDGmapR/blob/main/datasets/cmu1000_keywords_cleaned.csv)\", \"\", \"\"),\n  \"SDG17\" = c(\"No\", \"No\", \"Yes\", \"No\", \"No\", \"No\", \"Yes\", \"Yes\")\n)\nkable(sdg_table)\n```\n\n## Example SDGMapR Usage\n\nWe can map to one SDG with the `count_sdg_keywords` function that adds up the\nweights of the keywords found. We can find the keywords for one SDG with the\n`tabulate_sdg_keywords` that returns the words as a vector, which we can view\nin the `tidy` format by applying `unnest()` to our result.\n\n```{r, warning=FALSE, message=FALSE}\nlibrary(tidyverse)\nlibrary(SDGmapR)\n\n# Load first 1000 #tidytuesday tweets\ntweets \u003c- readRDS(url(\"https://github.com/rfordatascience/tidytuesday/blob/master/data/2019/2019-01-01/tidytuesday_tweets.rds?raw=true\")) %\u003e%\n  select(text) %\u003e%\n  head(1000) %\u003e%\n  mutate(text = str_to_lower(text))\n\n# Map to SDG 1 using Improved Elsevier Top 100 Keywords\ntweets_sdg1 \u003c- tweets %\u003e%\n  mutate(sdg_1_weight = count_sdg_weights(text, 1),\n         sdg_1_words = tabulate_sdg_keywords(text, 1)) %\u003e%\n  arrange(desc(sdg_1_weight)) %\u003e%\n  select(text, sdg_1_weight, sdg_1_words)\n\n# View SDG 1 matched keywords\ntweets_sdg1 %\u003e%\n  unnest(sdg_1_words)\n```\n\nWe can map to a different set of keywords by adding an additional input into\nour function, using the `cmu250` (CMU Top 250 Keywords) dataset of SDG keywords instead of the default `elsevier1000` dataset of SDG keywords.\n\n```{r}\n# Map to SDG 3 using Elsevier Core keywords\ntweets %\u003e%\n  mutate(sdg_weight = count_sdg_weights(text, 3, \"cmu250\")) %\u003e%\n  select(text, sdg_weight) %\u003e%\n  arrange(desc(sdg_weight))\n\n# Map to SDG 5 using Elsevier Core keywords\ntweets %\u003e%\n  mutate(sdg_weight = count_sdg_weights(text, 5, \"cmu250\")) %\u003e%\n  select(text, sdg_weight) %\u003e%\n  arrange(desc(sdg_weight))\n\n# Map to SDG 7 using Elsevier Core keywords\ntweets %\u003e%\n  mutate(sdg_weight = count_sdg_weights(text, 7, \"cmu250\")) %\u003e%\n  select(text, sdg_weight) %\u003e%\n  arrange(desc(sdg_weight))\n```\n\nWe can map course descriptions as well. Below, we show the package being used to map the CMU course descriptions from Fall 2022 to the SDGs.\n\n```{r}\n# Create dataframe of CMU course descriptions from Fall 2022\nclasses \u003c- readxl::read_excel(\"datasets/cmu_f22_course_info.xlsx\") %\u003e%\n  rename(semester = `Semester`,\n         course_title = `Course Title`,\n         course_num = `Course Number`,\n         course_desc = `Course Description`) %\u003e% \n  mutate(course_dept = substr(course_num, 1, 2),\n         course_level = substr(course_num, 3, 5),\n         course_level_specific = substr(course_num, 3, 3)) %\u003e%\n  mutate(text = paste(str_to_lower(course_title), str_to_lower(course_desc))) %\u003e%\n  # Clean the punctuation\n  mutate(text = gsub(\"[^[:alnum:]['-]\", \" \", text)) %\u003e%\n  arrange(desc(semester)) %\u003e%\n  distinct(course_num, .keep_all = TRUE) %\u003e%\n  # Only select 5% of courses for the purposes of this Markdown file\n  sample_frac(0.05)\n\n# Perform the mapping\nall_sdg_keywords \u003c- data.frame()\nfor (goal_num in 1:17) {\n  classes %\u003e%\n    mutate(goal = goal_num,\n           keyword = tabulate_sdg_keywords(text, goal_num, keywords = \"cmu250\")) %\u003e%\n    unnest(keyword) -\u003e cur_sdg_keywords\n  \n  all_sdg_keywords \u003c- rbind(all_sdg_keywords, cur_sdg_keywords) \n}\nall_sdg_keywords %\u003e%\n  left_join(cmu250_keywords, by = c(\"goal\", \"keyword\")) %\u003e%\n  select(keyword, weight, semester, course_num, goal, color) %\u003e%\n  arrange(course_num) -\u003e all_sdg_keywords\n\n# View mapped keywords dataset\nall_sdg_keywords\n```\n\n## Frequently Asked Questions (FAQs)\n\nQ: What are the `cmu1000`, `cmu500`, and `cmu250` datasets? Why 250, 500, and 1000?\n\nA: These are SDG keyword datasets created by Carnegie Mellon University (CMU). The number indicates approximately how many words are in each SDG for that dataset. For instance, for the `cmu500` dataset, we would expect roughly 500 words in SDG6. We initially created the dataset `cmu1000` to represent the dataset with roughly 1000 words for each SDG, and then we took the top 250 and 500 words based on keyword weight to generate `cmu250` and `cmu500`.\n\nQ: Is there any easy way to customize the SDG keyword dataset and add in and my own assessment of their weights?\n\nA: Yes! Instead of passing in one of the known SDG keyword datasets, you can directly pass in your own SDG keyword dataset. All you have to do is ensure that the columns match up with `goal`, `keyword`, `pattern`, `weight`, and `color`.\n\nQ: How were the weights generated for each keyword?\n\nA: Very loosely, they were interpolated from the  [Elsevier SDG Keyword weights](https://elsevier.digitalcommonsdata.com/datasets/9sxdykm8s4/2). Using Google's Word2Vec, we assigned the weight of each word to be a weighted proportion of defined Elsevier keywords, or keywords that were in Word2Vec's dataset, based on how often they were a 100 nearest neighbors in terms of semantic similarity.\n\nQ: Why didn't you use compound expressions like \"poverty AND economic resources or \"poverty AND (disaster OR disaster area)\"?\n\nA: We have attempted to use compound expressions for SDG mapping, but found that in practice, the specific compound expressions for SDG mapping were few and far between. For instance, when we tried to use compound expressions for SDG mapping using [Elsevier's newly released dataset](https://figshare.com/articles/dataset/Keywords_and_search_strings_for_generating_SDG_training_sets/17294255), we found that very few course descriptions had specific compound expression matchings. Thus, we used keyword weights instead.\n\nQ: Words like \"student\", \"semester\", and \"homework\" seem like very general SDG4 keywords when mapping to SDG4. When mapping to course descriptions, wouldn't this tag almost every course with SDG4?\n\nA: Yes. Thus, we filtered out words that were too general among course descriptions. The specific list of words we excluded for SDG4 mapping in mapping to course descriptions are: \"education\", \"educational\", \"school\", \"schools\", \"student\", \"students\", \"teaching\", \"learning\", \"apprenticeship\", \"skill\", \"skills\", \"curriculum\", \"teachers\", \"trainees\", \"trainee\", \"teacher\", \"classroom\", \"educators\", \"math\", \"classrooms\", \"educator\", \"graduates\", \"diploma\", \"undergraduates\", \"undergrad\", \"course\", \"mathematics\", \"achievement\", \"courses\", \"elementary\", \"academic\", \"training\", \"pupils\", \"undergraduate\", \"college\", \"colleges\", \"learners\", \"algebra\", \"reading\", \"comprehension\", \"achievements\", \"universities\", \"faculty\", \"internship\", \"principal\", \"internships\", \"career\", \"maths\", \"adult\", \"principals\", \"curricula\", \"grad\", \"biology\", \"university\", \"semester\", \"scholars\", \"literacy\", \"exam\", \"exams\", \"tutoring\", \"literacy\", \"syllabus\", \"instructor\", \"instructors\", \"degree\", \"classes\", \"language\", \"science\", \"instruction\", \"campus\", \"homework\", \"instructional\", \"curricular\", \"humanities\", \"mentoring\", \"teach\", \"employment\", \"qualifications\", \"coursework\", \"graduate\".\n\n## Acknowledgements\n\nThank you to Jingwen Mu and Kevin Kang from the University of Auckland for discussions and insights about regular expression matchings with the SDG keywords.\n\n\u003c!-- What is special about using `README.Rmd` instead of just `README.md`? You can include R chunks like so: --\u003e\n\n\u003c!-- ```{r cars} --\u003e\n\u003c!-- summary(cars) --\u003e\n\u003c!-- ``` --\u003e\n\n\u003c!-- You'll still need to render `README.Rmd` regularly, to keep `README.md` up-to-date. `devtools::build_readme()` is handy for this. You could also use GitHub Actions to re-render `README.Rmd` every time you push. An example workflow can be found here: \u003chttps://github.com/r-lib/actions/tree/master/examples\u003e. --\u003e\n\n\u003c!-- You can also embed plots, for example: --\u003e\n\n\u003c!-- ```{r pressure, echo = FALSE} --\u003e\n\u003c!-- plot(pressure) --\u003e\n\u003c!-- ``` --\u003e\n\n\u003c!-- In that case, don't forget to commit and push the resulting figure files, so they display on GitHub and CRAN. --\u003e\n","funding_links":[],"readme_doi_urls":[],"works":{},"citation_counts":{},"total_citations":0,"keywords_from_contributors":[],"project_url":"https://ost.ecosyste.ms/api/v1/projects/1127","html_url":"https://ost.ecosyste.ms/projects/1127"}