{
  "_id": "6a106fa8acfb0bcc41cab6f1",
  "Package": "nlpembeds",
  "Title": "Natural Language Processing Embeddings",
  "Version": "1.0.0",
  "Authors@R": "c(person(\"Thomas\", \"Charlon\", role = c(\"aut\", \"cre\"),\nemail = \"charlon@protonmail.com\",\ncomment = c(ORCID = \"0000-0001-7497-0470\")),\nperson(\"Doudou\", \"Zhou\", role = \"ctb\",\ncomment = c(ORCID = \"0000-0002-0830-2287\")),\nperson(\"CELEHS\", role = \"aut\",\ncomment = \"<https://celehs.hms.harvard.edu>\"))",
  "Description": "Provides efficient methods to compute co-occurrence\nmatrices, pointwise mutual information (PMI) and singular value\ndecomposition (SVD). In the biomedical and clinical settings,\none challenge is the huge size of databases, e.g. when\nanalyzing data of millions of patients over tens of years. To\naddress this, this package provides functions to efficiently\ncompute monthly co-occurrence matrices, which is the\ncomputational bottleneck of the analysis, by using the\n'RcppAlgos' package and sparse matrices. Furthermore, the\nfunctions can be called on 'SQL' databases, enabling the\ncomputation of co-occurrence matrices of tens of gigabytes of\ndata, representing millions of patients over tens of years.\nPartly based on Hong C. (2021)\n<doi:10.1038/s41746-021-00519-z>.",
  "VignetteBuilder": "knitr",
  "License": "GPL-3",
  "Encoding": "UTF-8",
  "RoxygenNote": "7.3.2",
  "URL": "https://gitlab.com/thomaschln/nlpembeds",
  "BugReports": "https://gitlab.com/thomaschln/nlpembeds/-/issues",
  "Config/pak/sysreqs": "libgmp3-dev libicu-dev",
  "Repository": "https://thomaschln.r-universe.dev",
  "Date/Publication": "2025-06-11 13:05:53 UTC",
  "RemoteUrl": "https://gitlab.com/thomaschln/nlpembeds",
  "RemoteRef": "HEAD",
  "RemoteSha": "69584a84c86d71211317f1d3f950c1301a2cee2c",
  "NeedsCompilation": "no",
  "Packaged": {
    "Date": "2026-05-07 07:34:31 UTC",
    "User": "root"
  },
  "Author": "Thomas Charlon [aut, cre] (ORCID:\n<https://orcid.org/0000-0001-7497-0470>),\nDoudou Zhou [ctb] (ORCID: <https://orcid.org/0000-0002-0830-2287>),\nCELEHS [aut] (<https://celehs.hms.harvard.edu>)",
  "Maintainer": "Thomas Charlon <charlon@protonmail.com>",
  "MD5sum": "10ff667c7e8ca60e2666ad57db1294d4",
  "_user": "thomaschln",
  "_type": "src",
  "_file": "nlpembeds_1.0.0.tar.gz",
  "_fileid": "766417f817f1decbbf682fc343e8bfc63912e914cda616a2a96b8702840c4a65",
  "_filesize": 3791308,
  "_sha256": "766417f817f1decbbf682fc343e8bfc63912e914cda616a2a96b8702840c4a65",
  "_created": "2026-05-07T07:34:31.000Z",
  "_published": "2026-05-22T15:00:56.596Z",
  "_distro": "noble",
  "_jobs": [
    {
      "job": 77405806807,
      "time": 161,
      "config": "linux-devel-x86_64",
      "r": "4.7.0",
      "check": "OK",
      "artifact": "6849456174"
    },
    {
      "job": 77405807055,
      "time": 163,
      "config": "linux-release-x86_64",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "6849456441"
    },
    {
      "job": 77405806934,
      "time": 133,
      "config": "macos-oldrel-arm64",
      "r": "4.5.3",
      "check": "OK",
      "artifact": "6849445905"
    },
    {
      "job": 77405806941,
      "time": 131,
      "config": "macos-release-arm64",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "6849445293"
    },
    {
      "job": 77405806588,
      "time": 235,
      "config": "source",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "6849408123"
    },
    {
      "job": 77405806114,
      "time": 109,
      "config": "wasm-release",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7163651310"
    },
    {
      "job": 77405807023,
      "time": 95,
      "config": "windows-devel",
      "r": "4.7.0",
      "check": "OK",
      "artifact": "6849435475"
    },
    {
      "job": 77405806951,
      "time": 114,
      "config": "windows-oldrel",
      "r": "4.5.3",
      "check": "OK",
      "artifact": "6849441012"
    },
    {
      "job": 77405806736,
      "time": 174,
      "config": "windows-release",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "6849458135"
    }
  ],
  "_buildurl": "https://github.com/r-universe/thomaschln/actions/runs/25482318905",
  "_status": "success",
  "_host": "GitHub-Actions",
  "_upstream": "https://gitlab.com/thomaschln/nlpembeds",
  "_commit": {
    "id": "69584a84c86d71211317f1d3f950c1301a2cee2c",
    "author": "Thomas Charlon <charlon@protonmail.com>",
    "committer": "Thomas Charlon <charlon@protonmail.com>",
    "message": "rmed 25\n",
    "time": 1749647153
  },
  "_maintainer": {
    "name": "Thomas Charlon",
    "email": "charlon@protonmail.com",
    "login": "thomaschln",
    "linkedin": "in/thomas-charlon-meng-phd-aba0a3275",
    "orcid": "0000-0001-7497-0470",
    "uuid": 2394508
  },
  "_registered": true,
  "_dependencies": [
    {
      "package": "R",
      "version": ">= 3.5.0",
      "role": "Depends"
    },
    {
      "package": "data.table",
      "role": "Imports"
    },
    {
      "package": "magrittr",
      "role": "Imports"
    },
    {
      "package": "Matrix",
      "role": "Imports"
    },
    {
      "package": "methods",
      "role": "Imports"
    },
    {
      "package": "parallel",
      "role": "Imports"
    },
    {
      "package": "RcppAlgos",
      "role": "Imports"
    },
    {
      "package": "reshape2",
      "role": "Imports"
    },
    {
      "package": "RSQLite",
      "role": "Imports"
    },
    {
      "package": "rsvd",
      "role": "Imports"
    },
    {
      "package": "knitr",
      "role": "Suggests"
    },
    {
      "package": "rmarkdown",
      "role": "Suggests"
    },
    {
      "package": "testthat",
      "role": "Suggests"
    }
  ],
  "_owner": "gitlab-thomaschln",
  "_selfowned": true,
  "_usedby": 0,
  "_updates": [
    {
      "week": "2025-23",
      "n": 5
    },
    {
      "week": "2025-24",
      "n": 2
    }
  ],
  "_tags": [],
  "_userbio": {
    "uuid": 2394508,
    "type": "user",
    "name": "Thomas Charlon",
    "description": "Harvard Medical School Researcher\r\nBiomedical Informatics @hms-dbmi\r\nCELEHS laboratory @CELEHS"
  },
  "_downloads": {
    "count": 198,
    "source": "https://cranlogs.r-pkg.org/downloads/total/last-month/nlpembeds"
  },
  "_devurl": "https://gitlab.com/thomaschln/nlpembeds",
  "_searchresults": 2,
  "_rbuild": "4.6.0",
  "_assets": [
    "extra/citation.cff",
    "extra/citation.html",
    "extra/citation.json",
    "extra/citation.txt",
    "extra/contents.json",
    "extra/nlpembeds.html",
    "extra/readme.html",
    "extra/readme.md",
    "manual.pdf"
  ],
  "_homeurl": "https://gitlab.com/thomaschln/nlpembeds",
  "_realowner": "thomaschln",
  "_cranurl": true,
  "_releases": [
    {
      "version": "1.0.0",
      "date": "2025-02-04"
    }
  ],
  "_exports": [
    "%<>%",
    "%>%",
    "%$%",
    "build_df_cooc",
    "build_spm_cooc_sym",
    "get_pmi",
    "get_svd",
    "spm_to_df",
    "sql_cooc"
  ],
  "_help": [
    {
      "page": "compound",
      "title": "Assignment pipe",
      "topics": [
        "%<>%"
      ]
    },
    {
      "page": "pipe",
      "title": "Pipe",
      "topics": [
        "%>%"
      ]
    },
    {
      "page": "exposition",
      "title": "Exposition pipe",
      "topics": [
        "%$%"
      ]
    },
    {
      "page": "build_df_cooc",
      "title": "Compute monthly co-occurrence matrix",
      "topics": [
        "build_df_cooc"
      ]
    },
    {
      "page": "build_spm_cooc_sym",
      "title": "Build symmetric sparse matrix from data frame",
      "topics": [
        "build_spm_cooc_sym"
      ]
    },
    {
      "page": "get_pmi",
      "title": "Compute pointwise mutual information (PMI)",
      "topics": [
        "get_pmi"
      ]
    },
    {
      "page": "get_svd",
      "title": "Compute random singular value decomposition (rSVD)",
      "topics": [
        "get_svd"
      ]
    },
    {
      "page": "spm_to_df",
      "title": "Write sparse matrix to dataframe",
      "topics": [
        "spm_to_df"
      ]
    },
    {
      "page": "sql_cooc",
      "title": "Compute co-occurrence matrix on SQL file",
      "topics": [
        "sql_cooc"
      ]
    }
  ],
  "_readme": "https://gitlab.com/thomaschln/nlpembeds/raw/HEAD/README.md",
  "_rundeps": [
    "bit",
    "bit64",
    "blob",
    "cachem",
    "cli",
    "cpp11",
    "data.table",
    "DBI",
    "fastmap",
    "glue",
    "gmp",
    "lattice",
    "lifecycle",
    "magrittr",
    "Matrix",
    "memoise",
    "pkgconfig",
    "plyr",
    "Rcpp",
    "RcppAlgos",
    "reshape2",
    "rlang",
    "RSQLite",
    "rsvd",
    "stringi",
    "stringr",
    "vctrs"
  ],
  "_vignettes": [
    {
      "source": "cooc_pmi_svd.Rmd",
      "filename": "cooc_pmi_svd.html",
      "title": "Co-occurrence Matrices and PMI-SVD Embeddings",
      "author": "Thomas Charlon",
      "engine": "knitr::rmarkdown",
      "headings": [
        "Summary",
        "Background",
        "Co-occurrence matrix",
        "PMI",
        "SVD",
        "Out-of-memory SQL databases",
        "Batching by patients",
        "Code subsets based on dictionaries",
        "Running on HPC servers",
        "Installation",
        "Parameters tuning",
        "References"
      ],
      "created": "2025-02-02 01:40:14",
      "modified": "2025-02-02 01:40:14",
      "commits": 1
    }
  ],
  "_score": 4,
  "_indexed": true,
  "_nocasepkg": "nlpembeds",
  "_universes": [
    "thomaschln",
    "gitlab-thomaschln"
  ],
  "_binaries": [
    {
      "r": "4.7.0",
      "os": "linux",
      "version": "1.0.0",
      "date": "2026-05-07T07:36:58.000Z",
      "distro": "noble",
      "commit": "69584a84c86d71211317f1d3f950c1301a2cee2c",
      "fileid": "bb2ee689d25a6daa1365b80678c5854384b6f8d8622c1ca1de05535be5d30351",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/thomaschln/actions/runs/25482318905"
    },
    {
      "r": "4.6.0",
      "os": "linux",
      "version": "1.0.0",
      "date": "2026-05-07T07:37:00.000Z",
      "distro": "noble",
      "commit": "69584a84c86d71211317f1d3f950c1301a2cee2c",
      "fileid": "2f07137a7c4b918cbec8cf814b18a6f85839019b7da0648335dc5a44326ae679",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/thomaschln/actions/runs/25482318905"
    },
    {
      "r": "4.5.3",
      "os": "mac",
      "version": "1.0.0",
      "date": "2026-05-07T07:36:28.000Z",
      "commit": "69584a84c86d71211317f1d3f950c1301a2cee2c",
      "fileid": "4dfba16725e23676dc18f0f846417cbe2556d796a877f9732c685491228fded2",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/thomaschln/actions/runs/25482318905"
    },
    {
      "r": "4.6.0",
      "os": "mac",
      "version": "1.0.0",
      "date": "2026-05-07T07:36:23.000Z",
      "commit": "69584a84c86d71211317f1d3f950c1301a2cee2c",
      "fileid": "8c788bf7329a127de817306bd66e4b499f17f3b993900be7803d5018549de08e",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/thomaschln/actions/runs/25482318905"
    },
    {
      "r": "4.7.0",
      "os": "win",
      "version": "1.0.0",
      "date": "2026-05-07T07:35:43.000Z",
      "commit": "69584a84c86d71211317f1d3f950c1301a2cee2c",
      "fileid": "992baeede1589c0999a60791a17bd39e5daeeee09ec1bf21fed88f9e4de30725",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/thomaschln/actions/runs/25482318905"
    },
    {
      "r": "4.5.3",
      "os": "win",
      "version": "1.0.0",
      "date": "2026-05-07T07:36:00.000Z",
      "commit": "69584a84c86d71211317f1d3f950c1301a2cee2c",
      "fileid": "a6ac77010991eee63de809a6f85e2f7556323d0171542651446d0b4265f18b2b",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/thomaschln/actions/runs/25482318905"
    },
    {
      "r": "4.6.0",
      "os": "win",
      "version": "1.0.0",
      "date": "2026-05-07T07:37:00.000Z",
      "commit": "69584a84c86d71211317f1d3f950c1301a2cee2c",
      "fileid": "67afada7302173730a795f0fa6184fea9bf93f81a830f8ce89af35cc859e0af3",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/thomaschln/actions/runs/25482318905"
    },
    {
      "r": "4.6.0",
      "os": "wasm",
      "version": "1.0.0",
      "date": "2026-05-22T15:00:27.000Z",
      "commit": "69584a84c86d71211317f1d3f950c1301a2cee2c",
      "fileid": "3bf38a6ede7bb4e1f17ad56b606974005da1ff75c616bfe83edfca121d6e447c",
      "status": "success",
      "buildurl": "https://github.com/r-universe/thomaschln/actions/runs/25482318905"
    }
  ]
}