{
  "_id": "6a49d345b48287dcb3fdaa29",
  "Package": "zoomerjoin",
  "Title": "Superlatively Fast Fuzzy Joins",
  "Version": "0.2.2",
  "Authors@R": "c(\nperson(\"Beniamino\", \"Green\", , \"beniamino.green@yale.edu\", role = c(\"aut\", \"cre\", \"cph\")),\nperson(\"Etienne\", \"Bacher\", email = \"etienne.bacher@protonmail.com\", role = \"ctb\",\ncomment = c(ORCID = \"0000-0002-9271-5075\")),\nperson(given = \"The authors of the dependency Rust crates\",\nrole = c(\"ctb\", \"cph\"),\ncomment = \"see inst/AUTHORS file for details\")\n)",
  "Description": "Empowers users to fuzzily-merge data frames with millions\nor tens of millions of rows in minutes with low memory usage.\nThe package uses the locality sensitive hashing algorithms\ndeveloped by Datar, Immorlica, Indyk and Mirrokni (2004)\n<doi:10.1145/997817.997857>, and Broder (1998)\n<doi:10.1109/SEQUEN.1997.666900> to avoid having to compare\nevery pair of records in each dataset, resulting in\nfuzzy-merges that finish in linear time.",
  "License": "MIT + file LICENSE",
  "Encoding": "UTF-8",
  "Roxygen": "list(markdown = TRUE)",
  "RoxygenNote": "7.3.3",
  "SystemRequirements": "Cargo (>= 1.56) (Rust's package manager), rustc (>=\n1.70)",
  "Config/testthat/edition": "3",
  "URL": "https://beniamino.org/zoomerjoin/,\nhttps://github.com/beniaminogreen/zoomerjoin",
  "BugReports": "https://github.com/beniaminogreen/zoomerjoin/issues",
  "VignetteBuilder": "knitr",
  "LazyData": "true",
  "LazyDataCompression": "xz",
  "Config/rextendr/version": "0.4.2",
  "Config/pak/sysreqs": "libicu-dev libclang-dev",
  "Repository": "https://beniaminogreen.r-universe.dev",
  "Date/Publication": "2026-04-17 03:15:00 UTC",
  "RemoteUrl": "https://github.com/beniaminogreen/zoomerjoin",
  "RemoteRef": "HEAD",
  "RemoteSha": "d441bcdea9d4b74fa141d460e07734337c653d39",
  "NeedsCompilation": "yes",
  "Packaged": {
    "Date": "2026-06-16 11:06:32 UTC",
    "User": "root"
  },
  "Author": "Beniamino Green [aut, cre, cph],\nEtienne Bacher [ctb] (ORCID: <https://orcid.org/0000-0002-9271-5075>),\nThe authors of the dependency Rust crates [ctb, cph] (see inst/AUTHORS\nfile for details)",
  "Maintainer": "Beniamino Green <beniamino.green@yale.edu>",
  "_user": "beniaminogreen",
  "_type": "src",
  "_file": "zoomerjoin_0.2.2.tar.gz",
  "_fileid": "https://r2.ropensci.org/29a65f7a193436769d700fe22f3c7cd078c1c29411d25362db9a0f18909482b4",
  "_filesize": 5236434,
  "_sha256": "29a65f7a193436769d700fe22f3c7cd078c1c29411d25362db9a0f18909482b4",
  "_expires": "2026-10-13T03:45:07.000Z",
  "_created": "2026-06-16T11:06:32.000Z",
  "_published": "2026-07-05T03:45:09.306Z",
  "_jobs": [
    {
      "job": 85190221387,
      "time": 196,
      "config": "linux-devel-arm64",
      "r": "4.7.0",
      "check": "OK",
      "artifact": "7665024995"
    },
    {
      "job": 85190221363,
      "time": 196,
      "config": "linux-devel-x86_64",
      "r": "4.7.0",
      "check": "OK",
      "artifact": "7665024850"
    },
    {
      "job": 85190221568,
      "time": 235,
      "config": "linux-release-arm64",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7665038255"
    },
    {
      "job": 85190221574,
      "time": 203,
      "config": "linux-release-x86_64",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7665027382"
    },
    {
      "job": 85190221515,
      "time": 143,
      "config": "macos-oldrel-arm64",
      "r": "4.5.3",
      "check": "OK",
      "artifact": "7665005826"
    },
    {
      "job": 85190221480,
      "time": 354,
      "config": "macos-oldrel-x86_64",
      "r": "4.5.3",
      "check": "OK",
      "artifact": "7665082390"
    },
    {
      "job": 85190221454,
      "time": 144,
      "config": "macos-release-arm64",
      "r": "4.6.0",
      "check": "NOTE",
      "artifact": "7665006386"
    },
    {
      "job": 85190221356,
      "time": 327,
      "config": "macos-release-x86_64",
      "r": "4.6.0",
      "check": "NOTE",
      "artifact": "7665071602"
    },
    {
      "job": 85190221194,
      "time": 278,
      "config": "source",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7664954512"
    },
    {
      "job": 85190221453,
      "time": 163,
      "config": "wasm-release",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7665013422"
    },
    {
      "job": 85190221393,
      "time": 208,
      "config": "windows-devel",
      "r": "4.7.0",
      "check": "NOTE",
      "artifact": "7665028824"
    },
    {
      "job": 85190221548,
      "time": 196,
      "config": "windows-oldrel",
      "r": "4.5.3",
      "check": "OK",
      "artifact": "7665025849"
    },
    {
      "job": 85190221398,
      "time": 193,
      "config": "windows-release",
      "r": "4.6.0",
      "check": "NOTE",
      "artifact": "7665023605"
    }
  ],
  "_host": "GitHub-Actions",
  "_buildurl": "https://github.com/r-universe/beniaminogreen/actions/runs/27612924087",
  "_status": "success",
  "_upstream": "https://github.com/beniaminogreen/zoomerjoin",
  "_commit": {
    "id": "d441bcdea9d4b74fa141d460e07734337c653d39",
    "author": "Ben Green <beniamino.green@tutanota.com>",
    "committer": "GitHub <noreply@github.com>",
    "message": "Merge pull request #136 from beniaminogreen/block_by_fix\n\nbugfix",
    "time": 1776395700
  },
  "_maintainer": {
    "name": "Beniamino Green",
    "email": "beniamino.green@yale.edu",
    "login": "beniaminogreen",
    "description": "\n",
    "uuid": 50080644
  },
  "_distro": "noble",
  "_registered": true,
  "_dependencies": [
    {
      "package": "R",
      "version": ">= 4.2",
      "role": "Depends"
    },
    {
      "package": "collapse",
      "role": "Imports"
    },
    {
      "package": "dplyr",
      "role": "Imports"
    },
    {
      "package": "tibble",
      "role": "Imports"
    },
    {
      "package": "tidyr",
      "role": "Imports"
    },
    {
      "package": "rlang",
      "role": "Imports"
    },
    {
      "package": "babynames",
      "role": "Suggests"
    },
    {
      "package": "covr",
      "role": "Suggests"
    },
    {
      "package": "igraph",
      "role": "Suggests"
    },
    {
      "package": "knitr",
      "role": "Suggests"
    },
    {
      "package": "microbenchmark",
      "role": "Suggests"
    },
    {
      "package": "profmem",
      "role": "Suggests"
    },
    {
      "package": "purrr",
      "role": "Suggests"
    },
    {
      "package": "rmarkdown",
      "role": "Suggests"
    },
    {
      "package": "stringdist",
      "role": "Suggests"
    },
    {
      "package": "testthat",
      "version": ">= 3.0.0",
      "role": "Suggests"
    },
    {
      "package": "tidyverse",
      "role": "Suggests"
    },
    {
      "package": "vdiffr",
      "role": "Suggests"
    }
  ],
  "_owner": "beniaminogreen",
  "_selfowned": true,
  "_usedby": 2,
  "_updates": [
    {
      "week": "2025-39",
      "n": 1
    },
    {
      "week": "2025-46",
      "n": 1
    },
    {
      "week": "2026-11",
      "n": 1
    },
    {
      "week": "2026-16",
      "n": 1
    }
  ],
  "_tags": [],
  "_topics": [
    "blazinglyfast",
    "fuzzyjoin",
    "join",
    "rust",
    "zoomer",
    "cargo"
  ],
  "_stars": 109,
  "_contributors": [
    {
      "user": "beniaminogreen",
      "count": 368,
      "uuid": 50080644
    },
    {
      "user": "etiennebacher",
      "count": 29,
      "uuid": 52219252
    },
    {
      "user": "jonddowns",
      "count": 2,
      "uuid": 110783391
    },
    {
      "user": "josiahparry",
      "count": 2,
      "uuid": 13534508
    },
    {
      "user": "floriancaro",
      "count": 1,
      "uuid": 34598596
    }
  ],
  "_userbio": {
    "uuid": 50080644,
    "type": "user",
    "name": "Ben Green",
    "followers": 0
  },
  "_downloads": {
    "count": 587,
    "source": "https://cranlogs.r-pkg.org/downloads/total/last-month/zoomerjoin"
  },
  "_devurl": "https://github.com/beniaminogreen/zoomerjoin",
  "_searchresults": 15,
  "_cargo": true,
  "_rbuild": "4.6.0",
  "_assets": [
    "extra/citation.cff",
    "extra/citation.html",
    "extra/citation.json",
    "extra/citation.txt",
    "extra/contents.json",
    "extra/NEWS.html",
    "extra/NEWS.txt",
    "extra/readme.html",
    "extra/readme.md",
    "extra/zoomerjoin.html",
    "LICENSE",
    "manual.pdf"
  ],
  "_homeurl": "https://github.com/beniaminogreen/zoomerjoin",
  "_realowner": "beniaminogreen",
  "_cranurl": true,
  "_releases": [
    {
      "version": "0.1.4",
      "date": "2024-01-31"
    },
    {
      "version": "0.1.5",
      "date": "2024-07-02"
    },
    {
      "version": "0.2.0",
      "date": "2024-09-24"
    },
    {
      "version": "0.2.1",
      "date": "2025-04-13"
    },
    {
      "version": "0.2.2",
      "date": "2026-01-30"
    },
    {
      "version": "0.2.3",
      "date": "2026-03-14"
    }
  ],
  "_exports": [
    "em_link",
    "euclidean_anti_join",
    "euclidean_full_join",
    "euclidean_inner_join",
    "euclidean_left_join",
    "euclidean_probability",
    "euclidean_right_join",
    "fuzzy_join_core",
    "hamming_anti_join",
    "hamming_distance",
    "hamming_full_join",
    "hamming_inner_join",
    "hamming_left_join",
    "hamming_probability",
    "hamming_right_join",
    "jaccard_anti_join",
    "jaccard_curve",
    "jaccard_full_join",
    "jaccard_hyper_grid_search",
    "jaccard_inner_join",
    "jaccard_left_join",
    "jaccard_probability",
    "jaccard_right_join",
    "jaccard_similarity",
    "jaccard_string_group"
  ],
  "_datasets": [
    {
      "name": "dime_data",
      "title": "Donors from DIME Database",
      "object": "dime_data",
      "class": [
        "tbl_df",
        "tbl",
        "data.frame"
      ],
      "fields": [
        "...1",
        "x"
      ],
      "rows": 1000,
      "table": true,
      "tojson": true
    }
  ],
  "_help": [
    {
      "page": "dime_data",
      "title": "Donors from DIME Database",
      "topics": [
        "dime_data"
      ]
    },
    {
      "page": "em_link",
      "title": "Fit a Probabilistic Matching Model using Naive Bayes + E.M.",
      "topics": [
        "em_link"
      ]
    },
    {
      "page": "euclidean-joins",
      "title": "Fuzzy joins for Euclidean distance using Locality Sensitive Hashing",
      "topics": [
        "euclidean_anti_join",
        "euclidean_full_join",
        "euclidean_inner_join",
        "euclidean_left_join",
        "euclidean_right_join"
      ]
    },
    {
      "page": "euclidean_curve",
      "title": "Plot S-Curve for a LSH with given hyperparameters",
      "topics": [
        "euclidean_curve"
      ]
    },
    {
      "page": "euclidean_probability",
      "title": "Find Probability of Match Based on Similarity",
      "topics": [
        "euclidean_probability"
      ]
    },
    {
      "page": "fuzzy_join_core",
      "title": "Perform a Fuzzy-Join With an Arbitrary Distance Metric",
      "topics": [
        "fuzzy_join_core"
      ]
    },
    {
      "page": "hamming_distance",
      "title": "Calculate Hamming distance of two character vectors",
      "topics": [
        "hamming_distance"
      ]
    },
    {
      "page": "hamming-joins",
      "title": "Fuzzy joins for Hamming distance using Locality Sensitive Hashing",
      "topics": [
        "hamming_anti_join",
        "hamming_full_join",
        "hamming_inner_join",
        "hamming_left_join",
        "hamming_right_join"
      ]
    },
    {
      "page": "hamming_probability",
      "title": "Find Probability of Match Based on Similarity",
      "topics": [
        "hamming_probability"
      ]
    },
    {
      "page": "jaccard_curve",
      "title": "Plot S-Curve for a LSH with given hyperparameters",
      "topics": [
        "jaccard_curve"
      ]
    },
    {
      "page": "jaccard_hyper_grid_search",
      "title": "Help Choose the Appropriate LSH Hyperparameters",
      "topics": [
        "jaccard_hyper_grid_search"
      ]
    },
    {
      "page": "jaccard-joins",
      "title": "Fuzzy joins for Jaccard distance using MinHash",
      "topics": [
        "jaccard_anti_join",
        "jaccard_full_join",
        "jaccard_inner_join",
        "jaccard_left_join",
        "jaccard_right_join"
      ]
    },
    {
      "page": "jaccard_probability",
      "title": "Find Probability of Match Based on Similarity",
      "topics": [
        "jaccard_probability"
      ]
    },
    {
      "page": "jaccard_similarity",
      "title": "Calculate Jaccard Similarity of two character vectors",
      "topics": [
        "jaccard_similarity"
      ]
    },
    {
      "page": "jaccard_string_group",
      "title": "Fuzzy String Grouping Using Minhashing",
      "topics": [
        "jaccard_string_group"
      ]
    }
  ],
  "_pkglogo": "https://github.com/beniaminogreen/zoomerjoin/raw/HEAD/logo.png",
  "_readme": "https://github.com/beniaminogreen/zoomerjoin/raw/HEAD/README.md",
  "_rundeps": [
    "cli",
    "collapse",
    "cpp11",
    "dplyr",
    "generics",
    "glue",
    "lifecycle",
    "magrittr",
    "pillar",
    "pkgconfig",
    "purrr",
    "R6",
    "Rcpp",
    "rlang",
    "stringi",
    "stringr",
    "tibble",
    "tidyr",
    "tidyselect",
    "utf8",
    "vctrs",
    "withr"
  ],
  "_vignettes": [
    {
      "source": "guided_tour.Rmd",
      "filename": "guided_tour.html",
      "title": "A Zoomerjoin Guided Tour",
      "engine": "knitr::rmarkdown",
      "headings": [
        "Introduction:",
        "How Does it Work?",
        "Basic Syntax:",
        "Standardizing String Names After A Merge",
        "References:"
      ],
      "created": "2023-03-09 04:36:20",
      "modified": "2026-03-13 13:58:54",
      "commits": 10
    },
    {
      "source": "benchmarks.Rmd",
      "filename": "benchmarks.html",
      "title": "Benchmarks",
      "engine": "knitr::rmarkdown",
      "headings": [
        "Introduction",
        "Benchmarks",
        "Benchmarking Code:"
      ],
      "created": "2023-03-09 19:42:12",
      "modified": "2024-09-23 23:04:25",
      "commits": 6
    },
    {
      "source": "matching_vectors.Rmd",
      "filename": "matching_vectors.html",
      "title": "Matching Vectors Based on Euclidean Distance",
      "engine": "knitr::rmarkdown",
      "headings": [
        "Introduction",
        "Demonstration"
      ],
      "created": "2023-08-06 17:09:27",
      "modified": "2024-02-14 14:38:27",
      "commits": 5
    }
  ],
  "_score": 8.371880249091555,
  "_indexed": true,
  "_nocasepkg": "zoomerjoin",
  "_universes": [
    "beniaminogreen"
  ],
  "_binaries": [
    {
      "r": "4.7.0",
      "os": "linux",
      "version": "0.2.2",
      "date": "2026-06-16T11:09:29.000Z",
      "distro": "noble",
      "arch": "aarch64",
      "commit": "d441bcdea9d4b74fa141d460e07734337c653d39",
      "fileid": "https://r2.ropensci.org/dd0642d08ee7a7d8d92e949c35d97fbb4dc2a208e12284d8b6017c9c9b4b6cb3",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/beniaminogreen/actions/runs/27612924087"
    },
    {
      "r": "4.7.0",
      "os": "linux",
      "version": "0.2.2",
      "date": "2026-06-16T11:09:30.000Z",
      "distro": "noble",
      "arch": "x86_64",
      "commit": "d441bcdea9d4b74fa141d460e07734337c653d39",
      "fileid": "https://r2.ropensci.org/7b1941a962e065169bdc8d57e4f4a15c83914c8ba72ed29090484869b910e7f0",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/beniaminogreen/actions/runs/27612924087"
    },
    {
      "r": "4.6.0",
      "os": "linux",
      "version": "0.2.2",
      "date": "2026-06-16T11:10:04.000Z",
      "distro": "noble",
      "arch": "aarch64",
      "commit": "d441bcdea9d4b74fa141d460e07734337c653d39",
      "fileid": "https://r2.ropensci.org/f1930c3ccbbd65cc42e69759c279dbfeb9e57701b6414d78c205247e8cf73c9a",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/beniaminogreen/actions/runs/27612924087"
    },
    {
      "r": "4.6.0",
      "os": "linux",
      "version": "0.2.2",
      "date": "2026-06-16T11:09:32.000Z",
      "distro": "noble",
      "arch": "x86_64",
      "commit": "d441bcdea9d4b74fa141d460e07734337c653d39",
      "fileid": "https://r2.ropensci.org/f9f51eff02b1b4f394d887eb6cf7cc987071fb82af84f0ac60e2edd983e982ce",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/beniaminogreen/actions/runs/27612924087"
    },
    {
      "r": "4.5.3",
      "os": "mac",
      "version": "0.2.2",
      "date": "2026-06-16T11:08:23.000Z",
      "arch": "aarch64",
      "commit": "d441bcdea9d4b74fa141d460e07734337c653d39",
      "fileid": "https://r2.ropensci.org/97e7a1e432d88502d13ca858563ffac148e2c5a16d969059a6e390cfa0dba60c",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/beniaminogreen/actions/runs/27612924087"
    },
    {
      "r": "4.5.3",
      "os": "mac",
      "version": "0.2.2",
      "date": "2026-06-16T11:11:10.000Z",
      "arch": "x86_64",
      "commit": "d441bcdea9d4b74fa141d460e07734337c653d39",
      "fileid": "https://r2.ropensci.org/73e274eb39a65b9235abf176325b680cfa14a3c974dab2dbb9e0d1c23e24408b",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/beniaminogreen/actions/runs/27612924087"
    },
    {
      "r": "4.6.0",
      "os": "mac",
      "version": "0.2.2",
      "date": "2026-06-16T11:08:30.000Z",
      "arch": "aarch64",
      "commit": "d441bcdea9d4b74fa141d460e07734337c653d39",
      "fileid": "https://r2.ropensci.org/5da5fc46f403ed11d63996317f28e284562db52c4cc12317f6cf565810a72792",
      "status": "success",
      "check": "NOTE",
      "buildurl": "https://github.com/r-universe/beniaminogreen/actions/runs/27612924087"
    },
    {
      "r": "4.6.0",
      "os": "mac",
      "version": "0.2.2",
      "date": "2026-06-16T11:10:35.000Z",
      "arch": "x86_64",
      "commit": "d441bcdea9d4b74fa141d460e07734337c653d39",
      "fileid": "https://r2.ropensci.org/80b020d137b2819d3368c116d4e5cbdbfd83d0c1a8ab4b5aaff8a627e803c966",
      "status": "success",
      "check": "NOTE",
      "buildurl": "https://github.com/r-universe/beniaminogreen/actions/runs/27612924087"
    },
    {
      "r": "4.6.0",
      "os": "wasm",
      "version": "0.2.2",
      "date": "2026-06-16T11:09:29.000Z",
      "arch": "emscripten",
      "commit": "d441bcdea9d4b74fa141d460e07734337c653d39",
      "fileid": "https://r2.ropensci.org/45fbd47b187a03e02147d6a1834b8bf80728ad82bb9a6a36408a58de5dcc5024",
      "status": "success",
      "buildurl": "https://github.com/r-universe/beniaminogreen/actions/runs/27612924087"
    },
    {
      "r": "4.7.0",
      "os": "win",
      "version": "0.2.2",
      "date": "2026-06-16T11:08:29.000Z",
      "arch": "x86_64",
      "commit": "d441bcdea9d4b74fa141d460e07734337c653d39",
      "fileid": "https://r2.ropensci.org/f3d170041205159bcbd1269585d548b60d042c61c192f0a729c4fdfc70be690d",
      "status": "success",
      "check": "NOTE",
      "buildurl": "https://github.com/r-universe/beniaminogreen/actions/runs/27612924087"
    },
    {
      "r": "4.5.3",
      "os": "win",
      "version": "0.2.2",
      "date": "2026-06-16T11:08:25.000Z",
      "arch": "x86_64",
      "commit": "d441bcdea9d4b74fa141d460e07734337c653d39",
      "fileid": "https://r2.ropensci.org/c44f09f3cc95e350426821570ed27e116201926200fbd69b44b8ab6f689a7adc",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/beniaminogreen/actions/runs/27612924087"
    },
    {
      "r": "4.6.0",
      "os": "win",
      "version": "0.2.2",
      "date": "2026-06-16T11:08:19.000Z",
      "arch": "x86_64",
      "commit": "d441bcdea9d4b74fa141d460e07734337c653d39",
      "fileid": "https://r2.ropensci.org/dca24c106c9566c019d6327bd78b6d05e4a8eb031f8644b69ea7194440faa5a0",
      "status": "success",
      "check": "NOTE",
      "buildurl": "https://github.com/r-universe/beniaminogreen/actions/runs/27612924087"
    }
  ]
}