#' Decode `\uXXXX` escape sequences in a character vector
#'
#' Every literal backslash-u escape followed by exactly four hexadecimal
#' digits is replaced by the character it denotes. All elements of `x` are
#' processed, and the text is scanned only once, so characters produced by
#' decoding are never themselves re-interpreted as escapes.
#'
#' A high surrogate escape immediately followed by a low surrogate escape
#' (a UTF-16 pair such as `\ud83d\ude00`) is decoded into the single
#' supplementary-plane character it encodes. A surrogate escape without a
#' partner cannot be represented as a character and is left untouched.
#'
#' @param x A character vector (other types are coerced with
#'   `as.character()` when they contain escapes).
#' @return `x` itself when it contains no escapes; otherwise a character
#'   vector of the same length with the escapes decoded. `NA` elements
#'   stay `NA`.
unescape_unicode <- function(x) {
  if (length(x) == 0L) {
    return(x)
  }

  # Decode one run of adjacent escapes, e.g. "\\u0048\\u0069" -> "Hi".
  decode_run <- function(run) {
    # Each escape is exactly 6 characters: backslash, "u", 4 hex digits.
    starts <- seq.int(1L, nchar(run), by = 6L)
    escapes <- substring(run, starts, starts + 5L)
    codes <- strtoi(substring(escapes, 3L), 16L)
    n <- length(codes)

    is_high <- codes >= 0xD800L & codes <= 0xDBFFL
    is_low <- codes >= 0xDC00L & codes <= 0xDFFFL
    # The two ranges are disjoint, so pairs can never overlap.
    pair_start <- is_high & c(is_low[-1L], FALSE)
    pair_end <- c(FALSE, pair_start[-n])

    # Fold each UTF-16 surrogate pair into its real code point.
    lead <- which(pair_start)
    codes[lead] <- 0x10000L +
      (codes[lead] - 0xD800L) * 0x400L +
      (codes[lead + 1L] - 0xDC00L)

    pieces <- vapply(codes, intToUtf8, character(1))

    # Unpaired surrogates have no character form: keep the escape text.
    lone <- (is_high | is_low) & !pair_start & !pair_end
    pieces[lone] <- escapes[lone]

    paste(pieces[!pair_end], collapse = "")
  }

  chr <- as.character(x)
  # Match whole runs so that surrogate pairs are seen together.
  runs <- gregexpr("(?:\\\\u[0-9a-fA-F]{4})+", chr, perl = TRUE)
  # NA elements yield NA match data; treat them as "no match".
  has_match <- vapply(runs, function(m) isTRUE(m[1L] > 0L), logical(1))
  if (!any(has_match)) {
    return(x)
  }

  hit <- chr[has_match]
  hit_runs <- runs[has_match]
  # Position-based replacement: a single pass, no re-scanning of output.
  regmatches(hit, hit_runs) <- lapply(
    regmatches(hit, hit_runs),
    function(found) vapply(found, decode_run, character(1), USE.NAMES = FALSE)
  )

  out <- if (is.character(x)) x else chr
  out[has_match] <- hit
  out
}