Abstract
This process translates the data inside the 3 ESCO Pillars into transactions for market basket analysis.
# http://rmarkdown.rstudio.com/html_document_format.htm
# Timing log for the sourced scripts: it starts with a single 0 entry, and the
# clock for the first source starts at the current elapsed (wall-clock) time.
sourceTimeNeeded <- 0
source.starting.time <- proc.time()["elapsed"]
Information about the libraries, environment, sources used and their execution is reported. Additional information is provided within section tabs. Navigating through the report is also possible through the table of contents. Reported tables can be dynamically filtered, searched, ordered and exported into various formats.
# Record the installed version of every package named in `libraries`
# (expected to be a character vector defined before this chunk runs).
# vapply() replaces the former `for (i in 1:length(libraries))` loop, which
# iterated over c(1, 0) and failed for an empty `libraries`, and which grew the
# result with c() on every iteration.
librariesVersion <- vapply(
  libraries,
  function(pkg) paste(packageVersion(pkg)),
  character(1),
  USE.NAMES = FALSE
)
# Attach each package. require() is kept on purpose: it returns TRUE/FALSE
# instead of stopping, so `librariesLoaded` holds one load status per package.
librariesLoaded <- lapply(libraries, require, character.only = TRUE)
## Loading required package: dplyr
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:data.table':
##
## between, first, last
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## Loading required package: fst
## Loading required package: DT
## Loading required package: stringr
## Loading required package: magrittr
## Loading required package: arules
## Loading required package: Matrix
##
## Attaching package: 'arules'
## The following object is masked from 'package:dplyr':
##
## recode
## The following objects are masked from 'package:base':
##
## abbreviate, write
../000.core/00.01.libraries.R completed in 1.95 seconds
# Log how long the source that just finished took, then restart the clock for
# the next one.
sourceTimeNeeded <- append(sourceTimeNeeded, timeNeeded)
source.starting.time <- proc.time()["elapsed"]
## Base functions
# ESCO skills
# @authors ds@eworx.gr
# Root folder used as the default `baseFolder` of the path helpers below.
# Paths are built by plain string concatenation, so the trailing slash matters.
repository <- "/data/generic/"
# Build the full path of a repository file by gluing `filename` onto
# `baseFolder` (no separator is inserted).
getSourcePath <- function(filename, baseFolder = repository){
  paste0(baseFolder, filename)
}
# Read a delimited text file from the repository with data.table::fread().
#   filename, baseFolder : combined by getSourcePath() into the input path.
#   colClasses           : optional column classes; when empty, fread() guesses.
#   remaining arguments  : forwarded to fread() under the same names.
# Note: stringsAsFactors is only forwarded when no colClasses are supplied.
readData <- function(filename, colClasses = c(), baseFolder = repository, header = TRUE, sep = "\t", encoding = "UTF-8", stringsAsFactors = TRUE, na.strings = c("", "NULL"), verbose = FALSE){
  inputPath <- getSourcePath(filename, baseFolder)
  if (length(colClasses) > 0) {
    # Explicit column classes requested by the caller.
    return(data.table::fread(
      input = inputPath, colClasses = colClasses, header = header, sep = sep,
      encoding = encoding, verbose = verbose, showProgress = TRUE,
      na.strings = na.strings
    ))
  }
  data.table::fread(
    input = inputPath, header = header, sep = sep, encoding = encoding,
    stringsAsFactors = stringsAsFactors, verbose = verbose,
    showProgress = TRUE, na.strings = na.strings
  )
}
# Persist `data` under the repository as a binary file.
#   format = "rds" : compact on disk (saveRDS)
#   format = "fst" : fast to load (fst::write_fst)
# Any other format writes nothing. The target directory is created on demand.
# Note: `filename` must always be supplied; its default refers to itself.
saveBinary <- function(data, filename = filename, baseFolder = repository, format = "rds"){
  targetFile <- getSourcePath(filename, baseFolder)
  dir.create(dirname(targetFile), recursive = TRUE, showWarnings = FALSE)
  switch(format,
    rds = saveRDS(data, targetFile),
    fst = fst::write_fst(data, targetFile)
  )
}
# Load a binary file from the repository.
#   format = "rds" : readRDS()
#   format = "fst" : fst::read_fst(), as a data.table when as.data.table = TRUE
# Any other format yields NULL.
loadBinary <- function(filename, baseFolder = repository, format = "rds", as.data.table = TRUE){
  switch(format,
    rds = readRDS(getSourcePath(filename, baseFolder)),
    fst = fst::read_fst(getSourcePath(filename, baseFolder), as.data.table = as.data.table)
  )
}
# Describe the size of a table as text, e.g. "1,234 Rows X  5 Columns"
# (the row count is formatted with a thousands separator).
rowColumns <- function(data){
  rowCount <- format(nrow(data), big.mark=",")
  columnCount <- ncol(data)
  paste(rowCount, "Rows X ", columnCount, "Columns")
}
# The results storage is mounted in memory, so make sure the include.css asset
# is present there: copy it over when it is missing.
# Returns TRUE when the file already exists, otherwise the result of file.copy().
publishIncludeCss <- function(){
  cssSource <- "/data/jobs/wp41.analysis/000.core/include.css"
  cssTarget <- "/data/tmpfs/results/include.css"
  if (file.exists(cssTarget)) {
    return(TRUE)
  }
  file.copy(cssSource, cssTarget)
}
# Turn the output of summary() into a plain data.frame: the summary table is
# unclassed and its cells are kept as character strings with the original
# (unchecked) column headers.
# A previously considered alternative was do.call(cbind, lapply(data, summary)).
summariseTable <- function(data){
  data.frame(unclass(summary(data)), check.names = FALSE, stringsAsFactors = FALSE)
}
# Convert every character column of `data` into a factor and return the
# modified copy; other columns are left untouched.
# inherits() replaces `class(x) == "character"`, which produced a vector-valued
# `if` condition (a warning or an error, depending on the R version) for
# columns carrying more than one class.
factoriseCharacterColumns <- function(data){
  for (name in names(data)) {
    if (inherits(data[[name]], "character")) {
      data[[name]] <- as.factor(data[[name]])
    }
  }
  data
}
# Render a code book for `dataset`: the template file
# ../000.core/codeBook.template is expanded once per column name with
# knitr::knit_expand(), the expanded pieces are joined with newlines, knitted
# quietly, and the result is written out with cat().
# NOTE(review): the template presumably refers to `var_name` (and possibly
# `dataset`), which is why the anonymous function's argument has that name --
# confirm against the template before renaming anything here.
codeBook <- function(dataset){
out <- lapply(names(dataset), function(var_name) {
# The template file is re-read from disk for every column.
knitr::knit_expand(text = readLines("../000.core/codeBook.template"))
})
cat(
knitr::knit(
text = unlist(paste(out, collapse = '\n')),
quiet = TRUE)
)
}
# Write `data` as a csv and pack it into the zip archive `filename`.
#   data     : table passed to fwrite()
#   filename : path of the zip archive to create (an existing one is replaced)
#   quote    : forwarded to fwrite()
# The csv is written to the working directory under the archive's base name
# with ".zip" replaced by ".csv", and is removed again afterwards.
# Returns the status of zip() invisibly.
fwrite_zip <- function(data, filename, quote = TRUE){
  dir.create(dirname(filename), recursive = TRUE, showWarnings = FALSE)
  # Only a literal trailing ".zip" is replaced. The former pattern ".zip" was an
  # unanchored regex that also rewrote e.g. "my_zipfile.zip" to "m.csvfile.csv".
  filename_csv <- sub("\\.zip$", ".csv", basename(filename))
  # Clean up the intermediate csv even when writing or zipping fails.
  on.exit(unlink(filename_csv), add = TRUE)
  fwrite(data, filename_csv, quote = quote)
  if (file.exists(filename)) unlink(filename)
  zip(filename, filename_csv)
}
############################
# https://rstudio.github.io/DT/010-style.html
#https://rpubs.com/marschmi/RMarkdown
# Upper-case the first character of each string and leave the rest untouched.
capitalise <- function(x) {
  firstLetter <- toupper(substring(x, 1, 1))
  remainder <- substring(x, 2, nchar(x))
  paste0(firstLetter, remainder)
}
# Add an in-cell colour bar to one column of a DT widget.
#   data                 : the table behind the widget (supplies the bar scale)
#   result               : the DT widget to style
#   columnName, color    : column to style and the bar colour
#   columnsName_original : column names actually present in `data`
# The widget is returned unchanged when the column is not present.
styliseDTNumericalColumn <- function(data, result, columnName, color, columnsName_original ){
  if (!(columnName %in% columnsName_original)) {
    return(result)
  }
  formatStyle(
    result,
    columnName,
    background = styleColorBar(data[[columnName]], color),
    backgroundSize = '100% 90%',
    backgroundRepeat = 'no-repeat',
    backgroundPosition = 'center'
  )
}
# Build the interactive report table (DT widget) for `data`:
# capitalised headers, per-column filters, 20 rows per page, export buttons
# (copy/csv/excel/pdf), search highlighting and a bordered header.
# Columns named Count, sourceTimeNeeded, timeNeeded and percentMatch get an
# in-cell colour bar when they are present.
reportTabularData <- function(data){
  columnsName_original <- names(data)
  columnsName <- lapply(columnsName_original, capitalise)

  # Draws a border around the table header once the table is initialised.
  headerBorder <- JS(
    "function(settings, json) {",
    "$(this.api().table().header()).css({'border': '1px solid'});",
    "}"
  )
  tableOptions <- list(
    pageLength = 20,
    columnDefs = list(list(className = 'dt-left', targets = "_all")),
    dom = 'Bfrtip',
    buttons = c('copy', 'csv', 'excel', 'pdf'),
    searchHighlight = TRUE,
    initComplete = headerBorder
  )
  result <- DT::datatable(
    data,
    class = 'cell-border stripe',
    filter = 'top',
    rownames = FALSE,
    colnames = columnsName,
    extensions = 'Buttons',
    options = tableOptions
  )

  # Column -> bar colour, applied in this order.
  # ('#5fba7d' was an earlier colour choice for percentMatch.)
  barColours <- c(
    Count = 'steelblue',
    sourceTimeNeeded = '#808080',
    timeNeeded = '#808080',
    percentMatch = '#4682b4'
  )
  for (columnName in names(barColours)) {
    result <- styliseDTNumericalColumn(
      data, result, columnName, barColours[[columnName]], columnsName_original
    )
  }
  result
}
# Font-family overrides: each listed family (sans, mono, Times New Roman) is
# mapped to "DejaVu Serif".
# NOTE(review): presumably consumed by a graphics/plot font mapping elsewhere;
# the consumer is not visible in this file -- confirm.
fonts <- list(
sans = "DejaVu Serif",
mono = "DejaVu Serif",
`Times New Roman` = "DejaVu Serif"
)
#read_xml_to_list <- function(filepath, is.gz = FALSE){
# if(is.gz){
# temp_data <- paste0(repository, "data/delete.me")
# result <- xmlToList(xmlParse(gunzip(filepath, destname = temp_data, remove =FALSE)))
# Sys.chmod(file.path(temp_data), "777", use_umask = FALSE)
# unlink(temp_data)
# result
# }else{
# xmlToList(xmlParse(filepath))
# }
#}
#transpose_list_to_dt <- function(data_list){
# dt <- t(as.data.table(data_list))
# dt <- as.data.table(dt)
# dt[, (names(dt)) := lapply(.SD, unlist), .SDcols = 1:ncol(dt)]
# dt[, (names(dt)) := lapply(.SD, unlist), .SDcols = 1:ncol(dt)]
# names(dt) <- names(data_list[[1]])
# dt
#}
# Normalise free text (typically HTML snippets) for text mining.
# Each step can be switched off through its flag; steps run in this order:
#   rem.html              strip HTML tags
#   rem.http              strip http/https/ftp destinations
#   rem.newline           turn CR/LF/TAB into spaces
#   rem.nonalphanum       replace every non-letter (digits included) by a space
#   rem.longwords         drop runs of 35 or more word characters
#   rem.space             collapse repeated whitespace
#   tolower               lower-case the text
#   add.space.to.numbers  separate digits from adjacent letters
#   rem.space.begin / rem.country.begin / rem.nonalphanum.begin
#                         clean up the beginning of the text
# Returns the cleaned text with surrounding whitespace trimmed.
cleansingCorpus <- function(
  htmlString, rem.html =TRUE, rem.http = TRUE, rem.newline = TRUE,
  rem.nonalphanum = TRUE, rem.longwords = TRUE, rem.space = TRUE,
  tolower = TRUE, add.space.to.numbers = TRUE, rem.country.begin = FALSE,
  rem.nonalphanum.begin = FALSE, rem.space.begin = FALSE
){
  # Start from the raw input so every later step has a value to work on.
  # Previously `text` was only created by the rem.html step, so the function
  # failed whenever rem.html = FALSE.
  text <- htmlString
  if (rem.html) text <- gsub("<.*?>", " ", text) # removing html commands
  if (rem.http) text <- gsub(" ?(f|ht)tp(s?)://(.*)[.][a-z]+", " ", text) # removing http destinations
  if (rem.newline) text <- gsub("[\r\n\t]", " ", text)
  if (rem.nonalphanum) text <- gsub("[^[:alpha:]]", " ", text) # removing everything that is not a letter
  if (rem.longwords) text <- gsub("\\w{35,}", " ", text) # removing words of 35 or more characters
  if (rem.space) text <- gsub("\\s+", " ", text) # removing excess space
  if (tolower) text <- tolower(text)
  if (add.space.to.numbers) { # add space between number and letters
    text <- gsub("([0-9])([[:alpha:]])", "\\1 \\2", text)
    text <- gsub("([[:alpha:]]|[.])([0-9])", "\\1 \\2", text)
  }
  if (rem.space.begin) text <- gsub("^[[:space:]]*", "", text)
  if (rem.country.begin) text <- gsub("^EU", "", text) # remove country codes from the beginning of the text
  if (rem.nonalphanum.begin) text <- gsub("^[?–-]*", "", text) # remove special characters identified in the beginning of text
  # Leading whitespace is removed again because the two steps above may expose more.
  if (rem.space.begin) text <- gsub("^[[:space:]]*", "", text)
  trimws(text)
}
# Normalise education-related text: drop periods, turn other punctuation into
# spaces, collapse whitespace, lower-case, replace accented Greek vowels (and
# final sigma) by their plain counterparts, and trim surrounding whitespace.
cleansingEducationCorpus <- function(text) {
  noPeriods <- gsub("\\.", "", text)
  noPunctuation <- gsub("[[:punct:]]", " ", noPeriods)
  singleSpaced <- gsub("\\s+", " ", noPunctuation)
  text <- tolower(singleSpaced)

  # Greek characters to replace and their unaccented replacements (pairwise).
  accented <- c("ς", "ά", "έ", "ή", "ί", "ύ", "ό", "ώ")
  plain <- c("σ", "α", "ε", "η", "ι", "υ", "ο", "ω")
  for (i in seq_along(accented)) {
    text <- gsub(accented[i], plain[i], text)
  }
  trimws(text)
}
# Remove date "relics" left over from the xml parsing. Two shapes are replaced
# by a single space each:
#   1. "<Weekday>, <Month> <d or dd>, <yyyy>"
#   2. a question mark directly followed by four digits
removeDates <- function(text){
  weekdayPattern <- "(Sunday,|Monday,|Tuesday,|Wednesday,|Thursday,|Friday,|Saturday,)"
  monthPattern <- "(January|February|March|April|May|June|July|August|September|October|November|December|Months)"
  dayAndYearPattern <- "([0-9]|[0-9][0-9]), [0-9][0-9][0-9][0-9]"
  longDatePattern <- paste(weekdayPattern, monthPattern, dayAndYearPattern)
  questionMarkYearPattern <- "\\?[0-9][0-9][0-9][0-9]"

  withoutLongDates <- gsub(longDatePattern, " ", text)
  gsub(questionMarkYearPattern, " ", withoutLongDates)
}
# Extract the text of the requested child elements of every <item> node and
# return them as a data.table with one column per element name.
#   xmlData   : parsed xml document / node set to search in
#   itemNames : names of the <item> children to extract
xmlToDataTable <- function(xmlData, itemNames){
  itemList <- lapply(itemNames, function(itemName){
    xml_text(xml_find_all(xmlData, paste0(".//item/", itemName)))
  })
  # Columns are named after the requested elements. The former code used
  # `xmlItems`, a variable that is not defined in this function.
  names(itemList) <- itemNames
  as.data.table(itemList)
}
# Convert HTML fragments to plain text, one result per input element.
# List items (<li>) often start with tag keywords, so their text is moved to
# the end: the description then starts with the main content while the list
# information is kept. Empty strings are returned as "".
cleanCorpusHtml <- function(text){
  htmlToText <- function(fragment){
    if (!(nchar(fragment) > 0)) {
      return("")
    }
    # add spaces after html tags so neighbouring texts aren't concatenated
    spaced <- gsub(">","> ", fragment)
    document <- read_xml(spaced, as_html = TRUE)
    listItems <- xml_find_all(document, ".//li")
    xml_remove(listItems)
    mainText <- paste(xml_text(document), collapse ="")
    listText <- paste(xml_text(listItems) , collapse ="")
    gsub("\\s+"," ", paste(mainText, listText, collapse =""))
  }
  unlist(lapply(text, htmlToText))
}
# Split a vector into `n_chunks` consecutive chunks of equal size; the last
# chunk additionally receives the remainder.
#   vct      : vector to split
#   n_chunks : number of chunks (a single number >= 1)
# Returns a list of length `n_chunks`.
# Indices are built with seq_len(), so n_chunks = 1 and vectors shorter than
# n_chunks work. The former `seq(n_chunks - 1)` loop failed for n_chunks = 1,
# and a chunk size of 0 produced reversed index ranges such as 1:0.
equal_split <- function(vct, n_chunks) {
  stopifnot(length(n_chunks) == 1, n_chunks >= 1)
  lim <- length(vct)
  fstep <- lim %/% n_chunks
  lapply(seq_len(n_chunks), function(i) {
    offset <- (i - 1) * fstep
    # Every chunk holds `fstep` elements, except the last one which runs to the end.
    chunkSize <- if (i < n_chunks) fstep else lim - offset
    vct[offset + seq_len(chunkSize)]
  })
}
# Return the ids of the `numHead` smallest values of `vct`, smallest first.
# Values that are not above `threshold` are pushed to the end by treating them
# as Inf.
#   vct   : numeric values
#   idVec : ids aligned with `vct`
getThresholdOrderRwmd <- function(vct, idVec, threshold = 1e-6, numHead = 10){
  thresholded <- ifelse(vct > threshold, vct, Inf)
  closest <- head(order(thresholded), numHead)
  idVec[closest]
}
# Collect one attribute from all descendant elements of a given type,
# e.g. the href of every <a> below `nodes`.
maintainElements <- function(nodes, elementType = "a", attribute = "href"){
  xpath <- paste0(".//", elementType)
  matchedElements <- xml_find_all(nodes, xpath)
  xml_attr(matchedElements, attribute)
}
# Wrap extracted attribute values in a data.table labelled with their element
# type; an empty data.table is returned when nothing was extracted.
elementsToDataTable <- function(result, elementType){
  if (length(result) == 0) {
    return(data.table())
  }
  data.table(elementType = elementType, attributeValue = result)
}
# Retrieve the urls referenced in a feed item: link targets (<a href>) and
# image sources (<img src>), stacked into one data.table.
keepHtmlElements <- function(feedItem){
  nodes <- read_xml(paste0("<div>", feedItem, "</div>"), as_html = TRUE)
  links <- elementsToDataTable(maintainElements(nodes, "a", "href"), "link")
  images <- elementsToDataTable(maintainElements(nodes, "img", "src"), "image")
  # All "img-src" are NA
  imageSources <- elementsToDataTable(maintainElements(nodes, "img-src", "src"), "image")
  rbind(links, images, imageSources)
}
# List the query-string parameters of an http request url as a data.table with
# columns varName and value; parameters with an empty value are dropped.
getQueryParams <- function(url){
  parsedQuery <- httr::parse_url(url)$query
  queryValues <- unlist(parsedQuery)
  params <- data.table(varName = names(parsedQuery), value = queryValues)
  params[queryValues != ""]
}
# Keep the part of `string` that follows the last underscore and precedes the
# first dot after it, e.g. "data_file_GR.csv" -> "GR".
keepCountryName <- function(string){
  afterLastUnderscore <- gsub(".*_", "", string)
  gsub("\\..*", "", afterLastUnderscore)
}
# For each string: split on spaces, sort the tokens, and drop every token that
# equals the token `num` positions further on in the sorted order. The
# remaining tokens are joined back with single spaces.
keepNTokens <- function(string, num){
  dropRepeatedTokens <- function(tokens){
    sortedTokens <- sort(tokens)
    tokensAhead <- shift(sortedTokens, -num, fill = FALSE)
    paste(sortedTokens[sortedTokens != tokensAhead], collapse = " ")
  }
  sapply(strsplit(string, split = " "), dropRepeatedTokens)
}
# Compute tf-idf weights per (class, term).
#   corpus    : data.table with columns `text` (space separated tokens) and
#               `code` (the class / document id)
#   stopwords : terms to ignore
#   normalize : "double" (0.5 + 0.5 * count / max count) or "log" (log(1 + count))
#   min_char  : terms must be longer than this many characters
# Returns a data.table with columns term, class and tfIdf.
# NOTE(review): for "double" the maximum count is taken over the whole table,
# not per class -- confirm this is intended.
findTFIDF <- function(corpus, stopwords, normalize = "double", min_char = 1) {
  # Fail early with a clear message; an unknown value used to surface later as
  # a missing `tf` column.
  if (length(normalize) != 1 || !normalize %in% c("double", "log")) {
    stop("normalize must be \"double\" or \"log\"", call. = FALSE)
  }
  tokensList <- strsplit(corpus[, text], " ")
  names(tokensList) <- corpus[, code]
  tokensDT <- lapply(tokensList, as.data.table) %>%
    rbindlist(idcol = TRUE) %>%
    setnames(c("class", "term"))
  tokensDT <- tokensDT[!term %in% stopwords][nchar(term) > min_char]
  #inverse document frequency smooth
  idfDT <- tokensDT[!duplicated(tokensDT)][, .(docFreq = .N), by = "term"]
  idfDT[, idf := log(length(unique(tokensDT$class)) / (docFreq + 1)) + 1]
  tfDT <- tokensDT[, .(term_count = .N), by = c("class", "term")]
  if (normalize == "double") tfDT[, tf := 0.5 + 0.5 * term_count / max(term_count)]
  if (normalize == "log") tfDT[, tf := log(1 + term_count)]
  # merge() takes `by`, not `on`. The former `on = "term"` was not a merge
  # argument, so the join only worked through the default of joining on the
  # shared column.
  merge(tfDT, idfDT, by = "term")[, tfIdf := tf * idf][, .(term, class, tfIdf)]
}
# Flatten a nested list of titles into a data.table with columns title and
# code, dropping consecutive duplicate titles within a code.
# Returns NULL for an empty list.
# NOTE(review): the names of the flattened vector are assumed to look like
# "<code>.<letter>" (the variable name suggests male/female variants) -- confirm.
tidyJsonData <- function(jsonList){
if(length(jsonList) == 0)return(NULL)
unlistOccupations <- jsonList %>% unlist
codesMaleFemale <- names(unlistOccupations)
# data.table() names the single column after the variable ("unlistOccupations");
# the expressions below rely on that column name.
epasMapping <- data.table(unlistOccupations)
# Strip a trailing ".<letter>" from each name to obtain the code.
epasMapping[ , code := gsub("\\.[[:alpha:]]$", "", codesMaleFemale)]
# Within each code, flag every title that differs from the following one; the
# last title of a group is compared with "FALSE" and is therefore kept unless
# it is literally "FALSE".
# NOTE(review): grouped results come back group by group, which matches the row
# order only if rows sharing a code are contiguous -- confirm.
uniqueMappingBoolean <- epasMapping[ , unlistOccupations != c(unlistOccupations[-1], F), by = code]$V1
codesEpasDB <- epasMapping[uniqueMappingBoolean]
names(codesEpasDB) <- c("title", "code")
# Normalise the titles with cleansingCorpus() (updates the column by reference).
codesEpasDB[ , title := cleansingCorpus(title)]
}
# Count the n-grams of `corpus` and list them by decreasing document count.
#   min_n, max_n : smallest and largest n-gram size (max_n defaults to min_n)
#   stopWords    : terms to exclude from the vocabulary
# Returns a data.table with columns term and count (the document count).
findNGrams <- function(corpus, min_n, max_n = min_n, stopWords = NA_character_) {
  tokenIterator <- itoken(corpus, tokenizer = word_tokenizer, progressbar = FALSE)
  vocabulary <- create_vocabulary(tokenIterator, c(min_n, max_n), stopwords = stopWords, sep_ngram = " ")
  ngrams <- as.data.table(vocabulary)
  ngrams[order(-doc_count)][, .(term, count = doc_count)]
}
# Pipe variant that silences warnings: evaluates `lhs %>% rhs` while
# options(warn = -1) is in effect, then restores the previous warning level.
`%W>%` <- function(lhs,rhs){
# Remember the current warning level ...
w <- options()$warn
# ... and restore it when this function exits, even if the piped call fails.
on.exit(options(warn=w))
options(warn=-1)
# Rebuild the call as `lhs %>% rhs` from the unevaluated arguments and evaluate
# it in the caller's frame, so names resolve as for a pipe written at the call site.
eval.parent(substitute(lhs %>% rhs))
}
# Return the stop words for `locale`, or "" when neither the "misc" nor the
# "snowball" source supports that language. Warnings raised while fetching the
# list are suppressed.
getStopwords <- function(locale) {
  supportedLocales <- c(
    stopwords_getlanguages(source = "misc"),
    stopwords_getlanguages(source = "snowball")
  )
  if (!(locale %in% supportedLocales)) {
    return("")
  }
  locale %W>% stopwords
}
###################################################################################################
# Text translation
###################################################################################################
# Translate a character vector by sending it to requestTranslation() in batches.
#   sourceText      : texts to translate
#   sourceLang      : source language code, forwarded to requestTranslation()
#   translationLang : target language code, forwarded to requestTranslation()
#   batchSize       : upper bound used when accumulating query lengths per batch
# Returns "" for empty input, the direct translation for a single text, NA for
# more than 4800 texts, otherwise the translated pieces split on ">" and trimmed.
# NOTE(review): the 4800 limit below is hard-coded, counts texts rather than
# characters and ignores `batchSize` -- confirm intent.
translateText <- function(sourceText, sourceLang, translationLang, batchSize = 4800) {
if (length(sourceText) == 0) {
return ("")
} else if (length(sourceText) == 1) {
return (requestTranslation(sourceText, sourceLang, translationLang))
} else if (length(sourceText) > 4800) {
return (NA)
}
# Wrap every text in "< \n" ... "\n >" markers so the pieces can be separated
# again after translation.
sourceQueries <- gsub("$", "\n >", sourceText)
sourceQueries <- gsub("^", "< \n", sourceQueries)
# `size` is the length of each wrapped query; `batch` starts equal to `size` and
# becomes the running length of the batch the row belongs to.
queries <- data.table(query = sourceQueries, size = nchar(sourceQueries), batch = nchar(sourceQueries))
for (row in seq(nrow(queries) - 1)) {
# Running total if the next row joined the current batch (+3 per join).
cumulativeSum <- queries[row, batch] + queries[row + 1, batch] + 3
# Over the limit: the next row keeps its own size, which marks a new batch.
queries[row + 1, batch := ifelse(cumulativeSum > batchSize, batch, cumulativeSum)]
}
# Rows whose running total equals their own size open a batch.
batchStarts <- which(queries[, size] == queries[, batch])
batchFins <- c(batchStarts[-1] - 1, nrow(queries))
batches <- lapply(seq_along(batchStarts), function(i) batchStarts[i]:batchFins[i])
# NOTE(review): `batch` is both the lambda argument and a column of `queries`;
# this relies on data.table resolving a bare symbol in `i` from the calling
# scope -- confirm.
pastedQueries <- lapply(batches, function(batch) paste0(queries[batch, query], collapse = "\n")) %>% unlist()
translatedText <- lapply(pastedQueries, requestTranslation, sourceLang, translationLang) %>%
unlist() %>%
paste0(collapse = " ")
# Drop the opening markers, then split on the closing markers.
translatedText <- gsub("\\s?<", "", translatedText)
gsub(">$", "", translatedText) %>%
space_tokenizer(sep = ">") %>%
unlist() %>%
trimws()
}
# Request the translation of one text from the mobile Google Translate page.
# The url-encoded text is sent with a browser-like user agent, and the text of
# the 5th child of the 2nd child of the returned HTML document is returned.
requestTranslation <- function(sourceText, sourceLang, translationLang) {
  encodedText <- URLencode(sourceText, reserved = TRUE)
  googleTranslateURL <- paste0(
    "https://translate.google.com/m",
    "?hl=", sourceLang,
    "&sl=", sourceLang,
    "&tl=", translationLang,
    "&ie=UTF-8&prev=_m&q=", encodedText
  )
  response <- GET(googleTranslateURL, add_headers("user-agent" = "Mozilla/5.0"))
  page <- read_html(response)
  translationNode <- xml_child(xml_child(page, 2), 5)
  unlist(xml_text(translationNode))
}
# Two-letter country codes treated as EU countries by this project
# (28 entries; "EL" and "UK" are used here rather than "GR" and "GB").
euCountries <- c("BE", "BG", "CZ", "DK", "DE", "EE", "IE", "EL", "ES", "FR",
"HR", "IT", "CY", "LV", "LT", "LU", "HU", "MT", "NL", "AT",
"PL", "PT", "RO", "SI", "SK", "FI", "SE", "UK")
# Two-letter codes of the non-EU countries handled alongside them.
neuCountries <- c("ME", "MK", "AL", "RS", "TR")
# Full path of a repository file: `baseFolder` immediately followed by
# `filename` (no separator is inserted).
getSourcePath <- function(filename, baseFolder = repository) {
  fullPath <- paste0(baseFolder, filename)
  fullPath
}
# Load an rds/fst file from the repository into a variable of .GlobalEnv.
#   filename      : file path relative to `baseFolder`; its extension selects
#                   the reader
#   checkIfExists : when TRUE, skip loading if the variable already exists
#   varName       : target variable name; defaults to the file name up to its
#                   first dot
# Returns "exists in .GlobalEnv" or "unknown format" when nothing was loaded.
load_global <- function(filename, baseFolder = repository, checkIfExists = TRUE, varName = NULL){
  obj <- sub('.*/', '', filename)
  format <- tolower(sub('.*\\.', '', obj))
  if (is.null(varName)) {
    varName <- sub('\\..*', '', obj)
  }
  if (checkIfExists && exists(x = varName, envir = .GlobalEnv)) {
    return("exists in .GlobalEnv")
  }
  if (!format %in% c("rds", "fst")) {
    return("unknown format")
  }
  if (format == "rds") {
    assign(varName, readRDS(getSourcePath(filename, baseFolder)), envir = .GlobalEnv)
    return(invisible(NULL))
  }
  # Only "fst" is left at this point.
  assign(
    varName,
    fst::read_fst(getSourcePath(filename, baseFolder), as.data.table = TRUE),
    envir = .GlobalEnv
  )
}
#loadBinary <- function(filename, baseFolder = repository){
# obj <- sub('.*/', '', filename)
# format <- tolower(sub('.*\\.', '', obj))
# if(!format %in% c("rds", "fst", "csv"))return("unknown format")
# if(format == "rds")return(readRDS(getSourcePath(filename, baseFolder)))
# if(format == "fst")return(fst::read_fst(getSourcePath(filename, baseFolder), as.data.table = TRUE))
#}
#
#saveBinary <- function(data, filename = filename, baseFolder = repository){
# obj <- sub('.*/', '', filename)
# format <- tolower(sub('.*\\.', '', obj))
# if(!format %in% c("rds", "fst"))return("unknown format")
# if(format == "rds"){saveRDS(data, getSourcePath(filename, baseFolder))}
# if(format == "fst"){fst::write_fst(data, getSourcePath(filename, baseFolder))}
#}
# Normalise free text (typically HTML snippets) for text mining.
# Each step can be switched off through its flag; steps run in this order:
#   rem.html              strip HTML tags
#   rem.http              strip http/https/ftp destinations
#   rem.newline           turn CR/LF/TAB into spaces
#   rem.nonalphanum       replace every non-letter (digits included) by a space
#   rem.longwords         drop runs of 35 or more word characters
#   rem.space             collapse repeated whitespace
#   tolower               lower-case the text
#   add.space.to.numbers  separate digits from adjacent letters
#   rem.space.begin / rem.country.begin / rem.nonalphanum.begin
#                         clean up the beginning of the text
#   fix.greek             replace accented Greek vowels and final sigma
# Returns the cleaned text with surrounding whitespace trimmed.
cleansingCorpus <- function(
  htmlString, rem.html =TRUE, rem.http = TRUE, rem.newline = TRUE,
  rem.nonalphanum = TRUE, rem.longwords = TRUE, rem.space = TRUE,
  tolower = TRUE, add.space.to.numbers = TRUE, rem.country.begin = FALSE,
  rem.nonalphanum.begin = FALSE, rem.space.begin = FALSE, fix.greek = TRUE
){
  # Start from the raw input so every later step has a value to work on.
  # Previously `text` was only created by the rem.html step, so the function
  # failed whenever rem.html = FALSE.
  text <- htmlString
  if (rem.html) text <- gsub("<.*?>", " ", text) # removing html commands
  if (rem.http) text <- gsub(" ?(f|ht)tp(s?)://(.*)[.][a-z]+", " ", text) # removing http destinations
  if (rem.newline) text <- gsub("[\r\n\t]", " ", text)
  if (rem.nonalphanum) text <- gsub("[^[:alpha:]]", " ", text) # removing everything that is not a letter
  if (rem.longwords) text <- gsub("\\w{35,}", " ", text) # removing words of 35 or more characters
  if (rem.space) text <- gsub("\\s+", " ", text) # removing excess space
  if (tolower) text <- tolower(text)
  if (add.space.to.numbers) { # add space between number and letters
    text <- gsub("([0-9])([[:alpha:]])", "\\1 \\2", text)
    text <- gsub("([[:alpha:]]|[.])([0-9])", "\\1 \\2", text)
  }
  if (rem.space.begin) text <- gsub("^[[:space:]]*", "", text)
  if (rem.country.begin) text <- gsub("^EU", "", text) # remove country codes from the beginning of the text
  if (rem.nonalphanum.begin) text <- gsub("^[?–-]*", "", text) # remove special characters identified in the beginning of text
  # Leading whitespace is removed once more because the two steps above may
  # expose more. The statement used to be repeated twice in a row, which had no
  # additional effect.
  if (rem.space.begin) text <- gsub("^[[:space:]]*", "", text)
  if (fix.greek) {
    # Greek characters to replace and their unaccented replacements (pairwise).
    accented <- c("ς", "ά", "έ", "ή", "ί", "ύ", "ό", "ώ")
    plain <- c("σ", "α", "ε", "η", "ι", "υ", "ο", "ω")
    for (i in seq_along(accented)) {
      text <- gsub(accented[i], plain[i], text)
    }
  }
  trimws(text)
}
# Wrap axis labels and describe the wrapped result.
#   text           : character vector or factor of labels
#   wrapCharLength : target line width passed to strwrap()
#   lineCountMax   : line count used to scale textLinesCountDiff
# Returns a data.table with one row per label:
#   textWrapped           label with "\n" between wrapped lines (an ordered
#                         factor following the level order when `text` is a factor)
#   textLinesCount        number of lines
#   textLinesMaxNchar     length of the longest line (0 if there are no lines)
#   textValuePrependLines one "\n" per line
#   textLinesNcharDiff    textLinesMaxNchar / wrapCharLength
#   textLinesCountDiff    textLinesCount / lineCountMax
getAesXTextWrapFeatures <- function(text, wrapCharLength = 20, lineCountMax = 5){
  textBroken <- lapply(text, strwrap, width = wrapCharLength)
  textWrapped <- vapply(textBroken, paste, character(1), collapse = "\n")
  if (is.factor(text)) {
    levels <- levels(text)
    levelsBroken <- lapply(levels, strwrap, width = wrapCharLength)
    levelsWrapped <- vapply(levelsBroken, paste, character(1), collapse = "\n")
    textWrapped <- factor(textWrapped, levels = levelsWrapped, ordered = TRUE)
  }
  textLinesCount <- lengths(textBroken)
  # Longest line per label, computed label by label. The former nested
  # sapply(sapply(textBroken, nchar), max) collapsed to a matrix whenever all
  # labels wrapped to the same number (> 1) of lines, and the head() workaround
  # then returned the line lengths of the first labels instead of one maximum
  # per label.
  textLinesMaxNchar <- vapply(
    textBroken,
    function(lines) max(nchar(lines), 0L),
    integer(1)
  )
  textValuePrependLines <- vapply(
    textLinesCount,
    function(lineCount) paste0(rep("\n", lineCount), collapse = ""),
    character(1)
  )
  textLinesNcharDiff <- textLinesMaxNchar / wrapCharLength
  textLinesCountDiff <- textLinesCount / lineCountMax
  # The variable names double as column names, so they must not be renamed.
  data.table(
    textWrapped,
    textLinesCount,
    textLinesMaxNchar,
    textValuePrependLines,
    textLinesNcharDiff,
    textLinesCountDiff
  )
}
# Translate two-letter country codes into country names (as a factor).
# Codes are upper-cased first; "EL" is looked up as "GR" and "EN"/"UK" as "GB".
# Kosovo ("XK") is added to the ISO_3166_1 lookup table.
country_name <- function(vec){
  kosovo <- data.table(Alpha_2 = c("XK"), Name = c("Kosovo"))
  countries <- rbind(ISO_3166_1[, c("Alpha_2", "Name")], kosovo, fill = TRUE)
  vec <- toupper(vec)
  vec[vec %in% "EL"] <- "GR"
  vec[vec %in% c("EN", "UK")] <- "GB"
  as.factor(countries[match(vec, countries$Alpha_2), ]$Name)
}
# Translate two-letter language codes into language names (as a factor).
# The ISO_639_2 table is extended with the Serbian script variants "sr-cyr" and
# "sr-lat"; codes are lower-cased before the lookup and the matched name is cut
# at the first character matched by the pattern below.
language_name <- function(vec){
  serbianScripts <- data.table(
    Alpha_3_B = rep("", 2),
    Alpha_3_T = rep("", 2),
    Alpha_2 = c("sr-cyr", "sr-lat"),
    Name = c("Serbian Cyrillic", "Serbian Latin")
  )
  languages <- rbind(serbianScripts, ISO_639_2)
  vec <- tolower(vec)
  matchedNames <- languages[match(vec, languages$Alpha_2), ]$Name
  as.factor(gsub('[^[:alnum:]^[:space:]].*', '', matchedNames))
}
# Load the demonyms lookup table from the repository when this file is sourced
# (file I/O) and give its five columns fixed names; nationality_name() reads
# this table.
demonyms <- fread(getSourcePath("input/misc/Demonyms-List.csv", repository))
setnames(demonyms, c("ISO_3166", "Country", "Demonym_1", "Demonym_2", "Demonym_3"))
# Translate two-letter country codes into nationalities (as a factor), using
# the first demonym of the `demonyms` table cut at its first non-alphanumeric
# character. Codes are upper-cased first; "EL" is looked up as "GR" and
# "EN"/"UK" as "GB". Missing codes stay NA.
nationality_name <- function(vec){
  vec <- toupper(vec)
  vec[vec %in% "EL"] <- "GR"
  vec[vec %in% c("EN", "UK")] <- "GB"
  firstDemonym <- demonyms[match(vec, ISO_3166)]$Demonym_1
  firstDemonym <- gsub('[^[:alnum:]].*', '', firstDemonym)
  as.factor(ifelse(is.na(vec), NA, firstDemonym))
}
# Translate gender codes ("F", "M") into labels ("Female", "Male"), returned as
# a factor; unknown codes become NA.
gender_name <- function(x){
  genders <- data.table(
    value = c("F", "M"),
    name = c("Female", "Male")
  )
  as.factor(genders[match(x, genders$value), name])
}
# Bin years into consecutive groups of `frame` years starting at `firstYear`,
# labelled "<first>-<last>" and returned as an ordered factor. The last group is
# the one that contains the latest non-missing year. Years outside every group
# and missing years become NA.
year_groups <- function(year, firstYear = 1941, frame = 5){
  year <- as.numeric(year)
  firstYear <- as.numeric(firstYear)
  latestYear <- max(year[!is.na(year)])
  # First year of the group that contains the latest year.
  lastYear <- latestYear - ((latestYear - firstYear) %% frame)
  groupNum <- ((lastYear - firstYear) / frame) + 1
  lowerBounds <- firstYear + frame * (seq(groupNum) - 1)
  upperBounds <- lowerBounds + frame - 1
  yearLevels <- paste0(lowerBounds, "-", upperBounds)

  yearGroup <- rep(NA, length(year))
  for (i in seq_along(yearLevels)) {
    inGroup <- year >= lowerBounds[i] & year <= upperBounds[i]
    yearGroup <- ifelse(inGroup, yearLevels[i], yearGroup)
  }
  ordered(yearGroup, yearLevels)
}
# Translate an employment flag into a label, returned as a factor:
# TRUE -> "Employed", FALSE -> "Unemployed", anything else -> NA.
employment_name <- function(x){
  employment <- data.table(
    value = c(TRUE, FALSE),
    name = c("Employed", "Unemployed")
  )
  as.factor(employment[match(x, employment$value), name])
}
# Translate broad category codes into readable labels, returned as a factor.
# A missing code (NA) is labelled "Unknown"; unrecognised codes become NA.
broad_category_name <- function(x){
  categories <- data.table(
    value = c("ForeignLanguage", "MotherTongue", "Computer", "JobRelated", "Organisational", "Communication", "Certificate", "Driving", NA),
    name = c("Foreign Language", "Mother Tongue", "Computer", "Job Related", "Organisational", "Communication", "Certificate", "Driving", "Unknown")
  )
  as.factor(categories[match(x, categories$value), name])
}
# Translate a studying flag into a label, returned as a factor:
# TRUE -> "Currently studying", FALSE -> "No ongoing studies", else NA.
studying_name <- function(x){
  studies <- data.table(
    value = c(TRUE, FALSE),
    name = c("Currently studying", "No ongoing studies")
  )
  as.factor(studies[match(x, studies$value), name])
}
# Translate EQF levels "1".."8" into labels, returned as an ordered factor.
# `eqfLevelLevels` gives the label of each of the eight levels in order; by
# default levels 1 to 4 share the label "Level 1-4".
eqf_level_name <- function(x, eqfLevelLevels = c(rep("Level 1-4", 4), "Level 5", "Level 6", "Level 7", "Level 8")){
  eqfLevels <- data.table(
    value = c("1", "2", "3", "4", "5", "6", "7", "8"),
    name = eqfLevelLevels
  )
  labels <- eqfLevels[match(x, eqfLevels$value), name]
  ordered(labels, unique(eqfLevelLevels))
}
# Bin ages into the groups named by `ageLevels` ("<from>-<to>"), returned as an
# ordered factor. A group runs from its own lower bound up to (excluding) the
# lower bound of the next group; the last group ends below `supremum`.
# A first group starting at 0 is relabelled "Up to <n>". Ages outside every
# group become NA.
age_groups <- function(age, ageLevels = c("15-24", "25-49", "50-64"), supremum = 65){
  # Lower bound of every group, followed by the overall upper limit.
  splitAge <- as.numeric(c(gsub("[^[:alnum:]].*", "", ageLevels), supremum))
  if (splitAge[1] == 0) {
    ageLevels[1] <- paste("Up to", splitAge[2] - 1)
  }
  lowerBounds <- head(splitAge, -1)
  upperBounds <- splitAge[-1]

  ageGroup <- rep(NA, length(age))
  for (i in seq_along(ageLevels)) {
    inGroup <- age >= lowerBounds[i] & age < upperBounds[i]
    ageGroup <- ifelse(inGroup, ageLevels[i], ageGroup)
  }
  ordered(ageGroup, ageLevels)
}
# Bin years of work experience into the groups named by `workYearLevels`
# ("<from>-<to>"), returned as an ordered factor with " years" appended to the
# labels. A first group starting at 1 is relabelled "Up to <n>"; unless the
# first group starts at 0, a leading "No experience" group covering
# [0, first lower bound) is added. Values outside every group become NA.
work_experiences <- function(work_years, workYearLevels = c("1-5", "6-10", "11-25", "26-49"), supremum = 50){
  # Lower bound of every group, followed by the overall upper limit.
  splitWorkYears <- as.numeric(c(gsub("[^[:alnum:]].*", "", workYearLevels), supremum))
  if (splitWorkYears[1] == 1) {
    workYearLevels[1] <- paste("Up to", splitWorkYears[2] - 1)
  }
  workYearLevels <- paste(workYearLevels, "years")
  if (splitWorkYears[1] != 0) {
    splitWorkYears <- c(0, splitWorkYears)
    workYearLevels <- c("No experience", workYearLevels)
  }
  lowerBounds <- head(splitWorkYears, -1)
  upperBounds <- splitWorkYears[-1]

  workExpGroup <- rep(NA, length(work_years))
  for (i in seq_along(workYearLevels)) {
    inGroup <- work_years >= lowerBounds[i] & work_years < upperBounds[i]
    workExpGroup <- ifelse(inGroup, workYearLevels[i], workExpGroup)
  }
  ordered(workExpGroup, workYearLevels)
}
# Label job counts as "1 position", "2 positions", ... and return them as an
# ordered factor. Counts above `max_jobs` are capped, and the capped value is
# labelled "<max_jobs>+ positions".
job_positions <- function(num_jobs, max_jobs = 13) {
  # Turn counts into labels; the cap value gets a "+" suffix.
  labelJobs <- function(counts) {
    labels <- ifelse(counts < max_jobs, as.character(counts), paste0(as.character(counts), "+"))
    ifelse(labels == 1, paste(labels, "position"), paste(labels, "positions"))
  }
  num_jobs <- as.numeric(num_jobs)
  num_jobs[which(num_jobs > max_jobs)] <- max_jobs
  job_levels <- labelJobs(unique(sort(num_jobs)))
  ordered(labelJobs(num_jobs), job_levels)
}
# Word frequencies per group.
#   dt        : table with a text column and a grouping column
#   textCol   : name of the text column
#   groupCol  : name of the grouping column
#   stopWords : table of stop words removed through anti_join()
# Returns one row per (group, word) with the count `n` (0 for combinations that
# do not occur), the group `total` and `percent` = n / total.
# The two columns of `dt` are renamed in place to "text"/"group" while the
# function runs.
get_word_freqs <- function(dt, textCol = "text", groupCol = "group", stopWords = stop_words) {
  setnames(dt, c(textCol, groupCol), c("text", "group"))
  # Restore the caller's column names on every exit path. Previously an error
  # in the pipeline left `dt` renamed.
  on.exit(setnames(dt, c("text", "group"), c(textCol, groupCol)), add = TRUE)
  dt %>%
    unnest_tokens(word, text) %>%
    anti_join(stopWords) %>%
    count(group, word) %>%
    complete(group, word, fill = list(n = 0)) %>%
    group_by(group) %>%
    mutate(total = sum(n), percent = n / total) %>%
    ungroup()
}
# Fit one binomial glm per word (count vs. rest of the group total, explained
# by group) and return the tidied coefficients.
#   dt       : table with a text column and a grouping column
#   textCol  : name of the text column
#   groupCol : name of the grouping column
#   freqs    : word frequencies as returned by get_word_freqs(); computed from
#              `dt` when left as NA (or NULL)
#   min_n    : only words with a total count above this value are modelled
# The two columns of `dt` are renamed in place to "text"/"group" while the
# function runs.
get_word_models <- function(dt, textCol = "text", groupCol = "group", freqs = NA, min_n = 50) {
  setnames(dt, c(textCol, groupCol), c("text", "group"))
  # Restore the caller's column names on every exit path.
  on.exit(setnames(dt, c("text", "group"), c(textCol, groupCol)), add = TRUE)
  # Detect the "not supplied" sentinel without calling is.na() on a whole
  # table. `if (is.na(freqs))` produced a vector-valued condition whenever a
  # frequency table was actually passed in.
  freqsNotSupplied <- is.null(freqs) ||
    (is.atomic(freqs) && length(freqs) == 1 && is.na(freqs))
  if (freqsNotSupplied) {
    freqs <- get_word_freqs(dt)
  }
  freqs %>%
    group_by(word) %>%
    filter(sum(n) > min_n) %>%
    do(tidy(glm(cbind(n, total - n) ~ group, ., family = "binomial"))) %>%
    ungroup()
}
# Count the n-grams of `corpus` and list them by decreasing document count.
#   min_n, max_n : smallest and largest n-gram size (max_n defaults to min_n)
#   stopWords    : terms to exclude from the vocabulary
# Returns a data.table with columns term and count (the document count).
findNGrams <- function(corpus, min_n, max_n = min_n, stopWords = NA_character_) {
  tokenIterator <- itoken(corpus, tokenizer = word_tokenizer, progressbar = FALSE)
  vocabulary <- create_vocabulary(tokenIterator, c(min_n, max_n), stopwords = stopWords, sep_ngram = " ")
  ngrams <- as.data.table(vocabulary)
  ngrams[order(-doc_count)][, .(term, count = doc_count)]
}
# Return the stop words for `locale`, or "" when neither the "misc" nor the
# "snowball" source supports that language.
getStopwords <- function(locale) {
  supportedLocales <- c(
    stopwords_getlanguages(source = "misc"),
    stopwords_getlanguages(source = "snowball")
  )
  if (!(locale %in% supportedLocales)) {
    return("")
  }
  stopwords(locale)
}
# Compute tf-idf weights per (class, term).
#   corpus    : data.table with columns `text` (space separated tokens) and
#               `code` (the class / document id)
#   stopwords : terms to ignore
#   normalize : "double" (0.5 + 0.5 * count / max count) or "log" (log(1 + count))
#   min_char  : terms must be longer than this many characters
# Returns a data.table with columns term, class and tfIdf.
# NOTE(review): for "double" the maximum count is taken over the whole table,
# not per class -- confirm this is intended.
findTFIDF <- function(corpus, stopwords, normalize = "double", min_char = 1) {
  # Fail early with a clear message; an unknown value used to surface later as
  # a missing `tf` column.
  if (length(normalize) != 1 || !normalize %in% c("double", "log")) {
    stop("normalize must be \"double\" or \"log\"", call. = FALSE)
  }
  tokensList <- strsplit(corpus[, text], " ")
  names(tokensList) <- corpus[, code]
  tokensDT <- lapply(tokensList, as.data.table) %>%
    rbindlist(idcol = TRUE) %>%
    setnames(c("class", "term"))
  tokensDT <- tokensDT[!term %in% stopwords][nchar(term) > min_char]
  #inverse document frequency smooth
  idfDT <- tokensDT[!duplicated(tokensDT)][, .(docFreq = .N), by = "term"]
  idfDT[, idf := log(length(unique(tokensDT$class)) / (docFreq + 1)) + 1]
  tfDT <- tokensDT[, .(term_count = .N), by = c("class", "term")]
  if (normalize == "double") tfDT[, tf := 0.5 + 0.5 * term_count / max(term_count)]
  if (normalize == "log") tfDT[, tf := log(1 + term_count)]
  # merge() takes `by`, not `on`. The former `on = "term"` was not a merge
  # argument, so the join only worked through the default of joining on the
  # shared column.
  merge(tfDT, idfDT, by = "term")[, tfIdf := tf * idf][, .(term, class, tfIdf)]
}
# Convert a set of association rules into a directed igraph graph.
# Every item that occurs in the rules and every rule ("assoc<i>") becomes a
# vertex; edges run from the lhs items to the rule and from the rule to its rhs
# items. Item vertices are labelled with the item label, rule vertices carry an
# empty label plus the rule's quality measures as vertex attributes.
igraph_from_arules <- function(x){
  ruleIndex <- seq_len(length(x))
  itemNodes <- which(itemFrequency(items(generatingItemsets(x)), type = "absolute") > 0)
  assocNodes <- paste0("assoc", ruleIndex)

  # item -> rule edges
  lhsItems <- LIST(lhs(x), decode=FALSE)
  from_lhs <- unlist(lhsItems)
  to_lhs <- assocNodes[rep(ruleIndex, lengths(lhsItems))]

  # rule -> item edges
  rhsItems <- LIST(rhs(x), decode=FALSE)
  to_rhs <- unlist(rhsItems)
  from_rhs <- assocNodes[rep(ruleIndex, lengths(rhsItems))]

  e.list <- cbind(c(from_lhs, from_rhs), c(to_lhs, to_rhs))
  v.labels <- data.frame(
    name = c(as.character(itemNodes), assocNodes),
    label = c(itemLabels(x)[itemNodes], rep("", length(assocNodes))),
    stringsAsFactors = FALSE)
  g <- igraph::graph.data.frame(e.list, directed=TRUE, vertices=v.labels)

  ## add quality measures (rule vertices come after the item vertices)
  assocVertexIndex <- length(itemNodes) + seq_along(assocNodes)
  for (m in names(quality(x))) {
    g <- igraph::set.vertex.attribute(g, m, assocVertexIndex, quality(x)[[m]])
  }
  g
}
###########################################################################################################
## R version 4.0.5 (2021-03-31)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.2 LTS
##
## Matrix products: default
## BLAS/LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.8.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=C
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] arules_1.6-8 Matrix_1.3-2 magrittr_2.0.1 stringr_1.4.0
## [5] DT_0.18 fst_0.9.4 dplyr_1.0.6 rmarkdown_2.8
## [9] data.table_1.14.0
##
## loaded via a namespace (and not attached):
## [1] Rcpp_1.0.6 pillar_1.6.0 bslib_0.2.5 compiler_4.0.5
## [5] jquerylib_0.1.4 highr_0.9 tools_4.0.5 digest_0.6.27
## [9] jsonlite_1.7.2 evaluate_0.14 lifecycle_1.0.0 tibble_3.1.1
## [13] lattice_0.20-41 pkgconfig_2.0.3 rlang_0.4.11 yaml_2.2.1
## [17] parallel_4.0.5 xfun_0.23 knitr_1.33 generics_0.1.0
## [21] vctrs_0.3.8 sass_0.4.0 htmlwidgets_1.5.3 grid_4.0.5
## [25] tidyselect_1.1.1 glue_1.4.2 R6_2.5.0 fansi_0.4.2
## [29] purrr_0.3.4 ellipsis_0.3.2 htmltools_0.5.1.1 utf8_1.2.1
## [33] stringi_1.6.2 crayon_1.4.1
../000.core/00.02.base.functions.R completed in 0.14 seconds
Type of data: data.table, data.frame.
Dimensions: 353518, 17.
Column Names: id, locale, country, age_group1, age_group2, gender, nationality, mother_tongue, job_applied_for, latest_job_isco1, latest_job_isco2, latest_job_isco3, is_employed, num_jobs, total_work_years, eqf_highest, is_student.
Type of data: data.table, data.frame.
Dimensions: 1303728, 35.
Column Names: id, URI, from, to, label, employer, code, occupationTitle, iscoCode1, iscoCode2, iscoCode3, iscoCode4, iscoLabel1, iscoLabel2, iscoLabel3, iscoLabel4, locale, creationDate, lastUpdate, postalcode, country, gender, birthdate, nationality, work_years, num_work, min_work_years, max_work_years, mean_work_years, is_employed, eqf_level, eqf_previous, is_student, headline_type, headline_isco.
Type of data: data.table, data.frame.
Dimensions: 339566, 24.
Column Names: id, locale, creationDate, lastUpdate, postalcode, country, gender, birthdate, nationality, work_years, num_work, min_work_years, max_work_years, mean_work_years, is_employed, eqf_level, eqf_previous, is_student, headline_type, headline_isco, skillCode, type, category, skillTitle.
Type of data: data.table, data.frame.
Dimensions: 1766160, 26.
Column Names: index, id, locale, country, nationality, birthYear, age, final_level, institution_name, institution_short, numQual, title, organisation, organisationCountry, from, to, status, fromAge, toAge, studyAge, length, cumLength, eqf_level, eqf_group, edu_level, edu_field.
Type of data: data.table, data.frame.
Dimensions: 15385837, 7.
Column Names: index, id, locale, label, code, total_weight, skillTitle.
1.load.data.R completed in 83.36 seconds
# One basket of ISCO level-3 occupation labels per anonymised profile.

# Random transaction ids: a shuffled row number per profile.
demographStat[, new_index := paste0("trans_", sample.int(nrow(demographStat)))]
vars <- c("iscoLabel3") # vars in the baskets
molten_occupations <- melt(
  workStat,
  id.vars = "id",
  measure.vars = vars
)[!is.na(value)]
# Translate the ids on the molten copy only. Overwriting workStat$id in place
# would make workStat unjoinable on the original id and would turn every id
# into NA if this step were run a second time.
molten_occupations[, id := demographStat$new_index[match(id, demographStat$id)]]
molten_occupations <- unique(molten_occupations[order(id)], by = c("id", "value"))
occupations_transactions <- methods::as(
  split(molten_occupations$value, molten_occupations$id),
  "transactions"
)
# Attach the demographic attributes (without the original id) to each
# transaction. sort = FALSE keeps the rows in transaction order; the default
# re-sorts by key, which need not match the order produced by split().
itemsetInfo(occupations_transactions) <- merge(
  as.data.table(itemsetInfo(occupations_transactions)),
  demographStat[, -c("id")],
  by.x = "transactionID",
  by.y = "new_index",
  all.x = TRUE,
  sort = FALSE
)
2.process.data.R completed in 24.16 seconds
# One basket of skill titles per anonymised profile.

# Fresh random transaction ids: a newly shuffled row number per profile.
demographStat[, new_index := paste0("trans_", sample.int(nrow(demographStat)))]
vars <- c("skillTitle") # vars in the baskets
molten_skills <- melt(
  skillsStat,
  id.vars = "id",
  measure.vars = vars
)[!is.na(value)]
# Translate the ids on the molten copy only. skillsStat must keep its original
# ids: the combined skills/occupations baskets bind skillsStat ids together
# with demographStat ids, and overwriting them here would leave skills and
# occupations in separate baskets.
molten_skills[, id := demographStat$new_index[match(id, demographStat$id)]]
molten_skills <- unique(molten_skills[order(id)], by = c("id", "value"))
skills_transactions <- methods::as(
  split(molten_skills$value, molten_skills$id),
  "transactions"
)
# Attach the demographic attributes (without the original id) to each
# transaction. sort = FALSE keeps the rows in transaction order; the default
# re-sorts by key, which need not match the order produced by split().
itemsetInfo(skills_transactions) <- merge(
  as.data.table(itemsetInfo(skills_transactions)),
  demographStat[, -c("id")],
  by.x = "transactionID",
  by.y = "new_index",
  all.x = TRUE,
  sort = FALSE
)
3.process.data.R completed in 11.43 seconds
# One basket of education fields per anonymised profile.

# Fresh random transaction ids: a newly shuffled row number per profile.
demographStat[, new_index := paste0("trans_", sample.int(nrow(demographStat)))]
vars <- c("edu_field") # vars in the baskets
molten_qualifications <- melt(
  educationStat,
  id.vars = "id",
  measure.vars = vars
)[!is.na(value)]
# Translate the ids on the molten copy only. Overwriting educationStat$id in
# place would make educationStat unjoinable on the original id and would turn
# every id into NA if this step were run a second time.
molten_qualifications[, id := demographStat$new_index[match(id, demographStat$id)]]
molten_qualifications <- unique(
  molten_qualifications[order(id)],
  by = c("id", "value")
)
qualifications_transactions <- methods::as(
  split(molten_qualifications$value, molten_qualifications$id),
  "transactions"
)
# Attach the demographic attributes (without the original id) to each
# transaction. sort = FALSE keeps the rows in transaction order; the default
# re-sorts by key, which need not match the order produced by split().
itemsetInfo(qualifications_transactions) <- merge(
  as.data.table(itemsetInfo(qualifications_transactions)),
  demographStat[, -c("id")],
  by.x = "transactionID",
  by.y = "new_index",
  all.x = TRUE,
  sort = FALSE
)
4.process.data.R completed in 22.9 seconds
# Baskets that combine each profile's skill titles with its latest ISCO
# level-3 occupation.

# Both sources are stacked on `id`, so skillsStat$id has to hold the same
# profile ids as demographStat$id. If the ids were replaced beforehand (for
# example by anonymised "trans_*" ids) no skill can share a basket with an
# occupation, so flag that instead of silently building useless baskets.
if (nrow(skillsStat) > 0 && !any(skillsStat$id %in% demographStat$id)) {
  warning(
    "skillsStat$id shares no value with demographStat$id; ",
    "skills and occupations will end up in separate baskets.",
    call. = FALSE
  )
}
skillsOccupations <- unique(rbind(
  skillsStat[, .(id, item = skillTitle)],
  demographStat[, .(id, item = latest_job_isco3)]
))
skills_occupations_transactions <- methods::as(
  split(
    as.character(skillsOccupations$item),
    skillsOccupations$id
  ),
  "transactions"
)
# Attach the demographic attributes to each transaction. sort = FALSE keeps
# the rows in transaction order; the default re-sorts by key, which need not
# match the order produced by split().
itemsetInfo(skills_occupations_transactions) <- merge(
  as.data.table(itemsetInfo(skills_occupations_transactions)),
  demographStat,
  by.x = "transactionID",
  by.y = "id",
  all.x = TRUE,
  sort = FALSE
)
5.process.data.R completed in 40.78 seconds
# Baskets that combine each profile's matched skill titles with its latest
# ISCO level-3 occupation; rows with a missing id or item are dropped.
skillsMatchOccupations <- na.omit(unique(rbind(
  skillsMatch[, .(id, item = skillTitle)],
  demographStat[, .(id, item = latest_job_isco3)]
)))
skills_match_occupations_transactions <- methods::as(
  split(
    as.character(skillsMatchOccupations$item),
    skillsMatchOccupations$id
  ),
  "transactions"
)
# Attach the demographic attributes to each transaction. sort = FALSE keeps
# the rows in transaction order; the default re-sorts by key, which need not
# match the order produced by split().
itemsetInfo(skills_match_occupations_transactions) <- merge(
  as.data.table(itemsetInfo(skills_match_occupations_transactions)),
  demographStat,
  by.x = "transactionID",
  by.y = "id",
  all.x = TRUE,
  sort = FALSE
)
6.process.data.R completed in 49.68 seconds
rds format
fileName <- "occupations_transactions.rds"
saveBinary(occupations_transactions, fileName, outputRepo)
Datasource : /data/generic/jobsOutput/basket_analysis_data/occupations_transactions.rds of 5,563,547 bytes.
## items transactionID locale country age_group1 age_group2 gender nationality mother_tongue job_applied_for latest_job_isco1 latest_job_isco2 latest_job_isco3 is_employed num_jobs total_work_years eqf_highest is_student
## [1] {Other services managers} trans_1 Italian Italy 25-49 26-30 Female Italian Italian Material-recording and transport clerks Managers Hospitality, retail and other services managers Other services managers Unemployed 1 position Up to 2 years Level 7 No ongoing studies
## [2] {Creative and performing artists,
## Retail and wholesale trade managers,
## Sales, marketing and development managers} trans_10 Greek Greece 25-49 36+ NA Greek Greek Client information workers Managers Administrative and commercial managers Sales, marketing and development managers Employed 6 positions 11-20 years Level 8 No ongoing studies
## [3] {Business services agents,
## Manufacturing, mining, construction, and distribution managers,
## Sales, marketing and public relations professionals,
## Travel attendants, conductors and guides,
## University and higher education teachers} trans_1000 Spanish Spain 25-49 26-30 Female Spanish Spanish NA Technicians and associate professionals Business and administration associate professionals Business services agents Employed 10+ positions 11-20 years Level 7 Currently studying
## [4] {Secondary education teachers} trans_10000 Bulgarian Bulgaria NA NA NA NA NA Secondary education teachers Professionals Teaching professionals Secondary education teachers Unemployed 1 position Up to 2 years NA No ongoing studies
## [5] {Social and religious professionals} trans_100000 Portuguese Portugal 25-49 36+ Male Portuguese Portuguese NA NA NA NA Employed 4 positions 11-20 years NA Currently studying
rds format
Datasource : /data/generic/jobsOutput/basket_analysis_data/skills_transactions.rds of 2,743,027 bytes.
## items transactionID locale country age_group1 age_group2 gender nationality mother_tongue job_applied_for latest_job_isco1 latest_job_isco2 latest_job_isco3 is_employed num_jobs total_work_years eqf_highest is_student
## [1] {AJAX,
## apply technical communication skills,
## CSS,
## Eclipse (integrated development environment software),
## Java (computer programming),
## JavaScript,
## JSSS,
## PHP,
## report to the team leader,
## web application security threats} trans_1 English Kosovo NA NA NA NA Albanian Software and applications developers and analysts Professionals Information and communications technology professionals Software and applications developers and analysts Employed 3 positions 6-10 years Level 6 Currently studying
## [2] {comply with food safety and hygiene,
## organise facilities for office personnel} trans_100 Croatian NA 15-24 21-25 Female Croatian Croatian Shop salespersons Managers Chief executives, senior officials and legislators Managing directors and chief executives Employed 4 positions 6-10 years NA No ongoing studies
## [3] {communicate verbal instructions} trans_100002 English Estonia 25-49 26-30 Male Bangladeshi English NA Service and sales workers Personal service workers Travel attendants, conductors and guides Employed 4 positions 6-10 years Level 1-4 Currently studying
## [4] {draft corporate emails,
## liaise with security authorities} trans_100004 Romanian Romania NA NA NA NA Romanian NA Technicians and associate professionals Business and administration associate professionals Administrative and specialised secretaries Employed 4 positions 3-5 years Level 6 Currently studying
## [5] {use different communication channels} trans_100006 Romanian Romania 25-49 26-30 Male NA NA Engineering professionals (excluding electrotechnology) Service and sales workers Personal service workers Hairdressers, beauticians and related workers Employed 8 positions 6-10 years Level 6 No ongoing studies
rds format
fileName <- "qualifications_transactions.rds"
saveBinary(qualifications_transactions, fileName, outputRepo)
Datasource : /data/generic/jobsOutput/basket_analysis_data/qualifications_transactions.rds of 5,210,610 bytes.
## items transactionID locale country age_group1 age_group2 gender nationality mother_tongue job_applied_for latest_job_isco1 latest_job_isco2 latest_job_isco3 is_employed num_jobs total_work_years eqf_highest is_student
## [1] {Electronics and automation,
## Social work and counselling,
## Software and applications development and analysis} trans_1 Portuguese Portugal 15-24 Up to 20 Female NA Portuguese NA Service and sales workers Sales workers Shop salespersons Employed 2 positions 6-10 years Level 1-4 No ongoing studies
## [2] {Nursing and midwifery} trans_10 Portuguese NA 15-24 Up to 20 Female Cape NA Professional services managers NA NA NA Unemployed 2 positions Up to 2 years NA Currently studying
## [3] {Audio-visual techniques and media production,
## Building and civil engineering,
## Database and network design and administration,
## Earth sciences,
## Engineering and engineering trades,
## Environmental sciences,
## Fashion, interior and industrial design,
## Language acquisition,
## Management and administration,
## Marketing and advertising,
## Physics,
## Software and applications development and analysis,
## Teacher training with subject specialisation} trans_100 English Serbia 25-49 36+ Female Serbian Serbian NA Managers Chief executives, senior officials and legislators Managing directors and chief executives Employed 10+ positions 20+ years NA Currently studying
## [4] {Electronics and automation,
## Food processing,
## Management and administration,
## Mechanics and metal trades,
## Military and defence,
## Therapy and rehabilitation} trans_10000 Spanish Spain 25-49 31-35 Male Spanish Spanish Administration professionals NA NA NA Employed 7 positions 6-10 years NA Currently studying
## [5] {Education science} trans_100004 Portuguese Portugal NA NA NA NA Portuguese Building finishers and related trades workers NA NA NA Employed 5 positions 20+ years NA No ongoing studies
rds format
fileName <- "skills_occupations_transactions.rds"
saveBinary(skills_occupations_transactions, fileName, outputRepo)
Datasource : /data/generic/jobsOutput/basket_analysis_data/skills_occupations_transactions.rds of 16,644,506 bytes.
## items transactionID locale country age_group1 age_group2 gender nationality mother_tongue job_applied_for latest_job_isco1 latest_job_isco2 latest_job_isco3 is_employed num_jobs total_work_years eqf_highest is_student new_index
## [1] {Domestic, hotel and office cleaners and helpers} 00006113-6d7a-4458-8e12-5d79c5a7acdb-20190909141955 Italian Italy 25-49 26-30 Female Italian Italian NA Elementary occupations Cleaners and helpers Domestic, hotel and office cleaners and helpers Employed 1 position Up to 2 years Level 7 No ongoing studies trans_205343
## [2] {Shop salespersons} 000087a9-d7c9-45ff-9f14-1c05853db7d9-20190719172435 Italian Italy 25-49 36+ Female Italian Italian NA Service and sales workers Sales workers Shop salespersons Employed 1 position 20+ years NA No ongoing studies trans_172786
## [3] {Manufacturing, mining, construction, and distribution managers} 0000dadb-5236-42d2-95da-40225861c06b-20190905185510 English Italy NA NA NA NA Italian NA Managers Production and specialised services managers Manufacturing, mining, construction, and distribution managers Unemployed 1 position Up to 2 years NA Currently studying trans_272582
## [4] {Travel attendants, conductors and guides} 0000e8b9-e074-42f9-b93f-e2981b1ae5b1-20190901152842 English Turkey NA NA NA NA NA NA Service and sales workers Personal service workers Travel attendants, conductors and guides Unemployed 2 positions Up to 2 years Level 6 Currently studying trans_188554
## [5] {} 0000f941-4300-4267-b623-e961bc3a7719-20190823023655 Portuguese Portugal NA NA NA NA Portuguese NA NA NA NA NA NA NA Level 6 No ongoing studies trans_105913
rds format
fileName <- "skills_match_occupations_transactions.rds"
saveBinary(skills_match_occupations_transactions, fileName, outputRepo)
Datasource : /data/generic/jobsOutput/basket_analysis_data/skills_match_occupations_transactions.rds of 41,597,065 bytes.
## items transactionID locale country age_group1 age_group2 gender nationality mother_tongue job_applied_for latest_job_isco1 latest_job_isco2 latest_job_isco3 is_employed num_jobs total_work_years eqf_highest is_student new_index
## [1] {communication related to hearing impairment,
## communication studies,
## interact verbally in Italian,
## interact verbally in Romanian,
## Italian,
## Latin,
## media studies,
## promote good habits to avoid communication disorders,
## Romanian,
## train animals and individuals to work together,
## understand spoken Italian,
## understand spoken Romanian,
## understand written Italian,
## understand written Latin,
## understand written Romanian,
## upsell products,
## usability engineering,
## write Italian,
## write Latin,
## write Romanian} 000004a3-fd23-4703-b080-b93d3377a559-20190912133257 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
## [2] {apply organisational techniques,
## assist in performing physical exercises,
## assist in the practical actions for developing marketing campaigns,
## assist the dentist during the dental treatment procedure,
## assist with litigation matters,
## broadcast using Internet Protocol,
## carry out internet research,
## collect mail,
## communicate professionally with colleagues in other fields,
## conduct physical examination in emergency,
## contribute to protecting individuals from harm,
## coordinate activities in audio recording studio,
## define organisational standards,
## develop organisational information goals,
## develop organisational policies,
## discuss the end point of therapeutic intervention,
## Domestic, hotel and office cleaners and helpers,
## draft corporate emails,
## evaluate focus of community arts practice,
## evaluate supporting team in community arts program,
## execute email marketing,
## execute ICT user research activities,
## exert a goal-oriented leadership role towards colleagues,
## facilitate healthcare user's engagement in occupations,
## facilitate teamwork between students,
## give constructive feedback,
## handle mail,
## handle registered mail,
## ICT capacity planning strategies,
## identify undetected organisational needs,
## implement short term objectives,
## interact with an audience,
## interact with fellow actors,
## interact with others,
## interact with users to gather requirements,
## interface with anti-mining lobbyists,
## internet governance,
## Internet of Things,
## lead disaster recovery exercises,
## maintain a safe, hygienic and secure working environment,
## maintain internet protocol configuration,
## maintain pricing database,
## make use of personal robots for practical support,
## manage groups outdoors,
## manage university department,
## Microsoft Access,
## Microsoft Visio,
## Microsoft Visual C++,
## navigate European inland waterways,
## negotiate settlements,
## perform cleaning activities in an environmentally friendly way,
## perform cleaning activities in an outdoor environment,
## perform small vessel navigation,
## prepare ceremonial locations,
## recognise the hazards of dangerous goods,
## restrain individuals,
## roles of supporting team for community arts programme,
## schedule artistic activities,
## set organisational policies,
## set quality assurance objectives,
## set up your arts offer in places relevant to your potential employers/contractors,
## speak about your work in public,
## supervise advocacy work,
## supervise students in social services,
## teamwork principles,
## undertake post-examination activities,
## use internet chat,
## use internet to increase sales,
## use microsoft office,
## work closely with news teams,
## work in a food processing team,
## work in restoration team,
## work in teams,
## work to develop physical ability to perform at the highest level in sport,
## work with a dance team,
## work with an artistic team,
## work with circus group,
## work with social service users in a group,
## work with supporting team in community arts programme} 00006113-6d7a-4458-8e12-5d79c5a7acdb-20190909141955 Italian Italy 25-49 26-30 Female Italian Italian NA Elementary occupations Cleaners and helpers Domestic, hotel and office cleaners and helpers Employed 1 position Up to 2 years Level 7 No ongoing studies trans_205343
## [3] {Absorb (learning management systems),
## advise customers on new equipment,
## advise superiors on military operations,
## analyse the credit history of potential customers,
## assist clients with special needs,
## Brightspace (learning management systems),
## build rapport with people from different cultural backgrounds,
## Canvas (learning management systems),
## carry out social work research,
## consult with business clients,
## cooperate with colleagues,
## coordinate public-private partnerships in tourism,
## customer relationship management,
## define set materials,
## ensure cross-department cooperation,
## estimate duration of work,
## ethics of sharing work through social media,
## facilities management in the organisation,
## good laboratory practice,
## implement veterinary clinical governance,
## learning management systems,
## maintain relationship with customers,
## maintain relationships with others in the performing arts,
## manage a good diction,
## patient autonomy,
## perform cleaning duties,
## perform clerical duties,
## perform manual work autonomously,
## provide a psychotherapeutic environment,
## provide chaperone for children on set,
## relationship between buildings, people and the environment,
## share good practices across subsidiaries,
## share through digital technologies,
## Shop salespersons,
## solve problems in gambling through digital means,
## store design layout,
## use customer relationship management software,
## work in partnership with social services users,
## work with e-services available to clients} 000087a9-d7c9-45ff-9f14-1c05853db7d9-20190719172435 Italian Italy 25-49 36+ Female Italian Italian NA Service and sales workers Sales workers Shop salespersons Employed 1 position 20+ years NA No ongoing studies trans_172786
## [4] {apply fishery biology to fishery management,
## apply problem solving in social service,
## apply technical communication skills,
## assist in children's development of basic personal skills,
## biology,
## chemical technologies in metal manufacture,
## communicate in foreign languages with health service providers,
## conduct health related research,
## conduct research on reproductive medicine,
## conduct research on speech-related topics,
## coordinate communication within a team,
## design well paths,
## develop personal skills,
## develop solutions to information issues,
## develop strategy to solve problems,
## develop strong attitudes in sports,
## employ foreign languages for health-related research,
## ensure positive relations within the artistic team,
## first aid for animals,
## fish biology,
## identify the healthcare user’s personal capacity,
## literary theory,
## maintain internal communication systems,
## maintain musical instruments,
## maintain trails,
## maintain working relationships,
## manage hospital-acquired infections,
## Manufacturing, mining, construction, and distribution managers,
## mix spirit flavourings according to recipe,
## molecular biology,
## nanotechnology,
## perform field research,
## prepare personal work environment,
## relay messages through radio and telephone systems,
## research new treatments for blood related disorders,
## show entrepreneurial spirit,
## solve location and navigation problems by using GPS tools,
## spirit taxation regulations,
## spirits development,
## teach communication methods,
## teach housekeeping skills,
## tolerate strong smells,
## transportation software related to an ERP system,
## understand game species,
## use microsoft office,
## use of specialised instruments in otorhinolaryngology,
## use therapeutic communication techniques,
## utilise advanced clinical skills,
## wine related sciences,
## work in a fishery team,
## work in a landscape team,
## work in a rail transport team,
## work independently,
## work independently as an artist,
## work independently in agriculture,
## work independently in forestry services,
## work independently in landscaping,
## work independently in rental services,
## work independently in service of a food production process,
## work independently on exhibitions,
## work to develop physical ability to perform at the highest level in sport} 0000dadb-5236-42d2-95da-40225861c06b-20190905185510 English Italy NA NA NA NA Italian NA Managers Production and specialised services managers Manufacturing, mining, construction, and distribution managers Unemployed 1 position Up to 2 years NA Currently studying trans_272582
## [5] {analyse election procedures,
## apply technical communication skills,
## assess students' preliminary learning experiences,
## combine business technology with user experience,
## communicate in foreign languages with health service providers,
## coordinate communication within a team,
## create prototype of user experience solutions,
## election law,
## ensure positive relations within the artistic team,
## evaluate prospective foster parents,
## improve students' examination skills,
## maintain internal communication systems,
## maintain relations with children's parents,
## promote good habits to avoid communication disorders,
## relay messages through radio and telephone systems,
## supervise audiology team,
## supervise speech and language team,
## teach communication methods,
## Travel attendants, conductors and guides,
## use experience map,
## use therapeutic communication techniques} 0000e8b9-e074-42f9-b93f-e2981b1ae5b1-20190901152842 English Turkey NA NA NA NA NA NA Service and sales workers Personal service workers Travel attendants, conductors and guides Unemployed 2 positions Up to 2 years Level 6 Currently studying trans_188554
7.save.data.R completed in 25.12 seconds
Completed in 259.52 seconds.