jurbanhost’s gists

agricolamz / get_embedings_from_bert.qmd

Last active November 14, 2023 15:30

	---
	title: "Get word embeddings from BERT"
	format: html
	editor: source
	editor_options:
	  chunk_output_type: console
	---

	Убедимся, что у нас третий питон:

jrosell / rpostgres-pdf.R

Created December 9, 2021 10:20

	library(RPostgres)

	# Read the PDF into memory as a raw vector. `n` is set to the file size
	# reported by file.info() so that readBin() requests every byte of the file.
	path <- file.path("os2.pdf")
	pdf <- readBin(con = path, what = raw(), n = file.info(path)$size)

	# Open the file at `path` via browseURL()
	browseURL(path)

	# Connect to the default DB and put the serialized raw PDF in a data.frame

behrica / PWI_top2vec.py

Last active May 27, 2024 03:00 — forked from lcschv/PWI_top2vec.py

	from nltk.probability import FreqDist
	import math
	import pickle
	from top2vec import Top2Vec
	import numpy as np
	from gensim.utils import simple_preprocess
	from gensim.parsing.preprocessing import strip_tags
	from tqdm import tqdm

agricolamz / 2021-04-25_tidytuesday.R

Last active July 4, 2021 13:39

	# by G. Moroz
	# License: GPL-2

	library(tidyverse)
	# Read the Netflix titles CSV from the TidyTuesday repository (2021-04-20 data
	# set) and take a quick look at its columns with glimpse().
	netflix_titles <- read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-04-20/netflix_titles.csv')
	glimpse(netflix_titles)

	library(tidytext)
	library(ggwordcloud)
	library(ggtext)

adamlauretig / gensim_in_r.Rmd

Created September 5, 2018 15:26

	---
	title: "Using Gensim in R"
	author: "Adam Lauretig"
	date: "3/17/2018"
	output: html_document
	---

	```{r setup, include=FALSE}
	knitr::opts_chunk$set(echo = TRUE)
	```

inkrement / clickhousedump

Created August 19, 2017 14:26

dump all clickhouse databases and tables

tjvananne / process GloVe pre-trained word vector.R

Created May 4, 2017 14:45

How to read and process a downloaded pre-trained GloVe word vector (turn it into a data.frame) in base R


	#' A pre-trained word-vector file is a large matrix with one row per word; each row holds a numeric vector that
	#' represents the semantic meaning of that word. This lets us discover relationships and analogies between words
	#' programmatically. The classic example: "king" minus "man" plus "woman" is most similar to "queen".


	# function definition --------------------------------------------------------------------------

	# input .txt file, exports list of list of values and character vector of names (words)
	proc_pretrained_vec <- function(p_vec) {

graydon / country-bounding-boxes.py

Created April 23, 2014 00:03

country bounding boxes

	# extracted from http://www.naturalearthdata.com/download/110m/cultural/ne_110m_admin_0_countries.zip
	# under public domain terms

	country_bounding_boxes = {
	'AF': ('Afghanistan', (60.5284298033, 29.318572496, 75.1580277851, 38.4862816432)),
	'AO': ('Angola', (11.6400960629, -17.9306364885, 24.0799052263, -4.43802336998)),
	'AL': ('Albania', (19.3044861183, 39.624997667, 21.0200403175, 42.6882473822)),
	'AE': ('United Arab Emirates', (51.5795186705, 22.4969475367, 56.3968473651, 26.055464179)),
	'AR': ('Argentina', (-73.4154357571, -55.25, -53.628348965, -21.8323104794)),
	'AM': ('Armenia', (43.5827458026, 38.7412014837, 46.5057198423, 41.2481285671)),

hadley / clustergram-had.r

Created June 15, 2010 21:30

	# Default set of cluster counts to try for a data set with `rows` observations:
	# every integer from 2 up to a quarter of the row count (integer division),
	# with the upper end never lower than 3.
	ks.default <- function(rows) {
	  upper <- max(3, rows %/% 4)
	  seq(from = 2, to = upper)
	}

	# Run kmeans() once for each candidate cluster count and stack the results.
	#
	# x:   data handed to kmeans(); nrow(x) gives the number of observations.
	# ks:  cluster counts to try (defaults to ks.default(nrow(x))).
	# ...: extra arguments forwarded unchanged to kmeans().
	#
	# Returns a data frame with one row per observation per entry of `ks`:
	# obs (row index in x), i (position in ks), k (the cluster count) and
	# cluster (the assignment reported by kmeans()).
	many_kmeans <- function(x, ks = ks.default(nrow(x)), ...) {
	  fit_one <- function(idx) {
	    n_clusters <- ks[idx]
	    fit <- kmeans(x, centers = n_clusters, ...)
	    data.frame(
	      obs = seq_len(nrow(x)),
	      i = idx,
	      k = n_clusters,
	      cluster = fit$cluster
	    )
	  }
	  ldply(seq_along(ks), fit_one)
	}

	all_hclust <- function(x, ks = ks.default(nrow(x)), point.dist = "euclidean", cluster.dist = "ward") {