Skip to content
This repository has been archived by the owner on Dec 21, 2023. It is now read-only.

Commit

Permalink
Language improvements, replace whatlanguage with CLD (mastodon#2753)
Browse files Browse the repository at this point in the history
* add failing en specs

* add cld2 gem

* Replace WhatLanguage with CLD
  • Loading branch information
mjankowski authored and Gargron committed May 3, 2017
1 parent 53384b0 commit 8c5ad23
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 13 deletions.
2 changes: 1 addition & 1 deletion Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ gem 'paperclip', '~> 5.1'
gem 'paperclip-av-transcoder'

gem 'addressable'
gem 'cld2', require: 'cld'
gem 'devise'
gem 'devise-two-factor'
gem 'doorkeeper'
Expand Down Expand Up @@ -56,7 +57,6 @@ gem 'statsd-instrument'
gem 'twitter-text'
gem 'tzinfo-data'
gem 'webpacker', '~>1.2'
gem 'whatlanguage'

# For some reason the view specs start failing without this
gem 'react-rails'
Expand Down
6 changes: 4 additions & 2 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,8 @@ GEM
rack-test (>= 0.5.4)
xpath (~> 2.0)
chunky_png (1.3.8)
cld2 (1.0.3)
ffi (~> 1.9.3)
climate_control (0.1.0)
cocaine (0.5.8)
climate_control (>= 0.0.3, < 1.0)
Expand Down Expand Up @@ -153,6 +155,7 @@ GEM
faker (1.7.3)
i18n (~> 0.5)
fast_blank (1.0.0)
ffi (1.9.18)
fuubar (2.2.0)
rspec-core (~> 3.0)
ruby-progressbar (~> 1.4)
Expand Down Expand Up @@ -463,7 +466,6 @@ GEM
websocket-driver (0.6.5)
websocket-extensions (>= 0.1.0)
websocket-extensions (0.1.2)
whatlanguage (1.0.6)
xpath (2.0.0)
nokogiri (~> 1.3)

Expand All @@ -484,6 +486,7 @@ DEPENDENCIES
capistrano-rbenv
capistrano-yarn
capybara
cld2
devise
devise-two-factor
doorkeeper
Expand Down Expand Up @@ -549,7 +552,6 @@ DEPENDENCIES
uglifier (>= 1.3.0)
webmock
webpacker (~> 1.2)
whatlanguage

RUBY VERSION
ruby 2.4.1p111
Expand Down
14 changes: 13 additions & 1 deletion app/lib/language_detector.rb
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,23 @@ def initialize(text, account = nil)
end

# Returns the language code for the text as a Symbol, falling back to the
# default locale (converted with to_sym) when no language code is produced.
def to_iso_s
# WhatLanguage-based lookup; presumably the line this commit replaces with CLD.
WhatLanguage.new(:all).language_iso(text_without_urls) || default_locale.to_sym
# CLD-based lookup: detected_language_code is nil unless detection is reliable.
detected_language_code || default_locale.to_sym
end

private

# Language code of the detection result as a Symbol, or nil when the
# detection is not flagged as reliable.
def detected_language_code
  return unless detected_language_reliable?

  detected_language[:code].to_sym
end

# Memoized result of running CLD language detection on the text with URLs
# removed. Other methods read the :code and :reliable keys from this result.
def detected_language
  @_detected_language ||= begin
    cleaned_text = text_without_urls
    CLD.detect_language(cleaned_text)
  end
end

# Whether the detection result is flagged as reliable; returns the raw
# :reliable value from the detection result.
def detected_language_reliable?
  detection = detected_language
  detection[:reliable]
end

def text_without_urls
text.dup.tap do |new_text|
URI.extract(new_text).each do |url|
Expand Down
24 changes: 15 additions & 9 deletions spec/lib/language_detector_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,17 @@

describe LanguageDetector do
describe 'to_iso_s' do
it 'detects english language' do
string = 'Hello and welcome to mastodon'
result = described_class.new(string).to_iso_s

expect(result).to eq :en
it 'detects english language for basic strings' do
strings = [
"Hello and welcome to mastodon",
"I'd rather not!",
"a lot of people just want to feel righteous all the time and that's all that matters",
]
strings.each do |string|
result = described_class.new(string).to_iso_s

expect(result).to eq(:en), string
end
end

it 'detects spanish language' do
Expand All @@ -19,15 +25,15 @@

describe 'when language can\'t be detected' do
# Exercises the detection engine directly with an empty string, i.e. input
# that gives it nothing to detect.
it 'confirm language engine cant detect' do
# WhatLanguage-based check; presumably the lines this commit removes.
result = WhatLanguage.new(:all).language_iso('')
expect(result).to be_nil
# CLD-based check: the result for an empty string must not be flagged reliable.
result = CLD.detect_language('')
expect(result[:reliable]).to be false
end

describe 'because of a URL' do
it 'uses default locale when sent just a URL' do
string = 'http://example.com/media/2kFTgOJLXhQf0g2nKB4'
wl_result = WhatLanguage.new(:all).language_iso(string)
expect(wl_result).not_to eq :en
cld_result = CLD.detect_language(string)[:code]
expect(cld_result).not_to eq :en

result = described_class.new(string).to_iso_s

Expand Down

0 comments on commit 8c5ad23

Please sign in to comment.