Skip to content

Commit

Permalink
Add ktlint support and reformat source code (#47)
Browse files Browse the repository at this point in the history
  • Loading branch information
pemistahl committed Jun 9, 2020
1 parent 3357990 commit f0a8d03
Show file tree
Hide file tree
Showing 82 changed files with 1,450 additions and 440 deletions.
3 changes: 3 additions & 0 deletions .editorconfig
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ indent_size = 4
insert_final_newline = true
trim_trailing_whitespace = true

[*.{kt,kts}]
max_line_length = 120

[*.md]
max_line_length = off
trim_trailing_whitespace = false
14 changes: 6 additions & 8 deletions build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ description = linguaDescription
plugins {
kotlin("jvm") version "1.3.72"
id("org.jetbrains.kotlin.plugin.serialization") version "1.3.72"
id("org.jlleitschuh.gradle.ktlint") version "9.2.1"
id("com.adarshr.test-logger") version "2.0.0"
id("org.jetbrains.dokka") version "0.10.1"
id("ru.vyarus.use-python") version "2.2.0"
Expand Down Expand Up @@ -92,7 +93,7 @@ tasks.jacocoTestReport {

tasks.register<Test>("writeAccuracyReports") {
group = linguaTaskGroup
description = "Runs Lingua on provided test data, reports detection accuracy for each language and writes results to files."
description = "Runs Lingua on provided test data, and writes detection accuracy reports for each language."

val allowedDetectors = linguaSupportedDetectors.split(',')
val detectors = if (project.hasProperty("detectors"))
Expand Down Expand Up @@ -153,7 +154,8 @@ tasks.register<Test>("writeAccuracyReports") {
detectors.forEach { detector ->
languages.forEach { language ->
includeTestsMatching(
"$linguaGroupId.$linguaArtifactId.report.${detector.toLowerCase()}.${language}DetectionAccuracyReport"
"$linguaGroupId.$linguaArtifactId.report" +
".${detector.toLowerCase()}.${language}DetectionAccuracyReport"
)
}
}
Expand Down Expand Up @@ -197,8 +199,7 @@ tasks.register("writeAggregatedAccuracyReport") {
csvFile.appendText(accuracyValues)
}
}
}
else {
} else {
csvFile.appendText(",NaN,NaN,NaN,NaN")
}
}
Expand Down Expand Up @@ -295,10 +296,7 @@ dependencies {
testImplementation("org.apache.opennlp:opennlp-tools:1.9.2")
testImplementation("org.apache.tika:tika-langdetect:1.24.1")

val slf4jVersion = "1.7.25"

testImplementation("org.slf4j:slf4j-api:$slf4jVersion")
testImplementation("org.slf4j:slf4j-log4j12:$slf4jVersion")
testImplementation("org.slf4j:slf4j-nop:1.7.30")
}

python {
Expand Down
135 changes: 68 additions & 67 deletions src/main/kotlin/com/github/pemistahl/lingua/api/Language.kt
Original file line number Diff line number Diff line change
Expand Up @@ -167,79 +167,80 @@ enum class Language(
internal val alphabets: Set<Alphabet>,
internal val uniqueCharacters: String
) {
AFRIKAANS (AF, AFR, setOf(Alphabet.LATIN), ""),
ALBANIAN (SQ, SQI, setOf(Alphabet.LATIN), "Ëë"),
ARABIC (AR, ARA, setOf(Alphabet.ARABIC), ""),
ARMENIAN (HY, HYE, setOf(Alphabet.ARMENIAN), ""),
AZERBAIJANI (AZ, AZE, setOf(Alphabet.LATIN), "Əə"),
BASQUE (EU, EUS, setOf(Alphabet.LATIN), ""),
BELARUSIAN (BE, BEL, setOf(CYRILLIC), ""),
BENGALI (BN, BEN, setOf(Alphabet.BENGALI), ""),
BOKMAL (NB, NOB, setOf(Alphabet.LATIN), ""),
BOSNIAN (BS, BOS, setOf(Alphabet.LATIN), ""),
BULGARIAN (BG, BUL, setOf(CYRILLIC), ""),
CATALAN (CA, CAT, setOf(Alphabet.LATIN), "Ïï"),
CHINESE (ZH, ZHO, setOf(HAN), ""),
CROATIAN (HR, HRV, setOf(Alphabet.LATIN), ""),
CZECH (CS, CES, setOf(Alphabet.LATIN), "ĚěŘřŮů"),
DANISH (DA, DAN, setOf(Alphabet.LATIN), ""),
DUTCH (NL, NLD, setOf(Alphabet.LATIN), ""),
ENGLISH (EN, ENG, setOf(Alphabet.LATIN), ""),
ESPERANTO (EO, EPO, setOf(Alphabet.LATIN), "ĈĉĜĝĤĥĴĵŜŝŬŭ"),
ESTONIAN (ET, EST, setOf(Alphabet.LATIN), ""),
FINNISH (FI, FIN, setOf(Alphabet.LATIN), ""),
FRENCH (FR, FRA, setOf(Alphabet.LATIN), ""),
GEORGIAN (KA, KAT, setOf(Alphabet.GEORGIAN), ""),
GERMAN (DE, DEU, setOf(Alphabet.LATIN), "ß"),
GREEK (EL, ELL, setOf(Alphabet.GREEK), ""),
GUJARATI (GU, GUJ, setOf(Alphabet.GUJARATI), ""),
HEBREW (HE, HEB, setOf(Alphabet.HEBREW), ""),
HINDI (HI, HIN, setOf(DEVANAGARI), ""),
HUNGARIAN (HU, HUN, setOf(Alphabet.LATIN), "ŐőŰű"),
ICELANDIC (IS, ISL, setOf(Alphabet.LATIN), ""),
INDONESIAN (ID, IND, setOf(Alphabet.LATIN), ""),
IRISH (GA, GLE, setOf(Alphabet.LATIN), ""),
ITALIAN (IT, ITA, setOf(Alphabet.LATIN), ""),
JAPANESE (JA, JPN, setOf(HIRAGANA, KATAKANA, HAN), ""),
KAZAKH (KK, KAZ, setOf(CYRILLIC), "ӘәҒғҚқҢңҰұ"),
KOREAN (KO, KOR, setOf(HANGUL), ""),
LATIN (LA, LAT, setOf(Alphabet.LATIN), ""),
LATVIAN (LV, LAV, setOf(Alphabet.LATIN), "ĀāĒēĢģĪīĶķĻļŅņ"),
LITHUANIAN (LT, LIT, setOf(Alphabet.LATIN), "ĖėĮįŲų"),
MACEDONIAN (MK, MKD, setOf(CYRILLIC), "ЃѓЅѕЌќЏџ"),
MALAY (MS, MSA, setOf(Alphabet.LATIN), ""),
MARATHI (MR, MAR, setOf(DEVANAGARI), ""),
MONGOLIAN (MN, MON, setOf(CYRILLIC), "ӨөҮү"),
NORWEGIAN (NO, NOR, setOf(Alphabet.LATIN), ""),
NYNORSK (NN, NNO, setOf(Alphabet.LATIN), ""),
PERSIAN (FA, FAS, setOf(Alphabet.ARABIC), ""),
POLISH (PL, POL, setOf(Alphabet.LATIN), "ŁłŃńŚśŹź"),
PORTUGUESE (PT, POR, setOf(Alphabet.LATIN), ""),
PUNJABI (PA, PAN, setOf(GURMUKHI), ""),
ROMANIAN (RO, RON, setOf(Alphabet.LATIN), "Țţ"),
RUSSIAN (RU, RUS, setOf(CYRILLIC), ""),
SERBIAN (SR, SRP, setOf(CYRILLIC), "ЂђЋћ"),
SLOVAK (SK, SLK, setOf(Alphabet.LATIN), "Ĺ弾Ŕŕ"),
SLOVENE (SL, SLV, setOf(Alphabet.LATIN), ""),
SOMALI (SO, SOM, setOf(Alphabet.LATIN), ""),
SPANISH (ES, SPA, setOf(Alphabet.LATIN), "¿¡"),
SWEDISH (SV, SWE, setOf(Alphabet.LATIN), ""),
TAGALOG (TL, TGL, setOf(Alphabet.LATIN), ""),
TAMIL (TA, TAM, setOf(Alphabet.TAMIL), ""),
TELUGU (TE, TEL, setOf(Alphabet.TELUGU), ""),
THAI (TH, THA, setOf(Alphabet.THAI), ""),
TURKISH (TR, TUR, setOf(Alphabet.LATIN), ""),
UKRAINIAN (UK, UKR, setOf(CYRILLIC), "ҐґЄєЇї"),
URDU (UR, URD, setOf(Alphabet.ARABIC), ""),
VIETNAMESE (VI, VIE, setOf(Alphabet.LATIN), "ẰằẦầẲẳẨẩẴẵẪẫẮắẤấẠạẶặẬậỀềẺẻỂểẼẽỄễẾếẸẹỆệỈỉĨĩỊịƠơỒồỜờỎỏỔổỞởỖỗỠỡỐốỚớỌọỘộỢợƯưỪừỦủỬửŨũỮữỨứỤụỰựỲỳỶỷỸỹỴỵ"),
WELSH (CY, CYM, setOf(Alphabet.LATIN), ""),
AFRIKAANS(AF, AFR, setOf(Alphabet.LATIN), ""),
ALBANIAN(SQ, SQI, setOf(Alphabet.LATIN), "Ëë"),
ARABIC(AR, ARA, setOf(Alphabet.ARABIC), ""),
ARMENIAN(HY, HYE, setOf(Alphabet.ARMENIAN), ""),
AZERBAIJANI(AZ, AZE, setOf(Alphabet.LATIN), "Əə"),
BASQUE(EU, EUS, setOf(Alphabet.LATIN), ""),
BELARUSIAN(BE, BEL, setOf(CYRILLIC), ""),
BENGALI(BN, BEN, setOf(Alphabet.BENGALI), ""),
BOKMAL(NB, NOB, setOf(Alphabet.LATIN), ""),
BOSNIAN(BS, BOS, setOf(Alphabet.LATIN), ""),
BULGARIAN(BG, BUL, setOf(CYRILLIC), ""),
CATALAN(CA, CAT, setOf(Alphabet.LATIN), "Ïï"),
CHINESE(ZH, ZHO, setOf(HAN), ""),
CROATIAN(HR, HRV, setOf(Alphabet.LATIN), ""),
CZECH(CS, CES, setOf(Alphabet.LATIN), "ĚěŘřŮů"),
DANISH(DA, DAN, setOf(Alphabet.LATIN), ""),
DUTCH(NL, NLD, setOf(Alphabet.LATIN), ""),
ENGLISH(EN, ENG, setOf(Alphabet.LATIN), ""),
ESPERANTO(EO, EPO, setOf(Alphabet.LATIN), "ĈĉĜĝĤĥĴĵŜŝŬŭ"),
ESTONIAN(ET, EST, setOf(Alphabet.LATIN), ""),
FINNISH(FI, FIN, setOf(Alphabet.LATIN), ""),
FRENCH(FR, FRA, setOf(Alphabet.LATIN), ""),
GEORGIAN(KA, KAT, setOf(Alphabet.GEORGIAN), ""),
GERMAN(DE, DEU, setOf(Alphabet.LATIN), "ß"),
GREEK(EL, ELL, setOf(Alphabet.GREEK), ""),
GUJARATI(GU, GUJ, setOf(Alphabet.GUJARATI), ""),
HEBREW(HE, HEB, setOf(Alphabet.HEBREW), ""),
HINDI(HI, HIN, setOf(DEVANAGARI), ""),
HUNGARIAN(HU, HUN, setOf(Alphabet.LATIN), "ŐőŰű"),
ICELANDIC(IS, ISL, setOf(Alphabet.LATIN), ""),
INDONESIAN(ID, IND, setOf(Alphabet.LATIN), ""),
IRISH(GA, GLE, setOf(Alphabet.LATIN), ""),
ITALIAN(IT, ITA, setOf(Alphabet.LATIN), ""),
JAPANESE(JA, JPN, setOf(HIRAGANA, KATAKANA, HAN), ""),
KAZAKH(KK, KAZ, setOf(CYRILLIC), "ӘәҒғҚқҢңҰұ"),
KOREAN(KO, KOR, setOf(HANGUL), ""),
LATIN(LA, LAT, setOf(Alphabet.LATIN), ""),
LATVIAN(LV, LAV, setOf(Alphabet.LATIN), "ĀāĒēĢģĪīĶķĻļŅņ"),
LITHUANIAN(LT, LIT, setOf(Alphabet.LATIN), "ĖėĮįŲų"),
MACEDONIAN(MK, MKD, setOf(CYRILLIC), "ЃѓЅѕЌќЏџ"),
MALAY(MS, MSA, setOf(Alphabet.LATIN), ""),
MARATHI(MR, MAR, setOf(DEVANAGARI), ""),
MONGOLIAN(MN, MON, setOf(CYRILLIC), "ӨөҮү"),
NORWEGIAN(NO, NOR, setOf(Alphabet.LATIN), ""),
NYNORSK(NN, NNO, setOf(Alphabet.LATIN), ""),
PERSIAN(FA, FAS, setOf(Alphabet.ARABIC), ""),
POLISH(PL, POL, setOf(Alphabet.LATIN), "ŁłŃńŚśŹź"),
PORTUGUESE(PT, POR, setOf(Alphabet.LATIN), ""),
PUNJABI(PA, PAN, setOf(GURMUKHI), ""),
ROMANIAN(RO, RON, setOf(Alphabet.LATIN), "Țţ"),
RUSSIAN(RU, RUS, setOf(CYRILLIC), ""),
SERBIAN(SR, SRP, setOf(CYRILLIC), "ЂђЋћ"),
SLOVAK(SK, SLK, setOf(Alphabet.LATIN), "Ĺ弾Ŕŕ"),
SLOVENE(SL, SLV, setOf(Alphabet.LATIN), ""),
SOMALI(SO, SOM, setOf(Alphabet.LATIN), ""),
SPANISH(ES, SPA, setOf(Alphabet.LATIN), "¿¡"),
SWEDISH(SV, SWE, setOf(Alphabet.LATIN), ""),
TAGALOG(TL, TGL, setOf(Alphabet.LATIN), ""),
TAMIL(TA, TAM, setOf(Alphabet.TAMIL), ""),
TELUGU(TE, TEL, setOf(Alphabet.TELUGU), ""),
THAI(TH, THA, setOf(Alphabet.THAI), ""),
TURKISH(TR, TUR, setOf(Alphabet.LATIN), ""),
UKRAINIAN(UK, UKR, setOf(CYRILLIC), "ҐґЄєЇї"),
URDU(UR, URD, setOf(Alphabet.ARABIC), ""),
VIETNAMESE(VI, VIE, setOf(Alphabet.LATIN),
"ẰằẦầẲẳẨẩẴẵẪẫẮắẤấẠạẶặẬậỀềẺẻỂểẼẽỄễẾếẸẹỆệỈỉĨĩỊịƠơỒồỜờỎỏỔổỞởỖỗỠỡỐốỚớỌọỘộỢợƯưỪừỦủỬửŨũỮữỨứỤụỰựỲỳỶỷỸỹỴỵ"),
WELSH(CY, CYM, setOf(Alphabet.LATIN), ""),

/**
* The imaginary unknown language.
*
* This value is returned if no language can be detected reliably.
*/
UNKNOWN (IsoCode639_1.UNKNOWN, IsoCode639_3.UNKNOWN, setOf(NONE), "");
UNKNOWN(IsoCode639_1.UNKNOWN, IsoCode639_3.UNKNOWN, setOf(NONE), "");

companion object {
/**
Expand Down
28 changes: 17 additions & 11 deletions src/main/kotlin/com/github/pemistahl/lingua/api/LanguageDetector.kt
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,6 @@ import com.github.pemistahl.lingua.internal.util.extension.containsAnyOf
import java.util.SortedMap
import java.util.TreeMap
import java.util.regex.PatternSyntaxException
import kotlin.math.ceil
import kotlin.math.ln

/**
Expand All @@ -78,7 +77,9 @@ class LanguageDetector internal constructor(
internal val numberOfLoadedLanguages: Int = languages.size
) {
private val languagesWithUniqueCharacters = languages.filter { it.uniqueCharacters.isNotEmpty() }.asSequence()
private val alphabetsSupportingExactlyOneLanguage = Alphabet.allSupportingExactlyOneLanguage().filterValues { it in languages }
private val alphabetsSupportingExactlyOneLanguage = Alphabet.allSupportingExactlyOneLanguage().filterValues {
it in languages
}

internal val unigramLanguageModels = loadLanguageModels(ngramLength = 1)
internal val bigramLanguageModels = loadLanguageModels(ngramLength = 2)
Expand Down Expand Up @@ -210,7 +211,8 @@ class LanguageDetector internal constructor(
summedUpProbabilities[language] = probabilities.sumByDouble { it[language] ?: 0.0 }

if (unigramCountsOfInputText.containsKey(language)) {
summedUpProbabilities[language] = summedUpProbabilities.getValue(language) / unigramCountsOfInputText.getValue(language)
summedUpProbabilities[language] = summedUpProbabilities.getValue(language) /
unigramCountsOfInputText.getValue(language)
}
}
return summedUpProbabilities.filter { it.value != 0.0 }
Expand Down Expand Up @@ -321,7 +323,9 @@ class LanguageDetector internal constructor(
Alphabet.HAN -> languages.asSequence().filter { it.alphabets.contains(Alphabet.HAN) }
Alphabet.LATIN -> {
if (languages.contains(NORWEGIAN)) {
languages.asSequence().filter { it.alphabets.contains(Alphabet.LATIN) && it !in setOf(BOKMAL, NYNORSK) }
languages.asSequence().filter {
it.alphabets.contains(Alphabet.LATIN) && it !in setOf(BOKMAL, NYNORSK)
}
} else if (languages.contains(BOKMAL) || languages.contains(NYNORSK)) {
languages.asSequence().filter { it.alphabets.contains(Alphabet.LATIN) && it != NORWEGIAN }
} else {
Expand Down Expand Up @@ -443,8 +447,7 @@ class LanguageDetector internal constructor(
}

private val CHARS_TO_LANGUAGES_MAPPING = mapOf(
"Ćć" to setOf(BOSNIAN, CROATIAN, POLISH),
"Đđ" to setOf(BOSNIAN, CROATIAN, VIETNAMESE),

"Ãã" to setOf(PORTUGUESE, VIETNAMESE),
"ĄąĘę" to setOf(LITHUANIAN, POLISH),
"Ūū" to setOf(LATVIAN, LITHUANIAN),
Expand All @@ -455,23 +458,24 @@ class LanguageDetector internal constructor(
"ŇňŤť" to setOf(CZECH, SLOVAK),
"Ăă" to setOf(ROMANIAN, VIETNAMESE),
"İıĞğ" to setOf(AZERBAIJANI, TURKISH),
"ЁёЫыЭэ" to setOf(BELARUSIAN, KAZAKH, MONGOLIAN, RUSSIAN),
"ЩщЪъ" to setOf(BULGARIAN, KAZAKH, MONGOLIAN, RUSSIAN),
"ЈјЉљЊњ" to setOf(MACEDONIAN, SERBIAN),
"Іі" to setOf(BELARUSIAN, KAZAKH, UKRAINIAN),

"Şş" to setOf(AZERBAIJANI, ROMANIAN, TURKISH),
"Ďď" to setOf(CZECH, ROMANIAN, SLOVAK),
"ÐðÞþ" to setOf(ICELANDIC, LATVIAN, TURKISH),
"Ûû" to setOf(FRENCH, HUNGARIAN, LATVIAN),
"ÈèÙù" to setOf(FRENCH, ITALIAN, VIETNAMESE),
"Ćć" to setOf(BOSNIAN, CROATIAN, POLISH),
"Đđ" to setOf(BOSNIAN, CROATIAN, VIETNAMESE),
"Іі" to setOf(BELARUSIAN, KAZAKH, UKRAINIAN),

"ЙйЬьЮюЧчЯя" to setOf(BELARUSIAN, BULGARIAN, KAZAKH, MONGOLIAN, RUSSIAN, UKRAINIAN),
"Êê" to setOf(AFRIKAANS, FRENCH, PORTUGUESE, VIETNAMESE),
"Õõ" to setOf(ESTONIAN, HUNGARIAN, PORTUGUESE, VIETNAMESE),
"Òò" to setOf(CATALAN, ITALIAN, LATVIAN, VIETNAMESE),
"Ôô" to setOf(FRENCH, PORTUGUESE, SLOVAK, VIETNAMESE),
"Øø" to setOf(BOKMAL, DANISH, NORWEGIAN, NYNORSK),
"ЁёЫыЭэ" to setOf(BELARUSIAN, KAZAKH, MONGOLIAN, RUSSIAN),
"ЩщЪъ" to setOf(BULGARIAN, KAZAKH, MONGOLIAN, RUSSIAN),

"Ýý" to setOf(CZECH, ICELANDIC, SLOVAK, TURKISH, VIETNAMESE),
"Ää" to setOf(ESTONIAN, FINNISH, GERMAN, SLOVAK, SWEDISH),
Expand All @@ -480,9 +484,11 @@ class LanguageDetector internal constructor(
"Ææ" to setOf(BOKMAL, DANISH, ICELANDIC, NORWEGIAN, NYNORSK),
"Åå" to setOf(BOKMAL, DANISH, NORWEGIAN, NYNORSK, SWEDISH),

"Č芚Žž" to setOf(BOSNIAN, CZECH, CROATIAN, LATVIAN, LITHUANIAN, SLOVAK, SLOVENE),
"ЙйЬьЮюЧчЯя" to setOf(BELARUSIAN, BULGARIAN, KAZAKH, MONGOLIAN, RUSSIAN, UKRAINIAN),
"Üü" to setOf(AZERBAIJANI, CATALAN, ESTONIAN, GERMAN, HUNGARIAN, TURKISH),

"Č芚Žž" to setOf(BOSNIAN, CZECH, CROATIAN, LATVIAN, LITHUANIAN, SLOVAK, SLOVENE),

"Çç" to setOf(ALBANIAN, AZERBAIJANI, BASQUE, CATALAN, FRENCH, LATVIAN, PORTUGUESE, TURKISH),
"Öö" to setOf(AZERBAIJANI, ESTONIAN, FINNISH, GERMAN, HUNGARIAN, ICELANDIC, SWEDISH, TURKISH),
"ÁáÍíÚú" to setOf(CATALAN, CZECH, ICELANDIC, IRISH, HUNGARIAN, PORTUGUESE, SLOVAK, VIETNAMESE),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,41 @@ object LanguageModelFilesWriter : FilesWriter() {
checkInputFilePath(inputFilePath)
checkOutputDirectoryPath(outputDirectoryPath)

val unigramModel = createLanguageModel(inputFilePath, inputFileCharset, language, 1, charClass, emptyMap())
val bigramModel = createLanguageModel(inputFilePath, inputFileCharset, language, 2, charClass, unigramModel.absoluteFrequencies)
val trigramModel = createLanguageModel(inputFilePath, inputFileCharset, language, 3, charClass, bigramModel.absoluteFrequencies)
val quadrigramModel = createLanguageModel(inputFilePath, inputFileCharset, language, 4, charClass, trigramModel.absoluteFrequencies)
val fivegramModel = createLanguageModel(inputFilePath, inputFileCharset, language, 5, charClass, quadrigramModel.absoluteFrequencies)
val unigramModel = createLanguageModel(
inputFilePath, inputFileCharset,
language,
1,
charClass,
emptyMap()
)
val bigramModel = createLanguageModel(
inputFilePath, inputFileCharset,
language,
2,
charClass,
unigramModel.absoluteFrequencies
)
val trigramModel = createLanguageModel(
inputFilePath, inputFileCharset,
language,
3,
charClass,
bigramModel.absoluteFrequencies
)
val quadrigramModel = createLanguageModel(
inputFilePath, inputFileCharset,
language,
4,
charClass,
trigramModel.absoluteFrequencies
)
val fivegramModel = createLanguageModel(
inputFilePath, inputFileCharset,
language,
5,
charClass,
quadrigramModel.absoluteFrequencies
)

writeLanguageModel(unigramModel, outputDirectoryPath, "unigrams.json")
writeLanguageModel(bigramModel, outputDirectoryPath, "bigrams.json")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,19 @@ object TestDataFilesWriter : FilesWriter() {
checkInputFilePath(inputFilePath)
checkOutputDirectoryPath(outputDirectoryPath)

createAndWriteSentencesFile(inputFilePath, inputFileCharset, outputDirectoryPath, fileName, maximumLines)
val singleWords = createAndWriteSingleWordsFile(inputFilePath, inputFileCharset, outputDirectoryPath, fileName, charClass, maximumLines)
createAndWriteSentencesFile(
inputFilePath, inputFileCharset,
outputDirectoryPath,
fileName,
maximumLines
)
val singleWords = createAndWriteSingleWordsFile(
inputFilePath, inputFileCharset,
outputDirectoryPath,
fileName,
charClass,
maximumLines
)
createAndWriteWordPairsFile(singleWords, outputDirectoryPath, fileName, maximumLines)
}

Expand Down Expand Up @@ -147,8 +158,8 @@ object TestDataFilesWriter : FilesWriter() {
Files.delete(wordPairsFilePath)
}

for (i in 0..(words.size-2)) {
wordPairs.add(words.slice(i..i+1).joinToString(" "))
for (i in 0..(words.size - 2)) {
wordPairs.add(words.slice(i..i + 1).joinToString(" "))
}

wordPairsFilePath.toFile().bufferedWriter().use { writer ->
Expand Down
6 changes: 2 additions & 4 deletions src/main/kotlin/com/github/pemistahl/lingua/app/App.kt
Original file line number Diff line number Diff line change
Expand Up @@ -108,13 +108,11 @@ private fun runApp() {
isoCodesList.clear()
println("At least one iso code you've entered is not supported. Try again.\n")
}
}
else {
} else {
break
}
}
}
else {
} else {
println("Loading language models...")
detectorBuilder = fromAllBuiltInLanguages()
}
Expand Down
Loading

0 comments on commit f0a8d03

Please sign in to comment.