Skip to content

Commit

Permalink
add binary dictionary test
Browse files Browse the repository at this point in the history
  • Loading branch information
mh-northlander committed Jan 16, 2025
1 parent 8f9dad9 commit 1f8d6ac
Show file tree
Hide file tree
Showing 3 changed files with 94 additions and 31 deletions.
27 changes: 9 additions & 18 deletions src/test/java/com/worksap/nlp/sudachi/JapaneseDictionaryTest.kt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2024 Works Applications Co., Ltd.
* Copyright (c) 2024-2025 Works Applications Co., Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -60,27 +60,18 @@ class JapaneseDictionaryTest {
@Test
fun throwExceptionOnUsingIncompatibleDicts() {
// build another system dict (should have different signature)
val anotherSystemDictData: MemChannel = run {
val chan = MemChannel()
DicBuilder.system()
.matrix(res("/dict/matrix.def"))
.lexicon(res("/dict/lex.csv"))
.comment("another system dictionary for the unit tests")
.build(chan)
chan
}
val anotherSystemDict = BinaryDictionary.loadSystem(anotherSystemDictData.buffer())
val anotherSystemDict =
BinaryDictionary.loadSystem(
TestDictionary.buildSystemDictData("another system dictionary for the unit tests")
.buffer())

// build user dict based on another system dict
val anotherUserDictData = run {
val chan = MemChannel()
DicBuilder.user().system(anotherSystemDict).lexicon(res("/dict/user.csv")).build(chan)
chan
}
val anotherUserDict = BinaryDictionary.loadUser(anotherUserDictData.buffer())
val anotherUserDict =
BinaryDictionary.loadUser(
TestDictionary.buildUserDictData(anotherSystemDict, res("/dict/user.csv")).buffer())

// TestDictionary.systemDict + another user dict
val confAnotherUser = TestDictionary.user0Cfg().addUserDictionary(anotherUserDict)
val confAnotherUser = TestDictionary.user1Cfg().addUserDictionary(anotherUserDict)
assertFailsWith(IllegalArgumentException::class) { Dictionary.load(confAnotherUser) }

// another system dict + TestDictionary.userDict
Expand Down
30 changes: 17 additions & 13 deletions src/test/java/com/worksap/nlp/sudachi/TestDictionary.kt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2017-2024 Works Applications Co., Ltd.
* Copyright (c) 2017-2025 Works Applications Co., Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -19,31 +19,35 @@ package com.worksap.nlp.sudachi
import com.worksap.nlp.sudachi.dictionary.BinaryDictionary
import com.worksap.nlp.sudachi.dictionary.build.DicBuilder
import com.worksap.nlp.sudachi.dictionary.build.MemChannel
import java.net.URL

/** Utility for lazily creating binary dictionaries for test */
object TestDictionary {
val systemDictData: MemChannel by lazy {
val result = MemChannel()
fun buildSystemDictData(comment: String): MemChannel {
val chan = MemChannel()
DicBuilder.system()
.matrix(res("/dict/matrix.def"))
.lexicon(res("/dict/lex.csv"))
.comment("the system dictionary for the unit tests")
.build(result)
result
.comment(comment)
.signature(null)
.build(chan)
return chan
}

val userDict1Data: MemChannel by lazy {
fun buildUserDictData(system: BinaryDictionary, url: URL): MemChannel {
val chan = MemChannel()
DicBuilder.user().system(systemDict).lexicon(res("/dict/user.csv")).build(chan)
chan
DicBuilder.user().system(system).lexicon(url).build(chan)
return chan
}

val userDict2Data: MemChannel by lazy {
val chan = MemChannel()
DicBuilder.user().system(systemDict).lexicon(res("/dict/user2.csv")).build(chan)
chan
val systemDictData: MemChannel by lazy {
buildSystemDictData("the system dictionary for the unit tests")
}

val userDict1Data: MemChannel by lazy { buildUserDictData(systemDict, res("/dict/user.csv")) }

val userDict2Data: MemChannel by lazy { buildUserDictData(systemDict, res("/dict/user2.csv")) }

val systemDict: BinaryDictionary
get() = BinaryDictionary.loadSystem(systemDictData.buffer())

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
/*
* Copyright (c) 2025 Works Applications Co., Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.worksap.nlp.sudachi

import com.worksap.nlp.sudachi.dictionary.BinaryDictionary
import java.io.IOException
import kotlin.test.Test
import kotlin.test.assertFailsWith
import kotlin.test.assertFalse
import kotlin.test.assertTrue

/**
 * Tests for [BinaryDictionary]: loading dictionary data with the wrong loader, and the
 * `isCompatibleWith` check between system and user dictionaries.
 */
class BinaryDictionaryTest {
  /** Feeding the system dictionary data to `loadUser` is expected to raise an [IOException]. */
  @Test
  fun failToLoadSystemAsUser() {
    assertFailsWith<IOException> {
      val systemBytes = TestDictionary.systemDictData.buffer()
      BinaryDictionary.loadUser(systemBytes)
    }
  }

  /** Feeding user dictionary data to `loadSystem` is expected to raise an [IOException]. */
  @Test
  fun failToLoadUserAsSystem() {
    assertFailsWith<IOException> {
      val userBytes = TestDictionary.userDict1Data.buffer()
      BinaryDictionary.loadSystem(userBytes)
    }
  }

  /**
   * Dictionaries taken from [TestDictionary] are expected to report compatibility with
   * themselves and with each other, in both directions.
   */
  @Test
  fun compatibleDicts() {
    with(TestDictionary) {
      // each dictionary against itself
      assertTrue(systemDict.isCompatibleWith(systemDict))
      assertTrue(userDict1.isCompatibleWith(userDict1))

      // system <-> user, and user <-> user
      assertTrue(systemDict.isCompatibleWith(userDict1))
      assertTrue(userDict1.isCompatibleWith(systemDict))
      assertTrue(userDict1.isCompatibleWith(userDict2))
    }
  }

  /**
   * A separately built system dictionary (and a user dictionary built on top of it) is expected
   * to be compatible with itself but not with the dictionaries provided by [TestDictionary].
   */
  @Test
  fun incompatibleDicts() {
    // a second system dictionary built with a different comment
    // (should have a different signature than TestDictionary.systemDict)
    val otherSystemData =
        TestDictionary.buildSystemDictData("another system dictionary for the unit tests")
    val otherSystem = BinaryDictionary.loadSystem(otherSystemData.buffer())

    // a user dictionary built against that second system dictionary
    val otherUserData = TestDictionary.buildUserDictData(otherSystem, res("/dict/user.csv"))
    val otherUser = BinaryDictionary.loadUser(otherUserData.buffer())

    // the freshly built pair belongs together
    assertTrue(otherSystem.isCompatibleWith(otherUser))

    // ... but neither of them matches the shared test dictionaries
    with(TestDictionary) {
      assertFalse(otherSystem.isCompatibleWith(systemDict))
      assertFalse(otherSystem.isCompatibleWith(userDict1))
      assertFalse(otherUser.isCompatibleWith(systemDict))
      assertFalse(otherUser.isCompatibleWith(userDict2))
    }
  }
}

0 comments on commit 1f8d6ac

Please sign in to comment.