kuromoji向けにカスタマイズ

This commit is contained in:
nagoya 2024-10-07 21:50:45 +09:00
parent 338e8dc77e
commit 978286baa5
3 changed files with 53 additions and 11 deletions

View File

@@ -23,13 +23,22 @@ class AccountsIndex < Chewy::Index
analyzer: { analyzer: {
natural: { natural: {
tokenizer: 'standard', tokenizer: 'kuromoji',
type: 'custom',
char_filter: %w(
icu_normalizer
html_strip
kuromoji_iteration_mark
),
filter: %w( filter: %w(
english_possessive_stemmer
lowercase lowercase
asciifolding asciifolding
kuromoji_stemmer
kuromoji_number
kuromoji_baseform
icu_normalizer
cjk_width cjk_width
elision
english_possessive_stemmer
english_stop english_stop
english_stemmer english_stemmer
), ),
@@ -52,6 +61,10 @@ class AccountsIndex < Chewy::Index
min_gram: 1, min_gram: 1,
max_gram: 15, max_gram: 15,
}, },
kuromoji: {
type: 'kuromoji_tokenizer',
mode: 'search',
},
}, },
} }

View File

@@ -20,7 +20,12 @@ class PublicStatusesIndex < Chewy::Index
language: 'possessive_english', language: 'possessive_english',
}, },
}, },
tokenizer: {
kuromoji: {
type: 'kuromoji_tokenizer',
mode: 'search',
},
},
analyzer: { analyzer: {
verbatim: { verbatim: {
tokenizer: 'uax_url_email', tokenizer: 'uax_url_email',
@@ -28,13 +33,22 @@ class PublicStatusesIndex < Chewy::Index
}, },
content: { content: {
tokenizer: 'standard', tokenizer: 'kuromoji',
type: 'custom',
char_filter: %w(
icu_normalizer
html_strip
kuromoji_iteration_mark
),
filter: %w( filter: %w(
english_possessive_stemmer
lowercase lowercase
asciifolding asciifolding
kuromoji_stemmer
kuromoji_number
kuromoji_baseform
icu_normalizer
cjk_width cjk_width
elision
english_possessive_stemmer
english_stop english_stop
english_stemmer english_stemmer
), ),

View File

@@ -1,6 +1,7 @@
# frozen_string_literal: true # frozen_string_literal: true
class StatusesIndex < Chewy::Index class StatusesIndex < Chewy::Index
include FormattingHelper
include DatetimeClampingConcern include DatetimeClampingConcern
settings index: index_preset(refresh_interval: '30s', number_of_shards: 5), analysis: { settings index: index_preset(refresh_interval: '30s', number_of_shards: 5), analysis: {
@@ -20,7 +21,12 @@ class StatusesIndex < Chewy::Index
language: 'possessive_english', language: 'possessive_english',
}, },
}, },
tokenizer: {
kuromoji: {
type: 'kuromoji_tokenizer',
mode: 'search',
},
},
analyzer: { analyzer: {
verbatim: { verbatim: {
tokenizer: 'uax_url_email', tokenizer: 'uax_url_email',
@@ -28,13 +34,22 @@ class StatusesIndex < Chewy::Index
}, },
content: { content: {
tokenizer: 'standard', tokenizer: 'kuromoji',
type: 'custom',
char_filter: %w(
icu_normalizer
html_strip
kuromoji_iteration_mark
),
filter: %w( filter: %w(
english_possessive_stemmer
lowercase lowercase
asciifolding asciifolding
kuromoji_stemmer
kuromoji_number
kuromoji_baseform
icu_normalizer
cjk_width cjk_width
elision
english_possessive_stemmer
english_stop english_stop
english_stemmer english_stemmer
), ),