From 978286baa5cedecb3dd8976a4b2dfb351bdcf46a Mon Sep 17 00:00:00 2001 From: 758 Date: Mon, 7 Oct 2024 21:50:45 +0900 Subject: [PATCH] =?UTF-8?q?kuromoji=E5=90=91=E3=81=91=E3=81=AB=E3=82=AB?= =?UTF-8?q?=E3=82=B9=E3=82=BF=E3=83=9E=E3=82=A4=E3=82=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/chewy/accounts_index.rb | 19 ++++++++++++++++--- app/chewy/public_statuses_index.rb | 22 ++++++++++++++++++---- app/chewy/statuses_index.rb | 23 +++++++++++++++++++---- 3 files changed, 53 insertions(+), 11 deletions(-) diff --git a/app/chewy/accounts_index.rb b/app/chewy/accounts_index.rb index 59f2f991f2..ef103d67fb 100644 --- a/app/chewy/accounts_index.rb +++ b/app/chewy/accounts_index.rb @@ -23,13 +23,22 @@ class AccountsIndex < Chewy::Index analyzer: { natural: { - tokenizer: 'standard', + tokenizer: 'kuromoji', + type: 'custom', + char_filter: %w( + icu_normalizer + html_strip + kuromoji_iteration_mark + ), filter: %w( + english_possessive_stemmer lowercase asciifolding + kuromoji_stemmer + kuromoji_number + kuromoji_baseform + icu_normalizer cjk_width - elision - english_possessive_stemmer english_stop english_stemmer ), @@ -52,6 +61,10 @@ class AccountsIndex < Chewy::Index min_gram: 1, max_gram: 15, }, + kuromoji: { + type: 'kuromoji_tokenizer', + mode: 'search', + }, }, } diff --git a/app/chewy/public_statuses_index.rb b/app/chewy/public_statuses_index.rb index 076f72e525..7b40997c25 100644 --- a/app/chewy/public_statuses_index.rb +++ b/app/chewy/public_statuses_index.rb @@ -20,7 +20,12 @@ class PublicStatusesIndex < Chewy::Index language: 'possessive_english', }, }, - + tokenizer: { + kuromoji: { + type: 'kuromoji_tokenizer', + mode: 'search', + }, + }, analyzer: { verbatim: { tokenizer: 'uax_url_email', @@ -28,13 +33,22 @@ class PublicStatusesIndex < Chewy::Index }, content: { - tokenizer: 'standard', + tokenizer: 'kuromoji', + type: 'custom', + char_filter: %w( + icu_normalizer + html_strip + kuromoji_iteration_mark + ), filter: %w( + english_possessive_stemmer lowercase asciifolding + kuromoji_stemmer + kuromoji_number + kuromoji_baseform + icu_normalizer cjk_width - elision - english_possessive_stemmer english_stop english_stemmer ), diff --git a/app/chewy/statuses_index.rb b/app/chewy/statuses_index.rb index fcec20ff5c..6b1688dd8e 100644 --- a/app/chewy/statuses_index.rb +++ b/app/chewy/statuses_index.rb @@ -1,6 +1,7 @@ # frozen_string_literal: true class StatusesIndex < Chewy::Index + include FormattingHelper include DatetimeClampingConcern settings index: index_preset(refresh_interval: '30s', number_of_shards: 5), analysis: { @@ -20,7 +21,12 @@ class StatusesIndex < Chewy::Index language: 'possessive_english', }, }, - + tokenizer: { + kuromoji: { + type: 'kuromoji_tokenizer', + mode: 'search', + }, + }, analyzer: { verbatim: { tokenizer: 'uax_url_email', @@ -28,13 +34,22 @@ class StatusesIndex < Chewy::Index }, content: { - tokenizer: 'standard', + tokenizer: 'kuromoji', + type: 'custom', + char_filter: %w( + icu_normalizer + html_strip + kuromoji_iteration_mark + ), filter: %w( + english_possessive_stemmer lowercase asciifolding + kuromoji_stemmer + kuromoji_number + kuromoji_baseform + icu_normalizer cjk_width - elision - english_possessive_stemmer english_stop english_stemmer ),