Skip to content

Commit

Permalink
improve elasticsearch analyzers
Browse files (browse the repository at this point in the history)
  • Loading branch information
guillaume-sainthillier committed Mar 16, 2024
1 parent 4a90ff2 commit cf8c448
Showing 1 changed file with 141 additions and 141 deletions.
282 changes: 141 additions & 141 deletions config/packages/fos_elastica.yaml
Original file line number | Diff line number | Diff line change
[Expand Up] @@ -12,75 +12,57 @@ fos_elastica:

indexes:
event:
indexable_callback: 'isIndexable'
indexable_callback: isIndexable
settings:
analysis:
filter:
french_elision:
type: 'elision'
type: elision
articles_case: true
articles:
['l', 'm', 't', 'qu', 'n', 's', 'j', 'd', 'c', 'jusqu', 'quoiqu', 'lorsqu', 'puisqu']
french_stop:
type: stop
stopwords: _french_
- l
- m
- t
- qu
- n
- s
- j
- d
- c
- jusqu
- quoiqu
- lorsqu
- puisqu
french_stemmer:
type: 'stemmer'
language: 'light_french'
nGram:
type: 'edge_ngram'
min_gram: 2
max_gram: 10
type: stemmer
language: light_french
analyzer:
french_title_heavy:
tokenizer: 'icu_tokenizer'
french_heavy: &french_heavy
tokenizer: icu_tokenizer
filter:
- 'french_elision'
- 'icu_folding'
- 'lowercase'
- 'french_stop'
- 'nGram'
french_title_light:
tokenizer: 'icu_tokenizer'
- french_elision
- icu_folding
- lowercase
- french_stemmer
french_light: &french_light
tokenizer: icu_tokenizer
filter:
- 'french_elision'
- 'icu_folding'
- 'lowercase'
- 'french_stop'
french_natural_heavy:
tokenizer: 'icu_tokenizer'
filter:
- 'icu_folding'
- 'lowercase'
- 'nGram'
french_natural_light:
tokenizer: 'icu_tokenizer'
filter:
- 'icu_folding'
- 'lowercase'
french_literal_light:
tokenizer: 'icu_tokenizer'
filter:
- 'icu_folding'
- 'lowercase'
- 'french_stemmer'
french_description_heavy:
tokenizer: 'icu_tokenizer'
- french_elision
- icu_folding
- lowercase
french_html_heavy:
<<: *french_heavy
char_filter:
- 'html_strip'
filter:
- 'french_elision'
- 'icu_folding'
- 'lowercase'
- 'french_stop'
french_description_light:
tokenizer: 'icu_tokenizer'
- html_strip
french_html_light:
<<: *french_light
char_filter:
- html_strip
french_natural:
tokenizer: icu_tokenizer
filter:
- 'french_elision'
- 'icu_folding'
- 'lowercase'
- 'french_stop'

- icu_folding
- lowercase
persistence:
model: App\Entity\Event
repository: App\SearchRepository\EventElasticaRepository
[Expand All] @@ -92,94 +74,117 @@ fos_elastica:
query_builder_method: createIsActiveQueryBuilder
batch_size: 500
serializer:
groups: ['elasticsearch:event:details']
groups:
- 'elasticsearch:event:details'
properties:
name: { boost: 5, type: 'text', analyzer: french_title_heavy, search_analyzer: french_title_light }
description: { analyzer: french_description_heavy, search_analyzer: french_description_light }
start_date: { type: 'date', format: 'yyyy-MM-dd' }
end_date: { type: 'date', format: 'yyyy-MM-dd' }
draft: { type: 'boolean' }
name:
boost: 5
type: text
analyzer: french_light
fields:
heavy:
type: text
analyzer: french_heavy
description:
type: text
analyzer: french_html_light
fields:
heavy:
type: text
analyzer: french_html_heavy
start_date:
type: date
format: yyyy-MM-dd
end_date:
type: date
format: yyyy-MM-dd
draft:
type: boolean
place_name:
{ boost: 3, type: 'text', analyzer: french_natural_heavy, search_analyzer: french_natural_light }
place_street: { analyzer: french_natural_heavy, search_analyzer: french_natural_light }
boost: 3
type: text
analyzer: french_natural
place_street:
type: text
analyzer: french_natural
place_city:
{ boost: 2, type: 'text', analyzer: french_natural_heavy, search_analyzer: french_natural_light }
place_postal_code: { type: 'text', analyzer: french_natural_light }
theme: { type: 'text', analyzer: french_literal_light }
type: { type: 'text', analyzer: french_literal_light }
boost: 2
type: text
analyzer: french_natural
place_postal_code:
type: text
analyzer: french_natural
theme:
type: text
analyzer: french_natural
type:
type: text
analyzer: french_natural
place:
type: 'object'
type: object
properties:
name:
{
boost: 3,
type: 'text',
analyzer: french_natural_heavy,
search_analyzer: french_natural_light,
}
street: { analyzer: french_natural_heavy, search_analyzer: french_natural_light }
boost: 3
type: text
analyzer: french_natural
street:
type: text
analyzer: french_natural
city_name:
{
boost: 2,
type: 'text',
analyzer: french_natural_heavy,
search_analyzer: french_natural_light,
}
city_postal_code: { boost: 2, type: 'text', analyzer: french_natural_light }
boost: 2
type: text
analyzer: french_natural
city_postal_code:
boost: 3
type: text
analyzer: french_natural
city:
type: 'object'
type: object
properties:
location: { type: 'geo_point' }
location:
type: geo_point
country:
type: 'object'
type: object
properties:
id: { type: 'text', analyzer: french_natural_light, search_analyzer: french_natural_light }

id:
type: text
analyzer: french_natural
city:
settings:
analysis:
filter:
nGram:
type: edge_ngram
min_gram: 1
max_gram: 20
french_elision:
type: 'elision'
articles_case: true
articles:
['l', 'm', 't', 'qu', 'n', 's', 'j', 'd', 'c', 'jusqu', 'quoiqu', 'lorsqu', 'puisqu']
analyzer:
text_analyzer_heavy:
tokenizer: 'icu_tokenizer'
filter:
- 'icu_folding'
- 'lowercase'
- 'nGram'
text_analyzer_light:
tokenizer: 'icu_tokenizer'
text_analyzer:
tokenizer: icu_tokenizer
filter:
- 'icu_folding'
- 'lowercase'
- icu_folding
- lowercase
serializer:
groups: ['elasticsearch:city:details']
groups:
- 'elasticsearch:city:details'
properties:
name: { type: 'text', boost: 3, analyzer: text_analyzer_heavy, search_analyzer: text_analyzer_light }
postal_codes: { type: 'text', analyzer: text_analyzer_light }
population: { type: 'double' }
name:
type: text
boost: 3
analyzer: text_analyzer
postal_codes:
boost: 10
type: text
analyzer: text_analyzer
population:
type: double
country:
type: 'object'
type: object
properties:
name:
{
boost: 5,
type: 'text',
analyzer: text_analyzer_heavy,
search_analyzer: text_analyzer_light,
}
boost: 5
type: text
analyzer: text_analyzer
parent:
type: 'object'
type: object
properties:
name: { type: 'text', analyzer: text_analyzer_heavy, search_analyzer: text_analyzer_light }
name:
type: text
analyzer: text_analyzer
persistence:
driver: orm
model: App\Entity\City
[Expand All] @@ -190,32 +195,27 @@ fos_elastica:
user:
settings:
analysis:
filter:
nGram:
type: edge_ngram
min_gram: 3
max_gram: 20
analyzer:
text_analyzer_heavy:
tokenizer: 'icu_tokenizer'
filter:
- 'icu_folding'
- 'lowercase'
- 'nGram'
text_analyzer_light:
tokenizer: 'icu_tokenizer'
text_analyzer:
tokenizer: icu_tokenizer
filter:
- 'icu_folding'
- 'lowercase'
- icu_folding
- lowercase
serializer:
groups: ['elasticsearch:user:details']
groups:
- 'elasticsearch:user:details'
properties:
username:
{ boost: 5, type: 'text', analyzer: text_analyzer_heavy, search_analyzer: text_analyzer_light }
boost: 5
type: text
analyzer: text_analyzer
firstname:
{ boost: 1, type: 'text', analyzer: text_analyzer_heavy, search_analyzer: text_analyzer_light }
type: text
analyzer: text_analyzer
lastname:
{ boost: 3, type: 'text', analyzer: text_analyzer_heavy, search_analyzer: text_analyzer_light }
boost: 3
type: text
analyzer: text_analyzer
persistence:
model: App\Entity\User
repository: App\SearchRepository\UserElasticaRepository
Expand Down

0 comments on commit cf8c448

Please sign in to comment.