Elasticsearch 7.x 修改分词

Elasticsearch(7.x)

官网:https://www.elastic.co/guide/en/elasticsearch/reference/7.5/index.html

elastic 默认分词器

template,index的创建以及分词修改

  • PUT _template

    PUT /_template/my_test
    {
    "index_patterns": ["my_test-*"],
    "order": 1,
    "settings": {
    "index": {
    "number_of_shards": "3",
    "number_of_replicas": "1",
    "store": {
    "type": "niofs"
    }
    }
    },
    "mappings": {
    "_source": {
    "enabled": "true"
    },
    "dynamic_templates": [
    {
    "stringType": {
    "mapping": {
    "type": "keyword"
    },
    "match_mapping_type": "string"
    }
    }
    ],
    "properties":{
    "name": {
    "analyzer": "ik_max_word", //指定分词
    "type": "text"
    }
    }
    },
    "aliases": {} //设置别名
    }
  • PUT index

    PUT /my_test-1
    {
    "settings": {
    "index": {
    "number_of_shards": "3",
    "number_of_replicas": "1",
    "store": {
    "type": "niofs"
    }
    }
    },
    "mappings": {
    "_source": {
    "includes": [
    "name" //指定索引文本字段
    ]
    },
    "dynamic_templates": [
    {
    "stringType": {
    "match_mapping_type": "string",
    "mapping": {
    "index": false,
    "store": false,
    "type": "keyword"
    }
    }
    }
    ],
    "properties": {
    "name": {
    "type": "text",
    "fields": {
    "std": {
    "type": "text",
    "analyzer": "standard"
    }
    },
    "analyzer": "ik_max_word" //指定分词
    }
    }
    }
    }
  • 给指定index添加新的分词(ngram)

  1. 首先将分词加入 settings 中;修改 analysis 设置时需要先将 index 关闭(_close),修改完成后再重新打开(_open)
    POST my_test-2/_close
    POST my_test-2/_open
  2. 添加分词
    PUT /my_test-1/_settings
    {
    "settings": {
    "analysis": {
    "analyzer": {
    "ngram_analyzer": {
    "tokenizer": "ngram_tokenizer"
    }
    },
    "tokenizer": {
    "ngram_tokenizer": {
    "type": "ngram",
    "min_gram": 3,
    "max_gram": 3,
    "token_chars": [
    "letter",
    "digit"
    ]
    }
    }
    }
    }
    }
  • 将索引中指定字段使用新增加的分词(注意:已存在字段的 analyzer 不能直接修改,此请求仅适用于新增字段或新增子字段,否则需要重建索引)

    PUT my_test-2/_mapping/
    {
    "properties": {
    "name": {
    "type": "text",
    "fields": {
    "std": {
    "type": "text",
    "analyzer": "standard"
    }
    },
    "analyzer": "ngram_analyzer"
    }
    }
    }
  • PUT doc

    PUT my_test-1/_doc/[id]?routing=[my_test_data-1]
    {
    "name": ""
    }
  • GET search

    GET my_test-2/_search
    {
    "size": 20,
    "query": {
    "bool": {
    "must": [
    {
    "match": {
    "name": {
    "query": "",
    "minimum_should_match": "-25%",
    "boost": 2
    }
    }
    }
    ]
    }
    }
    }
  • 查看分词结果(ngram 和 ik_max_word 分词)

    GET /my_test-1/_analyze
    {
    "tokenizer": "ngram",
    "text":""
    }

    GET /my_test-2/_analyze
    {
    "analyzer": "ik_max_word",
    "text": ""
    }
  • 为指定字段添加子字段(fields),子字段可以指定不同的分词机制;在 search 的时候可通过 name.<子字段名>(例如 name.std)来实现不同分词的搜索

    PUT my_test-1/_mapping
    {
    "properties": {
    "name": {
    "type": "text",
    "fields": {
    "std": {
    "type": "text",
    "analyzer": "ngram_analyzer"
    }
    },
    "analyzer": "ik_max_word"
    }
    }
    }

小记

修改了已保存在 _source 中的字段的 mapping(例如新增子字段)之后,可通过 POST my_index/_update_by_query?conflicts=proceed 来重新索引已有数据,使新的 mapping 对旧文档生效

curl查询es

  • Authorization 认证采用 Basic 方式:请求头中 Basic 后面的内容是将「用户名:密码」用 base64 编码后的结果

  • 查看索引
    curl -XGET -H 'Authorization: Basic {basic.authentication}' 'http://{ip:port}/_cat/indices/{index_name}*'

  • 创建索引
    curl -XPUT -H 'Authorization: Basic {basic.authentication}' 'http://{ip:port}/{index_name}'

  • 查模版
    curl -XGET -H 'Authorization: Basic {basic.authentication}' 'http://{ip:port}/_template/{template_name}'
    curl -XGET -H 'Authorization: Basic {basic.authentication}' 'http://{ip:port}/_template/{template_name}?pretty'