JakduK / tasks

0 stars 0 forks source link

Elasticsearch 7.x 설치 #2

Open Pyohwan opened 4 years ago

Pyohwan commented 4 years ago

목표

Pyohwan commented 4 years ago
  1. yum upgrade 로 먼저 OS 라이브러리 업그레이드 함
    1. 이에 따라 elasticsearch 버전도 5.6.16 (5.x 의 마지막 릴리즈) 로 올라감
    2. /usr/share/elasticsearch/plugins/analysis-seunjeon/plugin-descriptor.properties 도 5.6.16 으로 바꿔줌
  2. backup 해두자
    1. /etc/elasticsearch/elasticsearch.yml 에서 path.repo: ["/jakduk/storage/elasticsearch-snapshot2"] 로 바꾸고 재시작 (아래 snapshot repository 의 location 과 같은 경로여야 함)

[root@localhost ~]# curl -X PUT "192.168.0.14:9200/_snapshot/my_backup?pretty" -H 'Content-Type: application/json' -d' { "type": "fs", "settings": { "location": "/jakduk/storage/elasticsearch-snapshot2" } } '

Pyohwan commented 4 years ago

진행 상황

Pyohwan commented 4 years ago

jakduk-batch 에서 Index 생성시 mapping 이 안됨

Pyohwan commented 4 years ago

Elasticsearch 5.x 에서 Mapping

[root@localhost ~]# curl -X GET "192.168.0.14:9200/jakduk_dev_board/_mapping?pretty"
{
  "jakduk_dev_board" : {
    "mappings" : {
      "comment" : {
        "_parent" : {
          "type" : "article"
        },
        "_routing" : {
          "required" : true
        },
        "properties" : {
          "article" : {
            "properties" : {
              "board" : {
                "type" : "keyword",
                "index" : false
              },
              "id" : {
                "type" : "keyword",
                "index" : false
              },
              "seq" : {
                "type" : "integer",
                "index" : false
              }
            }
          },
          "content" : {
            "type" : "text",
            "analyzer" : "korean"
          },
          "galleries" : {
            "type" : "keyword",
            "index" : false
          },
          "id" : {
            "type" : "text"
          },
          "writer" : {
            "properties" : {
              "picture" : {
                "properties" : {
                  "id" : {
                    "type" : "text",
                    "fields" : {
                      "keyword" : {
                        "type" : "keyword",
                        "ignore_above" : 256
                      }
                    }
                  },
                  "largePictureUrl" : {
                    "type" : "text",
                    "fields" : {
                      "keyword" : {
                        "type" : "keyword",
                        "ignore_above" : 256
                      }
                    }
                  },
                  "smallPictureUrl" : {
                    "type" : "text",
                    "fields" : {
                      "keyword" : {
                        "type" : "keyword",
                        "ignore_above" : 256
                      }
                    }
                  },
                  "sourceType" : {
                    "type" : "text",
                    "fields" : {
                      "keyword" : {
                        "type" : "keyword",
                        "ignore_above" : 256
                      }
                    }
                  }
                }
              },
              "providerId" : {
                "type" : "keyword",
                "index" : false
              },
              "userId" : {
                "type" : "keyword",
                "index" : false
              },
              "username" : {
                "type" : "keyword",
                "index" : false
              }
            }
          }
        }
      },
      "article" : {
        "properties" : {
          "board" : {
            "type" : "keyword"
          },
          "category" : {
            "type" : "keyword"
          },
          "content" : {
            "type" : "text",
            "analyzer" : "korean"
          },
          "galleries" : {
            "type" : "keyword",
            "index" : false
          },
          "id" : {
            "type" : "text"
          },
          "registerDate" : {
            "type" : "date"
          },
          "seq" : {
            "type" : "integer",
            "index" : false
          },
          "subject" : {
            "type" : "text",
            "analyzer" : "korean"
          },
          "writer" : {
            "properties" : {
              "picture" : {
                "properties" : {
                  "id" : {
                    "type" : "text",
                    "fields" : {
                      "keyword" : {
                        "type" : "keyword",
                        "ignore_above" : 256
                      }
                    }
                  },
                  "largePictureUrl" : {
                    "type" : "text",
                    "fields" : {
                      "keyword" : {
                        "type" : "keyword",
                        "ignore_above" : 256
                      }
                    }
                  },
                  "smallPictureUrl" : {
                    "type" : "text",
                    "fields" : {
                      "keyword" : {
                        "type" : "keyword",
                        "ignore_above" : 256
                      }
                    }
                  }
                }
              },
              "providerId" : {
                "type" : "keyword",
                "index" : false
              },
              "userId" : {
                "type" : "keyword",
                "index" : false
              },
              "username" : {
                "type" : "keyword",
                "index" : false
              }
            }
          }
        }
      }
    }
  }
}
Pyohwan commented 2 years ago

은전한닢 to 노리

https://esbook.kimjmin.net/06-text-analysis/6.7-stemming/6.7.2-nori

테스트

standard 토크나이저로 "동해물과 백두산이" 문장 분석

curl --location --request GET 'http://192.168.0.18:9200/_analyze' \
--header 'Content-Type: application/json' \
--data-raw '{
  "tokenizer": "standard",
  "text": [
    "동해물과 백두산이"
  ]
}'
{
    "tokens": [
        {
            "token": "동해물과",
            "start_offset": 0,
            "end_offset": 4,
            "type": "<HANGUL>",
            "position": 0
        },
        {
            "token": "백두산이",
            "start_offset": 5,
            "end_offset": 9,
            "type": "<HANGUL>",
            "position": 1
        }
    ]
}

nori_tokenizer 토크나이저로 "동해물과 백두산이" 문장 분석

curl --location --request GET 'http://192.168.0.18:9200/_analyze' \
--header 'Content-Type: application/json' \
--data-raw '{
  "tokenizer": "nori_tokenizer",
  "text": [
    "동해물과 백두산이"
  ]
}'
{
    "tokens": [
        {
            "token": "동해",
            "start_offset": 0,
            "end_offset": 2,
            "type": "word",
            "position": 0
        },
        {
            "token": "물",
            "start_offset": 2,
            "end_offset": 3,
            "type": "word",
            "position": 1
        },
        {
            "token": "과",
            "start_offset": 3,
            "end_offset": 4,
            "type": "word",
            "position": 2
        },
        {
            "token": "백두",
            "start_offset": 5,
            "end_offset": 7,
            "type": "word",
            "position": 3
        },
        {
            "token": "산",
            "start_offset": 7,
            "end_offset": 8,
            "type": "word",
            "position": 4
        },
        {
            "token": "이",
            "start_offset": 8,
            "end_offset": 9,
            "type": "word",
            "position": 5
        }
    ]
}
Pyohwan commented 2 years ago

searchWord Reindex

참고: https://www.elastic.co/guide/en/elasticsearch/reference/7.17/reindex-upgrade-remote.html

Elasticsearch 5.x 의 search_word index 를 새로 설치한 Elasticsearch 7.x 로 reindex 하자.

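새로 설치한 Elasticsearch 7.x 노드의 /etc/elasticsearch/elasticsearch.yml 에 remote(5.x) ES 주소를 reindex.remote.whitelist 로 등록하고 재시작

reindex.remote.whitelist: 192.168.0.13:9200

_reindex 요청은 새 7.x 노드로 보내고, source.remote.host 에는 기존 5.x 노드 주소를 넣는다. (NOTE: 아래 예시는 요청 대상과 remote host 가 모두 192.168.0.13 으로 적혀 있음 — 실제 주소 확인 필요)
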
curl --location --request POST 'http://192.168.0.13:9200/_reindex' \
--header 'Content-Type: application/json' \
--data-raw '{
    "source": {
        "remote": {
            "host": "http://192.168.0.13:9200"
        },
        "index": "jakduk_dev_search_word"
    },
    "dest": {
        "index": "jakduk_dev_search_word"
    }
}'
{
    "took": 4930,
    "timed_out": false,
    "total": 1115,
    "updated": 0,
    "created": 1115,
    "deleted": 0,
    "batches": 2,
    "version_conflicts": 0,
    "noops": 0,
    "retries": {
        "bulk": 0,
        "search": 0
    },
    "throttled_millis": 0,
    "requests_per_second": -1.0,
    "throttled_until_millis": 0,
    "failures": []
}

결과. jakduk_dev_search_word 인덱스는 reindex 되었음

health status index                  uuid                   pri rep docs.count docs.deleted store.size pri.store.size
yellow open   jakduk_dev_board       mEJgbcUzSM-bLmOSD0EaAg   1   1        339            0    116.5kb        116.5kb
yellow open   jakduk_dev_gallery     zPT9QZ2SSFy5Lv1L14q6dA   1   1          0            0       283b           283b
yellow open   jakduk_dev_search_word nbAEKgBfRuW8sNoXmCgs9A   1   1       1115            0     91.4kb         91.4kb
yellow open   nori_sample            4t7rm5eWTYyDjDlaB9Xv6w   1   1          0            0       283b           283b