# es-name-test.sh

#!/bin/sh
# Recreate the "names-test" index with three custom name analyzers, then
# exercise each analyzer through the _analyze API.
# Requires the phonetic analysis plugin:
#   https://github.com/elasticsearch/elasticsearch-analysis-phonetic

# Delete old version to make sure new settings are applied
curl -s -XDELETE "localhost:9200/names-test/"
echo

# Index settings:
#   lowkeyword-analyzer - whole input as one lowercased token (exact match)
#   phonetic-analyzer   - double-metaphone codes per token (sounds-alike match)
#   nickname-analyzer   - first token only, mapped through a
#                         nickname->basename synonym file
# The "name" field is a multi_field indexed four ways (standard, asis,
# phonetic, basename) so queries can pick whichever matching style they need.
curl -s -XPUT "localhost:9200/names-test" -d '{
    "settings": {
        "analysis": {
            "analyzer": {
                "lowkeyword-analyzer": {
                    "type": "custom",
                    "tokenizer": "keyword",
                    "filter" : ["lowercase"]
                },
                "phonetic-analyzer": {
                    "type": "custom",
                    "tokenizer": "whitespace",
                    "filter" : ["standard", "lowercase", "dmeta-filter"]
                },
                "nickname-analyzer": {
                    "type": "custom",
                    "tokenizer": "whitespace",
                    "filter" : ["onetoken-filter", "lowercase", "nickname-filter"]
                }
            },
            "filter" : {
                "dmeta-filter" : {
                    "type" : "phonetic",
                    "encoder" : "doublemetaphone",
                    "replace" : true
                },
                "onetoken-filter" : {
                    "type" : "limit",
                    "max_token_count" : 1
                },
                "nickname-filter" : {
                    "type" : "synonym",
                    "synonyms_path" : "../config/analysis/nick2basename.txt"
                }
            }
        }
    },
    "mappings": {
        "_default_": {
          "_timestamp" : {
            "enabled" : true,
            "store" : true
          }
        },
        "doc": {
            "properties": {
                "name" : {
                  "type" : "multi_field",
                  "fields" : {
                    "name" : {"type" : "string", "index" : "analyzed",
                        "index_analyzer": "standard", "search_analyzer": "standard"},
                    "asis" : {"type" : "string", "index" : "analyzed",
                        "index_analyzer": "lowkeyword-analyzer", "search_analyzer": "lowkeyword-analyzer"},
                    "phonetic" : {"type" : "string", "index" : "analyzed",
                        "index_analyzer": "phonetic-analyzer", "search_analyzer": "phonetic-analyzer"},
                    "basename" : {"type" : "string", "index" : "analyzed",
                        "index_analyzer": "nickname-analyzer", "search_analyzer": "nickname-analyzer"}
                  }
                },
                "date" : {
                  "type" : "date",
                  "format" : "dateOptionalTime"
                }
            }
        }
    }
}'


# Sanity-check each custom analyzer via the _analyze API:
echo testing lowkeyword analyzer
curl -s -XGET 'localhost:9200/names-test/_analyze?analyzer=lowkeyword-analyzer&pretty=1' -d 'ALAN JURGENSEN'
echo

echo testing phonetic analyzer
curl -s -XGET 'localhost:9200/names-test/_analyze?analyzer=phonetic-analyzer&pretty=1' -d 'ALEN JORGENSEN'
echo

echo testing nickname analyzer
curl -s -XGET 'localhost:9200/names-test/_analyze?analyzer=nickname-analyzer&pretty=1' -d 'BILL ALAN'
echo

#!/bin/sh
# first load :  schema-accts-search-exp.sh

# putdata: index ten sample name documents into names-test, refresh the
# index, then fetch a few of them back.
# NOTE(review): this function is defined but never invoked in this script —
# call `putdata` after creating the index to actually load the test data.
putdata () {
# Put some new docs
curl -sXPUT "localhost:9200/names-test/doc/1001" -d '{"name": "Joe Johnson", "date": "2013-08-01"}'
echo
curl -sXPUT "localhost:9200/names-test/doc/1002" -d '{"name": "JOHN JOHNSON", "date": "2013-08-02"}'
echo
curl -sXPUT "localhost:9200/names-test/doc/1003" -d '{"name": "jeff smith", "date": "2013-08-03"}'
echo
curl -sXPUT "localhost:9200/names-test/doc/1004" -d '{"name": "Jeff Johnson", "date": "2013-08-04"}'
echo
curl -sXPUT "localhost:9200/names-test/doc/1005" -d '{"name": "john smith", "date": "2013-08-05"}'
echo
curl -sXPUT "localhost:9200/names-test/doc/1006" -d '{"name": "ALAN HANSON", "date": "2013-08-06"}'
echo
curl -sXPUT "localhost:9200/names-test/doc/1007" -d '{"name": "MIKE SMITH", "date": "2013-08-07"}'
echo
curl -sXPUT "localhost:9200/names-test/doc/1008" -d '{"name": "ALAN JURGENSEN", "date": "2013-08-08"}'
echo
curl -sXPUT "localhost:9200/names-test/doc/1009" -d '{"name": "ALLEN JORGENSON", "date": "2013-08-09"}'
echo
curl -sXPUT "localhost:9200/names-test/doc/1010" -d '{"name": "ALLEN JURGENSON", "date": "2013-08-10"}'
echo

# Make the new docs visible to search immediately
curl -sXPOST "localhost:9200/names-test/_refresh"
echo
# check for existence via a HEAD request.
# Bugfix: `curl -XHEAD` sends a HEAD request but still waits for a response
# body and hangs; use --head (-I), which tells curl no body will follow.
curl -s --head 'localhost:9200/names-test/doc/1001'
echo
# get doc
curl -sXGET 'localhost:9200/names-test/doc/1008?pretty'
echo
curl -sXGET 'localhost:9200/names-test/doc/1009?pretty'
echo

# get specific fields (default field is _source)
curl -sXGET 'localhost:9200/names-test/doc/1008?fields=_timestamp,name&pretty'
echo
}

# Search and return every doc (match_all), including the stored _timestamp.
curl -sXPOST "localhost:9200/names-test/_search?pretty" -d '{"fields": ["_timestamp","_source"], "query": {"match_all" : {}}}'
echo

# Sanity-check each custom analyzer via the _analyze API:
echo testing lowkeyword analyzer
curl -sXGET 'localhost:9200/names-test/_analyze?analyzer=lowkeyword-analyzer&pretty' -d 'ALAN JURGENSEN'
echo

echo testing phonetic analyzer
curl -sXGET 'localhost:9200/names-test/_analyze?analyzer=phonetic-analyzer&pretty' -d 'ALAN JURGENSEN'
echo

echo testing nickname to basename analyzer
curl -sXGET 'localhost:9200/names-test/_analyze?analyzer=nickname-analyzer&pretty' -d 'BILL ALAN'
echo

# Query the standard-analyzed "name" sub-field: matches any doc whose name
# contains the token "alan" (case-insensitive).
echo 'Name match query: name'
curl -sXPOST "localhost:9200/names-test/doc/_search?pretty" -d '{
    "query": {
        "match": {
            "name": "ALAN"
        }
    }
}'
echo

# "name.asis" indexes the whole name as one lowercased token, so a lone
# first name should NOT match any full-name document.
echo 'Name match query: name.asis'
curl -sXPOST "localhost:9200/names-test/doc/_search?pretty" -d '{
    "query": {
        "match": {
            "name.asis": "ALAN"
        }
    }
}'
echo

# Full name against "name.asis": exact (case-insensitive) whole-string match.
echo 'Name match query: name.asis'
curl -sXPOST "localhost:9200/names-test/doc/_search?pretty" -d '{
    "query": {
        "match": {
            "name.asis": "alan jurgensen"
        }
    }
}'
echo

# Phonetic field: should also match sound-alike spellings such as
# "ALLEN JORGENSON" / "ALLEN JURGENSON".
echo 'Name match query: name.phonetic'
curl -sXPOST "localhost:9200/names-test/doc/_search?pretty" -d '{
    "query": {
        "match": {
            "name.phonetic": "alan jurgensen"
        }
    }
}'
echo

# Basename field: "allen" runs through the nickname->basename synonym file,
# so it can match docs indexed under the base first name.
echo 'Name match query: name.basename'
curl -sXPOST "localhost:9200/names-test/doc/_search?pretty" -d '{
    "query": {
        "match": {
            "name.basename": "allen"
        }
    }
}'
echo

# Combine all four name sub-fields in one multi_match query (equal weights).
echo 'Name multi-match query: name.name name.asis name.phonetic name.basename'
curl -sXPOST "localhost:9200/names-test/doc/_search?pretty" -d '{
    "query": {
        "multi_match": {
            "query": "alan Jurgensen",
            "fields": [ "name.name", "name.asis", "name.phonetic", "name.basename" ]
        }
    }
}'
echo

# Same query with mild per-field boosts ("field^boost"): exact "asis" matches
# rank highest, then standard, phonetic, basename.
echo 'Name multi-match BOOST query: name.name name.asis name.phonetic name.basename'
curl -sXPOST "localhost:9200/names-test/doc/_search?pretty" -d '{
    "query": {
        "multi_match": {
            "query": "alan Jurgensen",
            "fields": [ "name.name^1.6", "name.asis^3", "name.phonetic^1.4", "name.basename^1.2" ]
        }
    }
}'
echo

# Larger boosts, plus ?explain so the response shows how each hit was scored.
echo 'Name multi-match BIGBOOST query: name.name name.asis name.phonetic name.basename'
curl -sXPOST "localhost:9200/names-test/doc/_search?pretty&explain" -d '{
    "query": {
            "multi_match": {
                "query": "alan Jurgensen",
                "fields": [ "name.name^4", "name.asis^6", "name.phonetic^3", "name.basename^2" ]
            }
    }
}'
echo

Sublime Text 3 user settings

{
    "always_show_minimap_viewport": true,
    "bold_folder_labels": true,
    "caret_extra_bottom": 1,
    "caret_extra_top": 1,
    "caret_extra_width": 1,
    "caret_style": "blink",
    "color_scheme": "Packages/Boxy Theme/schemes/Boxy Tomorrow.tmTheme",
    "default_line_ending": "LF",
    "enable_tab_scrolling": false,
    "fade_fold_buttons": false,
    "folder_exclude_patterns":
    [
        ".svn",
        ".git",
        ".hg",
        "CVS",
        ".phpintel",
        ".idea"
    ],
    "font_size": 14,
    "highlight_line": true,
    "ignored_packages":
    [
        "Vintage"
    ],
    "indent_guide_options":
    [
        "draw_normal",
        "draw_active"
    ],
    "line_padding_bottom": 2,
    "line_padding_top": 2,
    "overlay_scroll_bars": "enabled",
    "rulers":
    [
        80
    ],
    "show_encoding": true,
    "show_line_endings": true,
    "tab_size": 4,
    "theme": "Boxy Tomorrow.sublime-theme",
    "theme_accent_green": true,
    "theme_bar": true,
    "theme_find_panel_close_hidden": true,
    "theme_sidebar_size_sm": true,
    "theme_size_lg": true,
    "theme_statusbar_size_md": true,
    "theme_tab_line_size_lg": true,
    "theme_tab_selected_transparent": true,
    "theme_tab_selected_underlined": true,
    "theme_tab_separator": true,
    "translate_tabs_to_spaces": true,
    "word_wrap": false,
    "wrap_width": 100
}

elasticsearch.yml

##################################################################
# /etc/elasticsearch/elasticsearch.yml
#
# Base configuration for a write heavy cluster
#

# Cluster / Node Basics
cluster.name: logng

# Nodes can have arbitrary attributes we can use for routing
node.name: logsearch-01
node.datacenter: amsterdam

# Force all memory to be locked, forcing the JVM to never swap
bootstrap.mlockall: true

## Threadpool Settings ##
# Fixed-size pools with bounded queues; requests beyond the queue are
# rejected rather than piling up.  Bulk gets the largest pool since this
# cluster is write heavy.

# Search pool
threadpool.search.type: fixed
threadpool.search.size: 20
threadpool.search.queue_size: 100

# Bulk pool
threadpool.bulk.type: fixed
threadpool.bulk.size: 60
threadpool.bulk.queue_size: 300

# Index pool
threadpool.index.type: fixed
threadpool.index.size: 20
threadpool.index.queue_size: 100

# Indices settings — generous indexing buffers for heavy writes
indices.memory.index_buffer_size: 30%
indices.memory.min_shard_index_buffer_size: 12mb
indices.memory.min_index_buffer_size: 96mb

# Cache Sizes — capped with expiry so field data / filter caches
# cannot crowd out the indexing buffers
indices.fielddata.cache.size: 15%
indices.fielddata.cache.expire: 6h
indices.cache.filter.size: 15%
indices.cache.filter.expire: 6h

# Indexing Settings for Writes — refresh less often and flush the translog
# in larger batches to favor write throughput over search freshness
index.refresh_interval: 30s
index.translog.flush_threshold_ops: 50000

# Minimum nodes alive to constitute an operational cluster
# (quorum: presumably 3 master-eligible nodes here — verify cluster size)
discovery.zen.minimum_master_nodes: 2

# Unicast Discovery (disable multicast)
discovery.zen.ping.multicast.enabled: false
discovery.zen.ping.unicast.hosts: [ "logsearch-01", "logsearch-02", "logsearch-03" ]

MongoDB + ElasticSearch + Nginx 简单试用

install jdk:

wget http://download.oracle.com/otn-pub/java/jdk/8u77-b03/jdk-8u77-linux-x64.rpm #以实际地址为准
sudo rpm -ivh jdk-8u77-linux-x64.rpm
java -version

install elasticsearch:

wget https://download.elastic.co/elasticsearch/release/org/elasticsearch/distribution/rpm/elasticsearch/2.3.1/elasticsearch-2.3.1.rpm
sudo rpm -ivh elasticsearch-2.3.1.rpm
sudo chkconfig --add elasticsearch

config elasticsearch:

mkdir -p /home/worker/data/elasticsearch/data
mkdir -p /home/worker/data/elasticsearch/log
mkdir -p /home/worker/data/elasticsearch/pid
sudo chown -R worker:worker /etc/elasticsearch
export ES_HEAP_SIZE=15g    # half of usable memory (matches the sysconfig value below)

sudo vi /etc/sysconfig/elasticsearch

DATA_DIR=/home/worker/data/elasticsearch/data
LOG_DIR=/home/worker/data/elasticsearch/log
PID_DIR=/home/worker/data/elasticsearch/pid
ES_HEAP_SIZE=15g
ES_USER=worker
ES_GROUP=worker

sudo /usr/share/elasticsearch/bin/plugin install lmenezes/elasticsearch-kopf

配置:
sudo vi /etc/elasticsearch/elasticsearch.yml

cluster.name: elasticsearch
node.name: node-${HOSTNAME}
network.host: 10.100.30.105
discovery.zen.ping.unicast.hosts: ["10.100.30.105","10.100.30.106"]

sudo service elasticsearch start

curl 10.100.30.105:9200

mongo-connector elastic2-doc-manager:

sudo pip install elastic2-doc-manager

hosts: 127.0.0.1 search.test.com

nginx负载均衡配置:

upstream elasticsearch {
    server 10.100.30.105:9200;
    server 10.100.30.106:9200;
    keepalive 100;
}
server {
    server_name search.test.com;
    listen 80;
    location / {
        proxy_pass http://elasticsearch;
        proxy_http_version 1.1;
        proxy_set_header Connection "";
        proxy_set_header  X-Real-IP  $remote_addr;
        access_log off;
        allow all;
    }
}

openresty:

wget https://openresty.org/download/openresty-1.9.7.4.tar.gz
./configure --prefix=/home/worker/openresty --with-pcre-jit --with-http_stub_status_module --with-http_ssl_module
sudo /home/worker/nginx/sbin/nginx -s reload

curl search.test.com

ab -n 1000000 -c100 -k http://10.100.30.105:9200/
ab -n 1000000 -c100 -k http://search.test.com/

http://search.test.com/_plugin/kopf/#!/cluster

import MongoDB data:

mongoimport -h 127.0.0.1:27017 -d user_center -c users users.data.201601061

db.users.count()

2640260 264万 0.625GB

config = {
    _id: "RS0",
    members: [
        {_id: 0, host: "10.100.30.52:27017"},
        {_id: 1, host: "10.100.30.52:27018"},
    ]
}

mongo-connector:

config.json
{
    "mainAddress": "10.100.30.52:27017",
    "oplogFile": "/home/worker/data/mongo-connector/oplog.timestamp",
    "noDump": false,
    "batchSize": -1,
    "verbosity": 0,
    "continueOnError": true,
    "logging": {
        "type": "file",
        "filename": "/home/worker/data/mongo-connector/mongo-connector.log"
    },
    "namespaces": {
        "include": ["user_center.users"]
    },
    "fields": ["email", "loginType", "certificated", "destroyed", "regDate", "mobile", "detail"],
    "docManagers": [
        {
            "docManager": "elastic2_doc_manager",
            "targetURL": "10.100.30.106:9200",
            "args": {
                "clientOptions": {"timeout": 200}
            },
            "autoCommitInterval": 0
        }
    ]
}

nohup mongo-connector -c config.json > /home/worker/data/mongo-connector/hup.log 2>&1 &

ab:

query.json
{
    "query": {
        "match": {
            "detail.lowerName": "张三"
        }
    }
}

query1.json
{
    "query": {
        "match": {
            "email": "163.com"
        }
    }
}

ab -n 10000 -c100 -k -p query.json "http://search.test.com/user_center/users/_search"

ab -n 10000 -c100 -k "http://search.test.com/user_center/users/_search?size=1&q=detail.lowerName:张三"

ab -n 10000 -c100 -k "http://search.test.com/user_center/users/_search?size=1&q=email:gmail"

搭建MongoDB复制集

主节点:

28001.conf

port=28001
bind_ip=192.168.0.1
logpath=/usr/local/mongodb/log/28001.log
dbpath=/usr/local/mongodb/data/28001/
logappend=true
pidfilepath=/usr/local/mongodb/data/28001/28001.pid
fork=true
oplogSize=1024
replSet=RS0

28002.conf

port=28002
bind_ip=192.168.0.2
logpath=/usr/local/mongodb/log/28002.log
dbpath=/usr/local/mongodb/data/28002/
logappend=true
pidfilepath=/usr/local/mongodb/data/28002/28002.pid
fork=true
oplogSize=1024
replSet=RS0

28003.conf arbiter投票节点

port=28003
bind_ip=192.168.0.3
logpath=/usr/local/mongodb/log/28003.log
dbpath=/usr/local/mongodb/data/28003/
logappend=true
pidfilepath=/usr/local/mongodb/data/28003/28003.pid
fork=true
oplogSize=1024
replSet=RS0

启动实例:

mongod -f /usr/local/mongodb/conf/28001.conf

mongod -f /usr/local/mongodb/conf/28002.conf

mongod -f /usr/local/mongodb/conf/28003.conf

配置:
mongo 192.168.0.1:28001/admin

config = {
    _id: "RS0",
    members: [
        {_id: 0, host: "192.168.0.1:28001"},
        {_id: 1, host: "192.168.0.2:28002"},
        {_id: 2, host: "192.168.0.3:28003"},
    ]
}

// 设置 arbiter 节点
config.members[2] = {"_id": 2, "host": "192.168.0.3:28003", "arbiterOnly": true}

rs.initiate(config)  //初始化
rs.status() //查看状态