S
S
shagguboy2017-11-01 13:13:31
Sphinx
shagguboy, 2017-11-01 13:13:31

Why is the RT index in Sphinx not picking up the synonyms file?

wordforms.txt:
one > two

sphinx config:
source src1
{
    type      = mysql
    sql_host  = 192.168.10.165
    sql_user  = bitrix
    sql_pass  = 3354010
    sql_db    = bitrix
    sql_port  = 3306    # optional, default is 3306
    sql_query = \
        SELECT id, group_id, UNIX_TIMESTAMP(date_added) AS date_added, title, content \
        FROM documents

    sql_attr_uint      = group_id
    sql_attr_timestamp = date_added


}

index test1
{
    source       = src1
    path         = /var/lib/sphinx/test1
    docinfo      = extern

    wordforms = /home/bitrix/www/upload/wordforms.txt # the same file

}

index testrt
{
    type            = rt
    rt_mem_limit    = 128M

    path            = /var/lib/sphinx/testrt

    rt_field        = title
    rt_field        = content
    rt_attr_uint    = gid

    wordforms = /home/bitrix/www/upload/wordforms.txt # the same file

}

searchd
{
        listen                    = 9312
        listen                    = 9306:mysql41
        log                       = /var/log/sphinx/searchd.log
        query_log                 = /var/log/sphinx/query.log
        pid_file                  = /var/run/sphinx/searchd.pid
        binlog_path               = /var/lib/sphinx

        read_timeout              = 5
        max_children              = 30
        seamless_rotate           = 1
        preopen_indexes           = 1
        unlink_old                = 1
        workers                   = threads # for RT to work
        binlog_max_log_size = 512M
  #  2 - flush every transaction, sync every second. Good performance, and every committed transaction is guaranteed to be saved in case of daemon crash.
  #  1 - flush and sync every transaction. Worst performance, but every committed transaction data is guaranteed to be saved
        binlog_flush     = 2
        rt_flush_period  = 3600
}

# 22
indexer
{
        lemmatizer_cache        = 128M
}
common
{
    lemmatizer_base       = /usr/share/sphinx/dicts
}
Data for the first index:
-- Recreate test.documents from scratch and load four sample rows.
-- The ids are given explicitly, so the rows are the same on every run.
DROP TABLE IF EXISTS test.documents;

CREATE TABLE test.documents (
    id         INTEGER      NOT NULL AUTO_INCREMENT PRIMARY KEY,
    group_id   INTEGER      NOT NULL,
    group_id2  INTEGER      NOT NULL,
    date_added DATETIME     NOT NULL,
    title      VARCHAR(255) NOT NULL,
    content    TEXT         NOT NULL
);

REPLACE INTO test.documents
    (id, group_id, group_id2, date_added, title, content)
VALUES
    (1, 1, 5, NOW(), 'test one', 'this is my test document number one. also checking search within phrases.'),
    (2, 1, 6, NOW(), 'test two', 'this is my test document number two'),
    (3, 2, 7, NOW(), 'another doc', 'this is another group'),
    (4, 2, 8, NOW(), 'doc number four', 'this is to test groups');

-- Recreate test.tags, a (docid, tagid) pair table, and load sample pairs.
-- The UNIQUE constraint rejects duplicate (docid, tagid) pairs.
DROP TABLE IF EXISTS test.tags;
CREATE TABLE test.tags
(
  docid INTEGER NOT NULL,
  tagid INTEGER NOT NULL,
  UNIQUE(docid,tagid)
);

-- Columns are named explicitly so the load does not depend on the
-- physical column order of the table.
INSERT INTO test.tags (docid, tagid) VALUES
  (1,1), (1,3), (1,5), (1,7),
  (2,6), (2,4), (2,2),
  (3,15),
  (4,7), (4,40);


result:
spoiler
Welcome to the MySQL monitor.  Commands end with ; or \g.
Your MySQL connection id is 1
Server version: 2.2.11-id64-release (95ae9a6)

Copyright (c) 2009-2017 Percona LLC and/or its affiliates
Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved.

Oracle is a registered trademark of Oracle Corporation and/or its
affiliates. Other names may be trademarks of their respective
owners.

Type 'help;' or '\h' for help. Type '\c' to clear the current input statement.

mysql> SELECT * FROM test1 WHERE MATCH('two');
+------+----------+------------+
| id   | group_id | date_added |
+------+----------+------------+
|    1 |        1 | 1509528588 |
|    2 |        1 | 1509528588 |
+------+----------+------------+
2 rows in set (0.00 sec)

mysql> SELECT * FROM test1 WHERE MATCH('one');
+------+----------+------------+
| id   | group_id | date_added |
+------+----------+------------+
|    1 |        1 | 1509528588 |
|    2 |        1 | 1509528588 |
+------+----------+------------+
2 rows in set (0.00 sec)

mysql> SELECT * FROM testrt ORDER BY gid DESC;
Empty set (0.00 sec)

mysql> truncate rtindex testrt;
Query OK, 0 rows affected (0.00 sec)

mysql> INSERT INTO testrt VALUES
    ->   (1, 'one', 'one some sample text', 11),
    ->   (2, 'two some more', 'two text here', 22),
    ->   (3, 'mmooooorrree', 'even more text', 33);
Query OK, 3 rows affected (0.00 sec)

mysql> SELECT * FROM testrt ORDER BY gid DESC;
+------+------+
| id   | gid  |
+------+------+
|    3 |   33 |
|    2 |   22 |
|    1 |   11 |
+------+------+
3 rows in set (0.00 sec)

mysql> SELECT * FROM testrt WHERE MATCH('one');
+------+------+
| id   | gid  |
+------+------+
|    1 |   11 |
+------+------+
1 row in set (0.00 sec)

mysql> SELECT * FROM testrt WHERE MATCH('two');
+------+------+
| id   | gid  |
+------+------+
|    2 |   22 |
+------+------+
1 row in set (0.00 sec)

mysql>


Question: why does the RT index not pick up the synonyms (wordforms) file?

Answer the question

In order to leave comments, you need to log in

2 answer(s)
F
Fortop, 2017-12-05
@Fortop

One of the answers to the question "why"
sphinxsearch.com/forum/view.html?id=12917

P
Puma Thailand, 2017-11-07
@opium

And the non-RT index does pick it up, or what?

Didn't find what you were looking for?

Ask your question

Ask a Question

731 491 924 answers to any question