D
D
Dmitry Sidorenko2015-10-14 10:00:31
PHP
Dmitry Sidorenko, 2015-10-14 10:00:31

How to make sphinx search for short words (less than 4 characters)?

Sphinx is set up and working.
If I pass /_sphinx/index.php?q=stove to search, then it finds a lot of results, taking into account morphology, everything is super
But with the shortened, three-letter form of the same word (/_sphinx/index.php?q=печ) nothing is found =)
Where should I look?
By the way, /_sphinx/index.php?q=p does return results, because the text contains the phrase "9 meters high."
I tried the following parameters:
#min_word_len = 1
#min_infix_len = 1
#enable_star = 0
#min_word_len = 1
#html_strip = 1
#min_infix_len = 1
#index_exact_words = 1
in various combinations, but none of them helped =)
Here is the sphinx config

# Data source "src1": reads rows from the `products` table of a local MySQL
# database so that they can be indexed (it is referenced by index test1).
source src1
{
  type			= mysql

  # Connection settings for the MySQL server.
  sql_host		= localhost
  sql_user		= root
  sql_pass		=
  sql_db			= yii2advanced2
  sql_port		= 3306	# optional, default is 3306

  # Run before the main query so that text is exchanged as UTF-8.
  sql_query_pre = SET NAMES utf8
    sql_query_pre = SET CHARACTER SET utf8

  # Main fetch query. create_time is converted to a UNIX timestamp and exposed
  # as date_added.
  # NOTE(review): presumably the first column (id_product) is taken as the
  # document ID and the remaining non-attribute columns become full-text
  # fields - confirm against the Sphinx documentation.
  sql_query	= SELECT id_product, id_old, model, UNIX_TIMESTAMP(create_time) AS date_added, seo_name, full_description FROM products

  # Attribute declarations.
  # NOTE(review): id_manufacturer is declared here but is not in the SELECT
  # list above, and id_product is also the first (ID) column - verify whether
  # these two declarations are intended.
  sql_attr_uint		= id_product
  sql_attr_uint		= id_old
  sql_attr_uint		= id_manufacturer
  sql_attr_timestamp	= date_added
}


# Disk index "test1", built from source src1.
index test1
{
  source			= src1
  path			= c:\OpenServer\modules\sphinx\configdir\data\test1
  # Every tuning option below is commented out, so the index is built with
  # Sphinx defaults. min_word_len and min_infix_len each appear twice.
  # NOTE(review): presumably a change to any of these only takes effect after
  # the index is rebuilt with indexer, and partial-word matching
  # (min_prefix_len / min_infix_len) presumably also requires a wildcard in
  # the query itself (e.g. `word*`) - confirm against the Sphinx documentation.
  #min_word_len = 1
  #min_infix_len = 1
  #enable_star = 0
  #min_word_len      = 1
  #html_strip        = 1
  #min_infix_len     = 1
  #index_exact_words = 1
}


# Index "testrt" of type rt: it has no `source` line, declares two full-text
# fields (seo_name, full_description) and one unsigned-integer attribute (gid).
# NOTE(review): nothing in this config populates it - presumably it is filled
# at runtime; confirm whether it is still needed.
index testrt
{
  type			= rt
  # Memory limit for this index.
  rt_mem_limit		= 128M
  path			= c:\OpenServer\modules\sphinx\configdir\data\testrt
  rt_field		= seo_name
  rt_field		= full_description
  rt_attr_uint		= gid
}


# Settings for the indexer tool.
indexer
{
  # Memory limit used while building indexes.
  mem_limit		= 128M
}


# Settings for the searchd daemon.
searchd
{
  # Two listeners: port 9312 (the port the PHP script connects to) and port
  # 9306 using the mysql41 protocol.
  listen			= 9312
  listen			= 9306:mysql41
  # Daemon log, per-query log and PID file locations.
  log			= c:\OpenServer\modules\sphinx\configdir\log\searchd.log
  query_log		= c:\OpenServer\modules\sphinx\configdir\log\query.log
  read_timeout		= 5
  max_children		= 30
  pid_file		= c:\OpenServer\modules\sphinx\configdir\log\searchd.pid
  # NOTE(review): the three flags below presumably control how rebuilt index
  # files are swapped in and cleaned up - confirm against the Sphinx docs.
  seamless_rotate		= 1
  preopen_indexes		= 1
  unlink_old		= 1
  workers			= threads # for RT to work
  # Directory for binary log files (same directory as the index data).
  binlog_path		= c:\OpenServer\modules\sphinx\configdir\data
}

And here is the PHP script itself; maybe it will be useful to someone.
Don't judge the quality of the code: it is a hastily reworked example from the Sphinx PHP API =)
<?php

// The response is declared as UTF-8 HTML (left unchanged so that existing
// clients see the same headers).
// NOTE(review): the script actually prints a JSON array - confirm whether
// clients would prefer application/json.
header('Content-type: text/html; charset=utf-8');

// NOTE(review): every PHP diagnostic is silenced here, which hides real
// problems during debugging; consider logging errors instead of discarding them.
error_reporting( 0 );

require( "sphinxapi.php" );

//////////////////////
// parse command line
//////////////////////

// This script was adapted from a command-line example: the web query string
// (?q=...) is injected as the one and only "command-line" argument, at index 1.
// A missing `q`, or a non-string one such as ?q[]=x, is normalised to an empty
// string so the rest of the script never sees null or an array.
// Example for local debugging: replace the value with 'печ'.
$_SERVER["argv"] = [
  1 => ( isset( $_GET['q'] ) && is_string( $_GET['q'] ) ) ? $_GET['q'] : '',
];

// Without any arguments there is nothing to search for: print the
// command-line help text and stop.
if ( !( is_array( $_SERVER["argv"] ) && !empty( $_SERVER["argv"] ) ) ) {
  $usageLines = [
    "Usage: php -f test.php [OPTIONS] query words\n\n",
    "Options are:\n",
    "-h, --host <HOST>\tconnect to searchd at host HOST\n",
    "-p, --port\t\tconnect to searchd at port PORT\n",
    "-i, --index <IDX>\tsearch through index(es) specified by IDX\n",
    "-s, --sortby <CLAUSE>\tsort matches by 'CLAUSE' in sort_extended mode\n",
    "-S, --sortexpr <EXPR>\tsort matches by 'EXPR' DESC in sort_expr mode\n",
    "-a, --any\t\tuse 'match any word' matching mode\n",
    "-b, --boolean\t\tuse 'boolean query' matching mode\n",
    "-e, --extended\t\tuse 'extended query' matching mode\n",
    "-ph,--phrase\t\tuse 'exact phrase' matching mode\n",
    "-f, --filter <ATTR>\tfilter by attribute 'ATTR' (default is 'group_id')\n",
    "-fr,--filterrange <ATTR> <MIN> <MAX>\n\t\t\tadd specified range filter\n",
    "-v, --value <VAL>\tadd VAL to allowed 'group_id' values list\n",
    "-g, --groupby <EXPR>\tgroup matches by 'EXPR'\n",
    "-gs,--groupsort <EXPR>\tsort groups by 'EXPR'\n",
    "-d, --distinct <ATTR>\tcount distinct values of 'ATTR''\n",
    "-l, --limit <COUNT>\tretrieve COUNT matches (default: 20)\n",
    "--select <EXPRLIST>\tuse 'EXPRLIST' as select-list (default: *)\n",
  ];

  echo implode( "", $usageLines );
  exit;
}

// Re-index the arguments from 0 so they can be walked positionally.
$args = array_values( $_SERVER["argv"] );
$argCount = count( $args );

$cl = new SphinxClient ();

// Search defaults; each one can be overridden by an option switch below.
$q = "";
$sql = "";
$mode = SPH_MATCH_ALL;
$host = "localhost";
$port = 9312;
$index = "*";
$groupby = "";
$groupsort = "@group desc";
$filter = "group_id";
$filtervals = array();
$distinct = "";
$sortby = "";
$sortexpr = "";
$limit = 210;
$ranker = SPH_RANK_PROXIMITY_BM25;
$select = "";

// Maps the value accepted by "-r" to the ranking-mode constant.
$rankerByName = [
  "bm25"      => SPH_RANK_BM25,
  "none"      => SPH_RANK_NONE,
  "wordcount" => SPH_RANK_WORDCOUNT,
  "fieldmask" => SPH_RANK_FIELDMASK,
  "sph04"     => SPH_RANK_SPH04,
];

// Returns the value that follows the current option and advances the cursor.
// When the option is the last argument there is no value to read, so the
// supplied fallback (normally the current setting) is returned instead of
// reading past the end of $args. This matters because the web request supplies
// a single argument: a query such as "-h" or "-l" would otherwise overwrite a
// default with null/0.
$i = 0;
$nextValue = function ( $fallback ) use ( &$i, $args, $argCount ) {
  return ( $i + 1 < $argCount ) ? $args[++$i] : $fallback;
};

// Walk the arguments: recognised switches change a setting, everything else is
// appended to the query text (each word followed by a space).
for ( $i = 0; $i < $argCount; $i++ ) {
  $arg = $args[$i];

  switch ( $arg ) {
    case "-h":
    case "--host":
      $host = $nextValue( $host );
      break;

    case "-p":
    case "--port":
      $port = (int)$nextValue( $port );
      break;

    case "-i":
    case "--index":
      $index = $nextValue( $index );
      break;

    case "-s":
    case "--sortby":
      // Extended sorting and expression sorting are mutually exclusive.
      $sortby = $nextValue( $sortby );
      $sortexpr = "";
      break;

    case "-S":
    case "--sortexpr":
      $sortexpr = $nextValue( $sortexpr );
      $sortby = "";
      break;

    case "-a":
    case "--any":
      $mode = SPH_MATCH_ANY;
      break;

    case "-b":
    case "--boolean":
      $mode = SPH_MATCH_BOOLEAN;
      break;

    case "-e":
    case "--extended":
      $mode = SPH_MATCH_EXTENDED;
      break;

    case "-e2":
      $mode = SPH_MATCH_EXTENDED2;
      break;

    case "-ph":
    case "--phrase":
      $mode = SPH_MATCH_PHRASE;
      break;

    case "-f":
    case "--filter":
      $filter = $nextValue( $filter );
      break;

    case "-v":
    case "--value":
      // Only add a filter value when one was actually supplied.
      if ( $i + 1 < $argCount ) {
        $filtervals[] = $args[++$i];
      }
      break;

    case "-g":
    case "--groupby":
      $groupby = $nextValue( $groupby );
      break;

    case "-gs":
    case "--groupsort":
      $groupsort = $nextValue( $groupsort );
      break;

    case "-d":
    case "--distinct":
      $distinct = $nextValue( $distinct );
      break;

    case "-l":
    case "--limit":
      $limit = (int)$nextValue( $limit );
      break;

    case "--select":
      $select = $nextValue( $select );
      break;

    case "-fr":
    case "--filterrange":
      // Needs three values (attribute, min, max); an incomplete range is
      // dropped together with whatever partial values follow it.
      if ( $i + 3 < $argCount ) {
        $rangeAttr = $args[++$i];
        $rangeMin = $args[++$i];
        $rangeMax = $args[++$i];
        $cl->SetFilterRange( $rangeAttr, $rangeMin, $rangeMax );
      } else {
        $i = $argCount;
      }
      break;

    case "-r":
      // Unknown ranker names leave the current ranker untouched.
      $arg = strtolower( $nextValue( "" ) );
      if ( isset( $rankerByName[$arg] ) ) {
        $ranker = $rankerByName[$arg];
      }
      break;

    default:
      $q .= $args[$i] . " ";
  }
}


////////////
// do query
////////////

// Connection and result-shape settings.
$cl->SetServer( $host, $port );
$cl->SetConnectTimeout( 1 );
$cl->SetArrayResult( true );
$cl->SetMatchMode( $mode );

// Optional settings: each is applied only when the corresponding value was
// supplied (is non-empty).
if ( count( $filtervals ) > 0 ) {
  $cl->SetFilter( $filter, $filtervals );
}
if ( $groupby ) {
  $cl->SetGroupBy( $groupby, SPH_GROUPBY_ATTR, $groupsort );
}
if ( $sortby ) {
  $cl->SetSortMode( SPH_SORT_EXTENDED, $sortby );
}
if ( $sortexpr ) {
  $cl->SetSortMode( SPH_SORT_EXPR, $sortexpr );
}
if ( $distinct ) {
  $cl->SetGroupDistinct( $distinct );
}
if ( $select ) {
  $cl->SetSelect( $select );
}
if ( $limit ) {
  // Third argument is never lower than 1000, and grows with the limit.
  $cl->SetLimits( 0, $limit, max( $limit, 1000 ) );
}

$cl->SetRankingMode( $ranker );

// Run the search; $res is false on failure.
$res = $cl->Query( $q, $index );

////////////////
// print me out
////////////////

// Diagnostics are printed ahead of the JSON payload, exactly as before.
// NOTE(review): this text makes the response body invalid JSON whenever the
// query fails or produces a warning - confirm how clients handle that.
if ( $res === false ) {
  print "Query failed: " . $cl->GetLastError() . ".\n";
} else if ( $cl->GetLastWarning() ) {
  print "WARNING: " . $cl->GetLastWarning() . "\n\n";
}

// Collect the IDs of the matched documents. A failed query or a result
// without a "matches" list yields an empty array, without depending on
// suppressed warnings.
$answer = [];
if ( is_array( $res ) && isset( $res['matches'] ) && is_array( $res['matches'] ) ) {
  foreach ( $res['matches'] as $match ) {
    $answer[] = $match['id'];
  }
}

// Emit the IDs as a JSON array.
$answer = json_encode( $answer );
echo ( $answer );
/**
 * Debug helper: prints a human-readable dump of a value wrapped in <pre> tags.
 *
 * @param mixed $a Value to dump.
 */
function p($a) {
  // print_r() in return mode yields the same text it would otherwise print.
  $dump = print_r( $a, true );
  echo '<pre>' . $dump . '</pre>';
}

?>

Answer the question

In order to leave comments, you need to log in

2 answer(s)
M
Max, 2015-10-14
@sidorenkoda

According to the developers' documentation:
sphinxsearch.com/docs/current/conf-min-word-len.html
min_word_len
Minimum indexed word length. Optional, default is 1 (index everything).
So word length is not the problem: the shortened word ("oven") is simply not the same word as the full one ("stove"). Only the wildcard form "oven*" would match "stove".
read here

A
Andrew, 2015-10-14
@R0dger

read here, I think it will help
sphinxsearch.com/forum/view.html?id=13352

Didn't find what you were looking for?

Ask your question

Ask a Question

731 491 924 answers to any question