1 rizwank 1.1 <?php
/***************************************************************************
 *                            functions_search.php
 *                            -------------------
 *   begin                : Wed Sep 05 2001
 *   copyright            : (C) 2002 The phpBB Group
 *   email                : support@phpbb.com
 *
 *   $Id: functions_search.php,v 1.8.2.10 2003/01/06 09:18:35 bartvb Exp $
 *
 ****************************************************************************/

/***************************************************************************
 *
 *   This program is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation; either version 2 of the License, or
 *   (at your option) any later version.
 *
 ***************************************************************************/
21
//
// Normalise a piece of text before it is indexed ('post') or used as a
// query ('search').
//
// $mode          - 'post' additionally strips line breaks, HTML entities,
//                  URLs, BBCode tags, '*' and words of fewer than 3 or more
//                  than 20 characters. 'search' turns ' +' / ' -' prefixes
//                  into the keywords 'and' / 'not' and leaves the keywords
//                  'and', 'or' and 'not' alone when applying stopwords and
//                  synonyms.
// $entry         - the text; passed by reference and overwritten with the
//                  cleaned text.
// $stopword_list - array of stopword lines (one word per element) or an
//                  empty value to skip stopword removal.
// $synonym_list  - array of "replacement match" lines (two words separated
//                  by one space) or an empty value to skip synonym mapping.
//
// Returns the cleaned, lower-cased text (same value left in $entry).
//
function clean_words($mode, &$entry, &$stopword_list, &$synonym_list)
{
	static $drop_char_match = array('^', '$', '&', '(', ')', '<', '>', '`', '\'', '"', '|', ',', '@', '_', '?', '%', '-', '~', '+', '.', '[', ']', '{', '}', ':', '\\', '/', '=', '#', '\'', ';', '!');
	static $drop_char_replace = array(' ', ' ', ' ', ' ', ' ', ' ', ' ', '', '', ' ', ' ', ' ', ' ', '', ' ', ' ', '', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ' , ' ', ' ', ' ', ' ', ' ', ' ');

	// Pad with spaces so that word-boundary based replacements also work at
	// the very start and end of the text.
	$entry = ' ' . strip_tags(strtolower($entry)) . ' ';

	if ( $mode == 'post' )
	{
		// Replace line endings by a space
		$entry = preg_replace('/[\n\r]/is', ' ', $entry);
		// HTML entities like &nbsp;
		$entry = preg_replace('/\b&[a-z]+;\b/', ' ', $entry);
		// Remove URL's
		$entry = preg_replace('/\b[a-z0-9]+:\/\/[a-z0-9\.\-]+(\/[a-z0-9\?\.%_\-\+=&\/]+)?/', ' ', $entry);
		// Quickly remove BBcode.
		$entry = preg_replace('/\[img:[a-z0-9]{10,}\].*?\[\/img:[a-z0-9]{10,}\]/', ' ', $entry);
		$entry = preg_replace('/\[\/?url(=.*?)?\]/', ' ', $entry);
		$entry = preg_replace('/\[\/?[a-z\*=\+\-]+(\:?[0-9a-z]+)?:[a-z0-9]{10,}(\:[a-z0-9]+)?=?.*?\]/', ' ', $entry);
	}
	else if ( $mode == 'search' )
	{
		// Must run before the character filter below, which drops + and -
		$entry = str_replace(' +', ' and ', $entry);
		$entry = str_replace(' -', ' not ', $entry);
	}

	//
	// Filter out strange characters like ^, $, &, change "it's" to "its".
	// With array arguments str_replace() applies the pairs one after the
	// other, in array order, exactly like an explicit loop would.
	//
	$entry = str_replace($drop_char_match, $drop_char_replace, $entry);

	if ( $mode == 'post' )
	{
		$entry = str_replace('*', ' ', $entry);

		// 'words' that consist of <3 or >20 characters are removed.
		$entry = preg_replace('/\b([a-z0-9]{1,2}|[a-z0-9]{21,})\b/',' ', $entry);
	}

	if ( !empty($stopword_list) && is_array($stopword_list) )
	{
		foreach ( $stopword_list as $stopword_line )
		{
			$stopword = trim($stopword_line);

			// A blank line would compile to '\b\b', which matches at every
			// word boundary and pads the whole text with spaces.
			if ( $stopword === '' )
			{
				continue;
			}

			if ( $mode == 'post' || ( $stopword != 'not' && $stopword != 'and' && $stopword != 'or' ) )
			{
				// Quote for the '#' delimiter actually used by the pattern
				$entry = preg_replace('#\b' . preg_quote($stopword, '#') . '\b#', ' ', $entry);
			}
		}
	}

	if ( !empty($synonym_list) && is_array($synonym_list) )
	{
		foreach ( $synonym_list as $synonym_line )
		{
			// explode() replaces split(), which no longer exists in PHP 7+
			$synonym_pair = explode(' ', trim(strtolower($synonym_line)));

			// Skip lines that do not hold a "replacement match" pair
			if ( count($synonym_pair) < 2 )
			{
				continue;
			}

			$replace_synonym = trim($synonym_pair[0]);
			$match_synonym = trim($synonym_pair[1]);

			if ( $match_synonym === '' )
			{
				continue;
			}

			if ( $mode == 'post' || ( $match_synonym != 'not' && $match_synonym != 'and' && $match_synonym != 'or' ) )
			{
				$entry = preg_replace('#\b' . preg_quote($match_synonym, '#') . '\b#', ' ' . $replace_synonym . ' ', $entry);
			}
		}
	}

	return $entry;
}
91
//
// Break a cleaned string into its individual words.
//
// $entry - text to split (taken by reference, not modified here).
// $mode  - 'post' (default) selects the indexing pattern; any other value
//          selects the search pattern, which keeps a leading or trailing
//          '*' wildcard attached to the word.
//
// Returns the list of strings captured by the first group of the pattern.
//
function split_words(&$entry, $mode = 'post')
{
	if ( $mode == 'post' )
	{
		$pattern = "/\b([\w±µ-ÿ][\w±µ-ÿ']*[\w±µ-ÿ]+|[\w±µ-ÿ]+?)\b/";
	}
	else
	{
		$pattern = '/(\*?[a-z0-9±µ-ÿ]+\*?)|\b([a-z0-9±µ-ÿ]+)\b/';
	}

	preg_match_all($pattern, $entry, $found);

	// Index 0 holds the full matches, index 1 the first capture group
	return $found[1];
}
99
//
// Index the words of a post: make sure every word exists in the search
// word table, then link each word to the post in the search match table.
//
// $mode       - when 'single', remove_common() is run afterwards for the
//               words of this post (threshold fraction 0.4).
// $post_id    - id of the post; interpolated into the match INSERT as is.
// $post_text  - raw post body.
// $post_title - raw post subject (optional).
//
// Any failing query ends in message_die(). Nothing is returned.
//
function add_search_words($mode, $post_id, $post_text, $post_title = '')
{
	global $db, $phpbb_root_path, $board_config, $lang;

	// @file() yields false when a list is missing; clean_words() skips
	// empty lists.
	$stopword_array = @file($phpbb_root_path . 'language/lang_' . $board_config['default_lang'] . "/search_stopwords.txt");
	$synonym_array = @file($phpbb_root_path . 'language/lang_' . $board_config['default_lang'] . "/search_synonyms.txt");

	// split_words() takes its text by reference, so hand it real variables
	// instead of the return value of clean_words().
	$clean_text = clean_words('post', $post_text, $stopword_array, $synonym_array);
	$clean_title = clean_words('post', $post_title, $stopword_array, $synonym_array);

	$search_raw_words = array();
	$search_raw_words['text'] = split_words($clean_text);
	$search_raw_words['title'] = split_words($clean_title);

	//
	// $word collects every word of the post, $word_insert_sql holds one
	// quoted, comma separated, duplicate free list per field.
	// clean_words() has already dropped quotes and backslashes, so the
	// words are embedded in the SQL without further escaping.
	//
	$word = array();
	$word_insert_sql = array();
	foreach ( $search_raw_words as $word_in => $search_matches )
	{
		$field_words = array();
		if ( !empty($search_matches) )
		{
			foreach ( $search_matches as $search_match )
			{
				$search_match = trim($search_match);

				if ( $search_match !== '' )
				{
					$word[] = $search_match;
					// Keyed by the word itself: repeats simply overwrite
					$field_words[$search_match] = "'" . $search_match . "'";
				}
			}
		}
		$word_insert_sql[$word_in] = implode(', ', $field_words);
	}

	if ( count($word) )
	{
		// Compare as strings so that numeric looking words such as '007'
		// and '7' are kept apart.
		$word = array_unique($word);
		sort($word, SORT_STRING);

		$word_text_sql = "'" . implode("', '", $word) . "'";

		//
		// These layers insert one word at a time, so look up which words
		// exist already. MySQL and MSSQL use a bulk insert further down.
		//
		$check_words = array();
		switch( SQL_LAYER )
		{
			case 'postgresql':
			case 'msaccess':
			case 'mssql-odbc':
			case 'oracle':
			case 'db2':
				$sql = "SELECT word_id, word_text
					FROM " . SEARCH_WORD_TABLE . "
					WHERE word_text IN ($word_text_sql)";
				if ( !($result = $db->sql_query($sql)) )
				{
					message_die(GENERAL_ERROR, 'Could not select words', '', __LINE__, __FILE__, $sql);
				}

				while ( $row = $db->sql_fetchrow($result) )
				{
					$check_words[$row['word_text']] = $row['word_id'];
				}
				$db->sql_freeresult($result);
				break;
		}

		$value_sql = '';
		foreach ( $word as $new_word )
		{
			if ( isset($check_words[$new_word]) )
			{
				continue;
			}

			switch( SQL_LAYER )
			{
				case 'mysql':
				case 'mysql4':
					$value_sql .= ( ( $value_sql != '' ) ? ', ' : '' ) . '(\'' . $new_word . '\', 0)';
					break;
				case 'mssql':
					$value_sql .= ( ( $value_sql != '' ) ? ' UNION ALL ' : '' ) . "SELECT '" . $new_word . "', 0";
					break;
				default:
					$sql = "INSERT INTO " . SEARCH_WORD_TABLE . " (word_text, word_common)
						VALUES ('" . $new_word . "', 0)";
					if( !$db->sql_query($sql) )
					{
						message_die(GENERAL_ERROR, 'Could not insert new word', '', __LINE__, __FILE__, $sql);
					}
					break;
			}
		}

		// Only the mysql / mssql branches above ever fill $value_sql
		if ( $value_sql != '' )
		{
			switch ( SQL_LAYER )
			{
				case 'mysql':
				case 'mysql4':
					$sql = "INSERT IGNORE INTO " . SEARCH_WORD_TABLE . " (word_text, word_common)
						VALUES $value_sql";
					break;
				case 'mssql':
					$sql = "INSERT INTO " . SEARCH_WORD_TABLE . " (word_text, word_common)
						$value_sql";
					break;
			}

			if ( !$db->sql_query($sql) )
			{
				message_die(GENERAL_ERROR, 'Could not insert new word', '', __LINE__, __FILE__, $sql);
			}
		}
	}

	// Link the post to its words, once for the text and once for the title
	foreach ( $word_insert_sql as $word_in => $match_sql )
	{
		$title_match = ( $word_in == 'title' ) ? 1 : 0;

		if ( $match_sql != '' )
		{
			$sql = "INSERT INTO " . SEARCH_MATCH_TABLE . " (post_id, word_id, title_match)
				SELECT $post_id, word_id, $title_match
					FROM " . SEARCH_WORD_TABLE . "
					WHERE word_text IN ($match_sql)";
			if ( !$db->sql_query($sql) )
			{
				message_die(GENERAL_ERROR, 'Could not insert new word matches', '', __LINE__, __FILE__, $sql);
			}
		}
	}

	if ($mode == 'single')
	{
		remove_common('single', 0.4, $word);
	}

	return;
}
254
//
// Check if specified words are too common now
//
// Does nothing while the board holds fewer than 100 posts. Otherwise every
// word with more than floor(total posts * $fraction) rows in the search
// match table is flagged word_common = 1 in the search word table and all
// of its match rows are deleted.
//
// $mode         - 'single' restricts the check to $word_id_list (when that
//                 list is not empty); any other value checks all words.
// $fraction     - share of the total post count used as the threshold.
// $word_id_list - despite the name these values are compared against
//                 word_text, i.e. they are the words themselves. They are
//                 embedded in the SQL unescaped.
//
// Any failing query ends in message_die(). Nothing is returned.
//
function remove_common($mode, $fraction, $word_id_list = array())
{
	global $db;

	$sql = "SELECT COUNT(post_id) AS total_posts
		FROM " . POSTS_TABLE;
	if ( !($result = $db->sql_query($sql)) )
	{
		message_die(GENERAL_ERROR, 'Could not obtain post count', '', __LINE__, __FILE__, $sql);
	}

	$row = $db->sql_fetchrow($result);
	$db->sql_freeresult($result);

	if ( $row['total_posts'] >= 100 )
	{
		$common_threshold = floor($row['total_posts'] * $fraction);

		if ( $mode == 'single' && count($word_id_list) )
		{
			$word_id_sql = "'" . implode("', '", $word_id_list) . "'";

			$sql = "SELECT m.word_id
				FROM " . SEARCH_MATCH_TABLE . " m, " . SEARCH_WORD_TABLE . " w
				WHERE w.word_text IN ($word_id_sql)
					AND m.word_id = w.word_id
				GROUP BY m.word_id
				HAVING COUNT(m.word_id) > $common_threshold";
		}
		else
		{
			$sql = "SELECT word_id
				FROM " . SEARCH_MATCH_TABLE . "
				GROUP BY word_id
				HAVING COUNT(word_id) > $common_threshold";
		}

		if ( !($result = $db->sql_query($sql)) )
		{
			message_die(GENERAL_ERROR, 'Could not obtain common word list', '', __LINE__, __FILE__, $sql);
		}

		$common_ids = array();
		while ( $row = $db->sql_fetchrow($result) )
		{
			$common_ids[] = $row['word_id'];
		}
		$db->sql_freeresult($result);

		if ( count($common_ids) )
		{
			$common_word_id = implode(', ', $common_ids);

			// Flag the words first, then drop their match rows
			$sql = "UPDATE " . SEARCH_WORD_TABLE . "
				SET word_common = 1
				WHERE word_id IN ($common_word_id)";
			if ( !$db->sql_query($sql) )
			{
				message_die(GENERAL_ERROR, 'Could not update word list entry', '', __LINE__, __FILE__, $sql);
			}

			$sql = "DELETE FROM " . SEARCH_MATCH_TABLE . "
				WHERE word_id IN ($common_word_id)";
			if ( !$db->sql_query($sql) )
			{
				message_die(GENERAL_ERROR, 'Could not delete word match entry', '', __LINE__, __FILE__, $sql);
			}
		}
	}

	return;
}
331
//
// Remove the search index entries of one or more posts.
//
// Words that have exactly one row in the search match table, and whose row
// belongs to one of the given posts, are deleted from the search word
// table. Afterwards all match rows of the posts are deleted.
//
// $post_id_sql - text placed verbatim inside "post_id IN (...)"; presumably
//                a comma separated list of numeric post ids built by the
//                caller - TODO confirm, it is not escaped here.
//
// Returns false when no word delete was executed, otherwise the value of
// $db->sql_affectedrows() for the word delete.
//
function remove_search_post($post_id_sql)
{
	global $db;

	$words_removed = false;

	switch ( SQL_LAYER )
	{
		// This branch avoids nested selects and does the work in steps
		case 'mysql':
		case 'mysql4':
			$sql = "SELECT word_id
				FROM " . SEARCH_MATCH_TABLE . "
				WHERE post_id IN ($post_id_sql)
				GROUP BY word_id";
			// A failing lookup is deliberately not fatal here
			if ( $result = $db->sql_query($sql) )
			{
				$word_id_sql = '';
				while ( $row = $db->sql_fetchrow($result) )
				{
					$word_id_sql .= ( ( $word_id_sql != '' ) ? ', ' : '' ) . $row['word_id'];
				}
				$db->sql_freeresult($result);

				// Posts without indexed words would give "IN ()", which is
				// not valid SQL - there is nothing to clean up then.
				if ( $word_id_sql != '' )
				{
					$sql = "SELECT word_id
						FROM " . SEARCH_MATCH_TABLE . "
						WHERE word_id IN ($word_id_sql)
						GROUP BY word_id
						HAVING COUNT(word_id) = 1";
					if ( $result = $db->sql_query($sql) )
					{
						$word_id_sql = '';
						while ( $row = $db->sql_fetchrow($result) )
						{
							$word_id_sql .= ( ( $word_id_sql != '' ) ? ', ' : '' ) . $row['word_id'];
						}
						$db->sql_freeresult($result);

						if ( $word_id_sql != '' )
						{
							$sql = "DELETE FROM " . SEARCH_WORD_TABLE . "
								WHERE word_id IN ($word_id_sql)";
							if ( !$db->sql_query($sql) )
							{
								message_die(GENERAL_ERROR, 'Could not delete word list entry', '', __LINE__, __FILE__, $sql);
							}

							$words_removed = $db->sql_affectedrows();
						}
					}
				}
			}
			break;

		default:
			$sql = "DELETE FROM " . SEARCH_WORD_TABLE . "
				WHERE word_id IN (
					SELECT word_id
					FROM " . SEARCH_MATCH_TABLE . "
					WHERE word_id IN (
						SELECT word_id
						FROM " . SEARCH_MATCH_TABLE . "
						WHERE post_id IN ($post_id_sql)
						GROUP BY word_id
					)
					GROUP BY word_id
					HAVING COUNT(word_id) = 1
				)";
			if ( !$db->sql_query($sql) )
			{
				message_die(GENERAL_ERROR, 'Could not delete old words from word table', '', __LINE__, __FILE__, $sql);
			}

			$words_removed = $db->sql_affectedrows();

			break;
	}

	$sql = "DELETE FROM " . SEARCH_MATCH_TABLE . "
		WHERE post_id IN ($post_id_sql)";
	if ( !$db->sql_query($sql) )
	{
		message_die(GENERAL_ERROR, 'Error in deleting post', '', __LINE__, __FILE__, $sql);
	}

	return $words_removed;
}
415
//
// Username search
//
// Looks up usernames matching $search_match ('*' acts as wildcard) and
// prints the complete username search page: page header, the
// search_username.tpl template and page tail.
//
// $search_match - the pattern typed by the user; an empty value prints the
//                 page without running a query.
//
// Nothing is returned; a failing query ends in message_die().
//
function username_search($search_match)
{
	// The header and tail scripts are included into this function's scope,
	// so the globals and the local names used around the includes stay
	// exactly as they are.
	global $db, $board_config, $template, $lang, $images, $theme, $phpEx, $phpbb_root_path;
	global $starttime, $gen_simple_header;

	$gen_simple_header = TRUE;

	$username_list = '';
	if ( !empty($search_match) )
	{
		// '*' in the user's pattern becomes the SQL LIKE wildcard '%'
		$like_pattern = str_replace('*', '%', trim(strip_tags($search_match)));

		// NOTE(review): the quote handling below only rewrites \' to '',
		// which presumably relies on the input being slash-escaped before
		// it gets here - confirm against the caller.
		$sql = "SELECT username
			FROM " . USERS_TABLE . "
			WHERE username LIKE '" . str_replace("\'", "''", $like_pattern) . "'
			ORDER BY username";
		$result = $db->sql_query($sql);
		if ( !$result )
		{
			message_die(GENERAL_ERROR, 'Could not obtain search results', '', __LINE__, __FILE__, $sql);
		}

		// NOTE(review): usernames are written into the markup as stored;
		// verify that they are HTML-safe in the users table.
		$match_total = 0;
		while ( $row = $db->sql_fetchrow($result) )
		{
			$username_list .= '<option value="' . $row['username'] . '">' . $row['username'] . '</option>';
			$match_total++;
		}

		if ( $match_total == 0 )
		{
			$username_list .= '<option>' . $lang['No_match']. '</option>';
		}
		$db->sql_freeresult($result);
	}

	$page_title = $lang['Search'];
	include($phpbb_root_path . 'includes/page_header.'.$phpEx);

	$template->set_filenames(array(
		'search_user_body' => 'search_username.tpl')
	);

	$shown_username = '';
	if ( !empty($search_match) )
	{
		$shown_username = strip_tags($search_match);
	}

	$template->assign_vars(array(
		'USERNAME' => $shown_username,

		'L_CLOSE_WINDOW' => $lang['Close_window'],
		'L_SEARCH_USERNAME' => $lang['Find_username'],
		'L_UPDATE_USERNAME' => $lang['Select_username'],
		'L_SELECT' => $lang['Select'],
		'L_SEARCH' => $lang['Search'],
		'L_SEARCH_EXPLAIN' => $lang['Search_author_explain'],

		'S_USERNAME_OPTIONS' => $username_list,
		'S_SEARCH_ACTION' => append_sid("search.$phpEx?mode=searchuser"))
	);

	// The select box block is only switched on when there is something to
	// show (matches or the "no match" entry).
	if ( $username_list != '' )
	{
		$template->assign_block_vars('switch_select_name', array());
	}

	$template->pparse('search_user_body');

	include($phpbb_root_path . 'includes/page_tail.'.$phpEx);

	return;
}
488
489 ?>
|