File xapian-fix-snippet-first-char-nonword.patch of Package xapian-core
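Fix non-word characters before the first word in the snippet generator.
Such characters were being spuriously added to the result string
regardless of whether a match was present or not. They are now buffered
in leading_nonword, and that buffer is cleared whenever a term is dropped
from the context queue.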
Index: xapian-core-1fmwheezy28569/queryparser/snippetgenerator_internal.cc
===================================================================
--- xapian-core-1fmwheezy28569.orig/queryparser/snippetgenerator_internal.cc 2013-05-24 06:57:11.742793603 +0000
+++ xapian-core-1fmwheezy28569/queryparser/snippetgenerator_internal.cc 2013-05-24 07:01:21.433656331 +0000
@@ -92,8 +92,10 @@ SnippetGenerator::Internal::accept_term(
// not explicitly in a context, but remember the
// term in the context queue for later
context.push(term);
- while (context.size() > context_length)
+ while (context.size() > context_length) {
context.pop();
+ leading_nonword = "";
+ }
// this order handles the context_length=0 case gracefully
}
}
@@ -114,7 +116,11 @@ SnippetGenerator::Internal::accept_nonwo
nwhitespace = 0;
}
- if (pos <= horizon) {
+ if (!pos) {
+ // non-word characters before the first word
+ Unicode::append_utf8(leading_nonword, ch);
+ }
+ else if (pos <= horizon) {
// the last word of the after-context of a snippet
if (ch == ' ' && pos == horizon) {
// after-context ends on first whitespace
Index: xapian-core-1fmwheezy28569/tests/termgentest.cc
===================================================================
--- xapian-core-1fmwheezy28569.orig/tests/termgentest.cc 2013-05-24 07:01:06.206456378 +0000
+++ xapian-core-1fmwheezy28569/tests/termgentest.cc 2013-05-24 07:01:21.433656331 +0000
@@ -917,6 +917,40 @@ static bool test_snipgen1()
return true;
}
+static bool test_sg_first_nonword()
+{
+ Xapian::SnippetGenerator snipgen;
+ snipgen.set_context_length(3);
+
+ // The text opens with a non-word character (an opening bracket), so
+ // the generator receives non-word input before it has seen any word.
+ std::string text("[Brooklyn] Re: locavore cosby sweater");
+
+ // No term matches: the result must be empty, without the leading bracket.
+ snipgen.reset();
+ snipgen.add_match("readymade");
+ snipgen.accept_text(text);
+ TEST_EQUAL(snipgen.get_snippets(), "");
+
+ // The first word matches: the leading bracket is kept in front of it.
+ snipgen.reset();
+ snipgen.add_match("brooklyn");
+ snipgen.accept_text(text);
+ TEST_EQUAL(snipgen.get_snippets(), "["
+ "<b>Brooklyn</b>"
+ "] Re: locavore cosby");
+
+ // The second word matches: the bracketed first word precedes the match.
+ snipgen.reset();
+ snipgen.add_match("re");
+ snipgen.accept_text(text);
+ TEST_EQUAL(snipgen.get_snippets(), "[Brooklyn] "
+ "<b>Re</b>: "
+ "locavore cosby sweater");
+
+ return true;
+}
+
static bool test_sg_stem()
{
Xapian::SnippetGenerator snipgen;
@@ -956,6 +990,7 @@ static const test_desc tests[] = {
TESTCASE(tg_spell2),
TESTCASE(tg_max_word_length1),
TESTCASE(snipgen1),
+ TESTCASE(sg_first_nonword),
TESTCASE(sg_stem),
END_OF_TESTCASES
};