LogoKolab Groupware OBS > Projects
Log In

View File xapian-fix-snippet-first-char-nonword.patch of Package xapian-core (Project Kolab:3.4:Updates)

Fix nonword chars before 1st word in snippet generator

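Such characters were being spuriously added to the result string
regardless of whether a match was present or not.  They are now
collected separately in leading_nonword, which is cleared whenever a
term is dropped from the context queue.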

Index: xapian-core-1fmwheezy28569/queryparser/snippetgenerator_internal.cc
===================================================================
--- xapian-core-1fmwheezy28569.orig/queryparser/snippetgenerator_internal.cc	2013-05-24 06:57:11.742793603 +0000
+++ xapian-core-1fmwheezy28569/queryparser/snippetgenerator_internal.cc	2013-05-24 07:01:21.433656331 +0000
@@ -92,8 +92,10 @@ SnippetGenerator::Internal::accept_term(
 	// not explicitly in a context, but remember the
 	// term in the context queue for later
 	context.push(term);
-	while (context.size() > context_length)
+	while (context.size() > context_length) {
 	    context.pop();
+	    leading_nonword = "";
+	}
 	// this order handles the context_length=0 case gracefully
     }
 }
@@ -114,7 +116,11 @@ SnippetGenerator::Internal::accept_nonwo
 	nwhitespace = 0;
     }
 
-    if (pos <= horizon) {
+    if (!pos) {
+	// non-word characters before the first word
+	Unicode::append_utf8(leading_nonword, ch);
+    }
+    else if (pos <= horizon) {
 	// the last word of the after-context of a snippet
 	if (ch == ' ' && pos == horizon) {
 	    // after-context ends on first whitespace
Index: xapian-core-1fmwheezy28569/tests/termgentest.cc
===================================================================
--- xapian-core-1fmwheezy28569.orig/tests/termgentest.cc	2013-05-24 07:01:06.206456378 +0000
+++ xapian-core-1fmwheezy28569/tests/termgentest.cc	2013-05-24 07:01:21.433656331 +0000
@@ -917,6 +917,40 @@ static bool test_snipgen1()
     return true;
 }
 
+static bool test_sg_first_nonword()
+{
+    Xapian::SnippetGenerator snipgen;
+    snipgen.set_context_length(3);
+
+    // The text starts with a non-word character ('['); it must appear in
+    // the snippet only when the word that follows it is part of the output.
+    std::string text("[Brooklyn] Re: locavore cosby sweater");
+
+    // no match at all: the leading '[' must not leak into the empty result
+    snipgen.reset();
+    snipgen.add_match("readymade");
+    snipgen.accept_text(text);
+    TEST_EQUAL(snipgen.get_snippets(), "");
+
+    // first word matched: the leading '[' precedes the highlighted word
+    snipgen.reset();
+    snipgen.add_match("brooklyn");
+    snipgen.accept_text(text);
+    TEST_EQUAL(snipgen.get_snippets(), "["
+				       "<b>Brooklyn</b>"
+				       "] Re: locavore cosby");
+
+    // second word matched: '[' and the first word are kept as leading context
+    snipgen.reset();
+    snipgen.add_match("re");
+    snipgen.accept_text(text);
+    TEST_EQUAL(snipgen.get_snippets(), "[Brooklyn] "
+				       "<b>Re</b>: "
+				       "locavore cosby sweater");
+
+    return true;
+}
+
 static bool test_sg_stem()
 {
     Xapian::SnippetGenerator snipgen;
@@ -956,6 +990,7 @@ static const test_desc tests[] = {
     TESTCASE(tg_spell2),
     TESTCASE(tg_max_word_length1),
     TESTCASE(snipgen1),
+    TESTCASE(sg_first_nonword),
     TESTCASE(sg_stem),
     END_OF_TESTCASES
 };