Skip to content

Commit

Permalink
Merge pull request #233 from tulibraries/BL-1855-solr-exact-searches
Browse files Browse the repository at this point in the history
BL-1855 solr exact searches
  • Loading branch information
dkinzer authored Dec 11, 2024
2 parents c6941fe + ab8c673 commit a64e9fb
Show file tree
Hide file tree
Showing 9 changed files with 438 additions and 205 deletions.
4 changes: 3 additions & 1 deletion Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ gem "rsolr"
gem "rspec"
gem "cob_index",
git: "https://github.com/tulibraries/cob_index.git",
tag: "v0.14.1"
tag: "v0.15.0"
gem "alma"
gem "lc_solr_sortable", git: "https://github.com/tulibraries/lc_solr_sortable", branch: "main"

gem "pry-rails", "~> 0.3.11"
26 changes: 18 additions & 8 deletions Gemfile.lock
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
GIT
remote: https://github.com/tulibraries/cob_index.git
revision: 72b487fde702fb8f3405d9c4f262c08fb3c1b370
tag: v0.14.1
revision: 03a8eff27fadb7d2e12cc60d79e95d5fd716760f
tag: v0.15.0
specs:
cob_index (0.1.0)
gli (~> 2.18)
Expand Down Expand Up @@ -38,6 +38,7 @@ GEM
base64 (0.2.0)
bigdecimal (3.1.8)
builder (3.2.4)
coderay (1.1.3)
concurrent-ruby (1.3.4)
connection_pool (2.4.1)
csv (3.3.0)
Expand All @@ -57,15 +58,15 @@ GEM
ffi-compiler (1.3.2)
ffi (>= 1.15.5)
rake
gli (2.21.5)
gli (2.22.0)
hashie (5.0.0)
http (5.2.0)
addressable (~> 2.8)
base64 (~> 0.1)
http-cookie (~> 1.0)
http-form_data (~> 2.2)
llhttp-ffi (~> 0.5.0)
http-cookie (1.0.7)
http-cookie (1.0.8)
domain_name (~> 0.5)
http-form_data (2.3.0)
httparty (0.22.0)
Expand All @@ -86,20 +87,26 @@ GEM
unf
marc-fastxmlwriter (1.1.0)
marc (~> 1.0)
method_source (1.1.0)
mini_mime (1.1.5)
minitest (5.25.1)
multi_xml (0.7.1)
bigdecimal (~> 3.1)
net-http (0.4.1)
uri
nokogiri (1.16.7-aarch64-linux)
nokogiri (1.17.1-aarch64-linux)
racc (~> 1.4)
nokogiri (1.16.7-arm64-darwin)
nokogiri (1.17.1-arm64-darwin)
racc (~> 1.4)
nokogiri (1.16.7-x86_64-darwin)
nokogiri (1.17.1-x86_64-darwin)
racc (~> 1.4)
nokogiri (1.16.7-x86_64-linux)
nokogiri (1.17.1-x86_64-linux)
racc (~> 1.4)
pry (0.15.0)
coderay (~> 1.1)
method_source (~> 1.0)
pry-rails (0.3.11)
pry (>= 0.13.0)
public_suffix (6.0.1)
racc (1.8.1)
rake (13.2.1)
Expand Down Expand Up @@ -145,14 +152,17 @@ GEM
PLATFORMS
aarch64-linux
arm64-darwin-21
arm64-darwin-23
x86_64-darwin-20
x86_64-darwin-21
x86_64-darwin-22
x86_64-linux

DEPENDENCIES
alma
cob_index!
lc_solr_sortable!
pry-rails (~> 0.3.11)
rsolr
rspec

Expand Down
2 changes: 1 addition & 1 deletion docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ services:
solr:
image: solr:9.6.1
volumes:
- $PWD/bin/solr-configs-reset:/opt/docker-solr/scripts/solr-configs-reset
- $PWD/bin/solr-configs-reset:/usr/bin/solr-configs-reset
- $PWD:/opt/solr/conf/tul_cob-catalog-solr
ports:
- 8983:8983
Expand Down
391 changes: 200 additions & 191 deletions schema.xml

Large diffs are not rendered by default.

108 changes: 105 additions & 3 deletions solrconfig.xml
Original file line number Diff line number Diff line change
Expand Up @@ -104,9 +104,9 @@
<int name="ps">3</int>
<float name="tie">0.01</float>

<str name="bq">pub_date_tdt:[NOW/DAY-10YEAR TO NOW/DAY]^3500.0</str>
<str name="bq">(library_based_boost_t:boost)^10000.0</str>
<str name="bq">(library_based_boost_t:no_boost)^0.001</str>
<str name="bq">pub_date_tdt:[NOW/DAY-10YEAR TO NOW/DAY]^175000.0</str>
<str name="bq">(library_based_boost_txt:boost)^10000.0</str>
<str name="bq">(library_based_boost_txt:no_boost)^0.001</str>
<str name="fq">-suppress_items_b:true</str>

<!-- NOT using marc_display because it is large and will slow things down for search results -->
Expand Down Expand Up @@ -345,6 +345,108 @@
-->
</requestHandler>

<!-- /single_quoted_search: an edismax SearchHandler whose qf, author_qf, title_qf and
     subject_qf lists point at the *_unstem_search fields (plus work_access_point and
     subject_topic_facet). spec/relevance/quoted_searches_spec.rb sends single-term quoted
     queries to this path and notes that the application overrides the path dynamically.
     NOTE(review): presumably the *_unstem_search fields are unstemmed variants declared in
     schema.xml; confirm against the schema. -->
<requestHandler name="/single_quoted_search" class="solr.SearchHandler">
<lst name="defaults">
<str name="df">text</str>
<str name="defType">edismax</str>
<str name="echoParams">explicit</str>
<int name="rows">10</int>
<bool name="sow">false</bool>
<str name="q.alt">*:*</str>
<str name="wt">json</str>
<!-- NOTE(review): both mm clauses use the same threshold (8); confirm this is intended. -->
<str name="mm">8&lt;-1 8&lt;90%</str>
<bool name="lowercaseOperators">false</bool>
<int name="ps">3</int>
<float name="tie">0.01</float>
<!-- Boost queries: records published in the last ten years, then the library-based boost flags.
     NOTE(review): the pub_date_tdt boost is 3500.0 here, while another pub_date_tdt bq in this
     file appears to have been raised to 175000.0 in the same change; confirm the difference
     is deliberate. -->
<str name="bq">pub_date_tdt:[NOW/DAY-10YEAR TO NOW/DAY]^3500.0</str>
<str name="bq">(library_based_boost_txt:boost)^10000.0</str>
<str name="bq">(library_based_boost_txt:no_boost)^0.001</str>
<!-- Always filter out records flagged suppress_items_b:true. -->
<str name="fq">-suppress_items_b:true</str>

<!-- Fields returned by default. Entries written as field:[json] use the [json] transformer syntax. -->
<str name="fl">
id,
availability_facet,
bound_with_ids,
call_number_alt_display,
call_number_display,
contributor_display,
creator_display,
date_added_facet,
format,
holdings_display,
holdings_summary_display,
holdings_with_no_items_display,
imprint_display,
imprint_dist_display,
imprint_man_display,
imprint_prod_display,
imprint_date_display,
imprint_prod_date_display,
imprint_dist_date_display,
imprint_man_date_display,
isbn_display,
issn_display,
lc_call_number_display,
lccn_display,
library_facet,
location_display,
note_summary_display,
oclc_number_display,
pub_date,
purchase_order,
responsibility_display,
responsibility_truncated_display,
score,
title_series_display,
title_statement_display,
title_truncated_display,
title_with_subtitle_display,
title_with_subtitle_truncated_display,
title_uniform_display,
electronic_resource_display:[json],
hathi_trust_bib_key_display:[json],
items_json_display:[json],
url_finding_aid_display:[json],
url_more_links_display:[json]
</str>

<!-- Default query fields and weights used when the request does not supply its own qf. -->
<str name="qf">
title_unstem_search^1000000.0
subtitle_unstem_search^500000.0
work_access_point^10000.0
title_statement_unstem_search^50000.0
title_uniform_unstem_search^150000.0
title_addl_unstem_search^50000.0
title_added_entry_unstem_search^15000.0
subject_topic_unstem_search^10000.0
subject_unstem_search^7500.0
subject_topic_facet^6250.0
note_toc_unstem_search^1000.0
note_summary_unstem_search^1000.0
creator_unstem_search^2500.0
subject_addl_unstem_search^2500.0
title_series_unstem_search^250.0
</str>
<!-- Named field lists for author, title and subject searches. The quoted-search spec selects
     them by dereferencing author_qf / title_qf / subject_qf in its qf parameter.
     NOTE(review): that spec also passes pf values that dereference author_pf / title_pf /
     subject_pf, but no *_pf parameter is defined in this handler; confirm how those resolve. -->
<str name="author_qf">
creator_unstem_search^200.0
</str>
<str name="title_qf">
title_unstem_search^50000.0
subtitle_unstem_search^25000.0
title_uniform_unstem_search^15000.0
title_addl_unstem_search^10000.0
title_added_entry_unstem_search^50.0
title_series_unstem_search^5.0
</str>
<str name="subject_qf">
subject_topic_unstem_search^200.0
subject_unstem_search^125.0
subject_topic_facet^100.0
subject_addl_unstem_search^10.0
</str>
</lst>
</requestHandler>

<!-- for requests to get a single document; use id=666 instead of q=id:666 -->
<requestHandler name="/document" class="solr.SearchHandler" >
<lst name="defaults">
Expand Down
2 changes: 1 addition & 1 deletion spec/fixtures/contingent+labor.xml

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions spec/relevance/format_score_search_records_spec.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# frozen_string_literal: true
require "spec_helper"
require "pry"

RSpec.describe "Searches with format set to 'Score' "do
solr = RSolr.connect(url: ENV["SOLR_URL"])
Expand Down
3 changes: 3 additions & 0 deletions spec/relevance/query_results_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -320,3 +320,6 @@
.before(["c", "b"])
end
end


# body = JSON.parse(phrase_query_results.response[:body])["response"]["numFound"]
106 changes: 106 additions & 0 deletions spec/relevance/quoted_searches_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
# frozen_string_literal: true
require "spec_helper"

# Relevance checks for quoted searches.
#
# Every example runs the same term twice against the Solr instance at
# ENV["SOLR_URL"]: once wrapped in double quotes and once bare. It then asserts
# that the quoted form matches strictly fewer documents (numFound).
#
# Contexts customize the run by overriding these `let` values:
#   term         - the search term, without quotes
#   solr_path    - request handler that receives the QUOTED query
#   extra_params - extra Solr params (qf/pf) merged into BOTH requests
RSpec.describe "Searches with quotes in the terms" do
  let(:solr) { RSolr.connect(url: ENV["SOLR_URL"]) }
  let(:term) { "" }
  let(:quoted_term) { "\"#{term}\"" }
  let(:solr_path) { "search" }
  let(:extra_params) { {} }

  # numFound for the quoted term, sent to the handler chosen by solr_path.
  let(:num_found_quoted) do
    params = { q: quoted_term }.merge(extra_params)
    solr.get(solr_path, params: params)["response"]["numFound"]
  end

  # numFound for the bare term. This baseline always goes to the "search"
  # handler, even when a context points solr_path somewhere else.
  let(:num_found_not_quoted) do
    params = { q: term }.merge(extra_params)
    solr.get("search", params: params)["response"]["numFound"]
  end

  context "quoted queries with more than one term" do
    let(:term) { "book readers" }

    it "quoted query to have less results than regular query" do
      expect(num_found_quoted).to be < num_found_not_quoted
    end
  end

  context "quoted queries with one term" do
    let(:term) { "readers" }
    # In the application we override this path dynamically.
    let(:solr_path) { "single_quoted_search" }

    it "quoted query to have less results than regular query" do
      expect(num_found_quoted).to be < num_found_not_quoted
    end
  end

  context "title quoted query with one term" do
    let(:term) { "readers" }
    let(:extra_params) { { qf: "${title_qf}", pf: "${title_pf}" } }
    # In the application we override this path dynamically.
    let(:solr_path) { "single_quoted_search" }

    it "quoted query to have less results than regular query" do
      expect(num_found_quoted).to be < num_found_not_quoted
    end
  end

  context "title quoted query with multiple terms" do
    let(:term) { "Book readers" }
    let(:extra_params) { { qf: "${title_qf}", pf: "${title_pf}" } }

    it "quoted query to have less results than regular query" do
      expect(num_found_quoted).to be < num_found_not_quoted
    end
  end

  context "subject quoted query with one term" do
    let(:term) { "chemically" }
    let(:extra_params) { { qf: "${subject_qf}", pf: "${subject_pf}" } }
    # In the application we override this path dynamically.
    let(:solr_path) { "single_quoted_search" }

    it "quoted query to have less results than regular query" do
      expect(num_found_quoted).to be < num_found_not_quoted
    end
  end

  context "subject quoted query with multiple terms" do
    let(:term) { "ancient history" }
    let(:extra_params) { { qf: "${subject_qf}", pf: "${subject_pf}" } }

    it "quoted query to have less results than regular query" do
      expect(num_found_quoted).to be < num_found_not_quoted
    end
  end

  context "author quoted query with one term" do
    # This test doesn't work with all names in this test set.
    let(:term) { "William" }
    let(:solr_path) { "single_quoted_search" }
    let(:extra_params) { { qf: "${author_qf}", pf: "${author_pf}" } }

    it "quoted query to have less results than regular query" do
      expect(num_found_quoted).to be < num_found_not_quoted
    end
  end

  context "author quoted query with multiple terms" do
    let(:term) { "William Shakespeare" }
    let(:extra_params) { { qf: "${author_qf}", pf: "${author_pf}" } }

    it "quoted query to have less results than regular query" do
      expect(num_found_quoted).to be < num_found_not_quoted
    end
  end
end


0 comments on commit a64e9fb

Please sign in to comment.