001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.lucene.demo.facet; 018 019import java.io.IOException; 020import java.time.LocalDate; 021import java.time.ZoneOffset; 022import java.util.Arrays; 023import org.apache.lucene.analysis.core.WhitespaceAnalyzer; 024import org.apache.lucene.document.Document; 025import org.apache.lucene.document.Field; 026import org.apache.lucene.document.FloatPoint; 027import org.apache.lucene.document.IntPoint; 028import org.apache.lucene.document.LongPoint; 029import org.apache.lucene.document.StringField; 030import org.apache.lucene.facet.FacetResult; 031import org.apache.lucene.facet.Facets; 032import org.apache.lucene.facet.FacetsCollector; 033import org.apache.lucene.facet.FacetsCollectorManager; 034import org.apache.lucene.facet.facetset.DimRange; 035import org.apache.lucene.facet.facetset.ExactFacetSetMatcher; 036import org.apache.lucene.facet.facetset.FacetSet; 037import org.apache.lucene.facet.facetset.FacetSetDecoder; 038import org.apache.lucene.facet.facetset.FacetSetMatcher; 039import org.apache.lucene.facet.facetset.FacetSetsField; 040import org.apache.lucene.facet.facetset.MatchingFacetSetsCounts; 041import org.apache.lucene.facet.facetset.RangeFacetSetMatcher; 042import org.apache.lucene.index.DirectoryReader; 043import org.apache.lucene.index.IndexWriter; 044import org.apache.lucene.index.IndexWriterConfig; 045import org.apache.lucene.index.IndexWriterConfig.OpenMode; 046import org.apache.lucene.search.BooleanClause; 047import org.apache.lucene.search.BooleanQuery; 048import org.apache.lucene.search.IndexSearcher; 049import org.apache.lucene.search.MatchAllDocsQuery; 050import org.apache.lucene.search.Query; 051import org.apache.lucene.search.TermInSetQuery; 052import org.apache.lucene.store.ByteBuffersDirectory; 053import org.apache.lucene.store.Directory; 054import org.apache.lucene.util.BytesRef; 055import org.apache.lucene.util.NumericUtils; 056 057/** 058 * Shows usage of indexing and searching {@link FacetSetsField} with a custom {@link FacetSet} 059 * implementation. Unlike the out of the box {@link FacetSet} implementations, this example shows 060 * how to mix and match dimensions of different types, as well as implementing a custom {@link 061 * FacetSetMatcher}. 062 */ 063public class CustomFacetSetExample { 064 065 private static final long MAY_SECOND_2022 = date("2022-05-02"); 066 private static final long JUNE_SECOND_2022 = date("2022-06-02"); 067 private static final long JULY_SECOND_2022 = date("2022-07-02"); 068 private static final float HUNDRED_TWENTY_DEGREES = fahrenheitToCelsius(120); 069 private static final float HUNDRED_DEGREES = fahrenheitToCelsius(100); 070 private static final float EIGHTY_DEGREES = fahrenheitToCelsius(80); 071 072 private final Directory indexDir = new ByteBuffersDirectory(); 073 074 /** Empty constructor */ 075 public CustomFacetSetExample() {} 076 077 /** Build the example index. */ 078 private void index() throws IOException { 079 IndexWriter indexWriter = 080 new IndexWriter( 081 indexDir, new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(OpenMode.CREATE)); 082 083 // Every document holds the temperature measures for a City by Date 084 085 Document doc = new Document(); 086 doc.add(new StringField("city", "city1", Field.Store.YES)); 087 doc.add( 088 FacetSetsField.create( 089 "temperature", 090 new TemperatureReadingFacetSet(MAY_SECOND_2022, HUNDRED_DEGREES), 091 new TemperatureReadingFacetSet(JUNE_SECOND_2022, EIGHTY_DEGREES), 092 new TemperatureReadingFacetSet(JULY_SECOND_2022, HUNDRED_TWENTY_DEGREES))); 093 addFastMatchFields(doc); 094 indexWriter.addDocument(doc); 095 096 doc = new Document(); 097 doc.add(new StringField("city", "city2", Field.Store.YES)); 098 doc.add( 099 FacetSetsField.create( 100 "temperature", 101 new TemperatureReadingFacetSet(MAY_SECOND_2022, EIGHTY_DEGREES), 102 new TemperatureReadingFacetSet(JUNE_SECOND_2022, HUNDRED_DEGREES), 103 new TemperatureReadingFacetSet(JULY_SECOND_2022, HUNDRED_TWENTY_DEGREES))); 104 addFastMatchFields(doc); 105 indexWriter.addDocument(doc); 106 107 indexWriter.close(); 108 } 109 110 private void addFastMatchFields(Document doc) { 111 // day field 112 doc.add(new StringField("day", String.valueOf(MAY_SECOND_2022), Field.Store.NO)); 113 doc.add(new StringField("day", String.valueOf(JUNE_SECOND_2022), Field.Store.NO)); 114 doc.add(new StringField("day", String.valueOf(JULY_SECOND_2022), Field.Store.NO)); 115 116 // temp field 117 doc.add(new StringField("temp", String.valueOf(EIGHTY_DEGREES), Field.Store.NO)); 118 doc.add(new StringField("temp", String.valueOf(HUNDRED_DEGREES), Field.Store.NO)); 119 doc.add(new StringField("temp", String.valueOf(HUNDRED_TWENTY_DEGREES), Field.Store.NO)); 120 } 121 122 /** Counting documents which exactly match a given {@link FacetSet}. */ 123 private FacetResult exactMatching() throws IOException { 124 try (DirectoryReader indexReader = DirectoryReader.open(indexDir)) { 125 IndexSearcher searcher = new IndexSearcher(indexReader); 126 127 // MatchAllDocsQuery is for "browsing" (counts facets 128 // for all non-deleted docs in the index); normally 129 // you'd use a "normal" query: 130 FacetsCollector fc = 131 searcher.search(MatchAllDocsQuery.INSTANCE, new FacetsCollectorManager()); 132 133 // Count both "May 2022, 100 degrees" and "July 2022, 120 degrees" dimensions 134 Facets facets = 135 new MatchingFacetSetsCounts( 136 "temperature", 137 fc, 138 TemperatureReadingFacetSet::decodeTemperatureReading, 139 new ExactFacetSetMatcher( 140 "May 2022 (100f)", 141 new TemperatureReadingFacetSet(MAY_SECOND_2022, HUNDRED_DEGREES)), 142 new ExactFacetSetMatcher( 143 "July 2022 (120f)", 144 new TemperatureReadingFacetSet(JULY_SECOND_2022, HUNDRED_TWENTY_DEGREES))); 145 146 // Retrieve results 147 return facets.getAllChildren("temperature"); 148 } 149 } 150 151 /** 152 * Counting documents which exactly match a given {@link FacetSet}. This example also demonstrates 153 * how to use a fast match query to improve the counting efficiency by skipping over documents 154 * which cannot possibly match a set. 155 */ 156 private FacetResult exactMatchingWithFastMatchQuery() throws IOException { 157 try (DirectoryReader indexReader = DirectoryReader.open(indexDir)) { 158 IndexSearcher searcher = new IndexSearcher(indexReader); 159 160 // MatchAllDocsQuery is for "browsing" (counts facets 161 // for all non-deleted docs in the index); normally 162 // you'd use a "normal" query: 163 FacetsCollector fc = 164 searcher.search(MatchAllDocsQuery.INSTANCE, new FacetsCollectorManager()); 165 166 // Match documents whose "day" field is either "May 2022" or "July 2022" 167 Query dateQuery = 168 new TermInSetQuery( 169 "day", 170 Arrays.asList( 171 new BytesRef(String.valueOf(MAY_SECOND_2022)), 172 new BytesRef(String.valueOf(JULY_SECOND_2022)))); 173 // Match documents whose "temp" field is either "80" or "120" degrees 174 Query temperatureQuery = 175 new TermInSetQuery( 176 "temp", 177 Arrays.asList( 178 new BytesRef(String.valueOf(HUNDRED_DEGREES)), 179 new BytesRef(String.valueOf(HUNDRED_TWENTY_DEGREES)))); 180 // Documents must match both clauses 181 Query fastMatchQuery = 182 new BooleanQuery.Builder() 183 .add(dateQuery, BooleanClause.Occur.MUST) 184 .add(temperatureQuery, BooleanClause.Occur.MUST) 185 .build(); 186 187 // Count both "May 2022, 100 degrees" and "July 2022, 120 degrees" dimensions 188 Facets facets = 189 new MatchingFacetSetsCounts( 190 "temperature", 191 fc, 192 TemperatureReadingFacetSet::decodeTemperatureReading, 193 fastMatchQuery, 194 new ExactFacetSetMatcher( 195 "May 2022 (100f)", 196 new TemperatureReadingFacetSet(MAY_SECOND_2022, HUNDRED_DEGREES)), 197 new ExactFacetSetMatcher( 198 "July 2022 (120f)", 199 new TemperatureReadingFacetSet(JULY_SECOND_2022, HUNDRED_TWENTY_DEGREES))); 200 201 // Retrieve results 202 return facets.getAllChildren("temperature"); 203 } 204 } 205 206 /** Counting documents which match a certain degrees value for any date. */ 207 private FacetResult rangeMatching() throws IOException { 208 try (DirectoryReader indexReader = DirectoryReader.open(indexDir)) { 209 IndexSearcher searcher = new IndexSearcher(indexReader); 210 211 // MatchAllDocsQuery is for "browsing" (counts facets 212 // for all non-deleted docs in the index); normally 213 // you'd use a "normal" query: 214 FacetsCollector fc = 215 searcher.search(MatchAllDocsQuery.INSTANCE, new FacetsCollectorManager()); 216 217 // Count 80-100 degrees 218 Facets facets = 219 new MatchingFacetSetsCounts( 220 "temperature", 221 fc, 222 TemperatureReadingFacetSet::decodeTemperatureReading, 223 new RangeFacetSetMatcher( 224 "Eighty to Hundred Degrees", 225 DimRange.fromLongs(Long.MIN_VALUE, true, Long.MAX_VALUE, true), 226 DimRange.fromFloats(EIGHTY_DEGREES, true, HUNDRED_DEGREES, true))); 227 228 // Retrieve results 229 return facets.getAllChildren("temperature"); 230 } 231 } 232 233 /** 234 * Like {@link #rangeMatching()}, however this example demonstrates a custom {@link 235 * FacetSetMatcher} which only considers certain dimensions (in this case only the temperature 236 * one). 237 */ 238 private FacetResult customRangeMatching() throws IOException { 239 try (DirectoryReader indexReader = DirectoryReader.open(indexDir)) { 240 IndexSearcher searcher = new IndexSearcher(indexReader); 241 242 // MatchAllDocsQuery is for "browsing" (counts facets 243 // for all non-deleted docs in the index); normally 244 // you'd use a "normal" query: 245 FacetsCollector fc = 246 searcher.search(MatchAllDocsQuery.INSTANCE, new FacetsCollectorManager()); 247 248 // Count 80-100 degrees 249 Facets facets = 250 new MatchingFacetSetsCounts( 251 "temperature", 252 fc, 253 TemperatureReadingFacetSet::decodeTemperatureReading, 254 new TemperatureOnlyFacetSetMatcher( 255 "Eighty to Hundred Degrees", 256 DimRange.fromFloats(EIGHTY_DEGREES, true, HUNDRED_DEGREES, true))); 257 258 // Retrieve results 259 return facets.getAllChildren("temperature"); 260 } 261 } 262 263 private static long date(String dateString) { 264 return LocalDate.parse(dateString).atStartOfDay().toInstant(ZoneOffset.UTC).toEpochMilli(); 265 } 266 267 private static float fahrenheitToCelsius(int degrees) { 268 return (degrees - 32.0f) * 5.f / 9.f; 269 } 270 271 /** Runs the exact matching example. */ 272 public FacetResult runExactMatching() throws IOException { 273 index(); 274 return exactMatching(); 275 } 276 277 /** Runs the exact matching with fast match query example. */ 278 public FacetResult runExactMatchingWithFastMatchQuery() throws IOException { 279 index(); 280 return exactMatchingWithFastMatchQuery(); 281 } 282 283 /** Runs the range matching example. */ 284 public FacetResult runRangeMatching() throws IOException { 285 index(); 286 return rangeMatching(); 287 } 288 289 /** Runs the custom range matching example. */ 290 public FacetResult runCustomRangeMatching() throws IOException { 291 index(); 292 return customRangeMatching(); 293 } 294 295 /** Runs the search and drill-down examples and prints the results. */ 296 public static void main(String[] args) throws Exception { 297 CustomFacetSetExample example = new CustomFacetSetExample(); 298 299 System.out.println("Exact Facet Set matching example:"); 300 System.out.println("-----------------------"); 301 FacetResult result = example.runExactMatching(); 302 System.out.println("Temperature Reading: " + result); 303 304 System.out.println("Exact Facet Set matching with fast match query example:"); 305 System.out.println("-----------------------"); 306 result = example.runExactMatchingWithFastMatchQuery(); 307 System.out.println("Temperature Reading: " + result); 308 309 System.out.println("Range Facet Set matching example:"); 310 System.out.println("-----------------------"); 311 result = example.runRangeMatching(); 312 System.out.println("Temperature Reading: " + result); 313 314 System.out.println("Custom Range Facet Set matching example:"); 315 System.out.println("-----------------------"); 316 result = example.runCustomRangeMatching(); 317 System.out.println("Temperature Reading: " + result); 318 } 319 320 /** 321 * A {@link FacetSet} which encodes a temperature reading in a date (long) and degrees (celsius; 322 * float). 323 */ 324 public static class TemperatureReadingFacetSet extends FacetSet { 325 326 private static final int SIZE_PACKED_BYTES = Long.BYTES + Float.BYTES; 327 328 private final long date; 329 private final float degrees; 330 331 /** Constructor */ 332 public TemperatureReadingFacetSet(long date, float degrees) { 333 super(2); // We encode two dimensions 334 335 this.date = date; 336 this.degrees = degrees; 337 } 338 339 @Override 340 public long[] getComparableValues() { 341 return new long[] {date, NumericUtils.floatToSortableInt(degrees)}; 342 } 343 344 @Override 345 public int packValues(byte[] buf, int start) { 346 LongPoint.encodeDimension(date, buf, start); 347 // Encode 'degrees' as a sortable integer. 348 FloatPoint.encodeDimension(degrees, buf, start + Long.BYTES); 349 return sizePackedBytes(); 350 } 351 352 @Override 353 public int sizePackedBytes() { 354 return SIZE_PACKED_BYTES; 355 } 356 357 /** 358 * An implementation of {@link FacetSetDecoder#decode(BytesRef, int, long[])} for {@link 359 * TemperatureReadingFacetSet}. 360 */ 361 public static int decodeTemperatureReading(BytesRef bytesRef, int start, long[] dest) { 362 dest[0] = LongPoint.decodeDimension(bytesRef.bytes, start); 363 // Decode the degrees as a sortable integer. 364 dest[1] = IntPoint.decodeDimension(bytesRef.bytes, start + Long.BYTES); 365 return SIZE_PACKED_BYTES; 366 } 367 } 368 369 /** 370 * A {@link FacetSetMatcher} which matches facet sets only by their temperature dimension, 371 * ignoring the date. 372 */ 373 public static class TemperatureOnlyFacetSetMatcher extends FacetSetMatcher { 374 375 private final DimRange temperatureRange; 376 377 /** Constructor */ 378 protected TemperatureOnlyFacetSetMatcher(String label, DimRange temperatureRange) { 379 super(label, 1); // We only evaluate one dimension 380 381 this.temperatureRange = temperatureRange; 382 } 383 384 @Override 385 public boolean matches(long[] dimValues) { 386 return temperatureRange.min() <= dimValues[1] && temperatureRange.max() >= dimValues[1]; 387 } 388 } 389}