<?php
/**
 * Adds a PDF excerpt to results from a FacetWP search facet that uses a SearchWP engine.
 * Based on https://searchwp.com/docs/kb/add-pdf-snippet-to-excerpts/
 * Change 'keywords' to the name of your facet in the FWP()->facet->facets lookup below.
 */
/**
 * Appends a short, keyword-centred snippet from each attached PDF to the excerpt.
 *
 * The search term is taken from the FacetWP 'keywords' facet when it is present,
 * otherwise from the regular WordPress search query. The first term that is not a
 * SearchWP common word and meets the minimum word length is located in the
 * 'searchwp_content' meta of every PDF attached to the current post, and a window of
 * words around the first hit is appended as "<strong>PDF title</strong>: snippet".
 *
 * @param string $excerpt The excerpt being filtered.
 * @return string The excerpt, followed by one snippet per PDF that contains the term.
 */
add_filter( 'get_the_excerpt', function( $excerpt ) {
	global $post;

	$pdf_excerpt_length = 15; // number of words in PDF excerpt

	// Bail out when there is no current post or its content is password protected.
	if ( empty( $post->ID ) || post_password_required() ) {
		return $excerpt;
	}

	// Common words are read from SearchWP when it is loaded; they never drive the snippet.
	$common_words = array();
	if ( class_exists( 'SearchWP' ) ) {
		$searchwp     = SearchWP::instance();
		$common_words = (array) $searchwp->common;
	}

	// Grab the raw search values: FacetWP facet first, plain search query as fallback.
	// The function_exists() check keeps the fallback reachable when FacetWP is inactive.
	if ( function_exists( 'FWP' ) && isset( FWP()->facet->facets['keywords'] ) ) { // change 'keywords' to name of your facet
		$raw_terms = (array) FWP()->facet->facets['keywords']['selected_values'];
	} else {
		$raw_terms = array_map( 'sanitize_text_field', explode( ' ', get_search_query() ) );
	}

	// Find the first applicable search term (based on character length). This does not
	// depend on the PDF, so it is resolved once instead of once per attachment.
	$min_length = absint( apply_filters( 'searchwp_minimum_word_length', 3 ) );
	$needle     = '';
	foreach ( $raw_terms as $raw_term ) {
		if ( ! is_scalar( $raw_term ) ) {
			continue;
		}
		// A single value may hold several words; the haystack is matched word by word,
		// so a phrase containing a space could never match as a whole.
		$words = preg_split( '/\s+/', (string) $raw_term, -1, PREG_SPLIT_NO_EMPTY );
		if ( empty( $words ) ) {
			continue;
		}
		foreach ( $words as $term ) {
			if ( strlen( $term ) >= $min_length && ! in_array( $term, $common_words, true ) ) {
				$needle = $term;
				break 2;
			}
		}
	}

	// Without a usable term there is nothing to look for in the PDFs.
	if ( '' === $needle ) {
		return $excerpt;
	}

	// Whole-word, case-insensitive match that skips text sitting inside an HTML tag.
	// preg_quote() keeps regex metacharacters in the term from altering the pattern.
	$pattern = '/\b' . preg_quote( $needle, '/' ) . '\b(?!([^<]+)?>)/i';

	// The buffer is 1/3 of the remaining words (-1 for the search term itself): one
	// buffer goes before the hit and two after it.
	$buffer = (int) floor( ( $pdf_excerpt_length - 1 ) / 3 );

	// PDFs attached to this post had their weight attributed to it, so scan each of them.
	foreach ( get_attached_media( 'application/pdf', $post->ID ) as $attached_pdf ) {
		// check to make sure there is file content to scan
		$pdf_content = get_post_meta( $attached_pdf->ID, 'searchwp_content', true );
		if ( ! is_string( $pdf_content ) || '' === $pdf_content ) {
			continue;
		}

		// Our haystack is the PDF content, split on any whitespace so that line breaks
		// in the extracted text do not glue several words into one entry.
		$haystack = preg_split( '/\s+/', $pdf_content, -1, PREG_SPLIT_NO_EMPTY );
		if ( empty( $haystack ) ) {
			continue;
		}

		$pdf_excerpt = '';
		foreach ( $haystack as $position => $word ) {
			if ( 1 !== preg_match( $pattern, $word ) ) {
				continue;
			}

			// Start one buffer before the hit, clamped to the beginning of the content.
			$start = max( 0, $position - $buffer );
			// Words that could not be taken before the hit are added at the end instead.
			$underflow = max( 0, $buffer - $position );
			$end       = min( count( $haystack ), $position + ( $buffer * 2 ) ) + $underflow;

			$pdf_excerpt = implode( ' ', array_slice( $haystack, $start, $end - $start ) );
			break; // only the first hit per PDF is used
		}

		// append our PDF-specific excerpt to the main excerpt
		if ( '' !== $pdf_excerpt ) {
			$pdf_label = get_the_title( $attached_pdf->ID ); // the PDF label will be the title of the PDF post
			// The snippet is text extracted from an uploaded file, so escape it for HTML.
			$excerpt .= '<br /><br /><strong>' . $pdf_label . '</strong>: ' . esc_html( $pdf_excerpt );
		}
	}

	return $excerpt;
});