Commit 5e49806

HPCesia <me@hpcesia.com>
2025-02-01 07:38:08
feat: implement pagefind search
- Integrate astro-pagefind for backend indexing - Build custom search UI components - Handle search queries and result rendering
1 parent 8e70f6b
src/components/search/Pagefind.astro
@@ -1,3 +1,99 @@
 ---
+import { isFirstInstance } from '@utils/component-utils';
+import { Icon } from 'astro-icon/components';
+import SearchBaseUI from './SearchBaseUI.astro';
 
+// BASE_URL only ends in a slash when Astro's `trailingSlash` setting produces one
+// (it is stripped under `trailingSlash: 'never'`), so insert the separator ourselves
+// instead of emitting a path such as `/docspagefind/`.
+const baseUrl = import.meta.env.BASE_URL;
+const bundlePath = `${baseUrl.endsWith('/') ? baseUrl : `${baseUrl}/`}pagefind/`;
 ---
+
+<SearchBaseUI data-pagefind-ui data-bundle-path={bundlePath} />
+
+{/*
+  Result-row markup that the client script clones for every search hit; the
+  placeholder text is overwritten at render time. It is emitted when
+  isFirstInstance() returns true for this page, and always in dev. The instance
+  key is specific to this component: the previous key, 'md-has-pre', looks copied
+  from another component, and a shared key would presumably let that component
+  suppress this template. TODO confirm against isFirstInstance.
+*/}
+{
+  (isFirstInstance('pagefind-result-template', Astro.url) || import.meta.env.DEV) && (
+    <template id="pagefind-result-template">
+      <a class="theme-card-bg hover:theme-card-bg-hl-trans group rounded-md p-2" href="#">
+        <div class="flex flex-row items-center gap-1 text-center">
+          <span class="group-hover:theme-text-hl-contrast text-lg">Fake Result</span>
+          <Icon
+            name="material-symbols:chevron-right-rounded"
+            class="theme-text-hl-contrast text-lg"
+          />
+        </div>
+        <div id="pagefind-result-template-excerpt" class="theme-text-second">
+          This is a fake result.
+        </div>
+      </a>
+    </template>
+  )
+}
+
+<script>
+  import type { PagefindSearchResult } from '@/types/PagefindSearchAPI';
+
+  /** The subset of the Pagefind bundle's API that this component calls. */
+  type PagefindApi = {
+    options: (options: Record<string, unknown>) => Promise<void>;
+    init: () => Promise<void>;
+    debouncedSearch: (
+      term: string,
+      options?: object,
+      debounceTimeoutMs?: number
+    ) => Promise<{ results: PagefindSearchResult[] } | null>;
+  };
+
+  /** The loaded data of a single search hit. */
+  type ResultData = Awaited<ReturnType<PagefindSearchResult['data']>>;
+
+  /** Attribute set on a search root once it has been wired up, so re-runs skip it. */
+  const INITIALIZED_ATTR = 'data-pagefind-initialized';
+
+  /**
+   * Imports and configures the Pagefind bundle found under `bundlePath`.
+   *
+   * @param bundlePath URL prefix of the bundle directory, ending in `/`.
+   * @returns The ready-to-use module, or `null` if it could not be loaded
+   *   (the failure is logged instead of surfacing as an unhandled rejection).
+   */
+  async function loadPagefind(bundlePath: string): Promise<PagefindApi | null> {
+    try {
+      const pagefind: PagefindApi = await import(/* @vite-ignore */ `${bundlePath}pagefind.js`);
+      await pagefind.options({
+        baseUrl: import.meta.env.BASE_URL,
+        bundlePath: bundlePath,
+      });
+      await pagefind.init();
+      return pagefind;
+    } catch (error: unknown) {
+      console.error('Pagefind: Failed to load the search bundle', error);
+      return null;
+    }
+  }
+
+  /**
+   * Clones the result template and fills it with one hit's URL, title and excerpt.
+   *
+   * @returns The populated fragment, or `null` if the template lacks the expected nodes.
+   */
+  function renderResult(template: HTMLTemplateElement, data: ResultData): DocumentFragment | null {
+    const node = template.content.cloneNode(true) as DocumentFragment;
+    const link = node.querySelector('a');
+    const title = node.querySelector('span');
+    const excerpt = node.querySelector('#pagefind-result-template-excerpt');
+    if (!link || !title || !excerpt) return null;
+
+    link.setAttribute('href', data.url);
+    title.textContent = data.meta.title ?? '';
+    // Matched terms arrive wrapped in <mark> elements, so the excerpt is inserted as HTML.
+    excerpt.innerHTML = data.excerpt;
+    // The id only exists so the node can be located inside the template; drop it
+    // so rendered rows do not repeat the same id.
+    excerpt.removeAttribute('id');
+    return node;
+  }
+
+  /**
+   * Wires every not-yet-initialized `[data-pagefind-ui]` root to Pagefind: typing in
+   * the root's `<input>` runs a debounced search and renders the hits into its
+   * `.search-result` container.
+   *
+   * Safe to call repeatedly; roots that are already wired are skipped.
+   */
+  async function initPageFind() {
+    for (const el of document.querySelectorAll('[data-pagefind-ui]')) {
+      if (el.hasAttribute(INITIALIZED_ATTR)) continue;
+
+      const bundlePath = el.getAttribute('data-bundle-path');
+      const searchInput = el.querySelector('input');
+      const searchResultsWrapper = el.querySelector<HTMLDivElement>('.search-result');
+      const searchResultTemplate = document.getElementById('pagefind-result-template');
+      if (
+        bundlePath === null ||
+        !searchInput ||
+        !searchResultsWrapper ||
+        !(searchResultTemplate instanceof HTMLTemplateElement)
+      ) {
+        console.error('Pagefind: Required elements not found');
+        // A broken root must not prevent the remaining roots from being wired.
+        continue;
+      }
+
+      // Claim the root before the first await so an overlapping run cannot bind it twice.
+      el.setAttribute(INITIALIZED_ATTR, '');
+
+      const pagefind = await loadPagefind(bundlePath);
+      if (pagefind === null) {
+        // Release the claim so a later run can retry.
+        el.removeAttribute(INITIALIZED_ATTR);
+        continue;
+      }
+
+      // Incremented on every input event; a search whose id is no longer the latest
+      // has been superseded and must not touch the DOM.
+      let latestRequest = 0;
+
+      const search = async (text: string) => {
+        const requestId = ++latestRequest;
+        if (text.trim() === '') {
+          // A cleared input shows an empty list rather than "No results found".
+          searchResultsWrapper.replaceChildren();
+          return;
+        }
+
+        // The debounce timeout is the third argument; the second is the search options.
+        const response = await pagefind.debouncedSearch(text, {}, 300);
+        // `null` means Pagefind dropped this call in favour of a more recent one.
+        if (response === null || requestId !== latestRequest) return;
+
+        if (response.results.length === 0) {
+          searchResultsWrapper.textContent = 'No results found';
+          return;
+        }
+
+        // Load every fragment before rendering so rows keep Pagefind's ranking order
+        // instead of appearing in whichever order their data happens to arrive.
+        const fragments = await Promise.all(response.results.map((result) => result.data()));
+        if (requestId !== latestRequest) return;
+
+        const rows = document.createDocumentFragment();
+        for (const data of fragments) {
+          const row = renderResult(searchResultTemplate, data);
+          if (row !== null) rows.appendChild(row);
+        }
+        searchResultsWrapper.replaceChildren(rows);
+      };
+
+      searchInput.addEventListener('input', () => {
+        search(searchInput.value).catch((error: unknown) => {
+          console.error('Pagefind: Search failed', error);
+        });
+      });
+    }
+  }
+
+  // `astro:page-load` re-runs the setup after client-side navigations, while the
+  // readyState branch covers the initial load. Both can fire for the same page;
+  // initPageFind skips roots that are already wired, so that is harmless.
+  document.addEventListener('astro:page-load', initPageFind);
+  if (document.readyState === 'loading') {
+    document.addEventListener('DOMContentLoaded', initPageFind);
+  } else {
+    initPageFind();
+  }
+</script>
+
+<style is:global>
+  /*
+   * Styles the <mark> elements inside a search root (Pagefind wraps matched terms
+   * in <mark>): the background is made transparent and the text takes a theme
+   * color, with a separate color in dark mode.
+   */
+  [data-pagefind-ui] mark {
+    background-color: transparent;
+
+    @apply text-[var(--theme-color-light-darken)] dark:text-[var(--theme-color-dark-lighten)];
+  }
+</style>
src/components/search/SearchBaseUI.astro
@@ -0,0 +1,36 @@
+---
+/**
+ * Presentational shell for a search box: a search icon, a text input, and an
+ * initially empty `.search-result` container.
+ *
+ * Pagefind.astro renders this component and its client script locates the
+ * `<input>` and the `.search-result` element inside it, so keep both when
+ * changing the markup.
+ */
+import { Icon } from 'astro-icon/components';
+import type { HTMLAttributes } from 'astro/types';
+
+/** Any attribute valid on a `<div>`; everything except `class` is spread onto the root. */
+type Props = HTMLAttributes<'div'>;
+
+// `class` is pulled out so it can be merged with the component's own `w-full` class.
+const { class: className, ...rest } = Astro.props;
+---
+
+<div class:list={['w-full', className]} {...rest}>
+  <div
+    class="theme-bg theme-border mb-2 flex w-full flex-row items-center gap-2 rounded-md border-2 p-4 text-center"
+  >
+    <Icon name="material-symbols:search-rounded" class="text-3xl" />
+    <input type="text" class="theme-bg w-full py-1" />
+  </div>
+  <div
+    class="search-result flex h-fit max-h-[calc(60vh-8rem)] flex-col items-center gap-2 overflow-y-auto text-center"
+  >
+  </div>
+</div>
+
+<style>
+  /*
+   * NOTE(review): the two rules below target number-input spin buttons, but the
+   * input above is type="text", so they look like leftovers — confirm before removing.
+   */
+  input::-webkit-outer-spin-button,
+  input::-webkit-inner-spin-button {
+    appearance: none;
+  }
+
+  input[type='number'] {
+    appearance: textfield;
+  }
+
+  /*
+   * Removes the focus outline from the input.
+   * NOTE(review): the input also has no label or aria-label — worth an accessibility pass.
+   */
+  input:focus {
+    outline: none;
+  }
+</style>
src/types/PagefindSearchAPI.ts
@@ -0,0 +1,235 @@
+/** From https://github.com/CloudCannon/pagefind/blob/production-docs/pagefind_web_js/types/index.d.ts */
+
+/** Global index options that can be passed to pagefind.options() */
+export type PagefindIndexOptions = {
+  /**
+   * Overrides the URL path that Pagefind uses to load its search bundle.
+   *
+   * NOTE(review): Pagefind.astro passes this setting as `bundlePath`, which is not
+   * declared on this type — confirm the option name against the upstream typings.
+   */
+  basePath?: string;
+  /** Appends the given baseURL to all search results. May be a path, or a full domain */
+  baseUrl?: string;
+  /** The maximum length of excerpts that Pagefind should generate for search results. Defaults to 30 */
+  excerptLength?: number;
+  /**
+   * Multiply all rankings for this index by the given weight.
+   *
+   * Only applies in multisite setups, where one site should rank higher or lower than others.
+   */
+  indexWeight?: number;
+  /**
+   * Merge this filter object into all search queries in this index.
+   *
+   * Only applies in multisite setups.
+   */
+  mergeFilter?: object;
+  /**
+   * If set, will add the search term as a query parameter under this key, for use with Pagefind's highlighting script.
+   */
+  highlightParam?: string;
+  language?: string;
+  /**
+   * Whether an instance of Pagefind is the primary index or not (for multisite).
+   *
+   * This is set for you automatically, so it is unlikely you should set this directly.
+   */
+  primary?: boolean;
+  /**
+   * Provides the ability to fine tune Pagefind's ranking algorithm to better suit your dataset.
+   */
+  ranking?: PagefindRankingWeights;
+};
+
+/** Ranking weights accepted by the `ranking` field of PagefindIndexOptions. */
+export type PagefindRankingWeights = {
+  /**
+   * Controls page ranking based on similarity of terms to the search query (in length).
+   * Increasing this number means pages rank higher when they contain words very close to the query,
+   * e.g. if searching for `part` then `party` will boost a page higher than one containing `partition`.
+   * Minimum value is 0.0, where `party` and `partition` would be viewed equally.
+   */
+  termSimilarity?: number;
+  /**
+   * Controls how much effect the average page length has on ranking.
+   * Maximum value is 1.0, where ranking will strongly favour pages that are shorter than the average page on the site.
+   * Minimum value is 0.0, where ranking will exclusively look at term frequency, regardless of how long a document is.
+   */
+  pageLength?: number;
+  /**
+   * Controls how quickly a term saturates on the page and reduces impact on the ranking.
+   * Maximum value is 2.0, where pages will take a long time to saturate, and pages with very high term frequencies will take over.
+   * As this number trends to 0, it does not take many terms to saturate and allow other parameters to influence the ranking.
+   * Minimum value is 0.0, where terms will saturate immediately and results will not distinguish between one term and many.
+   */
+  termSaturation?: number;
+  /**
+   * Controls how much ranking uses term frequency versus raw term count.
+   * Maximum value is 1.0, where term frequency fully applies and is the main ranking factor.
+   * Minimum value is 0.0, where term frequency does not apply, and pages are ranked based on the raw sum of words and weights.
+   * Values between 0.0 and 1.0 will interpolate between the two ranking methods.
+   * Reducing this number is a good way to boost longer documents in your search results, as they no longer get penalized for having a low term frequency.
+   */
+  termFrequency?: number;
+};
+
+/** Options that can be passed to pagefind.search() */
+export type PagefindSearchOptions = {
+  /** If set, this call will load all assets but return before searching. Prefer using pagefind.preload() instead */
+  preload?: boolean;
+  /** Add more verbose console logging for this search query */
+  verbose?: boolean;
+  /** The set of filters to execute with this search. Input type is extremely flexible, see the filtering docs for details */
+  filters?: object;
+  /** The set of sorts to use for this search, instead of relevancy */
+  sort?: object;
+};
+
+/**
+ * Filter counts returned from pagefind.filters(), and alongside results from pagefind.search().
+ *
+ * NOTE(review): presumably keyed by filter name, then by filter value, with the
+ * number being the count of matching pages — confirm against Pagefind's filtering docs.
+ */
+export type PagefindFilterCounts = Record<string, Record<string, number>>;
+
+/** The main results object returned from a call to pagefind.search() */
+export type PagefindSearchResults = {
+  /** All pages that match the search query and filters provided */
+  results: PagefindSearchResult[];
+  /** How many results would there have been if you had omitted the filters */
+  unfilteredResultCount: number;
+  /** Given the query and filters provided, how many remaining results are there under each filter? */
+  filters: PagefindFilterCounts;
+  /** If the searched filters were removed, how many total results for each filter are there? */
+  totalFilters: PagefindFilterCounts;
+  /**
+   * Information on how long it took Pagefind to execute this query.
+   *
+   * NOTE(review): the unit is not stated here — presumably milliseconds; confirm upstream.
+   */
+  timings: {
+    preload: number;
+    search: number;
+    total: number;
+  };
+};
+
+/** A single result from a search query, before actual data has been loaded */
+export type PagefindSearchResult = {
+  /** Pagefind's internal ID for this page, unique across the site */
+  id: string;
+  /** Pagefind's internal score for your query matching this page, that is used when ranking these results */
+  score: number;
+  /** The locations of all matching words in this page */
+  words: number[];
+  /**
+   * Calling data() loads the final data fragment needed to display this result.
+   *
+   * Only call this when you need to display the data, rather than all at once.
+   * (e.g. one page at a time, or in a scroll listener)
+   */
+  data: () => Promise<PagefindSearchFragment>;
+};
+
+/** The useful data Pagefind provides for a search result */
+export type PagefindSearchFragment = {
+  /** Pagefind's processed URL for this page. Will include the baseUrl if configured */
+  url: string;
+  /** Pagefind's unprocessed URL for this page */
+  raw_url?: string;
+  /** The full processed content text of this page */
+  content: string;
+  /** Internal type — ignore for now */
+  raw_content?: string;
+  /** The processed excerpt for this result, with matching terms wrapped in `<mark>` elements */
+  excerpt: string;
+  /**
+   * What regions of the page matched this search query?
+   *
+   * Precalculated based on h1->6 tags with IDs, using the text between each.
+   */
+  sub_results: PagefindSubResult[];
+  /** How many total words are there on this page? */
+  word_count: number;
+  /** The locations of all matching words in this page */
+  locations: number[];
+  /**
+   * The locations of all matching words in this page,
+   * paired with data about their weight and relevance to this query
+   */
+  weighted_locations: PagefindWordLocation[];
+  /** The filter keys and values this page was tagged with */
+  filters: Record<string, string[]>;
+  /** The metadata keys and values this page was tagged with */
+  meta: Record<string, string>;
+  /**
+   * The raw anchor data that Pagefind used to generate sub_results.
+   *
+   * Contains _all_ elements that had IDs on the page, so can be used to
+   * implement your own sub result calculations with different semantics.
+   */
+  anchors: PagefindSearchAnchor[];
+};
+
+/** Data for a matched section within a page */
+export type PagefindSubResult = {
+  /**
+   * Title of this sub result — derived from the heading content.
+   *
+   * If this is a result for the section of the page before any headings with IDs,
+   * this will be the same as the page's meta.title value.
+   */
+  title: string;
+  /**
+   * Direct URL to this sub result, composed of the page's URL plus the hash string of the heading.
+   *
+   * If this is a result for the section of the page before any headings with IDs,
+   * this will be the same as the page URL.
+   */
+  url: string;
+  /** The locations of all matching words in this segment */
+  locations: number[];
+  /**
+   * The locations of all matching words in this segment,
+   * paired with data about their weight and relevance to this query
+   */
+  weighted_locations: PagefindWordLocation[];
+  /** The processed excerpt for this segment, with matching terms wrapped in `<mark>` elements */
+  excerpt: string;
+  /**
+   * Raw data about the anchor element associated with this sub result.
+   *
+   * The omission of this field means this sub result is for text found on the page
+   * before the first heading that had an ID.
+   */
+  anchor?: PagefindSearchAnchor;
+};
+
+/** Information about a matching word on a page */
+export type PagefindWordLocation = {
+  /** The weight that this word was originally tagged as */
+  weight: number;
+  /**
+   * An internal score that Pagefind calculated for this word.
+   *
+   * The absolute value is somewhat meaningless, but the value can be used
+   * in comparison to other values in this set of search results to perform custom ranking.
+   */
+  balanced_score: number;
+  /**
+   * The index of this word in the result content.
+   *
+   * Splitting the content key on whitespace and indexing by this number
+   * will yield the correct word.
+   */
+  location: number;
+};
+
+/** Raw data about elements with IDs that Pagefind encountered when indexing the page */
+export type PagefindSearchAnchor = {
+  /** What element type was this anchor? e.g. `h1`, `div` */
+  element: string;
+  /** The raw id="..." attribute contents of the element */
+  id: string;
+  /**
+   * The text content of this element.
+   *
+   * In order to prevent repeating most of the page data for every anchor,
+   * Pagefind will only take top level text nodes, or text nodes nested within
+   * inline elements such as <a> and <span>.
+   */
+  text?: string;
+  /**
+   * The position of this anchor in the result content.
+   * Splitting the content key on whitespace and indexing by this number
+   * will yield the first word indexed after this element's ID was found.
+   */
+  location: number;
+};
astro.config.mjs
@@ -2,6 +2,7 @@
 import sitemap from '@astrojs/sitemap';
 import tailwind from '@astrojs/tailwind';
 import icon from 'astro-icon';
+import pagefind from 'astro-pagefind';
 import { defineConfig } from 'astro/config';
 
 // https://astro.build/config
@@ -14,5 +15,6 @@ export default defineConfig({
     tailwind({ nesting: true }),
     icon(),
     sitemap({ filter: (page) => !page.includes('/archives/') && !page.includes('/about/') }),
+    pagefind(),
   ],
 });
package.json
@@ -19,6 +19,7 @@
     "astro": "^5.2.3",
     "astro-compress": "2.3.5",
     "astro-icon": "^1.1.5",
+    "astro-pagefind": "^1.8.0",
     "autoprefixer": "^10.4.20",
     "postcss-load-config": "^6.0.1",
     "sass": "^1.83.4",