From 003a405bb873b97e47b9e2447f8836efb383c656 Mon Sep 17 00:00:00 2001 From: Nico Wagner Date: Thu, 13 Feb 2025 14:09:42 +0100 Subject: [PATCH] fix: process of records without a ppn (#908) Signed-off-by: Nico Wagner --- crates/pica-cli/src/commands/concat.rs | 2 +- crates/pica-cli/src/commands/filter.rs | 6 ++++-- crates/pica-cli/src/commands/frequency.rs | 6 ++++-- crates/pica-cli/src/commands/hash.rs | 5 ++++- crates/pica-cli/src/commands/select.rs | 6 ++++-- crates/pica-cli/tests/filter/mod.rs | 16 ++++++++++++++++ src/path.rs | 8 ++++---- 7 files changed, 37 insertions(+), 12 deletions(-) diff --git a/crates/pica-cli/src/commands/concat.rs b/crates/pica-cli/src/commands/concat.rs index 712a5ea6a..4eb81209c 100644 --- a/crates/pica-cli/src/commands/concat.rs +++ b/crates/pica-cli/src/commands/concat.rs @@ -78,7 +78,7 @@ pub(crate) enum Strategy { #[inline] fn record_key(record: &ByteRecord, strategy: &Strategy) -> String { match strategy { - Strategy::Idn => record.ppn().to_string(), + Strategy::Idn => record.ppn().expect("ppn").to_string(), Strategy::Hash => { record.sha256().iter().fold(String::new(), |mut out, b| { let _ = write!(out, "{b:02x}"); diff --git a/crates/pica-cli/src/commands/filter.rs b/crates/pica-cli/src/commands/filter.rs index f20067c81..84d20ddf6 100644 --- a/crates/pica-cli/src/commands/filter.rs +++ b/crates/pica-cli/src/commands/filter.rs @@ -196,8 +196,10 @@ impl Filter { Ok(ref mut record) => { progress.update(false); - if !filter_set.check(record.ppn()) { - continue; + if let Some(ppn) = record.ppn() { + if !filter_set.check(ppn) { + continue; + } } let mut is_match = diff --git a/crates/pica-cli/src/commands/frequency.rs b/crates/pica-cli/src/commands/frequency.rs index b200c1882..c80688e44 100644 --- a/crates/pica-cli/src/commands/frequency.rs +++ b/crates/pica-cli/src/commands/frequency.rs @@ -202,8 +202,10 @@ impl Frequency { Ok(ref record) => { progress.update(false); - if !filter_set.check(record.ppn()) { - continue; + if let Some(ppn) = record.ppn() { + if !filter_set.check(ppn) { + continue; + } } if let Some(ref matcher) = matcher { diff --git a/crates/pica-cli/src/commands/hash.rs b/crates/pica-cli/src/commands/hash.rs index 32f7b7725..a6d044755 100644 --- a/crates/pica-cli/src/commands/hash.rs +++ b/crates/pica-cli/src/commands/hash.rs @@ -80,7 +80,10 @@ impl Hash { ); writer.write_record(&[ - record.ppn().to_string(), + record + .ppn() + .unwrap_or_default() + .to_string(), hash, ])?; } diff --git a/crates/pica-cli/src/commands/select.rs b/crates/pica-cli/src/commands/select.rs index a2ce64113..acb8cb6e8 100644 --- a/crates/pica-cli/src/commands/select.rs +++ b/crates/pica-cli/src/commands/select.rs @@ -221,8 +221,10 @@ impl Select { Ok(ref record) => { progress.update(false); - if !filter_set.check(record.ppn()) { - continue; + if let Some(ppn) = record.ppn() { + if !filter_set.check(ppn) { + continue; + } } if let Some(ref matcher) = matcher { diff --git a/crates/pica-cli/tests/filter/mod.rs b/crates/pica-cli/tests/filter/mod.rs index 2cea36f03..52956bc68 100644 --- a/crates/pica-cli/tests/filter/mod.rs +++ b/crates/pica-cli/tests/filter/mod.rs @@ -741,3 +741,19 @@ fn filter_tee() -> TestResult { temp_dir.close().unwrap(); Ok(()) } + +/// https://github.com/deutsche-nationalbibliothek/pica-rs/issues/907 +#[test] +fn filter_no_ppn() -> TestResult { + let mut cmd = Command::cargo_bin("pica")?; + + let data = "036E/00 \x1faSpringer-Lehrbuch\x1e036E/01 \x1faSpringer-Link\x1fpBücher\x1e\n"; + let assert = + cmd.args(["filter", "....?"]).write_stdin(data).assert(); + assert + .success() + .code(0) + .stdout(predicates::ord::eq(data)) + .stderr(predicates::str::is_empty()); + Ok(()) +} diff --git a/src/path.rs b/src/path.rs index 6b2502183..44d18ab19 100644 --- a/src/path.rs +++ b/src/path.rs @@ -250,15 +250,15 @@ pub trait PathExt { /// use pica_record::prelude::*; /// /// let record = ByteRecord::from_bytes(b"003@ \x1f0118540238\x1e\n")?; - /// assert_eq!(record.ppn(), "118540238"); + /// assert_eq!(record.ppn().unwrap(), "118540238"); /// /// # Ok::<(), Box>(()) /// ``` - fn ppn(&self) -> &Self::Value { + fn ppn(&self) -> Option<&Self::Value> { static PATH: LazyLock = LazyLock::new(|| Path::new("003@.0").unwrap()); - self.first(&PATH, &Default::default()).unwrap() + self.first(&PATH, &Default::default()) } } impl PathExt for RecordRef<'_> { @@ -492,7 +492,7 @@ mod tests { fn test_path_ppn() -> TestResult { let data = ada_lovelace(); let record = ByteRecord::from_bytes(&data)?; - assert_eq!(record.ppn(), "119232022"); + assert_eq!(record.ppn().unwrap(), "119232022"); Ok(()) } }