diff --git a/graphannis/src/annis/db/corpusstorage/subgraph.rs b/graphannis/src/annis/db/corpusstorage/subgraph.rs index 5698c4652..a9e2d3010 100644 --- a/graphannis/src/annis/db/corpusstorage/subgraph.rs +++ b/graphannis/src/annis/db/corpusstorage/subgraph.rs @@ -183,12 +183,12 @@ fn get_left_right_token_with_offset_with_segmentation( } })?; - let left_seg = *covering_segmentation_nodes - .first() - .ok_or(GraphAnnisError::NoCoveredTokenForSubgraph)?; - let right_seg = *covering_segmentation_nodes - .last() - .ok_or(GraphAnnisError::NoCoveredTokenForSubgraph)?; + let (left_seg, right_seg) = match covering_segmentation_nodes[..] { + // If none of the covered tokens are covered by a segmentation node, return without context + [] => return Ok((left_most_covered_token, right_most_covered_token)), + [only] => (only, only), + [left, .., right] => (left, right), + }; // The context might be larger than the actual document, try to get the // largest possible context diff --git a/graphannis/src/annis/db/corpusstorage/tests.rs b/graphannis/src/annis/db/corpusstorage/tests.rs index c70223c3e..85de8faae 100644 --- a/graphannis/src/annis/db/corpusstorage/tests.rs +++ b/graphannis/src/annis/db/corpusstorage/tests.rs @@ -286,7 +286,7 @@ fn create_simple_graph(cs: &mut CorpusStorage) { } #[test] -fn subgraphs_simple() { +fn subgraph_simple() { let tmp = tempfile::tempdir().unwrap(); let mut cs = CorpusStorage::with_auto_cache_size(tmp.path(), false).unwrap(); @@ -440,7 +440,7 @@ fn subgraphs_simple() { } #[test] -fn subgraphs_non_overlapping_regions() { +fn subgraph_non_overlapping_regions() { let tmp = tempfile::tempdir().unwrap(); let mut cs = CorpusStorage::with_auto_cache_size(tmp.path(), false).unwrap(); @@ -573,7 +573,7 @@ fn subgraphs_non_overlapping_regions() { } #[test] -fn subgraphs_non_overlapping_regions_one_context_zero() { +fn subgraph_non_overlapping_regions_one_context_zero() { let tmp = tempfile::tempdir().unwrap(); let mut cs = CorpusStorage::with_auto_cache_size(tmp.path(), false).unwrap(); @@ -674,7 +674,7 @@ fn subgraphs_non_overlapping_regions_one_context_zero() { } #[test] -fn subgraphs_non_overlapping_regions_no_context_tokens_specified_out_of_order() { +fn subgraph_non_overlapping_regions_no_context_tokens_specified_out_of_order() { let tmp = tempfile::tempdir().unwrap(); let mut cs = CorpusStorage::with_auto_cache_size(tmp.path(), false).unwrap(); @@ -999,6 +999,59 @@ fn subgraph_with_segmentation_and_gap() { .unwrap() .is_some() ); + + // Get the context for the token in the gap using the norm segmentation + let g = cs + .subgraph( + &corpus_name, + vec!["SegmentationWithGaps/doc01#tok_13".to_string()], + 0, + 0, + Some("norm".to_string()), + ) + .unwrap(); + // Check that the token is included even though it is not covered by a segmentation node + assert!( + g.get_node_annos() + .get_node_id_from_name("SegmentationWithGaps/doc01#tok_13") + .unwrap() + .is_some() + ); +} + +#[test] +fn subgraph_with_node_spanning_multiple_segmentation_nodes() { + let tmp = tempfile::tempdir().unwrap(); + let cs = CorpusStorage::with_auto_cache_size(tmp.path(), false).unwrap(); + + let mut g = GraphUpdate::new(); + example_generator::create_multiple_segmentations(&mut g, "root/doc1"); + + cs.apply_update("root", &mut g).unwrap(); + + let graph = cs + .subgraph( + "root", + vec!["root/doc1#b3".to_string()], + 0, + 0, + Some("a".to_string()), + ) + .unwrap(); + assert!( + graph + .get_node_annos() + .get_node_id_from_name("root/doc1#a2") + .unwrap() + .is_some() + ); + assert!( + graph + .get_node_annos() + .get_node_id_from_name("root/doc1#a3") + .unwrap() + .is_some() + ); } #[test]