Hello! Thank you for putting this cool crate together. I'm hitting a segfault when trying to append to my trainer. Not sure if this is a bug in the crate or my code. Here's the error:
(lldb) process launch -- train
Process 37281 launched: '/Users/raphaellaude/Documents/GitHub/rust_addr_clean/target/debug/us_addrs' (arm64)
Address: ["431", "Marietta", "St", "NW", "Fl.", "3"]
Tags: ["AddressNumber", "StreetName", "StreetNamePostType", "StreetNamePostDirectional", "OccupancyType", "OccupancyIdentifier"]
Process 37281 stopped
* thread #1, name = 'main', queue = 'com.apple.main-thread', stop reason = EXC_BAD_ACCESS (code=1, address=0x28)
frame #0: 0x000000010006d5e0 us_addrs`crfsuite::Trainer::set_message_callback::he14ba900e348f69f(self=0x000000016fdfddb8) at lib.rs:582:13
579 // XXX: make it a public API?
580 fn set_message_callback(&mut self) {
581 unsafe {
-> 582 (*self.trainer)
583 .set_message_callback
584 .map(|f| f(self.trainer, mem::transmute(self), Some(logging_callback)))
585 .unwrap();
Target 0: (us_addrs) stopped.
And my code:
use std::fs::File;
use std::io::BufReader;
use crfsuite::{Algorithm, GraphicalModel, Trainer};
use xml::reader::{EventReader, XmlEvent};
use crate::{get_address_features, tokenize};
pub fn train_model() -> std::io::Result<()> {
let file = File::open("training/labeled.xml")?;
let file = BufReader::new(file);
let parser = EventReader::new(file);
let mut trainer = Trainer::new(false);
let mut address: Vec<String> = Vec::new();
let mut yseq: Vec<String> = Vec::new(); // make Vec<AddressComponent>
// let mut yseq: &[String];
for e in parser {
match e {
Ok(XmlEvent::StartElement { name, .. }) => {
if name.local_name == "AddressString" {
address.clear();
yseq.clear();
} else {
yseq.push(name.local_name.to_string());
}
}
Ok(XmlEvent::Characters(s)) => {
address.push(s);
}
Ok(XmlEvent::EndElement { name }) => {
if name.local_name == "AddressString" {
println!("Address: {:?}", address);
let tokens = tokenize(&address.join(" "));
let xseq = get_address_features(&tokens);
println!("Tags: {:?}", yseq);
match trainer.append(&xseq, &yseq, 0) {
Ok(()) => println!("Appended data"),
Err(e) => {
eprintln!("Error appending data: {}", e);
break;
}
}
}
}
Err(e) => {
eprintln!("Error: {e}");
break;
}
_ => {}
}
}
match trainer.select(Algorithm::LBFGS, GraphicalModel::CRF1D) {
Ok(()) => println!("Selected algorithm"),
Err(e) => println!("Error selecting algorithm: {}", e),
}
match trainer.train("usaddr.crfsuite", -1) {
Ok(()) => println!("Trained model"),
Err(e) => println!("Error training model: {}", e),
}
Ok(())
}
Any examples or help greatly appreciated! Thank you!