You did a good job, your code already leverages on main rust features, but there are some suggestions to make your code more idiomatic and efficient:
Idiomatic improvements
1. Use ? for error propagation
Instead of manual match expressions:
// Current approach
let reader = match fs::read_dir(directory) {
Ok(r) => r,
Err(error) => return Err(error.to_string()),
};
// More idiomatic
let reader = fs::read_dir(directory).map_err(|e| e.to_string())?;
2. Use if let for simpler pattern matching (it's cleaner)
// Instead of
match depth {
Some(0) => return Err(String::from("Depth cannot be 0")),
_ => (),
};
// More idiomatic
if let Some(0) = depth {
return Err("Depth cannot be 0".to_string());
}
3. Avoid unnecessary empty match arms
// Instead of
match self.max_depth {
Some(depth) if usize::from(depth) == self.readers.len() => return,
_ => (),
}
// Better
if let Some(depth) = self.max_depth {
if usize::from(depth) == self.readers.len() {
return;
}
}
Performance considerations
Binary file detection: Your
is_line_printablefunction reads every character of every line. Consider checking a sample of bytes at the beginning of the file instead.Path cloning: You're creating a new
PathBuffor each match withself.path.to_path_buf(). This is sometimes unavoidable (unfortunately) but worth noting.File filtering: Add extension filtering to avoid scanning binary files altogether.
Memory usage: The current approach holds file handles open until matches are found, which is generally fine but could be improved.
Here's an improved version of your code:
use std::fs;
use std::io::{self, BufRead};
use std::iter;
use std::path::{Path, PathBuf};
use std::vec::Vec;
use regex::Regex;
pub struct Location {
pub file: PathBuf,
pub line: usize,
pub line_content: String, // store whole line for context
pub match_text: String, // store matched text separately
pub match_span: (usize, usize), // start and end positions
}
struct FileScanner<'a> {
path: PathBuf,
pattern: &'a Regex,
lines: iter::Enumerate<io::Lines<io::BufReader<fs::File>>>,
}
// improved binary detection - (check first few bytes)
fn is_likely_binary(path: &Path) -> io::Result<bool> {
let file = fs::File::open(path)?;
let mut buffer = [0; 512]; // sample first 512 bytes
let bytes_read = file.take(512).read(&mut buffer)?;
// check for null bytes or high concentration of non-ASCII chars
let non_text_chars = buffer[..bytes_read]
.iter()
.filter(|&&b| b == 0 || b > 127)
.count();
Ok(non_text_chars > bytes_read / 4) // > 25% non-text chars indicates binary
}
impl<'a> FileScanner<'a> {
fn new(path: PathBuf, pattern: &'a Regex) -> io::Result<Self> {
let handle = fs::File::open(&path)?;
let reader = io::BufReader::new(handle);
Ok(FileScanner {
path,
pattern,
lines: reader.lines().enumerate(),
})
}
fn build(path: PathBuf, pattern: &'a Regex) -> Option<Self> {
if is_likely_binary(&path).unwrap_or(true) {
return None; // skip
}
Self::new(path).ok()
}
}
impl<'a> Iterator for FileScanner<'a> {
type Item = Location;
fn next(&mut self) -> Option<Location> {
while let Some((index, line_result)) = self.lines.next() {
let line = match line_result {
Ok(l) => l,
Err(_) => continue, // skip lines with IO errors instead of aborting
};
if let Some(pattern_match) = self.pattern.find(&line) {
let start = pattern_match.start();
let end = pattern_match.end();
return Some(Location {
file: self.path.clone(), // clone is necessary here
line: index + 1,
line_content: line.clone(),
match_text: line[start..end].to_string(),
match_span: (start, end),
});
}
}
None
}
}
pub struct Searcher<'a> {
pattern: &'a Regex,
max_depth: Option<u8>,
readers: Vec<fs::ReadDir>,
current_scanner: Option<FileScanner<'a>>,
allowed_extensions: Option<Vec<String>>, // add extension-based filtering
}
impl<'a> Searcher<'a> {
pub fn new(
pattern: &'a Regex,
directory: &Path,
depth: Option<u8>,
extensions: Option<Vec<String>>,
) -> Result<Self, String> {
if let Some(0) = depth {
return Err("Depth cannot be 0".to_string());
}
let reader = fs::read_dir(directory).map_err(|e| e.to_string())?;
Ok(Searcher {
pattern,
max_depth: depth,
readers: vec![reader],
current_scanner: None,
allowed_extensions: extensions,
})
}
pub fn build(
pattern: &'a Regex,
directory: &Path,
depth: Option<u8>,
) -> Result<Self, String> {
Self::new(pattern, directory, depth, None)
}
fn should_scan_file(&self, path: &Path) -> bool {
if let Some(ref extensions) = self.allowed_extensions {
if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
return extensions.iter().any(|e| e == ext);
}
return false; // no extension
}
true // no extension filtering
}
fn push_directory(&mut self, directory: PathBuf) {
// early return if we've reached max depth
if let Some(depth) = self.max_depth {
if usize::from(depth) == self.readers.len() {
return;
}
}
if let Ok(reader) = fs::read_dir(directory) {
self.readers.push(reader);
}
}
fn next_match_from_file(&mut self) -> Option<Location> {
let location = self.current_scanner.as_mut()?.next();
if location.is_none() {
self.current_scanner = None;
}
location
}
}
impl<'a> Iterator for Searcher<'a> {
type Item = Location;
fn next(&mut self) -> Option<Location> {
if let Some(location) = self.next_match_from_file() {
return Some(location);
}
while let Some(reader) = self.readers.last_mut() {
// here we search through all directories
match reader.next() {
Some(Ok(entry)) => {
let path = entry.path();
if path.is_dir() {
self.push_directory(path);
} else if self.should_scan_file(&path) {
self.current_scanner = FileScanner::build(path, self.pattern);
if let Some(location) = self.next_match_from_file() {
return Some(location);
}
}
}
_ => {
// if it reaches here it means there's an error
self.readers.pop();
}
}
}
None
}
}