Skip to main content
2 of 2
added 2 characters in body
toolic
  • 15.8k
  • 6
  • 29
  • 217

You did a good job, your code already leverages on main rust features, but there are some suggestions to make your code more idiomatic and efficient:

Idiomatic improvements

1. Use ? for error propagation

Instead of manual match expressions:

// Current approach
let reader = match fs::read_dir(directory) {
    Ok(r) => r,
    Err(error) => return Err(error.to_string()),
};

// More idiomatic
let reader = fs::read_dir(directory).map_err(|e| e.to_string())?;

2. Use if let for simpler pattern matching (it's cleaner)

// Instead of
match depth {
    Some(0) => return Err(String::from("Depth cannot be 0")),
    _ => (),
};

// More idiomatic
if let Some(0) = depth {
    return Err("Depth cannot be 0".to_string());
}

3. Avoid unnecessary empty match arms

// Instead of
match self.max_depth {
    Some(depth) if usize::from(depth) == self.readers.len() => return,
    _ => (),
}

// Better
if let Some(depth) = self.max_depth {
    if usize::from(depth) == self.readers.len() {
        return;
    }
}

Performance considerations

  1. Binary file detection: Your is_line_printable function reads every character of every line. Consider checking a sample of bytes at the beginning of the file instead.

  2. Path cloning: You're creating a new PathBuf for each match with self.path.to_path_buf(). This is sometimes unavoidable (unfortunately) but worth noting.

  3. File filtering: Add extension filtering to avoid scanning binary files altogether.

  4. Memory usage: The current approach holds file handles open until matches are found, which is generally fine but could be improved.

Here's an improved version of your code:

use std::fs;
use std::io::{self, BufRead};
use std::iter;
use std::path::{Path, PathBuf};
use std::vec::Vec;

use regex::Regex;

pub struct Location {
    pub file: PathBuf,
    pub line: usize,
    pub line_content: String, // store whole line for context
    pub match_text: String,   // store matched text separately
    pub match_span: (usize, usize), // start and end positions
}

struct FileScanner<'a> {
    path: PathBuf,
    pattern: &'a Regex,
    lines: iter::Enumerate<io::Lines<io::BufReader<fs::File>>>,
}

// improved binary detection - (check first few bytes)
fn is_likely_binary(path: &Path) -> io::Result<bool> {
    let file = fs::File::open(path)?;
    let mut buffer = [0; 512]; // sample first 512 bytes
    let bytes_read = file.take(512).read(&mut buffer)?;
    
    // check for null bytes or high concentration of non-ASCII chars
    let non_text_chars = buffer[..bytes_read]
        .iter()
        .filter(|&&b| b == 0 || b > 127)
        .count();
        
    Ok(non_text_chars > bytes_read / 4) // > 25% non-text chars indicates binary
}

impl<'a> FileScanner<'a> {
    fn new(path: PathBuf, pattern: &'a Regex) -> io::Result<Self> {
        let handle = fs::File::open(&path)?;
        let reader = io::BufReader::new(handle);
        
        Ok(FileScanner {
            path,
            pattern,
            lines: reader.lines().enumerate(),
        })
    }
    
    fn build(path: PathBuf, pattern: &'a Regex) -> Option<Self> {
        if is_likely_binary(&path).unwrap_or(true) {
            return None; // skip
        }
        
        Self::new(path).ok()
    }
}

impl<'a> Iterator for FileScanner<'a> {
    type Item = Location;

    fn next(&mut self) -> Option<Location> {
        while let Some((index, line_result)) = self.lines.next() {
            let line = match line_result {
                Ok(l) => l,
                Err(_) => continue, // skip lines with IO errors instead of aborting
            };

            if let Some(pattern_match) = self.pattern.find(&line) {
                let start = pattern_match.start();
                let end = pattern_match.end();
                
                return Some(Location {
                    file: self.path.clone(), // clone is necessary here
                    line: index + 1,
                    line_content: line.clone(),
                    match_text: line[start..end].to_string(),
                    match_span: (start, end),
                });
            }
        }
        None
    }
}

pub struct Searcher<'a> {
    pattern: &'a Regex,
    max_depth: Option<u8>,
    readers: Vec<fs::ReadDir>,
    current_scanner: Option<FileScanner<'a>>,
    allowed_extensions: Option<Vec<String>>, // add extension-based filtering
}

impl<'a> Searcher<'a> {
    pub fn new(
        pattern: &'a Regex,
        directory: &Path,
        depth: Option<u8>,
        extensions: Option<Vec<String>>,
    ) -> Result<Self, String> {
        if let Some(0) = depth {
            return Err("Depth cannot be 0".to_string());
        }

        let reader = fs::read_dir(directory).map_err(|e| e.to_string())?;
        
        Ok(Searcher {
            pattern,
            max_depth: depth,
            readers: vec![reader],
            current_scanner: None,
            allowed_extensions: extensions,
        })
    }
    
    pub fn build(
        pattern: &'a Regex,
        directory: &Path,
        depth: Option<u8>,
    ) -> Result<Self, String> {
        Self::new(pattern, directory, depth, None)
    }

    fn should_scan_file(&self, path: &Path) -> bool {
        if let Some(ref extensions) = self.allowed_extensions {
            if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
                return extensions.iter().any(|e| e == ext);
            }
            return false; // no extension
        }
        true // no extension filtering
    }

    fn push_directory(&mut self, directory: PathBuf) {
        // early return if we've reached max depth
        if let Some(depth) = self.max_depth {
            if usize::from(depth) == self.readers.len() {
                return;
            }
        }

        if let Ok(reader) = fs::read_dir(directory) {
            self.readers.push(reader);
        }
    }

    fn next_match_from_file(&mut self) -> Option<Location> {
        let location = self.current_scanner.as_mut()?.next();
        if location.is_none() {
            self.current_scanner = None;
        }
        location
    }
}

impl<'a> Iterator for Searcher<'a> {
    type Item = Location;

    fn next(&mut self) -> Option<Location> {
        if let Some(location) = self.next_match_from_file() {
            return Some(location);
        }

        while let Some(reader) = self.readers.last_mut() {
            // here we search through all directories
            match reader.next() {
                Some(Ok(entry)) => {
                    let path = entry.path();
                    
                    if path.is_dir() {
                        self.push_directory(path);
                    } else if self.should_scan_file(&path) {
                        self.current_scanner = FileScanner::build(path, self.pattern);
                        if let Some(location) = self.next_match_from_file() {
                            return Some(location);
                        }
                    }
                }
                _ => {
                    // if it reaches here it means there's an error
                    self.readers.pop();
                }
            }
        }

        None
    }
}
Adversing
  • 336
  • 1
  • 8