package com.ronsoft.books.nio.regex;
import java.io.File;
import java.io.FileReader;
import java.io.LineNumberReader;
import java.io.IOException;
import java.util.List;
import java.util.LinkedList;
import java.util.Iterator;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* A file searching class, similar to grep, which returns information about
* lines matched in the specified files. Instances of this class are tied
* to a specific regular expression pattern and may be applied repeatedly
* to multiple files. Instances of Grep are thread safe, they may be shared.
*
* @author Michael Daudel (mgd@ronsoft.com) (original)
* @author Ron Hitchens (ron@ronsoft.com) (hacked)
* @version $Id: Grep.java,v 1.3 2002/04/11 02:58:04 ron Exp $
*/
public class Grep
{
	// The pattern to use for this instance.  Declared final so that the
	// reference is safely published to every thread sharing this Grep;
	// each search creates its own Matcher from it (see grepList).
	private final Pattern pattern;

	/**
	 * Instantiate a Grep object for the given pre-compiled Pattern object.
	 * @param pattern A java.util.regex.Pattern object specifying the
	 *  pattern to search for.
	 */
	public Grep (Pattern pattern)
	{
		this.pattern = pattern;
	}

	/**
	 * Instantiate a Grep object and compile the given regular expression
	 * string.
	 * @param regex The regular expression string to compile into a
	 *  Pattern for internal use.
	 * @param ignoreCase If true, pass Pattern.CASE_INSENSITIVE to the
	 *  Pattern compiler so that searches will be done without regard
	 *  to alphabetic case.  Note, this only applies to the ASCII
	 *  character set.  Use embedded expressions to set other options.
	 */
	public Grep (String regex, boolean ignoreCase)
	{
		this.pattern = Pattern.compile (regex,
			(ignoreCase) ? Pattern.CASE_INSENSITIVE : 0);
	}

	/**
	 * Instantiate a Grep object with the given regular expression string,
	 * with default options (case sensitive).
	 * @param regex The regular expression string to compile into a
	 *  Pattern for internal use.
	 */
	public Grep (String regex)
	{
		this (regex, false);
	}

	// ---------------------------------------------------------------

	/**
	 * Perform a grep on the given file.
	 * @param file A File object denoting the file to scan for the
	 *  regex given when this Grep instance was constructed.
	 * @return A type-safe array of Grep.MatchedLine objects describing
	 *  the lines of the file matched by the pattern.
	 * @exception IOException If the file does not exist, is not a
	 *  regular file, is not readable, or cannot be read.
	 */
	public MatchedLine [] grep (File file)
		throws IOException
	{
		List list = grepList (file);
		MatchedLine matches [] = new MatchedLine [list.size()];

		list.toArray (matches);

		return (matches);
	}

	/**
	 * Perform a grep on the given file.
	 * @param fileName A String filename denoting the file to scan for
	 *  the regex given when this Grep instance was constructed.
	 * @return A type-safe array of Grep.MatchedLine objects describing
	 *  the lines of the file matched by the pattern.
	 * @exception IOException If there is a problem reading the file.
	 */
	public MatchedLine [] grep (String fileName)
		throws IOException
	{
		return (grep (new File (fileName)));
	}

	/**
	 * Perform a grep on the given list of files.  If a given file cannot
	 * be read, it will be ignored as if empty; matches collected from a
	 * file before a read error occurred are not included.
	 * @param files An array of File objects to scan.
	 * @return A type-safe array of Grep.MatchedLine objects describing
	 *  the lines of the files matched by the pattern, in the order the
	 *  files were given.
	 */
	public MatchedLine [] grep (File [] files)
	{
		List aggregate = new LinkedList();

		for (int i = 0; i < files.length; i++) {
			try {
				aggregate.addAll (grepList (files [i]));
			} catch (IOException ignored) {
				// Deliberately best-effort: an unreadable file
				// contributes no matches rather than aborting
				// the whole multi-file search.
			}
		}

		MatchedLine matches [] = new MatchedLine [aggregate.size()];

		aggregate.toArray (matches);

		return (matches);
	}

	// -------------------------------------------------------------

	/**
	 * Encapsulation of a matched line from a file.  This immutable
	 * object has five read-only properties:
	 *  - getFile(): The File this match pertains to.
	 *  - getLineNumber(): The line number (1-relative) within the
	 *    file where the match was found.
	 *  - getLineText(): The text of the matching line.
	 *  - start(): The index within the line where the matching
	 *    pattern begins.
	 *  - end(): The index, plus one, of the end of the matched
	 *    character sequence.
	 */
	public static class MatchedLine
	{
		// All fields are final: instances never change after construction
		private final File file;
		private final int lineNumber;
		private final String lineText;
		private final int start;
		private final int end;

		MatchedLine (File file, int lineNumber, String lineText,
			int start, int end)
		{
			this.file = file;
			this.lineNumber = lineNumber;
			this.lineText = lineText;
			this.start = start;
			this.end = end;
		}

		public File getFile()
		{
			return (this.file);
		}

		public int getLineNumber()
		{
			return (this.lineNumber);
		}

		public String getLineText()
		{
			return (this.lineText);
		}

		public int start()
		{
			return (this.start);
		}

		public int end()
		{
			return (this.end);
		}
	}

	// -----------------------------------------------------------

	/**
	 * Run the grepper on the given File.  Only the first match on each
	 * line is recorded.
	 * @param file The file to scan.
	 * @return A (non-type-safe) List of MatchedLine objects.
	 * @exception IOException If the file does not exist, is not a
	 *  regular file, is not readable, or an error occurs while reading.
	 */
	private List grepList (File file)
		throws IOException
	{
		if ( ! file.exists()) {
			throw new IOException ("Does not exist: " + file);
		}

		if ( ! file.isFile()) {
			throw new IOException ("Not a regular file: " + file);
		}

		if ( ! file.canRead()) {
			throw new IOException ("Unreadable file: " + file);
		}

		List list = new LinkedList();
		LineNumberReader lnr =
			new LineNumberReader (new FileReader (file));

		// The finally clause guarantees the underlying file handle is
		// released even when readLine() fails part-way through.
		try {
			// One Matcher per call, re-targeted at each line; the
			// Matcher is never shared between invocations.
			Matcher matcher = this.pattern.matcher ("");
			String line;

			while ((line = lnr.readLine()) != null) {
				matcher.reset (line);

				if (matcher.find()) {
					// getLineNumber() counts lines already
					// read, so it is 1-relative here
					list.add (new MatchedLine (file,
						lnr.getLineNumber(), line,
						matcher.start(), matcher.end()));
				}
			}
		} finally {
			lnr.close();
		}

		return (list);
	}

	// ---------------------------------------------------------------

	/**
	 * Test code to run grep operations.  Accepts two command-line
	 * options: -i or --ignore-case, compile the given pattern so
	 * that case of alpha characters is ignored.  Or -1, which runs
	 * the grep operation on each individual file, rather than passing
	 * them all to one invocation.  This is just to test the different
	 * methods.  The printed output is slightly different when -1 is
	 * specified.  Any other argument beginning with "-" is silently
	 * ignored.
	 * @param argv Options, followed by the pattern and one or more
	 *  file names.
	 */
	public static void main (String [] argv)
	{
		// set defaults
		boolean ignoreCase = false;
		boolean onebyone = false;

		List argList = new LinkedList();	// to gather args

		// loop through the args, looking for switches and saving
		// off the pattern and file names
		for (int i = 0; i < argv.length; i++) {
			if (argv [i].startsWith ("-")) {
				if (argv [i].equals ("-i")
					|| argv [i].equals ("--ignore-case"))
				{
					ignoreCase = true;
				}

				if (argv [i].equals ("-1")) {
					onebyone = true;
				}

				continue;
			}

			// not a switch, add it to the list
			argList.add (argv [i]);
		}

		// enough args to run?
		if (argList.size() < 2) {
			System.err.println ("usage: [options] pattern filename ...");
			return;
		}

		// first arg on the list will be taken as the regex pattern
		// pass the pattern to the new Grep object, along with the
		// current value of the ignore case flag
		Grep grepper = new Grep ((String) argList.remove (0),
			ignoreCase);

		// somewhat arbitrarily split into two ways of calling the
		// grepper and printing out the results.
		if (onebyone) {
			Iterator it = argList.iterator();

			// loop through the filenames and grep them
			while (it.hasNext()) {
				String fileName = (String) it.next();

				// print the filename once before each grep
				System.out.println (fileName + ":");

				MatchedLine [] matches = null;

				// report the failure and move on to the next file
				try {
					matches = grepper.grep (fileName);
				} catch (IOException e) {
					System.err.println ("\t*** " + e);
					continue;
				}

				// print out info about the matched lines; the
				// end index is shown inclusive, hence the - 1
				for (int i = 0; i < matches.length; i++) {
					MatchedLine match = matches [i];

					System.out.println (" "
						+ match.getLineNumber()
						+ " [" + match.start()
						+ "-" + (match.end() - 1)
						+ "]: "
						+ match.getLineText());
				}
			}
		} else {
			// convert the filename list to an array of File
			File [] files = new File [argList.size()];

			for (int i = 0; i < files.length; i++) {
				files [i] = new File ((String) argList.get (i));
			}

			// run the grepper, unreadable files are ignored
			MatchedLine [] matches = grepper.grep (files);

			// print out info about the matched lines
			for (int i = 0; i < matches.length; i++) {
				MatchedLine match = matches [i];

				System.out.println (match.getFile().getName()
					+ ", " + match.getLineNumber() + ": "
					+ match.getLineText());
			}
		}
	}
}