[Chugalug] gedit vs geany
Dan Lyke
danlyke at flutterby.com
Thu Dec 20 16:39:41 UTC 2012
So there are things about this file that I'm not understanding: I
thought that the '.bla' starting a line was an ID, but the following
script shows a lot of duplicates of those. Still, a start on parsing
this might look like:
#!/usr/bin/perl -w
use warnings;
use strict;

# First-pass parser for the 'gnotes' file in the current directory.
#
# Line shapes recognized (anything else non-blank is reported):
#   ; ... GENERATION <n> ... [yyyy-yyyy]   generation header (comment line)
#   ; ...                                  any other comment, ignored
#   .<id> <lineage> ;;<name>;;<dates>      person record with ';;' fields
#   .<id> (<lineage>) <name and stuff>     person record without ';;'
#
# Records are collected in %$people keyed by the '.<id>' token; a second
# line with an already-seen id is reported as a duplicate and not stored.

# Low-precedence 'or' is required here: with '||' the die attaches to the
# (always true) filename string, so a failed open would go unnoticed.
open my $fh, '<', 'gnotes'
    or die "Unable to open gnotes: $!";

# Most recent GENERATION header seen. Tracked while parsing but not yet
# attached to the person records.
my $generation;
my $generationDates;

# id => { line, name, lineage, ... } for every first occurrence of an id.
my $people = {};

# recordPerson($registry, $id, $lineNumber, $line, $record)
#
# Stores $record under $id in the $registry hashref, unless that id is
# already present, in which case a "Duplicate ID" message showing both
# the new line and the originally stored line is printed instead.
sub recordPerson
{
    my ($registry, $id, $lineNumber, $line, $record) = @_;

    if (exists $registry->{$id})
    {
        print "Duplicate ID at line $lineNumber: $line\n was $registry->{$id}->{line}\n";
        return;
    }

    $registry->{$id} = $record;
    return;
}

while (my $line = <$fh>)
{
    chomp $line;
    my $lineNumber = $.;

    if ($line =~ /^\s*\;.*?\s+GENERATION\s+(\d+)?/)
    {
        # The number is optional in the pattern, so $generation may be undef.
        $generation = $1;
        $generationDates = undef;
        if ($line =~ /(\d{4})\-(\d{4})/)
        {
            $generationDates = [$1,$2];
        }
    }
    elsif ($line =~ /^\s*\;/)
    {
        # unrecognized comment line
    }
    elsif ($line =~ /^\.(\S*)\s+(.*?)?\s*\;\;/)
    {
        my ($id1, $lineageNotSure) = ($1,$2);

        # Fields are separated by ';;'; the first field (id and lineage)
        # was already captured by the match above.
        my (undef, $name, $dates) = split /\;\;/, $line;

        # split drops trailing empty fields, so $name can be undef (e.g. a
        # line ending in ';;'); only look for names when there is one.
        my ($familyName, $maidenName);
        if (defined $name)
        {
            $familyName = $1 if ($name =~ /\/(\w+)\//);
            $maidenName = $1 if ($name =~ /\((\w+)\)/);
        }

        recordPerson($people, $id1, $lineNumber, $line,
                     {
                         line => $line,
                         name => $name,
                         dates => $dates,
                         lineage => $lineageNotSure,
                         familyName => $familyName,
                         maidenName => $maidenName,
                     });
    }
    elsif ($line =~ /^\.(\S*)\s+(\(+.*?\))\s*(.*)/)
    {
        my ($id1, $lineageNotSure,$nameAndStuff) = ($1,$2,$3);

        recordPerson($people, $id1, $lineNumber, $line,
                     {
                         line => $line,
                         name => $nameAndStuff,
                         lineage => $lineageNotSure,
                     });
    }
    elsif ($line !~ /^\s*$/)
    {
        print "Unrecognized line: $line\n";
    }
}

close $fh;
More information about the Chugalug
mailing list