Class XhochY::Drossellog::LogFileParser
In: xhochy/drossellog/logfileparser.rb
Parent: Object
PageCounter EntryCounter ImpressionCounter DataCounter LogFileParser WeeklyReport CodeCounter MonthlyReport LogLine DailyReport DailyIPCounter IPCounter ImpressionBlacklist Drossellog XhochY dot/f_1.png

Parses a whole Apache combined log file.

Methods

Attributes

filename  [R]  The name of the logfile which is processed with a certain instance
sha1sum  [R]  The SHA1 checksum of the logfile

Public Class methods

Create a new LogFileParser instance for the given log file; tmp_path is the base directory under which the processed data is stored.

[Source]

    # File xhochy/drossellog/logfileparser.rb, line 18
18:       def initialize(filename, tmp_path)
19:         @filename = filename
20:         @sha1sum = ''
21:         @tmp_path = tmp_path
22:       end

Public Instance methods

Remove the old, outdated data for this logfile and recreate an empty directory for it.

[Source]

    # File xhochy/drossellog/logfileparser.rb, line 54
54:       def cleanup_old
55:         path = File.join(@tmp_path, File.basename(@filename));
56:         
57:         # remove old directory
58:         if File.exist? path
59:           FileUtils.rm_r path
60:           puts path
61:         end
62:         
63:         # make new
64:         FileUtils.mkdir_p path
65:       end

Generate the SHA1 checksum of the log file

[Source]

    # File xhochy/drossellog/logfileparser.rb, line 25
25:       def generate_sha1sum
26:         s = File.read(@filename)
27:         @sha1sum = Digest::SHA1.hexdigest(s) 
28:       end

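Checks whether the stored data for this logfile is missing or outdated. Returns true if the data directory or its sha1sum file does not exist, or if the stored checksum differs from the checksum of the logfile; returns false otherwise.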

[Source]

    # File xhochy/drossellog/logfileparser.rb, line 31
31:       def outdated?
32:         if @sha1sum.empty?
33:           generate_sha1sum
34:         end
35:         
36:         path = File.join(@tmp_path, File.basename(@filename));
37:         path2 = File.join(path, 'sha1sum')
38:         
39:         if File.exist?(path) # look for tmp-directory
40:           
41:           if File.exist?(path2) # look for sha1sum file
42:             sum = File.read(path2).strip
43:             return !sum.eql?(@sha1sum)
44:           else # sha1sum file does not exist
45:             return true
46:           end
47:           
48:         else # tmp-directory does not exist
49:           return true
50:         end
51:       end

Parse the complete file and store the lines in an array. If a line cannot be parsed (usually because it is not in the correct format), it is printed to stdout. If you think the line was well formed, check the regular expression in xhochy/drossellog/logline.rb and try to correct it; if something is really missing there, please report it as a bug.

[Source]

    # File xhochy/drossellog/logfileparser.rb, line 73
73:       def parse
74:         path = File.join(@tmp_path, File.basename(@filename));
75:         path2 = File.join(path, 'sha1sum')
76:         
77:         File.open(@filename, 'r') do |f|
78:           @lines = []
79:           
80:           while line = f.gets
81:             if line =~ XhochY::Drossellog::LogLine.get_rxp
82:               line_obj = XhochY::Drossellog::LogLine.new(line)
83:               
84:               @lines << line_obj
85:             else # parse line
86:               puts line
87:             end
88:           end
89:         end
90:       end

Save the data into separate YAML files, one per day.

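The filenames are built as follows: <tmp_path>/<basename of the logfile>/<year>/<month>/<day>.yml. The checksum of the logfile is written to <tmp_path>/<basename of the logfile>/sha1sum afterwards.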

[Source]

     # File xhochy/drossellog/logfileparser.rb, line 95
 95:       def save_data(domain, domain_alias)
 96:         sorted = {}
 97:         
 98:         @lines.each do |line|
 99:           if sorted[line.year] == nil
100:             sorted[line.year] = {}
101:           end
102:           
103:           if sorted[line.year][line.month] == nil
104:             sorted[line.year][line.month] = {}
105:           end
106:           
107:           if sorted[line.year][line.month][line.day] == nil
108:             sorted[line.year][line.month][line.day] = []
109:           end
110:           
111:           sorted[line.year][line.month][line.day] << line
112:         end #^ @lines.each
113:         
114:         sorted.each do |year, value|
115:           value.each do |month, value2|
116:             value2.each do |day, value3|
117:               path = File.join(@tmp_path, File.basename(@filename), year.to_s, month)
118:               filename = File.join(path, day.to_s) + '.yml'
119:               FileUtils.mkdir_p(path)
120: 
121:               outdoc = {
122:                 'day' => day.to_i,
123:                 'month' => month,
124:                 'year' => year.to_i,
125:                 'domain' => domain,
126:                 'alias' => domain_alias,
127:                 'lines' => []
128:               }
129: 
130:               strip_tags = ['day', 'month', 'year']
131:               value3.each do |line|
132:                 hash = line.to_hash strip_tags
133:                 outdoc['lines'] << hash
134:               end
135:               
136:               File.open(filename, 'w') do |f|
137:                 f.write outdoc.to_yaml
138:               end
139: 
140:             end #^ value2.each
141:           end #^ value.each
142:         end #^ sorted.each
143:     
144:         path = File.join(@tmp_path, File.basename(@filename), 'sha1sum')
145:         File.open(path, 'w') do |f|
146:           f.write(@sha1sum)
147:         end
148:       end

[Validate]