=== modified file 'configs/testrunner/launchpad-lazr.conf'
--- configs/testrunner/launchpad-lazr.conf	2010-05-06 20:08:41 +0000
+++ configs/testrunner/launchpad-lazr.conf	2010-06-04 07:40:50 +0000
@@ -131,6 +131,7 @@
 [launchpad]
 max_attachment_size: 1024
 geoip_database: /usr/share/GeoIP/GeoLiteCity.dat
+logparser_max_parsed_lines: 100000
 
 [launchpad_session]
 cookie: launchpad_tests

=== modified file 'lib/canonical/config/schema-lazr.conf'
--- lib/canonical/config/schema-lazr.conf	2010-05-28 23:06:25 +0000
+++ lib/canonical/config/schema-lazr.conf	2010-06-04 07:40:50 +0000
@@ -1131,6 +1131,10 @@
 # ba-ws.geonames.net.
 geonames_identity:
 
+# The maximum number of lines that should be parsed by the launchpad
+# log parser.
+logparser_max_parsed_lines: 100000
+
 
 [launchpad_session]
 # The hostname where the session database is located.

=== modified file 'lib/lp/services/apachelogparser/base.py'
--- lib/lp/services/apachelogparser/base.py	2009-12-22 16:10:13 +0000
+++ lib/lp/services/apachelogparser/base.py	2010-06-04 07:40:50 +0000
@@ -11,10 +11,11 @@
 
 from contrib import apachelog
 
-from lp.services.apachelogparser.model.parsedapachelog import ParsedApacheLog
+from canonical.config import config
 from canonical.launchpad.interfaces.geoip import IGeoIP
 from canonical.launchpad.webapp.interfaces import (
     IStoreSelector, MAIN_STORE, DEFAULT_FLAVOR)
+from lp.services.apachelogparser.model.parsedapachelog import ParsedApacheLog
 
 
 parser = apachelog.parser(apachelog.formats['extended'])
@@ -84,20 +85,36 @@
     """
     # Seek file to given position, read all lines.
     fd.seek(start_position)
-    lines = fd.readlines()
-    # Always skip the last line as it may be truncated since we're rsyncing
-    # live logs.
-    last_line = lines.pop(-1)
+    line = fd.readline()
+
     parsed_bytes = start_position
-    if len(lines) == 0:
-        # This probably means we're dealing with a logfile that has been
-        # rotated already, so it should be safe to parse its last line.
-        lines = [last_line]
 
     geoip = getUtility(IGeoIP)
     downloads = {}
-    for line in lines:
-        try:
+    parsed_lines = 0
+
+    # Check for an optional logparser_max_parsed_lines config option.
+    max_parsed_lines = getattr(
+        config.launchpad, 'logparser_max_parsed_lines', None)
+
+    while line:
+        if max_parsed_lines is not None and parsed_lines >= max_parsed_lines:
+            break
+
+        # Always skip the last line as it may be truncated since we're
+        # rsyncing live logs, unless there is only one line for us to
+        # parse, in which case this probably means we're dealing with a
+        # logfile that has been rotated already, so it should be safe to
+        # parse its last line.
+        next_line = ''
+        try:
+            next_line = fd.next()
+        except StopIteration:
+            if parsed_lines > 0:
+                break
+
+        try:
+            parsed_lines += 1
             parsed_bytes += len(line)
             host, date, status, request = get_host_date_status_and_request(
                 line)
@@ -143,6 +160,8 @@
             parsed_bytes -= len(line)
             logger.error('Error (%s) while parsing "%s"' % (e, line))
             break
+
+        line = next_line
     return downloads, parsed_bytes
 
 

=== modified file 'lib/lp/services/apachelogparser/tests/test_apachelogparser.py'
--- lib/lp/services/apachelogparser/tests/test_apachelogparser.py	2010-01-07 06:47:46 +0000
+++ lib/lp/services/apachelogparser/tests/test_apachelogparser.py	2010-06-04 07:40:50 +0000
@@ -6,10 +6,12 @@
 import os
 from StringIO import StringIO
 import tempfile
+import textwrap
 import unittest
 
 from zope.component import getUtility
 
+from canonical.config import config
 from canonical.launchpad.scripts.logger import BufferLogger
 from canonical.launchpad.webapp.interfaces import (
     IStoreSelector, MAIN_STORE, DEFAULT_FLAVOR)
@@ -215,12 +217,40 @@
         self.assertEqual(self.logger.buffer.getvalue(), '')
 
         date = datetime(2008, 6, 13)
-        self.assertEqual(downloads, 
+        self.assertEqual(downloads,
             {'/15018215/ul_logo_64x64.png':
                 {datetime(2008, 6, 13): {'US': 1}}})
 
         self.assertEqual(parsed_bytes, fd.tell())
 
+    def test_max_parsed_lines(self):
+        # The logparser_max_parsed_lines config option limits the number of
+        # parsed lines.
+        config.push(
+            'log_parser config',
+            textwrap.dedent('''\
+                [launchpad]
+                logparser_max_parsed_lines: 2
+                '''))
+        fd = open(os.path.join(
+            here, 'apache-log-files', 'launchpadlibrarian.net.access-log'))
+        downloads, parsed_bytes = parse_file(
+            fd, start_position=0, logger=self.logger,
+            get_download_key=get_path_download_key)
+        config.pop("log_parser config")
+
+        self.assertEqual(self.logger.buffer.getvalue(), '')
+        date = datetime(2008, 6, 13)
+        self.assertContentEqual(
+            downloads.items(),
+            [('/12060796/me-tv-icon-64x64.png', {date: {'AU': 1}}),
+             ('/9096290/me-tv-icon-14x14.png', {date: {'AU': 1}})])
+
+        # We should have parsed only the first two lines of data.
+        fd.seek(0)
+        lines = fd.readlines()
+        self.assertEqual(parsed_bytes, len(lines[0]) + len(lines[1]))
+
 
 class TestParsedFilesDetection(TestCase):
     """Test the detection of already parsed logs."""
@@ -263,7 +293,7 @@
 
     def test_different_files_with_same_name(self):
         # Thanks to log rotation, two runs of our script may see files with
-        # the same name but completely different content.  If we see a file 
+        # the same name but completely different content.  If we see a file
         # with a name matching that of an already parsed file but with content
         # differing from the last file with that name parsed, we know we need
         # to parse the file from the start.

