summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGermán Póo-Caamaño <gpoo@gnome.org>2011-06-24 00:12:01 -0700
committerGermán Póo-Caamaño <gpoo@gnome.org>2011-06-24 00:12:01 -0700
commitb2fd0c693997b1aea57800870d13fdf52474610a (patch)
treea8884f2c7189e6245ce85c3b899d43dbf9192531
parent1a85acef6b98f27621e5f55e395c82691ed89693 (diff)
Move filetypes onto configuration file
The filetypes can be extended using a configuration file, where it is possible to associate each file type with its corresponding regular expressions. The code includes a script to test the regex without running gitdm. Signed-off-by: Germán Póo-Caamaño <gpoo@gnome.org>
-rw-r--r--ConfigFile.py40
-rw-r--r--README18
-rw-r--r--database.py19
-rw-r--r--file_types.py406
-rwxr-xr-xgitdm3
-rw-r--r--gitdm.config5
-rw-r--r--sample-config/filetypes.txt362
7 files changed, 445 insertions, 408 deletions
diff --git a/ConfigFile.py b/ConfigFile.py
index 32a4aec..0a942f4 100644
--- a/ConfigFile.py
+++ b/ConfigFile.py
@@ -123,6 +123,43 @@ def ReadVirtual (file, name):
croak ('Missing "end" line for virtual employer %s' % (name))
#
+# Read file type patterns for more fine-grained reports
+#
+def ReadFileType (filename):
+    """Read a file type mapping file and return its contents.
+
+    Each line obtained from ReadConfigLine is expected to be one of:
+
+        order <type1>,<type2>,...,<typeN>
+        filetype <type> <regex>
+
+    An 'order' line only takes effect while the order list is still
+    empty.  A type that has patterns but is missing from the order list
+    is appended to its end and a notice is printed.  Any other line is
+    reported through croak.
+
+    Returns a (patterns, order) tuple: patterns maps each type name to
+    the list of its regular expressions (compiled with re.IGNORECASE, in
+    the order they were read); order is the list of type names.
+    """
+    try:
+        file = open (filename, 'r')
+    except IOError:
+        croak ('Unable to open file type mapping file %s' % (filename))
+    patterns = {}
+    order = []
+    regex_order = re.compile (r'^order\s+(.*)$')
+    regex_file_type = re.compile (r'^filetype\s+(\S+)\s+(.+)$')
+    try:
+        line = ReadConfigLine (file)
+        while line:
+            o = regex_order.match (line)
+            if o:
+                # Consider only the first definition in the config file
+                elements = o.group (1).replace (' ', '')
+                order = order or elements.split (',')
+            else:
+                m = regex_file_type.match (line)
+                if not m:
+                    # Call this module's croak directly: inside this file
+                    # the name ConfigFile is the ConfigFile function, which
+                    # has no croak attribute.
+                    croak ('Funky file type line "%s"' % (line))
+                file_type = m.group (1)
+                if file_type not in patterns:
+                    patterns[file_type] = []
+                if file_type not in order:
+                    print ('%s not found, appended to the last order' % file_type)
+                    order.append (file_type)
+
+                patterns[file_type].append (re.compile (m.group (2), re.IGNORECASE))
+
+            line = ReadConfigLine (file)
+    finally:
+        # Release the handle even when parsing is aborted part-way.
+        file.close ()
+    return patterns, order
+
+
+#
# Read an overall config file.
#
@@ -146,6 +183,9 @@ def ConfigFile (name, confdir):
ReadGroupMap (os.path.join (confdir, sline[1]), sline[2])
elif sline[0] == 'VirtualEmployer':
ReadVirtual (file, ' '.join (sline[1:]))
+ elif sline[0] == 'FileTypeMap':
+ patterns, order = ReadFileType (os.path.join (confdir, sline[1]))
+ database.FileTypes = database.FileType (patterns, order)
else:
croak ('Unrecognized config line: "%s"' % (line))
line = ReadConfigLine (file)
diff --git a/README b/README
index 8d3922e..b837bd8 100644
--- a/README
+++ b/README
@@ -145,6 +145,24 @@ end
for example, no check to ensure that the percentages add up to
something rational.
+FileTypeMap file
+
+ Map file names/extensions onto file types. These files contain lines
+ like:
+
+ order <type1>,<type2>,...,<typeN>
+
+ filetype <type> <regex>
+ ...
+
+ This construct allows fine-grained reports by type of contribution
+ (build, code, image, multimedia, documentation, etc.)
+
+ Order is important because a file name may match the patterns of more
+ than one type. For instance, ltmain.sh fits better as 'build' than as
+ 'code' (it matches 'ltmain\.sh$' as well as '\.sh$'). The first type
+ listed in order takes precedence over the later ones.
+
OTHER TOOLS
diff --git a/database.py b/database.py
index b5d9382..6a62adc 100644
--- a/database.py
+++ b/database.py
@@ -188,6 +188,25 @@ class VirtualEmployer (Employer):
# Should check that they add up too, but I'm lazy
Employers[self.name] = self
+class FileType:
+    """Classify file names by matching them against per-type regex lists."""
+
+    def __init__ (self, patterns=None, order=None):
+        """Store the type -> regex-list mapping and the lookup order.
+
+        patterns: dict mapping a type name to a list of compiled regexes.
+        order:    list of type names; earlier names are tried first.
+
+        Each omitted argument gets a fresh empty container, so instances
+        never share state through the default values.
+        """
+        if patterns is None:
+            patterns = {}
+        if order is None:
+            order = []
+        self.patterns = patterns
+        self.order = order
+
+    def guess_file_type (self, filename, patterns=None, order=None):
+        """Return the first type in order with a pattern matching filename.
+
+        patterns and order override the instance's own values when given
+        and non-empty; an empty or missing override falls back to the
+        instance attributes.  Returns 'unknown' when nothing matches.
+        """
+        patterns = patterns or self.patterns
+        order = order or self.order
+
+        for file_type in order:
+            # Types listed in order but without patterns are skipped.
+            for patt in patterns.get (file_type, ()):
+                if patt.search (filename):
+                    return file_type
+
+        return 'unknown'
+
+# Replaced with a FileType instance when ConfigFile reads a FileTypeMap
+# line; stays None until then.
+FileTypes = None
+
#
# Mix all the virtual employers into their real destinations.
#
diff --git a/file_types.py b/file_types.py
deleted file mode 100644
index 78420cc..0000000
--- a/file_types.py
+++ /dev/null
@@ -1,406 +0,0 @@
-# -*- coding: iso-8859-1 -*-
-# Copyright (C) 2006 Libresoft
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU Library General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
-#
-# Authors : Gregorio Robles <grex@gsyc.escet.urjc.es>
-
-"""
-This modules contains configuration parameters regarding filetypes
-(documentation, develompent, sound, images...)
-
-
-@author: Gregorio Robles
-@organization: Grupo de Sistemas y Comunicaciones, Universidad Rey Juan Carlos
-@copyright: Universidad Rey Juan Carlos (Madrid, Spain)
-@license: GNU GPL version 2 or any later version
-@contact: grex@gsyc.escet.urjc.es
-"""
-
-import re
-
-# Code files (headers and the like included)
-# (most common languages first)
-
-config_files_code = [
- re.compile('\.c$'), # C
- re.compile('\.pc$'), # C
- re.compile('\.ec$'), # C
- re.compile('\.ecp$'), # C
- re.compile('\.C$'), # C++
- re.compile('\.cpp$'), # C++
- re.compile('\.c\+\+$'), # C++
- re.compile('\.cxx$'), # C++
- re.compile('\.cc$'), # C++
- re.compile('\.pcc$'), # C++
- re.compile('\.cpy$'), # C++
- re.compile('\.h$'), # C or C++ header
- re.compile('\.hh$'), # C++ header
- re.compile('\.hpp$'), # C++ header
- re.compile('\.hxx$'), # C++ header
- re.compile('\.sh$'), # Shell
- re.compile('\.pl$'), # Perl
- re.compile('\.pm$'), # Perl
- re.compile('\.pod$'), # Perl
- re.compile('\.perl$'), # Perl
- re.compile('\.cgi$'), # CGI
- re.compile('\.php$'), # PHP
- re.compile('\.php3$'), # PHP
- re.compile('\.php4$'), # PHP
- re.compile('\.inc$'), # PHP
- re.compile('\.py$'), # Python
- re.compile('\.java$'), # Java
- re.compile('\.class$'), # Java Class (or at least a class in some OOPL)
- re.compile('\.ada$'), # ADA
- re.compile('\.ads$'), # ADA
- re.compile('\.adb$'), # ADA
- re.compile('\.pad$'), # ADA
- re.compile('\.s$'), # Assembly
- re.compile('\.S$'), # Assembly
- re.compile('\.asm$'), # Assembly
- re.compile('\.awk$'), # awk
- re.compile('\.cs$'), # C#
- re.compile('\.csh$'), # CShell (including tcsh)
- re.compile('\.cob$'), # COBOL
- re.compile('\.cbl$'), # COBOL
- re.compile('\.COB$'), # COBOL
- re.compile('\.CBL$'), # COBOL
- re.compile('\.exp$'), # Expect
- re.compile('\.l$'), # (F)lex
- re.compile('\.ll$'), # (F)lex
- re.compile('\.lex$'), # (F)lex
- re.compile('\.f$'), # Fortran
- re.compile('\.f77$'), # Fortran
- re.compile('\.F$'), # Fortran
- re.compile('\.hs$'), # Haskell
- re.compile('\.lhs$'), # Not preprocessed Haskell
- re.compile('\.el$'), # LISP (including Scheme)
- re.compile('\.scm$'), # LISP (including Scheme)
- re.compile('\.lsp$'), # LISP (including Scheme)
- re.compile('\.jl$'), # LISP (including Scheme)
- re.compile('\.ml$'), # ML
- re.compile('\.ml3$'), # ML
- re.compile('\.m3$'), # Modula3
- re.compile('\.i3$'), # Modula3
- re.compile('\.m$'), # Objective-C
- re.compile('\.p$'), # Pascal
- re.compile('\.pas$'), # Pascal
- re.compile('\.rb$'), # Ruby
- re.compile('\.sed$'), # sed
- re.compile('\.tcl$'), # TCL
- re.compile('\.tk$'), # TCL
- re.compile('\.itk$'), # TCL
- re.compile('\.y$'), # Yacc
- re.compile('\.yy$'), # Yacc
- re.compile('\.idl$'), # CORBA IDL
- re.compile('\.gnorba$'), # GNOME CORBA IDL
- re.compile('\.oafinfo$'), # GNOME OAF
- re.compile('\.mcopclass$'), # MCOP IDL compiler generated class
- re.compile('\.autoforms$'), # Autoform
- re.compile('\.atf$'), # Autoform
- re.compile('\.gnuplot$'),
- re.compile('\.xs$'), # Shared library? Seen a lot of them in gnome-perl
- re.compile('\.js$'), # JavaScript (and who knows, maybe more)
- re.compile('\.patch$'),
- re.compile('\.diff$'), # Sometimes patches appear this way
- re.compile('\.ids$'), # Not really sure what this means
- re.compile('\.upd$'), # ¿¿¿??? (from Kcontrol)
- re.compile('$.ad$'), # ¿¿¿??? (from Kdisplay and mc)
- re.compile('$.i$'), # Appears in the kbindings for Qt
- re.compile('$.pri$'), # from Qt
- re.compile('\.schema$'), # Not really sure what this means
- re.compile('\.fd$'), # Something to do with latex
- re.compile('\.cls$'), # Something to do with latex
- re.compile('\.pro$'), # Postscript generation
- re.compile('\.ppd$'), # PDF generation
- re.compile('\.dlg$'), # Not really sure what this means
- re.compile('\.plugin$'), # Plug-in file
- re.compile('\.dsp'), # Microsoft Developer Studio Project File
- re.compile('\.vim$'), # vim syntax file
- re.compile('\.trm$'), # gnuplot term file
- re.compile('\.font$'), # Font mapping
- re.compile('\.ccg$'), # C++ files - Found in gtkmm*
- re.compile('\.hg$'), # C++ headers - Found in gtkmm*
- re.compile('\.dtd'), # XML Document Type Definition
- re.compile('\.bat'), # DOS batch files
- re.compile('\.vala'), # Vala
- re.compile('\.py\.in$'),
- re.compile('\.rhtml$'), # eRuby
- re.compile('\.sql$') # SQL script
- ]
-
-# Development documentation files (for hacking generally)
-
-config_files_devel_doc = [
- re.compile('^readme.*$'),
- re.compile('^changelog.*'),
- re.compile('^todo.*$'),
- re.compile('^credits.*$'),
- re.compile('^authors.*$'),
- re.compile('^changes.*$'),
- re.compile('^news.*$'),
- re.compile('^install.*$'),
- re.compile('^hacking.*$'),
- re.compile('^copyright.*$'),
- re.compile('^licen(s|c)e.*$'),
- re.compile('^copying.*$'),
- re.compile('manifest$'),
- re.compile('faq$'),
- re.compile('building$'),
- re.compile('howto$'),
- re.compile('design$'),
- re.compile('\.files$'),
- re.compile('files$'),
- re.compile('subdirs$'),
- re.compile('maintainers$'),
- re.compile('developers$'),
- re.compile('contributors$'),
- re.compile('thanks$'),
- re.compile('releasing$'),
- re.compile('test$'),
- re.compile('testing$'),
- re.compile('build$'),
- re.compile('comments?$'),
- re.compile('bugs$'),
- re.compile('buglist$'),
- re.compile('problems$'),
- re.compile('debug$'),
- re.compile('hacks$'),
- re.compile('hacking$'),
- re.compile('versions?$'),
- re.compile('mappings$'),
- re.compile('tips$'),
- re.compile('ideas?$'),
- re.compile('spec$'),
- re.compile('compiling$'),
- re.compile('notes$'),
- re.compile('missing$'),
- re.compile('done$'),
- re.compile('\.omf$'), # XML-based format used in GNOME
- re.compile('\.lsm$'),
- re.compile('^doxyfile$'),
- re.compile('\.kdevprj$'),
- re.compile('\.directory$'),
- re.compile('\.dox$'),
- re.compile('\.doap$')
- ]
-
-# Building, compiling, configuration and CVS admin files
-
-config_files_building = [
- re.compile('\.in.*$'),
- re.compile('configure.*$'),
- re.compile('makefile.*$'),
- re.compile('config\.sub$'),
- re.compile('config\.guess$'),
- re.compile('config\.status$'),
- re.compile('ltmain\.sh$'),
- re.compile('autogen\.sh$'),
- re.compile('config$'),
- re.compile('conf$'),
- re.compile('cvsignore$'),
- re.compile('\.cfg$'),
- re.compile('\.m4$'),
- re.compile('\.mk$'),
- re.compile('\.mak$'),
- re.compile('\.make$'),
- re.compile('\.mbx$'),
- re.compile('\.protocol$'),
- re.compile('\.version$'),
- re.compile('mkinstalldirs$'),
- re.compile('install-sh$'),
- re.compile('rules$'),
- re.compile('\.kdelnk$'),
- re.compile('\.menu$'),
- re.compile('linguas$'), # Build translations
- re.compile('potfiles.*$'), # Build translations
- re.compile('\.shlibs$'), # Shared libraries
-# re.compile('%debian%'),
-# re.compile('%specs/%'),
- re.compile('\.spec$'), # It seems they're necessary for RPM building
- re.compile('\.def$') # build bootstrap for DLLs on win32
- ]
-
-
-
-# Documentation files
-
-config_files_documentation = [
-# 'doc/%'),
-# re.compile('%HOWTO%'),
- re.compile('\.html$'),
- re.compile('\.txt$'),
- re.compile('\.ps(\.gz|\.bz2)?$'),
- re.compile('\.dvi(\.gz|\.bz2)?$'),
- re.compile('\.lyx$'),
- re.compile('\.tex$'),
- re.compile('\.texi$'),
- re.compile('\.pdf(\.gz|\.bz2)?$'),
- re.compile('\.djvu$'),
- re.compile('\.epub$'),
- re.compile('\.sgml$'),
- re.compile('\.docbook$'),
- re.compile('\.wml$'),
- re.compile('\.xhtml$'),
- re.compile('\.phtml$'),
- re.compile('\.shtml$'),
- re.compile('\.htm$'),
- re.compile('\.rdf$'),
- re.compile('\.phtm$'),
- re.compile('\.tmpl$'),
- re.compile('\.ref$'), # References
- re.compile('\.css$'),
-# re.compile('%tutorial%'),
- re.compile('\.templates$'),
- re.compile('\.dsl$'),
- re.compile('\.ent$'),
- re.compile('\.xml$'),
- re.compile('\.xmi$'),
- re.compile('\.xsl$'),
- re.compile('\.entities$'),
- re.compile('\.[1-7]$'), # Man pages
- re.compile('\.man$'),
- re.compile('\.manpages$'),
- re.compile('\.doc$'),
- re.compile('\.rtf$'),
- re.compile('\.wpd$'),
- re.compile('\.qt3$'),
- re.compile('man\d?/.*\.\d$'),
- re.compile('\.docs$'),
- re.compile('\.sdw$'), # OpenOffice.org Writer document
- re.compile('\.odt$'), # OpenOffice.org document
- re.compile('\.en$'), # Files in English language
- re.compile('\.de$'), # Files in German
- re.compile('\.es$'), # Files in Spanish
- re.compile('\.fr$'), # Files in French
- re.compile('\.it$'), # Files in Italian
- re.compile('\.cz$') # Files in Czech
- ]
-
-# Images
-
-config_files_images = [
- re.compile('\.png$'),
- re.compile('\.jpg$'),
- re.compile('\.jpeg$'),
- re.compile('\.bmp$'),
- re.compile('\.gif$'),
- re.compile('\.xbm$'),
- re.compile('\.eps$'),
- re.compile('\.mng$'),
- re.compile('\.pnm$'),
- re.compile('\.pbm$'),
- re.compile('\.ppm$'),
- re.compile('\.pgm$'),
- re.compile('\.gbr$'),
- re.compile('\.svg$'),
- re.compile('\.fig$'),
- re.compile('\.tif$'),
- re.compile('\.swf$'),
- re.compile('\.svgz$'),
- re.compile('\.shape$'), # XML files used for shapes for instance in Kivio
- re.compile('\.sml$'), # XML files used for shapes for instance in Kivio
- re.compile('\.bdf$'), # vfontcap - Vector Font Capability Database (VFlib Version 2)
- re.compile('\.ico$'),
- re.compile('\.dia$') # We consider .dia as images, I don't want them in unknown
- ]
-
-# Translation files
-
-config_files_translation = [
- re.compile('\.po$'),
- re.compile('\.pot$'),
- re.compile('\.charset$'),
- re.compile('\.mo$')
- ]
-
-# User interface files
-
-config_files_ui = [
- re.compile('\.desktop$'),
- re.compile('\.ui$'),
- re.compile('\.xpm$'),
- re.compile('\.xcf$'),
- re.compile('\.3ds$'),
- re.compile('\.theme$'),
- re.compile('\.kimap$'),
- re.compile('\.glade$'),
- re.compile('\.gtkbuilder$'),
- re.compile('rc$')
- ]
-
-# Sound files
-
-config_files_sound = [
- re.compile('\.mp3$'),
- re.compile('\.ogg$'),
- re.compile('\.wav$'),
- re.compile('\.au$'),
- re.compile('\.mid$'),
- re.compile('\.vorbis$'),
- re.compile('\.midi$'),
- re.compile('\.arts$')
- ]
-
-# Packages (yes, there are people who upload packages to the repo)
-
-config_files_packages = [
- re.compile('\.tar$'),
- re.compile('\.tar.gz$'),
- re.compile('\.tar.bz2$'),
- re.compile('\.tgz$'),
- re.compile('\.deb$'),
- re.compile('\.rpm$'),
- re.compile('\.srpm$'),
- re.compile('\.ebuild$')
- ]
-
-# The list should keep this order
-# ie. we want ltmain.sh -> build instead of code
-config_files = [
- ('image' , config_files_images),
- ('i18n' , config_files_translation),
- ('ui' , config_files_ui),
- ('multimedia' , config_files_sound),
- ('package' , config_files_packages),
- ('build' , config_files_building),
- ('code' , config_files_code),
- ('documentation' , config_files_documentation),
- ('devel-doc' , config_files_devel_doc)
- ]
-
-def guess_file_type (filename):
- for type, patt_list in config_files:
- for patt in patt_list:
- if patt.search (filename.lower ()):
- return type
-
- return 'unknown'
-
-if __name__ == '__main__':
- import sys
- import os
-
- path = sys.argv[1]
- if os.path.isdir (path):
- for root, dirs, files in os.walk (path):
- for skip in ('.svn', 'CVS', '.git'):
- if skip in dirs:
- dirs.remove (skip)
-
- for file in files:
- print "%s: %s" % (os.path.join (root, file), guess_file_type (file))
- else:
- print guess_file_type (path)
diff --git a/gitdm b/gitdm
index 28df314..2c0193b 100755
--- a/gitdm
+++ b/gitdm
@@ -15,7 +15,6 @@
import database, csvdump, ConfigFile, reports
import getopt, datetime
import os, re, sys, rfc822, string
-import file_types
import logparser
from patterns import patterns
@@ -202,7 +201,7 @@ def parse_numstat(line, file_filter):
if m:
filename = '%s%s%s' % (m.group (1), m.group (3), m.group (4))
- filetype = file_types.guess_file_type (os.path.basename(filename))
+ filetype = database.FileTypes.guess_file_type (os.path.basename(filename))
return filename, filetype, added, removed
else:
return None, None, None, None
diff --git a/gitdm.config b/gitdm.config
index 588d6ef..3ae2f20 100644
--- a/gitdm.config
+++ b/gitdm.config
@@ -20,3 +20,8 @@ EmailMap sample-config/domain-map
#
# GroupMap sample-config/illuminati The Illuminati
#
+#
+# Use FileTypeMap to map file names to file types using regular
+# expressions.
+#
+FileTypeMap sample-config/filetypes.txt
diff --git a/sample-config/filetypes.txt b/sample-config/filetypes.txt
new file mode 100644
index 0000000..e24c396
--- /dev/null
+++ b/sample-config/filetypes.txt
@@ -0,0 +1,362 @@
+# -*- coding:utf-8 -*-
+# Copyright (C) 2006 Libresoft
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+#
+# Authors : Gregorio Robles <grex@gsyc.escet.urjc.es>
+# Authors : Germán Póo-Caamaño <gpoo@gnome.org>
+#
+# This file contains association parameters regarding filetypes
+# (documentation, development, multimedia, images...)
+#
+# format:
+# filetype <type> <regex> [<comment>]
+#
+# Order:
+# The list should keep an order, so filetypes can be counted properly.
+# ie. we want ltmain.sh -> 'build' instead of 'code'.
+#
+# If there is a filetype which is not in order but has values, it will
+# be added at the end.
+#
+order image,translation,ui,multimedia,package,build,code,documentation,devel-doc
+#
+#
+# Code files (headers and the like included)
+# (most common languages first)
+#
+filetype code \.c$ # C
+filetype code \.pc$ # C
+filetype code \.ec$ # C
+filetype code \.ecp$ # C
+filetype code \.C$ # C++
+filetype code \.cpp$ # C++
+filetype code \.c\+\+$ # C++
+filetype code \.cxx$ # C++
+filetype code \.cc$ # C++
+filetype code \.pcc$ # C++
+filetype code \.cpy$ # C++
+filetype code \.h$ # C or C++ header
+filetype code \.hh$ # C++ header
+filetype code \.hpp$ # C++ header
+filetype code \.hxx$ # C++ header
+filetype code \.sh$ # Shell
+filetype code \.pl$ # Perl
+filetype code \.pm$ # Perl
+filetype code \.pod$ # Perl
+filetype code \.perl$ # Perl
+filetype code \.cgi$ # CGI
+filetype code \.php$ # PHP
+filetype code \.php3$ # PHP
+filetype code \.php4$ # PHP
+filetype code \.inc$ # PHP
+filetype code \.py$ # Python
+filetype code \.java$ # Java
+filetype code \.class$ # Java Class (or at least a class in some OOPL)
+filetype code \.ada$ # ADA
+filetype code \.ads$ # ADA
+filetype code \.adb$ # ADA
+filetype code \.pad$ # ADA
+filetype code \.s$ # Assembly
+filetype code \.S$ # Assembly
+filetype code \.asm$ # Assembly
+filetype code \.awk$ # awk
+filetype code \.cs$ # C#
+filetype code \.csh$ # CShell (including tcsh)
+filetype code \.cob$ # COBOL
+filetype code \.cbl$ # COBOL
+filetype code \.COB$ # COBOL
+filetype code \.CBL$ # COBOL
+filetype code \.exp$ # Expect
+filetype code \.l$ # (F)lex
+filetype code \.ll$ # (F)lex
+filetype code \.lex$ # (F)lex
+filetype code \.f$ # Fortran
+filetype code \.f77$ # Fortran
+filetype code \.F$ # Fortran
+filetype code \.hs$ # Haskell
+filetype code \.lhs$ # Not preprocessed Haskell
+filetype code \.el$ # LISP (including Scheme)
+filetype code \.scm$ # LISP (including Scheme)
+filetype code \.lsp$ # LISP (including Scheme)
+filetype code \.jl$ # LISP (including Scheme)
+filetype code \.ml$ # ML
+filetype code \.ml3$ # ML
+filetype code \.m3$ # Modula3
+filetype code \.i3$ # Modula3
+filetype code \.m$ # Objective-C
+filetype code \.p$ # Pascal
+filetype code \.pas$ # Pascal
+filetype code \.rb$ # Ruby
+filetype code \.sed$ # sed
+filetype code \.tcl$ # TCL
+filetype code \.tk$ # TCL
+filetype code \.itk$ # TCL
+filetype code \.y$ # Yacc
+filetype code \.yy$ # Yacc
+filetype code \.idl$ # CORBA IDL
+filetype code \.gnorba$ # GNOME CORBA IDL
+filetype code \.oafinfo$ # GNOME OAF
+filetype code \.mcopclass$ # MCOP IDL compiler generated class
+filetype code \.autoforms$ # Autoform
+filetype code \.atf$ # Autoform
+filetype code \.gnuplot$
+filetype code \.xs$ # Shared library? Seen a lot of them in gnome-perl
+filetype code \.js$ # JavaScript (and who knows, maybe more)
+filetype code \.patch$
+filetype code \.diff$ # Sometimes patches appear this way
+filetype code \.ids$ # Not really sure what this means
+filetype code \.upd$ # ¿¿¿??? (from Kcontrol)
+filetype code \.ad$ # ¿¿¿??? (from Kdisplay and mc)
+filetype code \.i$ # Appears in the kbindings for Qt
+filetype code \.pri$ # from Qt
+filetype code \.schema$ # Not really sure what this means
+filetype code \.fd$ # Something to do with latex
+filetype code \.cls$ # Something to do with latex
+filetype code \.pro$ # Postscript generation
+filetype code \.ppd$ # PDF generation
+filetype code \.dlg$ # Not really sure what this means
+filetype code \.plugin$ # Plug-in file
+filetype code \.dsp # Microsoft Developer Studio Project File
+filetype code \.vim$ # vim syntax file
+filetype code \.trm$ # gnuplot term file
+filetype code \.font$ # Font mapping
+filetype code \.ccg$ # C++ files - Found in gtkmm*
+filetype code \.hg$ # C++ headers - Found in gtkmm*
+filetype code \.dtd # XML Document Type Definition
+filetype code \.bat # DOS batch files
+filetype code \.vala # Vala
+filetype code \.py\.in$
+filetype code \.rhtml$ # eRuby
+filetype code \.sql$ # SQL script
+#
+#
+# Development documentation files (for hacking generally)
+#
+filetype devel-doc ^readme.*$
+filetype devel-doc ^changelog.*
+filetype devel-doc ^todo.*$
+filetype devel-doc ^credits.*$
+filetype devel-doc ^authors.*$
+filetype devel-doc ^changes.*$
+filetype devel-doc ^news.*$
+filetype devel-doc ^install.*$
+filetype devel-doc ^hacking.*$
+filetype devel-doc ^copyright.*$
+filetype devel-doc ^licen(s|c)e.*$
+filetype devel-doc ^copying.*$
+filetype devel-doc manifest$
+filetype devel-doc faq$
+filetype devel-doc building$
+filetype devel-doc howto$
+filetype devel-doc design$
+filetype devel-doc \.files$
+filetype devel-doc files$
+filetype devel-doc subdirs$
+filetype devel-doc maintainers$
+filetype devel-doc developers$
+filetype devel-doc contributors$
+filetype devel-doc thanks$
+filetype devel-doc releasing$
+filetype devel-doc test$
+filetype devel-doc testing$
+filetype devel-doc build$
+filetype devel-doc comments?$
+filetype devel-doc bugs$
+filetype devel-doc buglist$
+filetype devel-doc problems$
+filetype devel-doc debug$
+filetype devel-doc hacks$
+filetype devel-doc hacking$
+filetype devel-doc versions?$
+filetype devel-doc mappings$
+filetype devel-doc tips$
+filetype devel-doc ideas?$
+filetype devel-doc spec$
+filetype devel-doc compiling$
+filetype devel-doc notes$
+filetype devel-doc missing$
+filetype devel-doc done$
+filetype devel-doc \.omf$ # XML-based format used in GNOME
+filetype devel-doc \.lsm$
+filetype devel-doc ^doxyfile$
+filetype devel-doc \.kdevprj$
+filetype devel-doc \.directory$
+filetype devel-doc \.dox$
+filetype devel-doc \.doap$
+#
+#
+# Building, compiling, configuration and CVS admin files
+#
+filetype build \.in.*$
+filetype build configure.*$
+filetype build makefile.*$
+filetype build config\.sub$
+filetype build config\.guess$
+filetype build config\.status$
+filetype build ltmain\.sh$
+filetype build autogen\.sh$
+filetype build config$
+filetype build conf$
+filetype build cvsignore$
+filetype build \.cfg$
+filetype build \.m4$
+filetype build \.mk$
+filetype build \.mak$
+filetype build \.make$
+filetype build \.mbx$
+filetype build \.protocol$
+filetype build \.version$
+filetype build mkinstalldirs$
+filetype build install-sh$
+filetype build rules$
+filetype build \.kdelnk$
+filetype build \.menu$
+filetype build linguas$ # Build translations
+filetype build potfiles.*$ # Build translations
+filetype build \.shlibs$ # Shared libraries
+# filetype build %debian%
+# filetype build %specs/%
+filetype build \.spec$ # It seems they're necessary for RPM building
+filetype build \.def$ # build bootstrap for DLLs on win32
+#
+#
+# Documentation files
+#
+# filetype documentation doc/%
+# filetype documentation %HOWTO%
+filetype documentation \.html$
+filetype documentation \.txt$
+filetype documentation \.ps(\.gz|\.bz2)?$
+filetype documentation \.dvi(\.gz|\.bz2)?$
+filetype documentation \.lyx$
+filetype documentation \.tex$
+filetype documentation \.texi$
+filetype documentation \.pdf(\.gz|\.bz2)?$
+filetype documentation \.djvu$
+filetype documentation \.epub$
+filetype documentation \.sgml$
+filetype documentation \.docbook$
+filetype documentation \.wml$
+filetype documentation \.xhtml$
+filetype documentation \.phtml$
+filetype documentation \.shtml$
+filetype documentation \.htm$
+filetype documentation \.rdf$
+filetype documentation \.phtm$
+filetype documentation \.tmpl$
+filetype documentation \.ref$ # References
+filetype documentation \.css$
+# filetype documentation %tutorial%
+filetype documentation \.templates$
+filetype documentation \.dsl$
+filetype documentation \.ent$
+filetype documentation \.xml$
+filetype documentation \.xmi$
+filetype documentation \.xsl$
+filetype documentation \.entities$
+filetype documentation \.[1-7]$ # Man pages
+filetype documentation \.man$
+filetype documentation \.manpages$
+filetype documentation \.doc$
+filetype documentation \.rtf$
+filetype documentation \.wpd$
+filetype documentation \.qt3$
+filetype documentation man\d?/.*\.\d$
+filetype documentation \.docs$
+filetype documentation \.sdw$ # OpenOffice.org Writer document
+filetype documentation \.odt$ # OpenOffice.org document
+filetype documentation \.en$ # Files in English language
+filetype documentation \.de$ # Files in German
+filetype documentation \.es$ # Files in Spanish
+filetype documentation \.fr$ # Files in French
+filetype documentation \.it$ # Files in Italian
+filetype documentation \.cz$ # Files in Czech
+filetype documentation \.page$ # Mallard
+filetype documentation \.page.stub$ # Mallard stub
+#
+#
+# Images
+#
+filetype image \.png$
+filetype image \.jpg$
+filetype image \.jpeg$
+filetype image \.bmp$
+filetype image \.gif$
+filetype image \.xbm$
+filetype image \.eps$
+filetype image \.mng$
+filetype image \.pnm$
+filetype image \.pbm$
+filetype image \.ppm$
+filetype image \.pgm$
+filetype image \.gbr$
+filetype image \.svg$
+filetype image \.fig$
+filetype image \.tif$
+filetype image \.swf$
+filetype image \.svgz$
+filetype image \.shape$ # XML files used for shapes for instance in Kivio
+filetype image \.sml$ # XML files used for shapes for instance in Kivio
+filetype image \.bdf$ # vfontcap - Vector Font Capability Database (VFlib Version 2)
+filetype image \.ico$
+filetype image \.dia$ # We consider .dia as images, I don't want them in unknown
+#
+#
+# Translation files
+#
+filetype translation \.po$
+filetype translation \.pot$
+filetype translation \.charset$
+filetype translation \.mo$
+#
+#
+# User interface files
+#
+filetype ui \.desktop$
+filetype ui \.ui$
+filetype ui \.xpm$
+filetype ui \.xcf$
+filetype ui \.3ds$
+filetype ui \.theme$
+filetype ui \.kimap$
+filetype ui \.glade$
+filetype ui \.gtkbuilder$
+filetype ui rc$
+#
+#
+# Sound files
+#
+filetype multimedia \.mp3$
+filetype multimedia \.ogg$
+filetype multimedia \.wav$
+filetype multimedia \.au$
+filetype multimedia \.mid$
+filetype multimedia \.vorbis$
+filetype multimedia \.midi$
+filetype multimedia \.arts$
+#
+#
+# Packages (yes, there are people who upload packages to the repo)
+#
+filetype package \.tar$
+filetype package \.tar.gz$
+filetype package \.tar.bz2$
+filetype package \.tar.xz$
+filetype package \.tgz$
+filetype package \.deb$
+filetype package \.rpm$
+filetype package \.srpm$
+filetype package \.ebuild$