diff -u --recursive --new-file v2.3.99-pre1/linux/CREDITS linux/CREDITS --- v2.3.99-pre1/linux/CREDITS Tue Mar 14 19:10:38 2000 +++ linux/CREDITS Fri Mar 17 10:56:19 2000 @@ -318,6 +318,15 @@ D: IEEE 1394 subsystem rewrite and maintainer D: Texas Instruments PCILynx IEEE 1394 driver +N: Marc Boucher +E: marc@mbsi.ca +P: CA 67 A5 1A 38 CE B6 F2 D5 83 51 03 D2 9C 30 9E CE D2 DD 65 +D: Netfilter core +D: IP policy routing by mark +D: Various fixes (mostly networking) +S: Montreal, Quebec +S: Canada + N: Zoltán Böszörményi E: zboszor@mail.externet.hu D: MTRR emulation with Cyrix style ARR registers, Athlon MTRR support @@ -1368,6 +1377,11 @@ D: Author of the dialog utility, foundation D: for Menuconfig's lxdialog. +N: Paul Laufer +E: pelaufer@csupomona.edu +D: Soundblaster driver fixes, ISAPnP quirk +S: California, USA + N: Tom Lees E: tom@lpsg.demon.co.uk W: http://www.lpsg.demon.co.uk/ @@ -2247,6 +2261,13 @@ S: 22 Irvington Cres. S: Willowdale, Ontario S: Canada M2N 2Z1 + +N: Adam Sulmicki +E: adam@cfar.umd.edu +W: http://www.eax.com +D: core networking fixes +D: patch-kernel enhancements +D: misc kernel fixes and updates N: Adrian Sun E: asun@cobaltnet.com diff -u --recursive --new-file v2.3.99-pre1/linux/Documentation/Changes linux/Documentation/Changes --- v2.3.99-pre1/linux/Documentation/Changes Fri Mar 10 16:40:38 2000 +++ linux/Documentation/Changes Fri Mar 17 13:10:45 2000 @@ -43,7 +43,7 @@ encountered a bug! If you're unsure what version you're currently running, the suggested command should tell you. 
-- Kernel modutils 2.3.7 ; insmod -V +- Kernel modutils 2.3.10 ; insmod -V - Gnu C 2.7.2.3 ; gcc --version - Binutils 2.9.1.0.7 ; ld -v - Linux libc5 C Library 5.4.46 ; ls -l /lib/libc* @@ -62,7 +62,7 @@ - Bash 1.14.7 ; bash -version - Ncpfs 2.2.0 ; ncpmount -v - Pcmcia-cs 3.1.2 ; cardmgr -V -- PPP 2.3.9 ; pppd --version +- PPP 2.3.11 ; pppd --version - Util-linux 2.9i ; chsh -v - isdn4k-utils v3.1beta7 ; isdnctrl 2>&1|grep version @@ -174,7 +174,7 @@ ======= You need to upgrade to the latest version of modutils for the Linux -2.3 kernel. This version will also work with your 2.0 kernel. +2.3 kernel. This version can also be built to work with your 2.0 kernel. As of 2.1.90-pre1, kerneld has been replaced by a kernel thread, kmod. See Documentation/kmod.txt for more information. The main @@ -390,7 +390,7 @@ Due to changes in the PPP driver and routing code, those of you using PPP networking will need to upgrade your pppd to at least -version 2.3.9. See ftp://cs.anu.edu.au/pub/software/ppp/ for newest +version 2.3.11. See ftp://cs.anu.edu.au/pub/software/ppp/ for newest versions. You must make sure that the special device file /dev/ppp exists. @@ -586,8 +586,8 @@ Modules utilities ================= -The 2.3.7 release: -ftp://ftp.ocs.com.au/pub/modutils/v2.3/modutils-2.3.7.tar.gz +The 2.3.10 release: +ftp://ftp.ocs.com.au/pub/modutils/v2.3/modutils-2.3.10.tar.gz Procps utilities ================ @@ -715,8 +715,8 @@ PPP === -The 2.3.9 release: -ftp://cs.anu.edu.au/pub/software/ppp/ppp-2.3.9.tar.gz +The 2.3.11 release: +ftp://cs.anu.edu.au/pub/software/ppp/ppp-2.3.11.tar.gz IP Chains ========= diff -u --recursive --new-file v2.3.99-pre1/linux/Documentation/Configure.help linux/Documentation/Configure.help --- v2.3.99-pre1/linux/Documentation/Configure.help Tue Mar 14 19:10:38 2000 +++ linux/Documentation/Configure.help Sat Mar 18 16:41:47 2000 @@ -320,6 +320,47 @@ If unsure, say N. 
+ATA/IDE/MFM/RLL support +CONFIG_IDE + If you say Y here, your kernel will be able to manage low cost mass storage + units such as ATA/(E)IDE and ATAPI units. + + Integrated Drive Electronics (IDE aka ATA-1) is a connecting standard for + mass storage units such as hard disks. It was designed by Western Digital + and Compaq Computer in 1984. It was then named ST506. + Quite a number of disks use the IDE interface. State of the art disks use the + SCSI interface. + + AT Attachment (ATA) is a subset of the IDE specifications. + ST506 was also called ATA-1. + + Fast-IDE is ATA-2 (also named Fast ATA), Enhanced IDE (EIDE) is ATA-3. It + provides support for larger disks (up to 8.4GB by means of the LBA standard), + more disks (4 instead of 2) and for other mass storage units such as tapes + and cdrom. + UDMA/33 (aka UltraDMA/33) is ATA-4 and provides faster (and more CPU friendly) + transfer modes than the PIO (Programmed processor Input/Output) modes of + previous ATA/IDE standards by means of fast DMA controllers. + + ATA Packet Interface (ATAPI) is a protocol used by EIDE tape and CDROM + drives, similar in many respects to the SCSI protocol. + + SMART IDE (Self Monitoring, Analysis and Reporting Technology) was designed + in order to prevent data corruption and disk crashes by detecting conditions + that precede hardware failure (heat, access time, and the like...). Disks built + since June 1995 may follow this standard. The kernel itself doesn't manage this; + however there are quite a number of user programs such as smart that can + query the status of a disk's SMART parameters. + + If you want to compile this driver as a module ( = code which can be + inserted in and removed from the running kernel whenever you want), + say M here and read Documentation/modules.txt. The module will be + called ide.o. + + For further information, please read Documentation/ide.txt. + + If unsure, say Y. 
+ Enhanced IDE/MFM/RLL disk/cdrom/tape/floppy support CONFIG_BLK_DEV_IDE If you say Y here, you will use the full-featured IDE driver to @@ -1657,6 +1698,230 @@ Chances are that you should say Y here if you compile a kernel which will run as a router and N for regular hosts. If unsure, say N. +IP: connection tracking (required for masq/NAT) +CONFIG_IP_NF_CONNTRACK + Connection tracking keeps a record of what packets have passed + through your machine, in order to figure out how they are related + into connections. + + This is required to do Masquerading or other kinds of Network + Address Translation (except for Fast NAT). It can also be used to + enhance packet filtering (see `Connection state match support' + below). + + If you want to compile it as a module, say M here and read + Documentation/modules.txt. If unsure, say `N'. + +FTP protocol support +CONFIG_IP_NF_FTP + Tracking FTP connections is problematic: special helpers are + required for tracking them, and doing masquerading and other forms + of Network Address Translation on them. + + If you want to compile it as a module, say M here and read + Documentation/modules.txt. If unsure, say `Y'. + +IP: userspace queueing via NETLINK (EXPERIMENTAL) +CONFIG_IP_NF_QUEUE + Netfilter has the ability to queue packets to userspace: the netlink + device can be used to access them using this driver. + + If you want to compile it as a module, say M here and read + Documentation/modules.txt. If unsure, say `N'. + +IP: ip tables support (required for filtering/masq/NAT) +CONFIG_IP_NF_IPTABLES + iptables is a general, extensible packet identification framework. + The packet filtering and full NAT (masquerading, port forwarding, + etc) subsystems now use this: say `Y' or `M' here if you want to use + either of those. + + If you want to compile it as a module, say M here and read + Documentation/modules.txt. If unsure, say `N'. 
+ +limit match support +CONFIG_IP_NF_MATCH_LIMIT + limit matching allows you to control the rate at which a rule can be + matched: mainly useful in combination with the LOG target. + + If you want to compile it as a module, say M here and read + Documentation/modules.txt. If unsure, say `N'. + +MAC address match support +CONFIG_IP_NF_MATCH_MAC + mac matching allows you to match packets based on the source + ethernet address of the packet. + + If you want to compile it as a module, say M here and read + Documentation/modules.txt. If unsure, say `N'. + +netfilter mark match support +CONFIG_IP_NF_MATCH_MARK + Netfilter mark matching allows you to match packets based on the + `nfmark' value in the packet. This can be set by the MARK target + (see below). + + If you want to compile it as a module, say M here and read + Documentation/modules.txt. If unsure, say `N'. + +Multiple port match support +CONFIG_IP_NF_MATCH_MULTIPORT + Multiport matching allows you to match TCP or UDP packets based on + a series of source or destination ports: normally a rule can only + match a single range of ports. + + If you want to compile it as a module, say M here and read + Documentation/modules.txt. If unsure, say `N'. + +TOS match support +CONFIG_IP_NF_MATCH_TOS + TOS matching allows you to match packets based on the Type Of + Service fields of the IP packet. + + If you want to compile it as a module, say M here and read + Documentation/modules.txt. If unsure, say `N'. + +Connection state match support +CONFIG_IP_NF_MATCH_STATE + Connection state matching allows you to match packets based on their + relationship to a tracked connection (ie. previous packets). This + is a powerful tool for packet classification. + + If you want to compile it as a module, say M here and read + Documentation/modules.txt. If unsure, say `N'. 
+ +Unclean match support (EXPERIMENTAL) +CONFIG_IP_NF_MATCH_UNCLEAN + Unclean packet matching matches any strange or invalid packets, by + looking at a series of fields in the IP, TCP, UDP and ICMP headers. + + If you want to compile it as a module, say M here and read + Documentation/modules.txt. If unsure, say `N'. + +Owner match support (EXPERIMENTAL) +CONFIG_IP_NF_MATCH_OWNER + + Packet owner matching allows you to match locally-generated packets + based on who created them: the user, group, process or session. + + If you want to compile it as a module, say M here and read + Documentation/modules.txt. If unsure, say `N'. + +Packet filtering +CONFIG_IP_NF_FILTER + Packet filtering defines a table `filter', which has a series of + rules for simple packet filtering at local input, forwarding and + local output. See the man page for iptables(8). + + If you want to compile it as a module, say M here and read + Documentation/modules.txt. If unsure, say `N'. + +REJECT target support +CONFIG_IP_NF_TARGET_REJECT + The REJECT target allows a filtering rule to specify that an ICMP + error should be issued in response to an incoming packet, rather + than silently being dropped. + + If you want to compile it as a module, say M here and read + Documentation/modules.txt. If unsure, say `N'. + +MIRROR target support (EXPERIMENTAL) +CONFIG_IP_NF_TARGET_MIRROR + The MIRROR target allows a filtering rule to specify that an + incoming packet should be bounced back to the sender. + + If you want to compile it as a module, say M here and read + Documentation/modules.txt. If unsure, say `N'. + +Full NAT +CONFIG_IP_NF_NAT + The Full NAT option allows masquerading, port forwarding and other + forms of full Network Address Port Translation. It is controlled by + the `nat' table in iptables: see the man page for iptables(8). + + If you want to compile it as a module, say M here and read + Documentation/modules.txt. If unsure, say `N'. 
+ +MASQUERADE target support +CONFIG_IP_NF_TARGET_MASQUERADE + Masquerading is a special case of NAT: all outgoing connections are + changed to seem to come from a particular interface's address, and + if the interface goes down, those connections are lost. This is + only useful for dialup accounts with dynamic IP address (ie. your IP + address will be different on next dialup). + + If you want to compile it as a module, say M here and read + Documentation/modules.txt. If unsure, say `N'. + +REDIRECT target support +CONFIG_IP_NF_TARGET_REDIRECT + REDIRECT is a special case of NAT: all incoming connections are + mapped onto the incoming interface's address, causing the packets to + come to the local machine instead of passing through. This is + useful for transparent proxies. + + If you want to compile it as a module, say M here and read + Documentation/modules.txt. If unsure, say `N'. + +Packet mangling +CONFIG_IP_NF_MANGLE + This option adds a `mangle' table to iptables: see the man page for + iptables(8). This table is used for various packet alterations + which can affect how the packet is routed. + + If you want to compile it as a module, say M here and read + Documentation/modules.txt. If unsure, say `N'. + +TOS target support +CONFIG_IP_NF_TARGET_TOS + This option adds a `TOS' target, which allows you to create rules in + the `mangle' table which alter the Type Of Service field of an IP + packet prior to routing. + + If you want to compile it as a module, say M here and read + Documentation/modules.txt. If unsure, say `N'. + +MARK target support +CONFIG_IP_NF_TARGET_MARK + This option adds a `MARK' target, which allows you to create rules in + the `mangle' table which alter the netfilter mark (nfmark) field + associated with the packet prior to routing. This can change + the routing method (see `IP: use netfilter MARK value as routing key') + and can also be used by other subsystems to change their behavior. 
+ + If you want to compile it as a module, say M here and read + Documentation/modules.txt. If unsure, say `N'. + +LOG target support +CONFIG_IP_NF_TARGET_LOG + This option adds a `LOG' target, which allows you to create rules in + any iptables table which records the packet header to the syslog. + + If you want to compile it as a module, say M here and read + Documentation/modules.txt. If unsure, say `N'. + +ipchains (2.2-style) support +CONFIG_IP_NF_COMPAT_IPCHAINS + This option places ipchains (with masquerading and redirection + support) back into the kernel, using the new netfilter + infrastructure. It is not recommended for new installations (see + `Packet filtering'). With this enabled, you should be able to use + the ipchains tool exactly as in 2.2 kernels. + + If you want to compile it as a module, say M here and read + Documentation/modules.txt. If unsure, say `N'. + +ipfwadm (2.0-style) support +CONFIG_IP_NF_COMPAT_IPFWADM + This option places ipfwadm (with masquerading and redirection + support) back into the kernel, using the new netfilter + infrastructure. It is not recommended for new installations (see + `Packet filtering'). With this enabled, you should be able to use + the ipfwadm tool exactly as in 2.0 kernels. + + If you want to compile it as a module, say M here and read + Documentation/modules.txt. If unsure, say `N'. + SYN flood protection CONFIG_SYN_COOKIES Normal TCP/IP networking is open to an attack known as "SYN @@ -3098,17 +3363,16 @@ IP: use TOS value as routing key CONFIG_IP_ROUTE_TOS - The header of every IP packet carries a TOS (Type of Service) value + The header of every IP packet carries a TOS (Type Of Service) value with which the packet requests a certain treatment, e.g. low latency (for interactive traffic), high throughput, or high reliability. If you say Y here, you will be able to specify different routes for packets with different TOS values. 
-IP: use FWMARK value as routing key +IP: use netfilter MARK value as routing key CONFIG_IP_ROUTE_FWMARK If you say Y here, you will be able to specify different routes for - packets with different FWMARK ("firewalling mark") values - (see ipchains(8), "-m" argument). + packets with different mark values (see iptables(8), MARK target). IP: verbose route monitoring CONFIG_IP_ROUTE_VERBOSE @@ -5793,6 +6057,26 @@ Guide", to be found in ftp://metalab.unc.edu/pub/Linux/docs/LDP . If unsure, say Y. +Bonding driver support +CONFIG_BONDING + Say 'Y' or 'M' if you wish to be able to 'bond' multiple Ethernet + Channels together. This is called 'Etherchannel' by Cisco, 'Trunking' + by Sun, and 'Bonding' in Linux. + + If you have two ethernet connections to some other computer, you can + make them behave like one double speed connection using this driver. + Naturally, this has to be supported at the other end as well, either + with a similar Bonding Linux driver, a Cisco 5500 switch or a + SunTrunking SunSoft driver. + + This is similar to the EQL driver, but it merges ethernet segments instead + of serial lines. + + If you want to compile this as a module ( = code which can be + inserted in and removed from the running kernel whenever you want), + say M here and read Documentation/modules.txt. The module will be + called bonding.o. + Dummy net driver support CONFIG_DUMMY This is essentially a bit-bucket device (i.e. traffic you send to @@ -7085,11 +7369,12 @@ module, say M here and read Documentation/modules.txt as well as Documentation/networking/net-modules.txt. -PCI DM9102 support +PCI DM9102(A)/DM9132/DM9801 support CONFIG_DM9102 This driver is for DM9102 compatible PCI cards from Davicom - (http://www.davicom.com.tw) - If you have a PCI DM9102 network (Ethernet) card, say Y. + (http://www.davicom.com.tw) including models DM9132 & DM9801. + If you have a PCI DM9102, DM9132 or DM9801 network (Ethernet) + card, say Y. 
This driver is also available as a module ( = code which can be inserted in and removed from the running kernel whenever you want). @@ -8221,6 +8506,14 @@ say M here and read Documentation/modules.txt. This is recommended. The module will be called skfp.o. +Linux telephony support +CONFIG_PHONE + This option enables the kernel to manage telephony cards. + + There are very few supported cards at this moment. + + If unsure, say N. + HIgh Performance Parallel Interface support (EXPERIMENTAL) CONFIG_HIPPI HIgh Performance Parallel Interface (HIPPI) is a 800Mbit/sec and @@ -9454,7 +9747,7 @@ answer N. QNX4 file system support (read only) (EXPERIMENTAL) -CONFIG_QNX4FS_FS +CONFIG_QNX4FS_FS This is the file system used by the operating system QNX 4. Say Y if you intend to mount QNX hard disks or floppies. Unless you say Y to "QNX4FS write support" below, you will only be able to read @@ -11077,6 +11370,16 @@ If unsure, say N. +Power Management support +CONFIG_PM + This enables the kernel to lower the computer's power consumption by making + some devices enter lower power levels (standby, sleep, ... modes). + Basically, this lets you save power. + Two major interfaces exist between the hardware and the OS, the older + Advanced Power Management (APM) and the newer Advanced Configuration and + Power Interface (ACPI). + Both are supported by the Linux Kernel. + Enter S1 for sleep (EXPERIMENTAL) CONFIG_ACPI_S1_SLEEP This enable ACPI compliant devices to enter level 1 of ACPI saving @@ -13908,7 +14211,7 @@ Say Y here if you intend to run this kernel on the NetWinder. Include support for the Compaq Personal Server -CONFIG_PERSONAL_SERVER +CONFIG_ARCH_PERSONAL_SERVER Say Y here if you intend to run this kernel on the Compaq Personal Server. 
diff -u --recursive --new-file v2.3.99-pre1/linux/Documentation/DocBook/Makefile linux/Documentation/DocBook/Makefile --- v2.3.99-pre1/linux/Documentation/DocBook/Makefile Tue Mar 14 19:10:38 2000 +++ linux/Documentation/DocBook/Makefile Sat Mar 18 16:41:47 2000 @@ -1,4 +1,4 @@ -BOOKS := wanbook.sgml z8530book.sgml mcabook.sgml videobook.sgml +BOOKS := wanbook.sgml z8530book.sgml mcabook.sgml videobook.sgml kernel-api.sgml books: docproc $(BOOKS) @@ -10,7 +10,7 @@ wanbook.sgml z8530book.sgml: z8530book.tmpl - $(TOPDIR)/scripts/docgen $(TOPDIR)/drivers/net/wan/syncppp.c \ + $(TOPDIR)/scripts/docgen $(TOPDIR)/drivers/net/wan/z85230.c \ z8530book.sgml mcabook.sgml: mcabook.tmpl @@ -21,9 +21,23 @@ $(TOPDIR)/scripts/docgen $(TOPDIR)/drivers/char/videodev.c \ videobook.sgml +kernel-api.sgml: kernel-api.tmpl + $(TOPDIR)/scripts/docgen $(TOPDIR)/drivers/char/videodev.c \ + $(TOPDIR)/arch/i386/kernel/mca.c \ + $(TOPDIR)/arch/i386/kernel/mtrr.c \ + $(TOPDIR)/drivers/char/misc.c \ + $(TOPDIR)/drivers/char/serial.c \ + $(TOPDIR)/drivers/char/videodev.c \ + $(TOPDIR)/drivers/sound/sound_core.c \ + $(TOPDIR)/drivers/sound/sound_firmware.c \ + $(TOPDIR)/drivers/net/wan/syncppp.c \ + $(TOPDIR)/drivers/net/wan/z85230.c \ + $(TOPDIR)/kernel/pm.c \ + kernel-api.sgml + clean: rm -f core *~ - rm -r $(BOOKS) + rm -f $(BOOKS) include $(TOPDIR)/Rules.make diff -u --recursive --new-file v2.3.99-pre1/linux/Documentation/DocBook/kernel-api.tmpl linux/Documentation/DocBook/kernel-api.tmpl --- v2.3.99-pre1/linux/Documentation/DocBook/kernel-api.tmpl Wed Dec 31 16:00:00 1969 +++ linux/Documentation/DocBook/kernel-api.tmpl Wed Mar 15 10:28:32 2000 @@ -0,0 +1,92 @@ + + + + The Linux Kernel API + + + + This documentation is free software; you can redistribute + it and/or modify it under the terms of the GNU General Public + License as published by the Free Software Foundation; either + version 2 of the License, or (at your option) any later + version. 
+ + + + This program is distributed in the hope that it will be + useful, but WITHOUT ANY WARRANTY; without even the implied + warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details. + + + + You should have received a copy of the GNU General Public + License along with this program; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, + MA 02111-1307 USA + + + + For more details see the file COPYING in the source + distribution of Linux. + + + + + + + Interrupt Handling +!Iarch/i386/kernel/irq.c + + + MTRR Handling +!Earch/i386/kernel/mtrr.c + + + + Power Management +!Ekernel/pm.c + + + + Miscellaneous Device +!Edrivers/char/misc.c + + + + Video4Linux +!Edrivers/char/videodev.c + + + + Sound Devices +!Edrivers/sound/sound_core.c +!Idrivers/sound/sound_firmware.c + + + + MCA Architecture + MCA Device Functions +!Earch/i386/kernel/mca.c + + MCA Bus DMA +!Iinclude/asm-i386/mca_dma.h + + + + + Synchronous PPP +!Edrivers/net/wan/syncppp.c + + + + 16x50 UART Driver +!Edrivers/char/serial.c + + + + Z85230 Support Library +!Edrivers/net/wan/z85230.c + + + diff -u --recursive --new-file v2.3.99-pre1/linux/Documentation/DocBook/videobook.tmpl linux/Documentation/DocBook/videobook.tmpl --- v2.3.99-pre1/linux/Documentation/DocBook/videobook.tmpl Tue Mar 14 19:10:38 2000 +++ linux/Documentation/DocBook/videobook.tmpl Wed Mar 15 10:28:32 2000 @@ -1642,7 +1642,7 @@ - API Deficiences + API Deficiencies The existing API poorly reflects compression capable devices. There diff -u --recursive --new-file v2.3.99-pre1/linux/Documentation/i2c/dev-interface linux/Documentation/i2c/dev-interface --- v2.3.99-pre1/linux/Documentation/i2c/dev-interface Mon Dec 20 18:48:21 1999 +++ linux/Documentation/i2c/dev-interface Sun Mar 19 11:15:29 2000 @@ -89,15 +89,29 @@ ioctl(file,I2C_FUNCS,unsigned long *funcs) Gets the adapter functionality and puts it in *funcs. 
+ioctl(file,I2C_RDWR,struct i2c_ioctl_rdwr_data *msgset) + + Do a combined read/write transaction without a stop in between. + The argument is a pointer to a struct i2c_ioctl_rdwr_data { + + struct i2c_msg *msgs; /* ptr to array of simple messages */ + int nmsgs; /* number of messages to exchange */ + } + + The msgs[] themselves contain further pointers into data buffers. + The function will write or read data to or from those buffers depending + on whether the I2C_M_RD flag is set in a particular message or not. + The slave address and whether to use ten bit address mode have to be + set in each message, overriding the values set with the above ioctl's. + + Other values are NOT supported at this moment, except for I2C_SMBUS, which you should never directly call; instead, use the access functions below. You can do plain i2c transactions by using read(2) and write(2) calls. -Combined read/write transactions are not yet supported (they will in -the future, through an ioctl). You do not need to pass the address -byte; instead, set it through ioctl I2C_SLAVE before you try to -access the device. +You do not need to pass the address byte; instead, set it through +ioctl I2C_SLAVE before you try to access the device. You can do SMBus level transactions (see documentation file smbus-protocol for details) through the following functions: diff -u --recursive --new-file v2.3.99-pre1/linux/Documentation/scsi-generic.txt linux/Documentation/scsi-generic.txt --- v2.3.99-pre1/linux/Documentation/scsi-generic.txt Thu Aug 5 14:34:01 1999 +++ linux/Documentation/scsi-generic.txt Sun Mar 19 11:15:29 2000 @@ -1,17 +1,18 @@ - Notes on Linux's SG driver version 2.1.34 + Notes on Linux's SG driver version 2.1.36 ----------------------------------------- - 990606 + 20000110 Introduction ============ -Sg is one of the four "high level" SCSI device drivers along with -sd, st and sr (disk, tape and CDROM respectively). 
Sg is more generalized -(but lower level) than its siblings and tends to be used on SCSI devices -that don't fit into the already serviced categories. Thus sg is used for -scanners, cd writers and reading audio cds digitally amongst other things. +The SCSI Generic driver (sg) is one of the four "high level" SCSI device +drivers along with sd, st and sr (disk, tape and CDROM respectively). Sg +is more generalized (but lower level) than its siblings and tends to be +used on SCSI devices that don't fit into the already serviced categories. +Thus sg is used for scanners, cd writers and reading audio cds digitally +amongst other things. These are notes on the Linux SCSI generic packet device driver (sg) -describing version 2.1.34 . The original driver was written by Lawrence +describing version 2.1.36 . The original driver was written by Lawrence Foard and remained in place with minimal changes since circa 1992. Version 2 of this driver remains backward compatible (binary and source **) with the original. It adds scatter gather, command queuing, @@ -27,12 +28,12 @@ of the document is version 1.5 dated 7th May 1996. It can found at ftp://sunsite.unc.edu/pub/Linux/docs/HOWTO-SCSI-Programming-HOWTO . A copy of this document can be found at: -http://www.torque.net/sg/p/original/HOWTO-SCSI-Programming-HOWTO . +http://www.torque.net/sg/p/original/HOWTO-SCSI-Programming-HOWTO.txt . ** It is possible to write applications that perform differently depending on whether they are using the original or this version of -the sg device driver. The author is not aware of any useful -pre-existing applications that have problems with version 2 (yet). +the sg device driver. The author is not aware of any useful +pre-existing applications that have problems with version 2. Architecture @@ -52,12 +53,12 @@ system calls of open(), close(), read(), write() and ioctl(). 
Two other related system calls: poll() and fcntl() are added to this list and how they interact with the sg device driver is documented later. - -An SG device is accessed by write()ing SCSI commands plus any associated + +An SG device is accessed by write()ing SCSI commands plus any associated outgoing data to it; the resulting status codes and any incoming data are then obtained by a read() call. The device can be opened O_NONBLOCK (non-blocking) and poll() used to monitor its progress. The device may be -opened O_EXCL which excludes other "sg" users from this device (but not +opened O_EXCL which excludes other "sg" users from this device (but not "sd", "st" or "sr" users). The buffer given to the write() call is made up as follows: - struct sg_header image (see below) @@ -71,6 +72,12 @@ The given SCSI command has its LUN field overwritten by the LUN value of the associated sg device that has been open()ed. +SCSI commands are only attempted once (i.e. there are no internal +retries). If appropriate (e.g. a SCSI READ) the data buffer is copied back +to user space irrespective of the values of the various SCSI related +error/status codes. [Some adapters that use an old error interface in +the SCSI mid level ignore the retry count and retry certain errors.] + sg_header ========= @@ -96,11 +103,11 @@ input variable, it is not read by sg internally (only written). The 'reply_len' is the length of the data the corresponding read() -will/should request (including the sg_header). +will/should request (including the sg_header). The 'pack_id' is not acted upon by the sg device driver but is conveyed back to the corresponding read() so it can be used for sequencing by an -application. +application. The 'result' is also bizarre, turning certain types of host codes to 0 (no error), EBUSY or EIO. 
With better error reporting now available, the @@ -138,7 +145,7 @@ unsigned int driver_status:8; /* [o] driver status+suggestion */ unsigned int other_flags:10; /* unused */ unsigned char sense_buffer[SG_MAX_SENSE]; /* [o] Output in 3 cases: - when target_status is CHECK_CONDITION or + when target_status is CHECK_CONDITION or when target_status is COMMAND_TERMINATED or when (driver_status & DRIVER_SENSE) is true. */ }; /* This structure is 36 bytes long on i386 */ @@ -164,7 +171,7 @@ values are (found in ): /* N.B. 1 bit offset from usual SCSI status values */ #define GOOD 0x00 -#define CHECK_CONDITION 0x01 +#define CHECK_CONDITION 0x01 #define CONDITION_GOOD 0x02 #define BUSY 0x04 #define INTERMEDIATE_GOOD 0x08 @@ -175,7 +182,7 @@ When the 'target_status' is CHECK_CONDITION or COMMAND_TERMINATED the 'sense_buffer' is output. Note that when (driver_status & DRIVER_SENSE) is true then the 'sense_buffer' is also output (this seems to occur when -the ide-scsi emulation is used). When the 'sense_buffer' is output the +the ide-scsi emulation is used). When the 'sense_buffer' is output the SCSI Sense Key can be found at (sense_buffer[2] & 0x0f) . The 'host_status' field is always output and has the following values @@ -269,12 +276,12 @@ Returns a file descriptor if >= 0 , otherwise -1 implies an error. Error codes (value in 'errno' after -1 returned): -EACCES Either the user doesn't have appropriate permissions on +EACCES Either the user doesn't have appropriate permissions on 'filename' or attempted to use both O_RDONLY and O_EXCL EBUSY O_NONBLOCK set and some user of this sg device has O_EXCL set while someone is already using this device EINTR while waiting for an "exclusive" lock to clear, a signal - is received, just try again ... + is received, just try again ... 
ENODEV sg not compiled into kernel or the kernel cannot find the sg module (or it can't initialize itself (low memory??)) ENOENT given filename not found @@ -320,12 +327,13 @@ This is more likely to happen when queuing commands, so wait a bit (eg usleep(10000) ) before trying again EDOM a) command queuing off: a packet is already queued - b) command queuing on: too many packets queued + b) command queuing on: too many packets queued (SG_MAX_QUEUE exceeded) - c) SCSI command length given in SG_NEXT_CMD_LEN too long EFAULT 'buffer' for 'count' bytes is an invalid memory range -EIO incoming buffer too short. It should be at least (6 + - sizeof(struct sg_header))==42 bytes long +EIO a) incoming buffer too short. It should be at least + (6 + sizeof(struct sg_header))==42 bytes long + b) SCSI command length given in SG_NEXT_CMD_LEN too long + c) reply_len negative ENOMEM can't get memory for DMA. Take evasive action ... ENXIO either scsi sub-system is currently processing some error (eg doing a device reset) or the sg driver/module removed @@ -334,7 +342,7 @@ read(int sg_fd, void * buffer, size_t count) -------------------------------------------- -Read() is used to receive a packet containing 1 mandatory part and 1 +Read() is used to receive a packet containing 1 mandatory part and 1 optional part. The mandatory part is: - a control block (an instance of struct sg_header) The optional part is: @@ -343,7 +351,7 @@ sufficient to accommodate this packet to avoid truncation. Truncation occurs if count < sg_header::replylen . -By default, read() will return the oldest packet queued up. If the +By default, read() will return the oldest packet queued up. If the SG_SET_FORCE_PACK_ID,1 ioctl() is active then read() will attempt to fetch the packet whose pack_id (given earlier to write()) matches the sg_header::pack_id given to this read(). If not available it will either @@ -356,12 +364,16 @@ DMA-ed by the SCSI device. 
This driver is currently unable to provide such an underrun indication. +If the SCSI device reports an error then a REQUEST SENSE is automatically +done and the output is placed in the sense_buffer array which is in the +control block. This action is sometimes called "auto-sense". + Error codes (value in 'errno' after -1 returned): EAGAIN either no waiting packet or requested packet is not available while O_NONBLOCK flag was set EFAULT 'buffer' for 'count' bytes is an invalid memory range EINTR while waiting for a packet, a signal is received, just - try again ... + try again ... EIO if the 'count' given to read() is < sizeof(struct sg_header) and the 'result' element in sg_header is non-zero. Not a recommended error reporting technique @@ -380,11 +392,11 @@ flight". A process that has an open file descriptor to an sg device may be aborted -(eg by a kill signal). In this case, the kernel automatically calls close +(eg by a kill signal). In this case, the kernel automatically calls close (which is called 'sg_release()' in the version 2 driver) to facilitate the cleanup mentioned above. -A problem persists in version 2.1.34 if the sg driver is a module and is +A problem persists in version 2.1.36 if the sg driver is a module and is removed while packets are still "in flight". Returns 0 if successful, otherwise -1 implies an error. @@ -394,7 +406,7 @@ ioctl(int sg_fd, int command, ...) [sg specific] ------------------------------------------------- -Ken Thompson (or perhaps some other Unix luminary) described ioctl() as +Ken Thompson (or perhaps some other Unix luminary) described ioctl() as the "garbage bin of Unix". This driver compounds the situation by adding more ... If a ioctl command is not recognized by sg (and the various lower levels @@ -417,7 +429,8 @@ for this file descriptor. The unit is a "jiffy". Packets that are already "in flight" will not be affected. The default value is set on open() and is SG_DEFAULT_TIMEOUT (defined in sg.h). 
This default is -currently 1 minute and may not be long enough for formats. +currently 1 minute and may not be long enough for formats. Negative +values will yield an EIO error. SG_EMULATED_HOST: Assumes 3rd argument points to an int and outputs a flag indicating @@ -426,33 +439,31 @@ while 0 is not. SG_SET_TRANSFORM W: -Third argument is ignored. Only is meaningful when SG_EMULATED host has -yielded 1 (ie the low-level is the ide-scsi device driver); otherwise -an EINVAL error occurs. The default state is to _not_ transform SCSI -commands to the corresponding ATAPI commands but pass them straight -through as is. [Only certain classes of SCSI commands need to be -transformed to their ATAPI equivalents.] Making this ioctl command causes -transforms to occur thereafter. Subsequent calls to this ioctl command -have no additional effect. Beware, this state will affect all devices -(and hence all related sg file descriptors) associated with this ide-scsi -"bus". -The author of ide-scsi has pointed out that this is not the intended -behaviour which is a 3rd argument of 0 to disable transforms and 1 to -enable transforms. Note the 3rd argument is an 'int' not a 'int *'. -Perhaps the intended behaviour will be implemented soon. +Only is meaningful when SG_EMULATED host has yielded 1 (i.e. the low-level +is the ide-scsi device driver); otherwise an EINVAL error occurs. The +default state is to _not_ transform SCSI commands to the corresponding +ATAPI commands but pass them straight through as is. [Only certain classes +of SCSI commands need to be transformed to their ATAPI equivalents.] +The third argument is interpreted as an integer. When it is non-zero then +a flag is set inside the ide-scsi driver that transforms subsequent +commands sent to this driver. When zero is passed as the 3rd argument to +this ioctl then the flag within the ide-scsi driver is cleared and +subsequent commands are not transformed. 
Beware, this state will affect +all devices (and hence all related sg file descriptors) associated with +this ide-scsi "bus". SG_GET_TRANSFORM: Third argument is ignored. Only is meaningful when SG_EMULATED host has yielded 1 (ie the low-level is the ide-scsi device driver); otherwise an EINVAL error occurs. Returns 0 to indicate _not_ transforming SCSI -to ATAPI commands (default). Returns 1 when it is transforming. +to ATAPI commands (default). Returns 1 when it is transforming them. SG_SET_FORCE_LOW_DMA +: Assumes 3rd argument points to an int containing 0 or 1. 0 (default) means sg decides whether to use memory above 16 Mbyte level (on i386) based on the host adapter being used by this SCSI device. Typically PCI SCSI adapters will indicate they can DMA to the whole 32 bit address -space. +space. If 1 is given then the host adapter is overridden and only memory below the 16MB level is used for DMA. A requirement for this should be extremely rare. If the "reserved" buffer allocated on open() is not in @@ -469,15 +480,21 @@ SG_GET_SCSI_ID +: Assumes 3rd argument is pointing to an object of type Sg_scsi_id (see -sg.h) and populates it. That structure contains ints for host_no, -channel, scsi_id, lun and scsi_type. Most of this information is -available from other sources (eg SCSI_IOCTL_GET_IDLUN and -SCSI_IOCTL_GET_BUS_NUMBER) but tends to be awkward to collect. +sg.h) and populates it. That structure contains ints for host_no, +channel, scsi_id, lun, scsi_type, allowable commands per lun and +queue_depth. Most of this information is available from other sources +(eg SCSI_IOCTL_GET_IDLUN and SCSI_IOCTL_GET_BUS_NUMBER) but tends to be +awkward to collect. +Allowable commands per lun and queue_depth give an insight to the +command queuing capabilities of the adapters and the device. 
The latter +overrides the former (logically) and the former is only of interest +if it is equal to queue_depth which probably indicates the device +does not support queueing commands (e.g. most scanners). SG_SET_FORCE_PACK_ID +: Assumes 3rd argument is pointing to an int. 0 (default) instructs read() to return the oldest (written) packet if multiple packets are -waiting to be read (when command queuing is being used). +waiting to be read (when command queuing is being used). 1 instructs read() to view the sg_header::pack_id as input and return the oldest packet matching that pack_id or wait until it arrives (or yield EAGAIN if O_NONBLOCK is in force). As a special case the pack_id of -1 @@ -524,7 +541,7 @@ device then an EBUSY error occurs. Per device sequencing was the original semantics and allowed, for example different processes to "share" the device, one perhaps write()ing with the other one read()ing. This command -is supplied if anyone needs those semantics. Per file descriptor +is supplied if anyone needs those semantics. Per file descriptor sequencing, perhaps with the use of the O_EXCL flag, seems more sensible. SG_GET_MERGE_FD +: @@ -548,7 +565,7 @@ SG_SET_UNDERRUN_FLAG +: Assumes 3rd argument is pointing to an int. 0 (current default, set by SG_DEF_UNDERRUN_FLAG in sg.h) requests underruns be ignored. 1 requests -that underruns be flagged. [The only low level driver that acts on this +that underruns be flagged. [The only low level driver that acts on this at the moment is the aic7xxx which yields a DID_ERROR error on underrun.] Only the current file descriptor is affected by this command (unless "per device" sequencing has been selected). @@ -571,7 +588,13 @@ SG_GET_VERSION_NUM +: Assumes 3rd argument points to an int. The version number is then placed -in that int. A sg version such as 2.1.34 will yield "20134" from this ioctl. +in that int. A sg version such as 2.1.36 will yield "20136" from this ioctl. 
+ +SG_SCSI_RESET +: +Assumes 3rd argument points to an int. Unfortunately doesn't currently +do much (may in the future after other issues are resolved). Yields an +EBUSY error if the SCSI bus or the associated device is being reset +when this ioctl() is called, otherwise returns 0. SG_SET_DEBUG +: Assumes 3rd argument is pointing to an int. 0 (default) turns debugging @@ -641,13 +664,16 @@ Utility and Test Programs ========================= -See the README file in the sg_utils.tgz tarball. At the time of -writing this was sg_utils990527.tgz . +See the README file in the sg_utils.tgz tarball. Look on the +http://www.torque.net/sg website for the latest version. Briefly, that tarball contains the following utilities: sg_dd512 'dd' like program that assumes 512 byte blocks size sg_dd2048 'dd' like program that assumes 2048 byte blocks size +sg_dd2352 'dd' like program that assumes 2352 byte blocks size sgq_dd512 like 'sg_dd512' but does command queuing on "if" +sgp_dd probably the most flexible 'dd' variant. It uses POSIX + threads, block size set by "bs=..." plus other options. sg_scan outputs information (optionally Inquiry) on SCSI devices sg_rbuf tests SCSI bus transfer speed (without physical IO) sg_whoami outputs info (optionally capacity) of given SCSI device @@ -657,6 +683,9 @@ It also contains the following test programs: sg_debug outputs sg driver state to console/log file sg_poll tests asynchronous notification +sg_runt_ex example run time selection program for application authors +sg_simple1 example program first time users +sg_simple2 like sg_simple1 but with more primitive error processing sg_inquiry does a SCSI Inquiry command (from original HOWTO) sg_tst_med checks presence of media (from original HOWTO) @@ -664,36 +693,40 @@ SCSI 2 errors and warnings. This code is used by most of the above utility and test programs. 
-The following programs: sg_dd512, sg_dd2048, sg_scan, sg_rbuf, sg_tst_med, -sg_inquiry and sginfo, can be compiled either for this new sg driver _or_ -the original sg driver. +The following programs: sg_dd512, sg_dd2048, sg_dd2352, sg_scan, sg_runt_ex, +sg_rbuf, sg_tst_med, sg_inquiry and sginfo, can be compiled either for this +new sg driver _or_ the original sg driver (in 2.0 or 2.2 series kernels). +sg_runt_ex can be run on 2.0, 2.2 or 2.3 series kernels even if it is +compiled on a different series (eg compiled on 2.0, run on 2.2). Header files ============ User applications need to find the correct "sg.h" header file matching -their kernel in order to write code using the sg device driver. This is +their kernel in order to write code using the sg device driver. This is sometimes more difficult than it should be. The correct "sg.h" will usually -be found at /usr/src/linux/include/scsi/sg.h . Another important header +be found at /usr/src/linux/include/scsi/sg.h . Another important header file is "scsi.h" which will be in the same directory. -Several distributions have taken their own copies of these files and placed -them in /usr/include/scsi which is where "#include <scsi/sg.h>" would go -looking. The directory /usr/include/scsi _should_ be a symbolic link to -/usr/src/linux/include/scsi/ . It was is Redhat 5.1 and 5.2 but it is -not is Redhat 6.0 . Some other distributions have the same problem. To -solve this (as root) do the following: - -# cd /usr/include -# mv scsi scsi_orig -# ln -s ../src/linux/include/scsi scsi - -This doesn't seem to be a problem with /usr/include/linux (at least in -Redhat where it is a symbolic link) so it is hard to understand why -/usr/include/scsi is defined the way it is. The fact the -/usr/include/linux is a symbolic link opens up the following solution -proposed by the author of cdparanoia (Monty): -#include <linux/../scsi/sg.h> +When "#include <scsi/sg.h>" is written in an application then this refers +to the file /usr/include/scsi/sg.h .
A problem sometimes arises because +the files in the /usr/include/scsi directory are controlled by the GNU +library people who maintain glibc. Unfortunately these 2 versions of +the sg.h header file are not always in sync. [This was the case in Redhat +6.0 and 6.1 .] Glibc 2.1.3 and later versions should get this right. + +If this is a problem, the user may need to copy sg.h (and scsi.h) from +the kernel source includes to /usr/include/scsi. If the user can change +the affected source code then another approach is to rely on the fact that +/usr/include/linux is a symbolic link to /usr/src/linux/include/linux and +change the sg.h include to look like: + #include <linux/../scsi/sg.h> +This solution is used by the author of cdparanoia (Monty) in his application. + +[Former scsi generic documents suggested adding a symbolic link to +bypass this problem but that is not popular with the glibc maintainers. +I would like to thank Andreas Jaeger for his contributions +on this subject.] Extra information in scsi-generic_long.txt @@ -709,6 +742,27 @@ - shortcomings - future directions - an appendix with some SCSI 2 information in it + + +References +========== +http://www.t10.org Very important site for SCSI related information. + Contains SCSI 2 and 3 draft standards. +http://www.andante.org/scsi.html + This is Eric Youngdale's site. Eric is primarily + responsible for the Linux SCSI architecture and + its mid-level implementation. +http://www.kernel.dk Jens Axboe's site for Linux cdrom matters including + the SCSI "sr" driver. +http://www.torque.net/sg + My site with sg related information. +newsgroup:linux-scsi@vger.rutgers.edu + Newsgroup for Linux related SCSI matters +/usr/src/linux/MAINTAINERS + This is a file in the Linux kernel source that + contains up to date information about who maintains + what and where information can be found. Links to + SCSI adapter information are also here.
Conclusion diff -u --recursive --new-file v2.3.99-pre1/linux/Documentation/sysrq.txt linux/Documentation/sysrq.txt --- v2.3.99-pre1/linux/Documentation/sysrq.txt Sun Nov 7 16:37:33 1999 +++ linux/Documentation/sysrq.txt Wed Mar 15 13:29:12 2000 @@ -1,6 +1,7 @@ - MAGIC SYSRQ KEY DOCUMENTATION v1.2 + + MAGIC SYSRQ KEY DOCUMENTATION v1.31 ------------------------------------ - [Sat May 16 01:09:21 EDT 1998] + [Mon Mar 13 21:45:48 EST 2000] * What is the magic SysRQ key? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -11,7 +12,10 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ You need to say "yes" to 'Magic SysRq key (CONFIG_MAGIC_SYSRQ)' when configuring the kernel. This option is only available in 2.1.x or later -kernels. +kernels. Once you boot the new kernel, you need to enable it manually +using the following command: + + echo "1" > /proc/sys/kernel/sysrq * How do I use the magic SysRQ key? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -28,7 +32,8 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 'r' - Turns off keyboard raw mode and sets it to XLATE. -'k' - Kills all programs on the current virtual console. +'k' - Secure Access Key (SAK) Kills all programs on the current virtual + console. NOTE: See important comments below in SAK section. 'b' - Will immediately reboot the system without syncing or unmounting your disks. @@ -58,12 +63,23 @@ 'l' - Send a SIGKILL to all processes, INCLUDING init. (Your system will be non-functional after this.) +'h' - Will display help (actually any key other than those listed + above will display help, but 'h' is easy to remember :-) + * Okay, so what can I use them for? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Well, un'R'aw is very handy when your X server or a svgalib program crashes. -sa'K' (system attention key) is useful when you want to exit a program -that will not let you switch consoles. (For example, X or a svgalib program.)
+sa'K' (Secure Access Key) is useful when you want to be sure there are no +trojan programs running on the console which could grab your password +when you try to log in. It will kill all programs on the given console, +thus letting you make sure that the login prompt you see is actually +the one from init, not some trojan program. +IMPORTANT:In its true form it is not a true SAK like the one in :IMPORTANT +IMPORTANT:c2 compliant systems, and it should not be mistaken as such. :IMPORTANT + It seems others find it useful as (System Attention Key) which is +useful when you want to exit a program that will not let you switch consoles. +(For example, X or a svgalib program.) re'B'oot is good when you're unable to shut down. But you should also 'S'ync and 'U'mount first. @@ -108,6 +124,10 @@ * I have more questions, who can I ask? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ You may feel free to send email to myrdraal@deathsdoor.com, and I will -respond as soon as possible. If that email address does not work, use -myrdraal@jackalz.dyn.ml.org. +respond as soon as possible. -Myrdraal + +* Credits +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Written by Myrdraal +Updated by Adam Sulmicki diff -u --recursive --new-file v2.3.99-pre1/linux/Documentation/usb/usb-serial.txt linux/Documentation/usb/usb-serial.txt --- v2.3.99-pre1/linux/Documentation/usb/usb-serial.txt Tue Mar 7 14:32:25 2000 +++ linux/Documentation/usb/usb-serial.txt Fri Mar 17 14:11:49 2000 @@ -90,8 +90,6 @@ not all of the standard USB descriptors are handled: Get_Status, Set_Feature O_NONBLOCK, select() - The device usually appears at /dev/ttyUSB1 .
- Generic Serial driver diff -u --recursive --new-file v2.3.99-pre1/linux/MAINTAINERS linux/MAINTAINERS --- v2.3.99-pre1/linux/MAINTAINERS Tue Mar 14 19:10:38 2000 +++ linux/MAINTAINERS Fri Mar 17 10:56:19 2000 @@ -689,6 +689,8 @@ NETFILTER P: Rusty Russell M: Rusty.Russell@rustcorp.com.au +P: Marc Boucher +M: marc@mbsi.ca W: http://www.samba.org/netfilter/ W: http://netfilter.kernelnotes.org W: http://antarctica.penguincomputing.com/~netfilter/ diff -u --recursive --new-file v2.3.99-pre1/linux/Makefile linux/Makefile --- v2.3.99-pre1/linux/Makefile Tue Mar 14 19:10:38 2000 +++ linux/Makefile Sun Mar 19 10:16:36 2000 @@ -1,11 +1,11 @@ VERSION = 2 PATCHLEVEL = 3 SUBLEVEL = 99 -EXTRAVERSION = -pre1 +EXTRAVERSION = -pre2 -ARCH := $(shell uname -m | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ -e s/arm.*/arm/ -e s/sa110/arm/) +KERNELRELEASE=$(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION) -.EXPORT_ALL_VARIABLES: +ARCH := $(shell uname -m | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ -e s/arm.*/arm/ -e s/sa110/arm/) CONFIG_SHELL := $(shell if [ -x "$$BASH" ]; then echo $$BASH; \ else if [ -x /bin/bash ]; then echo /bin/bash; \ @@ -15,22 +15,33 @@ HPATH = $(TOPDIR)/include FINDHPATH = $(HPATH)/asm $(HPATH)/linux $(HPATH)/scsi $(HPATH)/net -HOSTCC =gcc -HOSTCFLAGS =-Wall -Wstrict-prototypes -O2 -fomit-frame-pointer +HOSTCC = gcc +HOSTCFLAGS = -Wall -Wstrict-prototypes -O2 -fomit-frame-pointer CROSS_COMPILE = -AS =$(CROSS_COMPILE)as -LD =$(CROSS_COMPILE)ld -CC =$(CROSS_COMPILE)gcc -CPP =$(CC) -E -AR =$(CROSS_COMPILE)ar -NM =$(CROSS_COMPILE)nm -STRIP =$(CROSS_COMPILE)strip -OBJCOPY =$(CROSS_COMPILE)objcopy -OBJDUMP =$(CROSS_COMPILE)objdump -MAKE =make -GENKSYMS=/sbin/genksyms +# +# Include the make variables (CC, etc...) 
+# + +AS = $(CROSS_COMPILE)as +LD = $(CROSS_COMPILE)ld +CC = $(CROSS_COMPILE)gcc +CPP = $(CC) -E +AR = $(CROSS_COMPILE)ar +NM = $(CROSS_COMPILE)nm +STRIP = $(CROSS_COMPILE)strip +OBJCOPY = $(CROSS_COMPILE)objcopy +OBJDUMP = $(CROSS_COMPILE)objdump +MAKE = make +MAKEFILES = $(TOPDIR)/.config +GENKSYMS = /sbin/genksyms +MODFLAGS = -DMODULE +PERL = perl + +export VERSION PATCHLEVEL SUBLEVEL EXTRAVERSION KERNELRELEASE ARCH \ + CONFIG_SHELL TOPDIR HPATH HOSTCC HOSTCFLAGS CROSS_COMPILE AS LD CC \ + CPP AR NM STRIP OBJCOPY OBJDUMP MAKE MAKEFILES GENKSYMS MODFLAGS PERL all: do-it-all @@ -38,6 +49,7 @@ # Make "config" the default target if there is no configuration file or # "depend" the target if there is no top-level dependency information. # + ifeq (.config,$(wildcard .config)) include .config ifeq (.depend,$(wildcard .depend)) @@ -53,20 +65,11 @@ endif # -# ROOT_DEV specifies the default root-device when making the image. -# This can be either FLOPPY, CURRENT, /dev/xxxx or empty, in which case -# the default of FLOPPY is used by 'build'. -# - -ROOT_DEV = CURRENT - -KERNELRELEASE=$(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION) - -# # INSTALL_PATH specifies where to place the updated kernel and system map # images. Uncomment if you want to place them anywhere other than root. +# -#INSTALL_PATH=/boot +#export INSTALL_PATH=/boot # # INSTALL_MOD_PATH specifies a prefix to MODLIB for module directory @@ -75,15 +78,6 @@ # # -# If you want to preset the SVGA mode, uncomment the next line and -# set SVGA_MODE to whatever number you want. -# Set it to -DSVGA_MODE=NORMAL_VGA if you just want the EGA/VGA mode. -# The number is the same as you would ordinarily press at bootup. -# - -SVGA_MODE= -DSVGA_MODE=NORMAL_VGA - -# # standard CFLAGS # @@ -96,16 +90,35 @@ CFLAGS := $(CPPFLAGS) -Wall -Wstrict-prototypes -O2 -fomit-frame-pointer AFLAGS := $(CPPFLAGS) +export CPPFLAGS CFLAGS AFLAGS + # -# if you want the RAM disk device, define this to be the -# size in blocks. 
+# ROOT_DEV specifies the default root-device when making the image. +# This can be either FLOPPY, CURRENT, /dev/xxxx or empty, in which case +# the default of FLOPPY is used by 'build'. +# This is i386 specific. # -#RAMDISK = -DRAMDISK=512 +export ROOT_DEV = CURRENT -# Include the make variables (CC, etc...) +# +# If you want to preset the SVGA mode, uncomment the next line and +# set SVGA_MODE to whatever number you want. +# Set it to -DSVGA_MODE=NORMAL_VGA if you just want the EGA/VGA mode. +# The number is the same as you would ordinarily press at bootup. +# This is i386 specific. # +export SVGA_MODE = -DSVGA_MODE=NORMAL_VGA + +# +# if you want the RAM disk device, define this to be the +# size in blocks. +# This is i386 specific. +# + +#export RAMDISK = -DRAMDISK=512 + CORE_FILES =kernel/kernel.o mm/mm.o fs/fs.o ipc/ipc.o NETWORKS =net/network.a DRIVERS =drivers/block/block.a \ @@ -166,6 +179,9 @@ include arch/$(ARCH)/Makefile +export CORE_FILES NETWORKS DRIVERS LIBS HEAD LDFLAGS LIBS LINKFLAGS \ + MAKEBOOT ASFLAGS + # use '-fno-strict-aliasing', but only if the compiler can take it CFLAGS += $(shell if $(CC) -fno-strict-aliasing -S -o /dev/null -xc /dev/null >/dev/null 2>&1; then echo "-fno-strict-aliasing"; fi) @@ -276,7 +292,6 @@ find include -type d \( -name "asm-*" -o -name config \) -prune -o -name '*.h' -print | xargs ctags $$CTAGSF -a && \ find $(SUBDIRS) init -name '*.c' | xargs ctags $$CTAGSF -a -MODFLAGS += -DMODULE ifdef CONFIG_MODULES ifdef CONFIG_MODVERSIONS MODFLAGS += -DMODVERSIONS -include $(HPATH)/linux/modversions.h @@ -320,6 +335,7 @@ if [ -f SK98LIN_MODULES ]; then inst_mod SK98LIN_MODULES net; fi; \ if [ -f SKFP_MODULES ]; then inst_mod SKFP_MODULES net; fi; \ if [ -f USB_MODULES ]; then inst_mod USB_MODULES usb; fi; \ + if [ -f USB_SERIAL_MODULES ]; then inst_mod USB_SERIAL_MODULES usb; fi; \ if [ -f IEEE1394_MODULES ]; then inst_mod IEEE1394_MODULES ieee1394; fi; \ if [ -f PCMCIA_MODULES ]; then inst_mod PCMCIA_MODULES pcmcia; fi; \ if 
[ -f PCMCIA_NET_MODULES ]; then inst_mod PCMCIA_NET_MODULES pcmcia; fi; \ @@ -380,7 +396,7 @@ rm -rf include/config rm -f .depend `find . -type f -name .depend -print` rm -f core `find . -type f -size 0 -print` - rm -f .hdepend scripts/mkdep scripts/split-include + rm -f .hdepend scripts/mkdep scripts/split-include scripts/docproc rm -f $(TOPDIR)/include/linux/modversions.h rm -rf $(TOPDIR)/include/linux/modules @@ -404,20 +420,21 @@ scripts/mkdep `find $(FINDHPATH) -follow -name \*.h ! -name modversions.h -print` > .hdepend $(MAKE) $(patsubst %,_sfdep_%,$(SUBDIRS)) _FASTDEP_ALL_SUB_DIRS="$(SUBDIRS)" -MODVERFILE := - ifdef CONFIG_MODVERSIONS MODVERFILE := $(TOPDIR)/include/linux/modversions.h +else +MODVERFILE := endif +export MODVERFILE depend dep: dep-files $(MODVERFILE) # make checkconfig: Prune 'scripts' directory to avoid "false positives". checkconfig: - find * -name '*.[hcS]' -type f -print | grep -v scripts/ | sort | xargs perl -w scripts/checkconfig.pl + find * -name '*.[hcS]' -type f -print | grep -v scripts/ | sort | xargs $(PERL) -w scripts/checkconfig.pl checkhelp: - perl -w scripts/checkhelp.pl `find * -name [cC]onfig.in -print` + $(PERL) -w scripts/checkhelp.pl `find * -name [cC]onfig.in -print` ifdef CONFIGURATION ..$(CONFIGURATION): diff -u --recursive --new-file v2.3.99-pre1/linux/arch/alpha/config.in linux/arch/alpha/config.in --- v2.3.99-pre1/linux/arch/alpha/config.in Tue Mar 14 19:10:38 2000 +++ linux/arch/alpha/config.in Thu Mar 16 14:08:32 2000 @@ -126,6 +126,7 @@ then define_bool CONFIG_PCI y define_bool CONFIG_ALPHA_EV5 y + define_bool CONFIG_ALPHA_CIA y define_bool CONFIG_ALPHA_PYXIS y fi if [ "$CONFIG_ALPHA_DP264" = "y" -o "$CONFIG_ALPHA_EIGER" = "y" ] diff -u --recursive --new-file v2.3.99-pre1/linux/arch/alpha/kernel/Makefile linux/arch/alpha/kernel/Makefile --- v2.3.99-pre1/linux/arch/alpha/kernel/Makefile Tue Mar 7 14:32:25 2000 +++ linux/arch/alpha/kernel/Makefile Thu Mar 16 14:08:32 2000 @@ -18,7 +18,7 @@ OX_OBJS := alpha_ksyms.o 
L_TARGET := rest.a -L_OBJS := irq_i8259.o irq_srm.o \ +L_OBJS := irq_i8259.o irq_srm.o irq_pyxis.o \ es1888.o smc37c669.o smc37c93x.o ns87312.o ifdef CONFIG_SMP @@ -32,7 +32,7 @@ ifdef CONFIG_ALPHA_GENERIC O_OBJS += core_apecs.o core_cia.o core_irongate.o core_lca.o core_mcpcia.o \ - core_polaris.o core_pyxis.o core_t2.o core_tsunami.o \ + core_polaris.o core_t2.o core_tsunami.o \ sys_alcor.o sys_cabriolet.o sys_dp264.o sys_eb64p.o sys_eiger.o \ sys_jensen.o sys_miata.o sys_mikasa.o sys_nautilus.o \ sys_noritake.o sys_rawhide.o sys_ruffian.o sys_rx164.o \ @@ -55,9 +55,6 @@ endif ifdef CONFIG_ALPHA_MCPCIA O_OBJS += core_mcpcia.o -endif -ifdef CONFIG_ALPHA_PYXIS -O_OBJS += core_pyxis.o endif ifdef CONFIG_ALPHA_T2 O_OBJS += core_t2.o diff -u --recursive --new-file v2.3.99-pre1/linux/arch/alpha/kernel/alpha_ksyms.c linux/arch/alpha/kernel/alpha_ksyms.c --- v2.3.99-pre1/linux/arch/alpha/kernel/alpha_ksyms.c Sat Feb 26 22:31:38 2000 +++ linux/arch/alpha/kernel/alpha_ksyms.c Sat Mar 18 11:39:12 2000 @@ -37,6 +37,7 @@ extern void dump_thread(struct pt_regs *, struct user *); extern int dump_fpu(struct pt_regs *, elf_fpregset_t *); extern spinlock_t kernel_flag; +extern spinlock_t rtc_lock; /* these are C runtime functions with special calling conventions: */ extern void __divl (void); @@ -106,6 +107,7 @@ EXPORT_SYMBOL(pci_unmap_single); EXPORT_SYMBOL(pci_map_sg); EXPORT_SYMBOL(pci_unmap_sg); +EXPORT_SYMBOL(pci_dma_supported); EXPORT_SYMBOL(dump_thread); EXPORT_SYMBOL(dump_fpu); @@ -201,6 +203,8 @@ EXPORT_SYMBOL(__local_bh_count); EXPORT_SYMBOL(__local_irq_count); #endif /* __SMP__ */ + +EXPORT_SYMBOL(rtc_lock); /* * The following are special because they're not called diff -u --recursive --new-file v2.3.99-pre1/linux/arch/alpha/kernel/core_cia.c linux/arch/alpha/kernel/core_cia.c --- v2.3.99-pre1/linux/arch/alpha/kernel/core_cia.c Thu Mar 2 14:36:22 2000 +++ linux/arch/alpha/kernel/core_cia.c Thu Mar 16 14:08:32 2000 @@ -6,7 +6,7 @@ * * Copyright (C) 1995 David A Rusling * 
Copyright (C) 1997, 1998 Jay Estabrook - * Copyright (C) 1998, 1999 Richard Henderson + * Copyright (C) 1998, 1999, 2000 Richard Henderson * * Code common to all CIA core logic chips. */ @@ -25,6 +25,8 @@ #include #undef __EXTERN_INLINE +#include + #include "proto.h" #include "pci_impl.h" @@ -35,27 +37,20 @@ * handle the system transaction. Another involves timing. Ho hum. */ -/* - * BIOS32-style PCI interface: - */ - #define DEBUG_CONFIG 0 -#define DEBUG_DUMP_REGS 0 - #if DEBUG_CONFIG # define DBGC(args) printk args #else # define DBGC(args) #endif -#define vuip volatile unsigned int * +#define vip volatile int * /* * Given a bus, device, and function number, compute resulting - * configuration space address and setup the CIA_HAXR2 register - * accordingly. It is therefore not safe to have concurrent - * invocations to configuration space access routines, but there - * really shouldn't be any need for this. + * configuration space address. It is therefore not safe to have + * concurrent invocations to configuration space access routines, but + * there really shouldn't be any need for this. * * Type 0: * @@ -96,34 +91,16 @@ mk_conf_addr(struct pci_dev *dev, int where, unsigned long *pci_addr, unsigned char *type1) { - unsigned long addr; u8 bus = dev->bus->number; u8 device_fn = dev->devfn; - DBGC(("mk_conf_addr(bus=%d, device_fn=0x%x, where=0x%x, " - "pci_addr=0x%p, type1=0x%p)\n", - bus, device_fn, where, pci_addr, type1)); - - if (bus == 0) { - int device = device_fn >> 3; - - /* Type 0 configuration cycle. */ - - if (device > 20) { - DBGC(("mk_conf_addr: device (%d) > 20, returning -1\n", - device)); - return -1; - } + *type1 = (bus != 0); + *pci_addr = (bus << 16) | (device_fn << 8) | where; + + DBGC(("mk_conf_addr(bus=%d ,device_fn=0x%x, where=0x%x," + " returning address 0x%p\n" + bus, device_fn, where, *pci_addr)); - *type1 = 0; - addr = (device_fn << 8) | (where); - } else { - /* Type 1 configuration cycle. 
*/ - *type1 = 1; - addr = (bus << 16) | (device_fn << 8) | (where); - } - *pci_addr = addr; - DBGC(("mk_conf_addr: returning pci_addr 0x%lx\n", addr)); return 0; } @@ -131,43 +108,37 @@ conf_read(unsigned long addr, unsigned char type1) { unsigned long flags; - unsigned int stat0, value; - unsigned int cia_cfg = 0; + int stat0, value; + int cia_cfg = 0; - value = 0xffffffffU; - mb(); - - __save_and_cli(flags); /* avoid getting hit by machine check */ - - DBGC(("conf_read(addr=0x%lx, type1=%d)\n", addr, type1)); + DBGC(("conf_read(addr=0x%lx, type1=%d) ", addr, type1)); + __save_and_cli(flags); /* Reset status register to avoid losing errors. */ - stat0 = *(vuip)CIA_IOC_CIA_ERR; - *(vuip)CIA_IOC_CIA_ERR = stat0; + stat0 = *(vip)CIA_IOC_CIA_ERR; + *(vip)CIA_IOC_CIA_ERR = stat0; mb(); - DBGC(("conf_read: CIA ERR was 0x%x\n", stat0)); /* If Type1 access, must set CIA CFG. */ if (type1) { - cia_cfg = *(vuip)CIA_IOC_CFG; - *(vuip)CIA_IOC_CFG = cia_cfg | 1; + cia_cfg = *(vip)CIA_IOC_CFG; + *(vip)CIA_IOC_CFG = (cia_cfg & ~3) | 1; mb(); - DBGC(("conf_read: TYPE1 access\n")); + *(vip)CIA_IOC_CFG; } - mb(); draina(); mcheck_expected(0) = 1; mcheck_taken(0) = 0; mb(); /* Access configuration space. */ - value = *(vuip)addr; + value = *(vip)addr; mb(); mb(); /* magic */ if (mcheck_taken(0)) { mcheck_taken(0) = 0; - value = 0xffffffffU; + value = 0xffffffff; mb(); } mcheck_expected(0) = 0; @@ -175,13 +146,14 @@ /* If Type1 access, must reset IOC CFG so normal IO space ops work. 
*/ if (type1) { - *(vuip)CIA_IOC_CFG = cia_cfg & ~1; + *(vip)CIA_IOC_CFG = cia_cfg; mb(); + *(vip)CIA_IOC_CFG; } - DBGC(("conf_read(): finished\n")); - __restore_flags(flags); + DBGC(("done\n")); + return value; } @@ -189,31 +161,31 @@ conf_write(unsigned long addr, unsigned int value, unsigned char type1) { unsigned long flags; - unsigned int stat0; - unsigned int cia_cfg = 0; + int stat0, cia_cfg = 0; - __save_and_cli(flags); /* avoid getting hit by machine check */ + DBGC(("conf_write(addr=0x%lx, type1=%d) ", addr, type1)); + __save_and_cli(flags); /* Reset status register to avoid losing errors. */ - stat0 = *(vuip)CIA_IOC_CIA_ERR; - *(vuip)CIA_IOC_CIA_ERR = stat0; + stat0 = *(vip)CIA_IOC_CIA_ERR; + *(vip)CIA_IOC_CIA_ERR = stat0; mb(); - DBGC(("conf_write: CIA ERR was 0x%x\n", stat0)); /* If Type1 access, must set CIA CFG. */ if (type1) { - cia_cfg = *(vuip)CIA_IOC_CFG; - *(vuip)CIA_IOC_CFG = cia_cfg | 1; + cia_cfg = *(vip)CIA_IOC_CFG; + *(vip)CIA_IOC_CFG = (cia_cfg & ~3) | 1; mb(); - DBGC(("conf_write: TYPE1 access\n")); + *(vip)CIA_IOC_CFG; } draina(); mcheck_expected(0) = 1; + mcheck_taken(0) = 0; mb(); /* Access configuration space. */ - *(vuip)addr = value; + *(vip)addr = value; mb(); mb(); /* magic */ @@ -222,12 +194,13 @@ /* If Type1 access, must reset IOC CFG so normal IO space ops work. */ if (type1) { - *(vuip)CIA_IOC_CFG = cia_cfg & ~1; + *(vip)CIA_IOC_CFG = cia_cfg; mb(); + *(vip)CIA_IOC_CFG; } - DBGC(("conf_write(): finished\n")); __restore_flags(flags); + DBGC(("done\n")); } static int @@ -314,158 +287,442 @@ write_dword: cia_write_config_dword }; +/* + * CIA Pass 1 and PYXIS Pass 1 and 2 have a broken scatter-gather tlb. + * It cannot be invalidated. Rather than hard code the pass numbers, + * actually try the tbia to see if it works. + */ + void cia_pci_tbi(struct pci_controler *hose, dma_addr_t start, dma_addr_t end) { wmb(); *(vip)CIA_IOC_PCI_TBIA = 3; /* Flush all locked and unlocked. 
*/ mb(); + *(vip)CIA_IOC_PCI_TBIA; +} + +/* + * Fixup attempt number 1. + * + * Write zeros directly into the tag registers. + */ + +static void +cia_pci_tbi_try1(struct pci_controler *hose, + dma_addr_t start, dma_addr_t end) +{ + wmb(); + *(vip)CIA_IOC_TB_TAGn(0) = 0; + *(vip)CIA_IOC_TB_TAGn(1) = 0; + *(vip)CIA_IOC_TB_TAGn(2) = 0; + *(vip)CIA_IOC_TB_TAGn(3) = 0; + *(vip)CIA_IOC_TB_TAGn(4) = 0; + *(vip)CIA_IOC_TB_TAGn(5) = 0; + *(vip)CIA_IOC_TB_TAGn(6) = 0; + *(vip)CIA_IOC_TB_TAGn(7) = 0; + mb(); + *(vip)CIA_IOC_TB_TAGn(0); +} + +#if 0 +/* + * Fixup attempt number 2. This is the method NT and NetBSD use. + * + * Allocate mappings, and put the chip into DMA loopback mode to read a + * garbage page. This works by causing TLB misses, causing old entries to + * be purged to make room for the new entries coming in for the garbage page. + */ + +#define CIA_BROKEN_TBI_TRY2_BASE 0xE0000000 + +static void __init +cia_enable_broken_tbi_try2(void) +{ + unsigned long *ppte, pte; + long i; + + ppte = __alloc_bootmem(PAGE_SIZE, 32768, 0); + pte = (virt_to_phys(ppte) >> (PAGE_SHIFT - 1)) | 1; + + for (i = 0; i < PAGE_SIZE / sizeof(unsigned long); ++i) + ppte[i] = pte; + + *(vip)CIA_IOC_PCI_W3_BASE = CIA_BROKEN_TBI_TRY2_BASE | 3; + *(vip)CIA_IOC_PCI_W3_MASK = (PAGE_SIZE - 1) & 0xfff00000; + *(vip)CIA_IOC_PCI_T3_BASE = virt_to_phys(ppte) >> 2; +} + +static void +cia_pci_tbi_try2(struct pci_controler *hose, + dma_addr_t start, dma_addr_t end) +{ + unsigned long flags; + unsigned long bus_addr; + int ctrl; + long i; + + __save_and_cli(flags); + + /* Put the chip into PCI loopback mode. */ + mb(); + ctrl = *(vip)CIA_IOC_CIA_CTRL; + *(vip)CIA_IOC_CIA_CTRL = ctrl | CIA_CTRL_PCI_LOOP_EN; + mb(); + *(vip)CIA_IOC_CIA_CTRL; + mb(); + + /* Read from PCI dense memory space at TBI_ADDR, skipping 32k on + each read. This forces SG TLB misses. NetBSD claims that the + TLB entries are not quite LRU, meaning that we need to read more + times than there are actual tags. 
The 2117x docs claim strict + round-robin. Oh well, we've come this far... */ + + bus_addr = cia_ioremap(CIA_BROKEN_TBI_TRY2_BASE); + for (i = 0; i < 12; ++i, bus_addr += 32768) + cia_readl(bus_addr); + + /* Restore normal PCI operation. */ + mb(); + *(vip)CIA_IOC_CIA_CTRL = ctrl; + mb(); + *(vip)CIA_IOC_CIA_CTRL; + mb(); + + __restore_flags(flags); +} +#endif + +static void __init +verify_tb_operation(void) +{ + static int page[PAGE_SIZE/4] + __attribute__((aligned(PAGE_SIZE))) + __initlocaldata = { 0 }; + + struct pci_iommu_arena *arena = pci_isa_hose->sg_isa; + int ctrl, addr0, tag0, pte0, data0; + int temp; + + /* Put the chip into PCI loopback mode. */ + mb(); + ctrl = *(vip)CIA_IOC_CIA_CTRL; + *(vip)CIA_IOC_CIA_CTRL = ctrl | CIA_CTRL_PCI_LOOP_EN; + mb(); + *(vip)CIA_IOC_CIA_CTRL; + mb(); + + /* Write a valid entry directly into the TLB registers. */ + + addr0 = arena->dma_base; + tag0 = addr0 | 1; + pte0 = (virt_to_phys(page) >> (PAGE_SHIFT - 1)) | 1; + + *(vip)CIA_IOC_TB_TAGn(0) = tag0; + *(vip)CIA_IOC_TB_TAGn(1) = 0; + *(vip)CIA_IOC_TB_TAGn(2) = 0; + *(vip)CIA_IOC_TB_TAGn(3) = 0; + *(vip)CIA_IOC_TB_TAGn(4) = 0; + *(vip)CIA_IOC_TB_TAGn(5) = 0; + *(vip)CIA_IOC_TB_TAGn(6) = 0; + *(vip)CIA_IOC_TB_TAGn(7) = 0; + *(vip)CIA_IOC_TBn_PAGEm(0,0) = pte0; + *(vip)CIA_IOC_TBn_PAGEm(0,1) = 0; + *(vip)CIA_IOC_TBn_PAGEm(0,2) = 0; + *(vip)CIA_IOC_TBn_PAGEm(0,3) = 0; + mb(); + + /* First, verify we can read back what we've written. If + this fails, we can't be sure of any of the other testing + we're going to do, so bail. */ + /* ??? Actually, we could do the work with machine checks. + By passing this register update test, we pretty much + guarantee that cia_pci_tbi_try1 works. If this test + fails, cia_pci_tbi_try2 might still work. 
*/ + + temp = *(vip)CIA_IOC_TB_TAGn(0); + if (temp != tag0) { + printk("pci: failed tb register update test " + "(tag0 %#x != %#x)\n", temp, tag0); + goto failed; + } + temp = *(vip)CIA_IOC_TB_TAGn(1); + if (temp != 0) { + printk("pci: failed tb register update test " + "(tag1 %#x != 0)\n", temp); + goto failed; + } + temp = *(vip)CIA_IOC_TBn_PAGEm(0,0); + if (temp != pte0) { + printk("pci: failed tb register update test " + "(pte0 %#x != %#x)\n", temp, pte0); + goto failed; + } + printk("pci: passed tb register update test\n"); + + /* Second, verify we can actually do I/O through this entry. */ + + data0 = 0xdeadbeef; + page[0] = data0; + mcheck_expected(0) = 1; + mcheck_taken(0) = 0; + mb(); + temp = cia_readl(cia_ioremap(addr0)); + mb(); + mcheck_expected(0) = 0; + mb(); + if (mcheck_taken(0)) { + printk("pci: failed sg loopback i/o read test (mcheck)\n"); + goto failed; + } + if (temp != data0) { + printk("pci: failed sg loopback i/o read test " + "(%#x != %#x)\n", temp, data0); + goto failed; + } + printk("pci: passed sg loopback i/o read test\n"); + + /* Third, try to invalidate the TLB. */ + + cia_pci_tbi(arena->hose, 0, -1); + temp = *(vip)CIA_IOC_TB_TAGn(0); + if (temp & 1) { + cia_pci_tbi_try1(arena->hose, 0, -1); + + temp = *(vip)CIA_IOC_TB_TAGn(0); + if (temp & 1) { + printk("pci: failed tbia test; " + "no usable workaround\n"); + goto failed; + } + + alpha_mv.mv_pci_tbi = cia_pci_tbi_try1; + printk("pci: failed tbia test; workaround 1 succeeded\n"); + } else { + printk("pci: passed tbia test\n"); + } + + /* Fourth, verify the TLB snoops the EV5's caches when + doing a tlb fill. 
*/ + + data0 = 0x5adda15e; + page[0] = data0; + arena->ptes[4] = pte0; + mcheck_expected(0) = 1; + mcheck_taken(0) = 0; + mb(); + temp = cia_readl(cia_ioremap(addr0 + 4*PAGE_SIZE)); + mb(); + mcheck_expected(0) = 0; + mb(); + if (mcheck_taken(0)) { + printk("pci: failed pte write cache snoop test (mcheck)\n"); + goto failed; + } + if (temp != data0) { + printk("pci: failed pte write cache snoop test " + "(%#x != %#x)\n", temp, data0); + goto failed; + } + printk("pci: passed pte write cache snoop test\n"); + + /* Fifth, verify that a previously invalid PTE entry gets + filled from the page table. */ + + data0 = 0xabcdef123; + page[0] = data0; + arena->ptes[5] = pte0; + mcheck_expected(0) = 1; + mcheck_taken(0) = 0; + mb(); + temp = cia_readl(cia_ioremap(addr0 + 5*PAGE_SIZE)); + mb(); + mcheck_expected(0) = 0; + mb(); + if (mcheck_taken(0)) { + printk("pci: failed valid tag invalid pte reload test " + "(mcheck; workaround available)\n"); + /* Work around this bug by aligning new allocations + on 4 page boundaries. */ + arena->align_entry = 4; + } else if (temp != data0) { + printk("pci: failed valid tag invalid pte reload test " + "(%#x != %#x)\n", temp, data0); + goto failed; + } else { + printk("pci: passed valid tag invalid pte reload test\n"); + } + + /* Sixth, verify machine checks are working. Test invalid + pte under the same valid tag as we used above. */ + + mcheck_expected(0) = 1; + mcheck_taken(0) = 0; + mb(); + temp = cia_readl(cia_ioremap(addr0 + 6*PAGE_SIZE)); + mb(); + mcheck_expected(0) = 0; + mb(); + printk("pci: %s pci machine check test\n", + mcheck_taken(0) ? "passed" : "failed"); + + /* Clean up after the tests. */ + arena->ptes[4] = 0; + arena->ptes[5] = 0; + alpha_mv.mv_pci_tbi(arena->hose, 0, -1); + +exit: + /* Restore normal PCI operation. 
*/ + mb(); + *(vip)CIA_IOC_CIA_CTRL = ctrl; + mb(); + *(vip)CIA_IOC_CIA_CTRL; + mb(); + return; + +failed: + printk("pci: disabling sg translation window\n"); + *(vip)CIA_IOC_PCI_W0_BASE = 0; + alpha_mv.mv_pci_tbi = NULL; + goto exit; } -void __init -cia_init_arch(void) +static void __init +do_init_arch(int is_pyxis) { struct pci_controler *hose; - struct resource *hae_mem; - unsigned int temp; + int temp; + int cia_rev; -#if DEBUG_DUMP_REGS - temp = *(vuip)CIA_IOC_CIA_REV; mb(); - printk("cia_init: CIA_REV was 0x%x\n", temp); - temp = *(vuip)CIA_IOC_PCI_LAT; mb(); - printk("cia_init: CIA_PCI_LAT was 0x%x\n", temp); - temp = *(vuip)CIA_IOC_CIA_CTRL; mb(); - printk("cia_init: CIA_CTRL was 0x%x\n", temp); - temp = *(vuip)0xfffffc8740000140UL; mb(); - printk("cia_init: CIA_CTRL1 was 0x%x\n", temp); - temp = *(vuip)CIA_IOC_HAE_MEM; mb(); - printk("cia_init: CIA_HAE_MEM was 0x%x\n", temp); - temp = *(vuip)CIA_IOC_HAE_IO; mb(); - printk("cia_init: CIA_HAE_IO was 0x%x\n", temp); - temp = *(vuip)CIA_IOC_CFG; mb(); - printk("cia_init: CIA_CFG was 0x%x\n", temp); - temp = *(vuip)CIA_IOC_CACK_EN; mb(); - printk("cia_init: CIA_CACK_EN was 0x%x\n", temp); - temp = *(vuip)CIA_IOC_CFG; mb(); - printk("cia_init: CIA_CFG was 0x%x\n", temp); - temp = *(vuip)CIA_IOC_CIA_DIAG; mb(); - printk("cia_init: CIA_DIAG was 0x%x\n", temp); - temp = *(vuip)CIA_IOC_DIAG_CHECK; mb(); - printk("cia_init: CIA_DIAG_CHECK was 0x%x\n", temp); - temp = *(vuip)CIA_IOC_PERF_MONITOR; mb(); - printk("cia_init: CIA_PERF_MONITOR was 0x%x\n", temp); - temp = *(vuip)CIA_IOC_PERF_CONTROL; mb(); - printk("cia_init: CIA_PERF_CONTROL was 0x%x\n", temp); - temp = *(vuip)CIA_IOC_CIA_ERR; mb(); - printk("cia_init: CIA_ERR was 0x%x\n", temp); - temp = *(vuip)CIA_IOC_CIA_STAT; mb(); - printk("cia_init: CIA_STAT was 0x%x\n", temp); - temp = *(vuip)CIA_IOC_MCR; mb(); - printk("cia_init: CIA_MCR was 0x%x\n", temp); - temp = *(vuip)CIA_IOC_CIA_CTRL; mb(); - printk("cia_init: CIA_CTRL was 0x%x\n", temp); - temp = 
*(vuip)CIA_IOC_ERR_MASK; mb(); - printk("cia_init: CIA_ERR_MASK was 0x%x\n", temp); - temp = *((vuip)CIA_IOC_PCI_W0_BASE); mb(); - printk("cia_init: W0_BASE was 0x%x\n", temp); - temp = *((vuip)CIA_IOC_PCI_W1_BASE); mb(); - printk("cia_init: W1_BASE was 0x%x\n", temp); - temp = *((vuip)CIA_IOC_PCI_W2_BASE); mb(); - printk("cia_init: W2_BASE was 0x%x\n", temp); - temp = *((vuip)CIA_IOC_PCI_W3_BASE); mb(); - printk("cia_init: W3_BASE was 0x%x\n", temp); -#endif /* DEBUG_DUMP_REGS */ + cia_rev = *(vip)CIA_IOC_CIA_REV & CIA_REV_MASK; + printk("pci: cia revision %d%s\n", + cia_rev, is_pyxis ? " (pyxis)" : ""); + + /* Set up error reporting. */ + temp = *(vip)CIA_IOC_ERR_MASK; + temp &= ~(CIA_ERR_CPU_PE | CIA_ERR_MEM_NEM | CIA_ERR_PA_PTE_INV + | CIA_ERR_RCVD_MAS_ABT | CIA_ERR_RCVD_TAR_ABT); + *(vip)CIA_IOC_ERR_MASK = temp; + + /* Clear all currently pending errors. */ + *(vip)CIA_IOC_CIA_ERR = 0; + + /* Turn on mchecks. */ + temp = *(vip)CIA_IOC_CIA_CTRL; + temp |= CIA_CTRL_FILL_ERR_EN | CIA_CTRL_MCHK_ERR_EN; + *(vip)CIA_IOC_CIA_CTRL = temp; + + /* Clear the CFG register, which gets used for PCI config space + accesses. That is the way we want to use it, and we do not + want to depend on what ARC or SRM might have left behind. */ + *(vip)CIA_IOC_CFG = 0; + + /* Zero the HAEs. */ + *(vip)CIA_IOC_HAE_MEM = 0; + *(vip)CIA_IOC_HAE_IO = 0; + + /* For PYXIS, we always use BWX bus and i/o accesses. To that end, + make sure they're enabled on the controller. */ + if (is_pyxis) { + temp = *(vip)CIA_IOC_CIA_CNFG; + temp |= CIA_CNFG_IOA_BWEN; + *(vip)CIA_IOC_CIA_CNFG = temp; + } + + /* Synchronize with all previous changes. */ + mb(); + *(vip)CIA_IOC_CIA_REV; /* * Create our single hose.
*/ pci_isa_hose = hose = alloc_pci_controler(); - hae_mem = alloc_resource(); - hose->io_space = &ioport_resource; - hose->mem_space = hae_mem; + hose->mem_space = &iomem_resource; hose->config_space = CIA_CONF; hose->index = 0; - hae_mem->start = 0; - hae_mem->end = CIA_MEM_R1_MASK; - hae_mem->name = pci_hae0_name; - hae_mem->flags = IORESOURCE_MEM; + if (! is_pyxis) { + struct resource *hae_mem = alloc_resource(); + hose->mem_space = hae_mem; + + hae_mem->start = 0; + hae_mem->end = CIA_MEM_R1_MASK; + hae_mem->name = pci_hae0_name; + hae_mem->flags = IORESOURCE_MEM; - if (request_resource(&iomem_resource, hae_mem) < 0) - printk(KERN_ERR "Failed to request HAE_MEM\n"); + if (request_resource(&iomem_resource, hae_mem) < 0) + printk(KERN_ERR "Failed to request HAE_MEM\n"); + } /* * Set up the PCI to main memory translation windows. * * Window 0 is scatter-gather 8MB at 8MB (for isa) - * Window 1 is scatter-gather 128MB at 1GB - * Window 2 is direct access 2GB at 2GB - * ??? We ought to scale window 1 with memory. + * Window 1 is direct access 1GB at 1GB + * Window 2 is direct access 1GB at 2GB + * + * We must actually use 2 windows to direct-map the 2GB space, + * because of an idiot-syncrasy of the CYPRESS chip used on + * many PYXIS systems. It may respond to a PCI bus address in + * the last 1MB of the 4GB address range. + * + * ??? NetBSD hints that page tables must be aligned to 32K, + * possibly due to a hardware bug. This is over-aligned + * from the 8K alignment one would expect for an 8MB window. + * No description of what revisions affected. */ - /* ??? NetBSD hints that page tables must be aligned to 32K, - possibly due to a hardware bug. This is over-aligned - from the 8K alignment one would expect for an 8MB window. - No description of what CIA revisions affected. 
*/ - hose->sg_isa = iommu_arena_new(hose, 0x00800000, 0x00800000, 0x8000); - hose->sg_pci = iommu_arena_new(hose, 0x40000000, 0x08000000, 0); - __direct_map_base = 0x80000000; + hose->sg_pci = NULL; + hose->sg_isa = iommu_arena_new(hose, 0x00800000, 0x00800000, 32768); + __direct_map_base = 0x40000000; __direct_map_size = 0x80000000; - *(vuip)CIA_IOC_PCI_W0_BASE = hose->sg_isa->dma_base | 3; - *(vuip)CIA_IOC_PCI_W0_MASK = (hose->sg_isa->size - 1) & 0xfff00000; - *(vuip)CIA_IOC_PCI_T0_BASE = virt_to_phys(hose->sg_isa->ptes) >> 2; - - *(vuip)CIA_IOC_PCI_W1_BASE = hose->sg_pci->dma_base | 3; - *(vuip)CIA_IOC_PCI_W1_MASK = (hose->sg_pci->size - 1) & 0xfff00000; - *(vuip)CIA_IOC_PCI_T1_BASE = virt_to_phys(hose->sg_pci->ptes) >> 2; + *(vip)CIA_IOC_PCI_W0_BASE = hose->sg_isa->dma_base | 3; + *(vip)CIA_IOC_PCI_W0_MASK = (hose->sg_isa->size - 1) & 0xfff00000; + *(vip)CIA_IOC_PCI_T0_BASE = virt_to_phys(hose->sg_isa->ptes) >> 2; + + *(vip)CIA_IOC_PCI_W1_BASE = 0x40000000 | 1; + *(vip)CIA_IOC_PCI_W1_MASK = (0x40000000 - 1) & 0xfff00000; + *(vip)CIA_IOC_PCI_T1_BASE = 0; + + *(vip)CIA_IOC_PCI_W2_BASE = 0x80000000 | 1; + *(vip)CIA_IOC_PCI_W2_MASK = (0x40000000 - 1) & 0xfff00000; + *(vip)CIA_IOC_PCI_T2_BASE = 0x40000000; - *(vuip)CIA_IOC_PCI_W2_BASE = __direct_map_base | 1; - *(vuip)CIA_IOC_PCI_W2_MASK = (__direct_map_size - 1) & 0xfff00000; - *(vuip)CIA_IOC_PCI_T2_BASE = 0; - - *(vuip)CIA_IOC_PCI_W3_BASE = 0; + *(vip)CIA_IOC_PCI_W3_BASE = 0; +} - cia_pci_tbi(hose, 0, -1); +void __init +cia_init_arch(void) +{ + do_init_arch(0); +} - /* - * Set up error reporting. - */ - temp = *(vuip)CIA_IOC_CIA_ERR; - temp |= 0x180; /* master, target abort */ - *(vuip)CIA_IOC_CIA_ERR = temp; - - temp = *(vuip)CIA_IOC_CIA_CTRL; - temp |= 0x400; /* turn on FILL_ERR to get mchecks */ - *(vuip)CIA_IOC_CIA_CTRL = temp; +void __init +pyxis_init_arch(void) +{ + do_init_arch(1); +} - /* - * Next, clear the CIA_CFG register, which gets used - * for PCI Config Space accesses. 
That is the way - * we want to use it, and we do not want to depend on - * what ARC or SRM might have left behind... - */ - *(vuip)CIA_IOC_CFG = 0; - - /* - * Zero the HAEs. - */ - *(vuip)CIA_IOC_HAE_MEM = 0; - *(vuip)CIA_IOC_HAE_IO = 0; - mb(); +void __init +cia_init_pci(void) +{ + /* Must delay this from init_arch, as we need machine checks. */ + verify_tb_operation(); + common_init_pci(); } static inline void cia_pci_clr_err(void) { - unsigned int jd; + int jd; - jd = *(vuip)CIA_IOC_CIA_ERR; - *(vuip)CIA_IOC_CIA_ERR = jd; + jd = *(vip)CIA_IOC_CIA_ERR; + *(vip)CIA_IOC_CIA_ERR = jd; mb(); - *(vuip)CIA_IOC_CIA_ERR; /* re-read to force write. */ + *(vip)CIA_IOC_CIA_ERR; /* re-read to force write. */ } void diff -u --recursive --new-file v2.3.99-pre1/linux/arch/alpha/kernel/core_pyxis.c linux/arch/alpha/kernel/core_pyxis.c --- v2.3.99-pre1/linux/arch/alpha/kernel/core_pyxis.c Thu Mar 2 14:36:22 2000 +++ linux/arch/alpha/kernel/core_pyxis.c Wed Dec 31 16:00:00 1969 @@ -1,644 +0,0 @@ -/* - * linux/arch/alpha/kernel/core_pyxis.c - * - * Based on code written by David A Rusling (david.rusling@reo.mts.dec.com). - * - * Code common to all PYXIS core logic chips. - */ - -#include -#include - -#define __EXTERN_INLINE inline -#include -#include -#undef __EXTERN_INLINE - -#include -#include -#include -#include - -#include -#include - -#include "proto.h" -#include "irq_impl.h" -#include "pci_impl.h" - - -/* NOTE: Herein are back-to-back mb instructions. They are magic. - One plausible explanation is that the I/O controller does not properly - handle the system transaction. Another involves timing. Ho hum. */ - -/* - * BIOS32-style PCI interface: - */ - -#define DEBUG_CONFIG 0 -#if DEBUG_CONFIG -# define DBG_CNF(args) printk args -#else -# define DBG_CNF(args) -#endif - - -/* - * Given a bus, device, and function number, compute resulting - * configuration space address and setup the PYXIS_HAXR2 register - * accordingly. 
It is therefore not safe to have concurrent - * invocations to configuration space access routines, but there - * really shouldn't be any need for this. - * - * Type 0: - * - * 3 3|3 3 2 2|2 2 2 2|2 2 2 2|1 1 1 1|1 1 1 1|1 1 - * 3 2|1 0 9 8|7 6 5 4|3 2 1 0|9 8 7 6|5 4 3 2|1 0 9 8|7 6 5 4|3 2 1 0 - * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - * | | |D|D|D|D|D|D|D|D|D|D|D|D|D|D|D|D|D|D|D|D|D|F|F|F|R|R|R|R|R|R|0|0| - * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - * - * 31:11 Device select bit. - * 10:8 Function number - * 7:2 Register number - * - * Type 1: - * - * 3 3|3 3 2 2|2 2 2 2|2 2 2 2|1 1 1 1|1 1 1 1|1 1 - * 3 2|1 0 9 8|7 6 5 4|3 2 1 0|9 8 7 6|5 4 3 2|1 0 9 8|7 6 5 4|3 2 1 0 - * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - * | | | | | | | | | | |B|B|B|B|B|B|B|B|D|D|D|D|D|F|F|F|R|R|R|R|R|R|0|1| - * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - * - * 31:24 reserved - * 23:16 bus number (8 bits = 128 possible buses) - * 15:11 Device number (5 bits) - * 10:8 function number - * 7:2 register number - * - * Notes: - * The function number selects which function of a multi-function device - * (e.g., SCSI and Ethernet). - * - * The register selects a DWORD (32 bit) register offset. Hence it - * doesn't get shifted by 2 bits as we want to "drop" the bottom two - * bits. - */ - -static int -mk_conf_addr(struct pci_dev *dev, int where, unsigned long *pci_addr, - unsigned char *type1) -{ - u8 bus = dev->bus->number; - u8 device_fn = dev->devfn; - - *type1 = (bus == 0) ? 
0 : 1; - *pci_addr = (bus << 16) | (device_fn << 8) | (where); - - DBG_CNF(("mk_conf_addr(bus=%d ,device_fn=0x%x, where=0x%x," - " returning address 0x%p\n" - bus, device_fn, where, *pci_addr)); - - return 0; -} - -static unsigned int -conf_read(unsigned long addr, unsigned char type1) -{ - unsigned long flags; - unsigned int stat0, value, temp; - unsigned int pyxis_cfg = 0; - - __save_and_cli(flags); /* avoid getting hit by machine check */ - - /* Reset status register to avoid losing errors. */ - stat0 = *(vuip)PYXIS_ERR; - *(vuip)PYXIS_ERR = stat0; mb(); - temp = *(vuip)PYXIS_ERR; /* re-read to force write */ - - /* If Type1 access, must set PYXIS CFG. */ - if (type1) { - pyxis_cfg = *(vuip)PYXIS_CFG; - *(vuip)PYXIS_CFG = (pyxis_cfg & ~3L) | 1; mb(); - temp = *(vuip)PYXIS_CFG; /* re-read to force write */ - } - - mb(); - draina(); - mcheck_expected(0) = 1; - mcheck_taken(0) = 0; - mb(); - - /* Access configuration space. */ - value = *(vuip)addr; - mb(); - mb(); /* magic */ - - if (mcheck_taken(0)) { - mcheck_taken(0) = 0; - value = 0xffffffffU; - mb(); - } - mcheck_expected(0) = 0; - mb(); - - /* If Type1 access, must reset IOC CFG so normal IO space ops work. */ - if (type1) { - *(vuip)PYXIS_CFG = pyxis_cfg & ~3L; mb(); - temp = *(vuip)PYXIS_CFG; /* re-read to force write */ - } - - __restore_flags(flags); - - DBG_CNF(("conf_read(addr=0x%lx, type1=%d) = %#x\n", - addr, type1, value)); - - return value; -} - -static void -conf_write(unsigned long addr, unsigned int value, unsigned char type1) -{ - unsigned long flags; - unsigned int stat0, temp; - unsigned int pyxis_cfg = 0; - - __save_and_cli(flags); /* avoid getting hit by machine check */ - - /* Reset status register to avoid losing errors. */ - stat0 = *(vuip)PYXIS_ERR; - *(vuip)PYXIS_ERR = stat0; mb(); - temp = *(vuip)PYXIS_ERR; /* re-read to force write */ - - /* If Type1 access, must set PYXIS CFG. 
*/ - if (type1) { - pyxis_cfg = *(vuip)PYXIS_CFG; - *(vuip)PYXIS_CFG = (pyxis_cfg & ~3L) | 1; mb(); - temp = *(vuip)PYXIS_CFG; /* re-read to force write */ - } - - mb(); - draina(); - mcheck_expected(0) = 1; - mcheck_taken(0) = 0; - mb(); - - /* Access configuration space. */ - *(vuip)addr = value; - mb(); - temp = *(vuip)addr; /* read back to force the write */ - mcheck_expected(0) = 0; - mb(); - - /* If Type1 access, must reset IOC CFG so normal IO space ops work. */ - if (type1) { - *(vuip)PYXIS_CFG = pyxis_cfg & ~3L; mb(); - temp = *(vuip)PYXIS_CFG; /* re-read to force write */ - } - - __restore_flags(flags); - - DBG_CNF(("conf_write(addr=%#lx, value=%#x, type1=%d)\n", - addr, value, type1)); -} - -static int -pyxis_read_config_byte(struct pci_dev *dev, int where, u8 *value) -{ - unsigned long addr, pci_addr; - unsigned char type1; - - if (mk_conf_addr(dev, where, &pci_addr, &type1)) - return PCIBIOS_DEVICE_NOT_FOUND; - - addr = (pci_addr << 5) + 0x00 + PYXIS_CONF; - *value = conf_read(addr, type1) >> ((where & 3) * 8); - return PCIBIOS_SUCCESSFUL; -} - -static int -pyxis_read_config_word(struct pci_dev *dev, int where, u16 *value) -{ - unsigned long addr, pci_addr; - unsigned char type1; - - if (mk_conf_addr(dev, where, &pci_addr, &type1)) - return PCIBIOS_DEVICE_NOT_FOUND; - - addr = (pci_addr << 5) + 0x08 + PYXIS_CONF; - *value = conf_read(addr, type1) >> ((where & 3) * 8); - return PCIBIOS_SUCCESSFUL; -} - -static int -pyxis_read_config_dword(struct pci_dev *dev, int where, u32 *value) -{ - unsigned long addr, pci_addr; - unsigned char type1; - - if (mk_conf_addr(dev, where, &pci_addr, &type1)) - return PCIBIOS_DEVICE_NOT_FOUND; - - addr = (pci_addr << 5) + 0x18 + PYXIS_CONF; - *value = conf_read(addr, type1); - return PCIBIOS_SUCCESSFUL; -} - -static int -pyxis_write_config(struct pci_dev *dev, int where, u32 value, long mask) -{ - unsigned long addr, pci_addr; - unsigned char type1; - - if (mk_conf_addr(dev, where, &pci_addr, &type1)) - return 
PCIBIOS_DEVICE_NOT_FOUND; - - addr = (pci_addr << 5) + mask + PYXIS_CONF; - conf_write(addr, value << ((where & 3) * 8), type1); - return PCIBIOS_SUCCESSFUL; -} - -static int -pyxis_write_config_byte(struct pci_dev *dev, int where, u8 value) -{ - return pyxis_write_config(dev, where, value, 0x00); -} - -static int -pyxis_write_config_word(struct pci_dev *dev, int where, u16 value) -{ - return pyxis_write_config(dev, where, value, 0x08); -} - -static int -pyxis_write_config_dword(struct pci_dev *dev, int where, u32 value) -{ - return pyxis_write_config(dev, where, value, 0x18); -} - -struct pci_ops pyxis_pci_ops = -{ - read_byte: pyxis_read_config_byte, - read_word: pyxis_read_config_word, - read_dword: pyxis_read_config_dword, - write_byte: pyxis_write_config_byte, - write_word: pyxis_write_config_word, - write_dword: pyxis_write_config_dword -}; - -/* Note mask bit is true for ENABLED irqs. */ -static unsigned long cached_irq_mask; - -static inline void -pyxis_update_irq_hw(unsigned long mask) -{ - *(vulp)PYXIS_INT_MASK = mask; - mb(); - *(vulp)PYXIS_INT_MASK; -} - -static inline void -pyxis_enable_irq(unsigned int irq) -{ - pyxis_update_irq_hw(cached_irq_mask |= 1UL << (irq - 16)); -} - -static void -pyxis_disable_irq(unsigned int irq) -{ - pyxis_update_irq_hw(cached_irq_mask &= ~(1UL << (irq - 16))); -} - -static unsigned int -pyxis_startup_irq(unsigned int irq) -{ - pyxis_enable_irq(irq); - return 0; -} - -static void -pyxis_end_irq(unsigned int irq) -{ - if (!(irq_desc[irq].status & (IRQ_DISABLED|IRQ_INPROGRESS))) - pyxis_enable_irq(irq); -} - -static void -pyxis_mask_and_ack_irq(unsigned int irq) -{ - unsigned long bit = 1UL << (irq - 16); - unsigned long mask = cached_irq_mask &= ~bit; - - /* Disable the interrupt. */ - *(vulp)PYXIS_INT_MASK = mask; - wmb(); - /* Ack PYXIS PCI interrupt. */ - *(vulp)PYXIS_INT_REQ = bit; - mb(); - /* Re-read to force both writes. 
*/ - *(vulp)PYXIS_INT_MASK; -} - -static struct hw_interrupt_type pyxis_irq_type = { - typename: "PYXIS", - startup: pyxis_startup_irq, - shutdown: pyxis_disable_irq, - enable: pyxis_enable_irq, - disable: pyxis_disable_irq, - ack: pyxis_mask_and_ack_irq, - end: pyxis_end_irq, -}; - -void -pyxis_device_interrupt(unsigned long vector, struct pt_regs *regs) -{ - unsigned long pld; - unsigned int i; - - /* Read the interrupt summary register of PYXIS */ - pld = *(vulp)PYXIS_INT_REQ; - pld &= cached_irq_mask; - - /* - * Now for every possible bit set, work through them and call - * the appropriate interrupt handler. - */ - while (pld) { - i = ffz(~pld); - pld &= pld - 1; /* clear least bit set */ - if (i == 7) - isa_device_interrupt(vector, regs); - else - handle_irq(16+i, regs); - } -} - -void __init -init_pyxis_irqs(unsigned long ignore_mask) -{ - long i; - - *(vulp)PYXIS_INT_MASK = 0; /* disable all */ - *(vulp)PYXIS_INT_REQ = -1; /* flush all */ - mb(); - - /* Send -INTA pulses to clear any pending interrupts ...*/ - *(vuip) PYXIS_IACK_SC; - - for (i = 16; i < 48; ++i) { - if ((ignore_mask >> i) & 1) - continue; - irq_desc[i].status = IRQ_DISABLED | IRQ_LEVEL; - irq_desc[i].handler = &pyxis_irq_type; - } - - setup_irq(16+7, &isa_cascade_irqaction); -} - -void -pyxis_pci_tbi(struct pci_controler *hose, dma_addr_t start, dma_addr_t end) -{ - wmb(); - *(vip)PYXIS_TBIA = 3; /* Flush all locked and unlocked. */ - mb(); -} - -/* - * Pass 1 and 2 have a broken scatter-gather tlb -- it cannot be invalidated. - * To work around this problem, we allocate mappings, and put the chip into - * DMA loopback mode to read a garbage page. This works by causing TLB - * misses, causing old entries to be purged to make room for the new entries - * coming in for the garbage page. - * - * Thanks to NetBSD sources for pointing out this bug. What a pain. 
- */ - -static unsigned long broken_tbi_addr; - -#define BROKEN_TBI_READS 12 - -static void -pyxis_broken_pci_tbi(struct pci_controler *hose, - dma_addr_t start, dma_addr_t end) -{ - unsigned long flags; - unsigned long bus_addr; - unsigned int ctrl; - long i; - - __save_and_cli(flags); - - /* Put the chip into PCI loopback mode. */ - mb(); - ctrl = *(vuip)PYXIS_CTRL; - *(vuip)PYXIS_CTRL = ctrl | 4; - mb(); - *(vuip)PYXIS_CTRL; - mb(); - - /* Read from PCI dense memory space at TBI_ADDR, skipping 64k - on each read. This forces SG TLB misses. It appears that - the TLB entries are "not quite LRU", meaning that we need - to read more times than there are actual tags. */ - - bus_addr = broken_tbi_addr; - for (i = 0; i < BROKEN_TBI_READS; ++i, bus_addr += 64*1024) - pyxis_readl(bus_addr); - - /* Restore normal PCI operation. */ - mb(); - *(vuip)PYXIS_CTRL = ctrl; - mb(); - *(vuip)PYXIS_CTRL; - mb(); - - __restore_flags(flags); -} - -static void __init -pyxis_enable_broken_tbi(struct pci_iommu_arena *arena) -{ - void *page; - unsigned long *ppte, ofs, pte; - long i, npages; - - page = alloc_bootmem_pages(PAGE_SIZE); - pte = (virt_to_phys(page) >> (PAGE_SHIFT - 1)) | 1; - npages = (BROKEN_TBI_READS + 1) * 64*1024 / PAGE_SIZE; - - ofs = iommu_arena_alloc(arena, npages); - ppte = arena->ptes + ofs; - for (i = 0; i < npages; ++i) - ppte[i] = pte; - - broken_tbi_addr = pyxis_ioremap(arena->dma_base + ofs*PAGE_SIZE); - alpha_mv.mv_pci_tbi = pyxis_broken_pci_tbi; - - printk("PYXIS: Enabling broken tbia workaround.\n"); -} - -void __init -pyxis_init_arch(void) -{ - struct pci_controler *hose; - unsigned int temp; - - /* Set up error reporting. Make sure CPU_PE is OFF in the mask. */ - temp = *(vuip)PYXIS_ERR_MASK; - *(vuip)PYXIS_ERR_MASK = temp & ~4; - - /* Enable master/target abort. */ - temp = *(vuip)PYXIS_ERR; - *(vuip)PYXIS_ERR = temp | 0x180; - - /* Clear the PYXIS_CFG register, which gets used for PCI Config - Space accesses. 
That is the way we want to use it, and we do - not want to depend on what ARC or SRM might have left behind. */ - *(vuip)PYXIS_CFG = 0; - - /* Zero the HAEs. */ - *(vuip)PYXIS_HAE_MEM = 0; - *(vuip)PYXIS_HAE_IO = 0; - - /* Finally, check that the PYXIS_CTRL1 has IOA_BEN set for - enabling byte/word PCI bus space(s) access. */ - temp = *(vuip)PYXIS_CTRL1; - *(vuip)PYXIS_CTRL1 = temp | 1; - - /* Syncronize with all previous changes. */ - mb(); - *(vuip)PYXIS_REV; - - /* - * Create our single hose. - */ - - hose = alloc_pci_controler(); - hose->io_space = &ioport_resource; - hose->mem_space = &iomem_resource; - hose->config_space = PYXIS_CONF; - hose->index = 0; - - /* - * Set up the PCI to main memory translation windows. - * - * Window 0 is scatter-gather 8MB at 8MB (for isa) - * Window 1 is scatter-gather 128MB at 3GB - * Window 2 is direct access 1GB at 1GB - * Window 3 is direct access 1GB at 2GB - * ??? We ought to scale window 1 with memory. - * - * We must actually use 2 windows to direct-map the 2GB space, - * because of an idiot-syncrasy of the CYPRESS chip. It may - * respond to a PCI bus address in the last 1MB of the 4GB - * address range. - */ - -#if 1 - /* ??? There's some bit of syncronization wrt writing new tlb - entries that's missing. Sometimes it works, sometimes invalid - tlb machine checks, sometimes hard lockup. And this just within - the boot sequence. - - I've tried extra memory barriers, extra alignment, pyxis - register reads, tlb flushes, and loopback tlb accesses. - - I guess the pyxis revision in the sx164 is just too buggy... 
*/ - - hose->sg_isa = hose->sg_pci = NULL; - __direct_map_base = 0x40000000; - __direct_map_size = 0x80000000; - - *(vuip)PYXIS_W0_BASE = 0x40000000 | 1; - *(vuip)PYXIS_W0_MASK = (0x40000000 - 1) & 0xfff00000; - *(vuip)PYXIS_T0_BASE = 0; - - *(vuip)PYXIS_W1_BASE = 0x80000000 | 1; - *(vuip)PYXIS_W1_MASK = (0x40000000 - 1) & 0xfff00000; - *(vuip)PYXIS_T1_BASE = 0; - - *(vuip)PYXIS_W2_BASE = 0; - *(vuip)PYXIS_W3_BASE = 0; - - alpha_mv.mv_pci_tbi = NULL; - mb(); -#else - /* ??? NetBSD hints that page tables must be aligned to 32K, - possibly due to a hardware bug. This is over-aligned - from the 8K alignment one would expect for an 8MB window. - No description of what CIA revisions affected. */ - hose->sg_isa = iommu_arena_new(hose, 0x00800000, 0x00800000, 0x08000); - hose->sg_pci = iommu_arena_new(hose, 0xc0000000, 0x08000000, 0x20000); - __direct_map_base = 0x40000000; - __direct_map_size = 0x80000000; - - *(vuip)PYXIS_W0_BASE = hose->sg_isa->dma_base | 3; - *(vuip)PYXIS_W0_MASK = (hose->sg_isa->size - 1) & 0xfff00000; - *(vuip)PYXIS_T0_BASE = virt_to_phys(hose->sg_isa->ptes) >> 2; - - *(vuip)PYXIS_W1_BASE = hose->sg_pci->dma_base | 3; - *(vuip)PYXIS_W1_MASK = (hose->sg_pci->size - 1) & 0xfff00000; - *(vuip)PYXIS_T1_BASE = virt_to_phys(hose->sg_pci->ptes) >> 2; - - *(vuip)PYXIS_W2_BASE = 0x40000000 | 1; - *(vuip)PYXIS_W2_MASK = (0x40000000 - 1) & 0xfff00000; - *(vuip)PYXIS_T2_BASE = 0; - - *(vuip)PYXIS_W3_BASE = 0x80000000 | 1; - *(vuip)PYXIS_W3_MASK = (0x40000000 - 1) & 0xfff00000; - *(vuip)PYXIS_T3_BASE = 0; - - /* Pass 1 and 2 (ie revision <= 1) have a broken TBIA. See the - complete description next to pyxis_broken_pci_tbi for details. 
*/ - if ((*(vuip)PYXIS_REV & 0xff) <= 1) - pyxis_enable_broken_tbi(hose->sg_pci); - - alpha_mv.mv_pci_tbi(hose, 0, -1); -#endif -} - -static inline void -pyxis_pci_clr_err(void) -{ - unsigned int tmp; - - tmp = *(vuip)PYXIS_ERR; - *(vuip)PYXIS_ERR = tmp; - mb(); - *(vuip)PYXIS_ERR; /* re-read to force write */ -} - -void -pyxis_machine_check(unsigned long vector, unsigned long la_ptr, - struct pt_regs * regs) -{ - int expected; - - /* Clear the error before reporting anything. */ - mb(); - mb(); /* magic */ - draina(); - pyxis_pci_clr_err(); - wrmces(0x7); - mb(); - - expected = mcheck_expected(0); - if (!expected && vector == 0x660) { - struct el_common *com; - struct el_common_EV5_uncorrectable_mcheck *ev5; - struct el_PYXIS_sysdata_mcheck *pyxis; - - com = (void *)la_ptr; - ev5 = (void *)(la_ptr + com->proc_offset); - pyxis = (void *)(la_ptr + com->sys_offset); - - if (com->code == 0x202) { - printk(KERN_CRIT "PYXIS PCI machine check: err0=%08x " - "err1=%08x err2=%08x\n", - (int) pyxis->pci_err0, (int) pyxis->pci_err1, - (int) pyxis->pci_err2); - expected = 1; - } - } - process_mcheck_info(vector, la_ptr, regs, "PYXIS", expected); -} diff -u --recursive --new-file v2.3.99-pre1/linux/arch/alpha/kernel/core_tsunami.c linux/arch/alpha/kernel/core_tsunami.c --- v2.3.99-pre1/linux/arch/alpha/kernel/core_tsunami.c Thu Mar 2 14:36:22 2000 +++ linux/arch/alpha/kernel/core_tsunami.c Thu Mar 16 14:07:09 2000 @@ -363,7 +363,7 @@ pchip->wsba[3].csr = 0x80000000 | 1; pchip->wsm[3].csr = (0x40000000 - 1) & 0xfff00000; - pchip->tba[3].csr = 0; + pchip->tba[3].csr = 0x40000000; tsunami_pci_tbi(hose, 0, -1); } diff -u --recursive --new-file v2.3.99-pre1/linux/arch/alpha/kernel/entry.S linux/arch/alpha/kernel/entry.S --- v2.3.99-pre1/linux/arch/alpha/kernel/entry.S Sat Feb 12 11:22:10 2000 +++ linux/arch/alpha/kernel/entry.S Thu Mar 16 14:07:09 2000 @@ -8,7 +8,7 @@ #define SIGCHLD 20 -#define NR_SYSCALLS 374 +#define NR_SYSCALLS 376 /* * These offsets must match with alpha_mv in 
. @@ -991,7 +991,7 @@ .quad osf_shmat .quad sys_shmctl /* 210 */ .quad sys_shmdt - .quad osf_shmget + .quad sys_shmget .quad alpha_ni_syscall .quad alpha_ni_syscall .quad alpha_ni_syscall /* 215 */ @@ -1154,4 +1154,5 @@ .quad sys_setresgid .quad sys_getresgid .quad sys_ni_syscall /* sys_dipc */ - .quad sys_shmget + .quad sys_pivot_root + .quad sys_mincore /* 375 */ diff -u --recursive --new-file v2.3.99-pre1/linux/arch/alpha/kernel/irq.c linux/arch/alpha/kernel/irq.c --- v2.3.99-pre1/linux/arch/alpha/kernel/irq.c Thu Mar 2 14:36:22 2000 +++ linux/arch/alpha/kernel/irq.c Fri Mar 17 13:02:05 2000 @@ -219,15 +219,33 @@ } spin_unlock_irqrestore(&desc->lock,flags); - register_irq_proc(irq); return 0; } static struct proc_dir_entry * root_irq_dir; -static struct proc_dir_entry * irq_dir [NR_IRQS]; -static struct proc_dir_entry * smp_affinity_entry [NR_IRQS]; +static struct proc_dir_entry * irq_dir[NR_IRQS]; -static unsigned long irq_affinity [NR_IRQS] = { [0 ... NR_IRQS-1] = ~0UL }; +#ifdef CONFIG_SMP +static struct proc_dir_entry * smp_affinity_entry[NR_IRQS]; +static char irq_user_affinity[NR_IRQS]; +static unsigned long irq_affinity[NR_IRQS] = { [0 ... NR_IRQS-1] = ~0UL }; + +static void +select_smp_affinity(int irq) +{ + static int last_cpu; + int cpu = last_cpu + 1; + + if (! irq_desc[irq].handler->set_affinity || irq_user_affinity[irq]) + return; + + while (((cpu_present_mask >> cpu) & 1) == 0) + cpu = (cpu < NR_CPUS ? cpu + 1 : 0); + last_cpu = cpu; + + irq_affinity[irq] = 1UL << cpu; + irq_desc[irq].handler->set_affinity(irq, 1UL << cpu); +} #define HEX_DIGITS 16 @@ -290,18 +308,22 @@ err = parse_hex_value(buffer, count, &new_value); -#if CONFIG_SMP - /* - * Do not allow disabling IRQs completely - it's a too easy - * way to make the system unusable accidentally :-) At least - * one online CPU still has to be targeted. - */ - if (!(new_value & cpu_present_mask)) + /* The special value 0 means release control of the + affinity to kernel. 
*/ + if (new_value == 0) { + irq_user_affinity[irq] = 0; + select_smp_affinity(irq); + } + /* Do not allow disabling IRQs completely - it's a too easy + way to make the system unusable accidentally :-) At least + one online CPU still has to be targeted. */ + else if (!(new_value & cpu_present_mask)) return -EINVAL; -#endif - - irq_affinity[irq] = new_value; - irq_desc[irq].handler->set_affinity(irq, new_value); + else { + irq_affinity[irq] = new_value; + irq_user_affinity[irq] = 1; + irq_desc[irq].handler->set_affinity(irq, new_value); + } return full_count; } @@ -313,7 +335,7 @@ unsigned long *mask = (unsigned long *) data; if (count < HEX_DIGITS+1) return -EINVAL; - return sprintf (page, "%08lx\n", *mask); + return sprintf (page, "%016lx\n", *mask); } static int @@ -330,6 +352,7 @@ *mask = new_value; return full_count; } +#endif /* CONFIG_SMP */ #define MAX_NAMELEN 10 @@ -348,6 +371,7 @@ /* create /proc/irq/1234 */ irq_dir[irq] = proc_mkdir(name, root_irq_dir); +#ifdef CONFIG_SMP /* create /proc/irq/1234/smp_affinity */ entry = create_proc_entry("smp_affinity", 0700, irq_dir[irq]); @@ -357,6 +381,7 @@ entry->write_proc = irq_affinity_write_proc; smp_affinity_entry[irq] = entry; +#endif } unsigned long prof_cpu_mask = ~0UL; @@ -370,6 +395,7 @@ /* create /proc/irq */ root_irq_dir = proc_mkdir("irq", 0); +#ifdef CONFIG_SMP /* create /proc/irq/prof_cpu_mask */ entry = create_proc_entry("prof_cpu_mask", 0700, root_irq_dir); @@ -377,6 +403,7 @@ entry->data = (void *)&prof_cpu_mask; entry->read_proc = prof_cpu_mask_read_proc; entry->write_proc = prof_cpu_mask_write_proc; +#endif /* * Create entries for all existing IRQs. 
@@ -426,6 +453,10 @@ action->next = NULL; action->dev_id = dev_id; +#ifdef CONFIG_SMP + select_smp_affinity(irq); +#endif + retval = setup_irq(irq, action); if (retval) kfree(action); @@ -522,10 +553,10 @@ *p++ = '\n'; } #if CONFIG_SMP - p += sprintf(p, "LOC: "); + p += sprintf(p, "IPI: "); for (j = 0; j < smp_num_cpus; j++) p += sprintf(p, "%10lu ", - cpu_data[cpu_logical_map(j)].smp_local_irq_count); + cpu_data[cpu_logical_map(j)].ipi_count); p += sprintf(p, "\n"); #endif p += sprintf(p, "ERR: %10lu\n", irq_err_count); diff -u --recursive --new-file v2.3.99-pre1/linux/arch/alpha/kernel/irq_alpha.c linux/arch/alpha/kernel/irq_alpha.c --- v2.3.99-pre1/linux/arch/alpha/kernel/irq_alpha.c Thu Mar 2 14:36:22 2000 +++ linux/arch/alpha/kernel/irq_alpha.c Fri Mar 17 13:02:05 2000 @@ -6,6 +6,7 @@ #include #include #include +#include #include #include @@ -61,14 +62,22 @@ break; case 1: #ifdef CONFIG_SMP - cpu_data[smp_processor_id()].smp_local_irq_count++; + { + long cpu; smp_percpu_timer_interrupt(®s); - if (smp_processor_id() == boot_cpuid) -#endif + cpu = smp_processor_id(); + if (cpu != boot_cpuid) { + irq_attempt(cpu, RTC_IRQ)++; + kstat.irqs[cpu][RTC_IRQ]++; + } else { handle_irq(RTC_IRQ, ®s); + } + } +#else + handle_irq(RTC_IRQ, ®s); +#endif return; case 2: - irq_err_count++; alpha_mv.machine_check(vector, la_ptr, ®s); return; case 3: diff -u --recursive --new-file v2.3.99-pre1/linux/arch/alpha/kernel/irq_pyxis.c linux/arch/alpha/kernel/irq_pyxis.c --- v2.3.99-pre1/linux/arch/alpha/kernel/irq_pyxis.c Wed Dec 31 16:00:00 1969 +++ linux/arch/alpha/kernel/irq_pyxis.c Thu Mar 16 14:08:32 2000 @@ -0,0 +1,127 @@ +/* + * linux/arch/alpha/kernel/irq_pyxis.c + * + * Based on code written by David A Rusling (david.rusling@reo.mts.dec.com). + * + * IRQ Code common to all PYXIS core logic chips. + */ + +#include +#include +#include + +#include +#include + +#include "proto.h" +#include "irq_impl.h" + + +/* Note mask bit is true for ENABLED irqs. 
*/ +static unsigned long cached_irq_mask; + +static inline void +pyxis_update_irq_hw(unsigned long mask) +{ + *(vulp)PYXIS_INT_MASK = mask; + mb(); + *(vulp)PYXIS_INT_MASK; +} + +static inline void +pyxis_enable_irq(unsigned int irq) +{ + pyxis_update_irq_hw(cached_irq_mask |= 1UL << (irq - 16)); +} + +static void +pyxis_disable_irq(unsigned int irq) +{ + pyxis_update_irq_hw(cached_irq_mask &= ~(1UL << (irq - 16))); +} + +static unsigned int +pyxis_startup_irq(unsigned int irq) +{ + pyxis_enable_irq(irq); + return 0; +} + +static void +pyxis_end_irq(unsigned int irq) +{ + if (!(irq_desc[irq].status & (IRQ_DISABLED|IRQ_INPROGRESS))) + pyxis_enable_irq(irq); +} + +static void +pyxis_mask_and_ack_irq(unsigned int irq) +{ + unsigned long bit = 1UL << (irq - 16); + unsigned long mask = cached_irq_mask &= ~bit; + + /* Disable the interrupt. */ + *(vulp)PYXIS_INT_MASK = mask; + wmb(); + /* Ack PYXIS PCI interrupt. */ + *(vulp)PYXIS_INT_REQ = bit; + mb(); + /* Re-read to force both writes. */ + *(vulp)PYXIS_INT_MASK; +} + +static struct hw_interrupt_type pyxis_irq_type = { + typename: "PYXIS", + startup: pyxis_startup_irq, + shutdown: pyxis_disable_irq, + enable: pyxis_enable_irq, + disable: pyxis_disable_irq, + ack: pyxis_mask_and_ack_irq, + end: pyxis_end_irq, +}; + +void +pyxis_device_interrupt(unsigned long vector, struct pt_regs *regs) +{ + unsigned long pld; + unsigned int i; + + /* Read the interrupt summary register of PYXIS */ + pld = *(vulp)PYXIS_INT_REQ; + pld &= cached_irq_mask; + + /* + * Now for every possible bit set, work through them and call + * the appropriate interrupt handler. 
+ */ + while (pld) { + i = ffz(~pld); + pld &= pld - 1; /* clear least bit set */ + if (i == 7) + isa_device_interrupt(vector, regs); + else + handle_irq(16+i, regs); + } +} + +void __init +init_pyxis_irqs(unsigned long ignore_mask) +{ + long i; + + *(vulp)PYXIS_INT_MASK = 0; /* disable all */ + *(vulp)PYXIS_INT_REQ = -1; /* flush all */ + mb(); + + /* Send -INTA pulses to clear any pending interrupts ...*/ + *(vuip) CIA_IACK_SC; + + for (i = 16; i < 48; ++i) { + if ((ignore_mask >> i) & 1) + continue; + irq_desc[i].status = IRQ_DISABLED | IRQ_LEVEL; + irq_desc[i].handler = &pyxis_irq_type; + } + + setup_irq(16+7, &isa_cascade_irqaction); +} diff -u --recursive --new-file v2.3.99-pre1/linux/arch/alpha/kernel/irq_srm.c linux/arch/alpha/kernel/irq_srm.c --- v2.3.99-pre1/linux/arch/alpha/kernel/irq_srm.c Thu Mar 2 14:36:22 2000 +++ linux/arch/alpha/kernel/irq_srm.c Thu Mar 16 14:07:09 2000 @@ -6,9 +6,6 @@ #include #include -#include -#include - #include "proto.h" #include "irq_impl.h" diff -u --recursive --new-file v2.3.99-pre1/linux/arch/alpha/kernel/machvec_impl.h linux/arch/alpha/kernel/machvec_impl.h --- v2.3.99-pre1/linux/arch/alpha/kernel/machvec_impl.h Thu Mar 2 14:36:22 2000 +++ linux/arch/alpha/kernel/machvec_impl.h Thu Mar 16 14:08:32 2000 @@ -77,27 +77,26 @@ mv_writew: CAT(low,_writew), \ mv_writel: CAT(low,_writel), \ mv_writeq: CAT(low,_writeq), \ - mv_ioremap: CAT(low,_ioremap), \ - mv_is_ioaddr: CAT(low,_is_ioaddr) + mv_ioremap: CAT(low,_ioremap) \ #define IO(UP,low) \ IO_LITE(UP,low), \ pci_ops: &CAT(low,_pci_ops) -/* Any assembler that can generate a GENERIC kernel can generate BWX - instructions. So always use them for PYXIS I/O. 
*/ - #define DO_APECS_IO IO(APECS,apecs) #define DO_CIA_IO IO(CIA,cia) #define DO_IRONGATE_IO IO(IRONGATE,irongate) #define DO_LCA_IO IO(LCA,lca) #define DO_MCPCIA_IO IO(MCPCIA,mcpcia) #define DO_POLARIS_IO IO(POLARIS,polaris) -#define DO_PYXIS_IO IO(PYXIS,pyxis) #define DO_T2_IO IO(T2,t2) #define DO_TSUNAMI_IO IO(TSUNAMI,tsunami) +#define DO_PYXIS_IO IO_LITE(CIA,cia_bwx), \ + pci_ops: &CAT(cia,_pci_ops) + #define BUS(which) \ + mv_is_ioaddr: CAT(which,_is_ioaddr), \ mv_pci_tbi: CAT(which,_pci_tbi) #define DO_APECS_BUS BUS(apecs) @@ -105,7 +104,6 @@ #define DO_IRONGATE_BUS BUS(irongate) #define DO_LCA_BUS BUS(lca) #define DO_MCPCIA_BUS BUS(mcpcia) -#define DO_PYXIS_BUS BUS(pyxis) #define DO_POLARIS_BUS BUS(polaris) #define DO_T2_BUS BUS(t2) #define DO_TSUNAMI_BUS BUS(tsunami) diff -u --recursive --new-file v2.3.99-pre1/linux/arch/alpha/kernel/osf_sys.c linux/arch/alpha/kernel/osf_sys.c --- v2.3.99-pre1/linux/arch/alpha/kernel/osf_sys.c Fri Mar 10 16:40:39 2000 +++ linux/arch/alpha/kernel/osf_sys.c Thu Mar 16 14:07:09 2000 @@ -189,15 +189,6 @@ return prio; } - -/* - * Heh. As documented by DEC.. - */ -asmlinkage unsigned long sys_madvise(void) -{ - return 0; -} - /* * No need to acquire the kernel lock, we're local.. 
*/ @@ -1373,9 +1364,4 @@ return -EFAULT; return ret; -} - -asmlinkage long osf_shmget (key_t key, int size, int flag) -{ - return sys_shmget (key, size, flag); } diff -u --recursive --new-file v2.3.99-pre1/linux/arch/alpha/kernel/pci_impl.h linux/arch/alpha/kernel/pci_impl.h --- v2.3.99-pre1/linux/arch/alpha/kernel/pci_impl.h Thu Mar 2 14:36:22 2000 +++ linux/arch/alpha/kernel/pci_impl.h Thu Mar 16 14:08:32 2000 @@ -138,6 +138,7 @@ dma_addr_t dma_base; unsigned int size; unsigned int next_entry; + unsigned int align_entry; }; diff -u --recursive --new-file v2.3.99-pre1/linux/arch/alpha/kernel/pci_iommu.c linux/arch/alpha/kernel/pci_iommu.c --- v2.3.99-pre1/linux/arch/alpha/kernel/pci_iommu.c Thu Mar 2 14:36:22 2000 +++ linux/arch/alpha/kernel/pci_iommu.c Thu Mar 16 14:08:32 2000 @@ -67,6 +67,10 @@ arena->size = window_size; arena->next_entry = 0; + /* Align allocations to a multiple of a page size. Not needed + unless there are chip bugs. */ + arena->align_entry = 1; + return arena; } @@ -74,28 +78,36 @@ iommu_arena_alloc(struct pci_iommu_arena *arena, long n) { unsigned long flags; - unsigned long *beg, *p, *end; - long i; + unsigned long *ptes; + long i, p, nent, mask; spin_lock_irqsave(&arena->lock, flags); /* Search forward for the first sequence of N empty ptes. */ - beg = arena->ptes; - end = beg + (arena->size >> PAGE_SHIFT); - p = beg + arena->next_entry; + ptes = arena->ptes; + nent = arena->size >> PAGE_SHIFT; + mask = arena->align_entry - 1; + p = (arena->next_entry + mask) & ~mask; i = 0; - while (i < n && p < end) - i = (*p++ == 0 ? i + 1 : 0); + while (i < n && p+i < nent) { + if (ptes[p+i]) + p = (p + i + 1 + mask) & ~mask, i = 0; + else + i = i + 1; + } if (i < n) { /* Reached the end. Flush the TLB and restart the search from the beginning. */ alpha_mv.mv_pci_tbi(arena->hose, 0, -1); - p = beg; - i = 0; - while (i < n && p < end) - i = (*p++ == 0 ? 
i + 1 : 0); + p = 0, i = 0; + while (i < n && p+i < nent) { + if (ptes[p+i]) + p = (p + i + 1 + mask) & ~mask, i = 0; + else + i = i + 1; + } if (i < n) { spin_unlock_irqrestore(&arena->lock, flags); @@ -107,13 +119,13 @@ bit zero is the valid bit, so write ~1 into everything. The chip specific bits will fill this in with something kosher when we return. */ - for (p = p - n, i = 0; i < n; ++i) - p[i] = ~1UL; + for (i = 0; i < n; ++i) + ptes[p+i] = ~1UL; - arena->next_entry = p - beg + n; + arena->next_entry = p + n; spin_unlock_irqrestore(&arena->lock, flags); - return p - beg; + return p; } static void @@ -238,6 +250,12 @@ npages = calc_npages((dma_addr & ~PAGE_MASK) + size); iommu_arena_free(arena, dma_ofs, npages); + /* If we're freeing ptes above the `next_entry' pointer, they + may have snuck back into the TLB since the last wrap flush. + We need to flush the TLB before reallocating these. */ + if (dma_ofs >= arena->next_entry) + alpha_mv.mv_pci_tbi(hose, dma_addr, dma_addr + size - 1); + DBGA("pci_unmap_single: sg [%x,%lx] np %ld from %p\n", dma_addr, size, npages, __builtin_return_address(0)); } @@ -509,6 +527,7 @@ struct pci_iommu_arena *arena; struct scatterlist *end; dma_addr_t max_dma; + dma_addr_t fbeg, fend; if (direction == PCI_DMA_NONE) BUG(); @@ -522,9 +541,11 @@ if (!arena || arena->dma_base + arena->size > max_dma) arena = hose->sg_isa; + fbeg = -1, fend = 0; for (end = sg + nents; sg < end; ++sg) { unsigned long addr, size; long npages, ofs; + dma_addr_t tend; addr = sg->dma_address; size = sg->dma_length; @@ -547,7 +568,17 @@ npages = calc_npages((addr & ~PAGE_MASK) + size); ofs = (addr - arena->dma_base) >> PAGE_SHIFT; iommu_arena_free(arena, ofs, npages); + + tend = addr + size - 1; + if (fbeg > addr) fbeg = addr; + if (fend < tend) fend = tend; } + + /* If we're freeing ptes above the `next_entry' pointer, they + may have snuck back into the TLB since the last wrap flush. + We need to flush the TLB before reallocating these. 
*/ + if ((fend - arena->dma_base) >> PAGE_SHIFT >= arena->next_entry) + alpha_mv.mv_pci_tbi(hose, fbeg, fend); DBGA("pci_unmap_sg: %d entries\n", nents - (end - sg)); } diff -u --recursive --new-file v2.3.99-pre1/linux/arch/alpha/kernel/proto.h linux/arch/alpha/kernel/proto.h --- v2.3.99-pre1/linux/arch/alpha/kernel/proto.h Thu Mar 2 14:36:22 2000 +++ linux/arch/alpha/kernel/proto.h Thu Mar 16 14:08:32 2000 @@ -20,7 +20,9 @@ /* core_cia.c */ extern struct pci_ops cia_pci_ops; +extern void cia_init_pci(void); extern void cia_init_arch(void); +extern void pyxis_init_arch(void); extern void cia_machine_check(u64, u64, struct pt_regs *); extern void cia_pci_tbi(struct pci_controler *, dma_addr_t, dma_addr_t); @@ -51,12 +53,6 @@ extern void polaris_init_arch(void); extern void polaris_machine_check(u64, u64, struct pt_regs *); #define polaris_pci_tbi ((void *)0) - -/* core_pyxis.c */ -extern struct pci_ops pyxis_pci_ops; -extern void pyxis_init_arch(void); -extern void pyxis_machine_check(u64, u64, struct pt_regs *); -extern void pyxis_pci_tbi(struct pci_controler *, dma_addr_t, dma_addr_t); /* core_t2.c */ extern struct pci_ops t2_pci_ops; diff -u --recursive --new-file v2.3.99-pre1/linux/arch/alpha/kernel/setup.c linux/arch/alpha/kernel/setup.c --- v2.3.99-pre1/linux/arch/alpha/kernel/setup.c Thu Mar 2 14:36:22 2000 +++ linux/arch/alpha/kernel/setup.c Fri Mar 17 13:02:05 2000 @@ -846,6 +846,22 @@ } } +static int +get_nr_processors(struct percpu_struct *cpubase, unsigned long num) +{ + struct percpu_struct *cpu; + int i, count = 0; + + for (i = 0; i < num; i++) { + cpu = (struct percpu_struct *) + ((char *)cpubase + i*hwrpb->processor_size); + if ((cpu->flags & 0x1cc) == 0x1cc) + count++; + } + return count; +} + + /* * BUFFER is PAGE_SIZE bytes long. 
*/ @@ -865,7 +881,7 @@ char *cpu_name; char *systype_name; char *sysvariation_name; - int len; + int len, nr_processors; cpu = (struct percpu_struct*)((char*)hwrpb + hwrpb->processor_offset); cpu_index = (unsigned) (cpu->type - 1); @@ -876,6 +892,8 @@ get_sysnames(hwrpb->sys_type, hwrpb->sys_variation, &systype_name, &sysvariation_name); + nr_processors = get_nr_processors(cpu, hwrpb->nr_processors); + len = sprintf(buffer, "cpu\t\t\t: Alpha\n" "cpu model\t\t: %s\n" @@ -894,7 +912,8 @@ "BogoMIPS\t\t: %lu.%02lu\n" "kernel unaligned acc\t: %ld (pc=%lx,va=%lx)\n" "user unaligned acc\t: %ld (pc=%lx,va=%lx)\n" - "platform string\t\t: %s\n", + "platform string\t\t: %s\n" + "cpus detected\t\t: %d\n", cpu_name, cpu->variation, cpu->revision, (char*)cpu->serial_no, systype_name, sysvariation_name, hwrpb->sys_revision, @@ -909,7 +928,7 @@ loops_per_sec / 500000, (loops_per_sec / 5000) % 100, unaligned[0].count, unaligned[0].pc, unaligned[0].va, unaligned[1].count, unaligned[1].pc, unaligned[1].va, - platform_string()); + platform_string(), nr_processors); #ifdef __SMP__ len += smp_info(buffer+len); diff -u --recursive --new-file v2.3.99-pre1/linux/arch/alpha/kernel/smp.c linux/arch/alpha/kernel/smp.c --- v2.3.99-pre1/linux/arch/alpha/kernel/smp.c Thu Mar 2 14:36:22 2000 +++ linux/arch/alpha/kernel/smp.c Fri Mar 17 13:02:05 2000 @@ -1003,15 +1003,11 @@ int smp_info(char *buffer) { - long i; - unsigned long sum = 0; - for (i = 0; i < NR_CPUS; i++) - sum += cpu_data[i].ipi_count; - - return sprintf(buffer, "CPUs probed %d active %d map 0x%lx IPIs %ld\n", - smp_num_probed, smp_num_cpus, cpu_present_mask, sum); + return sprintf(buffer, + "cpus active\t\t: %d\n" + "cpu active mask\t\t: %016lx\n", + smp_num_cpus, cpu_present_mask); } - #if DEBUG_SPINLOCK void diff -u --recursive --new-file v2.3.99-pre1/linux/arch/alpha/kernel/sys_alcor.c linux/arch/alpha/kernel/sys_alcor.c --- v2.3.99-pre1/linux/arch/alpha/kernel/sys_alcor.c Thu Mar 2 14:36:22 2000 +++ 
linux/arch/alpha/kernel/sys_alcor.c Thu Mar 16 14:08:32 2000 @@ -249,7 +249,7 @@ init_arch: cia_init_arch, init_irq: alcor_init_irq, init_rtc: common_init_rtc, - init_pci: common_init_pci, + init_pci: cia_init_pci, kill_arch: alcor_kill_arch, pci_map_irq: alcor_map_irq, pci_swizzle: common_swizzle, @@ -279,7 +279,7 @@ init_arch: cia_init_arch, init_irq: alcor_init_irq, init_rtc: common_init_rtc, - init_pci: common_init_pci, + init_pci: cia_init_pci, kill_arch: alcor_kill_arch, pci_map_irq: alcor_map_irq, pci_swizzle: common_swizzle, diff -u --recursive --new-file v2.3.99-pre1/linux/arch/alpha/kernel/sys_cabriolet.c linux/arch/alpha/kernel/sys_cabriolet.c --- v2.3.99-pre1/linux/arch/alpha/kernel/sys_cabriolet.c Thu Mar 2 14:36:22 2000 +++ linux/arch/alpha/kernel/sys_cabriolet.c Thu Mar 16 14:08:32 2000 @@ -3,7 +3,7 @@ * * Copyright (C) 1995 David A Rusling * Copyright (C) 1996 Jay A Estabrook - * Copyright (C) 1998, 1999 Richard Henderson + * Copyright (C) 1998, 1999, 2000 Richard Henderson * * Code supporting the Cabriolet (AlphaPC64), EB66+, and EB164, * PC164 and LX164. 
@@ -28,7 +28,6 @@ #include #include #include -#include #include "proto.h" #include "irq_impl.h" @@ -223,6 +222,12 @@ ns87312_enable_ide(0x398); } +static inline void __init +cia_cab_init_pci(void) +{ + cia_init_pci(); + ns87312_enable_ide(0x398); +} /* * The PC164 and LX164 have 19 PCI interrupts, four from each of the four @@ -286,7 +291,7 @@ static inline void __init alphapc164_init_pci(void) { - common_init_pci(); + cia_init_pci(); SMC93x_Init(); } @@ -339,7 +344,7 @@ init_arch: cia_init_arch, init_irq: cabriolet_init_irq, init_rtc: common_init_rtc, - init_pci: cabriolet_init_pci, + init_pci: cia_cab_init_pci, pci_map_irq: cabriolet_map_irq, pci_swizzle: common_swizzle, }; @@ -377,8 +382,8 @@ DO_EV5_MMU, DO_DEFAULT_RTC, DO_PYXIS_IO, - DO_PYXIS_BUS, - machine_check: pyxis_machine_check, + DO_CIA_BUS, + machine_check: cia_machine_check, max_dma_address: ALPHA_MAX_DMA_ADDRESS, min_io_address: DEFAULT_IO_BASE, min_mem_address: DEFAULT_MEM_BASE, diff -u --recursive --new-file v2.3.99-pre1/linux/arch/alpha/kernel/sys_dp264.c linux/arch/alpha/kernel/sys_dp264.c --- v2.3.99-pre1/linux/arch/alpha/kernel/sys_dp264.c Thu Mar 2 14:36:22 2000 +++ linux/arch/alpha/kernel/sys_dp264.c Thu Mar 16 22:32:21 2000 @@ -36,32 +36,31 @@ /* Note mask bit is true for ENABLED irqs. 
*/ static unsigned long cached_irq_mask; /* dp264 boards handle at max four CPUs */ -static unsigned long cpu_irq_affinity[4]; +static unsigned long cpu_irq_affinity[4] = { ~0UL, ~0UL, ~0UL, ~0UL }; spinlock_t dp264_irq_lock = SPIN_LOCK_UNLOCKED; static void -tsunami_update_irq_hw(unsigned long mask, unsigned long isa_enable) +tsunami_update_irq_hw(unsigned long mask) { register tsunami_cchip *cchip = TSUNAMI_cchip; + unsigned long isa_enable = 1UL << 55; register int bcpu = boot_cpuid; #ifdef CONFIG_SMP register unsigned long cpm = cpu_present_mask; volatile unsigned long *dim0, *dim1, *dim2, *dim3; - unsigned long mask0, mask1, mask2, mask3, maskB, dummy; + unsigned long mask0, mask1, mask2, mask3, dummy; - mask0 = mask1 = mask2 = mask3 = mask; - maskB = mask | isa_enable; - if (bcpu == 0) mask0 = maskB; - else if (bcpu == 1) mask1 = maskB; - else if (bcpu == 2) mask2 = maskB; - else if (bcpu == 3) mask3 = maskB; - - mask0 &= cpu_irq_affinity[0]; - mask1 &= cpu_irq_affinity[1]; - mask2 &= cpu_irq_affinity[2]; - mask3 &= cpu_irq_affinity[3]; + mask0 = mask & cpu_irq_affinity[0]; + mask1 = mask & cpu_irq_affinity[1]; + mask2 = mask & cpu_irq_affinity[2]; + mask3 = mask & cpu_irq_affinity[3]; + + if (bcpu == 0) mask0 |= isa_enable; + else if (bcpu == 1) mask1 |= isa_enable; + else if (bcpu == 2) mask2 |= isa_enable; + else mask3 |= isa_enable; dim0 = &cchip->dim0.csr; dim1 = &cchip->dim1.csr; @@ -86,7 +85,7 @@ if (bcpu == 0) dimB = &cchip->dim0.csr; else if (bcpu == 1) dimB = &cchip->dim1.csr; else if (bcpu == 2) dimB = &cchip->dim2.csr; - else if (bcpu == 3) dimB = &cchip->dim3.csr; + else dimB = &cchip->dim3.csr; *dimB = mask | isa_enable; mb(); @@ -94,24 +93,12 @@ #endif } -static inline void -dp264_update_irq_hw(unsigned long mask) -{ - tsunami_update_irq_hw(mask, (1UL << 55) | 0xffff); -} - -static inline void -clipper_update_irq_hw(unsigned long mask) -{ - tsunami_update_irq_hw(mask, 1UL << 55); -} - static void dp264_enable_irq(unsigned int irq) { 
spin_lock(&dp264_irq_lock); cached_irq_mask |= 1UL << irq; - dp264_update_irq_hw(cached_irq_mask); + tsunami_update_irq_hw(cached_irq_mask); spin_unlock(&dp264_irq_lock); } @@ -120,7 +107,7 @@ { spin_lock(&dp264_irq_lock); cached_irq_mask &= ~(1UL << irq); - dp264_update_irq_hw(cached_irq_mask); + tsunami_update_irq_hw(cached_irq_mask); spin_unlock(&dp264_irq_lock); } @@ -142,8 +129,8 @@ clipper_enable_irq(unsigned int irq) { spin_lock(&dp264_irq_lock); - cached_irq_mask |= 1UL << irq; - clipper_update_irq_hw(cached_irq_mask); + cached_irq_mask |= 1UL << (irq - 16); + tsunami_update_irq_hw(cached_irq_mask); spin_unlock(&dp264_irq_lock); } @@ -151,8 +138,8 @@ clipper_disable_irq(unsigned int irq) { spin_lock(&dp264_irq_lock); - cached_irq_mask &= ~(1UL << irq); - clipper_update_irq_hw(cached_irq_mask); + cached_irq_mask &= ~(1UL << (irq - 16)); + tsunami_update_irq_hw(cached_irq_mask); spin_unlock(&dp264_irq_lock); } @@ -191,7 +178,7 @@ { spin_lock(&dp264_irq_lock); cpu_set_irq_affinity(irq, affinity); - dp264_update_irq_hw(cached_irq_mask); + tsunami_update_irq_hw(cached_irq_mask); spin_unlock(&dp264_irq_lock); } @@ -199,8 +186,8 @@ clipper_set_affinity(unsigned int irq, unsigned long affinity) { spin_lock(&dp264_irq_lock); - cpu_set_irq_affinity(irq, affinity); - clipper_update_irq_hw(cached_irq_mask); + cpu_set_irq_affinity(irq - 16, affinity); + tsunami_update_irq_hw(cached_irq_mask); spin_unlock(&dp264_irq_lock); } @@ -304,12 +291,10 @@ } static void __init -init_tsunami_irqs(struct hw_interrupt_type * ops) +init_tsunami_irqs(struct hw_interrupt_type * ops, int imin, int imax) { long i; - - /* Only irqs between 16 and 47 are tsunami irqs. 
*/ - for (i = 16; i < 48; ++i) { + for (i = imin; i <= imax; ++i) { irq_desc[i].status = IRQ_DISABLED | IRQ_LEVEL; irq_desc[i].handler = ops; } @@ -318,8 +303,6 @@ static void __init dp264_init_irq(void) { - int cpu; - outb(0, DMA1_RESET_REG); outb(0, DMA2_RESET_REG); outb(DMA_MODE_CASCADE, DMA2_MODE_REG); @@ -328,13 +311,10 @@ if (alpha_using_srm) alpha_mv.device_interrupt = dp264_srm_device_interrupt; - /* this is single threaded by design so no need of any smp lock */ - for (cpu = 0; cpu < 4; cpu++) - cpu_irq_affinity[cpu] = ~0UL; - dp264_update_irq_hw(0UL); + tsunami_update_irq_hw(0); init_i8259a_irqs(); - init_tsunami_irqs(&dp264_irq_type); + init_tsunami_irqs(&dp264_irq_type, 16, 47); } static void __init @@ -348,10 +328,10 @@ if (alpha_using_srm) alpha_mv.device_interrupt = clipper_srm_device_interrupt; - clipper_update_irq_hw(0UL); + tsunami_update_irq_hw(0); init_i8259a_irqs(); - init_tsunami_irqs(&clipper_irq_type); + init_tsunami_irqs(&clipper_irq_type, 24, 63); } diff -u --recursive --new-file v2.3.99-pre1/linux/arch/alpha/kernel/sys_eiger.c linux/arch/alpha/kernel/sys_eiger.c --- v2.3.99-pre1/linux/arch/alpha/kernel/sys_eiger.c Thu Mar 2 14:36:22 2000 +++ linux/arch/alpha/kernel/sys_eiger.c Thu Mar 16 14:07:09 2000 @@ -34,10 +34,6 @@ #include "machvec_impl.h" -/* - * HACK ALERT! only the boot cpu is used for interrupts. - */ - /* Note that this interrupt code is identical to TAKARA. */ /* Note mask bit is true for DISABLED irqs. */ diff -u --recursive --new-file v2.3.99-pre1/linux/arch/alpha/kernel/sys_miata.c linux/arch/alpha/kernel/sys_miata.c --- v2.3.99-pre1/linux/arch/alpha/kernel/sys_miata.c Thu Mar 2 14:36:22 2000 +++ linux/arch/alpha/kernel/sys_miata.c Thu Mar 16 14:08:32 2000 @@ -3,7 +3,7 @@ * * Copyright (C) 1995 David A Rusling * Copyright (C) 1996 Jay A Estabrook - * Copyright (C) 1998, 1999 Richard Henderson + * Copyright (C) 1998, 1999, 2000 Richard Henderson * * Code supporting the MIATA (EV56+PYXIS). 
*/ @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include "proto.h" #include "irq_impl.h" @@ -215,7 +215,7 @@ static void __init miata_init_pci(void) { - common_init_pci(); + cia_init_pci(); SMC669_Init(0); /* it might be a GL (fails harmlessly if not) */ es1888_init(); } @@ -240,8 +240,8 @@ DO_EV5_MMU, DO_DEFAULT_RTC, DO_PYXIS_IO, - DO_PYXIS_BUS, - machine_check: pyxis_machine_check, + DO_CIA_BUS, + machine_check: cia_machine_check, max_dma_address: ALPHA_MAX_DMA_ADDRESS, min_io_address: DEFAULT_IO_BASE, min_mem_address: DEFAULT_MEM_BASE, diff -u --recursive --new-file v2.3.99-pre1/linux/arch/alpha/kernel/sys_mikasa.c linux/arch/alpha/kernel/sys_mikasa.c --- v2.3.99-pre1/linux/arch/alpha/kernel/sys_mikasa.c Thu Mar 2 14:36:22 2000 +++ linux/arch/alpha/kernel/sys_mikasa.c Thu Mar 16 14:08:32 2000 @@ -258,7 +258,7 @@ init_arch: cia_init_arch, init_irq: mikasa_init_irq, init_rtc: common_init_rtc, - init_pci: common_init_pci, + init_pci: cia_init_pci, pci_map_irq: mikasa_map_irq, pci_swizzle: common_swizzle, }; diff -u --recursive --new-file v2.3.99-pre1/linux/arch/alpha/kernel/sys_noritake.c linux/arch/alpha/kernel/sys_noritake.c --- v2.3.99-pre1/linux/arch/alpha/kernel/sys_noritake.c Thu Mar 2 14:36:22 2000 +++ linux/arch/alpha/kernel/sys_noritake.c Thu Mar 16 14:08:32 2000 @@ -309,7 +309,7 @@ init_arch: cia_init_arch, init_irq: noritake_init_irq, init_rtc: common_init_rtc, - init_pci: common_init_pci, + init_pci: cia_init_pci, pci_map_irq: noritake_map_irq, pci_swizzle: noritake_swizzle, }; diff -u --recursive --new-file v2.3.99-pre1/linux/arch/alpha/kernel/sys_ruffian.c linux/arch/alpha/kernel/sys_ruffian.c --- v2.3.99-pre1/linux/arch/alpha/kernel/sys_ruffian.c Thu Mar 2 14:36:22 2000 +++ linux/arch/alpha/kernel/sys_ruffian.c Thu Mar 16 14:08:32 2000 @@ -3,7 +3,7 @@ * * Copyright (C) 1995 David A Rusling * Copyright (C) 1996 Jay A Estabrook - * Copyright (C) 1998, 1999 Richard Henderson + * Copyright (C) 1998, 1999, 2000 Richard Henderson * * 
Code supporting the RUFFIAN. */ @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include "proto.h" #include "irq_impl.h" @@ -149,8 +149,8 @@ DO_EV5_MMU, DO_DEFAULT_RTC, DO_PYXIS_IO, - DO_PYXIS_BUS, - machine_check: pyxis_machine_check, + DO_CIA_BUS, + machine_check: cia_machine_check, max_dma_address: ALPHA_RUFFIAN_MAX_DMA_ADDRESS, min_io_address: DEFAULT_IO_BASE, min_mem_address: DEFAULT_MEM_BASE, @@ -161,7 +161,7 @@ init_arch: pyxis_init_arch, init_irq: ruffian_init_irq, init_rtc: ruffian_init_rtc, - init_pci: common_init_pci, + init_pci: cia_init_pci, kill_arch: ruffian_kill_arch, pci_map_irq: ruffian_map_irq, pci_swizzle: common_swizzle, diff -u --recursive --new-file v2.3.99-pre1/linux/arch/alpha/kernel/sys_sx164.c linux/arch/alpha/kernel/sys_sx164.c --- v2.3.99-pre1/linux/arch/alpha/kernel/sys_sx164.c Thu Mar 2 14:36:22 2000 +++ linux/arch/alpha/kernel/sys_sx164.c Thu Mar 16 14:08:32 2000 @@ -3,7 +3,7 @@ * * Copyright (C) 1995 David A Rusling * Copyright (C) 1996 Jay A Estabrook - * Copyright (C) 1998, 1999 Richard Henderson + * Copyright (C) 1998, 1999, 2000 Richard Henderson * * Code supporting the SX164 (PCA56+PYXIS). 
*/ @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include "proto.h" #include "irq_impl.h" @@ -107,10 +107,10 @@ return COMMON_TABLE_LOOKUP; } -void __init +static void __init sx164_init_pci(void) { - common_init_pci(); + cia_init_pci(); SMC669_Init(0); } @@ -124,8 +124,8 @@ DO_EV5_MMU, DO_DEFAULT_RTC, DO_PYXIS_IO, - DO_PYXIS_BUS, - machine_check: pyxis_machine_check, + DO_CIA_BUS, + machine_check: cia_machine_check, max_dma_address: ALPHA_MAX_DMA_ADDRESS, min_io_address: DEFAULT_IO_BASE, min_mem_address: DEFAULT_MEM_BASE, diff -u --recursive --new-file v2.3.99-pre1/linux/arch/alpha/kernel/sys_takara.c linux/arch/alpha/kernel/sys_takara.c --- v2.3.99-pre1/linux/arch/alpha/kernel/sys_takara.c Thu Mar 2 14:36:22 2000 +++ linux/arch/alpha/kernel/sys_takara.c Thu Mar 16 14:08:32 2000 @@ -258,7 +258,7 @@ if (alpha_using_srm) alpha_mv.pci_map_irq = takara_map_irq_srm; - common_init_pci(); + cia_init_pci(); ns87312_enable_ide(0x26e); } diff -u --recursive --new-file v2.3.99-pre1/linux/arch/alpha/kernel/time.c linux/arch/alpha/kernel/time.c --- v2.3.99-pre1/linux/arch/alpha/kernel/time.c Thu Mar 2 14:36:22 2000 +++ linux/arch/alpha/kernel/time.c Wed Mar 15 11:08:04 2000 @@ -48,6 +48,7 @@ static int set_rtc_mmss(unsigned long); +spinlock_t rtc_lock = SPIN_LOCK_UNLOCKED; /* * Shift amount by which scaled_ticks_per_cycle is scaled. 
Shifting @@ -163,7 +164,7 @@ } void -common_init_rtc() +common_init_rtc(void) { unsigned char x; @@ -406,6 +407,8 @@ int real_seconds, real_minutes, cmos_minutes; unsigned char save_control, save_freq_select; + /* irq are locally disabled here */ + spin_lock(&rtc_lock); /* Tell the clock it's being set */ save_control = CMOS_READ(RTC_CONTROL); CMOS_WRITE((save_control|RTC_SET), RTC_CONTROL); @@ -455,6 +458,7 @@ */ CMOS_WRITE(save_control, RTC_CONTROL); CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); + spin_unlock(&rtc_lock); return retval; } diff -u --recursive --new-file v2.3.99-pre1/linux/arch/alpha/kernel/traps.c linux/arch/alpha/kernel/traps.c --- v2.3.99-pre1/linux/arch/alpha/kernel/traps.c Sat Feb 26 22:31:38 2000 +++ linux/arch/alpha/kernel/traps.c Sun Mar 19 10:08:11 2000 @@ -215,10 +215,8 @@ /* EV4 does not implement anything except normal rounding. Everything else will come here as an illegal instruction. Emulate them. */ - if (alpha_fp_emul(regs.pc)) { - regs.pc += 4; + if (alpha_fp_emul(regs.pc-4)) return; - } } send_sig(SIGILL, current, 1); break; diff -u --recursive --new-file v2.3.99-pre1/linux/arch/arm/Makefile linux/arch/arm/Makefile --- v2.3.99-pre1/linux/arch/arm/Makefile Tue Mar 14 19:10:38 2000 +++ linux/arch/arm/Makefile Sun Mar 19 10:16:36 2000 @@ -14,7 +14,6 @@ OBJCOPY := $(CROSS_COMPILE)objcopy -O binary -R .note -R .comment -S CPP := $(CC) -E -PERL := perl LINKFLAGS := -p -X -T arch/arm/vmlinux.lds ARCHCC := $(word 1,$(CC)) @@ -100,6 +99,8 @@ endif LIBGCC := $(shell $(CC) $(CFLAGS) --print-libgcc-file-name) + +export LIBGCC ifeq ($(CONFIG_ARCH_A5K),y) MACHINE = a5k diff -u --recursive --new-file v2.3.99-pre1/linux/arch/arm/config.in linux/arch/arm/config.in --- v2.3.99-pre1/linux/arch/arm/config.in Tue Mar 14 19:10:38 2000 +++ linux/arch/arm/config.in Sat Mar 18 11:38:03 2000 @@ -35,9 +35,9 @@ if [ "$CONFIG_HOST_FOOTBRIDGE" = "y" ]; then bool ' Include support for EBSA285' CONFIG_ARCH_EBSA285 - bool ' Include support for CATS' CONFIG_CATS + 
bool ' Include support for CATS' CONFIG_ARCH_CATS bool ' Include support for NetWinder' CONFIG_ARCH_NETWINDER - bool ' Include support for Compaq Personal Server' CONFIG_PERSONAL_SERVER + bool ' Include support for Compaq Personal Server' CONFIG_ARCH_PERSONAL_SERVER fi if [ "$CONFIG_ADDIN_FOOTBRIDGE" = "y" ]; then @@ -124,7 +124,7 @@ # # These machines have ISA-DMA # -if [ "$CONFIG_CATS" = "y" -o \ +if [ "$CONFIG_ARCH_CATS" = "y" -o \ "$CONFIG_ARCH_SHARK" = "y" -o \ "$CONFIG_ARCH_NETWINDER" = "y" ]; then define_bool CONFIG_ISA_DMA y @@ -171,8 +171,8 @@ if [ "$CONFIG_ARCH_EBSA110" = "y" -o \ "$CONFIG_ARCH_SA1100" = "y" -o \ "$CONFIG_ARCH_NETWINDER" = "y" -o \ - "$CONFIG_PERSONAL_SERVER" = "y" -o \ - "$CONFIG_CATS" = "y" ]; then + "$CONFIG_ARCH_PERSONAL_SERVER" = "y" -o \ + "$CONFIG_ARCH_CATS" = "y" ]; then string 'Initial kernel command string' CONFIG_CMDLINE fi if [ "$CONFIG_ARCH_NETWINDER" = "y" -o \ diff -u --recursive --new-file v2.3.99-pre1/linux/arch/arm/kernel/arch.c linux/arch/arm/kernel/arch.c --- v2.3.99-pre1/linux/arch/arm/kernel/arch.c Tue Mar 14 19:10:38 2000 +++ linux/arch/arm/kernel/arch.c Sat Mar 18 11:38:03 2000 @@ -192,6 +192,7 @@ * in head-armv.S. 
*/ static struct machine_desc machine_desc[] __attribute__ ((__section__ (".arch.info"))) = { +#ifdef CONFIG_ARCH_EBSA110 { MACH_TYPE_EBSA110, "EBSA110", /* RMK */ @@ -199,119 +200,160 @@ NO_VIDEO, 1, 0, 1, 1, 1, NULL - }, { + }, +#endif +#ifdef CONFIG_ARCH_RPC + { MACH_TYPE_RISCPC, "Acorn-RiscPC", /* RMK */ 0x10000100, NO_VIDEO, 1, 1, 0, 0, 0, fixup_acorn - }, { - 2, - "unknown", - NO_PARAMS, - NO_VIDEO, - 0, 0, 0, 0, 0, - NULL - }, { + }, +#endif +#ifdef CONFIG_ARCH_NEXUSPCI + { MACH_TYPE_NEXUSPCI, "FTV/PCI", /* Philip Blundell */ NO_PARAMS, NO_VIDEO, 0, 0, 0, 0, 0, NULL - }, { + }, +#endif +#ifdef CONFIG_ARCH_EBSA285 + { MACH_TYPE_EBSA285, "EBSA285", /* RMK */ 0x00000100, 0x000a0000, 0x000bffff, 0, 0, 0, 0, 0, fixup_ebsa285 - }, { + }, +#endif +#ifdef CONFIG_ARCH_NETWINDER + { MACH_TYPE_NETWINDER, "Rebel-NetWinder", /* RMK */ 0x00000100, 0x000a0000, 0x000bffff, 1, 0, 1, 0, 0, fixup_netwinder - }, { + }, +#endif +#ifdef CONFIG_ARCH_CATS + { MACH_TYPE_CATS, "Chalice-CATS", /* Philip Blundell */ NO_PARAMS, 0x000a0000, 0x000bffff, 0, 0, 0, 0, 1, fixup_cats - }, { + }, +#endif +#ifdef CONFIG_ARCH_TBOX + { MACH_TYPE_TBOX, "unknown-TBOX", /* Philip Blundell */ NO_PARAMS, NO_VIDEO, 0, 0, 0, 0, 0, NULL - }, { + }, +#endif +#ifdef CONFIG_ARCH_CO285 + { MACH_TYPE_CO285, "co-EBSA285", /* Mark van Doesburg */ NO_PARAMS, NO_VIDEO, 0, 0, 0, 0, 0, fixup_coebsa285 - }, { + }, +#endif +#ifdef CONFIG_ARCH_CLPS7110 + { MACH_TYPE_CLPS7110, "CL-PS7110", /* Werner Almesberger */ NO_PARAMS, NO_VIDEO, 0, 0, 0, 0, 0, NULL - }, { + }, +#endif +#ifdef CONFIG_ARCH_ARC + { MACH_TYPE_ARCHIMEDES, "Acorn-Archimedes",/* RMK/DAG */ 0x0207c000, NO_VIDEO, 0, 0, 0, 0, 0, fixup_acorn - }, { + }, +#endif +#ifdef CONFIG_ARCH_A5K + { MACH_TYPE_A5K, "Acorn-A5000", /* RMK/PB */ 0x0207c000, NO_VIDEO, 0, 0, 0, 0, 0, fixup_acorn - }, { + }, +#endif +#ifdef CONFIG_ARCH_ETOILE + { MACH_TYPE_ETOILE, "Etoile", /* Alex de Vries */ NO_PARAMS, NO_VIDEO, 0, 0, 0, 0, 0, NULL - }, { + }, +#endif +#ifdef 
CONFIG_ARCH_LACIE_NAS + { MACH_TYPE_LACIE_NAS, "LaCie_NAS", /* Benjamin Herrenschmidt */ NO_PARAMS, NO_VIDEO, 0, 0, 0, 0, 0, NULL - }, { + }, +#endif +#ifdef CONFIG_ARCH_CLPS7500 + { MACH_TYPE_CLPS7500, "CL-PS7500", /* Philip Blundell */ NO_PARAMS, NO_VIDEO, 0, 0, 0, 0, 0, NULL - }, { + }, +#endif +#ifdef CONFIG_ARCH_SHARK + { MACH_TYPE_SHARK, "Shark", /* Alexander Schulz */ NO_PARAMS, 0x06000000, 0x06000000+0x001fffff, 0, 0, 0, 0, 0, NULL - }, { + }, +#endif +#ifdef CONFIG_ARCH_SA1100 + { MACH_TYPE_SA1100, "SA1100-based", /* Nicolas Pitre */ NO_PARAMS, NO_VIDEO, 0, 0, 0, 0, 0, fixup_sa1100 - }, { + }, +#endif +#ifdef CONFIG_ARCH_PERSONAL_SERVER + { MACH_TYPE_PERSONAL_SERVER, "Compaq Personal Server", NO_PARAMS, @@ -319,4 +361,5 @@ 0, 0, 0, 0, 0, NULL } +#endif }; diff -u --recursive --new-file v2.3.99-pre1/linux/arch/arm/kernel/bios32.c linux/arch/arm/kernel/bios32.c --- v2.3.99-pre1/linux/arch/arm/kernel/bios32.c Tue Mar 14 19:10:38 2000 +++ linux/arch/arm/kernel/bios32.c Sat Mar 18 11:38:03 2000 @@ -164,7 +164,52 @@ void __init pcibios_fixup_bus(struct pci_bus *bus) { struct list_head *walk = &bus->devices; + struct arm_pci_sysdata *sysdata = + (struct arm_pci_sysdata *)bus->sysdata; + struct arm_bus_sysdata *busdata; + + if (bus->number < MAX_NR_BUS) + busdata = sysdata->bus + bus->number; + else + BUG(); + + /* + * Walk the devices on this bus, working out what we can + * and can't support. + */ + for (walk = walk->next; walk != &bus->devices; walk = walk->next) { + struct pci_dev *dev = pci_dev_b(walk); + u16 status; + + pci_read_config_word(dev, PCI_STATUS, &status); + + /* + * If this device does not support fast back to back + * transfers, the bus as a whole cannot support them. + */ + if (!(status & PCI_STATUS_FAST_BACK)) + busdata->features &= ~PCI_COMMAND_FAST_BACK; + + /* + * Calculate the maximum devsel latency. 
+ */ + if (busdata->maxdevsel < (status & PCI_STATUS_DEVSEL_MASK)) + busdata->maxdevsel = (status & PCI_STATUS_DEVSEL_MASK); + /* + * If this device is an ISA bridge, set the have_isa_bridge + * flag. We will then go looking for things like keyboard, + * etc + */ + if (dev->class >> 8 == PCI_CLASS_BRIDGE_ISA || + dev->class >> 8 == PCI_CLASS_BRIDGE_EISA) + have_isa_bridge = !0; + } + + /* + * Now walk the devices again, this time setting them up. + */ + walk = &bus->devices; for (walk = walk->next; walk != &bus->devices; walk = walk->next) { struct pci_dev *dev = pci_dev_b(walk); u16 cmd; @@ -182,25 +227,15 @@ pci_write_config_dword(dev, 0x40, 0x80000000); /* - * If this device is an ISA bridge, set the have_isa_bridge - * flag. We will then go looking for things like keyboard, - * etc - */ - if (dev->class >> 8 == PCI_CLASS_BRIDGE_ISA || - dev->class >> 8 == PCI_CLASS_BRIDGE_EISA) - have_isa_bridge = !0; - - /* * Set latency timer to 32, and a cache line size to 32 bytes. - * Also, set system error enable, parity error enable, and - * fast back to back transaction enable. Disable ROM. + * Also, set system error enable, parity error enable. + * Disable ROM. 
*/ pci_write_config_byte(dev, PCI_LATENCY_TIMER, 32); pci_write_config_byte(dev, PCI_CACHE_LINE_SIZE, 8); pci_read_config_word(dev, PCI_COMMAND, &cmd); - cmd |= PCI_COMMAND_FAST_BACK | PCI_COMMAND_SERR | - PCI_COMMAND_PARITY; + cmd |= busdata->features; pci_write_config_word(dev, PCI_COMMAND, cmd); pci_read_config_word(dev, PCI_COMMAND, &cmd); @@ -254,7 +289,7 @@ }; #endif -#ifdef CONFIG_CATS +#ifdef CONFIG_ARCH_CATS /* cats host-specific stuff */ static int irqmap_cats[] __initdata = { IRQ_PCI, IRQ_IN0, IRQ_IN1, IRQ_IN3 }; @@ -323,7 +358,7 @@ }; #endif -#ifdef CONFIG_PERSONAL_SERVER +#ifdef CONFIG_ARCH_PERSONAL_SERVER static int irqmap_personal_server[] __initdata = { IRQ_IN0, IRQ_IN1, IRQ_IN2, IRQ_IN3, 0, 0, 0, IRQ_DOORBELLHOST, IRQ_DMA1, IRQ_DMA2, IRQ_PCI @@ -399,7 +434,7 @@ break; } #endif -#ifdef CONFIG_CATS +#ifdef CONFIG_ARCH_CATS if (machine_is_cats()) { hw_pci = &cats_pci; break; @@ -411,7 +446,7 @@ break; } #endif -#ifdef CONFIG_PERSONAL_SERVER +#ifdef CONFIG_ARCH_PERSONAL_SERVER if (machine_is_personal_server()) { hw_pci = &personal_server_pci; break; diff -u --recursive --new-file v2.3.99-pre1/linux/arch/arm/kernel/bios32.h linux/arch/arm/kernel/bios32.h --- v2.3.99-pre1/linux/arch/arm/kernel/bios32.h Fri Jan 21 18:19:15 2000 +++ linux/arch/arm/kernel/bios32.h Sat Mar 18 11:38:03 2000 @@ -1,9 +1,27 @@ +#define MAX_NR_BUS 2 + +struct arm_bus_sysdata { + /* + * bitmask of features we can turn. + * See PCI command register for more info. + */ + u16 features; + /* + * Maximum devsel for this bus. 
+ */ + u16 maxdevsel; +}; + +struct arm_pci_sysdata { + struct arm_bus_sysdata bus[MAX_NR_BUS]; +}; + struct hw_pci { - void (*init)(void); + void (*init)(void); unsigned long io_start; unsigned long mem_start; - u8 (*swizzle)(struct pci_dev *dev, u8 *pin); - int (*map_irq)(struct pci_dev *dev, u8 slot, u8 pin); + u8 (*swizzle)(struct pci_dev *dev, u8 *pin); + int (*map_irq)(struct pci_dev *dev, u8 slot, u8 pin); }; void __init dc21285_init(void); diff -u --recursive --new-file v2.3.99-pre1/linux/arch/arm/kernel/dec21285.c linux/arch/arm/kernel/dec21285.c --- v2.3.99-pre1/linux/arch/arm/kernel/dec21285.c Fri Jan 21 18:19:15 2000 +++ linux/arch/arm/kernel/dec21285.c Sat Mar 18 11:38:04 2000 @@ -206,8 +206,11 @@ void __init dc21285_init(void) { static struct resource csrmem, csrio; - unsigned int mem_size; + struct arm_pci_sysdata sysdata; unsigned long cntl; + unsigned int mem_size, pci_cmd = PCI_COMMAND_IO | PCI_COMMAND_MEMORY | + PCI_COMMAND_MASTER | PCI_COMMAND_INVALIDATE; + int i; mem_size = (unsigned int)high_memory - PAGE_OFFSET; *CSR_SDRAMBASEMASK = (mem_size - 1) & 0x0ffc0000; @@ -240,25 +243,34 @@ *CSR_PCICSRIOBASE = csrio.start; *CSR_PCISDRAMBASE = virt_to_bus((void *)PAGE_OFFSET); *CSR_PCIROMBASE = 0; - *CSR_PCICMD = PCI_COMMAND_IO | PCI_COMMAND_MEMORY | - PCI_COMMAND_MASTER | PCI_COMMAND_FAST_BACK | - PCI_COMMAND_INVALIDATE | PCI_COMMAND_PARITY | + *CSR_PCICMD = pci_cmd | (1 << 31) | (1 << 29) | (1 << 28) | (1 << 24); #endif printk(KERN_DEBUG "PCI: DC21285 footbridge, revision %02lX\n", *CSR_CLASSREV & 0xff); - pci_scan_bus(0, &dc21285_ops, NULL); + for (i = 0; i < MAX_NR_BUS; i++) { + sysdata.bus[i].features = PCI_COMMAND_FAST_BACK | + PCI_COMMAND_SERR | + PCI_COMMAND_PARITY; + sysdata.bus[i].maxdevsel = PCI_STATUS_DEVSEL_FAST; + } + + pci_scan_bus(0, &dc21285_ops, &sysdata); + + pci_cmd |= sysdata.bus[0].features; + + printk("Fast back to back PCI transfers %sabled\n", + (sysdata.bus[0].features & PCI_COMMAND_FAST_BACK) ? 
"en" : "dis"); /* * Clear any existing errors - we aren't * interested in historical data... */ - cntl = *CSR_SA110_CNTL & 0xffffde07; - *CSR_SA110_CNTL = cntl | SA110_CNTL_RXSERR; - cntl = *CSR_PCICMD & 0x0000ffff; - *CSR_PCICMD = cntl | 1 << 31 | 1 << 29 | 1 << 28 | 1 << 24; + cntl = *CSR_SA110_CNTL & 0xffffde07; + *CSR_SA110_CNTL = cntl | SA110_CNTL_RXSERR; + *CSR_PCICMD = pci_cmd | 1 << 31 | 1 << 29 | 1 << 28 | 1 << 24; /* * Initialise PCI error IRQ after we've finished probing diff -u --recursive --new-file v2.3.99-pre1/linux/arch/arm/kernel/hw-footbridge.c linux/arch/arm/kernel/hw-footbridge.c --- v2.3.99-pre1/linux/arch/arm/kernel/hw-footbridge.c Thu Mar 2 14:36:22 2000 +++ linux/arch/arm/kernel/hw-footbridge.c Sat Mar 18 11:38:04 2000 @@ -613,7 +613,7 @@ /* * CATS stuff */ -#ifdef CONFIG_CATS +#ifdef CONFIG_ARCH_CATS #define CONFIG_PORT 0x370 #define INDEX_PORT (CONFIG_PORT) @@ -698,7 +698,7 @@ #endif } #endif -#ifdef CONFIG_CATS +#ifdef CONFIG_ARCH_CATS if (machine_is_cats()) cats_hw_init(); #endif diff -u --recursive --new-file v2.3.99-pre1/linux/arch/arm/kernel/process.c linux/arch/arm/kernel/process.c --- v2.3.99-pre1/linux/arch/arm/kernel/process.c Fri Jan 21 18:19:15 2000 +++ linux/arch/arm/kernel/process.c Sat Mar 18 11:38:04 2000 @@ -1,7 +1,7 @@ /* * linux/arch/arm/kernel/process.c * - * Copyright (C) 1996-1999 Russell King - Converted to ARM. + * Copyright (C) 1996-2000 Russell King - Converted to ARM. * Origional Copyright (C) 1995 Linus Torvalds */ @@ -32,6 +32,7 @@ #include extern char *processor_modes[]; +extern void setup_mm_for_reboot(char mode); asmlinkage void ret_from_sys_call(void) __asm__("ret_from_sys_call"); @@ -96,15 +97,28 @@ void machine_restart(char * __unused) { /* - * Turn off caches, interrupts, etc + * Clean and disable cache, and turn off interrupts */ cpu_proc_fin(); + /* + * Tell the mm system that we are going to reboot - + * we may need it to insert some 1:1 mappings so that + * soft boot works. 
+ */ + setup_mm_for_reboot(reboot_mode); + + /* + * Now call the architecture specific reboot code. + */ arch_reset(reboot_mode); + /* + * Whoops - the architecture was unable to reboot. + * Tell the user! + */ mdelay(1000); printk("Reboot failed -- System halted\n"); - cli(); while (1); } diff -u --recursive --new-file v2.3.99-pre1/linux/arch/arm/mm/fault-armv.c linux/arch/arm/mm/fault-armv.c --- v2.3.99-pre1/linux/arch/arm/mm/fault-armv.c Thu Mar 2 14:36:22 2000 +++ linux/arch/arm/mm/fault-armv.c Sat Mar 18 11:38:04 2000 @@ -379,7 +379,8 @@ */ #define BUG_PROC_MSG \ KERN_DEBUG "Weird data abort (%08X).\n" \ - KERN_DEBUG "Please see http://www.arm.linux.org.uk/state.html for more information" + KERN_DEBUG "Please see http://www.arm.linux.org.uk/state.html for " \ + "more information\n" asmlinkage void do_DataAbort(unsigned long addr, int fsr, int error_code, struct pt_regs *regs) diff -u --recursive --new-file v2.3.99-pre1/linux/arch/arm/mm/mm-armo.c linux/arch/arm/mm/mm-armo.c --- v2.3.99-pre1/linux/arch/arm/mm/mm-armo.c Fri Jan 21 18:19:16 2000 +++ linux/arch/arm/mm/mm-armo.c Sat Mar 18 11:38:04 2000 @@ -3,7 +3,7 @@ * * Page table sludge for older ARM processor architectures. * - * Copyright (C) 1998-1999 Russell King + * Copyright (C) 1998-2000 Russell King */ #include #include @@ -135,6 +135,13 @@ } /* + * No special code is required here. + */ +void setup_mm_for_reboot(char mode) +{ +} + +/* * This contains the code to setup the memory map on an ARM2/ARM250/ARM3 * machine. 
This is both processor & architecture specific, and requires * some more work to get it to fit into our separate processor and @@ -155,6 +162,9 @@ pgd_val(swapper_pg_dir[i]) = 0; } +/* + * We never have holes in the memmap + */ void __init create_memmap_holes(void) { } diff -u --recursive --new-file v2.3.99-pre1/linux/arch/arm/mm/mm-armv.c linux/arch/arm/mm/mm-armv.c --- v2.3.99-pre1/linux/arch/arm/mm/mm-armv.c Thu Mar 2 14:36:22 2000 +++ linux/arch/arm/mm/mm-armv.c Sat Mar 18 11:38:04 2000 @@ -3,7 +3,7 @@ * * Page table sludge for ARM v3 and v4 processor architectures. * - * Copyright (C) 1998-1999 Russell King + * Copyright (C) 1998-2000 Russell King */ #include #include @@ -283,6 +283,25 @@ virt += PAGE_SIZE; length -= PAGE_SIZE; + } +} + +/* + * In order to soft-boot, we need to insert a 1:1 mapping in place of + * the user-mode pages. This will then ensure that we have predictable + * results when turning the mmu off + */ +void setup_mm_for_reboot(char mode) +{ + pgd_t *pgd = current->mm->pgd; + pmd_t pmd; + int i; + + for (i = 0; i < FIRST_USER_PGD_NR + USER_PTRS_PER_PGD; i++) { + pmd_val(pmd) = (i << PGDIR_SHIFT) | + PMD_SECT_AP_WRITE | PMD_SECT_AP_READ | + PMD_TYPE_SECT; + set_pmd(pmd_offset(pgd + i, i << PGDIR_SHIFT), pmd); } } diff -u --recursive --new-file v2.3.99-pre1/linux/arch/arm/mm/proc-arm6,7.S linux/arch/arm/mm/proc-arm6,7.S --- v2.3.99-pre1/linux/arch/arm/mm/proc-arm6,7.S Tue Mar 14 19:10:39 2000 +++ linux/arch/arm/mm/proc-arm6,7.S Sat Mar 18 11:38:04 2000 @@ -1,7 +1,7 @@ /* * linux/arch/arm/mm/proc-arm6,7.S: MMU functions for ARM6 * - * (C) 1997-1999 Russell King + * (C) 1997-2000 Russell King * * These are the low level assembler for performing cache and TLB * functions on the ARM6 & ARM7. 
@@ -342,15 +342,17 @@ /* * Function: _arm6_7_reset - * + * Params : r0 = address to jump to * Notes : This sets up everything for a reset */ ENTRY(cpu_arm6_reset) ENTRY(cpu_arm7_reset) - mov r0, #0 - mcr p15, 0, r0, c7, c0, 0 @ flush cache - mcr p15, 0, r0, c5, c0, 0 @ flush TLB - mov pc, lr + mov r1, #0 + mcr p15, 0, r1, c7, c0, 0 @ flush cache + mcr p15, 0, r1, c5, c0, 0 @ flush TLB + mov r1, #0x30 + mcr p15, 0, r1, c1, c0, 0 @ turn off MMU etc + mov pc, r0 cpu_armvlsi_name: .asciz "ARM/VLSI" diff -u --recursive --new-file v2.3.99-pre1/linux/arch/arm/mm/proc-sa110.S linux/arch/arm/mm/proc-sa110.S --- v2.3.99-pre1/linux/arch/arm/mm/proc-sa110.S Sun Feb 20 21:12:38 2000 +++ linux/arch/arm/mm/proc-sa110.S Sat Mar 18 11:38:04 2000 @@ -1,7 +1,7 @@ /* * linux/arch/arm/mm/proc-sa110.S: MMU functions for SA110 * - * (C) 1997-1999 Russell King + * (C) 1997-2000 Russell King * * These are the low level assembler for performing cache and TLB * functions on the StrongARM-110 and StrongARM-1100 @@ -225,9 +225,9 @@ .align 5 ENTRY(cpu_sa110_flush_tlb_all) ENTRY(cpu_sa1100_flush_tlb_all) - mov r0, #0 - mcr p15, 0, r0, c7, c10, 4 @ drain WB - mcr p15, 0, r0, c8, c7, 0 @ flush I & D tlbs + mov ip, #0 + mcr p15, 0, ip, c7, c10, 4 @ drain WB + mcr p15, 0, ip, c8, c7, 0 @ flush I & D tlbs mov pc, lr /* @@ -421,16 +421,18 @@ ENTRY(cpu_sa110_proc_fin) ENTRY(cpu_sa1100_proc_fin) + stmfd sp!, {r1, lr} mrs r0, cpsr orr r0, r0, #F_BIT | I_BIT msr cpsr, r0 + bl cpu_sa110_flush_cache_all @ clean caches mov r0, #0 mcr p15, 0, r0, c15, c2, 2 @ Disable clock switching mrc p15, 0, r0, c1, c0, 0 - bic r0, r0, #0x1100 @ ...i...s........ + bic r0, r0, #0x1000 @ ...i............ bic r0, r0, #0x000e @ ............wca. 
mcr p15, 0, r0, c1, c0, 0 @ disable caches - mov pc, lr + ldmfd sp!, {r1, pc} .align 5 ENTRY(cpu_sa110_do_idle) @@ -448,18 +450,21 @@ /* * Function: sa110_reset + * Params : r0 = address to jump to * Notes : This sets up everything for a reset */ + .align 5 ENTRY(cpu_sa110_reset) ENTRY(cpu_sa1100_reset) - stmfd sp!, {r1, lr} - bl cpu_sa110_flush_cache_all - bl cpu_sa110_flush_tlb_all + mov ip, #0 mcr p15, 0, ip, c7, c7, 0 @ flush I,D caches - mrc p15, 0, r0, c1, c0, 0 @ ctrl register - bic r0, r0, #0x000f @ ............wcam - bic r0, r0, #0x1100 @ ...i...s........ - ldmfd sp!, {r1, pc} + mcr p15, 0, ip, c7, c10, 4 @ drain WB + mcr p15, 0, ip, c8, c7, 0 @ flush I & D tlbs + mrc p15, 0, ip, c1, c0, 0 @ ctrl register + bic ip, ip, #0x000f @ ............wcam + bic ip, ip, #0x1100 @ ...i...s........ + mcr p15, 0, ip, c1, c0, 0 @ ctrl register + mov pc, r0 /* * Purpose : Function pointers used to access above functions - all calls * come through these diff -u --recursive --new-file v2.3.99-pre1/linux/arch/i386/Makefile linux/arch/i386/Makefile --- v2.3.99-pre1/linux/arch/i386/Makefile Tue Mar 7 14:32:25 2000 +++ linux/arch/i386/Makefile Sat Mar 18 16:50:54 2000 @@ -30,7 +30,8 @@ CFLAGS += $(shell if $(CC) -mpreferred-stack-boundary=2 -S -o /dev/null -xc /dev/null >/dev/null 2>&1; then echo "-mpreferred-stack-boundary=2"; fi) ifdef CONFIG_M386 -CFLAGS := $(CFLAGS) -m386 -DCPU=386 +CFLAGS := $(CFLAGS) -DCPU=386 +CFLAGS += $(shell if $(CC) -march=i386 -S -o /dev/null -xc /dev/null >/dev/null 2>&1; then echo "-march=i386"; else echo "-m386"; fi) AFLAGS := $(AFLAGS) -DCPU=386 endif @@ -65,7 +66,7 @@ endif ifdef CONFIG_MK7 -CFLAGS := $(CFLAGS) -DCPU=686 -march=pentiumpro -mpentiumpro -malign-functions=4 -fschedule-insns2 -mwide-multiply -fexpensive-optimizations +CFLAGS := $(CFLAGS) -malign-functions=4 -fschedule-insns2 -mwide-multiply -fexpensive-optimizations -DCPU=686 CFLAGS += $(shell if $(CC) -march=i686 -S -o /dev/null -xc /dev/null >/dev/null 2>&1; then echo 
"-march=i686"; fi) AFLAGS := $(AFLAGS) -DCPU=686 endif diff -u --recursive --new-file v2.3.99-pre1/linux/arch/i386/boot/compressed/head.S linux/arch/i386/boot/compressed/head.S --- v2.3.99-pre1/linux/arch/i386/boot/compressed/head.S Thu Feb 10 17:11:03 2000 +++ linux/arch/i386/boot/compressed/head.S Sat Mar 18 11:29:12 2000 @@ -33,10 +33,10 @@ cld cli movl $(__KERNEL_DS),%eax - movl %ax,%ds - movl %ax,%es - movl %ax,%fs - movl %ax,%gs + movl %eax,%ds + movl %eax,%es + movl %eax,%fs + movl %eax,%gs lss SYMBOL_NAME(stack_start),%esp xorl %eax,%eax diff -u --recursive --new-file v2.3.99-pre1/linux/arch/i386/defconfig linux/arch/i386/defconfig --- v2.3.99-pre1/linux/arch/i386/defconfig Tue Mar 14 19:10:39 2000 +++ linux/arch/i386/defconfig Fri Mar 17 15:47:16 2000 @@ -179,7 +179,6 @@ # CONFIG_BLK_DEV_OFFBOARD is not set # CONFIG_IDEDMA_PCI_AUTO is not set # CONFIG_BLK_DEV_IDEDMA is not set -# CONFIG_IDEDMA_AUTO is not set # CONFIG_IDEDMA_PCI_EXPERIMENTAL is not set # CONFIG_IDEDMA_PCI_WIP is not set # CONFIG_IDEDMA_NEW_DRIVE_LISTINGS is not set @@ -208,6 +207,7 @@ # CONFIG_BLK_DEV_TRM290 is not set # CONFIG_BLK_DEV_VIA82CXXX is not set # CONFIG_IDE_CHIPSETS is not set +# CONFIG_IDEDMA_AUTO is not set CONFIG_BLK_DEV_IDE_MODES=y # diff -u --recursive --new-file v2.3.99-pre1/linux/arch/i386/kernel/acpi.c linux/arch/i386/kernel/acpi.c --- v2.3.99-pre1/linux/arch/i386/kernel/acpi.c Tue Mar 14 19:10:39 2000 +++ linux/arch/i386/kernel/acpi.c Thu Mar 16 22:28:40 2000 @@ -488,13 +488,13 @@ if (!rsdt) { printk(KERN_ERR "ACPI: missing RSDT at 0x%p\n", (void*) rsdp->rsdt); - return -ENODEV; + return -EINVAL; } else if (rsdt->signature != ACPI_RSDT_SIG) { printk(KERN_ERR "ACPI: bad RSDT at 0x%p (%08x)\n", (void*) rsdp->rsdt, (unsigned) rsdt->signature); acpi_unmap_table(rsdt); - return -ENODEV; + return -EINVAL; } // search RSDT for FACP acpi_facp.table = NULL; @@ -532,7 +532,7 @@ if (!acpi_facp.table) { printk(KERN_ERR "ACPI: missing FACP\n"); - return -ENODEV; + return -EINVAL; } 
return 0; } @@ -1461,8 +1461,19 @@ switch (acpi_enabled) { case ACPI_ENABLED: - if (acpi_find_tables() && acpi_find_chipset()) + switch (acpi_find_tables()) { + case 0: + // found valid ACPI tables + break; + case -ENODEV: + // found no ACPI tables, try chipset-specific + if (acpi_find_chipset()) + return -ENODEV; + break; + default: + // found broken ACPI tables return -ENODEV; + } break; case ACPI_TABLES_ONLY: if (acpi_find_tables()) @@ -1478,6 +1489,12 @@ facp = (struct acpi_facp*) acpi_facp.table; + if (PM_IS_ACTIVE()) { + printk(KERN_NOTICE "acpi: APM is already active.\n"); + goto err_out; + } + pm_active = 1; + /* * Internally we always keep latencies in timer * ticks, which is simpler and more consistent (what is @@ -1516,8 +1533,6 @@ pm_power_off = acpi_power_off; - pm_active = 1; - /* * Set up the ACPI idle function. Note that we can't really * do this with multiple CPU's, we'd need a per-CPU ACPI @@ -1549,7 +1564,6 @@ struct acpi_facp *facp = (struct acpi_facp*) acpi_facp.table; pm_idle = NULL; - pm_active = 0; pm_power_off = NULL; unregister_sysctl_table(acpi_sysctl); @@ -1563,6 +1577,8 @@ if (pci_driver_registered) pci_unregister_driver(&acpi_driver); + + pm_active = 0; } /* diff -u --recursive --new-file v2.3.99-pre1/linux/arch/i386/kernel/head.S linux/arch/i386/kernel/head.S --- v2.3.99-pre1/linux/arch/i386/kernel/head.S Tue Mar 14 19:10:39 2000 +++ linux/arch/i386/kernel/head.S Fri Mar 17 13:01:37 2000 @@ -58,10 +58,19 @@ * New page tables may be in 4Mbyte page mode and may * be using the global pages. * + * NOTE! If we are on a 486 we may have no cr4 at all! + * So we do not try to touch it unless we really have + * some bits in it to set. This won't work if the BSP + * implements cr4 but this AP does not -- very unlikely + * but be warned! The same applies to the pse feature + * if not equally supported. --macro + * * NOTE! We have to correct for the fact that we're * not yet offset PAGE_OFFSET.. 
*/ #define cr4_bits mmu_cr4_features-__PAGE_OFFSET + cmpl $0,cr4_bits + je 1f movl %cr4,%eax # Turn on 4Mb pages orl cr4_bits,%eax movl %eax,%cr4 @@ -219,7 +228,6 @@ orl $2,%eax # set MP 2: movl %eax,%cr0 call check_x87 -4: #ifdef __SMP__ incb ready #endif diff -u --recursive --new-file v2.3.99-pre1/linux/arch/i386/kernel/i386_ksyms.c linux/arch/i386/kernel/i386_ksyms.c --- v2.3.99-pre1/linux/arch/i386/kernel/i386_ksyms.c Tue Mar 14 19:10:39 2000 +++ linux/arch/i386/kernel/i386_ksyms.c Wed Mar 15 11:08:04 2000 @@ -25,6 +25,7 @@ extern void dump_thread(struct pt_regs *, struct user *); extern int dump_fpu(elf_fpregset_t *); +extern spinlock_t rtc_lock; #ifdef CONFIG_SMP extern void FASTCALL( __write_lock_failed(rwlock_t *rw)); @@ -131,3 +132,5 @@ EXPORT_SYMBOL(get_wchan); EXPORT_SYMBOL(irq_stat); + +EXPORT_SYMBOL(rtc_lock); diff -u --recursive --new-file v2.3.99-pre1/linux/arch/i386/kernel/pci-i386.c linux/arch/i386/kernel/pci-i386.c --- v2.3.99-pre1/linux/arch/i386/kernel/pci-i386.c Wed Feb 16 17:03:51 2000 +++ linux/arch/i386/kernel/pci-i386.c Thu Mar 16 11:28:58 2000 @@ -323,3 +323,17 @@ } return 0; } + +/* + * If we set up a device for bus mastering, we need to check the latency + * timer as certain crappy BIOSes forget to set it properly. 
+ */ +void pcibios_set_master(struct pci_dev *dev) +{ + u8 lat; + pci_read_config_byte(dev, PCI_LATENCY_TIMER, &lat); + if (lat < 16) { + printk("PCI: Increasing latency timer of device %s to 64\n", dev->slot_name); + pci_write_config_byte(dev, PCI_LATENCY_TIMER, 64); + } +} diff -u --recursive --new-file v2.3.99-pre1/linux/arch/i386/kernel/pci-pc.c linux/arch/i386/kernel/pci-pc.c --- v2.3.99-pre1/linux/arch/i386/kernel/pci-pc.c Fri Mar 10 16:40:39 2000 +++ linux/arch/i386/kernel/pci-pc.c Thu Mar 16 11:27:22 2000 @@ -1103,9 +1103,9 @@ { struct irq_info *q; struct pci_dev *router; - int i, pirq, newirq; + int i, pirq, newirq, reg; u32 rtrid, mask; - u8 x; + u8 x, y; char *msg = NULL; pin--; @@ -1169,13 +1169,43 @@ DBG(" -> [PIIX] set to %02x\n", newirq); pci_write_config_byte(router, pirq, newirq); msg = "PIIX-NEW"; - } else - DBG(" -> [PIIX] sink\n"); + } else DBG(" -> [PIIX] sink\n"); break; case ID(PCI_VENDOR_ID_AL, PCI_DEVICE_ID_AL_M1533): newirq = ali_set_irq(router, pirq-1, newirq); if (newirq) msg = "ALI"; + break; + case ID(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C596): + case ID(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C686): + reg = 0x55 + (pirq >> 1); + pci_read_config_byte(router, reg, &x); + y = (pirq & 1) ? (x >> 4) : (x & 0x0f); + if (y) { + DBG(" -> [VIA] %02x\n", y); + newirq = y; + msg = "VIA"; + } else if (newirq) { + DBG(" -> [VIA] set to %02x\n", newirq); + x = (pirq & 1) ? ((x & 0x0f) | (newirq << 4)) : ((x & 0xf0) | newirq); + pci_write_config_byte(router, reg, x); + msg = "VIA-NEW"; + } else DBG(" -> [VIA] sink\n"); + break; + case ID(PCI_VENDOR_ID_OPTI, PCI_DEVICE_ID_OPTI_82C700): + reg = 0xb8 + (pirq >> 5); + pci_read_config_byte(router, reg, &x); + y = (pirq & 0x10) ? (x >> 4) : (x & 0x0f); + if (y) { + DBG(" -> [OPTI] %02x\n", y); + newirq = y; + msg = "OPTI"; + } else if (newirq) { + DBG(" -> [OPTI] set to %02x\n", newirq); + x = (pirq & 0x10) ? 
((x & 0x0f) | (newirq << 4)) : ((x & 0xf0) | newirq); + pci_write_config_byte(router, reg, x); + msg = "OPTI-NEW"; + } else DBG(" -> [OPTI] sink\n"); break; default: DBG(" -> unknown router %04x/%04x\n", rt->rtr_vendor, rt->rtr_device); diff -u --recursive --new-file v2.3.99-pre1/linux/arch/i386/kernel/setup.c linux/arch/i386/kernel/setup.c --- v2.3.99-pre1/linux/arch/i386/kernel/setup.c Tue Mar 14 19:10:39 2000 +++ linux/arch/i386/kernel/setup.c Fri Mar 17 20:47:39 2000 @@ -793,7 +793,7 @@ unsigned int n, dummy, *v, ecx, edx; /* Actually we must have cpuid or we could never have - * figured out that this was AMD from the vendor info :-). + * figured out that this was AMD/Cyrix from the vendor info :-). */ cpuid(0x80000000, &n, &dummy, &dummy, &dummy); @@ -806,23 +806,27 @@ cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]); c->x86_model_id[48] = 0; /* Set MTRR capability flag if appropriate */ - if(boot_cpu_data.x86 == 5) { - if((boot_cpu_data.x86_model == 9) || - ((boot_cpu_data.x86_model == 8) && - (boot_cpu_data.x86_mask >= 8))) - c->x86_capability |= X86_FEATURE_MTRR; - } - - if (n >= 0x80000005){ - cpuid(0x80000005, &dummy, &dummy, &ecx, &edx); - printk("CPU: L1 I Cache: %dK L1 D Cache: %dK\n", - ecx>>24, edx>>24); - c->x86_cache_size=(ecx>>24)+(edx>>24); - } - if (n >= 0x80000006){ - cpuid(0x80000006, &dummy, &dummy, &ecx, &edx); - printk("CPU: L2 Cache: %dK\n", ecx>>16); - c->x86_cache_size=(ecx>>16); + + if(c->x86_vendor==X86_VENDOR_AMD) + { + if(boot_cpu_data.x86 == 5) { + if((boot_cpu_data.x86_model == 9) || + ((boot_cpu_data.x86_model == 8) && + (boot_cpu_data.x86_mask >= 8))) + c->x86_capability |= X86_FEATURE_MTRR; + } + + if (n >= 0x80000005){ + cpuid(0x80000005, &dummy, &dummy, &ecx, &edx); + printk("CPU: L1 I Cache: %dK L1 D Cache: %dK\n", + ecx>>24, edx>>24); + c->x86_cache_size=(ecx>>24)+(edx>>24); + } + if (n >= 0x80000006){ + cpuid(0x80000006, &dummy, &dummy, &ecx, &edx); + printk("CPU: L2 Cache: %dK\n", ecx>>16); + c->x86_cache_size=(ecx>>16); + 
} } return 1; } @@ -1034,6 +1038,8 @@ printk(KERN_INFO "Working around Cyrix MediaGX virtual DMA bugs.\n"); isa_dma_bridge_buggy = 2; #endif + c->x86_cache_size=16; /* Yep 16K integrated cache thats it */ + /* GXm supports extended cpuid levels 'ala' AMD */ if (c->cpuid_level == 2) { get_model_name(c); /* get CPU marketing name */ @@ -1546,7 +1552,7 @@ cpus_initialized++; printk("Initializing CPU#%d\n", nr); - if (cpu_has_pse) + if (cpu_has_vme || cpu_has_tsc || cpu_has_de) clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); __asm__ __volatile__("lgdt %0": "=m" (gdt_descr)); diff -u --recursive --new-file v2.3.99-pre1/linux/arch/i386/kernel/time.c linux/arch/i386/kernel/time.c --- v2.3.99-pre1/linux/arch/i386/kernel/time.c Tue Mar 14 19:10:39 2000 +++ linux/arch/i386/kernel/time.c Wed Mar 15 17:00:03 2000 @@ -79,6 +79,9 @@ unsigned long fast_gettimeoffset_quotient=0; extern rwlock_t xtime_lock; +extern volatile unsigned long lost_ticks; + +spinlock_t rtc_lock = SPIN_LOCK_UNLOCKED; static inline unsigned long do_fast_gettimeoffset(void) { @@ -113,6 +116,8 @@ #ifndef CONFIG_X86_TSC +spinlock_t i8253_lock = SPIN_LOCK_UNLOCKED; + /* This function must be called with interrupts disabled * It was inspired by Steve McCanne's microtime-i386 for BSD. -- jrs * @@ -157,6 +162,8 @@ */ unsigned long jiffies_t; + /* gets recalled with irq locally disabled */ + spin_lock(&i8253_lock); /* timer count may underflow right here */ outb_p(0x00, 0x43); /* latch the count ASAP */ @@ -215,6 +222,7 @@ } } else jiffies_p = jiffies_t; + spin_unlock(&i8253_lock); count_p = count; @@ -238,7 +246,6 @@ */ void do_gettimeofday(struct timeval *tv) { - extern volatile unsigned long lost_ticks; unsigned long flags; unsigned long usec, sec; @@ -272,6 +279,7 @@ * would have done, and then undo it! 
*/ tv->tv_usec -= do_gettimeoffset(); + tv->tv_usec -= lost_ticks * (1000000 / HZ); while (tv->tv_usec < 0) { tv->tv_usec += 1000000; @@ -302,6 +310,8 @@ int real_seconds, real_minutes, cmos_minutes; unsigned char save_control, save_freq_select; + /* gets recalled with irq locally disabled */ + spin_lock(&rtc_lock); save_control = CMOS_READ(RTC_CONTROL); /* tell the clock it's being set */ CMOS_WRITE((save_control|RTC_SET), RTC_CONTROL); @@ -347,6 +357,7 @@ */ CMOS_WRITE(save_control, RTC_CONTROL); CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); + spin_unlock(&rtc_lock); return retval; } @@ -448,10 +459,19 @@ rdtscl(last_tsc_low); +#if 0 /* + * SUBTLE: this is not necessary from here because it's implicit in the + * write xtime_lock. + */ + spin_lock(&i8253_lock); +#endif outb_p(0x00, 0x43); /* latch the count ASAP */ count = inb_p(0x40); /* read the latched count */ count |= inb(0x40) << 8; +#if 0 + spin_unlock(&i8253_lock); +#endif count = ((LATCH-1) - count) * TICK_SIZE; delay_at_last_interrupt = (count + LATCH/2) / LATCH; diff -u --recursive --new-file v2.3.99-pre1/linux/arch/mips/kernel/irixelf.c linux/arch/mips/kernel/irixelf.c --- v2.3.99-pre1/linux/arch/mips/kernel/irixelf.c Tue Mar 14 19:10:39 2000 +++ linux/arch/mips/kernel/irixelf.c Thu Mar 16 22:23:22 2000 @@ -42,7 +42,7 @@ #undef DEBUG_ELF static int load_irix_binary(struct linux_binprm * bprm, struct pt_regs * regs); -static int load_irix_library(int fd); +static int load_irix_library(struct file *); static int irix_core_dump(long signr, struct pt_regs * regs, struct file *file); extern int dump_fpu (elf_fpregset_t *); @@ -820,7 +820,7 @@ /* This is really simpleminded and specialized - we are loading an * a.out library that is given an ELF header. 
*/ -static inline int do_load_irix_library(struct file *file) +static int load_irix_library(struct file *file) { struct elfhdr elf_ex; struct elf_phdr *elf_phdata = NULL; @@ -834,8 +834,6 @@ int i,j, k; len = 0; - if (!file->f_op) - return -EACCES; dentry = file->f_dentry; inode = dentry->d_inode; elf_bss = 0; @@ -888,12 +886,14 @@ while(elf_phdata->p_type != PT_LOAD) elf_phdata++; /* Now use mmap to map the library into memory. */ + down(&current->mm->mmap_sem); error = do_mmap(file, elf_phdata->p_vaddr & 0xfffff000, elf_phdata->p_filesz + (elf_phdata->p_vaddr & 0xfff), PROT_READ | PROT_WRITE | PROT_EXEC, MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE, elf_phdata->p_offset & 0xfffff000); + up(&current->mm->mmap_sem); k = elf_phdata->p_vaddr + elf_phdata->p_filesz; if(k > elf_bss) elf_bss = k; @@ -911,19 +911,6 @@ do_brk(len, bss-len); kfree(elf_phdata); return 0; -} - -static int load_irix_library(int fd) -{ - int retval = -EACCES; - struct file *file; - - file = fget(fd); - if (file) { - retval = do_load_irix_library(file); - fput(file); - } - return retval; } /* Called through irix_syssgi() to map an elf image given an FD, diff -u --recursive --new-file v2.3.99-pre1/linux/arch/ppc/kernel/setup.c linux/arch/ppc/kernel/setup.c --- v2.3.99-pre1/linux/arch/ppc/kernel/setup.c Tue Mar 7 14:32:25 2000 +++ linux/arch/ppc/kernel/setup.c Thu Mar 16 14:01:05 2000 @@ -756,7 +756,7 @@ id->CurAPMvalues = __le16_to_cpu(id->CurAPMvalues); id->word92 = __le16_to_cpu(id->word92); id->hw_config = __le16_to_cpu(id->hw_config); - for (i=0; i<34; i++) + for (i=0; i<32; i++) id->words94_125[i] = __le16_to_cpu(id->words94_125[i]); id->last_lun = __le16_to_cpu(id->last_lun); id->word127 = __le16_to_cpu(id->word127); diff -u --recursive --new-file v2.3.99-pre1/linux/arch/sparc/Makefile linux/arch/sparc/Makefile --- v2.3.99-pre1/linux/arch/sparc/Makefile Tue Feb 1 01:35:43 2000 +++ linux/arch/sparc/Makefile Sun Mar 19 10:16:36 2000 @@ -49,6 +49,8 @@ CORE_FILES_NO_BTFIX := $(CORE_FILES) CORE_FILES += 
arch/sparc/boot/btfix.o +export CORE_FILES_NO_BTFIX + archclean: rm -f $(TOPDIR)/vmlinux.aout -$(MAKE) -C arch/sparc/boot clean diff -u --recursive --new-file v2.3.99-pre1/linux/arch/sparc/config.in linux/arch/sparc/config.in --- v2.3.99-pre1/linux/arch/sparc/config.in Tue Mar 14 19:10:39 2000 +++ linux/arch/sparc/config.in Fri Mar 17 10:56:19 2000 @@ -1,4 +1,4 @@ -# $Id: config.in,v 1.88 2000/03/13 03:40:27 davem Exp $ +# $Id: config.in,v 1.90 2000/03/17 05:18:02 anton Exp $ # For a description of the syntax of this configuration file, # see the Configure script. # @@ -19,18 +19,6 @@ bool 'Symmetric multi-processing support (does not work on sun4/sun4c)' CONFIG_SMP -bool 'Support for SUN4 machines (disables SUN4[CDM] support)' CONFIG_SUN4 -if [ "$CONFIG_SUN4" != "y" ]; then - bool 'Support for PCI and PS/2 keyboard/mouse' CONFIG_PCI - source drivers/pci/Config.in -fi - -mainmenu_option next_comment -comment 'Console drivers' -bool 'PROM console' CONFIG_PROM_CONSOLE -source drivers/video/Config.in -endmenu - # Global things across all Sun machines. 
define_bool CONFIG_SBUS y define_bool CONFIG_SBUSCHAR y @@ -43,9 +31,11 @@ define_bool CONFIG_SUN_CONSOLE y define_bool CONFIG_SUN_AUXIO y define_bool CONFIG_SUN_IO y + +bool 'Support for SUN4 machines (disables SUN4[CDM] support)' CONFIG_SUN4 if [ "$CONFIG_SUN4" != "y" ]; then - source drivers/sbus/char/Config.in - source drivers/sbus/audio/Config.in + bool 'Support for PCI and PS/2 keyboard/mouse' CONFIG_PCI + source drivers/pci/Config.in fi tristate 'Openprom tree appears in /proc/openprom' CONFIG_SUN_OPENPROMFS @@ -74,15 +64,27 @@ endmenu mainmenu_option next_comment -comment 'Floppy and other block devices' +comment 'Console drivers' +bool 'PROM console' CONFIG_PROM_CONSOLE +source drivers/video/Config.in +endmenu + +if [ "$CONFIG_SUN4" != "y" ]; then + source drivers/sbus/char/Config.in + source drivers/sbus/audio/Config.in +fi + +mainmenu_option next_comment +comment 'Block devices' bool 'Normal floppy disk support' CONFIG_BLK_DEV_FD + bool 'Multiple devices driver support' CONFIG_BLK_DEV_MD if [ "$CONFIG_BLK_DEV_MD" = "y" ]; then tristate ' Linear (append) mode' CONFIG_MD_LINEAR tristate ' RAID-0 (striping) mode' CONFIG_MD_STRIPED - tristate ' RAID-1 (mirroring) mode' CONFIG_MD_MIRRORING - tristate ' RAID-4/RAID-5 mode' CONFIG_MD_RAID5 +# tristate ' RAID-1 (mirroring) mode' CONFIG_MD_MIRRORING +# tristate ' RAID-4/RAID-5 mode' CONFIG_MD_RAID5 fi tristate 'RAM disk support' CONFIG_BLK_DEV_RAM @@ -99,16 +101,6 @@ source net/Config.in fi -mainmenu_option next_comment -comment 'ISDN subsystem' - -tristate 'ISDN support' CONFIG_ISDN -if [ "$CONFIG_ISDN" != "n" ]; then - source drivers/isdn/Config.in -fi -endmenu - - define_bool CONFIG_IDE n define_bool CONFIG_BLK_DEV_IDE_MODES n define_bool CONFIG_BLK_DEV_HD n @@ -127,6 +119,15 @@ # endmenu mainmenu_option next_comment +comment 'ISDN subsystem' + +tristate 'ISDN support' CONFIG_ISDN +if [ "$CONFIG_ISDN" != "n" ]; then + source drivers/isdn/Config.in +fi +endmenu + +mainmenu_option next_comment comment 'SCSI 
support' tristate 'SCSI support' CONFIG_SCSI @@ -196,12 +197,16 @@ fi tristate ' Sun LANCE support' CONFIG_SUNLANCE tristate ' Sun Happy Meal 10/100baseT support' CONFIG_HAPPYMEAL - tristate ' Sun BigMAC 10/100baseT support (EXPERIMENTAL)' CONFIG_SUNBMAC + if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then + tristate ' Sun BigMAC 10/100baseT support (EXPERIMENTAL)' CONFIG_SUNBMAC + fi tristate ' Sun QuadEthernet support' CONFIG_SUNQE tristate ' MyriCOM Gigabit Ethernet support' CONFIG_MYRI_SBUS + # bool ' FDDI driver support' CONFIG_FDDI # if [ "$CONFIG_FDDI" = "y" ]; then # fi + if [ "$CONFIG_ATM" = "y" ]; then source drivers/atm/Config.in fi diff -u --recursive --new-file v2.3.99-pre1/linux/arch/sparc/defconfig linux/arch/sparc/defconfig --- v2.3.99-pre1/linux/arch/sparc/defconfig Tue Mar 14 19:10:39 2000 +++ linux/arch/sparc/defconfig Fri Mar 17 10:56:19 2000 @@ -14,8 +14,42 @@ CONFIG_VT=y CONFIG_VT_CONSOLE=y # CONFIG_SMP is not set +CONFIG_SBUS=y +CONFIG_SBUSCHAR=y +CONFIG_BUSMOUSE=y +CONFIG_SUN_MOUSE=y +CONFIG_SERIAL=y +CONFIG_SUN_SERIAL=y +CONFIG_SERIAL_CONSOLE=y +CONFIG_SUN_KEYBOARD=y +CONFIG_SUN_CONSOLE=y +CONFIG_SUN_AUXIO=y +CONFIG_SUN_IO=y # CONFIG_SUN4 is not set # CONFIG_PCI is not set +CONFIG_SUN_OPENPROMFS=m +CONFIG_NET=y +CONFIG_SYSVIPC=y +# CONFIG_BSD_PROCESS_ACCT is not set +CONFIG_SYSCTL=y +CONFIG_KCORE_ELF=y +CONFIG_BINFMT_AOUT=y +CONFIG_BINFMT_ELF=y +CONFIG_BINFMT_MISC=m +CONFIG_SUNOS_EMUL=y + +# +# Parallel port support +# +# CONFIG_PARPORT is not set +# CONFIG_PRINTER is not set + +# +# Loadable module support +# +CONFIG_MODULES=y +CONFIG_MODVERSIONS=y +CONFIG_KMOD=y # # Console drivers @@ -42,17 +76,6 @@ CONFIG_FBCON_FONTWIDTH8_ONLY=y CONFIG_FONT_SUN8x16=y # CONFIG_FBCON_FONTS is not set -CONFIG_SBUS=y -CONFIG_SBUSCHAR=y -CONFIG_BUSMOUSE=y -CONFIG_SUN_MOUSE=y -CONFIG_SERIAL=y -CONFIG_SUN_SERIAL=y -CONFIG_SERIAL_CONSOLE=y -CONFIG_SUN_KEYBOARD=y -CONFIG_SUN_CONSOLE=y -CONFIG_SUN_AUXIO=y -CONFIG_SUN_IO=y # # Misc Linux/SPARC drivers @@ -73,39 +96,14 @@ # 
CONFIG_SPARCAUDIO_CS4231 is not set # CONFIG_SPARCAUDIO_DBRI is not set # CONFIG_SPARCAUDIO_DUMMY is not set -CONFIG_SUN_OPENPROMFS=m -CONFIG_NET=y -CONFIG_SYSVIPC=y -# CONFIG_BSD_PROCESS_ACCT is not set -CONFIG_SYSCTL=y -CONFIG_KCORE_ELF=y -CONFIG_BINFMT_AOUT=y -CONFIG_BINFMT_ELF=y -CONFIG_BINFMT_MISC=m -CONFIG_SUNOS_EMUL=y # -# Parallel port support -# -# CONFIG_PARPORT is not set -# CONFIG_PRINTER is not set - -# -# Loadable module support -# -CONFIG_MODULES=y -CONFIG_MODVERSIONS=y -CONFIG_KMOD=y - -# -# Floppy, IDE, and other block devices +# Block devices # CONFIG_BLK_DEV_FD=y CONFIG_BLK_DEV_MD=y CONFIG_MD_LINEAR=m CONFIG_MD_STRIPED=m -CONFIG_MD_MIRRORING=m -CONFIG_MD_RAID5=m CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_INITRD=y CONFIG_BLK_DEV_LOOP=m @@ -162,16 +160,14 @@ # QoS and/or fair queueing # # CONFIG_NET_SCHED is not set +# CONFIG_IDE is not set +# CONFIG_BLK_DEV_IDE_MODES is not set +# CONFIG_BLK_DEV_HD is not set # # ISDN subsystem # # CONFIG_ISDN is not set - -# -# ATA/IDE/MFM/RLL support -# -# CONFIG_IDE is not set # # SCSI support diff -u --recursive --new-file v2.3.99-pre1/linux/arch/sparc/kernel/ioport.c linux/arch/sparc/kernel/ioport.c --- v2.3.99-pre1/linux/arch/sparc/kernel/ioport.c Thu Mar 2 14:36:22 2000 +++ linux/arch/sparc/kernel/ioport.c Thu Mar 16 11:40:17 2000 @@ -1,4 +1,4 @@ -/* $Id: ioport.c,v 1.35 2000/02/27 08:16:25 davem Exp $ +/* $Id: ioport.c,v 1.36 2000/03/16 08:22:53 anton Exp $ * ioport.c: Simple io mapping allocator. * * Copyright (C) 1995 David S. 
Miller (davem@caip.rutgers.edu) @@ -745,6 +745,10 @@ halt(); }; +} + +void register_proc_sparc_ioport(void) +{ #ifdef CONFIG_PROC_FS create_proc_read_entry("io_map",0,0,_sparc_io_get_info,&sparc_iomap); create_proc_read_entry("dvma_map",0,0,_sparc_io_get_info,&_sparc_dvma); diff -u --recursive --new-file v2.3.99-pre1/linux/arch/sparc/kernel/setup.c linux/arch/sparc/kernel/setup.c --- v2.3.99-pre1/linux/arch/sparc/kernel/setup.c Sat Feb 26 22:31:42 2000 +++ linux/arch/sparc/kernel/setup.c Thu Mar 16 11:20:33 2000 @@ -1,4 +1,4 @@ -/* $Id: setup.c,v 1.115 2000/02/26 04:24:31 davem Exp $ +/* $Id: setup.c,v 1.116 2000/03/15 23:26:22 anton Exp $ * linux/arch/sparc/kernel/setup.c * * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu) @@ -268,8 +268,6 @@ extern unsigned short root_flags; extern unsigned short root_dev; extern unsigned short ram_flags; -extern unsigned sparc_ramdisk_image; -extern unsigned sparc_ramdisk_size; #define RAMDISK_IMAGE_START_MASK 0x07FF #define RAMDISK_PROMPT_FLAG 0x8000 #define RAMDISK_LOAD_FLAG 0x4000 @@ -382,42 +380,9 @@ rd_prompt = ((ram_flags & RAMDISK_PROMPT_FLAG) != 0); rd_doload = ((ram_flags & RAMDISK_LOAD_FLAG) != 0); #endif -#ifdef CONFIG_BLK_DEV_INITRD -// FIXME needs to do the new bootmem alloc stuff - if (sparc_ramdisk_image) { - initrd_start = sparc_ramdisk_image; - if (initrd_start < KERNBASE) initrd_start += KERNBASE; - initrd_end = initrd_start + sparc_ramdisk_size; - if (initrd_end > *memory_end_p) { - printk(KERN_CRIT "initrd extends beyond end of memory " - "(0x%08lx > 0x%08lx)\ndisabling initrd\n", - initrd_end,*memory_end_p); - initrd_start = 0; - } - if (initrd_start >= *memory_start_p && initrd_start < *memory_start_p + 2 * PAGE_SIZE) { - initrd_below_start_ok = 1; - *memory_start_p = PAGE_ALIGN (initrd_end); - } else if (initrd_start && sparc_ramdisk_image < KERNBASE) { - switch (sparc_cpu_model) { - case sun4m: - case sun4d: - initrd_start -= KERNBASE; - initrd_end -= KERNBASE; - break; - default: - break; - } 
- } - } -#endif + prom_setsync(prom_sync_me); -#ifdef CONFIG_SUN_SERIAL -#if 0 - /* XXX We can't do this until the bootmem allocator is working. */ - sun_serial_setup(); /* set this up ASAP */ -#endif -#endif { #if !CONFIG_SUN_SERIAL serial_console = 0; diff -u --recursive --new-file v2.3.99-pre1/linux/arch/sparc/kernel/sparc_ksyms.c linux/arch/sparc/kernel/sparc_ksyms.c --- v2.3.99-pre1/linux/arch/sparc/kernel/sparc_ksyms.c Fri Mar 10 16:40:41 2000 +++ linux/arch/sparc/kernel/sparc_ksyms.c Thu Mar 16 11:40:17 2000 @@ -1,4 +1,4 @@ -/* $Id: sparc_ksyms.c,v 1.94 2000/02/28 04:00:53 anton Exp $ +/* $Id: sparc_ksyms.c,v 1.96 2000/03/16 09:12:49 jj Exp $ * arch/sparc/kernel/ksyms.c: Sparc specific ksyms support. * * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu) @@ -68,6 +68,8 @@ extern int __ashrdi3(int, int); extern int __ashldi3(int, int); extern int __lshrdi3(int, int); +extern int __muldi3(int, int); +extern int __divdi3(int, int); extern void dump_thread(struct pt_regs *, struct user *); @@ -85,10 +87,10 @@ __EXPORT_SYMBOL(__sparc_dot_ ## sym, "." 
#sym) #define EXPORT_SYMBOL_PRIVATE(sym) \ -extern int __sparc_priv_ ## sym (int) __asm__("__" ## #sym); \ +extern int __sparc_priv_ ## sym (int) __asm__("__" #sym); \ const struct module_symbol __export_priv_##sym \ __attribute__((section("__ksymtab"))) = \ -{ (unsigned long) &__sparc_priv_ ## sym, "__" ## #sym } +{ (unsigned long) &__sparc_priv_ ## sym, "__" #sym } /* used by various drivers */ EXPORT_SYMBOL(sparc_cpu_model); @@ -273,6 +275,8 @@ EXPORT_SYMBOL_NOVERS(__ashrdi3); EXPORT_SYMBOL_NOVERS(__ashldi3); EXPORT_SYMBOL_NOVERS(__lshrdi3); +EXPORT_SYMBOL_NOVERS(__muldi3); +EXPORT_SYMBOL_NOVERS(__divdi3); EXPORT_SYMBOL_DOT(rem); EXPORT_SYMBOL_DOT(urem); diff -u --recursive --new-file v2.3.99-pre1/linux/arch/sparc/kernel/sys_solaris.c linux/arch/sparc/kernel/sys_solaris.c --- v2.3.99-pre1/linux/arch/sparc/kernel/sys_solaris.c Tue Mar 14 19:10:39 2000 +++ linux/arch/sparc/kernel/sys_solaris.c Wed Mar 15 00:37:49 2000 @@ -14,6 +14,7 @@ #include #include #include +#include /* CHECKME: this stuff looks rather bogus */ asmlinkage int diff -u --recursive --new-file v2.3.99-pre1/linux/arch/sparc/kernel/sys_sunos.c linux/arch/sparc/kernel/sys_sunos.c --- v2.3.99-pre1/linux/arch/sparc/kernel/sys_sunos.c Tue Mar 14 19:10:39 2000 +++ linux/arch/sparc/kernel/sys_sunos.c Wed Mar 15 00:37:49 2000 @@ -1,4 +1,4 @@ -/* $Id: sys_sunos.c,v 1.115 2000/03/13 21:57:23 davem Exp $ +/* $Id: sys_sunos.c,v 1.117 2000/03/15 02:43:32 davem Exp $ * sys_sunos.c: SunOS specific syscall compatibility support. * * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu) @@ -267,35 +267,6 @@ unlock_kernel(); } -/* Same as vadvise, and just as bogus, but for a range of virtual - * process address space. - */ -#define MADV_NORMAL 0 /* Nothing special... */ -#define MADV_RANDOM 1 /* I am emacs... */ -#define MADV_SEQUENTIAL 2 /* I am researcher code... 
*/ -#define MADV_WILLNEED 3 /* Pages in this range will be needed */ -#define MADV_DONTNEED 4 /* Pages in this range won't be needed */ - -static char *mstrings[] = { - "MADV_NORMAL", - "MADV_RANDOM", - "MADV_SEQUENTIAL", - "MADV_WILLNEED", - "MADV_DONTNEED", -}; - -asmlinkage void sunos_madvise(unsigned long address, unsigned long len, - unsigned long strategy) -{ - /* I wanna see who uses this... */ - lock_kernel(); - printk("%s: Advises us to use %s paging strategy for addr<%08lx> len<%08lx>\n", - current->comm, - strategy <= 4 ? mstrings[strategy] : "BOGUS", - address, len); - unlock_kernel(); -} - /* This just wants the soft limit (ie. rlim_cur element) of the RLIMIT_NOFILE * resource limit and is for backwards compatibility with older sunos * revs. @@ -733,7 +704,6 @@ asmlinkage int sunos_nfs_mount(char *dir_name, int linux_flags, void *data) { - int ret = -ENODEV; int server_fd; char *the_name; struct nfs_mount_data linux_nfs_mount; diff -u --recursive --new-file v2.3.99-pre1/linux/arch/sparc/kernel/systbls.S linux/arch/sparc/kernel/systbls.S --- v2.3.99-pre1/linux/arch/sparc/kernel/systbls.S Tue Mar 14 19:10:39 2000 +++ linux/arch/sparc/kernel/systbls.S Wed Mar 15 00:37:49 2000 @@ -1,4 +1,4 @@ -/* $Id: systbls.S,v 1.95 2000/03/13 21:57:23 davem Exp $ +/* $Id: systbls.S,v 1.96 2000/03/15 02:43:32 davem Exp $ * systbls.S: System call entry point tables for OS compatibility. * The native Linux system call table lives here also. 
* @@ -33,7 +33,7 @@ /*60*/ .long sys_umask, sys_chroot, sys_newfstat, sys_fstat64, sys_getpagesize /*65*/ .long sys_msync, sys_vfork, sys_pread, sys_pwrite, sys_geteuid /*70*/ .long sys_getegid, sys_mmap, sys_setreuid, sys_munmap, sys_mprotect -/*75*/ .long sys_nis_syscall, sys_vhangup, sys_truncate64, sys_mincore, sys_getgroups16 +/*75*/ .long sys_madvise, sys_vhangup, sys_truncate64, sys_mincore, sys_getgroups16 /*80*/ .long sys_setgroups16, sys_getpgrp, sys_setgroups, sys_setitimer, sys_ftruncate64 /*85*/ .long sys_swapon, sys_getitimer, sys_setuid, sys_sethostname, sys_setgid /*90*/ .long sys_dup2, sys_setfsuid, sys_fcntl, sys_select, sys_setfsgid @@ -103,7 +103,7 @@ .long sys_msync, sys_vfork, sunos_nosys .long sunos_nosys, sunos_sbrk, sunos_sstk .long sunos_mmap, sunos_vadvise, sys_munmap - .long sys_mprotect, sunos_madvise, sys_vhangup + .long sys_mprotect, sys_madvise, sys_vhangup .long sunos_nosys, sys_mincore, sys_getgroups16 .long sys_setgroups16, sys_getpgrp, sunos_setpgrp .long sys_setitimer, sunos_nosys, sys_swapon diff -u --recursive --new-file v2.3.99-pre1/linux/arch/sparc/kernel/unaligned.c linux/arch/sparc/kernel/unaligned.c --- v2.3.99-pre1/linux/arch/sparc/kernel/unaligned.c Fri Jan 28 15:09:07 2000 +++ linux/arch/sparc/kernel/unaligned.c Thu Mar 16 11:20:33 2000 @@ -1,4 +1,4 @@ -/* $Id: unaligned.c,v 1.20 2000/01/21 11:38:42 jj Exp $ +/* $Id: unaligned.c,v 1.21 2000/03/15 08:50:16 anton Exp $ * unaligned.c: Unaligned load/store trap handling with special * cases for the kernel to do them more quickly. * @@ -106,6 +106,26 @@ return win->locals[reg - 16]; /* yes, I know what this does... */ } +static inline unsigned long safe_fetch_reg(unsigned int reg, struct pt_regs *regs) +{ + struct reg_window *win; + unsigned long ret; + + if(reg < 16) + return (!reg ? 0 : regs->u_regs[reg]); + + /* Ho hum, the slightly complicated case. 
*/ + win = (struct reg_window *) regs->u_regs[UREG_FP]; + + if ((unsigned long)win & 3) + return -1; + + if (get_user(ret, &win->locals[reg - 16])) + return -1; + + return ret; +} + static inline unsigned long *fetch_reg_addr(unsigned int reg, struct pt_regs *regs) { struct reg_window *win; @@ -132,6 +152,22 @@ } } +static inline unsigned long safe_compute_effective_address(struct pt_regs *regs, + unsigned int insn) +{ + unsigned int rs1 = (insn >> 14) & 0x1f; + unsigned int rs2 = insn & 0x1f; + unsigned int rd = (insn >> 25) & 0x1f; + + if(insn & 0x2000) { + maybe_flush_windows(rs1, 0, rd); + return (safe_fetch_reg(rs1, regs) + sign_extend_imm13(insn)); + } else { + maybe_flush_windows(rs1, rs2, rd); + return (safe_fetch_reg(rs1, regs) + safe_fetch_reg(rs2, regs)); + } +} + /* This is just to make gcc think panic does return... */ static void unaligned_panic(char *str) { @@ -427,7 +463,7 @@ info.si_signo = SIGBUS; info.si_errno = 0; info.si_code = BUS_ADRALN; - info.si_addr = (void *)compute_effective_address(regs, insn); + info.si_addr = (void *)safe_compute_effective_address(regs, insn); info.si_trapno = 0; send_sig_info(SIGBUS, &info, current); } diff -u --recursive --new-file v2.3.99-pre1/linux/arch/sparc/lib/Makefile linux/arch/sparc/lib/Makefile --- v2.3.99-pre1/linux/arch/sparc/lib/Makefile Fri Mar 10 16:40:41 2000 +++ linux/arch/sparc/lib/Makefile Thu Mar 16 11:20:33 2000 @@ -1,4 +1,4 @@ -# $Id: Makefile,v 1.32 2000/02/28 04:00:48 anton Exp $ +# $Id: Makefile,v 1.33 2000/03/16 00:52:07 anton Exp $ # Makefile for Sparc library files.. 
# @@ -6,7 +6,7 @@ strlen.o checksum.o blockops.o memscan.o memcmp.o strncmp.o \ strncpy_from_user.o divdi3.o udivdi3.o strlen_user.o \ copy_user.o locks.o atomic.o bitops.o debuglocks.o lshrdi3.o \ - ashldi3.o rwsem.o + ashldi3.o rwsem.o muldi3.o lib.a: $(OBJS) $(AR) rcs lib.a $(OBJS) diff -u --recursive --new-file v2.3.99-pre1/linux/arch/sparc/lib/muldi3.S linux/arch/sparc/lib/muldi3.S --- v2.3.99-pre1/linux/arch/sparc/lib/muldi3.S Wed Dec 31 16:00:00 1969 +++ linux/arch/sparc/lib/muldi3.S Thu Mar 16 11:20:33 2000 @@ -0,0 +1,76 @@ +/* Copyright (C) 1989, 1992, 1993, 1994, 1995 Free Software Foundation, Inc. + +This file is part of GNU CC. + +GNU CC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU CC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU CC; see the file COPYING. If not, write to +the Free Software Foundation, 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. 
*/ + + .text + .align 4 + .globl __muldi3 +__muldi3: + save %sp, -104, %sp + wr %g0, %i1, %y + sra %i3, 0x1f, %g2 + and %i1, %g2, %g2 + andcc %g0, 0, %g1 + mulscc %g1, %i3, %g1 + mulscc %g1, %i3, %g1 + mulscc %g1, %i3, %g1 + mulscc %g1, %i3, %g1 + mulscc %g1, %i3, %g1 + mulscc %g1, %i3, %g1 + mulscc %g1, %i3, %g1 + mulscc %g1, %i3, %g1 + mulscc %g1, %i3, %g1 + mulscc %g1, %i3, %g1 + mulscc %g1, %i3, %g1 + mulscc %g1, %i3, %g1 + mulscc %g1, %i3, %g1 + mulscc %g1, %i3, %g1 + mulscc %g1, %i3, %g1 + mulscc %g1, %i3, %g1 + mulscc %g1, %i3, %g1 + mulscc %g1, %i3, %g1 + mulscc %g1, %i3, %g1 + mulscc %g1, %i3, %g1 + mulscc %g1, %i3, %g1 + mulscc %g1, %i3, %g1 + mulscc %g1, %i3, %g1 + mulscc %g1, %i3, %g1 + mulscc %g1, %i3, %g1 + mulscc %g1, %i3, %g1 + mulscc %g1, %i3, %g1 + mulscc %g1, %i3, %g1 + mulscc %g1, %i3, %g1 + mulscc %g1, %i3, %g1 + mulscc %g1, %i3, %g1 + mulscc %g1, %i3, %g1 + mulscc %g1, 0, %g1 + add %g1, %g2, %l2 + rd %y, %o1 + mov %o1, %l3 + mov %i1, %o0 + call .umul + mov %i2, %o1 + mov %o0, %l0 + mov %i0, %o0 + call .umul + mov %i3, %o1 + add %l0, %o0, %l0 + mov %l2, %i0 + add %l2, %l0, %i0 + ret + restore %g0, %l3, %o1 diff -u --recursive --new-file v2.3.99-pre1/linux/arch/sparc/mm/init.c linux/arch/sparc/mm/init.c --- v2.3.99-pre1/linux/arch/sparc/mm/init.c Fri Mar 10 16:40:41 2000 +++ linux/arch/sparc/mm/init.c Thu Mar 16 11:20:33 2000 @@ -1,4 +1,4 @@ -/* $Id: init.c,v 1.83 2000/03/07 23:12:35 anton Exp $ +/* $Id: init.c,v 1.84 2000/03/15 23:26:26 anton Exp $ * linux/arch/sparc/mm/init.c * * Copyright (C) 1995 David S. 
Miller (davem@caip.rutgers.edu) @@ -43,7 +43,11 @@ struct pgtable_cache_struct pgt_quicklists = { 0, 0, 0, 0, SPIN_LOCK_UNLOCKED, SPIN_LOCK_UNLOCKED }; /* References to section boundaries */ -extern char __init_begin, __init_end, _start, end, etext , edata; +extern char __init_begin, __init_end, _start, _end, etext , edata; + +/* Initial ramdisk setup */ +extern unsigned int sparc_ramdisk_image; +extern unsigned int sparc_ramdisk_size; unsigned long totalram_pages = 0; @@ -118,6 +122,7 @@ { unsigned long bootmap_size, start_pfn, end_pfn; unsigned long end_of_phys_memory = 0UL; + unsigned long bootmap_pfn; int i; /* Limit maximum memory until we implement highmem for sparc */ @@ -160,7 +165,7 @@ /* Start with page aligned address of last symbol in kernel * image. */ - start_pfn = (unsigned long)__pa(PAGE_ALIGN((unsigned long) &end)); + start_pfn = (unsigned long)__pa(PAGE_ALIGN((unsigned long) &_end)); /* Adjust up to the physical address where the kernel begins. */ start_pfn += phys_base; @@ -168,14 +173,36 @@ /* Now shift down to get the real physical page frame number. */ start_pfn >>= PAGE_SHIFT; + bootmap_pfn = start_pfn; + end_pfn = end_of_phys_memory >> PAGE_SHIFT; +#ifdef CONFIG_BLK_DEV_INITRD + /* Now have to check initial ramdisk, so that bootmap does not overwrite it */ + if (sparc_ramdisk_image) { + if (sparc_ramdisk_image >= (unsigned long)&_end - 2 * PAGE_SIZE) + sparc_ramdisk_image -= KERNBASE; + initrd_start = sparc_ramdisk_image + phys_base; + initrd_end = initrd_start + sparc_ramdisk_size; + if (initrd_end > end_of_phys_memory) { + printk(KERN_CRIT "initrd extends beyond end of memory " + "(0x%016lx > 0x%016lx)\ndisabling initrd\n", + initrd_end, end_of_phys_memory); + initrd_start = 0; + } + if (initrd_start) { + if (initrd_start >= (start_pfn << PAGE_SHIFT) && + initrd_start < (start_pfn << PAGE_SHIFT) + 2 * PAGE_SIZE) + bootmap_pfn = PAGE_ALIGN (initrd_end) >> PAGE_SHIFT; + } + } +#endif /* Initialize the boot-time allocator. 
*/ #ifdef DEBUG_BOOTMEM - prom_printf("init_bootmem(spfn[%lx],epfn[%lx])\n", - start_pfn, end_pfn); + prom_printf("init_bootmem(spfn[%lx],bpfn[%lx],epfn[%lx])\n", + start_pfn, bootmap_pfn, end_pfn); #endif - bootmap_size = init_bootmem(start_pfn, end_pfn); + bootmap_size = init_bootmem(bootmap_pfn, end_pfn); /* Now register the available physical memory with the * allocator. @@ -190,15 +217,27 @@ sp_banks[i].num_bytes); } - /* Reserve the kernel text/data/bss and the bootmem bitmap. */ + /* Reserve the kernel text/data/bss, the bootmem bitmap and initrd. */ #ifdef DEBUG_BOOTMEM +#ifdef CONFIG_BLK_DEV_INITRD + if (initrd_start) + prom_printf("reserve_bootmem: base[%lx] size[%lx]\n", + initrd_start, initrd_end - initrd_start); +#endif prom_printf("reserve_bootmem: base[%lx] size[%lx]\n", - phys_base, - (start_pfn << PAGE_SHIFT) + - bootmap_size + PAGE_SIZE-1 - phys_base); + phys_base, (start_pfn << PAGE_SHIFT) - phys_base); + prom_printf("reserve_bootmem: base[%lx] size[%lx]\n", + (bootmap_pfn << PAGE_SHIFT), bootmap_size); #endif - reserve_bootmem(phys_base, (start_pfn << PAGE_SHIFT) + - bootmap_size + PAGE_SIZE-1 - phys_base); +#ifdef CONFIG_BLK_DEV_INITRD + if (initrd_start) { + reserve_bootmem(initrd_start, initrd_end - initrd_start); + initrd_start += PAGE_OFFSET; + initrd_end += PAGE_OFFSET; + } +#endif + reserve_bootmem(phys_base, (start_pfn << PAGE_SHIFT) - phys_base); + reserve_bootmem((bootmap_pfn << PAGE_SHIFT), bootmap_size); #ifdef DEBUG_BOOTMEM prom_printf("init_bootmem: return end_pfn[%lx]\n", end_pfn); @@ -371,18 +410,6 @@ } memset(sparc_valid_addr_bitmap, 0, i << 2); - /* fix this */ -#ifdef CONFIG_BLK_DEV_INITRD - addr = __va(phys_base); - last = PAGE_ALIGN((unsigned long)&end) + phys_base; - while(addr < last) { - if (initrd_below_start_ok && addr >= initrd_start && addr < initrd_end) - mem_map[MAP_NR(addr)].flags &= ~(1<> 10); } + +#ifdef CONFIG_BLK_DEV_INITRD +void free_initrd_mem(unsigned long start, unsigned long end) +{ + if (start < end) + 
printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10); + for (; start < end; start += PAGE_SIZE) { + struct page *p = mem_map + MAP_NR(start); + + ClearPageReserved(p); + set_page_count(p, 1); + __free_page(p); + num_physpages++; + } +} +#endif void si_meminfo(struct sysinfo *val) { diff -u --recursive --new-file v2.3.99-pre1/linux/arch/sparc64/config.in linux/arch/sparc64/config.in --- v2.3.99-pre1/linux/arch/sparc64/config.in Tue Mar 14 19:10:39 2000 +++ linux/arch/sparc64/config.in Sat Mar 18 11:18:05 2000 @@ -1,4 +1,4 @@ -# $Id: config.in,v 1.101 2000/03/13 05:49:55 jj Exp $ +# $Id: config.in,v 1.104 2000/03/15 15:02:28 jj Exp $ # For a description of the syntax of this configuration file, # see the Configure script. # @@ -32,16 +32,6 @@ bool 'PCI support' CONFIG_PCI source drivers/pci/Config.in -mainmenu_option next_comment -comment 'Console drivers' -bool 'PROM console' CONFIG_PROM_CONSOLE -bool 'Support Frame buffer devices' CONFIG_FB -source drivers/video/Config.in -endmenu - -source drivers/sbus/char/Config.in -source drivers/sbus/audio/Config.in - tristate 'Openprom tree appears in /proc/openprom' CONFIG_SUN_OPENPROMFS bool 'Networking support' CONFIG_NET bool 'System V IPC' CONFIG_SYSVIPC @@ -78,7 +68,17 @@ endmenu mainmenu_option next_comment -comment 'Floppy, IDE, and other block devices' +comment 'Console drivers' +bool 'PROM console' CONFIG_PROM_CONSOLE +bool 'Support Frame buffer devices' CONFIG_FB +source drivers/video/Config.in +endmenu + +source drivers/sbus/char/Config.in +source drivers/sbus/audio/Config.in + +mainmenu_option next_comment +comment 'Block devices' bool 'Normal floppy disk support' CONFIG_BLK_DEV_FD @@ -86,8 +86,8 @@ if [ "$CONFIG_BLK_DEV_MD" = "y" ]; then tristate ' Linear (append) mode' CONFIG_MD_LINEAR tristate ' RAID-0 (striping) mode' CONFIG_MD_STRIPED - tristate ' RAID-1 (mirroring) mode' CONFIG_MD_MIRRORING - tristate ' RAID-4/RAID-5 mode' CONFIG_MD_RAID5 +# tristate ' RAID-1 (mirroring) mode' 
CONFIG_MD_MIRRORING +# tristate ' RAID-4/RAID-5 mode' CONFIG_MD_RAID5 fi tristate 'RAM disk support' CONFIG_BLK_DEV_RAM @@ -98,6 +98,8 @@ tristate 'Loopback device support' CONFIG_BLK_DEV_LOOP tristate 'Network block device support' CONFIG_BLK_DEV_NBD +endmenu + if [ "$CONFIG_NET" = "y" ]; then source net/Config.in fi @@ -224,6 +226,7 @@ tristate ' Sun QuadEthernet support' CONFIG_SUNQE if [ "$CONFIG_PCI" = "y" ]; then tristate 'Generic DECchip & DIGITAL EtherWORKS PCI/EISA' CONFIG_DE4X5 + tristate 'DECchip Tulip (dc21x4x) PCI support' CONFIG_TULIP tristate '3c590/3c900 series (592/595/597) "Vortex/Boomerang" support' CONFIG_VORTEX tristate 'RealTek RTL-8139 support' CONFIG_8139TOO tristate 'PCI NE2000 support' CONFIG_NE2K_PCI diff -u --recursive --new-file v2.3.99-pre1/linux/arch/sparc64/defconfig linux/arch/sparc64/defconfig --- v2.3.99-pre1/linux/arch/sparc64/defconfig Tue Mar 14 19:10:39 2000 +++ linux/arch/sparc64/defconfig Sat Mar 18 11:18:05 2000 @@ -26,6 +26,42 @@ CONFIG_SUN_IO=y CONFIG_PCI=y CONFIG_PCI_NAMES=y +CONFIG_SUN_OPENPROMFS=m +CONFIG_NET=y +CONFIG_SYSVIPC=y +# CONFIG_BSD_PROCESS_ACCT is not set +CONFIG_SYSCTL=y +CONFIG_KCORE_ELF=y +CONFIG_SPARC32_COMPAT=y +CONFIG_BINFMT_ELF32=y +# CONFIG_BINFMT_AOUT32 is not set +CONFIG_BINFMT_ELF=y +CONFIG_BINFMT_MISC=m +# CONFIG_SUNOS_EMUL is not set +CONFIG_SOLARIS_EMUL=m + +# +# Parallel port support +# +CONFIG_PARPORT=m +CONFIG_PARPORT_PC=m +CONFIG_PARPORT_PC_FIFO=y +# CONFIG_PARPORT_PC_SUPERIO is not set +# CONFIG_PARPORT_AMIGA is not set +# CONFIG_PARPORT_MFC3 is not set +# CONFIG_PARPORT_ATARI is not set +# CONFIG_PARPORT_SUNBPP is not set +# CONFIG_PARPORT_OTHER is not set +CONFIG_PARPORT_1284=y +CONFIG_PRINTER=m +CONFIG_ENVCTRL=m + +# +# Loadable module support +# +CONFIG_MODULES=y +CONFIG_MODVERSIONS=y +CONFIG_KMOD=y # # Console drivers @@ -84,44 +120,9 @@ CONFIG_SPARCAUDIO_CS4231=y # CONFIG_SPARCAUDIO_DBRI is not set # CONFIG_SPARCAUDIO_DUMMY is not set -CONFIG_SUN_OPENPROMFS=m -CONFIG_NET=y 
-CONFIG_SYSVIPC=y -# CONFIG_BSD_PROCESS_ACCT is not set -CONFIG_SYSCTL=y -CONFIG_KCORE_ELF=y -CONFIG_SPARC32_COMPAT=y -CONFIG_BINFMT_ELF32=y -# CONFIG_BINFMT_AOUT32 is not set -CONFIG_BINFMT_ELF=y -CONFIG_BINFMT_MISC=m -# CONFIG_SUNOS_EMUL is not set -CONFIG_SOLARIS_EMUL=m - -# -# Parallel port support -# -CONFIG_PARPORT=m -CONFIG_PARPORT_PC=m -CONFIG_PARPORT_PC_FIFO=y -# CONFIG_PARPORT_AMIGA is not set -# CONFIG_PARPORT_MFC3 is not set -# CONFIG_PARPORT_ATARI is not set -# CONFIG_PARPORT_SUNBPP is not set -# CONFIG_PARPORT_OTHER is not set -CONFIG_PARPORT_1284=y -CONFIG_PRINTER=m -CONFIG_ENVCTRL=m - -# -# Loadable module support -# -CONFIG_MODULES=y -CONFIG_MODVERSIONS=y -CONFIG_KMOD=y # -# Floppy, IDE, and other block devices +# Block devices # CONFIG_BLK_DEV_FD=y # CONFIG_BLK_DEV_MD is not set @@ -331,6 +332,7 @@ CONFIG_SUNBMAC=m CONFIG_SUNQE=m CONFIG_DE4X5=m +CONFIG_TULIP=m CONFIG_VORTEX=m CONFIG_8139TOO=m CONFIG_NE2K_PCI=m diff -u --recursive --new-file v2.3.99-pre1/linux/arch/sparc64/kernel/binfmt_aout32.c linux/arch/sparc64/kernel/binfmt_aout32.c --- v2.3.99-pre1/linux/arch/sparc64/kernel/binfmt_aout32.c Tue Mar 14 19:10:39 2000 +++ linux/arch/sparc64/kernel/binfmt_aout32.c Thu Mar 16 22:23:22 2000 @@ -33,7 +33,7 @@ #include static int load_aout32_binary(struct linux_binprm *, struct pt_regs * regs); -static int load_aout32_library(int fd); +static int load_aout32_library(struct file*); static int aout32_core_dump(long signr, struct pt_regs * regs, struct file *file); extern void dump_thread(struct pt_regs *, struct user *); @@ -343,9 +343,8 @@ } /* N.B. Move to .h file and use code in fs/binfmt_aout.c? 
*/ -static int load_aout32_library(int fd) +static int load_aout32_library(struct file *file) { - struct file * file; struct inode * inode; unsigned long bss, start_addr, len; unsigned long error; @@ -353,12 +352,6 @@ loff_t offset = 0; struct exec ex; - retval = -EACCES; - file = fget(fd); - if (!file) - goto out; - if (!file->f_op) - goto out_putf; inode = file->f_dentry->d_inode; retval = -ENOEXEC; @@ -367,23 +360,23 @@ error = file->f_op->read(file, (char *) &ex, sizeof(ex), &offset); set_fs(USER_DS); if (error != sizeof(ex)) - goto out_putf; + goto out; /* We come in here for the regular a.out style of shared libraries */ if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != QMAGIC) || N_TRSIZE(ex) || N_DRSIZE(ex) || ((ex.a_entry & 0xfff) && N_MAGIC(ex) == ZMAGIC) || inode->i_size < ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) { - goto out_putf; + goto out; } if (N_MAGIC(ex) == ZMAGIC && N_TXTOFF(ex) && (N_TXTOFF(ex) < inode->i_sb->s_blocksize)) { printk("N_TXTOFF < BLOCK_SIZE. Please convert library\n"); - goto out_putf; + goto out; } if (N_FLAGS(ex)) - goto out_putf; + goto out; /* For QMAGIC, the starting address is 0x20 into the page. We mask this off to get the starting address for the page */ @@ -391,13 +384,15 @@ start_addr = ex.a_entry & 0xfffff000; /* Now use mmap to map the library into memory. 
*/ + down(&current->mm->mmap_sem); error = do_mmap(file, start_addr, ex.a_text + ex.a_data, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE, N_TXTOFF(ex)); + up(&current->mm->mmap_sem); retval = error; if (error != start_addr) - goto out_putf; + goto out; len = PAGE_ALIGN(ex.a_text + ex.a_data); bss = ex.a_text + ex.a_data + ex.a_bss; @@ -405,12 +400,9 @@ error = do_brk(start_addr + len, bss - len); retval = error; if (error != start_addr + len) - goto out_putf; + goto out; } retval = 0; - -out_putf: - fput(file); out: return retval; } diff -u --recursive --new-file v2.3.99-pre1/linux/arch/sparc64/kernel/ioctl32.c linux/arch/sparc64/kernel/ioctl32.c --- v2.3.99-pre1/linux/arch/sparc64/kernel/ioctl32.c Tue Mar 14 19:10:39 2000 +++ linux/arch/sparc64/kernel/ioctl32.c Wed Mar 15 00:37:50 2000 @@ -1,4 +1,4 @@ -/* $Id: ioctl32.c,v 1.82 2000/03/13 21:57:27 davem Exp $ +/* $Id: ioctl32.c,v 1.83 2000/03/14 07:31:25 jj Exp $ * ioctl32.c: Conversion between 32bit and 64bit native ioctls. * * Copyright (C) 1997-2000 Jakub Jelinek (jakub@redhat.com) @@ -8,6 +8,7 @@ * ioctls. */ +#include #include #include #include @@ -44,6 +45,14 @@ #include #include #include +#include +#include +#if defined(CONFIG_BLK_DEV_LVM) || defined(CONFIG_BLK_DEV_LVM_MODULE) +/* Ugh. This header really is not clean */ +#define min min +#define max max +#include +#endif /* LVM */ #include /* Ugly hack. */ @@ -1980,6 +1989,353 @@ return -EINVAL; } +#if defined(CONFIG_BLK_DEV_LVM) || defined(CONFIG_BLK_DEV_LVM_MODULE) +/* Ugh, LVM. Pitty it was not cleaned up before accepted :((. 
*/ +typedef struct { + uint8_t vg_name[NAME_LEN]; + uint32_t vg_number; + uint32_t vg_access; + uint32_t vg_status; + uint32_t lv_max; + uint32_t lv_cur; + uint32_t lv_open; + uint32_t pv_max; + uint32_t pv_cur; + uint32_t pv_act; + uint32_t dummy; + uint32_t vgda; + uint32_t pe_size; + uint32_t pe_total; + uint32_t pe_allocated; + uint32_t pvg_total; + u32 proc; + u32 pv[ABS_MAX_PV + 1]; + u32 lv[ABS_MAX_LV + 1]; +} vg32_t; + +typedef struct { + uint8_t id[2]; + uint16_t version; + lvm_disk_data_t pv_on_disk; + lvm_disk_data_t vg_on_disk; + lvm_disk_data_t pv_namelist_on_disk; + lvm_disk_data_t lv_on_disk; + lvm_disk_data_t pe_on_disk; + uint8_t pv_name[NAME_LEN]; + uint8_t vg_name[NAME_LEN]; + uint8_t system_id[NAME_LEN]; + kdev_t pv_dev; + uint32_t pv_number; + uint32_t pv_status; + uint32_t pv_allocatable; + uint32_t pv_size; + uint32_t lv_cur; + uint32_t pe_size; + uint32_t pe_total; + uint32_t pe_allocated; + uint32_t pe_stale; + u32 pe; + u32 inode; +} pv32_t; + +typedef struct { + char lv_name[NAME_LEN]; + u32 lv; +} lv_req32_t; + +typedef struct { + u32 lv_index; + u32 lv; +} lv_status_byindex_req32_t; + +typedef struct { + uint8_t lv_name[NAME_LEN]; + kdev_t old_dev; + kdev_t new_dev; + u32 old_pe; + u32 new_pe; +} le_remap_req32_t; + +typedef struct { + char pv_name[NAME_LEN]; + u32 pv; +} pv_status_req32_t; + +typedef struct { + uint8_t lv_name[NAME_LEN]; + uint8_t vg_name[NAME_LEN]; + uint32_t lv_access; + uint32_t lv_status; + uint32_t lv_open; + kdev_t lv_dev; + uint32_t lv_number; + uint32_t lv_mirror_copies; + uint32_t lv_recovery; + uint32_t lv_schedule; + uint32_t lv_size; + u32 lv_current_pe; + uint32_t lv_current_le; + uint32_t lv_allocated_le; + uint32_t lv_stripes; + uint32_t lv_stripesize; + uint32_t lv_badblock; + uint32_t lv_allocation; + uint32_t lv_io_timeout; + uint32_t lv_read_ahead; + /* delta to version 1 starts here */ + u32 lv_snapshot_org; + u32 lv_snapshot_prev; + u32 lv_snapshot_next; + u32 lv_block_exception; + uint32_t 
lv_remap_ptr; + uint32_t lv_remap_end; + uint32_t lv_chunk_size; + uint32_t lv_snapshot_minor; + char dummy[200]; +} lv32_t; + +typedef struct { + u32 hash[2]; + u32 rsector_org; + kdev_t rdev_org; + u32 rsector_new; + kdev_t rdev_new; +} lv_block_exception32_t; + +static void put_lv_t(lv_t *l) +{ + if (l->lv_current_pe) vfree(l->lv_current_pe); + if (l->lv_block_exception) vfree(l->lv_block_exception); + kfree(l); +} + +static lv_t *get_lv_t(u32 p, int *errp) +{ + int err, i; + u32 ptr1, ptr2; + size_t size; + lv_block_exception32_t *lbe32; + lv_block_exception_t *lbe; + lv32_t *ul = (lv32_t *)A(p); + lv_t *l = (lv_t *)kmalloc(sizeof(lv_t), GFP_KERNEL); + if (!l) { + *errp = -ENOMEM; + return NULL; + } + memset(l, 0, sizeof(lv_t)); + err = copy_from_user(l, ul, (long)&((lv32_t *)0)->lv_current_pe); + err |= __copy_from_user(&l->lv_current_le, &ul->lv_current_le, + ((long)&ul->lv_snapshot_org) - ((long)&ul->lv_current_le)); + err |= __copy_from_user(&l->lv_remap_ptr, &ul->lv_remap_ptr, + ((long)&ul->dummy[0]) - ((long)&ul->lv_remap_ptr)); + err |= __get_user(ptr1, &ul->lv_current_pe); + err |= __get_user(ptr2, &ul->lv_block_exception); + if (err) { + kfree(l); + *errp = -EFAULT; + return NULL; + } + if (ptr1) { + size = l->lv_allocated_le * sizeof(pe_t); + l->lv_current_pe = vmalloc(size); + if (l->lv_current_pe) + err = copy_from_user(l->lv_current_pe, (void *)A(ptr1), size); + } + if (!err && ptr2) { + size = l->lv_remap_end * sizeof(lv_block_exception_t); + l->lv_block_exception = lbe = vmalloc(size); + if (l->lv_block_exception) { + lbe32 = (lv_block_exception32_t *)A(ptr2); + memset(lbe, 0, size); + for (i = 0; i < l->lv_remap_end; i++, lbe++, lbe32++) { + err |= get_user(lbe->rsector_org, &lbe32->rsector_org); + err |= __get_user(lbe->rdev_org, &lbe32->rdev_org); + err |= __get_user(lbe->rsector_new, &lbe32->rsector_new); + err |= __get_user(lbe->rdev_new, &lbe32->rdev_new); + } + } + } + if (err || (ptr1 && !l->lv_current_pe) || (ptr2 && 
!l->lv_block_exception)) { + if (!err) + *errp = -ENOMEM; + else + *errp = -EFAULT; + put_lv_t(l); + return NULL; + } + return l; +} + +static int copy_lv_t(u32 ptr, lv_t *l) +{ + int err; + lv32_t *ul = (lv32_t *)A(ptr); + u32 ptr1; + size_t size; + + err = get_user(ptr1, &ul->lv_current_pe); + if (err) + return -EFAULT; + err = copy_to_user(ul, l, (long)&((lv32_t *)0)->lv_current_pe); + err |= __copy_to_user(&ul->lv_current_le, &l->lv_current_le, + ((long)&ul->lv_snapshot_org) - ((long)&ul->lv_current_le)); + err |= __copy_to_user(&ul->lv_remap_ptr, &l->lv_remap_ptr, + ((long)&ul->dummy[0]) - ((long)&ul->lv_remap_ptr)); + size = l->lv_allocated_le * sizeof(pe_t); + err |= __copy_to_user((void *)A(ptr1), l->lv_current_pe, size); + return err ? -EFAULT : 0; /* 0 when every copy succeeded; any failed user copy is -EFAULT */ +} + +static int do_lvm_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg) +{ + vg_t *v; + union { + lv_req_t lv_req; + le_remap_req_t le_remap; + lv_status_byindex_req_t lv_byindex; + pv_status_req32_t pv_status; + } u; + pv_t p; + int err = 0; /* VG_STATUS and a VG_CREATE with pv_max == 0 never assign it before it is tested */ + u32 ptr = 0; + int i; + mm_segment_t old_fs; + void *karg = &u; + + switch (cmd) { + case VG_STATUS: + v = kmalloc(sizeof(vg_t), GFP_KERNEL); + if (!v) return -ENOMEM; + karg = v; + break; + case VG_CREATE: + v = kmalloc(sizeof(vg_t), GFP_KERNEL); + if (!v) return -ENOMEM; + if (copy_from_user(v, (void *)arg, (long)&((vg32_t *)0)->proc) || + __get_user(v->proc, &((vg32_t *)arg)->proc)) { + kfree(v); + return -EFAULT; + } + karg = v; + memset(v->pv, 0, sizeof(v->pv) + sizeof(v->lv)); + if (v->pv_max > ABS_MAX_PV || v->lv_max > ABS_MAX_LV) { kfree(v); return -EPERM; } /* bound both loops below; free v instead of leaking it */ + for (i = 0; i < v->pv_max; i++) { + err = __get_user(ptr, &((vg32_t *)arg)->pv[i]); + if (err) break; + if (ptr) { + v->pv[i] = kmalloc(sizeof(pv_t), GFP_KERNEL); + if (!v->pv[i]) { + err = -ENOMEM; + break; + } + err = copy_from_user(v->pv[i], (void *)A(ptr), sizeof(pv32_t) - 8); + if (err) { + err = -EFAULT; + break; + } + v->pv[i]->pe = NULL; v->pv[i]->inode = NULL; + } + } + if (!err) { + for (i = 0; i < v->lv_max; 
i++) { + err = __get_user(ptr, &((vg32_t *)arg)->lv[i]); + if (err) break; + if (ptr) { + v->lv[i] = get_lv_t(ptr, &err); + if (err) break; + } + } + } + break; + case LV_CREATE: + case LV_EXTEND: + case LV_REDUCE: + case LV_REMOVE: + case LV_STATUS_BYNAME: + err = copy_from_user(&u.pv_status, arg, sizeof(u.pv_status.pv_name)); + if (err) return -EFAULT; + if (cmd != LV_REMOVE) { + err = __get_user(ptr, &((lv_req32_t *)arg)->lv); + if (err) return err; + u.lv_req.lv = get_lv_t(ptr, &err); + } else + u.lv_req.lv = NULL; + break; + case LV_STATUS_BYINDEX: + err = get_user(u.lv_byindex.lv_index, &((lv_status_byindex_req32_t *)arg)->lv_index); + err |= __get_user(ptr, &((lv_status_byindex_req32_t *)arg)->lv); + if (err) return err; + u.lv_byindex.lv = get_lv_t(ptr, &err); + break; + case VG_EXTEND: + err = copy_from_user(&p, (void *)arg, sizeof(pv32_t) - 8); + if (err) return -EFAULT; + p.pe = NULL; p.inode = NULL; + karg = &p; + break; + case LE_REMAP: + err = copy_from_user(&u.le_remap, (void *)arg, sizeof(le_remap_req32_t)); + if (err) return -EFAULT; + u.le_remap.new_pe = ((le_remap_req32_t *)&u.le_remap)->new_pe; + u.le_remap.old_pe = ((le_remap_req32_t *)&u.le_remap)->old_pe; + break; + case PV_CHANGE: + case PV_STATUS: + err = copy_from_user(&u.pv_status, arg, sizeof(u.lv_req.lv_name)); + if (err) return -EFAULT; + err = __get_user(ptr, &((pv_status_req32_t *)arg)->pv); + if (err) return err; + u.pv_status.pv = &p; + if (cmd == PV_CHANGE) { + err = copy_from_user(&p, (void *)A(ptr), sizeof(pv32_t) - 8); + if (err) return -EFAULT; + p.pe = NULL; p.inode = NULL; + } + break; + } + old_fs = get_fs(); set_fs (KERNEL_DS); + if (!err) err = sys_ioctl (fd, cmd, (unsigned long)karg); /* after a failed argument copy skip the ioctl but still run the cleanup switch below */ + set_fs (old_fs); + switch (cmd) { + case VG_STATUS: + if (!err) { + if (copy_to_user((void *)arg, v, (long)&((vg32_t *)0)->proc) || + clear_user(&((vg32_t *)arg)->proc, sizeof(vg32_t) - (long)&((vg32_t *)0)->proc)) + err = -EFAULT; + } + kfree(v); + break; + case VG_CREATE: + for (i = 0; i < 
v->pv_max; i++) + if (v->pv[i]) kfree(v->pv[i]); + for (i = 0; i < v->lv_max; i++) + if (v->lv[i]) put_lv_t(v->lv[i]); + kfree(v); + break; + case LV_STATUS_BYNAME: + if (!err && u.lv_req.lv) err = copy_lv_t(ptr, u.lv_req.lv); + /* Fall through */ + case LV_CREATE: + case LV_EXTEND: + case LV_REDUCE: + if (u.lv_req.lv) put_lv_t(u.lv_req.lv); + break; + case LV_STATUS_BYINDEX: + if (u.lv_byindex.lv && !err) + err = copy_lv_t(ptr, u.lv_byindex.lv); + if (u.lv_byindex.lv) put_lv_t(u.lv_byindex.lv); + break; /* must not fall into PV_STATUS: ptr names an lv32_t here and p was never filled in */ + case PV_STATUS: + if (!err) { + err = copy_to_user((void *)A(ptr), &p, sizeof(pv32_t) - 8); + if (err) return -EFAULT; + } + break; + } + return err; +} +#endif + static int ret_einval(unsigned int fd, unsigned int cmd, unsigned long arg) { return -EINVAL; @@ -2027,6 +2383,39 @@ return err; } +typedef struct blkelv_ioctl32_arg_s { + u32 queue_ID; + int read_latency; + int write_latency; + int max_bomb_segments; +} blkelv_ioctl32_arg_t; + +static int do_blkelv_ioctl(unsigned int fd, unsigned int cmd, blkelv_ioctl32_arg_t *arg) +{ + blkelv_ioctl_arg_t b; + int err; + mm_segment_t old_fs = get_fs(); + + if (cmd == BLKELVSET) { + err = get_user((long)b.queue_ID, &arg->queue_ID); + err |= __get_user(b.read_latency, &arg->read_latency); + err |= __get_user(b.write_latency, &arg->write_latency); + err |= __get_user(b.max_bomb_segments, &arg->max_bomb_segments); + if (err) return err; + } + set_fs (KERNEL_DS); + err = sys_ioctl(fd, cmd, (unsigned long)&b); + set_fs (old_fs); + if (cmd == BLKELVGET && !err) { + err = put_user((long)b.queue_ID, &arg->queue_ID); + err |= __put_user(b.read_latency, &arg->read_latency); + err |= __put_user(b.write_latency, &arg->write_latency); + err |= __put_user(b.max_bomb_segments, &arg->max_bomb_segments); + if (err) return err; + } + return err; +} + static int ioc_settimeout(unsigned int fd, unsigned int cmd, unsigned long arg) { return rw_long(fd, AUTOFS_IOC_SETTIMEOUT, arg); @@ -2548,6 +2937,24 @@ COMPATIBLE_IOCTL(ATMTCP_REMOVE) 
COMPATIBLE_IOCTL(ATMMPC_CTRL) COMPATIBLE_IOCTL(ATMMPC_DATA) +#if defined(CONFIG_BLK_DEV_LVM) || defined(CONFIG_BLK_DEV_LVM_MODULE) +/* 0xfe - lvm */ +COMPATIBLE_IOCTL(VG_SET_EXTENDABLE) +COMPATIBLE_IOCTL(VG_STATUS_GET_COUNT) +COMPATIBLE_IOCTL(VG_STATUS_GET_NAMELIST) +COMPATIBLE_IOCTL(VG_REMOVE) +COMPATIBLE_IOCTL(VG_REDUCE) +COMPATIBLE_IOCTL(PE_LOCK_UNLOCK) +COMPATIBLE_IOCTL(PV_FLUSH) +COMPATIBLE_IOCTL(LVM_LOCK_LVM) +COMPATIBLE_IOCTL(LVM_GET_IOP_VERSION) +#ifdef LVM_TOTAL_RESET +COMPATIBLE_IOCTL(LVM_RESET) +#endif +COMPATIBLE_IOCTL(LV_SET_ACCESS) +COMPATIBLE_IOCTL(LV_SET_STATUS) +COMPATIBLE_IOCTL(LV_SET_ALLOCATION) +#endif /* LVM */ /* And these ioctls need translation */ HANDLE_IOCTL(SIOCGIFNAME, dev_ifname32) HANDLE_IOCTL(SIOCGIFCONF, dev_ifconf) @@ -2594,6 +3001,8 @@ HANDLE_IOCTL(BLKFRAGET, w_long) HANDLE_IOCTL(BLKSECTGET, w_long) HANDLE_IOCTL(BLKPG, blkpg_ioctl_trans) +HANDLE_IOCTL(BLKELVGET, do_blkelv_ioctl) +HANDLE_IOCTL(BLKELVSET, do_blkelv_ioctl) HANDLE_IOCTL(FBIOPUTCMAP32, fbiogetputcmap) HANDLE_IOCTL(FBIOGETCMAP32, fbiogetputcmap) HANDLE_IOCTL(FBIOSCURSOR32, fbiogscursor) @@ -2677,6 +3086,20 @@ HANDLE_IOCTL(SONET_SETFRAMING, do_atm_ioctl) HANDLE_IOCTL(SONET_GETFRAMING, do_atm_ioctl) HANDLE_IOCTL(SONET_GETFRSENSE, do_atm_ioctl) +#if defined(CONFIG_BLK_DEV_LVM) || defined(CONFIG_BLK_DEV_LVM_MODULE) +HANDLE_IOCTL(VG_STATUS, do_lvm_ioctl) +HANDLE_IOCTL(VG_CREATE, do_lvm_ioctl) +HANDLE_IOCTL(VG_EXTEND, do_lvm_ioctl) +HANDLE_IOCTL(LV_CREATE, do_lvm_ioctl) +HANDLE_IOCTL(LV_REMOVE, do_lvm_ioctl) +HANDLE_IOCTL(LV_EXTEND, do_lvm_ioctl) +HANDLE_IOCTL(LV_REDUCE, do_lvm_ioctl) +HANDLE_IOCTL(LV_STATUS_BYNAME, do_lvm_ioctl) +HANDLE_IOCTL(LV_STATUS_BYINDEX, do_lvm_ioctl) +HANDLE_IOCTL(LE_REMAP, do_lvm_ioctl) +HANDLE_IOCTL(PV_CHANGE, do_lvm_ioctl) +HANDLE_IOCTL(PV_STATUS, do_lvm_ioctl) +#endif /* LVM */ IOCTL_TABLE_END unsigned int ioctl32_hash_table[1024]; diff -u --recursive --new-file v2.3.99-pre1/linux/arch/sparc64/kernel/setup.c linux/arch/sparc64/kernel/setup.c --- 
v2.3.99-pre1/linux/arch/sparc64/kernel/setup.c Tue Mar 7 14:32:25 2000 +++ linux/arch/sparc64/kernel/setup.c Thu Mar 16 11:20:33 2000 @@ -1,4 +1,4 @@ -/* $Id: setup.c,v 1.52 2000/03/03 23:48:41 davem Exp $ +/* $Id: setup.c,v 1.53 2000/03/15 14:42:52 jj Exp $ * linux/arch/sparc64/kernel/setup.c * * Copyright (C) 1995,1996 David S. Miller (davem@caip.rutgers.edu) @@ -431,8 +431,6 @@ extern unsigned short root_flags; extern unsigned short root_dev; extern unsigned short ram_flags; -extern unsigned int sparc_ramdisk_image; -extern unsigned int sparc_ramdisk_size; #define RAMDISK_IMAGE_START_MASK 0x07FF #define RAMDISK_PROMPT_FLAG 0x8000 #define RAMDISK_LOAD_FLAG 0x4000 @@ -512,29 +510,6 @@ rd_prompt = ((ram_flags & RAMDISK_PROMPT_FLAG) != 0); rd_doload = ((ram_flags & RAMDISK_LOAD_FLAG) != 0); #endif -#ifdef CONFIG_BLK_DEV_INITRD -// FIXME needs to do the new bootmem alloc stuff - if (sparc_ramdisk_image) { - unsigned long start = 0; - - if (sparc_ramdisk_image >= (unsigned long)&end - 2 * PAGE_SIZE) - sparc_ramdisk_image -= KERNBASE; - initrd_start = sparc_ramdisk_image + phys_base + PAGE_OFFSET; - initrd_end = initrd_start + sparc_ramdisk_size; - if (initrd_end > *memory_end_p) { - printk(KERN_CRIT "initrd extends beyond end of memory " - "(0x%016lx > 0x%016lx)\ndisabling initrd\n", - initrd_end,*memory_end_p); - initrd_start = 0; - } - if (initrd_start) - start = sparc_ramdisk_image + KERNBASE; - if (start >= *memory_start_p && start < *memory_start_p + 2 * PAGE_SIZE) { - initrd_below_start_ok = 1; - *memory_start_p = PAGE_ALIGN (start + sparc_ramdisk_size); - } - } -#endif /* Due to stack alignment restrictions and assumptions... 
*/ init_mm.mmap->vm_page_prot = PAGE_SHARED; diff -u --recursive --new-file v2.3.99-pre1/linux/arch/sparc64/kernel/smp.c linux/arch/sparc64/kernel/smp.c --- v2.3.99-pre1/linux/arch/sparc64/kernel/smp.c Fri Mar 10 16:40:41 2000 +++ linux/arch/sparc64/kernel/smp.c Thu Mar 16 11:40:17 2000 @@ -528,7 +528,7 @@ void smp_capture(void) { if (smp_processors_ready) { - int result = atomic_add_return(1, &smp_capture_depth); + int result = __atomic_add(1, &smp_capture_depth); membar("#StoreStore | #LoadStore"); if(result == 1) { @@ -596,18 +596,21 @@ smp_cross_call(&xcall_promstop, 0, 0, 0); } -static inline void sparc64_do_profile(unsigned long pc, unsigned long g3) +static inline void sparc64_do_profile(unsigned long pc, unsigned long o7) { if (prof_buffer && current->pid) { extern int _stext; extern int rwlock_impl_begin, rwlock_impl_end; extern int atomic_impl_begin, atomic_impl_end; + extern int __memcpy_begin, __memcpy_end; - if ((pc >= (unsigned long) &rwlock_impl_begin && + if ((pc >= (unsigned long) &atomic_impl_begin && + pc < (unsigned long) &atomic_impl_end) || + (pc >= (unsigned long) &rwlock_impl_begin && pc < (unsigned long) &rwlock_impl_end) || - (pc >= (unsigned long) &atomic_impl_begin && - pc < (unsigned long) &atomic_impl_end)) - pc = g3; + (pc >= (unsigned long) &__memcpy_begin && + pc < (unsigned long) &__memcpy_end)) + pc = o7; pc -= (unsigned long) &_stext; pc >>= prof_shift; @@ -646,7 +649,7 @@ clear_softint((1UL << 0)); do { if(!user) - sparc64_do_profile(regs->tpc, regs->u_regs[UREG_G3]); + sparc64_do_profile(regs->tpc, regs->u_regs[UREG_RETPC]); if(!--prof_counter(cpu)) { if (cpu == boot_cpu_id) { diff -u --recursive --new-file v2.3.99-pre1/linux/arch/sparc64/kernel/sparc64_ksyms.c linux/arch/sparc64/kernel/sparc64_ksyms.c --- v2.3.99-pre1/linux/arch/sparc64/kernel/sparc64_ksyms.c Sat Feb 26 22:31:42 2000 +++ linux/arch/sparc64/kernel/sparc64_ksyms.c Fri Mar 17 10:56:19 2000 @@ -1,4 +1,4 @@ -/* $Id: sparc64_ksyms.c,v 1.75 2000/02/21 15:50:08 davem 
Exp $ +/* $Id: sparc64_ksyms.c,v 1.79 2000/03/17 14:41:18 davem Exp $ * arch/sparc64/kernel/sparc64_ksyms.c: Sparc64 specific ksyms support. * * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu) @@ -109,10 +109,10 @@ */ #define EXPORT_SYMBOL_PRIVATE(sym) \ -extern int __sparc_priv_ ## sym (int) __asm__("__" ## #sym); \ +extern int __sparc_priv_ ## sym (int) __asm__("__" #sym); \ const struct module_symbol __export_priv_##sym \ __attribute__((section("__ksymtab"))) = \ -{ (unsigned long) &__sparc_priv_ ## sym, "__" ## #sym } +{ (unsigned long) &__sparc_priv_ ## sym, "__" #sym } /* used by various drivers */ #ifdef __SMP__ @@ -142,6 +142,8 @@ /* Misc SMP information */ EXPORT_SYMBOL(smp_num_cpus); +EXPORT_SYMBOL(__cpu_number_map); +EXPORT_SYMBOL(__cpu_logical_map); /* Spinlock debugging library, optional. */ #ifdef SPIN_LOCK_DEBUG @@ -207,6 +209,7 @@ EXPORT_SYMBOL(pci_unmap_sg); EXPORT_SYMBOL(pci_dma_sync_single); EXPORT_SYMBOL(pci_dma_sync_sg); +EXPORT_SYMBOL(pci_dma_supported); #endif /* Solaris/SunOS binary compatibility */ @@ -288,6 +291,9 @@ EXPORT_SYMBOL(__memcpy); EXPORT_SYMBOL(__memset); EXPORT_SYMBOL(clear_page); +EXPORT_SYMBOL(copy_page); +EXPORT_SYMBOL(clear_user_page); +EXPORT_SYMBOL(copy_user_page); EXPORT_SYMBOL(__bzero); EXPORT_SYMBOL(__memscan_zero); EXPORT_SYMBOL(__memscan_generic); diff -u --recursive --new-file v2.3.99-pre1/linux/arch/sparc64/kernel/sys_sparc.c linux/arch/sparc64/kernel/sys_sparc.c --- v2.3.99-pre1/linux/arch/sparc64/kernel/sys_sparc.c Sun Feb 20 21:12:38 2000 +++ linux/arch/sparc64/kernel/sys_sparc.c Fri Mar 17 10:56:19 2000 @@ -1,4 +1,4 @@ -/* $Id: sys_sparc.c,v 1.36 2000/02/16 07:31:35 davem Exp $ +/* $Id: sys_sparc.c,v 1.37 2000/03/17 05:48:46 anton Exp $ * linux/arch/sparc64/kernel/sys_sparc.c * * This file contains various random system calls that @@ -348,9 +348,10 @@ lock_kernel(); regs->tpc = regs->tnpc; regs->tnpc += 4; - if(++count <= 20) + if(++count <= 5) { printk ("For Solaris binary emulation you need 
solaris module loaded\n"); - show_regs (regs); + show_regs (regs); + } send_sig(SIGSEGV, current, 1); unlock_kernel(); return -ENOSYS; diff -u --recursive --new-file v2.3.99-pre1/linux/arch/sparc64/kernel/sys_sparc32.c linux/arch/sparc64/kernel/sys_sparc32.c --- v2.3.99-pre1/linux/arch/sparc64/kernel/sys_sparc32.c Tue Mar 14 19:10:39 2000 +++ linux/arch/sparc64/kernel/sys_sparc32.c Fri Mar 17 10:56:19 2000 @@ -1,4 +1,4 @@ -/* $Id: sys_sparc32.c,v 1.136 2000/03/13 21:57:29 davem Exp $ +/* $Id: sys_sparc32.c,v 1.139 2000/03/16 20:37:57 davem Exp $ * sys_sparc32.c: Conversion between 32bit and 64bit native syscalls. * * Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz) @@ -1746,70 +1746,58 @@ return 0; } -extern asmlinkage int sys_mount(char * dev_name, char * dir_name, char * type, - unsigned long new_flags, void *data); +extern long do_sys_mount(char * dev_name, char * dir_name, unsigned long type_page, + unsigned long new_flags, unsigned long data_page); #define SMBFS_NAME "smbfs" #define NCPFS_NAME "ncpfs" asmlinkage int sys32_mount(char *dev_name, char *dir_name, char *type, unsigned long new_flags, u32 data) { - unsigned long type_page; + unsigned long type_page = 0; + unsigned long data_page = 0; int err, is_smb, is_ncp; - if(!capable(CAP_SYS_ADMIN)) - return -EPERM; is_smb = is_ncp = 0; + + lock_kernel(); err = copy_mount_stuff_to_kernel((const void *)type, &type_page); - if(err) - return err; - if(type_page) { - is_smb = !strcmp((char *)type_page, SMBFS_NAME); - is_ncp = !strcmp((char *)type_page, NCPFS_NAME); - } - if(!is_smb && !is_ncp) { - if(type_page) - free_page(type_page); - return sys_mount(dev_name, dir_name, type, new_flags, (void *)AA(data)); - } else { - unsigned long dev_page, dir_page, data_page; - mm_segment_t old_fs; + if (err) + goto out; - err = copy_mount_stuff_to_kernel((const void *)dev_name, &dev_page); - if(err) - goto out; - err = copy_mount_stuff_to_kernel((const void *)dir_name, &dir_page); - if(err) - goto dev_out; - 
err = copy_mount_stuff_to_kernel((const void *)AA(data), &data_page); - if(err) - goto dir_out; - if(is_ncp) + if (!type_page) { + err = -EINVAL; + goto out; + } + + is_smb = !strcmp((char *)type_page, SMBFS_NAME); + is_ncp = !strcmp((char *)type_page, NCPFS_NAME); + + err = copy_mount_stuff_to_kernel((const void *)AA(data), &data_page); + if (err) + goto type_out; + + if (!is_smb && !is_ncp) { + err = do_sys_mount(dev_name, dir_name, type_page, new_flags, + data_page); + } else { + if (is_ncp) do_ncp_super_data_conv((void *)data_page); - else if(is_smb) - do_smb_super_data_conv((void *)data_page); else - panic("The problem is here..."); - old_fs = get_fs(); - set_fs(KERNEL_DS); - err = sys_mount((char *)dev_page, (char *)dir_page, - (char *)type_page, new_flags, - (void *)data_page); - set_fs(old_fs); + do_smb_super_data_conv((void *)data_page); - if(data_page) - free_page(data_page); - dir_out: - if(dir_page) - free_page(dir_page); - dev_out: - if(dev_page) - free_page(dev_page); - out: - if(type_page) - free_page(type_page); - return err; + err = do_sys_mount(dev_name, dir_name, type_page, new_flags, + data_page); } + + free_page(data_page); + +type_out: + free_page(type_page); + +out: + unlock_kernel(); + return err; } struct rusage32 { @@ -3071,7 +3059,7 @@ memset(bprm.page, 0, MAX_ARG_PAGES * sizeof(bprm.page[0])); lock_kernel(); - dentry = open_namei(filename, 0, 0); + dentry = open_namei(filename); unlock_kernel(); retval = PTR_ERR(dentry); @@ -4166,7 +4154,7 @@ if (fd >= 0) { struct file * f; lock_kernel(); - f = filp_open(tmp, flags, mode); + f = filp_open(tmp, flags, mode, NULL); unlock_kernel(); error = PTR_ERR(f); if (IS_ERR(f)) @@ -4217,13 +4205,4 @@ up(&current->mm->mmap_sem); out: return ret; -} - -extern asmlinkage long sys_mincore(unsigned long start, size_t len, unsigned char *vec); - -asmlinkage long sys32_mincore(unsigned long start, u32 __len, unsigned char *vec) -{ - size_t len = (size_t) __len; - - return sys_mincore(start, len, vec); } diff -u 
--recursive --new-file v2.3.99-pre1/linux/arch/sparc64/kernel/sys_sunos32.c linux/arch/sparc64/kernel/sys_sunos32.c --- v2.3.99-pre1/linux/arch/sparc64/kernel/sys_sunos32.c Tue Mar 14 19:10:39 2000 +++ linux/arch/sparc64/kernel/sys_sunos32.c Wed Mar 15 00:37:50 2000 @@ -1,4 +1,4 @@ -/* $Id: sys_sunos32.c,v 1.41 2000/03/13 21:57:31 davem Exp $ +/* $Id: sys_sunos32.c,v 1.42 2000/03/15 02:43:35 davem Exp $ * sys_sunos32.c: SunOS binary compatability layer on sparc64. * * Copyright (C) 1995, 1996, 1997 David S. Miller (davem@caip.rutgers.edu) @@ -222,33 +222,6 @@ printk("%s: Advises us to use %s paging strategy\n", current->comm, strategy <= 3 ? vstrings[strategy] : "BOGUS"); - unlock_kernel(); -} - -/* Same as vadvise, and just as bogus, but for a range of virtual - * process address space. - */ -#define MADV_NORMAL 0 /* Nothing special... */ -#define MADV_RANDOM 1 /* I am emacs... */ -#define MADV_SEQUENTIAL 2 /* I am researcher code... */ -#define MADV_WILLNEED 3 /* Pages in this range will be needed */ -#define MADV_DONTNEED 4 /* Pages in this range won't be needed */ - -static char *mstrings[] = { - "MADV_NORMAL", - "MADV_RANDOM", - "MADV_SEQUENTIAL", - "MADV_WILLNEED", - "MADV_DONTNEED", -}; - -asmlinkage void sunos_madvise(u32 address, u32 len, u32 strategy) -{ - /* I wanna see who uses this... */ - lock_kernel(); - printk("%s: Advises us to use %s paging strategy for addr<%08x> len<%08x>\n", - current->comm, strategy <= 4 ? mstrings[strategy] : "BOGUS", - address, len); unlock_kernel(); } diff -u --recursive --new-file v2.3.99-pre1/linux/arch/sparc64/kernel/systbls.S linux/arch/sparc64/kernel/systbls.S --- v2.3.99-pre1/linux/arch/sparc64/kernel/systbls.S Tue Mar 14 19:10:39 2000 +++ linux/arch/sparc64/kernel/systbls.S Wed Mar 15 00:37:50 2000 @@ -1,4 +1,4 @@ -/* $Id: systbls.S,v 1.69 2000/03/13 21:57:28 davem Exp $ +/* $Id: systbls.S,v 1.71 2000/03/15 02:43:36 davem Exp $ * systbls.S: System call entry point tables for OS compatibility. 
* The native Linux system call table lives here also. * @@ -34,7 +34,7 @@ /*60*/ .word sys_umask, sys_chroot, sys32_newfstat, sys_fstat64, sys_getpagesize .word sys_msync, sys_vfork, sys32_pread, sys32_pwrite, sys_geteuid /*70*/ .word sys_getegid, sys32_mmap, sys_setreuid, sys_munmap, sys_mprotect - .word sys_nis_syscall, sys_vhangup, sys32_truncate64, sys32_mincore, sys32_getgroups16 + .word sys_madvise, sys_vhangup, sys32_truncate64, sys_mincore, sys32_getgroups16 /*80*/ .word sys32_setgroups16, sys_getpgrp, sys_setgroups, sys32_setitimer, sys32_ftruncate64 .word sys_swapon, sys32_getitimer, sys_setuid, sys_sethostname, sys_setgid /*90*/ .word sys_dup2, sys_setfsuid, sys32_fcntl, sys32_select, sys_setfsgid @@ -93,7 +93,7 @@ /*60*/ .word sys_umask, sys_chroot, sys_newfstat, sys_nis_syscall, sys_getpagesize .word sys_msync, sys_vfork, sys_pread, sys_pwrite, sys_nis_syscall /*70*/ .word sys_nis_syscall, sys_mmap, sys_nis_syscall, sys64_munmap, sys_mprotect - .word sys_nis_syscall, sys_vhangup, sys_nis_syscall, sys_mincore, sys_getgroups + .word sys_madvise, sys_vhangup, sys_nis_syscall, sys_mincore, sys_getgroups /*80*/ .word sys_setgroups, sys_getpgrp, sys_nis_syscall, sys_setitimer, sys_nis_syscall .word sys_swapon, sys_getitimer, sys_nis_syscall, sys_sethostname, sys_nis_syscall /*90*/ .word sys_dup2, sys_nis_syscall, sys_fcntl, sys_select, sys_nis_syscall @@ -163,8 +163,8 @@ .word sys_msync, sys_vfork, sunos_nosys .word sunos_nosys, sunos_sbrk, sunos_sstk .word sunos_mmap, sunos_vadvise, sys_munmap - .word sys_mprotect, sunos_madvise, sys_vhangup - .word sunos_nosys, sys32_mincore, sys32_getgroups16 + .word sys_mprotect, sys_madvise, sys_vhangup + .word sunos_nosys, sys_mincore, sys32_getgroups16 .word sys32_setgroups16, sys_getpgrp, sunos_setpgrp .word sys32_setitimer, sunos_nosys, sys_swapon .word sys32_getitimer, sys_gethostname, sys_sethostname diff -u --recursive --new-file v2.3.99-pre1/linux/arch/sparc64/kernel/unaligned.c 
linux/arch/sparc64/kernel/unaligned.c --- v2.3.99-pre1/linux/arch/sparc64/kernel/unaligned.c Mon Aug 2 22:07:16 1999 +++ linux/arch/sparc64/kernel/unaligned.c Thu Mar 16 11:40:17 2000 @@ -1,4 +1,4 @@ -/* $Id: unaligned.c,v 1.18 1999/08/02 08:39:44 davem Exp $ +/* $Id: unaligned.c,v 1.19 2000/03/16 11:53:05 jj Exp $ * unaligned.c: Unaligned load/store trap handling with special * cases for the kernel to do them more quickly. * @@ -590,9 +590,19 @@ maybe_flush_windows(0, 0, rd, from_kernel); reg = fetch_reg_addr(rd, regs); - if ((insn & 0x780000) == 0x180000) - reg[1] = 0; - reg[0] = 0; + if (from_kernel || rd < 16) { + reg[0] = 0; + if ((insn & 0x780000) == 0x180000) + reg[1] = 0; + } else if (current->thread.flags & SPARC_FLAG_32BIT) { + put_user(0, (int *)reg); + if ((insn & 0x780000) == 0x180000) + put_user(0, ((int *)reg) + 1); + } else { + put_user(0, reg); + if ((insn & 0x780000) == 0x180000) + put_user(0, reg + 1); + } advance(regs); } diff -u --recursive --new-file v2.3.99-pre1/linux/arch/sparc64/lib/VIScopy.S linux/arch/sparc64/lib/VIScopy.S --- v2.3.99-pre1/linux/arch/sparc64/lib/VIScopy.S Mon Aug 2 22:07:16 1999 +++ linux/arch/sparc64/lib/VIScopy.S Thu Mar 16 11:40:17 2000 @@ -1,4 +1,4 @@ -/* $Id: VIScopy.S,v 1.21 1999/07/30 09:35:35 davem Exp $ +/* $Id: VIScopy.S,v 1.22 2000/03/16 16:44:38 davem Exp $ * VIScopy.S: High speed copy operations utilizing the UltraSparc * Visual Instruction Set. 
* @@ -304,6 +304,9 @@ .type bcopy,@function #ifdef __KERNEL__ + .globl __memcpy_begin +__memcpy_begin: + .globl __memcpy .type __memcpy,@function @@ -1001,6 +1004,9 @@ FPU_RETL #ifdef __KERNEL__ + .globl __memcpy_end +__memcpy_end: + .section .fixup .align 4 VIScopyfixup_reto2: diff -u --recursive --new-file v2.3.99-pre1/linux/arch/sparc64/lib/atomic.S linux/arch/sparc64/lib/atomic.S --- v2.3.99-pre1/linux/arch/sparc64/lib/atomic.S Tue Aug 31 17:29:13 1999 +++ linux/arch/sparc64/lib/atomic.S Thu Mar 16 11:40:17 2000 @@ -1,4 +1,4 @@ -/* $Id: atomic.S,v 1.2 1999/08/23 05:15:58 davem Exp $ +/* $Id: atomic.S,v 1.3 2000/03/16 16:44:37 davem Exp $ * atomic.S: These things are too big to do inline. * * Copyright (C) 1999 David S. Miller (davem@redhat.com) @@ -13,24 +13,24 @@ .globl __atomic_add atomic_impl_begin: -__atomic_add: - lduw [%g1], %g5 - add %g5, %g2, %g7 - cas [%g1], %g5, %g7 +__atomic_add: /* %o0 = increment, %o1 = atomic_ptr */ + lduw [%o1], %g5 + add %g5, %o0, %g7 + cas [%o1], %g5, %g7 cmp %g5, %g7 bne,pn %icc, __atomic_add nop - jmpl %g3 + 8, %g0 - add %g7, %g2, %g2 + retl + add %g7, %o0, %o0 .globl __atomic_sub -__atomic_sub: - lduw [%g1], %g5 - sub %g5, %g2, %g7 - cas [%g1], %g5, %g7 +__atomic_sub: /* %o0 = increment, %o1 = atomic_ptr */ + lduw [%o1], %g5 + sub %g5, %o0, %g7 + cas [%o1], %g5, %g7 cmp %g5, %g7 bne,pn %icc, __atomic_sub nop - jmpl %g3 + 8, %g0 - sub %g7, %g2, %g2 + retl + sub %g7, %o0, %o0 atomic_impl_end: diff -u --recursive --new-file v2.3.99-pre1/linux/arch/sparc64/lib/blockops.S linux/arch/sparc64/lib/blockops.S --- v2.3.99-pre1/linux/arch/sparc64/lib/blockops.S Tue Mar 7 14:32:25 2000 +++ linux/arch/sparc64/lib/blockops.S Wed Mar 15 09:59:06 2000 @@ -1,8 +1,8 @@ -/* $Id: blockops.S,v 1.20 2000/03/03 23:48:38 davem Exp $ +/* $Id: blockops.S,v 1.22 2000/03/15 07:18:55 davem Exp $ * blockops.S: UltraSparc block zero optimized routines. * - * Copyright (C) 1996,1998 David S. 
Miller (davem@caip.rutgers.edu) - * Copyright (C) 1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz) + * Copyright (C) 1996, 1998, 1999, 2000 David S. Miller (davem@redhat.com) + * Copyright (C) 1997 Jakub Jelinek (jakub@redhat.com) */ #include "VIS.h" @@ -24,23 +24,63 @@ #define TLBTEMP_ENTSZ (1 << 3) .text + .align 32 .globl copy_page .type copy_page,@function copy_page: /* %o0=dest, %o1=src */ VISEntry - ldx [%g6 + AOFF_task_active_mm], %o2 + membar #LoadStore | #StoreStore | #StoreLoad + ldda [%o1] ASI_BLK_P, %f0 + add %o1, 0x40, %o1 + ldda [%o1] ASI_BLK_P, %f16 + add %o1, 0x40, %o1 + sethi %hi(8192), %o2 +1: TOUCH(f0, f2, f4, f6, f8, f10, f12, f14) + ldda [%o1] ASI_BLK_P, %f32 + add %o1, 0x40, %o1 + sub %o2, 0x40, %o2 + stda %f48, [%o0] ASI_BLK_P + add %o0, 0x40, %o0 + TOUCH(f16, f18, f20, f22, f24, f26, f28, f30) + ldda [%o1] ASI_BLK_P, %f0 + add %o1, 0x40, %o1 + sub %o2, 0x40, %o2 + stda %f48, [%o0] ASI_BLK_P + add %o0, 0x40, %o0 + TOUCH(f32, f34, f36, f38, f40, f42, f44, f46) + ldda [%o1] ASI_BLK_P, %f16 + add %o1, 0x40, %o1 + sub %o2, 0x40, %o2 + stda %f48, [%o0] ASI_BLK_P + cmp %o2, 0x80 + bne,pt %xcc, 1b + add %o0, 0x40, %o0 + membar #Sync + stda %f0, [%o0] ASI_BLK_P + add %o0, 0x40, %o0 + stda %f16, [%o0] ASI_BLK_P + membar #Sync + VISExit + retl + nop + + .globl copy_user_page + .type copy_user_page,@function +copy_user_page: /* %o0=dest, %o1=src, %o2=vaddr */ + VISEntry + sethi %hi(PAGE_SIZE), %g3 sub %o0, %g4, %g1 + and %o2, %g3, %o0 + sethi %hi(TLBTEMP_BASE), %o3 sethi %uhi(_PAGE_VALID), %g3 sub %o1, %g4, %g2 sllx %g3, 32, %g3 - ldx [%o2 + AOFF_mm_segments], %o0 + mov TLB_TAG_ACCESS, %o2 or %g3, (_PAGE_CP | _PAGE_CV | _PAGE_P | _PAGE_L | _PAGE_W), %g3 + sethi %hi(DCACHE_SIZE), %o1 or %g1, %g3, %g1 or %g2, %g3, %g2 - mov TLB_TAG_ACCESS, %o2 - sethi %hi(TLBTEMP_BASE), %o3 - sethi %hi(DCACHE_SIZE), %o1 add %o0, %o3, %o0 add %o0, %o1, %o1 sethi %hi(TLBTEMP_ENT1), %o3 @@ -129,17 +169,25 @@ .align 32 .globl clear_page .type clear_page,@function -clear_page: /* 
%o0=dest */ +clear_page: /* %o0=dest */ VISEntryHalf - ldx [%g6 + AOFF_task_active_mm], %o2 + ba,pt %xcc, clear_page_common + clr %o4 + + .align 32 + .globl clear_user_page + .type clear_user_page,@function +clear_user_page: /* %o0=dest, %o1=vaddr */ + VISEntryHalf + sethi %hi(PAGE_SIZE), %g3 sub %o0, %g4, %g1 + and %o1, %g3, %o0 + mov TLB_TAG_ACCESS, %o2 sethi %uhi(_PAGE_VALID), %g3 + sethi %hi(TLBTEMP_BASE), %o3 sllx %g3, 32, %g3 or %g3, (_PAGE_CP | _PAGE_CV | _PAGE_P | _PAGE_L | _PAGE_W), %g3 - ldx [%o2 + AOFF_mm_segments], %o0 or %g1, %g3, %g1 - mov TLB_TAG_ACCESS, %o2 - sethi %hi(TLBTEMP_BASE), %o3 add %o0, %o3, %o0 sethi %hi(TLBTEMP_ENT2), %o3 rdpr %pstate, %g3 @@ -162,6 +210,9 @@ stxa %g1, [%o3] ASI_DTLB_DATA_ACCESS membar #Sync + mov 1, %o4 + +clear_page_common: fzero %f0 ! FPA Group mov 32, %o1 ! IEU0 fzero %f2 ! FPA Group @@ -187,7 +238,13 @@ membar #Sync ! LSU Group VISExitHalf - stxa %g5, [%o2] ASI_DMMU + brnz,pt %o4, 1f + nop + + retl + nop + +1: stxa %g5, [%o2] ASI_DMMU stxa %g7, [%o3] ASI_DTLB_DATA_ACCESS membar #Sync jmpl %o7 + 0x8, %g0 diff -u --recursive --new-file v2.3.99-pre1/linux/arch/sparc64/lib/rwlock.S linux/arch/sparc64/lib/rwlock.S --- v2.3.99-pre1/linux/arch/sparc64/lib/rwlock.S Tue Aug 31 17:29:13 1999 +++ linux/arch/sparc64/lib/rwlock.S Thu Mar 16 11:40:17 2000 @@ -1,4 +1,4 @@ -/* $Id: rwlock.S,v 1.2 1999/08/23 05:15:58 davem Exp $ +/* $Id: rwlock.S,v 1.3 2000/03/16 16:44:38 davem Exp $ * rwlocks.S: These things are too big to do inline. * * Copyright (C) 1999 David S. Miller (davem@redhat.com) @@ -12,34 +12,33 @@ /* The non-contention read lock usage is 2 cache lines. 
*/ .globl __read_lock, __read_unlock - /* g1=lock, g3=retpc, g5/g7=scratch */ rwlock_impl_begin: -__read_lock: - ldsw [%g1], %g5 +__read_lock: /* %o0 = lock_ptr */ + ldsw [%o0], %g5 brlz,pn %g5, __read_wait_for_writer 4: add %g5, 1, %g7 - cas [%g1], %g5, %g7 + cas [%o0], %g5, %g7 cmp %g5, %g7 bne,pn %icc, __read_lock membar #StoreLoad | #StoreStore -99: jmpl %g3 + 8, %g0 +99: retl nop -__read_unlock: - lduw [%g1], %g5 +__read_unlock: /* %o0 = lock_ptr */ + lduw [%o0], %g5 sub %g5, 1, %g7 - cas [%g1], %g5, %g7 + cas [%o0], %g5, %g7 cmp %g5, %g7 be,pt %xcc, 99b membar #StoreLoad | #StoreStore b,a,pt %xcc, __read_unlock __read_wait_for_writer: - ldsw [%g1], %g5 + ldsw [%o0], %g5 brlz,pt %g5, __read_wait_for_writer membar #LoadLoad b,a,pt %xcc, 4b __write_wait_for_writer: - ldsw [%g1], %g5 + ldsw [%o0], %g5 brlz,pt %g5, __write_wait_for_writer membar #LoadLoad b,a,pt %xcc, 4f @@ -48,24 +47,23 @@ .align 64 .globl __write_unlock - /* g1=lock, g3=retpc, g2/g5/g7=scratch */ -__write_unlock: +__write_unlock: /* %o0 = lock_ptr */ sethi %hi(0x80000000), %g2 -1: lduw [%g1], %g5 +1: lduw [%o0], %g5 andn %g5, %g2, %g7 - cas [%g1], %g5, %g7 + cas [%o0], %g5, %g7 cmp %g5, %g7 be,pt %icc, 99b membar #StoreLoad | #StoreStore b,a,pt %xcc, 1b .globl __write_lock -__write_lock: +__write_lock: /* %o0 = lock_ptr */ sethi %hi(0x80000000), %g2 -1: ldsw [%g1], %g5 +1: ldsw [%o0], %g5 4: brnz,pn %g5, 5f or %g5, %g2, %g7 - cas [%g1], %g5, %g7 + cas [%o0], %g5, %g7 cmp %g5, %g7 be,pt %icc, 99b membar #StoreLoad | #StoreStore @@ -73,10 +71,10 @@ b,a,pt %xcc, 1b 5: brlz %g5, __write_wait_for_writer or %g5, %g2, %g7 - cas [%g1], %g5, %g7 + cas [%o0], %g5, %g7 cmp %g5, %g7 bne,pn %icc, 5b -8: ldsw [%g1], %g5 +8: ldsw [%o0], %g5 cmp %g5, %g2 be,pn %icc, 99b membar #LoadLoad diff -u --recursive --new-file v2.3.99-pre1/linux/arch/sparc64/mm/fault.c linux/arch/sparc64/mm/fault.c --- v2.3.99-pre1/linux/arch/sparc64/mm/fault.c Fri Jan 28 15:09:07 2000 +++ linux/arch/sparc64/mm/fault.c Wed Mar 15 
09:59:06 2000 @@ -1,4 +1,4 @@ -/* $Id: fault.c,v 1.42 2000/01/21 11:39:13 jj Exp $ +/* $Id: fault.c,v 1.43 2000/03/14 03:59:46 davem Exp $ * arch/sparc64/mm/fault.c: Page fault handlers for the 64-bit Sparc. * * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu) @@ -245,7 +245,7 @@ if(!(vma->vm_flags & (VM_READ | VM_EXEC))) goto bad_area; } - current->mm->segments = (void *) (address & PAGE_SIZE); + { int fault = handle_mm_fault(current, vma, address, write); diff -u --recursive --new-file v2.3.99-pre1/linux/arch/sparc64/mm/init.c linux/arch/sparc64/mm/init.c --- v2.3.99-pre1/linux/arch/sparc64/mm/init.c Fri Mar 10 16:40:41 2000 +++ linux/arch/sparc64/mm/init.c Thu Mar 16 11:20:33 2000 @@ -1,4 +1,4 @@ -/* $Id: init.c,v 1.148 2000/03/07 07:08:31 anton Exp $ +/* $Id: init.c,v 1.149 2000/03/15 14:42:58 jj Exp $ * arch/sparc64/mm/init.c * * Copyright (C) 1996-1999 David S. Miller (davem@caip.rutgers.edu) @@ -48,6 +48,10 @@ /* References to section boundaries */ extern char __init_begin, __init_end, _start, _end, etext, edata; +/* Initial ramdisk setup */ +extern unsigned int sparc_ramdisk_image; +extern unsigned int sparc_ramdisk_size; + int do_check_pgt_cache(int low, int high) { int freed = 0; @@ -808,6 +812,7 @@ { unsigned long bootmap_size, start_pfn, end_pfn; unsigned long end_of_phys_memory = 0UL; + unsigned long bootmap_pfn; int i; /* XXX It is a bit ambiguous here, whether we should @@ -855,15 +860,37 @@ /* Now shift down to get the real physical page frame number. 
*/ start_pfn >>= PAGE_SHIFT; + + bootmap_pfn = start_pfn; end_pfn = end_of_phys_memory >> PAGE_SHIFT; +#ifdef CONFIG_BLK_DEV_INITRD + /* Now have to check initial ramdisk, so that bootmap does not overwrite it */ + if (sparc_ramdisk_image) { + if (sparc_ramdisk_image >= (unsigned long)&_end - 2 * PAGE_SIZE) + sparc_ramdisk_image -= KERNBASE; + initrd_start = sparc_ramdisk_image + phys_base; + initrd_end = initrd_start + sparc_ramdisk_size; + if (initrd_end > end_of_phys_memory) { + printk(KERN_CRIT "initrd extends beyond end of memory " + "(0x%016lx > 0x%016lx)\ndisabling initrd\n", + initrd_end, end_of_phys_memory); + initrd_start = 0; + } + if (initrd_start) { + if (initrd_start >= (start_pfn << PAGE_SHIFT) && + initrd_start < (start_pfn << PAGE_SHIFT) + 2 * PAGE_SIZE) + bootmap_pfn = PAGE_ALIGN (initrd_end) >> PAGE_SHIFT; + } + } +#endif /* Initialize the boot-time allocator. */ #ifdef DEBUG_BOOTMEM - prom_printf("init_bootmem(spfn[%lx],epfn[%lx])\n", - start_pfn, end_pfn); + prom_printf("init_bootmem(spfn[%lx],bpfn[%lx],epfn[%lx])\n", + start_pfn, bootmap_pfn, end_pfn); #endif - bootmap_size = init_bootmem(start_pfn, end_pfn); + bootmap_size = init_bootmem(bootmap_pfn, end_pfn); /* Now register the available physical memory with the * allocator. @@ -878,15 +905,27 @@ sp_banks[i].num_bytes); } - /* Reserve the kernel text/data/bss and the bootmem bitmap. */ + /* Reserve the kernel text/data/bss, the bootmem bootmap and initrd. 
*/ #ifdef DEBUG_BOOTMEM +#ifdef CONFIG_BLK_DEV_INITRD + if (initrd_start) + prom_printf("reserve_bootmem: base[%lx] size[%lx]\n", + initrd_start, initrd_end - initrd_start); +#endif + prom_printf("reserve_bootmem: base[%lx] size[%lx]\n", + phys_base, (start_pfn << PAGE_SHIFT) - phys_base); prom_printf("reserve_bootmem: base[%lx] size[%lx]\n", - phys_base, - (((start_pfn << PAGE_SHIFT) + - bootmap_size) - phys_base)); + (bootmap_pfn << PAGE_SHIFT), bootmap_size); +#endif +#ifdef CONFIG_BLK_DEV_INITRD + if (initrd_start) { + reserve_bootmem(initrd_start, initrd_end - initrd_start); + initrd_start += PAGE_OFFSET; + initrd_end += PAGE_OFFSET; + } #endif - reserve_bootmem(phys_base, (((start_pfn << PAGE_SHIFT) + - bootmap_size) - phys_base)); + reserve_bootmem(phys_base, (start_pfn << PAGE_SHIFT) - phys_base); + reserve_bootmem((bootmap_pfn << PAGE_SHIFT), bootmap_size); #ifdef DEBUG_BOOTMEM prom_printf("init_bootmem: return end_pfn[%lx]\n", end_pfn); @@ -1234,11 +1273,6 @@ ((unsigned long) &empty_zero_page); last += PAGE_OFFSET + phys_base; while (addr < last) { -#ifdef CONFIG_BLK_DEV_INITRD -// FIXME to use bootmem scheme... 
- if (initrd_below_start_ok && addr >= initrd_start && addr < initrd_end) - mem_map[MAP_NR(addr)].flags &= ~(1<> 22, sparc64_valid_addr_bitmap); addr += PAGE_SIZE; } @@ -1318,6 +1352,22 @@ num_physpages++; } } + +#ifdef CONFIG_BLK_DEV_INITRD +void free_initrd_mem(unsigned long start, unsigned long end) +{ + if (start < end) + printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10); + for (; start < end; start += PAGE_SIZE) { + struct page *p = mem_map + MAP_NR(start); + + ClearPageReserved(p); + set_page_count(p, 1); + __free_page(p); + num_physpages++; + } +} +#endif void si_meminfo(struct sysinfo *val) { diff -u --recursive --new-file v2.3.99-pre1/linux/drivers/block/Config.in linux/drivers/block/Config.in --- v2.3.99-pre1/linux/drivers/block/Config.in Tue Mar 14 19:10:39 2000 +++ linux/drivers/block/Config.in Sat Mar 18 11:33:30 2000 @@ -31,7 +31,7 @@ fi fi tristate 'XT hard disk support' CONFIG_BLK_DEV_XD -dep_tristate 'Parallel port IDE device support' CONFIG_PARIDE $CONFIG_PARIDE_PARPORT +dep_tristate 'Parallel port IDE device support' CONFIG_PARIDE $CONFIG_PARPORT if [ "$CONFIG_PARIDE" = "y" -o "$CONFIG_PARIDE" = "m" ]; then source drivers/block/paride/Config.in fi diff -u --recursive --new-file v2.3.99-pre1/linux/drivers/block/floppy.c linux/drivers/block/floppy.c --- v2.3.99-pre1/linux/drivers/block/floppy.c Wed Feb 16 17:03:51 2000 +++ linux/drivers/block/floppy.c Wed Mar 15 09:59:06 2000 @@ -4375,60 +4375,12 @@ } } -static void __init mod_setup(char *pattern, int (*setup)(char *)) -{ - unsigned long i; - char c; - int j; - int match; - char buffer[100]; - int length = strlen(pattern)+1; - - match=0; - j=1; - - for (i=current->mm->env_start; i< current->mm->env_end; i ++){ - get_user(c, (char *)i); - if (match){ - if (j==99) - c='\0'; - buffer[j] = c; - if (!c || c == ' ' || c == '\t'){ - if (j){ - buffer[j] = '\0'; - setup(buffer); - } - j=0; - } else - j++; - if (!c) - break; - continue; - } - if ((!j && !c) || (j && c == pattern[j-1])) - 
j++; - else - j=0; - if (j==length){ - match=1; - j=0; - } - } -} - - -#ifdef __cplusplus -extern "C" { -#endif int init_module(void) { printk(KERN_INFO "inserting floppy driver for " UTS_RELEASE "\n"); if(floppy) parse_floppy_cfg_string(floppy); - else - mod_setup("floppy=", floppy_setup); - return floppy_init(); } @@ -4449,10 +4401,6 @@ MODULE_PARM(FLOPPY_DMA,"i"); MODULE_AUTHOR("Alain L. Knaff"); MODULE_SUPPORTED_DEVICE("fd"); - -#ifdef __cplusplus -} -#endif #else diff -u --recursive --new-file v2.3.99-pre1/linux/drivers/block/ll_rw_blk.c linux/drivers/block/ll_rw_blk.c --- v2.3.99-pre1/linux/drivers/block/ll_rw_blk.c Tue Mar 14 19:10:39 2000 +++ linux/drivers/block/ll_rw_blk.c Wed Mar 15 17:00:03 2000 @@ -319,9 +319,9 @@ DECLARE_WAITQUEUE(wait, current); unsigned long flags; - add_wait_queue(&wait_for_request, &wait); + add_wait_queue_exclusive(&wait_for_request, &wait); for (;;) { - current->state = TASK_UNINTERRUPTIBLE; + __set_current_state(TASK_UNINTERRUPTIBLE|TASK_EXCLUSIVE); spin_lock_irqsave(&io_request_lock,flags); req = get_request(n, dev); spin_unlock_irqrestore(&io_request_lock,flags); diff -u --recursive --new-file v2.3.99-pre1/linux/drivers/block/lvm.c linux/drivers/block/lvm.c --- v2.3.99-pre1/linux/drivers/block/lvm.c Sat Feb 26 22:31:44 2000 +++ linux/drivers/block/lvm.c Wed Mar 15 00:37:49 2000 @@ -892,8 +892,8 @@ "%s -- lvm_blk_ioctl -- BLKGETSIZE: %u\n", lvm_name, lv_ptr->lv_size); #endif - copy_to_user((long *) arg, &lv_ptr->lv_size, - sizeof(lv_ptr->lv_size)); + if (put_user(lv_ptr->lv_size, (long *)arg)) + return -EFAULT; break; @@ -931,8 +931,8 @@ printk(KERN_DEBUG "%s -- lvm_blk_ioctl -- BLKRAGET\n", lvm_name); #endif - copy_to_user((long *) arg, &lv_ptr->lv_read_ahead, - sizeof(lv_ptr->lv_read_ahead)); + if (put_user(lv->lv_read_ahead, (long *)arg)) + return -EFAULT; break; diff -u --recursive --new-file v2.3.99-pre1/linux/drivers/block/nbd.c linux/drivers/block/nbd.c --- v2.3.99-pre1/linux/drivers/block/nbd.c Fri Mar 10 16:40:42 2000 
+++ linux/drivers/block/nbd.c Wed Mar 15 00:37:49 2000 @@ -299,7 +299,7 @@ static void do_nbd_request(request_queue_t * q) { struct request *req; - int dev; + int dev = 0; struct nbd_device *lo; while (!QUEUE_EMPTY) { diff -u --recursive --new-file v2.3.99-pre1/linux/drivers/cdrom/cm206.c linux/drivers/cdrom/cm206.c --- v2.3.99-pre1/linux/drivers/cdrom/cm206.c Wed Feb 16 17:03:51 2000 +++ linux/drivers/cdrom/cm206.c Wed Mar 15 13:29:54 2000 @@ -1483,6 +1483,6 @@ #endif /* !MODULE */ /* * Local variables: - * compile-command: "gcc -D__KERNEL__ -I/usr/src/linux/include -Wall -Wstrict-prototypes -O2 -fomit-frame-pointer -D__SMP__ -pipe -fno-strength-reduce -m486 -DCPU=486 -D__SMP__ -DMODULE -DMODVERSIONS -include /usr/src/linux/include/linux/modversions.h -c -o cm206.o cm206.c" + * compile-command: "gcc -D__KERNEL__ -I/usr/src/linux/include -Wall -Wstrict-prototypes -O2 -fomit-frame-pointer -pipe -fno-strength-reduce -m486 -DCPU=486 -D__SMP__ -DMODULE -DMODVERSIONS -include /usr/src/linux/include/linux/modversions.h -c -o cm206.o cm206.c" * End: */ diff -u --recursive --new-file v2.3.99-pre1/linux/drivers/char/amiserial.c linux/drivers/char/amiserial.c --- v2.3.99-pre1/linux/drivers/char/amiserial.c Sun Feb 13 19:29:03 2000 +++ linux/drivers/char/amiserial.c Wed Mar 15 13:29:54 2000 @@ -2259,6 +2259,6 @@ /* Local variables: - compile-command: "gcc -D__KERNEL__ -I../../include -Wall -Wstrict-prototypes -O2 -fomit-frame-pointer -fno-strict-aliasing -D__SMP__ -pipe -fno-strength-reduce -DCPU=686 -march=i686 -DMODULE -DMODVERSIONS -include ../../include/linux/modversions.h -DEXPORT_SYMTAB -c serial.c" + compile-command: "gcc -D__KERNEL__ -I../../include -Wall -Wstrict-prototypes -O2 -fomit-frame-pointer -fno-strict-aliasing -D__SMP__ -pipe -fno-strength-reduce -DCPU=686 -march=i686 -DMODULE -DMODVERSIONS -include ../../include/linux/modversions.h -DEXPORT_SYMTAB -c amiserial.c" End: */ diff -u --recursive --new-file v2.3.99-pre1/linux/drivers/char/drm/bufs.c 
linux/drivers/char/drm/bufs.c --- v2.3.99-pre1/linux/drivers/char/drm/bufs.c Tue Mar 14 19:10:39 2000 +++ linux/drivers/char/drm/bufs.c Wed Mar 15 13:29:54 2000 @@ -29,6 +29,7 @@ */ #define __NO_VERSION__ +#include #include "drmP.h" #include "linux/un.h" diff -u --recursive --new-file v2.3.99-pre1/linux/drivers/char/drm/drmP.h linux/drivers/char/drm/drmP.h --- v2.3.99-pre1/linux/drivers/char/drm/drmP.h Tue Mar 14 19:10:39 2000 +++ linux/drivers/char/drm/drmP.h Sat Mar 18 12:11:35 2000 @@ -50,7 +50,6 @@ #include #endif #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,1,0) -#include #include #endif #include "drm.h" diff -u --recursive --new-file v2.3.99-pre1/linux/drivers/char/drm/gamma_drv.c linux/drivers/char/drm/gamma_drv.c --- v2.3.99-pre1/linux/drivers/char/drm/gamma_drv.c Tue Mar 14 19:10:39 2000 +++ linux/drivers/char/drm/gamma_drv.c Wed Mar 15 13:29:54 2000 @@ -28,6 +28,7 @@ * */ +#include #include "drmP.h" #include "gamma_drv.h" EXPORT_SYMBOL(gamma_init); diff -u --recursive --new-file v2.3.99-pre1/linux/drivers/char/drm/tdfx_drv.c linux/drivers/char/drm/tdfx_drv.c --- v2.3.99-pre1/linux/drivers/char/drm/tdfx_drv.c Tue Mar 14 19:10:39 2000 +++ linux/drivers/char/drm/tdfx_drv.c Wed Mar 15 13:29:54 2000 @@ -29,6 +29,7 @@ * */ +#include #include "drmP.h" #include "tdfx_drv.h" diff -u --recursive --new-file v2.3.99-pre1/linux/drivers/char/dtlk.c linux/drivers/char/dtlk.c --- v2.3.99-pre1/linux/drivers/char/dtlk.c Wed Feb 16 17:03:51 2000 +++ linux/drivers/char/dtlk.c Sat Mar 18 11:35:13 2000 @@ -1,10 +1,14 @@ /* -*- linux-c -*- - * dtlk.c - DoubleTalk PC driver for Linux kernel 2.0.29 - * - * $Id: dtlk.c,v 1.19 1999/02/28 12:13:13 jrv Exp jrv $ + * dtlk.c - DoubleTalk PC driver for Linux * * Original author: Chris Pallotta * Current maintainer: Jim Van Zandt + * + * 2000-03-18 Jim Van Zandt: Fix polling. + * Eliminate dtlk_timer_active flag and separate dtlk_stop_timer + * function. Don't restart timer in dtlk_timer_tick. 
Restart timer + * in dtlk_poll after every poll. dtlk_poll returns mask (duh). + * Eliminate unused function dtlk_write_byte. Misc. code cleanups. */ /* This driver is for the DoubleTalk PC, a speech synthesizer @@ -79,7 +83,6 @@ static int dtlk_port_lpc; static int dtlk_port_tts; static int dtlk_busy; -static int dtlk_timer_active; static int dtlk_has_indexing; static unsigned int dtlk_portlist[] = {0x25e, 0x29e, 0x2de, 0x31e, 0x35e, 0x39e, 0}; @@ -114,13 +117,11 @@ static int dtlk_readable(void); static char dtlk_read_lpc(void); static char dtlk_read_tts(void); -static void dtlk_stop_timer(void); static int dtlk_writeable(void); static char dtlk_write_bytes(const char *buf, int n); static char dtlk_write_tts(char); /* static void dtlk_handle_error(char, char, unsigned int); - static char dtlk_write_byte(unsigned int, const char*); */ static void dtlk_timer_tick(unsigned long data); @@ -250,43 +251,27 @@ poll_wait(file, &dtlk_process_list, wait); if (dtlk_has_indexing && dtlk_readable()) { - dtlk_stop_timer(); + del_timer(&dtlk_timer); mask = POLLIN | POLLRDNORM; } if (dtlk_writeable()) { - dtlk_stop_timer(); + del_timer(&dtlk_timer); mask |= POLLOUT | POLLWRNORM; } /* there are no exception conditions */ - if (mask == 0 && !dtlk_timer_active) { - /* not ready just yet. There won't be any interrupts, - so we set a timer instead. */ - dtlk_timer_active = 1; - dtlk_timer.expires = jiffies + HZ / 100; - add_timer(&dtlk_timer); - } - return 0; -} + /* There won't be any interrupts, so we set a timer instead. 
*/ + del_timer(&dtlk_timer); + dtlk_timer.expires = jiffies + 3*HZ / 100; + add_timer(&dtlk_timer); -static void dtlk_stop_timer() -{ - if (dtlk_timer_active) { - dtlk_timer_active = 0; - del_timer(&dtlk_timer); - } + return mask; } static void dtlk_timer_tick(unsigned long data) { - + TRACE_TEXT(" dtlk_timer_tick"); wake_up_interruptible(&dtlk_process_list); - - if (dtlk_timer_active) { - del_timer(&dtlk_timer); - dtlk_timer.expires = jiffies + HZ / 100; - add_timer(&dtlk_timer); - } } static int dtlk_ioctl(struct inode *inode, @@ -348,7 +333,7 @@ } TRACE_RET; - dtlk_stop_timer(); + del_timer(&dtlk_timer); return 0; } @@ -360,7 +345,6 @@ dtlk_port_lpc = 0; dtlk_port_tts = 0; dtlk_busy = 0; - dtlk_timer_active = 0; dtlk_major = devfs_register_chrdev(0, "dtlk", &dtlk_fops); if (dtlk_major == 0) { printk(KERN_ERR "DoubleTalk PC - cannot register device\n"); @@ -410,15 +394,17 @@ static int dtlk_readable(void) { - TRACE_TEXT(" dtlk_readable"); +#ifdef TRACING + printk(" dtlk_readable=%u@%u", inb_p(dtlk_port_lpc) != 0x7f, jiffies); +#endif return inb_p(dtlk_port_lpc) != 0x7f; } static int dtlk_writeable(void) { /* TRACE_TEXT(" dtlk_writeable"); */ -#ifdef TRACING - printk(" dtlk_writeable(%02x)", inb_p(dtlk_port_tts)); +#ifdef TRACINGMORE + printk(" dtlk_writeable=%u", (inb_p(dtlk_port_tts) & TTS_WRITABLE)!=0); #endif return inb_p(dtlk_port_tts) & TTS_WRITABLE; } @@ -465,7 +451,9 @@ appears. 
*/ dtlk_delay(100); dtlk_has_indexing = dtlk_readable(); - +#ifdef TRACING + printk(", indexing %d\n", dtlk_has_indexing); +#endif #ifdef INSCOPE { /* This macro records ten samples read from the LPC port, for later display */ @@ -479,17 +467,17 @@ int b = 0, i, j; LOOK - outb_p(0xff, dtlk_port_lpc); + outb_p(0xff, dtlk_port_lpc); buffer[b++] = 0; LOOK - dtlk_write_bytes("\0012I\r", 4); + dtlk_write_bytes("\0012I\r", 4); buffer[b++] = 0; __delay(50 * loops_per_sec / 1000); outb_p(0xff, dtlk_port_lpc); buffer[b++] = 0; LOOK - printk("\n"); + printk("\n"); for (j = 0; j < b; j++) printk(" %02x", buffer[j]); printk("\n"); @@ -510,12 +498,12 @@ __delay(loops_per_sec / 100); /* 10 ms */ LOOK - outb_p(0x03, dtlk_port_tts); + outb_p(0x03, dtlk_port_tts); buffer[b++] = 0; LOOK - LOOK + LOOK - printk("\n"); + printk("\n"); for (j = 0; j < b; j++) printk(" %02x", buffer[j]); printk("\n"); @@ -555,7 +543,7 @@ if (total > 2 && buf[total] == 0x7f) break; if (total < sizeof(struct dtlk_settings)) - total++; + total++; } /* if (i==50) printk("interrogate() read overrun\n"); @@ -606,7 +594,8 @@ /* verify DT is ready, read char, wait for ACK */ do { portval = inb_p(dtlk_port_tts); - } while ((portval & TTS_READABLE) == 0 && retries++ < DTLK_MAX_RETRIES); + } while ((portval & TTS_READABLE) == 0 && + retries++ < DTLK_MAX_RETRIES); if (retries == DTLK_MAX_RETRIES) printk(KERN_ERR "dtlk_read_tts() timeout\n"); @@ -617,7 +606,8 @@ retries = 0; do { portval = inb_p(dtlk_port_tts); - } while ((portval & TTS_READABLE) != 0 && retries++ < DTLK_MAX_RETRIES); + } while ((portval & TTS_READABLE) != 0 && + retries++ < DTLK_MAX_RETRIES); if (retries == DTLK_MAX_RETRIES) printk(KERN_ERR "dtlk_read_tts() timeout\n"); @@ -649,22 +639,6 @@ return ch; } -#ifdef NEVER -static char dtlk_write_byte(unsigned int minor, const char *buf) -{ - char ch; - int err; - /* TRACE_TEXT("(dtlk_write_byte"); */ - err = get_user(ch, buf); - /* printk(" dtlk_write_byte(%d, 0x%02x)", minor, (int)ch); */ - - ch = 
dtlk_write_tts(ch); - /* - TRACE_RET; */ - return ch; -} -#endif /* NEVER */ - /* write n bytes to tts port */ static char dtlk_write_bytes(const char *buf, int n) { @@ -680,7 +654,7 @@ static char dtlk_write_tts(char ch) { int retries = 0; -#ifdef TRACING +#ifdef TRACINGMORE printk(" dtlk_write_tts("); if (' ' <= ch && ch <= '~') printk("'%c'", ch); @@ -702,7 +676,7 @@ if ((inb_p(dtlk_port_tts) & TTS_WRITABLE) == 0) break; -#ifdef TRACING +#ifdef TRACINGMORE printk(")\n"); #endif return 0; diff -u --recursive --new-file v2.3.99-pre1/linux/drivers/char/pc_keyb.c linux/drivers/char/pc_keyb.c --- v2.3.99-pre1/linux/drivers/char/pc_keyb.c Sat Feb 12 11:22:10 2000 +++ linux/drivers/char/pc_keyb.c Tue Mar 14 22:52:56 2000 @@ -442,8 +442,11 @@ scancode = kbd_read_input(); +#if 0 /* Ignore error bytes */ - if (!(status & (KBD_STAT_GTO | KBD_STAT_PERR))) { + if (!(status & (KBD_STAT_GTO | KBD_STAT_PERR))) +#endif + { if (status & KBD_STAT_MOUSE_OBF) handle_mouse_event(scancode); else diff -u --recursive --new-file v2.3.99-pre1/linux/drivers/char/raw.c linux/drivers/char/raw.c --- v2.3.99-pre1/linux/drivers/char/raw.c Fri Mar 10 16:40:42 2000 +++ linux/drivers/char/raw.c Wed Mar 15 21:45:27 2000 @@ -102,7 +102,7 @@ */ sector_size = 512; - if (lookup_vfsmnt(rdev) != NULL) { + if (get_super(rdev) != NULL) { if (blksize_size[MAJOR(rdev)]) sector_size = blksize_size[MAJOR(rdev)][MINOR(rdev)]; } else { diff -u --recursive --new-file v2.3.99-pre1/linux/drivers/char/rtc.c linux/drivers/char/rtc.c --- v2.3.99-pre1/linux/drivers/char/rtc.c Sat Feb 26 22:31:45 2000 +++ linux/drivers/char/rtc.c Thu Mar 16 14:07:09 2000 @@ -87,11 +87,11 @@ static DECLARE_WAIT_QUEUE_HEAD(rtc_wait); -static spinlock_t rtc_lock = SPIN_LOCK_UNLOCKED; +extern spinlock_t rtc_lock; static struct timer_list rtc_irq_timer; -static long long rtc_llseek(struct file *file, loff_t offset, int origin); +static loff_t rtc_llseek(struct file *file, loff_t offset, int origin); static ssize_t rtc_read(struct file *file, 
char *buf, size_t count, loff_t *ppos); @@ -141,8 +141,11 @@ #ifndef __alpha__ /* * A very tiny interrupt handler. It runs with SA_INTERRUPT set, - * so that there is no possibility of conflicting with the - * set_rtc_mmss() call that happens during some timer interrupts. + * but there is a possibility of conflicting with the set_rtc_mmss() + * call (the rtc irq and the timer irq can easily run at the same + * time in two different CPUs). So we need to serialize + * accesses to the chip with the rtc_lock spinlock that each + * architecture should implement in the timer code. * (See ./arch/XXXX/kernel/time.c for the set_rtc_mmss() function.) */ @@ -175,7 +178,7 @@ * Now all the various file operations that we export. */ -static long long rtc_llseek(struct file *file, loff_t offset, int origin) +static loff_t rtc_llseek(struct file *file, loff_t offset, int origin) { return -ESPIPE; } diff -u --recursive --new-file v2.3.99-pre1/linux/drivers/char/serial.c linux/drivers/char/serial.c --- v2.3.99-pre1/linux/drivers/char/serial.c Tue Mar 14 19:10:39 2000 +++ linux/drivers/char/serial.c Sun Mar 19 11:15:29 2000 @@ -4315,6 +4315,9 @@ /* Best Data Products Inc. 
Smart One 336F PnP Modem */ { ISAPNP_VENDOR('B', 'D', 'P'), ISAPNP_DEVICE(0x3336), 0, 0, SPCI_FL_BASE0 | SPCI_FL_PNPDEFAULT, 1, 115200 }, + /* SupraExpress 28.8 Data/Fax PnP modem */ + { ISAPNP_VENDOR('S', 'U', 'P'), ISAPNP_DEVICE(0x1310), 0, 0, + SPCI_FL_BASE0 | SPCI_FL_PNPDEFAULT, 1, 115200 }, /* These ID's are taken from M$ documentation */ /* Compaq 14400 Modem */ { ISAPNP_VENDOR('P', 'N', 'P'), ISAPNP_DEVICE(0xC000), 0, 0, diff -u --recursive --new-file v2.3.99-pre1/linux/drivers/char/sysrq.c linux/drivers/char/sysrq.c --- v2.3.99-pre1/linux/drivers/char/sysrq.c Sun Feb 20 21:12:39 2000 +++ linux/drivers/char/sysrq.c Wed Mar 15 21:45:27 2000 @@ -203,7 +203,7 @@ file->f_mode &= ~2; } file_list_unlock(); - DQUOT_OFF(dev); + DQUOT_OFF(sb); fsync_dev(dev); flags = MS_RDONLY; if (sb->s_op && sb->s_op->remount_fs) { @@ -212,8 +212,6 @@ printk("error %d\n", ret); else { sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | (flags & MS_RMT_MASK); - if ((vfsmnt = lookup_vfsmnt(sb->s_dev))) - vfsmnt->mnt_flags = sb->s_flags; printk("OK\n"); } } else diff -u --recursive --new-file v2.3.99-pre1/linux/drivers/i2c/i2c-algo-pcf.c linux/drivers/i2c/i2c-algo-pcf.c --- v2.3.99-pre1/linux/drivers/i2c/i2c-algo-pcf.c Thu Feb 10 17:11:08 2000 +++ linux/drivers/i2c/i2c-algo-pcf.c Sun Mar 19 11:15:29 2000 @@ -21,9 +21,10 @@ /* ------------------------------------------------------------------------- */ /* With some changes from Kyösti Mälkki and - Frodo Looijaard */ + Frodo Looijaard ,and also from Martin Bailey + */ -/* $Id: i2c-algo-pcf.c,v 1.20 2000/01/24 02:06:33 mds Exp $ */ +/* $Id: i2c-algo-pcf.c,v 1.21 2000/03/16 13:07:34 frodo Exp $ */ #include #include @@ -108,10 +109,12 @@ int status; status = get_pcf(adap, 1); +#ifndef STUB_I2C while (timeout-- && !(status & I2C_PCF_BB)) { udelay(1000); /* How much is this? 
*/ status = get_pcf(adap, 1); } +#endif if (timeout<=0) printk("Timeout waiting for Bus Busy\n"); /* @@ -132,10 +135,12 @@ int timeout = DEF_TIMEOUT; *status = get_pcf(adap, 1); +#ifndef STUB_I2C while (timeout-- && (*status & I2C_PCF_PIN)) { adap->waitforpin(); *status = get_pcf(adap, 1); } +#endif if (timeout <= 0) return(-1); else @@ -268,7 +273,7 @@ i2c_start(adap); status = get_pcf(adap, 1); if (wait_for_pin(adap, &status) >= 0) { - if ((status && I2C_PCF_LRB) == 0) { + if ((status & I2C_PCF_LRB) == 0) { i2c_stop(adap); break; /* success! */ } @@ -287,25 +292,28 @@ { struct i2c_algo_pcf_data *adap = i2c_adap->algo_data; int wrcount, status, timeout; - + for (wrcount=0; wrcountname, buf[wrcount]&0xff)); + i2c_adap->name, buf[wrcount]&0xff)); i2c_outb(adap, buf[wrcount]); timeout = wait_for_pin(adap, &status); if (timeout) { - printk("i2c-algo-pcf.o: %s i2c_write: error - timeout.\n", - i2c_adap->name); i2c_stop(adap); + printk("i2c-algo-pcf.o: %s i2c_write: " + "error - timeout.\n", i2c_adap->name); return -EREMOTEIO; /* got a better one ?? */ } +#ifndef STUB_I2C if (status & I2C_PCF_LRB) { - printk("i2c-algo-pcf.o: %s i2c_write: error - no ack.\n", - i2c_adap->name); - i2c_stop(adap); - return -EREMOTEIO; /* got a better one ?? */ + i2c_stop(adap); + printk("i2c-algo-pcf.o: %s i2c_write: " + "error - no ack.\n", i2c_adap->name); + return -EREMOTEIO; /* got a better one ?? 
*/ } +#endif } + i2c_stop(adap); return (wrcount); } @@ -314,34 +322,48 @@ { int rdcount=0, i, status, timeout, dummy=1; struct i2c_algo_pcf_data *adap = i2c_adap->algo_data; - + for (i=0; ialgo_data; struct i2c_msg *pmsg; - int i, ret, timeout, status; - + int i = 0; + int ret, timeout, status; + + pmsg = &msgs[i]; + + /* Send address here if Read */ + if (pmsg->flags & I2C_M_RD) { + ret = pcf_doAddress(adap, pmsg, i2c_adap->retries); + } + + /* Check for bus busy */ timeout = wait_for_bb(adap); if (timeout) { - DEB2(printk("i2c-algo-pcf.o: Timeout waiting for BB in pcf_xfer\n");) + DEB2(printk("i2c-algo-pcf.o: " + "Timeout waiting for BB in pcf_xfer\n");) return -EIO; } + + /* Send address here if Write */ + if (!(pmsg->flags & I2C_M_RD)) { + ret = pcf_doAddress(adap, pmsg, i2c_adap->retries); + } + /* Send START */ i2c_start(adap); - - for (i=0; iflags & I2C_M_NOSTART)) { - if (i) - i2c_repstart(adap); - ret = pcf_doAddress(adap, pmsg, i2c_adap->retries); - timeout = wait_for_pin(adap, &status); - if (timeout) { - DEB2(printk("i2c-algo-pcf.o: Timeout waiting for PIN(1) in pcf_xfer\n");) - return (-EREMOTEIO); - } - if (status & I2C_PCF_LRB) { - i2c_stop(adap); - DEB2(printk("i2c-algo-pcf.o: No LRB(1) in pcf_xfer\n");) - return (-EREMOTEIO); - } - } - DEB3(printk("i2c-algo-pcf.o: Msg %d, addr=0x%x, flags=0x%x, len=%d\n", - i, msgs[i].addr, msgs[i].flags, msgs[i].len);) - if (pmsg->flags & I2C_M_RD ) { - /* read bytes into buffer*/ - ret = pcf_readbytes(i2c_adap, pmsg->buf, pmsg->len); + + /* Wait for PIN (pending interrupt NOT) */ + timeout = wait_for_pin(adap, &status); + if (timeout) { + i2c_stop(adap); + DEB2(printk("i2c-algo-pcf.o: Timeout waiting " + "for PIN(1) in pcf_xfer\n");) + return (-EREMOTEIO); + } + +#ifndef STUB_I2C + /* Check LRB (last rcvd bit - slave ack) */ + if (status & I2C_PCF_LRB) { + i2c_stop(adap); + DEB2(printk("i2c-algo-pcf.o: No LRB(1) in pcf_xfer\n");) + return (-EREMOTEIO); + } +#endif + + DEB3(printk("i2c-algo-pcf.o: Msg %d, 
addr=0x%x, flags=0x%x, len=%d\n", + i, msgs[i].addr, msgs[i].flags, msgs[i].len);) + + /* Read */ + if (pmsg->flags & I2C_M_RD) { + + /* read bytes into buffer*/ + ret = pcf_readbytes(i2c_adap, pmsg->buf, pmsg->len); + + if (ret != pmsg->len) { + DEB2(printk("i2c-algo-pcf.o: fail: " + "only read %d bytes.\n",ret)); + } else { DEB2(printk("i2c-algo-pcf.o: read %d bytes.\n",ret)); + } + } else { /* Write */ + + /* Write bytes from buffer */ + ret = pcf_sendbytes(i2c_adap, pmsg->buf, pmsg->len); + + if (ret != pmsg->len) { + DEB2(printk("i2c-algo-pcf.o: fail: " + "only wrote %d bytes.\n",ret)); } else { - /* write bytes from buffer */ - ret = pcf_sendbytes(i2c_adap, pmsg->buf, pmsg->len); DEB2(printk("i2c-algo-pcf.o: wrote %d bytes.\n",ret)); } } - i2c_stop(adap); return (num); } @@ -503,7 +556,7 @@ i2c_outb(pcf_adap, i); i2c_start(pcf_adap); if ((wait_for_pin(pcf_adap, &status) >= 0) && - ((status && I2C_PCF_LRB) == 0)) { + ((status & I2C_PCF_LRB) == 0)) { printk("(%02x)",i>>1); } else { printk("."); @@ -528,7 +581,7 @@ return 0; } -int __init i2c_algo_pcf_init (void) +static int __init i2c_algo_pcf_init (void) { printk("i2c-algo-pcf.o: i2c pcf8584 algorithm module\n"); return 0; diff -u --recursive --new-file v2.3.99-pre1/linux/drivers/i2c/i2c-core.c linux/drivers/i2c/i2c-core.c --- v2.3.99-pre1/linux/drivers/i2c/i2c-core.c Sat Feb 26 22:31:45 2000 +++ linux/drivers/i2c/i2c-core.c Sun Mar 19 11:15:29 2000 @@ -20,7 +20,7 @@ /* With some changes from Kyösti Mälkki . 
All SMBus-related things are written by Frodo Looijaard */ -/* $Id: i2c-core.c,v 1.50 2000/02/02 23:29:54 frodo Exp $ */ +/* $Id: i2c-core.c,v 1.52 2000/02/27 10:43:29 frodo Exp $ */ #include #include @@ -101,6 +101,12 @@ read: i2cproc_bus_read, }; +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,3,48)) +static struct inode_operations i2cproc_inode_operations = { + &i2cproc_operations +}; +#endif + static int i2cproc_initialized = 0; #else /* undef CONFIG_PROC_FS */ @@ -159,7 +165,12 @@ name); return -ENOENT; } + +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,3,48)) proc_entry->proc_fops = &i2cproc_operations; +#else + proc_entry->ops = &i2cproc_inode_operations; +#endif #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,3,27)) proc_entry->owner = THIS_MODULE; #else diff -u --recursive --new-file v2.3.99-pre1/linux/drivers/i2c/i2c-dev.c linux/drivers/i2c/i2c-dev.c --- v2.3.99-pre1/linux/drivers/i2c/i2c-dev.c Thu Feb 10 17:11:09 2000 +++ linux/drivers/i2c/i2c-dev.c Sun Mar 19 11:15:29 2000 @@ -23,7 +23,9 @@ But I have used so much of his original code and ideas that it seems only fair to recognize him as co-author -- Frodo */ -/* $Id: i2c-dev.c,v 1.25 2000/01/26 14:14:20 frodo Exp $ */ +/* The I2C_RDWR ioctl code is written by Kolja Waschk */ + +/* $Id: i2c-dev.c,v 1.30 2000/02/28 21:35:05 frodo Exp $ */ #include #include @@ -143,8 +145,8 @@ #endif ret = i2c_master_recv(client,tmp,count); - if (! 
ret) - ret = copy_to_user(buf,tmp,count)?-EFAULT:0; + if (ret >= 0) + ret = copy_to_user(buf,tmp,count)?-EFAULT:ret; kfree(tmp); return ret; } @@ -182,9 +184,11 @@ unsigned long arg) { struct i2c_client *client = (struct i2c_client *)file->private_data; + struct i2c_rdwr_ioctl_data rdwr_arg; struct i2c_smbus_ioctl_data data_arg; union i2c_smbus_data temp; - int datasize,res; + struct i2c_msg *rdwr_pa; + int i,datasize,res; unsigned long funcs; #ifdef DEBUG @@ -212,6 +216,67 @@ funcs = i2c_get_functionality(client->adapter); return (copy_to_user((unsigned long *)arg,&funcs, sizeof(unsigned long)))?-EFAULT:0; + + case I2C_RDWR: + copy_from_user_ret(&rdwr_arg, + (struct i2c_rdwr_ioctl_data *)arg, + sizeof(rdwr_arg), + -EFAULT); + + rdwr_pa = (struct i2c_msg *) + kmalloc(rdwr_arg.nmsgs * sizeof(struct i2c_msg), + GFP_KERNEL); + + if (rdwr_pa == NULL) return -ENOMEM; + + res = 0; + for( i=0; iadapter, + rdwr_pa, + rdwr_arg.nmsgs); + } + while(i-- > 0) + { + if( res>=0 && (rdwr_pa[i].flags & I2C_M_RD)) + { + if(copy_to_user( + rdwr_arg.msgs[i].buf, + rdwr_pa[i].buf, + rdwr_pa[i].len)) + { + res = -EFAULT; + } + } + kfree(rdwr_pa[i].buf); + } + kfree(rdwr_pa); + return res; + case I2C_SMBUS: copy_from_user_ret(&data_arg, (struct i2c_smbus_ioctl_data *) arg, diff -u --recursive --new-file v2.3.99-pre1/linux/drivers/i2c/i2c-elektor.c linux/drivers/i2c/i2c-elektor.c --- v2.3.99-pre1/linux/drivers/i2c/i2c-elektor.c Tue Feb 1 01:35:43 2000 +++ linux/drivers/i2c/i2c-elektor.c Sun Mar 19 11:15:29 2000 @@ -22,7 +22,7 @@ /* With some changes from Kyösti Mälkki and even Frodo Looijaard */ -/* $Id: i2c-elektor.c,v 1.16 2000/01/24 02:06:33 mds Exp $ */ +/* $Id: i2c-elektor.c,v 1.17 2000/03/16 13:07:34 frodo Exp $ */ #include #include @@ -73,17 +73,22 @@ static void pcf_isa_setbyte(void *data, int ctl, int val) { - if (ctl) { + unsigned long j = jiffies + 10; + + if (ctl) { if (gpi.pi_irq > 0) { - DEB3(printk("i2c-elektor.o: Write control 0x%x\n", + DEB3(printk("i2c-elektor.o: Write 
Ctrl 0x%02X\n", val|I2C_PCF_ENI)); + DEB3({while (jiffies < j) schedule();}) outb(val | I2C_PCF_ENI, CTRL); } else { - DEB3(printk("i2c-elektor.o: Write control 0x%x\n", val)); - outb(val, CTRL); + DEB3(printk("i2c-elektor.o: Write Ctrl 0x%02X\n", val|I2C_PCF_ENI)); + DEB3({while (jiffies < j) schedule();}) + outb(val|I2C_PCF_ENI, CTRL); } } else { - DEB3(printk("i2c-elektor.o: Write data 0x%x\n", val)); + DEB3(printk("i2c-elektor.o: Write Data 0x%02X\n", val&0xff)); + DEB3({while (jiffies < j) schedule();}) outb(val, DATA); } } @@ -94,10 +99,10 @@ if (ctl) { val = inb(CTRL); - DEB3(printk("i2c-elektor.o: Read control 0x%x\n", val)); + DEB3(printk("i2c-elektor.o: Read Ctrl 0x%02X\n", val)); } else { val = inb(DATA); - DEB3(printk("i2c-elektor.o: Read data 0x%x\n", val)); + DEB3(printk("i2c-elektor.o: Read Data 0x%02X\n", val)); } return (val); } @@ -226,7 +231,7 @@ pcf_isa_unreg, }; -int __init i2c_pcfisa_init(void) +static int __init i2c_pcfisa_init(void) { struct i2c_pcf_isa *pisa = &gpi; @@ -277,6 +282,7 @@ MODULE_PARM(irq, "i"); MODULE_PARM(clock, "i"); MODULE_PARM(own, "i"); +MODULE_PARM(i2c_debug,"i"); int init_module(void) { @@ -290,5 +296,3 @@ } #endif - - diff -u --recursive --new-file v2.3.99-pre1/linux/drivers/ide/Config.in linux/drivers/ide/Config.in --- v2.3.99-pre1/linux/drivers/ide/Config.in Tue Mar 14 19:10:39 2000 +++ linux/drivers/ide/Config.in Fri Mar 17 13:10:45 2000 @@ -32,7 +32,6 @@ bool ' Boot off-board chipsets first support' CONFIG_BLK_DEV_OFFBOARD dep_bool ' Use PCI DMA by default when available' CONFIG_IDEDMA_PCI_AUTO $CONFIG_BLK_DEV_IDEDMA_PCI define_bool CONFIG_BLK_DEV_IDEDMA $CONFIG_BLK_DEV_IDEDMA_PCI - define_bool CONFIG_IDEDMA_AUTO $CONFIG_IDEDMA_PCI_AUTO define_bool CONFIG_IDEDMA_PCI_EXPERIMENTAL $CONFIG_EXPERIMENTAL dep_bool ' ATA Work(s) In Progress (EXPERIMENTAL)' CONFIG_IDEDMA_PCI_WIP $CONFIG_BLK_DEV_IDEDMA_PCI $CONFIG_EXPERIMENTAL dep_bool ' Good-Bad DMA Model-Firmware (WIP)' CONFIG_IDEDMA_NEW_DRIVE_LISTINGS 
$CONFIG_IDEDMA_PCI_WIP @@ -72,14 +71,12 @@ dep_bool ' PowerMac IDE DMA support' CONFIG_BLK_DEV_IDEDMA_PMAC $CONFIG_BLK_DEV_IDE_PMAC dep_bool ' Use DMA by default' CONFIG_IDEDMA_PMAC_AUTO $CONFIG_BLK_DEV_IDEDMA_PMAC define_bool CONFIG_BLK_DEV_IDEDMA $CONFIG_BLK_DEV_IDEDMA_PMAC - define_bool CONFIG_IDEDMA_AUTO $CONFIG_IDEDMA_PMAC_AUTO fi if [ "$CONFIG_ARCH_ACORN" = "y" ]; then dep_bool ' ICS IDE interface support' CONFIG_BLK_DEV_IDE_ICSIDE $CONFIG_ARCH_ACORN dep_bool ' ICS DMA support' CONFIG_BLK_DEV_IDEDMA_ICS $CONFIG_BLK_DEV_IDE_ICSIDE dep_bool ' Use ICS DMA by default' CONFIG_IDEDMA_ICS_AUTO $CONFIG_BLK_DEV_IDEDMA_ICS define_bool CONFIG_BLK_DEV_IDEDMA $CONFIG_BLK_DEV_IDEDMA_ICS - define_bool CONFIG_IDEDMA_AUTO $CONFIG_IDEDMA_ICS_AUTO dep_bool ' RapIDE interface support' CONFIG_BLK_DEV_IDE_RAPIDE $CONFIG_ARCH_ACORN fi if [ "$CONFIG_AMIGA" = "y" ]; then @@ -115,19 +112,13 @@ define_bool CONFIG_BLK_DEV_HD $CONFIG_BLK_DEV_HD_ONLY fi -# if [ "$CONFIG_BLK_DEV_IDEDMA_PCI" = "y" -o \ -# "$CONFIG_BLK_DEV_IDEDMA_PMAC" = "y" -o \ -# "$CONFIG_BLK_DEV_IDEDMA_ICS" = "y" ]; then -# define_bool CONFIG_BLK_DEV_IDEDMA y -# if [ "$CONFIG_IDEDMA_PCI_AUTO" = "y" -o \ -# "$CONFIG_IDEDMA_PMAC_AUTO" = "y" -o \ -# "$CONFIG_IDEDMA_ICS_AUTO" = "y" ]; then -# define_bool CONFIG_IDEDMA_AUTO y -# fi -# else -# define_bool CONFIG_BLK_DEV_IDEDMA n -# define_bool CONFIG_IDEDMA_AUTO n -# fi +if [ "$CONFIG_IDEDMA_PCI_AUTO" = "y" -o \ + "$CONFIG_IDEDMA_PMAC_AUTO" = "y" -o \ + "$CONFIG_IDEDMA_ICS_AUTO" = "y" ]; then + define_bool CONFIG_IDEDMA_AUTO y +else + define_bool CONFIG_IDEDMA_AUTO n +fi if [ "$CONFIG_IDE_CHIPSETS" = "y" -o \ "$CONFIG_BLK_DEV_AEC6210" = "y" -o \ diff -u --recursive --new-file v2.3.99-pre1/linux/drivers/ide/cmd64x.c linux/drivers/ide/cmd64x.c --- v2.3.99-pre1/linux/drivers/ide/cmd64x.c Tue Mar 14 19:10:39 2000 +++ linux/drivers/ide/cmd64x.c Thu Mar 16 14:01:05 2000 @@ -68,7 +68,7 @@ #define UDIDETCR1 0x7B #define DTPR1 0x7C -#undef DISPLAY_CMD64X_TIMINGS +#define 
DISPLAY_CMD64X_TIMINGS #if defined(DISPLAY_CMD64X_TIMINGS) && defined(CONFIG_PROC_FS) #include @@ -116,22 +116,22 @@ p += sprintf(p, "--------------- Primary Channel ---------------- Secondary Channel -------------\n"); p += sprintf(p, " %sabled %sabled\n", - (reg72&0x80) ? "dis" : " en", (reg7a&0x80) ? "dis" : " en"); + (reg72&0x80) ? "dis" : " en", (reg7a&0x80) ? "dis" : " en"); p += sprintf(p, "--------------- drive0 --------- drive1 -------- drive0 ---------- drive1 ------\n"); p += sprintf(p, "DMA enabled: %s %s %s %s\n", - (reg72&0x20) ? "yes" : "no ", (reg72&0x40) ? "yes" : "no ", (reg7a&0x20) ? "yes" : "no ", (reg7a&0x40) ? "yes" : "no " ); - p += sprintf(p, "UDMA enabled: %s %s %s %s\n", - (reg73&0x01) ? "yes" : "no ", (reg73&0x02) ? "yes" : "no ", (reg7b&0x01) ? "yes" : "no ", (reg7b&0x02) ? "yes" : "no " ); - p += sprintf(p, "UDMA enabled: %s %s %s %s\n", - (reg73&0x15) ? "4" : (reg73&0x25) ? "3" : (reg73&0x11) ? "2" : (reg73&0x21) ? "1" : (reg73&0x31) ? "0" : "X", - (reg73&0x4A) ? "4" : (reg73&0x8A) ? "3" : (reg73&0x42) ? "2" : (reg73&0x82) ? "1" : (reg73&0xC2) ? "0" : "X", - (reg7b&0x15) ? "4" : (reg7b&0x25) ? "3" : (reg7b&0x11) ? "2" : (reg7b&0x21) ? "1" : (reg7b&0x31) ? "0" : "X", - (reg7b&0x4A) ? "4" : (reg7b&0x8A) ? "3" : (reg7b&0x42) ? "2" : (reg7b&0x82) ? "1" : (reg7b&0xC2) ? "0" : "X" ); - p += sprintf(p, "DMA enabled: %s %s %s %s\n", - (reg73&0x10) ? "2" : (reg73&0x20) ? "1" : (reg73&0x30) ? "0" : "X", - (reg73&0x40) ? "2" : (reg73&0x80) ? "1" : (reg73&0xC0) ? "0" : "X", - (reg7b&0x10) ? "2" : (reg7b&0x20) ? "1" : (reg7b&0x30) ? "0" : "X", - (reg7b&0x40) ? "2" : (reg7b&0x80) ? "1" : (reg7b&0xC0) ? "0" : "X" ); + (reg72&0x20) ? "yes" : "no ", (reg72&0x40) ? "yes" : "no ", (reg7a&0x20) ? "yes" : "no ", (reg7a&0x40) ? 
"yes" : "no " ); + p += sprintf(p, "DMA Mode: %s(%s) %s(%s) %s(%s) %s(%s)\n", + (reg72&0x20)?((reg73&0x01)?"UDMA":" DMA"):" PIO", + (reg72&0x20)?(((reg73&0x15)==0x15)?"4":((reg73&0x25)==0x25)?"3":((reg73&0x10)==0x10)?"2":((reg73&0x20)==0x20)?"1":((reg73&0x30)==0x30)?"0":"X"):"?", + (reg72&0x40)?((reg73&0x02)?"UDMA":" DMA"):" PIO", + (reg72&0x40)?(((reg73&0x4A)==0x4A)?"4":((reg73&0x8A)==0x8A)?"3":((reg73&0x40)==0x40)?"2":((reg73&0x80)==0x80)?"1":((reg73&0xC0)==0xC0)?"0":"X"):"?", + (reg7a&0x20)?((reg7b&0x01)?"UDMA":" DMA"):" PIO", + (reg7a&0x20)?(((reg7b&0x15)==0x15)?"4":((reg7b&0x25)==0x25)?"3":((reg7b&0x10)==0x10)?"2":((reg7b&0x20)==0x20)?"1":((reg7b&0x30)==0x30)?"0":"X"):"?", + (reg7a&0x40)?((reg7b&0x02)?"UDMA":" DMA"):" PIO", + (reg7a&0x40)?(((reg7b&0x4A)==0x4A)?"4":((reg7b&0x8A)==0x8A)?"3":((reg7b&0x40)==0x40)?"2":((reg7b&0x80)==0x80)?"1":((reg7b&0xC0)==0xC0)?"0":"X"):"?" ); + p += sprintf(p, "PIO Mode: %s %s %s %s\n", + "?", "?", "?", "?"); + p += sprintf(p, "PIO\n"); SPLIT_BYTE(reg53, hi_byte, lo_byte); @@ -668,9 +668,11 @@ (void) pci_write_config_byte(dev, DRWTIM3, 0x3f); #if defined(DISPLAY_CMD64X_TIMINGS) && defined(CONFIG_PROC_FS) - cmd64x_proc = 1; - bmide_dev = dev; - cmd64x_display_info = &cmd64x_get_info; + if (!cmd64x_proc) { + cmd64x_proc = 1; + bmide_dev = dev; + cmd64x_display_info = &cmd64x_get_info; + } #endif /* DISPLAY_CMD64X_TIMINGS && CONFIG_PROC_FS */ return 0; diff -u --recursive --new-file v2.3.99-pre1/linux/drivers/ide/ide-geometry.c linux/drivers/ide/ide-geometry.c --- v2.3.99-pre1/linux/drivers/ide/ide-geometry.c Tue Mar 14 19:10:39 2000 +++ linux/drivers/ide/ide-geometry.c Thu Mar 16 14:01:05 2000 @@ -3,7 +3,7 @@ */ #include -#ifdef CONFIG_BLK_DEV_IDE +#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE) #include #include @@ -211,4 +211,4 @@ drive->bios_cyl, drive->bios_head, drive->bios_sect); return ret; } -#endif /* CONFIG_BLK_DEV_IDE */ +#endif /* (CONFIG_BLK_DEV_IDE) || (CONFIG_BLK_DEV_IDE_MODULE) */ diff -u 
--recursive --new-file v2.3.99-pre1/linux/drivers/ide/ide-probe.c linux/drivers/ide/ide-probe.c --- v2.3.99-pre1/linux/drivers/ide/ide-probe.c Tue Mar 14 19:10:39 2000 +++ linux/drivers/ide/ide-probe.c Sat Mar 18 11:39:12 2000 @@ -470,11 +470,13 @@ if (hwif->noprobe) return; +#ifdef CONFIG_BLK_DEV_IDE if (hwif->io_ports[IDE_DATA_OFFSET] == HD_DATA) { extern void probe_cmos_for_drives(ide_hwif_t *); probe_cmos_for_drives (hwif); } +#endif if ((hwif->chipset != ide_4drives || !hwif->mate->present) && #if CONFIG_BLK_DEV_PDC4030 diff -u --recursive --new-file v2.3.99-pre1/linux/drivers/ide/piix.c linux/drivers/ide/piix.c --- v2.3.99-pre1/linux/drivers/ide/piix.c Tue Mar 14 19:10:39 2000 +++ linux/drivers/ide/piix.c Thu Mar 16 14:01:05 2000 @@ -417,18 +417,22 @@ void __init ide_init_piix (ide_hwif_t *hwif) { + if (!hwif->irq) + hwif->irq = hwif->channel ? 15 : 14; + hwif->tuneproc = &piix_tune_drive; + hwif->drives[0].autotune = 1; + hwif->drives[1].autotune = 1; + + if (!hwif->dma_base) + return; - if (hwif->dma_base) { #ifdef CONFIG_PIIX_TUNING - hwif->dmaproc = &piix_dmaproc; + hwif->autodma = 1; + hwif->dmaproc = &piix_dmaproc; +#else + if (hwif->autodma) + hwif->autodma = 0; + #endif /* CONFIG_PIIX_TUNING */ - hwif->drives[0].autotune = 0; - hwif->drives[1].autotune = 0; - } else { - hwif->drives[0].autotune = 1; - hwif->drives[1].autotune = 1; - } - if (!hwif->irq) - hwif->irq = hwif->channel ? 
15 : 14; } diff -u --recursive --new-file v2.3.99-pre1/linux/drivers/ide/via82cxxx.c linux/drivers/ide/via82cxxx.c --- v2.3.99-pre1/linux/drivers/ide/via82cxxx.c Tue Mar 14 19:10:39 2000 +++ linux/drivers/ide/via82cxxx.c Thu Mar 16 14:01:05 2000 @@ -104,6 +104,7 @@ { "VT 82C595 Apollo VP2", PCI_DEVICE_ID_VIA_82C595, }, { "VT 82C597 Apollo VP3", PCI_DEVICE_ID_VIA_82C597_0, }, { "VT 82C598 Apollo MVP3", PCI_DEVICE_ID_VIA_82C598_0, }, + { "VT 82C598 Apollo MVP3", PCI_DEVICE_ID_VIA_82C598_0, }, { "VT 82C680 Apollo P6", PCI_DEVICE_ID_VIA_82C680, }, { "VT 82C691 Apollo Pro", PCI_DEVICE_ID_VIA_82C691, }, { "VT 82C693 Apollo Pro Plus", PCI_DEVICE_ID_VIA_82C693, }, @@ -127,7 +128,7 @@ { PCI_DEVICE_ID_VIA_82C598_0, PCI_DEVICE_ID_VIA_82C586_1, VIA_FLAG_CHECK_REV }, { PCI_DEVICE_ID_VIA_82C598_0, PCI_DEVICE_ID_VIA_82C596, 0 }, { PCI_DEVICE_ID_VIA_82C680, PCI_DEVICE_ID_VIA_82C586_1, VIA_FLAG_CHECK_REV }, - { PCI_DEVICE_ID_VIA_82C691, PCI_DEVICE_ID_VIA_82C596, 0 }, + { PCI_DEVICE_ID_VIA_82C691, PCI_DEVICE_ID_VIA_82C596, VIA_FLAG_ATA_66 }, { PCI_DEVICE_ID_VIA_82C693, PCI_DEVICE_ID_VIA_82C596, 0 }, { PCI_DEVICE_ID_VIA_8501_0, PCI_DEVICE_ID_VIA_82C686, VIA_FLAG_ATA_66 }, { PCI_DEVICE_ID_VIA_8371_0, PCI_DEVICE_ID_VIA_82C686, VIA_FLAG_ATA_66 }, diff -u --recursive --new-file v2.3.99-pre1/linux/drivers/net/8390.c linux/drivers/net/8390.c --- v2.3.99-pre1/linux/drivers/net/8390.c Sun Feb 20 21:12:39 2000 +++ linux/drivers/net/8390.c Fri Mar 17 20:52:20 2000 @@ -478,7 +478,9 @@ outb_p(E8390_NODMA+E8390_PAGE0+E8390_START, e8390_base + E8390_CMD); if (nr_serviced >= MAX_SERVICE) { - printk(KERN_WARNING "%s: Too much work at interrupt, status %#2.2x\n", + /* 0xFF is valid for a card removal */ + if(interrupts!=0xFF) + printk(KERN_WARNING "%s: Too much work at interrupt, status %#2.2x\n", dev->name, interrupts); outb_p(ENISR_ALL, e8390_base + EN0_ISR); /* Ack. most intrs. 
*/ } else { @@ -588,8 +590,8 @@ else ei_local->lasttx = 10, ei_local->txing = 0; } - else printk(KERN_WARNING "%s: unexpected TX-done interrupt, lasttx=%d.\n", - dev->name, ei_local->lasttx); +// else printk(KERN_WARNING "%s: unexpected TX-done interrupt, lasttx=%d.\n", +// dev->name, ei_local->lasttx); #else /* EI_PINGPONG */ /* @@ -651,8 +653,12 @@ this_frame = ei_local->rx_start_page; /* Someday we'll omit the previous, iff we never get this message. - (There is at least one clone claimed to have a problem.) */ - if (ei_debug > 0 && this_frame != ei_local->current_page) + (There is at least one clone claimed to have a problem.) + + Keep quiet if it looks like a card removal. One problem here + is that some clones crash in roughly the same way. + */ + if (ei_debug > 0 && this_frame != ei_local->current_page && (this_frame!=0x0 || rxing_page!=0xFF)) printk(KERN_ERR "%s: mismatched read page pointers %2x vs %2x.\n", dev->name, this_frame, ei_local->current_page); diff -u --recursive --new-file v2.3.99-pre1/linux/drivers/net/Makefile linux/drivers/net/Makefile --- v2.3.99-pre1/linux/drivers/net/Makefile Tue Mar 7 14:32:25 2000 +++ linux/drivers/net/Makefile Sun Mar 19 11:13:46 2000 @@ -104,11 +104,11 @@ endif endif -ifeq ($(CONFIG_ATALK),y) +ifeq ($(CONFIG_APPLETALK),y) SUB_DIRS += appletalk MOD_IN_SUB_DIRS += appletalk else - ifeq ($(CONFIG_ATALK),m) + ifeq ($(CONFIG_APPLETALK),m) MOD_IN_SUB_DIRS += appletalk endif endif diff -u --recursive --new-file v2.3.99-pre1/linux/drivers/net/net_init.c linux/drivers/net/net_init.c --- v2.3.99-pre1/linux/drivers/net/net_init.c Tue Mar 7 14:32:26 2000 +++ linux/drivers/net/net_init.c Sat Mar 18 16:48:22 2000 @@ -70,10 +70,9 @@ { struct net_device *dev; int alloc_size; - - /* 32-byte alignment */ + + /* ensure 32-byte alignment of the private area */ alloc_size = sizeof (*dev) + IFNAMSIZ + sizeof_priv + 31; - alloc_size &= ~31; dev = (struct net_device *) kmalloc (alloc_size, GFP_KERNEL); if (dev == NULL) @@ -85,9 +84,9 @@ 
memset(dev, 0, alloc_size); if (sizeof_priv) - dev->priv = (void *) (dev + 1); + dev->priv = (void *) (((long)(dev + 1) + 31) & ~31); - dev->name = sizeof_priv + (char *)(dev + 1); + dev->name = sizeof_priv + 31 + (char *)(dev + 1); return dev; } @@ -395,6 +394,8 @@ int register_netdev(struct net_device *dev) { + int err; + rtnl_lock(); /* @@ -404,8 +405,9 @@ if (dev->name && strchr(dev->name, '%')) { + err = -EBUSY; if(dev_alloc_name(dev, dev->name)<0) - return -EBUSY; + goto out; } /* @@ -414,17 +416,21 @@ if (dev->name && (dev->name[0]==0 || dev->name[0]==' ')) { + err = -EBUSY; if(dev_alloc_name(dev, "eth%d")<0) - return -EBUSY; + goto out; } - if (register_netdevice(dev)) { - rtnl_unlock(); - return -EIO; - } + err = -EIO; + if (register_netdevice(dev)) + goto out; + + err = 0; + +out: rtnl_unlock(); - return 0; + return err; } void unregister_netdev(struct net_device *dev) diff -u --recursive --new-file v2.3.99-pre1/linux/drivers/net/pcnet32.c linux/drivers/net/pcnet32.c --- v2.3.99-pre1/linux/drivers/net/pcnet32.c Tue Mar 14 19:10:39 2000 +++ linux/drivers/net/pcnet32.c Sat Mar 18 11:34:36 2000 @@ -929,7 +929,7 @@ lp->tx_ring[i].status = 0; } - lp->init_block.tlen_rlen = TX_RING_LEN_BITS | RX_RING_LEN_BITS; + lp->init_block.tlen_rlen = le16_to_cpu(TX_RING_LEN_BITS | RX_RING_LEN_BITS); for (i = 0; i < 6; i++) lp->init_block.phys_addr[i] = dev->dev_addr[i]; lp->init_block.rx_ring = (u32)le32_to_cpu(virt_to_bus(lp->rx_ring)); @@ -1050,8 +1050,10 @@ if (lp->tx_ring[(entry+1) & TX_RING_MOD_MASK].base == 0) netif_start_queue(dev); - else + else { lp->tx_full = 1; + netif_stop_queue(dev); + } spin_unlock_irqrestore(&lp->lock, flags); return 0; } diff -u --recursive --new-file v2.3.99-pre1/linux/drivers/net/ppp_async.c linux/drivers/net/ppp_async.c --- v2.3.99-pre1/linux/drivers/net/ppp_async.c Tue Mar 14 19:10:39 2000 +++ linux/drivers/net/ppp_async.c Sat Mar 18 16:41:47 2000 @@ -220,7 +220,7 @@ if (ap == 0) break; err = -EFAULT; - if 
(put_user(ppp_channel_index(&ap->chan), (int *) arg)) + if (put_user(ppp_unit_number(&ap->chan), (int *) arg)) break; err = 0; break; diff -u --recursive --new-file v2.3.99-pre1/linux/drivers/net/ppp_generic.c linux/drivers/net/ppp_generic.c --- v2.3.99-pre1/linux/drivers/net/ppp_generic.c Tue Mar 14 19:10:39 2000 +++ linux/drivers/net/ppp_generic.c Sat Mar 18 16:41:47 2000 @@ -1653,13 +1653,13 @@ } /* - * Return the index of a channel. + * Return the unit number associated with a channel. */ -int ppp_channel_index(struct ppp_channel *chan) +int ppp_unit_number(struct ppp_channel *chan) { struct channel *pch = chan->ppp; - return pch->file.index; + return pch->ppp->file.index; } /* @@ -2319,7 +2319,7 @@ EXPORT_SYMBOL(ppp_register_channel); EXPORT_SYMBOL(ppp_unregister_channel); -EXPORT_SYMBOL(ppp_channel_index); +EXPORT_SYMBOL(ppp_unit_number); EXPORT_SYMBOL(ppp_input); EXPORT_SYMBOL(ppp_input_error); EXPORT_SYMBOL(ppp_output_wakeup); diff -u --recursive --new-file v2.3.99-pre1/linux/drivers/net/rcpci45.c linux/drivers/net/rcpci45.c --- v2.3.99-pre1/linux/drivers/net/rcpci45.c Tue Mar 14 19:10:39 2000 +++ linux/drivers/net/rcpci45.c Fri Mar 17 14:19:42 2000 @@ -1221,7 +1221,7 @@ } module_init(rcpci_init_module); -module_exit(rcpci_clenaup_module); +module_exit(rcpci_cleanup_module); static int diff -u --recursive --new-file v2.3.99-pre1/linux/drivers/net/setup.c linux/drivers/net/setup.c --- v2.3.99-pre1/linux/drivers/net/setup.c Tue Mar 14 19:10:39 2000 +++ linux/drivers/net/setup.c Fri Mar 17 13:28:18 2000 @@ -9,7 +9,6 @@ #include extern int mkiss_init_ctrl_dev(void); -extern int ppp_init(void); extern int slip_init_ctrl_dev(void); extern int strip_init_ctrl_dev(void); extern int x25_asy_init_ctrl_dev(void); @@ -77,7 +76,7 @@ {cpm_enet_init, 0}, #endif #if defined(CONFIG_COMX) - {comx_init(), 0}, + {comx_init, 0}, #endif /* * SLHC if present needs attaching so other people see it * even if not opened. 
@@ -166,9 +165,6 @@ #endif #if defined(CONFIG_STRIP) strip_init_ctrl_dev(); -#endif -#if defined(CONFIG_PPP) - ppp_init(); #endif } diff -u --recursive --new-file v2.3.99-pre1/linux/drivers/net/shaper.c linux/drivers/net/shaper.c --- v2.3.99-pre1/linux/drivers/net/shaper.c Tue Mar 14 19:10:39 2000 +++ linux/drivers/net/shaper.c Wed Mar 15 13:29:54 2000 @@ -66,6 +66,7 @@ * 1999/06/18 Jordi Murgo */ +#include #include #include #include diff -u --recursive --new-file v2.3.99-pre1/linux/drivers/net/sis900.c linux/drivers/net/sis900.c --- v2.3.99-pre1/linux/drivers/net/sis900.c Tue Mar 14 19:10:39 2000 +++ linux/drivers/net/sis900.c Thu Mar 16 11:19:25 2000 @@ -52,34 +52,36 @@ #include "sis900.h" static const char *version = -"sis900.c: v1.06.04 02/11/2000\n"; +"sis900.c: v1.07 03/07/2000\n"; static int max_interrupt_work = 20; +static int multicast_filter_limit = 128; + #define sis900_debug debug static int sis900_debug = 0; -static int multicast_filter_limit = 128; - /* Time in jiffies before concluding the transmitter is hung. */ #define TX_TIMEOUT (4*HZ) +/* SiS 900 is capable of 32 bits BM DMA */ +#define SIS900_DMA_MASK 0xffffffff -struct mac_chip_info { - const char *name; - u16 vendor_id, device_id, flags; - int io_size; - struct net_device *(*probe) (struct mac_chip_info *mac, struct pci_dev * pci_dev, - struct net_device * net_dev); +static struct net_device * sis900_mac_probe (struct pci_dev * pci_dev, + char *card_name); +enum { + SIS_900 = 0, + SIS_7018 }; -static struct net_device * sis900_mac_probe (struct mac_chip_info * mac, struct pci_dev * pci_dev, - struct net_device * net_dev); - -static struct mac_chip_info mac_chip_table[] = { - { "SiS 900 PCI Fast Ethernet", PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_900, - PCI_COMMAND_IO|PCI_COMMAND_MASTER, SIS900_TOTAL_SIZE, sis900_mac_probe}, - { "SiS 7016 PCI Fast Ethernet",PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_7016, - PCI_COMMAND_IO|PCI_COMMAND_MASTER, SIS900_TOTAL_SIZE, sis900_mac_probe}, - {0,}, /* 0 terminated list. 
*/ +static char * card_names[] = { + "SiS 900 PCI Fast Ethernet", + "SiS 7016 PCI Fast Ethernet" +}; +static struct pci_device_id sis900_pci_tbl [] __initdata = { + {PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_900, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, SIS_900}, + {PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_7016, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, SIS_7018} }; +MODULE_DEVICE_TABLE (pci, sis900_pci_tbl); static void sis900_read_mode(struct net_device *net_dev, int phy_addr, int *speed, int *duplex); static void amd79c901_read_mode(struct net_device *net_dev, int phy_addr, int *speed, int *duplex); @@ -111,24 +113,22 @@ } BufferDesc; struct sis900_private { - struct net_device *next_module; struct net_device_stats stats; struct pci_dev * pci_dev; spinlock_t lock; - struct mac_chip_info * mac; struct mii_phy * mii; unsigned int cur_phy; struct timer_list timer; /* Link status detection timer. */ - unsigned int cur_rx, dirty_rx; + unsigned int cur_rx, dirty_rx; unsigned int cur_tx, dirty_tx; /* The saved address of a sent/receive-in-place packet buffer */ struct sk_buff *tx_skbuff[NUM_TX_DESC]; struct sk_buff *rx_skbuff[NUM_RX_DESC]; - BufferDesc tx_ring[NUM_TX_DESC]; + BufferDesc tx_ring[NUM_TX_DESC]; BufferDesc rx_ring[NUM_RX_DESC]; unsigned int tx_full; /* The Tx queue is full. */ @@ -163,68 +163,47 @@ static void set_rx_mode(struct net_device *net_dev); static void sis900_reset(struct net_device *net_dev); -/* A list of all installed SiS900 devices, for removing the driver module. 
*/ -static struct net_device *root_sis900_dev = NULL; - /* walk through every ethernet PCI devices to see if some of them are matched with our card list*/ -static int __init sis900_probe (void) +static int __init sis900_probe (struct pci_dev *pci_dev, const struct pci_device_id *pci_id) { - int found = 0; - struct pci_dev * pci_dev = NULL; - - while ((pci_dev = pci_find_class (PCI_CLASS_NETWORK_ETHERNET << 8, pci_dev)) != NULL) { - /* pci_dev contains all ethernet devices */ - u32 pci_io_base; - struct mac_chip_info * mac; - struct net_device *net_dev = NULL; - - for (mac = mac_chip_table; mac->vendor_id; mac++) { - /* try to match our card list */ - if (pci_dev->vendor == mac->vendor_id && - pci_dev->device == mac->device_id) - break; - } - - if (mac->vendor_id == 0) - /* pci_dev does not match any of our cards */ - continue; - - /* now, pci_dev should be either 900 or 7016 */ - pci_io_base = pci_dev->resource[0].start; - if ((mac->flags & PCI_COMMAND_IO ) && - check_region(pci_io_base, mac->io_size)) - continue; - - /* setup various bits in PCI command register */ - pci_enable_device (pci_dev); - pci_set_master(pci_dev); + u32 pci_io_base; - /* do the real low level jobs */ - net_dev = mac->probe(mac, pci_dev, net_dev); - - if (net_dev != NULL) { - found++; - } - net_dev = NULL; + if (!pci_dma_supported(pci_dev, SIS900_DMA_MASK)) { + printk(KERN_ERR "sis900.c: architecture does not support " + "32bit PCI busmaster DMA\n"); + return -ENODEV; + } + + pci_io_base = pci_dev->resource[0].start; + if (check_region(pci_io_base, SIS900_TOTAL_SIZE)) { + printk(KERN_ERR "sis900.c: can't allocate I/O space at 0x%08x\n", + pci_io_base); + return -ENODEV; } - return found ? 
0 : -ENODEV; + + /* setup various bits in PCI command register */ + pci_enable_device (pci_dev); + pci_set_master(pci_dev); + + /* do the real low level jobs */ + if (sis900_mac_probe(pci_dev, card_names[pci_id->driver_data]) == NULL) + return -1; + + return 0; } -static struct net_device * sis900_mac_probe (struct mac_chip_info * mac, struct pci_dev * pci_dev, - struct net_device * net_dev) +static struct net_device * __init sis900_mac_probe (struct pci_dev * pci_dev, char * card_name) { struct sis900_private *sis_priv; long ioaddr = pci_dev->resource[0].start; + struct net_device *net_dev = NULL; int irq = pci_dev->irq; - static int did_version = 0; u16 signature; int i; - if (did_version++ == 0) - printk(KERN_INFO "%s", version); - if ((net_dev = init_etherdev(net_dev, 0)) == NULL) return NULL; + /* check to see if we have sane EEPROM */ signature = (u16) read_eeprom(ioaddr, EEPROMSignature); if (signature == 0xffff || signature == 0x0000) { @@ -233,8 +212,8 @@ return NULL; } - printk(KERN_INFO "%s: %s at %#lx, IRQ %d, ", net_dev->name, mac->name, - ioaddr, irq); + printk(KERN_INFO "%s: %s at %#lx, IRQ %d, ", net_dev->name, + card_name, ioaddr, irq); /* get MAC address from EEPROM */ for (i = 0; i < 3; i++) @@ -252,23 +231,22 @@ memset(sis_priv, 0, sizeof(struct sis900_private)); /* We do a request_region() to register /proc/ioports info. 
*/ - request_region(ioaddr, mac->io_size, net_dev->name); + request_region(ioaddr, SIS900_TOTAL_SIZE, net_dev->name); net_dev->base_addr = ioaddr; net_dev->irq = irq; sis_priv->pci_dev = pci_dev; - sis_priv->mac = mac; spin_lock_init(&sis_priv->lock); /* probe for mii transciver */ if (sis900_mii_probe(net_dev) == 0) { unregister_netdev(net_dev); kfree(sis_priv); - release_region(ioaddr, mac->io_size); + release_region(ioaddr, SIS900_TOTAL_SIZE); return NULL; } - sis_priv->next_module = root_sis900_dev; - root_sis900_dev = net_dev; + pci_dev->driver_data = net_dev; + pci_dev->dma_mask = SIS900_DMA_MASK; /* The SiS900-specific entries in the device structure. */ net_dev->open = &sis900_open; @@ -283,7 +261,7 @@ return net_dev; } -static int sis900_mii_probe (struct net_device * net_dev) +static int __init sis900_mii_probe (struct net_device * net_dev) { struct sis900_private * sis_priv = (struct sis900_private *)net_dev->priv; int phy_addr; @@ -966,21 +944,42 @@ net_dev->name); break; } + + /* gvie the socket buffer to upper layers */ skb = sis_priv->rx_skbuff[entry]; - sis_priv->rx_skbuff[entry] = NULL; - /* reset buffer descriptor state */ - sis_priv->rx_ring[entry].cmdsts = 0; - sis_priv->rx_ring[entry].bufptr = 0; - skb_put(skb, rx_size); skb->protocol = eth_type_trans(skb, net_dev); netif_rx(skb); - + + /* some network statistics */ if ((rx_status & BCAST) == MCAST) sis_priv->stats.multicast++; net_dev->last_rx = jiffies; sis_priv->stats.rx_bytes += rx_size; sis_priv->stats.rx_packets++; + + /* refill the Rx buffer, what if there is not enought memory for + new socket buffer ?? 
*/ + if ((skb = dev_alloc_skb(RX_BUF_SIZE)) == NULL) { + /* not enough memory for skbuff, this makes a "hole" + on the buffer ring, it is not clear how the + hardware will react to this kind of degenerated + buffer */ + printk(KERN_INFO "%s: Memory squeeze," + "deferring packet.\n", + net_dev->name); + sis_priv->rx_skbuff[entry] = NULL; + /* reset buffer descriptor state */ + sis_priv->rx_ring[entry].cmdsts = 0; + sis_priv->rx_ring[entry].bufptr = 0; + sis_priv->stats.rx_dropped++; + break; + } + skb->dev = net_dev; + sis_priv->rx_skbuff[entry] = skb; + sis_priv->rx_ring[entry].cmdsts = RX_BUF_SIZE; + sis_priv->rx_ring[entry].bufptr = virt_to_bus(skb->tail); + sis_priv->dirty_rx++; } sis_priv->cur_rx++; entry = sis_priv->cur_rx % NUM_RX_DESC; @@ -1249,23 +1248,46 @@ outl(PESEL, ioaddr + cfg); } -static void __exit sis900_cleanup_module(void) +static void __exit sis900_remove(struct pci_dev *pci_dev) { - /* No need to check MOD_IN_USE, as sys_delete_module() checks. */ - while (root_sis900_dev) { - struct sis900_private *sis_priv = - (struct sis900_private *)root_sis900_dev->priv; - struct net_device *next_dev = sis_priv->next_module; + struct net_device *net_dev = pci_dev->driver_data; + struct sis900_private *sis_priv = (struct sis900_private *)net_dev->priv; - unregister_netdev(root_sis900_dev); - release_region(root_sis900_dev->base_addr, - sis_priv->mac->io_size); - kfree(sis_priv); - kfree(root_sis900_dev); + unregister_netdev(net_dev); + release_region(net_dev->base_addr, + SIS900_TOTAL_SIZE); + + kfree(sis_priv); + kfree(net_dev); +} - root_sis900_dev = next_dev; +#define SIS900_MODULE_NAME "sis900" + +static struct pci_driver sis900_pci_driver = { + name: SIS900_MODULE_NAME, + id_table: sis900_pci_tbl, + probe: sis900_probe, + remove: sis900_remove, +}; + +static int __init sis900_init_module(void) +{ + if (!pci_present()) /* No PCI bus in this machine! 
*/ + return -ENODEV; + + printk(KERN_INFO "%s", version); + + if (!pci_register_driver(&sis900_pci_driver)) { + pci_unregister_driver(&sis900_pci_driver); + return -ENODEV; } + return 0; +} + +static void __exit sis900_cleanup_module(void) +{ + pci_unregister_driver(&sis900_pci_driver); } -module_init(sis900_probe); +module_init(sis900_init_module); module_exit(sis900_cleanup_module); diff -u --recursive --new-file v2.3.99-pre1/linux/drivers/net/sunhme.c linux/drivers/net/sunhme.c --- v2.3.99-pre1/linux/drivers/net/sunhme.c Tue Mar 14 19:10:39 2000 +++ linux/drivers/net/sunhme.c Wed Mar 15 09:59:06 2000 @@ -1,4 +1,4 @@ -/* $Id: sunhme.c,v 1.93 2000/03/12 04:02:14 davem Exp $ +/* $Id: sunhme.c,v 1.94 2000/03/15 06:47:04 davem Exp $ * sunhme.c: Sparc HME/BigMac 10/100baseT half/full duplex auto switching, * auto carrier detecting ethernet driver. Also known as the * "Happy Meal Ethernet" found on SunSwift SBUS cards. @@ -2953,7 +2953,7 @@ } -static void __exit cleanup_module(void) +static void __exit happy_meal_cleanup_module(void) { #ifdef MODULE /* No need to check MOD_IN_USE, as sys_delete_module() checks. */ diff -u --recursive --new-file v2.3.99-pre1/linux/drivers/net/tokenring/olympic.c linux/drivers/net/tokenring/olympic.c --- v2.3.99-pre1/linux/drivers/net/tokenring/olympic.c Sat Feb 26 22:31:47 2000 +++ linux/drivers/net/tokenring/olympic.c Wed Mar 15 11:28:28 2000 @@ -27,6 +27,8 @@ * the pci resource. * 1/11/00 - Added spinlocks for smp * 2/23/00 - Updated to dev_kfree_irq + * 3/10/00 - Fixed FDX enable which triggered other bugs also + * squashed. * * To Do: * @@ -83,11 +85,11 @@ * Version Number = a.b.c.d where a.b.c is the level of code and d is the latest author. * So 0.0.1.pds = Peter, 0.0.1.mlp = Mike * - * Official releases will only have an a.b.c version number format. + * Official releases will only have an a.b.c version number format. 
*/ static char *version = -"Olympic.c v0.3.2 2/23/00 - Peter De Schrijver & Mike Phillips" ; +"Olympic.c v0.5.0 3/10/00 - Peter De Schrijver & Mike Phillips" ; static char *open_maj_error[] = {"No error", "Lobe Media Test", "Physical Insertion", "Address Verification", "Neighbor Notification (Ring Poll)", @@ -319,6 +321,14 @@ return -1; } + if (olympic_priv->olympic_message_level) { + if ( readb(init_srb +2) & 0x40) { + printk(KERN_INFO "Olympic: Adapter is FDX capable.\n") ; + } else { + printk(KERN_INFO "Olympic: Adapter cannot do FDX.\n"); + } + } + uaa_addr=ntohs(readw(init_srb+8)); #if OLYMPIC_DEBUG @@ -401,7 +411,7 @@ #if OLYMPIC_NETWORK_MONITOR writew(ntohs(OPEN_ADAPTER_ENABLE_FDX | OPEN_ADAPTER_PASS_ADC_MAC | OPEN_ADAPTER_PASS_ATT_MAC | OPEN_ADAPTER_PASS_BEACON),init_srb+8); #else - writew(OPEN_ADAPTER_ENABLE_FDX,init_srb+8); + writew(ntohs(OPEN_ADAPTER_ENABLE_FDX),init_srb+8); #endif if (olympic_priv->olympic_laa[0]) { @@ -941,9 +951,9 @@ options = olympic_priv->olympic_copy_all_options; if (dev->flags&IFF_PROMISC) - options |= (3<<5) ; /* All LLC and MAC frames, all through the main rx channel */ + options |= 0x61 ; else - options &= ~(3<<5) ; + options &= ~0x61 ; /* Only issue the srb if there is a change in options */ @@ -1182,7 +1192,7 @@ __u16 lan_status = 0, lan_status_diff ; /* Initialize to stop compiler warning */ __u8 fdx_prot_error ; __u16 next_ptr; - + int i ; #if OLYMPIC_NETWORK_MONITOR struct trh_hdr *mac_hdr ; #endif @@ -1242,7 +1252,7 @@ /* Is the ASB free ? 
*/ - if (!(readl(olympic_priv->olympic_mmio + SISR) & SISR_ASB_FREE)) { + if (readb(asb_block + 2) != 0xff) { olympic_priv->asb_queued = 1 ; writel(LISR_ASB_FREE_REQ,olympic_priv->olympic_mmio+LISR_SUM); return ; @@ -1261,7 +1271,7 @@ return ; } else if (readb(arb_block) == ARB_LAN_CHANGE_STATUS) { /* Lan.change.status */ - lan_status = readw(arb_block+6); + lan_status = ntohs(readw(arb_block+6)); fdx_prot_error = readb(arb_block+8) ; /* Issue ARB Free */ @@ -1288,10 +1298,19 @@ writel(readl(olympic_mmio+BCTL)&~(3<<13),olympic_mmio+BCTL); netif_stop_queue(dev); olympic_priv->srb = readw(olympic_priv->olympic_lap + LAPWWO) ; + + olympic_priv->rx_status_last_received++; + olympic_priv->rx_status_last_received&=OLYMPIC_RX_RING_SIZE-1; + for(i=0;irx_ring_skb[olympic_priv->rx_status_last_received]); + olympic_priv->rx_status_last_received++; + olympic_priv->rx_status_last_received&=OLYMPIC_RX_RING_SIZE-1; + } + free_irq(dev->irq,dev); printk(KERN_WARNING "%s: Adapter has been closed \n", dev->name) ; - + MOD_DEC_USE_COUNT ; } /* If serious error */ if (olympic_priv->olympic_message_level) { diff -u --recursive --new-file v2.3.99-pre1/linux/drivers/net/tokenring/olympic.h linux/drivers/net/tokenring/olympic.h --- v2.3.99-pre1/linux/drivers/net/tokenring/olympic.h Fri Jan 21 18:19:16 2000 +++ linux/drivers/net/tokenring/olympic.h Wed Mar 15 11:28:28 2000 @@ -282,7 +282,7 @@ __u8 phys_addr[4] ; __u8 up_node_addr[6] ; - __u8 up_phys_addr[6] ; + __u8 up_phys_addr[4] ; __u8 poll_addr[6] ; __u16 reserved ; __u16 acc_priority ; diff -u --recursive --new-file v2.3.99-pre1/linux/drivers/net/tulip/interrupt.c linux/drivers/net/tulip/interrupt.c --- v2.3.99-pre1/linux/drivers/net/tulip/interrupt.c Sat Feb 26 22:31:47 2000 +++ linux/drivers/net/tulip/interrupt.c Sat Mar 18 11:18:05 2000 @@ -16,6 +16,7 @@ #include "tulip.h" #include #include +#include int tulip_rx_copybreak; @@ -32,13 +33,20 @@ /* Refill the Rx ring buffers. 
*/ for (; tp->cur_rx - tp->dirty_rx > 0; tp->dirty_rx++) { entry = tp->dirty_rx % RX_RING_SIZE; - if (tp->rx_skbuff[entry] == NULL) { + if (tp->rx_buffers[entry].skb == NULL) { struct sk_buff *skb; - skb = tp->rx_skbuff[entry] = dev_alloc_skb(PKT_BUF_SZ); + dma_addr_t mapping; + + skb = tp->rx_buffers[entry].skb = dev_alloc_skb(PKT_BUF_SZ); if (skb == NULL) break; + + mapping = pci_map_single(tp->pdev, skb->tail, PKT_BUF_SZ, + PCI_DMA_FROMDEVICE); + tp->rx_buffers[entry].mapping = mapping; + skb->dev = dev; /* Mark as being used by this device. */ - tp->rx_ring[entry].buffer1 = virt_to_le32desc(skb->tail); + tp->rx_ring[entry].buffer1 = cpu_to_le32(mapping); refilled++; } tp->rx_ring[entry].status = cpu_to_le32(DescOwned); @@ -106,24 +114,39 @@ && (skb = dev_alloc_skb(pkt_len + 2)) != NULL) { skb->dev = dev; skb_reserve(skb, 2); /* 16 byte align the IP header */ + pci_dma_sync_single(tp->pdev, + tp->rx_buffers[entry].mapping, + pkt_len, PCI_DMA_FROMDEVICE); #if ! defined(__alpha__) - eth_copy_and_sum(skb, tp->rx_skbuff[entry]->tail, pkt_len, 0); + eth_copy_and_sum(skb, tp->rx_buffers[entry].skb->tail, + pkt_len, 0); skb_put(skb, pkt_len); #else - memcpy(skb_put(skb, pkt_len), tp->rx_skbuff[entry]->tail, - pkt_len); + memcpy(skb_put(skb, pkt_len), + tp->rx_buffers[entry].skb->tail, + pkt_len); #endif } else { /* Pass up the skb already on the Rx ring. */ - char *temp = skb_put(skb = tp->rx_skbuff[entry], pkt_len); - tp->rx_skbuff[entry] = NULL; + char *temp = skb_put(skb = tp->rx_buffers[entry].skb, + pkt_len); + #ifndef final_version - if (le32desc_to_virt(tp->rx_ring[entry].buffer1) != temp) + if (tp->rx_buffers[entry].mapping != + le32_to_cpu(tp->rx_ring[entry].buffer1)) { printk(KERN_ERR "%s: Internal fault: The skbuff addresses " - "do not match in tulip_rx: %p vs. %p / %p.\n", - dev->name, - le32desc_to_virt(tp->rx_ring[entry].buffer1), - skb->head, temp); + "do not match in tulip_rx: %08x vs. 
%08x %p / %p.\n", + dev->name, + le32_to_cpu(tp->rx_ring[entry].buffer1), + tp->rx_buffers[entry].mapping, + skb->head, temp); + } #endif + + pci_unmap_single(tp->pdev, tp->rx_buffers[entry].mapping, + PKT_BUF_SZ, PCI_DMA_FROMDEVICE); + + tp->rx_buffers[entry].skb = NULL; + tp->rx_buffers[entry].mapping = 0; } skb->protocol = eth_type_trans(skb, dev); netif_rx(skb); @@ -189,8 +212,13 @@ if (status < 0) break; /* It still has not been Txed */ /* Check for Rx filter setup frames. */ - if (tp->tx_skbuff[entry] == NULL) + if (tp->tx_buffers[entry].skb == NULL) { + pci_unmap_single(tp->pdev, + tp->tx_buffers[entry].mapping, + sizeof(tp->setup_frame), + PCI_DMA_TODEVICE); continue; + } if (status & 0x8000) { /* There was an major error, log it. */ @@ -213,14 +241,20 @@ #ifdef ETHER_STATS if (status & 0x0001) tp->stats.tx_deferred++; #endif - tp->stats.tx_bytes += tp->tx_skbuff[entry]->len; + tp->stats.tx_bytes += + tp->tx_buffers[entry].skb->len; tp->stats.collisions += (status >> 3) & 15; tp->stats.tx_packets++; } + pci_unmap_single(tp->pdev, tp->tx_buffers[entry].mapping, + tp->tx_buffers[entry].skb->len, + PCI_DMA_TODEVICE); + /* Free the original skb. 
*/ - dev_kfree_skb_irq(tp->tx_skbuff[entry]); - tp->tx_skbuff[entry] = 0; + dev_kfree_skb_irq(tp->tx_buffers[entry].skb); + tp->tx_buffers[entry].skb = NULL; + tp->tx_buffers[entry].mapping = 0; tx++; } @@ -311,7 +345,7 @@ /* check if we card is in suspend mode */ entry = tp->dirty_rx % RX_RING_SIZE; - if (tp->rx_skbuff[entry] == NULL) { + if (tp->rx_buffers[entry].skb == NULL) { if (tulip_debug > 1) printk(KERN_WARNING "%s: in rx suspend mode: (%lu) (tp->cur_rx = %u, ttimer = %d, rx = %d) go/stay in suspend mode\n", dev->name, tp->nir, tp->cur_rx, tp->ttimer, rx); if (tp->ttimer == 0 || (inl(ioaddr + CSR11) & 0xffff) == 0) { diff -u --recursive --new-file v2.3.99-pre1/linux/drivers/net/tulip/tulip.h linux/drivers/net/tulip/tulip.h --- v2.3.99-pre1/linux/drivers/net/tulip/tulip.h Thu Mar 2 14:36:22 2000 +++ linux/drivers/net/tulip/tulip.h Sat Mar 18 11:18:05 2000 @@ -204,12 +204,6 @@ #define get_u16(ptr) (((u8*)(ptr))[0] + (((u8*)(ptr))[1]<<8)) #endif - -/* Condensed operations for readability. */ -#define virt_to_le32desc(addr) cpu_to_le32(virt_to_bus(addr)) -#define le32desc_to_virt(addr) bus_to_virt(le32_to_cpu(addr)) - - struct medialeaf { u8 type; u8 media; @@ -237,6 +231,10 @@ unsigned char *info; }; +struct ring_info { + struct sk_buff *skb; + dma_addr_t mapping; +}; struct tulip_private { const char *product_name; @@ -246,10 +244,9 @@ dma_addr_t rx_ring_dma; dma_addr_t tx_ring_dma; /* The saved address of a sent-in-place packet/buffer, for skfree(). */ - struct sk_buff *tx_skbuff[TX_RING_SIZE]; + struct ring_info tx_buffers[TX_RING_SIZE]; /* The addresses of receive-in-place skbuffs. */ - struct sk_buff *rx_skbuff[RX_RING_SIZE]; - char *rx_buffs; /* Address of temporary Rx buffers. */ + struct ring_info rx_buffers[RX_RING_SIZE]; u16 setup_frame[96]; /* Pseudo-Tx frame to init address table. 
*/ int chip_id; int revision; diff -u --recursive --new-file v2.3.99-pre1/linux/drivers/net/tulip/tulip_core.c linux/drivers/net/tulip/tulip_core.c --- v2.3.99-pre1/linux/drivers/net/tulip/tulip_core.c Fri Mar 10 16:40:43 2000 +++ linux/drivers/net/tulip/tulip_core.c Sat Mar 18 11:18:05 2000 @@ -51,7 +51,7 @@ }; /* Set the copy breakpoint for the copy-only-tiny-buffer Rx structure. */ -#if defined(__alpha__) || defined(__arm__) +#if defined(__alpha__) || defined(__arm__) || defined(__sparc__) static int rx_copybreak = 1518; #else static int rx_copybreak = 100; @@ -72,8 +72,14 @@ #if defined(__alpha__) static int csr0 = 0x01A00000 | 0xE000; -#elif defined(__i386__) || defined(__powerpc__) || defined(__sparc__) +#elif defined(__i386__) || defined(__powerpc__) static int csr0 = 0x01A00000 | 0x8000; +#elif defined(__sparc__) +/* The UltraSparc PCI controllers will disconnect at every 64-byte + * crossing anyways so it makes no sense to tell Tulip to burst + * any more than that. + */ +static int csr0 = 0x01A00000 | 0x9000; #elif defined(__arm__) static int csr0 = 0x01A00000 | 0x4800; #else @@ -285,6 +291,7 @@ /* This is set_rx_mode(), but without starting the transmitter. */ u16 *eaddrs = (u16 *)dev->dev_addr; u16 *setup_frm = &tp->setup_frame[15*6]; + dma_addr_t mapping; /* 21140 bug: you must add the broadcast address. */ memset(tp->setup_frame, 0xff, sizeof(tp->setup_frame)); @@ -292,9 +299,16 @@ *setup_frm++ = eaddrs[0]; *setup_frm++ = eaddrs[0]; *setup_frm++ = eaddrs[1]; *setup_frm++ = eaddrs[1]; *setup_frm++ = eaddrs[2]; *setup_frm++ = eaddrs[2]; + + mapping = pci_map_single(tp->pdev, tp->setup_frame, + sizeof(tp->setup_frame), + PCI_DMA_TODEVICE); + tp->tx_buffers[0].skb = NULL; + tp->tx_buffers[0].mapping = mapping; + /* Put the setup frame on the Tx list. 
*/ tp->tx_ring[0].length = cpu_to_le32(0x08000000 | 192); - tp->tx_ring[0].buffer1 = virt_to_le32desc(tp->setup_frame); + tp->tx_ring[0].buffer1 = cpu_to_le32(mapping); tp->tx_ring[0].status = cpu_to_le32(DescOwned); tp->cur_tx++; @@ -569,30 +583,37 @@ tp->rx_ring[i].status = 0x00000000; tp->rx_ring[i].length = cpu_to_le32(PKT_BUF_SZ); tp->rx_ring[i].buffer2 = cpu_to_le32(tp->rx_ring_dma + sizeof(struct tulip_rx_desc) * (i + 1)); - tp->rx_skbuff[i] = NULL; + tp->rx_buffers[i].skb = NULL; + tp->rx_buffers[i].mapping = 0; } /* Mark the last entry as wrapping the ring. */ tp->rx_ring[i-1].length = cpu_to_le32(PKT_BUF_SZ | DESC_RING_WRAP); tp->rx_ring[i-1].buffer2 = cpu_to_le32(tp->rx_ring_dma); for (i = 0; i < RX_RING_SIZE; i++) { + dma_addr_t mapping; + /* Note the receive buffer must be longword aligned. dev_alloc_skb() provides 16 byte alignment. But do *not* use skb_reserve() to align the IP header! */ struct sk_buff *skb = dev_alloc_skb(PKT_BUF_SZ); - tp->rx_skbuff[i] = skb; + tp->rx_buffers[i].skb = skb; if (skb == NULL) break; + mapping = pci_map_single(tp->pdev, skb->tail, + PKT_BUF_SZ, PCI_DMA_FROMDEVICE); + tp->rx_buffers[i].mapping = mapping; skb->dev = dev; /* Mark as being used by this device. */ tp->rx_ring[i].status = cpu_to_le32(DescOwned); /* Owned by Tulip chip */ - tp->rx_ring[i].buffer1 = virt_to_le32desc(skb->tail); + tp->rx_ring[i].buffer1 = cpu_to_le32(mapping); } tp->dirty_rx = (unsigned int)(i - RX_RING_SIZE); /* The Tx buffer descriptor is filled in as needed, but we do need to clear the ownership bit. 
*/ for (i = 0; i < TX_RING_SIZE; i++) { - tp->tx_skbuff[i] = 0; + tp->tx_buffers[i].skb = NULL; + tp->tx_buffers[i].mapping = 0; tp->tx_ring[i].status = 0x00000000; tp->tx_ring[i].buffer2 = cpu_to_le32(tp->tx_ring_dma + sizeof(struct tulip_tx_desc) * (i + 1)); } @@ -605,6 +626,7 @@ struct tulip_private *tp = (struct tulip_private *)dev->priv; int entry; u32 flag; + dma_addr_t mapping; unsigned long cpuflags; /* Caution: the write order is important here, set the field @@ -615,8 +637,11 @@ /* Calculate the next Tx descriptor entry. */ entry = tp->cur_tx % TX_RING_SIZE; - tp->tx_skbuff[entry] = skb; - tp->tx_ring[entry].buffer1 = virt_to_le32desc(skb->data); + tp->tx_buffers[entry].skb = skb; + mapping = pci_map_single(tp->pdev, skb->data, + skb->len, PCI_DMA_TODEVICE); + tp->tx_buffers[entry].mapping = mapping; + tp->tx_ring[entry].buffer1 = cpu_to_le32(mapping); if (tp->cur_tx - tp->dirty_tx < TX_RING_SIZE/2) {/* Typical path */ flag = 0x60000000; /* No interrupt */ @@ -697,19 +722,31 @@ /* Free all the skbuffs in the Rx queue. */ for (i = 0; i < RX_RING_SIZE; i++) { - struct sk_buff *skb = tp->rx_skbuff[i]; - tp->rx_skbuff[i] = 0; + struct sk_buff *skb = tp->rx_buffers[i].skb; + dma_addr_t mapping = tp->rx_buffers[i].mapping; + + tp->rx_buffers[i].skb = NULL; + tp->rx_buffers[i].mapping = 0; + tp->rx_ring[i].status = 0; /* Not owned by Tulip chip. */ tp->rx_ring[i].length = 0; tp->rx_ring[i].buffer1 = 0xBADF00D0; /* An invalid address. 
*/ if (skb) { + pci_unmap_single(tp->pdev, mapping, PKT_BUF_SZ, + PCI_DMA_FROMDEVICE); dev_kfree_skb (skb); } } for (i = 0; i < TX_RING_SIZE; i++) { - if (tp->tx_skbuff[i]) - dev_kfree_skb (tp->tx_skbuff[i]); - tp->tx_skbuff[i] = 0; + struct sk_buff *skb = tp->tx_buffers[i].skb; + + if (skb != NULL) { + pci_unmap_single(tp->pdev, tp->tx_buffers[i].mapping, + skb->len, PCI_DMA_TODEVICE); + dev_kfree_skb (skb); + } + tp->tx_buffers[i].skb = NULL; + tp->tx_buffers[i].mapping = 0; } MOD_DEC_USE_COUNT; @@ -937,7 +974,8 @@ if (entry != 0) { /* Avoid a chip errata by prefixing a dummy entry. */ - tp->tx_skbuff[entry] = 0; + tp->tx_buffers[entry].skb = NULL; + tp->tx_buffers[entry].mapping = 0; tp->tx_ring[entry].length = (entry == TX_RING_SIZE-1) ? cpu_to_le32(DESC_RING_WRAP) : 0; tp->tx_ring[entry].buffer1 = 0; @@ -945,12 +983,17 @@ entry = tp->cur_tx++ % TX_RING_SIZE; } - tp->tx_skbuff[entry] = 0; + tp->tx_buffers[entry].skb = NULL; + tp->tx_buffers[entry].mapping = + pci_map_single(tp->pdev, tp->setup_frame, + sizeof(tp->setup_frame), + PCI_DMA_TODEVICE); /* Put the setup frame on the Tx list. */ if (entry == TX_RING_SIZE-1) tx_flags |= DESC_RING_WRAP; /* Wrap ring. */ tp->tx_ring[entry].length = cpu_to_le32(tx_flags); - tp->tx_ring[entry].buffer1 = virt_to_le32desc(tp->setup_frame); + tp->tx_ring[entry].buffer1 = + cpu_to_le32(tp->tx_buffers[entry].mapping); tp->tx_ring[entry].status = cpu_to_le32(DescOwned); if (tp->cur_tx - tp->dirty_tx >= TX_RING_SIZE - 2) { netif_stop_queue(dev); @@ -1163,8 +1206,10 @@ And the ASIX must have a burst limit or horrible things happen. */ if (chip_idx == DC21143 && chip_rev == 65) tp->csr0 &= ~0x01000000; - else if (chip_idx == AX88140) - tp->csr0 |= 0x2000; + else if (chip_idx == AX88140) { + if ((tp->csr0 & 0x3f00) == 0) + tp->csr0 |= 0x2000; + } /* The lower four bits are the media type. 
*/ if (board_idx >= 0 && board_idx < MAX_UNITS) { diff -u --recursive --new-file v2.3.99-pre1/linux/drivers/net/via-rhine.c linux/drivers/net/via-rhine.c --- v2.3.99-pre1/linux/drivers/net/via-rhine.c Tue Mar 14 19:10:39 2000 +++ linux/drivers/net/via-rhine.c Sat Mar 18 16:48:22 2000 @@ -420,7 +420,7 @@ if (via_rhine_chip_info[chip_id].flags & PCI_USES_MASTER) pci_set_master (pdev); - dev = init_etherdev(NULL, sizeof (*np)); + dev = init_etherdev(NULL, sizeof(*np)); if (dev == NULL) { printk (KERN_ERR "init_ethernet failed for card #%d\n", card_idx); diff -u --recursive --new-file v2.3.99-pre1/linux/drivers/net/wan/Makefile linux/drivers/net/wan/Makefile --- v2.3.99-pre1/linux/drivers/net/wan/Makefile Tue Mar 14 19:10:39 2000 +++ linux/drivers/net/wan/Makefile Fri Mar 17 14:19:42 2000 @@ -49,6 +49,14 @@ endif endif +ifeq ($(CONFIG_COMX),y) +LX_OBJS += comx.o +else + ifeq ($(CONFIG_COMX),m) + MX_OBJS += comx.o + endif +endif + ifeq ($(CONFIG_COMX_HW_COMX),y) L_OBJS += comx-hw-comx.o else diff -u --recursive --new-file v2.3.99-pre1/linux/drivers/net/wan/comx.c linux/drivers/net/wan/comx.c --- v2.3.99-pre1/linux/drivers/net/wan/comx.c Tue Mar 14 19:10:39 2000 +++ linux/drivers/net/wan/comx.c Fri Mar 17 14:19:42 2000 @@ -432,7 +432,7 @@ ch->line_status & PROTO_UP ? "UP" : "DOWN"); len += sprintf(page + len, "Modem status changes: %lu, Transmitter status " "is %s, tbusy: %d\n", ch->current_stats->tx_carrier_errors, ch->HW_txe ? - ch->HW_txe(dev) ? "IDLE" : "BUSY" : "NOT READY", (int)dev->tbusy); + ch->HW_txe(dev) ? 
"IDLE" : "BUSY" : "NOT READY", netif_running(dev)); len += sprintf(page + len, "Interface load (input): %d / %d / %d bits/s (", LOADAVG(0,0), LOADAVG(1, 0), LOADAVG(2, 0)); tmpstr[0] = 0; @@ -860,7 +860,7 @@ return -EIO; } - new_dir->ops = &proc_dir_inode_operations; // ez egy normalis /proc konyvtar + new_dir->proc_iops = &proc_dir_inode_operations; // ez egy normalis /proc konyvtar new_dir->nlink = 2; new_dir->data = NULL; // ide jon majd a struct dev @@ -884,7 +884,7 @@ S_IFREG | 0644, new_dir)) == NULL) { return -ENOMEM; } - debug_file->ops = &comx_debug_inode_ops; + debug_file->proc_iops = &comx_debug_inode_ops; debug_file->data = (void *)debug_file; debug_file->read_proc = NULL; // see below debug_file->write_proc = &comx_write_proc; @@ -1027,7 +1027,7 @@ struct proc_dir_entry *new_file; if ((new_file = create_proc_entry(name, S_IFREG | mode, dir)) != NULL) { - new_file->ops = &comx_normal_inode_ops; + new_file->proc_iops = &comx_normal_inode_ops; new_file->data = (void *)new_file; new_file->read_proc = &comx_read_proc; new_file->write_proc = &comx_write_proc; @@ -1129,7 +1129,7 @@ #define comx_init init_module #endif -__initfunc(int comx_init(void)) +int __init comx_init(void) { struct proc_dir_entry *new_file; @@ -1177,7 +1177,7 @@ return -ENOMEM; } - new_file->ops = &comx_normal_inode_ops; + new_file->proc_iops = &comx_normal_inode_ops; new_file->data = new_file; new_file->read_proc = &comx_root_read_proc; new_file->write_proc = NULL; diff -u --recursive --new-file v2.3.99-pre1/linux/drivers/net/yellowfin.c linux/drivers/net/yellowfin.c --- v2.3.99-pre1/linux/drivers/net/yellowfin.c Tue Mar 14 19:10:40 2000 +++ linux/drivers/net/yellowfin.c Fri Mar 17 14:19:42 2000 @@ -1411,7 +1411,7 @@ module_init(yellowfin_init); -module_exit(yellowfin_exit); +module_exit(yellowfin_cleanup); /* diff -u --recursive --new-file v2.3.99-pre1/linux/drivers/parport/ChangeLog linux/drivers/parport/ChangeLog --- v2.3.99-pre1/linux/drivers/parport/ChangeLog Tue Mar 14 19:10:40 
2000 +++ linux/drivers/parport/ChangeLog Fri Mar 17 13:10:44 2000 @@ -1,4 +1,23 @@ -2000-03-13 +2000-03-16 Tim Waugh + + * parport_pc.c (parport_ECP_supported): This seems to trigger on + machines that don't have an IRQ conflict; toned down the warning + message accordingly. + +2000-03-16 Gunther Mayer + + * parport_pc.c (show_parconfig_smsc37c669): Fix typo. + (decode_winbond): More IDs. + (winbond_check): Protect against false positives. + (winbond_check2): Likewise. + (smsc_check): Likewise. + +2000-03-15 Tim Waugh + + * parport_pc.c (cleanup_module): Don't call pci_unregister_driver + if we didn't call pci_register_driver first. + +2000-03-13 Tim Waugh * parport_pc.c (parport_pc_init): Moved from asm/parport.h. @@ -19,12 +38,12 @@ (parport_pc_find_ports): New function. (init_module): Make superio a config option, not a parameter. -2000-03-10 +2000-03-10 Tim Waugh * parport_pc.c (decode_winbond): Use correct 83877ATF chip ID. (decode_winbond): Fix typo. -2000-03-09 +2000-03-09 Tim Waugh * parport_pc.c: Integrate SuperIO PCI probe with normal PCI card probe, so that the MODULE_DEVICE_TABLE is complete. diff -u --recursive --new-file v2.3.99-pre1/linux/drivers/parport/parport_pc.c linux/drivers/parport/parport_pc.c --- v2.3.99-pre1/linux/drivers/parport/parport_pc.c Tue Mar 14 19:10:40 2000 +++ linux/drivers/parport/parport_pc.c Fri Mar 17 13:10:44 2000 @@ -1060,7 +1060,7 @@ (cr23*4 >=0x100) ?"yes":"no", (cr1 & 4) ? "yes" : "no"); printk("SMSC LPT Config: Port mode=%s, EPP version =%s\n", (cr1 & 0x08 ) ? "Standard mode only (SPP)" : modes[cr4 & 0x03], - (cr4 & 40) ? "1.7" : "1.9"); + (cr4 & 0x40) ? "1.7" : "1.9"); /* Heuristics ! BIOS setup for this mainboard device limits the choices to standard settings, i.e. 
io-address and IRQ @@ -1172,12 +1172,15 @@ /* Values are from public data sheets pdf files, I can just confirm 83977TF is correct :-) */ - if (id == 0x9773) type="83977TF"; + if (id == 0x9771) type="83977F/AF"; + else if (id == 0x9773) type="83977TF / SMSC 97w33x/97w34x"; else if (id == 0x9774) type="83977ATF"; else if ((id & ~0x0f) == 0x5270) type="83977CTF / SMSC 97w36x"; - else if ((id & ~0x0f) == 0x52f0) type="83977EF / SMSC 97x35x"; + else if ((id & ~0x0f) == 0x52f0) type="83977EF / SMSC 97w35x"; else if ((id & ~0x0f) == 0x5210) type="83627"; else if ((id & ~0x0f) == 0x6010) type="83697HF"; + else if ((oldid &0x0f ) == 0x0a) { type="83877F"; progif=1;} + else if ((oldid &0x0f ) == 0x0b) { type="83877AF"; progif=1;} else if ((oldid &0x0f ) == 0x0c) { type="83877TF"; progif=1;} else if ((oldid &0x0f ) == 0x0d) { type="83877ATF"; progif=1;} else progif=0; @@ -1225,7 +1228,15 @@ static void __devinit winbond_check(int io, int key) { - int devid,devrev,oldid; + int devid,devrev,oldid,x_devid,x_devrev,x_oldid; + + /* First probe without key */ + outb(0x20,io); + x_devid=inb(io+1); + outb(0x21,io); + x_devrev=inb(io+1); + outb(0x09,io); + x_oldid=inb(io+1); outb(key,io); outb(key,io); /* Write Magic Sequence to EFER, extended @@ -1238,12 +1249,23 @@ oldid=inb(io+1); outb(0xaa,io); /* Magic Seal */ + if ((x_devid == devid) && (x_devrev == devrev) && (x_oldid == oldid)) + return; /* protection against false positives */ + decode_winbond(io,key,devid,devrev,oldid); } static void __devinit winbond_check2(int io,int key) { - int devid,devrev,oldid; + int devid,devrev,oldid,x_devid,x_devrev,x_oldid; + + /* First probe without the key */ + outb(0x20,io+2); + x_devid=inb(io+2); + outb(0x21,io+1); + x_devrev=inb(io+2); + outb(0x09,io+1); + x_oldid=inb(io+2); outb(key,io); /* Write Magic Byte to EFER, extended funtion enable register */ @@ -1255,23 +1277,44 @@ oldid=inb(io+2); outb(0xaa,io); /* Magic Seal */ + if ((x_devid == devid) && (x_devrev == devrev) && (x_oldid == 
oldid)) + return; /* protection against false positives */ + decode_winbond(io,key,devid,devrev,oldid); } static void __devinit smsc_check(int io, int key) { - int devid,devrev; + int id,rev,oldid,oldrev,x_id,x_rev,x_oldid,x_oldrev; + + /* First probe without the key */ + outb(0x0d,io); + x_oldid=inb(io+1); + outb(0x0e,io); + x_oldrev=inb(io+1); + outb(0x20,io); + x_id=inb(io+1); + outb(0x21,io); + x_rev=inb(io+1); outb(key,io); outb(key,io); /* Write Magic Sequence to EFER, extended funtion enable register */ outb(0x0d,io); /* Write EFIR, extended function index register */ - devid=inb(io+1); /* Read EFDR, extended function data register */ + oldid=inb(io+1); /* Read EFDR, extended function data register */ outb(0x0e,io); - devrev=inb(io+1); + oldrev=inb(io+1); + outb(0x20,io); + id=inb(io+1); + outb(0x21,io); + rev=inb(io+1); outb(0xaa,io); /* Magic Seal */ - decode_smsc(io,key,devid,devrev); + if ((x_id == id) && (x_oldrev == oldrev) && + (x_oldid == oldid) && (x_rev == rev)) + return; /* protection against false positives */ + + decode_smsc(io,key,oldid,oldrev); } @@ -1584,7 +1627,8 @@ configb = inb (CONFIGB (pb)); if (!(configb & 0x40)) { - printk (KERN_WARNING "0x%lx: IRQ conflict!\n", pb->base); + printk (KERN_WARNING "0x%lx: possible IRQ conflict!\n", + pb->base); pb->irq = PARPORT_IRQ_NONE; } printk (KERN_DEBUG "0x%lx: ECP port cfgA=0x%02x cfgB=0x%02x\n", @@ -2487,7 +2531,10 @@ void cleanup_module(void) { struct parport *p = parport_enumerate(), *tmp; - pci_unregister_driver (&parport_pc_pci_driver); + + if (!user_specified) + pci_unregister_driver (&parport_pc_pci_driver); + while (p) { tmp = p->next; if (p->modes & PARPORT_MODE_PCSPP) { diff -u --recursive --new-file v2.3.99-pre1/linux/drivers/pci/pci.c linux/drivers/pci/pci.c --- v2.3.99-pre1/linux/drivers/pci/pci.c Thu Mar 2 14:36:22 2000 +++ linux/drivers/pci/pci.c Thu Mar 16 11:28:58 2000 @@ -384,19 +384,14 @@ pci_set_master(struct pci_dev *dev) { u16 cmd; - u8 lat; pci_read_config_word(dev, 
PCI_COMMAND, &cmd); if (! (cmd & PCI_COMMAND_MASTER)) { - printk("PCI: Enabling bus mastering for device %s\n", dev->slot_name); + DBG("PCI: Enabling bus mastering for device %s\n", dev->slot_name); cmd |= PCI_COMMAND_MASTER; pci_write_config_word(dev, PCI_COMMAND, cmd); } - pci_read_config_byte(dev, PCI_LATENCY_TIMER, &lat); - if (lat < 16) { - printk("PCI: Increasing latency timer of device %s to 64\n", dev->slot_name); - pci_write_config_byte(dev, PCI_LATENCY_TIMER, 64); - } + pcibios_set_master(dev); } /* diff -u --recursive --new-file v2.3.99-pre1/linux/drivers/pci/pci.ids linux/drivers/pci/pci.ids --- v2.3.99-pre1/linux/drivers/pci/pci.ids Fri Mar 10 16:40:43 2000 +++ linux/drivers/pci/pci.ids Fri Mar 17 22:13:28 2000 @@ -695,10 +695,13 @@ ac18 PCI1260 ac19 PCI1221 ac1a PCI1210 - ac1b PCI1221 + ac1b PCI1450 ac1c PCI1225 - ac1d PCI1251 + ac1d PCI1251A + ac1e PCI1211 + ac1f PCI1251B ac20 TI 2030 + ac51 PCI1420 fe00 FireWire Host Controller fe03 12C01A FireWire Host Controller 104d Sony Corporation diff -u --recursive --new-file v2.3.99-pre1/linux/drivers/pcmcia/ti113x.h linux/drivers/pcmcia/ti113x.h --- v2.3.99-pre1/linux/drivers/pcmcia/ti113x.h Fri Jan 21 18:19:16 2000 +++ linux/drivers/pcmcia/ti113x.h Sat Mar 18 12:12:51 2000 @@ -136,6 +136,46 @@ #ifdef CONFIG_CARDBUS +/* + * Generic TI open - TI has an extension for the + * INTCTL register that sets the PCI CSC interrupt. + * Make sure we set it correctly at open and init + * time. 
+ */ +static int ti_open(pci_socket_t *socket) +{ + u8 new, reg = exca_readb(socket, I365_INTCTL); + + new = reg & ~I365_INTR_ENA; + if (socket->cb_irq) + new |= I365_INTR_ENA; + if (new != reg) + exca_writeb(socket, I365_INTCTL, new); + return 0; +} + +static int ti_init(pci_socket_t *socket) +{ + yenta_init(socket); + ti_open(socket); + return 0; +} + +static struct pci_socket_ops ti_ops = { + ti_open, + yenta_close, + ti_init, + yenta_suspend, + yenta_get_status, + yenta_get_socket, + yenta_set_socket, + yenta_get_io_map, + yenta_set_io_map, + yenta_get_mem_map, + yenta_set_mem_map, + yenta_proc_setup +}; + #define ti_sysctl(socket) ((socket)->private[0]) #define ti_cardctl(socket) ((socket)->private[1]) #define ti_devctl(socket) ((socket)->private[2]) @@ -149,6 +189,7 @@ ti_cardctl(socket) &= ~(TI113X_CCR_PCI_IRQ_ENA | TI113X_CCR_PCI_IREQ | TI113X_CCR_PCI_CSC); if (socket->cb_irq) ti_cardctl(socket) |= TI113X_CCR_PCI_IRQ_ENA | TI113X_CCR_PCI_CSC | TI113X_CCR_PCI_IREQ; + ti_open(socket); return 0; } @@ -159,7 +200,7 @@ config_writel(socket, TI113X_SYSTEM_CONTROL, ti_sysctl(socket)); config_writeb(socket, TI113X_CARD_CONTROL, ti_cardctl(socket)); config_writeb(socket, TI113X_DEVICE_CONTROL, ti_devctl(socket)); - + ti_open(socket); return 0; } @@ -187,6 +228,7 @@ ti_diag(socket) &= ~(TI1250_DIAG_PCI_CSC | TI1250_DIAG_PCI_IREQ); if (socket->cb_irq) ti_diag(socket) |= TI1250_DIAG_PCI_CSC | TI1250_DIAG_PCI_IREQ; + ti_open(socket); return 0; } @@ -195,6 +237,7 @@ yenta_init(socket); config_writeb(socket, TI1250_DIAGNOSTIC, ti_diag(socket)); + ti_open(socket); return 0; } diff -u --recursive --new-file v2.3.99-pre1/linux/drivers/pcmcia/yenta.c linux/drivers/pcmcia/yenta.c --- v2.3.99-pre1/linux/drivers/pcmcia/yenta.c Tue Mar 14 19:10:40 2000 +++ linux/drivers/pcmcia/yenta.c Sat Mar 18 09:51:30 2000 @@ -528,6 +528,7 @@ cb_writel(socket, CB_SOCKET_EVENT, -1); } cb_writel(socket, CB_SOCKET_MASK, 0); + exca_writeb(socket, I365_CSCINT, 0); mask = probe_irq_mask(val) & 
0xffff; @@ -739,8 +740,18 @@ struct pci_socket_ops *op; } cardbus_override[] = { { PD(TI,1130), &ti113x_ops }, + { PD(TI,1031), &ti_ops }, { PD(TI,1131), &ti113x_ops }, { PD(TI,1250), &ti1250_ops }, + { PD(TI,1220), &ti_ops }, + { PD(TI,1221), &ti_ops }, + { PD(TI,1210), &ti_ops }, + { PD(TI,1450), &ti_ops }, + { PD(TI,1225), &ti_ops }, + { PD(TI,1251A), &ti_ops }, + { PD(TI,1211), &ti_ops }, + { PD(TI,1251B), &ti_ops }, + { PD(TI,1420), &ti_ops }, { PD(RICOH,RL5C465), &ricoh_ops }, { PD(RICOH,RL5C466), &ricoh_ops }, diff -u --recursive --new-file v2.3.99-pre1/linux/drivers/pnp/quirks.c linux/drivers/pnp/quirks.c --- v2.3.99-pre1/linux/drivers/pnp/quirks.c Fri Mar 10 16:40:43 2000 +++ linux/drivers/pnp/quirks.c Fri Mar 17 14:19:42 2000 @@ -18,7 +18,6 @@ #include #include - static void __init quirk_awe32_resources(struct pci_dev *dev) { struct isapnp_port *port, *port2, *port3; @@ -67,6 +66,37 @@ printk(KERN_INFO "isapnp: CMI8330 quirk - fixing interrupts and dma\n"); } +static void __init quirk_sb16audio_resources(struct pci_dev *dev) +{ + struct isapnp_port *port; + struct isapnp_resources *res = dev->sysdata; + int changed = 0; + + /* + * The default range on the mpu port for these devices is 0x388-0x388. + * Here we increase that range so that two such cards can be + * auto-configured. 
+ */ + + for( ; res ; res = res->alt ) { + port = res->port; + if(!port) + continue; + port = port->next; + if(!port) + continue; + port = port->next; + if(!port) + continue; + if(port->min != port->max) + continue; + port->max += 0x70; + changed = 1; + } + if(changed) + printk(KERN_INFO "ISAPnP: SB audio device quirk - increasing port range\n"); + return; +} /* * ISAPnP Quirks @@ -74,14 +104,31 @@ */ static struct isapnp_fixup isapnp_fixups[] __initdata = { + /* Soundblaster awe io port quirk */ { ISAPNP_VENDOR('C','T','L'), ISAPNP_DEVICE(0x0021), quirk_awe32_resources }, { ISAPNP_VENDOR('C','T','L'), ISAPNP_DEVICE(0x0022), quirk_awe32_resources }, { ISAPNP_VENDOR('C','T','L'), ISAPNP_DEVICE(0x0023), quirk_awe32_resources }, - { ISAPNP_VENDOR('@','X','@'), ISAPNP_DEVICE(0x0001), // CMI8330 + /* CMI 8330 interrupt and dma fix */ + { ISAPNP_VENDOR('@','X','@'), ISAPNP_DEVICE(0x0001), quirk_cmi8330_resources }, + /* Soundblaster audio device io port range quirk */ + { ISAPNP_VENDOR('C','T','L'), ISAPNP_DEVICE(0x0001), + quirk_sb16audio_resources }, + { ISAPNP_VENDOR('C','T','L'), ISAPNP_DEVICE(0x0031), + quirk_sb16audio_resources }, + { ISAPNP_VENDOR('C','T','L'), ISAPNP_DEVICE(0x0041), + quirk_sb16audio_resources }, + { ISAPNP_VENDOR('C','T','L'), ISAPNP_DEVICE(0x0042), + quirk_sb16audio_resources }, + { ISAPNP_VENDOR('C','T','L'), ISAPNP_DEVICE(0x0043), + quirk_sb16audio_resources }, + { ISAPNP_VENDOR('C','T','L'), ISAPNP_DEVICE(0x0044), + quirk_sb16audio_resources }, + { ISAPNP_VENDOR('C','T','L'), ISAPNP_DEVICE(0x0045), + quirk_sb16audio_resources }, { 0 } }; diff -u --recursive --new-file v2.3.99-pre1/linux/drivers/sbus/sbus.c linux/drivers/sbus/sbus.c --- v2.3.99-pre1/linux/drivers/sbus/sbus.c Wed Dec 29 13:13:18 1999 +++ linux/drivers/sbus/sbus.c Thu Mar 16 11:40:17 2000 @@ -1,4 +1,4 @@ -/* $Id: sbus.c,v 1.83 1999/10/18 01:47:01 zaitcev Exp $ +/* $Id: sbus.c,v 1.86 2000/03/16 09:23:57 jj Exp $ * sbus.c: SBus support routines. * * Copyright (C) 1995 David S. 
Miller (davem@caip.rutgers.edu) @@ -205,6 +205,7 @@ this_dev->child = kmalloc(sizeof(struct sbus_dev), GFP_ATOMIC); this_dev->child->bus = sbus; + this_dev->child->next = 0; fill_sbus_device(prom_getchild(this_node), this_dev->child); sbus_do_child_siblings(prom_getchild(this_node), this_dev->child, this_dev, sbus); @@ -303,6 +304,8 @@ } } +extern void register_proc_sparc_ioport(void); + void __init sbus_init(void) { int nd, this_sbus, sbus_devs, topnd, iommund; @@ -310,7 +313,11 @@ struct sbus_bus *sbus; struct sbus_dev *this_dev; int num_sbus = 0; /* How many did we find? */ - + +#ifndef __sparc_v9__ + register_proc_sparc_ioport(); +#endif + #ifdef CONFIG_SUN4 return sun4_dvma_init(); #endif @@ -424,6 +431,7 @@ GFP_ATOMIC); /* Fill it */ this_dev->child->bus = sbus; + this_dev->child->next = 0; fill_sbus_device(prom_getchild(sbus_devs), this_dev->child); sbus_do_child_siblings(prom_getchild(sbus_devs), @@ -453,6 +461,7 @@ GFP_ATOMIC); /* Fill it */ this_dev->child->bus = sbus; + this_dev->child->next = 0; fill_sbus_device(prom_getchild(sbus_devs), this_dev->child); sbus_do_child_siblings(prom_getchild(sbus_devs), diff -u --recursive --new-file v2.3.99-pre1/linux/drivers/scsi/53c7,8xx.c linux/drivers/scsi/53c7,8xx.c --- v2.3.99-pre1/linux/drivers/scsi/53c7,8xx.c Tue Nov 23 22:42:21 1999 +++ linux/drivers/scsi/53c7,8xx.c Thu Mar 16 22:26:53 2000 @@ -1396,7 +1396,7 @@ int i, irq; struct pci_dev *pdev = pci_find_slot(bus, device_fn); - printk("scsi-ncr53c7,8xx : at PCI bus %d, device %d, function %d\n", + printk("scsi-ncr53c7,8xx : at PCI bus %d, device %d, function %d\n", bus, (int) (device_fn & 0xf8) >> 3, (int) device_fn & 7); @@ -1406,10 +1406,8 @@ return -1; } - if ((error = pcibios_read_config_word (bus, device_fn, PCI_COMMAND, - &command)) || - (error = pcibios_read_config_byte (bus, device_fn, PCI_CLASS_REVISION, - &revision))) { + if ((error = pci_read_config_word (pdev, PCI_COMMAND, &command)) || + (error = pci_read_config_byte (pdev, PCI_CLASS_REVISION, 
&revision))) { printk ("scsi-ncr53c7,8xx : error %d not initializing due to error reading configuration space\n" " perhaps you specified an incorrect PCI bus, device, or function.\n", error); return -1; @@ -1451,24 +1449,21 @@ */ if (command & PCI_COMMAND_IO) { - if ((io_port & 3) != 1) { - printk ("scsi-ncr53c7,8xx : disabling I/O mapping since base address 0 (0x%x)\n" - " bits 0..1 indicate a non-IO mapping\n", - (unsigned) io_port); + if (!(pdev->resource[0].flags & IORESOURCE_IO)) { + printk ("scsi-ncr53c7,8xx : disabling I/O mapping since base " + "address 0\n contains a non-IO mapping\n"); io_port = 0; - } else - io_port &= PCI_BASE_ADDRESS_IO_MASK; + } } else { io_port = 0; } if (command & PCI_COMMAND_MEMORY) { - if ((base & PCI_BASE_ADDRESS_SPACE) != PCI_BASE_ADDRESS_SPACE_MEMORY) { - printk("scsi-ncr53c7,8xx : disabling memory mapping since base address 1\n" - " contains a non-memory mapping\n"); + if (!(pdev->resource[1].flags & IORESOURCE_MEM)) { + printk("scsi-ncr53c7,8xx : disabling memory mapping since base " + "address 1\n contains a non-memory mapping\n"); base = 0; - } else - base &= PCI_BASE_ADDRESS_MEM_MASK; + } } else { base = 0; } diff -u --recursive --new-file v2.3.99-pre1/linux/drivers/scsi/Makefile linux/drivers/scsi/Makefile --- v2.3.99-pre1/linux/drivers/scsi/Makefile Sat Feb 26 22:31:48 2000 +++ linux/drivers/scsi/Makefile Sun Mar 19 10:16:36 2000 @@ -709,7 +709,7 @@ 53c8xx_d.h: 53c7,8xx.scr script_asm.pl ln -sf 53c7,8xx.scr fake8.c - $(CPP) $(CPPFLAGS) -traditional -DCHIP=810 fake8.c | grep -v '^#' | perl script_asm.pl + $(CPP) $(CPPFLAGS) -traditional -DCHIP=810 fake8.c | grep -v '^#' | $(PERL) script_asm.pl mv script.h 53c8xx_d.h mv scriptu.h 53c8xx_u.h rm fake8.c @@ -718,7 +718,7 @@ 53c7xx_d.h: 53c7xx.scr script_asm.pl ln -sf 53c7xx.scr fake7.c - $(CPP) $(CPPFLAGS) -traditional -DCHIP=710 fake7.c | grep -v '^#' | perl -s script_asm.pl -ncr7x0_family + $(CPP) $(CPPFLAGS) -traditional -DCHIP=710 fake7.c | grep -v '^#' | $(PERL) -s 
script_asm.pl -ncr7x0_family mv script.h 53c7xx_d.h mv scriptu.h 53c7xx_u.h rm fake7.c @@ -729,7 +729,7 @@ sim710_d.h: sim710.scr script_asm.pl ln -sf sim710.scr fake7.c - $(CPP) $(CPPFLAGS) -traditional -DCHIP=710 fake7.c | grep -v '^#' | perl -s script_asm.pl -ncr7x0_family + $(CPP) $(CPPFLAGS) -traditional -DCHIP=710 fake7.c | grep -v '^#' | $(PERL) -s script_asm.pl -ncr7x0_family mv script.h sim710_d.h mv scriptu.h sim710_u.h rm fake7.c diff -u --recursive --new-file v2.3.99-pre1/linux/drivers/scsi/imm.c linux/drivers/scsi/imm.c --- v2.3.99-pre1/linux/drivers/scsi/imm.c Mon Oct 11 15:38:15 1999 +++ linux/drivers/scsi/imm.c Fri Mar 17 13:34:00 2000 @@ -330,7 +330,7 @@ static int imm_negotiate(imm_struct * tmp) { /* - * The following is supposedly the IEEE 1248-1994 negotiate + * The following is supposedly the IEEE 1284-1994 negotiate * sequence. I have yet to obtain a copy of the above standard * so this is a bit of a guess... * diff -u --recursive --new-file v2.3.99-pre1/linux/drivers/scsi/scsi_scan.c linux/drivers/scsi/scsi_scan.c --- v2.3.99-pre1/linux/drivers/scsi/scsi_scan.c Tue Mar 14 19:10:40 2000 +++ linux/drivers/scsi/scsi_scan.c Sat Mar 18 11:16:21 2000 @@ -108,6 +108,8 @@ {"YAMAHA", "CDR100", "1.00", BLIST_NOLUN}, /* Locks up if polled for lun != 0 */ {"YAMAHA", "CDR102", "1.00", BLIST_NOLUN}, /* Locks up if polled for lun != 0 * extra reset */ + {"YAMAHA", "CRW8424S", "1.0", BLIST_NOLUN}, /* Locks up if polled for lun != 0 */ + {"YAMAHA", "CRW6416S", "1.0c", BLIST_NOLUN}, /* Locks up if polled for lun != 0 */ {"MITSUMI", "CD-R CR-2201CS", "6119", BLIST_NOLUN}, /* Locks up if polled for lun != 0 */ {"RELISYS", "Scorpio", "*", BLIST_NOLUN}, /* responds to all LUN */ {"MICROTEK", "ScanMaker II", "5.61", BLIST_NOLUN}, /* responds to all LUN */ @@ -137,10 +139,12 @@ {"iomega", "jaz 1GB", "J.86", BLIST_NOTQ | BLIST_NOLUN}, {"CREATIVE","DVD-RAM RAM","*", BLIST_GHOST}, {"MATSHITA","PD-2 LF-D100","*", BLIST_GHOST}, + {"AOpen", "PD-2 DVD-520S", "*", 
BLIST_GHOST}, {"HITACHI", "GF-1050","*", BLIST_GHOST}, /* Hitachi SCSI DVD-RAM */ {"TOSHIBA","CDROM","*", BLIST_ISROM}, {"TOSHIBA","DVD-RAM SD-W1101","*", BLIST_GHOST}, {"TOSHIBA","DVD-RAM SD-W1111","*", BLIST_GHOST}, + {"MegaRAID", "LD", "*", BLIST_FORCELUN}, /* * Must be at end of list... diff -u --recursive --new-file v2.3.99-pre1/linux/drivers/sound/Makefile linux/drivers/sound/Makefile --- v2.3.99-pre1/linux/drivers/sound/Makefile Tue Mar 14 19:10:40 2000 +++ linux/drivers/sound/Makefile Thu Mar 16 11:30:03 2000 @@ -18,7 +18,7 @@ export-objs := ad1848.o audio_syms.o midi_syms.o mpu401.o \ msnd.o opl3.o sb_common.o sequencer_syms.o \ - sound_core.o sound_syms.o uart401.o ad1816.o \ + sound_core.o sound_syms.o uart401.o \ nm256_audio.o ac97.o ac97_codec.o diff -u --recursive --new-file v2.3.99-pre1/linux/drivers/sound/ac97_codec.c linux/drivers/sound/ac97_codec.c --- v2.3.99-pre1/linux/drivers/sound/ac97_codec.c Tue Mar 7 14:32:26 2000 +++ linux/drivers/sound/ac97_codec.c Thu Mar 16 11:18:35 2000 @@ -1,5 +1,5 @@ /* - * ac97_codec.c: Generic AC97 mixer module + * ac97_codec.c: Generic AC97 mixer/modem module * * Derived from ac97 mixer in maestro and trident driver. * @@ -20,6 +20,8 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
* * History + * v0.4 Mar 15 2000 Ollie Lho + * dual codec support verified with 4 channel output * v0.3 Feb 22 2000 Ollie Lho * bug fix for record mask setting * v0.2 Feb 10 2000 Ollie Lho @@ -42,7 +44,9 @@ static int ac97_recmask_io(struct ac97_codec *codec, int rw, int mask); static int ac97_mixer_ioctl(struct ac97_codec *codec, unsigned int cmd, unsigned long arg); -static int sigmatel_init(struct ac97_codec * codec); +static int ac97_init_mixer(struct ac97_codec *codec); + +static int sigmatel_init(struct ac97_codec *codec); #define arraysize(x) (sizeof(x)/sizeof((x)[0])) @@ -59,6 +63,8 @@ {0x43525923, "Cirrus Logic CS4298" , NULL}, {0x43525931, "Cirrus Logic CS4299" , NULL}, {0x4e534331, "National Semiconductor LM4549" , NULL}, + {0x53494c22, "Silicon Laboratory Si3036" , NULL}, + {0x53494c23, "Silicon Laboratory Si3038" , NULL}, {0x83847600, "SigmaTel STAC????" , NULL}, {0x83847604, "SigmaTel STAC9701/3/4/5", NULL}, {0x83847605, "SigmaTel STAC9704" , NULL}, @@ -323,7 +329,6 @@ /* else, write the first set in the mask as the output */ - val = ffs(mask); val = ac97_oss_rm[val-1]; val |= val << 8; /* set both channels */ @@ -503,14 +508,22 @@ int ac97_probe_codec(struct ac97_codec *codec) { - u16 id1, id2, cap; + u16 id1, id2; + u16 audio, modem; int i; /* probing AC97 codec, AC97 2.0 says that bit 15 of register 0x00 (reset) should be read zero. Probing of AC97 in this way is not reliable, it is not even SAFE !! */ codec->codec_write(codec, AC97_RESET, 0L); - if ((cap = codec->codec_read(codec, AC97_RESET)) & 0x8000) + if ((audio = codec->codec_read(codec, AC97_RESET)) & 0x8000) { + printk(KERN_ERR "ac97_codec: %s ac97 codec not present\n", + codec->id ? 
"Secondary" : "Primary"); return 0; + } + + /* probe for Modem Codec */ + codec->codec_write(codec, AC97_EXTENDED_MODEM_ID, 0L); + modem = codec->codec_read(codec, AC97_EXTENDED_MODEM_ID); codec->name = NULL; codec->codec_init = NULL; @@ -526,9 +539,20 @@ } if (codec->name == NULL) codec->name = "Unknown"; - printk(KERN_INFO "ac97_codec: ac97 vendor id1: 0x%04x, id2: 0x%04x (%s)\n", + printk(KERN_INFO "ac97_codec: AC97 %s codec, vendor id1: 0x%04x, " + "id2: 0x%04x (%s)\n", audio ? "Audio" : (modem ? "Modem" : ""), id1, id2, codec->name); + return ac97_init_mixer(codec); +} + +static int ac97_init_mixer(struct ac97_codec *codec) +{ + u16 cap; + int i; + + cap = codec->codec_read(codec, AC97_RESET); + /* mixer masks */ codec->supported_mixers = AC97_SUPPORTED_MASK; codec->stereo_mixers = AC97_STEREO_MASK; @@ -548,7 +572,7 @@ codec->codec_write(codec, AC97_MASTER_VOL_STEREO, 0L); codec->codec_write(codec, AC97_PCMOUT_VOL, 0L); - /* codec specific initialization for 4-6 channel output */ + /* codec specific initialization for 4-6 channel output or secondary codec stuff */ if (codec->id != 0 && codec->codec_init != NULL) { codec->codec_init(codec); } @@ -566,11 +590,28 @@ return 1; } +static int ac97_init_modem(struct ac97_codec *codec) +{ + return 0; +} + static int sigmatel_init(struct ac97_codec * codec) { codec->codec_write(codec, AC97_SURROUND_MASTER, 0L); - /* initialize SigmaTel STAC9721/23 */ - codec->codec_write(codec, 0x74, 0x01); + + /* initialize SigmaTel STAC9721/23 as secondary codec, decoding AC link + sloc 3,4 = 0x01, slot 7,8 = 0x00, */ + codec->codec_write(codec, 0x74, 0x00); + + /* we don't have the crystal when we are on an AMR card, so use + BIT_CLK as our clock source. 
Write the magic word ABBA and read + back to enable register 0x78 */ + codec->codec_write(codec, 0x76, 0xabba); + codec->codec_read(codec, 0x76); + + /* sync all the clocks*/ + codec->codec_write(codec, 0x78, 0x3802); + return 1; } diff -u --recursive --new-file v2.3.99-pre1/linux/drivers/sound/ad1816.c linux/drivers/sound/ad1816.c --- v2.3.99-pre1/linux/drivers/sound/ad1816.c Fri Mar 10 16:40:43 2000 +++ linux/drivers/sound/ad1816.c Sat Mar 18 16:51:35 2000 @@ -6,16 +6,8 @@ * * Based on the CS4232/AD1848 driver Copyright (C) by Hannu Savolainen 1993-1996 * - * This software is still under development. New versions of the driver - * are available from: - * http://www.student.informatik.tu-darmstadt.de/~tek/projects/linux.html - * or http://www.tu-darmstadt.de/~tek01/projects/linux.html - * - * Please report any bugs to: tek@rbg.informatik.tu-darmstadt.de * - * - * version: 1.3 - * cvs: $Header: /home/tek/CVSROOT/sound22/ad1816.c,v 1.3 1999/04/18 16:41:41 tek Exp $ + * version: 1.3.1 * status: experimental * date: 1999/4/18 * @@ -34,18 +26,23 @@ * required by some Aztech/Newcom SC-16 cards. 1999/04/18 * * Christoph Hellwig: Adapted to module_init/module_exit. 2000/03/03 + * + * Christoph Hellwig: Added isapnp support 2000/03/15 */ +#include #include #include +#include #include + #include "soundmodule.h" #include "sound_config.h" #define DEBUGNOISE(x) -#define DEBUGINFO(x) -#define DEBUGLOG(x) x -#define DEBUGWARN(x) x +#define DEBUGINFO(x) +#define DEBUGLOG(x) +#define DEBUGWARN(x) #define CHECK_FOR_POWER { int timeout=100; \ while (timeout > 0 && (inb(devc->base)&0x80)!= 0x80) {\ @@ -356,7 +353,7 @@ unsigned long flags; ad1816_info *devc = (ad1816_info *) audio_devs[dev]->devc; - DEBUGINFO (printk("ad1816: trigger called! (devc=%d,devc->base=%d\n",devc,devc->base)); + DEBUGINFO (printk("ad1816: trigger called! 
(devc=%d,devc->base=%d\n", devc, devc->base)); /* mode may have changed */ @@ -427,70 +424,38 @@ { ad1816_info *devc = (ad1816_info *) audio_devs[dev]->devc; - static struct format_tbl - { + static struct format_tbl { int format; unsigned char bits; - } - format2bits[] = - { - { - 0, 0 - } - , - { - AFMT_MU_LAW, 1 - } - , - { - AFMT_A_LAW, 3 - } - , - { - AFMT_IMA_ADPCM, 0 - } - , - { - AFMT_U8, 0 - } - , - { - AFMT_S16_LE, 2 - } - , - { - AFMT_S16_BE, 6 - } - , - { - AFMT_S8, 0 - } - , - { - AFMT_U16_LE, 0 - } - , - { - AFMT_U16_BE, 0 - } - }; + } format2bits[] = { + { 0, 0 }, + { AFMT_MU_LAW, 1 }, + { AFMT_A_LAW, 3 }, + { AFMT_IMA_ADPCM, 0 }, + { AFMT_U8, 0 }, + { AFMT_S16_LE, 2 }, + { AFMT_S16_BE, 6 }, + { AFMT_S8, 0 }, + { AFMT_U16_LE, 0 }, + { AFMT_U16_BE, 0 } + }; + int i, n = sizeof (format2bits) / sizeof (struct format_tbl); /* return current format */ - if (arg == 0) { + if (arg == 0) return devc->audio_format; - } devc->audio_format = arg; /* search matching format bits */ - for (i = 0; i < n; i++) { + for (i = 0; i < n; i++) if (format2bits[i].format == arg) { devc->format_bits = format2bits[i].bits; devc->audio_format = arg; return arg; } - } + /* Still hanging here. Something must be terribly wrong */ devc->format_bits = 0; return devc->audio_format = AFMT_U8; @@ -500,9 +465,8 @@ { ad1816_info *devc = (ad1816_info *) audio_devs[dev]->devc; - if (arg != 1 && arg != 2) { + if (arg != 1 && arg != 2) return devc->channels; - } devc->channels = arg; return arg; @@ -515,9 +479,8 @@ unsigned long flags; /* is device number valid ? 
*/ - if (dev < 0 || dev >= num_audiodevs) { + if (dev < 0 || dev >= num_audiodevs) return -(ENXIO); - } /* get device info of this dev */ devc = (ad1816_info *) audio_devs[dev]->devc; @@ -600,10 +563,10 @@ static void ad1816_interrupt (int irq, void *dev_id, struct pt_regs *dummy) { - unsigned char status; - ad1816_info *devc; - int dev; - unsigned long flags; + unsigned char status; + ad1816_info *devc; + int dev; + unsigned long flags; if (irq < 0 || irq > 15) { @@ -632,18 +595,15 @@ devc->irq_ok=1; - if (status == 0) { + if (status == 0) DEBUGWARN(printk ("ad1816: interrupt: Got interrupt, but no reason?\n")); - } - if (devc->opened && (devc->audio_mode & PCM_ENABLE_INPUT) - && (status&64)){ + + if (devc->opened && (devc->audio_mode & PCM_ENABLE_INPUT) && (status&64)) DMAbuf_inputintr (dev); - } - if (devc->opened && (devc->audio_mode & PCM_ENABLE_OUTPUT) && - (status & 128)) { + if (devc->opened && (devc->audio_mode & PCM_ENABLE_OUTPUT) && (status & 128)) DMAbuf_outputintr (dev, 1); - } + restore_flags(flags); } @@ -659,7 +619,7 @@ }; static char mix_cvt[101] = { - 0, 0,3,7,10,13,16,19,21,23,26,28,30,32,34,35,37,39,40,42, + 0, 0, 3, 7,10,13,16,19,21,23,26,28,30,32,34,35,37,39,40,42, 43,45,46,47,49,50,51,52,53,55,56,57,58,59,60,61,62,63,64,65, 65,66,67,68,69,70,70,71,72,73,73,74,75,75,76,77,77,78,79,79, 80,81,81,82,82,83,84,84,85,85,86,86,87,87,88,88,89,89,90,90, @@ -705,23 +665,23 @@ static unsigned short default_mixer_levels[SOUND_MIXER_NRDEVICES] = { - 0x4343, /* Master Volume */ - 0x3232, /* Bass */ - 0x3232, /* Treble */ - 0x0000, /* FM */ - 0x4343, /* PCM */ - 0x0000, /* PC Speaker */ - 0x0000, /* Ext Line */ - 0x0000, /* Mic */ - 0x0000, /* CD */ - 0x0000, /* Recording monitor */ - 0x0000, /* SB PCM */ - 0x0000, /* Recording level */ - 0x0000, /* Input gain */ - 0x0000, /* Output gain */ - 0x0000, /* Line1 */ - 0x0000, /* Line2 */ - 0x0000 /* Line3 (usually line in)*/ + 0x4343, /* Master Volume */ + 0x3232, /* Bass */ + 0x3232, /* Treble */ + 0x0000, /* 
FM */ + 0x4343, /* PCM */ + 0x0000, /* PC Speaker */ + 0x0000, /* Ext Line */ + 0x0000, /* Mic */ + 0x0000, /* CD */ + 0x0000, /* Recording monitor */ + 0x0000, /* SB PCM */ + 0x0000, /* Recording level */ + 0x0000, /* Input gain */ + 0x0000, /* Output gain */ + 0x0000, /* Line1 */ + 0x0000, /* Line2 */ + 0x0000 /* Line3 (usually line in)*/ }; #define LEFT_CHN 0 @@ -739,29 +699,24 @@ n = 0; /* Count selected device bits */ - for (i = 0; i < 32; i++) { - if (mask & (1 << i)) { + for (i = 0; i < 32; i++) + if (mask & (1 << i)) n++; - } - } - if (n == 0) { + if (n == 0) mask = SOUND_MASK_MIC; - } else if (n != 1) { /* Too many devices selected */ + else if (n != 1) { /* Too many devices selected */ /* Filter out active settings */ mask &= ~devc->recmask; n = 0; /* Count selected device bits */ - for (i = 0; i < 32; i++) { - if (mask & (1 << i)) { + for (i = 0; i < 32; i++) + if (mask & (1 << i)) n++; - } - } - if (n != 1) { + if (n != 1) mask = SOUND_MASK_MIC; - } } switch (mask) { @@ -810,9 +765,8 @@ /* Reverse polarity*/ - if (mix_devices[dev][chn].polarity == 1) { + if (mix_devices[dev][chn].polarity == 1) newval = 100 - newval; - } mask = (1 << mix_devices[dev][chn].nbits) - 1; shift = mix_devices[dev][chn].bitpos; @@ -830,12 +784,10 @@ DEBUGINFO(printk("ad1816: mixer_get called!\n")); /* range check + supported mixer check */ - if (dev < 0 || dev >= SOUND_MIXER_NRDEVICES ) { + if (dev < 0 || dev >= SOUND_MIXER_NRDEVICES ) return (-(EINVAL)); - } - if (!((1 << dev) & devc->supported_devices)) { + if (!((1 << dev) & devc->supported_devices)) return -(EINVAL); - } return devc->levels[dev]; } @@ -853,27 +805,21 @@ DEBUGINFO(printk("ad1816: mixer_set called!\n")); - if (dev < 0 || dev >= SOUND_MIXER_NRDEVICES ) { + if (dev < 0 || dev >= SOUND_MIXER_NRDEVICES ) return -(EINVAL); - } - if (left > 100) { + if (left > 100) left = 100; - } - if (left < 0) { + if (left < 0) left = 0; - } - if (right > 100) { + if (right > 100) right = 100; - } - if (right < 0) { + if (right 
< 0) right = 0; - } /* Mono control */ - if (mix_devices[dev][RIGHT_CHN].nbits == 0) { + if (mix_devices[dev][RIGHT_CHN].nbits == 0) right = left; - } retvol = left | (right << 8); /* Scale it */ @@ -882,14 +828,12 @@ right = mix_cvt[right]; /* reject all mixers that are not supported */ - if (!(devc->supported_devices & (1 << dev))) { + if (!(devc->supported_devices & (1 << dev))) return -(EINVAL); - } /* sanity check */ - if (mix_devices[dev][LEFT_CHN].nbits == 0) { + if (mix_devices[dev][LEFT_CHN].nbits == 0) return -(EINVAL); - } /* keep precise volume internal */ devc->levels[dev] = retvol; @@ -905,11 +849,10 @@ if ( regoffs==5 || regoffs==14 || regoffs==15 || regoffs==16 || regoffs==17 || regoffs==18 || regoffs==19 || regoffs==39) { - if (left==0) { + if (left==0) valmute |= 0x8000; - } else { + else valmute &= ~0x8000; - } } ad_write (devc, regoffs, valmute); /* mute */ @@ -918,9 +861,9 @@ */ /* Was just a mono channel */ - if (mix_devices[dev][RIGHT_CHN].nbits == 0) { + if (mix_devices[dev][RIGHT_CHN].nbits == 0) return retvol; - } + regoffs = mix_devices[dev][RIGHT_CHN].regno; val = ad_read (devc, regoffs); change_bits (&val, dev, RIGHT_CHN, right); @@ -929,11 +872,10 @@ if ( regoffs==5 || regoffs==14 || regoffs==15 || regoffs==16 || regoffs==17 || regoffs==18 || regoffs==19 || regoffs==39) { - if (right==0) { + if (right==0) valmute |= 0x80; - } else { + else valmute &= ~0x80; - } } ad_write (devc, regoffs, valmute); /* mute */ @@ -968,11 +910,9 @@ devc->supported_rec_devices = REC_DEVICES; - for (i = 0; i < SOUND_MIXER_NRDEVICES; i++) { - if (devc->supported_devices & (1 << i)) { + for (i = 0; i < SOUND_MIXER_NRDEVICES; i++) + if (devc->supported_devices & (1 << i)) ad1816_mixer_set (devc, i, default_mixer_levels[i]); - } - } ad1816_set_recmask (devc, SOUND_MASK_MIC); } @@ -992,22 +932,19 @@ switch (cmd & 0xff){ case SOUND_MIXER_RECSRC: - if (get_user(val, (int *)arg)) { + if (get_user(val, (int *)arg)) return -EFAULT; - } val=ad1816_set_recmask (devc, 
val); - return put_user(val, (int *)arg); + return put_user(val, (int *)arg); break; default: - if (get_user(val, (int *)arg)){ + if (get_user(val, (int *)arg)) return -EFAULT; - } - if ((val=ad1816_mixer_set (devc, cmd & 0xff, val))<0) { + if ((val=ad1816_mixer_set (devc, cmd & 0xff, val))<0) return val; - } else { + else return put_user(val, (int *)arg); - } } } else { /* read ioctl */ @@ -1039,25 +976,22 @@ break; default: - if ((val=ad1816_mixer_get (devc, cmd & 0xff))<0) { + if ((val=ad1816_mixer_get (devc, cmd & 0xff))<0) return val; - } else { + else return put_user(val, (int *)arg); - } } } - } else { + } else /* not for mixer */ return -(EINVAL); - } } /* ------------------------------------------------------------------- */ /* Mixer structure */ -static struct mixer_operations ad1816_mixer_operations = -{ +static struct mixer_operations ad1816_mixer_operations = { "AD1816", "AD1816 Mixer", ad1816_mixer_ioctl @@ -1076,7 +1010,7 @@ int io_base=hw_config->io_base; int *osp=hw_config->osp; int tmp; - + printk("ad1816: AD1816 sounddriver Copyright (C) 1998 by Thorsten Knabe\n"); printk("ad1816: io=0x%x, irq=%d, dma=%d, dma2=%d, clockfreq=%d, options=%d isadmabug=%d\n", hw_config->io_base, @@ -1098,7 +1032,7 @@ printk ("ad1816: detect error - step 0\n"); return 0; } - + devc->base = io_base; devc->irq_ok = 0; devc->irq = 0; @@ -1126,7 +1060,8 @@ DEBUGLOG (printk ("ad1816: Chip is not an AD1816 (Test 2)\n")); return(0); } - + + /* writes to ireg 10 are copied to ireg 11 */ ad_write(devc,10,54321); if (ad_read(devc,11)!=54321) { @@ -1140,7 +1075,7 @@ DEBUGLOG (printk ("ad1816: Chip is not an AD1816 (Test 4)\n")); return(0); } - + /* bit in base +1 cannot be set to 1 */ tmp=inb(devc->base+1); outb(0xff,devc->base+1); @@ -1152,7 +1087,7 @@ DEBUGLOG (printk ("ad1816: detect() - Detected OK\n")); DEBUGLOG (printk ("ad1816: AD1816 Version: %d\n",ad_read(devc,45))); - + /* detection was successful */ return 1; } @@ -1330,6 +1265,22 @@ static int __initdata dma = -1; 
static int __initdata dma2 = -1; +#if defined CONFIG_ISAPNP || defined CONFIG_ISAPNP_MODULE +struct pci_dev *ad1816_dev = NULL, + *mpu_dev = NULL; + +static int activated = 1; + +static int isapnp = 1; +static int isapnpjump = 0; + +MODULE_PARM(isapnp, "i"); +MODULE_PARM(isapnpjump, "i"); + +#else +static int isapnp = 0; +#endif + MODULE_PARM(io,"i"); MODULE_PARM(irq,"i"); MODULE_PARM(dma,"i"); @@ -1337,12 +1288,132 @@ MODULE_PARM(ad1816_clockfreq,"i"); MODULE_PARM(options,"i"); +#if defined CONFIG_ISAPNP || defined CONFIG_ISAPNP_MODULE + +static struct pci_dev *activate_dev(char *devname, char *resname, struct pci_dev *dev) +{ + int err; + + if(dev->active) { + activated = 0; + return(dev); + } + + if((err = dev->activate(dev)) < 0) { + printk(KERN_ERR "ad1816: %s %s config failed (out of resources?)[%d]\n", + devname, resname, err); + dev->deactivate(dev); + return(NULL); + } + + return(dev); +} + +static struct pci_dev *ad1816_init_generic(struct pci_bus *bus, struct pci_dev *card, + struct address_info *hw_config) +{ + if((ad1816_dev = isapnp_find_dev(bus, card->vendor, card->device, NULL))) { + ad1816_dev->prepare(ad1816_dev); + + if((ad1816_dev = activate_dev("Analog Devices 1816(A)", "ad1816", ad1816_dev))) { + hw_config->io_base = ad1816_dev->resource[2].start; + hw_config->irq = ad1816_dev->irq_resource[0].start; + hw_config->dma = ad1816_dev->dma_resource[0].start; + hw_config->dma2 = ad1816_dev->dma_resource[1].start; + } + } + + return(ad1816_dev); +} + +static struct { + unsigned short vendor; + unsigned short function; + struct pci_dev * (*initfunc)(struct pci_bus*, struct pci_dev *, struct address_info *); + char *name; +} isapnp_ad1816_list[] __initdata = { + {ISAPNP_VENDOR('A','D','S'), ISAPNP_FUNCTION(0x7150), &ad1816_init_generic, "Analog Devices 1815" }, + {ISAPNP_VENDOR('A','D','S'), ISAPNP_FUNCTION(0x7180), &ad1816_init_generic, "Analog Devices 1816A" }, + {0} +}; + +static int __init ad1816_init_isapnp(struct address_info *hw_config, + struct 
pci_bus *bus, struct pci_dev *card, int slot) +{ + struct pci_dev *idev = NULL; + + /* You missed the init func? That's bad. */ + if(isapnp_ad1816_list[slot].initfunc) { + char *busname = bus->name[0] ? bus->name : isapnp_ad1816_list[slot].name; + + printk(KERN_INFO "ad1816: %s detected\n", busname); + + /* Initialize this baby. */ + if((idev = isapnp_ad1816_list[slot].initfunc(bus, card, hw_config))) { + /* We got it. */ + + printk(KERN_NOTICE "ad1816: ISAPnP reports '%s' at i/o %#x, irq %d, dma %d, %d\n", + busname, + hw_config->io_base, hw_config->irq, hw_config->dma, + hw_config->dma2); + return 1; + } else + printk(KERN_INFO "ad1816: Failed to initialize %s\n", busname); + } else + printk(KERN_ERR "ad1816: Bad entry in ad1816.c PnP table\n"); + + return 0; +} + +/* + * Actually this routine will detect and configure only the first card with successful + * initialization. isapnpjump could be used to jump to a specific entry. + * Please always add entries at the end of the array. + * Should this be fixed? - azummo + */ + +int __init ad1816_probe_isapnp(struct address_info *hw_config) +{ + int i; + + /* Count entries in isapnp_ad1816_list */ + for (i = 0; isapnp_ad1816_list[i].vendor != 0; i++) + ; + /* Check and adjust isapnpjump */ + if( isapnpjump < 0 || isapnpjump > ( i - 1 ) ) { + printk(KERN_ERR "ad1816: Valid range for isapnpjump is 0-%d. 
Adjusted to 0.\n", i-1); + isapnpjump = 0; + } + + for (i = isapnpjump; isapnp_ad1816_list[i].vendor != 0; i++) { + struct pci_dev *card = NULL; + + while ((card = isapnp_find_dev(NULL, isapnp_ad1816_list[i].vendor, + isapnp_ad1816_list[i].function, card))) + if(ad1816_init_isapnp(hw_config, card->bus, card, i)) + return 0; + } + + return -ENODEV; +} +#endif + static int __init init_ad1816(void) { - cfg.io_base = io; - cfg.irq = irq; - cfg.dma = dma; - cfg.dma2 = dma2; + +#if defined CONFIG_ISAPNP || defined CONFIG_ISAPNP_MODULE + if(isapnp && (ad1816_probe_isapnp(&cfg) < 0) ) { + printk(KERN_NOTICE "ad1816: No ISAPnP cards found, trying standard ones...\n"); + isapnp = 0; + } +#endif + + if( isapnp == 0) { + cfg.io_base = io; + cfg.irq = irq; + cfg.dma = dma; + cfg.dma2 = dma2; + } if (cfg.io_base == -1 || cfg.irq == -1 || cfg.dma == -1 || cfg.dma2 == -1) { printk(KERN_INFO "ad1816: dma, dma2, irq and io must be set.\n"); @@ -1352,9 +1423,11 @@ if (probe_ad1816(&cfg) == 0) { return -ENODEV; } - + attach_ad1816(&cfg); SOUND_LOCK; + + return 0; } static void __exit cleanup_ad1816 (void) @@ -1370,6 +1443,11 @@ nr_ad1816_devs=0; SOUND_LOCK_END; +#if defined CONFIG_ISAPNP || defined CONFIG_ISAPNP_MODULE + if(activated) + if(ad1816_dev) + ad1816_dev->deactivate(ad1816_dev); +#endif } module_init(init_ad1816); diff -u --recursive --new-file v2.3.99-pre1/linux/drivers/sound/bin2hex.c linux/drivers/sound/bin2hex.c --- v2.3.99-pre1/linux/drivers/sound/bin2hex.c Tue Jun 1 23:25:48 1999 +++ linux/drivers/sound/bin2hex.c Wed Mar 15 10:28:32 2000 @@ -33,6 +33,6 @@ i++; } - printf( "};\n#define %sLen %d\n", varname, i ); + printf( "};\nstatic int %sLen = %d;\n", varname, i ); return 0; } diff -u --recursive --new-file v2.3.99-pre1/linux/drivers/sound/dev_table.c linux/drivers/sound/dev_table.c --- v2.3.99-pre1/linux/drivers/sound/dev_table.c Tue Mar 14 19:10:40 2000 +++ linux/drivers/sound/dev_table.c Thu Mar 16 14:09:26 2000 @@ -210,6 +210,7 @@ { mixer_devs[dev] = NULL; 
unregister_sound_mixer(dev<<4); + num_mixers--; } } diff -u --recursive --new-file v2.3.99-pre1/linux/drivers/sound/sb_common.c linux/drivers/sound/sb_common.c --- v2.3.99-pre1/linux/drivers/sound/sb_common.c Fri Mar 10 16:40:44 2000 +++ linux/drivers/sound/sb_common.c Thu Mar 16 14:09:26 2000 @@ -908,10 +908,13 @@ } if (!(devc->caps & SB_NO_AUDIO && devc->caps & SB_NO_MIDI)) { + extern int sbmixnum; + if (devc->irq > 0) free_irq(devc->irq, devc); sound_unload_mixerdev(devc->my_mixerdev); + sbmixnum--; /* We don't have to do this bit any more the UART401 is its own master -- Krzysztof Halasa */ /* But we have to do it, if UART401 is not detected */ diff -u --recursive --new-file v2.3.99-pre1/linux/drivers/sound/trident.c linux/drivers/sound/trident.c --- v2.3.99-pre1/linux/drivers/sound/trident.c Tue Mar 7 14:32:26 2000 +++ linux/drivers/sound/trident.c Thu Mar 16 11:18:35 2000 @@ -29,6 +29,10 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. * * History + * v0.14 Mar 15 2000 Ollie Lho + * 5.1 channel output support with channel binding. What's the Matrix ? + * v0.13.1 Mar 10 2000 Ollie Lho + * few minor bugs on dual codec support, needs more testing * v0.13 Mar 03 2000 Ollie Lho * new pci_* for 2.4 kernel, back ported to 2.2 * v0.12 Feb 23 2000 Ollie Lho @@ -62,14 +66,14 @@ * * ToDo * Clean up of low level channel register access code. 
(done) - * Fix the bug on dma buffer management in update_ptr, read/write, drain_dac (done) - * Dual AC97 codecs support (done partially, need channel binding to test) + * Fix the bug on dma buffer management in update_ptr, read/write, drain_dac (done) + * Dual AC97 codecs support (done) * Recording support (done) * Mmap support - * "Channel Binding" ioctl extension - * new pci device driver interface for 2.4 kernel + * "Channel Binding" ioctl extension (done) + * new pci device driver interface for 2.4 kernel (done) */ - + #include #include #include @@ -92,7 +96,7 @@ #include "trident.h" -#define DRIVER_VERSION "0.13" +#define DRIVER_VERSION "0.14" /* magic numbers to protect our data structures */ #define TRIDENT_CARD_MAGIC 0x5072696E /* "Prin" */ @@ -107,8 +111,14 @@ #define NR_AC97 2 /* minor number of /dev/dspW */ +#define SND_DEV_DSP8 3 + +/* minor number of /dev/dspW */ #define SND_DEV_DSP16 5 +/* minor number of /dev/swmodem (temporary, experimental) */ +#define SND_DEV_SWMODEM 7 + static const unsigned sample_size[] = { 1, 2, 2, 4 }; static const unsigned sample_shift[] = { 0, 1, 1, 2 }; @@ -126,7 +136,7 @@ "SiS 7018 PCI Audio" }; -static struct pci_device_id trident_pci_tbl [] __devinitdata = { +static struct pci_device_id trident_pci_tbl [] __initdata = { {PCI_VENDOR_ID_TRIDENT, PCI_DEVICE_ID_TRIDENT_4DWAVE_DX, PCI_ANY_ID, PCI_ANY_ID, 0, 0, TRIDENT_4D_DX}, {PCI_VENDOR_ID_TRIDENT, PCI_DEVICE_ID_TRIDENT_4DWAVE_NX, @@ -256,6 +266,19 @@ u32 irq; }; +/* table to map from CHANNELMASK to channel attribute for SiS 7018 */ +static u16 mask2attr [] = +{ + PCM_LR, PCM_LR, SURR_LR, CENTER_LFE, + HSET, MIC, MODEM_LINE1, MODEM_LINE2, + I2S_LR, SPDIF_LR +}; +/* table to map from channel attribute to CHANNELMASK for SiS 7018 */ +static int attr2mask [] = { + DSP_BIND_MODEM1, DSP_BIND_MODEM2, DSP_BIND_FRONT, DSP_BIND_HANDSET, + DSP_BIND_I2S, DSP_BIND_CENTER_LFE, DSP_BIND_SURR, DSP_BIND_SPDIF +}; + static struct trident_card *devs = NULL; static void 
trident_ac97_set(struct ac97_codec *codec, u8 reg, u16 val); @@ -596,11 +619,7 @@ channel->eso = dmabuf->dmasize >> sample_shift[dmabuf->fmt]; channel->eso -= 1; - if (state->card->pci_id == PCI_DEVICE_ID_SI_7018) { - /* FIXME: channel attributes are configured by ioctls, but it is not - implemented so just set to ZERO for the moment */ - channel->attribute = 0; - } else { + if (state->card->pci_id != PCI_DEVICE_ID_SI_7018) { channel->attribute = 0; } @@ -660,12 +679,7 @@ channel->eso = dmabuf->dmasize >> sample_shift[dmabuf->fmt]; channel->eso -= 1; - if (state->card->pci_id == PCI_DEVICE_ID_SI_7018) { - /* FIXME: channel attributes are configured by ioctls, but it is not - implemented so just set to 0x8a80 for the moment, record from PCM L/R - input and mono = (left + right + 1)/2*/ - channel->attribute = 0x8A80; - } else { + if (state->card->pci_id != PCI_DEVICE_ID_SI_7018) { channel->attribute = 0; } @@ -1577,7 +1591,8 @@ return 0; case SNDCTL_DSP_GETCAPS: - return put_user(DSP_CAP_REALTIME|DSP_CAP_TRIGGER|DSP_CAP_MMAP, (int *)arg); + return put_user(DSP_CAP_REALTIME|DSP_CAP_TRIGGER|DSP_CAP_MMAP|DSP_CAP_BIND, + (int *)arg); case SNDCTL_DSP_GETTRIGGER: val = 0; @@ -1656,6 +1671,28 @@ return put_user((dmabuf->fmt & TRIDENT_FMT_16BIT) ? 
AFMT_S16_LE : AFMT_U8, (int *)arg); + case SNDCTL_DSP_GETCHANNELMASK: + return put_user(DSP_BIND_FRONT|DSP_BIND_SURR|DSP_BIND_CENTER_LFE, + (int *)arg); + + case SNDCTL_DSP_BIND_CHANNEL: + if (state->card->pci_id != PCI_DEVICE_ID_SI_7018) + return -EINVAL; + + get_user_ret(val, (int *)arg, -EFAULT); + if (val == DSP_BIND_QUERY) { + val = dmabuf->channel->attribute & 0x1c00; /* keep only the 3-bit destination/source select field */ + val = attr2mask[val >> 10]; /* field sits at bits 10-12, so the index is 0-7 */ + } else { + dmabuf->ready = 0; + if (file->f_mode & FMODE_READ) + dmabuf->channel->attribute = (CHANNEL_REC|SRC_ENABLE); + if (file->f_mode & FMODE_WRITE) + dmabuf->channel->attribute = (CHANNEL_SPC_PB|SRC_ENABLE); + dmabuf->channel->attribute |= mask2attr[ffs(val & 0x1ff)]; /* bound the user-supplied mask to the 10-entry table; a mask with no known bit falls back to entry 0 */ + } + return put_user(val, (int *)arg); + case SNDCTL_DSP_MAPINBUF: case SNDCTL_DSP_MAPOUTBUF: case SNDCTL_DSP_SETSYNCRO: @@ -1673,6 +1710,7 @@ int minor = MINOR(inode->i_rdev); struct trident_card *card = devs; struct trident_state *state = NULL; + struct dmabuf *dmabuf = NULL; /* find an avaiable virtual channel (instance of /dev/dsp) */ while (card != NULL) { @@ -1683,6 +1721,7 @@ if (state == NULL) return -ENOMEM; memset(state, 0, sizeof(struct trident_state)); + dmabuf = &state->dmabuf; goto found_virt; } } @@ -1694,7 +1733,7 @@ found_virt: /* found a free virtual channel, allocate hardware channels */ - if ((state->dmabuf.channel = trident_alloc_pcm_channel(card)) == NULL) { + if ((dmabuf->channel = trident_alloc_pcm_channel(card)) == NULL) { kfree (card->states[i]); card->states[i] = NULL;; return -ENODEV; @@ -1704,7 +1743,7 @@ state->virt = i; state->card = card; state->magic = TRIDENT_STATE_MAGIC; - init_waitqueue_head(&state->dmabuf.wait); + init_waitqueue_head(&dmabuf->wait); init_MUTEX(&state->open_sem); file->private_data = state; @@ -1714,24 +1753,34 @@ should be default to unsigned 8-bits, mono, with sample rate 8kHz and /dev/dspW will accept 16-bits sample */ if (file->f_mode & FMODE_WRITE) { - state->dmabuf.fmt &= ~TRIDENT_FMT_MASK; - if ((minor & 0xf) == SND_DEV_DSP16) -
state->dmabuf.fmt |= TRIDENT_FMT_16BIT; - state->dmabuf.ossfragshift = 0; - state->dmabuf.ossmaxfrags = 0; - state->dmabuf.subdivision = 0; + dmabuf->fmt &= ~TRIDENT_FMT_MASK; + if ((minor & 0x0f) == SND_DEV_DSP16) + dmabuf->fmt |= TRIDENT_FMT_16BIT; + dmabuf->ossfragshift = 0; + dmabuf->ossmaxfrags = 0; + dmabuf->subdivision = 0; + if (card->pci_id == PCI_DEVICE_ID_SI_7018) { + /* set default channel attribute to normal playback */ + dmabuf->channel->attribute = CHANNEL_PB; + } trident_set_dac_rate(state, 8000); } if (file->f_mode & FMODE_READ) { /* FIXME: Trident 4d can only record in singed 16-bits stereo, 48kHz sample, to be dealed with in trident_set_adc_rate() ?? */ - state->dmabuf.fmt &= ~TRIDENT_FMT_MASK; - if ((minor & 0xf) == SND_DEV_DSP16) - state->dmabuf.fmt |= TRIDENT_FMT_16BIT; - state->dmabuf.ossfragshift = 0; - state->dmabuf.ossmaxfrags = 0; - state->dmabuf.subdivision = 0; + dmabuf->fmt &= ~TRIDENT_FMT_MASK; + if ((minor & 0x0f) == SND_DEV_DSP16) + dmabuf->fmt |= TRIDENT_FMT_16BIT; + dmabuf->ossfragshift = 0; + dmabuf->ossmaxfrags = 0; + dmabuf->subdivision = 0; + if (card->pci_id == PCI_DEVICE_ID_SI_7018) { + /* set default channel attribute to 0x8a80, record from + PCM L/R FIFO and mono = (left + right + 1)/2*/ + dmabuf->channel->attribute = + (CHANNEL_REC|PCM_LR|MONO_MIX); + } trident_set_adc_rate(state, 8000); } @@ -1860,7 +1909,7 @@ address = SI_AC97_READ; mask = SI_AC97_BUSY_READ | SI_AC97_AUDIO_BUSY; if (codec->id) - mask |= SI_AC97_SECONDARY; + mask |= SI_AC97_SECONDARY; busy = SI_AC97_BUSY_READ; break; case PCI_DEVICE_ID_TRIDENT_4DWAVE_DX: @@ -1873,7 +1922,7 @@ else address = NX_ACR2_AC97_R_PRIMARY; mask = NX_AC97_BUSY_READ; - busy = 0x0c00; + busy = NX_AC97_BUSY_READ | NX_AC97_BUSY_DATA; break; } @@ -1953,9 +2002,13 @@ case PCI_DEVICE_ID_SI_7018: /* disable AC97 GPIO interrupt */ outl(0x00, TRID_REG(card, SI_AC97_GPIO)); - /* stop AC97 cold reset process */ - outl(PCMOUT|SECONDARY_ID, TRID_REG(card, SI_SERIAL_INTF_CTRL)); - ready_2nd = 
inl(TRID_REG(card, SI_SERIAL_INTF_CTRL)); + /* when power up the AC link is in cold reset mode so stop it */ + outl(PCMOUT|SURROUT|CENTEROUT|LFEOUT|SECONDARY_ID, + TRID_REG(card, SI_SERIAL_INTF_CTRL)); + /* it take a long time to recover from a cold reset (especially when you have + more than one codec) */ + udelay(2000); + ready_2nd = inl(TRID_REG(card, SI_SERIAL_INTF_CTRL)); ready_2nd &= SI_AC97_SECONDARY_READY; break; case PCI_DEVICE_ID_TRIDENT_4DWAVE_DX: @@ -1972,7 +2025,7 @@ for (num_ac97 = 0; num_ac97 < NR_AC97; num_ac97++) { if ((codec = kmalloc(sizeof(struct ac97_codec), GFP_KERNEL)) == NULL) - return -1; + return -ENOMEM; memset(codec, 0, sizeof(struct ac97_codec)); /* initialize some basic codec information, other fields will be filled @@ -2004,7 +2057,7 @@ /* install the driver, we do not allocate hardware channel nor DMA buffer now, they are defered untill "ACCESS" time (in prog_dmabuf called by open/read/write/ioctl/mmap) */ -static int __devinit trident_probe(struct pci_dev *pci_dev, const struct pci_device_id *pci_id) +static int __init trident_probe(struct pci_dev *pci_dev, const struct pci_device_id *pci_id) { unsigned long iobase; struct trident_card *card; @@ -2012,19 +2065,19 @@ if (!pci_dma_supported(pci_dev, TRIDENT_DMA_MASK)) { printk(KERN_ERR "trident: architecture does not support" " 30bit PCI busmaster DMA\n"); - return -1; + return -ENODEV; } iobase = pci_dev->resource[0].start; if (check_region(iobase, 256)) { printk(KERN_ERR "trident: can't allocate I/O space at 0x%4.4lx\n", iobase); - return -1; + return -ENODEV; } if ((card = kmalloc(sizeof(struct trident_card), GFP_KERNEL)) == NULL) { printk(KERN_ERR "trident: out of memory\n"); - return -1; + return -ENOMEM; } memset(card, 0, sizeof(*card)); @@ -2054,7 +2107,7 @@ printk(KERN_ERR "trident: unable to allocate irq %d\n", card->irq); release_region(card->iobase, 256); kfree(card); - return 0; + return -ENODEV; } /* register /dev/dsp */ if ((card->dev_audio = 
register_sound_dsp(&trident_audio_fops, -1)) < 0) { @@ -2062,7 +2115,7 @@ release_region(iobase, 256); free_irq(card->irq, card); kfree(card); - return -1; + return -ENODEV; } /* initilize AC97 codec and register /dev/mixer */ if (trident_ac97_init(card) <= 0) { @@ -2070,7 +2123,7 @@ release_region(iobase, 256); free_irq(card->irq, card); kfree(card); - return -1; + return -ENODEV; } outl(0x00, TRID_REG(card, T4D_MUSICVOL_WAVEVOL)); @@ -2083,7 +2136,7 @@ return 0; } -static void __devexit trident_remove(struct pci_dev *pci_dev) +static void __exit trident_remove(struct pci_dev *pci_dev) { int i; struct trident_card *card = pci_dev->driver_data; @@ -2120,15 +2173,22 @@ static int __init trident_init_module (void) { + if (!pci_present()) /* No PCI bus in this machine! */ + return -ENODEV; + printk(KERN_INFO "Trident 4DWave/SiS 7018 PCI Audio, version " DRIVER_VERSION ", " __TIME__ " " __DATE__ "\n"); - return pci_module_init (&trident_pci_driver); + if (!pci_register_driver(&trident_pci_driver)) { + pci_unregister_driver(&trident_pci_driver); + return -ENODEV; + } + return 0; } static void __exit trident_cleanup_module (void) { - pci_unregister_driver (&trident_pci_driver); + pci_unregister_driver(&trident_pci_driver); } module_init(trident_init_module); diff -u --recursive --new-file v2.3.99-pre1/linux/drivers/sound/trident.h linux/drivers/sound/trident.h --- v2.3.99-pre1/linux/drivers/sound/trident.h Tue Mar 7 14:32:26 2000 +++ linux/drivers/sound/trident.h Thu Mar 16 11:18:35 2000 @@ -62,6 +62,7 @@ #define DAC_RUNNING 0x01 #define ADC_RUNNING 0x02 + /* Register Addresses */ /* operational registers common to DX, NX, 7018 */ @@ -130,7 +131,7 @@ enum trident_nx_ac97_bits { /* ACR1-3 */ NX_AC97_BUSY_WRITE = 0x0800, NX_AC97_BUSY_READ = 0x0800, - NX_AC97_WRITE_SECONDARY = 0x0100, + NX_AC97_BUSY_DATA = 0x0400, NX_AC97_WRITE_SECONDARY = 0x0100, /* ACR0 */ NX_AC97_SECONDARY_READY = 0x0040, NX_AC97_SECONDARY_RECORD = 0x0020, NX_AC97_SURROUND_OUTPUT = 0x0010, @@ -148,7 
+149,7 @@ MICIN = 0x00000400, LINE2IN = 0x00000800, HEAD_SET_IN = 0x00001000, GPIOIN = 0x00002000, /* 7018 spec says id = 01 but the demo board routed to 10 - SECONDARY_ID= 0x00008000, */ + SECONDARY_ID= 0x00004000, */ SECONDARY_ID= 0x00004000, PCMOUT = 0x00010000, SURROUT = 0x00020000, CENTEROUT = 0x00040000, LFEOUT = 0x00080000, @@ -173,10 +174,18 @@ }; enum channel_attribute { - MODEM_LINE1, MODEM_LINE2, PCM_LR, HSET, - I2SLR, CENTER_LFE, SURR_LR, SPDIF_LR, - CHANNEL_PB = 0x00000000, CHANNEL_SPC_PB = 0x40000000, - CHANNEL_REC = 0x80000000, CHANNEL_REC_PB = 0xc0000000 + /* playback/record select */ + CHANNEL_PB = 0x0000, CHANNEL_SPC_PB = 0x4000, + CHANNEL_REC = 0x8000, CHANNEL_REC_PB = 0xc000, + /* playback destination/record source select */ + MODEM_LINE1 = 0x0000, MODEM_LINE2 = 0x0400, + PCM_LR = 0x0800, HSET = 0x0c00, + I2S_LR = 0x1000, CENTER_LFE = 0x1400, + SURR_LR = 0x1800, SPDIF_LR = 0x1c00, + MIC = 0x1400, + /* misc stuff */ + MONO_LEFT = 0x0000, MONO_RIGHT = 0x0100, + MONO_MIX = 0x0200, SRC_ENABLE = 0x0080, }; enum miscint_bits { @@ -189,12 +198,6 @@ ST_TARGET_REACHED = 0x00008000, PB_24K_MODE = 0x00010000, ST_IRQ_EN = 0x00800000, ACGPIO_IRQ = 0x01000000 }; - -#define AC97_SIGMATEL_DAC2INVERT 0x6E -#define AC97_SIGMATEL_BIAS1 0x70 -#define AC97_SIGMATEL_BIAS2 0x72 -#define AC97_SIGMATEL_CIC1 0x76 -#define AC97_SIGMATEL_CIC2 0x78 #define TRID_REG( trident, x ) ( (trident) -> iobase + (x) ) diff -u --recursive --new-file v2.3.99-pre1/linux/drivers/usb/Config.in linux/drivers/usb/Config.in --- v2.3.99-pre1/linux/drivers/usb/Config.in Tue Mar 14 19:10:40 2000 +++ linux/drivers/usb/Config.in Fri Mar 17 14:19:42 2000 @@ -8,7 +8,9 @@ if [ ! "$CONFIG_USB" = "n" ]; then comment 'USB Controllers' - dep_tristate ' UHCI (Intel PIIX4, VIA, ...) support' CONFIG_USB_UHCI $CONFIG_USB + if [ "$CONFIG_USB_UHCI_ALT" != "y" ]; then + dep_tristate ' UHCI (Intel PIIX4, VIA, ...)
support' CONFIG_USB_UHCI $CONFIG_USB + fi if [ "$CONFIG_USB_UHCI" != "y" ]; then dep_tristate ' UHCI Alternate Driver (JE) support' CONFIG_USB_UHCI_ALT $CONFIG_USB if [ "$CONFIG_USB_UHCI_ALT" != "n" -a "$CONFIG_EXPERIMENTAL" = "y" ]; then @@ -34,21 +36,26 @@ bool ' USB FTDI Single Port Serial Driver (EXPERIMENTAL)' CONFIG_USB_SERIAL_FTDI_SIO bool ' USB Keyspan PDA Single Port Serial Driver (EXPERIMENTAL)' CONFIG_USB_SERIAL_KEYSPAN_PDA fi + bool ' USB Serial Converter verbose debug' CONFIG_USB_SERIAL_DEBUG fi dep_tristate ' USB CPiA Camera support' CONFIG_USB_CPIA $CONFIG_USB dep_tristate ' USB IBM (Xirlink) C-it Camera support' CONFIG_USB_IBMCAM $CONFIG_USB dep_tristate ' USB OV511 Camera support' CONFIG_USB_OV511 $CONFIG_USB dep_tristate ' USB Kodak DC-2xx Camera support' CONFIG_USB_DC2XX $CONFIG_USB - dep_tristate ' USB Mass Storage support' CONFIG_USB_STORAGE $CONFIG_USB - if [ "$CONFIG_USB_STORAGE" != "n" ]; then - bool ' USB Mass Storage verbose debug' CONFIG_USB_STORAGE_DEBUG + if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then + dep_tristate ' USB Mass Storage support (EXPERIMENTAL)' CONFIG_USB_STORAGE $CONFIG_USB m + if [ "$CONFIG_USB_STORAGE" != "n" ]; then + bool ' USB Mass Storage verbose debug' CONFIG_USB_STORAGE_DEBUG + fi fi dep_tristate ' USS720 parport driver' CONFIG_USB_USS720 $CONFIG_USB $CONFIG_PARPORT dep_tristate ' DABUSB driver' CONFIG_USB_DABUSB $CONFIG_USB - dep_tristate ' PLUSB Prolific USB-Network driver' CONFIG_USB_PLUSB $CONFIG_USB - dep_tristate ' USB ADMtek Pegasus-based device support' CONFIG_USB_PEGASUS $CONFIG_USB - dep_tristate ' USB Diamond Rio500 support' CONFIG_USB_RIO500 $CONFIG_USB - dep_tristate ' D-Link USB FM radio support' CONFIG_USB_DSBR $CONFIG_USB + if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then + dep_tristate ' PLUSB Prolific USB-Network driver (EXPERIMENTAL)' CONFIG_USB_PLUSB $CONFIG_USB + dep_tristate ' USB ADMtek Pegasus-based device support (EXPERIMENTAL)' CONFIG_USB_PEGASUS $CONFIG_USB + dep_tristate ' USB Diamond Rio500 
support (EXPERIMENTAL)' CONFIG_USB_RIO500 $CONFIG_USB + dep_tristate ' D-Link USB FM radio support (EXPERIMENTAL)' CONFIG_USB_DSBR $CONFIG_USB + fi comment 'USB HID' dep_tristate ' USB Human Interface Device (HID) support' CONFIG_USB_HID $CONFIG_USB diff -u --recursive --new-file v2.3.99-pre1/linux/drivers/usb/Makefile linux/drivers/usb/Makefile --- v2.3.99-pre1/linux/drivers/usb/Makefile Tue Mar 14 19:10:40 2000 +++ linux/drivers/usb/Makefile Fri Mar 17 14:11:49 2000 @@ -45,7 +45,7 @@ obj-y += serial/serial.o else ifeq ($(CONFIG_USB_SERIAL),m) - MOD_SUB_DIRS += serial + MOD_IN_SUB_DIRS += serial endif endif diff -u --recursive --new-file v2.3.99-pre1/linux/drivers/usb/dc2xx.c linux/drivers/usb/dc2xx.c --- v2.3.99-pre1/linux/drivers/usb/dc2xx.c Fri Mar 10 16:40:44 2000 +++ linux/drivers/usb/dc2xx.c Wed Mar 15 11:00:11 2000 @@ -25,7 +25,8 @@ * and have fun! * * This should also work for a number of other digital (non-Kodak) cameras, - * by adding the vendor and product IDs to the table below. + * by adding the vendor and product IDs to the table below. They'll need + * to be the sort using USB just as a fast bulk data channel. */ /* @@ -100,7 +101,8 @@ // { 0x03f0, 0xffff }, // HP PhotoSmart C500 /* Other USB devices may well work here too, so long as they - * just stick to half duplex bulk packet exchanges. + * just stick to half duplex bulk packet exchanges. That + * means, among other things, no iso or interrupt endpoints. 
*/ }; @@ -162,7 +164,7 @@ usb_rcvbulkpipe (camera->dev, camera->inEP), camera->buf, len, &count, HZ*10); - dbg ("read (%d) - 0x%x %ld", len, result, count); + dbg ("read (%d) - 0x%x %d", len, result, count); if (!result) { if (copy_to_user (buf, camera->buf, count)) @@ -382,7 +384,7 @@ err ("no memory!"); return NULL; } - camera->dev = dev; + camera->info = camera_info; camera->subminor = i; camera->isActive = 0; camera->buf = NULL; @@ -413,19 +415,22 @@ || endpoint [1].bmAttributes != USB_ENDPOINT_XFER_BULK ) { dbg ("Bogus endpoints"); - camera->dev = NULL; - return NULL; + goto error; } if (usb_set_configuration (dev, dev->config[0].bConfigurationValue)) { err ("Failed usb_set_configuration"); - camera->dev = NULL; - return NULL; + goto error; } - camera->info = camera_info; + camera->dev = dev; return camera; + +error: + minor_data [camera->subminor] = NULL; + kfree (camera); + return NULL; } static void camera_disconnect(struct usb_device *dev, void *ptr) diff -u --recursive --new-file v2.3.99-pre1/linux/drivers/usb/drivers.c linux/drivers/usb/drivers.c --- v2.3.99-pre1/linux/drivers/usb/drivers.c Fri Mar 10 16:40:44 2000 +++ linux/drivers/usb/drivers.c Sat Mar 18 11:41:15 2000 @@ -69,7 +69,11 @@ pos = *ppos; for (; tmp != &usb_driver_list; tmp = tmp->next) { struct usb_driver *driver = list_entry(tmp, struct usb_driver, driver_list); - start += sprintf (start, "%s\n", driver->name); + int minor = driver->fops ? 
driver->minor : -1; + if (minor == -1) + start += sprintf (start, " %s\n", driver->name); + else + start += sprintf (start, "%3d-%3d: %s\n", minor, minor + 15, driver->name); if (start > end) { start += sprintf(start, "(truncated)\n"); break; diff -u --recursive --new-file v2.3.99-pre1/linux/drivers/usb/dsbr100.c linux/drivers/usb/dsbr100.c --- v2.3.99-pre1/linux/drivers/usb/dsbr100.c Tue Mar 14 19:10:40 2000 +++ linux/drivers/usb/dsbr100.c Wed Mar 15 19:24:01 2000 @@ -48,12 +48,6 @@ #include - -#if CONFIG_MODVERSIONS==1 -#define MODVERSIONS -#include -#endif - #include #include #include diff -u --recursive --new-file v2.3.99-pre1/linux/drivers/usb/hub.c linux/drivers/usb/hub.c --- v2.3.99-pre1/linux/drivers/usb/hub.c Fri Mar 10 16:40:44 2000 +++ linux/drivers/usb/hub.c Fri Mar 17 14:19:33 2000 @@ -4,8 +4,6 @@ * (C) Copyright 1999 Linus Torvalds * (C) Copyright 1999 Johannes Erdfelt * (C) Copyright 1999 Gregory P. Smith - * - * $Id: hub.c,v 1.21 2000/01/16 21:19:44 acher Exp $ */ #include @@ -75,31 +73,29 @@ * the low-level driver that it wants to be re-activated, * or zero to say "I'm done". 
*/ -static int hub_irq(int status, void *__buffer, int len, void *dev_id) +static void hub_irq(struct urb *urb) { - struct usb_hub *hub = dev_id; + struct usb_hub *hub = (struct usb_hub *)urb->context; unsigned long flags; - switch (status) { - case -ENODEV: - /* Just ignore it */ - break; - case 0: - /* Something happened, let khubd figure it out */ - if (waitqueue_active(&khubd_wait)) { - /* Add the hub to the event queue */ - spin_lock_irqsave(&hub_event_lock, flags); - if (hub->event_list.next == &hub->event_list) { - list_add(&hub->event_list, &hub_event_list); - /* Wake up khubd */ - wake_up(&khubd_wait); - } - spin_unlock_irqrestore(&hub_event_lock, flags); - } - break; + if (urb->status) { + if (urb->status != -ENOENT) + dbg("nonzero status in irq %d", urb->status); + + return; } - return 1; + /* Something happened, let khubd figure it out */ + if (waitqueue_active(&khubd_wait)) { + /* Add the hub to the event queue */ + spin_lock_irqsave(&hub_event_lock, flags); + if (hub->event_list.next == &hub->event_list) { + list_add(&hub->event_list, &hub_event_list); + /* Wake up khubd */ + wake_up(&khubd_wait); + } + spin_unlock_irqrestore(&hub_event_lock, flags); + } } static void usb_hub_power_on(struct usb_hub *hub) @@ -196,13 +192,14 @@ return 0; } -static void * hub_probe(struct usb_device *dev, unsigned int i) +static void *hub_probe(struct usb_device *dev, unsigned int i) { struct usb_interface_descriptor *interface; struct usb_endpoint_descriptor *endpoint; struct usb_hub *hub; unsigned long flags; - int ret; + unsigned int pipe; + int maxp, ret; interface = &dev->actconfig->interface[i].altsetting[0]; @@ -233,7 +230,8 @@ /* We found a hub */ info("USB hub found"); - if ((hub = kmalloc(sizeof(*hub), GFP_KERNEL)) == NULL) { + hub = kmalloc(sizeof(*hub), GFP_KERNEL); + if (!hub) { err("couldn't kmalloc hub struct"); return NULL; } @@ -250,26 +248,24 @@ spin_unlock_irqrestore(&hub_event_lock, flags); if (usb_hub_configure(hub) >= 0) { - hub->irqpipe = 
usb_rcvintpipe(dev, endpoint->bEndpointAddress); - ret = usb_request_irq(dev, hub->irqpipe, - hub_irq, endpoint->bInterval, - hub, &hub->irq_handle); - if (ret) { - err("usb_request_irq failed (%d)", ret); - /* free hub, but first clean up its list. */ - spin_lock_irqsave(&hub_event_lock, flags); + pipe = usb_rcvintpipe(dev, endpoint->bEndpointAddress); + maxp = usb_maxpacket(dev, pipe, usb_pipeout(pipe)); - /* Delete it and then reset it */ - list_del(&hub->event_list); - INIT_LIST_HEAD(&hub->event_list); - list_del(&hub->hub_list); - INIT_LIST_HEAD(&hub->hub_list); + if (maxp > sizeof(hub->buffer)) + maxp = sizeof(hub->buffer); - spin_unlock_irqrestore(&hub_event_lock, flags); - - kfree(hub); + hub->urb = usb_alloc_urb(0); + if (!hub->urb) { + err("couldn't allocate interrupt urb"); + goto fail; + } - return NULL; + FILL_INT_URB(hub->urb, dev, pipe, hub->buffer, maxp, hub_irq, + hub, endpoint->bInterval); + ret = usb_submit_urb(hub->urb); + if (ret) { + err("usb_submit_urb failed (%d)", ret); + goto fail; } /* Wake up khubd */ @@ -277,11 +273,27 @@ } return hub; + +fail: + /* free hub, but first clean up its list. 
*/ + spin_lock_irqsave(&hub_event_lock, flags); + + /* Delete it and then reset it */ + list_del(&hub->event_list); + INIT_LIST_HEAD(&hub->event_list); + list_del(&hub->hub_list); + INIT_LIST_HEAD(&hub->hub_list); + + spin_unlock_irqrestore(&hub_event_lock, flags); + + kfree(hub); + + return NULL; } static void hub_disconnect(struct usb_device *dev, void *ptr) { - struct usb_hub *hub = ptr; + struct usb_hub *hub = (struct usb_hub *)ptr; unsigned long flags; spin_lock_irqsave(&hub_event_lock, flags); @@ -294,8 +306,10 @@ spin_unlock_irqrestore(&hub_event_lock, flags); - if (hub->irq_handle) { - usb_release_irq(hub->dev, hub->irq_handle, hub->irqpipe); + if (hub->urb) { + usb_unlink_urb(hub->urb); + usb_free_urb(hub->urb); + hub->urb = NULL; } /* Free the memory */ diff -u --recursive --new-file v2.3.99-pre1/linux/drivers/usb/hub.h linux/drivers/usb/hub.h --- v2.3.99-pre1/linux/drivers/usb/hub.h Fri Jan 21 18:19:17 2000 +++ linux/drivers/usb/hub.h Fri Mar 17 14:19:33 2000 @@ -78,16 +78,10 @@ __u8 bDescriptorType; __u8 bNbrPorts; __u16 wHubCharacteristics; -#if 0 - __u8 wHubCharacteristics[2]; /* __u16 but not aligned! 
*/ -#endif __u8 bPwrOn2PwrGood; __u8 bHubContrCurrent; /* DeviceRemovable and PortPwrCtrlMask want to be variable-length bitmaps that hold max 256 entries, but for now they're ignored */ -#if 0 - __u8 filler; -#endif } __attribute__ ((packed)); struct usb_device; @@ -112,9 +106,10 @@ /* Device structure */ struct usb_device *dev; - /* Reference to the hub's polling IRQ and its associated pipe */ - void *irq_handle; - unsigned int irqpipe; + /* Interrupt polling pipe */ + struct urb *urb; + + char buffer[USB_MAXCHILDREN / 8]; /* List of hubs */ struct list_head hub_list; diff -u --recursive --new-file v2.3.99-pre1/linux/drivers/usb/joydev.c linux/drivers/usb/joydev.c --- v2.3.99-pre1/linux/drivers/usb/joydev.c Thu Mar 2 14:36:23 2000 +++ linux/drivers/usb/joydev.c Fri Mar 17 14:19:33 2000 @@ -224,8 +224,8 @@ struct JS_DATA_TYPE data; - data.buttons = (joydev->nkey > 0 && test_bit(joydev->keypam[0], input->key)) ? 1 : 0 | - (joydev->nkey > 1 && test_bit(joydev->keypam[1], input->key)) ? 2 : 0; + data.buttons = ((joydev->nkey > 0 && test_bit(joydev->keypam[0], input->key)) ? 1 : 0) | + ((joydev->nkey > 1 && test_bit(joydev->keypam[1], input->key)) ? 2 : 0); data.x = ((joydev_correct(input->abs[ABS_X], &joydev->corr[0]) / 256) + 128) >> joydev->glue.JS_CORR.x; data.y = ((joydev_correct(input->abs[ABS_Y], &joydev->corr[1]) / 256) + 128) >> joydev->glue.JS_CORR.y; diff -u --recursive --new-file v2.3.99-pre1/linux/drivers/usb/serial/Makefile linux/drivers/usb/serial/Makefile --- v2.3.99-pre1/linux/drivers/usb/serial/Makefile Tue Mar 14 19:10:40 2000 +++ linux/drivers/usb/serial/Makefile Fri Mar 17 14:11:49 2000 @@ -14,7 +14,7 @@ O_TARGET := serial.o M_OBJS := usb-serial.o O_OBJS := usb-serial.o -#MOD_LIST_NAME := USB_MODULES +MOD_LIST_NAME := USB_SERIAL_MODULES # Objects that export symbols. 
diff -u --recursive --new-file v2.3.99-pre1/linux/drivers/usb/serial/usb-serial.c linux/drivers/usb/serial/usb-serial.c --- v2.3.99-pre1/linux/drivers/usb/serial/usb-serial.c Fri Mar 10 16:40:45 2000 +++ linux/drivers/usb/serial/usb-serial.c Fri Mar 17 14:11:49 2000 @@ -14,6 +14,10 @@ * * See Documentation/usb/usb-serial.txt for more information on using this driver * + * (03/17/2000) gkh + * Added config option for debugging messages. + * Added patch for keyspan pda from Brian Warner. + * * (03/06/2000) gkh * Added the keyspan pda code from Brian Warner * Moved a bunch of the port specific stuff into its own structure. This @@ -175,7 +179,12 @@ #include #include #include -#define DEBUG + +#ifdef CONFIG_USB_SERIAL_DEBUG + #define DEBUG +#else + #undef DEBUG +#endif #include #ifdef CONFIG_USB_SERIAL_WHITEHEAT @@ -1428,6 +1437,7 @@ case 2: /* tx unthrottle interrupt */ serial->tx_throttled = 0; wake_up(&serial->write_wait); /* wake up writer */ + wake_up(&tty->write_wait); /* them too */ break; default: break; @@ -1846,25 +1856,12 @@ static int keyspan_pda_chars_in_buffer (struct tty_struct *tty) { struct usb_serial *serial = (struct usb_serial *)tty->driver_data; - unsigned char count; - int rc; - - /* used by tty stuff to wait for output to drain. Go ask the - device how much is still queued in the tx ring */ - rc = usb_control_msg(serial->dev, usb_rcvctrlpipe(serial->dev, 0), - 6, /* write_room */ - USB_TYPE_VENDOR | USB_RECIP_INTERFACE - | USB_DIR_IN, - 1, /* value: 1 means chars_in_buffer */ - 0, /* index */ - &count, - 1, - 2*HZ); - if (rc < 0) - return rc; /* failed */ - if (rc == 0) - return -EIO; /* device didn't return any data */ - return (count); + + /* when throttled, return at least WAKEUP_CHARS to tell select() (via + n_tty.c:normal_poll() ) that we're not writeable. 
*/ + if (serial->tx_throttled) + return 256; + return 0; } diff -u --recursive --new-file v2.3.99-pre1/linux/drivers/usb/serial/usb-serial.h linux/drivers/usb/serial/usb-serial.h --- v2.3.99-pre1/linux/drivers/usb/serial/usb-serial.h Fri Mar 10 16:40:45 2000 +++ linux/drivers/usb/serial/usb-serial.h Fri Mar 17 14:11:49 2000 @@ -371,6 +371,7 @@ num_interrupt_in: NUM_DONT_CARE, num_bulk_in: NUM_DONT_CARE, num_bulk_out: NUM_DONT_CARE, + num_ports: 1, startup: keyspan_pda_fake_startup }; static struct usb_serial_device_type keyspan_pda_device = { diff -u --recursive --new-file v2.3.99-pre1/linux/drivers/usb/uhci.c linux/drivers/usb/uhci.c --- v2.3.99-pre1/linux/drivers/usb/uhci.c Tue Mar 14 19:10:40 2000 +++ linux/drivers/usb/uhci.c Fri Mar 17 14:19:33 2000 @@ -689,19 +689,18 @@ return 0; td_error: - /* Some debugging code */ - if (debug) { + if (status & TD_CTRL_STALLED) + /* endpoint has stalled - mark it halted */ + usb_endpoint_halt(urb->dev, uhci_endpoint(td->info), + uhci_packetout(td->info)); + else if (debug) { + /* Some debugging code */ dbg("uhci_result_control() failed with status %x", status); /* Print the chain for debugging purposes */ uhci_show_queue(urbp->qh); } - if (status & TD_CTRL_STALLED) - /* endpoint has stalled - mark it halted */ - usb_endpoint_halt(urb->dev, uhci_endpoint(td->info), - uhci_packetout(td->info)); - return uhci_map_status(status, uhci_packetout(td->info)); } @@ -818,8 +817,12 @@ return 0; td_error: - /* Some debugging code */ - if (debug) { + if (status & TD_CTRL_STALLED) + /* endpoint has stalled - mark it halted */ + usb_endpoint_halt(urb->dev, uhci_endpoint(td->info), + uhci_packetout(td->info)); + else if (debug) { + /* Some debugging code */ dbg("uhci_result_interrupt/bulk() failed with status %x", status); @@ -830,11 +833,6 @@ uhci_show_td(td); } - if (status & TD_CTRL_STALLED) - /* endpoint has stalled - mark it halted */ - usb_endpoint_halt(urb->dev, uhci_endpoint(td->info), - uhci_packetout(td->info)); - return 
uhci_map_status(status, uhci_packetout(td->info)); } @@ -1251,12 +1249,14 @@ uhci_unlink_generic(urb); if (urb->transfer_flags & USB_ASYNC_UNLINK) { + urb->status = -ECONNABORTED; + spin_lock_irqsave(&uhci->urb_remove_lock, flags); list_add(&urb->urb_list, &uhci->urb_remove_list); spin_unlock_irqrestore(&uhci->urb_remove_lock, flags); - - urb->status = -ECONNABORTED; } else { + urb->status = -ENOENT; + if (in_interrupt()) { /* wait at least 1 frame */ static int errorcount = 10; @@ -1268,8 +1268,6 @@ if (urb->complete) urb->complete(urb); - - urb->status = -ENOENT; } } diff -u --recursive --new-file v2.3.99-pre1/linux/drivers/usb/usb-ohci.c linux/drivers/usb/usb-ohci.c --- v2.3.99-pre1/linux/drivers/usb/usb-ohci.c Fri Mar 10 16:40:45 2000 +++ linux/drivers/usb/usb-ohci.c Fri Mar 17 14:19:42 2000 @@ -252,6 +252,9 @@ if (urb->hcpriv) return -EINVAL; /* urb already in use */ +// if(usb_endpoint_halted (urb->dev, usb_pipeendpoint (pipe), usb_pipeout (pipe))) +// return -EPIPE; + usb_inc_dev_use (urb->dev); ohci = (ohci_t *) urb->dev->bus->hcpriv; @@ -838,28 +841,36 @@ int data_len = urb->transfer_buffer_length; int cnt = 0; __u32 info = 0; - + unsigned int toggle = 0; + /* OHCI handles the DATA-toggles itself, we just use the USB-toggle bits for reseting */ + if(usb_gettoggle(urb->dev, usb_pipeendpoint(urb->pipe), usb_pipeout(urb->pipe))) { + toggle = TD_T_TOGGLE; + } else { + toggle = TD_T_DATA0; + usb_settoggle(urb->dev, usb_pipeendpoint(urb->pipe), usb_pipeout(urb->pipe), 1); + } + urb_priv->td_cnt = 0; switch (usb_pipetype (urb->pipe)) { case PIPE_BULK: info = usb_pipeout (urb->pipe)? - TD_CC | TD_DP_OUT | TD_T_TOGGLE: TD_CC | TD_DP_IN | TD_T_TOGGLE; + TD_CC | TD_DP_OUT : TD_CC | TD_DP_IN ; while(data_len > 4096) { - td_fill (info, data, 4096, urb, (cnt? 0: ST_ADDR) | ADD_LEN, cnt); + td_fill (info | (cnt? TD_T_TOGGLE:toggle), data, 4096, urb, (cnt? 0: ST_ADDR) | ADD_LEN, cnt); data += 4096; data_len -= 4096; cnt++; } info = usb_pipeout (urb->pipe)? 
- TD_CC | TD_DP_OUT | TD_T_TOGGLE: TD_CC | TD_R | TD_DP_IN | TD_T_TOGGLE; - td_fill (info, data, data_len, urb, (cnt? 0: ST_ADDR) | ADD_LEN, cnt); + TD_CC | TD_DP_OUT : TD_CC | TD_R | TD_DP_IN ; + td_fill (info | (cnt? TD_T_TOGGLE:toggle), data, data_len, urb, (cnt? 0: ST_ADDR) | ADD_LEN, cnt); cnt++; writel (OHCI_BLF, &ohci->regs->cmdstatus); /* start bulk list */ break; case PIPE_INTERRUPT: info = usb_pipeout (urb->pipe)? - TD_CC | TD_DP_OUT | TD_T_TOGGLE: TD_CC | TD_R | TD_DP_IN | TD_T_TOGGLE; + TD_CC | TD_DP_OUT | toggle: TD_CC | TD_R | TD_DP_IN | toggle; td_fill (info, data, data_len, urb, ST_ADDR | ADD_LEN, cnt++); break; @@ -1059,6 +1070,8 @@ } /* error code of transfer */ cc = TD_CC_GET (tdINFO); + if( cc == TD_CC_STALL) usb_endpoint_halt(urb->dev, usb_pipeendpoint(urb->pipe), usb_pipeout(urb->pipe)); + if (!(urb->transfer_flags & USB_DISABLE_SPD) && (cc == TD_DATAUNDERRUN)) cc = TD_CC_NOERROR; if (++(urb_priv->td_cnt) == urb_priv->length) { diff -u --recursive --new-file v2.3.99-pre1/linux/drivers/usb/usb-storage.c linux/drivers/usb/usb-storage.c --- v2.3.99-pre1/linux/drivers/usb/usb-storage.c Tue Mar 14 19:10:40 2000 +++ linux/drivers/usb/usb-storage.c Wed Mar 15 16:42:46 2000 @@ -52,6 +52,10 @@ /* direction table -- this indicates the direction of the data * transfer for each command code -- a 1 indicates input */ +/* FIXME: we need to use the new direction indicators in the Scsi_Cmnd + * structure, not this table. First we need to evaluate if it's being set + * correctly for us, though + */ unsigned char us_direction[256/8] = { 0x28, 0x81, 0x14, 0x14, 0x20, 0x01, 0x90, 0x77, 0x0C, 0x20, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, @@ -81,13 +85,12 @@ __u8 ep_int; /* interrupt . */ __u8 subclass; /* as in overview */ __u8 protocol; /* .............. */ - __u8 attention_done; /* force attn on first cmd */ trans_cmnd transport; /* protocol specific do cmd */ trans_reset transport_reset; /* .......... 
device reset */ proto_cmnd proto_handler; /* protocol handler */ GUID(guid); /* unique dev id */ struct Scsi_Host *host; /* our dummy host data */ - Scsi_Host_Template *htmplt; /* own host template */ + Scsi_Host_Template htmplt; /* own host template */ int host_number; /* to find us */ int host_no; /* allocated by scsi */ Scsi_Cmnd *srb; /* current srb */ @@ -114,7 +117,9 @@ #define US_ACT_BUS_RESET 4 #define US_ACT_HOST_RESET 5 +/* The list of structures and the protective lock for them */ static struct us_data *us_list; +spinlock_t us_list_spinlock = SPIN_LOCK_UNLOCKED; static void * storage_probe(struct usb_device *dev, unsigned int ifnum); static void storage_disconnect(struct usb_device *dev, void *ptr); @@ -225,7 +230,7 @@ srb->result = result; } -/* calculate the length of the data transfer (not the command) for any +/* Calculate the length of the data transfer (not the command) for any * given SCSI command */ static unsigned int us_transfer_length(Scsi_Cmnd *srb) @@ -243,14 +248,18 @@ case TEST_UNIT_READY: return 0; + /* FIXME: these should be removed and tested */ case REQUEST_SENSE: case INQUIRY: case MODE_SENSE: return srb->cmnd[4]; + /* FIXME: this needs to come out when the other + * fix is in place */ case READ_CAPACITY: return 8; + /* FIXME: these should be removed and tested */ case LOG_SENSE: case MODE_SENSE_10: return (srb->cmnd[7] << 8) + srb->cmnd[8]; @@ -795,7 +804,7 @@ return USB_STOR_TRANSPORT_ERROR; } - /* FIXME: we need to handle NAKs here */ + /* FIXME: we need to handle NAKs here */ return USB_STOR_TRANSPORT_ERROR; } @@ -812,7 +821,9 @@ /* STATUS STAGE */ /* go to sleep until we get this interrup */ - /* FIXME: this should be changed to use a timeout */ + /* FIXME: this should be changed to use a timeout -- or let the + * device reset routine up() this for us to unjam us + */ down(&(us->ip_waitq)); /* FIXME: currently this code is unreachable, but the idea is @@ -827,8 +838,8 @@ US_DEBUGP("Got interrupt data 0x%x\n", us->ip_data); /* UFI 
gives us ASC and ASCQ, like a request sense */ - /* FIXME: is this right? Do REQUEST_SENSE and INQUIRY need special - * case handling? + /* REQUEST_SENSE and INQUIRY don't affect the sense data, so we + * ignore the information for those commands */ if (us->subclass == US_SC_UFI) { if (srb->cmnd[0] == REQUEST_SENSE || @@ -887,7 +898,7 @@ return USB_STOR_TRANSPORT_ERROR; } - /* FIXME: we need to handle NAKs here */ + /* FIXME: we need to handle NAKs here */ return USB_STOR_TRANSPORT_ERROR; } @@ -1103,7 +1114,8 @@ static int us_release(struct Scsi_Host *psh) { struct us_data *us = (struct us_data *)psh->hostdata[0]; - struct us_data *prev = (struct us_data *)&us_list; + struct us_data *prev; + unsigned long flags; if (us->irq_handle) { usb_release_irq(us->pusb_dev, us->irq_handle, us->irqpipe); @@ -1111,18 +1123,20 @@ } /* FIXME: release the interface claim here? */ - // if (us->pusb_dev) - // usb_deregister(&storage_driver); - /* FIXME - leaves hanging host template copy */ - /* (because scsi layer uses it after removal !!!) 
*/ + /* FIXME: we need to move this elsewhere -- + * the remove function only gets called to remove the module + */ + spin_lock_irqsave(&us_list_spinlock, flags); if (us_list == us) us_list = us->next; else { + prev = us_list; while (prev->next != us) prev = prev->next; prev->next = us->next; } + spin_unlock_irqrestore(&us_list_spinlock, flags); return 0; } @@ -1190,11 +1204,20 @@ int usb_stor_proc_info (char *buffer, char **start, off_t offset, int length, int hostno, int inout) { - struct us_data *us = us_list; + struct us_data *us; char *pos = buffer; char *tmp_ptr; + unsigned long flags; + + /* if someone is sending us data, just throw it away */ + if (inout) + return length; + + /* lock the data structures */ + spin_lock_irqsave(&us_list_spinlock, flags); /* find our data from hostno */ + us = us_list; while (us) { if (us->host_no == hostno) break; @@ -1202,13 +1225,11 @@ } /* if we couldn't find it, we return an error */ - if (!us) + if (!us) { + spin_unlock_irqrestore(&us_list_spinlock, flags); return -ESRCH; - - /* if someone is sending us data, just throw it away */ - if (inout) - return length; - + } + /* print the controler name */ SPRINTF ("Host scsi%d: usb-storage\n", hostno); @@ -1254,6 +1275,9 @@ /* show the GUID of the device */ SPRINTF(" GUID: " GUID_FORMAT "\n", GUID_ARGS(us->guid)); + /* release our lock on the data structures */ + spin_unlock_irqrestore(&us_list_spinlock, flags); + /* * Calculate start of next buffer, and return value. 
*/ @@ -1365,6 +1389,7 @@ switch (action) { case US_ACT_COMMAND: /* bad device */ + /* FIXME: we need to enable and test multiple LUNs */ if (us->srb->target || us->srb->lun) { US_DEBUGP( "Bad device number (%d/%d) or dev 0x%x\n", us->srb->target, us->srb->lun, (unsigned int)us->pusb_dev); @@ -1378,9 +1403,12 @@ /* our device has gone - pretend not ready */ /* FIXME: we also need to handle INQUIRY here, * probably */ + /* FIXME: fix return codes and sense buffer handling */ if (!us->pusb_dev) { + US_DEBUGP("Request is for removed device\n"); if (us->srb->cmnd[0] == REQUEST_SENSE) { - memcpy(us->srb->request_buffer, sense_notready, + memcpy(us->srb->request_buffer, + sense_notready, sizeof(sense_notready)); us->srb->result = DID_OK << 16; } else { @@ -1443,34 +1471,29 @@ /* Probe to see if a new device is actually a SCSI device */ static void * storage_probe(struct usb_device *dev, unsigned int ifnum) { - struct usb_interface_descriptor *interface; int i; char mf[32]; /* manufacturer */ char prod[32]; /* product */ char serial[32]; /* serial number */ struct us_data *ss = NULL; - unsigned int flags = 0; GUID(guid); /* Global Unique Identifier */ - struct us_data *prev; - int protocol = 0; - int subclass = 0; + int result; + unsigned long flags; + + /* these are temporary copies -- we test on these, then put them + * in the us-data structure + */ + __u8 ep_in = 0; + __u8 ep_out = 0; + __u8 ep_int = 0; + __u8 subclass = 0; + __u8 protocol = 0; + + /* the altsetting 0 on the interface we're probing */ struct usb_interface_descriptor *altsetting = &(dev->actconfig->interface[ifnum].altsetting[0]); - /* clear the GUID and fetch the strings */ - GUID_CLEAR(guid); - memset(mf, 0, sizeof(mf)); - memset(prod, 0, sizeof(prod)); - memset(serial, 0, sizeof(serial)); - if (dev->descriptor.iManufacturer) - usb_string(dev, dev->descriptor.iManufacturer, mf, sizeof(mf)); - if (dev->descriptor.iProduct) - usb_string(dev, dev->descriptor.iProduct, prod, sizeof(prod)); - if
(dev->descriptor.iSerialNumber) - usb_string(dev, dev->descriptor.iSerialNumber, serial, sizeof(serial)); - - /* let's examine the device now */ - + /* FIXME: this isn't quite right... */ /* We make an exception for the shuttle E-USB */ if (dev->descriptor.idVendor == 0x04e6 && dev->descriptor.idProduct == 0x0001) { @@ -1487,6 +1510,91 @@ /* At this point, we know we've got a live one */ US_DEBUGP("USB Mass Storage device detected\n"); + /* + * We are expecting a minimum of 2 endpoints - in and out (bulk). + * An optional interrupt is OK (necessary for CBI protocol). + * We will ignore any others. + */ + for (i = 0; i < altsetting->bNumEndpoints; i++) { + /* is it an BULK endpoint? */ + if ((altsetting->endpoint[i].bmAttributes & + USB_ENDPOINT_XFERTYPE_MASK) == USB_ENDPOINT_XFER_BULK) { + if (altsetting->endpoint[i].bEndpointAddress & + USB_DIR_IN) + ep_in = altsetting->endpoint[i].bEndpointAddress & + USB_ENDPOINT_NUMBER_MASK; + else + ep_out = altsetting->endpoint[i].bEndpointAddress & + USB_ENDPOINT_NUMBER_MASK; + } + + /* is it an interrupt endpoint? 
*/ + if ((altsetting->endpoint[i].bmAttributes & + USB_ENDPOINT_XFERTYPE_MASK) == USB_ENDPOINT_XFER_INT) { + ep_int = altsetting->endpoint[i].bEndpointAddress & + USB_ENDPOINT_NUMBER_MASK; + } + } + US_DEBUGP("Endpoints In %d Out %d Int %d\n", + ep_in, ep_out, ep_int); + + /* set the interface -- STALL is an acceptable response here */ + result = usb_set_interface(dev, altsetting->bInterfaceNumber, 0); + US_DEBUGP("Result from usb_set_interface is %d\n", result); + if (result == -EPIPE) { + usb_clear_halt(dev, usb_sndctrlpipe(dev, 0)); + } else if (result != 0) { + /* it's not a stall, but another error -- time to bail */ + return NULL; + } + + /* shuttle E-USB */ + /* FIXME: all we should need to do here is determine the protocol */ + if (dev->descriptor.idVendor == 0x04e6 && + dev->descriptor.idProduct == 0x0001) { + __u8 qstat[2]; + + result = usb_control_msg(ss->pusb_dev, + usb_rcvctrlpipe(dev,0), + 1, 0xC0, + 0, ss->ifnum, + qstat, 2, HZ*5); + US_DEBUGP("C0 status 0x%x 0x%x\n", qstat[0], qstat[1]); + init_MUTEX_LOCKED(&(ss->ip_waitq)); + ss->irqpipe = usb_rcvintpipe(ss->pusb_dev, ss->ep_int); + result = usb_request_irq(ss->pusb_dev, ss->irqpipe, + CBI_irq, 255, (void *)ss, + &ss->irq_handle); + if (result < 0) + return NULL; + + /* FIXME: what is this?? 
*/ + down(&(ss->ip_waitq)); + } + + /* Do some basic sanity checks, and bail if we find a problem */ + if (!ep_in || !ep_out || (protocol == US_PR_CBI && !ep_int)) { + US_DEBUGP("Problems with device\n"); + return NULL; + } + + /* At this point, we're committed to using the device */ + + /* clear the GUID and fetch the strings */ + GUID_CLEAR(guid); + memset(mf, 0, sizeof(mf)); + memset(prod, 0, sizeof(prod)); + memset(serial, 0, sizeof(serial)); + if (dev->descriptor.iManufacturer) + usb_string(dev, dev->descriptor.iManufacturer, mf, + sizeof(mf)); + if (dev->descriptor.iProduct) + usb_string(dev, dev->descriptor.iProduct, prod, + sizeof(prod)); + if (dev->descriptor.iSerialNumber) + usb_string(dev, dev->descriptor.iSerialNumber, serial, + sizeof(serial)); + /* Create a GUID for this device */ if (dev->descriptor.iSerialNumber && serial[0]) { /* If we have a serial number, and it's a non-NULL string */ @@ -1498,23 +1606,50 @@ dev->descriptor.idProduct, "0"); } - /* Now check if we have seen this GUID before, and restore - * the flags if we find it - */ - for (ss = us_list; ss != NULL; ss = ss->next) { - if (!ss->pusb_dev && GUID_EQUAL(guid, ss->guid)) { - US_DEBUGP("Found existing GUID " GUID_FORMAT "\n", - GUID_ARGS(guid)); - flags = ss->flags; - break; - } - } + /* lock access to the data structures */ + spin_lock_irqsave(&us_list_spinlock, flags); - /* If ss == NULL, then this is a new device. 
Allocate memory for it */ - if (!ss) { - if ((ss = (struct us_data *)kmalloc(sizeof(*ss), + /* + * Now check if we have seen this GUID before + * We're looking for a device with a matching GUID that isn't + * already on the system + */ + ss = us_list; + while ((ss != NULL) && + ((ss->pusb_dev) || !GUID_EQUAL(guid, ss->guid))) + ss = ss->next; + + if (ss != NULL) { + /* Existing device -- re-connect */ + US_DEBUGP("Found existing GUID " GUID_FORMAT "\n", + GUID_ARGS(guid)); + + /* establish the connection to the new device upon reconnect */ + ss->ifnum = ifnum; + ss->pusb_dev = dev; + + /* hook up the IRQ handler again */ + if (ss->protocol == US_PR_CBI) { + /* set up so we'll wait for notification */ + init_MUTEX_LOCKED(&(ss->ip_waitq)); + + /* set up the IRQ pipe and handler */ + /* FIXME: This needs to get period from the device */ + US_DEBUGP("Allocating IRQ for CBI transport\n"); + ss->irqpipe = usb_rcvintpipe(ss->pusb_dev, ss->ep_int); + result = usb_request_irq(ss->pusb_dev, ss->irqpipe, + CBI_irq, 255, + (void *)ss, &ss->irq_handle); + US_DEBUGP("usb_request_irq returned %d\n", result); + } + } else { + /* New device -- Allocate memory and initialize */ + US_DEBUGP("New GUID " GUID_FORMAT "\n", GUID_ARGS(guid)); + + if ((ss = (struct us_data *)kmalloc(sizeof(struct us_data), GFP_KERNEL)) == NULL) { printk(KERN_WARNING USB_STORAGE "Out of memory\n"); + spin_unlock_irqrestore(&us_list_spinlock, flags); return NULL; } memset(ss, 0, sizeof(struct us_data)); @@ -1522,104 +1657,66 @@ /* Initialize the mutexes only when the struct is new */ init_MUTEX_LOCKED(&(ss->sleeper)); init_MUTEX(&(ss->queue_exclusion)); - } - - /* establish the connection to the new device */ - interface = altsetting; - ss->flags = flags; - ss->ifnum = ifnum; - ss->attention_done = 0; - ss->pusb_dev = dev; - - /* If the device has subclass and protocol, then use that. Otherwise, - * take data from the specific interface.
- */ - if (subclass) { - ss->subclass = subclass; - ss->protocol = protocol; - } else { - ss->subclass = interface->bInterfaceSubClass; - ss->protocol = interface->bInterfaceProtocol; - } - - /* set the handler pointers based on the protocol */ - US_DEBUGP("Transport: "); - switch (ss->protocol) { - case US_PR_CB: - US_DEBUGPX("Control/Bulk\n"); - ss->transport = CB_transport; - ss->transport_reset = CB_reset; - break; - - case US_PR_CBI: - US_DEBUGPX("Control/Bulk/Interrupt\n"); - ss->transport = CBI_transport; - ss->transport_reset = CB_reset; - break; - - case US_PR_BULK: - US_DEBUGPX("Bulk\n"); - ss->transport = Bulk_transport; - ss->transport_reset = Bulk_reset; - break; - - default: - US_DEBUGPX("Unknown\n"); - kfree(ss); - return NULL; - break; - } - - /* - * We are expecting a minimum of 2 endpoints - in and out (bulk). - * An optional interrupt is OK (necessary for CBI protocol). - * We will ignore any others. - */ - for (i = 0; i < interface->bNumEndpoints; i++) { - /* is it an BULK endpoint? */ - if ((interface->endpoint[i].bmAttributes & USB_ENDPOINT_XFERTYPE_MASK) - == USB_ENDPOINT_XFER_BULK) { - if (interface->endpoint[i].bEndpointAddress & USB_DIR_IN) - ss->ep_in = interface->endpoint[i].bEndpointAddress & - USB_ENDPOINT_NUMBER_MASK; - else - ss->ep_out = interface->endpoint[i].bEndpointAddress & - USB_ENDPOINT_NUMBER_MASK; - } - /* is it an interrupt endpoint? 
*/ - if ((interface->endpoint[i].bmAttributes & USB_ENDPOINT_XFERTYPE_MASK) - == USB_ENDPOINT_XFER_INT) { - ss->ep_int = interface->endpoint[i].bEndpointAddress & - USB_ENDPOINT_NUMBER_MASK; - } - } - US_DEBUGP("Endpoints In %d Out %d Int %d\n", - ss->ep_in, ss->ep_out, ss->ep_int); - - /* Do some basic sanity checks, and bail if we find a problem */ - if (usb_set_interface(dev, interface->bInterfaceNumber, 0) || - !ss->ep_in || !ss->ep_out || - (ss->protocol == US_PR_CBI && ss->ep_int == 0)) { - US_DEBUGP("Problems with device\n"); - if (ss->host) { - kfree(ss->htmplt->name); - kfree(ss->htmplt); + /* + * If we've allready determined the subclass and protocol, + * use that. Otherwise, use the interface ones. This + * allows us to support devices which are compliant but + * don't announce it. Note that this information is + * maintained in the us_data struct so we only have to do + * this for new devices. + */ + if (subclass) { + ss->subclass = subclass; + ss->protocol = protocol; + } else { + ss->subclass = altsetting->bInterfaceSubClass; + ss->protocol = altsetting->bInterfaceProtocol; } - kfree(ss); - return NULL; - } + /* copy over the endpoint data */ + ss->ep_in = ep_in; + ss->ep_out = ep_out; + ss->ep_int = ep_int; + + /* establish the connection to the new device */ + ss->ifnum = ifnum; + ss->pusb_dev = dev; - /* If this is a new device (i.e. 
we haven't seen it before), we need to - * generate a scsi host definition, and register with scsi above us - */ - if (!ss->host) { /* copy the GUID we created before */ - US_DEBUGP("New GUID " GUID_FORMAT "\n", GUID_ARGS(guid)); memcpy(ss->guid, guid, sizeof(guid)); + + /* + * Set the handler pointers based on the protocol + * Again, this data is persistant across reattachments + */ + US_DEBUGP("Transport: "); + switch (ss->protocol) { + case US_PR_CB: + US_DEBUGPX("Control/Bulk\n"); + ss->transport = CB_transport; + ss->transport_reset = CB_reset; + break; + + case US_PR_CBI: + US_DEBUGPX("Control/Bulk/Interrupt\n"); + ss->transport = CBI_transport; + ss->transport_reset = CB_reset; + break; + + case US_PR_BULK: + US_DEBUGPX("Bulk\n"); + ss->transport = Bulk_transport; + ss->transport_reset = Bulk_reset; + break; + + default: + US_DEBUGPX("Unknown\n"); + kfree(ss); + return NULL; + break; + } - /* set class specific stuff */ US_DEBUGP("Protocol: "); switch (ss->subclass) { case US_SC_RBC: @@ -1656,97 +1753,75 @@ break; } - /* Allocate memory for the SCSI Host Template */ - if ((ss->htmplt = (Scsi_Host_Template *) - kmalloc(sizeof(Scsi_Host_Template),GFP_KERNEL))==NULL ) { - printk(KERN_WARNING USB_STORAGE "Out of memory\n"); - - kfree(ss); - return NULL; + if (ss->protocol == US_PR_CBI) { + /* set up so we'll wait for notification */ + init_MUTEX_LOCKED(&(ss->ip_waitq)); + + /* set up the IRQ pipe and handler */ + /* FIXME: This needs to get period from the device */ + US_DEBUGP("Allocating IRQ for CBI transport\n"); + ss->irqpipe = usb_rcvintpipe(ss->pusb_dev, ss->ep_int); + result = usb_request_irq(ss->pusb_dev, ss->irqpipe, + CBI_irq, 255, + (void *)ss, &ss->irq_handle); + US_DEBUGP("usb_request_irq returned %d", result); } + + /* + * Since this is a new device, we need to generate a scsi + * host definition, and register with the higher SCSI layers + */ /* Initialize the host template based on the default one */ - memcpy(ss->htmplt, &my_host_template, 
sizeof(my_host_template)); + memcpy(&(ss->htmplt), &my_host_template, + sizeof(my_host_template)); /* Grab the next host number */ ss->host_number = my_host_number++; - - /* MDD: FIXME: this is bad. We abuse this pointer so we + + /* FIXME: this is bad. We abuse this pointer so we * can pass the ss pointer to the host controler thread * in us_detect */ - (struct us_data *)ss->htmplt->proc_dir = ss; - - /* shuttle E-USB */ - if (dev->descriptor.idVendor == 0x04e6 && - dev->descriptor.idProduct == 0x0001) { - __u8 qstat[2]; - int result; - - result = usb_control_msg(ss->pusb_dev, - usb_rcvctrlpipe(dev,0), - 1, 0xC0, - 0, ss->ifnum, - qstat, 2, HZ*5); - US_DEBUGP("C0 status 0x%x 0x%x\n", qstat[0], qstat[1]); - init_MUTEX_LOCKED(&(ss->ip_waitq)); - ss->irqpipe = usb_rcvintpipe(ss->pusb_dev, ss->ep_int); - result = usb_request_irq(ss->pusb_dev, ss->irqpipe, - CBI_irq, 255, (void *)ss, - &ss->irq_handle); - if (result < 0) - return NULL; - /* FIXME: what is this?? */ - down(&(ss->ip_waitq)); - } else if (ss->protocol == US_PR_CBI) { - int result; - - /* set up so we'll wait for notification */ - init_MUTEX_LOCKED(&(ss->ip_waitq)); - - /* set up the IRQ pipe and handler */ - /* FIXME: This needs to get the period from the device */ - ss->irqpipe = usb_rcvintpipe(ss->pusb_dev, ss->ep_int); - result = usb_request_irq(ss->pusb_dev, ss->irqpipe, CBI_irq, - 255, (void *)ss, &ss->irq_handle); - if (result) { - US_DEBUGP("usb_request_irq failed (0x%x), No interrupt for CBI\n", - result); - } - } - - - /* start up our thread */ + (struct us_data *)ss->htmplt.proc_dir = ss; + + /* start up our thread */ { DECLARE_MUTEX_LOCKED(sem); - + ss->notify = &sem; ss->pid = kernel_thread(usb_stor_control_thread, ss, - CLONE_FS | CLONE_FILES | CLONE_SIGHAND); + CLONE_FS | CLONE_FILES | + CLONE_SIGHAND); if (ss->pid < 0) { - printk(KERN_WARNING USB_STORAGE "Unable to start control thread\n"); - kfree(ss->htmplt); - + printk(KERN_WARNING USB_STORAGE + "Unable to start control thread\n"); 
kfree(ss); return NULL; } - - /* wait for it to start */ + + /* wait for it to start */ down(&sem); } - + /* now register - our detect function will be called */ - ss->htmplt->module = &__this_module; - scsi_register_module(MODULE_SCSI_HA, ss->htmplt); - + ss->htmplt.module = &__this_module; + scsi_register_module(MODULE_SCSI_HA, &(ss->htmplt)); + /* put us in the list */ ss->next = us_list; - us_list = ss; + us_list = ss; } - printk(KERN_DEBUG "WARNING: USB Mass Storage data integrity not assured\n"); - printk(KERN_DEBUG "USB Mass Storage device found at %d\n", dev->devnum); + /* release the data structure lock */ + spin_unlock_irqrestore(&us_list_spinlock, flags); + printk(KERN_DEBUG + "WARNING: USB Mass Storage data integrity not assured\n"); + printk(KERN_DEBUG + "USB Mass Storage device found at %d\n", dev->devnum); + + /* return a pointer for the disconnect function */ return ss; } @@ -1758,6 +1833,14 @@ if (!ss) return; + /* FIXME: we need mututal exclusion and resource freeing here */ + + /* release the IRQ, if we have one */ + if (ss->irq_handle) { + usb_release_irq(ss->pusb_dev, ss->irq_handle, ss->irqpipe); + ss->irq_handle = NULL; + } + ss->pusb_dev = NULL; } @@ -1788,12 +1871,15 @@ { static struct us_data *ptr; - // FIXME: this needs to be put back to free _all_ the hosts - // for (ptr = us_list; ptr != NULL; ptr = ptr->next) - // scsi_unregister_module(MODULE_SCSI_HA, ptr->htmplt); - printk("MDD: us_list->htmplt is 0x%x\n", (unsigned int)(us_list->htmplt)); - scsi_unregister_module(MODULE_SCSI_HA, us_list->htmplt); + /* unregister all the virtual hosts */ + for (ptr = us_list; ptr != NULL; ptr = ptr->next) + scsi_unregister_module(MODULE_SCSI_HA, &(ptr->htmplt)); + + /* free up the data structures */ + + /* kill the threads */ + /* deregister the driver */ usb_deregister(&storage_driver) ; } diff -u --recursive --new-file v2.3.99-pre1/linux/drivers/usb/usb-uhci.c linux/drivers/usb/usb-uhci.c --- v2.3.99-pre1/linux/drivers/usb/usb-uhci.c Tue Mar 14 
19:10:40 2000 +++ linux/drivers/usb/usb-uhci.c Wed Mar 15 17:00:58 2000 @@ -28,6 +28,7 @@ #include #include /* for in_interrupt() */ #include +#include #if LINUX_VERSION_CODE > KERNEL_VERSION(2,3,44) #include #endif diff -u --recursive --new-file v2.3.99-pre1/linux/drivers/video/aty128fb.c linux/drivers/video/aty128fb.c --- v2.3.99-pre1/linux/drivers/video/aty128fb.c Tue Mar 14 19:10:40 2000 +++ linux/drivers/video/aty128fb.c Fri Mar 17 13:34:00 2000 @@ -18,7 +18,6 @@ * - determine MCLK from previous setting -done for x86 * - calculate XCLK, rather than probe BIOS * - hardware cursor support - * - acceleration (do not use with Rage128 Pro!) * - ioctl()'s */ @@ -109,13 +108,13 @@ /* supported Rage128 chipsets */ static const struct aty128_chip_info aty128_pci_probe_list[] __initdata = { - {"Rage128 RE (PCI)", PCI_DEVICE_ID_ATI_RAGE128_RE}, - {"Rage128 RF (AGP)", PCI_DEVICE_ID_ATI_RAGE128_RF}, - {"Rage128 RK (PCI)", PCI_DEVICE_ID_ATI_RAGE128_RK}, - {"Rage128 RL (AGP)", PCI_DEVICE_ID_ATI_RAGE128_RL}, - {"Rage128 Pro PF (AGP)", PCI_DEVICE_ID_ATI_RAGE128_PF}, - {"Rage128 Pro PR (PCI)", PCI_DEVICE_ID_ATI_RAGE128_PR}, - {NULL, 0} + { "Rage128 RE (PCI)", PCI_DEVICE_ID_ATI_RAGE128_RE }, + { "Rage128 RF (AGP)", PCI_DEVICE_ID_ATI_RAGE128_RF }, + { "Rage128 RK (PCI)", PCI_DEVICE_ID_ATI_RAGE128_RK }, + { "Rage128 RL (AGP)", PCI_DEVICE_ID_ATI_RAGE128_RL }, + { "Rage128 Pro PF (AGP)", PCI_DEVICE_ID_ATI_RAGE128_PF }, + { "Rage128 Pro PR (PCI)", PCI_DEVICE_ID_ATI_RAGE128_PR }, + { NULL, 0 } }; /* packed BIOS settings */ @@ -162,20 +161,20 @@ }; /* various memory configurations */ -const struct aty128_meminfo sdr_128 = +static const struct aty128_meminfo sdr_128 = { 4, 4, 3, 3, 1, 3, 1, 16, 30, 16, "128-bit SDR SGRAM (1:1)" }; -const struct aty128_meminfo sdr_64 = +static const struct aty128_meminfo sdr_64 = { 4, 8, 3, 3, 1, 3, 1, 17, 46, 17, "64-bit SDR SGRAM (1:1)" }; -const struct aty128_meminfo sdr_sgram = +static const struct aty128_meminfo sdr_sgram = { 4, 4, 1, 2, 1, 2, 1, 
16, 24, 16, "64-bit SDR SGRAM (2:1)" }; -const struct aty128_meminfo ddr_sgram = +static const struct aty128_meminfo ddr_sgram = { 4, 4, 3, 3, 2, 3, 1, 16, 31, 16, "64-bit DDR SGRAM" }; static int currcon = 0; static char *aty128fb_name = "ATY Rage128"; static char fontname[40] __initdata = { 0 }; -static char noaccel __initdata = 1; +static char noaccel __initdata = 0; static unsigned int initdepth __initdata = 8; #ifndef MODULE @@ -273,6 +272,7 @@ #ifdef CONFIG_MTRR struct { int vram; int vram_valid; } mtrr; #endif + int fifo_slots; /* free slots in FIFO (64 max) */ }; static struct fb_info_aty128 *board_list = NULL; @@ -344,7 +344,8 @@ struct fb_info_aty128 *info); static void aty128_reset_engine(const struct fb_info_aty128 *info); static void aty128_flush_pixel_cache(const struct fb_info_aty128 *info); -static void wait_for_fifo(u16 entries, const struct fb_info_aty128 *info); +static void do_wait_for_fifo(u16 entries, struct fb_info_aty128 *info); +static void wait_for_fifo(u16 entries, struct fb_info_aty128 *info); static void wait_for_idle(struct fb_info_aty128 *info); static u32 bpp_to_depth(u32 bpp); @@ -483,7 +484,8 @@ aty_st_8(CLOCK_CNTL_INDEX, (pll_index & 0x1F) | PLL_WR_EN); aty_st_le32(CLOCK_CNTL_DATA, val); } - + + /* return true when the PLL has completed an atomic update */ static int aty_pll_readupdate(const struct fb_info_aty128 *info) @@ -547,52 +549,64 @@ * Accelerator engine functions */ static void -wait_for_idle(struct fb_info_aty128 *info) +do_wait_for_fifo(u16 entries, struct fb_info_aty128 *info) { - unsigned long timeout = jiffies + HZ/20; - int reset = 1; - - wait_for_fifo(64, info); - - while (time_before(jiffies, timeout)) - if ((aty_ld_le32(GUI_STAT) & GUI_ACTIVE) != ENGINE_IDLE) { - reset = 0; - break; - } + int i; - if (reset) + for (;;) { + for (i = 0; i < 2000000; i++) { + info->fifo_slots = aty_ld_le32(GUI_STAT) & 0x0fff; + if (info->fifo_slots >= entries) + return; + } aty128_reset_engine(info); - - info->blitter_may_be_busy = 
0; + } } static void -wait_for_fifo(u16 entries, const struct fb_info_aty128 *info) +wait_for_idle(struct fb_info_aty128 *info) { - unsigned long timeout = jiffies + HZ/20; - int reset = 1; + int i; - while (time_before(jiffies, timeout)) - if ((aty_ld_le32(GUI_STAT) & 0x00000FFF) < entries) { - reset = 0; - break; - } + do_wait_for_fifo(64, info); - if (reset) - aty128_reset_engine(info); + for (;;) { + for (i = 0; i < 2000000; i++) { + if (!(aty_ld_le32(GUI_STAT) & (1 << 31))) { + aty128_flush_pixel_cache(info); + info->blitter_may_be_busy = 0; + return; + } + } + aty128_reset_engine(info); + } +} + + +static void +wait_for_fifo(u16 entries, struct fb_info_aty128 *info) +{ + if (info->fifo_slots < entries) + do_wait_for_fifo(64, info); + info->fifo_slots -= entries; } static void aty128_flush_pixel_cache(const struct fb_info_aty128 *info) { - int i = 16384; + int i; + u32 tmp; - aty_st_le32(PC_NGUI_CTLSTAT, aty_ld_le32(PC_NGUI_CTLSTAT) | 0x000000ff); + tmp = aty_ld_le32(PC_NGUI_CTLSTAT); + tmp &= ~(0x00ff); + tmp |= 0x00ff; + aty_st_le32(PC_NGUI_CTLSTAT, tmp); - while (i && ((aty_ld_le32(PC_NGUI_CTLSTAT) & PC_BUSY) == PC_BUSY)) - i--; + for (i = 0; i < 2000000; i++) + if (!(aty_ld_le32(PC_NGUI_CTLSTAT) & PC_BUSY)) + break; } @@ -798,7 +812,7 @@ return -EINVAL; } - h_disp = (xres/8) - 1; + h_disp = (xres >> 3) - 1; h_total = (((xres + right + hslen + left) / 8) - 1) & 0xFFFFL; v_disp = yres - 1; @@ -1485,7 +1499,7 @@ fix->type = FB_TYPE_PACKED_PIXELS; fix->type_aux = 0; - fix->line_length = par->crtc.vxres*par->crtc.bpp/8; + fix->line_length = par->crtc.vxres*par->crtc.bpp >> 3; fix->visual = par->crtc.bpp <= 8 ? 
FB_VISUAL_PSEUDOCOLOR : FB_VISUAL_DIRECTCOLOR; fix->ywrapstep = 0; @@ -1662,7 +1676,7 @@ else if(!strncmp(this_opt, "nomtrr", 6)) { mtrr = 0; } -#endif /* CONFIG_MTRR */ +#endif #ifdef CONFIG_PPC /* vmode and cmode depreciated */ else if (!strncmp(this_opt, "vmode:", 6)) { @@ -1791,7 +1805,7 @@ dac = aty_ld_le32(DAC_CNTL); dac |= (DAC_8BIT_EN | DAC_RANGE_CNTL | DAC_BLANKING); - dac |= DAC_MASK; /* set DAC mask */ + dac |= DAC_MASK; aty_st_le32(DAC_CNTL, dac); /* turn off bus mastering, just in case */ @@ -2309,22 +2323,22 @@ wait_for_fifo(2, info); save_dp_datatype = aty_ld_le32(DP_DATATYPE); - save_dp_cntl = aty_ld_le32(DP_CNTL); + save_dp_cntl = aty_ld_le32(DP_CNTL); wait_for_fifo(6, info); - aty_st_le32(DP_DATATYPE, (0 | BRUSH_SOLIDCOLOR << 16) | SRC_DSTCOLOR); + aty_st_le32(DP_DATATYPE, (BRUSH_SOLIDCOLOR << 16) | SRC_DSTCOLOR); aty_st_le32(DP_MIX, ROP3_SRCCOPY | DP_SRC_RECT); aty_st_le32(DP_CNTL, DST_X_LEFT_TO_RIGHT | DST_Y_TOP_TO_BOTTOM); aty_st_le32(SRC_Y_X, (srcy << 16) | srcx); aty_st_le32(DST_Y_X, (dsty << 16) | dstx); aty_st_le32(DST_HEIGHT_WIDTH, (height << 16) | width); + info->blitter_may_be_busy = 1; + wait_for_fifo(2, info); aty_st_le32(DP_DATATYPE, save_dp_datatype); aty_st_le32(DP_CNTL, save_dp_cntl); - info->blitter_may_be_busy = 1; - wait_for_idle(info); } @@ -2333,16 +2347,15 @@ * Text mode accelerated functions */ - static void fbcon_aty128_bmove(struct display *p, int sy, int sx, int dy, int dx, int height, int width) { - sx *= fontwidth(p); - sy *= fontheight(p); - dx *= fontwidth(p); - dy *= fontheight(p); - width *= fontwidth(p); + sx *= fontwidth(p); + sy *= fontheight(p); + dx *= fontwidth(p); + dy *= fontheight(p); + width *= fontwidth(p); height *= fontheight(p); aty128_rectcopy(sx, sy, dx, dy, width, height, diff -u --recursive --new-file v2.3.99-pre1/linux/drivers/video/cyber2000fb.c linux/drivers/video/cyber2000fb.c --- v2.3.99-pre1/linux/drivers/video/cyber2000fb.c Tue Mar 14 19:10:40 2000 +++ linux/drivers/video/cyber2000fb.c Sat 
Mar 18 11:38:03 2000 @@ -1175,6 +1175,7 @@ int cyber2000fb_attach(struct cyberpro_info *info) { if (current_par.initialised) { + info->dev = current_par.dev; info->regs = CyberRegs; info->fb = current_par.screen_base; info->fb_size = current_par.screen_size; @@ -1402,7 +1403,15 @@ smem_base = dev->resource[0].start; mmio_base = dev->resource[0].start + 0x00800000; + current_par.dev = dev; current_par.dev_id = dev->device; + + err = pci_enable_device(dev); + if (err) { + printk("%s: unable to enable device: %d\n", + current_par.dev_name, err); + return err; + } /* * Map in the registers diff -u --recursive --new-file v2.3.99-pre1/linux/drivers/video/cyber2000fb.h linux/drivers/video/cyber2000fb.h --- v2.3.99-pre1/linux/drivers/video/cyber2000fb.h Thu Feb 10 17:11:15 2000 +++ linux/drivers/video/cyber2000fb.h Sat Mar 18 11:38:03 2000 @@ -50,8 +50,9 @@ unsigned int palette_size; signed int currcon; char dev_name[32]; - unsigned int initialised; + struct pci_dev *dev; unsigned int dev_id; + unsigned int initialised:1; unsigned int bus_64bit:1; /* @@ -276,6 +277,7 @@ #define CO_REG_DEST_WIDTH 0xbf218 struct cyberpro_info { + struct pci_dev *dev; unsigned char *regs; char *fb; char dev_name[32]; diff -u --recursive --new-file v2.3.99-pre1/linux/fs/autofs/inode.c linux/fs/autofs/inode.c --- v2.3.99-pre1/linux/fs/autofs/inode.c Fri Mar 10 16:40:46 2000 +++ linux/fs/autofs/inode.c Thu Mar 16 11:20:33 2000 @@ -253,7 +253,6 @@ fail_free: kfree(sbi); fail_unlock: -fail_dec: return NULL; } diff -u --recursive --new-file v2.3.99-pre1/linux/fs/binfmt_aout.c linux/fs/binfmt_aout.c --- v2.3.99-pre1/linux/fs/binfmt_aout.c Tue Mar 14 19:10:40 2000 +++ linux/fs/binfmt_aout.c Thu Mar 16 22:23:22 2000 @@ -30,7 +30,7 @@ #include static int load_aout_binary(struct linux_binprm *, struct pt_regs * regs); -static int load_aout_library(int fd); +static int load_aout_library(struct file*); static int aout_core_dump(long signr, struct pt_regs * regs, struct file *file); extern void 
dump_thread(struct pt_regs *, struct user *); @@ -414,9 +414,8 @@ return 0; } -static int load_aout_library(int fd) +static int load_aout_library(struct file *file) { - struct file * file; struct inode * inode; unsigned long bss, start_addr, len; unsigned long error; @@ -424,12 +423,6 @@ loff_t offset = 0; struct exec ex; - retval = -EACCES; - file = fget(fd); - if (!file) - goto out; - if (!file->f_op) - goto out_putf; inode = file->f_dentry->d_inode; retval = -ENOEXEC; @@ -438,17 +431,17 @@ error = file->f_op->read(file, (char *) &ex, sizeof(ex), &offset); set_fs(USER_DS); if (error != sizeof(ex)) - goto out_putf; + goto out; /* We come in here for the regular a.out style of shared libraries */ if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != QMAGIC) || N_TRSIZE(ex) || N_DRSIZE(ex) || ((ex.a_entry & 0xfff) && N_MAGIC(ex) == ZMAGIC) || inode->i_size < ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) { - goto out_putf; + goto out; } if (N_FLAGS(ex)) - goto out_putf; + goto out; /* For QMAGIC, the starting address is 0x20 into the page. We mask this off to get the starting address for the page */ @@ -474,16 +467,18 @@ (unsigned long) start_addr + ex.a_text + ex.a_data); retval = 0; - goto out_putf; + goto out; } /* Now use mmap to map the library into memory. 
*/ + down(&current->mm->mmap_sem); error = do_mmap(file, start_addr, ex.a_text + ex.a_data, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE, N_TXTOFF(ex)); + up(&current->mm->mmap_sem); retval = error; if (error != start_addr) - goto out_putf; + goto out; len = PAGE_ALIGN(ex.a_text + ex.a_data); bss = ex.a_text + ex.a_data + ex.a_bss; @@ -491,12 +486,9 @@ error = do_brk(start_addr + len, bss - len); retval = error; if (error != start_addr + len) - goto out_putf; + goto out; } retval = 0; - -out_putf: - fput(file); out: return retval; } diff -u --recursive --new-file v2.3.99-pre1/linux/fs/binfmt_elf.c linux/fs/binfmt_elf.c --- v2.3.99-pre1/linux/fs/binfmt_elf.c Tue Mar 14 19:10:40 2000 +++ linux/fs/binfmt_elf.c Thu Mar 16 22:23:22 2000 @@ -40,7 +40,7 @@ #include static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs); -static int load_elf_library(int fd); +static int load_elf_library(struct file*); extern int dump_fpu (struct pt_regs *, elf_fpregset_t *); extern void dump_thread(struct pt_regs *, struct user *); @@ -495,16 +495,14 @@ current->personality = PER_SVR4; lock_kernel(); - interpreter_dentry = open_namei(elf_interpreter, - 0, 0); + interpreter_dentry = open_namei(elf_interpreter); unlock_kernel(); current->personality = old_pers; } else #endif { lock_kernel(); - interpreter_dentry = open_namei(elf_interpreter, - 0, 0); + interpreter_dentry = open_namei(elf_interpreter); unlock_kernel(); } set_fs(old_fs); @@ -789,9 +787,8 @@ /* This is really simpleminded and specialized - we are loading an a.out library that is given an ELF header.
*/ -static int load_elf_library(int fd) +static int load_elf_library(struct file *file) { - struct file * file; struct dentry * dentry; struct inode * inode; struct elf_phdr *elf_phdata; @@ -801,9 +798,6 @@ loff_t offset = 0; error = -EACCES; - file = fget(fd); - if (!file || !file->f_op) - goto out; dentry = file->f_dentry; inode = dentry->d_inode; @@ -815,27 +809,27 @@ retval = file->f_op->read(file, (char *) &elf_ex, sizeof(elf_ex), &offset); set_fs(USER_DS); if (retval != sizeof(elf_ex)) - goto out_putf; + goto out; if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0) - goto out_putf; + goto out; /* First of all, some simple consistency checks */ if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 || !elf_check_arch(elf_ex.e_machine) || (!inode->i_fop || !inode->i_fop->mmap)) - goto out_putf; + goto out; /* Now read in all of the header information */ j = sizeof(struct elf_phdr) * elf_ex.e_phnum; if (j > ELF_EXEC_PAGESIZE) - goto out_putf; + goto out; error = -ENOMEM; elf_phdata = (struct elf_phdr *) kmalloc(j, GFP_KERNEL); if (!elf_phdata) - goto out_putf; + goto out; /* N.B. check for error return?? */ retval = read_exec(dentry, elf_ex.e_phoff, (char *) elf_phdata, @@ -850,6 +844,7 @@ while (elf_phdata->p_type != PT_LOAD) elf_phdata++; /* Now use mmap to map the library into memory. 
*/ + down(&current->mm->mmap_sem); error = do_mmap(file, ELF_PAGESTART(elf_phdata->p_vaddr), (elf_phdata->p_filesz + @@ -858,6 +853,7 @@ MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE, (elf_phdata->p_offset - ELF_PAGEOFFSET(elf_phdata->p_vaddr))); + up(&current->mm->mmap_sem); if (error != ELF_PAGESTART(elf_phdata->p_vaddr)) goto out_free_ph; @@ -875,8 +871,6 @@ out_free_ph: kfree(elf_phdata); -out_putf: - fput(file); out: return error; } diff -u --recursive --new-file v2.3.99-pre1/linux/fs/binfmt_em86.c linux/fs/binfmt_em86.c --- v2.3.99-pre1/linux/fs/binfmt_em86.c Tue Mar 14 19:10:40 2000 +++ linux/fs/binfmt_em86.c Sat Mar 18 16:41:47 2000 @@ -12,6 +12,8 @@ #include #include #include +#include +#include #include #include #include @@ -81,7 +83,7 @@ * space, and we don't need to copy it. */ lock_kernel(); - dentry = open_namei(interp, 0, 0); + dentry = open_namei(interp); unlock_kernel(); if (IS_ERR(dentry)) return PTR_ERR(dentry); diff -u --recursive --new-file v2.3.99-pre1/linux/fs/binfmt_misc.c linux/fs/binfmt_misc.c --- v2.3.99-pre1/linux/fs/binfmt_misc.c Tue Mar 14 19:10:40 2000 +++ linux/fs/binfmt_misc.c Thu Mar 16 10:40:08 2000 @@ -213,7 +213,7 @@ bprm->argc++; bprm->filename = iname; /* for binfmt_script */ - dentry = open_namei(iname, 0, 0); + dentry = open_namei(iname); retval = PTR_ERR(dentry); if (IS_ERR(dentry)) goto _ret; diff -u --recursive --new-file v2.3.99-pre1/linux/fs/binfmt_script.c linux/fs/binfmt_script.c --- v2.3.99-pre1/linux/fs/binfmt_script.c Tue Mar 14 19:10:40 2000 +++ linux/fs/binfmt_script.c Thu Mar 16 10:40:17 2000 @@ -82,7 +82,7 @@ * OK, now restart the process with the interpreter's dentry.
*/ lock_kernel(); - dentry = open_namei(interp, 0, 0); + dentry = open_namei(interp); unlock_kernel(); if (IS_ERR(dentry)) return PTR_ERR(dentry); diff -u --recursive --new-file v2.3.99-pre1/linux/fs/buffer.c linux/fs/buffer.c --- v2.3.99-pre1/linux/fs/buffer.c Fri Mar 10 16:40:46 2000 +++ linux/fs/buffer.c Wed Mar 15 19:28:35 2000 @@ -147,13 +147,13 @@ atomic_inc(&bh->b_count); add_wait_queue(&bh->b_wait, &wait); -repeat: - run_task_queue(&tq_disk); - set_task_state(tsk, TASK_UNINTERRUPTIBLE); - if (buffer_locked(bh)) { + do { + run_task_queue(&tq_disk); + set_task_state(tsk, TASK_UNINTERRUPTIBLE); + if (!buffer_locked(bh)) + break; schedule(); - goto repeat; - } + } while (buffer_locked(bh)); tsk->state = TASK_RUNNING; remove_wait_queue(&bh->b_wait, &wait); atomic_dec(&bh->b_count); diff -u --recursive --new-file v2.3.99-pre1/linux/fs/dquot.c linux/fs/dquot.c --- v2.3.99-pre1/linux/fs/dquot.c Fri Jan 28 15:09:08 2000 +++ linux/fs/dquot.c Thu Mar 16 10:45:36 2000 @@ -66,6 +66,11 @@ static kmem_cache_t *dquot_cachep; +static inline struct quota_mount_options *sb_dqopt(struct super_block *sb) +{ + return &sb->s_dquot; +} + /* * Dquot List Management: * The quota code uses three lists for dquot management: the inuse_list, @@ -98,29 +103,20 @@ static void dqput(struct dquot *); static struct dquot *dqduplicate(struct dquot *); -static inline char is_enabled(struct vfsmount *vfsmnt, short type) +static inline char is_enabled(struct quota_mount_options *dqopt, short type) { switch (type) { case USRQUOTA: - return((vfsmnt->mnt_dquot.flags & DQUOT_USR_ENABLED) != 0); + return((dqopt->flags & DQUOT_USR_ENABLED) != 0); case GRPQUOTA: - return((vfsmnt->mnt_dquot.flags & DQUOT_GRP_ENABLED) != 0); + return((dqopt->flags & DQUOT_GRP_ENABLED) != 0); } return(0); } static inline char sb_has_quota_enabled(struct super_block *sb, short type) { - struct vfsmount *vfsmnt; - - return((vfsmnt = lookup_vfsmnt(sb->s_dev)) != (struct vfsmount *)NULL && is_enabled(vfsmnt, type)); -} - 
-static inline char dev_has_quota_enabled(kdev_t dev, short type) -{ - struct vfsmount *vfsmnt; - - return((vfsmnt = lookup_vfsmnt(dev)) != (struct vfsmount *)NULL && is_enabled(vfsmnt, type)); + return is_enabled(sb_dqopt(sb), type); } static inline int const hashfn(kdev_t dev, unsigned int id, short type) @@ -253,15 +249,15 @@ mm_segment_t fs; loff_t offset; ssize_t ret; - struct semaphore *sem = &dquot->dq_mnt->mnt_dquot.dqio_sem; + struct semaphore *sem = &dquot->dq_sb->s_dquot.dqio_sem; lock_dquot(dquot); - if (!dquot->dq_mnt) { /* Invalidated quota? */ + if (!dquot->dq_sb) { /* Invalidated quota? */ unlock_dquot(dquot); return; } down(sem); - filp = dquot->dq_mnt->mnt_dquot.files[type]; + filp = dquot->dq_sb->s_dquot.files[type]; offset = dqoff(dquot->dq_id); fs = get_fs(); set_fs(KERNEL_DS); @@ -293,20 +289,20 @@ mm_segment_t fs; loff_t offset; - filp = dquot->dq_mnt->mnt_dquot.files[type]; + filp = dquot->dq_sb->s_dquot.files[type]; if (filp == (struct file *)NULL) return; lock_dquot(dquot); - if (!dquot->dq_mnt) /* Invalidated quota? */ + if (!dquot->dq_sb) /* Invalidated quota? */ goto out_lock; /* Now we are sure filp is valid - the dquot isn't invalidated */ - down(&dquot->dq_mnt->mnt_dquot.dqio_sem); + down(&dquot->dq_sb->s_dquot.dqio_sem); offset = dqoff(dquot->dq_id); fs = get_fs(); set_fs(KERNEL_DS); filp->f_op->read(filp, (char *)&dquot->dq_dqb, sizeof(struct dqblk), &offset); - up(&dquot->dq_mnt->mnt_dquot.dqio_sem); + up(&dquot->dq_sb->s_dquot.dqio_sem); set_fs(fs); if (dquot->dq_bhardlimit == 0 && dquot->dq_bsoftlimit == 0 && @@ -326,7 +322,7 @@ { /* unhash it first */ unhash_dquot(dquot); - dquot->dq_mnt = NULL; + dquot->dq_sb = NULL; dquot->dq_flags = 0; dquot->dq_referenced = 0; memset(&dquot->dq_dqb, 0, sizeof(struct dqblk)); @@ -346,7 +342,7 @@ continue; if (dquot->dq_type != type) continue; - if (!dquot->dq_mnt) /* Already invalidated entry? */ + if (!dquot->dq_sb) /* Already invalidated entry? 
*/ continue; if (dquot->dq_flags & DQ_LOCKED) { __wait_on_dquot(dquot); @@ -360,7 +356,7 @@ continue; if (dquot->dq_type != type) continue; - if (!dquot->dq_mnt) + if (!dquot->dq_sb) continue; } /* @@ -368,7 +364,7 @@ * the quota needn't to be written to disk. So we write it * ourselves before discarding the data just for sure... */ - if (dquot->dq_flags & DQ_MOD && dquot->dq_mnt) + if (dquot->dq_flags & DQ_MOD && dquot->dq_sb) { write_dquot(dquot); need_restart = 1; /* We slept on IO */ @@ -397,7 +393,7 @@ continue; if (type != -1 && dquot->dq_type != type) continue; - if (!dquot->dq_mnt) /* Invalidated? */ + if (!dquot->dq_sb) /* Invalidated? */ continue; if (!(dquot->dq_flags & (DQ_LOCKED | DQ_MOD))) continue; @@ -435,11 +431,11 @@ } /* - * If the dq_mnt pointer isn't initialized this entry needs no + * If the dq_sb pointer isn't initialized this entry needs no * checking and doesn't need to be written. It's just an empty * dquot that is put back on to the freelist. */ - if (dquot->dq_mnt) + if (dquot->dq_sb) dqstats.drops++; we_slept: if (dquot->dq_count > 1) { @@ -451,7 +447,7 @@ printk(KERN_ERR "VFS: Locked quota to be put on the free list.\n"); dquot->dq_flags &= ~DQ_LOCKED; } - if (dquot->dq_mnt && dquot->dq_flags & DQ_MOD) { + if (dquot->dq_sb && dquot->dq_flags & DQ_MOD) { write_dquot(dquot); goto we_slept; } @@ -571,17 +567,17 @@ goto repeat; } -struct dquot *dqget(kdev_t dev, unsigned int id, short type) +static struct dquot *dqget(struct super_block *sb, unsigned int id, short type) { - unsigned int hashent = hashfn(dev, id, type); + unsigned int hashent = hashfn(sb->s_dev, id, type); struct dquot *dquot, *empty = NULL; - struct vfsmount *vfsmnt; + struct quota_mount_options *dqopt = sb_dqopt(sb); - if ((vfsmnt = lookup_vfsmnt(dev)) == (struct vfsmount *)NULL || !is_enabled(vfsmnt, type)) + if (!is_enabled(dqopt, type)) return(NODQUOT); we_slept: - if ((dquot = find_dquot(hashent, dev, id, type)) == NULL) { + if ((dquot = find_dquot(hashent, sb->s_dev, 
id, type)) == NULL) { if (empty == NULL) { dquot_updating[hashent]++; empty = get_empty_dquot(); @@ -592,8 +588,8 @@ dquot = empty; dquot->dq_id = id; dquot->dq_type = type; - dquot->dq_dev = dev; - dquot->dq_mnt = vfsmnt; + dquot->dq_dev = sb->s_dev; + dquot->dq_sb = sb; /* hash it first so it can be found */ hash_dquot(dquot); read_dquot(dquot); @@ -610,7 +606,7 @@ while (dquot_updating[hashent]) sleep_on(&update_wait); - if (!dquot->dq_mnt) { /* Has somebody invalidated entry under us? */ + if (!dquot->dq_sb) { /* Has somebody invalidated entry under us? */ /* * Do it as if the quota was invalidated before we started */ @@ -625,11 +621,11 @@ static struct dquot *dqduplicate(struct dquot *dquot) { - if (dquot == NODQUOT || !dquot->dq_mnt) + if (dquot == NODQUOT || !dquot->dq_sb) return NODQUOT; dquot->dq_count++; wait_on_dquot(dquot); - if (!dquot->dq_mnt) { + if (!dquot->dq_sb) { dquot->dq_count--; return NODQUOT; } @@ -642,13 +638,12 @@ static inline int is_quotafile(struct inode *inode) { int cnt; - struct vfsmount *vfsmnt; + struct quota_mount_options *dqopt = sb_dqopt(inode->i_sb); struct file **files; - vfsmnt = lookup_vfsmnt(inode->i_dev); - if (!vfsmnt) + if (!dqopt) return 0; - files = vfsmnt->mnt_dquot.files; + files = dqopt->files; for (cnt = 0; cnt < MAXQUOTAS; cnt++) if (files[cnt] && files[cnt]->f_dentry->d_inode == inode) return 1; @@ -671,13 +666,12 @@ return 0; } -static void add_dquot_ref(kdev_t dev, short type) +static void add_dquot_ref(struct super_block *sb, short type) { - struct super_block *sb = get_super(dev); struct list_head *p; struct inode *inode; - if (!sb || !sb->dq_op) + if (!sb->dq_op) return; /* nothing to do */ restart: @@ -804,7 +798,7 @@ if (!need_print_warning(dquot, flag)) return; - root = dquot->dq_mnt->mnt_sb->s_root; + root = dquot->dq_sb->s_root; dget(root); buffer = (char *) __get_free_page(GFP_KERNEL); path = buffer ? 
d_path(root, buffer, PAGE_SIZE) : "?"; @@ -817,7 +811,7 @@ static inline char ignore_hardlimit(struct dquot *dquot) { - return capable(CAP_SYS_RESOURCE) && !dquot->dq_mnt->mnt_dquot.rsquash[dquot->dq_type]; + return capable(CAP_SYS_RESOURCE) && !dquot->dq_sb->s_dquot.rsquash[dquot->dq_type]; } static int check_idq(struct dquot *dquot, u_long inodes) @@ -844,7 +838,7 @@ (dquot->dq_curinodes + inodes) > dquot->dq_isoftlimit && dquot->dq_itime == 0) { print_warning(dquot, 0, "%s: warning, %s file quota exceeded\n"); - dquot->dq_itime = CURRENT_TIME + dquot->dq_mnt->mnt_dquot.inode_expire[dquot->dq_type]; + dquot->dq_itime = CURRENT_TIME + dquot->dq_sb->s_dquot.inode_expire[dquot->dq_type]; } return QUOTA_OK; @@ -877,7 +871,7 @@ dquot->dq_btime == 0) { if (!prealloc) { print_warning(dquot, 0, "%s: warning, %s disk quota exceeded\n"); - dquot->dq_btime = CURRENT_TIME + dquot->dq_mnt->mnt_dquot.block_expire[dquot->dq_type]; + dquot->dq_btime = CURRENT_TIME + dquot->dq_sb->s_dquot.block_expire[dquot->dq_type]; } else /* @@ -894,7 +888,7 @@ * Initialize a dquot-struct with new quota info. This is used by the * system call interface functions. 
*/ -static int set_dqblk(kdev_t dev, int id, short type, int flags, struct dqblk *dqblk) +static int set_dqblk(struct super_block *sb, int id, short type, int flags, struct dqblk *dqblk) { struct dquot *dquot; int error = -EFAULT; @@ -909,7 +903,7 @@ } else memcpy((caddr_t)&dq_dqblk, (caddr_t)dqblk, sizeof(struct dqblk)); - if ((dquot = dqget(dev, id, type)) != NODQUOT) { + if (sb && (dquot = dqget(sb, id, type)) != NODQUOT) { lock_dquot(dquot); if (id > 0 && ((flags & SET_QUOTA) || (flags & SET_QLIMIT))) { @@ -923,22 +917,22 @@ if (dquot->dq_isoftlimit && dquot->dq_curinodes < dquot->dq_isoftlimit && dq_dqblk.dqb_curinodes >= dquot->dq_isoftlimit) - dquot->dq_itime = CURRENT_TIME + dquot->dq_mnt->mnt_dquot.inode_expire[type]; + dquot->dq_itime = CURRENT_TIME + dquot->dq_sb->s_dquot.inode_expire[type]; dquot->dq_curinodes = dq_dqblk.dqb_curinodes; if (dquot->dq_curinodes < dquot->dq_isoftlimit) dquot->dq_flags &= ~DQ_INODES; if (dquot->dq_bsoftlimit && dquot->dq_curblocks < dquot->dq_bsoftlimit && dq_dqblk.dqb_curblocks >= dquot->dq_bsoftlimit) - dquot->dq_btime = CURRENT_TIME + dquot->dq_mnt->mnt_dquot.block_expire[type]; + dquot->dq_btime = CURRENT_TIME + dquot->dq_sb->s_dquot.block_expire[type]; dquot->dq_curblocks = dq_dqblk.dqb_curblocks; if (dquot->dq_curblocks < dquot->dq_bsoftlimit) dquot->dq_flags &= ~DQ_BLKS; } if (id == 0) { - dquot->dq_mnt->mnt_dquot.block_expire[type] = dquot->dq_btime = dq_dqblk.dqb_btime; - dquot->dq_mnt->mnt_dquot.inode_expire[type] = dquot->dq_itime = dq_dqblk.dqb_itime; + dquot->dq_sb->s_dquot.block_expire[type] = dquot->dq_btime = dq_dqblk.dqb_btime; + dquot->dq_sb->s_dquot.inode_expire[type] = dquot->dq_itime = dq_dqblk.dqb_itime; } if (dq_dqblk.dqb_bhardlimit == 0 && dq_dqblk.dqb_bsoftlimit == 0 && @@ -954,14 +948,14 @@ return(0); } -static int get_quota(kdev_t dev, int id, short type, struct dqblk *dqblk) +static int get_quota(struct super_block *sb, int id, short type, struct dqblk *dqblk) { struct dquot *dquot; int error = 
-ESRCH; - if (!dev_has_quota_enabled(dev, type)) + if (!sb || !sb_has_quota_enabled(sb, type)) goto out; - dquot = dqget(dev, id, type); + dquot = dqget(sb, id, type); if (dquot == NODQUOT) goto out; @@ -990,17 +984,16 @@ return error; } -static int quota_root_squash(kdev_t dev, short type, int *addr) +static int quota_root_squash(struct super_block *sb, short type, int *addr) { - struct vfsmount *vfsmnt; int new_value, error; - if ((vfsmnt = lookup_vfsmnt(dev)) == (struct vfsmount *)NULL) + if (!sb) return(-ENODEV); error = -EFAULT; if (!copy_from_user(&new_value, addr, sizeof(int))) { - vfsmnt->mnt_dquot.rsquash[type] = new_value; + sb_dqopt(sb)->rsquash[type] = new_value; error = 0; } return error; @@ -1063,7 +1056,7 @@ id = inode->i_gid; break; } - dquot = dqget(inode->i_dev, id, cnt); + dquot = dqget(inode->i_sb, id, cnt); if (dquot == NODQUOT) continue; if (inode->i_dquot[cnt] != NODQUOT) { @@ -1219,10 +1212,13 @@ if (!inode) return -ENOENT; + /* Arguably we could consider that as error, but... no fs - no quota */ + if (!inode->i_sb) + return 0; /* * Find out if this filesystem uses i_blocks. */ - if (!inode->i_sb || !inode->i_sb->s_blocksize) + if (!inode->i_sb->s_blocksize) blocks = isize_to_blocks(inode->i_size, BLOCK_SIZE_BITS); else blocks = (inode->i_blocks >> 1); @@ -1243,14 +1239,14 @@ if (inode->i_uid == iattr->ia_uid) continue; /* We can get transfer_from from inode, can't we? 
*/ - transfer_from[cnt] = dqget(inode->i_dev, inode->i_uid, cnt); - transfer_to[cnt] = dqget(inode->i_dev, iattr->ia_uid, cnt); + transfer_from[cnt] = dqget(inode->i_sb, inode->i_uid, cnt); + transfer_to[cnt] = dqget(inode->i_sb, iattr->ia_uid, cnt); break; case GRPQUOTA: if (inode->i_gid == iattr->ia_gid) continue; - transfer_from[cnt] = dqget(inode->i_dev, inode->i_gid, cnt); - transfer_to[cnt] = dqget(inode->i_dev, iattr->ia_gid, cnt); + transfer_from[cnt] = dqget(inode->i_sb, inode->i_gid, cnt); + transfer_to[cnt] = dqget(inode->i_sb, iattr->ia_gid, cnt); break; } @@ -1286,7 +1282,7 @@ * dqget() could block and so the first structure might got * invalidated or locked... */ - if (!transfer_to[cnt]->dq_mnt || !transfer_from[cnt]->dq_mnt || + if (!transfer_to[cnt]->dq_sb || !transfer_from[cnt]->dq_sb || check_idq(transfer_to[cnt], 1) == NO_QUOTA || check_bdq(transfer_to[cnt], blocks, 0) == NO_QUOTA) { cnt++; @@ -1371,26 +1367,26 @@ dquot_transfer }; -static inline void set_enable_flags(struct vfsmount *vfsmnt, short type) +static inline void set_enable_flags(struct quota_mount_options *dqopt, short type) { switch (type) { case USRQUOTA: - vfsmnt->mnt_dquot.flags |= DQUOT_USR_ENABLED; + dqopt->flags |= DQUOT_USR_ENABLED; break; case GRPQUOTA: - vfsmnt->mnt_dquot.flags |= DQUOT_GRP_ENABLED; + dqopt->flags |= DQUOT_GRP_ENABLED; break; } } -static inline void reset_enable_flags(struct vfsmount *vfsmnt, short type) +static inline void reset_enable_flags(struct quota_mount_options *dqopt, short type) { switch (type) { case USRQUOTA: - vfsmnt->mnt_dquot.flags &= ~DQUOT_USR_ENABLED; + dqopt->flags &= ~DQUOT_USR_ENABLED; break; case GRPQUOTA: - vfsmnt->mnt_dquot.flags &= ~DQUOT_GRP_ENABLED; + dqopt->flags &= ~DQUOT_GRP_ENABLED; break; } } @@ -1401,38 +1397,36 @@ /* * Turn quota off on a device. 
type == -1 ==> quotaoff for all types (umount) */ -int quota_off(kdev_t dev, short type) +int quota_off(struct super_block *sb, short type) { - struct vfsmount *vfsmnt; struct file *filp; short cnt; int enabled = 0; + struct quota_mount_options *dqopt = sb_dqopt(sb); - /* We don't need to search for vfsmnt each time - umount has to wait for us */ - vfsmnt = lookup_vfsmnt(dev); - if (!vfsmnt || !vfsmnt->mnt_sb) + if (!sb) goto out; /* We need to serialize quota_off() for device */ - down(&vfsmnt->mnt_dquot.dqoff_sem); + down(&dqopt->dqoff_sem); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (type != -1 && cnt != type) continue; - if (!is_enabled(vfsmnt, cnt)) + if (!is_enabled(dqopt, cnt)) continue; - reset_enable_flags(vfsmnt, cnt); + reset_enable_flags(dqopt, cnt); /* Note: these are blocking operations */ - remove_dquot_ref(dev, cnt); - invalidate_dquots(dev, cnt); + remove_dquot_ref(sb->s_dev, cnt); + invalidate_dquots(sb->s_dev, cnt); /* Wait for any pending IO - remove me as soon as invalidate is more polite */ - down(&vfsmnt->mnt_dquot.dqio_sem); - filp = vfsmnt->mnt_dquot.files[cnt]; - vfsmnt->mnt_dquot.files[cnt] = (struct file *)NULL; - vfsmnt->mnt_dquot.inode_expire[cnt] = 0; - vfsmnt->mnt_dquot.block_expire[cnt] = 0; - up(&vfsmnt->mnt_dquot.dqio_sem); + down(&dqopt->dqio_sem); + filp = dqopt->files[cnt]; + dqopt->files[cnt] = (struct file *)NULL; + dqopt->inode_expire[cnt] = 0; + dqopt->block_expire[cnt] = 0; + up(&dqopt->dqio_sem); fput(filp); } @@ -1441,10 +1435,10 @@ * and if not clear the dq_op pointer. 
*/ for (cnt = 0; cnt < MAXQUOTAS; cnt++) - enabled |= is_enabled(vfsmnt, cnt); + enabled |= is_enabled(dqopt, cnt); if (!enabled) - vfsmnt->mnt_sb->dq_op = NULL; - up(&vfsmnt->mnt_dquot.dqoff_sem); + sb->dq_op = NULL; + up(&dqopt->dqoff_sem); out: return(0); } @@ -1457,31 +1451,25 @@ return !(((blocks % sizeof(struct dqblk)) * BLOCK_SIZE + off % sizeof(struct dqblk)) % sizeof(struct dqblk)); } -int quota_on(kdev_t dev, short type, char *path) +static int quota_on(struct super_block *sb, short type, char *path) { struct file *f; - struct vfsmount *vfsmnt; struct inode *inode; struct dquot *dquot; - struct quota_mount_options *mnt_dquot; + struct quota_mount_options *dqopt = sb_dqopt(sb); char *tmp; int error; - vfsmnt = lookup_vfsmnt(dev); - if (vfsmnt == (struct vfsmount *)NULL) - return -ENODEV; - - if (is_enabled(vfsmnt, type)) + if (is_enabled(dqopt, type)) return -EBUSY; - mnt_dquot = &vfsmnt->mnt_dquot; - down(&mnt_dquot->dqoff_sem); + down(&dqopt->dqoff_sem); tmp = getname(path); error = PTR_ERR(tmp); if (IS_ERR(tmp)) goto out_lock; - f = filp_open(tmp, O_RDWR, 0600); + f = filp_open(tmp, O_RDWR, 0600, NULL); putname(tmp); error = PTR_ERR(f); @@ -1499,24 +1487,24 @@ goto out_f; dquot_drop(inode); /* We don't want quota on quota files */ - set_enable_flags(vfsmnt, type); - mnt_dquot->files[type] = f; + set_enable_flags(dqopt, type); + dqopt->files[type] = f; - dquot = dqget(dev, 0, type); - mnt_dquot->inode_expire[type] = (dquot != NODQUOT) ? dquot->dq_itime : MAX_IQ_TIME; - mnt_dquot->block_expire[type] = (dquot != NODQUOT) ? dquot->dq_btime : MAX_DQ_TIME; + dquot = dqget(sb, 0, type); + dqopt->inode_expire[type] = (dquot != NODQUOT) ? dquot->dq_itime : MAX_IQ_TIME; + dqopt->block_expire[type] = (dquot != NODQUOT) ? 
dquot->dq_btime : MAX_DQ_TIME; dqput(dquot); - vfsmnt->mnt_sb->dq_op = &dquot_operations; - add_dquot_ref(dev, type); + sb->dq_op = &dquot_operations; + add_dquot_ref(sb, type); - up(&mnt_dquot->dqoff_sem); + up(&dqopt->dqoff_sem); return 0; out_f: filp_close(f, NULL); out_lock: - up(&mnt_dquot->dqoff_sem); + up(&dqopt->dqoff_sem); return error; } @@ -1531,6 +1519,7 @@ { int cmds = 0, type = 0, flags = 0; kdev_t dev; + struct super_block *sb = NULL; int ret = -EINVAL; lock_kernel(); @@ -1575,18 +1564,19 @@ ret = -ENOTBLK; if (!S_ISBLK(mode)) goto out; + sb = get_super(dev); } ret = -EINVAL; switch (cmds) { case Q_QUOTAON: - ret = quota_on(dev, type, (char *) addr); + ret = sb ? quota_on(sb, type, (char *) addr) : -ENODEV; goto out; case Q_QUOTAOFF: - ret = quota_off(dev, type); + ret = quota_off(sb, type); goto out; case Q_GETQUOTA: - ret = get_quota(dev, id, type, (struct dqblk *) addr); + ret = get_quota(sb, id, type, (struct dqblk *) addr); goto out; case Q_SETQUOTA: flags |= SET_QUOTA; @@ -1604,7 +1594,7 @@ ret = get_stats(addr); goto out; case Q_RSQUASH: - ret = quota_root_squash(dev, type, (int *) addr); + ret = quota_root_squash(sb, type, (int *) addr); goto out; default: goto out; @@ -1613,8 +1603,8 @@ flags |= QUOTA_SYSCALL; ret = -ESRCH; - if (dev_has_quota_enabled(dev, type)) - ret = set_dqblk(dev, id, type, flags, (struct dqblk *) addr); + if (sb && sb_has_quota_enabled(sb, type)) + ret = set_dqblk(sb, id, type, flags, (struct dqblk *) addr); out: unlock_kernel(); return ret; diff -u --recursive --new-file v2.3.99-pre1/linux/fs/exec.c linux/fs/exec.c --- v2.3.99-pre1/linux/fs/exec.c Tue Mar 14 19:10:40 2000 +++ linux/fs/exec.c Sun Mar 19 11:15:32 2000 @@ -165,14 +165,12 @@ if (file && file->f_dentry && file->f_op && file->f_op->read) { spin_lock(&binfmt_lock); for (fmt = formats ; fmt ; fmt = fmt->next) { - int (*fn)(int) = fmt->load_shlib; - if (!fn) + if (!fmt->load_shlib) continue; if (!try_inc_mod_count(fmt->module)) continue; 
spin_unlock(&binfmt_lock); - /* N.B. Should use file instead of fd */ - retval = fn(fd); + retval = fmt->load_shlib(file); spin_lock(&binfmt_lock); put_binfmt(fmt); if (retval != -ENOEXEC) @@ -718,6 +716,8 @@ if (current->euid != current->uid || current->egid != current->gid || !cap_issubset(new_permitted, current->cap_permitted)) current->dumpable = 0; + + current->keep_capabilities = 0; } @@ -775,7 +775,7 @@ bprm_loader.page[i] = NULL; lock_kernel(); - dentry = open_namei(dynloader[0], 0, 0); + dentry = open_namei(dynloader[0]); unlock_kernel(); retval = PTR_ERR(dentry); if (IS_ERR(dentry)) @@ -855,7 +855,7 @@ memset(bprm.page, 0, MAX_ARG_PAGES*sizeof(bprm.page[0])); lock_kernel(); - dentry = open_namei(filename, 0, 0); + dentry = open_namei(filename); unlock_kernel(); retval = PTR_ERR(dentry); @@ -944,7 +944,7 @@ #else corename[4] = '\0'; #endif - file = filp_open(corename, O_CREAT | 2 | O_TRUNC | O_NOFOLLOW, 0600); + file = filp_open(corename, O_CREAT | 2 | O_TRUNC | O_NOFOLLOW, 0600, NULL); if (IS_ERR(file)) goto fail; dentry = file->f_dentry; diff -u --recursive --new-file v2.3.99-pre1/linux/fs/ext2/dir.c linux/fs/ext2/dir.c --- v2.3.99-pre1/linux/fs/ext2/dir.c Tue Mar 14 19:10:40 2000 +++ linux/fs/ext2/dir.c Fri Mar 17 20:49:19 2000 @@ -137,8 +137,8 @@ bh, offset)) { /* On error, skip the f_pos to the next block. */ - filp->f_pos = (filp->f_pos & (sb->s_blocksize - 1)) - + sb->s_blocksize; + filp->f_pos = (filp->f_pos | (sb->s_blocksize - 1)) + + 1; brelse (bh); return stored; } diff -u --recursive --new-file v2.3.99-pre1/linux/fs/namei.c linux/fs/namei.c --- v2.3.99-pre1/linux/fs/namei.c Fri Mar 10 16:40:47 2000 +++ linux/fs/namei.c Sat Mar 18 11:31:57 2000 @@ -451,6 +451,76 @@ } /* + * Restricted form of lookup. Doesn't follow links, single-component only, + * needs parent already locked. Doesn't follow mounts. 
+ */ +struct dentry * lookup_one(const char * name, struct dentry * base) +{ + struct dentry * dentry; + struct inode *inode; + int err; + unsigned long hash; + struct qstr this; + unsigned int c; + + inode = base->d_inode; + err = permission(inode, MAY_EXEC); + dentry = ERR_PTR(err); + if (err) + goto out; + + this.name = name; + c = *(const unsigned char *)name; + if (!c) + goto access; + + hash = init_name_hash(); + do { + name++; + if (c == '/') + goto access; + hash = partial_name_hash(c, hash); + c = *(const unsigned char *)name; + } while (c); + this.len = name - (const char *) this.name; + this.hash = end_name_hash(hash); + + /* + * See if the low-level filesystem might want + * to use its own hash.. + */ + if (base->d_op && base->d_op->d_hash) { + err = base->d_op->d_hash(base, &this); + dentry = ERR_PTR(err); + if (err < 0) + goto out; + } + + dentry = cached_lookup(base, &this, 0); + if (!dentry) { + struct dentry *new = d_alloc(base, &this); + dentry = ERR_PTR(-ENOMEM); + if (!new) + goto out; + dentry = inode->i_op->lookup(inode, new); + if (!dentry) + dentry = new; + else { + dput(new); + if (IS_ERR(dentry)) + goto out; + } + } + +out: + dput(base); + return dentry; +access: + dentry = ERR_PTR(-EACCES); + goto out; +} + +/* * namei() * * is used by most simple commands to get the inode of a specified name. @@ -609,13 +679,13 @@ * which is a lot more logical, and also allows the "no perm" needed * for symlinks (where the permissions are checked later). 
*/ -struct dentry * open_namei(const char * pathname, int flag, int mode) +struct dentry * __open_namei(const char * pathname, int flag, int mode, struct dentry * dir) { int acc_mode, error; struct inode *inode; struct dentry *dentry; - dentry = lookup_dentry(pathname, NULL, lookup_flags(flag)); + dentry = lookup_dentry(pathname, dir, lookup_flags(flag)); if (IS_ERR(dentry)) return dentry; @@ -1012,13 +1082,13 @@ return error; } -int do_unlink(const char * name) +int do_unlink(const char * name, struct dentry * base) { int error; struct dentry *dir; struct dentry *dentry; - dentry = lookup_dentry(name, NULL, 0); + dentry = lookup_dentry(name, base, 0); error = PTR_ERR(dentry); if (IS_ERR(dentry)) goto exit; @@ -1043,7 +1113,7 @@ if(IS_ERR(tmp)) return PTR_ERR(tmp); lock_kernel(); - error = do_unlink(tmp); + error = do_unlink(tmp, NULL); unlock_kernel(); putname(tmp); @@ -1427,16 +1497,17 @@ int vfs_readlink(struct dentry *dentry, char *buffer, int buflen, const char *link) { - u32 len; + int len; len = PTR_ERR(link); if (IS_ERR(link)) goto out; len = strlen(link); - if (len > buflen) + if (len > (unsigned) buflen) len = buflen; - copy_to_user(buffer, link, len); + if (copy_to_user(buffer, link, len)) + len = -EFAULT; out: return len; } diff -u --recursive --new-file v2.3.99-pre1/linux/fs/nfsd/nfs3xdr.c linux/fs/nfsd/nfs3xdr.c --- v2.3.99-pre1/linux/fs/nfsd/nfs3xdr.c Fri Mar 10 16:40:48 2000 +++ linux/fs/nfsd/nfs3xdr.c Fri Mar 17 15:43:32 2000 @@ -727,25 +727,40 @@ /* throw in readdirplus baggage */ if (plus) { struct svc_fh fh; + struct svc_export *exp; + struct dentry *dparent, *dchild; + + dparent = cd->dirfh->fh_dentry; + exp = cd->dirfh->fh_export; fh_init(&fh, NFS3_FHSIZE); - /* Disabled for now because of lock-up */ - if (0 && nfsd_lookup(cd->rqstp, cd->dirfh, name, namlen, &fh) == 0) { - p = encode_post_op_attr(cd->rqstp, p, fh.fh_dentry); - p = encode_fh(p, &fh); - fh_put(&fh); - } else { - /* Didn't find this entry... weird. 
- * Proceed without the attrs anf fh anyway. - */ - *p++ = 0; - *p++ = 0; - } + if (fh_verify(cd->rqstp, cd->dirfh, S_IFDIR, MAY_EXEC) != 0) + goto noexec; + if (isdotent(name, namlen)) { + dchild = dparent; + if (namlen == 2) + dchild = dchild->d_parent; + dchild = dget(dchild); + } else + dchild = lookup_one(name, dget(dparent)); + if (IS_ERR(dchild)) + goto noexec; + if (fh_compose(&fh, exp, dchild) != 0 || !dchild->d_inode) + goto noexec; + p = encode_post_op_attr(cd->rqstp, p, fh.fh_dentry); + p = encode_fh(p, &fh); + fh_put(&fh); } +out: cd->buflen = buflen; cd->buffer = p; return 0; + +noexec: + *p++ = 0; + *p++ = 0; + goto out; } int diff -u --recursive --new-file v2.3.99-pre1/linux/fs/nfsd/nfsctl.c linux/fs/nfsd/nfsctl.c --- v2.3.99-pre1/linux/fs/nfsd/nfsctl.c Fri Mar 10 16:40:48 2000 +++ linux/fs/nfsd/nfsctl.c Thu Mar 16 11:20:33 2000 @@ -42,8 +42,10 @@ static int nfsctl_unexport(struct nfsctl_export *data); static int nfsctl_getfh(struct nfsctl_fhparm *, __u8 *); static int nfsctl_getfd(struct nfsctl_fdparm *, __u8 *); +#ifdef notyet static int nfsctl_getfs(struct nfsctl_fsparm *, struct knfsd_fh *); -/* static int nfsctl_ugidupdate(struct nfsctl_ugidmap *data); */ +static int nfsctl_ugidupdate(struct nfsctl_ugidmap *data); +#endif static int initialized = 0; diff -u --recursive --new-file v2.3.99-pre1/linux/fs/nfsd/nfsfh.c linux/fs/nfsd/nfsfh.c --- v2.3.99-pre1/linux/fs/nfsd/nfsfh.c Tue Mar 14 19:10:40 2000 +++ linux/fs/nfsd/nfsfh.c Thu Mar 16 22:24:32 2000 @@ -147,7 +147,7 @@ generation); iput(inode); - return NULL; + return ERR_PTR(-ESTALE); } /* now to find a dentry. * If possible, get a well-connected one @@ -353,10 +353,6 @@ if (IS_ERR(result)) goto err_out; err = -ESTALE; - if (!result) { - dprintk("find_fh_dentry: No inode found.\n"); - goto err_out; - } if (! 
(result->d_flags & DCACHE_NFSD_DISCONNECTED)) return result; diff -u --recursive --new-file v2.3.99-pre1/linux/fs/nfsd/nfsproc.c linux/fs/nfsd/nfsproc.c --- v2.3.99-pre1/linux/fs/nfsd/nfsproc.c Fri Mar 10 16:40:48 2000 +++ linux/fs/nfsd/nfsproc.c Fri Mar 17 15:43:32 2000 @@ -195,6 +195,7 @@ svc_fh *newfhp = &resp->fh; struct iattr *attr = &argp->attrs; struct inode *inode; + struct dentry *dchild; int nfserr, type, mode, rdonly = 0; dev_t rdev = NODEV; @@ -214,14 +215,24 @@ } else if (nfserr) goto done; - /* - * Do a lookup to verify the new file handle. - */ + nfserr = nfserr_acces; + if (!argp->len) + goto done; + nfserr = nfserr_exist; + if (isdotent(argp->name, argp->len)) + goto done; + fh_lock(dirfhp); + dchild = lookup_one(argp->name, dget(dirfhp->fh_dentry)); + nfserr = nfserrno(PTR_ERR(dchild)); + if (IS_ERR(dchild)) + goto out_unlock; fh_init(newfhp, NFS_FHSIZE); - nfserr = nfsd_lookup(rqstp, dirfhp, argp->name, argp->len, newfhp); + nfserr = fh_compose(newfhp, dirfhp->fh_export, dchild); + if (!nfserr && !dchild->d_inode) + nfserr = nfserr_noent; if (nfserr) { if (nfserr != nfserr_noent) - goto done; + goto out_unlock; /* * If the new file handle wasn't verified, we can't tell * whether the file exists or not. Time to bail ... @@ -230,22 +241,11 @@ if (!newfhp->fh_dverified) { printk(KERN_WARNING "nfsd_proc_create: file handle not verified\n"); - goto done; + goto out_unlock; } } - /* - * Lock the parent directory and check for existence. 
- */ - nfserr = fh_lock_parent(dirfhp, newfhp->fh_dentry); - if (nfserr) - goto done; inode = newfhp->fh_dentry->d_inode; - if (inode && newfhp->fh_handle.fh_fileid_type == 0) - /* inode might have been instantiated while we slept */ - nfserr = fh_update(newfhp); - if (nfserr) - goto done; /* Unfudge the mode bits */ if (attr->ia_valid & ATTR_MODE) { diff -u --recursive --new-file v2.3.99-pre1/linux/fs/nfsd/vfs.c linux/fs/nfsd/vfs.c --- v2.3.99-pre1/linux/fs/nfsd/vfs.c Fri Mar 10 16:40:48 2000 +++ linux/fs/nfsd/vfs.c Fri Mar 17 15:43:32 2000 @@ -52,9 +52,6 @@ */ #define IS_ISMNDLK(i) (S_ISREG((i)->i_mode) && MANDATORY_LOCK(i)) -/* Check for dir entries '.' and '..' */ -#define isdotent(n, l) (l < 3 && n[0] == '.' && (l == 1 || n[1] == '.')) - /* * This is a cache of readahead params that help us choose the proper * readahead strategy. Initially, we set all readahead parameters to 0 @@ -77,47 +74,6 @@ static struct raparms * raparml = NULL; static struct raparms * raparm_cache = NULL; - -/* - * We need to do a check-parent every time - * after we have locked the parent - to verify - * that the parent is still our parent and - * that we are still hashed onto it.. - * - * This is required in case two processes race - * on removing (or moving) the same entry: the - * parent lock will serialize them, but the - * other process will be too late.. - * - * Note that this nfsd_check_parent is identical - * the check_parent in linux/fs/namei.c. - */ -#define nfsd_check_parent(dir, dentry) \ - ((dir) == (dentry)->d_parent && !d_unhashed(dentry)) - -/* - * Lock a parent directory following the VFS locking protocol. - */ -int -fh_lock_parent(struct svc_fh *parent_fh, struct dentry *dchild) -{ - fh_lock(parent_fh); - /* - * Make sure the parent->child relationship still holds, - * and that the child is still hashed. 
- */ - if (nfsd_check_parent(parent_fh->fh_dentry, dchild)) - return 0; - - printk(KERN_WARNING - "fh_lock_parent: %s/%s parent changed or child unhashed\n", - dchild->d_parent->d_name.name, dchild->d_name.name); - - fh_unlock(parent_fh); - return nfserr_noent; -} - - /* * Look up one component of a pathname. * N.B. After this call _both_ fhp and resfh need an fh_put @@ -156,35 +112,57 @@ err = nfserr_acces; /* Lookup the name, but don't follow links */ - if (strcmp(name,"..")==0 && dparent->d_covers != dparent) - dchild = dget(dparent); - else + if (strcmp(name, "..")==0) { + /* checking mountpoint crossing is very different when stepping up */ + if (dparent == exp->ex_dentry) { + if (!EX_CROSSMNT(exp)) + dchild = dget(dparent); /* .. == . just like at / */ + else + { + struct svc_export *exp2 = NULL; + struct dentry *dp; + dchild = dparent->d_covers->d_parent; + for (dp=dchild; + exp2 == NULL && dp->d_covers->d_parent != dp; + dp=dp->d_covers->d_parent) + exp2 = exp_get(exp->ex_client, dp->d_inode->i_dev, dp->d_inode->i_ino); + if (exp2==NULL || dchild->d_sb != exp2->ex_dentry->d_sb) { + dchild = dget(dparent); + } else { + dget(dchild); + exp = exp2; + } + } + } else + dchild = dget(dparent->d_parent); + } else { dchild = lookup_dentry(name, dget(dparent), 0); - if (IS_ERR(dchild)) - goto out_nfserr; - /* - * check if we have crossed a mount point ... 
- */ - if (dchild->d_sb != dparent->d_sb) { - struct svc_export *exp2 = NULL; - exp2 = exp_get(rqstp->rq_client, - dchild->d_inode->i_dev, - dchild->d_inode->i_ino); - if (exp2 && EX_CROSSMNT(exp2)) - /* successfully crossed mount point */ - exp = exp2; - else if (dchild->d_covers->d_sb == dparent->d_sb) { - /* stay in the original filesystem */ - struct dentry *tdentry = dget(dchild->d_covers); - dput(dchild); - dchild = tdentry; - } else { - /* This cannot possibly happen */ - printk("nfsd_lookup: %s/%s impossible mount point!\n", dparent->d_name.name, dchild->d_name.name); - dput(dchild); - err = nfserr_acces; - goto out; + if (IS_ERR(dchild)) + goto out_nfserr; + /* + * check if we have crossed a mount point ... + */ + if (dchild->d_sb != dparent->d_sb) { + struct svc_export *exp2 = NULL; + exp2 = exp_get(rqstp->rq_client, + dchild->d_inode->i_dev, + dchild->d_inode->i_ino); + if (exp2 && EX_CROSSMNT(exp2)) + /* successfully crossed mount point */ + exp = exp2; + else if (dchild->d_covers->d_sb == dparent->d_sb) { + /* stay in the original filesystem */ + struct dentry *tdentry = dget(dchild->d_covers); + dput(dchild); + dchild = tdentry; + } else { + /* This cannot possibly happen */ + printk("nfsd_lookup: %s/%s impossible mount point!\n", dparent->d_name.name, dchild->d_name.name); + dput(dchild); + err = nfserr_acces; + goto out; + } } } /* @@ -216,6 +194,7 @@ int imode; int err; kernel_cap_t saved_cap = 0; + int size_change = 0; if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE)) accmode |= MAY_WRITE; @@ -305,14 +284,31 @@ saved_cap = current->cap_effective; cap_clear(current->cap_effective); } +#ifdef CONFIG_QUOTA + /* DQUOT_TRANSFER needs both ia_uid and ia_gid defined */ + if (iap->ia_valid & (ATTR_UID|ATTR_GID)) { + if (! (iap->ia_valid & ATTR_UID)) + iap->ia_uid = inode->i_uid; + if (! 
(iap->ia_valid & ATTR_GID)) + iap->ia_gid = inode->i_gid; + iap->ia_valid |= ATTR_UID|ATTR_GID; + } +#endif /* CONFIG_QUOTA */ + if (iap->ia_valid & ATTR_SIZE) { fh_lock(fhp); + size_change = 1; + } +#ifdef CONFIG_QUOTA + if (iap->ia_valid & (ATTR_UID|ATTR_GID)) + err = DQUOT_TRANSFER(dentry, iap); + else +#endif err = notify_change(dentry, iap); + if (size_change) { fh_unlock(fhp); put_write_access(inode); } - else - err = notify_change(dentry, iap); if (current->fsuid != 0) current->cap_effective = saved_cap; if (err) @@ -647,11 +643,11 @@ uid_t saved_euid; #endif - if (!cnt) - goto out; err = nfsd_open(rqstp, fhp, S_IFREG, MAY_WRITE, &file); if (err) goto out; + if (!cnt) + goto out_close; err = nfserr_perm; if (!file.f_op->write) goto out_close; @@ -812,6 +808,9 @@ err = nfserr_perm; if (!flen) goto out; + err = nfserr_exist; + if (isdotent(fname, flen)) + goto out; err = fh_verify(rqstp, fhp, S_IFDIR, MAY_CREATE); if (err) @@ -829,14 +828,11 @@ */ if (!resfhp->fh_dverified) { /* called from nfsd_proc_mkdir, or possibly nfsd3_proc_create */ - dchild = lookup_dentry(fname, dget(dentry), 0); + fh_lock(fhp); + dchild = lookup_one(fname, dget(dentry)); err = PTR_ERR(dchild); if (IS_ERR(dchild)) goto out_nfserr; - /* Lock the parent and check for errors ... */ - err = fh_lock_parent(fhp, dchild); - if (err) - goto out; err = fh_compose(resfhp, fhp->fh_export, dchild); if (err) goto out; @@ -934,6 +930,9 @@ err = nfserr_perm; if (!flen) goto out; + err = nfserr_exist; + if (isdotent(fname, flen)) + goto out; if (!(iap->ia_valid & ATTR_MODE)) iap->ia_mode = 0; err = fh_verify(rqstp, fhp, S_IFDIR, MAY_CREATE); @@ -948,21 +947,16 @@ err = nfserr_notdir; if(!dirp->i_op || !dirp->i_op->lookup) goto out; + fh_lock(fhp); /* * Compose the response file handle. 
*/ - dchild = lookup_dentry(fname, dget(dentry), 0); + dchild = lookup_one(fname, dget(dentry)); err = PTR_ERR(dchild); if(IS_ERR(dchild)) goto out_nfserr; - /* - * We must lock the directory before we check for the inode. - */ - err = fh_lock_parent(fhp, dchild); - if (err) - goto out; err = fh_compose(resfhp, fhp->fh_export, dchild); if (err) goto out; @@ -1096,24 +1090,20 @@ err = nfserr_noent; if (!flen || !plen) goto out; + err = nfserr_exist; + if (isdotent(fname, flen)) + goto out; err = fh_verify(rqstp, fhp, S_IFDIR, MAY_CREATE); if (err) goto out; + fh_lock(fhp); dentry = fhp->fh_dentry; - - dnew = lookup_dentry(fname, dget(dentry), 0); + dnew = lookup_one(fname, dget(dentry)); err = PTR_ERR(dnew); if (IS_ERR(dnew)) goto out_nfserr; - /* - * Lock the parent before checking for existence - */ - err = fh_lock_parent(fhp, dnew); - if (err) - goto out_compose; - err = vfs_symlink(dentry->d_inode, dnew, path); if (!err) { if (EX_ISSYNC(fhp->fh_export)) @@ -1134,7 +1124,6 @@ fh_unlock(fhp); /* Compose the fh so the dentry will be freed ... */ -out_compose: cerr = fh_compose(resfhp, fhp->fh_export, dnew); if (err==0) err = cerr; out: @@ -1167,20 +1156,18 @@ err = nfserr_perm; if (!len) goto out; + err = nfserr_exist; + if (isdotent(fname, len)) + goto out; + fh_lock(ffhp); ddir = ffhp->fh_dentry; dirp = ddir->d_inode; - dnew = lookup_dentry(fname, dget(ddir), 0); + dnew = lookup_one(fname, dget(ddir)); err = PTR_ERR(dnew); if (IS_ERR(dnew)) goto out_nfserr; - /* - * Lock the parent before checking for existence - */ - err = fh_lock_parent(ffhp, dnew); - if (err) - goto out_dput; dold = tfhp->fh_dentry; dest = dold->d_inode; @@ -1199,7 +1186,6 @@ } fh_unlock(ffhp); -out_dput: dput(dnew); out: return err; @@ -1210,29 +1196,6 @@ } /* - * This follows the model of double_lock() in the VFS. 
- */ -static inline void nfsd_double_down(struct semaphore *s1, struct semaphore *s2) -{ - if (s1 != s2) { - if ((unsigned long) s1 < (unsigned long) s2) { - struct semaphore *tmp = s1; - s1 = s2; - s2 = tmp; - } - down(s1); - } - down(s2); -} - -static inline void nfsd_double_up(struct semaphore *s1, struct semaphore *s2) -{ - up(s1); - if (s1 != s2) - up(s2); -} - -/* * Rename a file * N.B. After this call _both_ ffhp and tfhp need an fh_put */ @@ -1261,15 +1224,12 @@ if (fdir->i_dev != tdir->i_dev) goto out; - /* N.B. We shouldn't need this ... dentry layer handles it */ err = nfserr_perm; - if (!flen || (fname[0] == '.' && - (flen == 1 || (flen == 2 && fname[1] == '.'))) || - !tlen || (tname[0] == '.' && - (tlen == 1 || (tlen == 2 && tname[1] == '.')))) + if (!flen || isdotent(fname, flen) || !tlen || isdotent(tname, tlen)) goto out; - odentry = lookup_dentry(fname, dget(fdentry), 0); + double_down(&tdir->i_sem, &fdir->i_sem); + odentry = lookup_one(fname, dget(fdentry)); err = PTR_ERR(odentry); if (IS_ERR(odentry)) goto out_nfserr; @@ -1278,16 +1238,11 @@ if (!odentry->d_inode) goto out_dput_old; - ndentry = lookup_dentry(tname, dget(tdentry), 0); + ndentry = lookup_one(tname, dget(tdentry)); err = PTR_ERR(ndentry); if (IS_ERR(ndentry)) goto out_dput_old; - /* - * Lock the parent directories. - */ - nfsd_double_down(&tdir->i_sem, &fdir->i_sem); - #ifdef CONFIG_NFSD_V3 /* Fill in the pre-op attr for the wcc data for both * tdir and fdir @@ -1296,19 +1251,11 @@ fill_pre_wcc(tfhp); #endif /* CONFIG_NFSD_V3 */ - err = -ENOENT; - /* GAM3 check for parent changes after locking. 
*/ - if (nfsd_check_parent(fdentry, odentry) && - nfsd_check_parent(tdentry, ndentry)) { - - err = vfs_rename(fdir, odentry, tdir, ndentry); - if (!err && EX_ISSYNC(tfhp->fh_export)) { - nfsd_sync_dir(tdentry); - nfsd_sync_dir(fdentry); - } - } else - dprintk("nfsd: Caught race in nfsd_rename"); - + err = vfs_rename(fdir, odentry, tdir, ndentry); + if (!err && EX_ISSYNC(tfhp->fh_export)) { + nfsd_sync_dir(tdentry); + nfsd_sync_dir(fdentry); + } #ifdef CONFIG_NFSD_V3 /* Fill in the post-op attr for the wcc data for both * tdir and fdir @@ -1316,7 +1263,7 @@ fill_post_wcc(ffhp); fill_post_wcc(tfhp); #endif /* CONFIG_NFSD_V3 */ - nfsd_double_up(&tdir->i_sem, &fdir->i_sem); + double_up(&tdir->i_sem, &fdir->i_sem); dput(ndentry); out_dput_old: @@ -1343,7 +1290,6 @@ struct inode *dirp; int err; - /* N.B. We shouldn't need this test ... handled by dentry layer */ err = nfserr_acces; if (!flen || isdotent(fname, flen)) goto out; @@ -1351,10 +1297,11 @@ if (err) goto out; + fh_lock(fhp); dentry = fhp->fh_dentry; dirp = dentry->d_inode; - rdentry = lookup_dentry(fname, dget(dentry), 0); + rdentry = lookup_one(fname, dget(dentry)); err = PTR_ERR(rdentry); if (IS_ERR(rdentry)) goto out_nfserr; @@ -1365,12 +1312,6 @@ goto out; } - err = fh_lock_parent(fhp, rdentry); - if (err) { - dput(rdentry); - goto out; - } - if (type != S_IFDIR) { /* It's UNLINK */ err = vfs_unlink(dirp, rdentry); } else { /* It's RMDIR */ @@ -1436,6 +1377,7 @@ * may choose to do less. 
*/ inode = file.f_dentry->d_inode; + down(&inode->i_sem); while (1) { oldlen = cd.buflen; @@ -1444,9 +1386,7 @@ file.f_inode->i_dev, file.f_inode->i_ino, (int) file.f_pos, (int) oldlen, (int) cd.buflen); */ - down(&inode->i_sem); err = file.f_op->readdir(&file, &cd, (filldir_t) func); - up(&inode->i_sem); if (err < 0) goto out_nfserr; if (oldlen == cd.buflen) @@ -1454,6 +1394,7 @@ if (cd.eob) break; } + up(&inode->i_sem); /* If we didn't fill the buffer completely, we're at EOF */ eof = !cd.eob; @@ -1482,6 +1423,7 @@ return err; out_nfserr: + up(&inode->i_sem); err = nfserrno(err); goto out_close; } diff -u --recursive --new-file v2.3.99-pre1/linux/fs/open.c linux/fs/open.c --- v2.3.99-pre1/linux/fs/open.c Fri Mar 10 16:40:48 2000 +++ linux/fs/open.c Thu Mar 16 10:45:16 2000 @@ -644,7 +644,7 @@ * for the internal routines (ie open_namei()/follow_link() etc). 00 is * used by symlinks. */ -struct file *filp_open(const char * filename, int flags, int mode) +struct file *filp_open(const char * filename, int flags, int mode, struct dentry * base) { struct inode * inode; struct dentry * dentry; @@ -661,7 +661,7 @@ flag++; if (flag & O_TRUNC) flag |= 2; - dentry = open_namei(filename,flag,mode); + dentry = __open_namei(filename, flag, mode, base); error = PTR_ERR(dentry); if (IS_ERR(dentry)) goto cleanup_file; @@ -787,7 +787,7 @@ if (fd >= 0) { struct file * f; lock_kernel(); - f = filp_open(tmp, flags, mode); + f = filp_open(tmp, flags, mode, NULL); unlock_kernel(); error = PTR_ERR(f); if (IS_ERR(f)) diff -u --recursive --new-file v2.3.99-pre1/linux/fs/partitions/acorn.c linux/fs/partitions/acorn.c --- v2.3.99-pre1/linux/fs/partitions/acorn.c Tue Mar 14 19:10:40 2000 +++ linux/fs/partitions/acorn.c Sat Mar 18 11:38:04 2000 @@ -98,7 +98,7 @@ } } - printk(" >"); + printk(" >\n"); if (hd->part[riscix_minor].nr_sects > 2) hd->part[riscix_minor].nr_sects = 2; @@ -139,7 +139,7 @@ le32_to_cpu(linuxp->nr_sects)); linuxp ++; } - printk(" >"); + printk(" >\n"); /* * Prevent 
someone doing a mkswap or mkfs on this partition */ diff -u --recursive --new-file v2.3.99-pre1/linux/fs/partitions/msdos.c linux/fs/partitions/msdos.c --- v2.3.99-pre1/linux/fs/partitions/msdos.c Sat Feb 26 22:31:54 2000 +++ linux/fs/partitions/msdos.c Thu Mar 16 14:01:05 2000 @@ -347,19 +347,19 @@ unsigned char *data; int mask = (1 << hd->minor_shift) - 1; int sector_size = get_hardsect_size(dev) / 512; -#ifdef CONFIG_BLK_DEV_IDE +#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE) int tested_for_xlate = 0; read_mbr: -#endif /* CONFIG_BLK_DEV_IDE */ +#endif /* (CONFIG_BLK_DEV_IDE) || (CONFIG_BLK_DEV_IDE_MODULE) */ if (!(bh = bread(dev,0,get_ptable_blocksize(dev)))) { if (warn_no_part) printk(" unable to read partition table\n"); return -1; } data = bh->b_data; -#ifdef CONFIG_BLK_DEV_IDE +#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE) check_table: -#endif /* CONFIG_BLK_DEV_IDE */ +#endif /* (CONFIG_BLK_DEV_IDE) || (CONFIG_BLK_DEV_IDE_MODULE) */ /* Use bforget(), because we may have changed the disk geometry */ if (*(unsigned short *) (0x1fe + data) != cpu_to_le16(MSDOS_LABEL_MAGIC)) { bforget(bh); @@ -367,7 +367,7 @@ } p = (struct partition *) (0x1be + data); -#ifdef CONFIG_BLK_DEV_IDE +#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE) if (!tested_for_xlate++) { /* Do this only once per disk */ /* * Look for various forms of IDE disk geometry translation @@ -423,7 +423,7 @@ (void) ide_xlate_1024(dev, 2, heads, " [PTBL]"); } } -#endif /* CONFIG_BLK_DEV_IDE */ +#endif /* (CONFIG_BLK_DEV_IDE) || (CONFIG_BLK_DEV_IDE_MODULE) */ /* Look for partitions in two passes: First find the primary partitions, and the DOS-type extended partitions. 
diff -u --recursive --new-file v2.3.99-pre1/linux/fs/smbfs/inode.c linux/fs/smbfs/inode.c --- v2.3.99-pre1/linux/fs/smbfs/inode.c Tue Mar 14 19:10:40 2000 +++ linux/fs/smbfs/inode.c Wed Mar 15 16:51:14 2000 @@ -546,7 +546,7 @@ smb_current_vmalloced = 0; #endif - return init_smb_fs(); + return register_filesystem(&smb_fs_type); } static void __exit exit_smb_fs(void) diff -u --recursive --new-file v2.3.99-pre1/linux/fs/super.c linux/fs/super.c --- v2.3.99-pre1/linux/fs/super.c Tue Mar 14 19:10:40 2000 +++ linux/fs/super.c Fri Mar 17 16:46:27 2000 @@ -247,32 +247,9 @@ return fs; } - struct vfsmount *vfsmntlist = NULL; static struct vfsmount *vfsmnttail = NULL, *mru_vfsmnt = NULL; -/* - * This part handles the management of the list of mounted filesystems. - */ -struct vfsmount *lookup_vfsmnt(kdev_t dev) -{ - struct vfsmount *lptr; - - if (vfsmntlist == NULL) - return NULL; - - if (mru_vfsmnt != NULL && mru_vfsmnt->mnt_dev == dev) - return (mru_vfsmnt); - - for (lptr = vfsmntlist; lptr != NULL; lptr = lptr->mnt_next) - if (lptr->mnt_dev == dev) { - mru_vfsmnt = lptr; - return (lptr); - } - - return NULL; -} - static struct vfsmount *add_vfsmnt(struct super_block *sb, const char *dev_name, const char *dir_name) { @@ -286,11 +263,6 @@ lptr->mnt_sb = sb; lptr->mnt_dev = sb->s_dev; - lptr->mnt_flags = sb->s_flags; - - sema_init(&lptr->mnt_dquot.dqio_sem, 1); - sema_init(&lptr->mnt_dquot.dqoff_sem, 1); - lptr->mnt_dquot.flags = 0; /* N.B. Is it really OK to have a vfsmount without names? */ if (dev_name && !IS_ERR(tmp = getname(dev_name))) { @@ -399,9 +371,9 @@ len += sprintf( buf + len, "%s %s %s %s", tmp->mnt_devname, path, tmp->mnt_sb->s_type->name, - tmp->mnt_flags & MS_RDONLY ? "ro" : "rw" ); + tmp->mnt_sb->s_flags & MS_RDONLY ? 
"ro" : "rw" ); for (fs_infop = fs_info; fs_infop->flag; fs_infop++) { - if (tmp->mnt_flags & fs_infop->flag) { + if (tmp->mnt_sb->s_flags & fs_infop->flag) { strcpy(buf + len, fs_infop->str); len += strlen(fs_infop->str); } @@ -592,6 +564,9 @@ sema_init(&s->s_vfs_rename_sem,1); sema_init(&s->s_nfsd_free_path_sem,1); s->s_type = type; + sema_init(&s->s_dquot.dqio_sem, 1); + sema_init(&s->s_dquot.dqoff_sem, 1); + s->s_dquot.flags = 0; lock_super(s); if (!type->read_super(s, data, silent)) goto out_fail; @@ -606,7 +581,6 @@ s->s_dev = 0; s->s_bdev = 0; s->s_type = NULL; - put_filesystem(type); unlock_super(s); return NULL; } @@ -688,7 +662,7 @@ * on the device. If the umount fails, too bad -- there * are no quotas running any more. Just turn them on again. */ - DQUOT_OFF(dev); + DQUOT_OFF(sb); acct_auto_close(dev); /* @@ -990,7 +964,6 @@ static int do_remount_sb(struct super_block *sb, int flags, char *data) { int retval; - struct vfsmount *vfsmnt; if (!(flags & MS_RDONLY) && sb->s_dev && is_read_only(sb->s_dev)) return -EACCES; @@ -1007,9 +980,6 @@ return retval; } sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | (flags & MS_RMT_MASK); - vfsmnt = lookup_vfsmnt(sb->s_dev); - if (vfsmnt) - vfsmnt->mnt_flags = sb->s_flags; /* * Invalidate the inodes, as some mount options may be changed. @@ -1093,8 +1063,8 @@ * aren't used, as the syscall assumes we are talking to an older * version that didn't understand them. 
*/ -asmlinkage long sys_mount(char * dev_name, char * dir_name, char * type, - unsigned long new_flags, void * data) +long do_sys_mount(char * dev_name, char * dir_name, unsigned long type_page, + unsigned long new_flags, unsigned long data_page) { struct file_system_type * fstype; struct dentry * dentry = NULL; @@ -1102,28 +1072,19 @@ struct block_device *bdev = NULL; int retval; unsigned long flags = 0; - unsigned long page = 0; if (!capable(CAP_SYS_ADMIN)) return -EPERM; - lock_kernel(); + if ((new_flags & (MS_MGC_MSK | MS_REMOUNT)) == (MS_MGC_VAL | MS_REMOUNT)) { - retval = copy_mount_options (data, &page); - if (retval < 0) - goto out; retval = do_remount(dir_name, new_flags & ~MS_MGC_MSK & ~MS_REMOUNT, - (char *) page); - free_page(page); + (char *) data_page); goto out; } - retval = copy_mount_options (type, &page); - if (retval < 0) - goto out; - fstype = get_fs_type((char *) page); - free_page(page); + fstype = get_fs_type((char *) type_page); retval = -ENODEV; if (!fstype) goto out; @@ -1150,22 +1111,50 @@ if (bdops) bdev->bd_op = bdops; } - page = 0; - if ((new_flags & MS_MGC_MSK) == MS_MGC_VAL) { + if ((new_flags & MS_MGC_MSK) == MS_MGC_VAL) flags = new_flags & ~MS_MGC_MSK; - retval = copy_mount_options(data, &page); - if (retval < 0) - goto dput_and_out; - } + retval = do_mount(bdev, dev_name, dir_name, fstype->name, flags, - (void *) page); - free_page(page); + (void *) data_page); dput_and_out: dput(dentry); fs_out: put_filesystem(fstype); out: + return retval; +} + +asmlinkage long sys_mount(char * dev_name, char * dir_name, char * type, + unsigned long new_flags, void * data) +{ + int retval; + unsigned long data_page = 0; + unsigned long type_page = 0; + + lock_kernel(); + retval = copy_mount_options (type, &type_page); + if (retval < 0) + goto out; + + /* copy_mount_options allows a NULL user pointer, + * and just returns zero in that case. But if we + * allow the type to be NULL we will crash. + * Previously we did not check this case. 
+ */ + if (type_page == 0) { + retval = -EINVAL; + goto out; + } + + retval = copy_mount_options (data, &data_page); + if (retval >= 0) { + retval = do_sys_mount(dev_name, dir_name, type_page, + new_flags, data_page); + free_page(data_page); + } + free_page(type_page); +out: unlock_kernel(); return retval; } @@ -1255,7 +1244,7 @@ * devfs crap and checking it right now. Later. */ if (!ROOT_DEV) - panic("I have no root and I want to sream"); + panic("I have no root and I want to scream"); bdev = bdget(kdev_t_to_nr(ROOT_DEV)); if (!bdev) diff -u --recursive --new-file v2.3.99-pre1/linux/fs/ufs/dir.c linux/fs/ufs/dir.c --- v2.3.99-pre1/linux/fs/ufs/dir.c Tue Mar 14 19:10:40 2000 +++ linux/fs/ufs/dir.c Fri Mar 17 20:49:19 2000 @@ -106,9 +106,9 @@ bh, offset)) { /* On error, skip the f_pos to the next block. */ - filp->f_pos = (filp->f_pos & + filp->f_pos = (filp->f_pos | (sb->s_blocksize - 1)) + - sb->s_blocksize; + 1; brelse (bh); return stored; } diff -u --recursive --new-file v2.3.99-pre1/linux/include/asm-alpha/core_apecs.h linux/include/asm-alpha/core_apecs.h --- v2.3.99-pre1/linux/include/asm-alpha/core_apecs.h Thu Feb 10 17:11:19 2000 +++ linux/include/asm-alpha/core_apecs.h Fri Mar 17 13:01:37 2000 @@ -511,32 +511,27 @@ #ifdef __WANT_IO_DEF -#define __inb apecs_inb -#define __inw apecs_inw -#define __inl apecs_inl -#define __outb apecs_outb -#define __outw apecs_outw -#define __outl apecs_outl -#define __readb apecs_readb -#define __readw apecs_readw -#define __readl apecs_readl -#define __readq apecs_readq -#define __writeb apecs_writeb -#define __writew apecs_writew -#define __writel apecs_writel -#define __writeq apecs_writeq -#define __ioremap apecs_ioremap -#define __is_ioaddr apecs_is_ioaddr +#define __inb(p) apecs_inb((unsigned long)(p)) +#define __inw(p) apecs_inw((unsigned long)(p)) +#define __inl(p) apecs_inl((unsigned long)(p)) +#define __outb(x,p) apecs_outb((x),(unsigned long)(p)) +#define __outw(x,p) apecs_outw((x),(unsigned long)(p)) +#define 
__outl(x,p) apecs_outl((x),(unsigned long)(p)) +#define __readb(a) apecs_readb((unsigned long)(a)) +#define __readw(a) apecs_readw((unsigned long)(a)) +#define __readl(a) apecs_readl((unsigned long)(a)) +#define __readq(a) apecs_readq((unsigned long)(a)) +#define __writeb(x,a) apecs_writeb((x),(unsigned long)(a)) +#define __writew(x,a) apecs_writew((x),(unsigned long)(a)) +#define __writel(x,a) apecs_writel((x),(unsigned long)(a)) +#define __writeq(x,a) apecs_writeq((x),(unsigned long)(a)) +#define __ioremap(a) apecs_ioremap((unsigned long)(a)) +#define __is_ioaddr(a) apecs_is_ioaddr((unsigned long)(a)) -#define inb(port) \ - (__builtin_constant_p((port))?__inb(port):_inb(port)) -#define outb(x, port) \ - (__builtin_constant_p((port))?__outb((x),(port)):_outb((x),(port))) - -#define __raw_readl(a) __readl((unsigned long)(a)) -#define __raw_readq(a) __readq((unsigned long)(a)) -#define __raw_writel(v,a) __writel((v),(unsigned long)(a)) -#define __raw_writeq(v,a) __writeq((v),(unsigned long)(a)) +#define __raw_readl(a) __readl(a) +#define __raw_readq(a) __readq(a) +#define __raw_writel(v,a) __writel((v),(a)) +#define __raw_writeq(v,a) __writeq((v),(a)) #endif /* __WANT_IO_DEF */ diff -u --recursive --new-file v2.3.99-pre1/linux/include/asm-alpha/core_cia.h linux/include/asm-alpha/core_cia.h --- v2.3.99-pre1/linux/include/asm-alpha/core_cia.h Thu Feb 10 17:11:19 2000 +++ linux/include/asm-alpha/core_cia.h Sun Mar 19 10:26:21 2000 @@ -4,12 +4,18 @@ /* Define to experiment with fitting everything into one 512MB HAE window. */ #define CIA_ONE_HAE_WINDOW 1 +#include #include #include /* - * CIA is the internal name for the 2117x chipset which provides + * CIA is the internal name for the 21171 chipset which provides * memory controller and PCI access for the 21164 chip based systems. + * Also supported here is the 21172 (CIA-2) and 21174 (PYXIS). 
+ * + * The lineage is a bit confused, since the 21174 was reportedly started + * from the 21171 Pass 1 mask, and so is missing bug fixes that appear + * in 21171 Pass 2 and 21172, but it also contains additional features. * * This file is based on: * @@ -22,24 +28,8 @@ * */ -/*------------------------------------------------------------------------** -** ** -** EB164 I/O procedures ** -** ** -** inport[b|w|t|l], outport[b|w|t|l] 8:16:24:32 IO xfers ** -** inportbxt: 8 bits only ** -** inport: alias of inportw ** -** outport: alias of outportw ** -** ** -** inmem[b|w|t|l], outmem[b|w|t|l] 8:16:24:32 ISA memory xfers ** -** inmembxt: 8 bits only ** -** inmem: alias of inmemw ** -** outmem: alias of outmemw ** -** ** -**------------------------------------------------------------------------*/ - - -/* CIA ADDRESS BIT DEFINITIONS +/* + * CIA ADDRESS BIT DEFINITIONS * * 3333 3333 3322 2222 2222 1111 1111 11 * 9876 5432 1098 7654 3210 9876 5432 1098 7654 3210 @@ -78,91 +68,168 @@ #define CIA_MEM_R3_MASK 0x03ffffff /* SPARSE Mem region 3 mask is 26 bits */ /* - * 21171-CA Control and Status Registers (p4-1) - */ -#define CIA_IOC_CIA_REV (IDENT_ADDR + 0x8740000080UL) -#define CIA_IOC_PCI_LAT (IDENT_ADDR + 0x87400000C0UL) -#define CIA_IOC_CIA_CTRL (IDENT_ADDR + 0x8740000100UL) -#define CIA_IOC_CIA_CNFG (IDENT_ADDR + 0x8740000140UL) -#define CIA_IOC_HAE_MEM (IDENT_ADDR + 0x8740000400UL) -#define CIA_IOC_HAE_IO (IDENT_ADDR + 0x8740000440UL) -#define CIA_IOC_CFG (IDENT_ADDR + 0x8740000480UL) -#define CIA_IOC_CACK_EN (IDENT_ADDR + 0x8740000600UL) - -/* - * 21171-CA Diagnostic Registers (p4-2) - */ -#define CIA_IOC_CIA_DIAG (IDENT_ADDR + 0x8740002000UL) -#define CIA_IOC_DIAG_CHECK (IDENT_ADDR + 0x8740003000UL) - -/* - * 21171-CA Performance Monitor registers (p4-3) - */ -#define CIA_IOC_PERF_MONITOR (IDENT_ADDR + 0x8740004000UL) -#define CIA_IOC_PERF_CONTROL (IDENT_ADDR + 0x8740004040UL) - -/* - * 21171-CA Error registers (p4-3) - */ -#define CIA_IOC_CPU_ERR0 (IDENT_ADDR + 
0x8740008000UL) -#define CIA_IOC_CPU_ERR1 (IDENT_ADDR + 0x8740008040UL) -#define CIA_IOC_CIA_ERR (IDENT_ADDR + 0x8740008200UL) -#define CIA_IOC_CIA_STAT (IDENT_ADDR + 0x8740008240UL) -#define CIA_IOC_ERR_MASK (IDENT_ADDR + 0x8740008280UL) -#define CIA_IOC_CIA_SYN (IDENT_ADDR + 0x8740008300UL) -#define CIA_IOC_MEM_ERR0 (IDENT_ADDR + 0x8740008400UL) -#define CIA_IOC_MEM_ERR1 (IDENT_ADDR + 0x8740008440UL) -#define CIA_IOC_PCI_ERR0 (IDENT_ADDR + 0x8740008800UL) -#define CIA_IOC_PCI_ERR1 (IDENT_ADDR + 0x8740008840UL) -#define CIA_IOC_PCI_ERR3 (IDENT_ADDR + 0x8740008880UL) - -/* - * 2117A-CA PCI Address Translation Registers. - */ -#define CIA_IOC_PCI_TBIA (IDENT_ADDR + 0x8760000100UL) - -#define CIA_IOC_PCI_W0_BASE (IDENT_ADDR + 0x8760000400UL) -#define CIA_IOC_PCI_W0_MASK (IDENT_ADDR + 0x8760000440UL) -#define CIA_IOC_PCI_T0_BASE (IDENT_ADDR + 0x8760000480UL) - -#define CIA_IOC_PCI_W1_BASE (IDENT_ADDR + 0x8760000500UL) -#define CIA_IOC_PCI_W1_MASK (IDENT_ADDR + 0x8760000540UL) -#define CIA_IOC_PCI_T1_BASE (IDENT_ADDR + 0x8760000580UL) - -#define CIA_IOC_PCI_W2_BASE (IDENT_ADDR + 0x8760000600UL) -#define CIA_IOC_PCI_W2_MASK (IDENT_ADDR + 0x8760000640UL) -#define CIA_IOC_PCI_T2_BASE (IDENT_ADDR + 0x8760000680UL) - -#define CIA_IOC_PCI_W3_BASE (IDENT_ADDR + 0x8760000700UL) -#define CIA_IOC_PCI_W3_MASK (IDENT_ADDR + 0x8760000740UL) -#define CIA_IOC_PCI_T3_BASE (IDENT_ADDR + 0x8760000780UL) - -/* - * 21171-CA System configuration registers (p4-3) + * 21171-CA Control and Status Registers */ -#define CIA_IOC_MCR (IDENT_ADDR + 0x8750000000UL) -#define CIA_IOC_MBA0 (IDENT_ADDR + 0x8750000600UL) -#define CIA_IOC_MBA2 (IDENT_ADDR + 0x8750000680UL) -#define CIA_IOC_MBA4 (IDENT_ADDR + 0x8750000700UL) -#define CIA_IOC_MBA6 (IDENT_ADDR + 0x8750000780UL) -#define CIA_IOC_MBA8 (IDENT_ADDR + 0x8750000800UL) -#define CIA_IOC_MBAA (IDENT_ADDR + 0x8750000880UL) -#define CIA_IOC_MBAC (IDENT_ADDR + 0x8750000900UL) -#define CIA_IOC_MBAE (IDENT_ADDR + 0x8750000980UL) -#define CIA_IOC_TMG0 
(IDENT_ADDR + 0x8750000B00UL) -#define CIA_IOC_TMG1 (IDENT_ADDR + 0x8750000B40UL) -#define CIA_IOC_TMG2 (IDENT_ADDR + 0x8750000B80UL) +#define CIA_IOC_CIA_REV (IDENT_ADDR + 0x8740000080UL) +# define CIA_REV_MASK 0xff +#define CIA_IOC_PCI_LAT (IDENT_ADDR + 0x87400000C0UL) +#define CIA_IOC_CIA_CTRL (IDENT_ADDR + 0x8740000100UL) +# define CIA_CTRL_PCI_EN (1 << 0) +# define CIA_CTRL_PCI_LOCK_EN (1 << 1) +# define CIA_CTRL_PCI_LOOP_EN (1 << 2) +# define CIA_CTRL_FST_BB_EN (1 << 3) +# define CIA_CTRL_PCI_MST_EN (1 << 4) +# define CIA_CTRL_PCI_MEM_EN (1 << 5) +# define CIA_CTRL_PCI_REQ64_EN (1 << 6) +# define CIA_CTRL_PCI_ACK64_EN (1 << 7) +# define CIA_CTRL_ADDR_PE_EN (1 << 8) +# define CIA_CTRL_PERR_EN (1 << 9) +# define CIA_CTRL_FILL_ERR_EN (1 << 10) +# define CIA_CTRL_MCHK_ERR_EN (1 << 11) +# define CIA_CTRL_ECC_CHK_EN (1 << 12) +# define CIA_CTRL_ASSERT_IDLE_BC (1 << 13) +# define CIA_CTRL_COM_IDLE_BC (1 << 14) +# define CIA_CTRL_CSR_IOA_BYPASS (1 << 15) +# define CIA_CTRL_IO_FLUSHREQ_EN (1 << 16) +# define CIA_CTRL_CPU_FLUSHREQ_EN (1 << 17) +# define CIA_CTRL_ARB_CPU_EN (1 << 18) +# define CIA_CTRL_EN_ARB_LINK (1 << 19) +# define CIA_CTRL_RD_TYPE_SHIFT 20 +# define CIA_CTRL_RL_TYPE_SHIFT 24 +# define CIA_CTRL_RM_TYPE_SHIFT 28 +# define CIA_CTRL_EN_DMA_RD_PERF (1 << 31) +#define CIA_IOC_CIA_CNFG (IDENT_ADDR + 0x8740000140UL) +# define CIA_CNFG_IOA_BWEN (1 << 0) +# define CIA_CNFG_PCI_MWEN (1 << 4) +# define CIA_CNFG_PCI_DWEN (1 << 5) +# define CIA_CNFG_PCI_WLEN (1 << 8) +#define CIA_IOC_FLASH_CTRL (IDENT_ADDR + 0x8740000200UL) +#define CIA_IOC_HAE_MEM (IDENT_ADDR + 0x8740000400UL) +#define CIA_IOC_HAE_IO (IDENT_ADDR + 0x8740000440UL) +#define CIA_IOC_CFG (IDENT_ADDR + 0x8740000480UL) +#define CIA_IOC_CACK_EN (IDENT_ADDR + 0x8740000600UL) +# define CIA_CACK_EN_LOCK_EN (1 << 0) +# define CIA_CACK_EN_MB_EN (1 << 1) +# define CIA_CACK_EN_SET_DIRTY_EN (1 << 2) +# define CIA_CACK_EN_BC_VICTIM_EN (1 << 3) + + +/* + * 21171-CA Diagnostic Registers + */ +#define 
CIA_IOC_CIA_DIAG (IDENT_ADDR + 0x8740002000UL) +#define CIA_IOC_DIAG_CHECK (IDENT_ADDR + 0x8740003000UL) + +/* + * 21171-CA Performance Monitor registers + */ +#define CIA_IOC_PERF_MONITOR (IDENT_ADDR + 0x8740004000UL) +#define CIA_IOC_PERF_CONTROL (IDENT_ADDR + 0x8740004040UL) + +/* + * 21171-CA Error registers + */ +#define CIA_IOC_CPU_ERR0 (IDENT_ADDR + 0x8740008000UL) +#define CIA_IOC_CPU_ERR1 (IDENT_ADDR + 0x8740008040UL) +#define CIA_IOC_CIA_ERR (IDENT_ADDR + 0x8740008200UL) +# define CIA_ERR_COR_ERR (1 << 0) +# define CIA_ERR_UN_COR_ERR (1 << 1) +# define CIA_ERR_CPU_PE (1 << 2) +# define CIA_ERR_MEM_NEM (1 << 3) +# define CIA_ERR_PCI_SERR (1 << 4) +# define CIA_ERR_PERR (1 << 5) +# define CIA_ERR_PCI_ADDR_PE (1 << 6) +# define CIA_ERR_RCVD_MAS_ABT (1 << 7) +# define CIA_ERR_RCVD_TAR_ABT (1 << 8) +# define CIA_ERR_PA_PTE_INV (1 << 9) +# define CIA_ERR_FROM_WRT_ERR (1 << 10) +# define CIA_ERR_IOA_TIMEOUT (1 << 11) +# define CIA_ERR_LOST_CORR_ERR (1 << 16) +# define CIA_ERR_LOST_UN_CORR_ERR (1 << 17) +# define CIA_ERR_LOST_CPU_PE (1 << 18) +# define CIA_ERR_LOST_MEM_NEM (1 << 19) +# define CIA_ERR_LOST_PERR (1 << 21) +# define CIA_ERR_LOST_PCI_ADDR_PE (1 << 22) +# define CIA_ERR_LOST_RCVD_MAS_ABT (1 << 23) +# define CIA_ERR_LOST_RCVD_TAR_ABT (1 << 24) +# define CIA_ERR_LOST_PA_PTE_INV (1 << 25) +# define CIA_ERR_LOST_FROM_WRT_ERR (1 << 26) +# define CIA_ERR_LOST_IOA_TIMEOUT (1 << 27) +# define CIA_ERR_VALID (1 << 31) +#define CIA_IOC_CIA_STAT (IDENT_ADDR + 0x8740008240UL) +#define CIA_IOC_ERR_MASK (IDENT_ADDR + 0x8740008280UL) +#define CIA_IOC_CIA_SYN (IDENT_ADDR + 0x8740008300UL) +#define CIA_IOC_MEM_ERR0 (IDENT_ADDR + 0x8740008400UL) +#define CIA_IOC_MEM_ERR1 (IDENT_ADDR + 0x8740008440UL) +#define CIA_IOC_PCI_ERR0 (IDENT_ADDR + 0x8740008800UL) +#define CIA_IOC_PCI_ERR1 (IDENT_ADDR + 0x8740008840UL) +#define CIA_IOC_PCI_ERR3 (IDENT_ADDR + 0x8740008880UL) + +/* + * 21171-CA System configuration registers + */ +#define CIA_IOC_MCR (IDENT_ADDR + 0x8750000000UL) 
+#define CIA_IOC_MBA0 (IDENT_ADDR + 0x8750000600UL) +#define CIA_IOC_MBA2 (IDENT_ADDR + 0x8750000680UL) +#define CIA_IOC_MBA4 (IDENT_ADDR + 0x8750000700UL) +#define CIA_IOC_MBA6 (IDENT_ADDR + 0x8750000780UL) +#define CIA_IOC_MBA8 (IDENT_ADDR + 0x8750000800UL) +#define CIA_IOC_MBAA (IDENT_ADDR + 0x8750000880UL) +#define CIA_IOC_MBAC (IDENT_ADDR + 0x8750000900UL) +#define CIA_IOC_MBAE (IDENT_ADDR + 0x8750000980UL) +#define CIA_IOC_TMG0 (IDENT_ADDR + 0x8750000B00UL) +#define CIA_IOC_TMG1 (IDENT_ADDR + 0x8750000B40UL) +#define CIA_IOC_TMG2 (IDENT_ADDR + 0x8750000B80UL) + +/* + * 2117A-CA PCI Address and Scatter-Gather Registers. + */ +#define CIA_IOC_PCI_TBIA (IDENT_ADDR + 0x8760000100UL) + +#define CIA_IOC_PCI_W0_BASE (IDENT_ADDR + 0x8760000400UL) +#define CIA_IOC_PCI_W0_MASK (IDENT_ADDR + 0x8760000440UL) +#define CIA_IOC_PCI_T0_BASE (IDENT_ADDR + 0x8760000480UL) + +#define CIA_IOC_PCI_W1_BASE (IDENT_ADDR + 0x8760000500UL) +#define CIA_IOC_PCI_W1_MASK (IDENT_ADDR + 0x8760000540UL) +#define CIA_IOC_PCI_T1_BASE (IDENT_ADDR + 0x8760000580UL) + +#define CIA_IOC_PCI_W2_BASE (IDENT_ADDR + 0x8760000600UL) +#define CIA_IOC_PCI_W2_MASK (IDENT_ADDR + 0x8760000640UL) +#define CIA_IOC_PCI_T2_BASE (IDENT_ADDR + 0x8760000680UL) + +#define CIA_IOC_PCI_W3_BASE (IDENT_ADDR + 0x8760000700UL) +#define CIA_IOC_PCI_W3_MASK (IDENT_ADDR + 0x8760000740UL) +#define CIA_IOC_PCI_T3_BASE (IDENT_ADDR + 0x8760000780UL) + +#define CIA_IOC_PCI_W_DAC (IDENT_ADDR + 0x87600007C0UL) + +/* + * 2117A-CA Address Translation Registers. + */ + +/* 8 tag registers, the first 4 of which are lockable. */ +#define CIA_IOC_TB_TAGn(n) \ + (IDENT_ADDR + 0x8760000800UL + (n)*0x40) + +/* 4 page registers per tag register. 
*/ +#define CIA_IOC_TBn_PAGEm(n,m) \ + (IDENT_ADDR + 0x8760001000UL + (n)*0x100 + (m)*0x40) /* * Memory spaces: */ -#define CIA_IACK_SC (IDENT_ADDR + 0x8720000000UL) -#define CIA_CONF (IDENT_ADDR + 0x8700000000UL) +#define CIA_IACK_SC (IDENT_ADDR + 0x8720000000UL) +#define CIA_CONF (IDENT_ADDR + 0x8700000000UL) #define CIA_IO (IDENT_ADDR + 0x8580000000UL) #define CIA_SPARSE_MEM (IDENT_ADDR + 0x8000000000UL) #define CIA_SPARSE_MEM_R2 (IDENT_ADDR + 0x8400000000UL) #define CIA_SPARSE_MEM_R3 (IDENT_ADDR + 0x8500000000UL) #define CIA_DENSE_MEM (IDENT_ADDR + 0x8600000000UL) +#define CIA_BW_MEM (IDENT_ADDR + 0x8800000000UL) +#define CIA_BW_IO (IDENT_ADDR + 0x8900000000UL) +#define CIA_BW_CFG_0 (IDENT_ADDR + 0x8a00000000UL) +#define CIA_BW_CFG_1 (IDENT_ADDR + 0x8b00000000UL) /* * ALCOR's GRU ASIC registers @@ -182,23 +249,19 @@ #define XLT_GRU_INT_REQ_BITS 0x80003fffUL #define GRU_INT_REQ_BITS (alpha_mv.sys.cia.gru_int_req_bits+0) - /* - * Bit definitions for I/O Controller status register 0: + * PYXIS interrupt control registers */ -#define CIA_IOC_STAT0_CMD 0xf -#define CIA_IOC_STAT0_ERR (1<<4) -#define CIA_IOC_STAT0_LOST (1<<5) -#define CIA_IOC_STAT0_THIT (1<<6) -#define CIA_IOC_STAT0_TREF (1<<7) -#define CIA_IOC_STAT0_CODE_SHIFT 8 -#define CIA_IOC_STAT0_CODE_MASK 0x7 -#define CIA_IOC_STAT0_P_NBR_SHIFT 13 -#define CIA_IOC_STAT0_P_NBR_MASK 0x7ffff - -#if !CIA_ONE_HAE_WINDOW -#define CIA_HAE_ADDRESS CIA_IOC_HAE_MEM -#endif +#define PYXIS_INT_REQ (IDENT_ADDR + 0x87A0000000UL) +#define PYXIS_INT_MASK (IDENT_ADDR + 0x87A0000040UL) +#define PYXIS_INT_HILO (IDENT_ADDR + 0x87A00000C0UL) +#define PYXIS_INT_ROUTE (IDENT_ADDR + 0x87A0000140UL) +#define PYXIS_GPO (IDENT_ADDR + 0x87A0000180UL) +#define PYXIS_INT_CNFG (IDENT_ADDR + 0x87A00001C0UL) +#define PYXIS_RT_COUNT (IDENT_ADDR + 0x87A0000200UL) +#define PYXIS_INT_TIME (IDENT_ADDR + 0x87A0000240UL) +#define PYXIS_IIC_CTRL (IDENT_ADDR + 0x87A00002C0UL) +#define PYXIS_RESET (IDENT_ADDR + 0x8780000900UL) /* * Data structure for 
handling CIA machine checks. @@ -206,52 +269,6 @@ /* System-specific info. */ struct el_CIA_sysdata_mcheck { -#if 0 - /* ??? Where did this come from. It appears to bear no - relation to the cia logout written in the milo sources. - Who knows what happens in the srm console... */ - unsigned long coma_gcr; - unsigned long coma_edsr; - unsigned long coma_ter; - unsigned long coma_elar; - unsigned long coma_ehar; - unsigned long coma_ldlr; - unsigned long coma_ldhr; - unsigned long coma_base0; - unsigned long coma_base1; - unsigned long coma_base2; - unsigned long coma_cnfg0; - unsigned long coma_cnfg1; - unsigned long coma_cnfg2; - unsigned long epic_dcsr; - unsigned long epic_pear; - unsigned long epic_sear; - unsigned long epic_tbr1; - unsigned long epic_tbr2; - unsigned long epic_pbr1; - unsigned long epic_pbr2; - unsigned long epic_pmr1; - unsigned long epic_pmr2; - unsigned long epic_harx1; - unsigned long epic_harx2; - unsigned long epic_pmlt; - unsigned long epic_tag0; - unsigned long epic_tag1; - unsigned long epic_tag2; - unsigned long epic_tag3; - unsigned long epic_tag4; - unsigned long epic_tag5; - unsigned long epic_tag6; - unsigned long epic_tag7; - unsigned long epic_data0; - unsigned long epic_data1; - unsigned long epic_data2; - unsigned long epic_data3; - unsigned long epic_data4; - unsigned long epic_data5; - unsigned long epic_data6; - unsigned long epic_data7; -#else unsigned long cpu_err0; unsigned long cpu_err1; unsigned long cia_err; @@ -263,7 +280,6 @@ unsigned long pci_err0; unsigned long pci_err1; unsigned long pci_err2; -#endif }; @@ -282,6 +298,8 @@ * get at PCI memory and I/O. */ +#define vucp volatile unsigned char * +#define vusp volatile unsigned short * #define vip volatile int * #define vuip volatile unsigned int * #define vulp volatile unsigned long * @@ -325,6 +343,44 @@ mb(); } +__EXTERN_INLINE unsigned int cia_bwx_inb(unsigned long addr) +{ + /* ??? I wish I could get rid of this. 
But there's no ioremap + equivalent for I/O space. PCI I/O can be forced into the + CIA BWX I/O region, but that doesn't take care of legacy + ISA crap. */ + + return __kernel_ldbu(*(vucp)(addr+CIA_BW_IO)); +} + +__EXTERN_INLINE void cia_bwx_outb(unsigned char b, unsigned long addr) +{ + __kernel_stb(b, *(vucp)(addr+CIA_BW_IO)); + mb(); +} + +__EXTERN_INLINE unsigned int cia_bwx_inw(unsigned long addr) +{ + return __kernel_ldwu(*(vusp)(addr+CIA_BW_IO)); +} + +__EXTERN_INLINE void cia_bwx_outw(unsigned short b, unsigned long addr) +{ + __kernel_stw(b, *(vusp)(addr+CIA_BW_IO)); + mb(); +} + +__EXTERN_INLINE unsigned int cia_bwx_inl(unsigned long addr) +{ + return *(vuip)(addr+CIA_BW_IO); +} + +__EXTERN_INLINE void cia_bwx_outl(unsigned int b, unsigned long addr) +{ + *(vuip)(addr+CIA_BW_IO) = b; + mb(); +} + /* * Memory functions. 64-bit and 32-bit accesses are done through @@ -362,15 +418,7 @@ { unsigned long result; -#if !CIA_ONE_HAE_WINDOW - unsigned long msb; - /* Note that CIA_DENSE_MEM has no bits not masked in these - operations, so we don't have to subtract it back out. */ - msb = addr & 0xE0000000; - set_hae(msb); -#endif addr &= CIA_MEM_R1_MASK; - result = *(vip) ((addr << 5) + CIA_SPARSE_MEM + 0x00); return __kernel_extbl(result, addr & 3); } @@ -379,15 +427,7 @@ { unsigned long result; -#if !CIA_ONE_HAE_WINDOW - unsigned long msb; - /* Note that CIA_DENSE_MEM has no bits not masked in these - operations, so we don't have to subtract it back out. */ - msb = addr & 0xE0000000; - set_hae(msb); -#endif addr &= CIA_MEM_R1_MASK; - result = *(vip) ((addr << 5) + CIA_SPARSE_MEM + 0x08); return __kernel_extwl(result, addr & 3); } @@ -396,15 +436,7 @@ { unsigned long w; -#if !CIA_ONE_HAE_WINDOW - unsigned long msb; - /* Note that CIA_DENSE_MEM has no bits not masked in these - operations, so we don't have to subtract it back out. 
*/ - msb = addr & 0xE0000000; - set_hae(msb); -#endif addr &= CIA_MEM_R1_MASK; - w = __kernel_insbl(b, addr & 3); *(vuip) ((addr << 5) + CIA_SPARSE_MEM + 0x00) = w; } @@ -413,15 +445,7 @@ { unsigned long w; -#if !CIA_ONE_HAE_WINDOW - unsigned long msb; - /* Note that CIA_DENSE_MEM has no bits not masked in these - operations, so we don't have to subtract it back out. */ - msb = addr & 0xE0000000; - set_hae(msb); -#endif addr &= CIA_MEM_R1_MASK; - w = __kernel_inswl(b, addr & 3); *(vuip) ((addr << 5) + CIA_SPARSE_MEM + 0x08) = w; } @@ -451,44 +475,117 @@ return addr + CIA_DENSE_MEM; } +__EXTERN_INLINE unsigned long cia_bwx_readb(unsigned long addr) +{ + return __kernel_ldbu(*(vucp)addr); +} + +__EXTERN_INLINE unsigned long cia_bwx_readw(unsigned long addr) +{ + return __kernel_ldwu(*(vusp)addr); +} + +__EXTERN_INLINE unsigned long cia_bwx_readl(unsigned long addr) +{ + return *(vuip)addr; +} + +__EXTERN_INLINE unsigned long cia_bwx_readq(unsigned long addr) +{ + return *(vulp)addr; +} + +__EXTERN_INLINE void cia_bwx_writeb(unsigned char b, unsigned long addr) +{ + __kernel_stb(b, *(vucp)addr); +} + +__EXTERN_INLINE void cia_bwx_writew(unsigned short b, unsigned long addr) +{ + __kernel_stw(b, *(vusp)addr); +} + +__EXTERN_INLINE void cia_bwx_writel(unsigned int b, unsigned long addr) +{ + *(vuip)addr = b; +} + +__EXTERN_INLINE void cia_bwx_writeq(unsigned long b, unsigned long addr) +{ + *(vulp)addr = b; +} + +__EXTERN_INLINE unsigned long cia_bwx_ioremap(unsigned long addr) +{ + return addr + CIA_BW_MEM; +} + __EXTERN_INLINE int cia_is_ioaddr(unsigned long addr) { return addr >= IDENT_ADDR + 0x8000000000UL; } +#undef vucp +#undef vusp #undef vip #undef vuip #undef vulp #ifdef __WANT_IO_DEF -#define __inb cia_inb -#define __inw cia_inw -#define __inl cia_inl -#define __outb cia_outb -#define __outw cia_outw -#define __outl cia_outl - -#define __readb cia_readb -#define __readw cia_readw -#define __writeb cia_writeb -#define __writew cia_writew -#define __readl 
cia_readl -#define __readq cia_readq -#define __writel cia_writel -#define __writeq cia_writeq -#define __ioremap cia_ioremap -#define __is_ioaddr cia_is_ioaddr - -#define inb(port) \ - (__builtin_constant_p((port))?__inb(port):_inb(port)) -#define outb(x, port) \ - (__builtin_constant_p((port))?__outb((x),(port)):_outb((x),(port))) - -#define __raw_readl(a) __readl((unsigned long)(a)) -#define __raw_readq(a) __readq((unsigned long)(a)) -#define __raw_writel(v,a) __writel((v),(unsigned long)(a)) -#define __raw_writeq(v,a) __writeq((v),(unsigned long)(a)) +#ifdef CONFIG_ALPHA_PYXIS +# define __inb(p) cia_bwx_inb((unsigned long)(p)) +# define __inw(p) cia_bwx_inw((unsigned long)(p)) +# define __inl(p) cia_bwx_inl((unsigned long)(p)) +# define __outb(x,p) cia_bwx_outb((x),(unsigned long)(p)) +# define __outw(x,p) cia_bwx_outw((x),(unsigned long)(p)) +# define __outl(x,p) cia_bwx_outl((x),(unsigned long)(p)) +# define __readb(a) cia_bwx_readb((unsigned long)(a)) +# define __readw(a) cia_bwx_readw((unsigned long)(a)) +# define __readl(a) cia_bwx_readl((unsigned long)(a)) +# define __readq(a) cia_bwx_readq((unsigned long)(a)) +# define __writeb(x,a) cia_bwx_writeb((x),(unsigned long)(a)) +# define __writew(x,a) cia_bwx_writew((x),(unsigned long)(a)) +# define __writel(x,a) cia_bwx_writel((x),(unsigned long)(a)) +# define __writeq(x,a) cia_bwx_writeq((x),(unsigned long)(a)) +# define __ioremap(a) cia_bwx_ioremap((unsigned long)(a)) +# define inb(p) __inb(p) +# define inw(p) __inw(p) +# define inl(p) __inl(p) +# define outb(x,p) __outb((x),(p)) +# define outw(x,p) __outw((x),(p)) +# define outl(x,p) __outl((x),(p)) +# define __raw_readb(a) __readb(a) +# define __raw_readw(a) __readw(a) +# define __raw_readl(a) __readl(a) +# define __raw_readq(a) __readq(a) +# define __raw_writeb(x,a) __writeb((x),(a)) +# define __raw_writew(x,a) __writew((x),(a)) +# define __raw_writel(x,a) __writel((x),(a)) +# define __raw_writeq(x,a) __writeq((x),(a)) +#else +# define __inb(p) 
cia_inb((unsigned long)(p)) +# define __inw(p) cia_inw((unsigned long)(p)) +# define __inl(p) cia_inl((unsigned long)(p)) +# define __outb(x,p) cia_outb((x),(unsigned long)(p)) +# define __outw(x,p) cia_outw((x),(unsigned long)(p)) +# define __outl(x,p) cia_outl((x),(unsigned long)(p)) +# define __readb(a) cia_readb((unsigned long)(a)) +# define __readw(a) cia_readw((unsigned long)(a)) +# define __readl(a) cia_readl((unsigned long)(a)) +# define __readq(a) cia_readq((unsigned long)(a)) +# define __writeb(x,a) cia_writeb((x),(unsigned long)(a)) +# define __writew(x,a) cia_writew((x),(unsigned long)(a)) +# define __writel(x,a) cia_writel((x),(unsigned long)(a)) +# define __writeq(x,a) cia_writeq((x),(unsigned long)(a)) +# define __ioremap(a) cia_ioremap((unsigned long)(a)) +# define __raw_readl(a) __readl(a) +# define __raw_readq(a) __readq(a) +# define __raw_writel(v,a) __writel((v),(a)) +# define __raw_writeq(v,a) __writeq((v),(a)) +#endif /* PYXIS */ + +#define __is_ioaddr(a) cia_is_ioaddr((unsigned long)(a)) #endif /* __WANT_IO_DEF */ diff -u --recursive --new-file v2.3.99-pre1/linux/include/asm-alpha/core_irongate.h linux/include/asm-alpha/core_irongate.h --- v2.3.99-pre1/linux/include/asm-alpha/core_irongate.h Thu Feb 10 17:11:19 2000 +++ linux/include/asm-alpha/core_irongate.h Fri Mar 17 13:01:37 2000 @@ -493,38 +493,37 @@ #ifdef __WANT_IO_DEF -#define __inb irongate_inb -#define __inw irongate_inw -#define __inl irongate_inl -#define __outb irongate_outb -#define __outw irongate_outw -#define __outl irongate_outl -#define __readb irongate_readb -#define __readw irongate_readw -#define __writeb irongate_writeb -#define __writew irongate_writew -#define __readl irongate_readl -#define __readq irongate_readq -#define __writel irongate_writel -#define __writeq irongate_writeq -#define __ioremap irongate_ioremap -#define __is_ioaddr irongate_is_ioaddr +#define __inb(p) irongate_inb((unsigned long)(p)) +#define __inw(p) irongate_inw((unsigned long)(p)) +#define 
__inl(p) irongate_inl((unsigned long)(p)) +#define __outb(x,p) irongate_outb((x),(unsigned long)(p)) +#define __outw(x,p) irongate_outw((x),(unsigned long)(p)) +#define __outl(x,p) irongate_outl((x),(unsigned long)(p)) +#define __readb(a) irongate_readb((unsigned long)(a)) +#define __readw(a) irongate_readw((unsigned long)(a)) +#define __readl(a) irongate_readl((unsigned long)(a)) +#define __readq(a) irongate_readq((unsigned long)(a)) +#define __writeb(x,a) irongate_writeb((x),(unsigned long)(a)) +#define __writew(x,a) irongate_writew((x),(unsigned long)(a)) +#define __writel(x,a) irongate_writel((x),(unsigned long)(a)) +#define __writeq(x,a) irongate_writeq((x),(unsigned long)(a)) +#define __ioremap(a) irongate_ioremap((unsigned long)(a)) +#define __is_ioaddr(a) irongate_is_ioaddr((unsigned long)(a)) -#define inb(port) __inb((port)) -#define inw(port) __inw((port)) -#define inl(port) __inl((port)) -#define outb(v, port) __outb((v),(port)) -#define outw(v, port) __outw((v),(port)) -#define outl(v, port) __outl((v),(port)) - -#define __raw_readb(a) __readb((unsigned long)(a)) -#define __raw_readw(a) __readw((unsigned long)(a)) -#define __raw_readl(a) __readl((unsigned long)(a)) -#define __raw_readq(a) __readq((unsigned long)(a)) -#define __raw_writeb(v,a) __writeb((v),(unsigned long)(a)) -#define __raw_writew(v,a) __writew((v),(unsigned long)(a)) -#define __raw_writel(v,a) __writel((v),(unsigned long)(a)) -#define __raw_writeq(v,a) __writeq((v),(unsigned long)(a)) +#define inb(p) __inb(p) +#define inw(p) __inw(p) +#define inl(p) __inl(p) +#define outb(x,p) __outb((x),(p)) +#define outw(x,p) __outw((x),(p)) +#define outl(x,p) __outl((x),(p)) +#define __raw_readb(a) __readb(a) +#define __raw_readw(a) __readw(a) +#define __raw_readl(a) __readl(a) +#define __raw_readq(a) __readq(a) +#define __raw_writeb(v,a) __writeb((v),(a)) +#define __raw_writew(v,a) __writew((v),(a)) +#define __raw_writel(v,a) __writel((v),(a)) +#define __raw_writeq(v,a) __writeq((v),(a)) #endif /* 
__WANT_IO_DEF */ diff -u --recursive --new-file v2.3.99-pre1/linux/include/asm-alpha/core_lca.h linux/include/asm-alpha/core_lca.h --- v2.3.99-pre1/linux/include/asm-alpha/core_lca.h Thu Feb 10 17:11:19 2000 +++ linux/include/asm-alpha/core_lca.h Fri Mar 17 13:01:38 2000 @@ -360,32 +360,27 @@ #ifdef __WANT_IO_DEF -#define __inb lca_inb -#define __inw lca_inw -#define __inl lca_inl -#define __outb lca_outb -#define __outw lca_outw -#define __outl lca_outl -#define __readb lca_readb -#define __readw lca_readw -#define __writeb lca_writeb -#define __writew lca_writew -#define __readl lca_readl -#define __readq lca_readq -#define __writel lca_writel -#define __writeq lca_writeq -#define __ioremap lca_ioremap -#define __is_ioaddr lca_is_ioaddr +#define __inb(p) lca_inb((unsigned long)(p)) +#define __inw(p) lca_inw((unsigned long)(p)) +#define __inl(p) lca_inl((unsigned long)(p)) +#define __outb(x,p) lca_outb((x),(unsigned long)(p)) +#define __outw(x,p) lca_outw((x),(unsigned long)(p)) +#define __outl(x,p) lca_outl((x),(unsigned long)(p)) +#define __readb(a) lca_readb((unsigned long)(a)) +#define __readw(a) lca_readw((unsigned long)(a)) +#define __readl(a) lca_readl((unsigned long)(a)) +#define __readq(a) lca_readq((unsigned long)(a)) +#define __writeb(x,a) lca_writeb((x),(unsigned long)(a)) +#define __writew(x,a) lca_writew((x),(unsigned long)(a)) +#define __writel(x,a) lca_writel((x),(unsigned long)(a)) +#define __writeq(x,a) lca_writeq((x),(unsigned long)(a)) +#define __ioremap(a) lca_ioremap((unsigned long)(a)) +#define __is_ioaddr(a) lca_is_ioaddr((unsigned long)(a)) -#define inb(port) \ - (__builtin_constant_p((port))?__inb(port):_inb(port)) -#define outb(x, port) \ - (__builtin_constant_p((port))?__outb((x),(port)):_outb((x),(port))) - -#define __raw_readl(a) __readl((unsigned long)(a)) -#define __raw_readq(a) __readq((unsigned long)(a)) -#define __raw_writel(v,a) __writel((v),(unsigned long)(a)) -#define __raw_writeq(v,a) __writeq((v),(unsigned long)(a)) +#define 
__raw_readl(a) __readl(a) +#define __raw_readq(a) __readq(a) +#define __raw_writel(v,a) __writel((v),(a)) +#define __raw_writeq(v,a) __writeq((v),(a)) #endif /* __WANT_IO_DEF */ diff -u --recursive --new-file v2.3.99-pre1/linux/include/asm-alpha/core_mcpcia.h linux/include/asm-alpha/core_mcpcia.h --- v2.3.99-pre1/linux/include/asm-alpha/core_mcpcia.h Sun Feb 20 21:12:39 2000 +++ linux/include/asm-alpha/core_mcpcia.h Fri Mar 17 13:01:38 2000 @@ -438,32 +438,27 @@ #ifdef __WANT_IO_DEF -#define __inb mcpcia_inb -#define __inw mcpcia_inw -#define __inl mcpcia_inl -#define __outb mcpcia_outb -#define __outw mcpcia_outw -#define __outl mcpcia_outl -#define __readb mcpcia_readb -#define __readw mcpcia_readw -#define __writeb mcpcia_writeb -#define __writew mcpcia_writew -#define __readl mcpcia_readl -#define __readq mcpcia_readq -#define __writel mcpcia_writel -#define __writeq mcpcia_writeq -#define __ioremap mcpcia_ioremap -#define __is_ioaddr mcpcia_is_ioaddr +#define __inb(p) mcpcia_inb((unsigned long)(p)) +#define __inw(p) mcpcia_inw((unsigned long)(p)) +#define __inl(p) mcpcia_inl((unsigned long)(p)) +#define __outb(x,p) mcpcia_outb((x),(unsigned long)(p)) +#define __outw(x,p) mcpcia_outw((x),(unsigned long)(p)) +#define __outl(x,p) mcpcia_outl((x),(unsigned long)(p)) +#define __readb(a) mcpcia_readb((unsigned long)(a)) +#define __readw(a) mcpcia_readw((unsigned long)(a)) +#define __readl(a) mcpcia_readl((unsigned long)(a)) +#define __readq(a) mcpcia_readq((unsigned long)(a)) +#define __writeb(x,a) mcpcia_writeb((x),(unsigned long)(a)) +#define __writew(x,a) mcpcia_writew((x),(unsigned long)(a)) +#define __writel(x,a) mcpcia_writel((x),(unsigned long)(a)) +#define __writeq(x,a) mcpcia_writeq((x),(unsigned long)(a)) +#define __ioremap(a) mcpcia_ioremap((unsigned long)(a)) +#define __is_ioaddr(a) mcpcia_is_ioaddr((unsigned long)(a)) -# define inb(port) \ - (__builtin_constant_p((port))?__inb(port):_inb(port)) -# define outb(x, port) \ - 
(__builtin_constant_p((port))?__outb((x),(port)):_outb((x),(port))) - -#define __raw_readl(a) __readl((unsigned long)(a)) -#define __raw_readq(a) __readq((unsigned long)(a)) -#define __raw_writel(v,a) __writel((v),(unsigned long)(a)) -#define __raw_writeq(v,a) __writeq((v),(unsigned long)(a)) +#define __raw_readl(a) __readl(a) +#define __raw_readq(a) __readq(a) +#define __raw_writel(v,a) __writel((v),(a)) +#define __raw_writeq(v,a) __writeq((v),(a)) #endif /* __WANT_IO_DEF */ diff -u --recursive --new-file v2.3.99-pre1/linux/include/asm-alpha/core_polaris.h linux/include/asm-alpha/core_polaris.h --- v2.3.99-pre1/linux/include/asm-alpha/core_polaris.h Thu Feb 10 17:11:19 2000 +++ linux/include/asm-alpha/core_polaris.h Fri Mar 17 13:01:38 2000 @@ -170,38 +170,37 @@ #ifdef __WANT_IO_DEF -#define __inb polaris_inb -#define __inw polaris_inw -#define __inl polaris_inl -#define __outb polaris_outb -#define __outw polaris_outw -#define __outl polaris_outl -#define __readb polaris_readb -#define __readw polaris_readw -#define __writeb polaris_writeb -#define __writew polaris_writew -#define __readl polaris_readl -#define __readq polaris_readq -#define __writel polaris_writel -#define __writeq polaris_writeq -#define __ioremap polaris_ioremap -#define __is_ioaddr polaris_is_ioaddr +#define __inb(p) polaris_inb((unsigned long)(p)) +#define __inw(p) polaris_inw((unsigned long)(p)) +#define __inl(p) polaris_inl((unsigned long)(p)) +#define __outb(x,p) polaris_outb((x),(unsigned long)(p)) +#define __outw(x,p) polaris_outw((x),(unsigned long)(p)) +#define __outl(x,p) polaris_outl((x),(unsigned long)(p)) +#define __readb(a) polaris_readb((unsigned long)(a)) +#define __readw(a) polaris_readw((unsigned long)(a)) +#define __readl(a) polaris_readl((unsigned long)(a)) +#define __readq(a) polaris_readq((unsigned long)(a)) +#define __writeb(x,a) polaris_writeb((x),(unsigned long)(a)) +#define __writew(x,a) polaris_writew((x),(unsigned long)(a)) +#define __writel(x,a) 
polaris_writel((x),(unsigned long)(a)) +#define __writeq(x,a) polaris_writeq((x),(unsigned long)(a)) +#define __ioremap(a) polaris_ioremap((unsigned long)(a)) +#define __is_ioaddr(a) polaris_is_ioaddr((unsigned long)(a)) -#define inb(port) __inb((port)) -#define inw(port) __inw((port)) -#define inl(port) __inl((port)) -#define outb(v, port) __outb((v),(port)) -#define outw(v, port) __outw((v),(port)) -#define outl(v, port) __outl((v),(port)) - -#define __raw_readb(a) __readb((unsigned long)(a)) -#define __raw_readw(a) __readw((unsigned long)(a)) -#define __raw_readl(a) __readl((unsigned long)(a)) -#define __raw_readq(a) __readq((unsigned long)(a)) -#define __raw_writeb(v,a) __writeb((v),(unsigned long)(a)) -#define __raw_writew(v,a) __writew((v),(unsigned long)(a)) -#define __raw_writel(v,a) __writel((v),(unsigned long)(a)) -#define __raw_writeq(v,a) __writeq((v),(unsigned long)(a)) +#define inb(p) __inb(p) +#define inw(p) __inw(p) +#define inl(p) __inl(p) +#define outb(x,p) __outb((x),(p)) +#define outw(x,p) __outw((x),(p)) +#define outl(x,p) __outl((x),(p)) +#define __raw_readb(a) __readb(a) +#define __raw_readw(a) __readw(a) +#define __raw_readl(a) __readl(a) +#define __raw_readq(a) __readq(a) +#define __raw_writeb(v,a) __writeb((v),(a)) +#define __raw_writew(v,a) __writew((v),(a)) +#define __raw_writel(v,a) __writel((v),(a)) +#define __raw_writeq(v,a) __writeq((v),(a)) #endif /* __WANT_IO_DEF */ diff -u --recursive --new-file v2.3.99-pre1/linux/include/asm-alpha/core_pyxis.h linux/include/asm-alpha/core_pyxis.h --- v2.3.99-pre1/linux/include/asm-alpha/core_pyxis.h Thu Feb 10 17:11:19 2000 +++ linux/include/asm-alpha/core_pyxis.h Wed Dec 31 16:00:00 1969 @@ -1,444 +0,0 @@ -#ifndef __ALPHA_PYXIS__H__ -#define __ALPHA_PYXIS__H__ - -#include -#include - -/* - * PYXIS is the internal name for a core logic chipset which provides - * memory controller and PCI access for the 21164A chip based systems. 
- * - * This file is based on: - * - * Pyxis Chipset Spec - * 14-Jun-96 - * Rev. X2.0 - * - */ - -/*------------------------------------------------------------------------** -** ** -** I/O procedures ** -** ** -** inport[b|w|t|l], outport[b|w|t|l] 8:16:24:32 IO xfers ** -** inportbxt: 8 bits only ** -** inport: alias of inportw ** -** outport: alias of outportw ** -** ** -** inmem[b|w|t|l], outmem[b|w|t|l] 8:16:24:32 ISA memory xfers ** -** inmembxt: 8 bits only ** -** inmem: alias of inmemw ** -** outmem: alias of outmemw ** -** ** -**------------------------------------------------------------------------*/ - - -/* PYXIS ADDRESS BIT DEFINITIONS - * - * 3333 3333 3322 2222 2222 1111 1111 11 - * 9876 5432 1098 7654 3210 9876 5432 1098 7654 3210 - * ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- - * 1 000 - * ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- - * | |\| - * | Byte Enable --+ | - * | Transfer Length --+ - * +-- IO space, not cached - * - * Byte Transfer - * Enable Length Transfer Byte Address - * adr<6:5> adr<4:3> Length Enable Adder - * --------------------------------------------- - * 00 00 Byte 1110 0x000 - * 01 00 Byte 1101 0x020 - * 10 00 Byte 1011 0x040 - * 11 00 Byte 0111 0x060 - * - * 00 01 Word 1100 0x008 - * 01 01 Word 1001 0x028 <= Not supported in this code. - * 10 01 Word 0011 0x048 - * - * 00 10 Tribyte 1000 0x010 - * 01 10 Tribyte 0001 0x030 - * - * 10 11 Longword 0000 0x058 - * - * Note that byte enables are asserted low. 
- * - */ - -#define PYXIS_MEM_R1_MASK 0x1fffffff /* SPARSE Mem region 1 mask is 29 bits */ -#define PYXIS_MEM_R2_MASK 0x07ffffff /* SPARSE Mem region 2 mask is 27 bits */ -#define PYXIS_MEM_R3_MASK 0x03ffffff /* SPARSE Mem region 3 mask is 26 bits */ - -/* - * General Registers - */ -#define PYXIS_REV (IDENT_ADDR + 0x8740000080UL) -#define PYXIS_PCI_LAT (IDENT_ADDR + 0x87400000C0UL) -#define PYXIS_CTRL (IDENT_ADDR + 0x8740000100UL) -#define PYXIS_CTRL1 (IDENT_ADDR + 0x8740000140UL) -#define PYXIS_FLASH_CTRL (IDENT_ADDR + 0x8740000200UL) - -#define PYXIS_HAE_MEM (IDENT_ADDR + 0x8740000400UL) -#define PYXIS_HAE_IO (IDENT_ADDR + 0x8740000440UL) -#define PYXIS_CFG (IDENT_ADDR + 0x8740000480UL) - -/* - * Diagnostic Registers - */ -#define PYXIS_DIAG (IDENT_ADDR + 0x8740002000UL) -#define PYXIS_DIAG_CHECK (IDENT_ADDR + 0x8740003000UL) - -/* - * Performance Monitor registers - */ -#define PYXIS_PERF_MONITOR (IDENT_ADDR + 0x8740004000UL) -#define PYXIS_PERF_CONTROL (IDENT_ADDR + 0x8740004040UL) - -/* - * Error registers - */ -#define PYXIS_ERR (IDENT_ADDR + 0x8740008200UL) -#define PYXIS_STAT (IDENT_ADDR + 0x8740008240UL) -#define PYXIS_ERR_MASK (IDENT_ADDR + 0x8740008280UL) -#define PYXIS_SYN (IDENT_ADDR + 0x8740008300UL) -#define PYXIS_ERR_DATA (IDENT_ADDR + 0x8740008308UL) - -#define PYXIS_MEAR (IDENT_ADDR + 0x8740008400UL) -#define PYXIS_MESR (IDENT_ADDR + 0x8740008440UL) -#define PYXIS_PCI_ERR0 (IDENT_ADDR + 0x8740008800UL) -#define PYXIS_PCI_ERR1 (IDENT_ADDR + 0x8740008840UL) -#define PYXIS_PCI_ERR2 (IDENT_ADDR + 0x8740008880UL) - -/* - * PCI Address Translation Registers. 
- */ -#define PYXIS_TBIA (IDENT_ADDR + 0x8760000100UL) - -#define PYXIS_W0_BASE (IDENT_ADDR + 0x8760000400UL) -#define PYXIS_W0_MASK (IDENT_ADDR + 0x8760000440UL) -#define PYXIS_T0_BASE (IDENT_ADDR + 0x8760000480UL) - -#define PYXIS_W1_BASE (IDENT_ADDR + 0x8760000500UL) -#define PYXIS_W1_MASK (IDENT_ADDR + 0x8760000540UL) -#define PYXIS_T1_BASE (IDENT_ADDR + 0x8760000580UL) - -#define PYXIS_W2_BASE (IDENT_ADDR + 0x8760000600UL) -#define PYXIS_W2_MASK (IDENT_ADDR + 0x8760000640UL) -#define PYXIS_T2_BASE (IDENT_ADDR + 0x8760000680UL) - -#define PYXIS_W3_BASE (IDENT_ADDR + 0x8760000700UL) -#define PYXIS_W3_MASK (IDENT_ADDR + 0x8760000740UL) -#define PYXIS_T3_BASE (IDENT_ADDR + 0x8760000780UL) - -/* - * Memory Control registers - */ -#define PYXIS_MCR (IDENT_ADDR + 0x8750000000UL) - -/* - * Memory spaces: - */ -#define PYXIS_IACK_SC (IDENT_ADDR + 0x8720000000UL) -#define PYXIS_CONF (IDENT_ADDR + 0x8700000000UL) -#define PYXIS_IO (IDENT_ADDR + 0x8580000000UL) -#define PYXIS_SPARSE_MEM (IDENT_ADDR + 0x8000000000UL) -#define PYXIS_SPARSE_MEM_R2 (IDENT_ADDR + 0x8400000000UL) -#define PYXIS_SPARSE_MEM_R3 (IDENT_ADDR + 0x8500000000UL) -#define PYXIS_DENSE_MEM (IDENT_ADDR + 0x8600000000UL) - -/* - * Byte/Word PCI Memory Spaces: - */ -#define PYXIS_BW_MEM (IDENT_ADDR + 0x8800000000UL) -#define PYXIS_BW_IO (IDENT_ADDR + 0x8900000000UL) -#define PYXIS_BW_CFG_0 (IDENT_ADDR + 0x8a00000000UL) -#define PYXIS_BW_CFG_1 (IDENT_ADDR + 0x8b00000000UL) - -/* - * Interrupt Control registers - */ -#define PYXIS_INT_REQ (IDENT_ADDR + 0x87A0000000UL) -#define PYXIS_INT_MASK (IDENT_ADDR + 0x87A0000040UL) -#define PYXIS_INT_HILO (IDENT_ADDR + 0x87A00000C0UL) -#define PYXIS_INT_ROUTE (IDENT_ADDR + 0x87A0000140UL) -#define PYXIS_GPO (IDENT_ADDR + 0x87A0000180UL) -#define PYXIS_INT_CNFG (IDENT_ADDR + 0x87A00001C0UL) -#define PYXIS_RT_COUNT (IDENT_ADDR + 0x87A0000200UL) -#define PYXIS_INT_TIME (IDENT_ADDR + 0x87A0000240UL) -#define PYXIS_IIC_CTRL (IDENT_ADDR + 0x87A00002C0UL) -#define PYXIS_RESET 
(IDENT_ADDR + 0x8780000900UL) - -/* - * Bit definitions for I/O Controller status register 0: - */ -#define PYXIS_STAT0_CMD 0xf -#define PYXIS_STAT0_ERR (1<<4) -#define PYXIS_STAT0_LOST (1<<5) -#define PYXIS_STAT0_THIT (1<<6) -#define PYXIS_STAT0_TREF (1<<7) -#define PYXIS_STAT0_CODE_SHIFT 8 -#define PYXIS_STAT0_CODE_MASK 0x7 -#define PYXIS_STAT0_P_NBR_SHIFT 13 -#define PYXIS_STAT0_P_NBR_MASK 0x7ffff - -#define PYXIS_HAE_ADDRESS PYXIS_HAE_MEM - -/* - * Data structure for handling PYXIS machine checks: - */ -struct el_PYXIS_sysdata_mcheck { -#if 0 - /* ??? Where did this come from. It appears to bear no - relation to the pyxis logout written in the milo sources. - Who knows what happens in the srm console... */ - u_long coma_gcr; - u_long coma_edsr; - u_long coma_ter; - u_long coma_elar; - u_long coma_ehar; - u_long coma_ldlr; - u_long coma_ldhr; - u_long coma_base0; - u_long coma_base1; - u_long coma_base2; - u_long coma_cnfg0; - u_long coma_cnfg1; - u_long coma_cnfg2; - u_long epic_dcsr; - u_long epic_pear; - u_long epic_sear; - u_long epic_tbr1; - u_long epic_tbr2; - u_long epic_pbr1; - u_long epic_pbr2; - u_long epic_pmr1; - u_long epic_pmr2; - u_long epic_harx1; - u_long epic_harx2; - u_long epic_pmlt; - u_long epic_tag0; - u_long epic_tag1; - u_long epic_tag2; - u_long epic_tag3; - u_long epic_tag4; - u_long epic_tag5; - u_long epic_tag6; - u_long epic_tag7; - u_long epic_data0; - u_long epic_data1; - u_long epic_data2; - u_long epic_data3; - u_long epic_data4; - u_long epic_data5; - u_long epic_data6; - u_long epic_data7; -#else - unsigned long cpu_err0; - unsigned long cpu_err1; - unsigned long cia_err; - unsigned long cia_stat; - unsigned long err_mask; - unsigned long cia_syn; - unsigned long mem_err0; - unsigned long mem_err1; - unsigned long pci_err0; - unsigned long pci_err1; - unsigned long pci_err2; -#endif -}; - - -#ifdef __KERNEL__ - -#ifndef __EXTERN_INLINE -#define __EXTERN_INLINE extern inline -#define __IO_EXTERN_INLINE -#endif - -/* - * I/O 
functions: - * - * PYXIS, the 21174 PCI/memory support chipset for the EV56 (21164A) - * and PCA56 (21164PC) processors, can use either a sparse address - * mapping scheme, or the so-called byte-word PCI address space, to - * get at PCI memory and I/O. - */ - -#define vucp volatile unsigned char * -#define vusp volatile unsigned short * -#define vip volatile int * -#define vuip volatile unsigned int * -#define vulp volatile unsigned long * - -__EXTERN_INLINE unsigned int pyxis_inb(unsigned long addr) -{ - /* ??? I wish I could get rid of this. But there's no ioremap - equivalent for I/O space. PCI I/O can be forced into the - PYXIS I/O region, but that doesn't take care of legacy ISA crap. */ - - return __kernel_ldbu(*(vucp)(addr+PYXIS_BW_IO)); -} - -__EXTERN_INLINE void pyxis_outb(unsigned char b, unsigned long addr) -{ - __kernel_stb(b, *(vucp)(addr+PYXIS_BW_IO)); - mb(); -} - -__EXTERN_INLINE unsigned int pyxis_inw(unsigned long addr) -{ - return __kernel_ldwu(*(vusp)(addr+PYXIS_BW_IO)); -} - -__EXTERN_INLINE void pyxis_outw(unsigned short b, unsigned long addr) -{ - __kernel_stw(b, *(vusp)(addr+PYXIS_BW_IO)); - mb(); -} - -__EXTERN_INLINE unsigned int pyxis_inl(unsigned long addr) -{ - return *(vuip)(addr+PYXIS_BW_IO); -} - -__EXTERN_INLINE void pyxis_outl(unsigned int b, unsigned long addr) -{ - *(vuip)(addr+PYXIS_BW_IO) = b; - mb(); -} - - -/* - * Memory functions. 64-bit and 32-bit accesses are done through - * dense memory space, everything else through sparse space. - * - * For reading and writing 8 and 16 bit quantities we need to - * go through one of the three sparse address mapping regions - * and use the HAE_MEM CSR to provide some bits of the address. - * The following few routines use only sparse address region 1 - * which gives 1Gbyte of accessible space which relates exactly - * to the amount of PCI memory mapping *into* system address space. 
- * See p 6-17 of the specification but it looks something like this: - * - * 21164 Address: - * - * 3 2 1 - * 9876543210987654321098765432109876543210 - * 1ZZZZ0.PCI.QW.Address............BBLL - * - * ZZ = SBZ - * BB = Byte offset - * LL = Transfer length - * - * PCI Address: - * - * 3 2 1 - * 10987654321098765432109876543210 - * HHH....PCI.QW.Address........ 00 - * - * HHH = 31:29 HAE_MEM CSR - * - */ - -__EXTERN_INLINE unsigned long pyxis_readb(unsigned long addr) -{ - return __kernel_ldbu(*(vucp)addr); -} - -__EXTERN_INLINE unsigned long pyxis_readw(unsigned long addr) -{ - return __kernel_ldwu(*(vusp)addr); -} - -__EXTERN_INLINE unsigned long pyxis_readl(unsigned long addr) -{ - return *(vuip)addr; -} - -__EXTERN_INLINE unsigned long pyxis_readq(unsigned long addr) -{ - return *(vulp)addr; -} - -__EXTERN_INLINE void pyxis_writeb(unsigned char b, unsigned long addr) -{ - __kernel_stb(b, *(vucp)addr); -} - -__EXTERN_INLINE void pyxis_writew(unsigned short b, unsigned long addr) -{ - __kernel_stw(b, *(vusp)addr); -} - -__EXTERN_INLINE void pyxis_writel(unsigned int b, unsigned long addr) -{ - *(vuip)addr = b; -} - -__EXTERN_INLINE void pyxis_writeq(unsigned long b, unsigned long addr) -{ - *(vulp)addr = b; -} - -__EXTERN_INLINE unsigned long pyxis_ioremap(unsigned long addr) -{ - return addr + PYXIS_BW_MEM; -} - -__EXTERN_INLINE int pyxis_is_ioaddr(unsigned long addr) -{ - return addr >= IDENT_ADDR + 0x8740000000UL; -} - -#undef vucp -#undef vusp -#undef vip -#undef vuip -#undef vulp - -#ifdef __WANT_IO_DEF - -#define __inb pyxis_inb -#define __inw pyxis_inw -#define __inl pyxis_inl -#define __outb pyxis_outb -#define __outw pyxis_outw -#define __outl pyxis_outl -#define __readb pyxis_readb -#define __readw pyxis_readw -#define __writeb pyxis_writeb -#define __writew pyxis_writew -#define __readl pyxis_readl -#define __readq pyxis_readq -#define __writel pyxis_writel -#define __writeq pyxis_writeq -#define __ioremap pyxis_ioremap -#define __is_ioaddr 
pyxis_is_ioaddr - -#define inb(port) __inb((port)) -#define inw(port) __inw((port)) -#define inl(port) __inl((port)) -#define outb(x, port) __outb((x),(port)) -#define outw(x, port) __outw((x),(port)) -#define outl(x, port) __outl((x),(port)) -#define __raw_readb(addr) __readb((addr)) -#define __raw_readw(addr) __readw((addr)) -#define __raw_writeb(b, addr) __writeb((b),(addr)) -#define __raw_writew(b, addr) __writew((b),(addr)) -#define __raw_readl(a) __readl((unsigned long)(a)) -#define __raw_readq(a) __readq((unsigned long)(a)) -#define __raw_writel(v,a) __writel((v),(unsigned long)(a)) -#define __raw_writeq(v,a) __writeq((v),(unsigned long)(a)) - -#endif /* __WANT_IO_DEF */ - -#ifdef __IO_EXTERN_INLINE -#undef __EXTERN_INLINE -#undef __IO_EXTERN_INLINE -#endif - -#endif /* __KERNEL__ */ - -#endif /* __ALPHA_PYXIS__H__ */ diff -u --recursive --new-file v2.3.99-pre1/linux/include/asm-alpha/core_t2.h linux/include/asm-alpha/core_t2.h --- v2.3.99-pre1/linux/include/asm-alpha/core_t2.h Thu Feb 10 17:11:19 2000 +++ linux/include/asm-alpha/core_t2.h Fri Mar 17 13:01:38 2000 @@ -516,27 +516,22 @@ #ifdef __WANT_IO_DEF -#define __inb t2_inb -#define __inw t2_inw -#define __inl t2_inl -#define __outb t2_outb -#define __outw t2_outw -#define __outl t2_outl -#define __readb t2_readb -#define __readw t2_readw -#define __readl t2_readl -#define __readq t2_readq -#define __writeb t2_writeb -#define __writew t2_writew -#define __writel t2_writel -#define __writeq t2_writeq -#define __ioremap t2_ioremap -#define __is_ioaddr t2_is_ioaddr - -#define inb(port) \ - (__builtin_constant_p((port))?__inb(port):_inb(port)) -#define outb(x, port) \ - (__builtin_constant_p((port))?__outb((x),(port)):_outb((x),(port))) +#define __inb(p) t2_inb((unsigned long)(p)) +#define __inw(p) t2_inw((unsigned long)(p)) +#define __inl(p) t2_inl((unsigned long)(p)) +#define __outb(x,p) t2_outb((x),(unsigned long)(p)) +#define __outw(x,p) t2_outw((x),(unsigned long)(p)) +#define __outl(x,p) 
t2_outl((x),(unsigned long)(p)) +#define __readb(a) t2_readb((unsigned long)(a)) +#define __readw(a) t2_readw((unsigned long)(a)) +#define __readl(a) t2_readl((unsigned long)(a)) +#define __readq(a) t2_readq((unsigned long)(a)) +#define __writeb(x,a) t2_writeb((x),(unsigned long)(a)) +#define __writew(x,a) t2_writew((x),(unsigned long)(a)) +#define __writel(x,a) t2_writel((x),(unsigned long)(a)) +#define __writeq(x,a) t2_writeq((x),(unsigned long)(a)) +#define __ioremap(a) t2_ioremap((unsigned long)(a)) +#define __is_ioaddr(a) t2_is_ioaddr((unsigned long)(a)) #endif /* __WANT_IO_DEF */ diff -u --recursive --new-file v2.3.99-pre1/linux/include/asm-alpha/core_tsunami.h linux/include/asm-alpha/core_tsunami.h --- v2.3.99-pre1/linux/include/asm-alpha/core_tsunami.h Thu Feb 10 17:11:19 2000 +++ linux/include/asm-alpha/core_tsunami.h Fri Mar 17 13:01:38 2000 @@ -405,38 +405,37 @@ #ifdef __WANT_IO_DEF -#define __inb tsunami_inb -#define __inw tsunami_inw -#define __inl tsunami_inl -#define __outb tsunami_outb -#define __outw tsunami_outw -#define __outl tsunami_outl -#define __readb tsunami_readb -#define __readw tsunami_readw -#define __writeb tsunami_writeb -#define __writew tsunami_writew -#define __readl tsunami_readl -#define __readq tsunami_readq -#define __writel tsunami_writel -#define __writeq tsunami_writeq -#define __ioremap tsunami_ioremap -#define __is_ioaddr tsunami_is_ioaddr +#define __inb(p) tsunami_inb((unsigned long)(p)) +#define __inw(p) tsunami_inw((unsigned long)(p)) +#define __inl(p) tsunami_inl((unsigned long)(p)) +#define __outb(x,p) tsunami_outb((x),(unsigned long)(p)) +#define __outw(x,p) tsunami_outw((x),(unsigned long)(p)) +#define __outl(x,p) tsunami_outl((x),(unsigned long)(p)) +#define __readb(a) tsunami_readb((unsigned long)(a)) +#define __readw(a) tsunami_readw((unsigned long)(a)) +#define __readl(a) tsunami_readl((unsigned long)(a)) +#define __readq(a) tsunami_readq((unsigned long)(a)) +#define __writeb(x,a) tsunami_writeb((x),(unsigned 
long)(a)) +#define __writew(x,a) tsunami_writew((x),(unsigned long)(a)) +#define __writel(x,a) tsunami_writel((x),(unsigned long)(a)) +#define __writeq(x,a) tsunami_writeq((x),(unsigned long)(a)) +#define __ioremap(a) tsunami_ioremap((unsigned long)(a)) +#define __is_ioaddr(a) tsunami_is_ioaddr((unsigned long)(a)) -#define inb(port) __inb((port)) -#define inw(port) __inw((port)) -#define inl(port) __inl((port)) -#define outb(v, port) __outb((v),(port)) -#define outw(v, port) __outw((v),(port)) -#define outl(v, port) __outl((v),(port)) - -#define __raw_readb(a) __readb((unsigned long)(a)) -#define __raw_readw(a) __readw((unsigned long)(a)) -#define __raw_readl(a) __readl((unsigned long)(a)) -#define __raw_readq(a) __readq((unsigned long)(a)) -#define __raw_writeb(v,a) __writeb((v),(unsigned long)(a)) -#define __raw_writew(v,a) __writew((v),(unsigned long)(a)) -#define __raw_writel(v,a) __writel((v),(unsigned long)(a)) -#define __raw_writeq(v,a) __writeq((v),(unsigned long)(a)) +#define inb(p) __inb(p) +#define inw(p) __inw(p) +#define inl(p) __inl(p) +#define outb(x,p) __outb((x),(p)) +#define outw(x,p) __outw((x),(p)) +#define outl(x,p) __outl((x),(p)) +#define __raw_readb(a) __readb(a) +#define __raw_readw(a) __readw(a) +#define __raw_readl(a) __readl(a) +#define __raw_readq(a) __readq(a) +#define __raw_writeb(v,a) __writeb((v),(a)) +#define __raw_writew(v,a) __writew((v),(a)) +#define __raw_writel(v,a) __writel((v),(a)) +#define __raw_writeq(v,a) __writeq((v),(a)) #endif /* __WANT_IO_DEF */ diff -u --recursive --new-file v2.3.99-pre1/linux/include/asm-alpha/delay.h linux/include/asm-alpha/delay.h --- v2.3.99-pre1/linux/include/asm-alpha/delay.h Wed Dec 15 10:43:17 1999 +++ linux/include/asm-alpha/delay.h Fri Mar 17 13:02:05 2000 @@ -4,74 +4,43 @@ #include /* - * Copyright (C) 1993 Linus Torvalds + * Copyright (C) 1993, 2000 Linus Torvalds * * Delay routines, using a pre-computed "loops_per_second" value. 
*/ -/* We can make the delay loop inline, but we have to be very careful wrt - scheduling for ev6 machines, so that we keep a consistent number of - iterations for all invocations. */ - -extern __inline__ void -__delay(unsigned long loops) -{ - __asm__ __volatile__( - ".align 4\n" - "1: subq %0,1,%0\n" - " bge %0,1b\n" - " nop" - : "=r" (loops) : "0"(loops)); -} - /* - * division by multiplication: you don't have to worry about - * loss of precision. - * - * Use only for very small delays ( < 1 msec). Should probably use a - * lookup table, really, as the multiplications take much too long with - * short delays. This is a "reasonable" implementation, though (and the - * first constant multiplications gets optimized away if the delay is - * a constant). + * Use only for very small delays (< 1 msec). * - * Optimize small constants further by exposing the second multiplication - * to the compiler. In addition, mulq is 2 cycles faster than umulh. + * The active part of our cycle counter is only 32-bits wide, and + * we're treating the difference between two marks as signed. On + * a 1GHz box, that's about 2 seconds. 
*/ extern __inline__ void -__udelay(unsigned long usecs, unsigned long lps) +__delay(int loops) { - /* compute (usecs * 2**64 / 10**6) * loops_per_sec / 2**64 */ - - usecs *= 0x000010c6f7a0b5edUL; /* 2**64 / 1000000 */ - __asm__("umulh %1,%2,%0" :"=r" (usecs) :"r" (usecs),"r" (lps)); - __delay(usecs); + int tmp; + __asm__ __volatile__( + " rpcc %0\n" + " addl %1,%0,%1\n" + "1: rpcc %0\n" + " subl %1,%0,%0\n" + " bgt %0,1b" + : "=&r" (tmp), "=r" (loops) : "1"(loops)); } extern __inline__ void -__small_const_udelay(unsigned long usecs, unsigned long lps) +__udelay(unsigned long usecs, unsigned long lps) { - /* compute (usecs * 2**32 / 10**6) * loops_per_sec / 2**32 */ - - usecs *= 0x10c6; /* 2^32 / 10^6 */ - usecs *= lps; - usecs >>= 32; - __delay(usecs); + usecs *= ((1UL << 32) / 1000000) * lps; + __delay((long)usecs >> 32); } #ifdef __SMP__ -#define udelay(usecs) \ - (__builtin_constant_p(usecs) && usecs < 0x100000000UL \ - ? __small_const_udelay(usecs, \ - cpu_data[smp_processor_id()].loops_per_sec) \ - : __udelay(usecs, \ - cpu_data[smp_processor_id()].loops_per_sec)) +#define udelay(u) __udelay((u), cpu_data[smp_processor_id()].loops_per_sec) #else -#define udelay(usecs) \ - (__builtin_constant_p(usecs) && usecs < 0x100000000UL \ - ? __small_const_udelay(usecs, loops_per_sec) \ - : __udelay(usecs, loops_per_sec)) +#define udelay(u) __udelay((u), loops_per_sec) #endif - #endif /* defined(__ALPHA_DELAY_H) */ diff -u --recursive --new-file v2.3.99-pre1/linux/include/asm-alpha/io.h linux/include/asm-alpha/io.h --- v2.3.99-pre1/linux/include/asm-alpha/io.h Thu Feb 10 17:11:19 2000 +++ linux/include/asm-alpha/io.h Fri Mar 17 13:01:38 2000 @@ -112,12 +112,12 @@ /* In a generic kernel, we always go through the machine vector. 
*/ -# define __inb alpha_mv.mv_inb -# define __inw alpha_mv.mv_inw -# define __inl alpha_mv.mv_inl -# define __outb alpha_mv.mv_outb -# define __outw alpha_mv.mv_outw -# define __outl alpha_mv.mv_outl +# define __inb(p) alpha_mv.mv_inb((unsigned long)(p)) +# define __inw(p) alpha_mv.mv_inw((unsigned long)(p)) +# define __inl(p) alpha_mv.mv_inl((unsigned long)(p)) +# define __outb(x,p) alpha_mv.mv_outb((x),(unsigned long)(p)) +# define __outw(x,p) alpha_mv.mv_outw((x),(unsigned long)(p)) +# define __outl(x,p) alpha_mv.mv_outl((x),(unsigned long)(p)) # define __readb(a) alpha_mv.mv_readb((unsigned long)(a)) # define __readw(a) alpha_mv.mv_readw((unsigned long)(a)) @@ -128,8 +128,8 @@ # define __writel(v,a) alpha_mv.mv_writel((v),(unsigned long)(a)) # define __writeq(v,a) alpha_mv.mv_writeq((v),(unsigned long)(a)) -# define __ioremap(a) alpha_mv.mv_ioremap(a) -# define __is_ioaddr(a) alpha_mv.mv_is_ioaddr(a) +# define __ioremap(a) alpha_mv.mv_ioremap((unsigned long)(a)) +# define __is_ioaddr(a) alpha_mv.mv_is_ioaddr((unsigned long)(a)) # define inb __inb # define inw __inw @@ -166,8 +166,6 @@ # include #elif defined(CONFIG_ALPHA_POLARIS) # include -#elif defined(CONFIG_ALPHA_PYXIS) -# include #elif defined(CONFIG_ALPHA_T2) # include #elif defined(CONFIG_ALPHA_TSUNAMI) @@ -212,13 +210,13 @@ * redefined by userlevel programs. */ #ifndef inb -# define inb(p) _inb((p)) +# define inb(p) _inb(p) #endif #ifndef inw -# define inw(p) _inw((p)) +# define inw(p) _inw(p) #endif #ifndef inl -# define inl(p) _inl((p)) +# define inl(p) _inl(p) #endif #ifndef outb # define outb(b,p) _outb((b),(p)) @@ -256,12 +254,12 @@ /* Userspace declarations. 
*/ -extern unsigned int inb (unsigned long port); -extern unsigned int inw (unsigned long port); -extern unsigned int inl (unsigned long port); -extern void outb (unsigned char b,unsigned long port); -extern void outw (unsigned short w,unsigned long port); -extern void outl (unsigned int l,unsigned long port); +extern unsigned int inb(unsigned long port); +extern unsigned int inw(unsigned long port); +extern unsigned int inl(unsigned long port); +extern void outb(unsigned char b,unsigned long port); +extern void outw(unsigned short w,unsigned long port); +extern void outl(unsigned int l,unsigned long port); extern unsigned long readb(unsigned long addr); extern unsigned long readw(unsigned long addr); extern unsigned long readl(unsigned long addr); diff -u --recursive --new-file v2.3.99-pre1/linux/include/asm-alpha/mman.h linux/include/asm-alpha/mman.h --- v2.3.99-pre1/linux/include/asm-alpha/mman.h Sun Jan 25 16:31:47 1998 +++ linux/include/asm-alpha/mman.h Thu Mar 16 14:07:09 2000 @@ -31,6 +31,13 @@ #define MCL_CURRENT 8192 /* lock all currently mapped pages */ #define MCL_FUTURE 16384 /* lock all additions to address space */ +#define MADV_NORMAL 0 /* no further special treatment */ +#define MADV_RANDOM 1 /* expect random page references */ +#define MADV_SEQUENTIAL 2 /* expect sequential page references */ +#define MADV_WILLNEED 3 /* will need these pages */ +#define MADV_SPACEAVAIL 5 /* ensure resources are available */ +#define MADV_DONTNEED 6 /* dont need these pages */ + /* compatibility flags */ #define MAP_ANON MAP_ANONYMOUS #define MAP_FILE 0 diff -u --recursive --new-file v2.3.99-pre1/linux/include/asm-alpha/mmu_context.h linux/include/asm-alpha/mmu_context.h --- v2.3.99-pre1/linux/include/asm-alpha/mmu_context.h Thu Mar 2 14:36:23 2000 +++ linux/include/asm-alpha/mmu_context.h Fri Mar 17 13:03:31 2000 @@ -22,11 +22,6 @@ #include #endif -static inline void -enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk, unsigned cpu) -{ -} - extern inline 
unsigned long __reload_thread(struct thread_struct *pcb) { @@ -221,6 +216,12 @@ destroy_context(struct mm_struct *mm) { /* Nothing to do. */ +} + +static inline void +enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk, unsigned cpu) +{ + tsk->thread.ptbr = ((unsigned long)mm->pgd - IDENT_ADDR) >> PAGE_SHIFT; } #ifdef __MMU_EXTERN_INLINE diff -u --recursive --new-file v2.3.99-pre1/linux/include/asm-alpha/page.h linux/include/asm-alpha/page.h --- v2.3.99-pre1/linux/include/asm-alpha/page.h Wed Feb 16 17:03:52 2000 +++ linux/include/asm-alpha/page.h Wed Mar 15 09:59:06 2000 @@ -38,6 +38,8 @@ } while (count); } +#define clear_user_page(page, vaddr) clear_page(page) + static inline void copy_page(void * _to, void * _from) { unsigned long count = PAGE_SIZE/64; @@ -67,6 +69,8 @@ to += 8; } while (count); } + +#define copy_user_page(to, from, vaddr) copy_page(to, from) #ifdef STRICT_MM_TYPECHECKS /* diff -u --recursive --new-file v2.3.99-pre1/linux/include/asm-alpha/pci.h linux/include/asm-alpha/pci.h --- v2.3.99-pre1/linux/include/asm-alpha/pci.h Thu Mar 2 14:36:23 2000 +++ linux/include/asm-alpha/pci.h Thu Mar 16 11:28:57 2000 @@ -39,6 +39,11 @@ #define PCIBIOS_MIN_IO alpha_mv.min_io_address #define PCIBIOS_MIN_MEM alpha_mv.min_mem_address +extern inline void pcibios_set_master(struct pci_dev *dev) +{ + /* No special bus mastering setup handling */ +} + /* IOMMU controls. 
*/ /* Allocate and map kernel buffer using consistant mode DMA for PCI diff -u --recursive --new-file v2.3.99-pre1/linux/include/asm-alpha/smp.h linux/include/asm-alpha/smp.h --- v2.3.99-pre1/linux/include/asm-alpha/smp.h Thu Mar 2 14:36:23 2000 +++ linux/include/asm-alpha/smp.h Fri Mar 17 13:02:05 2000 @@ -30,7 +30,6 @@ unsigned long pgtable_cache_sz; unsigned long ipi_count; unsigned long irq_attempt[NR_IRQS]; - unsigned long smp_local_irq_count; unsigned long prof_multiplier; unsigned long prof_counter; int irq_count, bh_count; diff -u --recursive --new-file v2.3.99-pre1/linux/include/asm-alpha/unistd.h linux/include/asm-alpha/unistd.h --- v2.3.99-pre1/linux/include/asm-alpha/unistd.h Mon Dec 20 18:48:22 1999 +++ linux/include/asm-alpha/unistd.h Thu Mar 16 14:07:09 2000 @@ -311,6 +311,8 @@ #define __NR_setresgid 371 #define __NR_getresgid 372 #define __NR_dipc 373 +#define __NR_pivot_root 374 +#define __NR_mincore 375 #if defined(__LIBRARY__) && defined(__GNUC__) diff -u --recursive --new-file v2.3.99-pre1/linux/include/asm-alpha/vga.h linux/include/asm-alpha/vga.h --- v2.3.99-pre1/linux/include/asm-alpha/vga.h Tue Dec 7 09:32:49 1999 +++ linux/include/asm-alpha/vga.h Fri Mar 17 13:01:38 2000 @@ -14,7 +14,7 @@ #define VT_BUF_HAVE_MEMCPYW #define VT_BUF_HAVE_MEMCPYF -extern inline void scr_writew(u16 val, u16 *addr) +extern inline void scr_writew(u16 val, volatile u16 *addr) { if (__is_ioaddr((unsigned long) addr)) __raw_writew(val, (unsigned long) addr); @@ -22,7 +22,7 @@ *addr = val; } -extern inline u16 scr_readw(const u16 *addr) +extern inline u16 scr_readw(volatile const u16 *addr) { if (__is_ioaddr((unsigned long) addr)) return __raw_readw((unsigned long) addr); diff -u --recursive --new-file v2.3.99-pre1/linux/include/asm-arm/arch-cl7500/system.h linux/include/asm-arm/arch-cl7500/system.h --- v2.3.99-pre1/linux/include/asm-arm/arch-cl7500/system.h Tue Mar 14 19:10:40 2000 +++ linux/include/asm-arm/arch-cl7500/system.h Sat Mar 18 11:38:04 2000 @@ -8,18 
+8,14 @@ #include -#define arch_do_idle() \ +#define arch_do_idle() \ outb(0, IOMD_SUSMODE) -#define arch_reset(mode) { \ - outb (0, IOMD_ROMCR0); \ - cli(); \ - __asm__ __volatile__("msr spsr, r1;" \ - "mcr p15, 0, %0, c1, c0, 0;" \ - "movs pc, #0" \ - : \ - : "r" (cpu_reset())); \ - } +#define arch_reset(mode) \ + do { \ + outb (0, IOMD_ROMCR0); \ + cpu_reset(0); \ + } while (0); #define arch_power_off() do { } while (0) diff -u --recursive --new-file v2.3.99-pre1/linux/include/asm-arm/arch-ebsa110/system.h linux/include/asm-arm/arch-ebsa110/system.h --- v2.3.99-pre1/linux/include/asm-arm/arch-ebsa110/system.h Fri Jan 21 18:19:17 2000 +++ linux/include/asm-arm/arch-ebsa110/system.h Sat Mar 18 11:38:04 2000 @@ -8,15 +8,6 @@ #define arch_do_idle() cpu_do_idle() #define arch_power_off() do { } while (0) - -extern __inline__ void arch_reset(char mode) -{ - if (mode == 's') { - __asm__ volatile( - "mcr p15, 0, %0, c1, c0, 0 @ MMU off - mov pc, #0x80000000 @ jump to flash" - : : "r" (cpu_reset()) : "cc"); - } -} +#define arch_reset(mode) cpu_reset(0x80000000) #endif diff -u --recursive --new-file v2.3.99-pre1/linux/include/asm-arm/arch-ebsa285/system.h linux/include/asm-arm/arch-ebsa285/system.h --- v2.3.99-pre1/linux/include/asm-arm/arch-ebsa285/system.h Fri Jan 21 18:19:17 2000 +++ linux/include/asm-arm/arch-ebsa285/system.h Sat Mar 18 11:38:04 2000 @@ -14,10 +14,10 @@ extern __inline__ void arch_reset(char mode) { if (mode == 's') { - __asm__ volatile ( - "mcr p15, 0, %0, c1, c0, 0 @ MMU off - mov pc, #0x41000000 @ jump to ROM" : : - "r" (cpu_reset()) : "cc"); + /* + * Jump into the ROM + */ + cpu_reset(0x41000000); } else { if (machine_is_netwinder()) { /* open up the SuperIO chip diff -u --recursive --new-file v2.3.99-pre1/linux/include/asm-arm/arch-rpc/system.h linux/include/asm-arm/arch-rpc/system.h --- v2.3.99-pre1/linux/include/asm-arm/arch-rpc/system.h Fri Jan 21 18:19:17 2000 +++ linux/include/asm-arm/arch-rpc/system.h Sat Mar 18 11:38:04 2000 @@ -18,8 +18,8 
@@ outb(0, IOMD_ROMCR0); - __asm__ __volatile__( - "mcr p15, 0, %0, c1, c0, 0\n\t" - "mov pc, #0" - : : "r" (cpu_reset())); + /* + * Jump into the ROM + */ + cpu_reset(0); } diff -u --recursive --new-file v2.3.99-pre1/linux/include/asm-arm/arch-sa1100/system.h linux/include/asm-arm/arch-sa1100/system.h --- v2.3.99-pre1/linux/include/asm-arm/arch-sa1100/system.h Thu Feb 10 17:11:20 2000 +++ linux/include/asm-arm/arch-sa1100/system.h Sat Mar 18 11:38:04 2000 @@ -16,11 +16,7 @@ #else -#define arch_reset(x) { \ - __asm__ volatile ( \ -" mcr p15, 0, %0, c1, c0 @ MMU off\n" \ -" mov pc, #0\n" : : "r" (cpu_reset()) : "cc"); \ - } +#define arch_reset(x) cpu_reset(0) #endif diff -u --recursive --new-file v2.3.99-pre1/linux/include/asm-arm/cpu-multi32.h linux/include/asm-arm/cpu-multi32.h --- v2.3.99-pre1/linux/include/asm-arm/cpu-multi32.h Sun Feb 20 21:12:39 2000 +++ linux/include/asm-arm/cpu-multi32.h Sat Mar 18 11:38:04 2000 @@ -75,7 +75,7 @@ /* * Special stuff for a reset */ - unsigned long (*reset)(void); + volatile void (*reset)(unsigned long addr); /* * flush an icached page */ @@ -123,7 +123,7 @@ #define cpu_set_pgd(pgd) processor._set_pgd(pgd) #define cpu_set_pmd(pmdp, pmd) processor._set_pmd(pmdp, pmd) #define cpu_set_pte(ptep, pte) processor._set_pte(ptep, pte) -#define cpu_reset() processor.reset() +#define cpu_reset(addr) processor.reset(addr) #define cpu_flush_icache_area(start,end) processor._flush_icache_area(start,end) #define cpu_cache_wback_area(start,end) processor._cache_wback_area(start,end) #define cpu_cache_purge_area(start,end) processor._cache_purge_area(start,end) diff -u --recursive --new-file v2.3.99-pre1/linux/include/asm-arm/cpu-single.h linux/include/asm-arm/cpu-single.h --- v2.3.99-pre1/linux/include/asm-arm/cpu-single.h Sun Feb 20 21:12:39 2000 +++ linux/include/asm-arm/cpu-single.h Sat Mar 18 11:38:04 2000 @@ -62,7 +62,7 @@ extern void cpu_set_pgd(unsigned long pgd_phys); extern void cpu_set_pmd(pmd_t *pmdp, pmd_t pmd); extern void 
cpu_set_pte(pte_t *ptep, pte_t pte); -extern unsigned long cpu_reset(void); +extern volatile void cpu_reset(unsigned long addr); extern void cpu_flush_icache_area(unsigned long start, unsigned long size); extern void cpu_cache_wback_area(unsigned long start, unsigned long end); extern void cpu_cache_purge_area(unsigned long start, unsigned long end); diff -u --recursive --new-file v2.3.99-pre1/linux/include/asm-arm/mman.h linux/include/asm-arm/mman.h --- v2.3.99-pre1/linux/include/asm-arm/mman.h Tue Jan 20 16:39:42 1998 +++ linux/include/asm-arm/mman.h Sat Mar 18 11:38:04 2000 @@ -25,6 +25,12 @@ #define MCL_CURRENT 1 /* lock all current mappings */ #define MCL_FUTURE 2 /* lock all future mappings */ +#define MADV_NORMAL 0x0 /* default page-in behavior */ +#define MADV_RANDOM 0x1 /* page-in minimum required */ +#define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */ +#define MADV_WILLNEED 0x3 /* pre-fault pages */ +#define MADV_DONTNEED 0x4 /* discard these pages */ + /* compatibility flags */ #define MAP_ANON MAP_ANONYMOUS #define MAP_FILE 0 diff -u --recursive --new-file v2.3.99-pre1/linux/include/asm-arm/page.h linux/include/asm-arm/page.h --- v2.3.99-pre1/linux/include/asm-arm/page.h Wed Feb 16 17:03:52 2000 +++ linux/include/asm-arm/page.h Wed Mar 15 09:59:06 2000 @@ -14,6 +14,9 @@ #define clear_page(page) memzero((void *)(page), PAGE_SIZE) extern void copy_page(void *to, void *from); +#define clear_user_page(page, vaddr) clear_page(page) +#define copy_user_page(to, from, vaddr) copy_page(to, from) + #ifdef STRICT_MM_TYPECHECKS /* * These are used to make use of C type-checking.. 
diff -u --recursive --new-file v2.3.99-pre1/linux/include/asm-arm/pci.h linux/include/asm-arm/pci.h --- v2.3.99-pre1/linux/include/asm-arm/pci.h Thu Mar 2 14:36:23 2000 +++ linux/include/asm-arm/pci.h Thu Mar 16 11:28:57 2000 @@ -1,12 +1,17 @@ #ifndef ASMARM_PCI_H #define ASMARM_PCI_H +#ifdef __KERNEL__ + #define pcibios_assign_all_busses() 0 #define PCIBIOS_MIN_IO 0x8000 #define PCIBIOS_MIN_MEM 0x40000000 -#ifdef __KERNEL__ +extern inline void pcibios_set_master(struct pci_dev *dev) +{ + /* No special bus mastering setup handling */ +} #include #include diff -u --recursive --new-file v2.3.99-pre1/linux/include/asm-arm/system.h linux/include/asm-arm/system.h --- v2.3.99-pre1/linux/include/asm-arm/system.h Tue Mar 14 19:10:40 2000 +++ linux/include/asm-arm/system.h Sat Mar 18 11:38:04 2000 @@ -96,7 +96,7 @@ # define machine_is_netwinder() (0) #endif -#ifdef CONFIG_CATS +#ifdef CONFIG_ARCH_CATS # ifdef machine_arch_type # undef machine_arch_type # define machine_arch_type __machine_arch_type @@ -180,7 +180,7 @@ # define machine_is_sa1100() (0) #endif -#ifdef CONFIG_PERSONAL_SERVER +#ifdef CONFIG_ARCH_PERSONAL_SERVER # ifdef machine_arch_type # undef machine_arch_type # define machine_arch_type __machine_arch_type diff -u --recursive --new-file v2.3.99-pre1/linux/include/asm-i386/hw_irq.h linux/include/asm-i386/hw_irq.h --- v2.3.99-pre1/linux/include/asm-i386/hw_irq.h Tue Mar 7 14:32:26 2000 +++ linux/include/asm-i386/hw_irq.h Sat Mar 18 12:10:59 2000 @@ -189,6 +189,9 @@ */ static inline void x86_do_profile (unsigned long eip) { + if (!prof_buffer) + return; + /* * Only measure the CPUs specified by /proc/irq/prof_cpu_mask. * (default is all CPUs.) @@ -196,18 +199,16 @@ if (!((1<>= prof_shift; - /* - * Don't ignore out-of-bounds EIP values silently, - * put them into the last histogram slot, so if - * present, they will show up as a sharp peak. 
- */ - if (eip > prof_len-1) - eip = prof_len-1; - atomic_inc((atomic_t *)&prof_buffer[eip]); - } + eip -= (unsigned long) &_stext; + eip >>= prof_shift; + /* + * Don't ignore out-of-bounds EIP values silently, + * put them into the last histogram slot, so if + * present, they will show up as a sharp peak. + */ + if (eip > prof_len-1) + eip = prof_len-1; + atomic_inc((atomic_t *)&prof_buffer[eip]); } #ifdef __SMP__ /*more of this file should probably be ifdefed SMP */ diff -u --recursive --new-file v2.3.99-pre1/linux/include/asm-i386/page.h linux/include/asm-i386/page.h --- v2.3.99-pre1/linux/include/asm-i386/page.h Wed Feb 16 17:03:52 2000 +++ linux/include/asm-i386/page.h Sat Mar 18 12:10:59 2000 @@ -30,6 +30,9 @@ #endif +#define clear_user_page(page, vaddr) clear_page(page) +#define copy_user_page(to, from, vaddr) copy_page(to, from) + /* * These are used to make use of C type-checking.. */ diff -u --recursive --new-file v2.3.99-pre1/linux/include/asm-i386/pci.h linux/include/asm-i386/pci.h --- v2.3.99-pre1/linux/include/asm-i386/pci.h Sun Feb 20 21:12:39 2000 +++ linux/include/asm-i386/pci.h Sat Mar 18 12:11:35 2000 @@ -1,6 +1,8 @@ #ifndef __i386_PCI_H #define __i386_PCI_H +#ifdef __KERNEL__ + /* Can be used to override the logic in pci_scan_bus for skipping already-configured bus numbers - to be used for buggy BIOSes or architectures with incomplete PCI setup by the loader */ @@ -10,7 +12,7 @@ #define PCIBIOS_MIN_IO 0x1000 #define PCIBIOS_MIN_MEM 0x10000000 -#ifdef __KERNEL__ +void pcibios_set_master(struct pci_dev *dev); /* Dynamic DMA mapping stuff. * i386 has everything mapped statically. 
@@ -164,4 +166,3 @@ #endif /* __KERNEL__ */ #endif /* __i386_PCI_H */ - diff -u --recursive --new-file v2.3.99-pre1/linux/include/asm-i386/processor.h linux/include/asm-i386/processor.h --- v2.3.99-pre1/linux/include/asm-i386/processor.h Wed Feb 16 17:03:52 2000 +++ linux/include/asm-i386/processor.h Sat Mar 18 12:10:59 2000 @@ -116,6 +116,10 @@ (boot_cpu_data.x86_capability & X86_FEATURE_PAE) #define cpu_has_tsc \ (boot_cpu_data.x86_capability & X86_FEATURE_TSC) +#define cpu_has_de \ + (boot_cpu_data.x86_capability & X86_FEATURE_DE) +#define cpu_has_vme \ + (boot_cpu_data.x86_capability & X86_FEATURE_VME) extern char ignore_irq13; diff -u --recursive --new-file v2.3.99-pre1/linux/include/asm-ia64/page.h linux/include/asm-ia64/page.h --- v2.3.99-pre1/linux/include/asm-ia64/page.h Sat Feb 26 22:31:55 2000 +++ linux/include/asm-ia64/page.h Wed Mar 15 09:59:06 2000 @@ -40,6 +40,9 @@ extern void clear_page (void *page); extern void copy_page (void *to, void *from); +#define clear_user_page(page, vaddr) clear_page(page) +#define copy_user_page(to, from, vaddr) copy_page(to, from) + # ifdef STRICT_MM_TYPECHECKS /* * These are used to make use of C type-checking.. diff -u --recursive --new-file v2.3.99-pre1/linux/include/asm-ia64/pci.h linux/include/asm-ia64/pci.h --- v2.3.99-pre1/linux/include/asm-ia64/pci.h Sun Feb 20 21:12:39 2000 +++ linux/include/asm-ia64/pci.h Thu Mar 16 11:28:58 2000 @@ -11,6 +11,11 @@ #define PCIBIOS_MIN_IO 0x1000 #define PCIBIOS_MIN_MEM 0x10000000 +extern inline void pcibios_set_master(struct pci_dev *dev) +{ + /* No special bus mastering setup handling */ +} + /* * Dynamic DMA mapping API. * IA-64 has everything mapped statically. 
diff -u --recursive --new-file v2.3.99-pre1/linux/include/asm-m68k/page.h linux/include/asm-m68k/page.h --- v2.3.99-pre1/linux/include/asm-m68k/page.h Wed Feb 16 17:03:52 2000 +++ linux/include/asm-m68k/page.h Wed Mar 15 09:59:06 2000 @@ -76,6 +76,9 @@ #define copy_page(to,from) memcpy((to), (from), PAGE_SIZE) #endif +#define clear_user_page(page, vaddr) clear_page(page) +#define copy_user_page(to, from, vaddr) copy_page(to, from) + /* * These are used to make use of C type-checking.. */ diff -u --recursive --new-file v2.3.99-pre1/linux/include/asm-m68k/pci.h linux/include/asm-m68k/pci.h --- v2.3.99-pre1/linux/include/asm-m68k/pci.h Fri Jan 28 15:09:09 2000 +++ linux/include/asm-m68k/pci.h Thu Mar 16 11:28:57 2000 @@ -35,4 +35,9 @@ #define pcibios_assign_all_busses() 0 +extern inline void pcibios_set_master(struct pci_dev *dev) +{ + /* No special bus mastering setup handling */ +} + #endif /* _ASM_M68K_PCI_H */ diff -u --recursive --new-file v2.3.99-pre1/linux/include/asm-mips/page.h linux/include/asm-mips/page.h --- v2.3.99-pre1/linux/include/asm-mips/page.h Sat Feb 26 22:31:56 2000 +++ linux/include/asm-mips/page.h Wed Mar 15 09:59:06 2000 @@ -28,6 +28,8 @@ #define clear_page(page) _clear_page(page) #define copy_page(to, from) _copy_page(to, from) +#define clear_user_page(page, vaddr) clear_page(page) +#define copy_user_page(to, from, vaddr) copy_page(to, from) /* * These are used to make use of C type-checking.. 
diff -u --recursive --new-file v2.3.99-pre1/linux/include/asm-mips/pci.h linux/include/asm-mips/pci.h --- v2.3.99-pre1/linux/include/asm-mips/pci.h Sat Feb 26 22:31:56 2000 +++ linux/include/asm-mips/pci.h Thu Mar 16 11:28:57 2000 @@ -7,6 +7,8 @@ #ifndef _ASM_PCI_H #define _ASM_PCI_H +#ifdef __KERNEL__ + /* Can be used to override the logic in pci_scan_bus for skipping already-configured bus numbers - to be used for buggy BIOSes or architectures with incomplete PCI setup by the loader */ @@ -16,7 +18,10 @@ #define PCIBIOS_MIN_IO 0x1000 #define PCIBIOS_MIN_MEM 0x10000000 -#ifdef __KERNEL__ +extern inline void pcibios_set_master(struct pci_dev *dev) +{ + /* No special bus mastering setup handling */ +} /* * Dynamic DMA mapping stuff. diff -u --recursive --new-file v2.3.99-pre1/linux/include/asm-mips64/page.h linux/include/asm-mips64/page.h --- v2.3.99-pre1/linux/include/asm-mips64/page.h Sat Feb 26 22:31:57 2000 +++ linux/include/asm-mips64/page.h Wed Mar 15 09:59:06 2000 @@ -29,6 +29,8 @@ #define clear_page(page) _clear_page(page) #define copy_page(to, from) _copy_page(to, from) +#define clear_user_page(page, vaddr) clear_page(page) +#define copy_user_page(to, from, vaddr) copy_page(to, from) /* * These are used to make use of C type-checking.. 
diff -u --recursive --new-file v2.3.99-pre1/linux/include/asm-mips64/pci.h linux/include/asm-mips64/pci.h --- v2.3.99-pre1/linux/include/asm-mips64/pci.h Sat Feb 26 22:31:57 2000 +++ linux/include/asm-mips64/pci.h Thu Mar 16 11:28:58 2000 @@ -7,6 +7,8 @@ #ifndef _ASM_PCI_H #define _ASM_PCI_H +#ifdef __KERNEL__ + /* Can be used to override the logic in pci_scan_bus for skipping already-configured bus numbers - to be used for buggy BIOSes or architectures with incomplete PCI setup by the loader */ @@ -16,7 +18,10 @@ #define PCIBIOS_MIN_IO 0x1000 #define PCIBIOS_MIN_MEM 0x10000000 -#ifdef __KERNEL__ +extern inline void pcibios_set_master(struct pci_dev *dev) +{ + /* No special bus mastering setup handling */ +} /* * Dynamic DMA mapping stuff. diff -u --recursive --new-file v2.3.99-pre1/linux/include/asm-ppc/page.h linux/include/asm-ppc/page.h --- v2.3.99-pre1/linux/include/asm-ppc/page.h Wed Feb 16 17:03:52 2000 +++ linux/include/asm-ppc/page.h Wed Mar 15 09:59:06 2000 @@ -78,6 +78,8 @@ extern void clear_page(void *page); extern void copy_page(void *to, void *from); +#define clear_user_page(page, vaddr) clear_page(page) +#define copy_user_page(to, from, vaddr) copy_page(to, from) /* map phys->virtual and virtual->phys for RAM pages */ static inline unsigned long ___pa(unsigned long v) diff -u --recursive --new-file v2.3.99-pre1/linux/include/asm-ppc/pci.h linux/include/asm-ppc/pci.h --- v2.3.99-pre1/linux/include/asm-ppc/pci.h Sat Feb 26 22:31:58 2000 +++ linux/include/asm-ppc/pci.h Thu Mar 16 11:28:57 2000 @@ -10,6 +10,11 @@ #define PCIBIOS_MIN_IO 0x1000 #define PCIBIOS_MIN_MEM 0x10000000 +extern inline void pcibios_set_master(struct pci_dev *dev) +{ + /* No special bus mastering setup handling */ +} + /* Dynamic DMA Mapping stuff * ++ajoshi */ diff -u --recursive --new-file v2.3.99-pre1/linux/include/asm-sh/page.h linux/include/asm-sh/page.h --- v2.3.99-pre1/linux/include/asm-sh/page.h Tue Mar 7 14:32:26 2000 +++ linux/include/asm-sh/page.h Wed Mar 15 09:59:06 2000 
@@ -26,6 +26,8 @@ #define clear_page(page) memset((void *)(page), 0, PAGE_SIZE) #define copy_page(to,from) memcpy((void *)(to), (void *)(from), PAGE_SIZE) +#define clear_user_page(page, vaddr) clear_page(page) +#define copy_user_page(to, from, vaddr) copy_page(to, from) /* * These are used to make use of C type-checking.. diff -u --recursive --new-file v2.3.99-pre1/linux/include/asm-sh/pci.h linux/include/asm-sh/pci.h --- v2.3.99-pre1/linux/include/asm-sh/pci.h Tue Mar 7 14:32:26 2000 +++ linux/include/asm-sh/pci.h Thu Mar 16 11:28:57 2000 @@ -1,13 +1,18 @@ #ifndef __ASM_SH_PCI_H #define __ASM_SH_PCI_H +#ifdef __KERNEL__ + /* Can be used to override the logic in pci_scan_bus for skipping already-configured bus numbers - to be used for buggy BIOSes or architectures with incomplete PCI setup by the loader */ #define pcibios_assign_all_busses() 0 -#ifdef __KERNEL__ +extern inline void pcibios_set_master(struct pci_dev *dev) +{ + /* No special bus mastering setup handling */ +} /* Dynamic DMA mapping stuff. * SuperH has everything mapped statically like x86. 
diff -u --recursive --new-file v2.3.99-pre1/linux/include/asm-sparc/mman.h linux/include/asm-sparc/mman.h --- v2.3.99-pre1/linux/include/asm-sparc/mman.h Sat Nov 9 00:29:41 1996 +++ linux/include/asm-sparc/mman.h Wed Mar 15 00:37:50 2000 @@ -1,4 +1,4 @@ -/* $Id: mman.h,v 1.8 1996/10/27 08:55:28 davem Exp $ */ +/* $Id: mman.h,v 1.9 2000/03/15 02:44:23 davem Exp $ */ #ifndef __SPARC_MMAN_H__ #define __SPARC_MMAN_H__ @@ -41,6 +41,13 @@ #define MC_UNLOCK 3 /* Unlock pages locked via previous mctl() with MC_LOCK arg */ #define MC_LOCKAS 5 /* Lock an entire address space of the calling process */ #define MC_UNLOCKAS 6 /* Unlock entire address space of calling process */ + +#define MADV_NORMAL 0x0 /* default page-in behavior */ +#define MADV_RANDOM 0x1 /* page-in minimum required */ +#define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */ +#define MADV_WILLNEED 0x3 /* pre-fault pages */ +#define MADV_DONTNEED 0x4 /* discard these pages */ +#define MADV_FREE 0x5 /* (Solaris) contents can be freed */ /* compatibility flags */ #define MAP_ANON MAP_ANONYMOUS diff -u --recursive --new-file v2.3.99-pre1/linux/include/asm-sparc/page.h linux/include/asm-sparc/page.h --- v2.3.99-pre1/linux/include/asm-sparc/page.h Sun Feb 20 21:12:39 2000 +++ linux/include/asm-sparc/page.h Wed Mar 15 09:59:06 2000 @@ -1,4 +1,4 @@ -/* $Id: page.h,v 1.48 2000/02/16 07:34:51 davem Exp $ +/* $Id: page.h,v 1.51 2000/03/15 07:19:25 davem Exp $ * page.h: Various defines and such for MMU operations on the Sparc for * the Linux kernel. * @@ -14,7 +14,12 @@ #else #define PAGE_SHIFT 12 #endif +#ifndef __ASSEMBLY__ +/* I have my suspicions... 
-DaveM */ +#define PAGE_SIZE (1UL << PAGE_SHIFT) +#else #define PAGE_SIZE (1 << PAGE_SHIFT) +#endif #define PAGE_MASK (~(PAGE_SIZE-1)) #ifdef __KERNEL__ @@ -33,8 +38,10 @@ BUG(); \ } while (0) -#define clear_page(page) memset((void *)(page), 0, PAGE_SIZE) -#define copy_page(to,from) memcpy((void *)(to), (void *)(from), PAGE_SIZE) +#define clear_page(page) memset((void *)(page), 0, PAGE_SIZE) +#define copy_page(to,from) memcpy((void *)(to), (void *)(from), PAGE_SIZE) +#define clear_user_page(page, vaddr) clear_page(page) +#define copy_user_page(to, from, vaddr) copy_page(to, from) /* The following structure is used to hold the physical * memory configuration of the machine. This is filled in diff -u --recursive --new-file v2.3.99-pre1/linux/include/asm-sparc/pci.h linux/include/asm-sparc/pci.h --- v2.3.99-pre1/linux/include/asm-sparc/pci.h Sun Feb 20 21:12:39 2000 +++ linux/include/asm-sparc/pci.h Thu Mar 16 11:28:57 2000 @@ -1,6 +1,8 @@ #ifndef __SPARC_PCI_H #define __SPARC_PCI_H +#ifdef __KERNEL__ + /* Can be used to override the logic in pci_scan_bus for skipping * already-configured bus numbers - to be used for buggy BIOSes * or architectures with incomplete PCI setup by the loader. @@ -10,7 +12,10 @@ #define PCIBIOS_MIN_IO 0UL #define PCIBIOS_MIN_MEM 0UL -#ifdef __KERNEL__ +extern inline void pcibios_set_master(struct pci_dev *dev) +{ + /* No special bus mastering setup handling */ +} /* Dynamic DMA mapping stuff. */ diff -u --recursive --new-file v2.3.99-pre1/linux/include/asm-sparc64/atomic.h linux/include/asm-sparc64/atomic.h --- v2.3.99-pre1/linux/include/asm-sparc64/atomic.h Sun Jul 4 09:53:12 1999 +++ linux/include/asm-sparc64/atomic.h Thu Mar 16 11:40:17 2000 @@ -1,61 +1,32 @@ -/* $Id: atomic.h,v 1.19 1999/07/03 22:11:17 davem Exp $ +/* $Id: atomic.h,v 1.20 2000/03/16 16:44:44 davem Exp $ * atomic.h: Thankfully the V9 is at least reasonable for this * stuff. * - * Copyright (C) 1996, 1997 David S. 
Miller (davem@caip.rutgers.edu) + * Copyright (C) 1996, 1997, 2000 David S. Miller (davem@redhat.com) */ #ifndef __ARCH_SPARC64_ATOMIC__ #define __ARCH_SPARC64_ATOMIC__ -/* Make sure gcc doesn't try to be clever and move things around - * on us. We need to use _exactly_ the address the user gave us, - * not some alias that contains the same information. - */ -#define __atomic_fool_gcc(x) ((struct { int a[100]; } *)x) - typedef struct { int counter; } atomic_t; #define ATOMIC_INIT(i) { (i) } #define atomic_read(v) ((v)->counter) #define atomic_set(v, i) (((v)->counter) = i) -#define atomic_add_return(__i, __v) \ -({ register atomic_t *__V asm("g1"); \ - register int __I asm("g2"); \ - __V = (__v); __I = (__i); \ - __asm__ __volatile__("sethi %%hi(__atomic_add), %%g3\n\t" \ - "jmpl %%g3 + %%lo(__atomic_add), %%g3\n\t" \ - " nop\n1:" \ - : "=&r" (__I) \ - : "0" (__I), "r" (__V) \ - : "g3", "g5", "g7", "cc", "memory"); \ - __I; \ -}) - -#define atomic_sub_return(__i, __v) \ -({ register atomic_t *__V asm("g1"); \ - register int __I asm("g2"); \ - __V = (__v); __I = (__i); \ - __asm__ __volatile__("sethi %%hi(__atomic_sub), %%g3\n\t" \ - "jmpl %%g3 + %%lo(__atomic_sub), %%g3\n\t" \ - " nop\n1:" \ - : "=&r" (__I) \ - : "0" (__I), "r" (__V) \ - : "g3", "g5", "g7", "cc", "memory"); \ - __I; \ -}) - -#define atomic_add(i, v) atomic_add_return(i, v) -#define atomic_sub(i, v) atomic_sub_return(i, v) +extern int __atomic_add(int, atomic_t *); +extern int __atomic_sub(int, atomic_t *); + +#define atomic_add(i, v) ((void)__atomic_add(i, v)) +#define atomic_sub(i, v) ((void)__atomic_sub(i, v)) -#define atomic_dec_return(v) atomic_sub_return(1,(v)) -#define atomic_inc_return(v) atomic_add_return(1,(v)) +#define atomic_dec_return(v) __atomic_sub(1, v) +#define atomic_inc_return(v) __atomic_add(1, v) -#define atomic_sub_and_test(i,v) (atomic_sub_return((i), (v)) == 0) -#define atomic_dec_and_test(v) (atomic_sub_return(1, (v)) == 0) +#define atomic_sub_and_test(i, v) (__atomic_sub(i, 
v) == 0) +#define atomic_dec_and_test(v) (__atomic_sub(1, v) == 0) -#define atomic_inc(v) atomic_add(1,(v)) -#define atomic_dec(v) atomic_sub(1,(v)) +#define atomic_inc(v) ((void)__atomic_add(1, v)) +#define atomic_dec(v) ((void)__atomic_sub(1, v)) #endif /* !(__ARCH_SPARC64_ATOMIC__) */ diff -u --recursive --new-file v2.3.99-pre1/linux/include/asm-sparc64/mman.h linux/include/asm-sparc64/mman.h --- v2.3.99-pre1/linux/include/asm-sparc64/mman.h Fri Dec 13 01:37:47 1996 +++ linux/include/asm-sparc64/mman.h Wed Mar 15 00:37:50 2000 @@ -1,4 +1,4 @@ -/* $Id: mman.h,v 1.1 1996/12/02 00:07:29 davem Exp $ */ +/* $Id: mman.h,v 1.2 2000/03/15 02:44:26 davem Exp $ */ #ifndef __SPARC64_MMAN_H__ #define __SPARC64_MMAN_H__ @@ -41,6 +41,13 @@ #define MC_UNLOCK 3 /* Unlock pages locked via previous mctl() with MC_LOCK arg */ #define MC_LOCKAS 5 /* Lock an entire address space of the calling process */ #define MC_UNLOCKAS 6 /* Unlock entire address space of calling process */ + +#define MADV_NORMAL 0x0 /* default page-in behavior */ +#define MADV_RANDOM 0x1 /* page-in minimum required */ +#define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */ +#define MADV_WILLNEED 0x3 /* pre-fault pages */ +#define MADV_DONTNEED 0x4 /* discard these pages */ +#define MADV_FREE 0x5 /* (Solaris) contents can be freed */ /* compatibility flags */ #define MAP_ANON MAP_ANONYMOUS diff -u --recursive --new-file v2.3.99-pre1/linux/include/asm-sparc64/page.h linux/include/asm-sparc64/page.h --- v2.3.99-pre1/linux/include/asm-sparc64/page.h Sun Feb 20 21:12:39 2000 +++ linux/include/asm-sparc64/page.h Wed Mar 15 09:59:06 2000 @@ -1,4 +1,4 @@ -/* $Id: page.h,v 1.30 2000/02/16 07:34:54 davem Exp $ */ +/* $Id: page.h,v 1.32 2000/03/15 07:19:28 davem Exp $ */ #ifndef _SPARC64_PAGE_H #define _SPARC64_PAGE_H @@ -23,6 +23,8 @@ extern void clear_page(void *page); extern void copy_page(void *to, void *from); +extern void clear_user_page(void *page, unsigned long vaddr); +extern void copy_user_page(void *to, void 
*from, unsigned long vaddr); /* GROSS, defining this makes gcc pass these types as aggregates, * and thus on the stack, turn this crap off... -DaveM diff -u --recursive --new-file v2.3.99-pre1/linux/include/asm-sparc64/parport.h linux/include/asm-sparc64/parport.h --- v2.3.99-pre1/linux/include/asm-sparc64/parport.h Tue Mar 14 19:10:40 2000 +++ linux/include/asm-sparc64/parport.h Thu Mar 16 11:20:33 2000 @@ -1,4 +1,4 @@ -/* $Id: parport.h,v 1.7 2000/01/28 13:43:14 jj Exp $ +/* $Id: parport.h,v 1.9 2000/03/16 07:47:27 davem Exp $ * parport.h: sparc64 specific parport initialization and dma. * * Copyright (C) 1999 Eddie C. Dost (ecd@skynet.be) @@ -7,7 +7,6 @@ #ifndef _ASM_SPARC64_PARPORT_H #define _ASM_SPARC64_PARPORT_H 1 -#include #include #include diff -u --recursive --new-file v2.3.99-pre1/linux/include/asm-sparc64/pci.h linux/include/asm-sparc64/pci.h --- v2.3.99-pre1/linux/include/asm-sparc64/pci.h Sun Feb 20 21:12:39 2000 +++ linux/include/asm-sparc64/pci.h Thu Mar 16 11:28:57 2000 @@ -1,6 +1,8 @@ #ifndef __SPARC64_PCI_H #define __SPARC64_PCI_H +#ifdef __KERNEL__ + /* Can be used to override the logic in pci_scan_bus for skipping * already-configured bus numbers - to be used for buggy BIOSes * or architectures with incomplete PCI setup by the loader. @@ -10,7 +12,10 @@ #define PCIBIOS_MIN_IO 0UL #define PCIBIOS_MIN_MEM 0UL -#ifdef __KERNEL__ +extern inline void pcibios_set_master(struct pci_dev *dev) +{ + /* No special bus mastering setup handling */ +} /* Dynamic DMA mapping stuff. 
*/ diff -u --recursive --new-file v2.3.99-pre1/linux/include/asm-sparc64/semaphore.h linux/include/asm-sparc64/semaphore.h --- v2.3.99-pre1/linux/include/asm-sparc64/semaphore.h Fri Jan 28 15:09:09 2000 +++ linux/include/asm-sparc64/semaphore.h Thu Mar 16 11:40:17 2000 @@ -90,7 +90,7 @@ ba,pt %%xcc, 2b restore %%l3, %%g0, %%g3 .previous\n" - : : "r" (__atomic_fool_gcc(sem)), "i" (__down) + : : "r" (sem), "i" (__down) : "g5", "g7", "memory", "cc"); } @@ -126,7 +126,7 @@ restore %%o0, %%g0, %0 .previous\n" : "=r" (ret) - : "0" (ret), "r" (__atomic_fool_gcc(sem)), "i" (__down_interruptible) + : "0" (ret), "r" (sem), "i" (__down_interruptible) : "g5", "g7", "memory", "cc"); return ret; } @@ -162,7 +162,7 @@ restore %%o0, %%g0, %0 .previous\n" : "=r" (ret) - : "0" (ret), "r" (__atomic_fool_gcc(sem)), "i" (__down_trylock) + : "0" (ret), "r" (sem), "i" (__down_trylock) : "g5", "g7", "memory", "cc"); return ret; } @@ -196,7 +196,7 @@ ba,pt %%xcc, 2b restore %%l3, %%g0, %%g3 .previous\n" - : : "r" (__atomic_fool_gcc(sem)), "i" (__up) + : : "r" (sem), "i" (__up) : "g5", "g7", "memory", "cc"); } @@ -309,7 +309,7 @@ ba,pt %%xcc, 2b restore %%l3, %%g0, %%g3 .previous\n" - : : "r" (__atomic_fool_gcc(sem)), "i" (__down_read_failed) + : : "r" (sem), "i" (__down_read_failed) : "g5", "g7", "memory", "cc"); #if WAITQUEUE_DEBUG if (test_le_bit(1, &sem->granted)) @@ -348,7 +348,7 @@ ba,pt %%xcc, 2b restore %%l3, %%g0, %%g3 .previous\n" - : : "r" (__atomic_fool_gcc(sem)), "i" (__down_write_failed) + : : "r" (sem), "i" (__down_write_failed) : "g5", "g7", "memory", "cc"); #if WAITQUEUE_DEBUG if (atomic_read(&sem->writers)) @@ -394,7 +394,7 @@ ba,pt %%xcc, 2b restore %%l3, %%g0, %%g3 .previous\n" - : : "r" (__atomic_fool_gcc(sem)), "i" (__rwsem_wake) + : : "r" (sem), "i" (__rwsem_wake) : "g5", "g7", "memory", "cc"); } @@ -430,7 +430,7 @@ ba,pt %%xcc, 2b restore %%l3, %%g0, %%g3 .previous\n" - : : "r" (__atomic_fool_gcc(sem)), "i" (__rwsem_wake) + : : "r" (sem), "i" (__rwsem_wake) : "g5", 
"g7", "memory", "cc"); } diff -u --recursive --new-file v2.3.99-pre1/linux/include/asm-sparc64/spinlock.h linux/include/asm-sparc64/spinlock.h --- v2.3.99-pre1/linux/include/asm-sparc64/spinlock.h Fri Sep 10 23:57:37 1999 +++ linux/include/asm-sparc64/spinlock.h Thu Mar 16 11:40:17 2000 @@ -110,41 +110,15 @@ typedef unsigned int rwlock_t; #define RW_LOCK_UNLOCKED 0 -#define read_lock(__rw_lck) \ -do { register rwlock_t *__X asm("g1"); \ - __asm__ __volatile__("sethi %%hi(__read_lock), %%g3\n\t" \ - "jmpl %%g3 + %%lo(__read_lock), %%g3\n\t" \ - " nop\n1:" \ - : : "r" (__X = (__rw_lck)) \ - : "g3", "g5", "g7", "cc", "memory"); \ -} while(0) +extern void __read_lock(rwlock_t *); +extern void __read_unlock(rwlock_t *); +extern void __write_lock(rwlock_t *); +extern void __write_unlock(rwlock_t *); -#define read_unlock(__rw_lck) \ -do { register rwlock_t *__X asm("g1"); \ - __asm__ __volatile__("sethi %%hi(__read_unlock), %%g3\n\t" \ - "jmpl %%g3 + %%lo(__read_unlock), %%g3\n\t" \ - " nop\n1:" \ - : : "r" (__X = (__rw_lck)) \ - : "g3", "g5", "g7", "cc", "memory"); \ -} while(0) - -#define write_lock(__rw_lck) \ -do { register rwlock_t *__X asm("g1"); \ - __asm__ __volatile__("sethi %%hi(__write_lock), %%g3\n\t" \ - "jmpl %%g3 + %%lo(__write_lock), %%g3\n\t" \ - " nop\n1:" \ - : : "r" (__X = (__rw_lck)) \ - : "g2", "g3", "g5", "g7", "cc", "memory"); \ -} while(0) - -#define write_unlock(__rw_lck) \ -do { register rwlock_t *__X asm("g1"); \ - __asm__ __volatile__("sethi %%hi(__write_unlock), %%g3\n\t" \ - "jmpl %%g3 + %%lo(__write_unlock), %%g3\n\t" \ - " nop\n1:" \ - : : "r" (__X = (__rw_lck)) \ - : "g2", "g3", "g5", "g7", "cc", "memory"); \ -} while(0) +#define read_lock(p) __read_lock(p) +#define read_unlock(p) __read_unlock(p) +#define write_lock(p) __write_lock(p) +#define write_unlock(p) __write_unlock(p) #else /* !(SPIN_LOCK_DEBUG) */ diff -u --recursive --new-file v2.3.99-pre1/linux/include/linux/ac97_codec.h linux/include/linux/ac97_codec.h --- 
v2.3.99-pre1/linux/include/linux/ac97_codec.h Tue Mar 7 14:32:26 2000 +++ linux/include/linux/ac97_codec.h Thu Mar 16 11:18:35 2000 @@ -39,6 +39,21 @@ #define AC97_RESERVED_3A 0x003A /* Reserved */ /* range 0x3c-0x58 - MODEM */ +#define AC97_EXTENDED_MODEM_ID 0x003C +#define AC97_EXTEND_MODEM_STAT 0x003E +#define AC97_LINE1_RATE 0x0040 +#define AC97_LINE2_RATE 0x0042 +#define AC97_HANDSET_RATE 0x0044 +#define AC97_LINE1_LEVEL 0x0046 +#define AC97_LINE2_LEVEL 0x0048 +#define AC97_HANDSET_LEVEL 0x004A +#define AC97_GPIO_CONFIG 0x004C +#define AC97_GPIO_POLARITY 0x004E +#define AC97_GPIO_STICKY 0x0050 +#define AC97_GPIO_WAKE_UP 0x0052 +#define AC97_GPIO_STATUS 0x0054 +#define AC97_MISC_MODEM_STAT 0x0056 +#define AC97_RESERVED_58 0x0058 /* registers 0x005a - 0x007a are vendor reserved */ @@ -150,6 +165,9 @@ /* saved OSS mixer states */ unsigned int mixer_state[SOUND_MIXER_NRDEVICES]; + + /* Software Modem interface */ + int (*modem_ioctl)(struct ac97_codec *codec, unsigned int cmd, unsigned long arg); }; extern int ac97_read_proc (char *page_out, char **start, off_t off, diff -u --recursive --new-file v2.3.99-pre1/linux/include/linux/binfmts.h linux/include/linux/binfmts.h --- v2.3.99-pre1/linux/include/linux/binfmts.h Fri Oct 22 13:21:55 1999 +++ linux/include/linux/binfmts.h Sat Mar 18 12:10:59 2000 @@ -37,7 +37,7 @@ struct linux_binfmt * next; struct module *module; int (*load_binary)(struct linux_binprm *, struct pt_regs * regs); - int (*load_shlib)(int fd); + int (*load_shlib)(struct file *); int (*core_dump)(long signr, struct pt_regs * regs, struct file * file); unsigned long min_coredump; /* minimal dump size */ }; diff -u --recursive --new-file v2.3.99-pre1/linux/include/linux/fs.h linux/include/linux/fs.h --- v2.3.99-pre1/linux/include/linux/fs.h Tue Mar 14 19:10:40 2000 +++ linux/include/linux/fs.h Sat Mar 18 16:54:39 2000 @@ -589,6 +589,7 @@ struct list_head s_files; struct block_device *s_bdev; + struct quota_mount_options s_dquot; /* Diskquota specific 
options */ union { struct minix_sb_info minix_sb; @@ -730,7 +731,6 @@ struct file_system_type * next; }; -#ifdef MODULE #define DECLARE_FSTYPE(var,type,read,flags) \ struct file_system_type var = { \ name: type, \ @@ -738,14 +738,6 @@ fs_flags: flags, \ owner: THIS_MODULE, \ } -#else -#define DECLARE_FSTYPE(var,type,read,flags) \ -struct file_system_type var = { \ - name: type, \ - read_super: read, \ - fs_flags: flags, \ -} -#endif #define DECLARE_FSTYPE_DEV(var,type,read) \ DECLARE_FSTYPE(var,type,read,FS_REQUIRES_DEV) @@ -821,7 +813,7 @@ extern int get_unused_fd(void); extern void put_unused_fd(unsigned int); -extern struct file *filp_open(const char *, int, int); +extern struct file *filp_open(const char *, int, int, struct dentry *); extern int filp_close(struct file *, fl_owner_t id); extern char * getname(const char *); @@ -936,10 +928,15 @@ extern int permission(struct inode *, int); extern int get_write_access(struct inode *); extern void put_write_access(struct inode *); -extern struct dentry * open_namei(const char *, int, int); extern struct dentry * do_mknod(const char *, int, dev_t); extern int do_pipe(int *); -extern int do_unlink(const char * name); +extern int do_unlink(const char * name, struct dentry *); +extern struct dentry * __open_namei(const char *, int, int, struct dentry *); + +static inline struct dentry * open_namei(const char *pathname) +{ + return __open_namei(pathname, 0, 0, NULL); +} /* fs/dcache.c -- generic fs support functions */ extern int is_subdir(struct dentry *, struct dentry *); @@ -989,6 +986,7 @@ extern struct dentry * lookup_dentry(const char *, struct dentry *, unsigned int); +extern struct dentry * lookup_one(const char *, struct dentry *); extern struct dentry * __namei(const char *, unsigned int); #define namei(pathname) __namei(pathname, 1) diff -u --recursive --new-file v2.3.99-pre1/linux/include/linux/hdreg.h linux/include/linux/hdreg.h --- v2.3.99-pre1/linux/include/linux/hdreg.h Sat Feb 26 22:32:06 2000 +++ 
linux/include/linux/hdreg.h Sat Mar 18 12:11:01 2000 @@ -193,6 +193,8 @@ #define HDIO_SET_NICE 0x0329 /* set nice flags */ #define HDIO_UNREGISTER_HWIF 0x032a /* unregister interface */ +#define __NEW_HD_DRIVE_ID + /* structure returned by HDIO_GET_IDENTITY, as per ANSI ATA2 rev.2f spec */ struct hd_driveid { unsigned short config; /* lots of obsolete bit flags */ @@ -256,7 +258,7 @@ unsigned short CurAPMvalues; /* current APM values */ unsigned short word92; /* reserved (word 92) */ unsigned short hw_config; /* hardware config */ - unsigned short words94_125[33];/* reserved words 94-125 */ + unsigned short words94_125[31];/* reserved words 94-125 */ unsigned short last_lun; /* reserved (word 126) */ unsigned short word127; /* reserved (word 127) */ unsigned short dlf; /* device lock function diff -u --recursive --new-file v2.3.99-pre1/linux/include/linux/highmem.h linux/include/linux/highmem.h --- v2.3.99-pre1/linux/include/linux/highmem.h Tue Dec 7 09:32:51 1999 +++ linux/include/linux/highmem.h Sat Mar 18 12:11:01 2000 @@ -29,6 +29,15 @@ #endif /* CONFIG_HIGHMEM */ /* when CONFIG_HIGHMEM is not set these will be plain clear/copy_page */ +extern inline void clear_user_highpage(struct page *page, unsigned long vaddr) +{ + unsigned long kaddr; + + kaddr = kmap(page); + clear_user_page((void *)kaddr, vaddr); + kunmap(page); +} + extern inline void clear_highpage(struct page *page) { unsigned long kaddr; @@ -62,6 +71,17 @@ memset((void *)(kaddr + offset), 0, size); flush_page_to_ram(page); kunmap(page); +} + +extern inline void copy_user_highpage(struct page *to, struct page *from, unsigned long vaddr) +{ + unsigned long vfrom, vto; + + vfrom = kmap(from); + vto = kmap(to); + copy_user_page((void *)vto, (void *)vfrom, vaddr); + kunmap(from); + kunmap(to); } extern inline void copy_highpage(struct page *to, struct page *from) diff -u --recursive --new-file v2.3.99-pre1/linux/include/linux/i2c-algo-pcf.h linux/include/linux/i2c-algo-pcf.h --- 
v2.3.99-pre1/linux/include/linux/i2c-algo-pcf.h Tue Jan 4 13:57:21 2000 +++ linux/include/linux/i2c-algo-pcf.h Sun Mar 19 11:15:29 2000 @@ -22,10 +22,10 @@ /* With some changes from Kyösti Mälkki and even Frodo Looijaard */ -/* $Id: i2c-algo-pcf.h,v 1.6 1999/12/21 23:45:58 frodo Exp $ */ +/* $Id: i2c-algo-pcf.h,v 1.7 2000/02/27 23:02:45 frodo Exp $ */ #ifndef I2C_ALGO_PCF_H -#define I2C_AGLO_PCF_H 1 +#define I2C_ALGO_PCF_H 1 /* --- Defines for pcf-adapters --------------------------------------- */ #include diff -u --recursive --new-file v2.3.99-pre1/linux/include/linux/i2c-dev.h linux/include/linux/i2c-dev.h --- v2.3.99-pre1/linux/include/linux/i2c-dev.h Tue Feb 1 01:35:44 2000 +++ linux/include/linux/i2c-dev.h Sun Mar 19 11:15:29 2000 @@ -19,7 +19,7 @@ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ -/* $Id: i2c-dev.h,v 1.6 2000/01/24 21:56:58 frodo Exp $ */ +/* $Id: i2c-dev.h,v 1.7 2000/02/15 17:57:27 frodo Exp $ */ #ifndef I2C_DEV_H #define I2C_DEV_H @@ -37,6 +37,12 @@ __u8 command; int size; union i2c_smbus_data *data; +}; + +/* This is the structure as used in the I2C_RDWR ioctl call */ +struct i2c_rdwr_ioctl_data { + struct i2c_msg *msgs; /* pointers to i2c_msgs */ + int nmsgs; /* number of i2c_msgs */ }; #ifndef __KERNEL__ diff -u --recursive --new-file v2.3.99-pre1/linux/include/linux/i2c-id.h linux/include/linux/i2c-id.h --- v2.3.99-pre1/linux/include/linux/i2c-id.h Tue Mar 7 14:32:26 2000 +++ linux/include/linux/i2c-id.h Sun Mar 19 11:15:29 2000 @@ -20,7 +20,7 @@ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ /* ------------------------------------------------------------------------- */ -/* $Id: i2c-id.h,v 1.10 2000/02/04 02:47:41 mds Exp $ */ +/* $Id: i2c-id.h,v 1.15 2000/03/16 14:01:57 frodo Exp $ */ #ifndef I2C_ID_H #define I2C_ID_H @@ -92,9 +92,7 @@ #define I2C_ALGO_ATI 0x030000 /* ATI video card */ #define I2C_ALGO_SMBUS 0x040000 #define I2C_ALGO_ISA 0x050000 /* lm_sensors ISA pseudo-adapter */ -#define I2C_ALGO_SAA714 0x060000 /* SAA 7146 video decoder bus */ -#define I2C_ALGO_SAA7146A 0x060001 /* SAA 7146A - enhanced version */ - +#define I2C_ALGO_SAA7146 0x060000 /* SAA 7146 video decoder bus */ #define I2C_ALGO_EXP 0x800000 /* experimental */ @@ -120,6 +118,7 @@ #define I2C_HW_B_HYDRA 0x08 /* Apple Hydra Mac I/O */ #define I2C_HW_B_G400 0x09 /* Matrox G400 */ #define I2C_HW_B_I810 0x0a /* Intel I810 */ +#define I2C_HW_B_VOO 0x0b /* 3dfx Voodoo 3 / Banshee */ #define I2C_HW_B_RIVA 0x10 /* Riva based graphics cards */ #define I2C_HW_B_IOC 0x11 /* IOC bit-wiggling */ diff -u --recursive --new-file v2.3.99-pre1/linux/include/linux/i2c.h linux/include/linux/i2c.h --- v2.3.99-pre1/linux/include/linux/i2c.h Thu Feb 10 17:11:22 2000 +++ linux/include/linux/i2c.h Sun Mar 19 11:15:29 2000 @@ -23,7 +23,7 @@ /* With some changes from Kyösti Mälkki and Frodo Looijaard */ -/* $Id: i2c.h,v 1.36 2000/01/18 23:54:07 frodo Exp $ */ +/* $Id: i2c.h,v 1.37 2000/02/15 17:57:27 frodo Exp $ */ #ifndef I2C_H #define I2C_H @@ -441,6 +441,7 @@ #define I2C_TENBIT 0x0704 /* 0 for 7 bit addrs, != 0 for 10 bit */ #define I2C_FUNCS 0x0705 /* Get the adapter functionality */ +#define I2C_RDWR 0x0707 /* Combined R/W transfer (one stop only)*/ #if 0 #define I2C_ACK_TEST 0x0710 /* See if a slave is at a specific adress */ #endif @@ -451,7 +452,6 @@ #define I2C_UDELAY 0x0705 /* set delay in microsecs between each */ /* written byte (except address) */ #define I2C_MDELAY 0x0706 /* millisec delay between written bytes */ - /* ----- I2C-DEV: char device interface stuff ------------------------- 
*/ diff -u --recursive --new-file v2.3.99-pre1/linux/include/linux/icmp.h linux/include/linux/icmp.h --- v2.3.99-pre1/linux/include/linux/icmp.h Sat Nov 29 10:33:21 1997 +++ linux/include/linux/icmp.h Sat Mar 18 12:11:22 2000 @@ -82,10 +82,31 @@ #ifdef __KERNEL__ +#include + struct icmp_err { int errno; unsigned fatal:1; }; + +/* + * Build xmit assembly blocks + */ + +struct icmp_bxm +{ + void *data_ptr; + int data_len; + struct icmphdr icmph; + unsigned long csum; + struct ip_options replyopts; + unsigned char optbuf[40]; +}; + +struct sk_buff; + +extern void icmp_reply(struct icmp_bxm *, struct sk_buff *); + #endif /* diff -u --recursive --new-file v2.3.99-pre1/linux/include/linux/kernel.h linux/include/linux/kernel.h --- v2.3.99-pre1/linux/include/linux/kernel.h Fri Oct 22 13:21:55 1999 +++ linux/include/linux/kernel.h Wed Mar 15 00:38:56 2000 @@ -15,8 +15,10 @@ #define barrier() __asm__ __volatile__("": : :"memory") #define INT_MAX ((int)(~0U>>1)) +#define INT_MIN (-INT_MAX - 1) #define UINT_MAX (~0U) #define LONG_MAX ((long)(~0UL>>1)) +#define LONG_MIN (-LONG_MAX - 1) #define ULONG_MAX (~0UL) #define STACK_MAGIC 0xdeadbeef diff -u --recursive --new-file v2.3.99-pre1/linux/include/linux/module.h linux/include/linux/module.h --- v2.3.99-pre1/linux/include/linux/module.h Fri Mar 10 16:40:49 2000 +++ linux/include/linux/module.h Sat Mar 18 16:53:32 2000 @@ -207,14 +207,12 @@ #define MOD_DEC_USE_COUNT __MOD_DEC_USE_COUNT(THIS_MODULE) #define MOD_IN_USE __MOD_IN_USE(THIS_MODULE) -#ifndef __NO_VERSION__ #include -const char __module_kernel_version[] __attribute__((section(".modinfo"))) = +static const char __module_kernel_version[] __attribute__((section(".modinfo"))) = "kernel_version=" UTS_RELEASE; #ifdef MODVERSIONS -const char __module_using_checksums[] __attribute__((section(".modinfo"))) = +static const char __module_using_checksums[] __attribute__((section(".modinfo"))) = "using_checksums=1"; -#endif #endif #else /* MODULE */ diff -u --recursive --new-file 
v2.3.99-pre1/linux/include/linux/mount.h linux/include/linux/mount.h --- v2.3.99-pre1/linux/include/linux/mount.h Thu Nov 18 20:25:38 1999 +++ linux/include/linux/mount.h Wed Mar 15 21:45:27 2000 @@ -30,13 +30,9 @@ kdev_t mnt_dev; /* Device this applies to */ char *mnt_devname; /* Name of device e.g. /dev/dsk/hda1 */ char *mnt_dirname; /* Name of directory mounted on */ - unsigned int mnt_flags; /* Flags of this device */ struct super_block *mnt_sb; /* pointer to superblock */ - struct quota_mount_options mnt_dquot; /* Diskquota specific mount options */ struct vfsmount *mnt_next; /* pointer to next in linkedlist */ }; - -struct vfsmount *lookup_vfsmnt(kdev_t dev); /* * Umount options diff -u --recursive --new-file v2.3.99-pre1/linux/include/linux/netfilter.h linux/include/linux/netfilter.h --- v2.3.99-pre1/linux/include/linux/netfilter.h Tue Mar 7 14:32:26 2000 +++ linux/include/linux/netfilter.h Sat Mar 18 12:16:09 2000 @@ -16,7 +16,8 @@ #define NF_ACCEPT 1 #define NF_STOLEN 2 #define NF_QUEUE 3 -#define NF_MAX_VERDICT NF_QUEUE +#define NF_REPEAT 4 +#define NF_MAX_VERDICT NF_REPEAT /* Generic cache responses from hook functions. */ #define NFC_ALTERED 0x8000 @@ -66,6 +67,10 @@ int get_optmin; int get_optmax; int (*get)(struct sock *sk, int optval, void *user, int *len); + + /* Number of users inside set() or get(). */ + unsigned int use; + struct task_struct *cleanup_task; }; /* Each queued (to userspace) skbuff has one of these. */ @@ -172,13 +177,5 @@ #define SUMAX(a,b) ((size_t)(a)>(size_t)(b) ? (ssize_t)(a) : (ssize_t)(b)) #define SUMIN(a,b) ((size_t)(a)<(size_t)(b) ? (ssize_t)(a) : (ssize_t)(b)) #endif /*__KERNEL__*/ - -enum nf_reason { - /* Do not, NOT, reorder these. Add at end. 
*/ - NF_REASON_NONE, - NF_REASON_SET_BY_IPCHAINS, - NF_REASON_FOR_ROUTING, - NF_REASON_FOR_CLS_FW, -}; #endif /*__LINUX_NETFILTER_H*/ diff -u --recursive --new-file v2.3.99-pre1/linux/include/linux/netfilter_ipv4/compat_firewall.h linux/include/linux/netfilter_ipv4/compat_firewall.h --- v2.3.99-pre1/linux/include/linux/netfilter_ipv4/compat_firewall.h Wed Dec 31 16:00:00 1969 +++ linux/include/linux/netfilter_ipv4/compat_firewall.h Sat Mar 18 16:51:35 2000 @@ -0,0 +1,45 @@ +/* Minor modifications to fit on compatibility framework: + Rusty.Russell@rustcorp.com.au +*/ + +#ifndef __LINUX_FIREWALL_H +#define __LINUX_FIREWALL_H + +/* + * Definitions for loadable firewall modules + */ + +#define FW_QUEUE 0 +#define FW_BLOCK 1 +#define FW_ACCEPT 2 +#define FW_REJECT (-1) +#define FW_REDIRECT 3 +#define FW_MASQUERADE 4 +#define FW_SKIP 5 + +struct firewall_ops +{ + struct firewall_ops *next; + int (*fw_forward)(struct firewall_ops *this, int pf, + struct net_device *dev, void *phdr, void *arg, + struct sk_buff **pskb); + int (*fw_input)(struct firewall_ops *this, int pf, + struct net_device *dev, void *phdr, void *arg, + struct sk_buff **pskb); + int (*fw_output)(struct firewall_ops *this, int pf, + struct net_device *dev, void *phdr, void *arg, + struct sk_buff **pskb); + /* These may be NULL. 
*/ + int (*fw_acct_in)(struct firewall_ops *this, int pf, + struct net_device *dev, void *phdr, void *arg, + struct sk_buff **pskb); + int (*fw_acct_out)(struct firewall_ops *this, int pf, + struct net_device *dev, void *phdr, void *arg, + struct sk_buff **pskb); +}; + +extern int register_firewall(int pf, struct firewall_ops *fw); +extern int unregister_firewall(int pf, struct firewall_ops *fw); + +extern int ip_fw_masq_timeouts(void *user, int len); +#endif /* __LINUX_FIREWALL_H */ diff -u --recursive --new-file v2.3.99-pre1/linux/include/linux/netfilter_ipv4/ip_conntrack.h linux/include/linux/netfilter_ipv4/ip_conntrack.h --- v2.3.99-pre1/linux/include/linux/netfilter_ipv4/ip_conntrack.h Wed Dec 31 16:00:00 1969 +++ linux/include/linux/netfilter_ipv4/ip_conntrack.h Sat Mar 18 16:51:35 2000 @@ -0,0 +1,177 @@ +#ifndef _IP_CONNTRACK_H +#define _IP_CONNTRACK_H +/* Connection state tracking for netfilter. This is separated from, + but required by, the NAT layer; it can also be used by an iptables + extension. */ + +#include +#include + +enum ip_conntrack_info +{ + /* Part of an established connection (either direction). */ + IP_CT_ESTABLISHED, + + /* Like NEW, but related to an existing connection, or ICMP error + (in either direction). */ + IP_CT_RELATED, + + /* Started a new connection to track (only + IP_CT_DIR_ORIGINAL); may be a retransmission. */ + IP_CT_NEW, + + /* >= this indicates reply direction */ + IP_CT_IS_REPLY, + + /* Number of distinct IP_CT types (no NEW in reply dirn). */ + IP_CT_NUMBER = IP_CT_IS_REPLY * 2 - 1 +}; + +#ifdef __KERNEL__ + +#include +#include + +#ifdef CONFIG_NF_DEBUG +#define IP_NF_ASSERT(x) \ +do { \ + if (!(x)) \ + /* Wooah! I'm tripping my conntrack in a frenzy of \ + netplay... */ \ + printk("NF_IP_ASSERT: %s:%i(%s)\n", \ + __FILE__, __LINE__, __FUNCTION__); \ +} while(0) +#else +#define IP_NF_ASSERT(x) +#endif + +/* Bitset representing status of connection. 
*/ +enum ip_conntrack_status { + /* It's an expected connection: bit 0 set. This bit never changed */ + IPS_EXPECTED = 0x01, + + /* We've seen packets both ways: bit 1 set. Can be set, not unset. */ + IPS_SEEN_REPLY = 0x02 +}; + +struct ip_conntrack_expect +{ + /* Internal linked list */ + struct list_head list; + + /* We expect this tuple, but DON'T CARE ABOUT THE SOURCE + per-protocol part. */ + struct ip_conntrack_tuple tuple; + + /* The conntrack we are part of (set iff we're live) */ + struct ip_conntrack *expectant; +}; + +#if defined(CONFIG_IP_NF_NAT) || defined(CONFIG_IP_NF_NAT_MODULE) +#include +#endif + +#if defined(CONFIG_IP_NF_FTP) || defined(CONFIG_IP_NF_FTP_MODULE) +#include +#if defined(CONFIG_IP_NF_NAT) || defined(CONFIG_IP_NF_NAT_MODULE) +#include +#endif +#endif + +struct ip_conntrack +{ + /* Usage count in here is 1 for destruct timer, 1 per skb, + plus 1 for any connection(s) we are `master' for */ + struct nf_conntrack ct_general; + + /* These are my tuples; original and reply */ + struct ip_conntrack_tuple_hash tuplehash[IP_CT_DIR_MAX]; + + /* Have we seen traffic both ways yet? (bitset) */ + unsigned int status; + + /* Timer function; drops refcnt when it goes off. */ + struct timer_list timeout; + + /* If we're expecting another related connection, this will be + in expected linked list */ + struct ip_conntrack_expect expected; + + /* If we were expected by another connection, this will be it */ + struct nf_ct_info master; + + /* Helper, if any. 
*/ + struct ip_conntrack_helper *helper; + + /* Our various nf_ct_info structs specify *what* relation this + packet has to the conntrack */ + struct nf_ct_info infos[IP_CT_NUMBER]; + + /* Storage reserved for other modules: */ + + union { + int /*enum tcp_conntrack*/ tcp_state; + } proto; + + union { +#if defined(CONFIG_IP_NF_FTP) || defined(CONFIG_IP_NF_FTP_MODULE) + struct ip_ct_ftp ct_ftp_info; +#endif + } help; + +#if defined(CONFIG_IP_NF_NAT) || defined(CONFIG_IP_NF_NAT_MODULE) + struct { + struct ip_nat_info info; + union { +#if defined(CONFIG_IP_NF_FTP) || defined(CONFIG_IP_NF_FTP_MODULE) + struct ip_nat_ftp_info ftp_info[IP_CT_DIR_MAX]; +#endif + } help; +#if defined(CONFIG_IP_NF_TARGET_MASQUERADE) || \ + defined(CONFIG_IP_NF_TARGET_MASQUERADE_MODULE) + int masq_index; +#endif + } nat; +#endif /* CONFIG_IP_NF_NAT || CONFIG_IP_NF_NAT_MODULE */ + +}; + +/* Alter reply tuple (maybe alter helper). If it's already taken, + return 0 and don't do alteration. */ +extern int +ip_conntrack_alter_reply(struct ip_conntrack *conntrack, + const struct ip_conntrack_tuple *newreply); + +/* Is this tuple taken? (ignoring any belonging to the given + conntrack). */ +extern int +ip_conntrack_tuple_taken(const struct ip_conntrack_tuple *tuple, + const struct ip_conntrack *ignored_conntrack); + +/* Return conntrack_info and tuple hash for given skb. */ +extern struct ip_conntrack * +ip_conntrack_get(struct sk_buff *skb, enum ip_conntrack_info *ctinfo); + +extern struct module *ip_conntrack_module; + +extern int invert_tuplepr(struct ip_conntrack_tuple *inverse, + const struct ip_conntrack_tuple *orig); + +/* Refresh conntrack for this many jiffies */ +extern void ip_ct_refresh(struct ip_conntrack *ct, + unsigned long extra_jiffies); + +/* These are for NAT. Icky. */ +/* Call me when a conntrack is destroyed. 
*/ +extern void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack); + +/* Returns new sk_buff, or NULL */ +struct sk_buff * +ip_ct_gather_frags(struct sk_buff *skb); + +/* Delete all conntracks which match. */ +extern void +ip_ct_selective_cleanup(int (*kill)(const struct ip_conntrack *i, void *data), + void *data); +#endif /* __KERNEL__ */ +#endif /* _IP_CONNTRACK_H */ diff -u --recursive --new-file v2.3.99-pre1/linux/include/linux/netfilter_ipv4/ip_conntrack_core.h linux/include/linux/netfilter_ipv4/ip_conntrack_core.h --- v2.3.99-pre1/linux/include/linux/netfilter_ipv4/ip_conntrack_core.h Wed Dec 31 16:00:00 1969 +++ linux/include/linux/netfilter_ipv4/ip_conntrack_core.h Fri Mar 17 10:56:20 2000 @@ -0,0 +1,39 @@ +#ifndef _IP_CONNTRACK_CORE_H +#define _IP_CONNTRACK_CORE_H +#include + +/* This header is used to share core functionality between the + standalone connection tracking module, and the compatibility layer's use + of connection tracking. */ +extern unsigned int ip_conntrack_in(unsigned int hooknum, + struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)); + +extern int ip_conntrack_init(void); +extern void ip_conntrack_cleanup(void); + +struct ip_conntrack_protocol; +extern struct ip_conntrack_protocol *find_proto(u_int8_t protocol); +/* Like above, but you already have conntrack read lock. */ +extern struct ip_conntrack_protocol *__find_proto(u_int8_t protocol); +extern struct list_head protocol_list; + +/* Returns TRUE if it dealt with ICMP, and filled in skb->nfct */ +int icmp_error_track(struct sk_buff *skb); +extern int get_tuple(const struct iphdr *iph, size_t len, + struct ip_conntrack_tuple *tuple, + struct ip_conntrack_protocol *protocol); + +/* Find a connection corresponding to a tuple. 
*/ +struct ip_conntrack_tuple_hash * +ip_conntrack_find_get(const struct ip_conntrack_tuple *tuple, + const struct ip_conntrack *ignored_conntrack); + +extern unsigned int ip_conntrack_htable_size; +extern struct list_head *ip_conntrack_hash; +extern struct list_head expect_list; +DECLARE_RWLOCK_EXTERN(ip_conntrack_lock); +#endif /* _IP_CONNTRACK_CORE_H */ + diff -u --recursive --new-file v2.3.99-pre1/linux/include/linux/netfilter_ipv4/ip_conntrack_ftp.h linux/include/linux/netfilter_ipv4/ip_conntrack_ftp.h --- v2.3.99-pre1/linux/include/linux/netfilter_ipv4/ip_conntrack_ftp.h Wed Dec 31 16:00:00 1969 +++ linux/include/linux/netfilter_ipv4/ip_conntrack_ftp.h Fri Mar 17 10:56:20 2000 @@ -0,0 +1,41 @@ +#ifndef _IP_CONNTRACK_FTP_H +#define _IP_CONNTRACK_FTP_H +/* FTP tracking. */ + +#ifndef __KERNEL__ +#error Only in kernel. +#endif + +#include + +/* Protects ftp part of conntracks */ +DECLARE_LOCK_EXTERN(ip_ftp_lock); + +enum ip_ct_ftp_type +{ + /* PORT command from client */ + IP_CT_FTP_PORT = IP_CT_DIR_ORIGINAL, + /* PASV response from server */ + IP_CT_FTP_PASV = IP_CT_DIR_REPLY +}; + +/* Protected by ip_conntrack_lock */ +/* We record seq number and length of ftp ip/port text here: all in + host order. 
*/ +struct ip_ct_ftp +{ + /* This tells NAT that this is an ftp connection */ + int is_ftp; + u_int32_t seq; + /* 0 means not found yet */ + u_int32_t len; + enum ip_ct_ftp_type ftptype; + /* Port that was to be used */ + u_int16_t port; + /* Next valid seq position for cmd matching after newline */ + u_int32_t seq_aft_nl[IP_CT_DIR_MAX]; + /* 0 means seq_match_aft_nl not set */ + int seq_aft_nl_set[IP_CT_DIR_MAX]; +}; + +#endif /* _IP_CONNTRACK_FTP_H */ diff -u --recursive --new-file v2.3.99-pre1/linux/include/linux/netfilter_ipv4/ip_conntrack_helper.h linux/include/linux/netfilter_ipv4/ip_conntrack_helper.h --- v2.3.99-pre1/linux/include/linux/netfilter_ipv4/ip_conntrack_helper.h Wed Dec 31 16:00:00 1969 +++ linux/include/linux/netfilter_ipv4/ip_conntrack_helper.h Fri Mar 17 10:56:20 2000 @@ -0,0 +1,30 @@ +/* IP connection tracking helpers. */ +#ifndef _IP_CONNTRACK_HELPER_H +#define _IP_CONNTRACK_HELPER_H +#include + +struct module; + +struct ip_conntrack_helper +{ + /* Internal use. */ + struct list_head list; + + /* Returns TRUE if it wants to help this connection (tuple is + the tuple of REPLY packets from server). */ + int (*will_help)(const struct ip_conntrack_tuple *rtuple); + + /* Function to call when data passes; return verdict, or -1 to + invalidate. */ + int (*help)(const struct iphdr *, size_t len, + struct ip_conntrack *ct, + enum ip_conntrack_info conntrackinfo); +}; + +extern int ip_conntrack_helper_register(struct ip_conntrack_helper *); +extern void ip_conntrack_helper_unregister(struct ip_conntrack_helper *); + +/* Add an expected connection. 
*/ +extern int ip_conntrack_expect_related(struct ip_conntrack *related_to, + const struct ip_conntrack_tuple *tuple); +#endif /*_IP_CONNTRACK_HELPER_H*/ diff -u --recursive --new-file v2.3.99-pre1/linux/include/linux/netfilter_ipv4/ip_conntrack_protocol.h linux/include/linux/netfilter_ipv4/ip_conntrack_protocol.h --- v2.3.99-pre1/linux/include/linux/netfilter_ipv4/ip_conntrack_protocol.h Wed Dec 31 16:00:00 1969 +++ linux/include/linux/netfilter_ipv4/ip_conntrack_protocol.h Fri Mar 17 10:56:20 2000 @@ -0,0 +1,58 @@ +/* Header for use in defining a given protocol for connection tracking. */ +#ifndef _IP_CONNTRACK_PROTOCOL_H +#define _IP_CONNTRACK_PROTOCOL_H +#include + +struct ip_conntrack_protocol +{ + /* Next pointer. */ + struct list_head list; + + /* Protocol number. */ + u_int8_t proto; + + /* Protocol name */ + const char *name; + + /* Try to fill in the third arg; return true if possible. */ + int (*pkt_to_tuple)(const void *datah, size_t datalen, + struct ip_conntrack_tuple *tuple); + + /* Invert the per-proto part of the tuple: ie. turn xmit into reply. + * Some packets can't be inverted: return 0 in that case. + */ + int (*invert_tuple)(struct ip_conntrack_tuple *inverse, + const struct ip_conntrack_tuple *orig); + + /* Print out the per-protocol part of the tuple. */ + unsigned int (*print_tuple)(char *buffer, + const struct ip_conntrack_tuple *); + + /* Print out the private part of the conntrack. */ + unsigned int (*print_conntrack)(char *buffer, + const struct ip_conntrack *); + + /* Returns verdict for packet, or -1 for invalid. */ + int (*packet)(struct ip_conntrack *conntrack, + struct iphdr *iph, size_t len, + enum ip_conntrack_info ctinfo); + + /* Called when a new connection for this protocol found; returns + * TRUE if it's OK. If so, packet() called next. */ + int (*new)(struct ip_conntrack *conntrack, + struct iphdr *iph, size_t len); + + /* Module (if any) which this is connected to. */ + struct module *me; +}; + +/* Protocol registration. 
*/ +extern int ip_conntrack_protocol_register(struct ip_conntrack_protocol *proto); +extern void ip_conntrack_protocol_unregister(struct ip_conntrack_protocol *proto); + +/* Existing built-in protocols */ +extern struct ip_conntrack_protocol ip_conntrack_protocol_tcp; +extern struct ip_conntrack_protocol ip_conntrack_protocol_udp; +extern struct ip_conntrack_protocol ip_conntrack_protocol_icmp; +extern int ip_conntrack_protocol_tcp_init(void); +#endif /*_IP_CONNTRACK_PROTOCOL_H*/ diff -u --recursive --new-file v2.3.99-pre1/linux/include/linux/netfilter_ipv4/ip_conntrack_tuple.h linux/include/linux/netfilter_ipv4/ip_conntrack_tuple.h --- v2.3.99-pre1/linux/include/linux/netfilter_ipv4/ip_conntrack_tuple.h Wed Dec 31 16:00:00 1969 +++ linux/include/linux/netfilter_ipv4/ip_conntrack_tuple.h Fri Mar 17 10:56:20 2000 @@ -0,0 +1,105 @@ +#ifndef _IP_CONNTRACK_TUPLE_H +#define _IP_CONNTRACK_TUPLE_H + +/* A `tuple' is a structure containing the information to uniquely + identify a connection. ie. if two packets have the same tuple, they + are in the same connection; if not, they are not. + + We divide the structure along "manipulatable" and + "non-manipulatable" lines, for the benefit of the NAT code. +*/ + +/* The protocol-specific manipulable parts of the tuple. */ +union ip_conntrack_manip_proto +{ + /* Add other protocols here. */ + u_int16_t all; + + struct { + u_int16_t port; + } tcp; + struct { + u_int16_t port; + } udp; + struct { + u_int16_t id; + } icmp; +}; + +/* The manipulable part of the tuple. */ +struct ip_conntrack_manip +{ + u_int32_t ip; + union ip_conntrack_manip_proto u; + u_int16_t pad; /* Must be set to 0 for memcmp. */ +}; + +/* This contains the information to distinguish a connection. */ +struct ip_conntrack_tuple +{ + struct ip_conntrack_manip src; + + /* These are the parts of the tuple which are fixed. */ + struct { + u_int32_t ip; + union { + /* Add other protocols here. 
*/ + u_int16_t all; + + struct { + u_int16_t port; + } tcp; + struct { + u_int16_t port; + } udp; + struct { + u_int8_t type, code; + } icmp; + } u; + + /* The protocol. */ + u_int16_t protonum; + } dst; +}; + +#define IP_PARTS_NATIVE(n) \ +(unsigned int)((n)>>24)&0xFF, \ +(unsigned int)((n)>>16)&0xFF, \ +(unsigned int)((n)>>8)&0xFF, \ +(unsigned int)((n)&0xFF) + +#define IP_PARTS(n) IP_PARTS_NATIVE(ntohl(n)) + +#ifdef __KERNEL__ + +#define DUMP_TUPLE(tp) \ +DEBUGP("tuple %p: %u %u.%u.%u.%u:%u -> %u.%u.%u.%u:%u\n", \ + (tp), (tp)->dst.protonum, \ + IP_PARTS((tp)->src.ip), ntohs((tp)->src.u.all), \ + IP_PARTS((tp)->dst.ip), ntohs((tp)->dst.u.all)) + +#define CTINFO2DIR(ctinfo) ((ctinfo) == IP_CT_IS_REPLY ? IP_CT_DIR_REPLY : IP_CT_DIR_ORIGINAL) + +/* If we're the first tuple, it's the original dir. */ +#define DIRECTION(h) ((enum ip_conntrack_dir)(&(h)->ctrack->tuplehash[1] == (h))) + +enum ip_conntrack_dir +{ + IP_CT_DIR_ORIGINAL, + IP_CT_DIR_REPLY, + IP_CT_DIR_MAX +}; + +/* Connections have two entries in the hash table: one for each way */ +struct ip_conntrack_tuple_hash +{ + struct list_head list; + + struct ip_conntrack_tuple tuple; + + /* this == &ctrack->tuplehash[DIRECTION(this)]. */ + struct ip_conntrack *ctrack; +}; + +#endif /* __KERNEL__ */ +#endif /* _IP_CONNTRACK_TUPLE_H */ diff -u --recursive --new-file v2.3.99-pre1/linux/include/linux/netfilter_ipv4/ip_nat.h linux/include/linux/netfilter_ipv4/ip_nat.h --- v2.3.99-pre1/linux/include/linux/netfilter_ipv4/ip_nat.h Wed Dec 31 16:00:00 1969 +++ linux/include/linux/netfilter_ipv4/ip_nat.h Fri Mar 17 10:56:20 2000 @@ -0,0 +1,117 @@ +#ifndef _IP_NAT_H +#define _IP_NAT_H +#include +#include + +#define IP_NAT_MAPPING_TYPE_MAX_NAMELEN 16 + +enum ip_nat_manip_type +{ + IP_NAT_MANIP_SRC, + IP_NAT_MANIP_DST +}; + +/* SRC manip occurs only on POST_ROUTING */ +#define HOOK2MANIP(hooknum) ((hooknum) != NF_IP_POST_ROUTING) + +/* 2.3.19 (I hope) will define this in linux/netfilter_ipv4.h. 
*/ +#ifndef SO_ORIGINAL_DST +#define SO_ORIGINAL_DST 80 +#endif + +#define IP_NAT_RANGE_MAP_IPS 1 +#define IP_NAT_RANGE_PROTO_SPECIFIED 2 +/* Used internally by get_unique_tuple(). */ +#define IP_NAT_RANGE_FULL 4 + +/* Single range specification. */ +struct ip_nat_range +{ + /* Set to OR of flags above. */ + unsigned int flags; + + /* Inclusive: network order. */ + u_int32_t min_ip, max_ip; + + /* Inclusive: network order */ + union ip_conntrack_manip_proto min, max; +}; + +/* A range consists of an array of 1 or more ip_nat_range */ +struct ip_nat_multi_range +{ + unsigned int rangesize; + + /* hangs off end. */ + struct ip_nat_range range[1]; +}; + +#ifdef __KERNEL__ +#include +#include + +/* Protects NAT hash tables, and NAT-private part of conntracks. */ +DECLARE_RWLOCK_EXTERN(ip_nat_lock); + +/* Hashes for by-source and IP/protocol. */ +struct ip_nat_hash +{ + struct list_head list; + + /* conntrack we're embedded in: NULL if not in hash. */ + struct ip_conntrack *conntrack; +}; + +/* Worst case: local-out manip + 1 post-routing, and reverse dirn. */ +#define IP_NAT_MAX_MANIPS (2*3) + +struct ip_nat_info_manip +{ + /* The direction. */ + u_int8_t direction; + + /* Which hook the manipulation happens on. */ + u_int8_t hooknum; + + /* The manipulation type. */ + u_int8_t maniptype; + + /* Manipulations to occur at each conntrack in this dirn. */ + struct ip_conntrack_manip manip; +}; + +/* The structure embedded in the conntrack structure. */ +struct ip_nat_info +{ + /* Set to zero when conntrack created: bitmask of maniptypes */ + int initialized; + + unsigned int num_manips; + + /* Manipulations to be done on this conntrack. */ + struct ip_nat_info_manip manips[IP_NAT_MAX_MANIPS]; + + /* The mapping type which created us (NULL for null mapping). */ + const struct ip_nat_mapping_type *mtype; + + struct ip_nat_hash bysource, byipsproto; + + /* Helper (NULL if none). */ + struct ip_nat_helper *helper; +}; + +/* Set up the info structure to map into this range. 
*/ +extern unsigned int ip_nat_setup_info(struct ip_conntrack *conntrack, + const struct ip_nat_multi_range *mr, + unsigned int hooknum); + +/* Is this tuple already taken? (not by us)*/ +extern int ip_nat_used_tuple(const struct ip_conntrack_tuple *tuple, + const struct ip_conntrack *ignored_conntrack); + +/* Calculate relative checksum. */ +extern u_int16_t ip_nat_cheat_check(u_int32_t oldvalinv, + u_int32_t newval, + u_int16_t oldcheck); +#endif /*__KERNEL__*/ +#endif diff -u --recursive --new-file v2.3.99-pre1/linux/include/linux/netfilter_ipv4/ip_nat_core.h linux/include/linux/netfilter_ipv4/ip_nat_core.h --- v2.3.99-pre1/linux/include/linux/netfilter_ipv4/ip_nat_core.h Wed Dec 31 16:00:00 1969 +++ linux/include/linux/netfilter_ipv4/ip_nat_core.h Fri Mar 17 10:56:20 2000 @@ -0,0 +1,33 @@ +#ifndef _IP_NAT_CORE_H +#define _IP_NAT_CORE_H +#include +#include + +/* This header used to share core functionality between the standalone + NAT module, and the compatibility layer's use of NAT for masquerading. */ +extern int ip_nat_init(void); +extern void ip_nat_cleanup(void); + +extern unsigned int do_bindings(struct ip_conntrack *ct, + enum ip_conntrack_info conntrackinfo, + struct ip_nat_info *info, + unsigned int hooknum, + struct sk_buff **pskb); + +extern struct list_head protos; + +extern void icmp_reply_translation(struct sk_buff *skb, + struct ip_conntrack *conntrack, + unsigned int hooknum, + int dir); + +extern void replace_in_hashes(struct ip_conntrack *conntrack, + struct ip_nat_info *info); +extern void place_in_hashes(struct ip_conntrack *conntrack, + struct ip_nat_info *info); + +/* Built-in protocols. 
*/ +extern struct ip_nat_protocol ip_nat_protocol_tcp; +extern struct ip_nat_protocol ip_nat_protocol_udp; +extern struct ip_nat_protocol ip_nat_protocol_icmp; +#endif /* _IP_NAT_CORE_H */ diff -u --recursive --new-file v2.3.99-pre1/linux/include/linux/netfilter_ipv4/ip_nat_ftp.h linux/include/linux/netfilter_ipv4/ip_nat_ftp.h --- v2.3.99-pre1/linux/include/linux/netfilter_ipv4/ip_nat_ftp.h Wed Dec 31 16:00:00 1969 +++ linux/include/linux/netfilter_ipv4/ip_nat_ftp.h Fri Mar 17 10:56:20 2000 @@ -0,0 +1,21 @@ +#ifndef _IP_NAT_FTP_H +#define _IP_NAT_FTP_H +/* FTP extension for TCP NAT alteration. */ + +#ifndef __KERNEL__ +#error Only in kernel. +#endif + +/* Protects ftp part of conntracks */ +DECLARE_LOCK_EXTERN(ip_ftp_lock); + +/* We keep track of where the last SYN correction was, and the SYN + offsets before and after that correction. Two of these (indexed by + direction). */ +struct ip_nat_ftp_info +{ + u_int32_t syn_correction_pos; + int32_t syn_offset_before, syn_offset_after; +}; + +#endif /* _IP_NAT_FTP_H */ diff -u --recursive --new-file v2.3.99-pre1/linux/include/linux/netfilter_ipv4/ip_nat_helper.h linux/include/linux/netfilter_ipv4/ip_nat_helper.h --- v2.3.99-pre1/linux/include/linux/netfilter_ipv4/ip_nat_helper.h Wed Dec 31 16:00:00 1969 +++ linux/include/linux/netfilter_ipv4/ip_nat_helper.h Fri Mar 17 10:56:20 2000 @@ -0,0 +1,30 @@ +#ifndef _IP_NAT_HELPER_H +#define _IP_NAT_HELPER_H +/* NAT protocol helper routines. */ + +#include + +struct sk_buff; + +struct ip_nat_helper +{ + /* Internal use */ + struct list_head list; + + /* Here's the protocol and dst we care about. 
*/ + u_int16_t protocol; + u_int16_t protocol_dst; + + /* Helper function: returns verdict */ + unsigned int (*help)(struct ip_conntrack *ct, + struct ip_nat_info *info, + enum ip_conntrack_info ctinfo, + unsigned int hooknum, + struct sk_buff **pskb); + + const char *name; +}; + +extern int ip_nat_helper_register(struct ip_nat_helper *me); +extern void ip_nat_helper_unregister(struct ip_nat_helper *me); +#endif diff -u --recursive --new-file v2.3.99-pre1/linux/include/linux/netfilter_ipv4/ip_nat_protocol.h linux/include/linux/netfilter_ipv4/ip_nat_protocol.h --- v2.3.99-pre1/linux/include/linux/netfilter_ipv4/ip_nat_protocol.h Wed Dec 31 16:00:00 1969 +++ linux/include/linux/netfilter_ipv4/ip_nat_protocol.h Fri Mar 17 10:56:20 2000 @@ -0,0 +1,57 @@ +/* Header for use in defining a given protocol. */ +#ifndef _IP_NAT_PROTOCOL_H +#define _IP_NAT_PROTOCOL_H +#include +#include + +struct iphdr; +struct ip_nat_range; + +struct ip_nat_protocol +{ + struct list_head list; + + /* Protocol name */ + const char *name; + + /* Protocol number. */ + unsigned int protonum; + + /* Do a packet translation according to the ip_nat_proto_manip + * and manip type. */ + void (*manip_pkt)(struct iphdr *iph, size_t len, + const struct ip_conntrack_manip *manip, + enum ip_nat_manip_type maniptype); + + /* Is the manipable part of the tuple between min and max incl? */ + int (*in_range)(const struct ip_conntrack_tuple *tuple, + enum ip_nat_manip_type maniptype, + const union ip_conntrack_manip_proto *min, + const union ip_conntrack_manip_proto *max); + + /* Alter the per-proto part of the tuple (depending on + maniptype), to give a unique tuple in the given range if + possible; return false if not. Per-protocol part of tuple + is initialized to the incoming packet. 
*/ + int (*unique_tuple)(struct ip_conntrack_tuple *tuple, + const struct ip_nat_range *range, + enum ip_nat_manip_type maniptype, + const struct ip_conntrack *conntrack); + + unsigned int (*print)(char *buffer, + const struct ip_conntrack_tuple *match, + const struct ip_conntrack_tuple *mask); + + unsigned int (*print_range)(char *buffer, + const struct ip_nat_range *range); +}; + +/* Protocol registration. */ +extern int ip_nat_protocol_register(struct ip_nat_protocol *proto); +extern void ip_nat_protocol_unregister(struct ip_nat_protocol *proto); + +extern int init_protocols(void) __init; +extern void cleanup_protocols(void); +extern struct ip_nat_protocol *find_nat_proto(u_int16_t protonum); + +#endif /* _IP_NAT_PROTOCOL_H */ diff -u --recursive --new-file v2.3.99-pre1/linux/include/linux/netfilter_ipv4/ip_nat_rule.h linux/include/linux/netfilter_ipv4/ip_nat_rule.h --- v2.3.99-pre1/linux/include/linux/netfilter_ipv4/ip_nat_rule.h Wed Dec 31 16:00:00 1969 +++ linux/include/linux/netfilter_ipv4/ip_nat_rule.h Fri Mar 17 10:56:20 2000 @@ -0,0 +1,35 @@ +#ifndef _IP_NAT_RULE_H +#define _IP_NAT_RULE_H +#include +#include +#include + +#ifdef __KERNEL__ +/* Want to be told when we first NAT an expected packet for a conntrack?
*/ +struct ip_nat_expect +{ + struct list_head list; + + /* Returns 1 (and sets verdict) if it has setup NAT for this + connection */ + int (*expect)(struct sk_buff **pskb, + unsigned int hooknum, + struct ip_conntrack *ct, + struct ip_nat_info *info, + struct ip_conntrack *master, + struct ip_nat_info *masterinfo, + unsigned int *verdict); +}; + +extern int ip_nat_expect_register(struct ip_nat_expect *expect); +extern void ip_nat_expect_unregister(struct ip_nat_expect *expect); +extern int ip_nat_rule_init(void) __init; +extern void ip_nat_rule_cleanup(void); +extern int ip_nat_rule_find(struct sk_buff **pskb, + unsigned int hooknum, + const struct net_device *in, + const struct net_device *out, + struct ip_conntrack *ct, + struct ip_nat_info *info); +#endif +#endif /* _IP_NAT_RULE_H */ diff -u --recursive --new-file v2.3.99-pre1/linux/include/linux/netfilter_ipv4/ip_queue.h linux/include/linux/netfilter_ipv4/ip_queue.h --- v2.3.99-pre1/linux/include/linux/netfilter_ipv4/ip_queue.h Wed Dec 31 16:00:00 1969 +++ linux/include/linux/netfilter_ipv4/ip_queue.h Fri Mar 17 10:56:20 2000 @@ -0,0 +1,86 @@ +/* + * This is a module which is used for queueing IPv4 packets and + * communicating with userspace via netlink. + * + * (C) 2000 James Morris + */ +#ifndef _IP_QUEUE_H +#define _IP_QUEUE_H + +#ifdef __KERNEL__ +#ifdef DEBUG_IPQ +#define QDEBUG(x...) printk(KERN_DEBUG ## x) +#else +#define QDEBUG(x...) +#endif /* DEBUG_IPQ */ +#else +#include +#endif /* ! 
__KERNEL__ */ + +/* Messages sent from kernel */ +typedef struct ipq_packet_msg { + unsigned long packet_id; /* ID of queued packet */ + unsigned long mark; /* Netfilter mark value */ + long timestamp_sec; /* Packet arrival time (seconds) */ + long timestamp_usec; /* Packet arrival time (+useconds) */ + unsigned int hook; /* Netfilter hook we rode in on */ + char indev_name[IFNAMSIZ]; /* Name of incoming interface */ + char outdev_name[IFNAMSIZ]; /* Name of outgoing interface */ + size_t data_len; /* Length of packet data */ + /* Optional packet data follows */ +} ipq_packet_msg_t; + +/* Messages sent from userspace */ +typedef struct ipq_mode_msg { + unsigned char value; /* Requested mode */ + size_t range; /* Optional range of packet requested */ +} ipq_mode_msg_t; + +typedef struct ipq_verdict_msg { + unsigned int value; /* Verdict to hand to netfilter */ + unsigned long id; /* Packet ID for this verdict */ + size_t data_len; /* Length of replacement data */ + /* Optional replacement data follows */ +} ipq_verdict_msg_t; + +typedef struct ipq_peer_msg { + union { + ipq_verdict_msg_t verdict; + ipq_mode_msg_t mode; + } msg; +} ipq_peer_msg_t; + +/* Each queued packet has one of these states */ +enum { + IPQ_PS_NEW, /* Newly arrived packet */ + IPQ_PS_WAITING, /* User has been notified of packet, + we're waiting for a verdict */ + IPQ_PS_VERDICT /* Packet has been assigned verdict, + waiting to be reinjected */ +}; +#define IPQ_PS_MAX IPQ_PS_VERDICT + +/* The queue operates in one of these states */ +enum { + IPQ_QS_HOLD, /* Hold all packets in queue */ + IPQ_QS_COPY, /* Copy metadata and/or packets to user */ + IPQ_QS_FLUSH /* Flush and drop all queue entries */ +}; +#define IPQ_QS_MAX IPQ_QS_FLUSH + +/* Modes requested by peer */ +enum { + IPQ_COPY_NONE, /* Copy nothing */ + IPQ_COPY_META, /* Copy metadata */ + IPQ_COPY_PACKET /* Copy metadata + packet (range) */ +}; +#define IPQ_COPY_MAX IPQ_COPY_PACKET + +/* Types of messages */ +#define IPQM_BASE 0x10 /*
standard netlink messages below this */ +#define IPQM_MODE (IPQM_BASE + 1) /* Mode request from peer */ +#define IPQM_VERDICT (IPQM_BASE + 2) /* Verdict from peer */ +#define IPQM_PACKET (IPQM_BASE + 3) /* Packet from kernel */ +#define IPQM_MAX (IPQM_BASE + 4) + +#endif /*_IP_QUEUE_H*/ diff -u --recursive --new-file v2.3.99-pre1/linux/include/linux/netfilter_ipv4/ip_tables.h linux/include/linux/netfilter_ipv4/ip_tables.h --- v2.3.99-pre1/linux/include/linux/netfilter_ipv4/ip_tables.h Wed Dec 31 16:00:00 1969 +++ linux/include/linux/netfilter_ipv4/ip_tables.h Sat Mar 18 16:51:35 2000 @@ -0,0 +1,420 @@ +/* + * 25-Jul-1998 Major changes to allow for ip chain table + * + * 3-Jan-2000 Named tables to allow packet selection for different uses. + */ + +/* + * Format of an IP firewall descriptor + * + * src, dst, src_mask, dst_mask are always stored in network byte order. + * flags are stored in host byte order (of course). + * Port numbers are stored in HOST byte order. + */ + +#ifndef _IPTABLES_H +#define _IPTABLES_H + +#ifdef __KERNEL__ +#include +#include +#include +#include +#include +#endif +#include + +#define IPT_FUNCTION_MAXNAMELEN 32 +#define IPT_TABLE_MAXNAMELEN 32 + +/* Yes, Virginia, you have to zero the padding. 
*/ +struct ipt_ip { + /* Source and destination IP addr */ + struct in_addr src, dst; + /* Mask for src and dest IP addr */ + struct in_addr smsk, dmsk; + char iniface[IFNAMSIZ], outiface[IFNAMSIZ]; + unsigned char iniface_mask[IFNAMSIZ], outiface_mask[IFNAMSIZ]; + + /* Protocol, 0 = ANY */ + u_int16_t proto; + + /* Flags word */ + u_int8_t flags; + /* Inverse flags */ + u_int8_t invflags; +}; + +struct ipt_entry_match +{ + /* Total length */ + u_int16_t match_size; + union { + /* Used by userspace */ + char name[IPT_FUNCTION_MAXNAMELEN]; + /* Used inside the kernel */ + struct ipt_match *match; + } u; + + unsigned char data[0]; +}; + +struct ipt_entry_target +{ + /* Total length */ + u_int16_t target_size; + union { + /* Used by userspace */ + char name[IPT_FUNCTION_MAXNAMELEN]; + /* Used inside the kernel */ + struct ipt_target *target; + } u; + + unsigned char data[0]; +}; + +struct ipt_standard_target +{ + struct ipt_entry_target target; + int verdict; +}; + +struct ipt_counters +{ + u_int64_t pcnt, bcnt; /* Packet and byte counters */ +}; + +/* Values for "flags" field in struct ipt_ip (general ip structure). */ +#define IPT_F_FRAG 0x01 /* Set if rule is a fragment rule */ +#define IPT_F_MASK 0x01 /* All possible flag bits mask. */ + +/* Values for "invflags" field in struct ipt_ip. */ +#define IPT_INV_VIA_IN 0x01 /* Invert the sense of IN IFACE. */ +#define IPT_INV_VIA_OUT 0x02 /* Invert the sense of OUT IFACE */ +#define IPT_INV_TOS 0x04 /* Invert the sense of TOS. */ +#define IPT_INV_SRCIP 0x08 /* Invert the sense of SRC IP. */ +#define IPT_INV_DSTIP 0x10 /* Invert the sense of DST IP. */ +#define IPT_INV_FRAG 0x20 /* Invert the sense of FRAG. */ +#define IPT_INV_PROTO 0x40 /* Invert the sense of PROTO. */ +#define IPT_INV_MASK 0x7F /* All possible flag bits mask. */ + +/* This structure defines each of the firewall rules.
Consists of 3 + parts which are 1) general IP header stuff 2) match specific + stuff 3) the target to perform if the rule matches */ +struct ipt_entry +{ + struct ipt_ip ip; + + /* Mark with fields that we care about. */ + unsigned int nfcache; + + /* Size of ipt_entry + matches */ + u_int16_t target_offset; + /* Size of ipt_entry + matches + target */ + u_int16_t next_offset; + + /* Back pointer */ + unsigned int comefrom; + + /* Packet and byte counters. */ + struct ipt_counters counters; + + /* The matches (if any), then the target. */ + unsigned char elems[0]; +}; + +/* + * New IP firewall options for [gs]etsockopt at the RAW IP level. + * Unlike BSD Linux inherits IP options so you don't have to use a raw + * socket for this. Instead we check rights in the calls. */ +#define IPT_BASE_CTL 64 /* base for firewall socket options */ + +#define IPT_SO_SET_REPLACE (IPT_BASE_CTL) +#define IPT_SO_SET_ADD_COUNTERS (IPT_BASE_CTL + 1) +#define IPT_SO_SET_MAX IPT_SO_SET_ADD_COUNTERS + +#define IPT_SO_GET_INFO (IPT_BASE_CTL) +#define IPT_SO_GET_ENTRIES (IPT_BASE_CTL + 1) +#define IPT_SO_GET_MAX IPT_SO_GET_ENTRIES + +/* CONTINUE verdict for targets */ +#define IPT_CONTINUE 0xFFFFFFFF + +/* For standard target */ +#define IPT_RETURN (-NF_MAX_VERDICT - 1) + +/* TCP matching stuff */ +struct ipt_tcp +{ + u_int16_t spts[2]; /* Source port range. */ + u_int16_t dpts[2]; /* Destination port range. */ + u_int8_t option; /* TCP Option iff non-zero*/ + u_int8_t flg_mask; /* TCP flags mask byte */ + u_int8_t flg_cmp; /* TCP flags compare byte */ + u_int8_t invflags; /* Inverse flags */ +}; + +/* Values for "inv" field in struct ipt_tcp. */ +#define IPT_TCP_INV_SRCPT 0x01 /* Invert the sense of source ports. */ +#define IPT_TCP_INV_DSTPT 0x02 /* Invert the sense of dest ports. */ +#define IPT_TCP_INV_FLAGS 0x04 /* Invert the sense of TCP flags. */ +#define IPT_TCP_INV_OPTION 0x08 /* Invert the sense of option test. */ +#define IPT_TCP_INV_MASK 0x0F /* All possible flags. 
*/ + +/* UDP matching stuff */ +struct ipt_udp +{ + u_int16_t spts[2]; /* Source port range. */ + u_int16_t dpts[2]; /* Destination port range. */ + u_int8_t invflags; /* Inverse flags */ +}; + +/* Values for "invflags" field in struct ipt_udp. */ +#define IPT_UDP_INV_SRCPT 0x01 /* Invert the sense of source ports. */ +#define IPT_UDP_INV_DSTPT 0x02 /* Invert the sense of dest ports. */ +#define IPT_UDP_INV_MASK 0x03 /* All possible flags. */ + +/* ICMP matching stuff */ +struct ipt_icmp +{ + u_int8_t type; /* type to match */ + u_int8_t code[2]; /* range of code */ + u_int8_t invflags; /* Inverse flags */ +}; + +/* Values for "inv" field for struct ipt_icmp. */ +#define IPT_ICMP_INV 0x01 /* Invert the sense of type/code test */ + +/* The argument to IPT_SO_GET_INFO */ +struct ipt_getinfo +{ + /* Which table: caller fills this in. */ + char name[IPT_TABLE_MAXNAMELEN]; + + /* Kernel fills these in. */ + /* Which hook entry points are valid: bitmask */ + unsigned int valid_hooks; + + /* Hook entry points: one per netfilter hook. */ + unsigned int hook_entry[NF_IP_NUMHOOKS]; + + /* Underflow points. */ + unsigned int underflow[NF_IP_NUMHOOKS]; + + /* Number of entries */ + unsigned int num_entries; + + /* Size of entries. */ + unsigned int size; +}; + +/* The argument to IPT_SO_SET_REPLACE. */ +struct ipt_replace +{ + /* Which table. */ + char name[IPT_TABLE_MAXNAMELEN]; + + /* Which hook entry points are valid: bitmask. You can't + change this. */ + unsigned int valid_hooks; + + /* Number of entries */ + unsigned int num_entries; + + /* Total size of new entries */ + unsigned int size; + + /* Hook entry points. */ + unsigned int hook_entry[NF_IP_NUMHOOKS]; + + /* Underflow points. */ + unsigned int underflow[NF_IP_NUMHOOKS]; + + /* Information about old entries: */ + /* Number of counters (must be equal to current number of entries). */ + unsigned int num_counters; + /* The old entries' counters. 
*/ + struct ipt_counters *counters; + + /* The entries (hang off end: not really an array). */ + struct ipt_entry entries[0]; +}; + +/* The argument to IPT_SO_SET_ADD_COUNTERS. */ +struct ipt_counters_info +{ + /* Which table. */ + char name[IPT_TABLE_MAXNAMELEN]; + + unsigned int num_counters; + + /* The counters (actually `num_counters' of these). */ + struct ipt_counters counters[0]; +}; + +/* The argument to IPT_SO_GET_ENTRIES. */ +struct ipt_get_entries +{ + /* Which table: user fills this in. */ + char name[IPT_TABLE_MAXNAMELEN]; + + /* User fills this in: total entry size. */ + unsigned int size; + + /* The entries. */ + unsigned char entries[0]; +}; + +/* Standard return verdict, or do jump. */ +#define IPT_STANDARD_TARGET "" +/* Error verdict. */ +#define IPT_ERROR_TARGET "ERROR" + +/* Helper functions */ +extern __inline__ struct ipt_entry_target * +ipt_get_target(struct ipt_entry *e) +{ + return (void *)e + e->target_offset; +} + +/* fn returns 0 to continue iteration */ +#define IPT_MATCH_ITERATE(e, fn, args...) \ +({ \ + unsigned int __i; \ + int __ret = 0; \ + struct ipt_entry_match *__m; \ + \ + for (__i = sizeof(struct ipt_entry); \ + __i < (e)->target_offset; \ + __i += __m->match_size) { \ + __m = (void *)(e) + __i; \ + \ + __ret = fn(__m , ## args); \ + if (__ret != 0) \ + break; \ + } \ + __ret; \ +}) + +/* fn returns 0 to continue iteration */ +#define IPT_ENTRY_ITERATE(entries, size, fn, args...) \ +({ \ + unsigned int __i; \ + int __ret = 0; \ + struct ipt_entry *__e; \ + \ + for (__i = 0; __i < (size); __i += __e->next_offset) { \ + __e = (void *)(entries) + __i; \ + \ + __ret = fn(__e , ## args); \ + if (__ret != 0) \ + break; \ + } \ + __ret; \ +}) + +/* + * Main firewall chains definitions and global var's definitions.
+ */ +#ifdef __KERNEL__ + +#include +extern void ipt_init(void) __init; + +struct ipt_match +{ + struct list_head list; + + const char name[IPT_FUNCTION_MAXNAMELEN]; + + /* Return true or false: return FALSE and set *hotdrop = 1 to + force immediate packet drop. */ + int (*match)(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const void *matchinfo, + int offset, + const void *hdr, + u_int16_t datalen, + int *hotdrop); + + /* Called when user tries to insert an entry of this type. */ + /* Should return true or false. */ + int (*checkentry)(const char *tablename, + const struct ipt_ip *ip, + void *matchinfo, + unsigned int matchinfosize, + unsigned int hook_mask); + + /* Set this to THIS_MODULE if you are a module, otherwise NULL */ + struct module *me; +}; + +/* Registration hooks for targets. */ +struct ipt_target +{ + struct list_head list; + + const char name[IPT_FUNCTION_MAXNAMELEN]; + + /* Returns verdict. */ + unsigned int (*target)(struct sk_buff **pskb, + unsigned int hooknum, + const struct net_device *in, + const struct net_device *out, + const void *targinfo, + void *userdata); + + /* Called when user tries to insert an entry of this type: + hook_mask is a bitmask of hooks from which it can be + called. */ + /* Should return true or false. */ + int (*checkentry)(const char *tablename, + const struct ipt_entry *e, + void *targinfo, + unsigned int targinfosize, + unsigned int hook_mask); + + /* Set this to THIS_MODULE if you are a module, otherwise NULL */ + struct module *me; +}; + +extern int ipt_register_target(struct ipt_target *target); +extern void ipt_unregister_target(struct ipt_target *target); + +extern int ipt_register_match(struct ipt_match *match); +extern void ipt_unregister_match(struct ipt_match *match); + +/* Furniture shopping... */ +struct ipt_table +{ + struct list_head list; + + /* A unique name... 
*/ + char name[IPT_TABLE_MAXNAMELEN]; + + /* Seed table: copied in register_table */ + struct ipt_replace *table; + + /* What hooks you will enter on */ + unsigned int valid_hooks; + + /* Lock for the curtain */ + rwlock_t lock; + + /* Man behind the curtain... */ + struct ipt_table_info *private; +}; + +extern int ipt_register_table(struct ipt_table *table); +extern void ipt_unregister_table(struct ipt_table *table); +extern unsigned int ipt_do_table(struct sk_buff **pskb, + unsigned int hook, + const struct net_device *in, + const struct net_device *out, + struct ipt_table *table, + void *userdata); + +#define IPT_ALIGN(s) (((s) + (__alignof__(struct ipt_match)-1)) & ~(__alignof__(struct ipt_match)-1)) +#endif /*__KERNEL__*/ +#endif /* _IPTABLES_H */ diff -u --recursive --new-file v2.3.99-pre1/linux/include/linux/netfilter_ipv4/ipchains_core.h linux/include/linux/netfilter_ipv4/ipchains_core.h --- v2.3.99-pre1/linux/include/linux/netfilter_ipv4/ipchains_core.h Wed Dec 31 16:00:00 1969 +++ linux/include/linux/netfilter_ipv4/ipchains_core.h Fri Mar 17 10:56:20 2000 @@ -0,0 +1,193 @@ +/* + * This code is heavily based on the code in ip_fw.h; see that file for + * copyrights and attributions. This code is basically GPL. + * + * 15-Feb-1997: Major changes to allow graphs for firewall rules. + * Paul Russell and + * Michael Neuling + * 2-Nov-1997: Changed types to __u16, etc. + * Removed IP_FW_F_TCPACK & IP_FW_F_BIDIR. + * Added inverse flags field. + * Removed multiple port specs. + */ + +/* + * Format of an IP firewall descriptor + * + * src, dst, src_mask, dst_mask are always stored in network byte order. + * flags are stored in host byte order (of course). + * Port numbers are stored in HOST byte order. 
+ */ + +#ifndef _IP_FWCHAINS_H +#define _IP_FWCHAINS_H + +#ifdef __KERNEL__ +#include +#include +#include +#include +#include +#endif /* __KERNEL__ */ +#define IP_FW_MAX_LABEL_LENGTH 8 +typedef char ip_chainlabel[IP_FW_MAX_LABEL_LENGTH+1]; + +struct ip_fw +{ + struct in_addr fw_src, fw_dst; /* Source and destination IP addr */ + struct in_addr fw_smsk, fw_dmsk; /* Mask for src and dest IP addr */ + __u32 fw_mark; /* ID to stamp on packet */ + __u16 fw_proto; /* Protocol, 0 = ANY */ + __u16 fw_flg; /* Flags word */ + __u16 fw_invflg; /* Inverse flags */ + __u16 fw_spts[2]; /* Source port range. */ + __u16 fw_dpts[2]; /* Destination port range. */ + __u16 fw_redirpt; /* Port to redirect to. */ + __u16 fw_outputsize; /* Max amount to output to + NETLINK */ + char fw_vianame[IFNAMSIZ]; /* name of interface "via" */ + __u8 fw_tosand, fw_tosxor; /* Revised packet priority */ +}; + +struct ip_fwuser +{ + struct ip_fw ipfw; + ip_chainlabel label; +}; + +/* Values for "fw_flg" field . */ +#define IP_FW_F_PRN 0x0001 /* Print packet if it matches */ +#define IP_FW_F_TCPSYN 0x0002 /* For tcp packets-check SYN only */ +#define IP_FW_F_FRAG 0x0004 /* Set if rule is a fragment rule */ +#define IP_FW_F_MARKABS 0x0008 /* Set the mark to fw_mark, not add. */ +#define IP_FW_F_WILDIF 0x0010 /* Need only match start of interface name. */ +#define IP_FW_F_NETLINK 0x0020 /* Redirect to netlink: 2.1.x only */ +#define IP_FW_F_MASK 0x003F /* All possible flag bits mask */ + +/* Values for "fw_invflg" field. */ +#define IP_FW_INV_SRCIP 0x0001 /* Invert the sense of fw_src. */ +#define IP_FW_INV_DSTIP 0x0002 /* Invert the sense of fw_dst. */ +#define IP_FW_INV_PROTO 0x0004 /* Invert the sense of fw_proto. */ +#define IP_FW_INV_SRCPT 0x0008 /* Invert the sense of source ports. */ +#define IP_FW_INV_DSTPT 0x0010 /* Invert the sense of destination ports. */ +#define IP_FW_INV_VIA 0x0020 /* Invert the sense of fw_vianame. */ +#define IP_FW_INV_SYN 0x0040 /* Invert the sense of IP_FW_F_TCPSYN. 
*/ +#define IP_FW_INV_FRAG 0x0080 /* Invert the sense of IP_FW_F_FRAG. */ + +/* + * New IP firewall options for [gs]etsockopt at the RAW IP level. + * Unlike BSD Linux inherits IP options so you don't have to use + * a raw socket for this. Instead we check rights in the calls. */ + +#define IP_FW_BASE_CTL 64 /* base for firewall socket options */ + +#define IP_FW_APPEND (IP_FW_BASE_CTL) /* Takes ip_fwchange */ +#define IP_FW_REPLACE (IP_FW_BASE_CTL+1) /* Takes ip_fwnew */ +#define IP_FW_DELETE_NUM (IP_FW_BASE_CTL+2) /* Takes ip_fwdelnum */ +#define IP_FW_DELETE (IP_FW_BASE_CTL+3) /* Takes ip_fwchange */ +#define IP_FW_INSERT (IP_FW_BASE_CTL+4) /* Takes ip_fwnew */ +#define IP_FW_FLUSH (IP_FW_BASE_CTL+5) /* Takes ip_chainlabel */ +#define IP_FW_ZERO (IP_FW_BASE_CTL+6) /* Takes ip_chainlabel */ +#define IP_FW_CHECK (IP_FW_BASE_CTL+7) /* Takes ip_fwtest */ +#define IP_FW_MASQ_TIMEOUTS (IP_FW_BASE_CTL+8) /* Takes 3 ints */ +#define IP_FW_CREATECHAIN (IP_FW_BASE_CTL+9) /* Takes ip_chainlabel */ +#define IP_FW_DELETECHAIN (IP_FW_BASE_CTL+10) /* Takes ip_chainlabel */ +#define IP_FW_POLICY (IP_FW_BASE_CTL+11) /* Takes ip_fwpolicy */ +/* Masquerade control, only 1 optname */ + +#define IP_FW_MASQ_CTL (IP_FW_BASE_CTL+12) /* General ip_masq ctl */ + +/* Builtin chain labels */ +#define IP_FW_LABEL_FORWARD "forward" +#define IP_FW_LABEL_INPUT "input" +#define IP_FW_LABEL_OUTPUT "output" + +/* Special targets */ +#define IP_FW_LABEL_MASQUERADE "MASQ" +#define IP_FW_LABEL_REDIRECT "REDIRECT" +#define IP_FW_LABEL_ACCEPT "ACCEPT" +#define IP_FW_LABEL_BLOCK "DENY" +#define IP_FW_LABEL_REJECT "REJECT" +#define IP_FW_LABEL_RETURN "RETURN" +#define IP_FW_LABEL_QUEUE "QUEUE" + +/* Files in /proc/net */ +#define IP_FW_PROC_CHAINS "ip_fwchains" +#define IP_FW_PROC_CHAIN_NAMES "ip_fwnames" + + +struct ip_fwpkt +{ + struct iphdr fwp_iph; /* IP header */ + union { + struct tcphdr fwp_tcph; /* TCP header or */ + struct udphdr fwp_udph; /* UDP header */ + struct icmphdr fwp_icmph; /* ICMP 
header */ + } fwp_protoh; + struct in_addr fwp_via; /* interface address */ + char fwp_vianame[IFNAMSIZ]; /* interface name */ +}; + +/* The argument to IP_FW_DELETE and IP_FW_APPEND */ +struct ip_fwchange +{ + struct ip_fwuser fwc_rule; + ip_chainlabel fwc_label; +}; + +/* The argument to IP_FW_CHECK. */ +struct ip_fwtest +{ + struct ip_fwpkt fwt_packet; /* Packet to be tested */ + ip_chainlabel fwt_label; /* Block to start test in */ +}; + +/* The argument to IP_FW_DELETE_NUM */ +struct ip_fwdelnum +{ + __u32 fwd_rulenum; + ip_chainlabel fwd_label; +}; + +/* The argument to IP_FW_REPLACE and IP_FW_INSERT */ +struct ip_fwnew +{ + __u32 fwn_rulenum; + struct ip_fwuser fwn_rule; + ip_chainlabel fwn_label; +}; + +/* The argument to IP_FW_POLICY */ +struct ip_fwpolicy +{ + ip_chainlabel fwp_policy; + ip_chainlabel fwp_label; +}; +/* + * timeouts for ip masquerading + */ + +extern int ip_fw_masq_timeouts(void *, int); + + +/* + * Main firewall chains definitions and global var's definitions. + */ + +#ifdef __KERNEL__ + +#include +#include +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,1,0) +#include +extern void ip_fw_init(void) __init; +#else /* 2.0.x */ +extern void ip_fw_init(void); +#endif /* 2.1.x */ +extern int ip_fw_ctl(int, void *, int); +#ifdef CONFIG_IP_MASQUERADE +extern int ip_masq_uctl(int, char *, int); +#endif +#endif /* __KERNEL__ */ + +#endif /* _IP_FWCHAINS_H */ diff -u --recursive --new-file v2.3.99-pre1/linux/include/linux/netfilter_ipv4/ipfwadm_core.h linux/include/linux/netfilter_ipv4/ipfwadm_core.h --- v2.3.99-pre1/linux/include/linux/netfilter_ipv4/ipfwadm_core.h Wed Dec 31 16:00:00 1969 +++ linux/include/linux/netfilter_ipv4/ipfwadm_core.h Fri Mar 17 10:56:20 2000 @@ -0,0 +1,256 @@ +#ifndef _IPFWADM_CORE_H +#define _IPFWADM_CORE_H +/* Minor modifications to fit on compatibility framework: + Rusty.Russell@rustcorp.com.au +*/ + +/* + * IP firewalling code. This is taken from 4.4BSD. Please note the + * copyright message below.
As per the GPL it must be maintained + * and the licenses thus do not conflict. While this port is subject + * to the GPL I also place my modifications under the original + * license in recognition of the original copyright. + * + * Ported from BSD to Linux, + * Alan Cox 22/Nov/1994. + * Merged and included the FreeBSD-Current changes at Ugen's request + * (but hey it's a lot cleaner now). Ugen would prefer in some ways + * we waited for his final product but since Linux 1.2.0 is about to + * appear it's not practical - Read: It works, it's not clean but please + * don't consider it to be his standard of finished work. + * Alan. + * + * Fixes: + * Pauline Middelink : Added masquerading. + * Jos Vos : Separate input and output firewall + * chains, new "insert" and "append" + * commands to replace "add" commands, + * add ICMP header to struct ip_fwpkt. + * Jos Vos : Add support for matching device names. + * Willy Konynenberg : Add transparent proxying support. + * Jos Vos : Add options for input/output accounting. + * + * All the real work was done by ..... + */ + +/* + * Copyright (c) 1993 Daniel Boulet + * Copyright (c) 1994 Ugen J.S.Antsilevich + * + * Redistribution and use in source forms, with and without modification, + * are permitted provided that this entire comment appears intact. + * + * Redistribution in binary form may occur without any restrictions. + * Obviously, it would be nice if you gave credit where credit is due + * but requiring it would be too onerous. + * + * This software is provided ``AS IS'' without any warranties of any kind. + */ + +/* + * Format of an IP firewall descriptor + * + * src, dst, src_mask, dst_mask are always stored in network byte order. + * flags and num_*_ports are stored in host byte order (of course). + * Port numbers are stored in HOST byte order. 
+ */ + +#ifdef __KERNEL__ +#include +#include +#include +#include +#include +#endif + +struct ip_fw +{ + struct ip_fw *fw_next; /* Next firewall on chain */ + struct in_addr fw_src, fw_dst; /* Source and destination IP addr */ + struct in_addr fw_smsk, fw_dmsk; /* Mask for src and dest IP addr */ + struct in_addr fw_via; /* IP address of interface "via" */ + struct net_device *fw_viadev; /* device of interface "via" */ + __u16 fw_flg; /* Flags word */ + __u16 fw_nsp, fw_ndp; /* N'of src ports and # of dst ports */ + /* in ports array (dst ports follow */ + /* src ports; max of 10 ports in all; */ + /* count of 0 means match all ports) */ +#define IP_FW_MAX_PORTS 10 /* A reasonable maximum */ + __u16 fw_pts[IP_FW_MAX_PORTS]; /* Array of port numbers to match */ + unsigned long fw_pcnt,fw_bcnt; /* Packet and byte counters */ + __u8 fw_tosand, fw_tosxor; /* Revised packet priority */ + char fw_vianame[IFNAMSIZ]; /* name of interface "via" */ +}; + +/* + * Values for "flags" field . + */ + +#define IP_FW_F_ALL 0x0000 /* This is a universal packet firewall*/ +#define IP_FW_F_TCP 0x0001 /* This is a TCP packet firewall */ +#define IP_FW_F_UDP 0x0002 /* This is a UDP packet firewall */ +#define IP_FW_F_ICMP 0x0003 /* This is a ICMP packet firewall */ +#define IP_FW_F_KIND 0x0003 /* Mask to isolate firewall kind */ +#define IP_FW_F_ACCEPT 0x0004 /* This is an accept firewall (as * + * opposed to a deny firewall)* + * */ +#define IP_FW_F_SRNG 0x0008 /* The first two src ports are a min * + * and max range (stored in host byte * + * order). * + * */ +#define IP_FW_F_DRNG 0x0010 /* The first two dst ports are a min * + * and max range (stored in host byte * + * order). 
* + * (ports[0] <= port <= ports[1]) * + * */ +#define IP_FW_F_PRN 0x0020 /* In verbose mode print this firewall*/ +#define IP_FW_F_BIDIR 0x0040 /* For bidirectional firewalls */ +#define IP_FW_F_TCPSYN 0x0080 /* For tcp packets-check SYN only */ +#define IP_FW_F_ICMPRPL 0x0100 /* Send back icmp unreachable packet */ +#define IP_FW_F_MASQ 0x0200 /* Masquerading */ +#define IP_FW_F_TCPACK 0x0400 /* For tcp-packets match if ACK is set*/ +#define IP_FW_F_REDIR 0x0800 /* Redirect to local port fw_pts[n] */ +#define IP_FW_F_ACCTIN 0x1000 /* Account incoming packets only. */ +#define IP_FW_F_ACCTOUT 0x2000 /* Account outgoing packets only. */ + +#define IP_FW_F_MASK 0x3FFF /* All possible flag bits mask */ + +/* + * New IP firewall options for [gs]etsockopt at the RAW IP level. + * Unlike BSD Linux inherits IP options so you don't have to use + * a raw socket for this. Instead we check rights in the calls. + */ + +#define IP_FW_BASE_CTL 64 /* base for firewall socket options */ + +#define IP_FW_COMMAND 0x00FF /* mask for command without chain */ +#define IP_FW_TYPE 0x0300 /* mask for type (chain) */ +#define IP_FW_SHIFT 8 /* shift count for type (chain) */ + +#define IP_FW_FWD 0 +#define IP_FW_IN 1 +#define IP_FW_OUT 2 +#define IP_FW_ACCT 3 +#define IP_FW_CHAINS 4 /* total number of ip_fw chains */ +#define IP_FW_MASQ 5 + +#define IP_FW_INSERT (IP_FW_BASE_CTL) +#define IP_FW_APPEND (IP_FW_BASE_CTL+1) +#define IP_FW_DELETE (IP_FW_BASE_CTL+2) +#define IP_FW_FLUSH (IP_FW_BASE_CTL+3) +#define IP_FW_ZERO (IP_FW_BASE_CTL+4) +#define IP_FW_POLICY (IP_FW_BASE_CTL+5) +#define IP_FW_CHECK (IP_FW_BASE_CTL+6) +#define IP_FW_MASQ_TIMEOUTS (IP_FW_BASE_CTL+7) + +#define IP_FW_INSERT_FWD (IP_FW_INSERT | (IP_FW_FWD << IP_FW_SHIFT)) +#define IP_FW_APPEND_FWD (IP_FW_APPEND | (IP_FW_FWD << IP_FW_SHIFT)) +#define IP_FW_DELETE_FWD (IP_FW_DELETE | (IP_FW_FWD << IP_FW_SHIFT)) +#define IP_FW_FLUSH_FWD (IP_FW_FLUSH | (IP_FW_FWD << IP_FW_SHIFT)) +#define IP_FW_ZERO_FWD (IP_FW_ZERO | (IP_FW_FWD << 
IP_FW_SHIFT)) +#define IP_FW_POLICY_FWD (IP_FW_POLICY | (IP_FW_FWD << IP_FW_SHIFT)) +#define IP_FW_CHECK_FWD (IP_FW_CHECK | (IP_FW_FWD << IP_FW_SHIFT)) + +#define IP_FW_INSERT_IN (IP_FW_INSERT | (IP_FW_IN << IP_FW_SHIFT)) +#define IP_FW_APPEND_IN (IP_FW_APPEND | (IP_FW_IN << IP_FW_SHIFT)) +#define IP_FW_DELETE_IN (IP_FW_DELETE | (IP_FW_IN << IP_FW_SHIFT)) +#define IP_FW_FLUSH_IN (IP_FW_FLUSH | (IP_FW_IN << IP_FW_SHIFT)) +#define IP_FW_ZERO_IN (IP_FW_ZERO | (IP_FW_IN << IP_FW_SHIFT)) +#define IP_FW_POLICY_IN (IP_FW_POLICY | (IP_FW_IN << IP_FW_SHIFT)) +#define IP_FW_CHECK_IN (IP_FW_CHECK | (IP_FW_IN << IP_FW_SHIFT)) + +#define IP_FW_INSERT_OUT (IP_FW_INSERT | (IP_FW_OUT << IP_FW_SHIFT)) +#define IP_FW_APPEND_OUT (IP_FW_APPEND | (IP_FW_OUT << IP_FW_SHIFT)) +#define IP_FW_DELETE_OUT (IP_FW_DELETE | (IP_FW_OUT << IP_FW_SHIFT)) +#define IP_FW_FLUSH_OUT (IP_FW_FLUSH | (IP_FW_OUT << IP_FW_SHIFT)) +#define IP_FW_ZERO_OUT (IP_FW_ZERO | (IP_FW_OUT << IP_FW_SHIFT)) +#define IP_FW_POLICY_OUT (IP_FW_POLICY | (IP_FW_OUT << IP_FW_SHIFT)) +#define IP_FW_CHECK_OUT (IP_FW_CHECK | (IP_FW_OUT << IP_FW_SHIFT)) + +#define IP_ACCT_INSERT (IP_FW_INSERT | (IP_FW_ACCT << IP_FW_SHIFT)) +#define IP_ACCT_APPEND (IP_FW_APPEND | (IP_FW_ACCT << IP_FW_SHIFT)) +#define IP_ACCT_DELETE (IP_FW_DELETE | (IP_FW_ACCT << IP_FW_SHIFT)) +#define IP_ACCT_FLUSH (IP_FW_FLUSH | (IP_FW_ACCT << IP_FW_SHIFT)) +#define IP_ACCT_ZERO (IP_FW_ZERO | (IP_FW_ACCT << IP_FW_SHIFT)) + +#define IP_FW_MASQ_INSERT (IP_FW_INSERT | (IP_FW_MASQ << IP_FW_SHIFT)) +#define IP_FW_MASQ_ADD (IP_FW_APPEND | (IP_FW_MASQ << IP_FW_SHIFT)) +#define IP_FW_MASQ_DEL (IP_FW_DELETE | (IP_FW_MASQ << IP_FW_SHIFT)) +#define IP_FW_MASQ_FLUSH (IP_FW_FLUSH | (IP_FW_MASQ << IP_FW_SHIFT)) + +#define IP_FW_MASQ_INSERT (IP_FW_INSERT | (IP_FW_MASQ << IP_FW_SHIFT)) +#define IP_FW_MASQ_ADD (IP_FW_APPEND | (IP_FW_MASQ << IP_FW_SHIFT)) +#define IP_FW_MASQ_DEL (IP_FW_DELETE | (IP_FW_MASQ << IP_FW_SHIFT)) +#define IP_FW_MASQ_FLUSH (IP_FW_FLUSH | (IP_FW_MASQ << 
IP_FW_SHIFT)) + +struct ip_fwpkt +{ + struct iphdr fwp_iph; /* IP header */ + union { + struct tcphdr fwp_tcph; /* TCP header or */ + struct udphdr fwp_udph; /* UDP header */ + struct icmphdr fwp_icmph; /* ICMP header */ + } fwp_protoh; + struct in_addr fwp_via; /* interface address */ + char fwp_vianame[IFNAMSIZ]; /* interface name */ +}; + +#define IP_FW_MASQCTL_MAX 256 +#define IP_MASQ_MOD_NMAX 32 + +struct ip_fw_masqctl +{ + int mctl_action; + union { + struct { + char name[IP_MASQ_MOD_NMAX]; + char data[1]; + } mod; + } u; +}; + +/* + * timeouts for ip masquerading + */ + +struct ip_fw_masq; + +/* + * Main firewall chains definitions and global var's definitions. + */ + +#ifdef __KERNEL__ + +/* Modes used in the ip_fw_chk() routine. */ +#define IP_FW_MODE_FW 0x00 /* kernel firewall check */ +#define IP_FW_MODE_ACCT_IN 0x01 /* accounting (incoming) */ +#define IP_FW_MODE_ACCT_OUT 0x02 /* accounting (outgoing) */ +#define IP_FW_MODE_CHK 0x04 /* check requested by user */ + +#include +#ifdef CONFIG_IP_FIREWALL +extern struct ip_fw *ip_fw_in_chain; +extern struct ip_fw *ip_fw_out_chain; +extern struct ip_fw *ip_fw_fwd_chain; +extern int ip_fw_in_policy; +extern int ip_fw_out_policy; +extern int ip_fw_fwd_policy; +extern int ip_fw_ctl(int, void *, int); +#endif +#ifdef CONFIG_IP_ACCT +extern struct ip_fw *ip_acct_chain; +extern int ip_acct_ctl(int, void *, int); +#endif +#ifdef CONFIG_IP_MASQUERADE +extern int ip_masq_ctl(int, void *, int); +#endif +#ifdef CONFIG_IP_MASQUERADE +extern int ip_masq_ctl(int, void *, int); +#endif + +extern int ip_fw_masq_timeouts(void *user, int len); + +extern int ip_fw_chk(struct iphdr *, struct net_device *, __u16 *, + struct ip_fw *, int, int); +#endif /* KERNEL */ +#endif /* _IP_FW_H */ diff -u --recursive --new-file v2.3.99-pre1/linux/include/linux/netfilter_ipv4/ipt_LOG.h linux/include/linux/netfilter_ipv4/ipt_LOG.h --- v2.3.99-pre1/linux/include/linux/netfilter_ipv4/ipt_LOG.h Wed Dec 31 16:00:00 1969 +++ 
linux/include/linux/netfilter_ipv4/ipt_LOG.h Fri Mar 17 10:56:20 2000 @@ -0,0 +1,15 @@ +#ifndef _IPT_LOG_H +#define _IPT_LOG_H + +#define IPT_LOG_TCPSEQ 0x01 /* Log TCP sequence numbers */ +#define IPT_LOG_TCPOPT 0x02 /* Log TCP options */ +#define IPT_LOG_IPOPT 0x04 /* Log IP options */ +#define IPT_LOG_MASK 0x07 + +struct ipt_log_info { + unsigned char level; + unsigned char logflags; + char prefix[30]; +}; + +#endif /*_IPT_LOG_H*/ diff -u --recursive --new-file v2.3.99-pre1/linux/include/linux/netfilter_ipv4/ipt_MARK.h linux/include/linux/netfilter_ipv4/ipt_MARK.h --- v2.3.99-pre1/linux/include/linux/netfilter_ipv4/ipt_MARK.h Wed Dec 31 16:00:00 1969 +++ linux/include/linux/netfilter_ipv4/ipt_MARK.h Fri Mar 17 10:56:20 2000 @@ -0,0 +1,8 @@ +#ifndef _IPT_MARK_H_target +#define _IPT_MARK_H_target + +struct ipt_mark_target_info { + unsigned long mark; +}; + +#endif /*_IPT_MARK_H_target*/ diff -u --recursive --new-file v2.3.99-pre1/linux/include/linux/netfilter_ipv4/ipt_REJECT.h linux/include/linux/netfilter_ipv4/ipt_REJECT.h --- v2.3.99-pre1/linux/include/linux/netfilter_ipv4/ipt_REJECT.h Wed Dec 31 16:00:00 1969 +++ linux/include/linux/netfilter_ipv4/ipt_REJECT.h Fri Mar 17 10:56:20 2000 @@ -0,0 +1,17 @@ +#ifndef _IPT_REJECT_H +#define _IPT_REJECT_H + +enum ipt_reject_with { + IPT_ICMP_NET_UNREACHABLE, + IPT_ICMP_HOST_UNREACHABLE, + IPT_ICMP_PROT_UNREACHABLE, + IPT_ICMP_PORT_UNREACHABLE, + IPT_ICMP_ECHOREPLY, + IPT_TCP_RESET, +}; + +struct ipt_reject_info { + enum ipt_reject_with with; /* reject type */ +}; + +#endif /*_IPT_REJECT_H*/ diff -u --recursive --new-file v2.3.99-pre1/linux/include/linux/netfilter_ipv4/ipt_TOS.h linux/include/linux/netfilter_ipv4/ipt_TOS.h --- v2.3.99-pre1/linux/include/linux/netfilter_ipv4/ipt_TOS.h Wed Dec 31 16:00:00 1969 +++ linux/include/linux/netfilter_ipv4/ipt_TOS.h Fri Mar 17 10:56:20 2000 @@ -0,0 +1,12 @@ +#ifndef _IPT_TOS_H_target +#define _IPT_TOS_H_target + +#ifndef IPTOS_NORMALSVC +#define IPTOS_NORMALSVC 0 +#endif + +struct 
ipt_tos_target_info { + u_int8_t tos; +}; + +#endif /*_IPT_TOS_H_target*/ diff -u --recursive --new-file v2.3.99-pre1/linux/include/linux/netfilter_ipv4/ipt_limit.h linux/include/linux/netfilter_ipv4/ipt_limit.h --- v2.3.99-pre1/linux/include/linux/netfilter_ipv4/ipt_limit.h Wed Dec 31 16:00:00 1969 +++ linux/include/linux/netfilter_ipv4/ipt_limit.h Fri Mar 17 10:56:20 2000 @@ -0,0 +1,21 @@ +#ifndef _IPT_RATE_H +#define _IPT_RATE_H + +/* timings are in milliseconds. */ +#define IPT_LIMIT_SCALE 10000 + +/* 1/10,000 sec period => max of 10,000/sec. Min rate is then 429490 + seconds, or one every 59 hours. */ +struct ipt_rateinfo { + u_int32_t avg; /* Average secs between packets * scale */ + u_int32_t burst; /* Period multiplier for upper limit. */ + + /* Used internally by the kernel */ + unsigned long prev; + u_int32_t credit; + u_int32_t credit_cap, cost; + + /* Ugly, ugly fucker. */ + struct ipt_rateinfo *master; +}; +#endif /*_IPT_RATE_H*/ diff -u --recursive --new-file v2.3.99-pre1/linux/include/linux/netfilter_ipv4/ipt_mac.h linux/include/linux/netfilter_ipv4/ipt_mac.h --- v2.3.99-pre1/linux/include/linux/netfilter_ipv4/ipt_mac.h Wed Dec 31 16:00:00 1969 +++ linux/include/linux/netfilter_ipv4/ipt_mac.h Fri Mar 17 10:56:20 2000 @@ -0,0 +1,8 @@ +#ifndef _IPT_MAC_H +#define _IPT_MAC_H + +struct ipt_mac_info { + unsigned char srcaddr[ETH_ALEN]; + int invert; +}; +#endif /*_IPT_MAC_H*/ diff -u --recursive --new-file v2.3.99-pre1/linux/include/linux/netfilter_ipv4/ipt_mark.h linux/include/linux/netfilter_ipv4/ipt_mark.h --- v2.3.99-pre1/linux/include/linux/netfilter_ipv4/ipt_mark.h Wed Dec 31 16:00:00 1969 +++ linux/include/linux/netfilter_ipv4/ipt_mark.h Fri Mar 17 10:56:20 2000 @@ -0,0 +1,9 @@ +#ifndef _IPT_MARK_H +#define _IPT_MARK_H + +struct ipt_mark_info { + unsigned long mark, mask; + u_int8_t invert; +}; + +#endif /*_IPT_MARK_H*/ diff -u --recursive --new-file v2.3.99-pre1/linux/include/linux/netfilter_ipv4/ipt_multiport.h 
linux/include/linux/netfilter_ipv4/ipt_multiport.h --- v2.3.99-pre1/linux/include/linux/netfilter_ipv4/ipt_multiport.h Wed Dec 31 16:00:00 1969 +++ linux/include/linux/netfilter_ipv4/ipt_multiport.h Fri Mar 17 10:56:20 2000 @@ -0,0 +1,21 @@ +#ifndef _IPT_MULTIPORT_H +#define _IPT_MULTIPORT_H +#include + +enum ipt_multiport_flags +{ + IPT_MULTIPORT_SOURCE, + IPT_MULTIPORT_DESTINATION, + IPT_MULTIPORT_EITHER +}; + +#define IPT_MULTI_PORTS 15 + +/* Must fit inside union ipt_matchinfo: 16 bytes */ +struct ipt_multiport +{ + u_int8_t flags; /* Type of comparison */ + u_int8_t count; /* Number of ports */ + u_int16_t ports[IPT_MULTI_PORTS]; /* Ports */ +}; +#endif /*_IPT_MULTIPORT_H*/ diff -u --recursive --new-file v2.3.99-pre1/linux/include/linux/netfilter_ipv4/ipt_owner.h linux/include/linux/netfilter_ipv4/ipt_owner.h --- v2.3.99-pre1/linux/include/linux/netfilter_ipv4/ipt_owner.h Wed Dec 31 16:00:00 1969 +++ linux/include/linux/netfilter_ipv4/ipt_owner.h Fri Mar 17 10:56:20 2000 @@ -0,0 +1,18 @@ +#ifndef _IPT_OWNER_H +#define _IPT_OWNER_H + +/* match and invert flags */ +#define IPT_OWNER_UID 0x01 +#define IPT_OWNER_GID 0x02 +#define IPT_OWNER_PID 0x04 +#define IPT_OWNER_SID 0x08 + +struct ipt_owner_info { + uid_t uid; + gid_t gid; + pid_t pid; + pid_t sid; + u_int8_t match, invert; /* flags */ +}; + +#endif /*_IPT_OWNER_H*/ diff -u --recursive --new-file v2.3.99-pre1/linux/include/linux/netfilter_ipv4/ipt_state.h linux/include/linux/netfilter_ipv4/ipt_state.h --- v2.3.99-pre1/linux/include/linux/netfilter_ipv4/ipt_state.h Wed Dec 31 16:00:00 1969 +++ linux/include/linux/netfilter_ipv4/ipt_state.h Fri Mar 17 10:56:20 2000 @@ -0,0 +1,12 @@ +#ifndef _IPT_STATE_H +#define _IPT_STATE_H + +#define _IPT_STATE_BIT(ctinfo) (1 << ((ctinfo)+1)) +#define IPT_STATE_BIT(ctinfo) ((ctinfo) >= IP_CT_IS_REPLY ? 
_IPT_STATE_BIT((ctinfo)-IP_CT_IS_REPLY) : _IPT_STATE_BIT(ctinfo)) +#define IPT_STATE_INVALID (1 << 0) + +struct ipt_state_info +{ + unsigned int statemask; +}; +#endif /*_IPT_STATE_H*/ diff -u --recursive --new-file v2.3.99-pre1/linux/include/linux/netfilter_ipv4/ipt_tos.h linux/include/linux/netfilter_ipv4/ipt_tos.h --- v2.3.99-pre1/linux/include/linux/netfilter_ipv4/ipt_tos.h Wed Dec 31 16:00:00 1969 +++ linux/include/linux/netfilter_ipv4/ipt_tos.h Fri Mar 17 10:56:20 2000 @@ -0,0 +1,13 @@ +#ifndef _IPT_TOS_H +#define _IPT_TOS_H + +struct ipt_tos_info { + u_int8_t tos; + u_int8_t invert; +}; + +#ifndef IPTOS_NORMALSVC +#define IPTOS_NORMALSVC 0 +#endif + +#endif /*_IPT_TOS_H*/ diff -u --recursive --new-file v2.3.99-pre1/linux/include/linux/netfilter_ipv4/listhelp.h linux/include/linux/netfilter_ipv4/listhelp.h --- v2.3.99-pre1/linux/include/linux/netfilter_ipv4/listhelp.h Wed Dec 31 16:00:00 1969 +++ linux/include/linux/netfilter_ipv4/listhelp.h Sat Mar 18 16:51:35 2000 @@ -0,0 +1,115 @@ +#ifndef _LISTHELP_H +#define _LISTHELP_H +#include +#include +#include + +/* Header to do more comprehensive job than linux/list.h; assume list + is first entry in structure. */ + +/* Return pointer to first true entry, if any, or NULL. A macro + required to allow inlining of cmpfn. */ +#define LIST_FIND(head, cmpfn, type, args...) \ +({ \ + const struct list_head *__i = (head); \ + \ + ASSERT_READ_LOCK(head); \ + do { \ + __i = __i->next; \ + if (__i == (head)) { \ + __i = NULL; \ + break; \ + } \ + } while (!cmpfn((const type)__i , ## args)); \ + (type)__i; \ +}) + +#define LIST_FIND_W(head, cmpfn, type, args...) \ +({ \ + const struct list_head *__i = (head); \ + \ + ASSERT_WRITE_LOCK(head); \ + do { \ + __i = __i->next; \ + if (__i == (head)) { \ + __i = NULL; \ + break; \ + } \ + } while (!cmpfn((type)__i , ## args)); \ + (type)__i; \ +}) + +extern inline int +__list_cmp_same(const void *p1, const void *p2) { return p1 == p2; } + +/* Is this entry in the list? 
*/ +extern inline int +list_inlist(struct list_head *head, const void *entry) +{ + return LIST_FIND(head, __list_cmp_same, void *, entry) != NULL; +} + +/* Delete from list. */ +#ifdef CONFIG_NETFILTER_DEBUG +#define LIST_DELETE(head, oldentry) \ +do { \ + ASSERT_WRITE_LOCK(head); \ + if (!list_inlist(head, oldentry)) \ + printk("LIST_DELETE: %s:%u `%s'(%p) not in %s.\n", \ + __FILE__, __LINE__, #oldentry, oldentry, #head); \ + else list_del((struct list_head *)oldentry); \ +} while(0) +#else +#define LIST_DELETE(head, oldentry) list_del((struct list_head *)oldentry) +#endif + +/* Append. */ +extern inline void +list_append(struct list_head *head, void *new) +{ + ASSERT_WRITE_LOCK(head); + list_add((new), (head)->prev); +} + +/* Prepend. */ +extern inline void +list_prepend(struct list_head *head, void *new) +{ + ASSERT_WRITE_LOCK(head); + list_add(new, head); +} + +/* Insert according to ordering function; insert before first true. */ +#define LIST_INSERT(head, new, cmpfn) \ +do { \ + struct list_head *__i; \ + ASSERT_WRITE_LOCK(head); \ + for (__i = (head)->next; \ + !cmpfn((new), (typeof (new))__i) && __i != (head); \ + __i = __i->next); \ + list_add((struct list_head *)(new), __i->prev); \ +} while(0) + +/* If the field after the list_head is a nul-terminated string, you + can use these functions. */ +extern inline int __list_cmp_name(const void *i, const char *name) +{ + return strcmp(name, i+sizeof(struct list_head)) == 0; +} + +/* Returns false if same name already in list, otherwise does insert. */ +extern inline int +list_named_insert(struct list_head *head, void *new) +{ + if (LIST_FIND(head, __list_cmp_name, void *, + new + sizeof(struct list_head))) + return 0; + list_prepend(head, new); + return 1; +} + +/* Find this named element in the list. 
*/ +#define list_named_find(head, name) \ +LIST_FIND(head, __list_cmp_name, void *, name) + +#endif /*_LISTHELP_H*/ diff -u --recursive --new-file v2.3.99-pre1/linux/include/linux/netfilter_ipv4/lockhelp.h linux/include/linux/netfilter_ipv4/lockhelp.h --- v2.3.99-pre1/linux/include/linux/netfilter_ipv4/lockhelp.h Wed Dec 31 16:00:00 1969 +++ linux/include/linux/netfilter_ipv4/lockhelp.h Fri Mar 17 10:56:20 2000 @@ -0,0 +1,129 @@ +#ifndef _LOCKHELP_H +#define _LOCKHELP_H +#include + +#include +#include +#include +#include + +/* Header to do help in lock debugging. */ + +#ifdef CONFIG_NETFILTER_DEBUG +struct spinlock_debug +{ + spinlock_t l; + atomic_t locked_by; +}; + +struct rwlock_debug +{ + rwlock_t l; + int read_locked_map; + int write_locked_map; +}; + +#define DECLARE_LOCK(l) \ +struct spinlock_debug l = { SPIN_LOCK_UNLOCKED, ATOMIC_INIT(-1) } +#define DECLARE_LOCK_EXTERN(l) \ +extern struct spinlock_debug l +#define DECLARE_RWLOCK(l) \ +struct rwlock_debug l = { RW_LOCK_UNLOCKED, 0, 0 } +#define DECLARE_RWLOCK_EXTERN(l) \ +extern struct rwlock_debug l + +#define MUST_BE_LOCKED(l) \ +do { if (atomic_read(&(l)->locked_by) != smp_processor_id()) \ + printk("ASSERT %s:%u %s unlocked\n", __FILE__, __LINE__, #l); \ +} while(0) + +#define MUST_BE_UNLOCKED(l) \ +do { if (atomic_read(&(l)->locked_by) == smp_processor_id()) \ + printk("ASSERT %s:%u %s locked\n", __FILE__, __LINE__, #l); \ +} while(0) + +/* Write locked OK as well. 
*/ \ +#define MUST_BE_READ_LOCKED(l) \ +do { if (!((l)->read_locked_map & (1 << smp_processor_id())) \ + && !((l)->write_locked_map & (1 << smp_processor_id()))) \ + printk("ASSERT %s:%u %s not readlocked\n", __FILE__, __LINE__, #l); \ +} while(0) + +#define MUST_BE_WRITE_LOCKED(l) \ +do { if (!((l)->write_locked_map & (1 << smp_processor_id()))) \ + printk("ASSERT %s:%u %s not writelocked\n", __FILE__, __LINE__, #l); \ +} while(0) + +#define MUST_BE_READ_WRITE_UNLOCKED(l) \ +do { if ((l)->read_locked_map & (1 << smp_processor_id())) \ + printk("ASSERT %s:%u %s readlocked\n", __FILE__, __LINE__, #l); \ + else if ((l)->write_locked_map & (1 << smp_processor_id())) \ + printk("ASSERT %s:%u %s writelocked\n", __FILE__, __LINE__, #l); \ +} while(0) + +#define LOCK_BH(lk) \ +do { \ + MUST_BE_UNLOCKED(lk); \ + spin_lock_bh(&(lk)->l); \ + atomic_set(&(lk)->locked_by, smp_processor_id()); \ +} while(0) + +#define UNLOCK_BH(lk) \ +do { \ + MUST_BE_LOCKED(lk); \ + atomic_set(&(lk)->locked_by, -1); \ + spin_unlock_bh(&(lk)->l); \ +} while(0) + +#define READ_LOCK(lk) \ +do { \ + MUST_BE_READ_WRITE_UNLOCKED(lk); \ + read_lock_bh(&(lk)->l); \ + set_bit(smp_processor_id(), &(lk)->read_locked_map); \ +} while(0) + +#define WRITE_LOCK(lk) \ +do { \ + MUST_BE_READ_WRITE_UNLOCKED(lk); \ + write_lock_bh(&(lk)->l); \ + set_bit(smp_processor_id(), &(lk)->write_locked_map); \ +} while(0) + +#define READ_UNLOCK(lk) \ +do { \ + if (!((lk)->read_locked_map & (1 << smp_processor_id()))) \ + printk("ASSERT: %s:%u %s not readlocked\n", \ + __FILE__, __LINE__, #lk); \ + clear_bit(smp_processor_id(), &(lk)->read_locked_map); \ + read_unlock_bh(&(lk)->l); \ +} while(0) + +#define WRITE_UNLOCK(lk) \ +do { \ + MUST_BE_WRITE_LOCKED(lk); \ + clear_bit(smp_processor_id(), &(lk)->write_locked_map); \ + write_unlock_bh(&(lk)->l); \ +} while(0) + +#else +#define DECLARE_LOCK(l) spinlock_t l = SPIN_LOCK_UNLOCKED +#define DECLARE_LOCK_EXTERN(l) extern spinlock_t l +#define DECLARE_RWLOCK(l) rwlock_t l = 
RW_LOCK_UNLOCKED +#define DECLARE_RWLOCK_EXTERN(l) extern rwlock_t l + +#define MUST_BE_LOCKED(l) +#define MUST_BE_UNLOCKED(l) +#define MUST_BE_READ_LOCKED(l) +#define MUST_BE_WRITE_LOCKED(l) +#define MUST_BE_READ_WRITE_UNLOCKED(l) + +#define LOCK_BH(l) spin_lock_bh(l) +#define UNLOCK_BH(l) spin_unlock_bh(l) + +#define READ_LOCK(l) read_lock_bh(l) +#define WRITE_LOCK(l) write_lock_bh(l) +#define READ_UNLOCK(l) read_unlock_bh(l) +#define WRITE_UNLOCK(l) write_unlock_bh(l) +#endif /*CONFIG_NETFILTER_DEBUG*/ + +#endif /* _LOCKHELP_H */ diff -u --recursive --new-file v2.3.99-pre1/linux/include/linux/netfilter_ipv4.h linux/include/linux/netfilter_ipv4.h --- v2.3.99-pre1/linux/include/linux/netfilter_ipv4.h Thu Mar 2 14:36:23 2000 +++ linux/include/linux/netfilter_ipv4.h Sat Mar 18 12:16:09 2000 @@ -49,6 +49,16 @@ #define NF_IP_POST_ROUTING 4 #define NF_IP_NUMHOOKS 5 +enum nf_ip_hook_priorities { + NF_IP_PRI_FIRST = INT_MIN, + NF_IP_PRI_CONNTRACK = -200, + NF_IP_PRI_MANGLE = -150, + NF_IP_PRI_NAT_DST = -100, + NF_IP_PRI_FILTER = 0, + NF_IP_PRI_NAT_SRC = 100, + NF_IP_PRI_LAST = INT_MAX, +}; + #ifdef CONFIG_NETFILTER_DEBUG #ifdef __KERNEL__ void nf_debug_ip_local_deliver(struct sk_buff *skb); diff -u --recursive --new-file v2.3.99-pre1/linux/include/linux/nfsd/nfsd.h linux/include/linux/nfsd/nfsd.h --- v2.3.99-pre1/linux/include/linux/nfsd/nfsd.h Fri Mar 10 16:40:50 2000 +++ linux/include/linux/nfsd/nfsd.h Sat Mar 18 12:14:48 2000 @@ -172,6 +172,9 @@ #define nfserr_badtype __constant_htonl(NFSERR_BADTYPE) #define nfserr_jukebox __constant_htonl(NFSERR_JUKEBOX) +/* Check for dir entries '.' and '..' */ +#define isdotent(n, l) (l < 3 && n[0] == '.' 
&& (l == 1 || n[1] == '.')) + /* * Time of server startup */ diff -u --recursive --new-file v2.3.99-pre1/linux/include/linux/pci.h linux/include/linux/pci.h --- v2.3.99-pre1/linux/include/linux/pci.h Tue Mar 14 19:10:40 2000 +++ linux/include/linux/pci.h Sat Mar 18 12:11:35 2000 @@ -293,8 +293,6 @@ #define PCI_DMA_FROMDEVICE 2 #define PCI_DMA_NONE 3 -#include - #define DEVICE_COUNT_COMPATIBLE 4 #define DEVICE_COUNT_IRQ 2 #define DEVICE_COUNT_DMA 2 @@ -542,6 +540,10 @@ struct pci_driver *pci_dev_driver(const struct pci_dev *); const struct pci_device_id *pci_match_device(const struct pci_device_id *ids, const struct pci_dev *dev); +/* Include architecture-dependent settings and functions */ + +#include + /* * If the system does not have PCI, clearly these return errors. Define * these as simple inline functions to avoid hair in drivers. @@ -578,8 +580,9 @@ extern inline int pci_enable_device(struct pci_dev *dev) { return -EIO; } extern inline int pci_module_init(struct pci_driver *drv) { return -ENODEV; } extern inline int pci_assign_resource(struct pci_dev *dev, int i) { return -EBUSY;} -extern inline int pci_register_driver(struct pci_driver *) { return 0;} -extern inline void pci_unregister_driver(struct pci_driver *) { } +extern inline int pci_register_driver(struct pci_driver *drv) { return 0;} +extern inline void pci_unregister_driver(struct pci_driver *drv) { } +extern inline int scsi_to_pci_dma_dir(unsigned char scsi_dir) { return scsi_dir; } #else diff -u --recursive --new-file v2.3.99-pre1/linux/include/linux/ppp_channel.h linux/include/linux/ppp_channel.h --- v2.3.99-pre1/linux/include/linux/ppp_channel.h Tue Mar 14 19:10:40 2000 +++ linux/include/linux/ppp_channel.h Sat Mar 18 16:41:47 2000 @@ -61,8 +61,8 @@ /* Detach a channel from its PPP unit (e.g. on hangup). 
*/ extern void ppp_unregister_channel(struct ppp_channel *); -/* Get the channel number for a channel */ -extern int ppp_channel_index(struct ppp_channel *); +/* Get the unit number associated with a channel */ +extern int ppp_unit_number(struct ppp_channel *); /* * SMP locking notes: diff -u --recursive --new-file v2.3.99-pre1/linux/include/linux/prctl.h linux/include/linux/prctl.h --- v2.3.99-pre1/linux/include/linux/prctl.h Sat Feb 26 22:32:07 2000 +++ linux/include/linux/prctl.h Sun Mar 19 11:15:32 2000 @@ -16,4 +16,8 @@ # define PR_UNALIGN_NOPRINT 1 /* silently fix up unaligned user accesses */ # define PR_UNALIGN_SIGBUS 2 /* generate SIGBUS on unaligned user access */ +/* Get/set whether or not to drop capabilities on setuid() away from uid 0 */ +#define PR_GET_KEEPCAPS 7 +#define PR_SET_KEEPCAPS 8 + #endif /* _LINUX_PRCTL_H */ diff -u --recursive --new-file v2.3.99-pre1/linux/include/linux/quota.h linux/include/linux/quota.h --- v2.3.99-pre1/linux/include/linux/quota.h Tue Dec 7 09:32:51 1999 +++ linux/include/linux/quota.h Wed Mar 15 21:45:27 2000 @@ -174,7 +174,7 @@ int dq_count; /* Reference count */ /* fields after this point are cleared when invalidating */ - struct vfsmount *dq_mnt; /* VFS_mount_point this applies to */ + struct super_block *dq_sb; /* superblock this applies to */ unsigned int dq_id; /* ID this applies to (uid, gid) */ kdev_t dq_dev; /* Device this applies to */ short dq_type; /* Type of quota */ diff -u --recursive --new-file v2.3.99-pre1/linux/include/linux/quotaops.h linux/include/linux/quotaops.h --- v2.3.99-pre1/linux/include/linux/quotaops.h Tue Dec 7 09:32:51 1999 +++ linux/include/linux/quotaops.h Sat Mar 18 12:14:15 2000 @@ -20,7 +20,7 @@ extern void dquot_initialize(struct inode *inode, short type); extern void dquot_drop(struct inode *inode); extern void invalidate_dquots(kdev_t dev, short type); -extern int quota_off(kdev_t dev, short type); +extern int quota_off(struct super_block *sb, short type); extern int 
sync_dquots(kdev_t dev, short type); extern int dquot_alloc_block(const struct inode *inode, unsigned long number, char prealloc); @@ -103,7 +103,7 @@ } #define DQUOT_SYNC(dev) sync_dquots(dev, -1) -#define DQUOT_OFF(dev) quota_off(dev, -1) +#define DQUOT_OFF(sb) quota_off(sb, -1) #else @@ -118,7 +118,7 @@ #define DQUOT_FREE_BLOCK(sb, inode, nr) do { } while(0) #define DQUOT_FREE_INODE(sb, inode) do { } while(0) #define DQUOT_SYNC(dev) do { } while(0) -#define DQUOT_OFF(dev) do { } while(0) +#define DQUOT_OFF(sb) do { } while(0) /* * Special case expands to a simple notify_change. diff -u --recursive --new-file v2.3.99-pre1/linux/include/linux/sched.h linux/include/linux/sched.h --- v2.3.99-pre1/linux/include/linux/sched.h Sun Feb 20 21:12:40 2000 +++ linux/include/linux/sched.h Sun Mar 19 11:15:32 2000 @@ -326,6 +326,7 @@ int ngroups; gid_t groups[NGROUPS]; kernel_cap_t cap_effective, cap_inheritable, cap_permitted; + int keep_capabilities:1; struct user_struct *user; /* limits */ struct rlimit rlim[RLIM_NLIMITS]; @@ -414,6 +415,7 @@ cap_effective: CAP_INIT_EFF_SET, \ cap_inheritable: CAP_INIT_INH_SET, \ cap_permitted: CAP_FULL_SET, \ + keep_capabilities: 0, \ rlim: INIT_RLIMITS, \ comm: "swapper", \ thread: INIT_THREAD, \ diff -u --recursive --new-file v2.3.99-pre1/linux/include/linux/shm.h linux/include/linux/shm.h --- v2.3.99-pre1/linux/include/linux/shm.h Fri Mar 10 16:40:50 2000 +++ linux/include/linux/shm.h Sat Mar 18 12:13:53 2000 @@ -11,8 +11,8 @@ #define SHMMAX 0x2000000 /* max shared seg size (bytes) */ #define SHMMIN 0 /* min shared seg size (bytes) */ -#define SHMMNI 128 /* max num of segs system wide */ -#define SHMALL (SHMMAX/PAGE_SIZE*SHMMNI) /* max shm system wide (pages) */ +#define SHMMNI 4096 /* max num of segs system wide */ +#define SHMALL (SHMMAX/PAGE_SIZE*(SHMMNI/16)) /* max shm system wide (pages) */ #define SHMSEG SHMMNI /* max shared segs per process */ #include diff -u --recursive --new-file v2.3.99-pre1/linux/include/linux/skbuff.h 
linux/include/linux/skbuff.h --- v2.3.99-pre1/linux/include/linux/skbuff.h Tue Mar 7 14:32:26 2000 +++ linux/include/linux/skbuff.h Sat Mar 18 12:11:01 2000 @@ -122,8 +122,6 @@ #ifdef CONFIG_NETFILTER /* Can be used for communication between hooks. */ unsigned long nfmark; - /* Reason for doing this to the packet (see netfilter.h) */ - __u32 nfreason; /* Cache info */ __u32 nfcache; /* Associated connection, if any */ diff -u --recursive --new-file v2.3.99-pre1/linux/include/linux/soundcard.h linux/include/linux/soundcard.h --- v2.3.99-pre1/linux/include/linux/soundcard.h Mon Jul 5 20:02:10 1999 +++ linux/include/linux/soundcard.h Thu Mar 16 11:18:35 2000 @@ -552,6 +552,7 @@ # define AFMT_U16_LE 0x00000080 /* Little endian U16 */ # define AFMT_U16_BE 0x00000100 /* Big endian U16 */ # define AFMT_MPEG 0x00000200 /* MPEG (2) audio */ +# define AFMT_AC3 0x00000400 /* Dolby Digital AC3 */ /* * Buffer status queries. @@ -581,11 +582,14 @@ /* but usually not */ # define DSP_CAP_TRIGGER 0x00001000 /* Supports SETTRIGGER */ # define DSP_CAP_MMAP 0x00002000 /* Supports mmap() */ +# define DSP_CAP_MULTI 0x00004000 /* support multiple open */ +# define DSP_CAP_BIND 0x00008000 /* channel binding to front/rear/center/lfe */ + #define SNDCTL_DSP_GETTRIGGER _SIOR ('P',16, int) #define SNDCTL_DSP_SETTRIGGER _SIOW ('P',16, int) # define PCM_ENABLE_INPUT 0x00000001 -# define PCM_ENABLE_OUTPUT 0x00000002 +# define PCM_ENABLE_OUTPUT 0x00000002 typedef struct count_info { int bytes; /* Total # of bytes processed */ @@ -605,6 +609,19 @@ #define SNDCTL_DSP_SETSYNCRO _SIO ('P', 21) #define SNDCTL_DSP_SETDUPLEX _SIO ('P', 22) #define SNDCTL_DSP_GETODELAY _SIOR ('P', 23, int) + +#define SNDCTL_DSP_GETCHANNELMASK _SIOWR('P', 64, int) +#define SNDCTL_DSP_BIND_CHANNEL _SIOWR('P', 65, int) +# define DSP_BIND_QUERY 0x00000000 +# define DSP_BIND_FRONT 0x00000001 +# define DSP_BIND_SURR 0x00000002 +# define DSP_BIND_CENTER_LFE 0x00000004 +# define DSP_BIND_HANDSET 0x00000008 +# define DSP_BIND_MIC
0x00000010 +# define DSP_BIND_MODEM1 0x00000020 +# define DSP_BIND_MODEM2 0x00000040 +# define DSP_BIND_I2S 0x00000080 +# define DSP_BIND_SPDIF 0x00000100 /* * Application's profile defines the way how playback underrun situations should be handled. diff -u --recursive --new-file v2.3.99-pre1/linux/include/net/tcp.h linux/include/net/tcp.h --- v2.3.99-pre1/linux/include/net/tcp.h Fri Mar 10 16:40:50 2000 +++ linux/include/net/tcp.h Sat Mar 18 12:15:47 2000 @@ -669,6 +669,8 @@ struct tcphdr *th, int len, struct sk_buff *skb); +extern void tcp_v4_send_reset(struct sk_buff *skb); + extern int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb); diff -u --recursive --new-file v2.3.99-pre1/linux/ipc/shm.c linux/ipc/shm.c --- v2.3.99-pre1/linux/ipc/shm.c Tue Mar 14 19:10:40 2000 +++ linux/ipc/shm.c Sun Mar 19 10:11:41 2000 @@ -18,10 +18,15 @@ * 1) It only can handle one directory. * 2) Because the directory is represented by the SYSV shm array it * can only be mounted one time. - * 3) This again leads to SYSV shm not working properly in a chrooted - * environment - * 4) Read and write are not implemented (should they?) - * 5) No special nodes are supported + * 3) Read and write are not implemented (should they?) 
+ * 4) No special nodes are supported + * + * There are the following mount options: + * - nr_blocks (^= shmall) is the number of blocks of size PAGE_SIZE + * we are allowed to allocate + * - nr_inodes (^= shmmni) is the number of files we are allowed to + * allocate + * - mode is the mode for the root directory (default S_IRWXUGO | S_ISVTX) */ #include @@ -48,7 +53,7 @@ static int shm_remount_fs (struct super_block *, int *, char *); static void shm_read_inode (struct inode *); static void shm_write_inode(struct inode *); -static int shm_statfs (struct super_block *, struct statfs *); +static int shm_statfs (struct super_block *, struct statfs *); static int shm_create (struct inode *,struct dentry *,int); static struct dentry *shm_lookup (struct inode *,struct dentry *); static int shm_unlink (struct inode *,struct dentry *); @@ -57,8 +62,6 @@ static int shm_mmap (struct file *, struct vm_area_struct *); static int shm_readdir (struct file *, void *, filldir_t); -char shm_path[256] = "/var/shm"; - #define SHM_NAME_LEN NAME_MAX #define SHM_FMT ".IPC_%08x" #define SHM_FMT_LEN 13 @@ -78,6 +81,7 @@ time_t ctime; pid_t cpid; pid_t lpid; + int unlinked; int nlen; char nm[0]; } shmem; @@ -95,6 +99,7 @@ #define shm_lprid permap.shmem.lpid #define shm_namelen permap.shmem.nlen #define shm_name permap.shmem.nm +#define shm_unlinked permap.shmem.unlinked #define zsem permap.zero.sema #define zero_list permap.zero.list @@ -105,7 +110,6 @@ #define shm_lockall() ipc_lockall(&shm_ids) #define shm_unlockall() ipc_unlockall(&shm_ids) #define shm_get(id) ((struct shmid_kernel*)ipc_get(&shm_ids,id)) -#define shm_rmid(id) ((struct shmid_kernel*)ipc_rmid(&shm_ids,id)) #define shm_checkid(s, id) \ ipc_checkid(&shm_ids,&s->shm_perm,id) #define shm_buildid(id, seq) \ @@ -115,6 +119,7 @@ static void killseg_core(struct shmid_kernel *shp, int doacc); static void shm_open (struct vm_area_struct *shmd); static void shm_close (struct vm_area_struct *shmd); +static int shm_remove_name(int 
id); static struct page * shm_nopage(struct vm_area_struct *, unsigned long, int); static int shm_swapout(struct page *, struct file *); #ifdef CONFIG_PROC_FS @@ -266,7 +271,7 @@ struct inode * root_inode; if (shm_sb) { - printk ("shm fs already mounted\n"); + printk(KERN_ERR "shm fs already mounted\n"); return NULL; } @@ -274,7 +279,7 @@ shm_ctlmni = SHMMNI; shm_mode = S_IRWXUGO | S_ISVTX; if (shm_parse_options (data)) { - printk ("shm fs invalid option\n"); + printk(KERN_ERR "shm fs invalid option\n"); goto out_unlock; } @@ -297,7 +302,7 @@ return s; out_no_root: - printk("proc_read_super: get root inode failed\n"); + printk(KERN_ERR "proc_read_super: get root inode failed\n"); iput(root_inode); out_unlock: return NULL; @@ -310,6 +315,16 @@ return 0; } +static inline struct shmid_kernel *shm_rmid(int id) +{ + return (struct shmid_kernel *)ipc_rmid(&shm_ids,id); +} + +static __inline__ int shm_addid(struct shmid_kernel *shp) +{ + return ipc_addid(&shm_ids, &shp->shm_perm, shm_ctlmni+1); +} + static void shm_put_super(struct super_block *sb) { struct super_block **p = &shm_sb; @@ -329,7 +344,7 @@ if (!(shp = shm_lock (i))) continue; if (shp->shm_nattch) - printk ("shm_nattch = %ld\n", shp->shm_nattch); + printk(KERN_DEBUG "shm_nattch = %ld\n", shp->shm_nattch); shp = shm_rmid(i); shm_unlock(i); killseg_core(shp, 1); @@ -439,7 +454,7 @@ continue; if (!(shp = shm_get (nr-2))) continue; - if (shp->shm_perm.mode & SHM_DEST) + if (shp->shm_unlinked) continue; if (filldir(dirent, shp->shm_name, shp->shm_namelen, nr, nr) < 0 ) break;; @@ -468,7 +483,7 @@ continue; if (!(shp = shm_lock(i))) continue; - if (!(shp->shm_perm.mode & SHM_DEST) && + if (!(shp->shm_unlinked) && dent->d_name.len == shp->shm_namelen && strncmp(dent->d_name.name, shp->shm_name, shp->shm_namelen) == 0) goto found; @@ -506,12 +521,21 @@ down (&shm_ids.sem); if (!(shp = shm_lock (inode->i_ino))) BUG(); + shp->shm_unlinked = 1; shp->shm_perm.mode |= SHM_DEST; shp->shm_perm.key = IPC_PRIVATE; /* Do not 
find it any more */ shm_unlock (inode->i_ino); up (&shm_ids.sem); inode->i_nlink -= 1; - d_delete (dent); + /* + * If it's a reserved name we have to drop the dentry instead + * of creating a negative dentry + */ + if (dent->d_name.len == SHM_FMT_LEN && + memcmp (SHM_FMT, dent->d_name.name, SHM_FMT_LEN - 8) == 0) + d_drop (dent); + else + d_delete (dent); return 0; } @@ -637,16 +661,18 @@ static inline struct shmid_kernel *newseg_alloc(int numpages, size_t namelen) { struct shmid_kernel *shp; + pte_t **dir; shp = (struct shmid_kernel *) kmalloc (sizeof (*shp) + namelen, GFP_KERNEL); if (!shp) - return 0; + return ERR_PTR(-ENOMEM); - shp->shm_dir = shm_alloc (numpages); - if (!shp->shm_dir) { + dir = shm_alloc (numpages); + if (IS_ERR(dir)) { kfree(shp); - return 0; + return ERR_PTR(PTR_ERR(dir)); } + shp->shm_dir = dir; shp->shm_npages = numpages; shp->shm_nattch = 0; shp->shm_namelen = namelen; @@ -665,12 +691,13 @@ if (size > shm_ctlmax) return -EINVAL; + if (shm_tot + numpages >= shm_ctlall) return -ENOSPC; if (!(shp = newseg_alloc(numpages, namelen ? namelen : SHM_FMT_LEN + 1))) return -ENOMEM; - id = ipc_addid(&shm_ids, &shp->shm_perm, shm_ctlmni+1); + id = shm_addid(shp); if(id == -1) { shm_free(shp->shm_dir,numpages); kfree(shp); @@ -684,6 +711,7 @@ shp->shm_atim = shp->shm_dtim = 0; shp->shm_ctim = CURRENT_TIME; shp->id = shm_buildid(id,shp->shm_perm.seq); + shp->shm_unlinked = 0; if (namelen != 0) { shp->shm_namelen = namelen; memcpy (shp->shm_name, name, namelen); @@ -877,18 +905,6 @@ } } -char * shm_getname(int id) -{ - char *result; - - result = __getname (); - if (IS_ERR(result)) - return result; - - sprintf (result, "%s/" SHM_FMT, shm_path, id); - return result; -} - asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds *buf) { struct shm_setbuf setbuf; @@ -1018,18 +1034,50 @@ } case IPC_RMID: { - char *name; - if ((shmid % SEQ_MULTIPLIER)== zero_id) + /* + * We cannot simply remove the file. 
The SVID states + * that the block remains until the last person + * detaches from it, then is deleted. A shmat() on + * an RMID segment is legal in older Linux and if + * we change it apps break... + * + * Instead we set a destroyed flag, and then blow + * the name away when the usage hits zero. + */ + if ((shmid % SEQ_MULTIPLIER) == zero_id) return -EINVAL; - name = shm_getname(shmid); - if (IS_ERR(name)) - return PTR_ERR(name); lock_kernel(); - err = do_unlink (name); + down(&shm_ids.sem); + shp = shm_lock(shmid); + if (shp == NULL) { + up(&shm_ids.sem); + unlock_kernel(); + return -EINVAL; + } + err = -EIDRM; + if (shm_checkid(shp, shmid) == 0) { + if (shp->shm_nattch == 0) { + int id=shp->id; + shm_unlock(shmid); + up(&shm_ids.sem); + /* The kernel lock prevents new attaches from + * being happening. We can't hold shm_lock here + * else we will deadlock in shm_lookup when we + * try to recursively grab it. + */ + err = shm_remove_name(id); + unlock_kernel(); + return err; + } + /* Do not find me any more */ + shp->shm_perm.mode |= SHM_DEST; + shp->shm_perm.key = IPC_PRIVATE; /* Do not find it any more */ + err = 0; + } + /* Unlock */ + shm_unlock(shmid); + up(&shm_ids.sem); unlock_kernel(); - putname (name); - if (err == -ENOENT) - err = -EINVAL; return err; } @@ -1108,7 +1156,7 @@ struct file * file; int err; int flags; - char *name; + char name[SHM_FMT_LEN+1]; if (!shm_sb || (shmid % SEQ_MULTIPLIER) == zero_id) return -EINVAL; @@ -1125,17 +1173,11 @@ } else flags = MAP_SHARED; - name = shm_getname(shmid); - if (IS_ERR (name)) - return PTR_ERR (name); - + sprintf (name, SHM_FMT, shmid); lock_kernel(); - file = filp_open (name, O_RDWR, 0); - putname (name); - if (IS_ERR (file)) { - unlock_kernel(); + file = filp_open(name, O_RDWR, 0, dget(shm_sb->s_root)); + if (IS_ERR (file)) goto bad_file; - } *raddr = do_mmap (file, addr, file->f_dentry->d_inode->i_size, (shmflg & SHM_RDONLY ? 
PROT_READ : PROT_READ | PROT_WRITE), flags, 0); @@ -1148,6 +1190,7 @@ return err; bad_file: + unlock_kernel(); if ((err = PTR_ERR(file)) == -ENOENT) return -EINVAL; return err; @@ -1160,6 +1203,17 @@ } /* + * Remove a name. Must be called with lock_kernel + */ + +static int shm_remove_name(int id) +{ + char name[SHM_FMT_LEN+1]; + sprintf (name, SHM_FMT, id); + return do_unlink (name, dget(shm_sb->s_root)); +} + +/* * remove the attach descriptor shmd. * free memory for segment if it is marked destroyed. * The descriptor has already been removed from the current->mm->mmap list @@ -1170,13 +1224,33 @@ int id = shmd->vm_file->f_dentry->d_inode->i_ino; struct shmid_kernel *shp; + lock_kernel(); + /* remove from the list of attaches of the shm segment */ if(!(shp = shm_lock(id))) BUG(); shp->shm_lprid = current->pid; shp->shm_dtim = CURRENT_TIME; shp->shm_nattch--; - shm_unlock(id); + if(shp->shm_nattch == 0 && shp->shm_perm.mode & SHM_DEST) { + int pid=shp->id; + int err; + shm_unlock(id); + + /* The kernel lock prevents new attaches from + * being happening. We can't hold shm_lock here + * else we will deadlock in shm_lookup when we + * try to recursively grab it. + */ + err = shm_remove_name(pid); + if(err && err != -ENOENT) + printk(KERN_ERR "Unlink of SHM id %d failed (%d).\n", pid, err); + + } else { + shm_unlock(id); + } + + unlock_kernel(); } /* @@ -1214,13 +1288,13 @@ /* * page not present ... 
go through shm_dir */ -static struct page * shm_nopage_core(struct shmid_kernel *shp, unsigned int idx, int *swp, int *rss) +static struct page * shm_nopage_core(struct shmid_kernel *shp, unsigned int idx, int *swp, int *rss, unsigned long address) { pte_t pte; struct page * page; if (idx >= shp->shm_npages) - goto sigbus; + return NOPAGE_SIGBUS; pte = SHM_ENTRY(shp,idx); if (!pte_present(pte)) { @@ -1232,7 +1306,7 @@ page = alloc_page(GFP_HIGHUSER); if (!page) goto oom; - clear_highpage(page); + clear_user_highpage(page, address); if ((shp != shm_lock(shp->id)) && (shp->id != zero_id)) BUG(); } else { @@ -1267,9 +1341,8 @@ return pte_page(pte); oom: + shm_lock(shp->id); return NOPAGE_OOM; -sigbus: - return NOPAGE_SIGBUS; } static struct page * shm_nopage(struct vm_area_struct * shmd, unsigned long address, int no_share) @@ -1285,7 +1358,7 @@ down(&inode->i_sem); if(!(shp = shm_lock(inode->i_ino))) BUG(); - page = shm_nopage_core(shp, idx, &shm_swp, &shm_rss); + page = shm_nopage_core(shp, idx, &shm_swp, &shm_rss, address); shm_unlock(inode->i_ino); up(&inode->i_sem); return(page); @@ -1482,8 +1555,8 @@ continue; shp = shm_lock(i); if(shp!=NULL) { -#define SMALL_STRING "%10d %10d %4o %10u %5u %5u %5d %5u %5u %5u %5u %10lu %10lu %10lu %.*s\n" -#define BIG_STRING "%10d %10d %4o %21u %5u %5u %5d %5u %5u %5u %5u %10lu %10lu %10lu %.*s\n" +#define SMALL_STRING "%10d %10d %4o %10u %5u %5u %5d %5u %5u %5u %5u %10lu %10lu %10lu %.*s%s\n" +#define BIG_STRING "%10d %10d %4o %21u %5u %5u %5d %5u %5u %5u %5u %10lu %10lu %10lu %.*s%s\n" char *format; if (sizeof(size_t) <= sizeof(int)) @@ -1506,7 +1579,8 @@ shp->shm_dtim, shp->shm_ctim, shp->shm_namelen, - shp->shm_name); + shp->shm_name, + shp->shm_unlinked ? 
" (deleted)" : ""); shm_unlock(i); pos += len; @@ -1595,8 +1669,8 @@ if (!vm_enough_memory((vma->vm_end - vma->vm_start) >> PAGE_SHIFT)) return -ENOMEM; - if (!(shp = newseg_alloc((vma->vm_end - vma->vm_start) / PAGE_SIZE, 0))) - return -ENOMEM; + if (IS_ERR(shp = newseg_alloc((vma->vm_end - vma->vm_start) / PAGE_SIZE, 0))) + return PTR_ERR(shp); if ((filp = file_setup(vma->vm_file, shp)) == 0) { killseg_core(shp, 0); return -ENOMEM; @@ -1657,7 +1731,7 @@ shp = VMA_TO_SHP(shmd); down(&shp->zsem); shm_lock(zero_id); - page = shm_nopage_core(shp, idx, &dummy, &zshm_rss); + page = shm_nopage_core(shp, idx, &dummy, &zshm_rss, address); shm_unlock(zero_id); up(&shp->zsem); return(page); diff -u --recursive --new-file v2.3.99-pre1/linux/kernel/acct.c linux/kernel/acct.c --- v2.3.99-pre1/linux/kernel/acct.c Fri Mar 10 16:40:50 2000 +++ linux/kernel/acct.c Thu Mar 16 10:45:58 2000 @@ -154,7 +154,7 @@ if (IS_ERR(tmp)) goto out; /* Difference from BSD - they don't do O_APPEND */ - file = filp_open(tmp, O_WRONLY|O_APPEND, 0); + file = filp_open(tmp, O_WRONLY|O_APPEND, 0, NULL); putname(tmp); if (IS_ERR(file)) { error = PTR_ERR(file); diff -u --recursive --new-file v2.3.99-pre1/linux/kernel/ksyms.c linux/kernel/ksyms.c --- v2.3.99-pre1/linux/kernel/ksyms.c Fri Mar 10 16:40:50 2000 +++ linux/kernel/ksyms.c Fri Mar 17 15:43:32 2000 @@ -141,7 +141,8 @@ EXPORT_SYMBOL(iput); EXPORT_SYMBOL(__namei); EXPORT_SYMBOL(lookup_dentry); -EXPORT_SYMBOL(open_namei); +EXPORT_SYMBOL(lookup_one); +EXPORT_SYMBOL(__open_namei); EXPORT_SYMBOL(sys_close); EXPORT_SYMBOL(d_alloc_root); EXPORT_SYMBOL(d_delete); @@ -360,6 +361,7 @@ EXPORT_SYMBOL(expand_kiobuf); EXPORT_SYMBOL(map_user_kiobuf); +EXPORT_SYMBOL(unmap_kiobuf); EXPORT_SYMBOL(lock_kiovec); EXPORT_SYMBOL(unlock_kiovec); EXPORT_SYMBOL(brw_kiovec); @@ -504,3 +506,6 @@ /* init task, for moving kthread roots - ought to export a function ?? 
*/ EXPORT_SYMBOL(init_task_union); + +EXPORT_SYMBOL(tasklist_lock); +EXPORT_SYMBOL(pidhash); diff -u --recursive --new-file v2.3.99-pre1/linux/kernel/sys.c linux/kernel/sys.c --- v2.3.99-pre1/linux/kernel/sys.c Fri Mar 10 16:40:50 2000 +++ linux/kernel/sys.c Sun Mar 19 11:15:32 2000 @@ -336,12 +336,23 @@ * never happen. * * -astor + * + * cevans - New behaviour, Oct '99 + * A process may, via prctl(), elect to keep its capabilities when it + * calls setuid() and switches away from uid==0. Both permitted and + * effective sets will be retained. + * Without this change, it was impossible for a daemon to drop only some + * of its privilege. The call to setuid(!=0) would drop all privileges! + * Keeping uid 0 is not an option because uid 0 owns too many vital + * files.. + * Thanks to Olaf Kirch and Peter Benie for spotting this. */ extern inline void cap_emulate_setxuid(int old_ruid, int old_euid, int old_suid) { if ((old_ruid == 0 || old_euid == 0 || old_suid == 0) && - (current->uid != 0 && current->euid != 0 && current->suid != 0)) { + (current->uid != 0 && current->euid != 0 && current->suid != 0) && + !current->keep_capabilities) { cap_clear(current->cap_permitted); cap_clear(current->cap_effective); } @@ -1073,6 +1084,17 @@ #endif break; + case PR_GET_KEEPCAPS: + if (current->keep_capabilities) + error = 1; + break; + case PR_SET_KEEPCAPS: + if (arg2 != 0 && arg2 != 1) { + error = -EINVAL; + break; + } + current->keep_capabilities = arg2; + break; default: error = -EINVAL; break; diff -u --recursive --new-file v2.3.99-pre1/linux/kernel/sysctl.c linux/kernel/sysctl.c --- v2.3.99-pre1/linux/kernel/sysctl.c Fri Mar 10 16:40:50 2000 +++ linux/kernel/sysctl.c Thu Mar 16 10:48:15 2000 @@ -57,7 +57,6 @@ #endif #ifdef CONFIG_SYSVIPC extern size_t shm_ctlmax; -extern char shm_path[]; extern int msg_ctlmax; extern int msg_ctlmnb; extern int msg_ctlmni; @@ -199,8 +198,6 @@ {KERN_RTSIGMAX, "rtsig-max", &max_queued_signals, sizeof(int), 0644, NULL, &proc_dointvec}, #ifdef 
CONFIG_SYSVIPC - {KERN_SHMPATH, "shmpath", &shm_path, 256, - 0644, NULL, &proc_dostring, &sysctl_string }, {KERN_SHMMAX, "shmmax", &shm_ctlmax, sizeof (size_t), 0644, NULL, &proc_doulongvec_minmax}, {KERN_MSGMAX, "msgmax", &msg_ctlmax, sizeof (int), diff -u --recursive --new-file v2.3.99-pre1/linux/mm/filemap.c linux/mm/filemap.c --- v2.3.99-pre1/linux/mm/filemap.c Tue Mar 14 19:10:40 2000 +++ linux/mm/filemap.c Fri Mar 17 13:26:03 2000 @@ -108,14 +108,15 @@ curr = curr->next; /* We cannot invalidate a locked page */ - if (PageLocked(page)) + if (TryLockPage(page)) continue; lru_cache_del(page); - remove_page_from_inode_queue(page); remove_page_from_hash_queue(page); page->mapping = NULL; + UnlockPage(page); + page_cache_release(page); } spin_unlock(&pagecache_lock); @@ -1413,7 +1414,7 @@ struct page *new_page = page_cache_alloc(); if (new_page) { - copy_highpage(new_page, old_page); + copy_user_highpage(new_page, old_page, address); flush_page_to_ram(new_page); } else new_page = NOPAGE_OOM; diff -u --recursive --new-file v2.3.99-pre1/linux/mm/memory.c linux/mm/memory.c --- v2.3.99-pre1/linux/mm/memory.c Fri Mar 10 16:40:50 2000 +++ linux/mm/memory.c Wed Mar 15 09:59:06 2000 @@ -61,10 +61,10 @@ static inline void copy_cow_page(struct page * from, struct page * to, unsigned long address) { if (from == ZERO_PAGE(address)) { - clear_highpage(to); + clear_user_highpage(to, address); return; } - copy_highpage(to, from); + copy_user_highpage(to, from, address); } mem_map_t * mem_map = NULL; @@ -1073,7 +1073,7 @@ return -1; if (PageHighMem(page)) high = 1; - clear_highpage(page); + clear_user_highpage(page, addr); entry = pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot))); vma->vm_mm->rss++; tsk->min_flt++; diff -u --recursive --new-file v2.3.99-pre1/linux/mm/page_alloc.c linux/mm/page_alloc.c --- v2.3.99-pre1/linux/mm/page_alloc.c Tue Mar 14 19:10:40 2000 +++ linux/mm/page_alloc.c Fri Mar 17 18:34:02 2000 @@ -58,6 +58,8 @@ */ #define BAD_RANGE(zone,x) (((zone) != 
(x)->zone) || (((x)-mem_map) < (zone)->offset) || (((x)-mem_map) >= (zone)->offset+(zone)->size)) +#if 0 + static inline unsigned long classfree(zone_t *zone) { unsigned long free = 0; @@ -71,6 +73,8 @@ return(free); } +#endif + /* * Buddy system. Hairy. You really aren't expected to understand this * @@ -148,8 +152,10 @@ spin_unlock_irqrestore(&zone->lock, flags); - if (classfree(zone) > zone->pages_high) + if (zone->free_pages > zone->pages_high) { zone->zone_wake_kswapd = 0; + zone->low_on_memory = 0; + } } #define MARK_USED(index, order, area) \ @@ -176,7 +182,8 @@ return page; } -static inline struct page * rmqueue (zone_t *zone, unsigned long order) +static FASTCALL(struct page * rmqueue(zone_t *zone, unsigned long order)); +static struct page * rmqueue(zone_t *zone, unsigned long order) { free_area_t * area = zone->free_area + order; unsigned long curr_order = order; @@ -216,19 +223,43 @@ return NULL; } -static inline int zone_balance_memory (zone_t *zone, int gfp_mask) +static int zone_balance_memory(zonelist_t *zonelist) { - int freed; + int tried = 0, freed = 0; + zone_t **zone; + int gfp_mask = zonelist->gfp_mask; + extern wait_queue_head_t kswapd_wait; - /* - * In the atomic allocation case we only 'kick' the - * state machine, but do not try to free pages - * ourselves. - */ - freed = try_to_free_pages(gfp_mask, zone); + zone = zonelist->zones; + for (;;) { + zone_t *z = *(zone++); + if (!z) + break; + if (z->free_pages > z->pages_low) + continue; - if (!freed && !(gfp_mask & __GFP_HIGH)) - return 0; + z->zone_wake_kswapd = 1; + wake_up_interruptible(&kswapd_wait); + + /* Are we reaching the critical stage? */ + if (!z->low_on_memory) { + /* Not yet critical, so let kswapd handle it.. */ + if (z->free_pages > z->pages_min) + continue; + z->low_on_memory = 1; + } + /* + * In the atomic allocation case we only 'kick' the + * state machine, but do not try to free pages + * ourselves. 
+ */ + tried = 1; + freed |= try_to_free_pages(gfp_mask, z); + } + if (tried && !freed) { + if (!(gfp_mask & __GFP_HIGH)) + return 0; + } return 1; } @@ -237,9 +268,7 @@ */ struct page * __alloc_pages (zonelist_t *zonelist, unsigned long order) { - zone_t **zone, *z; - struct page *page; - int gfp_mask; + zone_t **zone = zonelist->zones; /* * (If anyone calls gfp from interrupts nonatomically then it @@ -248,10 +277,8 @@ * We are falling back to lower-level zones if allocation * in a higher zone fails. */ - zone = zonelist->zones; - gfp_mask = zonelist->gfp_mask; for (;;) { - z = *(zone++); + zone_t *z = *(zone++); if (!z) break; if (!z->size) @@ -261,23 +288,10 @@ * do our best to just allocate things without * further thought. */ - if (!(current->flags & PF_MEMALLOC)) - { - unsigned long free = classfree(z); - - if (free <= z->pages_high) - { - extern wait_queue_head_t kswapd_wait; - - z->zone_wake_kswapd = 1; - wake_up_interruptible(&kswapd_wait); - - if (free <= z->pages_min) - z->low_on_memory = 1; - - if (z->low_on_memory) - goto balance; - } + if (!(current->flags & PF_MEMALLOC)) { + /* Are we low on memory? */ + if (z->free_pages <= z->pages_low) + continue; } /* * This is an optimization for the 'higher order zone @@ -287,24 +301,30 @@ * we do not take the spinlock and it's not exact for * the higher order case, but will do it for most things.) 
*/ -ready: if (z->free_pages) { - page = rmqueue(z, order); + struct page *page = rmqueue(z, order); if (page) return page; } } - -nopage: + if (zone_balance_memory(zonelist)) { + zone = zonelist->zones; + for (;;) { + zone_t *z = *(zone++); + if (!z) + break; + if (z->free_pages) { + struct page *page = rmqueue(z, order); + if (page) + return page; + } + } + } return NULL; /* * The main chunk of the balancing code is in this offline branch: */ -balance: - if (!zone_balance_memory(z, gfp_mask)) - goto nopage; - goto ready; } /* @@ -549,7 +569,7 @@ zone->offset = offset; cumulative += size; - mask = (cumulative / zone_balance_ratio[j]); + mask = (size / zone_balance_ratio[j]); if (mask < zone_balance_min[j]) mask = zone_balance_min[j]; else if (mask > zone_balance_max[j]) diff -u --recursive --new-file v2.3.99-pre1/linux/net/Config.in linux/net/Config.in --- v2.3.99-pre1/linux/net/Config.in Fri Mar 10 16:40:50 2000 +++ linux/net/Config.in Fri Mar 17 10:56:20 2000 @@ -13,9 +13,9 @@ tristate ' Netlink device emulation' CONFIG_NETLINK_DEV fi bool 'Network packet filtering (replaces ipchains)' CONFIG_NETFILTER -if [ "$CONFIG_NETFILTER" = "y" ]; then - bool ' Network packet filtering debugging' CONFIG_NETFILTER_DEBUG -fi +#if [ "$CONFIG_NETFILTER" = "y" ]; then +# bool ' Network packet filtering debugging' CONFIG_NETFILTER_DEBUG +#fi bool 'Socket Filtering' CONFIG_FILTER tristate 'Unix domain sockets' CONFIG_UNIX bool 'TCP/IP networking' CONFIG_INET diff -u --recursive --new-file v2.3.99-pre1/linux/net/Makefile linux/net/Makefile --- v2.3.99-pre1/linux/net/Makefile Sun Feb 20 21:12:40 2000 +++ linux/net/Makefile Fri Mar 17 10:56:20 2000 @@ -20,6 +20,10 @@ ifeq ($(CONFIG_INET),y) SUB_DIRS += ipv4 +ifeq ($(CONFIG_NETFILTER),y) +SUB_DIRS += ipv4/netfilter +MOD_SUB_DIRS += ipv4/netfilter +endif endif ifeq ($(CONFIG_UNIX),y) @@ -198,7 +202,7 @@ endif L_TARGET := network.a -L_OBJS := $(SOCK) protocols.o $(join $(SUB_DIRS),$(SUB_DIRS:%=/%.o)) +L_OBJS := $(SOCK) protocols.o 
$(join $(SUB_DIRS), $(patsubst %,/%.o,$(notdir $(SUB_DIRS)))) M_OBJS := diff -u --recursive --new-file v2.3.99-pre1/linux/net/core/netfilter.c linux/net/core/netfilter.c --- v2.3.99-pre1/linux/net/core/netfilter.c Tue Mar 7 14:32:26 2000 +++ linux/net/core/netfilter.c Fri Mar 17 10:56:20 2000 @@ -4,9 +4,10 @@ * Thanks to Rob `CmdrTaco' Malda for not influencing this code in any * way. * - * Rusty Russell (C)1998 -- This code is GPL. + * Rusty Russell (C)2000 -- This code is GPL. * * February 2000: Modified by James Morris to have 1 queue per protocol. + * 15-Mar-2000: Added NF_REPEAT --RR. */ #include #include @@ -56,8 +57,6 @@ { struct list_head *i; - NFDEBUG("nf_register_hook: pf=%i hook=%u.\n", reg->pf, reg->hooknum); - br_write_lock_bh(BR_NETPROTO_LOCK); for (i = nf_hooks[reg->pf][reg->hooknum].next; i != &nf_hooks[reg->pf][reg->hooknum]; @@ -119,7 +118,16 @@ void nf_unregister_sockopt(struct nf_sockopt_ops *reg) { /* No point being interruptible: we're probably in cleanup_module() */ + restart: down(&nf_sockopt_mutex); + if (reg->use != 0) { + /* To be woken by nf_sockopt call... 
*/ + reg->cleanup_task = current; + up(&nf_sockopt_mutex); + set_current_state(TASK_UNINTERRUPTIBLE); + schedule(); + goto restart; + } list_del(&reg->list); up(&nf_sockopt_mutex); } @@ -178,7 +186,7 @@ dst_port = ntohs(tcp->dest); } - printk("PROTO=%d %ld.%ld.%ld.%ld:%hu %ld.%ld.%ld.%ld:%hu" + printk("PROTO=%d %d.%d.%d.%d:%hu %d.%d.%d.%d:%hu" " L=%hu S=0x%2.2hX I=%hu F=0x%4.4hX T=%hu", ip->protocol, (ntohl(ip->saddr)>>24)&0xFF, @@ -261,9 +269,16 @@ if (skb->nf_debug != ((1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_FORWARD) | (1 << NF_IP_POST_ROUTING))) { - printk("ip_finish_output: bad unowned skb = %p: ",skb); - debug_print_hooks_ip(skb->nf_debug); - nf_dump_skb(PF_INET, skb); + /* Fragments will have no owners, but still + may be local */ + if (!(skb->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) + || skb->nf_debug != ((1 << NF_IP_LOCAL_OUT) + | (1 << NF_IP_POST_ROUTING))){ + printk("ip_finish_output:" + " bad unowned skb = %p: ",skb); + debug_print_hooks_ip(skb->nf_debug); + nf_dump_skb(PF_INET, skb); + } } } } @@ -274,31 +289,42 @@ char *opt, int *len, int get) { struct list_head *i; + struct nf_sockopt_ops *ops; int ret; if (down_interruptible(&nf_sockopt_mutex) != 0) return -EINTR; for (i = nf_sockopts.next; i != &nf_sockopts; i = i->next) { - struct nf_sockopt_ops *ops = (struct nf_sockopt_ops *)i; + ops = (struct nf_sockopt_ops *)i; if (ops->pf == pf) { if (get) { if (val >= ops->get_optmin && val < ops->get_optmax) { + ops->use++; + up(&nf_sockopt_mutex); ret = ops->get(sk, val, opt, len); goto out; } } else { if (val >= ops->set_optmin && val < ops->set_optmax) { + ops->use++; + up(&nf_sockopt_mutex); ret = ops->set(sk, val, opt, *len); goto out; } } } } - ret = -ENOPROTOOPT; + up(&nf_sockopt_mutex); + return -ENOPROTOOPT; + out: + down(&nf_sockopt_mutex); + ops->use--; + if (ops->cleanup_task) + wake_up_process(ops->cleanup_task); up(&nf_sockopt_mutex); return ret; } @@ -334,6 +360,10 @@ case NF_DROP: return NF_DROP; + case NF_REPEAT: + *i = (*i)->prev; + break; + 
#ifdef CONFIG_NETFILTER_DEBUG case NF_ACCEPT: break; @@ -367,7 +397,6 @@ /* The caller must flush their queue before this */ int nf_unregister_queue_handler(int pf) { - NFDEBUG("Unregistering Netfilter queue handler for pf=%d\n", pf); br_write_lock_bh(BR_NETPROTO_LOCK); queue_handler[pf].outfn = NULL; queue_handler[pf].data = NULL; @@ -390,7 +419,6 @@ struct nf_info *info; if (!queue_handler[pf].outfn) { - NFDEBUG("nf_queue: noone wants the packet, dropping it.\n"); kfree_skb(skb); return; } @@ -432,6 +460,14 @@ unsigned int verdict; int ret = 0; +#ifdef CONFIG_NETFILTER_DEBUG + if (skb->nf_debug & (1 << hook)) { + printk("nf_hook: hook %i already set.\n", hook); + nf_dump_skb(pf, skb); + } + skb->nf_debug |= (1 << hook); +#endif + elem = &nf_hooks[pf][hook]; verdict = nf_iterate(&nf_hooks[pf][hook], &skb, hook, indev, outdev, &elem, okfn); @@ -473,6 +509,11 @@ } /* Continue traversal iff userspace said ok... */ + if (verdict == NF_REPEAT) { + elem = elem->prev; + verdict = NF_ACCEPT; + } + if (verdict == NF_ACCEPT) { verdict = nf_iterate(&nf_hooks[info->pf][info->hook], &skb, info->hook, diff -u --recursive --new-file v2.3.99-pre1/linux/net/core/skbuff.c linux/net/core/skbuff.c --- v2.3.99-pre1/linux/net/core/skbuff.c Tue Mar 7 14:32:26 2000 +++ linux/net/core/skbuff.c Fri Mar 17 10:56:20 2000 @@ -4,7 +4,7 @@ * Authors: Alan Cox * Florian La Roche * - * Version: $Id: skbuff.c,v 1.69 2000/03/06 03:47:58 davem Exp $ + * Version: $Id: skbuff.c,v 1.70 2000/03/17 14:41:39 davem Exp $ * * Fixes: * Alan Cox : Fixed the worst of the load balancer bugs. 
@@ -203,7 +203,7 @@ skb->dst = NULL; skb->rx_dev = NULL; #ifdef CONFIG_NETFILTER - skb->nfmark = skb->nfreason = skb->nfcache = 0; + skb->nfmark = skb->nfcache = 0; skb->nfct = NULL; #ifdef CONFIG_NETFILTER_DEBUG skb->nf_debug = 0; @@ -319,7 +319,6 @@ new->security=old->security; #ifdef CONFIG_NETFILTER new->nfmark=old->nfmark; - new->nfreason=old->nfreason; new->nfcache=old->nfcache; new->nfct=old->nfct; nf_conntrack_get(new->nfct); diff -u --recursive --new-file v2.3.99-pre1/linux/net/decnet/dn_route.c linux/net/decnet/dn_route.c --- v2.3.99-pre1/linux/net/decnet/dn_route.c Fri Mar 10 16:40:51 2000 +++ linux/net/decnet/dn_route.c Fri Mar 17 10:56:20 2000 @@ -805,10 +805,7 @@ key.scope = RT_SCOPE_UNIVERSE; #ifdef CONFIG_DECNET_ROUTE_FWMASK - if (skb->nfreason == NF_REASON_FOR_ROUTING) - key.fwmark = skb->fwmark; - else - key.fwmark = 0; + key.fwmark = skb->fwmark; #else key.fwmark = 0; #endif @@ -886,9 +883,7 @@ (rt->key.daddr == cb->dst) && (rt->key.oif == 0) && #ifdef CONFIG_DECNET_ROUTE_FWMASK - (rt->key.fwmark == (skb->nfreason == - NF_REASON_FOR_ROUTING - ? 
skb->nfmark : 0)) && + (rt->key.fwmark == skb->nfmark) && #endif (rt->key.iif == cb->iif)) { rt->u.dst.lastuse = jiffies; diff -u --recursive --new-file v2.3.99-pre1/linux/net/ipv4/Config.in linux/net/ipv4/Config.in --- v2.3.99-pre1/linux/net/ipv4/Config.in Thu Jan 6 12:57:48 2000 +++ linux/net/ipv4/Config.in Fri Mar 17 10:56:20 2000 @@ -9,7 +9,7 @@ bool ' IP: policy routing' CONFIG_IP_MULTIPLE_TABLES if [ "$CONFIG_IP_MULTIPLE_TABLES" = "y" ]; then if [ "$CONFIG_NETFILTER" = "y" ]; then - bool ' IP: use FWMARK value as routing key' CONFIG_IP_ROUTE_FWMARK + bool ' IP: use netfilter MARK value as routing key' CONFIG_IP_ROUTE_FWMARK fi bool ' IP: fast network address translation' CONFIG_IP_ROUTE_NAT fi @@ -53,3 +53,6 @@ #if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then # bool ' IP: support checksum copy to user for UDP (EXPERIMENTAL)' CONFIG_UDP_DELAY_CSUM #fi +if [ "$CONFIG_NETFILTER" != "n" ]; then + source net/ipv4/netfilter/Config.in +fi diff -u --recursive --new-file v2.3.99-pre1/linux/net/ipv4/icmp.c linux/net/ipv4/icmp.c --- v2.3.99-pre1/linux/net/ipv4/icmp.c Thu Mar 2 14:36:23 2000 +++ linux/net/ipv4/icmp.c Fri Mar 17 10:56:20 2000 @@ -3,7 +3,7 @@ * * Alan Cox, * - * Version: $Id: icmp.c,v 1.65 2000/02/22 23:54:25 davem Exp $ + * Version: $Id: icmp.c,v 1.66 2000/03/17 14:41:50 davem Exp $ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -332,20 +332,6 @@ static struct icmp_control icmp_pointers[NR_ICMP_TYPES+1]; /* - * Build xmit assembly blocks - */ - -struct icmp_bxm -{ - void *data_ptr; - int data_len; - struct icmphdr icmph; - unsigned long csum; - struct ip_options replyopts; - unsigned char optbuf[40]; -}; - -/* * The ICMP socket. This is the most convenient way to flow control * our ICMP output as well as maintain a clean interface throughout * all layers. All Socketless IP sends will soon be gone. @@ -508,7 +494,7 @@ * Driving logic for building and sending ICMP messages. 
*/ -static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) +void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) { struct sock *sk=icmp_socket->sk; struct ipcm_cookie ipc; diff -u --recursive --new-file v2.3.99-pre1/linux/net/ipv4/ip_gre.c linux/net/ipv4/ip_gre.c --- v2.3.99-pre1/linux/net/ipv4/ip_gre.c Tue Jan 11 22:31:46 2000 +++ linux/net/ipv4/ip_gre.c Fri Mar 17 10:56:20 2000 @@ -599,6 +599,10 @@ skb->dev = tunnel->dev; dst_release(skb->dst); skb->dst = NULL; +#ifdef CONFIG_NETFILTER + nf_conntrack_put(skb->nfct); + skb->nfct = NULL; +#endif netif_rx(skb); read_unlock(&ipgre_lock); return(0); @@ -817,6 +821,11 @@ iph->tot_len = htons(skb->len); ip_select_ident(iph, &rt->u.dst); ip_send_check(iph); + +#ifdef CONFIG_NETFILTER + nf_conntrack_put(skb->nfct); + skb->nfct = NULL; +#endif stats->tx_bytes += skb->len; stats->tx_packets++; diff -u --recursive --new-file v2.3.99-pre1/linux/net/ipv4/ip_output.c linux/net/ipv4/ip_output.c --- v2.3.99-pre1/linux/net/ipv4/ip_output.c Tue Mar 7 14:32:26 2000 +++ linux/net/ipv4/ip_output.c Fri Mar 17 10:56:20 2000 @@ -5,7 +5,7 @@ * * The Internet Protocol (IP) output module. * - * Version: $Id: ip_output.c,v 1.81 2000/03/06 03:48:01 davem Exp $ + * Version: $Id: ip_output.c,v 1.82 2000/03/17 14:41:50 davem Exp $ * * Authors: Ross Biro, * Fred N. van Kempen, @@ -894,6 +894,9 @@ /* Connection association is same as pre-frag packet */ skb2->nfct = skb->nfct; nf_conntrack_get(skb2->nfct); +#ifdef CONFIG_NETFILTER_DEBUG + skb2->nf_debug = skb->nf_debug; +#endif #endif /* diff -u --recursive --new-file v2.3.99-pre1/linux/net/ipv4/ipip.c linux/net/ipv4/ipip.c --- v2.3.99-pre1/linux/net/ipv4/ipip.c Thu Jan 6 12:57:48 2000 +++ linux/net/ipv4/ipip.c Sat Mar 18 16:51:35 2000 @@ -1,7 +1,7 @@ /* * Linux NET3: IP/IP protocol decoder. 
* - * Version: $Id: ipip.c,v 1.30 2000/01/06 00:41:55 davem Exp $ + * Version: $Id: ipip.c,v 1.31 2000/03/17 14:41:51 davem Exp $ * * Authors: * Sam Lantinga (slouken@cs.ucdavis.edu) 02/01/95 @@ -93,6 +93,7 @@ */ +#include #include #include #include @@ -483,6 +484,10 @@ skb->dev = tunnel->dev; dst_release(skb->dst); skb->dst = NULL; +#ifdef CONFIG_NETFILTER + nf_conntrack_put(skb->nfct); + skb->nfct = NULL; +#endif netif_rx(skb); read_unlock(&ipip_lock); return 0; @@ -618,6 +623,11 @@ iph->tot_len = htons(skb->len); ip_select_ident(iph, &rt->u.dst); ip_send_check(iph); + +#ifdef CONFIG_NETFILTER + nf_conntrack_put(skb->nfct); + skb->nfct = NULL; +#endif stats->tx_bytes += skb->len; stats->tx_packets++; diff -u --recursive --new-file v2.3.99-pre1/linux/net/ipv4/ipmr.c linux/net/ipv4/ipmr.c --- v2.3.99-pre1/linux/net/ipv4/ipmr.c Tue Jan 11 22:31:46 2000 +++ linux/net/ipv4/ipmr.c Fri Mar 17 10:56:20 2000 @@ -9,7 +9,7 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. * - * Version: $Id: ipmr.c,v 1.50 2000/01/09 02:19:32 davem Exp $ + * Version: $Id: ipmr.c,v 1.51 2000/03/17 14:41:52 davem Exp $ * * Fixes: * Michael Chastain : Incorrect size of copying. 
@@ -1100,6 +1100,10 @@ skb->h.ipiph = skb->nh.iph; skb->nh.iph = iph; +#ifdef CONFIG_NETFILTER + nf_conntrack_put(skb->nfct); + skb->nfct = NULL; +#endif } static inline int ipmr_forward_finish(struct sk_buff *skb) @@ -1433,6 +1437,10 @@ skb->dst = NULL; ((struct net_device_stats*)reg_dev->priv)->rx_bytes += skb->len; ((struct net_device_stats*)reg_dev->priv)->rx_packets++; +#ifdef CONFIG_NETFILTER + nf_conntrack_put(skb->nfct); + skb->nfct = NULL; +#endif netif_rx(skb); dev_put(reg_dev); return 0; @@ -1488,6 +1496,10 @@ ((struct net_device_stats*)reg_dev->priv)->rx_bytes += skb->len; ((struct net_device_stats*)reg_dev->priv)->rx_packets++; skb->dst = NULL; +#ifdef CONFIG_NETFILTER + nf_conntrack_put(skb->nfct); + skb->nfct = NULL; +#endif netif_rx(skb); dev_put(reg_dev); return 0; diff -u --recursive --new-file v2.3.99-pre1/linux/net/ipv4/netfilter/Config.in linux/net/ipv4/netfilter/Config.in --- v2.3.99-pre1/linux/net/ipv4/netfilter/Config.in Wed Dec 31 16:00:00 1969 +++ linux/net/ipv4/netfilter/Config.in Fri Mar 17 10:56:20 2000 @@ -0,0 +1,64 @@ +# +# IP netfilter configuration +# +mainmenu_option next_comment +comment ' IP: Netfilter Configuration' + +tristate 'Connection tracking (required for masq/NAT)' CONFIG_IP_NF_CONNTRACK +if [ "$CONFIG_IP_NF_CONNTRACK" != "n" ]; then + dep_tristate ' FTP protocol support' CONFIG_IP_NF_FTP $CONFIG_IP_NF_CONNTRACK +fi + +if [ "$CONFIG_EXPERIMENTAL" = "y" -a "$CONFIG_NETLINK" = "y" ]; then + tristate 'Userspace queueing via NETLINK (EXPERIMENTAL)' CONFIG_IP_NF_QUEUE +fi +tristate 'IP tables support (required for filtering/masq/NAT)' CONFIG_IP_NF_IPTABLES +if [ "$CONFIG_IP_NF_IPTABLES" != "n" ]; then +# The simple matches. 
+ dep_tristate ' limit match support' CONFIG_IP_NF_MATCH_LIMIT $CONFIG_IP_NF_IPTABLES + dep_tristate ' MAC address match support' CONFIG_IP_NF_MATCH_MAC $CONFIG_IP_NF_IPTABLES + dep_tristate ' netfilter MARK match support' CONFIG_IP_NF_MATCH_MARK $CONFIG_IP_NF_IPTABLES + dep_tristate ' Multiple port match support' CONFIG_IP_NF_MATCH_MULTIPORT $CONFIG_IP_NF_IPTABLES + dep_tristate ' TOS match support' CONFIG_IP_NF_MATCH_TOS $CONFIG_IP_NF_IPTABLES + if [ "$CONFIG_IP_NF_CONNTRACK" != "n" ]; then + dep_tristate ' Connection state match support' CONFIG_IP_NF_MATCH_STATE $CONFIG_IP_NF_CONNTRACK $CONFIG_IP_NF_IPTABLES + fi + if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then + dep_tristate ' Unclean match support (EXPERIMENTAL)' CONFIG_IP_NF_MATCH_UNCLEAN $CONFIG_IP_NF_IPTABLES + dep_tristate ' Owner match support (EXPERIMENTAL)' CONFIG_IP_NF_MATCH_OWNER $CONFIG_IP_NF_IPTABLES + fi +# The targets + dep_tristate ' Packet filtering' CONFIG_IP_NF_FILTER $CONFIG_IP_NF_IPTABLES + if [ "$CONFIG_IP_NF_FILTER" != "n" ]; then + dep_tristate ' REJECT target support' CONFIG_IP_NF_TARGET_REJECT $CONFIG_IP_NF_FILTER + if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then + dep_tristate ' MIRROR target support (EXPERIMENTAL)' CONFIG_IP_NF_TARGET_MIRROR $CONFIG_IP_NF_FILTER + fi + fi + + if [ "$CONFIG_IP_NF_CONNTRACK" != "n" ]; then + dep_tristate ' Full NAT' CONFIG_IP_NF_NAT $CONFIG_IP_NF_IPTABLES + if [ "$CONFIG_IP_NF_NAT" != "n" ]; then + dep_tristate ' MASQUERADE target support' CONFIG_IP_NF_TARGET_MASQUERADE $CONFIG_IP_NF_NAT + dep_tristate ' REDIRECT target support' CONFIG_IP_NF_TARGET_REDIRECT $CONFIG_IP_NF_NAT + fi + fi + + dep_tristate ' Packet mangling' CONFIG_IP_NF_MANGLE $CONFIG_IP_NF_IPTABLES + if [ "$CONFIG_IP_NF_MANGLE" != "n" ]; then + dep_tristate ' TOS target support' CONFIG_IP_NF_TARGET_TOS $CONFIG_IP_NF_MANGLE + dep_tristate ' MARK target support' CONFIG_IP_NF_TARGET_MARK $CONFIG_IP_NF_MANGLE + fi + dep_tristate ' LOG target support' CONFIG_IP_NF_TARGET_LOG $CONFIG_IP_NF_IPTABLES +fi + 
+# Backwards compatibility modules: only if you don't build in the others. +if [ "$CONFIG_IP_NF_CONNTRACK" != "y" ]; then + if [ "$CONFIG_IP_NF_IPTABLES" != "y" ]; then + tristate 'ipchains (2.2-style) support' CONFIG_IP_NF_COMPAT_IPCHAINS + if [ "$CONFIG_IP_NF_COMPAT_IPCHAINS" != "y" ]; then + tristate 'ipfwadm (2.0-style) support' CONFIG_IP_NF_COMPAT_IPFWADM + fi + fi +fi +endmenu diff -u --recursive --new-file v2.3.99-pre1/linux/net/ipv4/netfilter/Makefile linux/net/ipv4/netfilter/Makefile --- v2.3.99-pre1/linux/net/ipv4/netfilter/Makefile Wed Dec 31 16:00:00 1969 +++ linux/net/ipv4/netfilter/Makefile Fri Mar 17 10:56:20 2000 @@ -0,0 +1,234 @@ +# +# Makefile for the netfilter modules on top of IPv4. +# +# Note! Dependencies are done automagically by 'make dep', which also +# removes any old dependencies. DON'T put your own dependencies here +# unless it's something special (ie not a .c file). +# +# Note 2! The CFLAGS definition is now in the main makefile... + +O_TARGET := netfilter.o +MOD_LIST_NAME := IPV4_MODULES +M_OBJS := + +IP_NF_CONNTRACK_OBJ:=ip_conntrack_core.o ip_conntrack_proto_generic.o ip_conntrack_proto_tcp.o ip_conntrack_proto_udp.o ip_conntrack_proto_icmp.o + +IP_NF_NAT_OBJ:=ip_nat_core.o ip_nat_proto_unknown.o ip_nat_proto_tcp.o ip_nat_proto_udp.o ip_nat_proto_icmp.o + +# Link order matters here. 
+ifeq ($(CONFIG_IP_NF_CONNTRACK),y) +OX_OBJS += ip_conntrack_standalone.o +O_OBJS += $(IP_NF_CONNTRACK_OBJ) +else + ifeq ($(CONFIG_IP_NF_CONNTRACK),m) + M_OBJS += ip_conntrack.o + endif +endif + +ifeq ($(CONFIG_IP_NF_QUEUE),y) +O_OBJS += ip_queue.o +else + ifeq ($(CONFIG_IP_NF_QUEUE),m) + M_OBJS += ip_queue.o + endif +endif + +ifeq ($(CONFIG_IP_NF_FTP),y) +OX_OBJS += ip_conntrack_ftp.o +else + ifeq ($(CONFIG_IP_NF_FTP),m) + M_OBJS += ip_conntrack_ftp.o + endif +endif + +ifeq ($(CONFIG_IP_NF_IPTABLES),y) +O_OBJS += ip_tables.o +else + ifeq ($(CONFIG_IP_NF_IPTABLES),m) + M_OBJS += ip_tables.o + endif +endif + +ifeq ($(CONFIG_IP_NF_MATCH_LIMIT),y) +O_OBJS += ipt_limit.o +else + ifeq ($(CONFIG_IP_NF_MATCH_LIMIT),m) + M_OBJS += ipt_limit.o + endif +endif + +ifeq ($(CONFIG_IP_NF_MATCH_MARK),y) +O_OBJS += ipt_mark.o +else + ifeq ($(CONFIG_IP_NF_MATCH_MARK),m) + M_OBJS += ipt_mark.o + endif +endif + +ifeq ($(CONFIG_IP_NF_MATCH_MAC),y) +O_OBJS += ipt_mac.o +else + ifeq ($(CONFIG_IP_NF_MATCH_MAC),m) + M_OBJS += ipt_mac.o + endif +endif + +ifeq ($(CONFIG_IP_NF_MATCH_MULTIPORT),y) +O_OBJS += ipt_multiport.o +else + ifeq ($(CONFIG_IP_NF_MATCH_MULTIPORT),m) + M_OBJS += ipt_multiport.o + endif +endif + +ifeq ($(CONFIG_IP_NF_MATCH_OWNER),y) +O_OBJS += ipt_owner.o +else + ifeq ($(CONFIG_IP_NF_MATCH_OWNER),m) + M_OBJS += ipt_owner.o + endif +endif + +ifeq ($(CONFIG_IP_NF_MATCH_TOS),y) +O_OBJS += ipt_tos.o +else + ifeq ($(CONFIG_IP_NF_MATCH_TOS),m) + M_OBJS += ipt_tos.o + endif +endif + +ifeq ($(CONFIG_IP_NF_MATCH_STATE),y) +O_OBJS += ipt_state.o +else + ifeq ($(CONFIG_IP_NF_MATCH_STATE),m) + M_OBJS += ipt_state.o + endif +endif + +ifeq ($(CONFIG_IP_NF_MATCH_UNCLEAN),y) +O_OBJS += ipt_unclean.o +else + ifeq ($(CONFIG_IP_NF_MATCH_UNCLEAN),m) + M_OBJS += ipt_unclean.o + endif +endif + +ifeq ($(CONFIG_IP_NF_FILTER),y) +O_OBJS += iptable_filter.o +else + ifeq ($(CONFIG_IP_NF_FILTER),m) + M_OBJS += iptable_filter.o + endif +endif + +ifeq ($(CONFIG_IP_NF_NAT),y) +OX_OBJS += 
ip_nat_standalone.o +O_OBJS += ip_nat_rule.o $(IP_NF_NAT_OBJ) + ifeq ($(CONFIG_IP_NF_FTP),y) + O_OBJS += ip_nat_ftp.o + endif +else + ifeq ($(CONFIG_IP_NF_NAT),m) + M_OBJS += iptable_nat.o + ifeq ($(CONFIG_IP_NF_FTP),m) + M_OBJS += ip_nat_ftp.o + endif + endif +endif + +ifeq ($(CONFIG_IP_NF_MANGLE),y) +O_OBJS += iptable_mangle.o +else + ifeq ($(CONFIG_IP_NF_MANGLE),m) + M_OBJS += iptable_mangle.o + endif +endif + +ifeq ($(CONFIG_IP_NF_TARGET_REJECT),y) +O_OBJS += ipt_REJECT.o +else + ifeq ($(CONFIG_IP_NF_TARGET_REJECT),m) + M_OBJS += ipt_REJECT.o + endif +endif + +ifeq ($(CONFIG_IP_NF_TARGET_MIRROR),y) +O_OBJS += ipt_MIRROR.o +else + ifeq ($(CONFIG_IP_NF_TARGET_MIRROR),m) + M_OBJS += ipt_MIRROR.o + endif +endif + +ifeq ($(CONFIG_IP_NF_TARGET_TOS),y) +O_OBJS += ipt_TOS.o +else + ifeq ($(CONFIG_IP_NF_TARGET_TOS),m) + M_OBJS += ipt_TOS.o + endif +endif + +ifeq ($(CONFIG_IP_NF_TARGET_MARK),y) +O_OBJS += ipt_MARK.o +else + ifeq ($(CONFIG_IP_NF_TARGET_MARK),m) + M_OBJS += ipt_MARK.o + endif +endif + +ifeq ($(CONFIG_IP_NF_TARGET_MASQUERADE),y) +O_OBJS += ipt_MASQUERADE.o +else + ifeq ($(CONFIG_IP_NF_TARGET_MASQUERADE),m) + M_OBJS += ipt_MASQUERADE.o + endif +endif + +ifeq ($(CONFIG_IP_NF_TARGET_REDIRECT),y) +O_OBJS += ipt_REDIRECT.o +else + ifeq ($(CONFIG_IP_NF_TARGET_REDIRECT),m) + M_OBJS += ipt_REDIRECT.o + endif +endif + +ifeq ($(CONFIG_IP_NF_TARGET_LOG),y) +O_OBJS += ipt_LOG.o +else + ifeq ($(CONFIG_IP_NF_TARGET_LOG),m) + M_OBJS += ipt_LOG.o + endif +endif + +ifeq ($(CONFIG_IP_NF_COMPAT_IPCHAINS),y) +O_OBJS += ipchains.o +else + ifeq ($(CONFIG_IP_NF_COMPAT_IPCHAINS),m) + M_OBJS += ipchains.o + endif +endif + +ifeq ($(CONFIG_IP_NF_COMPAT_IPFWADM),y) +O_OBJS += ipfwadm.o +else + ifeq ($(CONFIG_IP_NF_COMPAT_IPFWADM),m) + M_OBJS += ipfwadm.o + endif +endif + +include $(TOPDIR)/Rules.make + +ip_conntrack.o: ip_conntrack_standalone.o $(IP_NF_CONNTRACK_OBJ) + $(LD) -r -o $@ $(IP_NF_CONNTRACK_OBJ) ip_conntrack_standalone.o + +iptable_nat.o: ip_nat_standalone.o ip_nat_rule.o 
$(IP_NF_NAT_OBJ) + $(LD) -r -o $@ ip_nat_standalone.o ip_nat_rule.o $(IP_NF_NAT_OBJ) + +# All the parts of conntrack and NAT required for compatibility layer. +IP_NF_COMPAT_LAYER:=ip_fw_compat.o ip_fw_compat_redir.o ip_fw_compat_masq.o $(IP_NF_CONNTRACK_OBJ) $(IP_NF_NAT_OBJ) + +ipfwadm.o: ipfwadm_core.o $(IP_NF_COMPAT_LAYER) + $(LD) -r -o $@ ipfwadm_core.o $(IP_NF_COMPAT_LAYER) + +ipchains.o: ipchains_core.o $(IP_NF_COMPAT_LAYER) + $(LD) -r -o $@ ipchains_core.o $(IP_NF_COMPAT_LAYER) diff -u --recursive --new-file v2.3.99-pre1/linux/net/ipv4/netfilter/ip_conntrack_core.c linux/net/ipv4/netfilter/ip_conntrack_core.c --- v2.3.99-pre1/linux/net/ipv4/netfilter/ip_conntrack_core.c Wed Dec 31 16:00:00 1969 +++ linux/net/ipv4/netfilter/ip_conntrack_core.c Fri Mar 17 10:56:20 2000 @@ -0,0 +1,891 @@ +/* Connection state tracking for netfilter. This is separated from, + but required by, the NAT layer; it can also be used by an iptables + extension. */ + +/* (c) 1999 Paul `Rusty' Russell. Licenced under the GNU General + Public Licence. */ + +#ifdef MODULE +#define __NO_VERSION__ +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* This rwlock protects the main hash table, protocol/helper/expected + registrations, conntrack timers*/ +#define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_conntrack_lock) +#define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_conntrack_lock) + +#include +#include +#include +#include +#include + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) 
+#endif + +DECLARE_RWLOCK(ip_conntrack_lock); + +void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack) = NULL; +static LIST_HEAD(expect_list); +static LIST_HEAD(protocol_list); +static LIST_HEAD(helpers); +unsigned int ip_conntrack_htable_size = 0; +static int ip_conntrack_max = 0; +static atomic_t ip_conntrack_count = ATOMIC_INIT(0); +struct list_head *ip_conntrack_hash; + +extern struct ip_conntrack_protocol ip_conntrack_generic_protocol; + +static inline int proto_cmpfn(const struct ip_conntrack_protocol *curr, + u_int8_t protocol) +{ + return protocol == curr->proto; +} + +struct ip_conntrack_protocol *__find_proto(u_int8_t protocol) +{ + struct ip_conntrack_protocol *p; + + MUST_BE_READ_LOCKED(&ip_conntrack_lock); + p = LIST_FIND(&protocol_list, proto_cmpfn, + struct ip_conntrack_protocol *, protocol); + if (!p) + p = &ip_conntrack_generic_protocol; + + return p; +} + +struct ip_conntrack_protocol *find_proto(u_int8_t protocol) +{ + struct ip_conntrack_protocol *p; + + READ_LOCK(&ip_conntrack_lock); + p = __find_proto(protocol); + READ_UNLOCK(&ip_conntrack_lock); + return p; +} + +static inline void ip_conntrack_put(struct ip_conntrack *ct) +{ + IP_NF_ASSERT(ct); + IP_NF_ASSERT(ct->infos[0].master); + /* nf_conntrack_put wants to go via an info struct, so feed it + one at random. */ + nf_conntrack_put(&ct->infos[0]); +} + +static inline u_int32_t +hash_conntrack(const struct ip_conntrack_tuple *tuple) +{ +#if 0 + dump_tuple(tuple); +#endif +#ifdef CONFIG_NETFILTER_DEBUG + if (tuple->src.pad) + DEBUGP("Tuple %p has non-zero padding.\n", tuple); +#endif + /* ntohl because more differences in low bits. */ + /* To ensure that halves of the same connection don't hash + clash, we add the source per-proto again. 
*/ + return (ntohl(tuple->src.ip + tuple->dst.ip + + tuple->src.u.all + tuple->dst.u.all + + tuple->dst.protonum) + + ntohs(tuple->src.u.all)) + % ip_conntrack_htable_size; +} + +inline int +get_tuple(const struct iphdr *iph, size_t len, + struct ip_conntrack_tuple *tuple, + struct ip_conntrack_protocol *protocol) +{ + int ret; + + /* Can only happen when extracting tuples from inside ICMP + packets */ + if (iph->frag_off & htons(IP_OFFSET)) { + if (net_ratelimit()) + printk("ip_conntrack_core: Frag of proto %u.\n", + iph->protocol); + return 0; + } + /* Guarantee 8 protocol bytes: if more wanted, use len param */ + else if (iph->ihl * 4 + 8 > len) + return 0; + + tuple->src.ip = iph->saddr; + tuple->src.pad = 0; + tuple->dst.ip = iph->daddr; + tuple->dst.protonum = iph->protocol; + + ret = protocol->pkt_to_tuple((u_int32_t *)iph + iph->ihl, + len - 4*iph->ihl, + tuple); + return ret; +} + +static int +invert_tuple(struct ip_conntrack_tuple *inverse, + const struct ip_conntrack_tuple *orig, + const struct ip_conntrack_protocol *protocol) +{ + inverse->src.ip = orig->dst.ip; + inverse->src.pad = 0; + inverse->dst.ip = orig->src.ip; + inverse->dst.protonum = orig->dst.protonum; + + return protocol->invert_tuple(inverse, orig); +} + +static void +destroy_conntrack(struct nf_conntrack *nfct) +{ + struct ip_conntrack *ct = (struct ip_conntrack *)nfct; + + IP_NF_ASSERT(atomic_read(&nfct->use) == 0); + IP_NF_ASSERT(!timer_pending(&ct->timeout)); + + if (ct->master.master) + nf_conntrack_put(&ct->master); + + if (ip_conntrack_destroyed) + ip_conntrack_destroyed(ct); + kfree(ct); + atomic_dec(&ip_conntrack_count); +} + +static void death_by_timeout(unsigned long ul_conntrack) +{ + struct ip_conntrack *ct = (void *)ul_conntrack; + + WRITE_LOCK(&ip_conntrack_lock); + /* Remove from both hash lists */ + LIST_DELETE(&ip_conntrack_hash + [hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple)], + &ct->tuplehash[IP_CT_DIR_ORIGINAL]); + LIST_DELETE(&ip_conntrack_hash + 
[hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple)], + &ct->tuplehash[IP_CT_DIR_REPLY]); + /* If our expected is in the list, take it out. */ + if (ct->expected.expectant) { + IP_NF_ASSERT(list_inlist(&expect_list, &ct->expected)); + IP_NF_ASSERT(ct->expected.expectant == ct); + LIST_DELETE(&expect_list, &ct->expected); + } + WRITE_UNLOCK(&ip_conntrack_lock); + ip_conntrack_put(ct); +} + +static inline int +conntrack_tuple_cmp(const struct ip_conntrack_tuple_hash *i, + const struct ip_conntrack_tuple *tuple, + const struct ip_conntrack *ignored_conntrack) +{ + MUST_BE_READ_LOCKED(&ip_conntrack_lock); + return i->ctrack != ignored_conntrack + && memcmp(tuple, &i->tuple, sizeof(*tuple)) == 0; +} + +static struct ip_conntrack_tuple_hash * +__ip_conntrack_find(const struct ip_conntrack_tuple *tuple, + const struct ip_conntrack *ignored_conntrack) +{ + struct ip_conntrack_tuple_hash *h; + + MUST_BE_READ_LOCKED(&ip_conntrack_lock); + h = LIST_FIND(&ip_conntrack_hash[hash_conntrack(tuple)], + conntrack_tuple_cmp, + struct ip_conntrack_tuple_hash *, + tuple, ignored_conntrack); + return h; +} + +/* Find a connection corresponding to a tuple. */ +struct ip_conntrack_tuple_hash * +ip_conntrack_find_get(const struct ip_conntrack_tuple *tuple, + const struct ip_conntrack *ignored_conntrack) +{ + struct ip_conntrack_tuple_hash *h; + + READ_LOCK(&ip_conntrack_lock); + h = __ip_conntrack_find(tuple, ignored_conntrack); + if (h) + atomic_inc(&h->ctrack->ct_general.use); + READ_UNLOCK(&ip_conntrack_lock); + + return h; +} + +/* Returns true if a connection correspondings to the tuple (required + for NAT). 
*/ +int +ip_conntrack_tuple_taken(const struct ip_conntrack_tuple *tuple, + const struct ip_conntrack *ignored_conntrack) +{ + struct ip_conntrack_tuple_hash *h; + + READ_LOCK(&ip_conntrack_lock); + h = __ip_conntrack_find(tuple, ignored_conntrack); + READ_UNLOCK(&ip_conntrack_lock); + + return h != NULL; +} + +/* Returns TRUE if it dealt with ICMP, and filled in skb fields */ +int icmp_error_track(struct sk_buff *skb) +{ + const struct iphdr *iph = skb->nh.iph; + struct icmphdr *hdr = (struct icmphdr *)((u_int32_t *)iph + iph->ihl); + struct ip_conntrack_tuple innertuple, origtuple; + struct iphdr *inner = (struct iphdr *)(hdr + 1); + size_t datalen = skb->len - iph->ihl*4 - sizeof(*hdr); + struct ip_conntrack_protocol *innerproto; + struct ip_conntrack_tuple_hash *h; + enum ip_conntrack_info ctinfo; + + if (iph->protocol != IPPROTO_ICMP) + return 0; + + if (skb->len < iph->ihl * 4 + sizeof(struct icmphdr)) { + DEBUGP("icmp_error_track: too short\n"); + return 1; + } + + if (hdr->type != ICMP_DEST_UNREACH + && hdr->type != ICMP_SOURCE_QUENCH + && hdr->type != ICMP_TIME_EXCEEDED + && hdr->type != ICMP_PARAMETERPROB + && hdr->type != ICMP_REDIRECT) + return 0; + + /* Ignore it if the checksum's bogus. */ + if (ip_compute_csum((unsigned char *)hdr, sizeof(*hdr) + datalen)) { + DEBUGP("icmp_error_track: bad csum\n"); + return 1; + } + + innerproto = find_proto(inner->protocol); + /* Are they talking about one of our connections? */ + if (inner->ihl * 4 + 8 > datalen + || !get_tuple(inner, datalen, &origtuple, innerproto)) { + DEBUGP("icmp_error: ! get_tuple p=%u (%u*4+%u dlen=%u)\n", + inner->protocol, inner->ihl, 8, + datalen); + return 1; + } + + /* Ordinarily, we'd expect the inverted tupleproto, but it's + been preserved inside the ICMP. 
*/ + if (!invert_tuple(&innertuple, &origtuple, innerproto)) { + DEBUGP("icmp_error_track: Can't invert tuple\n"); + return 1; + } + h = ip_conntrack_find_get(&innertuple, NULL); + if (!h) { + DEBUGP("icmp_error_track: no match\n"); + return 1; + } + + ctinfo = IP_CT_RELATED; + if (DIRECTION(h) == IP_CT_DIR_REPLY) + ctinfo += IP_CT_IS_REPLY; + + /* Update skb to refer to this connection */ + skb->nfct = &h->ctrack->infos[ctinfo]; + return 1; +} + +static inline int helper_cmp(const struct ip_conntrack_helper *i, + const struct ip_conntrack_tuple *rtuple) +{ + return i->will_help(rtuple); +} + +/* Compare all but src per-proto part. */ +static int expect_cmp(const struct ip_conntrack_expect *i, + const struct ip_conntrack_tuple *tuple) +{ + return (tuple->src.ip == i->tuple.src.ip + && tuple->dst.ip == i->tuple.dst.ip + && tuple->dst.u.all == i->tuple.dst.u.all + && tuple->dst.protonum == i->tuple.dst.protonum); +} + +/* Allocate a new conntrack; we set everything up, then grab write + lock and see if we lost a race. If we lost it we return 0, + indicating the controlling code should look again. 
*/ +static int +init_conntrack(const struct ip_conntrack_tuple *tuple, + struct ip_conntrack_protocol *protocol, + struct sk_buff *skb) +{ + struct ip_conntrack *conntrack; + struct ip_conntrack_tuple repl_tuple; + size_t hash, repl_hash; + struct ip_conntrack_expect *expected; + enum ip_conntrack_info ctinfo; + int i; + + if (!invert_tuple(&repl_tuple, tuple, protocol)) { + DEBUGP("Can't invert tuple.\n"); + return 1; + } + + if(ip_conntrack_max && + (atomic_read(&ip_conntrack_count) >= ip_conntrack_max)) { + if (net_ratelimit()) + printk(KERN_WARNING "ip_conntrack: maximum limit of %d entries exceeded\n", ip_conntrack_max); + return 1; + } + + conntrack = kmalloc(sizeof(struct ip_conntrack), GFP_ATOMIC); + if (!conntrack) { + DEBUGP("Can't allocate conntrack.\n"); + return 1; + } + hash = hash_conntrack(tuple); + repl_hash = hash_conntrack(&repl_tuple); + + memset(conntrack, 0, sizeof(struct ip_conntrack)); + atomic_set(&conntrack->ct_general.use, 1); + conntrack->ct_general.destroy = destroy_conntrack; + conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *tuple; + conntrack->tuplehash[IP_CT_DIR_ORIGINAL].ctrack = conntrack; + conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = repl_tuple; + conntrack->tuplehash[IP_CT_DIR_REPLY].ctrack = conntrack; + for(i=0; i < IP_CT_NUMBER; i++) + conntrack->infos[i].master = &conntrack->ct_general; + + if (!protocol->new(conntrack, skb->nh.iph, skb->len)) { + kfree(conntrack); + return 1; + } + + /* Sew in at head of hash list. */ + WRITE_LOCK(&ip_conntrack_lock); + /* Check noone else beat us in the race... 
*/ + if (__ip_conntrack_find(tuple, NULL)) { + WRITE_UNLOCK(&ip_conntrack_lock); + printk("ip_conntrack: Wow someone raced us!\n"); + kfree(conntrack); + return 0; + } + conntrack->helper = LIST_FIND(&helpers, helper_cmp, + struct ip_conntrack_helper *, + &repl_tuple); + /* Need finding and deleting of expected ONLY if we win race */ + expected = LIST_FIND(&expect_list, expect_cmp, + struct ip_conntrack_expect *, tuple); + if (expected) { + /* Welcome, Mr. Bond. We've been expecting you... */ + conntrack->status = IPS_EXPECTED; + conntrack->master.master = &expected->expectant->ct_general; + IP_NF_ASSERT(conntrack->master.master); + LIST_DELETE(&expect_list, expected); + expected->expectant = NULL; + nf_conntrack_get(&conntrack->master); + ctinfo = IP_CT_RELATED; + } else { + ctinfo = IP_CT_NEW; + } + list_prepend(&ip_conntrack_hash[hash], + &conntrack->tuplehash[IP_CT_DIR_ORIGINAL]); + list_prepend(&ip_conntrack_hash[repl_hash], + &conntrack->tuplehash[IP_CT_DIR_REPLY]); + WRITE_UNLOCK(&ip_conntrack_lock); + + /* Update skb to refer to this connection */ + skb->nfct = &conntrack->infos[ctinfo]; + + atomic_inc(&ip_conntrack_count); + return 1; +} + +static void +resolve_normal_ct(struct sk_buff *skb) +{ + struct ip_conntrack_tuple tuple; + struct ip_conntrack_tuple_hash *h; + struct ip_conntrack_protocol *proto; + enum ip_conntrack_info ctinfo; + + proto = find_proto(skb->nh.iph->protocol); + if (!get_tuple(skb->nh.iph, skb->len, &tuple, proto)) + return; + + /* Loop around search/insert race */ + do { + /* look for tuple match */ + h = ip_conntrack_find_get(&tuple, NULL); + if (!h && init_conntrack(&tuple, proto, skb)) + return; + } while (!h); + + /* It exists; we have (non-exclusive) reference. */ + if (DIRECTION(h) == IP_CT_DIR_REPLY) { + ctinfo = IP_CT_ESTABLISHED + IP_CT_IS_REPLY; + h->ctrack->status |= IPS_SEEN_REPLY; + } else { + /* Once we've had two way comms, always ESTABLISHED. 
*/ + if (h->ctrack->status & IPS_SEEN_REPLY) { + DEBUGP("ip_conntrack_in: normal packet for %p\n", + h->ctrack); + ctinfo = IP_CT_ESTABLISHED; + } else if (h->ctrack->status & IPS_EXPECTED) { + DEBUGP("ip_conntrack_in: related packet for %p\n", + h->ctrack); + ctinfo = IP_CT_RELATED; + } else { + DEBUGP("ip_conntrack_in: new packet for %p\n", + h->ctrack); + ctinfo = IP_CT_NEW; + } + } + skb->nfct = &h->ctrack->infos[ctinfo]; +} + +/* Return conntrack and conntrack_info a given skb */ +struct ip_conntrack * +ip_conntrack_get(struct sk_buff *skb, enum ip_conntrack_info *ctinfo) +{ + if (!skb->nfct) { + /* It may be an icmp error... */ + if (!icmp_error_track(skb)) + resolve_normal_ct(skb); + } + + if (skb->nfct) { + struct ip_conntrack *ct + = (struct ip_conntrack *)skb->nfct->master; + + /* ctinfo is the index of the nfct inside the conntrack */ + *ctinfo = skb->nfct - ct->infos; + IP_NF_ASSERT(*ctinfo >= 0 && *ctinfo < IP_CT_NUMBER); + return ct; + } + return NULL; +} + +/* Netfilter hook itself. */ +unsigned int ip_conntrack_in(unsigned int hooknum, + struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + struct ip_conntrack *ct; + enum ip_conntrack_info ctinfo; + struct ip_conntrack_protocol *proto; + int ret; + + /* FIXME: Do this right please. --RR */ + (*pskb)->nfcache |= NFC_UNKNOWN; + + /* Previously seen (loopback)? Ignore. Do this before + fragment check. */ + if ((*pskb)->nfct) + return NF_ACCEPT; + + /* Gather fragments. 
*/ + if ((*pskb)->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) { + *pskb = ip_ct_gather_frags(*pskb); + if (!*pskb) + return NF_STOLEN; + } + + ct = ip_conntrack_get(*pskb, &ctinfo); + if (!ct) + /* Not valid part of a connection */ + return NF_ACCEPT; + + proto = find_proto((*pskb)->nh.iph->protocol); + /* If this is new, this is first time timer will be set */ + ret = proto->packet(ct, (*pskb)->nh.iph, (*pskb)->len, ctinfo); + + if (ret == -1) { + /* Invalid */ + nf_conntrack_put((*pskb)->nfct); + (*pskb)->nfct = NULL; + return NF_ACCEPT; + } + + if (ret != NF_DROP && ct->helper) { + ret = ct->helper->help((*pskb)->nh.iph, (*pskb)->len, + ct, ctinfo); + if (ret == -1) { + /* Invalid */ + nf_conntrack_put((*pskb)->nfct); + (*pskb)->nfct = NULL; + return NF_ACCEPT; + } + } + + return ret; +} + +int invert_tuplepr(struct ip_conntrack_tuple *inverse, + const struct ip_conntrack_tuple *orig) +{ + return invert_tuple(inverse, orig, find_proto(orig->dst.protonum)); +} + +/* Add a related connection. */ +int ip_conntrack_expect_related(struct ip_conntrack *related_to, + const struct ip_conntrack_tuple *tuple) +{ + WRITE_LOCK(&ip_conntrack_lock); + related_to->expected.tuple = *tuple; + + if (!related_to->expected.expectant) { + list_prepend(&expect_list, &related_to->expected); + related_to->expected.expectant = related_to; + } else { + IP_NF_ASSERT(list_inlist(&expect_list, &related_to->expected)); + IP_NF_ASSERT(related_to->expected.expectant + == related_to); + } + WRITE_UNLOCK(&ip_conntrack_lock); + + return 0; +} + +/* Alter reply tuple (maybe alter helper). If it's already taken, + return 0 and don't do alteration. 
*/ +int ip_conntrack_alter_reply(struct ip_conntrack *conntrack, + const struct ip_conntrack_tuple *newreply) +{ + unsigned int newindex = hash_conntrack(newreply); + + WRITE_LOCK(&ip_conntrack_lock); + if (__ip_conntrack_find(newreply, conntrack)) { + WRITE_UNLOCK(&ip_conntrack_lock); + return 0; + } + DEBUGP("Altering reply tuple of %p to ", conntrack); + DUMP_TUPLE(newreply); + + LIST_DELETE(&ip_conntrack_hash + [hash_conntrack(&conntrack->tuplehash[IP_CT_DIR_REPLY] + .tuple)], + &conntrack->tuplehash[IP_CT_DIR_REPLY]); + conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply; + list_prepend(&ip_conntrack_hash[newindex], + &conntrack->tuplehash[IP_CT_DIR_REPLY]); + conntrack->helper = LIST_FIND(&helpers, helper_cmp, + struct ip_conntrack_helper *, + newreply); + WRITE_UNLOCK(&ip_conntrack_lock); + return 1; +} + +int ip_conntrack_helper_register(struct ip_conntrack_helper *me) +{ + MOD_INC_USE_COUNT; + + WRITE_LOCK(&ip_conntrack_lock); + list_prepend(&helpers, me); + WRITE_UNLOCK(&ip_conntrack_lock); + + return 0; +} + +static inline int unhelp(struct ip_conntrack_tuple_hash *i, + const struct ip_conntrack_helper *me) +{ + if (i->ctrack->helper == me) { + i->ctrack->helper = NULL; + /* Get rid of any expected. */ + if (i->ctrack->expected.expectant) { + IP_NF_ASSERT(i->ctrack->expected.expectant + == i->ctrack); + LIST_DELETE(&expect_list, &i->ctrack->expected); + i->ctrack->expected.expectant = NULL; + } + } + return 0; +} + +void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me) +{ + unsigned int i; + + /* Need write lock here, to delete helper. */ + WRITE_LOCK(&ip_conntrack_lock); + LIST_DELETE(&helpers, me); + + /* Get rid of expecteds, set helpers to NULL. */ + for (i = 0; i < ip_conntrack_htable_size; i++) + LIST_FIND_W(&ip_conntrack_hash[i], unhelp, + struct ip_conntrack_tuple_hash *, me); + WRITE_UNLOCK(&ip_conntrack_lock); + + /* Someone could be still looking at the helper in a bh. 
*/ + br_write_lock_bh(BR_NETPROTO_LOCK); + br_write_unlock_bh(BR_NETPROTO_LOCK); + + MOD_DEC_USE_COUNT; +} + +/* Refresh conntrack for this many jiffies: if noone calls this, + conntrack will vanish with current skb. */ +void ip_ct_refresh(struct ip_conntrack *ct, unsigned long extra_jiffies) +{ + WRITE_LOCK(&ip_conntrack_lock); + /* If this hasn't had a timer before, it's still being set up */ + if (ct->timeout.data == 0) { + ct->timeout.data = (unsigned long)ct; + ct->timeout.function = death_by_timeout; + ct->timeout.expires = jiffies + extra_jiffies; + atomic_inc(&ct->ct_general.use); + add_timer(&ct->timeout); + } else { + /* Need del_timer for race avoidance (may already be dying). */ + if (del_timer(&ct->timeout)) { + ct->timeout.expires = jiffies + extra_jiffies; + add_timer(&ct->timeout); + } + } + WRITE_UNLOCK(&ip_conntrack_lock); +} + +/* Returns new sk_buff, or NULL */ +struct sk_buff * +ip_ct_gather_frags(struct sk_buff *skb) +{ + struct sock *sk = skb->sk; +#ifdef CONFIG_NETFILTER_DEBUG + unsigned int olddebug = skb->nf_debug; +#endif + if (sk) sock_hold(sk); + skb = ip_defrag(skb); + if (!skb) { + if (sk) sock_put(sk); + return skb; + } + if (sk) { + skb_set_owner_w(skb, sk); + sock_put(sk); + } + + ip_send_check(skb->nh.iph); + skb->nfcache |= NFC_ALTERED; +#ifdef CONFIG_NETFILTER_DEBUG + /* Packet path as if nothing had happened. */ + skb->nf_debug = olddebug; +#endif + return skb; +} + +static inline int +do_kill(const struct ip_conntrack_tuple_hash *i, + int (*kill)(const struct ip_conntrack *i, void *data), + void *data) +{ + return kill(i->ctrack, data); +} + +/* Bring out ya dead! 
*/ +static struct ip_conntrack_tuple_hash * +get_next_corpse(int (*kill)(const struct ip_conntrack *i, void *data), + void *data) +{ + struct ip_conntrack_tuple_hash *h = NULL; + unsigned int i; + + READ_LOCK(&ip_conntrack_lock); + for (i = 0; !h && i < ip_conntrack_htable_size; i++) { + h = LIST_FIND(&ip_conntrack_hash[i], do_kill, + struct ip_conntrack_tuple_hash *, kill, data); + } + if (h) + atomic_inc(&h->ctrack->ct_general.use); + READ_UNLOCK(&ip_conntrack_lock); + + return h; +} + +void +ip_ct_selective_cleanup(int (*kill)(const struct ip_conntrack *i, void *data), + void *data) +{ + struct ip_conntrack_tuple_hash *h; + + /* This is order n^2, by the way. */ + while ((h = get_next_corpse(kill, data)) != NULL) { + /* Time to push up daises... */ + if (del_timer(&h->ctrack->timeout)) + death_by_timeout((unsigned long)h->ctrack); + /* ... else the timer will get him soon. */ + + ip_conntrack_put(h->ctrack); + } +} + +/* Fast function for those who don't want to parse /proc (and I don't + blame them). */ +/* Reversing the socket's dst/src point of view gives us the reply + mapping. */ +static int +getorigdst(struct sock *sk, int optval, void *user, int *len) +{ + struct ip_conntrack_tuple_hash *h; + struct ip_conntrack_tuple tuple = { { sk->rcv_saddr, { sk->sport }, + 0 }, + { sk->daddr, { sk->dport }, + IPPROTO_TCP } }; + + /* We only do TCP at the moment: is there a better way? 
*/ + if (strcmp(sk->prot->name, "TCP") != 0) { + DEBUGP("SO_ORIGINAL_DST: Not a TCP socket\n"); + return -ENOPROTOOPT; + } + + if (*len != sizeof(struct sockaddr_in)) { + DEBUGP("SO_ORIGINAL_DST: len %u not %u\n", + *len, sizeof(struct sockaddr_in)); + return -EINVAL; + } + + h = ip_conntrack_find_get(&tuple, NULL); + if (h) { + struct sockaddr_in sin; + + sin.sin_family = AF_INET; + sin.sin_port = h->ctrack->tuplehash[IP_CT_DIR_ORIGINAL] + .tuple.dst.u.tcp.port; + sin.sin_addr.s_addr = h->ctrack->tuplehash[IP_CT_DIR_ORIGINAL] + .tuple.dst.ip; + + DEBUGP("SO_ORIGINAL_DST: %u.%u.%u.%u %u\n", + IP_PARTS(sin.sin_addr.s_addr), ntohs(sin.sin_port)); + ip_conntrack_put(h->ctrack); + if (copy_to_user(user, &sin, sizeof(sin)) != 0) + return -EFAULT; + else + return 0; + } + DEBUGP("SO_ORIGINAL_DST: Can't find %u.%u.%u.%u/%u-%u.%u.%u.%u/%u.\n", + IP_PARTS(tuple.src.ip), ntohs(tuple.src.u.tcp.port), + IP_PARTS(tuple.dst.ip), ntohs(tuple.dst.u.tcp.port)); + return -ENOENT; +} + +static struct nf_sockopt_ops so_getorigdst += { { NULL, NULL }, PF_INET, + 0, 0, NULL, /* Setsockopts */ + SO_ORIGINAL_DST, SO_ORIGINAL_DST+1, &getorigdst, + 0, NULL }; + +#define NET_IP_CONNTRACK_MAX 2089 +#define NET_IP_CONNTRACK_MAX_NAME "ip_conntrack_max" + +static struct ctl_table_header *ip_conntrack_sysctl_header; + +static ctl_table ip_conntrack_table[] = { + { NET_IP_CONNTRACK_MAX, NET_IP_CONNTRACK_MAX_NAME, &ip_conntrack_max, + sizeof(ip_conntrack_max), 0644, NULL, proc_dointvec }, + { 0 } +}; + +static ctl_table ip_conntrack_dir_table[] = { + {NET_IPV4, "ipv4", NULL, 0, 0555, ip_conntrack_table, 0, 0, 0, 0, 0}, + { 0 } +}; + +static ctl_table ip_conntrack_root_table[] = { + {CTL_NET, "net", NULL, 0, 0555, ip_conntrack_dir_table, 0, 0, 0, 0, 0}, + { 0 } +}; + +static int kill_all(const struct ip_conntrack *i, void *data) +{ + return 1; +} + +/* Mishearing the voices in his head, our hero wonders how he's + supposed to kill the mall. 
*/ +void ip_conntrack_cleanup(void) +{ + /* NOTE(review): registered only under CONFIG_SYSCTL in ip_conntrack_init(), but unregistered unconditionally here -- confirm. */ unregister_sysctl_table(ip_conntrack_sysctl_header); + ip_ct_selective_cleanup(kill_all, NULL); + vfree(ip_conntrack_hash); + nf_unregister_sockopt(&so_getorigdst); +} + +int __init ip_conntrack_init(void) +{ + unsigned int i; + int ret; + + /* Idea from tcp.c: use 1/16384 of memory. On i386: 32MB + * machine has 256 buckets. 1GB machine has 8192 buckets. */ + ip_conntrack_htable_size + = (((num_physpages << PAGE_SHIFT) / 16384) + / sizeof(struct list_head)); + /* Default limit: eight entries per bucket. */ ip_conntrack_max = 8 * ip_conntrack_htable_size; + + printk("ip_conntrack (%u buckets, %d max)\n", + ip_conntrack_htable_size, ip_conntrack_max); + + ret = nf_register_sockopt(&so_getorigdst); + if (ret != 0) + return ret; + + ip_conntrack_hash = vmalloc(sizeof(struct list_head) + * ip_conntrack_htable_size); + if (!ip_conntrack_hash) { + nf_unregister_sockopt(&so_getorigdst); + return -ENOMEM; + } + + /* Don't NEED lock here, but good form anyway. */ + WRITE_LOCK(&ip_conntrack_lock); + /* Sew in builtin protocols. */ + list_append(&protocol_list, &ip_conntrack_protocol_tcp); + list_append(&protocol_list, &ip_conntrack_protocol_udp); + list_append(&protocol_list, &ip_conntrack_protocol_icmp); + WRITE_UNLOCK(&ip_conntrack_lock); + + for (i = 0; i < ip_conntrack_htable_size; i++) + INIT_LIST_HEAD(&ip_conntrack_hash[i]); + +/* This is fucking braindead. There is NO WAY of doing this without + the CONFIG_SYSCTL unless you don't want to detect errors. + Grrr...
--RR */ +#ifdef CONFIG_SYSCTL + ip_conntrack_sysctl_header + = register_sysctl_table(ip_conntrack_root_table, 0); + if (ip_conntrack_sysctl_header == NULL) { + vfree(ip_conntrack_hash); + nf_unregister_sockopt(&so_getorigdst); + return -ENOMEM; + } +#endif /*CONFIG_SYSCTL*/ + + ret = ip_conntrack_protocol_tcp_init(); + if (ret != 0) { + unregister_sysctl_table(ip_conntrack_sysctl_header); + vfree(ip_conntrack_hash); + nf_unregister_sockopt(&so_getorigdst); + } + + return ret; +} + diff -u --recursive --new-file v2.3.99-pre1/linux/net/ipv4/netfilter/ip_conntrack_ftp.c linux/net/ipv4/netfilter/ip_conntrack_ftp.c --- v2.3.99-pre1/linux/net/ipv4/netfilter/ip_conntrack_ftp.c Wed Dec 31 16:00:00 1969 +++ linux/net/ipv4/netfilter/ip_conntrack_ftp.c Fri Mar 17 10:56:20 2000 @@ -0,0 +1,251 @@ +/* FTP extension for IP connection tracking. */ +#ifdef MODULE +#define EXPORT_SYMTAB +#endif +#include +#include +#include +#include +#include + +#include +#include +#include + +DECLARE_LOCK(ip_ftp_lock); + +#define SERVER_STRING "227 Entering Passive Mode (" +#define CLIENT_STRING "PORT " + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) +#endif + +#define IP_PARTS_NATIVE(n) \ +(unsigned int)((n)>>24)&0xFF, \ +(unsigned int)((n)>>16)&0xFF, \ +(unsigned int)((n)>>8)&0xFF, \ +(unsigned int)((n)&0xFF) + +#define IP_PARTS(n) IP_PARTS_NATIVE(ntohl(n)) + /* Prefix to match and terminator of the number list, indexed by direction. */ +static struct { + const char *pattern; + size_t plen; + char term; +} search[2] = { + [IP_CT_FTP_PORT] { CLIENT_STRING, sizeof(CLIENT_STRING) - 1, '\r' }, + [IP_CT_FTP_PASV] { SERVER_STRING, sizeof(SERVER_STRING) - 1, ')' } +}; + +/* Returns 0, or length of numbers. NOTE(review): i grows on every comma with no upper bound, so more than five commas would index past array[5] -- confirm. */ +static int try_number(const char *data, size_t dlen, u_int32_t array[6], + char term) +{ + u_int32_t i, len; + + /* Keep data pointing at next char.
*/ + for (i = 0, len = 0; len < dlen; len++, data++) { + if (*data >= '0' && *data <= '9') { + array[i] = array[i]*10 + *data - '0'; + } + else if (*data == ',') + i++; + else { + /* Unexpected character; true if it's the + terminator and we're finished. */ + if (*data == term && i == 5) + return len; + + DEBUGP("Char %u (got %u nums) `%u' unexpected\n", + len, i, *data); + return 0; + } + } + + return 0; +} + +/* Return 1 for match, 0 for accept, -1 for partial. On a match, numoff and numlen receive the offset and length of the number list. */ +static int find_pattern(const char *data, size_t dlen, + const char *pattern, size_t plen, + char term, + unsigned int *numoff, + unsigned int *numlen, + u_int32_t array[6]) +{ + if (dlen == 0) + return 0; + + if (dlen < plen) { + /* Short packet: try for partial? */ + if (strnicmp(data, pattern, dlen) == 0) + return -1; + else return 0; + } + + if (strnicmp(data, pattern, plen) != 0) { +#if 0 + size_t i; + + DEBUGP("ftp: string mismatch\n"); + for (i = 0; i < plen; i++) { + DEBUGFTP("ftp:char %u `%c'(%u) vs `%c'(%u)\n", + i, data[i], data[i], + pattern[i], pattern[i]); + } +#endif + return 0; + } + + *numoff = plen; + *numlen = try_number(data + plen, dlen - plen, array, term); + if (!*numlen) + return -1; + + return 1; +} + +/* FIXME: This should be in userspace. Later.
*/ +static int help(const struct iphdr *iph, size_t len, + struct ip_conntrack *ct, + enum ip_conntrack_info ctinfo) +{ + /* tcplen not negative guaranteed by ip_conntrack_tcp.c */ + struct tcphdr *tcph = (void *)iph + iph->ihl * 4; + const char *data = (const char *)tcph + tcph->doff * 4; + unsigned int tcplen = len - iph->ihl * 4; + unsigned int datalen = tcplen - tcph->doff * 4; + u_int32_t old_seq_aft_nl; + int old_seq_aft_nl_set; + u_int32_t array[6] = { 0 }; + int dir = CTINFO2DIR(ctinfo); + unsigned int matchlen, matchoff; + struct ip_conntrack_tuple t; + struct ip_ct_ftp *info = &ct->help.ct_ftp_info; + + /* Can't track connections formed before we registered. NOTE(review): info is the address of a member of ct, so this test looks like it can never be true -- confirm. */ + if (!info) + return NF_ACCEPT; + + /* Until there's been traffic both ways, don't look in packets. */ + if (ctinfo != IP_CT_ESTABLISHED + && ctinfo != IP_CT_ESTABLISHED+IP_CT_IS_REPLY) { + DEBUGP("ftp: Conntrackinfo = %u\n", ctinfo); + return NF_ACCEPT; + } + + /* Not whole TCP header? */ + if (tcplen < sizeof(struct tcphdr) || tcplen < tcph->doff*4) { + DEBUGP("ftp: tcplen = %u\n", (unsigned)tcplen); + return NF_ACCEPT; + } + + /* Checksum invalid? Ignore.
*/ + /* FIXME: Source route IP option packets --RR */ + if (tcp_v4_check(tcph, tcplen, iph->saddr, iph->daddr, + csum_partial((char *)tcph, tcplen, 0))) { + DEBUGP("ftp_help: bad csum: %p %u %u.%u.%u.%u %u.%u.%u.%u\n", + tcph, tcplen, IP_PARTS(iph->saddr), + IP_PARTS(iph->daddr)); + return NF_ACCEPT; + } + + LOCK_BH(&ip_ftp_lock); + old_seq_aft_nl_set = info->seq_aft_nl_set[dir]; + old_seq_aft_nl = info->seq_aft_nl[dir]; + + DEBUGP("conntrack_ftp: datalen %u\n", datalen); + if ((datalen > 0) && (data[datalen-1] == '\n')) { + DEBUGP("conntrack_ftp: datalen %u ends in \\n\n", datalen); + if (!old_seq_aft_nl_set + || after(ntohl(tcph->seq) + datalen, old_seq_aft_nl)) { + DEBUGP("conntrack_ftp: updating nl to %u\n", + ntohl(tcph->seq) + datalen); + info->seq_aft_nl[dir] = ntohl(tcph->seq) + datalen; + info->seq_aft_nl_set[dir] = 1; + } + } + UNLOCK_BH(&ip_ftp_lock); + + /* Only look at segments that start right after the last newline seen in this direction. */ if(!old_seq_aft_nl_set || + (ntohl(tcph->seq) != old_seq_aft_nl)) { + DEBUGP("ip_conntrack_ftp_help: wrong seq pos %s(%u)\n", + old_seq_aft_nl_set ? "":"(UNSET) ", old_seq_aft_nl); + return NF_ACCEPT; + } + + switch (find_pattern(data, datalen, + search[dir].pattern, + search[dir].plen, search[dir].term, + &matchoff, &matchlen, + array)) { + case -1: /* partial */ + /* We don't usually drop packets. After all, this is + connection tracking, not packet filtering. + However, it is necessary for accurate tracking in + this case.
*/ + DEBUGP("conntrack_ftp: partial `%.*s'\n", + (int)datalen, data); + return NF_DROP; + + case 0: /* no match */ + DEBUGP("ip_conntrack_ftp_help: no match\n"); + return NF_ACCEPT; + } + + DEBUGP("conntrack_ftp: match `%.*s' (%u bytes at %u)\n", + (int)matchlen, data + matchoff, + matchlen, ntohl(tcph->seq) + matchoff); + + /* Update the ftp info and register the expected related connection. */ + LOCK_BH(&ip_ftp_lock); + info->is_ftp = 1; + info->seq = ntohl(tcph->seq) + matchoff; + info->len = matchlen; + info->ftptype = dir; + info->port = array[4] << 8 | array[5]; + + t = ((struct ip_conntrack_tuple) + { { ct->tuplehash[!dir].tuple.src.ip, + { 0 }, 0 }, + { htonl((array[0] << 24) | (array[1] << 16) + | (array[2] << 8) | array[3]), + { htons(array[4] << 8 | array[5]) }, + IPPROTO_TCP }}); + ip_conntrack_expect_related(ct, &t); + UNLOCK_BH(&ip_ftp_lock); + + return NF_ACCEPT; +} + +/* Returns TRUE if it wants to help this connection (tuple is the + tuple of REPLY packets from server). */ +static int ftp_will_help(const struct ip_conntrack_tuple *rtuple) +{ + return (rtuple->dst.protonum == IPPROTO_TCP + && rtuple->src.u.tcp.port == __constant_htons(21)); +} + +static struct ip_conntrack_helper ftp = { { NULL, NULL }, + ftp_will_help, + help }; + +static int __init init(void) +{ + return ip_conntrack_helper_register(&ftp); +} + +static void __exit fini(void) +{ + ip_conntrack_helper_unregister(&ftp); +} + +struct module *ip_conntrack_ftp = THIS_MODULE; +EXPORT_SYMBOL(ip_conntrack_ftp); +EXPORT_SYMBOL(ip_ftp_lock); + +module_init(init); +module_exit(fini); diff -u --recursive --new-file v2.3.99-pre1/linux/net/ipv4/netfilter/ip_conntrack_proto_generic.c linux/net/ipv4/netfilter/ip_conntrack_proto_generic.c --- v2.3.99-pre1/linux/net/ipv4/netfilter/ip_conntrack_proto_generic.c Wed Dec 31 16:00:00 1969 +++ linux/net/ipv4/netfilter/ip_conntrack_proto_generic.c Fri Mar 17 10:56:20 2000 @@ -0,0 +1,60 @@ +#include +#include +#include +#include +#include + +#define GENERIC_TIMEOUT (3600*HZ) + +static int 
generic_pkt_to_tuple(const void *datah, size_t datalen, + struct ip_conntrack_tuple *tuple) +{ + tuple->src.u.all = 0; + tuple->dst.u.all = 0; + + return 1; +} + +static int generic_invert_tuple(struct ip_conntrack_tuple *tuple, + const struct ip_conntrack_tuple *orig) +{ + tuple->src.u.all = 0; + tuple->dst.u.all = 0; + + return 1; +} + +/* Print out the per-protocol part of the tuple (none for the generic protocol). */ +static unsigned int generic_print_tuple(char *buffer, + const struct ip_conntrack_tuple *tuple) +{ + return 0; +} + +/* Print out the private part of the conntrack. */ +static unsigned int generic_print_conntrack(char *buffer, + const struct ip_conntrack *state) +{ + return 0; +} + +/* Returns verdict for packet, or -1 for invalid. */ +static int established(struct ip_conntrack *conntrack, + struct iphdr *iph, size_t len, + enum ip_conntrack_info conntrackinfo) +{ + ip_ct_refresh(conntrack, GENERIC_TIMEOUT); + return NF_ACCEPT; +} + +/* Called when a new connection for this protocol is found; always accepted. */ +static int new(struct ip_conntrack *conntrack, struct iphdr *iph, size_t len) +{ + return 1; +} + +struct ip_conntrack_protocol ip_conntrack_generic_protocol += { { NULL, NULL }, 0, "unknown", + generic_pkt_to_tuple, generic_invert_tuple, generic_print_tuple, + generic_print_conntrack, established, new, NULL }; + diff -u --recursive --new-file v2.3.99-pre1/linux/net/ipv4/netfilter/ip_conntrack_proto_icmp.c linux/net/ipv4/netfilter/ip_conntrack_proto_icmp.c --- v2.3.99-pre1/linux/net/ipv4/netfilter/ip_conntrack_proto_icmp.c Wed Dec 31 16:00:00 1969 +++ linux/net/ipv4/netfilter/ip_conntrack_proto_icmp.c Fri Mar 17 10:56:20 2000 @@ -0,0 +1,111 @@ +#include +#include +#include +#include +#include +#include + +#define ICMP_TIMEOUT (30*HZ) + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...)
+#endif + +static int icmp_pkt_to_tuple(const void *datah, size_t datalen, + struct ip_conntrack_tuple *tuple) +{ + const struct icmphdr *hdr = datah; + + tuple->dst.u.icmp.type = hdr->type; + tuple->src.u.icmp.id = hdr->un.echo.id; + tuple->dst.u.icmp.code = hdr->code; + + return 1; +} + +static int icmp_invert_tuple(struct ip_conntrack_tuple *tuple, + const struct ip_conntrack_tuple *orig) +{ + /* Reply type + 1 is stored, so a 0 entry means the type has no inverse. */ + static u_int8_t invmap[] + = { [ICMP_ECHO] = ICMP_ECHOREPLY + 1, + [ICMP_ECHOREPLY] = ICMP_ECHO + 1, + [ICMP_TIMESTAMP] = ICMP_TIMESTAMPREPLY + 1, + [ICMP_TIMESTAMPREPLY] = ICMP_TIMESTAMP + 1, + [ICMP_INFO_REQUEST] = ICMP_INFO_REPLY + 1, + [ICMP_INFO_REPLY] = ICMP_INFO_REQUEST + 1, + [ICMP_ADDRESS] = ICMP_ADDRESSREPLY + 1, + [ICMP_ADDRESSREPLY] = ICMP_ADDRESS + 1}; + + if (orig->dst.u.icmp.type >= sizeof(invmap) + || !invmap[orig->dst.u.icmp.type]) + return 0; + + tuple->src.u.icmp.id = orig->src.u.icmp.id; + tuple->dst.u.icmp.type = invmap[orig->dst.u.icmp.type] - 1; + tuple->dst.u.icmp.code = orig->dst.u.icmp.code; + return 1; +} + +/* Print out the per-protocol part of the tuple. */ +static unsigned int icmp_print_tuple(char *buffer, + const struct ip_conntrack_tuple *tuple) +{ + return sprintf(buffer, "type=%u code=%u id=%u ", + tuple->dst.u.icmp.type, + tuple->dst.u.icmp.code, + ntohs(tuple->src.u.icmp.id)); +} + +/* Print out the private part of the conntrack. */ +static unsigned int icmp_print_conntrack(char *buffer, + const struct ip_conntrack *conntrack) +{ + return 0; +} + +/* Returns verdict for packet, or -1 for invalid.
*/ +static int icmp_packet(struct ip_conntrack *ct, + struct iphdr *iph, size_t len, + enum ip_conntrack_info ctinfo) +{ + /* FIXME: Should keep count of orig - reply packets: if == 0, + destroy --RR */ + /* Delete connection immediately on reply: won't actually + vanish as we still have skb */ + if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) { + if (del_timer(&ct->timeout)) + ct->timeout.function((unsigned long)ct); + } else + ip_ct_refresh(ct, ICMP_TIMEOUT); + + return NF_ACCEPT; +} + +/* Called when a new connection for this protocol is found: only the types listed in valid_new may start one. */ +static int icmp_new(struct ip_conntrack *conntrack, + struct iphdr *iph, size_t len) +{ + static u_int8_t valid_new[] + = { [ICMP_ECHO] = 1, + [ICMP_TIMESTAMP] = 1, + [ICMP_INFO_REQUEST] = 1, + [ICMP_ADDRESS] = 1 }; + + if (conntrack->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new) + || !valid_new[conntrack->tuplehash[0].tuple.dst.u.icmp.type]) { + /* Can't create a new ICMP `conn' with this. */ + DEBUGP("icmp: can't create new conn with type %u\n", + conntrack->tuplehash[0].tuple.dst.u.icmp.type); + DUMP_TUPLE(&conntrack->tuplehash[0].tuple); + return 0; + } + return 1; +} + +struct ip_conntrack_protocol ip_conntrack_protocol_icmp += { { NULL, NULL }, IPPROTO_ICMP, "icmp", + icmp_pkt_to_tuple, icmp_invert_tuple, icmp_print_tuple, + icmp_print_conntrack, icmp_packet, icmp_new, NULL }; diff -u --recursive --new-file v2.3.99-pre1/linux/net/ipv4/netfilter/ip_conntrack_proto_tcp.c linux/net/ipv4/netfilter/ip_conntrack_proto_tcp.c --- v2.3.99-pre1/linux/net/ipv4/netfilter/ip_conntrack_proto_tcp.c Wed Dec 31 16:00:00 1969 +++ linux/net/ipv4/netfilter/ip_conntrack_proto_tcp.c Fri Mar 17 10:56:20 2000 @@ -0,0 +1,227 @@ +#define __NO_VERSION__ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...)
+#endif + +/* Protects conntrack->proto.tcp_state */ +static DECLARE_RWLOCK(tcp_lock); + +/* FIXME: Examine ipfilter's timeouts and conntrack transitions more + closely. They're more complex. --RR */ + +/* Actually, I believe that neither ipmasq (where this code is stolen + from) nor ipfilter do it exactly right. A new conntrack machine taking + into account packet loss (which creates uncertainty as to exactly + the conntrack of the connection) is required. RSN. --RR */ +enum tcp_conntrack { + TCP_CONNTRACK_NONE, + TCP_CONNTRACK_ESTABLISHED, + TCP_CONNTRACK_SYN_SENT, + TCP_CONNTRACK_SYN_RECV, + TCP_CONNTRACK_FIN_WAIT, + TCP_CONNTRACK_TIME_WAIT, + TCP_CONNTRACK_CLOSE, + TCP_CONNTRACK_CLOSE_WAIT, + TCP_CONNTRACK_LAST_ACK, + TCP_CONNTRACK_LISTEN, + TCP_CONNTRACK_MAX +}; + +static const char *tcp_conntrack_names[] = { + "NONE", + "ESTABLISHED", + "SYN_SENT", + "SYN_RECV", + "FIN_WAIT", + "TIME_WAIT", + "CLOSE", + "CLOSE_WAIT", + "LAST_ACK", + "LISTEN" +}; + /* Postfix unit macros: 30 MINS expands to 30 * 60 *HZ. */ +#define SECS *HZ +#define MINS * 60 SECS +#define HOURS * 60 MINS +#define DAYS * 24 HOURS + + +static unsigned long tcp_timeouts[] += { 30 MINS, /* TCP_CONNTRACK_NONE, */ + 5 DAYS, /* TCP_CONNTRACK_ESTABLISHED, */ + 2 MINS, /* TCP_CONNTRACK_SYN_SENT, */ + 60 SECS, /* TCP_CONNTRACK_SYN_RECV, */ + 2 MINS, /* TCP_CONNTRACK_FIN_WAIT, */ + 2 MINS, /* TCP_CONNTRACK_TIME_WAIT, */ + 10 SECS, /* TCP_CONNTRACK_CLOSE, */ + 60 SECS, /* TCP_CONNTRACK_CLOSE_WAIT, */ + 30 SECS, /* TCP_CONNTRACK_LAST_ACK, */ + 2 MINS, /* TCP_CONNTRACK_LISTEN, */ +}; + +#define sNO TCP_CONNTRACK_NONE +#define sES TCP_CONNTRACK_ESTABLISHED +#define sSS TCP_CONNTRACK_SYN_SENT +#define sSR TCP_CONNTRACK_SYN_RECV +#define sFW TCP_CONNTRACK_FIN_WAIT +#define sTW TCP_CONNTRACK_TIME_WAIT +#define sCL TCP_CONNTRACK_CLOSE +#define sCW TCP_CONNTRACK_CLOSE_WAIT +#define sLA TCP_CONNTRACK_LAST_ACK +#define sLI TCP_CONNTRACK_LISTEN +#define sIV TCP_CONNTRACK_MAX /* marks an invalid transition */ + /* Indexed by [direction][get_conntrack_index()][current state]. */ +static enum tcp_conntrack tcp_conntracks[2][5][TCP_CONNTRACK_MAX] = { + { +/* ORIGINAL */ +/*
sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI */ +/*syn*/ {sSS, sES, sSS, sES, sSS, sSS, sSS, sSS, sSS, sLI }, +/*fin*/ {sTW, sFW, sSS, sTW, sFW, sTW, sCL, sTW, sLA, sLI }, +/*ack*/ {sES, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sES }, +/*rst*/ {sCL, sCL, sSS, sCL, sCL, sTW, sCL, sCL, sCL, sCL }, +/*none*/{sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV } + }, + { +/* REPLY */ +/* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI */ +/*syn*/ {sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR }, +/*fin*/ {sCL, sCW, sSS, sTW, sTW, sTW, sCL, sCW, sLA, sLI }, +/*ack*/ {sCL, sES, sSS, sSR, sFW, sTW, sCL, sCW, sCL, sLI }, +/*rst*/ {sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI }, +/*none*/{sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV } + } +}; + +static int tcp_pkt_to_tuple(const void *datah, size_t datalen, + struct ip_conntrack_tuple *tuple) +{ + const struct tcphdr *hdr = datah; + + tuple->src.u.tcp.port = hdr->source; + tuple->dst.u.tcp.port = hdr->dest; + + return 1; +} + /* The reply tuple just swaps the two ports. */ +static int tcp_invert_tuple(struct ip_conntrack_tuple *tuple, + const struct ip_conntrack_tuple *orig) +{ + tuple->src.u.tcp.port = orig->dst.u.tcp.port; + tuple->dst.u.tcp.port = orig->src.u.tcp.port; + return 1; +} + +/* Print out the per-protocol part of the tuple. */ +static unsigned int tcp_print_tuple(char *buffer, + const struct ip_conntrack_tuple *tuple) +{ + return sprintf(buffer, "sport=%hu dport=%hu ", + ntohs(tuple->src.u.tcp.port), + ntohs(tuple->dst.u.tcp.port)); +} + +/* Print out the private part of the conntrack.
*/ +static unsigned int tcp_print_conntrack(char *buffer, + const struct ip_conntrack *conntrack) +{ + enum tcp_conntrack state; + + READ_LOCK(&tcp_lock); + state = conntrack->proto.tcp_state; + READ_UNLOCK(&tcp_lock); + + return sprintf(buffer, "%s ", tcp_conntrack_names[state]); +} + /* Picks the row of tcp_conntracks for this segment; RST is tested before SYN, FIN and ACK. */ +static unsigned int get_conntrack_index(const struct tcphdr *tcph) +{ + if (tcph->rst) return 3; + else if (tcph->syn) return 0; + else if (tcph->fin) return 1; + else if (tcph->ack) return 2; + else return 4; +} + +/* Returns verdict for packet, or -1 for invalid. */ +static int tcp_packet(struct ip_conntrack *conntrack, + struct iphdr *iph, size_t len, + enum ip_conntrack_info ctinfo) +{ + enum tcp_conntrack newconntrack; + struct tcphdr *tcph = (struct tcphdr *)((u_int32_t *)iph + iph->ihl); + + /* We're guaranteed to have the base header, but maybe not the + options. */ + if (len < (iph->ihl + tcph->doff) * 4) { + DEBUGP("ip_conntrack_tcp: Truncated packet.\n"); + return -1; + } + + WRITE_LOCK(&tcp_lock); + newconntrack + = tcp_conntracks + [CTINFO2DIR(ctinfo)] + [get_conntrack_index(tcph)][conntrack->proto.tcp_state]; + + /* Invalid */ + if (newconntrack == TCP_CONNTRACK_MAX) { + DEBUGP("ip_conntrack_tcp: Invalid dir=%i index=%u conntrack=%u\n", + CTINFO2DIR(ctinfo), get_conntrack_index(tcph), + conntrack->proto.tcp_state); + WRITE_UNLOCK(&tcp_lock); + return -1; + } + + conntrack->proto.tcp_state = newconntrack; + WRITE_UNLOCK(&tcp_lock); + + /* Refresh: need write lock to write to conntrack. NOTE(review): tcp_state is re-read here after tcp_lock was released; newconntrack holds the value just stored. */ + ip_ct_refresh(conntrack, tcp_timeouts[conntrack->proto.tcp_state]); + return NF_ACCEPT; +} + +/* Called when a new connection for this protocol found.
*/ +static int tcp_new(struct ip_conntrack *conntrack, + struct iphdr *iph, size_t len) +{ + enum tcp_conntrack newconntrack; + struct tcphdr *tcph = (struct tcphdr *)((u_int32_t *)iph + iph->ihl); + + /* Don't need lock here: this conntrack not in circulation yet. Index 0 selects the ORIGINAL half of the table. */ + newconntrack + = tcp_conntracks[0][get_conntrack_index(tcph)] + [TCP_CONNTRACK_NONE]; + + /* Invalid: delete conntrack */ + if (newconntrack == TCP_CONNTRACK_MAX) { + DEBUGP("ip_conntrack_tcp: invalid new deleting.\n"); + return 0; + } else { + conntrack->proto.tcp_state = newconntrack; + ip_ct_refresh(conntrack, tcp_timeouts[conntrack->proto.tcp_state]); + } + return 1; +} + +struct ip_conntrack_protocol ip_conntrack_protocol_tcp += { { NULL, NULL }, IPPROTO_TCP, "tcp", + tcp_pkt_to_tuple, tcp_invert_tuple, tcp_print_tuple, tcp_print_conntrack, + tcp_packet, tcp_new, NULL }; + /* Nothing to initialise yet; always returns 0. */ +int __init ip_conntrack_protocol_tcp_init(void) +{ + return 0; +} diff -u --recursive --new-file v2.3.99-pre1/linux/net/ipv4/netfilter/ip_conntrack_proto_udp.c linux/net/ipv4/netfilter/ip_conntrack_proto_udp.c --- v2.3.99-pre1/linux/net/ipv4/netfilter/ip_conntrack_proto_udp.c Wed Dec 31 16:00:00 1969 +++ linux/net/ipv4/netfilter/ip_conntrack_proto_udp.c Fri Mar 17 10:56:20 2000 @@ -0,0 +1,65 @@ +#include +#include +#include +#include +#include +#include + +#define UDP_TIMEOUT (60*HZ) + +static int udp_pkt_to_tuple(const void *datah, size_t datalen, + struct ip_conntrack_tuple *tuple) +{ + const struct udphdr *hdr = datah; + + tuple->src.u.udp.port = hdr->source; + tuple->dst.u.udp.port = hdr->dest; + + return 1; +} + /* The reply tuple just swaps the two ports. */ +static int udp_invert_tuple(struct ip_conntrack_tuple *tuple, + const struct ip_conntrack_tuple *orig) +{ + tuple->src.u.udp.port = orig->dst.u.udp.port; + tuple->dst.u.udp.port = orig->src.u.udp.port; + return 1; +} + +/* Print out the per-protocol part of the tuple.
*/ +static unsigned int udp_print_tuple(char *buffer, + const struct ip_conntrack_tuple *tuple) +{ + return sprintf(buffer, "sport=%hu dport=%hu ", + ntohs(tuple->src.u.udp.port), + ntohs(tuple->dst.u.udp.port)); +} + +/* Print out the private part of the conntrack (nothing is printed for UDP). */ +static unsigned int udp_print_conntrack(char *buffer, + const struct ip_conntrack *conntrack) +{ + return 0; +} + +/* Returns verdict for packet; here it only refreshes the timeout and accepts. */ +static int udp_packet(struct ip_conntrack *conntrack, + struct iphdr *iph, size_t len, + enum ip_conntrack_info conntrackinfo) +{ + /* Refresh. */ + ip_ct_refresh(conntrack, UDP_TIMEOUT); + return NF_ACCEPT; +} + +/* Called when a new connection for this protocol is found; always accepted. */ +static int udp_new(struct ip_conntrack *conntrack, + struct iphdr *iph, size_t len) +{ + return 1; +} + +struct ip_conntrack_protocol ip_conntrack_protocol_udp += { { NULL, NULL }, IPPROTO_UDP, "udp", + udp_pkt_to_tuple, udp_invert_tuple, udp_print_tuple, udp_print_conntrack, + udp_packet, udp_new, NULL }; diff -u --recursive --new-file v2.3.99-pre1/linux/net/ipv4/netfilter/ip_conntrack_standalone.c linux/net/ipv4/netfilter/ip_conntrack_standalone.c --- v2.3.99-pre1/linux/net/ipv4/netfilter/ip_conntrack_standalone.c Wed Dec 31 16:00:00 1969 +++ linux/net/ipv4/netfilter/ip_conntrack_standalone.c Sat Mar 18 16:51:35 2000 @@ -0,0 +1,297 @@ +/* This file contains all the functions required for the standalone + ip_conntrack module. + + These are not required by the compatibility layer. +*/ + +/* (c) 1999 Paul `Rusty' Russell. Licenced under the GNU General + Public Licence.
*/ + +#ifdef MODULE +#define EXPORT_SYMTAB +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_conntrack_lock) +#define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_conntrack_lock) + +#include +#include +#include +#include +#include + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) +#endif + +struct module *ip_conntrack_module = THIS_MODULE; + /* Formats the two addresses, then lets the protocol append its own part. Returns the number of bytes written. */ +static unsigned int +print_tuple(char *buffer, const struct ip_conntrack_tuple *tuple, + struct ip_conntrack_protocol *proto) +{ + int len; + + len = sprintf(buffer, "src=%u.%u.%u.%u dst=%u.%u.%u.%u ", + NIPQUAD(tuple->src.ip), NIPQUAD(tuple->dst.ip)); + + len += proto->print_tuple(buffer + len, tuple); + + return len; +} + +/* FIXME: Don't print source proto part. --RR */ +static unsigned int +print_expect(char *buffer, const struct ip_conntrack_expect *expect) +{ + unsigned int len; + + len = sprintf(buffer, "EXPECTING: proto=%u ", + expect->tuple.dst.protonum); + len += print_tuple(buffer + len, &expect->tuple, + __find_proto(expect->tuple.dst.protonum)); + len += sprintf(buffer + len, "\n"); + return len; +} + /* One line per conntrack: protocol name and number, remaining timeout (jiffies/HZ), protocol state, then both tuples. */ +static unsigned int +print_conntrack(char *buffer, const struct ip_conntrack *conntrack) +{ + unsigned int len; + struct ip_conntrack_protocol *proto + = __find_proto(conntrack->tuplehash[IP_CT_DIR_ORIGINAL] + .tuple.dst.protonum); + + len = sprintf(buffer, "%-8s %u %lu ", + proto->name, + conntrack->tuplehash[IP_CT_DIR_ORIGINAL] + .tuple.dst.protonum, + timer_pending(&conntrack->timeout) + ?
(conntrack->timeout.expires - jiffies)/HZ : 0); + + len += proto->print_conntrack(buffer + len, conntrack); + len += print_tuple(buffer + len, + &conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple, + proto); + if (!(conntrack->status & IPS_SEEN_REPLY)) + len += sprintf(buffer + len, "[UNREPLIED] "); + len += print_tuple(buffer + len, + &conntrack->tuplehash[IP_CT_DIR_REPLY].tuple, + proto); + len += sprintf(buffer + len, "\n"); + + return len; +} + +/* Returns true when finished. NOTE(review): the entry is formatted into buffer before the maxlen check, so the last one can be written past maxlen -- confirm the caller leaves slack. */ +static int +conntrack_iterate(const struct ip_conntrack_tuple_hash *hash, + char *buffer, off_t offset, off_t *upto, + unsigned int *len, unsigned int maxlen) +{ + unsigned int newlen; + IP_NF_ASSERT(hash->ctrack); + + MUST_BE_READ_LOCKED(&ip_conntrack_lock); + + /* Only count originals */ + if (DIRECTION(hash)) + return 0; + + if ((*upto)++ < offset) + return 0; + + newlen = print_conntrack(buffer + *len, hash->ctrack); + if (*len + newlen > maxlen) + return 1; + else *len += newlen; + + return 0; +} + +static int +list_conntracks(char *buffer, char **start, off_t offset, int length) +{ + unsigned int i; + unsigned int len = 0; + off_t upto = 0; + struct list_head *e; + + READ_LOCK(&ip_conntrack_lock); + /* Traverse hash; print originals then reply. */ + for (i = 0; i < ip_conntrack_htable_size; i++) { + if (LIST_FIND(&ip_conntrack_hash[i], conntrack_iterate, + struct ip_conntrack_tuple_hash *, + buffer, offset, &upto, &len, length)) + goto finished; + } + + /* Now iterate through expecteds.
*/ + for (e = expect_list.next; e != &expect_list; e = e->next) { + unsigned int last_len; + struct ip_conntrack_expect *expect + = (struct ip_conntrack_expect *)e; + if (upto++ < offset) continue; + + last_len = len; + len += print_expect(buffer + len, expect); + if (len > length) { + len = last_len; + goto finished; + } + } + + finished: + READ_UNLOCK(&ip_conntrack_lock); + + /* `start' hack - see fs/proc/generic.c line ~165 */ + *start = (char *)((unsigned int)upto - offset); + return len; +} + +static unsigned int ip_refrag(unsigned int hooknum, + struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + struct rtable *rt = (struct rtable *)(*pskb)->dst; + + /* Local packets are never produced too large for their + interface. We defragment them at LOCAL_OUT, however, + so we have to refragment them here. */ + if ((*pskb)->len > rt->u.dst.pmtu) { + DEBUGP("ip_conntrack: refragm %p (size %u) to %u (okfn %p)\n", + *pskb, (*pskb)->len, rt->u.dst.pmtu, okfn); + /* No hook can be after us, so this should be OK. */ + ip_fragment(*pskb, okfn); + return NF_STOLEN; + } + return NF_ACCEPT; +} + +/* Connection tracking may drop packets, but never alters them, so + make it the first hook. */ +static struct nf_hook_ops ip_conntrack_in_ops += { { NULL, NULL }, ip_conntrack_in, PF_INET, NF_IP_PRE_ROUTING, + NF_IP_PRI_CONNTRACK }; +static struct nf_hook_ops ip_conntrack_local_out_ops += { { NULL, NULL }, ip_conntrack_in, PF_INET, NF_IP_LOCAL_OUT, + NF_IP_PRI_CONNTRACK }; +/* Refragmenter; last chance.
*/ +static struct nf_hook_ops ip_conntrack_out_ops += { { NULL, NULL }, ip_refrag, PF_INET, NF_IP_POST_ROUTING, NF_IP_PRI_LAST }; + /* init != 0: set everything up, unwinding on failure. init == 0: tear everything down. The cleanup labels fall through in reverse order of setup. */ +static int init_or_cleanup(int init) +{ + int ret = 0; + + if (!init) goto cleanup; + + ret = ip_conntrack_init(); + if (ret < 0) + goto cleanup_nothing; + + /* NOTE(review): the result of proc_net_create() is not checked. */ proc_net_create("ip_conntrack",0,list_conntracks); + ret = nf_register_hook(&ip_conntrack_in_ops); + if (ret < 0) { + printk("ip_conntrack: can't register in hook.\n"); + goto cleanup_init; + } + ret = nf_register_hook(&ip_conntrack_local_out_ops); + if (ret < 0) { + printk("ip_conntrack: can't register local out hook.\n"); + goto cleanup_inops; + } + ret = nf_register_hook(&ip_conntrack_out_ops); + if (ret < 0) { + printk("ip_conntrack: can't register post-routing hook.\n"); + goto cleanup_inandlocalops; + } + + return ret; + + cleanup: + nf_unregister_hook(&ip_conntrack_out_ops); + cleanup_inandlocalops: + nf_unregister_hook(&ip_conntrack_local_out_ops); + cleanup_inops: + nf_unregister_hook(&ip_conntrack_in_ops); + cleanup_init: + proc_net_remove("ip_conntrack"); + ip_conntrack_cleanup(); + cleanup_nothing: + return ret; +} + +/* FIXME: Allow NULL functions and sub in pointers to generic for + them.
--RR */ +int ip_conntrack_protocol_register(struct ip_conntrack_protocol *proto) +{ + int ret = 0; + struct list_head *i; + + WRITE_LOCK(&ip_conntrack_lock); + /* Refuse a second registration for the same protocol number. */ for (i = protocol_list.next; i != &protocol_list; i = i->next) { + if (((struct ip_conntrack_protocol *)i)->proto + == proto->proto) { + ret = -EBUSY; + goto out; + } + } + + list_prepend(&protocol_list, proto); + MOD_INC_USE_COUNT; + + out: + WRITE_UNLOCK(&ip_conntrack_lock); + return ret; +} + +/* FIXME: Implement this --RR */ +#if 0 +void ip_conntrack_protocol_unregister(struct ip_conntrack_protocol *proto) +{ +} +#endif + +static int __init init(void) +{ + return init_or_cleanup(1); +} + +static void __exit fini(void) +{ + init_or_cleanup(0); +} + +module_init(init); +module_exit(fini); + +EXPORT_SYMBOL(ip_conntrack_protocol_register); +EXPORT_SYMBOL(invert_tuplepr); +EXPORT_SYMBOL(ip_conntrack_alter_reply); +EXPORT_SYMBOL(ip_conntrack_destroyed); +EXPORT_SYMBOL(ip_conntrack_get); +EXPORT_SYMBOL(ip_conntrack_module); +EXPORT_SYMBOL(ip_conntrack_helper_register); +EXPORT_SYMBOL(ip_conntrack_helper_unregister); +EXPORT_SYMBOL(ip_conntrack_lock); +EXPORT_SYMBOL(find_proto); +EXPORT_SYMBOL(get_tuple); +EXPORT_SYMBOL(ip_ct_selective_cleanup); +EXPORT_SYMBOL(ip_ct_refresh); +EXPORT_SYMBOL(ip_conntrack_expect_related); +EXPORT_SYMBOL(ip_conntrack_tuple_taken); +EXPORT_SYMBOL(ip_ct_gather_frags); diff -u --recursive --new-file v2.3.99-pre1/linux/net/ipv4/netfilter/ip_fw_compat.c linux/net/ipv4/netfilter/ip_fw_compat.c --- v2.3.99-pre1/linux/net/ipv4/netfilter/ip_fw_compat.c Wed Dec 31 16:00:00 1969 +++ linux/net/ipv4/netfilter/ip_fw_compat.c Sat Mar 18 16:51:35 2000 @@ -0,0 +1,238 @@ +/* Compatibility framework for ipchains and ipfwadm support; designed + to look as much like the 2.2 infrastructure as possible.
*/
+struct notifier_block;
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+EXPORT_NO_SYMBOLS;
+
+/* The single registered firewall module, or NULL when none is. */
+static struct firewall_ops *fwops;
+
+/* From ip_fw_compat_redir.c */
+extern unsigned int
+do_redirect(struct sk_buff *skb,
+	    const struct net_device *dev,
+	    u_int16_t redirpt);
+
+extern void
+check_for_redirect(struct sk_buff *skb);
+
+extern void
+check_for_unredirect(struct sk_buff *skb);
+
+/* From ip_fw_compat_masq.c */
+extern unsigned int
+do_masquerade(struct sk_buff **pskb, const struct net_device *dev);
+
+extern unsigned int
+check_for_demasq(struct sk_buff **pskb);
+
+extern int __init masq_init(void);
+extern void masq_cleanup(void);
+
+/* They call these; we do what they want. */
+/* Install `fw' as the firewall.  Returns -EINVAL for any family other
+   than PF_INET, -EBUSY when a firewall is already installed, else 0. */
+int register_firewall(int pf, struct firewall_ops *fw)
+{
+	if (pf != PF_INET) {
+		printk("Attempt to register non-IP firewall module.\n");
+		return -EINVAL;
+	}
+	if (fwops) {
+		printk("Attempt to register multiple firewall modules.\n");
+		return -EBUSY;
+	}
+
+	fwops = fw;
+	return 0;
+}
+
+/* Remove the installed firewall.  Both arguments are ignored: whatever
+   is registered gets cleared, and the call always returns 0. */
+int unregister_firewall(int pf, struct firewall_ops *fw)
+{
+	fwops = NULL;
+	return 0;
+}
+
+/* Netfilter hook shared by the PRE_ROUTING, FORWARD and POST_ROUTING
+   registrations below.  Runs the matching fwops callbacks for the hook
+   and maps the resulting FW_* verdict onto an NF_* verdict; FW_BLOCK
+   (also the initial value of `ret') ends up as NF_DROP.  `okfn' is
+   unused. */
+static unsigned int
+fw_in(unsigned int hooknum,
+      struct sk_buff **pskb,
+      const struct net_device *in,
+      const struct net_device *out,
+      int (*okfn)(struct sk_buff *))
+{
+	int ret = FW_BLOCK;
+	u_int16_t redirpt;
+
+	(*pskb)->nfcache |= NFC_UNKNOWN;
+	(*pskb)->ip_summed = CHECKSUM_NONE;
+
+	switch (hooknum) {
+	case NF_IP_PRE_ROUTING:
+		if (fwops->fw_acct_in)
+			fwops->fw_acct_in(fwops, PF_INET,
+					  (struct net_device *)in,
+					  (*pskb)->nh.raw, &redirpt, pskb);
+
+		/* Fragments are reassembled before the input chain sees
+		   them; a NULL result means the skb was consumed. */
+		if ((*pskb)->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) {
+			*pskb = ip_ct_gather_frags(*pskb);
+
+			if (!*pskb)
+				return NF_STOLEN;
+		}
+
+		ret = fwops->fw_input(fwops, PF_INET, (struct net_device *)in,
+				      (*pskb)->nh.raw, &redirpt, pskb);
+		break;
+
+	case NF_IP_FORWARD:
+		/* Connection will only be set if it was
+		   demasqueraded: if so, skip forward chain. */
+		if ((*pskb)->nfct)
+			ret = FW_ACCEPT;
+		else ret = fwops->fw_forward(fwops, PF_INET,
+					     (struct net_device *)out,
+					     (*pskb)->nh.raw, &redirpt, pskb);
+		break;
+
+	case NF_IP_POST_ROUTING:
+		ret = fwops->fw_output(fwops, PF_INET,
+				       (struct net_device *)out,
+				       (*pskb)->nh.raw, &redirpt, pskb);
+		/* Account against the outgoing device, the same one the
+		   output chain above was given (this used to pass `in'). */
+		if (fwops->fw_acct_out && (ret == FW_ACCEPT || ret == FW_SKIP))
+			fwops->fw_acct_out(fwops, PF_INET,
+					   (struct net_device *)out,
+					   (*pskb)->nh.raw, &redirpt, pskb);
+		break;
+	}
+
+	switch (ret) {
+	case FW_REJECT: {
+		/* Alexey says:
+		 *
+		 * Generally, routing is THE FIRST thing to make, when
+		 * packet enters IP stack. Before packet is routed you
+		 * cannot call any service routines from IP stack.  */
+		struct iphdr *iph = (*pskb)->nh.iph;
+
+		if ((*pskb)->dst != NULL
+		    || ip_route_input(*pskb, iph->daddr, iph->saddr, iph->tos,
+				      (struct net_device *)in) == 0)
+			icmp_send(*pskb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH,
+				  0);
+		return NF_DROP;
+	}
+
+	case FW_ACCEPT:
+	case FW_SKIP:
+		if (hooknum == NF_IP_PRE_ROUTING) {
+			check_for_demasq(pskb);
+			check_for_redirect(*pskb);
+		} else if (hooknum == NF_IP_POST_ROUTING)
+			check_for_unredirect(*pskb);
+
+		return NF_ACCEPT;
+
+	case FW_MASQUERADE:
+		if (hooknum == NF_IP_FORWARD)
+			return do_masquerade(pskb, out);
+		else return NF_ACCEPT;
+
+	case FW_REDIRECT:
+		if (hooknum == NF_IP_PRE_ROUTING)
+			return do_redirect(*pskb, in, redirpt);
+		else return NF_ACCEPT;
+
+	default:
+		/* FW_BLOCK */
+		return NF_DROP;
+	}
+}
+
+extern int ip_fw_ctl(int optval, void *user, unsigned int len);
+
+/* Sockopt handler: requires CAP_NET_ADMIN (-EPERM otherwise), then
+   returns the negated result of ip_fw_ctl(). */
+static int sock_fn(struct sock *sk, int optval, void *user, unsigned int len)
+{
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	return -ip_fw_ctl(optval, user, len);
+}
+
+static struct nf_hook_ops preroute_ops
+= { { NULL, NULL }, fw_in, PF_INET, NF_IP_PRE_ROUTING, NF_IP_PRI_FILTER };
+
+static struct nf_hook_ops postroute_ops
+= { { NULL, NULL }, fw_in, PF_INET, NF_IP_POST_ROUTING, NF_IP_PRI_FILTER };
+
+static struct nf_hook_ops forward_ops
+= { { NULL, NULL }, fw_in, PF_INET, NF_IP_FORWARD, NF_IP_PRI_FILTER };
+
+static struct nf_sockopt_ops sock_ops
+= { { NULL, NULL }, PF_INET, 64, 64 + 1024 + 1, &sock_fn, 0, 0, NULL,
+    0, NULL };
+
+extern int ipfw_init_or_cleanup(int init);
+
+/* Bring the compatibility layer up (init != 0) or tear it down
+   (init == 0).  On the init path a failing step jumps to the label that
+   undoes only the steps already completed; teardown enters at `cleanup'
+   and falls through every label. */
+static int init_or_cleanup(int init)
+{
+	int ret = 0;
+
+	if (!init) goto cleanup;
+
+	ret = nf_register_sockopt(&sock_ops);
+
+	if (ret < 0)
+		goto cleanup_nothing;
+
+	ret = ipfw_init_or_cleanup(1);
+	if (ret < 0)
+		goto cleanup_sockopt;
+
+	ret = masq_init();
+	if (ret < 0)
+		goto cleanup_ipfw;
+
+	nf_register_hook(&preroute_ops);
+	nf_register_hook(&postroute_ops);
+	nf_register_hook(&forward_ops);
+
+	return ret;
+
+ cleanup:
+	nf_unregister_hook(&preroute_ops);
+	nf_unregister_hook(&postroute_ops);
+	nf_unregister_hook(&forward_ops);
+
+	masq_cleanup();
+
+ cleanup_ipfw:
+	ipfw_init_or_cleanup(0);
+
+ cleanup_sockopt:
+	nf_unregister_sockopt(&sock_ops);
+
+ cleanup_nothing:
+	return ret;
+}
+
+static int __init init(void)
+{
+	return init_or_cleanup(1);
+}
+
+static void __exit fini(void)
+{
+	init_or_cleanup(0);
+}
+
+module_init(init);
+module_exit(fini);
diff -u --recursive --new-file v2.3.99-pre1/linux/net/ipv4/netfilter/ip_fw_compat_masq.c linux/net/ipv4/netfilter/ip_fw_compat_masq.c
--- v2.3.99-pre1/linux/net/ipv4/netfilter/ip_fw_compat_masq.c Wed Dec 31 16:00:00 1969
+++ linux/net/ipv4/netfilter/ip_fw_compat_masq.c Fri Mar 17 10:56:20 2000
@@ -0,0 +1,288 @@
+/* Masquerading compatibility layer.
+
+   Note that there are no restrictions on other programs binding to
+   ports 61000:65095 (in 2.0 and 2.2 they get EADDRINUSE).  Just DONT
+   DO IT.
+ */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_conntrack_lock)
+#define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_conntrack_lock)
+
+#include
+#include
+#include
+#include
+#include
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(format, args...)
+#endif
+
+/* Masquerade a forwarded packet: hand it to connection tracking, set up
+   a source mapping onto ports 61000-65095 of the address chosen by
+   inet_select_addr() for the route to the destination (first packet of
+   a connection only), then apply the bindings.  Returns an NF_* verdict;
+   anything other than ICMP, TCP or UDP is dropped. */
+unsigned int
+do_masquerade(struct sk_buff **pskb, const struct net_device *dev)
+{
+	struct iphdr *iph = (*pskb)->nh.iph;
+	struct ip_nat_info *info;
+	enum ip_conntrack_info ctinfo;
+	struct ip_conntrack *ct;
+	unsigned int ret;
+
+	/* Sorry, only ICMP, TCP and UDP. */
+	if (iph->protocol != IPPROTO_ICMP
+	    && iph->protocol != IPPROTO_TCP
+	    && iph->protocol != IPPROTO_UDP)
+		return NF_DROP;
+
+	/* Feed it to connection tracking; in fact we're in NF_IP_FORWARD,
+	   but connection tracking doesn't expect that */
+	ret = ip_conntrack_in(NF_IP_POST_ROUTING, pskb, dev, NULL, NULL);
+	if (ret != NF_ACCEPT) {
+		DEBUGP("ip_conntrack_in returned %u.\n", ret);
+		return ret;
+	}
+
+	ct = ip_conntrack_get(*pskb, &ctinfo);
+
+	if (!ct) {
+		DEBUGP("ip_conntrack_in set to invalid conntrack.\n");
+		return NF_DROP;
+	}
+
+	info = &ct->nat.info;
+
+	WRITE_LOCK(&ip_nat_lock);
+	/* Setup the masquerade, if not already */
+	if (!info->initialized) {
+		u_int32_t newsrc;
+		struct rtable *rt;
+		struct ip_nat_multi_range range;
+
+		/* Pass 0 instead of saddr, since it's going to be changed
+		   anyway. */
+		if (ip_route_output(&rt, iph->daddr, 0, 0, 0) != 0) {
+			/* Drop the lock on this early exit too, otherwise
+			   the next taker of ip_nat_lock deadlocks. */
+			WRITE_UNLOCK(&ip_nat_lock);
+			DEBUGP("ipnat_rule_masquerade: Can't reroute.\n");
+			return NF_DROP;
+		}
+		newsrc = inet_select_addr(rt->u.dst.dev, rt->rt_gateway,
+					  RT_SCOPE_UNIVERSE);
+		ip_rt_put(rt);
+		range = ((struct ip_nat_multi_range)
+			 { 1,
+			   {{IP_NAT_RANGE_MAP_IPS|IP_NAT_RANGE_PROTO_SPECIFIED,
+			     newsrc, newsrc,
+			     { htons(61000) }, { htons(65095) } } } });
+
+		ip_nat_setup_info(ct, &range, NF_IP_POST_ROUTING);
+		place_in_hashes(ct, info);
+		info->initialized = 1;
+	} else
+		DEBUGP("Masquerading already done on this conn.\n");
+	WRITE_UNLOCK(&ip_nat_lock);
+
+	return do_bindings(ct, ctinfo, info, NF_IP_POST_ROUTING, pskb);
+}
+
+/* Incoming packet: if it is a reply on a connection that conntrack
+   already knows, feed it to conntrack and apply the PRE_ROUTING
+   bindings.  ICMP errors accepted by icmp_error_track() are translated
+   instead.  Everything else is left alone and NF_ACCEPT is returned. */
+unsigned int
+check_for_demasq(struct sk_buff **pskb)
+{
+	struct ip_conntrack_tuple tuple;
+	struct iphdr *iph = (*pskb)->nh.iph;
+	struct ip_conntrack_protocol *protocol;
+	struct ip_conntrack_tuple_hash *h;
+	enum ip_conntrack_info ctinfo;
+	int ret;
+
+	protocol = find_proto(iph->protocol);
+
+	/* We don't feed packets to conntrack system unless we know
+	   they're part of a connection already established by an
+	   explicit masq command. */
+	switch (iph->protocol) {
+	case IPPROTO_ICMP:
+		/* ICMP errors. */
+		if (icmp_error_track(*pskb)) {
+			/* If it is valid, translate it */
+			if ((*pskb)->nfct) {
+				struct ip_conntrack *ct
+					= (struct ip_conntrack *)
+					(*pskb)->nfct->master;
+				enum ip_conntrack_dir dir;
+
+				/* The index of nfct within ct->infos[]
+				   tells which direction this packet is. */
+				if ((*pskb)->nfct-ct->infos >= IP_CT_IS_REPLY)
+					dir = IP_CT_DIR_REPLY;
+				else
+					dir = IP_CT_DIR_ORIGINAL;
+
+				icmp_reply_translation(*pskb,
+						       ct,
+						       NF_IP_PRE_ROUTING,
+						       dir);
+			}
+			return NF_ACCEPT;
+		}
+		/* Fall thru... */
+	case IPPROTO_TCP:
+	case IPPROTO_UDP:
+		if (!get_tuple(iph, (*pskb)->len, &tuple, protocol)) {
+			printk("ip_fw_compat_masq: Couldn't get tuple\n");
+			return NF_ACCEPT;
+		}
+		break;
+
+	default:
+		/* Not ours... */
+		return NF_ACCEPT;
+	}
+	h = ip_conntrack_find_get(&tuple, NULL);
+
+	/* MUST be found, and MUST be reply. */
+	if (h && DIRECTION(h) == 1) {
+		ret = ip_conntrack_in(NF_IP_PRE_ROUTING, pskb,
+				      NULL, NULL, NULL);
+
+		/* Put back the reference gained from find_get */
+		nf_conntrack_put(&h->ctrack->infos[0]);
+		if (ret == NF_ACCEPT) {
+			struct ip_conntrack *ct;
+			ct = ip_conntrack_get(*pskb, &ctinfo);
+
+			if (ct) {
+				struct ip_nat_info *info = &ct->nat.info;
+
+				do_bindings(ct, ctinfo, info,
+					    NF_IP_PRE_ROUTING,
+					    pskb);
+			} else
+				printk("ip_fw_compat_masq: conntrack"
+				       " didn't like\n");
+		}
+	} else {
+		if (h)
+			/* Put back the reference gained from find_get */
+			nf_conntrack_put(&h->ctrack->infos[0]);
+		ret = NF_ACCEPT;
+	}
+
+	return ret;
+}
+
+/* Timeouts are not configurable here: log that and report success. */
+int ip_fw_masq_timeouts(void *user, int len)
+{
+	printk("Sorry: masquerading timeouts set 5DAYS/2MINS/60SECS\n");
+	return 0;
+}
+
+/* Protocol number -> name used in the /proc listing. */
+static const char *masq_proto_name(u_int16_t protonum)
+{
+	switch (protonum) {
+	case IPPROTO_TCP: return "TCP";
+	case IPPROTO_UDP: return "UDP";
+	case IPPROTO_ICMP: return "ICMP";
+	default: return "MORE-CAFFIENE-FOR-RUSTY";
+	}
+}
+
+/* Format one connection as a line padded to 127 characters plus a
+   newline, written at `buffer'.  Returns the sprintf() count. */
+static unsigned int
+print_masq(char *buffer, const struct ip_conntrack *conntrack)
+{
+	char temp[129];
+
+	/* This is for backwards compatibility, but ick!.
+	   We should never export jiffies to userspace.
+	*/
+	sprintf(temp,"%s %08X:%04X %08X:%04X %04X %08X %6d %6d %7lu",
+		masq_proto_name(conntrack->tuplehash[0].tuple.dst.protonum),
+		ntohl(conntrack->tuplehash[0].tuple.src.ip),
+		ntohs(conntrack->tuplehash[0].tuple.src.u.all),
+		ntohl(conntrack->tuplehash[0].tuple.dst.ip),
+		ntohs(conntrack->tuplehash[0].tuple.dst.u.all),
+		ntohs(conntrack->tuplehash[1].tuple.dst.u.all),
+		/* Sorry, no init_seq, delta or previous_delta (yet). */
+		0, 0, 0,
+		conntrack->timeout.expires - jiffies);
+
+	return sprintf(buffer, "%-127s\n", temp);
+}
+
+/* Returns true when finished.
*/
+/* LIST_FIND callback for the /proc listing.  Skips reply-direction
+   entries and the first `offset' originals, then appends one formatted
+   line at buffer + *len.  Returns 1 once a line no longer fits within
+   `maxlen', which stops the walk.
+   NOTE(review): the line is written before the length check, so this
+   depends on the caller's buffer having slack beyond `maxlen'. */
+static int
+masq_iterate(const struct ip_conntrack_tuple_hash *hash,
+	     char *buffer, off_t offset, off_t *upto,
+	     unsigned int *len, unsigned int maxlen)
+{
+	unsigned int newlen;
+
+	IP_NF_ASSERT(hash->ctrack);
+
+	/* Only count originals */
+	if (DIRECTION(hash))
+		return 0;
+
+	if ((*upto)++ < offset)
+		return 0;
+
+	newlen = print_masq(buffer + *len, hash->ctrack);
+	if (*len + newlen > maxlen)
+		return 1;
+	else *len += newlen;
+
+	return 0;
+}
+
+/* Everything in the hash is masqueraded. */
+/* get_info handler behind /proc/net/ip_masquerade: walks every hash
+   bucket under the conntrack read lock and returns the number of bytes
+   placed in `buffer'. */
+static int
+masq_procinfo(char *buffer, char **start, off_t offset, int length)
+{
+	unsigned int i;
+	/* Unsigned to match the `unsigned int *len' that masq_iterate
+	   takes; this used to be an int passed through a mismatched
+	   pointer type. */
+	unsigned int len = 0;
+	off_t upto = 0;
+
+	READ_LOCK(&ip_conntrack_lock);
+	/* Traverse hash; print originals then reply. */
+	for (i = 0; i < ip_conntrack_htable_size; i++) {
+		if (LIST_FIND(&ip_conntrack_hash[i], masq_iterate,
+			      struct ip_conntrack_tuple_hash *,
+			      buffer, offset, &upto, &len, length))
+			break;
+	}
+	READ_UNLOCK(&ip_conntrack_lock);
+
+	/* `start' hack - see fs/proc/generic.c line ~165 */
+	*start = (char *)((unsigned int)upto - offset);
+	return len;
+}
+
+/* Bring up conntrack, then NAT, then the /proc entry.  Conntrack is
+   torn down again if NAT fails to start.  Returns 0 or the first
+   failing step's error. */
+int __init masq_init(void)
+{
+	int ret;
+
+	ret = ip_conntrack_init();
+	if (ret == 0) {
+		ret = ip_nat_init();
+		if (ret == 0)
+			proc_net_create("ip_masquerade", 0, masq_procinfo);
+		else
+			ip_conntrack_cleanup();
+	}
+
+	return ret;
+}
+
+/* Undo masq_init(). */
+void masq_cleanup(void)
+{
+	ip_nat_cleanup();
+	ip_conntrack_cleanup();
+	proc_net_remove("ip_masquerade");
+}
diff -u --recursive --new-file v2.3.99-pre1/linux/net/ipv4/netfilter/ip_fw_compat_redir.c linux/net/ipv4/netfilter/ip_fw_compat_redir.c
--- v2.3.99-pre1/linux/net/ipv4/netfilter/ip_fw_compat_redir.c Wed Dec 31 16:00:00 1969
+++ linux/net/ipv4/netfilter/ip_fw_compat_redir.c Sat Mar 18 16:51:35 2000
@@ -0,0 +1,284 @@
+/* This is a file to handle the "simple" NAT cases (redirect and
+   masquerade) required for the compatibility layer.
+
+   `bind to foreign address' and `getpeername' hacks are not
+   supported.
+
+   FIXME: Timing is overly simplistic.
If anyone complains, make it
+   use conntrack.
+*/
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+/* One lock covers the `redirs' list; both list assertions map onto it. */
+static DECLARE_LOCK(redir_lock);
+#define ASSERT_READ_LOCK(x) MUST_BE_LOCKED(&redir_lock)
+#define ASSERT_WRITE_LOCK(x) MUST_BE_LOCKED(&redir_lock)
+
+#include
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(format, args...)
+#endif
+
+/* NOTE(review): the debug variant ends in `while(0);' -- the trailing
+   semicolon makes `if (c) IP_NF_ASSERT(x); else ...' a syntax error.
+   Left as found because callers are outside this view. */
+#ifdef CONFIG_NETFILTER_DEBUG
+#define IP_NF_ASSERT(x)						 \
+do {									 \
+	if (!(x))							 \
+		/* Wooah!  I'm tripping my conntrack in a frenzy of	 \
+		   netplay... */					 \
+		printk("ASSERT: %s:%i(%s)\n",				 \
+		       __FILE__, __LINE__, __FUNCTION__);		 \
+} while(0);
+#else
+#define IP_NF_ASSERT(x)
+#endif
+
+/* Incrementally patch a checksum: folds { oldvalinv, newval } into
+   `oldcheck'.  Callers pass the complement of the value being replaced
+   as `oldvalinv'. */
+static u_int16_t
+cheat_check(u_int32_t oldvalinv, u_int32_t newval, u_int16_t oldcheck)
+{
+	u_int32_t diffs[] = { oldvalinv, newval };
+	return csum_fold(csum_partial((char *)diffs, sizeof(diffs),
+				      oldcheck^0xFFFF));
+}
+
+/* The addresses/ports a connection arrived with, and the destination
+   it was rewritten to. */
+struct redir_core {
+	u_int32_t orig_srcip, orig_dstip;
+	u_int16_t orig_sport, orig_dport;
+
+	u_int32_t new_dstip;
+	u_int16_t new_dport;
+};
+
+/* One tracked TCP redirection, linked on `redirs'. */
+struct redir
+{
+	struct list_head list;
+	struct redir_core core;
+	struct timer_list destroyme;
+};
+
+static LIST_HEAD(redirs);
+
+/* LIST_FIND predicate: true when all four original fields match. */
+static int
+redir_cmp(const struct redir *i,
+	  u_int32_t orig_srcip, u_int32_t orig_dstip,
+	  u_int16_t orig_sport, u_int16_t orig_dport)
+{
+	return (i->core.orig_srcip == orig_srcip
+		&& i->core.orig_dstip == orig_dstip
+		&& i->core.orig_sport == orig_sport
+		&& i->core.orig_dport == orig_dport);
+}
+
+/* Search for an existing redirection of the TCP packet.
*/
+/* Look up the entry recorded for this original 4-tuple; NULL if none. */
+static struct redir *
+find_redir(u_int32_t orig_srcip, u_int32_t orig_dstip,
+	   u_int16_t orig_sport, u_int16_t orig_dport)
+{
+	struct redir *match;
+
+	match = LIST_FIND(&redirs, redir_cmp, struct redir *,
+			  orig_srcip, orig_dstip, orig_sport, orig_dport);
+	return match;
+}
+
+/* Rewrite the destination address and port of a TCP packet to the
+   redirect target, patching the TCP and IP checksums to match. */
+static void do_tcp_redir(struct sk_buff *skb, struct redir *redir)
+{
+	const struct redir_core *map = &redir->core;
+	struct iphdr *iph = skb->nh.iph;
+	struct tcphdr *tcph = (struct tcphdr *)((u_int32_t *)iph + iph->ihl);
+	u_int16_t port_fixed;
+
+	/* Port change first, then the address change on top of it. */
+	port_fixed = cheat_check(map->orig_dport ^ 0xFFFF, map->new_dport,
+				 tcph->check);
+	tcph->check = cheat_check(~map->orig_dstip, map->new_dstip,
+				  port_fixed);
+	iph->check = cheat_check(~map->orig_dstip, map->new_dstip,
+				 iph->check);
+
+	tcph->dest = map->new_dport;
+	iph->daddr = map->new_dstip;
+
+	skb->nfcache |= NFC_ALTERED;
+}
+
+/* LIST_FIND predicate for the reply direction: matches on the rewritten
+   destination and the original source. */
+static int
+unredir_cmp(const struct redir *i,
+	    u_int32_t new_dstip, u_int32_t orig_srcip,
+	    u_int16_t new_dport, u_int16_t orig_sport)
+{
+	if (i->core.orig_srcip != orig_srcip)
+		return 0;
+	if (i->core.new_dstip != new_dstip)
+		return 0;
+	if (i->core.orig_sport != orig_sport)
+		return 0;
+	return i->core.new_dport == new_dport;
+}
+
+/* Find the entry a reply packet belongs to; NULL if none. */
+static struct redir *
+find_unredir(u_int32_t new_dstip, u_int32_t orig_srcip,
+	     u_int16_t new_dport, u_int16_t orig_sport)
+{
+	struct redir *match;
+
+	match = LIST_FIND(&redirs, unredir_cmp, struct redir *,
+			  new_dstip, orig_srcip, new_dport, orig_sport);
+	return match;
+}
+
+/* Restore the original destination as the source of a reply packet,
+   patching the TCP and IP checksums to match. */
+static void do_tcp_unredir(struct sk_buff *skb, struct redir *redir)
+{
+	const struct redir_core *map = &redir->core;
+	struct iphdr *iph = skb->nh.iph;
+	struct tcphdr *tcph = (struct tcphdr *)((u_int32_t *)iph + iph->ihl);
+	u_int16_t port_fixed;
+
+	port_fixed = cheat_check(map->new_dport ^ 0xFFFF, map->orig_dport,
+				 tcph->check);
+	tcph->check = cheat_check(~map->new_dstip, map->orig_dstip,
+				  port_fixed);
+	iph->check = cheat_check(~map->new_dstip, map->orig_dstip,
+				 iph->check);
+
+	tcph->source = map->orig_dport;
+	iph->saddr = map->orig_dstip;
+
+	skb->nfcache |= NFC_ALTERED;
+}
+
+/* REDIRECT a packet.
*/
+/* Rewrite the destination of `skb' to the first address of `dev' and
+   port `redirpt'.  UDP is mangled statelessly; TCP additionally records
+   the mapping in `redirs' so later packets and replies can be
+   translated.  Returns NF_ACCEPT, or NF_DROP when there is no device,
+   the device has no address, the protocol is neither TCP nor UDP, or
+   the TCP entry cannot be allocated. */
+unsigned int
+do_redirect(struct sk_buff *skb,
+	    const struct net_device *dev,
+	    u_int16_t redirpt)
+{
+	struct iphdr *iph = skb->nh.iph;
+	struct in_device *indev;
+	u_int32_t newdst;
+
+	/* Figure out address: not loopback. */
+	if (!dev)
+		return NF_DROP;
+
+	/* Grab first address on interface.  An interface without IP
+	   configuration has nothing to redirect to, so drop rather than
+	   dereference a NULL pointer. */
+	indev = (struct in_device *)dev->ip_ptr;
+	if (!indev || !indev->ifa_list)
+		return NF_DROP;
+	newdst = indev->ifa_list->ifa_local;
+
+	switch (iph->protocol) {
+	case IPPROTO_UDP: {
+		/* Simple mangle. */
+		struct udphdr *udph = (struct udphdr *)((u_int32_t *)iph
+							+ iph->ihl);
+
+		/* A zero UDP checksum means "no checksum"; patching it
+		   would turn it into a bogus non-zero one. */
+		if (udph->check)
+			udph->check = cheat_check(~iph->daddr, newdst,
+						  cheat_check(udph->dest ^ 0xFFFF,
+							      redirpt,
+							      udph->check));
+		iph->check = cheat_check(~iph->daddr, newdst, iph->check);
+		udph->dest = redirpt;
+		iph->daddr = newdst;
+
+		skb->nfcache |= NFC_ALTERED;
+		return NF_ACCEPT;
+	}
+	case IPPROTO_TCP: {
+		/* Mangle, maybe record. */
+		struct tcphdr *tcph = (struct tcphdr *)((u_int32_t *)iph
+							+ iph->ihl);
+		struct redir *redir;
+		int ret;
+
+		DEBUGP("Doing tcp redirect. %08X:%u %08X:%u -> %08X:%u\n",
+		       iph->saddr, tcph->source, iph->daddr, tcph->dest,
+		       newdst, redirpt);
+		LOCK_BH(&redir_lock);
+		redir = find_redir(iph->saddr, iph->daddr,
+				   tcph->source, tcph->dest);
+
+		if (!redir) {
+			redir = kmalloc(sizeof(struct redir), GFP_ATOMIC);
+			if (!redir) {
+				ret = NF_DROP;
+				goto out;
+			}
+			list_prepend(&redirs, redir);
+			init_timer(&redir->destroyme);
+		}
+		/* In case mangling has changed, rewrite this part. */
+		redir->core = ((struct redir_core)
+			       { iph->saddr, iph->daddr,
+				 tcph->source, tcph->dest,
+				 newdst, redirpt });
+		do_tcp_redir(skb, redir);
+		ret = NF_ACCEPT;
+
+	out:
+		UNLOCK_BH(&redir_lock);
+		return ret;
+	}
+
+	default: /* give up if not TCP or UDP. */
+		return NF_DROP;
+	}
+}
+
+/* Timer callback: unlink the entry (passed as `me') from `redirs' and
+   release it.  The entry came from kmalloc() in do_redirect() and
+   nothing else frees it, so it leaked before the kfree() was added. */
+static void destroyme(unsigned long me)
+{
+	LOCK_BH(&redir_lock);
+	LIST_DELETE(&redirs, (struct redir *)me);
+	UNLOCK_BH(&redir_lock);
+	kfree((struct redir *)me);
+}
+
+/* Incoming packet: is it a reply to a masqueraded connection, or
+   part of an already-redirected TCP connection?
*/
+/* Arm (or re-arm) the entry's timer so destroyme() runs 75 seconds from
+   now.  mod_timer() takes an absolute expiry, so the delay has to be
+   added to jiffies; a bare 75*HZ is already in the past once the
+   machine has been up for 75 seconds.  Both callers hold redir_lock. */
+static void schedule_redir_destroy(struct redir *redir)
+{
+	redir->destroyme.function = destroyme;
+	redir->destroyme.data = (unsigned long)redir;
+	mod_timer(&redir->destroyme, jiffies + 75*HZ);
+}
+
+/* Re-apply a recorded redirection to a later TCP packet of the same
+   connection.  A FIN or RST schedules the entry for destruction.
+   Non-TCP packets are ignored. */
+void
+check_for_redirect(struct sk_buff *skb)
+{
+	struct iphdr *iph = skb->nh.iph;
+	struct tcphdr *tcph = (struct tcphdr *)((u_int32_t *)iph
+						+ iph->ihl);
+	struct redir *redir;
+
+	if (iph->protocol != IPPROTO_TCP)
+		return;
+
+	LOCK_BH(&redir_lock);
+	redir = find_redir(iph->saddr, iph->daddr, tcph->source, tcph->dest);
+	if (redir) {
+		DEBUGP("Doing tcp redirect again.\n");
+		do_tcp_redir(skb, redir);
+		if (tcph->rst || tcph->fin)
+			schedule_redir_destroy(redir);
+	}
+	UNLOCK_BH(&redir_lock);
+}
+
+/* Outgoing packet: if it is a reply on a redirected TCP connection,
+   restore the original destination as its source.  A FIN or RST
+   schedules the entry for destruction.  Non-TCP packets are ignored. */
+void
+check_for_unredirect(struct sk_buff *skb)
+{
+	struct iphdr *iph = skb->nh.iph;
+	struct tcphdr *tcph = (struct tcphdr *)((u_int32_t *)iph
+						+ iph->ihl);
+	struct redir *redir;
+
+	if (iph->protocol != IPPROTO_TCP)
+		return;
+
+	LOCK_BH(&redir_lock);
+	redir = find_unredir(iph->saddr, iph->daddr, tcph->source, tcph->dest);
+	if (redir) {
+		DEBUGP("Doing tcp unredirect.\n");
+		do_tcp_unredir(skb, redir);
+		if (tcph->rst || tcph->fin)
+			schedule_redir_destroy(redir);
+	}
+	UNLOCK_BH(&redir_lock);
+}
diff -u --recursive --new-file v2.3.99-pre1/linux/net/ipv4/netfilter/ip_nat_core.c linux/net/ipv4/netfilter/ip_nat_core.c
--- v2.3.99-pre1/linux/net/ipv4/netfilter/ip_nat_core.c Wed Dec 31 16:00:00 1969
+++ linux/net/ipv4/netfilter/ip_nat_core.c Fri Mar 17 10:56:20 2000
@@ -0,0 +1,855 @@
+/* NAT for netfilter; shared with compatibility layer. */
+
+/* (c) 1999 Paul `Rusty' Russell.  Licenced under the GNU General
+   Public Licence.
*/
+#ifdef MODULE
+#define __NO_VERSION__
+#endif
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include /* For tcp_prot in getorigdst */
+
+#define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_nat_lock)
+#define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_nat_lock)
+
+#include
+#include
+#include
+#include
+#include
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(format, args...)
+#endif
+
+DECLARE_RWLOCK(ip_nat_lock);
+
+/* Number of buckets in each of the two hash tables below. */
+#define IP_NAT_HTABLE_SIZE 64
+
+static struct list_head bysource[IP_NAT_HTABLE_SIZE];
+static struct list_head byipsproto[IP_NAT_HTABLE_SIZE];
+LIST_HEAD(protos);
+static LIST_HEAD(helpers);
+
+extern struct ip_nat_protocol unknown_nat_protocol;
+
+/* We keep extra hashes for each conntrack, for fast searching. */
+/* Bucket index into byipsproto[].  The sum is symmetric in src and dst,
+   so swapping the two addresses selects the same bucket. */
+static inline size_t
+hash_by_ipsproto(u_int32_t src, u_int32_t dst, u_int16_t proto)
+{
+	/* Modified src and dst, to ensure we don't create two
+	   identical streams. */
+	return (src + dst + proto) % IP_NAT_HTABLE_SIZE;
+}
+
+/* Bucket index into bysource[], from the manip's ip and per-protocol
+   part plus the protocol number. */
+static inline size_t
+hash_by_src(const struct ip_conntrack_manip *manip, u_int16_t proto)
+{
+	/* Original src, to ensure we map it consistently if poss. */
+	return (manip->ip + manip->u.all + proto) % IP_NAT_HTABLE_SIZE;
+}
+
+/* Noone using conntrack by the time this called.
*/
+/* Unlink a conntrack's NAT bookkeeping from both hash tables.  Does
+   nothing when NAT was never initialized for this connection. */
+static void ip_nat_cleanup_conntrack(struct ip_conntrack *conn)
+{
+	struct ip_nat_info *info = &conn->nat.info;
+
+	if (!info->initialized)
+		return;
+
+	IP_NF_ASSERT(info->bysource.conntrack);
+	IP_NF_ASSERT(info->byipsproto.conntrack);
+
+	WRITE_LOCK(&ip_nat_lock);
+	LIST_DELETE(&bysource[hash_by_src(&conn->tuplehash[IP_CT_DIR_ORIGINAL]
+					  .tuple.src,
+					  conn->tuplehash[IP_CT_DIR_ORIGINAL]
+					  .tuple.dst.protonum)],
+		    &info->bysource);
+
+	LIST_DELETE(&byipsproto
+		    [hash_by_ipsproto(conn->tuplehash[IP_CT_DIR_REPLY]
+				      .tuple.src.ip,
+				      conn->tuplehash[IP_CT_DIR_REPLY]
+				      .tuple.dst.ip,
+				      conn->tuplehash[IP_CT_DIR_REPLY]
+				      .tuple.dst.protonum)],
+		    &info->byipsproto);
+	WRITE_UNLOCK(&ip_nat_lock);
+}
+
+/* We do checksum mangling, so if they were wrong before they're still
+ * wrong.  Also works for incomplete packets (eg. ICMP dest
+ * unreachables.) */
+u_int16_t
+ip_nat_cheat_check(u_int32_t oldvalinv, u_int32_t newval, u_int16_t oldcheck)
+{
+	u_int32_t diffs[] = { oldvalinv, newval };
+	return csum_fold(csum_partial((char *)diffs, sizeof(diffs),
+				      oldcheck^0xFFFF));
+}
+
+/* LIST_FIND predicate: match a NAT protocol by number. */
+static inline int cmp_proto(const struct ip_nat_protocol *i, int proto)
+{
+	return i->protonum == proto;
+}
+
+/* Return the registered NAT protocol for `protonum', falling back to
+   unknown_nat_protocol; never NULL.  Caller holds ip_nat_lock. */
+struct ip_nat_protocol *
+find_nat_proto(u_int16_t protonum)
+{
+	struct ip_nat_protocol *i;
+
+	MUST_BE_READ_LOCKED(&ip_nat_lock);
+	i = LIST_FIND(&protos, cmp_proto, struct ip_nat_protocol *, protonum);
+	if (!i)
+		i = &unknown_nat_protocol;
+	return i;
+}
+
+/* Is this tuple already taken? (not by us) */
+int
+ip_nat_used_tuple(const struct ip_conntrack_tuple *tuple,
+		  const struct ip_conntrack *ignored_conntrack)
+{
+	/* Conntrack tracking doesn't keep track of outgoing tuples; only
+	   incoming ones.  NAT means they don't have a fixed mapping,
+	   so we invert the tuple and look for the incoming reply.
+
+	   We could keep a separate hash if this proves too slow. */
+	struct ip_conntrack_tuple reply;
+
+	invert_tuplepr(&reply, tuple);
+	return ip_conntrack_tuple_taken(&reply, ignored_conntrack);
+}
+
+/* Does tuple + the source manip come within the range mr */
+/* Returns 1 as soon as one range of `mr' accepts the manipulated
+   source, 0 if none does. */
+static int
+in_range(const struct ip_conntrack_tuple *tuple,
+	 const struct ip_conntrack_manip *manip,
+	 const struct ip_nat_multi_range *mr)
+{
+	struct ip_nat_protocol *proto = find_nat_proto(tuple->dst.protonum);
+	unsigned int i;
+	struct ip_conntrack_tuple newtuple = { *manip, tuple->dst };
+
+	for (i = 0; i < mr->rangesize; i++) {
+		/* If we are allowed to map IPs, then we must be in the
+		   range specified, otherwise we must be unchanged. */
+		if (mr->range[i].flags & IP_NAT_RANGE_MAP_IPS) {
+			if (ntohl(newtuple.src.ip) < ntohl(mr->range[i].min_ip)
+			    || (ntohl(newtuple.src.ip)
+				> ntohl(mr->range[i].max_ip)))
+				continue;
+		} else {
+			if (newtuple.src.ip != tuple->src.ip)
+				continue;
+		}
+
+		/* A range that does not constrain the per-protocol part
+		   accepts any value for it -- the same test that
+		   get_unique_tuple() applies.  This used to require
+		   PROTO_SPECIFIED, so such ranges never matched. */
+		if (!(mr->range[i].flags & IP_NAT_RANGE_PROTO_SPECIFIED)
+		    || proto->in_range(&newtuple, IP_NAT_MANIP_SRC,
+				       &mr->range[i].min, &mr->range[i].max))
+			return 1;
+	}
+	return 0;
+}
+
+/* LIST_FIND predicate over bysource[]: same protocol, source ip and
+   source per-protocol part as `tuple', and that connection's source
+   falls within `mr'. */
+static inline int
+src_cmp(const struct ip_nat_hash *i,
+	const struct ip_conntrack_tuple *tuple,
+	const struct ip_nat_multi_range *mr)
+{
+	return (i->conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum
+		== tuple->dst.protonum
+		&& i->conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip
+		== tuple->src.ip
+		&& i->conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.all
+		== tuple->src.u.all
+		&& in_range(tuple,
+			    &i->conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
+			    .tuple.src,
+			    mr));
+}
+
+/* Only called for SRC manip */
+/* Returns the source manip of a matching existing connection, or NULL
+   when there is none.  Caller holds ip_nat_lock. */
+static struct ip_conntrack_manip *
+find_appropriate_src(const struct ip_conntrack_tuple *tuple,
+		     const struct ip_nat_multi_range *mr)
+{
+	unsigned int h = hash_by_src(&tuple->src, tuple->dst.protonum);
+	struct ip_nat_hash *i;
+
+	MUST_BE_READ_LOCKED(&ip_nat_lock);
+	i = LIST_FIND(&bysource[h], src_cmp, struct ip_nat_hash *, tuple, mr);
+	if (i)
+		return &i->conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src;
+	else
+		return NULL;
+}
+
+/* If it's really a local destination manip, it may need to do a
+   source manip too. */
+/* Stores the route's source address for `var_ip' in *other_ipp.
+   Returns 1 on success, 0 when no route is found. */
+static int
+do_extra_mangle(u_int32_t var_ip, u_int32_t *other_ipp)
+{
+	struct rtable *rt;
+
+	/* FIXME: IPTOS_TOS(iph->tos) --RR */
+	if (ip_route_output(&rt, var_ip, 0, 0, 0) != 0) {
+		DEBUGP("do_extra_mangle: Can't get route to %u.%u.%u.%u\n",
+		       IP_PARTS(var_ip));
+		return 0;
+	}
+
+	*other_ipp = rt->rt_src;
+	ip_rt_put(rt);
+	return 1;
+}
+
+/* Simple way to iterate through all. */
+/* Always returns 0 so LIST_FIND visits every entry; matching entries
+   bump *score instead. */
+static inline int fake_cmp(const struct ip_nat_hash *i,
+			   u_int32_t src, u_int32_t dst, u_int16_t protonum,
+			   unsigned int *score,
+			   const struct ip_conntrack *conntrack)
+{
+	/* Compare backwards: we're dealing with OUTGOING tuples, and
+	   inside the conntrack is the REPLY tuple.  Don't count this
+	   conntrack. */
+	if (i->conntrack != conntrack
+	    && i->conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip == dst
+	    && i->conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip == src
+	    && (i->conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum
+		== protonum))
+		(*score)++;
+	return 0;
+}
+
+/* Count the other connections already mapped onto this
+   src-ip/dst-ip/proto triple.  Caller holds ip_nat_lock. */
+static inline unsigned int
+count_maps(u_int32_t src, u_int32_t dst, u_int16_t protonum,
+	   const struct ip_conntrack *conntrack)
+{
+	unsigned int score = 0;
+
+	MUST_BE_READ_LOCKED(&ip_nat_lock);
+	LIST_FIND(&byipsproto[hash_by_ipsproto(src, dst, protonum)],
+		  fake_cmp, struct ip_nat_hash *, src, dst, protonum, &score,
+		  conntrack);
+
+	return score;
+}
+
+/* For [FUTURE] fragmentation handling, we want the least-used
+   src-ip/dst-ip/proto triple.  Fairness doesn't come into it.  Thus
+   if the range specifies 1.2.3.4 ports 10000-10005 and 1.2.3.5 ports
+   1-65535, we don't do pro-rata allocation based on ports; we choose
+   the ip with the lowest src-ip/dst-ip/proto usage.
+
+   If an allocation then fails (eg. all 6 ports used in the 1.2.3.4
+   range), we eliminate that and try again.
This is not the most + efficient approach, but if you're worried about that, don't hand us + ranges you don't really have. */ +static struct ip_nat_range * +find_best_ips_proto(struct ip_conntrack_tuple *tuple, + const struct ip_nat_multi_range *mr, + const struct ip_conntrack *conntrack, + unsigned int hooknum) +{ + unsigned int i; + struct { + const struct ip_nat_range *range; + unsigned int score; + struct ip_conntrack_tuple tuple; + } best = { NULL, 0xFFFFFFFF }; + u_int32_t *var_ipp, *other_ipp, saved_ip; + + if (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC) { + var_ipp = &tuple->src.ip; + saved_ip = tuple->dst.ip; + other_ipp = &tuple->dst.ip; + } else { + var_ipp = &tuple->dst.ip; + saved_ip = tuple->src.ip; + other_ipp = &tuple->src.ip; + } + + IP_NF_ASSERT(mr->rangesize >= 1); + for (i = 0; i < mr->rangesize; i++) { + u_int32_t minip, maxip; + + /* Don't do ranges which are already eliminated. */ + if (mr->range[i].flags & IP_NAT_RANGE_FULL) { + continue; + } + + if (mr->range[i].flags & IP_NAT_RANGE_MAP_IPS) { + minip = mr->range[i].min_ip; + maxip = mr->range[i].max_ip; + } else + minip = maxip = *var_ipp; + + for (*var_ipp = minip; + ntohl(*var_ipp) <= ntohl(maxip); + *var_ipp = htonl(ntohl(*var_ipp) + 1)) { + unsigned int score; + + /* Reset the other ip in case it was mangled by + * do_extra_mangle last time. */ + *other_ipp = saved_ip; + + if (hooknum == NF_IP_LOCAL_OUT + && !do_extra_mangle(*var_ipp, other_ipp)) { + DEBUGP("Range %u %u.%u.%u.%u rt failed!\n", + i, IP_PARTS(*var_ipp)); + /* Can't route? This whole range part is + * probably screwed, but keep trying + * anyway. */ + continue; + } + + /* Count how many others map onto this. */ + score = count_maps(tuple->src.ip, tuple->dst.ip, + tuple->dst.protonum, conntrack); + if (score < best.score) { + /* Optimization: doesn't get any better than + this. 
*/ + if (score == 0) + return (struct ip_nat_range *) + &mr->range[i]; + + best.score = score; + best.tuple = *tuple; + best.range = &mr->range[i]; + } + } + } + *tuple = best.tuple; + + /* Discard const. */ + return (struct ip_nat_range *)best.range; +} + +static int +get_unique_tuple(struct ip_conntrack_tuple *tuple, + const struct ip_conntrack_tuple *orig_tuple, + const struct ip_nat_multi_range *mrr, + struct ip_conntrack *conntrack, + unsigned int hooknum) +{ + struct ip_nat_protocol *proto + = find_nat_proto(orig_tuple->dst.protonum); + struct ip_nat_range *rptr; + unsigned int i; + int ret; + + /* We temporarily use flags for marking full parts, but we + always clean up afterwards */ + struct ip_nat_multi_range *mr = (void *)mrr; + + /* 1) If this srcip/proto/src-proto-part is currently mapped, + and that same mapping gives a unique tuple within the given + range, use that. + + This is only required for source (ie. NAT/masq) mappings. + So far, we don't do local source mappings, so multiple + manips not an issue. */ + if (hooknum == NF_IP_POST_ROUTING) { + struct ip_conntrack_manip *manip; + + manip = find_appropriate_src(orig_tuple, mr); + if (manip) { + /* Apply same source manipulation. */ + *tuple = ((struct ip_conntrack_tuple) + { *manip, orig_tuple->dst }); + DEBUGP("get_unique_tuple: Found current src map\n"); + return 1; + } + } + + /* 2) Select the least-used IP/proto combination in the given + range. + */ + *tuple = *orig_tuple; + while ((rptr = find_best_ips_proto(tuple, mr, conntrack, hooknum)) + != NULL) { + DEBUGP("Found best for "); DUMP_TUPLE(tuple); + /* 3) The per-protocol part of the manip is made to + map into the range to make a unique tuple. 
*/ + + /* Only bother mapping if it's not already in range + and unique */ + if ((!(rptr->flags & IP_NAT_RANGE_PROTO_SPECIFIED) + || proto->in_range(tuple, HOOK2MANIP(hooknum), + &rptr->min, &rptr->max)) + && !ip_nat_used_tuple(tuple, conntrack)) { + ret = 1; + goto clear_fulls; + } else { + if (proto->unique_tuple(tuple, rptr, + HOOK2MANIP(hooknum), + conntrack)) { + /* Must be unique. */ + IP_NF_ASSERT(!ip_nat_used_tuple(tuple, + conntrack)); + ret = 1; + goto clear_fulls; + } + DEBUGP("Protocol can't get unique tuple.\n"); + } + + /* Eliminate that from range, and try again. */ + rptr->flags |= IP_NAT_RANGE_FULL; + *tuple = *orig_tuple; + } + + ret = 0; + + clear_fulls: + /* Clear full flags. */ + IP_NF_ASSERT(mr->rangesize >= 1); + for (i = 0; i < mr->rangesize; i++) + mr->range[i].flags &= ~IP_NAT_RANGE_FULL; + + return ret; +} + +static inline int +helper_cmp(const struct ip_nat_helper *helper, + u_int16_t protocol, + u_int16_t protocol_dst) +{ + return (protocol == helper->protocol + && protocol_dst == helper->protocol_dst); +} + +/* Where to manip the reply packets (will be reverse manip). */ +static unsigned int opposite_hook[NF_IP_NUMHOOKS] += { [NF_IP_PRE_ROUTING] = NF_IP_POST_ROUTING, + [NF_IP_POST_ROUTING] = NF_IP_PRE_ROUTING, + [NF_IP_LOCAL_OUT] = NF_IP_PRE_ROUTING +}; + +unsigned int +ip_nat_setup_info(struct ip_conntrack *conntrack, + const struct ip_nat_multi_range *mr, + unsigned int hooknum) +{ + struct ip_conntrack_tuple new_tuple, inv_tuple, reply; + struct ip_conntrack_tuple orig_tp; + struct ip_nat_info *info = &conntrack->nat.info; + + MUST_BE_WRITE_LOCKED(&ip_nat_lock); + IP_NF_ASSERT(hooknum == NF_IP_PRE_ROUTING + || hooknum == NF_IP_POST_ROUTING + || hooknum == NF_IP_LOCAL_OUT); + IP_NF_ASSERT(info->num_manips < IP_NAT_MAX_MANIPS); + + /* What we've got will look like inverse of reply. 
Normally + this is what is in the conntrack, except for prior + manipulations (future optimization: if num_manips == 0, + orig_tp = + conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple) */ + invert_tuplepr(&orig_tp, + &conntrack->tuplehash[IP_CT_DIR_REPLY].tuple); + +#if 0 + { + unsigned int i; + + DEBUGP("Hook %u (%s), ", hooknum, + HOOK2MANIP(hooknum)==IP_NAT_MANIP_SRC ? "SRC" : "DST"); + DUMP_TUPLE(&orig_tp); + DEBUGP("Range %p: ", mr); + for (i = 0; i < mr->rangesize; i++) { + DEBUGP("%u:%s%s%s %u.%u.%u.%u - %u.%u.%u.%u %u - %u\n", + i, + (mr->range[i].flags & IP_NAT_RANGE_MAP_IPS) + ? " MAP_IPS" : "", + (mr->range[i].flags + & IP_NAT_RANGE_PROTO_SPECIFIED) + ? " PROTO_SPECIFIED" : "", + (mr->range[i].flags & IP_NAT_RANGE_FULL) + ? " FULL" : "", + IP_PARTS(mr->range[i].min_ip), + IP_PARTS(mr->range[i].max_ip), + mr->range[i].min.all, + mr->range[i].max.all); + } + } +#endif + + do { + if (!get_unique_tuple(&new_tuple, &orig_tp, mr, conntrack, + hooknum)) { + DEBUGP("ip_nat_setup_info: Can't get unique for %p.\n", + conntrack); + return NF_DROP; + } + +#if 0 + DEBUGP("Hook %u (%s) %p\n", hooknum, + HOOK2MANIP(hooknum)==IP_NAT_MANIP_SRC ? "SRC" : "DST", + conntrack); + DEBUGP("Original: "); + DUMP_TUPLE(&orig_tp); + DEBUGP("New: "); + DUMP_TUPLE(&new_tuple); +#endif + + /* We now have two tuples (SRCIP/SRCPT/DSTIP/DSTPT): + the original (A/B/C/D') and the mangled one (E/F/G/H'). + + We're only allowed to work with the SRC per-proto + part, so we create inverses of both to start, then + derive the other fields we need. */ + + /* Reply connection: simply invert the new tuple + (G/H/E/F') */ + invert_tuplepr(&reply, &new_tuple); + + /* Alter conntrack table so it recognizes replies. + If fail this race (reply tuple now used), repeat. 
*/ + } while (!ip_conntrack_alter_reply(conntrack, &reply)); + + /* FIXME: We can simply used existing conntrack reply tuple + here --RR */ + /* Create inverse of original: C/D/A/B' */ + invert_tuplepr(&inv_tuple, &orig_tp); + + /* Has source changed?. */ + if (memcmp(&new_tuple.src, &orig_tp.src, sizeof(new_tuple.src)) + != 0) { + /* In this direction, a source manip. */ + info->manips[info->num_manips++] = + ((struct ip_nat_info_manip) + { IP_CT_DIR_ORIGINAL, hooknum, + IP_NAT_MANIP_SRC, new_tuple.src }); + + IP_NF_ASSERT(info->num_manips < IP_NAT_MAX_MANIPS); + + /* In the reverse direction, a destination manip. */ + info->manips[info->num_manips++] = + ((struct ip_nat_info_manip) + { IP_CT_DIR_REPLY, opposite_hook[hooknum], + IP_NAT_MANIP_DST, orig_tp.src }); + IP_NF_ASSERT(info->num_manips <= IP_NAT_MAX_MANIPS); + } + + /* Has destination changed? */ + if (memcmp(&new_tuple.dst, &orig_tp.dst, sizeof(new_tuple.dst)) + != 0) { + /* In this direction, a destination manip */ + info->manips[info->num_manips++] = + ((struct ip_nat_info_manip) + { IP_CT_DIR_ORIGINAL, hooknum, + IP_NAT_MANIP_DST, reply.src }); + + IP_NF_ASSERT(info->num_manips < IP_NAT_MAX_MANIPS); + + /* In the reverse direction, a source manip. */ + info->manips[info->num_manips++] = + ((struct ip_nat_info_manip) + { IP_CT_DIR_REPLY, opposite_hook[hooknum], + IP_NAT_MANIP_SRC, inv_tuple.src }); + IP_NF_ASSERT(info->num_manips <= IP_NAT_MAX_MANIPS); + } + + /* If there's a helper, assign it; based on new tuple. */ + info->helper = LIST_FIND(&helpers, helper_cmp, struct ip_nat_helper *, + new_tuple.dst.protonum, + new_tuple.dst.u.all); + + /* It's done. */ + info->initialized |= (1 << HOOK2MANIP(hooknum)); + return NF_ACCEPT; +} + +void replace_in_hashes(struct ip_conntrack *conntrack, + struct ip_nat_info *info) +{ + /* Source has changed, so replace in hashes. 
*/ + unsigned int srchash + = hash_by_src(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL] + .tuple.src, + conntrack->tuplehash[IP_CT_DIR_ORIGINAL] + .tuple.dst.protonum); + /* We place packet as seen OUTGOUNG in byips_proto hash + (ie. reverse dst and src of reply packet. */ + unsigned int ipsprotohash + = hash_by_ipsproto(conntrack->tuplehash[IP_CT_DIR_REPLY] + .tuple.dst.ip, + conntrack->tuplehash[IP_CT_DIR_REPLY] + .tuple.src.ip, + conntrack->tuplehash[IP_CT_DIR_REPLY] + .tuple.dst.protonum); + + IP_NF_ASSERT(info->bysource.conntrack == conntrack); + MUST_BE_WRITE_LOCKED(&ip_nat_lock); + + list_del(&info->bysource.list); + list_del(&info->byipsproto.list); + + list_prepend(&bysource[srchash], &info->bysource); + list_prepend(&byipsproto[ipsprotohash], &info->byipsproto); +} + +void place_in_hashes(struct ip_conntrack *conntrack, + struct ip_nat_info *info) +{ + unsigned int srchash + = hash_by_src(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL] + .tuple.src, + conntrack->tuplehash[IP_CT_DIR_ORIGINAL] + .tuple.dst.protonum); + /* We place packet as seen OUTGOUNG in byips_proto hash + (ie. reverse dst and src of reply packet. 
*/ + unsigned int ipsprotohash + = hash_by_ipsproto(conntrack->tuplehash[IP_CT_DIR_REPLY] + .tuple.dst.ip, + conntrack->tuplehash[IP_CT_DIR_REPLY] + .tuple.src.ip, + conntrack->tuplehash[IP_CT_DIR_REPLY] + .tuple.dst.protonum); + + IP_NF_ASSERT(!info->bysource.conntrack); + + MUST_BE_WRITE_LOCKED(&ip_nat_lock); + info->byipsproto.conntrack = conntrack; + info->bysource.conntrack = conntrack; + + list_prepend(&bysource[srchash], &info->bysource); + list_prepend(&byipsproto[ipsprotohash], &info->byipsproto); +} + +static void +manip_pkt(u_int16_t proto, struct iphdr *iph, size_t len, + const struct ip_conntrack_manip *manip, + enum ip_nat_manip_type maniptype) +{ + find_nat_proto(proto)->manip_pkt(iph, len, manip, maniptype); + + if (maniptype == IP_NAT_MANIP_SRC) { + iph->check = ip_nat_cheat_check(~iph->saddr, manip->ip, + iph->check); + iph->saddr = manip->ip; + } else { + iph->check = ip_nat_cheat_check(~iph->daddr, manip->ip, + iph->check); + iph->daddr = manip->ip; + } +#if 0 + if (ip_fast_csum((u8 *)iph, iph->ihl) != 0) + DEBUGP("IP: checksum on packet bad.\n"); + + if (proto == IPPROTO_TCP) { + void *th = (u_int32_t *)iph + iph->ihl; + if (tcp_v4_check(th, len - 4*iph->ihl, iph->saddr, iph->daddr, + csum_partial((char *)th, len-4*iph->ihl, 0))) + DEBUGP("TCP: checksum on packet bad\n"); + } +#endif +} + +/* Do packet manipulations according to binding. */ +unsigned int +do_bindings(struct ip_conntrack *ct, + enum ip_conntrack_info ctinfo, + struct ip_nat_info *info, + unsigned int hooknum, + struct sk_buff **pskb) +{ + unsigned int i; + struct ip_nat_helper *helper; + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + + /* Need nat lock to protect against modification, but neither + conntrack (referenced) and helper (deleted with + synchronize_bh()) can vanish. 
*/ + READ_LOCK(&ip_nat_lock); + for (i = 0; i < info->num_manips; i++) { + if (info->manips[i].direction == dir + && info->manips[i].hooknum == hooknum) { + DEBUGP("Mangling %p: %s to %u.%u.%u.%u %u\n", + *pskb, + info->manips[i].maniptype == IP_NAT_MANIP_SRC + ? "SRC" : "DST", + IP_PARTS(info->manips[i].manip.ip), + htons(info->manips[i].manip.u.all)); + manip_pkt((*pskb)->nh.iph->protocol, + (*pskb)->nh.iph, + (*pskb)->len, + &info->manips[i].manip, + info->manips[i].maniptype); + } + } + helper = info->helper; + READ_UNLOCK(&ip_nat_lock); + + if (helper) { + /* Always defragged for helpers */ + IP_NF_ASSERT(!((*pskb)->nh.iph->frag_off + & __constant_htons(IP_MF|IP_OFFSET))); + return helper->help(ct, info, ctinfo, hooknum, pskb); + } else return NF_ACCEPT; +} + +void +icmp_reply_translation(struct sk_buff *skb, + struct ip_conntrack *conntrack, + unsigned int hooknum, + int dir) +{ + struct iphdr *iph = skb->nh.iph; + struct icmphdr *hdr = (struct icmphdr *)((u_int32_t *)iph + iph->ihl); + struct iphdr *inner = (struct iphdr *)(hdr + 1); + size_t datalen = skb->len - ((void *)inner - (void *)iph); + unsigned int i; + struct ip_nat_info *info = &conntrack->nat.info; + + IP_NF_ASSERT(skb->len >= iph->ihl*4 + sizeof(struct icmphdr)); + + DEBUGP("icmp_reply_translation: translating error %p hook %u dir %s\n", + skb, hooknum, dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY"); + /* Note: May not be from a NAT'd host, but probably safest to + do translation always as if it came from the host itself + (even though a "host unreachable" coming from the host + itself is a bit wierd). + + More explanation: some people use NAT for anonomizing. 
+ Also, CERT recommends dropping all packets from private IP + addresses (although ICMP errors from internal links with + such addresses are not too uncommon, as Alan Cox points + out) */ + + READ_LOCK(&ip_nat_lock); + for (i = 0; i < info->num_manips; i++) { + DEBUGP("icmp_reply: manip %u dir %s hook %u\n", + i, info->manips[i].direction == IP_CT_DIR_ORIGINAL ? + "ORIG" : "REPLY", info->manips[i].hooknum); + /* Mapping the inner packet is just like a normal + packet in the other direction, except it was never + src/dst reversed, so where we would normally apply + a dst manip, we reply a src, and vice versa. */ + if (info->manips[i].direction != dir + && info->manips[i].hooknum == opposite_hook[hooknum]) { + DEBUGP("icmp_reply: inner %s -> %u.%u.%u.%u %u\n", + info->manips[i].maniptype == IP_NAT_MANIP_SRC + ? "DST" : "SRC", + IP_PARTS(info->manips[i].manip.ip), + ntohs(info->manips[i].manip.u.udp.port)); + manip_pkt(inner->protocol, inner, + skb->len - ((void *)inner - (void *)iph), + &info->manips[i].manip, + !info->manips[i].maniptype); + } + /* Outer packet needs to have IP header NATed like + it's a reply. */ + else if (info->manips[i].direction != dir + && info->manips[i].hooknum == hooknum) { + /* Use mapping to map outer packet: 0 give no + per-proto mapping */ + DEBUGP("icmp_reply: outer %s %u.%u.%u.%u\n", + info->manips[i].maniptype == IP_NAT_MANIP_SRC + ? "SRC" : "DST", + IP_PARTS(info->manips[i].manip.ip)); + manip_pkt(0, iph, skb->len, + &info->manips[i].manip, + info->manips[i].maniptype); + } + } + READ_UNLOCK(&ip_nat_lock); + + /* Since we mangled inside ICMP packet, recalculate its + checksum from scratch. (Hence the handling of incorrect + checksums in conntrack, so we don't accidentally fix one.) 
*/ + hdr->checksum = 0; + hdr->checksum = ip_compute_csum((unsigned char *)hdr, + sizeof(*hdr) + datalen); +} + +int ip_nat_helper_register(struct ip_nat_helper *me) +{ + int ret = 0; + + WRITE_LOCK(&ip_nat_lock); + if (LIST_FIND(&helpers, helper_cmp, struct ip_nat_helper *, + me->protocol, me->protocol_dst)) + ret = -EBUSY; + else { + list_prepend(&helpers, me); + MOD_INC_USE_COUNT; + } + WRITE_UNLOCK(&ip_nat_lock); + + return ret; +} + +static int +kill_helper(const struct ip_conntrack *i, void *helper) +{ + int ret; + + READ_LOCK(&ip_nat_lock); + ret = (i->nat.info.helper == helper); + READ_UNLOCK(&ip_nat_lock); + + return ret; +} + +void ip_nat_helper_unregister(struct ip_nat_helper *me) +{ + WRITE_LOCK(&ip_nat_lock); + LIST_DELETE(&helpers, me); + WRITE_UNLOCK(&ip_nat_lock); + + /* Someone could be still looking at the helper in a bh. */ + br_write_lock_bh(BR_NETPROTO_LOCK); + br_write_unlock_bh(BR_NETPROTO_LOCK); + + /* Find anything using it, and umm, kill them. We can't turn + them into normal connections: if we've adjusted SYNs, then + they'll ackstorm. So we just drop it. We used to just + bump module count when a connection existed, but that + forces admins to gen fake RSTs or bounce box, either of + which is just a long-winded way of making things + worse. --RR */ + ip_ct_selective_cleanup(kill_helper, me); + + MOD_DEC_USE_COUNT; +} + +int __init ip_nat_init(void) +{ + size_t i; + + /* Sew in builtin protocols. */ + WRITE_LOCK(&ip_nat_lock); + list_append(&protos, &ip_nat_protocol_tcp); + list_append(&protos, &ip_nat_protocol_udp); + list_append(&protos, &ip_nat_protocol_icmp); + WRITE_UNLOCK(&ip_nat_lock); + + for (i = 0; i < IP_NAT_HTABLE_SIZE; i++) { + INIT_LIST_HEAD(&bysource[i]); + INIT_LIST_HEAD(&byipsproto[i]); + } + + /* FIXME: Man, this is a hack. 
*/ + IP_NF_ASSERT(ip_conntrack_destroyed == NULL); + ip_conntrack_destroyed = &ip_nat_cleanup_conntrack; + + return 0; +} + +void ip_nat_cleanup(void) +{ + ip_conntrack_destroyed = NULL; +} diff -u --recursive --new-file v2.3.99-pre1/linux/net/ipv4/netfilter/ip_nat_ftp.c linux/net/ipv4/netfilter/ip_nat_ftp.c --- v2.3.99-pre1/linux/net/ipv4/netfilter/ip_nat_ftp.c Wed Dec 31 16:00:00 1969 +++ linux/net/ipv4/netfilter/ip_nat_ftp.c Fri Mar 17 10:56:20 2000 @@ -0,0 +1,403 @@ +/* FTP extension for TCP NAT alteration. */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +EXPORT_NO_SYMBOLS; + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) +#endif + +/* FIXME: Time out? --RR */ + +static int +ftp_nat_expected(struct sk_buff **pskb, + unsigned int hooknum, + struct ip_conntrack *ct, + struct ip_nat_info *info, + struct ip_conntrack *master, + struct ip_nat_info *masterinfo, + unsigned int *verdict) +{ + struct ip_nat_multi_range mr; + u_int32_t newdstip, newsrcip, newip; + struct ip_ct_ftp *ftpinfo; + + IP_NF_ASSERT(info); + IP_NF_ASSERT(master); + IP_NF_ASSERT(masterinfo); + + IP_NF_ASSERT(!(info->initialized & (1<help.ct_ftp_info; + + LOCK_BH(&ip_ftp_lock); + if (!ftpinfo->is_ftp) { + UNLOCK_BH(&ip_ftp_lock); + DEBUGP("nat_expected: master not ftp\n"); + return 0; + } + + if (ftpinfo->ftptype == IP_CT_FTP_PORT) { + /* PORT command: make connection go to the client. */ + newdstip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip; + newsrcip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip; + DEBUGP("nat_expected: PORT cmd. %u.%u.%u.%u->%u.%u.%u.%u\n", + IP_PARTS(newsrcip), IP_PARTS(newdstip)); + } else { + /* PASV command: make the connection go to the server */ + newdstip = master->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip; + newsrcip = master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip; + DEBUGP("nat_expected: PASV cmd. 
%u.%u.%u.%u->%u.%u.%u.%u\n", + IP_PARTS(newsrcip), IP_PARTS(newdstip)); + } + UNLOCK_BH(&ip_ftp_lock); + + if (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC) + newip = newsrcip; + else + newip = newdstip; + + DEBUGP("nat_expected: IP to %u.%u.%u.%u\n", IP_PARTS(newip)); + + mr.rangesize = 1; + /* We don't want to manip the per-protocol, just the IPs. */ + mr.range[0].flags = IP_NAT_RANGE_MAP_IPS; + mr.range[0].min_ip = mr.range[0].max_ip = newip; + + *verdict = ip_nat_setup_info(ct, &mr, hooknum); + + return 1; +} + +/* This is interesting. We simply use the port given us by the client + or server. In practice it's extremely unlikely to clash; if it + does, the rule won't be able to get a unique tuple and will drop + the packets. */ +static int +mangle_packet(struct sk_buff **pskb, + u_int32_t newip, + u_int16_t port, + unsigned int matchoff, + unsigned int matchlen, + struct ip_nat_ftp_info *this_way, + struct ip_nat_ftp_info *other_way) +{ + struct iphdr *iph = (*pskb)->nh.iph; + struct tcphdr *tcph; + unsigned char *data; + unsigned int tcplen, newlen, newtcplen; + char buffer[sizeof("nnn,nnn,nnn,nnn,nnn,nnn")]; + + MUST_BE_LOCKED(&ip_ftp_lock); + sprintf(buffer, "%u,%u,%u,%u,%u,%u", + IP_PARTS(newip), port>>8, port&0xFF); + + tcplen = (*pskb)->len - iph->ihl * 4; + newtcplen = tcplen - matchlen + strlen(buffer); + newlen = iph->ihl*4 + newtcplen; + + /* So there I am, in the middle of my `netfilter-is-wonderful' + talk in Sydney, and someone asks `What happens if you try + to enlarge a 64k packet here?'. I think I said something + eloquent like `fuck'. 
*/ + if (newlen > 65535) { + if (net_ratelimit()) + printk("nat_ftp cheat: %u.%u.%u.%u->%u.%u.%u.%u %u\n", + NIPQUAD((*pskb)->nh.iph->saddr), + NIPQUAD((*pskb)->nh.iph->daddr), + (*pskb)->nh.iph->protocol); + return NF_DROP; + } + + if (newlen > (*pskb)->len + skb_tailroom(*pskb)) { + struct sk_buff *newskb; + newskb = skb_copy_expand(*pskb, skb_headroom(*pskb), newlen, + GFP_ATOMIC); + if (!newskb) { + DEBUGP("ftp: oom\n"); + return 0; + } else { + kfree_skb(*pskb); + *pskb = newskb; + iph = (*pskb)->nh.iph; + } + } + + tcph = (void *)iph + iph->ihl*4; + data = (void *)tcph + tcph->doff*4; + + DEBUGP("Mapping `%.*s' [%u %u %u] to new `%s' [%u]\n", + (int)matchlen, data+matchoff, + data[matchoff], data[matchoff+1], + matchlen, buffer, strlen(buffer)); + + /* SYN adjust. If it's uninitialized, or this is after last + correction, record it: we don't handle more than one + adjustment in the window, but do deal with common case of a + retransmit. */ + if (this_way->syn_offset_before == this_way->syn_offset_after + || before(this_way->syn_correction_pos, ntohl(tcph->seq))) { + this_way->syn_correction_pos = ntohl(tcph->seq); + this_way->syn_offset_before = this_way->syn_offset_after; + this_way->syn_offset_after = (int32_t) + this_way->syn_offset_before + newlen - (*pskb)->len; + } + + /* Move post-replacement */ + memmove(data + matchoff + strlen(buffer), + data + matchoff + matchlen, + (*pskb)->tail - (data + matchoff + matchlen)); + memcpy(data + matchoff, buffer, strlen(buffer)); + + /* Resize packet. 
*/ + if (newlen > (*pskb)->len) { + DEBUGP("ip_nat_ftp: Extending packet by %u to %u bytes\n", + newlen - (*pskb)->len, newlen); + skb_put(*pskb, newlen - (*pskb)->len); + } else { + DEBUGP("ip_nat_ftp: Shrinking packet from %u to %u bytes\n", + (*pskb)->len, newlen); + skb_trim(*pskb, newlen); + } + + /* Fix checksums */ + iph->tot_len = htons(newlen); + (*pskb)->csum = csum_partial((char *)tcph + tcph->doff*4, + newtcplen - tcph->doff*4, 0); + tcph->check = 0; + tcph->check = tcp_v4_check(tcph, newtcplen, iph->saddr, iph->daddr, + csum_partial((char *)tcph, tcph->doff*4, + (*pskb)->csum)); + ip_send_check(iph); + return 1; +} + +/* Grrr... SACK. Fuck me even harder. Don't want to fix it on the + fly, so blow it away. */ +static void +delete_sack(struct sk_buff *skb, struct tcphdr *tcph) +{ + unsigned int i; + u_int8_t *opt = (u_int8_t *)tcph; + + DEBUGP("Seeking SACKPERM in SYN packet (doff = %u).\n", + tcph->doff * 4); + for (i = sizeof(struct tcphdr); i < tcph->doff * 4;) { + DEBUGP("%u ", opt[i]); + switch (opt[i]) { + case TCPOPT_NOP: + case TCPOPT_EOL: + i++; + break; + + case TCPOPT_SACK_PERM: + goto found_opt; + + default: + /* Worst that can happen: it will take us over. */ + i += opt[i+1] ?: 1; + } + } + DEBUGP("\n"); + return; + + found_opt: + DEBUGP("\n"); + DEBUGP("Found SACKPERM at offset %u.\n", i); + + /* Must be within TCP header, and valid SACK perm. */ + if (i + opt[i+1] <= tcph->doff*4 && opt[i+1] == 2) { + /* Replace with NOPs. 
*/ + tcph->check + = ip_nat_cheat_check(*((u_int16_t *)(opt + i))^0xFFFF, + 0, tcph->check); + opt[i] = opt[i+1] = 0; + } + else DEBUGP("Something wrong with SACK_PERM.\n"); +} + +static int ftp_data_fixup(const struct ip_ct_ftp *ct_ftp_info, + struct ip_conntrack *ct, + struct ip_nat_ftp_info *ftp, + unsigned int datalen, + struct sk_buff **pskb) +{ + u_int32_t newip; + struct ip_conntrack_tuple t; + struct iphdr *iph = (*pskb)->nh.iph; + struct tcphdr *tcph = (void *)iph + iph->ihl*4; + + MUST_BE_LOCKED(&ip_ftp_lock); + DEBUGP("FTP_NAT: seq %u + %u in %u + %u\n", + ct_ftp_info->seq, ct_ftp_info->len, + ntohl(tcph->seq), datalen); + + /* Change address inside packet to match way we're mapping + this connection. */ + if (ct_ftp_info->ftptype == IP_CT_FTP_PASV) { + /* PASV response: must be where client thinks server + is */ + newip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip; + } else { + /* PORT command: must be where server thinks client is */ + newip = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip; + } + + if (!mangle_packet(pskb, newip, ct_ftp_info->port, + ct_ftp_info->seq - ntohl(tcph->seq), + ct_ftp_info->len, + &ftp[ct_ftp_info->ftptype], + &ftp[!ct_ftp_info->ftptype])) + return 0; + + /* Alter conntrack's expectations. */ + + /* We can read expect here without conntrack lock, since it's + only set in ip_conntrack_ftp, with ip_ftp_lock held + writable */ + t = ct->expected.tuple; + t.dst.ip = newip; + ip_conntrack_expect_related(ct, &t); + + return 1; +} + +static unsigned int help(struct ip_conntrack *ct, + struct ip_nat_info *info, + enum ip_conntrack_info ctinfo, + unsigned int hooknum, + struct sk_buff **pskb) +{ + struct iphdr *iph = (*pskb)->nh.iph; + struct tcphdr *tcph = (void *)iph + iph->ihl*4; + u_int32_t newseq, newack; + unsigned int datalen; + int dir; + int score; + struct ip_ct_ftp *ct_ftp_info + = &ct->help.ct_ftp_info; + struct ip_nat_ftp_info *ftp + = &ct->nat.help.ftp_info[0]; + + /* Delete SACK_OK on initial TCP SYNs. 
*/ + if (tcph->syn && !tcph->ack) + delete_sack(*pskb, tcph); + + /* Only mangle things once: original direction in POST_ROUTING + and reply direction on PRE_ROUTING. */ + dir = CTINFO2DIR(ctinfo); + if (!((hooknum == NF_IP_POST_ROUTING && dir == IP_CT_DIR_ORIGINAL) + || (hooknum == NF_IP_PRE_ROUTING && dir == IP_CT_DIR_REPLY))) { + DEBUGP("nat_ftp: Not touching dir %s at hook %s\n", + dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY", + hooknum == NF_IP_POST_ROUTING ? "POSTROUTING" + : hooknum == NF_IP_PRE_ROUTING ? "PREROUTING" + : hooknum == NF_IP_LOCAL_OUT ? "OUTPUT" : "???"); + return NF_ACCEPT; + } + + datalen = (*pskb)->len - iph->ihl * 4 - tcph->doff * 4; + score = 0; + LOCK_BH(&ip_ftp_lock); + if (ct_ftp_info->len) { + /* If it's in the right range... */ + score += between(ct_ftp_info->seq, ntohl(tcph->seq), + ntohl(tcph->seq) + datalen); + score += between(ct_ftp_info->seq + ct_ftp_info->len, + ntohl(tcph->seq), + ntohl(tcph->seq) + datalen); + if (score == 1) { + /* Half a match? This means a partial retransmisison. + It's a cracker being funky. 
*/ + if (net_ratelimit()) { + printk("FTP_NAT: partial packet %u/%u in %u/%u\n", + ct_ftp_info->seq, ct_ftp_info->len, + ntohl(tcph->seq), + ntohl(tcph->seq) + datalen); + } + UNLOCK_BH(&ip_ftp_lock); + return NF_DROP; + } else if (score == 2) { + if (!ftp_data_fixup(ct_ftp_info, ct, ftp, datalen, + pskb)) { + UNLOCK_BH(&ip_ftp_lock); + return NF_DROP; + } + + /* skb may have been reallocated */ + iph = (*pskb)->nh.iph; + tcph = (void *)iph + iph->ihl*4; + } + } + + /* Sequence adjust */ + if (after(ntohl(tcph->seq), ftp[dir].syn_correction_pos)) + newseq = ntohl(tcph->seq) + ftp[dir].syn_offset_after; + else + newseq = ntohl(tcph->seq) + ftp[dir].syn_offset_before; + newseq = htonl(newseq); + + /* Ack adjust */ + if (after(ntohl(tcph->ack_seq), ftp[!dir].syn_correction_pos)) + newack = ntohl(tcph->ack_seq) - ftp[!dir].syn_offset_after; + else + newack = ntohl(tcph->ack_seq) - ftp[!dir].syn_offset_before; + newack = htonl(newack); + UNLOCK_BH(&ip_ftp_lock); + + tcph->check = ip_nat_cheat_check(~tcph->seq, newseq, + ip_nat_cheat_check(~tcph->ack_seq, + newack, + tcph->check)); + tcph->seq = newseq; + tcph->ack_seq = newack; + + return NF_ACCEPT; +} + +static struct ip_nat_helper ftp += { { NULL, NULL }, IPPROTO_TCP, __constant_htons(21), help, "ftp" }; +static struct ip_nat_expect ftp_expect += { { NULL, NULL }, ftp_nat_expected }; + +extern struct module *ip_conntrack_ftp; + +static int __init init(void) +{ + int ret; + + ret = ip_nat_expect_register(&ftp_expect); + if (ret == 0) { + ret = ip_nat_helper_register(&ftp); + + if (ret == 0) + __MOD_INC_USE_COUNT(ip_conntrack_ftp); + else + ip_nat_expect_unregister(&ftp_expect); + } + return ret; +} + +static void __exit fini(void) +{ + __MOD_DEC_USE_COUNT(ip_conntrack_ftp); + ip_nat_helper_unregister(&ftp); + ip_nat_expect_unregister(&ftp_expect); +} + +module_init(init); +module_exit(fini); diff -u --recursive --new-file v2.3.99-pre1/linux/net/ipv4/netfilter/ip_nat_proto_icmp.c 
linux/net/ipv4/netfilter/ip_nat_proto_icmp.c --- v2.3.99-pre1/linux/net/ipv4/netfilter/ip_nat_proto_icmp.c Wed Dec 31 16:00:00 1969 +++ linux/net/ipv4/netfilter/ip_nat_proto_icmp.c Fri Mar 17 10:56:20 2000 @@ -0,0 +1,97 @@ +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +static int +icmp_in_range(const struct ip_conntrack_tuple *tuple, + enum ip_nat_manip_type maniptype, + const union ip_conntrack_manip_proto *min, + const union ip_conntrack_manip_proto *max) +{ + return (tuple->src.u.icmp.id >= min->icmp.id + && tuple->src.u.icmp.id <= max->icmp.id); +} + +static int +icmp_unique_tuple(struct ip_conntrack_tuple *tuple, + const struct ip_nat_range *range, + enum ip_nat_manip_type maniptype, + const struct ip_conntrack *conntrack) +{ + static u_int16_t id = 0; + unsigned int range_size + = (unsigned int)range->max.icmp.id - range->min.icmp.id + 1; + unsigned int i; + + /* If no range specified... */ + if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) + range_size = 0xFFFF; + + for (i = 0; i < range_size; i++, id++) { + tuple->src.u.icmp.id = range->min.icmp.id + (id % range_size); + if (!ip_nat_used_tuple(tuple, conntrack)) + return 1; + } + return 0; +} + +static void +icmp_manip_pkt(struct iphdr *iph, size_t len, + const struct ip_conntrack_manip *manip, + enum ip_nat_manip_type maniptype) +{ + struct icmphdr *hdr = (struct icmphdr *)((u_int32_t *)iph + iph->ihl); + + hdr->checksum = ip_nat_cheat_check(hdr->un.echo.id ^ 0xFFFF, + manip->u.icmp.id, + hdr->checksum); + hdr->un.echo.id = manip->u.icmp.id; +} + +static unsigned int +icmp_print(char *buffer, + const struct ip_conntrack_tuple *match, + const struct ip_conntrack_tuple *mask) +{ + unsigned int len = 0; + + if (mask->src.u.icmp.id) + len += sprintf(buffer + len, "id=%u ", + ntohs(match->src.u.icmp.id)); + + if (mask->dst.u.icmp.type) + len += sprintf(buffer + len, "type=%u ", + ntohs(match->dst.u.icmp.type)); + + if (mask->dst.u.icmp.code) + len += 
sprintf(buffer + len, "code=%u ", + ntohs(match->dst.u.icmp.code)); + + return len; +} + +static unsigned int +icmp_print_range(char *buffer, const struct ip_nat_range *range) +{ + if (range->min.icmp.id != 0 || range->max.icmp.id != 0xFFFF) + return sprintf(buffer, "id %u-%u ", + ntohs(range->min.icmp.id), + ntohs(range->max.icmp.id)); + else return 0; +} + +struct ip_nat_protocol ip_nat_protocol_icmp += { { NULL, NULL }, "ICMP", IPPROTO_ICMP, + icmp_manip_pkt, + icmp_in_range, + icmp_unique_tuple, + icmp_print, + icmp_print_range +}; diff -u --recursive --new-file v2.3.99-pre1/linux/net/ipv4/netfilter/ip_nat_proto_tcp.c linux/net/ipv4/netfilter/ip_nat_proto_tcp.c --- v2.3.99-pre1/linux/net/ipv4/netfilter/ip_nat_proto_tcp.c Wed Dec 31 16:00:00 1969 +++ linux/net/ipv4/netfilter/ip_nat_proto_tcp.c Fri Mar 17 10:56:20 2000 @@ -0,0 +1,143 @@ +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +static int +tcp_in_range(const struct ip_conntrack_tuple *tuple, + enum ip_nat_manip_type maniptype, + const union ip_conntrack_manip_proto *min, + const union ip_conntrack_manip_proto *max) +{ + u_int16_t port; + + if (maniptype == IP_NAT_MANIP_SRC) + port = tuple->src.u.tcp.port; + else + port = tuple->dst.u.tcp.port; + + return ntohs(port) >= ntohs(min->tcp.port) + && ntohs(port) <= ntohs(max->tcp.port); +} + +static int +tcp_unique_tuple(struct ip_conntrack_tuple *tuple, + const struct ip_nat_range *range, + enum ip_nat_manip_type maniptype, + const struct ip_conntrack *conntrack) +{ + static u_int16_t port = 0, *portptr; + unsigned int range_size, min, i; + + if (maniptype == IP_NAT_MANIP_SRC) + portptr = &tuple->src.u.tcp.port; + else + portptr = &tuple->dst.u.tcp.port; + + /* If no range specified... */ + if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) { + /* If it's dst rewrite, can't change port */ + if (maniptype == IP_NAT_MANIP_DST) + return 0; + + /* Map privileged onto privileged. 
*/ + if (ntohs(*portptr) < 1024) { + /* Loose convention: >> 512 is credential passing */ + if (ntohs(*portptr)<512) { + min = 1; + range_size = 511 - min + 1; + } else { + min = 600; + range_size = 1023 - min + 1; + } + } else { + min = 1024; + range_size = 65535 - 1024 + 1; + } + } else { + min = ntohs(range->min.tcp.port); + range_size = ntohs(range->max.tcp.port) - min + 1; + } + + for (i = 0; i < range_size; i++, port++) { + *portptr = htons(min + port % range_size); + if (!ip_nat_used_tuple(tuple, conntrack)) { + return 1; + } + } + return 0; +} + +static void +tcp_manip_pkt(struct iphdr *iph, size_t len, + const struct ip_conntrack_manip *manip, + enum ip_nat_manip_type maniptype) +{ + struct tcphdr *hdr = (struct tcphdr *)((u_int32_t *)iph + iph->ihl); + u_int32_t oldip; + u_int16_t *portptr; + + if (maniptype == IP_NAT_MANIP_SRC) { + /* Get rid of src ip and src pt */ + oldip = iph->saddr; + portptr = &hdr->source; + } else { + /* Get rid of dst ip and dst pt */ + oldip = iph->daddr; + portptr = &hdr->dest; + } + hdr->check = ip_nat_cheat_check(~oldip, manip->ip, + ip_nat_cheat_check(*portptr ^ 0xFFFF, + manip->u.tcp.port, + hdr->check)); + *portptr = manip->u.tcp.port; +} + +static unsigned int +tcp_print(char *buffer, + const struct ip_conntrack_tuple *match, + const struct ip_conntrack_tuple *mask) +{ + unsigned int len = 0; + + if (mask->src.u.tcp.port) + len += sprintf(buffer + len, "srcpt=%u ", + ntohs(match->src.u.tcp.port)); + + + if (mask->dst.u.tcp.port) + len += sprintf(buffer + len, "dstpt=%u ", + ntohs(match->dst.u.tcp.port)); + + return len; +} + +static unsigned int +tcp_print_range(char *buffer, const struct ip_nat_range *range) +{ + if (range->min.tcp.port != 0 || range->max.tcp.port != 0xFFFF) { + if (range->min.tcp.port == range->max.tcp.port) + return sprintf(buffer, "port %u ", + ntohs(range->min.tcp.port)); + else + return sprintf(buffer, "ports %u-%u ", + ntohs(range->min.tcp.port), + ntohs(range->max.tcp.port)); + } + else return 0; 
+} + +struct ip_nat_protocol ip_nat_protocol_tcp += { { NULL, NULL }, "TCP", IPPROTO_TCP, + tcp_manip_pkt, + tcp_in_range, + tcp_unique_tuple, + tcp_print, + tcp_print_range +}; diff -u --recursive --new-file v2.3.99-pre1/linux/net/ipv4/netfilter/ip_nat_proto_udp.c linux/net/ipv4/netfilter/ip_nat_proto_udp.c --- v2.3.99-pre1/linux/net/ipv4/netfilter/ip_nat_proto_udp.c Wed Dec 31 16:00:00 1969 +++ linux/net/ipv4/netfilter/ip_nat_proto_udp.c Fri Mar 17 10:56:20 2000 @@ -0,0 +1,141 @@ +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +static int +udp_in_range(const struct ip_conntrack_tuple *tuple, + enum ip_nat_manip_type maniptype, + const union ip_conntrack_manip_proto *min, + const union ip_conntrack_manip_proto *max) +{ + u_int16_t port; + + if (maniptype == IP_NAT_MANIP_SRC) + port = tuple->src.u.udp.port; + else + port = tuple->dst.u.udp.port; + + return ntohs(port) >= ntohs(min->udp.port) + && ntohs(port) <= ntohs(max->udp.port); +} + +static int +udp_unique_tuple(struct ip_conntrack_tuple *tuple, + const struct ip_nat_range *range, + enum ip_nat_manip_type maniptype, + const struct ip_conntrack *conntrack) +{ + static u_int16_t port = 0, *portptr; + unsigned int range_size, min, i; + + if (maniptype == IP_NAT_MANIP_SRC) + portptr = &tuple->src.u.udp.port; + else + portptr = &tuple->dst.u.udp.port; + + /* If no range specified... 
*/ + if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) { + /* If it's dst rewrite, can't change port */ + if (maniptype == IP_NAT_MANIP_DST) + return 0; + + if (ntohs(*portptr) < 1024) { + /* Loose convention: >> 512 is credential passing */ + if (ntohs(*portptr)<512) { + min = 1; + range_size = 511 - min + 1; + } else { + min = 600; + range_size = 1023 - min + 1; + } + } else { + min = 1024; + range_size = 65535 - 1024 + 1; + } + } else { + min = ntohs(range->min.udp.port); + range_size = ntohs(range->max.udp.port) - min + 1; + } + + for (i = 0; i < range_size; i++, port++) { + *portptr = htons(min + port % range_size); + if (!ip_nat_used_tuple(tuple, conntrack)) + return 1; + } + return 0; +} + +static void +udp_manip_pkt(struct iphdr *iph, size_t len, + const struct ip_conntrack_manip *manip, + enum ip_nat_manip_type maniptype) +{ + struct udphdr *hdr = (struct udphdr *)((u_int32_t *)iph + iph->ihl); + u_int32_t oldip; + u_int16_t *portptr; + + if (maniptype == IP_NAT_MANIP_SRC) { + /* Get rid of src ip and src pt */ + oldip = iph->saddr; + portptr = &hdr->source; + } else { + /* Get rid of dst ip and dst pt */ + oldip = iph->daddr; + portptr = &hdr->dest; + } + hdr->check = ip_nat_cheat_check(~oldip, manip->ip, + ip_nat_cheat_check(*portptr ^ 0xFFFF, + manip->u.udp.port, + hdr->check)); + *portptr = manip->u.udp.port; +} + +static unsigned int +udp_print(char *buffer, + const struct ip_conntrack_tuple *match, + const struct ip_conntrack_tuple *mask) +{ + unsigned int len = 0; + + if (mask->src.u.udp.port) + len += sprintf(buffer + len, "srcpt=%u ", + ntohs(match->src.u.udp.port)); + + + if (mask->dst.u.udp.port) + len += sprintf(buffer + len, "dstpt=%u ", + ntohs(match->dst.u.udp.port)); + + return len; +} + +static unsigned int +udp_print_range(char *buffer, const struct ip_nat_range *range) +{ + if (range->min.udp.port != 0 || range->max.udp.port != 0xFFFF) { + if (range->min.udp.port == range->max.udp.port) + return sprintf(buffer, "port %u ", + 
ntohs(range->min.udp.port)); + else + return sprintf(buffer, "ports %u-%u ", + ntohs(range->min.udp.port), + ntohs(range->max.udp.port)); + } + else return 0; +} + +struct ip_nat_protocol ip_nat_protocol_udp += { { NULL, NULL }, "UDP", IPPROTO_UDP, + udp_manip_pkt, + udp_in_range, + udp_unique_tuple, + udp_print, + udp_print_range +}; diff -u --recursive --new-file v2.3.99-pre1/linux/net/ipv4/netfilter/ip_nat_proto_unknown.c linux/net/ipv4/netfilter/ip_nat_proto_unknown.c --- v2.3.99-pre1/linux/net/ipv4/netfilter/ip_nat_proto_unknown.c Wed Dec 31 16:00:00 1969 +++ linux/net/ipv4/netfilter/ip_nat_proto_unknown.c Fri Mar 17 10:56:20 2000 @@ -0,0 +1,61 @@ +/* The "unknown" protocol. This is what is used for protocols we + * don't understand. It's returned by find_proto(). + */ + +#include +#include +#include +#include + +#include +#include +#include + +static int unknown_in_range(const struct ip_conntrack_tuple *tuple, + enum ip_nat_manip_type manip_type, + const union ip_conntrack_manip_proto *min, + const union ip_conntrack_manip_proto *max) +{ + return 1; +} + +static int unknown_unique_tuple(struct ip_conntrack_tuple *tuple, + const struct ip_nat_range *range, + enum ip_nat_manip_type maniptype, + const struct ip_conntrack *conntrack) +{ + /* Sorry: we can't help you; if it's not unique, we can't frob + anything. 
*/ + return 0; +} + +static void +unknown_manip_pkt(struct iphdr *iph, size_t len, + const struct ip_conntrack_manip *manip, + enum ip_nat_manip_type maniptype) +{ + return; +} + +static unsigned int +unknown_print(char *buffer, + const struct ip_conntrack_tuple *match, + const struct ip_conntrack_tuple *mask) +{ + return 0; +} + +static unsigned int +unknown_print_range(char *buffer, const struct ip_nat_range *range) +{ + return 0; +} + +struct ip_nat_protocol unknown_nat_protocol = { + { NULL, NULL }, "unknown", 0, + unknown_manip_pkt, + unknown_in_range, + unknown_unique_tuple, + unknown_print, + unknown_print_range +}; diff -u --recursive --new-file v2.3.99-pre1/linux/net/ipv4/netfilter/ip_nat_rule.c linux/net/ipv4/netfilter/ip_nat_rule.c --- v2.3.99-pre1/linux/net/ipv4/netfilter/ip_nat_rule.c Wed Dec 31 16:00:00 1969 +++ linux/net/ipv4/netfilter/ip_nat_rule.c Sat Mar 18 16:51:35 2000 @@ -0,0 +1,327 @@ +/* Everything about the rules for NAT. */ +#define __NO_VERSION__ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_nat_lock) +#define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_nat_lock) + +#include +#include +#include +#include +#include + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) 
+#endif + +#define NAT_VALID_HOOKS ((1<rangesize - 1))))) { + DEBUGP("SNAT: Target size %u wrong for %u ranges\n", + targinfosize, mr->rangesize); + return 0; + } + + if (hook_mask & ~(1 << NF_IP_POST_ROUTING)) { + DEBUGP("SNAT: hook mask 0x%x bad\n", hook_mask); + return 0; + } + return 1; +} + +static int ipt_dnat_checkentry(const char *tablename, + const struct ipt_entry *e, + void *targinfo, + unsigned int targinfosize, + unsigned int hook_mask) +{ + struct ip_nat_multi_range *mr = targinfo; + + /* Must be a valid range */ + if (targinfosize < sizeof(struct ip_nat_multi_range)) { + DEBUGP("DNAT: Target size %u too small\n", targinfosize); + return 0; + } + + if (targinfosize != IPT_ALIGN((sizeof(struct ip_nat_multi_range) + + (sizeof(struct ip_nat_range) + * (mr->rangesize - 1))))) { + DEBUGP("DNAT: Target size %u wrong for %u ranges\n", + targinfosize, mr->rangesize); + return 0; + } + + if (hook_mask & ~((1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_OUT))) { + DEBUGP("DNAT: hook mask 0x%x bad\n", hook_mask); + return 0; + } + return 1; +} + +static inline unsigned int +alloc_null_binding(struct ip_conntrack *conntrack, + struct ip_nat_info *info, + unsigned int hooknum) +{ + /* Force range to this IP; let proto decide mapping for + per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED). + Use reply in case it's already been mangled (eg local packet). + */ + u_int32_t ip + = (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC + ? 
conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip + : conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip); + struct ip_nat_multi_range mr + = { 1, { { IP_NAT_RANGE_MAP_IPS, ip, ip, { 0 }, { 0 } } } }; + + DEBUGP("Allocating NULL binding for %p (%u.%u.%u.%u)\n", conntrack, + IP_PARTS(ip)); + return ip_nat_setup_info(conntrack, &mr, hooknum); +} + +static inline int call_expect(const struct ip_nat_expect *i, + struct sk_buff **pskb, + unsigned int hooknum, + struct ip_conntrack *ct, + struct ip_nat_info *info, + struct ip_conntrack *master, + struct ip_nat_info *masterinfo, + unsigned int *verdict) +{ + return i->expect(pskb, hooknum, ct, info, master, masterinfo, + verdict); +} + +int ip_nat_rule_find(struct sk_buff **pskb, + unsigned int hooknum, + const struct net_device *in, + const struct net_device *out, + struct ip_conntrack *ct, + struct ip_nat_info *info) +{ + int ret; + + /* Master won't vanish while this ctrack still alive */ + if (ct->master.master) { + struct ip_conntrack *master; + + master = (struct ip_conntrack *)ct->master.master; + if (LIST_FIND(&nat_expect_list, + call_expect, + struct ip_nat_expect *, + pskb, hooknum, ct, info, + master, &master->nat.info, &ret)) + return ret; + } + ret = ipt_do_table(pskb, hooknum, in, out, &nat_table, NULL); + if (ret == NF_ACCEPT) { + if (!(info->initialized & (1 << HOOK2MANIP(hooknum)))) + /* NUL mapping */ + ret = alloc_null_binding(ct, info, hooknum); + } + return ret; +} + +int ip_nat_expect_register(struct ip_nat_expect *expect) +{ + WRITE_LOCK(&ip_nat_lock); + list_prepend(&nat_expect_list, expect); + WRITE_UNLOCK(&ip_nat_lock); + + return 0; +} + +void ip_nat_expect_unregister(struct ip_nat_expect *expect) +{ + WRITE_LOCK(&ip_nat_lock); + LIST_DELETE(&nat_expect_list, expect); + WRITE_UNLOCK(&ip_nat_lock); +} + +static struct ipt_target ipt_snat_reg += { { NULL, NULL }, "SNAT", ipt_snat_target, ipt_snat_checkentry, NULL }; +static struct ipt_target ipt_dnat_reg += { { NULL, NULL }, "DNAT", 
ipt_dnat_target, ipt_dnat_checkentry, NULL }; + +int __init ip_nat_rule_init(void) +{ + int ret; + + ret = ipt_register_table(&nat_table); + if (ret != 0) + return ret; + ret = ipt_register_target(&ipt_snat_reg); + if (ret != 0) + goto unregister_table; + + ret = ipt_register_target(&ipt_dnat_reg); + if (ret != 0) + goto unregister_snat; + + return ret; + + unregister_snat: + ipt_unregister_target(&ipt_snat_reg); + unregister_table: + ipt_unregister_table(&nat_table); + + return ret; +} + +void ip_nat_rule_cleanup(void) +{ + ipt_unregister_target(&ipt_dnat_reg); + ipt_unregister_target(&ipt_snat_reg); + ipt_unregister_table(&nat_table); +} diff -u --recursive --new-file v2.3.99-pre1/linux/net/ipv4/netfilter/ip_nat_standalone.c linux/net/ipv4/netfilter/ip_nat_standalone.c --- v2.3.99-pre1/linux/net/ipv4/netfilter/ip_nat_standalone.c Wed Dec 31 16:00:00 1969 +++ linux/net/ipv4/netfilter/ip_nat_standalone.c Sat Mar 18 16:51:35 2000 @@ -0,0 +1,273 @@ +/* This file contains all the functions required for the standalone + ip_nat module. + + These are not required by the compatibility layer. +*/ + +/* (c) 1999 Paul `Rusty' Russell. Licenced under the GNU General + Public Licence. */ + +#ifdef MODULE +#define EXPORT_SYMTAB +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_nat_lock) +#define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_nat_lock) + +#include +#include +#include +#include +#include +#include +#include +#include + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) +#endif + +#define HOOKNAME(hooknum) ((hooknum) == NF_IP_POST_ROUTING ? "POST_ROUTING" \ + : ((hooknum) == NF_IP_PRE_ROUTING ? "PRE_ROUTING" \ + : ((hooknum) == NF_IP_LOCAL_OUT ? 
"LOCAL_OUT" \ + : "*ERROR*"))) + +static unsigned int +ip_nat_fn(unsigned int hooknum, + struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + struct ip_conntrack *ct; + enum ip_conntrack_info ctinfo; + struct ip_nat_info *info; + /* maniptype == SRC for postrouting. */ + enum ip_nat_manip_type maniptype = HOOK2MANIP(hooknum); + + /* We never see fragments: conntrack defrags on pre-routing + and local-out, and ip_nat_out protects post-routing. */ + IP_NF_ASSERT(!((*pskb)->nh.iph->frag_off + & __constant_htons(IP_MF|IP_OFFSET))); + + /* FIXME: One day, fill in properly. --RR */ + (*pskb)->nfcache |= NFC_UNKNOWN | NFC_ALTERED; + + /* If we had a hardware checksum before, it's now invalid */ + if ((*pskb)->pkt_type != PACKET_LOOPBACK) + (*pskb)->ip_summed = CHECKSUM_NONE; + + ct = ip_conntrack_get(*pskb, &ctinfo); + /* Can't track? Maybe out of memory: this would make NAT + unreliable. */ + if (!ct) + return NF_DROP; + + switch (ctinfo) { + case IP_CT_RELATED: + case IP_CT_RELATED+IP_CT_IS_REPLY: + if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) { + icmp_reply_translation(*pskb, ct, hooknum, + CTINFO2DIR(ctinfo)); + return NF_ACCEPT; + } + /* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */ + case IP_CT_NEW: + info = &ct->nat.info; + + WRITE_LOCK(&ip_nat_lock); + /* Seen it before? This can happen for loopback, retrans, + or local packets.. */ + if (!(info->initialized & (1 << maniptype))) { + int in_hashes = info->initialized; + unsigned int ret; + + ret = ip_nat_rule_find(pskb, hooknum, in, out, + ct, info); + if (ret != NF_ACCEPT) { + WRITE_UNLOCK(&ip_nat_lock); + return ret; + } + + if (in_hashes) { + IP_NF_ASSERT(info->bysource.conntrack); + replace_in_hashes(ct, info); + } else { + place_in_hashes(ct, info); + } + } else + DEBUGP("Already setup manip %s for ct %p\n", + maniptype == IP_NAT_MANIP_SRC ? 
"SRC" : "DST", + ct); + WRITE_UNLOCK(&ip_nat_lock); + break; + + default: + /* ESTABLISHED */ + IP_NF_ASSERT(ctinfo == IP_CT_ESTABLISHED + || ctinfo == (IP_CT_ESTABLISHED+IP_CT_IS_REPLY)); + info = &ct->nat.info; + } + + IP_NF_ASSERT(info); + return do_bindings(ct, ctinfo, info, hooknum, pskb); +} + +static unsigned int +ip_nat_out(unsigned int hooknum, + struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + /* We can hit fragment here; forwarded packets get + defragmented by connection tracking coming in, then + fragmented (grr) by the forward code. + + In future: If we have nfct != NULL, AND we have NAT + initialized, AND there is no helper, then we can do full + NAPT on the head, and IP-address-only NAT on the rest. + + I'm starting to have nightmares about fragments. */ + + if ((*pskb)->nh.iph->frag_off & __constant_htons(IP_MF|IP_OFFSET)) { + *pskb = ip_ct_gather_frags(*pskb); + + if (!*pskb) + return NF_STOLEN; + } + + return ip_nat_fn(hooknum, pskb, in, out, okfn); +} + +/* We must be after connection tracking and before packet filtering. */ + +/* Before packet filtering, change destination */ +static struct nf_hook_ops ip_nat_in_ops += { { NULL, NULL }, ip_nat_fn, PF_INET, NF_IP_PRE_ROUTING, NF_IP_PRI_NAT_DST }; +/* After packet filtering, change source */ +static struct nf_hook_ops ip_nat_out_ops += { { NULL, NULL }, ip_nat_out, PF_INET, NF_IP_POST_ROUTING, NF_IP_PRI_NAT_SRC}; +/* Before packet filtering, change destination */ +static struct nf_hook_ops ip_nat_local_out_ops += { { NULL, NULL }, ip_nat_fn, PF_INET, NF_IP_LOCAL_OUT, NF_IP_PRI_NAT_DST }; + +/* Protocol registration. 
*/ +int ip_nat_protocol_register(struct ip_nat_protocol *proto) +{ + int ret = 0; + struct list_head *i; + + WRITE_LOCK(&ip_nat_lock); + for (i = protos.next; i != &protos; i = i->next) { + if (((struct ip_nat_protocol *)i)->protonum + == proto->protonum) { + ret = -EBUSY; + goto out; + } + } + + list_prepend(&protos, proto); + MOD_INC_USE_COUNT; + + out: + WRITE_UNLOCK(&ip_nat_lock); + return ret; +} + +/* Noone stores the protocol anywhere; simply delete it. */ +void ip_nat_protocol_unregister(struct ip_nat_protocol *proto) +{ + WRITE_LOCK(&ip_nat_lock); + LIST_DELETE(&protos, proto); + WRITE_UNLOCK(&ip_nat_lock); + + /* Someone could be still looking at the proto in a bh. */ + br_write_lock_bh(BR_NETPROTO_LOCK); + br_write_unlock_bh(BR_NETPROTO_LOCK); + + MOD_DEC_USE_COUNT; +} + +static int init_or_cleanup(int init) +{ + int ret = 0; + + if (!init) goto cleanup; + + ret = ip_nat_rule_init(); + if (ret < 0) { + printk("ip_nat_init: can't setup rules.\n"); + goto cleanup_nothing; + } + ret = ip_nat_init(); + if (ret < 0) { + printk("ip_nat_init: can't setup rules.\n"); + goto cleanup_rule_init; + } + ret = nf_register_hook(&ip_nat_in_ops); + if (ret < 0) { + printk("ip_nat_init: can't register in hook.\n"); + goto cleanup_nat; + } + ret = nf_register_hook(&ip_nat_out_ops); + if (ret < 0) { + printk("ip_nat_init: can't register out hook.\n"); + goto cleanup_inops; + } + ret = nf_register_hook(&ip_nat_local_out_ops); + if (ret < 0) { + printk("ip_nat_init: can't register local out hook.\n"); + goto cleanup_outops; + } + __MOD_INC_USE_COUNT(ip_conntrack_module); + return ret; + + cleanup: + __MOD_DEC_USE_COUNT(ip_conntrack_module); + nf_unregister_hook(&ip_nat_local_out_ops); + cleanup_outops: + nf_unregister_hook(&ip_nat_out_ops); + cleanup_inops: + nf_unregister_hook(&ip_nat_in_ops); + cleanup_nat: + ip_nat_cleanup(); + cleanup_rule_init: + ip_nat_rule_cleanup(); + cleanup_nothing: + MUST_BE_READ_WRITE_UNLOCKED(&ip_nat_lock); + return ret; +} + +static int __init 
init(void) +{ + return init_or_cleanup(1); +} + +static void __exit fini(void) +{ + init_or_cleanup(0); +} + +module_init(init); +module_exit(fini); + +EXPORT_SYMBOL(ip_nat_setup_info); +EXPORT_SYMBOL(ip_nat_helper_register); +EXPORT_SYMBOL(ip_nat_helper_unregister); +EXPORT_SYMBOL(ip_nat_expect_register); +EXPORT_SYMBOL(ip_nat_expect_unregister); +EXPORT_SYMBOL(ip_nat_cheat_check); diff -u --recursive --new-file v2.3.99-pre1/linux/net/ipv4/netfilter/ip_queue.c linux/net/ipv4/netfilter/ip_queue.c --- v2.3.99-pre1/linux/net/ipv4/netfilter/ip_queue.c Wed Dec 31 16:00:00 1969 +++ linux/net/ipv4/netfilter/ip_queue.c Fri Mar 17 10:56:20 2000 @@ -0,0 +1,752 @@ +/* + * This is a module which is used for queueing IPv4 packets and + * communicating with userspace via netlink. + * + * (C) 2000 James Morris + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +EXPORT_NO_SYMBOLS; + +#define IPQ_THR_NAME "kipq" +#define IPQ_NAME "ip_queue" +#define IPQ_QMAX_DEFAULT 1024 + +#define IPQ_PROC_FS_NAME "ip_queue" + +#define NET_IPQ_QMAX 2088 +#define NET_IPQ_QMAX_NAME "ip_queue_maxlen" + +typedef struct ipq_queue_element { + struct list_head list; /* Links element into queue */ + unsigned char state; /* State of this element */ + int verdict; /* Current verdict */ + struct nf_info *info; /* Extra info from netfilter */ + struct sk_buff *skb; /* Packet inside */ +} ipq_queue_element_t; + +typedef int (*ipq_send_cb_t)(ipq_queue_element_t *e); + +typedef struct ipq_peer { + pid_t pid; /* PID of userland peer */ + unsigned char died; /* We think the peer died */ + unsigned char copy_mode; /* Copy packet as well as metadata? 
*/ + size_t copy_range; /* Range past metadata to copy */ + ipq_send_cb_t send; /* Callback for sending data to peer */ +} ipq_peer_t; + +typedef struct ipq_thread { + pid_t pid; /* PID of kernel thread */ + unsigned char terminate; /* Termination flag */ + unsigned char running; /* Running flag */ + wait_queue_head_t wq; /* I/O wait queue */ + void (*process)(void *data); /* Queue processing function */ +} ipq_thread_t; + +typedef struct ipq_queue { + int len; /* Current queue len */ + int *maxlen; /* Maximum queue len, via sysctl */ + unsigned char state; /* Current queue state */ + struct list_head list; /* Head of packet queue */ + spinlock_t lock; /* Queue spinlock */ + ipq_peer_t peer; /* Userland peer */ + ipq_thread_t thread; /* Thread context */ +} ipq_queue_t; + + +/**************************************************************************** +* +* Kernel thread +* +****************************************************************************/ + +static void ipq_thread_init(char *thread_name) +{ + lock_kernel(); + exit_files(current); + daemonize(); + strcpy(current->comm, thread_name); + unlock_kernel(); + spin_lock_irq(¤t->sigmask_lock); + flush_signals(current); + sigfillset(¤t->blocked); + recalc_sigpending(current); + spin_unlock_irq(¤t->sigmask_lock); +} + +static int ipq_thread_start(void *data) +{ + ipq_queue_t *q = (ipq_queue_t *)data; + + q->thread.running = 1; + ipq_thread_init(IPQ_THR_NAME); + q->thread.pid = current->pid; + while (!q->thread.terminate) { + interruptible_sleep_on(&q->thread.wq); + q->thread.process(q); + } + q->thread.running = 0; + return 0; +} + +static void ipq_thread_stop(ipq_queue_t *q) +{ + if (!(q->thread.pid || q->thread.running)) + return; + q->state = IPQ_QS_FLUSH; + q->thread.terminate = 1; + wake_up_interruptible(&q->thread.wq); + current->state = TASK_INTERRUPTIBLE; + while (q->thread.running) { + schedule_timeout(HZ/10); + current->state = TASK_RUNNING; + } +} + +static int ipq_thread_create(ipq_queue_t *q) +{ + 
int status = kernel_thread(ipq_thread_start, q, 0); + return (status < 0) ? status : 0; +} + + +/**************************************************************************** + * + * Packet queue + * + ****************************************************************************/ + +/* Must be called under spinlock */ +static __inline__ void +ipq_dequeue(ipq_queue_t *q, + ipq_queue_element_t *e) +{ + list_del(&e->list); + nf_reinject(e->skb, e->info, e->verdict); + kfree(e); + q->len--; +} + +/* Must be called under spinlock */ +static __inline__ void +ipq_queue_drop(ipq_queue_t *q, + ipq_queue_element_t *e) +{ + e->verdict = NF_DROP; + ipq_dequeue(q, e); +} + +static int +ipq_notify_peer(ipq_queue_t *q, + ipq_queue_element_t *e) +{ + int status = q->peer.send(e); + + if (status >= 0) { + e->state = IPQ_PS_WAITING; + return status; + } + if (status == -ERESTARTSYS || status == -EAGAIN) + return 0; + printk(KERN_INFO "%s: error notifying peer %d, resetting " + "state and flushing queue\n", IPQ_NAME, q->peer.pid); + q->state = IPQ_QS_FLUSH; + q->peer.died = 1; + q->peer.pid = 0; + q->peer.copy_mode = IPQ_COPY_META; + q->peer.copy_range = 0; + return status; +} + +static void +ipq_queue_process(void *data) +{ + struct list_head *i; + ipq_queue_t *q = (ipq_queue_t *)data; + +restart: + if (q->state == IPQ_QS_HOLD) + return; + spin_lock_bh(&q->lock); + for (i = q->list.prev; i != &q->list; i = i->prev) { + ipq_queue_element_t *e = (ipq_queue_element_t *)i; + + if (q->state == IPQ_QS_FLUSH) { + QDEBUG("flushing packet %p\n", e); + ipq_queue_drop(q, e); + continue; + } + switch (e->state) { + case IPQ_PS_NEW: { + int status = ipq_notify_peer(q, e); + if (status < 0) { + spin_unlock_bh(&q->lock); + goto restart; + } + break; + } + case IPQ_PS_VERDICT: + ipq_dequeue(q, e); + break; + case IPQ_PS_WAITING: + break; + default: + printk(KERN_INFO "%s: dropping stuck packet %p " + "with ps=%d qs=%d\n", IPQ_NAME, + e, e->state, q->state); + ipq_queue_drop(q, e); + } + } + 
spin_unlock_bh(&q->lock); + if (q->state == IPQ_QS_FLUSH) + q->state = IPQ_QS_HOLD; +} + +static ipq_queue_t * +ipq_queue_create(nf_queue_outfn_t outfn, + ipq_send_cb_t send_cb, + int *errp, + int *sysctl_qmax) +{ + int status; + ipq_queue_t *q; + + *errp = 0; + q = kmalloc(sizeof(ipq_queue_t), GFP_KERNEL); + if (q == NULL) { + *errp = -ENOMEM; + return NULL; + } + q->thread.terminate = 0; + q->thread.running = 0; + q->thread.process = ipq_queue_process; + init_waitqueue_head(&q->thread.wq); + q->peer.pid = 0; + q->peer.died = 0; + q->peer.copy_mode = IPQ_COPY_META; + q->peer.copy_range = 0; + q->peer.send = send_cb; + q->len = 0; + q->maxlen = sysctl_qmax; + q->state = IPQ_QS_HOLD; + INIT_LIST_HEAD(&q->list); + spin_lock_init(&q->lock); + status = nf_register_queue_handler(PF_INET, outfn, q); + if (status < 0) { + *errp = -EBUSY; + kfree(q); + return NULL; + } + status = ipq_thread_create(q); + if (status < 0) { + nf_unregister_queue_handler(PF_INET); + *errp = status; + kfree(q); + return NULL; + } + return q; +} + +static int +ipq_enqueue(ipq_queue_t *q, + struct sk_buff *skb, + struct nf_info *info) +{ + ipq_queue_element_t *e = NULL; + + e = kmalloc(sizeof(*e), GFP_ATOMIC); + if (e == NULL) { + printk(KERN_ERR "%s: out of memory in %s\n", + IPQ_NAME, __FUNCTION__); + return -ENOMEM; + } + e->state = IPQ_PS_NEW; + e->verdict = NF_DROP; + e->info = info; + e->skb = skb; + spin_lock_bh(&q->lock); + if (q->len >= *q->maxlen) { + spin_unlock_bh(&q->lock); + printk(KERN_WARNING "%s: queue full at %d entries, " + "dropping packet.\n", IPQ_NAME, q->len); + kfree(e); + nf_reinject(skb, info, NF_DROP); + return 0; + } + list_add(&e->list, &q->list); + q->len++; + spin_unlock_bh(&q->lock); + wake_up_interruptible(&q->thread.wq); + return 0; +} + +/* FIXME: need to find a way to notify user during module unload */ +static void +ipq_queue_destroy(ipq_queue_t *q) +{ + ipq_thread_stop(q); + nf_unregister_queue_handler(PF_INET); + kfree(q); +} + +static int 
+ipq_queue_mangle_ipv4(unsigned char *buf, + ipq_verdict_msg_t *v, + ipq_queue_element_t *e) +{ + struct iphdr *user_iph = (struct iphdr *)buf; + + if (v->data_len < sizeof(*user_iph)) + return 0; + + if (e->skb->nh.iph->check != user_iph->check) { + int diff = v->data_len - e->skb->len; + + if (diff < 0) + skb_trim(e->skb, v->data_len); + else if (diff > 0) { + if (v->data_len > 0xFFFF) { + e->verdict = NF_DROP; + return -EINVAL; + } + if (diff > skb_tailroom(e->skb)) { + struct sk_buff *newskb; + + /* Ack, we waste a memcpy() of data here */ + newskb = skb_copy_expand(e->skb, + skb_headroom(e->skb), + diff, + GFP_ATOMIC); + if (newskb == NULL) { + printk(KERN_WARNING "%s: OOM in %s, " + "dropping packet\n", + IPQ_THR_NAME, __FUNCTION__); + e->verdict = NF_DROP; + return -ENOMEM; + } + kfree_skb(e->skb); + e->skb = newskb; + } + skb_put(e->skb, diff); + } + memcpy(e->skb->data, buf, v->data_len); + e->skb->nfcache |= NFC_ALTERED; + } + return 0; +} + +static int +ipq_queue_set_verdict(ipq_queue_t *q, + ipq_verdict_msg_t *v, + unsigned char *buf, + unsigned int len) +{ + struct list_head *i; + + if (v->value < 0 || v->value > NF_MAX_VERDICT) + return -EINVAL; + spin_lock_bh(&q->lock); + for (i = q->list.next; i != &q->list; i = i->next) { + ipq_queue_element_t *e = (ipq_queue_element_t *)i; + + if (v->id == (unsigned long )e) { + int status = 0; + e->state = IPQ_PS_VERDICT; + e->verdict = v->value; + + if (buf && v->data_len == len) + status = ipq_queue_mangle_ipv4(buf, v, e); + spin_unlock_bh(&q->lock); + return status; + } + } + spin_unlock_bh(&q->lock); + return -ENOENT; +} + +static int +ipq_receive_peer(ipq_queue_t *q, + ipq_peer_msg_t *m, + unsigned char type, + unsigned int len) +{ + if (q->state == IPQ_QS_FLUSH) + return -EBUSY; + + if (len < sizeof(ipq_peer_msg_t)) + return -EINVAL; + + switch (type) { + case IPQM_MODE: + switch (m->msg.mode.value) { + case IPQ_COPY_NONE: + q->peer.copy_mode = IPQ_COPY_NONE; + q->peer.copy_range = 0; + q->state = 
IPQ_QS_FLUSH; + break; + case IPQ_COPY_META: + if (q->state == IPQ_QS_FLUSH) + return -EAGAIN; + q->peer.copy_mode = IPQ_COPY_META; + q->peer.copy_range = 0; + q->state = IPQ_QS_COPY; + break; + case IPQ_COPY_PACKET: + if (q->state == IPQ_QS_FLUSH) + return -EAGAIN; + q->peer.copy_mode = IPQ_COPY_PACKET; + q->peer.copy_range = m->msg.mode.range; + q->state = IPQ_QS_COPY; + break; + default: + return -EINVAL; + } + break; + case IPQM_VERDICT: { + int status; + unsigned char *data = NULL; + + if (m->msg.verdict.value > NF_MAX_VERDICT) + return -EINVAL; + if (m->msg.verdict.data_len) + data = (unsigned char *)m + sizeof(*m); + status = ipq_queue_set_verdict(q, &m->msg.verdict, + data, len - sizeof(*m)); + if (status < 0) + return status; + break; + } + default: + return -EINVAL; + } + wake_up_interruptible(&q->thread.wq); + return 0; +} + + +/**************************************************************************** + * + * Netfilter interface + * + ****************************************************************************/ + +/* + * Packets arrive here from netfilter for queuing to userspace. + * All of them must be fed back via nf_reinject() or Alexey will kill Rusty. + */ +static int +receive_netfilter(struct sk_buff *skb, + struct nf_info *info, + void *data) +{ + ipq_queue_t *q = (ipq_queue_t *)data; + + if (q->state == IPQ_QS_FLUSH) + return -EBUSY; + return ipq_enqueue(q, skb, info); +} + +/**************************************************************************** + * + * Netlink interface. 
+ * + ****************************************************************************/ + +static struct sk_buff * +netlink_build_message(ipq_queue_element_t *e, + int *errp); + +extern __inline__ void +receive_user_skb(struct sk_buff *skb); + +static int +netlink_send_peer(ipq_queue_element_t *e); + +static struct sock *nfnl = NULL; +ipq_queue_t *nlq = NULL; + +static int +netlink_send_peer(ipq_queue_element_t *e) +{ + int status = 0; + struct sk_buff *skb; + + if (!nlq->peer.pid) + return -EINVAL; + skb = netlink_build_message(e, &status); + if (skb == NULL) + return status; + return netlink_unicast(nfnl, skb, nlq->peer.pid, 0); +} + +static struct sk_buff * +netlink_build_message(ipq_queue_element_t *e, + int *errp) +{ + unsigned char *old_tail; + size_t size = 0; + size_t data_len = 0; + struct sk_buff *skb; + ipq_packet_msg_t *pm; + struct nlmsghdr *nlh; + + switch (nlq->peer.copy_mode) { + size_t copy_range; + + case IPQ_COPY_META: + size = NLMSG_SPACE(sizeof(*pm)); + data_len = 0; + break; + case IPQ_COPY_PACKET: + copy_range = nlq->peer.copy_range; + if (copy_range == 0 || copy_range > e->skb->len) + data_len = e->skb->len; + else + data_len = copy_range; + size = NLMSG_SPACE(sizeof(*pm) + data_len); + break; + case IPQ_COPY_NONE: + default: + *errp = -EINVAL; + return NULL; + } + skb = alloc_skb(size, GFP_ATOMIC); + if (!skb) + goto nlmsg_failure; + old_tail = skb->tail; + nlh = NLMSG_PUT(skb, 0, 0, IPQM_PACKET, size - sizeof(*nlh)); + pm = NLMSG_DATA(nlh); + memset(pm, 0, sizeof(*pm)); + pm->packet_id = (unsigned long )e; + pm->data_len = data_len; + pm->timestamp_sec = e->skb->stamp.tv_sec; + pm->timestamp_usec = e->skb->stamp.tv_usec; + pm->hook = e->info->hook; + if (e->info->indev) strcpy(pm->indev_name, e->info->indev->name); + else pm->indev_name[0] = '\0'; + if (e->info->outdev) strcpy(pm->outdev_name, e->info->outdev->name); + else pm->outdev_name[0] = '\0'; + if (data_len) + memcpy(++pm, e->skb->data, data_len); + nlh->nlmsg_len = skb->tail - 
old_tail; + NETLINK_CB(skb).dst_groups = 0; + return skb; +nlmsg_failure: + if (skb) + kfree(skb); + *errp = 0; + printk(KERN_ERR "%s: error creating netlink message\n", IPQ_NAME); + return NULL; +} + +#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0); +/* + * FIXME: ping old peer if we detect a new peer then resend. + */ +extern __inline__ void +receive_user_skb(struct sk_buff *skb) +{ + int status, type; + struct nlmsghdr *nlh; + + nlh = (struct nlmsghdr *)skb->data; + if (nlh->nlmsg_len < sizeof(*nlh) + || skb->len < nlh->nlmsg_len + || nlh->nlmsg_pid <= 0 + || !(nlh->nlmsg_flags & NLM_F_REQUEST) + || nlh->nlmsg_flags & NLM_F_MULTI) + RCV_SKB_FAIL(-EINVAL); + if (nlh->nlmsg_flags & MSG_TRUNC) + RCV_SKB_FAIL(-ECOMM); + type = nlh->nlmsg_type; + if (type < NLMSG_NOOP || type >= IPQM_MAX) + RCV_SKB_FAIL(-EINVAL); + if (type <= IPQM_BASE) + return; + if(!cap_raised(NETLINK_CB(skb).eff_cap, CAP_NET_ADMIN)) + RCV_SKB_FAIL(-EPERM); + if (nlq->peer.pid && !nlq->peer.died + && (nlq->peer.pid != nlh->nlmsg_pid)) + printk(KERN_WARNING "%s: peer pid changed from %d to %d\n", + IPQ_NAME, nlq->peer.pid, nlh->nlmsg_pid); + nlq->peer.pid = nlh->nlmsg_pid; + nlq->peer.died = 0; + status = ipq_receive_peer(nlq, NLMSG_DATA(nlh), + type, skb->len - NLMSG_LENGTH(0)); + if (status < 0) + RCV_SKB_FAIL(status); + if (nlh->nlmsg_flags & NLM_F_ACK) + netlink_ack(skb, nlh, 0); + return; +} + +/* Note: we are only dealing with single part messages at the moment. 
*/ +static void +receive_user_sk(struct sock *sk, + int len) +{ + do { + struct sk_buff *skb; + + if (rtnl_shlock_nowait()) + return; + while ((skb = skb_dequeue(&sk->receive_queue)) != NULL) { + receive_user_skb(skb); + kfree_skb(skb); + } + up(&rtnl_sem); + } while (nfnl && nfnl->receive_queue.qlen); +} + + +/**************************************************************************** + * + * System events + * + ****************************************************************************/ + +static int +receive_event(struct notifier_block *this, + unsigned long event, + void *ptr) +{ + if (event == NETDEV_UNREGISTER) + if (nlq) + ipq_thread_stop(nlq); + return NOTIFY_DONE; +} + +struct notifier_block ipq_dev_notifier = { + receive_event, + NULL, + 0 +}; + + +/**************************************************************************** + * + * Sysctl - queue tuning. + * + ****************************************************************************/ + +static int sysctl_maxlen = IPQ_QMAX_DEFAULT; + +static struct ctl_table_header *ipq_sysctl_header; + +static ctl_table ipq_table[] = { + { NET_IPQ_QMAX, NET_IPQ_QMAX_NAME, &sysctl_maxlen, + sizeof(sysctl_maxlen), 0644, NULL, proc_dointvec }, + { 0 } +}; + +static ctl_table ipq_dir_table[] = { + {NET_IPV4, "ipv4", NULL, 0, 0555, ipq_table, 0, 0, 0, 0, 0}, + { 0 } +}; + +static ctl_table ipq_root_table[] = { + {CTL_NET, "net", NULL, 0, 0555, ipq_dir_table, 0, 0, 0, 0, 0}, + { 0 } +}; + +/**************************************************************************** + * + * Procfs - debugging info. 
+ * + ****************************************************************************/ + +static int +ipq_get_info(char *buffer, char **start, off_t offset, int length) +{ + int len; + + spin_lock_bh(&nlq->lock); + len = sprintf(buffer, + "Thread pid : %d\n" + "Thread terminate : %d\n" + "Thread running : %d\n" + "Peer pid : %d\n" + "Peer died : %d\n" + "Peer copy mode : %d\n" + "Peer copy range : %d\n" + "Queue length : %d\n" + "Queue max. length : %d\n" + "Queue state : %d\n", + nlq->thread.pid, + nlq->thread.terminate, + nlq->thread.running, + nlq->peer.pid, + nlq->peer.died, + nlq->peer.copy_mode, + nlq->peer.copy_range, + nlq->len, + *nlq->maxlen, + nlq->state); + spin_unlock_bh(&nlq->lock); + *start = buffer + offset; + len -= offset; + if (len > length) + len = length; + else if (len < 0) + len = 0; + return len; +} + +/**************************************************************************** + * + * Module stuff. + * + ****************************************************************************/ + +static int __init init(void) +{ + int status = 0; + + nfnl = netlink_kernel_create(NETLINK_FIREWALL, receive_user_sk); + if (nfnl == NULL) { + printk(KERN_ERR "%s: initialisation failed: unable to " + "create kernel netlink socket\n", IPQ_NAME); + return -ENOMEM; + } + nlq = ipq_queue_create(receive_netfilter, + netlink_send_peer, &status, &sysctl_maxlen); + if (nlq == NULL) { + printk(KERN_ERR "%s: initialisation failed: unable to " + "initialise queue\n", IPQ_NAME); + sock_release(nfnl->socket); + return status; + } + register_netdevice_notifier(&ipq_dev_notifier); + proc_net_create(IPQ_PROC_FS_NAME, 0, ipq_get_info); + ipq_sysctl_header = register_sysctl_table(ipq_root_table, 0); + return status; +} + +static void __exit fini(void) +{ + unregister_sysctl_table(ipq_sysctl_header); + proc_net_remove(IPQ_PROC_FS_NAME); + unregister_netdevice_notifier(&ipq_dev_notifier); + ipq_queue_destroy(nlq); + sock_release(nfnl->socket); +} + +MODULE_DESCRIPTION("IPv4 packet 
queue handler"); +module_init(init); +module_exit(fini); + diff -u --recursive --new-file v2.3.99-pre1/linux/net/ipv4/netfilter/ip_tables.c linux/net/ipv4/netfilter/ip_tables.c --- v2.3.99-pre1/linux/net/ipv4/netfilter/ip_tables.c Wed Dec 31 16:00:00 1969 +++ linux/net/ipv4/netfilter/ip_tables.c Sun Mar 19 11:13:46 2000 @@ -0,0 +1,1664 @@ +/* + * Packet matching code. + * + * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#ifndef IP_OFFSET +#define IP_OFFSET 0x1FFF +#endif + +/*#define DEBUG_IP_FIREWALL*/ +/*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */ +/*#define DEBUG_IP_FIREWALL_USER*/ + +#ifdef DEBUG_IP_FIREWALL +#define dprintf(format, args...) printk(format , ## args) +#else +#define dprintf(format, args...) +#endif + +#ifdef DEBUG_IP_FIREWALL_USER +#define duprintf(format, args...) printk(format , ## args) +#else +#define duprintf(format, args...) +#endif + +#ifdef CONFIG_NETFILTER_DEBUG +#define IP_NF_ASSERT(x) \ +do { \ + if (!(x)) \ + printk("IPT_ASSERT: %s:%s:%u\n", \ + __FUNCTION__, __FILE__, __LINE__); \ +} while(0) +#else +#define IP_NF_ASSERT(x) +#endif +#define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1)) + +/* Mutex protects lists (only traversed in user context). */ +static DECLARE_MUTEX(ipt_mutex); + +/* Must have mutex */ +#define ASSERT_READ_LOCK(x) IP_NF_ASSERT(down_trylock(&ipt_mutex) != 0) +#define ASSERT_WRITE_LOCK(x) IP_NF_ASSERT(down_trylock(&ipt_mutex) != 0) +#include +#include + +#if 0 +/* All the better to debug you with... */ +#define static +#define inline +#endif + +/* Locking is simple: we assume at worst case there will be one packet + in user context and one from bottom halves (or soft irq if Alexey's + softnet patch was applied). 
+ + We keep a set of rules for each CPU, so we can avoid write-locking + them; doing a readlock_bh() stops packets coming through if we're + in user context. + + To be cache friendly on SMP, we arrange them like so: + [ n-entries ] + ... cache-align padding ... + [ n-entries ] + + Hence the start of any table is given by get_table() below. */ + +/* The table itself */ +struct ipt_table_info +{ + /* Size per table */ + unsigned int size; + /* Number of entries: FIXME. --RR */ + unsigned int number; + + /* Entry points and underflows */ + unsigned int hook_entry[NF_IP_NUMHOOKS]; + unsigned int underflow[NF_IP_NUMHOOKS]; + + char padding[SMP_ALIGN((NF_IP_NUMHOOKS*2+2)*sizeof(unsigned int))]; + + /* ipt_entry tables: one per CPU */ + char entries[0]; +}; + +static LIST_HEAD(ipt_target); +static LIST_HEAD(ipt_match); +static LIST_HEAD(ipt_tables); +#define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0) + +#ifdef CONFIG_SMP +#define TABLE_OFFSET(t,p) (SMP_ALIGN((t)->size)*cpu_number_map(p)) +#else +#define TABLE_OFFSET(t,p) 0 +#endif + +#if 0 +#define down(x) do { printk("DOWN:%u:" #x "\n", __LINE__); down(x); } while(0) +#define down_interruptible(x) ({ int __r; printk("DOWNi:%u:" #x "\n", __LINE__); __r = down_interruptible(x); if (__r != 0) printk("ABORT-DOWNi:%u\n", __LINE__); __r; }) +#define up(x) do { printk("UP:%u:" #x "\n", __LINE__); up(x); } while(0) +#endif + +/* Returns whether matches rule or not. */ +static inline int +ip_packet_match(const struct iphdr *ip, + const char *indev, + const char *outdev, + const struct ipt_ip *ipinfo, + int isfrag) +{ + size_t i; + unsigned long ret; + +#define FWINV(bool,invflg) ((bool) ^ !!(ipinfo->invflags & invflg)) + + if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr, + IPT_INV_SRCIP) + || FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr, + IPT_INV_DSTIP)) { + dprintf("Source or dest mismatch.\n"); + + dprintf("SRC: %u.%u.%u.%u. Mask: %u.%u.%u.%u. 
Target: %u.%u.%u.%u.%s\n", + NIPQUAD(ip->saddr), + NIPQUAD(ipinfo->smsk.s_addr), + NIPQUAD(ipinfo->src.s_addr), + ipinfo->invflags & IPT_INV_SRCIP ? " (INV)" : ""); + dprintf("DST: %u.%u.%u.%u Mask: %u.%u.%u.%u Target: %u.%u.%u.%u.%s\n", + NIPQUAD(ip->daddr), + NIPQUAD(ipinfo->dmsk.s_addr), + NIPQUAD(ipinfo->dst.s_addr), + ipinfo->invflags & IPT_INV_DSTIP ? " (INV)" : ""); + return 0; + } + + /* Look for ifname matches; this should unroll nicely. */ + for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) { + ret |= (((const unsigned long *)indev)[i] + ^ ((const unsigned long *)ipinfo->iniface)[i]) + & ((const unsigned long *)ipinfo->iniface_mask)[i]; + } + + if (FWINV(ret != 0, IPT_INV_VIA_IN)) { + dprintf("VIA in mismatch (%s vs %s).%s\n", + indev, ipinfo->iniface, + ipinfo->invflags&IPT_INV_VIA_IN ?" (INV)":""); + return 0; + } + + for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) { + ret |= (((const unsigned long *)outdev)[i] + ^ ((const unsigned long *)ipinfo->outiface)[i]) + & ((const unsigned long *)ipinfo->outiface_mask)[i]; + } + + if (FWINV(ret != 0, IPT_INV_VIA_OUT)) { + dprintf("VIA out mismatch (%s vs %s).%s\n", + outdev, ipinfo->outiface, + ipinfo->invflags&IPT_INV_VIA_OUT ?" (INV)":""); + return 0; + } + + /* Check specific protocol */ + if (ipinfo->proto + && FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO)) { + dprintf("Packet protocol %hi does not match %hi.%s\n", + ip->protocol, ipinfo->proto, + ipinfo->invflags&IPT_INV_PROTO ? " (INV)":""); + return 0; + } + + /* If we have a fragment rule but the packet is not a fragment + * then we return zero */ + if (FWINV((ipinfo->flags&IPT_F_FRAG) && !isfrag, IPT_INV_FRAG)) { + dprintf("Fragment rule but not fragment.%s\n", + ipinfo->invflags & IPT_INV_FRAG ? 
" (INV)" : ""); + return 0; + } + + return 1; +} + +static inline int +ip_checkentry(const struct ipt_ip *ip) +{ + if (ip->flags & ~IPT_F_MASK) { + duprintf("Unknown flag bits set: %08X\n", + ip->flags & ~IPT_F_MASK); + return 0; + } + if (ip->invflags & ~IPT_INV_MASK) { + duprintf("Unknown invflag bits set: %08X\n", + ip->invflags & ~IPT_INV_MASK); + return 0; + } + return 1; +} + +static unsigned int +ipt_error(struct sk_buff **pskb, + unsigned int hooknum, + const struct net_device *in, + const struct net_device *out, + const void *targinfo, + void *userinfo) +{ + if (net_ratelimit()) + printk("ip_tables: error: `%s'\n", (char *)targinfo); + + return NF_DROP; +} + +static inline +int do_match(struct ipt_entry_match *m, + const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int offset, + const void *hdr, + u_int16_t datalen, + int *hotdrop) +{ + /* Stop iteration if it doesn't match */ + if (!m->u.match->match(skb, in, out, m->data, + offset, hdr, datalen, hotdrop)) + return 1; + else + return 0; +} + +static inline struct ipt_entry * +get_entry(void *base, unsigned int offset) +{ + return (struct ipt_entry *)(base + offset); +} + +/* Returns one of the generic firewall policies, like NF_ACCEPT. */ +unsigned int +ipt_do_table(struct sk_buff **pskb, + unsigned int hook, + const struct net_device *in, + const struct net_device *out, + struct ipt_table *table, + void *userdata) +{ + static const char nulldevname[IFNAMSIZ] = { 0 }; + u_int16_t offset; + struct iphdr *ip; + void *protohdr; + u_int16_t datalen; + int hotdrop = 0; + /* Initializing verdict to NF_DROP keeps gcc happy. */ + unsigned int verdict = NF_DROP; + const char *indev, *outdev; + void *table_base; + struct ipt_entry *e, *back; + + /* Initialization */ + ip = (*pskb)->nh.iph; + protohdr = (u_int32_t *)ip + ip->ihl; + datalen = (*pskb)->len - ip->ihl * 4; + indev = in ? in->name : nulldevname; + outdev = out ? 
out->name : nulldevname; + /* We handle fragments by dealing with the first fragment as + * if it was a normal packet. All other fragments are treated + * normally, except that they will NEVER match rules that ask + * things we don't know, ie. tcp syn flag or ports). If the + * rule is also a fragment-specific rule, non-fragments won't + * match it. */ + offset = ntohs(ip->frag_off) & IP_OFFSET; + + read_lock_bh(&table->lock); + IP_NF_ASSERT(table->valid_hooks & (1 << hook)); + table_base = (void *)table->private->entries + + TABLE_OFFSET(table->private, smp_processor_id()); + e = get_entry(table_base, table->private->hook_entry[hook]); + + /* Check noone else using our table */ + IP_NF_ASSERT(((struct ipt_entry *)table_base)->comefrom == 0xdead57ac); +#ifdef CONFIG_NETFILTER_DEBUG + ((struct ipt_entry *)table_base)->comefrom = 0x57acc001; +#endif + + /* For return from builtin chain */ + back = get_entry(table_base, table->private->underflow[hook]); + + do { + IP_NF_ASSERT(e); + IP_NF_ASSERT(back); + (*pskb)->nfcache |= e->nfcache; + if (ip_packet_match(ip, indev, outdev, &e->ip, offset)) { + struct ipt_entry_target *t; + + if (IPT_MATCH_ITERATE(e, do_match, + *pskb, in, out, + offset, protohdr, + datalen, &hotdrop) != 0) + goto no_match; + + ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1); + + t = ipt_get_target(e); + IP_NF_ASSERT(t->u.target); + /* Standard target? */ + if (!t->u.target->target) { + int v; + + v = ((struct ipt_standard_target *)t)->verdict; + if (v < 0) { + /* Pop from stack? 
*/ + if (v != IPT_RETURN) { + verdict = (unsigned)(-v) - 1; + break; + } + e = back; + back = get_entry(table_base, + back->comefrom); + continue; + } + if (table_base + v + != (void *)e + e->next_offset) { + /* Save old back ptr in next entry */ + struct ipt_entry *next + = (void *)e + e->next_offset; + next->comefrom + = (void *)back - table_base; + /* set back pointer to next entry */ + back = next; + } + + e = get_entry(table_base, v); + } else { + verdict = t->u.target->target(pskb, hook, + in, out, + t->data, + userdata); + + /* Target might have changed stuff. */ + ip = (*pskb)->nh.iph; + protohdr = (u_int32_t *)ip + ip->ihl; + datalen = (*pskb)->len - ip->ihl * 4; + + if (verdict == IPT_CONTINUE) + e = (void *)e + e->next_offset; + else + /* Verdict */ + break; + } + } else { + + no_match: + e = (void *)e + e->next_offset; + } + } while (!hotdrop); + +#ifdef CONFIG_NETFILTER_DEBUG + ((struct ipt_entry *)table_base)->comefrom = 0xdead57ac; +#endif + read_unlock_bh(&table->lock); + +#ifdef DEBUG_ALLOW_ALL + return NF_ACCEPT; +#else + if (hotdrop) + return NF_DROP; + else return verdict; +#endif +} + +/* If it succeeds, returns element and locks mutex */ +static inline void * +find_inlist_lock_noload(struct list_head *head, + const char *name, + int *error, + struct semaphore *mutex) +{ + void *ret; + +#if 0 + duprintf("find_inlist: searching for `%s' in %s.\n", + name, head == &ipt_target ? "ipt_target" + : head == &ipt_match ? "ipt_match" + : head == &ipt_tables ? 
"ipt_tables" : "UNKNOWN"); +#endif + + *error = down_interruptible(mutex); + if (*error != 0) + return NULL; + + ret = list_named_find(head, name); + if (!ret) { + *error = -ENOENT; + up(mutex); + } + return ret; +} + +#ifndef CONFIG_KMOD +#define find_inlist_lock(h,n,p,e,m) find_inlist_lock_noload((h),(n),(e),(m)) +#else +static void * +find_inlist_lock(struct list_head *head, + const char *name, + const char *prefix, + int *error, + struct semaphore *mutex) +{ + void *ret; + + ret = find_inlist_lock_noload(head, name, error, mutex); + if (!ret) { + char modulename[IPT_FUNCTION_MAXNAMELEN + strlen(prefix) + 1]; + strcpy(modulename, prefix); + strcat(modulename, name); + duprintf("find_inlist: loading `%s'.\n", modulename); + request_module(modulename); + ret = find_inlist_lock_noload(head, name, error, mutex); + } + + return ret; +} +#endif + +static inline struct ipt_table * +find_table_lock(const char *name, int *error, struct semaphore *mutex) +{ + return find_inlist_lock(&ipt_tables, name, "iptable_", error, mutex); +} + +static inline struct ipt_match * +find_match_lock(const char *name, int *error, struct semaphore *mutex) +{ + return find_inlist_lock(&ipt_match, name, "ipt_", error, mutex); +} + +static inline struct ipt_target * +find_target_lock(const char *name, int *error, struct semaphore *mutex) +{ + return find_inlist_lock(&ipt_target, name, "ipt_", error, mutex); +} + +/* All zeroes == unconditional rule. */ +static inline int +unconditional(const struct ipt_ip *ip) +{ + unsigned int i; + + for (i = 0; i < sizeof(*ip)/sizeof(__u32); i++) + if (((__u32 *)ip)[i]) + return 0; + + return 1; +} + +/* Figures out from what hook each rule can be called: returns 0 if + there are loops. Puts hook bitmask in comefrom. 
*/ +static int +mark_source_chains(struct ipt_table_info *newinfo, unsigned int valid_hooks) +{ + unsigned int hook; + + /* No recursion; use packet counter to save back ptrs (reset + to 0 as we leave), and comefrom to save source hook bitmask */ + for (hook = 0; hook < NF_IP_NUMHOOKS; hook++) { + unsigned int pos = newinfo->hook_entry[hook]; + struct ipt_entry *e + = (struct ipt_entry *)(newinfo->entries + pos); + + if (!(valid_hooks & (1 << hook))) + continue; + + /* Set initial back pointer. */ + e->counters.pcnt = pos; + + for (;;) { + struct ipt_standard_target *t + = (void *)ipt_get_target(e); + + if (e->comefrom & (1 << NF_IP_NUMHOOKS)) { + printk("iptables: loop hook %u pos %u %08X.\n", + hook, pos, e->comefrom); + return 0; + } + e->comefrom + |= ((1 << hook) | (1 << NF_IP_NUMHOOKS)); + + /* Unconditional return/END. */ + if (e->target_offset == sizeof(struct ipt_entry) + && (strcmp(t->target.u.name, IPT_STANDARD_TARGET) + == 0) + && t->verdict < 0 + && unconditional(&e->ip)) { + unsigned int oldpos, size; + + /* Return: backtrack through the last + big jump. */ + do { + e->comefrom ^= (1<comefrom + & (1 << NF_IP_NUMHOOKS)) { + duprintf("Back unset " + "on hook %u " + "rule %u\n", + hook, pos); + } +#endif + oldpos = pos; + pos = e->counters.pcnt; + e->counters.pcnt = 0; + + /* We're at the start. */ + if (pos == oldpos) + goto next; + + e = (struct ipt_entry *) + (newinfo->entries + pos); + } while (oldpos == pos + e->next_offset); + + /* Move along one */ + size = e->next_offset; + e = (struct ipt_entry *) + (newinfo->entries + pos + size); + e->counters.pcnt = pos; + pos += size; + } else { + int newpos = t->verdict; + + if (strcmp(t->target.u.name, + IPT_STANDARD_TARGET) == 0 + && newpos >= 0) { + /* This a jump; chase it. */ + duprintf("Jump rule %u -> %u\n", + pos, newpos); + } else { + /* ... 
this is a fallthru */ + newpos = pos + e->next_offset; + } + e = (struct ipt_entry *) + (newinfo->entries + newpos); + e->counters.pcnt = pos; + pos = newpos; + } + } + next: + duprintf("Finished chain %u\n", hook); + } + return 1; +} + +static inline int +cleanup_match(struct ipt_entry_match *m, unsigned int *i) +{ + if (i && (*i)-- == 0) + return 1; + + if (m->u.match->me) + __MOD_DEC_USE_COUNT(m->u.match->me); + + return 0; +} + +static inline int +standard_check(const struct ipt_entry_target *t, + unsigned int max_offset) +{ + struct ipt_standard_target *targ = (void *)t; + + /* Check standard info. */ + if (t->target_size != sizeof(struct ipt_standard_target)) { + duprintf("standard_check: target size %u != %u\n", + t->target_size, sizeof(struct ipt_standard_target)); + return 0; + } + + if (targ->verdict >= 0 + && targ->verdict > max_offset - sizeof(struct ipt_entry)) { + duprintf("ipt_standard_check: bad verdict (%i)\n", + targ->verdict); + return 0; + } + + if (targ->verdict < -NF_MAX_VERDICT - 1) { + duprintf("ipt_standard_check: bad negative verdict (%i)\n", + targ->verdict); + return 0; + } + return 1; +} + +static inline int +check_match(struct ipt_entry_match *m, + const char *name, + const struct ipt_ip *ip, + unsigned int hookmask, + unsigned int *i) +{ + int ret; + struct ipt_match *match; + + match = find_match_lock(m->u.name, &ret, &ipt_mutex); + if (!match) { + duprintf("check_match: `%s' not found\n", m->u.name); + return ret; + } + if (match->me) + __MOD_INC_USE_COUNT(match->me); + m->u.match = match; + up(&ipt_mutex); + + if (m->u.match->checkentry + && !m->u.match->checkentry(name, ip, m->data, + m->match_size - sizeof(*m), + hookmask)) { + if (m->u.match->me) + __MOD_DEC_USE_COUNT(m->u.match->me); + duprintf("ip_tables: check failed for `%s'.\n", + m->u.match->name); + return -EINVAL; + } + + (*i)++; + return 0; +} + +static struct ipt_target ipt_standard_target; + +static inline int +check_entry(struct ipt_entry *e, const char *name, 
unsigned int size, + unsigned int *i) +{ + struct ipt_entry_target *t; + struct ipt_target *target; + int ret; + unsigned int j; + + if (!ip_checkentry(&e->ip)) { + duprintf("ip_tables: ip check failed %p %s.\n", e, name); + return -EINVAL; + } + + j = 0; + ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip, e->comefrom, &j); + if (ret != 0) + goto cleanup_matches; + + t = ipt_get_target(e); + target = find_target_lock(t->u.name, &ret, &ipt_mutex); + if (!target) { + duprintf("check_entry: `%s' not found\n", t->u.name); + up(&ipt_mutex); + return ret; + } + if (target->me) + __MOD_INC_USE_COUNT(target->me); + t->u.target = target; + up(&ipt_mutex); + + if (t->u.target == &ipt_standard_target) { + if (!standard_check(t, size)) { + ret = -EINVAL; + goto cleanup_matches; + } + } else if (t->u.target->checkentry + && !t->u.target->checkentry(name, e, t->data, + t->target_size - sizeof(*t), + e->comefrom)) { + if (t->u.target->me) + __MOD_DEC_USE_COUNT(t->u.target->me); + duprintf("ip_tables: check failed for `%s'.\n", + t->u.target->name); + ret = -EINVAL; + goto cleanup_matches; + } + + (*i)++; + return 0; + + cleanup_matches: + IPT_MATCH_ITERATE(e, cleanup_match, &j); + return ret; +} + +static inline int +check_entry_size_and_hooks(struct ipt_entry *e, + struct ipt_table_info *newinfo, + unsigned char *base, + unsigned char *limit, + const unsigned int *hook_entries, + const unsigned int *underflows, + unsigned int *i) +{ + unsigned int h; + + if ((unsigned long)e % __alignof__(struct ipt_entry) != 0 + || (unsigned char *)e + sizeof(struct ipt_entry) >= limit) { + duprintf("Bad offset %p\n", e); + return -EINVAL; + } + + if (e->next_offset + < sizeof(struct ipt_entry) + sizeof(struct ipt_entry_target)) { + duprintf("checking: element %p size %u\n", + e, e->next_offset); + return -EINVAL; + } + + /* Check hooks & underflows */ + for (h = 0; h < NF_IP_NUMHOOKS; h++) { + if ((unsigned char *)e - base == hook_entries[h]) + newinfo->hook_entry[h] = hook_entries[h]; + 
if ((unsigned char *)e - base == underflows[h]) + newinfo->underflow[h] = underflows[h]; + } + + /* FIXME: underflows must be unconditional, standard verdicts + < 0 (not IPT_RETURN). --RR */ + + /* Clear counters and comefrom */ + e->counters = ((struct ipt_counters) { 0, 0 }); + e->comefrom = 0; + + (*i)++; + return 0; +} + +static inline int +cleanup_entry(struct ipt_entry *e, unsigned int *i) +{ + struct ipt_entry_target *t; + + if (i && (*i)-- == 0) + return 1; + + /* Cleanup all matches */ + IPT_MATCH_ITERATE(e, cleanup_match, NULL); + t = ipt_get_target(e); + if (t->u.target->me) + __MOD_DEC_USE_COUNT(t->u.target->me); + + return 0; +} + +/* Checks and translates the user-supplied table segment (held in + newinfo) */ +static int +translate_table(const char *name, + unsigned int valid_hooks, + struct ipt_table_info *newinfo, + unsigned int size, + unsigned int number, + const unsigned int *hook_entries, + const unsigned int *underflows) +{ + unsigned int i; + int ret; + + newinfo->size = size; + newinfo->number = number; + + /* Init all hooks to impossible value. */ + for (i = 0; i < NF_IP_NUMHOOKS; i++) { + newinfo->hook_entry[i] = 0xFFFFFFFF; + newinfo->underflow[i] = 0xFFFFFFFF; + } + + duprintf("translate_table: size %u\n", newinfo->size); + i = 0; + /* Walk through entries, checking offsets. 
*/ + ret = IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, + check_entry_size_and_hooks, + newinfo, + newinfo->entries, + newinfo->entries + size, + hook_entries, underflows, &i); + if (ret != 0) + return ret; + + if (i != number) { + duprintf("translate_table: %u not %u entries\n", + i, number); + return -EINVAL; + } + + /* Check hooks all assigned */ + for (i = 0; i < NF_IP_NUMHOOKS; i++) { + /* Only hooks which are valid */ + if (!(valid_hooks & (1 << i))) + continue; + if (newinfo->hook_entry[i] == 0xFFFFFFFF) { + duprintf("Invalid hook entry %u %u\n", + i, hook_entries[i]); + return -EINVAL; + } + if (newinfo->underflow[i] == 0xFFFFFFFF) { + duprintf("Invalid underflow %u %u\n", + i, underflows[i]); + return -EINVAL; + } + } + + if (!mark_source_chains(newinfo, valid_hooks)) + return -ELOOP; + + /* Finally, each sanity check must pass */ + i = 0; + ret = IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, + check_entry, name, size, &i); + + if (ret != 0) { + IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, + cleanup_entry, &i); + return ret; + } + + /* And one copy for every other CPU */ + for (i = 1; i < smp_num_cpus; i++) { + memcpy(newinfo->entries + SMP_ALIGN(newinfo->size*i), + newinfo->entries, + SMP_ALIGN(newinfo->size)); + } + + return ret; +} + +static struct ipt_table_info * +replace_table(struct ipt_table *table, + unsigned int num_counters, + struct ipt_table_info *newinfo, + int *error) +{ + struct ipt_table_info *oldinfo; + +#ifdef CONFIG_NETFILTER_DEBUG + { + struct ipt_entry *table_base; + unsigned int i; + + for (i = 0; i < smp_num_cpus; i++) { + table_base = + (void *)newinfo->entries + + TABLE_OFFSET(newinfo, i); + + table_base->comefrom = 0xdead57ac; + } + } +#endif + + /* Do the substitution. */ + write_lock_bh(&table->lock); + /* Check inside lock: is the old number correct? 
*/ + if (num_counters != table->private->number) { + duprintf("num_counters != table->private->number (%u/%u)\n", + num_counters, table->private->number); + write_unlock_bh(&table->lock); + *error = -EAGAIN; + return NULL; + } + oldinfo = table->private; + table->private = newinfo; + write_unlock_bh(&table->lock); + + return oldinfo; +} + +/* Gets counters. */ +static inline int +add_entry_to_counter(const struct ipt_entry *e, + struct ipt_counters total[], + unsigned int *i) +{ + ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt); + + (*i)++; + return 0; +} + +static void +get_counters(const struct ipt_table_info *t, + struct ipt_counters counters[]) +{ + unsigned int cpu; + unsigned int i; + + for (cpu = 0; cpu < smp_num_cpus; cpu++) { + i = 0; + IPT_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu), + t->size, + add_entry_to_counter, + counters, + &i); + } +} + +static int +copy_entries_to_user(unsigned int total_size, + struct ipt_table *table, + void *userptr) +{ + unsigned int off, num, countersize; + struct ipt_entry *e; + struct ipt_counters *counters; + int ret = 0; + + /* We need atomic snapshot of counters: rest doesn't change + (other than comefrom, which userspace doesn't care + about). */ + countersize = sizeof(struct ipt_counters) * table->private->number; + counters = vmalloc(countersize); + + if (counters == NULL) + return -ENOMEM; + + /* First, sum counters... */ + memset(counters, 0, countersize); + write_lock_bh(&table->lock); + get_counters(table->private, counters); + write_unlock_bh(&table->lock); + + /* ... then copy entire thing from CPU 0... */ + if (copy_to_user(userptr, table->private->entries, total_size) != 0) { + ret = -EFAULT; + goto free_counters; + } + + /* FIXME: use iterator macros --RR */ + /* ... 
then go back and fix counters and names */ + for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){ + unsigned int i; + struct ipt_entry_match *m; + struct ipt_entry_target *t; + + e = (struct ipt_entry *)(table->private->entries + off); + if (copy_to_user(userptr + off + + offsetof(struct ipt_entry, counters), + &counters[num], + sizeof(counters[num])) != 0) { + ret = -EFAULT; + goto free_counters; + } + + for (i = sizeof(struct ipt_entry); + i < e->target_offset; + i += m->match_size) { + m = (void *)e + i; + + if (copy_to_user(userptr + off + i + + offsetof(struct ipt_entry_match, + u.name), + m->u.match->name, + strlen(m->u.match->name)+1) != 0) { + ret = -EFAULT; + goto free_counters; + } + } + + t = ipt_get_target(e); + if (copy_to_user(userptr + off + e->target_offset + + offsetof(struct ipt_entry_target, + u.name), + t->u.target->name, + strlen(t->u.target->name)+1) != 0) { + ret = -EFAULT; + goto free_counters; + } + } + + free_counters: + vfree(counters); + return ret; +} + +static int +get_entries(const struct ipt_get_entries *entries, + struct ipt_get_entries *uptr) +{ + int ret; + struct ipt_table *t; + + t = find_table_lock(entries->name, &ret, &ipt_mutex); + if (t) { + duprintf("t->private->number = %u\n", + t->private->number); + if (entries->size == t->private->size) + ret = copy_entries_to_user(t->private->size, + t, uptr->entries); + else { + duprintf("get_entries: I've got %u not %u!\n", + t->private->size, + entries->size); + ret = -EINVAL; + } + up(&ipt_mutex); + } else + duprintf("get_entries: Can't find %s!\n", + entries->name); + + return ret; +} + +static int +do_replace(void *user, unsigned int len) +{ + int ret; + struct ipt_replace tmp; + struct ipt_table *t; + struct ipt_table_info *newinfo, *oldinfo; + struct ipt_counters *counters; + + if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) + return -EFAULT; + + newinfo = vmalloc(sizeof(struct ipt_table_info) + + SMP_ALIGN(tmp.size) * smp_num_cpus); + if (!newinfo) + 
return -ENOMEM; + + if (copy_from_user(newinfo->entries, user + sizeof(tmp), + tmp.size) != 0) { + ret = -EFAULT; + goto free_newinfo; + } + + counters = vmalloc(tmp.num_counters * sizeof(struct ipt_counters)); + if (!counters) { + ret = -ENOMEM; + goto free_newinfo; + } + memset(counters, 0, tmp.num_counters * sizeof(struct ipt_counters)); + + ret = translate_table(tmp.name, tmp.valid_hooks, + newinfo, tmp.size, tmp.num_entries, + tmp.hook_entry, tmp.underflow); + if (ret != 0) + goto free_newinfo_counters; + + duprintf("ip_tables: Translated table\n"); + + t = find_table_lock(tmp.name, &ret, &ipt_mutex); + if (!t) + goto free_newinfo_counters_untrans; + + /* You lied! */ + if (tmp.valid_hooks != t->valid_hooks) { + duprintf("Valid hook crap: %08X vs %08X\n", + tmp.valid_hooks, t->valid_hooks); + ret = -EINVAL; + goto free_newinfo_counters_untrans_unlock; + } + + oldinfo = replace_table(t, tmp.num_counters, newinfo, &ret); + if (!oldinfo) + goto free_newinfo_counters_untrans_unlock; + + /* Get the old counters. */ + get_counters(oldinfo, counters); + /* Decrease module usage counts and free resource */ + IPT_ENTRY_ITERATE(oldinfo->entries, oldinfo->size, cleanup_entry,NULL); + vfree(oldinfo); + /* Silent error: too late now. */ + copy_to_user(tmp.counters, counters, + sizeof(struct ipt_counters) * tmp.num_counters); + + up(&ipt_mutex); + return 0; + + free_newinfo_counters_untrans_unlock: + up(&ipt_mutex); + free_newinfo_counters_untrans: + IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, cleanup_entry,NULL); + free_newinfo_counters: + vfree(counters); + free_newinfo: + vfree(newinfo); + return ret; +} + +/* We're lazy, and add to the first CPU; overflow works its fey magic + * and everything is OK. 
*/ +static inline int +add_counter_to_entry(struct ipt_entry *e, + const struct ipt_counters addme[], + unsigned int *i) +{ +#if 0 + duprintf("add_counter: Entry %u %lu/%lu + %lu/%lu\n", + *i, + (long unsigned int)e->counters.pcnt, + (long unsigned int)e->counters.bcnt, + (long unsigned int)addme[*i].pcnt, + (long unsigned int)addme[*i].bcnt); +#endif + + ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); + + (*i)++; + return 0; +} + +static int +do_add_counters(void *user, unsigned int len) +{ + unsigned int i; + struct ipt_counters_info tmp, *paddc; + struct ipt_table *t; + int ret; + + if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) + return -EFAULT; + + if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct ipt_counters)) + return -EINVAL; + + paddc = vmalloc(len); + if (!paddc) + return -ENOMEM; + + if (copy_from_user(paddc, user, len) != 0) { + ret = -EFAULT; + goto free; + } + + t = find_table_lock(tmp.name, &ret, &ipt_mutex); + if (!t) + goto free; + + write_lock_bh(&t->lock); + if (t->private->number != paddc->num_counters) { + ret = -EINVAL; + goto unlock_up_free; + } + + i = 0; + IPT_ENTRY_ITERATE(t->private->entries, + t->private->size, + add_counter_to_entry, + paddc->counters, + &i); + unlock_up_free: + write_unlock_bh(&t->lock); + up(&ipt_mutex); + free: + vfree(paddc); + + return ret; +} + +static int +do_ipt_set_ctl(struct sock *sk, int cmd, void *user, unsigned int len) +{ + int ret; + + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + switch (cmd) { + case IPT_SO_SET_REPLACE: + ret = do_replace(user, len); + break; + + case IPT_SO_SET_ADD_COUNTERS: + ret = do_add_counters(user, len); + break; + + default: + duprintf("do_ipt_set_ctl: unknown request %i\n", cmd); + ret = -EINVAL; + } + + return ret; +} + +static int +do_ipt_get_ctl(struct sock *sk, int cmd, void *user, int *len) +{ + int ret; + + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + switch (cmd) { + case IPT_SO_GET_INFO: { + char name[IPT_TABLE_MAXNAMELEN]; + struct 
ipt_table *t; + + if (*len != sizeof(struct ipt_getinfo)) { + duprintf("length %u != %u\n", *len, + sizeof(struct ipt_getinfo)); + ret = -EINVAL; + break; + } + + if (copy_from_user(name, user, sizeof(name)) != 0) { + ret = -EFAULT; + break; + } + t = find_table_lock(name, &ret, &ipt_mutex); + if (t) { + struct ipt_getinfo info; + + info.valid_hooks = t->valid_hooks; + memcpy(info.hook_entry, t->private->hook_entry, + sizeof(info.hook_entry)); + memcpy(info.underflow, t->private->underflow, + sizeof(info.underflow)); + info.num_entries = t->private->number; + info.size = t->private->size; + strcpy(info.name, name); + + if (copy_to_user(user, &info, *len) != 0) + ret = -EFAULT; + else + ret = 0; + + up(&ipt_mutex); + } + } + break; + + case IPT_SO_GET_ENTRIES: { + struct ipt_get_entries get; + + if (*len < sizeof(get)) { + duprintf("get_entries: %u < %u\n", *len, sizeof(get)); + ret = -EINVAL; + } else if (copy_from_user(&get, user, sizeof(get)) != 0) { + ret = -EFAULT; + } else if (*len != sizeof(struct ipt_get_entries) + get.size) { + duprintf("get_entries: %u != %u\n", *len, + sizeof(struct ipt_get_entries) + get.size); + ret = -EINVAL; + } else + ret = get_entries(&get, user); + break; + } + + default: + duprintf("do_ipt_get_ctl: unknown request %i\n", cmd); + ret = -EINVAL; + } + + return ret; +} + +/* Registration hooks for targets. 
*/ +int +ipt_register_target(struct ipt_target *target) +{ + int ret; + + ret = down_interruptible(&ipt_mutex); + if (ret != 0) + return ret; + + if (list_named_insert(&ipt_target, target)) { + MOD_INC_USE_COUNT; + ret = 0; + } else { + duprintf("ipt_register_target: `%s' already in list!\n", + target->name); + ret = -EINVAL; + } + up(&ipt_mutex); + return ret; +} + +void +ipt_unregister_target(struct ipt_target *target) +{ + down(&ipt_mutex); + LIST_DELETE(&ipt_target, target); + up(&ipt_mutex); + MOD_DEC_USE_COUNT; +} + +int +ipt_register_match(struct ipt_match *match) +{ + int ret; + + ret = down_interruptible(&ipt_mutex); + if (ret != 0) + return ret; + + if (list_named_insert(&ipt_match, match)) { + MOD_INC_USE_COUNT; + ret = 0; + } else { + duprintf("ipt_register_match: `%s' already in list!\n", + match->name); + ret = -EINVAL; + } + up(&ipt_mutex); + + return ret; +} + +void +ipt_unregister_match(struct ipt_match *match) +{ + down(&ipt_mutex); + LIST_DELETE(&ipt_match, match); + up(&ipt_mutex); + MOD_DEC_USE_COUNT; +} + +int ipt_register_table(struct ipt_table *table) +{ + int ret; + struct ipt_table_info *newinfo; + static struct ipt_table_info bootstrap + = { 0, 0, { 0 }, { 0 }, { }, { } }; + + newinfo = vmalloc(sizeof(struct ipt_table_info) + + SMP_ALIGN(table->table->size) * smp_num_cpus); + if (!newinfo) { + ret = -ENOMEM; + return ret; + } + memcpy(newinfo->entries, table->table->entries, table->table->size); + + ret = translate_table(table->name, table->valid_hooks, + newinfo, table->table->size, + table->table->num_entries, + table->table->hook_entry, + table->table->underflow); + if (ret != 0) { + vfree(newinfo); + return ret; + } + + ret = down_interruptible(&ipt_mutex); + if (ret != 0) { + vfree(newinfo); + return ret; + } + + /* Don't autoload: we'd eat our tail... */ + if (list_named_find(&ipt_tables, table->name)) { + ret = -EEXIST; + goto free_unlock; + } + + /* Simplifies replace_table code. 
*/ + table->private = &bootstrap; + if (!replace_table(table, 0, newinfo, &ret)) + goto free_unlock; + + duprintf("table->private->number = %u\n", + table->private->number); + + table->lock = RW_LOCK_UNLOCKED; + list_prepend(&ipt_tables, table); + MOD_INC_USE_COUNT; + + unlock: + up(&ipt_mutex); + return ret; + + free_unlock: + vfree(newinfo); + goto unlock; +} + +void ipt_unregister_table(struct ipt_table *table) +{ + down(&ipt_mutex); + LIST_DELETE(&ipt_tables, table); + up(&ipt_mutex); + + /* Decrease module usage counts and free resources */ + IPT_ENTRY_ITERATE(table->private->entries, table->private->size, + cleanup_entry, NULL); + vfree(table->private); + MOD_DEC_USE_COUNT; +} + +/* Returns 1 if the port is matched by the range, 0 otherwise */ +static inline int +port_match(u_int16_t min, u_int16_t max, u_int16_t port, int invert) +{ + int ret; + + ret = (port >= min && port <= max) ^ invert; + return ret; +} + +static int +tcp_find_option(u_int8_t option, + const struct tcphdr *tcp, + u_int16_t datalen, + int invert, + int *hotdrop) +{ + unsigned int i = sizeof(struct tcphdr); + const u_int8_t *opt = (u_int8_t *)tcp; + + duprintf("tcp_match: finding option\n"); + /* If we don't have the whole header, drop packet. */ + if (tcp->doff * 4 > datalen) { + *hotdrop = 1; + return 0; + } + + while (i < tcp->doff * 4) { + if (opt[i] == option) return !invert; + if (opt[i] < 2) i++; + else i += opt[i+1]?:1; + } + + return invert; +} + +static int +tcp_match(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const void *matchinfo, + int offset, + const void *hdr, + u_int16_t datalen, + int *hotdrop) +{ + const struct tcphdr *tcp = hdr; + const struct ipt_tcp *tcpinfo = matchinfo; + + /* To quote Alan: + + Don't allow a fragment of TCP 8 bytes in. Nobody normal + causes this. Its a cracker trying to break in by doing a + flag overwrite to pass the direction checks. 
+ */ + + if (offset == 1) { + duprintf("Dropping evil TCP offset=1 frag.\n"); + *hotdrop = 1; + return 0; + } else if (offset == 0 && datalen < sizeof(struct tcphdr)) { + /* We've been asked to examine this packet, and we + can't. Hence, no choice but to drop. */ + duprintf("Dropping evil TCP offset=0 tinygram.\n"); + *hotdrop = 1; + return 0; + } + + /* FIXME: Try tcp doff >> packet len against various stacks --RR */ + +#define FWINVTCP(bool,invflg) ((bool) ^ !!(tcpinfo->invflags & invflg)) + + /* Must not be a fragment. */ + return !offset + && port_match(tcpinfo->spts[0], tcpinfo->spts[1], + ntohs(tcp->source), + !!(tcpinfo->invflags & IPT_TCP_INV_SRCPT)) + && port_match(tcpinfo->dpts[0], tcpinfo->dpts[1], + ntohs(tcp->dest), + !!(tcpinfo->invflags & IPT_TCP_INV_DSTPT)) + && FWINVTCP((((unsigned char *)tcp)[13] + & tcpinfo->flg_mask) + == tcpinfo->flg_cmp, + IPT_TCP_INV_FLAGS) + && (!tcpinfo->option + || tcp_find_option(tcpinfo->option, tcp, datalen, + tcpinfo->invflags + & IPT_TCP_INV_OPTION, + hotdrop)); +} + +/* Called when user tries to insert an entry of this type. */ +static int +tcp_checkentry(const char *tablename, + const struct ipt_ip *ip, + void *matchinfo, + unsigned int matchsize, + unsigned int hook_mask) +{ + const struct ipt_tcp *tcpinfo = matchinfo; + + /* Must specify proto == TCP, and no unknown invflags */ + return ip->proto == IPPROTO_TCP + && !(ip->invflags & IPT_INV_PROTO) + && matchsize == IPT_ALIGN(sizeof(struct ipt_tcp)) + && !(tcpinfo->invflags & ~IPT_TCP_INV_MASK); +} + +static int +udp_match(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const void *matchinfo, + int offset, + const void *hdr, + u_int16_t datalen, + int *hotdrop) +{ + const struct udphdr *udp = hdr; + const struct ipt_udp *udpinfo = matchinfo; + + if (offset == 0 && datalen < sizeof(struct udphdr)) { + /* We've been asked to examine this packet, and we + can't. Hence, no choice but to drop. 
*/ + duprintf("Dropping evil UDP tinygram.\n"); + *hotdrop = 1; + return 0; + } + + /* Must not be a fragment. */ + return !offset + && port_match(udpinfo->spts[0], udpinfo->spts[1], + ntohs(udp->source), + !!(udpinfo->invflags & IPT_UDP_INV_SRCPT)) + && port_match(udpinfo->dpts[0], udpinfo->dpts[1], + ntohs(udp->dest), + !!(udpinfo->invflags & IPT_UDP_INV_DSTPT)); +} + +/* Called when user tries to insert an entry of this type. */ +static int +udp_checkentry(const char *tablename, + const struct ipt_ip *ip, + void *matchinfo, + unsigned int matchinfosize, + unsigned int hook_mask) +{ + const struct ipt_udp *udpinfo = matchinfo; + + /* Must specify proto == UDP, and no unknown invflags */ + if (ip->proto != IPPROTO_UDP || (ip->invflags & IPT_INV_PROTO)) { + duprintf("ipt_udp: Protocol %u != %u\n", ip->proto, + IPPROTO_UDP); + return 0; + } + if (matchinfosize != IPT_ALIGN(sizeof(struct ipt_udp))) { + duprintf("ipt_udp: matchsize %u != %u\n", + matchinfosize, IPT_ALIGN(sizeof(struct ipt_udp))); + return 0; + } + if (udpinfo->invflags & ~IPT_UDP_INV_MASK) { + duprintf("ipt_udp: unknown flags %X\n", + udpinfo->invflags); + return 0; + } + + return 1; +} + +/* Returns 1 if the type and code is matched by the range, 0 otherwise */ +static inline int +icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code, + u_int8_t type, u_int8_t code, + int invert) +{ + return (type == test_type && code >= min_code && code <= max_code) + ^ invert; +} + +static int +icmp_match(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const void *matchinfo, + int offset, + const void *hdr, + u_int16_t datalen, + int *hotdrop) +{ + const struct icmphdr *icmp = hdr; + const struct ipt_icmp *icmpinfo = matchinfo; + + if (offset == 0 && datalen < 2) { + /* We've been asked to examine this packet, and we + can't. Hence, no choice but to drop. 
*/ + duprintf("Dropping evil ICMP tinygram.\n"); + *hotdrop = 1; + return 0; + } + + /* Must not be a fragment. */ + return !offset + && icmp_type_code_match(icmpinfo->type, + icmpinfo->code[0], + icmpinfo->code[1], + icmp->type, icmp->code, + !!(icmpinfo->invflags&IPT_ICMP_INV)); +} + +/* Called when user tries to insert an entry of this type. */ +static int +icmp_checkentry(const char *tablename, + const struct ipt_ip *ip, + void *matchinfo, + unsigned int matchsize, + unsigned int hook_mask) +{ + const struct ipt_icmp *icmpinfo = matchinfo; + + /* Must specify proto == ICMP, and no unknown invflags */ + return ip->proto == IPPROTO_ICMP + && !(ip->invflags & IPT_INV_PROTO) + && matchsize == IPT_ALIGN(sizeof(struct ipt_icmp)) + && !(icmpinfo->invflags & ~IPT_ICMP_INV); +} + +/* The built-in targets: standard (NULL) and error. */ +static struct ipt_target ipt_standard_target += { { NULL, NULL }, IPT_STANDARD_TARGET, NULL, NULL, NULL }; +static struct ipt_target ipt_error_target += { { NULL, NULL }, IPT_ERROR_TARGET, ipt_error, NULL, NULL }; + +static struct nf_sockopt_ops ipt_sockopts += { { NULL, NULL }, PF_INET, IPT_BASE_CTL, IPT_SO_SET_MAX+1, do_ipt_set_ctl, + IPT_BASE_CTL, IPT_SO_GET_MAX+1, do_ipt_get_ctl, 0, NULL }; + +static struct ipt_match tcp_matchstruct += { { NULL, NULL }, "tcp", &tcp_match, &tcp_checkentry, NULL }; +static struct ipt_match udp_matchstruct += { { NULL, NULL }, "udp", &udp_match, &udp_checkentry, NULL }; +static struct ipt_match icmp_matchstruct += { { NULL, NULL }, "icmp", &icmp_match, &icmp_checkentry, NULL }; + +static int __init init(void) +{ + int ret; + + /* Noone else will be downing sem now, so we won't sleep */ + down(&ipt_mutex); + list_append(&ipt_target, &ipt_standard_target); + list_append(&ipt_target, &ipt_error_target); + list_append(&ipt_match, &tcp_matchstruct); + list_append(&ipt_match, &udp_matchstruct); + list_append(&ipt_match, &icmp_matchstruct); + up(&ipt_mutex); + + /* Register setsockopt */ + ret = 
nf_register_sockopt(&ipt_sockopts); + if (ret < 0) { + duprintf("Unable to register sockopts.\n"); + return ret; + } + + printk("iptables: (c)2000 Netfilter core team\n"); + return 0; +} + +static void __exit fini(void) +{ + nf_unregister_sockopt(&ipt_sockopts); +} + +module_init(init); +module_exit(fini); diff -u --recursive --new-file v2.3.99-pre1/linux/net/ipv4/netfilter/ipchains_core.c linux/net/ipv4/netfilter/ipchains_core.c --- v2.3.99-pre1/linux/net/ipv4/netfilter/ipchains_core.c Wed Dec 31 16:00:00 1969 +++ linux/net/ipv4/netfilter/ipchains_core.c Fri Mar 17 10:56:20 2000 @@ -0,0 +1,1768 @@ +/* Minor modifications to fit on compatibility framework: + Rusty.Russell@rustcorp.com.au +*/ + +/* + * This code is heavily based on the code on the old ip_fw.c code; see below for + * copyrights and attributions of the old code. This code is basically GPL. + * + * 15-Aug-1997: Major changes to allow graphs for firewall rules. + * Paul Russell and + * Michael Neuling + * 24-Aug-1997: Generalised protocol handling (not just TCP/UDP/ICMP). + * Added explicit RETURN from chains. + * Removed TOS mangling (done in ipchains 1.0.1). + * Fixed read & reset bug by reworking proc handling. + * Paul Russell + * 28-Sep-1997: Added packet marking for net sched code. + * Removed fw_via comparisons: all done on device name now, + * similar to changes in ip_fw.c in DaveM's CVS970924 tree. + * Paul Russell + * 2-Nov-1997: Moved types across to __u16, etc. + * Added inverse flags. + * Fixed fragment bug (in args to port_match). + * Changed mark to only one flag (MARKABS). + * 21-Nov-1997: Added ability to test ICMP code. + * 19-Jan-1998: Added wildcard interfaces. + * 6-Feb-1998: Merged 2.0 and 2.1 versions. + * Initialised ip_masq for 2.0.x version. + * Added explicit NETLINK option for 2.1.x version. + * Added packet and byte counters for policy matches. + * 26-Feb-1998: Fixed race conditions, added SMP support. + * 18-Mar-1998: Fix SMP, fix race condition fix. 
+ * 1-May-1998: Remove caching of device pointer. + * 12-May-1998: Allow tiny fragment case for TCP/UDP. + * 15-May-1998: Treat short packets as fragments, don't just block. + * 3-Jan-1999: Fixed serious procfs security hole -- users should never + * be allowed to view the chains! + * Marc Santoro + * 29-Jan-1999: Locally generated bogus IPs dealt with, rather than crash + * during dump_packet. --RR. + * 19-May-1999: Star Wars: The Phantom Menace opened. Rule num + * printed in log (modified from Michael Hasenstein's patch). + * Added SYN in log message. --RR + * 23-Jul-1999: Fixed small fragment security exposure opened on 15-May-1998. + * John McDonald + * Thomas Lopatic + */ + +/* + * + * The original Linux port was done by Alan Cox, with changes/fixes from + * Pauline Middlelink, Jos Vos, Thomas Quinot, Wouter Gadeyne, Juan + * Jose Ciarlante, Bernd Eckenfels, Keith Owens and others. + * + * Copyright from the original FreeBSD version follows: + * + * Copyright (c) 1993 Daniel Boulet + * Copyright (c) 1994 Ugen J.S.Antsilevich + * + * Redistribution and use in source forms, with and without modification, + * are permitted provided that this entire comment appears intact. + * + * Redistribution in binary form may occur without any restrictions. + * Obviously, it would be nice if you gave credit where credit is due + * but requiring it would be too onerous. + * + * This software is provided ``AS IS'' without any warranties of any kind. */ + +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +/* Understanding locking in this code: (thanks to Alan Cox for using + * little words to explain this to me).
-- PR + * + * In UP, there can be two packets traversing the chains: + * 1) A packet from the current userspace context + * 2) A packet off the bh handlers (timer or net). + * + * For SMP (kernel v2.1+), multiply this by # CPUs. + * + * [Note that this is not correct for 2.2 - because the socket code always + * uses lock_kernel() to serialize, and bottom halves (timers and net_bhs) + * only run on one CPU at a time. This will probably change for 2.3. + * It is still good to use spinlocks because that avoids the global cli() + * for updating the tables, which is rather costly in SMP kernels -AK] + * + * This means counters and backchains can get corrupted if no precautions + * are taken. + * + * To actually alter a chain on UP, we need only do a cli(), as this will + * stop a bh handler firing, as we are in the current userspace context + * (coming from a setsockopt()). + * + * On SMP, we need a write_lock_irqsave(), which is a simple cli() in + * UP. + * + * For backchains and counters, we use an array, indexed by + * [cpu_number_map(smp_processor_id())*2 + !in_interrupt()]; the array is of + * size [smp_num_cpus*2]. For v2.0, smp_num_cpus is effectively 1. So, + * confident of uniqueness, we modify counters even though we only + * have a read lock (to read the counters, you need a write lock, + * though). */ + +/* Why I didn't use straight locking... -- PR + * + * The backchains can be separated out of the ip_chains structure, and + * allocated as needed inside ip_fw_check(). + * + * The counters, however, can't. Trying to lock these means blocking + * interrupts every time we want to access them. This would suck HARD + * performance-wise. Not locking them leads to possible corruption, + * made worse on 32-bit machines (counters are 64-bit).
*/ + +/*#define DEBUG_IP_FIREWALL*/ +/*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */ +/*#define DEBUG_IP_FIREWALL_USER*/ +/*#define DEBUG_IP_FIREWALL_LOCKING*/ + +static struct sock *ipfwsk; + +#ifdef CONFIG_SMP +#define SLOT_NUMBER() (cpu_number_map(smp_processor_id())*2 + !in_interrupt()) +#else /* !SMP */ +#define SLOT_NUMBER() (!in_interrupt()) +#endif /* CONFIG_SMP */ +#define NUM_SLOTS (smp_num_cpus*2) + +#define SIZEOF_STRUCT_IP_CHAIN (sizeof(struct ip_chain) \ + + NUM_SLOTS*sizeof(struct ip_reent)) +#define SIZEOF_STRUCT_IP_FW_KERNEL (sizeof(struct ip_fwkernel) \ + + NUM_SLOTS*sizeof(struct ip_counters)) + +#ifdef DEBUG_IP_FIREWALL_LOCKING +static unsigned int fwc_rlocks, fwc_wlocks; +#define FWC_DEBUG_LOCK(d) \ +do { \ + FWC_DONT_HAVE_LOCK(d); \ + d |= (1 << SLOT_NUMBER()); \ +} while (0) + +#define FWC_DEBUG_UNLOCK(d) \ +do { \ + FWC_HAVE_LOCK(d); \ + d &= ~(1 << SLOT_NUMBER()); \ +} while (0) + +#define FWC_DONT_HAVE_LOCK(d) \ +do { \ + if ((d) & (1 << SLOT_NUMBER())) \ + printk("%s:%i: Got lock on %i already!\n", \ + __FILE__, __LINE__, SLOT_NUMBER()); \ +} while(0) + +#define FWC_HAVE_LOCK(d) \ +do { \ + if (!((d) & (1 << SLOT_NUMBER()))) \ + printk("%s:%i:No lock on %i!\n", \ + __FILE__, __LINE__, SLOT_NUMBER()); \ +} while (0) + +#else +#define FWC_DEBUG_LOCK(d) do { } while(0) +#define FWC_DEBUG_UNLOCK(d) do { } while(0) +#define FWC_DONT_HAVE_LOCK(d) do { } while(0) +#define FWC_HAVE_LOCK(d) do { } while(0) +#endif /*DEBUG_IP_FIRWALL_LOCKING*/ + +#define FWC_READ_LOCK(l) do { FWC_DEBUG_LOCK(fwc_rlocks); read_lock(l); } while (0) +#define FWC_WRITE_LOCK(l) do { FWC_DEBUG_LOCK(fwc_wlocks); write_lock(l); } while (0) +#define FWC_READ_LOCK_IRQ(l,f) do { FWC_DEBUG_LOCK(fwc_rlocks); read_lock_irqsave(l,f); } while (0) +#define FWC_WRITE_LOCK_IRQ(l,f) do { FWC_DEBUG_LOCK(fwc_wlocks); write_lock_irqsave(l,f); } while (0) +#define FWC_READ_UNLOCK(l) do { FWC_DEBUG_UNLOCK(fwc_rlocks); read_unlock(l); } while (0) +#define FWC_WRITE_UNLOCK(l) do 
{ FWC_DEBUG_UNLOCK(fwc_wlocks); write_unlock(l); } while (0) +#define FWC_READ_UNLOCK_IRQ(l,f) do { FWC_DEBUG_UNLOCK(fwc_rlocks); read_unlock_irqrestore(l,f); } while (0) +#define FWC_WRITE_UNLOCK_IRQ(l,f) do { FWC_DEBUG_UNLOCK(fwc_wlocks); write_unlock_irqrestore(l,f); } while (0) + +struct ip_chain; + +struct ip_counters +{ + __u64 pcnt, bcnt; /* Packet and byte counters */ +}; + +struct ip_fwkernel +{ + struct ip_fw ipfw; + struct ip_fwkernel *next; /* where to go next if current + * rule doesn't match */ + struct ip_chain *branch; /* which branch to jump to if + * current rule matches */ + int simplebranch; /* Use this if branch == NULL */ + struct ip_counters counters[0]; /* Actually several of these */ +}; + +struct ip_reent +{ + struct ip_chain *prevchain; /* Pointer to referencing chain */ + struct ip_fwkernel *prevrule; /* Pointer to referencing rule */ + struct ip_counters counters; +}; + +struct ip_chain +{ + ip_chainlabel label; /* Defines the label for each block */ + struct ip_chain *next; /* Pointer to next block */ + struct ip_fwkernel *chain; /* Pointer to first rule in block */ + __u32 refcount; /* Number of refernces to block */ + int policy; /* Default rule for chain. Only * + * used in built in chains */ + struct ip_reent reent[0]; /* Actually several of these */ +}; + +/* + * Implement IP packet firewall + */ + +#ifdef DEBUG_IP_FIREWALL +#define dprintf(format, args...) printk(format , ## args) +#else +#define dprintf(format, args...) +#endif + +#ifdef DEBUG_IP_FIREWALL_USER +#define duprintf(format, args...) printk(format , ## args) +#else +#define duprintf(format, args...) 
+#endif + +/* Lock around ip_fw_chains linked list structure */ +rwlock_t ip_fw_lock = RW_LOCK_UNLOCKED; + +/* Head of linked list of fw rules */ +static struct ip_chain *ip_fw_chains; + +#define IP_FW_INPUT_CHAIN ip_fw_chains +#define IP_FW_FORWARD_CHAIN (ip_fw_chains->next) +#define IP_FW_OUTPUT_CHAIN (ip_fw_chains->next->next) + +/* Returns 1 if the port is matched by the range, 0 otherwise */ +extern inline int port_match(__u16 min, __u16 max, __u16 port, + int frag, int invert) +{ + if (frag) /* Fragments fail ANY port test. */ + return (min == 0 && max == 0xFFFF); + else return (port >= min && port <= max) ^ invert; +} + +/* Returns whether matches rule or not. */ +static int ip_rule_match(struct ip_fwkernel *f, + const char *ifname, + struct iphdr *ip, + char tcpsyn, + __u16 src_port, __u16 dst_port, + char isfrag) +{ +#define FWINV(bool,invflg) ((bool) ^ !!(f->ipfw.fw_invflg & invflg)) + /* + * This is a bit simpler as we don't have to walk + * an interface chain as you do in BSD - same logic + * however. + */ + + if (FWINV((ip->saddr&f->ipfw.fw_smsk.s_addr) != f->ipfw.fw_src.s_addr, + IP_FW_INV_SRCIP) + || FWINV((ip->daddr&f->ipfw.fw_dmsk.s_addr)!=f->ipfw.fw_dst.s_addr, + IP_FW_INV_DSTIP)) { + dprintf("Source or dest mismatch.\n"); + + dprintf("SRC: %u. Mask: %u. Target: %u.%s\n", ip->saddr, + f->ipfw.fw_smsk.s_addr, f->ipfw.fw_src.s_addr, + f->ipfw.fw_invflg & IP_FW_INV_SRCIP ? " (INV)" : ""); + dprintf("DST: %u. Mask: %u. Target: %u.%s\n", ip->daddr, + f->ipfw.fw_dmsk.s_addr, f->ipfw.fw_dst.s_addr, + f->ipfw.fw_invflg & IP_FW_INV_DSTIP ? " (INV)" : ""); + return 0; + } + + /* + * Look for a VIA device match + */ + if (f->ipfw.fw_flg & IP_FW_F_WILDIF) { + if (FWINV(strncmp(ifname, f->ipfw.fw_vianame, + strlen(f->ipfw.fw_vianame)) != 0, + IP_FW_INV_VIA)) { + dprintf("Wildcard interface mismatch.%s\n", + f->ipfw.fw_invflg & IP_FW_INV_VIA ? 
" (INV)" : ""); + return 0; /* Mismatch */ + } + } + else if (FWINV(strcmp(ifname, f->ipfw.fw_vianame) != 0, + IP_FW_INV_VIA)) { + dprintf("Interface name does not match.%s\n", + f->ipfw.fw_invflg & IP_FW_INV_VIA + ? " (INV)" : ""); + return 0; /* Mismatch */ + } + + /* + * Ok the chain addresses match. + */ + + /* If we have a fragment rule but the packet is not a fragment + * the we return zero */ + if (FWINV((f->ipfw.fw_flg&IP_FW_F_FRAG) && !isfrag, IP_FW_INV_FRAG)) { + dprintf("Fragment rule but not fragment.%s\n", + f->ipfw.fw_invflg & IP_FW_INV_FRAG ? " (INV)" : ""); + return 0; + } + + /* Fragment NEVER passes a SYN test, even an inverted one. */ + if (FWINV((f->ipfw.fw_flg&IP_FW_F_TCPSYN) && !tcpsyn, IP_FW_INV_SYN) + || (isfrag && (f->ipfw.fw_flg&IP_FW_F_TCPSYN))) { + dprintf("Rule requires SYN and packet has no SYN.%s\n", + f->ipfw.fw_invflg & IP_FW_INV_SYN ? " (INV)" : ""); + return 0; + } + + if (f->ipfw.fw_proto) { + /* + * Specific firewall - packet's protocol + * must match firewall's. + */ + + if (FWINV(ip->protocol!=f->ipfw.fw_proto, IP_FW_INV_PROTO)) { + dprintf("Packet protocol %hi does not match %hi.%s\n", + ip->protocol, f->ipfw.fw_proto, + f->ipfw.fw_invflg&IP_FW_INV_PROTO ? " (INV)":""); + return 0; + } + + /* For non TCP/UDP/ICMP, port range is max anyway. 
*/ + if (!port_match(f->ipfw.fw_spts[0], + f->ipfw.fw_spts[1], + src_port, isfrag, + !!(f->ipfw.fw_invflg&IP_FW_INV_SRCPT)) + || !port_match(f->ipfw.fw_dpts[0], + f->ipfw.fw_dpts[1], + dst_port, isfrag, + !!(f->ipfw.fw_invflg + &IP_FW_INV_DSTPT))) { + dprintf("Port match failed.\n"); + return 0; + } + } + + dprintf("Match succeeded.\n"); + return 1; +} + +static const char *branchname(struct ip_chain *branch,int simplebranch) +{ + if (branch) + return branch->label; + switch (simplebranch) + { + case FW_BLOCK: return IP_FW_LABEL_BLOCK; + case FW_ACCEPT: return IP_FW_LABEL_ACCEPT; + case FW_REJECT: return IP_FW_LABEL_REJECT; + case FW_REDIRECT: return IP_FW_LABEL_REDIRECT; + case FW_MASQUERADE: return IP_FW_LABEL_MASQUERADE; + case FW_SKIP: return "-"; + case FW_SKIP+1: return IP_FW_LABEL_RETURN; + default: + return "UNKNOWN"; + } +} + +/* + * VERY ugly piece of code which actually + * makes kernel printf for matching packets... + */ +static void dump_packet(const struct iphdr *ip, + const char *ifname, + struct ip_fwkernel *f, + const ip_chainlabel chainlabel, + __u16 src_port, + __u16 dst_port, + unsigned int count, + int syn) +{ + __u32 *opt = (__u32 *) (ip + 1); + int opti; + + if (f) { + printk(KERN_INFO "Packet log: %s ",chainlabel); + printk("%s ",branchname(f->branch,f->simplebranch)); + if (f->simplebranch==FW_REDIRECT) + printk("%d ",f->ipfw.fw_redirpt); + } + + printk("%s PROTO=%d %d.%d.%d.%d:%hu %d.%d.%d.%d:%hu" + " L=%hu S=0x%2.2hX I=%hu F=0x%4.4hX T=%hu", + ifname, ip->protocol, + (ntohl(ip->saddr)>>24)&0xFF, + (ntohl(ip->saddr)>>16)&0xFF, + (ntohl(ip->saddr)>>8)&0xFF, + (ntohl(ip->saddr))&0xFF, + src_port, + (ntohl(ip->daddr)>>24)&0xFF, + (ntohl(ip->daddr)>>16)&0xFF, + (ntohl(ip->daddr)>>8)&0xFF, + (ntohl(ip->daddr))&0xFF, + dst_port, + ntohs(ip->tot_len), ip->tos, ntohs(ip->id), + ntohs(ip->frag_off), ip->ttl); + + for (opti = 0; opti < (ip->ihl - sizeof(struct iphdr) / 4); opti++) + printk(" O=0x%8.8X", *opt++); + printk(" %s(#%d)\n", syn ? 
"SYN " : /* "PENANCE" */ "", count); +} + +/* function for checking chain labels for user space. */ +static int check_label(ip_chainlabel label) +{ + unsigned int i; + /* strlen must be < IP_FW_MAX_LABEL_LENGTH. */ + for (i = 0; i < IP_FW_MAX_LABEL_LENGTH + 1; i++) + if (label[i] == '\0') return 1; + + return 0; +} + +/* This function returns a pointer to the first chain with a label + * that matches the one given. */ +static struct ip_chain *find_label(ip_chainlabel label) +{ + struct ip_chain *tmp; + FWC_HAVE_LOCK(fwc_rlocks | fwc_wlocks); + for (tmp = ip_fw_chains; tmp; tmp = tmp->next) + if (strcmp(tmp->label,label) == 0) + break; + return tmp; +} + +/* This function returns a boolean which when true sets answer to one + of the FW_*. */ +static int find_special(ip_chainlabel label, int *answer) +{ + if (label[0] == '\0') { + *answer = FW_SKIP; /* => pass-through rule */ + return 1; + } else if (strcmp(label,IP_FW_LABEL_ACCEPT) == 0) { + *answer = FW_ACCEPT; + return 1; + } else if (strcmp(label,IP_FW_LABEL_BLOCK) == 0) { + *answer = FW_BLOCK; + return 1; + } else if (strcmp(label,IP_FW_LABEL_REJECT) == 0) { + *answer = FW_REJECT; + return 1; + } else if (strcmp(label,IP_FW_LABEL_REDIRECT) == 0) { + *answer = FW_REDIRECT; + return 1; + } else if (strcmp(label,IP_FW_LABEL_MASQUERADE) == 0) { + *answer = FW_MASQUERADE; + return 1; + } else if (strcmp(label, IP_FW_LABEL_RETURN) == 0) { + *answer = FW_SKIP+1; + return 1; + } else { + return 0; + } +} + +/* This function cleans up the prevchain and prevrule. If the verbose + * flag is set then he names of the chains will be printed as it + * cleans up. 
*/ +static void cleanup(struct ip_chain *chain, + const int verbose, + unsigned int slot) +{ + struct ip_chain *tmpchain = chain->reent[slot].prevchain; + if (verbose) + printk(KERN_ERR "Chain backtrace: "); + while (tmpchain) { + if (verbose) + printk("%s<-",chain->label); + chain->reent[slot].prevchain = NULL; + chain = tmpchain; + tmpchain = chain->reent[slot].prevchain; + } + if (verbose) + printk("%s\n",chain->label); +} + +static inline int +ip_fw_domatch(struct ip_fwkernel *f, + struct iphdr *ip, + const char *rif, + const ip_chainlabel label, + struct sk_buff *skb, + unsigned int slot, + __u16 src_port, __u16 dst_port, + unsigned int count, + int tcpsyn) +{ + f->counters[slot].bcnt+=ntohs(ip->tot_len); + f->counters[slot].pcnt++; + if (f->ipfw.fw_flg & IP_FW_F_PRN) { + dump_packet(ip,rif,f,label,src_port,dst_port,count,tcpsyn); + } + ip->tos = (ip->tos & f->ipfw.fw_tosand) ^ f->ipfw.fw_tosxor; + +/* This functionality is useless in stock 2.0.x series, but we don't + * discard the mark thing altogether, to avoid breaking ipchains (and, + * more importantly, the ipfwadm wrapper) --PR */ + if (f->ipfw.fw_flg & IP_FW_F_MARKABS) { + skb->nfmark = f->ipfw.fw_mark; + } else { + skb->nfmark += f->ipfw.fw_mark; + } + if (f->ipfw.fw_flg & IP_FW_F_NETLINK) { +#if defined(CONFIG_NETLINK_DEV) || defined(CONFIG_NETLINK_DEV_MODULE) + size_t len = min(f->ipfw.fw_outputsize, ntohs(ip->tot_len)) + + sizeof(__u32) + sizeof(skb->nfmark) + IFNAMSIZ; + struct sk_buff *outskb=alloc_skb(len, GFP_ATOMIC); + + duprintf("Sending packet out NETLINK (length = %u).\n", + (unsigned int)len); + if (outskb) { + /* Prepend length, mark & interface */ + skb_put(outskb, len); + *((__u32 *)outskb->data) = (__u32)len; + *((__u32 *)(outskb->data+sizeof(__u32))) = skb->nfmark; + strcpy(outskb->data+sizeof(__u32)*2, rif); + memcpy(outskb->data+sizeof(__u32)*2+IFNAMSIZ, ip, + len-(sizeof(__u32)*2+IFNAMSIZ)); + netlink_broadcast(ipfwsk, outskb, 0, ~0, GFP_KERNEL); + } + else { +#endif + if 
(net_ratelimit()) + printk(KERN_WARNING "ip_fw: packet drop due to " + "netlink failure\n"); + return 0; +#if defined(CONFIG_NETLINK_DEV) || defined(CONFIG_NETLINK_DEV_MODULE) + } +#endif + } + return 1; +} + +/* + * Returns one of the generic firewall policies, like FW_ACCEPT. + * + * The testing is either false for normal firewall mode or true for + * user checking mode (counters are not updated, TOS & mark not done). + */ +static int +ip_fw_check(struct iphdr *ip, + const char *rif, + __u16 *redirport, + struct ip_chain *chain, + struct sk_buff *skb, + unsigned int slot, + int testing) +{ + struct tcphdr *tcp=(struct tcphdr *)((__u32 *)ip+ip->ihl); + struct udphdr *udp=(struct udphdr *)((__u32 *)ip+ip->ihl); + struct icmphdr *icmp=(struct icmphdr *)((__u32 *)ip+ip->ihl); + __u32 src, dst; + __u16 src_port = 0xFFFF, dst_port = 0xFFFF; + char tcpsyn=0; + __u16 offset; + unsigned char oldtos; + struct ip_fwkernel *f; + int ret = FW_SKIP+2; + unsigned int count; + + /* We handle fragments by dealing with the first fragment as + * if it was a normal packet. All other fragments are treated + * normally, except that they will NEVER match rules that ask + * things we don't know, ie. tcp syn flag or ports). If the + * rule is also a fragment-specific rule, non-fragments won't + * match it. */ + + offset = ntohs(ip->frag_off) & IP_OFFSET; + + /* + * Don't allow a fragment of TCP 8 bytes in. Nobody + * normal causes this. Its a cracker trying to break + * in by doing a flag overwrite to pass the direction + * checks. + */ + if (offset == 1 && ip->protocol == IPPROTO_TCP) { + if (!testing && net_ratelimit()) { + printk("Suspect TCP fragment.\n"); + dump_packet(ip,rif,NULL,NULL,0,0,0,0); + } + return FW_BLOCK; + } + + /* If we can't investigate ports, treat as fragment. It's + * either a trucated whole packet, or a truncated first + * fragment, or a TCP first fragment of length 8-15, in which + * case the above rule stops reassembly. 
+ */ + if (offset == 0) { + unsigned int size_req; + switch (ip->protocol) { + case IPPROTO_TCP: + /* Don't care about things past flags word */ + size_req = 16; + break; + + case IPPROTO_UDP: + case IPPROTO_ICMP: + size_req = 8; + break; + + default: + size_req = 0; + } + + /* If it is a truncated first fragment then it can be + * used to rewrite port information, and thus should + * be blocked. + */ + if (ntohs(ip->tot_len) < (ip->ihl<<2)+size_req) { + if (!testing && net_ratelimit()) { + printk("Suspect short first fragment.\n"); + dump_packet(ip,rif,NULL,NULL,0,0,0,0); + } + return FW_BLOCK; + } + } + + src = ip->saddr; + dst = ip->daddr; + oldtos = ip->tos; + + /* + * If we got interface from which packet came + * we can use the address directly. Linux 2.1 now uses address + * chains per device too, but unlike BSD we first check if the + * incoming packet matches a device address and the routing + * table before calling the firewall. + */ + + dprintf("Packet "); + switch(ip->protocol) + { + case IPPROTO_TCP: + dprintf("TCP "); + if (!offset) { + src_port=ntohs(tcp->source); + dst_port=ntohs(tcp->dest); + + /* Connection initilisation can only + * be made when the syn bit is set and + * neither of the ack or reset is + * set. 
*/ + if(tcp->syn && !(tcp->ack || tcp->rst)) + tcpsyn=1; + } + break; + case IPPROTO_UDP: + dprintf("UDP "); + if (!offset) { + src_port=ntohs(udp->source); + dst_port=ntohs(udp->dest); + } + break; + case IPPROTO_ICMP: + if (!offset) { + src_port=(__u16)icmp->type; + dst_port=(__u16)icmp->code; + } + dprintf("ICMP "); + break; + default: + dprintf("p=%d ",ip->protocol); + break; + } +#ifdef DEBUG_IP_FIREWALL + print_ip(ip->saddr); + + if (offset) + dprintf(":fragment (%i) ", ((int)offset)<<2); + else if (ip->protocol==IPPROTO_TCP || ip->protocol==IPPROTO_UDP + || ip->protocol==IPPROTO_ICMP) + dprintf(":%hu:%hu", src_port, dst_port); + dprintf("\n"); +#endif + + if (!testing) FWC_READ_LOCK(&ip_fw_lock); + else FWC_HAVE_LOCK(fwc_rlocks); + + f = chain->chain; + do { + count = 0; + for (; f; f = f->next) { + count++; + if (ip_rule_match(f,rif,ip, + tcpsyn,src_port,dst_port,offset)) { + if (!testing + && !ip_fw_domatch(f, ip, rif, chain->label, + skb, slot, + src_port, dst_port, + count, tcpsyn)) { + ret = FW_BLOCK; + goto out; + } + break; + } + } + if (f) { + if (f->branch) { + /* Do sanity check to see if we have + * already set prevchain and if so we + * must be in a loop */ + if (f->branch->reent[slot].prevchain) { + if (!testing) { + printk(KERN_ERR + "IP firewall: " + "Loop detected " + "at `%s'.\n", + f->branch->label); + cleanup(chain, 1, slot); + ret = FW_BLOCK; + } else { + cleanup(chain, 0, slot); + ret = FW_SKIP+1; + } + } + else { + f->branch->reent[slot].prevchain + = chain; + f->branch->reent[slot].prevrule + = f->next; + chain = f->branch; + f = chain->chain; + } + } + else if (f->simplebranch == FW_SKIP) + f = f->next; + else if (f->simplebranch == FW_SKIP+1) { + /* Just like falling off the chain */ + goto fall_off_chain; + } else { + cleanup(chain, 0, slot); + ret = f->simplebranch; + } + } /* f == NULL */ + else { + fall_off_chain: + if (chain->reent[slot].prevchain) { + struct ip_chain *tmp = chain; + f = chain->reent[slot].prevrule; + chain = 
chain->reent[slot].prevchain; + tmp->reent[slot].prevchain = NULL; + } + else { + ret = chain->policy; + if (!testing) { + chain->reent[slot].counters.pcnt++; + chain->reent[slot].counters.bcnt + += ntohs(ip->tot_len); + } + } + } + } while (ret == FW_SKIP+2); + + out: + if (!testing) FWC_READ_UNLOCK(&ip_fw_lock); + + /* Recalculate checksum if not going to reject, and TOS changed. */ + if (ip->tos != oldtos + && ret != FW_REJECT && ret != FW_BLOCK + && !testing) + ip_send_check(ip); + + if (ret == FW_REDIRECT && redirport) { + if ((*redirport = htons(f->ipfw.fw_redirpt)) == 0) { + /* Wildcard redirection. + * Note that redirport will become + * 0xFFFF for non-TCP/UDP packets. + */ + *redirport = htons(dst_port); + } + } + +#ifdef DEBUG_ALLOW_ALL + return (testing ? ret : FW_ACCEPT); +#else + return ret; +#endif +} + +/* Must have write lock & interrupts off for any of these */ + +/* This function sets all the byte counters in a chain to zero. The + * input is a pointer to the chain required for zeroing */ +static int zero_fw_chain(struct ip_chain *chainptr) +{ + struct ip_fwkernel *i; + + FWC_HAVE_LOCK(fwc_wlocks); + for (i = chainptr->chain; i; i = i->next) + memset(i->counters, 0, sizeof(struct ip_counters)*NUM_SLOTS); + return 0; +} + +static int clear_fw_chain(struct ip_chain *chainptr) +{ + struct ip_fwkernel *i= chainptr->chain; + + FWC_HAVE_LOCK(fwc_wlocks); + chainptr->chain=NULL; + + while (i) { + struct ip_fwkernel *tmp = i->next; + if (i->branch) + i->branch->refcount--; + kfree(i); + i = tmp; + } + return 0; +} + +static int replace_in_chain(struct ip_chain *chainptr, + struct ip_fwkernel *frwl, + __u32 position) +{ + struct ip_fwkernel *f = chainptr->chain; + + FWC_HAVE_LOCK(fwc_wlocks); + + while (--position && f != NULL) f = f->next; + if (f == NULL) + return EINVAL; + + if (f->branch) f->branch->refcount--; + if (frwl->branch) frwl->branch->refcount++; + + frwl->next = f->next; + memcpy(f,frwl,sizeof(struct ip_fwkernel)); + kfree(frwl); + return 0; 
+} + +static int append_to_chain(struct ip_chain *chainptr, struct ip_fwkernel *rule) +{ + struct ip_fwkernel *i; + + FWC_HAVE_LOCK(fwc_wlocks); + /* Special case if no rules already present */ + if (chainptr->chain == NULL) { + + /* If pointer writes are atomic then turning off + * interupts is not necessary. */ + chainptr->chain = rule; + if (rule->branch) rule->branch->refcount++; + return 0; + } + + /* Find the rule before the end of the chain */ + for (i = chainptr->chain; i->next; i = i->next); + i->next = rule; + if (rule->branch) rule->branch->refcount++; + return 0; +} + +/* This function inserts a rule at the position of position in the + * chain refenced by chainptr. If position is 1 then this rule will + * become the new rule one. */ +static int insert_in_chain(struct ip_chain *chainptr, + struct ip_fwkernel *frwl, + __u32 position) +{ + struct ip_fwkernel *f = chainptr->chain; + + FWC_HAVE_LOCK(fwc_wlocks); + /* special case if the position is number 1 */ + if (position == 1) { + frwl->next = chainptr->chain; + if (frwl->branch) frwl->branch->refcount++; + chainptr->chain = frwl; + return 0; + } + position--; + while (--position && f != NULL) f = f->next; + if (f == NULL) + return EINVAL; + if (frwl->branch) frwl->branch->refcount++; + frwl->next = f->next; + + f->next = frwl; + return 0; +} + +/* This function deletes the a rule from a given rulenum and chain. + * With rulenum = 1 is the first rule is deleted. 
*/ + +static int del_num_from_chain(struct ip_chain *chainptr, __u32 rulenum) +{ + struct ip_fwkernel *i=chainptr->chain,*tmp; + + FWC_HAVE_LOCK(fwc_wlocks); + + if (!chainptr->chain) + return ENOENT; + + /* Need a special case for the first rule */ + if (rulenum == 1) { + /* store temp to allow for freeing up of memory */ + tmp = chainptr->chain; + if (chainptr->chain->branch) chainptr->chain->branch->refcount--; + chainptr->chain = chainptr->chain->next; + kfree(tmp); /* free memory that is now unused */ + } else { + rulenum--; + while (--rulenum && i->next ) i = i->next; + if (!i->next) + return ENOENT; + tmp = i->next; + if (i->next->branch) + i->next->branch->refcount--; + i->next = i->next->next; + kfree(tmp); + } + return 0; +} + + +/* This function deletes the a rule from a given rule and chain. + * The rule that is deleted is the first occursance of that rule. */ +static int del_rule_from_chain(struct ip_chain *chainptr, + struct ip_fwkernel *frwl) +{ + struct ip_fwkernel *ltmp,*ftmp = chainptr->chain ; + int was_found; + + FWC_HAVE_LOCK(fwc_wlocks); + + /* Sure, we should compare marks, but since the `ipfwadm' + * script uses it for an unholy hack... well, life is easier + * this way. We also mask it out of the flags word. 
--PR */ + for (ltmp=NULL, was_found=0; + !was_found && ftmp != NULL; + ltmp = ftmp,ftmp = ftmp->next) { + if (ftmp->ipfw.fw_src.s_addr!=frwl->ipfw.fw_src.s_addr + || ftmp->ipfw.fw_dst.s_addr!=frwl->ipfw.fw_dst.s_addr + || ftmp->ipfw.fw_smsk.s_addr!=frwl->ipfw.fw_smsk.s_addr + || ftmp->ipfw.fw_dmsk.s_addr!=frwl->ipfw.fw_dmsk.s_addr +#if 0 + || ftmp->ipfw.fw_flg!=frwl->ipfw.fw_flg +#else + || ((ftmp->ipfw.fw_flg & ~IP_FW_F_MARKABS) + != (frwl->ipfw.fw_flg & ~IP_FW_F_MARKABS)) +#endif + || ftmp->ipfw.fw_invflg!=frwl->ipfw.fw_invflg + || ftmp->ipfw.fw_proto!=frwl->ipfw.fw_proto +#if 0 + || ftmp->ipfw.fw_mark!=frwl->ipfw.fw_mark +#endif + || ftmp->ipfw.fw_redirpt!=frwl->ipfw.fw_redirpt + || ftmp->ipfw.fw_spts[0]!=frwl->ipfw.fw_spts[0] + || ftmp->ipfw.fw_spts[1]!=frwl->ipfw.fw_spts[1] + || ftmp->ipfw.fw_dpts[0]!=frwl->ipfw.fw_dpts[0] + || ftmp->ipfw.fw_dpts[1]!=frwl->ipfw.fw_dpts[1] + || ftmp->ipfw.fw_outputsize!=frwl->ipfw.fw_outputsize) { + duprintf("del_rule_from_chain: mismatch:" + "src:%u/%u dst:%u/%u smsk:%u/%u dmsk:%u/%u " + "flg:%hX/%hX invflg:%hX/%hX proto:%u/%u " + "mark:%u/%u " + "ports:%hu-%hu/%hu-%hu %hu-%hu/%hu-%hu " + "outputsize:%hu-%hu\n", + ftmp->ipfw.fw_src.s_addr, + frwl->ipfw.fw_src.s_addr, + ftmp->ipfw.fw_dst.s_addr, + frwl->ipfw.fw_dst.s_addr, + ftmp->ipfw.fw_smsk.s_addr, + frwl->ipfw.fw_smsk.s_addr, + ftmp->ipfw.fw_dmsk.s_addr, + frwl->ipfw.fw_dmsk.s_addr, + ftmp->ipfw.fw_flg, + frwl->ipfw.fw_flg, + ftmp->ipfw.fw_invflg, + frwl->ipfw.fw_invflg, + ftmp->ipfw.fw_proto, + frwl->ipfw.fw_proto, + ftmp->ipfw.fw_mark, + frwl->ipfw.fw_mark, + ftmp->ipfw.fw_spts[0], + frwl->ipfw.fw_spts[0], + ftmp->ipfw.fw_spts[1], + frwl->ipfw.fw_spts[1], + ftmp->ipfw.fw_dpts[0], + frwl->ipfw.fw_dpts[0], + ftmp->ipfw.fw_dpts[1], + frwl->ipfw.fw_dpts[1], + ftmp->ipfw.fw_outputsize, + frwl->ipfw.fw_outputsize); + continue; + } + + if (strncmp(ftmp->ipfw.fw_vianame, + frwl->ipfw.fw_vianame, + IFNAMSIZ)) { + duprintf("del_rule_from_chain: if mismatch: %s/%s\n", + 
ftmp->ipfw.fw_vianame, + frwl->ipfw.fw_vianame); + continue; + } + if (ftmp->branch != frwl->branch) { + duprintf("del_rule_from_chain: branch mismatch: " + "%s/%s\n", + ftmp->branch?ftmp->branch->label:"(null)", + frwl->branch?frwl->branch->label:"(null)"); + continue; + } + if (ftmp->branch == NULL + && ftmp->simplebranch != frwl->simplebranch) { + duprintf("del_rule_from_chain: simplebranch mismatch: " + "%i/%i\n", + ftmp->simplebranch, frwl->simplebranch); + continue; + } + was_found = 1; + if (ftmp->branch) + ftmp->branch->refcount--; + if (ltmp) + ltmp->next = ftmp->next; + else + chainptr->chain = ftmp->next; + kfree(ftmp); + break; + } + + if (was_found) + return 0; + else { + duprintf("del_rule_from_chain: no matching rule found\n"); + return EINVAL; + } +} + +/* This function takes the label of a chain and deletes the first + * chain with that name. No special cases required for the built in + * chains as they have their refcount initilised to 1 so that they are + * never deleted. */ +static int del_chain(ip_chainlabel label) +{ + struct ip_chain *tmp,*tmp2; + + FWC_HAVE_LOCK(fwc_wlocks); + /* Corner case: return EBUSY not ENOENT for first elem ("input") */ + if (strcmp(label, ip_fw_chains->label) == 0) + return EBUSY; + + for (tmp = ip_fw_chains; tmp->next; tmp = tmp->next) + if(strcmp(tmp->next->label,label) == 0) + break; + + tmp2 = tmp->next; + if (!tmp2) + return ENOENT; + + if (tmp2->refcount) + return EBUSY; + + if (tmp2->chain) + return ENOTEMPTY; + + tmp->next = tmp2->next; + kfree(tmp2); + return 0; +} + +/* This is a function to initilise a chain. Built in rules start with + * refcount = 1 so that they cannot be deleted. User defined rules + * start with refcount = 0 so they can be deleted. 
*/ +static struct ip_chain *ip_init_chain(ip_chainlabel name, + __u32 ref, + int policy) +{ + unsigned int i; + struct ip_chain *label + = kmalloc(SIZEOF_STRUCT_IP_CHAIN, GFP_KERNEL); + if (label == NULL) + panic("Can't kmalloc for firewall chains.\n"); + strcpy(label->label,name); + label->next = NULL; + label->chain = NULL; + label->refcount = ref; + label->policy = policy; + for (i = 0; i < smp_num_cpus*2; i++) { + label->reent[i].counters.pcnt = label->reent[i].counters.bcnt + = 0; + label->reent[i].prevchain = NULL; + label->reent[i].prevrule = NULL; + } + + return label; +} + +/* This is a function for reating a new chain. The chains is not + * created if a chain of the same name already exists */ +static int create_chain(ip_chainlabel label) +{ + struct ip_chain *tmp; + + if (!check_label(label)) + return EINVAL; + + FWC_HAVE_LOCK(fwc_wlocks); + for (tmp = ip_fw_chains; tmp->next; tmp = tmp->next) + if (strcmp(tmp->label,label) == 0) + return EEXIST; + + if (strcmp(tmp->label,label) == 0) + return EEXIST; + + tmp->next = ip_init_chain(label, 0, FW_SKIP); /* refcount is + * zero since this is a + * user defined chain * + * and therefore can be + * deleted */ + return 0; +} + +/* This function simply changes the policy on one of the built in + * chains. checking must be done before this is call to ensure that + * chainptr is pointing to one of the three possible chains */ +static int change_policy(struct ip_chain *chainptr, int policy) +{ + FWC_HAVE_LOCK(fwc_wlocks); + chainptr->policy = policy; + return 0; +} + +/* This function takes an ip_fwuser and converts it to a ip_fwkernel. It also + * performs some checks in the structure. 
*/ +static struct ip_fwkernel *convert_ipfw(struct ip_fwuser *fwuser, int *errno) +{ + struct ip_fwkernel *fwkern; + + if ( (fwuser->ipfw.fw_flg & ~IP_FW_F_MASK) != 0 ) { + duprintf("convert_ipfw: undefined flag bits set (flags=%x)\n", + fwuser->ipfw.fw_flg); + *errno = EINVAL; + return NULL; + } + +#ifdef DEBUG_IP_FIREWALL_USER + /* These are sanity checks that don't really matter. + * We can get rid of these once testing is complete. + */ + if ((fwuser->ipfw.fw_flg & IP_FW_F_TCPSYN) + && ((fwuser->ipfw.fw_invflg & IP_FW_INV_PROTO) + || fwuser->ipfw.fw_proto != IPPROTO_TCP)) { + duprintf("convert_ipfw: TCP SYN flag set but proto != TCP!\n"); + *errno = EINVAL; + return NULL; + } + + if (strcmp(fwuser->label, IP_FW_LABEL_REDIRECT) != 0 + && fwuser->ipfw.fw_redirpt != 0) { + duprintf("convert_ipfw: Target not REDIR but redirpt != 0!\n"); + *errno = EINVAL; + return NULL; + } + + if ((!(fwuser->ipfw.fw_flg & IP_FW_F_FRAG) + && (fwuser->ipfw.fw_invflg & IP_FW_INV_FRAG)) + || (!(fwuser->ipfw.fw_flg & IP_FW_F_TCPSYN) + && (fwuser->ipfw.fw_invflg & IP_FW_INV_SYN))) { + duprintf("convert_ipfw: Can't have INV flag if flag unset!\n"); + *errno = EINVAL; + return NULL; + } + + if (((fwuser->ipfw.fw_invflg & IP_FW_INV_SRCPT) + && fwuser->ipfw.fw_spts[0] == 0 + && fwuser->ipfw.fw_spts[1] == 0xFFFF) + || ((fwuser->ipfw.fw_invflg & IP_FW_INV_DSTPT) + && fwuser->ipfw.fw_dpts[0] == 0 + && fwuser->ipfw.fw_dpts[1] == 0xFFFF) + || ((fwuser->ipfw.fw_invflg & IP_FW_INV_VIA) + && (fwuser->ipfw.fw_vianame)[0] == '\0') + || ((fwuser->ipfw.fw_invflg & IP_FW_INV_SRCIP) + && fwuser->ipfw.fw_smsk.s_addr == 0) + || ((fwuser->ipfw.fw_invflg & IP_FW_INV_DSTIP) + && fwuser->ipfw.fw_dmsk.s_addr == 0)) { + duprintf("convert_ipfw: INV flag makes rule unmatchable!\n"); + *errno = EINVAL; + return NULL; + } + + if ((fwuser->ipfw.fw_flg & IP_FW_F_FRAG) + && !(fwuser->ipfw.fw_invflg & IP_FW_INV_FRAG) + && (fwuser->ipfw.fw_spts[0] != 0 + || fwuser->ipfw.fw_spts[1] != 0xFFFF + || fwuser->ipfw.fw_dpts[0] 
!= 0 + || fwuser->ipfw.fw_dpts[1] != 0xFFFF + || (fwuser->ipfw.fw_flg & IP_FW_F_TCPSYN))) { + duprintf("convert_ipfw: Can't test ports or SYN with frag!\n"); + *errno = EINVAL; + return NULL; + } +#endif + + if ((fwuser->ipfw.fw_spts[0] != 0 + || fwuser->ipfw.fw_spts[1] != 0xFFFF + || fwuser->ipfw.fw_dpts[0] != 0 + || fwuser->ipfw.fw_dpts[1] != 0xFFFF) + && ((fwuser->ipfw.fw_invflg & IP_FW_INV_PROTO) + || (fwuser->ipfw.fw_proto != IPPROTO_TCP + && fwuser->ipfw.fw_proto != IPPROTO_UDP + && fwuser->ipfw.fw_proto != IPPROTO_ICMP))) { + duprintf("convert_ipfw: Can only test ports for TCP/UDP/ICMP!\n"); + *errno = EINVAL; + return NULL; + } + + fwkern = kmalloc(SIZEOF_STRUCT_IP_FW_KERNEL, GFP_KERNEL); + if (!fwkern) { + duprintf("convert_ipfw: kmalloc failed!\n"); + *errno = ENOMEM; + return NULL; + } + memcpy(&fwkern->ipfw,&fwuser->ipfw,sizeof(struct ip_fw)); + + if (!find_special(fwuser->label, &fwkern->simplebranch)) { + fwkern->branch = find_label(fwuser->label); + if (!fwkern->branch) { + duprintf("convert_ipfw: chain doesn't exist `%s'.\n", + fwuser->label); + kfree(fwkern); + *errno = ENOENT; + return NULL; + } else if (fwkern->branch == IP_FW_INPUT_CHAIN + || fwkern->branch == IP_FW_FORWARD_CHAIN + || fwkern->branch == IP_FW_OUTPUT_CHAIN) { + duprintf("convert_ipfw: Can't branch to builtin chain `%s'.\n", + fwuser->label); + kfree(fwkern); + *errno = ENOENT; + return NULL; + } + } else + fwkern->branch = NULL; + memset(fwkern->counters, 0, sizeof(struct ip_counters)*NUM_SLOTS); + + /* Handle empty vianame by making it a wildcard */ + if ((fwkern->ipfw.fw_vianame)[0] == '\0') + fwkern->ipfw.fw_flg |= IP_FW_F_WILDIF; + + fwkern->next = NULL; + return fwkern; +} + +int ip_fw_ctl(int cmd, void *m, int len) +{ + int ret; + struct ip_chain *chain; + unsigned long flags; + + FWC_WRITE_LOCK_IRQ(&ip_fw_lock, flags); + + switch (cmd) { + case IP_FW_FLUSH: + if (len != sizeof(ip_chainlabel) || !check_label(m)) + ret = EINVAL; + else if ((chain = find_label(m)) == NULL) + 
ret = ENOENT; + else ret = clear_fw_chain(chain); + break; + + case IP_FW_ZERO: + if (len != sizeof(ip_chainlabel) || !check_label(m)) + ret = EINVAL; + else if ((chain = find_label(m)) == NULL) + ret = ENOENT; + else ret = zero_fw_chain(chain); + break; + + case IP_FW_CHECK: { + struct ip_fwtest *new = m; + struct iphdr *ip; + + /* Don't need write lock. */ + FWC_WRITE_UNLOCK_IRQ(&ip_fw_lock, flags); + + if (len != sizeof(struct ip_fwtest) || !check_label(m)) + return EINVAL; + + /* Need readlock to do find_label */ + FWC_READ_LOCK(&ip_fw_lock); + + if ((chain = find_label(new->fwt_label)) == NULL) + ret = ENOENT; + else { + ip = &(new->fwt_packet.fwp_iph); + + if (ip->ihl != sizeof(struct iphdr) / sizeof(int)) { + duprintf("ip_fw_ctl: ip->ihl=%d, want %d\n", + ip->ihl, + sizeof(struct iphdr) / sizeof(int)); + ret = EINVAL; + } + else { + ret = ip_fw_check(ip, new->fwt_packet.fwp_vianame, + NULL, chain, + NULL, SLOT_NUMBER(), 1); + switch (ret) { + case FW_ACCEPT: + ret = 0; break; + case FW_REDIRECT: + ret = ECONNABORTED; break; + case FW_MASQUERADE: + ret = ECONNRESET; break; + case FW_REJECT: + ret = ECONNREFUSED; break; + /* Hack to help diag; these only get + returned when testing. 
*/ + case FW_SKIP+1: + ret = ELOOP; break; + case FW_SKIP: + ret = ENFILE; break; + default: /* FW_BLOCK */ + ret = ETIMEDOUT; break; + } + } + } + FWC_READ_UNLOCK(&ip_fw_lock); + return ret; + } + + case IP_FW_MASQ_TIMEOUTS: { + ret = ip_fw_masq_timeouts(m, len); + } + break; + + case IP_FW_REPLACE: { + struct ip_fwkernel *ip_fwkern; + struct ip_fwnew *new = m; + + if (len != sizeof(struct ip_fwnew) + || !check_label(new->fwn_label)) + ret = EINVAL; + else if ((chain = find_label(new->fwn_label)) == NULL) + ret = ENOENT; + else if ((ip_fwkern = convert_ipfw(&new->fwn_rule, &ret)) + != NULL) + ret = replace_in_chain(chain, ip_fwkern, + new->fwn_rulenum); + } + break; + + case IP_FW_APPEND: { + struct ip_fwchange *new = m; + struct ip_fwkernel *ip_fwkern; + + if (len != sizeof(struct ip_fwchange) + || !check_label(new->fwc_label)) + ret = EINVAL; + else if ((chain = find_label(new->fwc_label)) == NULL) + ret = ENOENT; + else if ((ip_fwkern = convert_ipfw(&new->fwc_rule, &ret)) + != NULL) + ret = append_to_chain(chain, ip_fwkern); + } + break; + + case IP_FW_INSERT: { + struct ip_fwkernel *ip_fwkern; + struct ip_fwnew *new = m; + + if (len != sizeof(struct ip_fwnew) + || !check_label(new->fwn_label)) + ret = EINVAL; + else if ((chain = find_label(new->fwn_label)) == NULL) + ret = ENOENT; + else if ((ip_fwkern = convert_ipfw(&new->fwn_rule, &ret)) + != NULL) + ret = insert_in_chain(chain, ip_fwkern, + new->fwn_rulenum); + } + break; + + case IP_FW_DELETE: { + struct ip_fwchange *new = m; + struct ip_fwkernel *ip_fwkern; + + if (len != sizeof(struct ip_fwchange) + || !check_label(new->fwc_label)) + ret = EINVAL; + else if ((chain = find_label(new->fwc_label)) == NULL) + ret = ENOENT; + else if ((ip_fwkern = convert_ipfw(&new->fwc_rule, &ret)) + != NULL) { + ret = del_rule_from_chain(chain, ip_fwkern); + kfree(ip_fwkern); + } + } + break; + + case IP_FW_DELETE_NUM: { + struct ip_fwdelnum *new = m; + + if (len != sizeof(struct ip_fwdelnum) + || 
!check_label(new->fwd_label)) + ret = EINVAL; + else if ((chain = find_label(new->fwd_label)) == NULL) + ret = ENOENT; + else ret = del_num_from_chain(chain, new->fwd_rulenum); + } + break; + + case IP_FW_CREATECHAIN: { + if (len != sizeof(ip_chainlabel)) { + duprintf("create_chain: bad size %i\n", len); + ret = EINVAL; + } + else ret = create_chain(m); + } + break; + + case IP_FW_DELETECHAIN: { + if (len != sizeof(ip_chainlabel)) { + duprintf("delete_chain: bad size %i\n", len); + ret = EINVAL; + } + else ret = del_chain(m); + } + break; + + case IP_FW_POLICY: { + struct ip_fwpolicy *new = m; + + if (len != sizeof(struct ip_fwpolicy) + || !check_label(new->fwp_label)) + ret = EINVAL; + else if ((chain = find_label(new->fwp_label)) == NULL) + ret = ENOENT; + else if (chain != IP_FW_INPUT_CHAIN + && chain != IP_FW_FORWARD_CHAIN + && chain != IP_FW_OUTPUT_CHAIN) { + duprintf("change_policy: can't change policy on user" + " defined chain.\n"); + ret = EINVAL; + } + else { + int pol = FW_SKIP; + find_special(new->fwp_policy, &pol); + + switch(pol) { + case FW_MASQUERADE: + if (chain != IP_FW_FORWARD_CHAIN) { + ret = EINVAL; + break; + } + /* Fall thru... 
*/ + case FW_BLOCK: + case FW_ACCEPT: + case FW_REJECT: + ret = change_policy(chain, pol); + break; + default: + duprintf("change_policy: bad policy `%s'\n", + new->fwp_policy); + ret = EINVAL; + } + } + break; + } + default: + duprintf("ip_fw_ctl: unknown request %d\n",cmd); + ret = ENOPROTOOPT; + } + + FWC_WRITE_UNLOCK_IRQ(&ip_fw_lock, flags); + return ret; +} + +/* Returns bytes used - doesn't NUL terminate */ +static int dump_rule(char *buffer, + const char *chainlabel, + const struct ip_fwkernel *rule) +{ + int len; + unsigned int i; + __u64 packets = 0, bytes = 0; + + FWC_HAVE_LOCK(fwc_wlocks); + for (i = 0; i < NUM_SLOTS; i++) { + packets += rule->counters[i].pcnt; + bytes += rule->counters[i].bcnt; + } + + len=sprintf(buffer, + "%9s " /* Chain name */ + "%08X/%08X->%08X/%08X " /* Source & Destination IPs */ + "%.16s " /* Interface */ + "%X %X " /* fw_flg and fw_invflg fields */ + "%u " /* Protocol */ + "%-9u %-9u %-9u %-9u " /* Packet & byte counters */ + "%u-%u %u-%u " /* Source & Dest port ranges */ + "A%02X X%02X " /* TOS and and xor masks */ + "%08X " /* Redirection port */ + "%u " /* fw_mark field */ + "%u " /* output size */ + "%9s\n", /* Target */ + chainlabel, + ntohl(rule->ipfw.fw_src.s_addr), + ntohl(rule->ipfw.fw_smsk.s_addr), + ntohl(rule->ipfw.fw_dst.s_addr), + ntohl(rule->ipfw.fw_dmsk.s_addr), + (rule->ipfw.fw_vianame)[0] ? rule->ipfw.fw_vianame : "-", + rule->ipfw.fw_flg, + rule->ipfw.fw_invflg, + rule->ipfw.fw_proto, + (__u32)(packets >> 32), (__u32)packets, + (__u32)(bytes >> 32), (__u32)bytes, + rule->ipfw.fw_spts[0], rule->ipfw.fw_spts[1], + rule->ipfw.fw_dpts[0], rule->ipfw.fw_dpts[1], + rule->ipfw.fw_tosand, rule->ipfw.fw_tosxor, + rule->ipfw.fw_redirpt, + rule->ipfw.fw_mark, + rule->ipfw.fw_outputsize, + branchname(rule->branch,rule->simplebranch)); + + duprintf("dump_rule: %i bytes done.\n", len); + return len; +} + +/* File offset is actually in records, not bytes. 
*/ +static int ip_chain_procinfo(char *buffer, char **start, + off_t offset, int length +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,3,29) + , int reset +#endif + ) +{ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,3,29) + /* FIXME: No more `atomic' read and reset. Wonderful 8-( --RR */ + int reset = 0; +#endif + struct ip_chain *i; + struct ip_fwkernel *j = ip_fw_chains->chain; + unsigned long flags; + int len = 0; + int last_len = 0; + off_t upto = 0; + + duprintf("Offset starts at %lu\n", offset); + duprintf("ip_fw_chains is 0x%0lX\n", (unsigned long int)ip_fw_chains); + + /* Need a write lock to lock out ``readers'' which update counters. */ + FWC_WRITE_LOCK_IRQ(&ip_fw_lock, flags); + + for (i = ip_fw_chains; i; i = i->next) { + for (j = i->chain; j; j = j->next) { + if (upto == offset) break; + duprintf("Skipping rule in chain `%s'\n", + i->label); + upto++; + } + if (upto == offset) break; + } + + /* Don't init j first time, or once i = NULL */ + for (; i; (void)((i = i->next) && (j = i->chain))) { + duprintf("Dumping chain `%s'\n", i->label); + for (; j; j = j->next, upto++, last_len = len) + { + len += dump_rule(buffer+len, i->label, j); + if (len > length) { + duprintf("Dumped to %i (past %i). " + "Moving back to %i.\n", + len, length, last_len); + len = last_len; + goto outside; + } + else if (reset) + memset(j->counters, 0, + sizeof(struct ip_counters)*NUM_SLOTS); + } + } +outside: + FWC_WRITE_UNLOCK_IRQ(&ip_fw_lock, flags); + buffer[len] = '\0'; + + duprintf("ip_chain_procinfo: Length = %i (of %i). Offset = %li.\n", + len, length, upto); + /* `start' hack - see fs/proc/generic.c line ~165 */ + *start=(char *)((unsigned int)upto-offset); + return len; +} + +static int ip_chain_name_procinfo(char *buffer, char **start, + off_t offset, int length) +{ + struct ip_chain *i; + int len = 0,last_len = 0; + off_t pos = 0,begin = 0; + unsigned long flags; + + /* Need a write lock to lock out ``readers'' which update counters. 
*/ + FWC_WRITE_LOCK_IRQ(&ip_fw_lock, flags); + + for (i = ip_fw_chains; i; i = i->next) + { + unsigned int j; + __u32 packetsHi = 0, packetsLo = 0, bytesHi = 0, bytesLo = 0; + + for (j = 0; j < NUM_SLOTS; j++) { + packetsLo += i->reent[j].counters.pcnt & 0xFFFFFFFF; + packetsHi += ((i->reent[j].counters.pcnt >> 32) + & 0xFFFFFFFF); + bytesLo += i->reent[j].counters.bcnt & 0xFFFFFFFF; + bytesHi += ((i->reent[j].counters.bcnt >> 32) + & 0xFFFFFFFF); + } + + /* print the label and the policy */ + len+=sprintf(buffer+len,"%s %s %i %u %u %u %u\n", + i->label,branchname(NULL, i->policy),i->refcount, + packetsHi, packetsLo, bytesHi, bytesLo); + pos=begin+len; + if(posoffset+length) { + len = last_len; + break; + } + + last_len = len; + } + FWC_WRITE_UNLOCK_IRQ(&ip_fw_lock, flags); + + *start = buffer+(offset-begin); + len-=(offset-begin); + if(len>length) + len=length; + return len; +} + +/* + * Interface to the generic firewall chains. + */ +int ipfw_input_check(struct firewall_ops *this, int pf, + struct net_device *dev, void *phdr, void *arg, + struct sk_buff **pskb) +{ + return ip_fw_check(phdr, dev->name, + arg, IP_FW_INPUT_CHAIN, *pskb, SLOT_NUMBER(), 0); +} + +int ipfw_output_check(struct firewall_ops *this, int pf, + struct net_device *dev, void *phdr, void *arg, + struct sk_buff **pskb) +{ + /* Locally generated bogus packets by root. . 
*/ + if (((struct iphdr *)phdr)->ihl * 4 < sizeof(struct iphdr) + || (*pskb)->len < sizeof(struct iphdr)) + return FW_ACCEPT; + return ip_fw_check(phdr, dev->name, + arg, IP_FW_OUTPUT_CHAIN, *pskb, SLOT_NUMBER(), 0); +} + +int ipfw_forward_check(struct firewall_ops *this, int pf, + struct net_device *dev, void *phdr, void *arg, + struct sk_buff **pskb) +{ + return ip_fw_check(phdr, dev->name, + arg, IP_FW_FORWARD_CHAIN, *pskb, SLOT_NUMBER(), 0); +} + +struct firewall_ops ipfw_ops= +{ + NULL, + ipfw_forward_check, + ipfw_input_check, + ipfw_output_check, + NULL, + NULL +}; + +int ipfw_init_or_cleanup(int init) +{ + int ret = 0; + unsigned long flags; + + FWC_WRITE_LOCK_IRQ(&ip_fw_lock, flags); + + if (!init) goto cleanup; + +#ifdef DEBUG_IP_FIREWALL_LOCKING + fwc_wlocks = fwc_rlocks = 0; +#endif + +#if defined(CONFIG_NETLINK_DEV) || defined(CONFIG_NETLINK_DEV_MODULE) + ipfwsk = netlink_kernel_create(NETLINK_FIREWALL, NULL); + if (ipfwsk == NULL) + goto cleanup_nothing; +#endif + + ret = register_firewall(PF_INET, &ipfw_ops); + if (ret < 0) + goto cleanup_netlink; + + proc_net_create(IP_FW_PROC_CHAINS, S_IFREG | S_IRUSR | S_IWUSR, ip_chain_procinfo); + proc_net_create(IP_FW_PROC_CHAIN_NAMES, S_IFREG | S_IRUSR | S_IWUSR, ip_chain_name_procinfo); + + IP_FW_INPUT_CHAIN = ip_init_chain(IP_FW_LABEL_INPUT, 1, FW_ACCEPT); + IP_FW_FORWARD_CHAIN = ip_init_chain(IP_FW_LABEL_FORWARD, 1, FW_ACCEPT); + IP_FW_OUTPUT_CHAIN = ip_init_chain(IP_FW_LABEL_OUTPUT, 1, FW_ACCEPT); + + FWC_WRITE_UNLOCK_IRQ(&ip_fw_lock, flags); + return ret; + + cleanup: + while (ip_fw_chains) { + struct ip_chain *next = ip_fw_chains->next; + + clear_fw_chain(ip_fw_chains); + kfree(ip_fw_chains); + ip_fw_chains = next; + } + + proc_net_remove(IP_FW_PROC_CHAINS); + proc_net_remove(IP_FW_PROC_CHAIN_NAMES); + + unregister_firewall(PF_INET, &ipfw_ops); + + cleanup_netlink: +#if defined(CONFIG_NETLINK_DEV) || defined(CONFIG_NETLINK_DEV_MODULE) + sock_release(ipfwsk->socket); + + cleanup_nothing: +#endif + 
FWC_WRITE_UNLOCK_IRQ(&ip_fw_lock, flags); + return ret; +} diff -u --recursive --new-file v2.3.99-pre1/linux/net/ipv4/netfilter/ipfwadm_core.c linux/net/ipv4/netfilter/ipfwadm_core.c --- v2.3.99-pre1/linux/net/ipv4/netfilter/ipfwadm_core.c Wed Dec 31 16:00:00 1969 +++ linux/net/ipv4/netfilter/ipfwadm_core.c Fri Mar 17 10:56:20 2000 @@ -0,0 +1,1410 @@ +/* Minor modifications to fit on compatibility framework: + Rusty.Russell@rustcorp.com.au +*/ + +#define CONFIG_IP_FIREWALL +#define CONFIG_IP_FIREWALL_VERBOSE +#define CONFIG_IP_MASQUERADE +#define CONFIG_IP_ACCT +#define CONFIG_IP_TRANSPARENT_PROXY +#define CONFIG_IP_FIREWALL_NETLINK + +/* + * IP firewalling code. This is taken from 4.4BSD. Please note the + * copyright message below. As per the GPL it must be maintained + * and the licenses thus do not conflict. While this port is subject + * to the GPL I also place my modifications under the original + * license in recognition of the original copyright. + * -- Alan Cox. + * + * $Id: ipfwadm_core.c,v 1.1 2000/03/17 14:42:00 davem Exp $ + * + * Ported from BSD to Linux, + * Alan Cox 22/Nov/1994. + * Zeroing /proc and other additions + * Jos Vos 4/Feb/1995. + * Merged and included the FreeBSD-Current changes at Ugen's request + * (but hey it's a lot cleaner now). Ugen would prefer in some ways + * we waited for his final product but since Linux 1.2.0 is about to + * appear it's not practical - Read: It works, it's not clean but please + * don't consider it to be his standard of finished work. + * Alan Cox 12/Feb/1995 + * Porting bidirectional entries from BSD, fixing accounting issues, + * adding struct ip_fwpkt for checking packets with interface address + * Jos Vos 5/Mar/1995. + * Established connections (ACK check), ACK check on bidirectional rules, + * ICMP type check. + * Wilfred Mollenvanger 7/7/1995. + * TCP attack protection. + * Alan Cox 25/8/95, based on information from bugtraq. 
+ * ICMP type printk, IP_FW_F_APPEND + * Bernd Eckenfels 1996-01-31 + * Split blocking chain into input and output chains, add new "insert" and + * "append" commands to replace semi-intelligent "add" command, let "delete". + * only delete the first matching entry, use 0xFFFF (0xFF) as ports (ICMP + * types) when counting packets being 2nd and further fragments. + * Jos Vos 8/2/1996. + * Add support for matching on device names. + * Jos Vos 15/2/1996. + * Transparent proxying support. + * Willy Konynenberg 10/5/96. + * Make separate accounting on incoming and outgoing packets possible. + * Jos Vos 18/5/1996. + * Added trap out of bad frames. + * Alan Cox 17/11/1996 + * + * + * Masquerading functionality + * + * Copyright (c) 1994 Pauline Middelink + * + * The pieces which added masquerading functionality are totally + * my responsibility and have nothing to with the original authors + * copyright or doing. + * + * Parts distributed under GPL. + * + * Fixes: + * Pauline Middelink : Added masquerading. + * Alan Cox : Fixed an error in the merge. + * Thomas Quinot : Fixed port spoofing. + * Alan Cox : Cleaned up retransmits in spoofing. + * Alan Cox : Cleaned up length setting. + * Wouter Gadeyne : Fixed masquerading support of ftp PORT commands + * + * Juan Jose Ciarlante : Masquerading code moved to ip_masq.c + * Andi Kleen : Print frag_offsets and the ip flags properly. + * + * All the real work was done by ..... + * + */ + + +/* + * Copyright (c) 1993 Daniel Boulet + * Copyright (c) 1994 Ugen J.S.Antsilevich + * + * Redistribution and use in source forms, with and without modification, + * are permitted provided that this entire comment appears intact. + * + * Redistribution in binary form may occur without any restrictions. + * Obviously, it would be nice if you gave credit where credit is due + * but requiring it would be too onerous. + * + * This software is provided ``AS IS'' without any warranties of any kind. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +/* + * Implement IP packet firewall + */ + +#ifdef DEBUG_IP_FIREWALL +#define dprintf1(a) printk(a) +#define dprintf2(a1,a2) printk(a1,a2) +#define dprintf3(a1,a2,a3) printk(a1,a2,a3) +#define dprintf4(a1,a2,a3,a4) printk(a1,a2,a3,a4) +#else +#define dprintf1(a) +#define dprintf2(a1,a2) +#define dprintf3(a1,a2,a3) +#define dprintf4(a1,a2,a3,a4) +#endif + +#define print_ip(a) printk("%d.%d.%d.%d",(ntohl(a)>>24)&0xFF,\ + (ntohl(a)>>16)&0xFF,\ + (ntohl(a)>>8)&0xFF,\ + (ntohl(a))&0xFF); + +#ifdef DEBUG_IP_FIREWALL +#define dprint_ip(a) print_ip(a) +#else +#define dprint_ip(a) +#endif + +#if defined(CONFIG_IP_ACCT) || defined(CONFIG_IP_FIREWALL) + +struct ip_fw *ip_fw_fwd_chain; +struct ip_fw *ip_fw_in_chain; +struct ip_fw *ip_fw_out_chain; +struct ip_fw *ip_acct_chain; +struct ip_fw *ip_masq_chain; + +static struct ip_fw **chains[] = + {&ip_fw_fwd_chain, &ip_fw_in_chain, &ip_fw_out_chain, &ip_acct_chain, + &ip_masq_chain + }; +#endif /* CONFIG_IP_ACCT || CONFIG_IP_FIREWALL */ + +#ifdef CONFIG_IP_FIREWALL +int ip_fw_fwd_policy=IP_FW_F_ACCEPT; +int ip_fw_in_policy=IP_FW_F_ACCEPT; +int ip_fw_out_policy=IP_FW_F_ACCEPT; + +static int *policies[] = + {&ip_fw_fwd_policy, &ip_fw_in_policy, &ip_fw_out_policy}; + +#endif + +#ifdef CONFIG_IP_FIREWALL_NETLINK +struct sock *ipfwsk; +#endif + +/* + * Returns 1 if the port is matched by the vector, 0 otherwise + */ + +extern inline int port_match(unsigned short *portptr,int nports,unsigned short port,int range_flag) +{ + if (!nports) + return 1; + if ( range_flag ) + { + if ( portptr[0] <= port && port <= portptr[1] ) + { + return( 1 ); + } + nports -= 2; + portptr += 2; + } + while ( nports-- > 0 ) + { + if ( *portptr++ == 
port ) + { + return( 1 ); + } + } + return(0); +} + +#if defined(CONFIG_IP_ACCT) || defined(CONFIG_IP_FIREWALL) + +#ifdef CONFIG_IP_FIREWALL_VERBOSE + +/* + * VERY ugly piece of code which actually makes kernel printf for + * matching packets. + */ + +static char *chain_name(struct ip_fw *chain, int mode) +{ + switch (mode) { + case IP_FW_MODE_ACCT_IN: return "acct in"; + case IP_FW_MODE_ACCT_OUT: return "acct out"; + default: + if (chain == ip_fw_fwd_chain) + return "fw-fwd"; + else if (chain == ip_fw_in_chain) + return "fw-in"; + else + return "fw-out"; + } +} + +static char *rule_name(struct ip_fw *f, int mode, char *buf) +{ + if (mode == IP_FW_MODE_ACCT_IN || mode == IP_FW_MODE_ACCT_OUT) + return ""; + + if(f->fw_flg&IP_FW_F_ACCEPT) { + if(f->fw_flg&IP_FW_F_REDIR) { + sprintf(buf, "acc/r%d ", f->fw_pts[f->fw_nsp+f->fw_ndp]); + return buf; + } else if(f->fw_flg&IP_FW_F_MASQ) + return "acc/masq "; + else + return "acc "; + } else if(f->fw_flg&IP_FW_F_ICMPRPL) { + return "rej "; + } else { + return "deny "; + } +} + +static void print_packet(struct iphdr *ip, + u16 src_port, u16 dst_port, u16 icmp_type, + char *chain, char *rule, char *devname) +{ + __u32 *opt = (__u32 *) (ip + 1); + int opti; + __u16 foff = ntohs(ip->frag_off); + + printk(KERN_INFO "IP %s %s%s", chain, rule, devname); + + switch(ip->protocol) + { + case IPPROTO_TCP: + printk(" TCP "); + break; + case IPPROTO_UDP: + printk(" UDP "); + break; + case IPPROTO_ICMP: + printk(" ICMP/%d ", icmp_type); + break; + default: + printk(" PROTO=%d ", ip->protocol); + break; + } + print_ip(ip->saddr); + if(ip->protocol == IPPROTO_TCP || ip->protocol == IPPROTO_UDP) + printk(":%hu", src_port); + printk(" "); + print_ip(ip->daddr); + if(ip->protocol == IPPROTO_TCP || ip->protocol == IPPROTO_UDP) + printk(":%hu", dst_port); + printk(" L=%hu S=0x%2.2hX I=%hu FO=0x%4.4hX T=%hu", + ntohs(ip->tot_len), ip->tos, ntohs(ip->id), + foff & IP_OFFSET, ip->ttl); + if (foff & IP_DF) printk(" DF=1"); + if (foff & IP_MF) 
printk(" MF=1");
+	for (opti = 0; opti < (ip->ihl - sizeof(struct iphdr) / 4); opti++)
+		printk(" O=0x%8.8X", *opt++);
+	printk("\n");
+}
+#endif
+
+/*
+ *	Returns one of the generic firewall policies, like FW_ACCEPT.
+ *	Also does accounting so you can feed it the accounting chain.
+ *
+ *	The mode is either IP_FW_MODE_FW (normal firewall mode),
+ *	IP_FW_MODE_ACCT_IN or IP_FW_MODE_ACCT_OUT (accounting mode,
+ *	steps through the entire chain and handles fragments
+ *	differently), or IP_FW_MODE_CHK (handles user-level check,
+ *	counters are not updated).
+ *
+ *	ip        - IP header of the packet; the transport header is
+ *	            located at ip + ip->ihl 32-bit words.
+ *	rif       - device used for the fw_viadev / fw_via matches; may
+ *	            be NULL (the address match is then skipped).
+ *	redirport - if non-NULL and the verdict is a redirect, receives
+ *	            the redirect port in network byte order.
+ *	chain     - first rule of the chain to walk.
+ *	policy    - flags used when no rule matches.
+ *
+ *	In firewall/check mode the first matching rule decides and the
+ *	result is FW_ACCEPT, FW_REDIRECT, FW_MASQUERADE, FW_REJECT or
+ *	FW_BLOCK.  In accounting mode every matching rule is counted and
+ *	the result is always 0.
+ */
+
+
+int ip_fw_chk(struct iphdr *ip, struct net_device *rif, __u16 *redirport,
+	      struct ip_fw *chain, int policy, int mode)
+{
+	struct ip_fw *f;
+	struct tcphdr		*tcp=(struct tcphdr *)((__u32 *)ip+ip->ihl);
+	struct udphdr		*udp=(struct udphdr *)((__u32 *)ip+ip->ihl);
+	struct icmphdr		*icmp=(struct icmphdr *)((__u32 *)ip+ip->ihl);
+	__u32			src, dst;
+	__u16			src_port=0xFFFF, dst_port=0xFFFF, icmp_type=0xFF;
+	unsigned short		f_prt=0, prt;
+	char			notcpsyn=0, notcpack=0, match;
+	unsigned short		offset;
+	int			answer;
+	unsigned char		tosand, tosxor;
+
+	/*
+	 *	If the chain is empty follow policy. The BSD one
+	 *	accepts anything giving you a time window while
+	 *	flushing and rebuilding the tables.
+	 */
+
+	src = ip->saddr;
+	dst = ip->daddr;
+
+	/*
+	 *	This way we handle fragmented packets.
+	 *	we ignore all fragments but the first one
+	 *	so the whole packet can't be reassembled.
+	 *	This way we rely on the full info which is
+	 *	stored only in the first packet.
+	 *
+	 *	Note that this theoretically allows partial packet
+	 *	spoofing. Not very dangerous but paranoid people may
+	 *	wish to play with this. It also allows the so called
+	 *	"fragment bomb" denial of service attack on some types
+	 *	of system.
+	 */
+
+	offset = ntohs(ip->frag_off) & IP_OFFSET;
+
+	/*
+	 *	Don't allow a fragment of TCP 8 bytes in. Nobody
+	 *	normal causes this. It's a cracker trying to break
+	 *	in by doing a flag overwrite to pass the direction
+	 *	checks.
+	 */
+
+	if (offset == 1 && ip->protocol == IPPROTO_TCP)
+		return FW_BLOCK;
+
+	/*
+	 *	Outside accounting mode, non-first fragments of
+	 *	TCP/UDP/ICMP are accepted without consulting the chain.
+	 */
+	if (offset!=0 && !(mode & (IP_FW_MODE_ACCT_IN|IP_FW_MODE_ACCT_OUT)) &&
+		(ip->protocol == IPPROTO_TCP || ip->protocol == IPPROTO_UDP ||
+			ip->protocol == IPPROTO_ICMP))
+		return FW_ACCEPT;
+
+	/*
+	 *	 Header fragment for TCP is too small to check the bits.
+	 */
+
+	if(ip->protocol==IPPROTO_TCP && (ip->ihl<<2)+16 > ntohs(ip->tot_len))
+		return FW_BLOCK;
+
+	/*
+	 *	Too short.
+	 *
+	 *	But only too short for a packet with ports...
+	 */
+
+	else if((ntohs(ip->tot_len)<8+(ip->ihl<<2))&&(ip->protocol==IPPROTO_TCP || ip->protocol==IPPROTO_UDP))
+		return FW_BLOCK;
+
+	src = ip->saddr;
+	dst = ip->daddr;
+
+	/*
+	 *	If we got interface from which packet came
+	 *	we can use the address directly. This is unlike
+	 *	4.4BSD derived systems that have an address chain
+	 *	per device. We have a device per address with dummy
+	 *	devices instead.
+	 */
+
+	dprintf1("Packet ");
+	switch(ip->protocol)
+	{
+		case IPPROTO_TCP:
+			dprintf1("TCP ");
+			/* ports stay 0xFFFF if it is not the first fragment */
+			if (!offset) {
+				src_port=ntohs(tcp->source);
+				dst_port=ntohs(tcp->dest);
+				if(!tcp->ack && !tcp->rst)
+					/* We do NOT have ACK, value TRUE */
+					notcpack=1;
+				if(!tcp->syn || !notcpack)
+					/* We do NOT have SYN, value TRUE */
+					notcpsyn=1;
+			}
+			prt=IP_FW_F_TCP;
+			break;
+		case IPPROTO_UDP:
+			dprintf1("UDP ");
+			/* ports stay 0xFFFF if it is not the first fragment */
+			if (!offset) {
+				src_port=ntohs(udp->source);
+				dst_port=ntohs(udp->dest);
+			}
+			prt=IP_FW_F_UDP;
+			break;
+		case IPPROTO_ICMP:
+			/* icmp_type stays 255 if it is not the first fragment */
+			if (!offset)
+				icmp_type=(__u16)(icmp->type);
+			dprintf2("ICMP:%d ",icmp_type);
+			prt=IP_FW_F_ICMP;
+			break;
+		default:
+			dprintf2("p=%d ",ip->protocol);
+			prt=IP_FW_F_ALL;
+			break;
+	}
+#ifdef DEBUG_IP_FIREWALL
+	dprint_ip(ip->saddr);
+
+	if (ip->protocol==IPPROTO_TCP || ip->protocol==IPPROTO_UDP)
+		/* This will print 65535 when it is not the first fragment! */
+		dprintf2(":%d ", src_port);
+	dprint_ip(ip->daddr);
+	if (ip->protocol==IPPROTO_TCP || ip->protocol==IPPROTO_UDP)
+		/* This will print 65535 when it is not the first fragment! */
+		dprintf2(":%d ",dst_port);
+	dprintf1("\n");
+#endif
+
+	for (f=chain;f;f=f->fw_next)
+	{
+		/*
+		 *	This is a bit simpler as we don't have to walk
+		 *	an interface chain as you do in BSD - same logic
+		 *	however.
+		 */
+
+		/*
+		 *	Match can become 0x01 (a "normal" match was found),
+		 *	0x02 (a reverse match was found), and 0x03 (the
+		 *	IP addresses match in both directions).
+		 *	Now we know in which direction(s) we should look
+		 *	for a match for the TCP/UDP ports.  Both directions
+		 *	might match (e.g., when both addresses are on the
+		 *	same network for which an address/mask is given), but
+		 *	the ports might only match in one direction.
+		 *	This was obviously wrong in the original BSD code.
+		 */
+		match = 0x00;
+
+		if ((src&f->fw_smsk.s_addr)==f->fw_src.s_addr
+		&&  (dst&f->fw_dmsk.s_addr)==f->fw_dst.s_addr)
+			/* normal direction */
+			match |= 0x01;
+
+		if ((f->fw_flg & IP_FW_F_BIDIR) &&
+		    (dst&f->fw_smsk.s_addr)==f->fw_src.s_addr
+		&&  (src&f->fw_dmsk.s_addr)==f->fw_dst.s_addr)
+			/* reverse direction */
+			match |= 0x02;
+
+		if (!match)
+			continue;
+
+		/*
+		 *	Look for a VIA device match
+		 */
+		if(f->fw_viadev)
+		{
+			if(rif!=f->fw_viadev)
+				continue;	/* Mismatch */
+		}
+
+		/* This looks stupid, because we scan almost static
+		   list, searching for static key. However, this way seems
+		   to be only reasonable way of handling fw_via rules
+		   (btw bsd makes the same thing).
+
+		   It will not affect performance if you will follow
+		   the following simple rules:
+
+		   - if interface is aliased, ALWAYS specify fw_viadev,
+		     so that previous check will guarantee, that we will
+		     not waste time when packet arrive on another interface.
+
+		   - avoid using fw_via.s_addr if fw_via.s_addr is owned
+		     by an aliased interface.
+
+		                                                       --ANK
+		 */
+		if (f->fw_via.s_addr && rif) {
+			struct in_ifaddr *ifa;
+
+			if (rif->ip_ptr == NULL)
+				continue;	/* Mismatch */
+
+			for (ifa = ((struct in_device*)(rif->ip_ptr))->ifa_list;
+			     ifa; ifa = ifa->ifa_next) {
+				if (ifa->ifa_local == f->fw_via.s_addr)
+					goto ifa_ok;
+			}
+			continue;	/* Mismatch */
+
+		ifa_ok:
+		}
+
+		/*
+		 *	Ok the chain addresses match.
+		 */
+
+#ifdef CONFIG_IP_ACCT
+		/*
+		 *	See if we're in accounting mode and only want to
+		 *	count incoming or outgoing packets.
+		 */
+
+		if (mode & (IP_FW_MODE_ACCT_IN|IP_FW_MODE_ACCT_OUT) &&
+		   ((mode == IP_FW_MODE_ACCT_IN && f->fw_flg&IP_FW_F_ACCTOUT) ||
+		    (mode == IP_FW_MODE_ACCT_OUT && f->fw_flg&IP_FW_F_ACCTIN)))
+			continue;
+
+#endif
+		/*
+		 * For all non-TCP packets and/or non-first fragments,
+		 * notcpsyn and notcpack will always be FALSE,
+		 * so the IP_FW_F_TCPSYN and IP_FW_F_TCPACK flags
+		 * are actually ignored for these packets.
+		 */
+
+		if((f->fw_flg&IP_FW_F_TCPSYN) && notcpsyn)
+		 	continue;
+
+		if((f->fw_flg&IP_FW_F_TCPACK) && notcpack)
+		 	continue;
+
+		f_prt=f->fw_flg&IP_FW_F_KIND;
+		if (f_prt!=IP_FW_F_ALL)
+		{
+			/*
+			 *	Specific firewall - packet's protocol
+			 *	must match firewall's.
+			 */
+
+			if(prt!=f_prt)
+				continue;
+
+			/*
+			 *	ICMP rules match the ICMP type against the
+			 *	source-port list.  TCP/UDP rules must match
+			 *	both port lists in at least one of the
+			 *	directions recorded in 'match'.
+			 */
+			if((prt==IP_FW_F_ICMP &&
+				! port_match(&f->fw_pts[0], f->fw_nsp,
+					icmp_type,f->fw_flg&IP_FW_F_SRNG)) ||
+			    !(prt==IP_FW_F_ICMP || ((match & 0x01) &&
+				port_match(&f->fw_pts[0], f->fw_nsp, src_port,
+					f->fw_flg&IP_FW_F_SRNG) &&
+				port_match(&f->fw_pts[f->fw_nsp], f->fw_ndp, dst_port,
+					f->fw_flg&IP_FW_F_DRNG)) || ((match & 0x02) &&
+				port_match(&f->fw_pts[0], f->fw_nsp, dst_port,
+					f->fw_flg&IP_FW_F_SRNG) &&
+				port_match(&f->fw_pts[f->fw_nsp], f->fw_ndp, src_port,
+					f->fw_flg&IP_FW_F_DRNG))))
+			{
+				continue;
+			}
+		}
+
+#ifdef CONFIG_IP_FIREWALL_VERBOSE
+		if (f->fw_flg & IP_FW_F_PRN)
+		{
+			char buf[16];
+
+			print_packet(ip, src_port, dst_port, icmp_type,
+				     chain_name(chain, mode),
+				     rule_name(f, mode, buf),
+				     rif ? rif->name : "-");
+		}
+#endif
+		/* user-level checks must not disturb the counters */
+		if (mode != IP_FW_MODE_CHK) {
+			f->fw_bcnt+=ntohs(ip->tot_len);
+			f->fw_pcnt++;
+		}
+		/* firewall modes stop at the first match; accounting walks on */
+		if (!(mode & (IP_FW_MODE_ACCT_IN|IP_FW_MODE_ACCT_OUT)))
+			break;
+	} /* Loop */
+
+	if (!(mode & (IP_FW_MODE_ACCT_IN|IP_FW_MODE_ACCT_OUT))) {
+
+		/*
+		 * We rely on policy defined in the rejecting entry or, if no match
+		 * was found, we rely on the general policy variable for this type
+		 * of firewall.
+		 */
+
+		if (f!=NULL) {
+			policy=f->fw_flg;
+			tosand=f->fw_tosand;
+			tosxor=f->fw_tosxor;
+		} else {
+			tosand=0xFF;
+			tosxor=0x00;
+		}
+
+		if (policy&IP_FW_F_ACCEPT) {
+			/* Adjust priority and recompute checksum */
+			__u8 old_tos = ip->tos;
+			ip->tos = (old_tos & tosand) ^ tosxor;
+			if (ip->tos != old_tos)
+		 		ip_send_check(ip);
+#ifdef CONFIG_IP_TRANSPARENT_PROXY
+			if (policy&IP_FW_F_REDIR) {
+				if (redirport)
+					if ((*redirport = htons(f->fw_pts[f->fw_nsp+f->fw_ndp])) == 0) {
+						/* Wildcard redirection.
+						 * Note that redirport will become
+						 * 0xFFFF for non-TCP/UDP packets.
+						 */
+						*redirport = htons(dst_port);
+					}
+				answer = FW_REDIRECT;
+			} else
+#endif
+#ifdef CONFIG_IP_MASQUERADE
+			if (policy&IP_FW_F_MASQ)
+				answer = FW_MASQUERADE;
+			else
+#endif
+				answer = FW_ACCEPT;
+
+		} else if(policy&IP_FW_F_ICMPRPL)
+			answer = FW_REJECT;
+		else
+			answer = FW_BLOCK;
+
+#ifdef CONFIG_IP_FIREWALL_NETLINK
+		/* hand (at most) the first 128 bytes of a refused packet to netlink */
+		if((policy&IP_FW_F_PRN) && (answer == FW_REJECT || answer == FW_BLOCK))
+		{
+			struct sk_buff *skb=alloc_skb(128, GFP_ATOMIC);
+			if(skb)
+			{
+				int len=min(128,ntohs(ip->tot_len));
+				skb_put(skb,len);
+				memcpy(skb->data,ip,len);
+				if(netlink_post(NETLINK_FIREWALL, skb))
+					kfree_skb(skb);
+			}
+		}
+#endif
+		return answer;
+	} else
+		/* we're doing accounting, always ok */
+		return 0;
+}
+
+
+/* Reset the packet and byte counters of every rule on the chain. */
+static void zero_fw_chain(struct ip_fw *chainptr)
+{
+	struct ip_fw *ctmp=chainptr;
+	while(ctmp)
+	{
+		ctmp->fw_pcnt=0L;
+		ctmp->fw_bcnt=0L;
+		ctmp=ctmp->fw_next;
+	}
+}
+
+/*
+ * Unlink and free every rule on the chain with interrupts disabled,
+ * leaving *chainptr NULL.
+ */
+static void free_fw_chain(struct ip_fw *volatile* chainptr)
+{
+	unsigned long flags;
+	save_flags(flags);
+	cli();
+	while ( *chainptr != NULL )
+	{
+		struct ip_fw *ftmp;
+		ftmp = *chainptr;
+		*chainptr = ftmp->fw_next;
+		kfree_s(ftmp,sizeof(*ftmp));
+	}
+	restore_flags(flags);
+}
+
+/* Volatiles to keep some of the compiler versions amused */
+
+/*
+ * Copy len bytes of *frwl into a newly allocated rule and link it at
+ * the head of the chain.  Returns 0, or (positive) ENOMEM when the
+ * allocation fails.
+ */
+static int insert_in_chain(struct ip_fw *volatile* chainptr, struct ip_fw *frwl,int len)
+{
+	struct ip_fw *ftmp;
+	unsigned long flags;
+
+	save_flags(flags);
+
+	ftmp = kmalloc( sizeof(struct ip_fw), GFP_ATOMIC );
+	if ( ftmp == NULL )
+	{
+#ifdef DEBUG_IP_FIREWALL
+		printk("ip_fw_ctl: malloc said no\n");
+#endif
+		return( ENOMEM );
+	}
+
+	memcpy(ftmp, frwl, len);
+	/*
+	 *	Allow the more recent "minimise cost" flag to be
+	 *	set.
[Rob van Nieuwkerk]
+	 */
+	ftmp->fw_tosand |= 0x01;
+	ftmp->fw_tosxor &= 0xFE;
+	ftmp->fw_pcnt=0L;
+	ftmp->fw_bcnt=0L;
+
+	cli();
+
+	/*
+	 * A named device that does not exist is recorded as -1, so the
+	 * non-NULL fw_viadev never equals a real device pointer.
+	 */
+	if ((ftmp->fw_vianame)[0]) {
+		if (!(ftmp->fw_viadev = dev_get_by_name(ftmp->fw_vianame)))
+			ftmp->fw_viadev = (struct net_device *) -1;
+	} else
+		ftmp->fw_viadev = NULL;
+
+	ftmp->fw_next = *chainptr;
+       	*chainptr=ftmp;
+	restore_flags(flags);
+	return(0);
+}
+
+/*
+ * Same as insert_in_chain, but the new rule is linked at the tail of
+ * the chain.  Returns 0, or (positive) ENOMEM when the allocation fails.
+ */
+static int append_to_chain(struct ip_fw *volatile* chainptr, struct ip_fw *frwl,int len)
+{
+	struct ip_fw *ftmp;
+	struct ip_fw *chtmp=NULL;
+	struct ip_fw *volatile chtmp_prev=NULL;
+	unsigned long flags;
+
+	save_flags(flags);
+
+	ftmp = kmalloc( sizeof(struct ip_fw), GFP_ATOMIC );
+	if ( ftmp == NULL )
+	{
+#ifdef DEBUG_IP_FIREWALL
+		printk("ip_fw_ctl: malloc said no\n");
+#endif
+		return( ENOMEM );
+	}
+
+	memcpy(ftmp, frwl, len);
+	/*
+	 *	Allow the more recent "minimise cost" flag to be
+	 *	set. [Rob van Nieuwkerk]
+	 */
+	ftmp->fw_tosand |= 0x01;
+	ftmp->fw_tosxor &= 0xFE;
+	ftmp->fw_pcnt=0L;
+	ftmp->fw_bcnt=0L;
+
+	ftmp->fw_next = NULL;
+
+	cli();
+
+	if ((ftmp->fw_vianame)[0]) {
+		if (!(ftmp->fw_viadev = dev_get_by_name(ftmp->fw_vianame)))
+			ftmp->fw_viadev = (struct net_device *) -1;
+	} else
+		ftmp->fw_viadev = NULL;
+
+	/* walk to the last rule; chtmp_prev stays NULL for an empty chain */
+	chtmp_prev=NULL;
+	for (chtmp=*chainptr;chtmp!=NULL;chtmp=chtmp->fw_next)
+		chtmp_prev=chtmp;
+
+	if (chtmp_prev)
+		chtmp_prev->fw_next=ftmp;
+	else
+        	*chainptr=ftmp;
+	restore_flags(flags);
+	return(0);
+}
+
+/*
+ * Remove the first rule on the chain that equals *frwl in addresses,
+ * masks, via address, flags, port list and via name.  Returns 0 when a
+ * rule was removed, (positive) EINVAL when the chain is empty or no
+ * rule matches.
+ */
+static int del_from_chain(struct ip_fw *volatile*chainptr, struct ip_fw *frwl)
+{
+	struct ip_fw 	*ftmp,*ltmp;
+	unsigned short	tport1,tport2,tmpnum;
+	char		matches,was_found;
+	unsigned long 	flags;
+
+	save_flags(flags);
+	cli();
+
+	ftmp=*chainptr;
+
+	if ( ftmp == NULL )
+	{
+#ifdef DEBUG_IP_FIREWALL
+		printk("ip_fw_ctl:  chain is empty\n");
+#endif
+		restore_flags(flags);
+		return( EINVAL );
+	}
+
+	ltmp=NULL;
+	was_found=0;
+
+	/* ltmp trails ftmp by one node so the match can be unlinked */
+	while( !was_found && ftmp != NULL )
+	{
+		matches=1;
+		if (ftmp->fw_src.s_addr!=frwl->fw_src.s_addr
+		     ||  ftmp->fw_dst.s_addr!=frwl->fw_dst.s_addr
+		     ||  ftmp->fw_smsk.s_addr!=frwl->fw_smsk.s_addr
+		     ||  ftmp->fw_dmsk.s_addr!=frwl->fw_dmsk.s_addr
+		     ||  ftmp->fw_via.s_addr!=frwl->fw_via.s_addr
+		     ||  ftmp->fw_flg!=frwl->fw_flg)
+        		matches=0;
+
+		tport1=ftmp->fw_nsp+ftmp->fw_ndp;
+		tport2=frwl->fw_nsp+frwl->fw_ndp;
+		if (tport1!=tport2)
+		        matches=0;
+		else if (tport1!=0)
+		{
+			for (tmpnum=0;tmpnum < tport1 && tmpnum < IP_FW_MAX_PORTS;tmpnum++)
+        		if (ftmp->fw_pts[tmpnum]!=frwl->fw_pts[tmpnum])
+				matches=0;
+		}
+		if (strncmp(ftmp->fw_vianame, frwl->fw_vianame, IFNAMSIZ))
+		        matches=0;
+		if(matches)
+		{
+			was_found=1;
+			if (ltmp)
+			{
+				ltmp->fw_next=ftmp->fw_next;
+				kfree_s(ftmp,sizeof(*ftmp));
+				ftmp=ltmp->fw_next;
+        		}
+      			else
+      			{
+      				*chainptr=ftmp->fw_next;
+	 			kfree_s(ftmp,sizeof(*ftmp));
+				ftmp=*chainptr;
+			}
+		}
+		else
+		{
+			ltmp = ftmp;
+			ftmp = ftmp->fw_next;
+		 }
+	}
+	restore_flags(flags);
+	if (was_found)
+		return 0;
+	else
+		return(EINVAL);
+}
+
+#endif  /* CONFIG_IP_ACCT || CONFIG_IP_FIREWALL */
+
+/*
+ * Validate a rule handed in from outside: exact structure size, no
+ * undefined flag bits, no flags this build does not support, at least
+ * two ports for each range flag, and a port count that fits fw_pts
+ * (one slot is kept for the redirect port when IP_FW_F_REDIR is set).
+ * Returns frwl itself when acceptable, NULL otherwise.
+ */
+struct ip_fw *check_ipfw_struct(struct ip_fw *frwl, int len)
+{
+
+	if ( len != sizeof(struct ip_fw) )
+	{
+#ifdef DEBUG_IP_FIREWALL
+		printk("ip_fw_ctl: len=%d, want %d\n",len, sizeof(struct ip_fw));
+#endif
+		return(NULL);
+	}
+
+	if ( (frwl->fw_flg & ~IP_FW_F_MASK) != 0 )
+	{
+#ifdef DEBUG_IP_FIREWALL
+		printk("ip_fw_ctl: undefined flag bits set (flags=%x)\n",
+			frwl->fw_flg);
+#endif
+		return(NULL);
+	}
+
+#ifndef CONFIG_IP_TRANSPARENT_PROXY
+	if (frwl->fw_flg & IP_FW_F_REDIR) {
+#ifdef DEBUG_IP_FIREWALL
+		printk("ip_fw_ctl: unsupported flag IP_FW_F_REDIR\n");
+#endif
+		return(NULL);
+	}
+#endif
+
+#ifndef CONFIG_IP_MASQUERADE
+	if (frwl->fw_flg & IP_FW_F_MASQ) {
+#ifdef DEBUG_IP_FIREWALL
+		printk("ip_fw_ctl: unsupported flag IP_FW_F_MASQ\n");
+#endif
+		return(NULL);
+	}
+#endif
+
+	if ( (frwl->fw_flg & IP_FW_F_SRNG) && frwl->fw_nsp < 2 )
+	{
+#ifdef DEBUG_IP_FIREWALL
+		printk("ip_fw_ctl: src range set but fw_nsp=%d\n",
+			frwl->fw_nsp);
+#endif
+		return(NULL);
+	}
+
+	if ( (frwl->fw_flg & IP_FW_F_DRNG) && frwl->fw_ndp < 2 )
+	{
+#ifdef DEBUG_IP_FIREWALL
+		printk("ip_fw_ctl: dst range set but fw_ndp=%d\n",
+			frwl->fw_ndp);
+#endif
+		return(NULL);
+	}
+
+	if ( frwl->fw_nsp + frwl->fw_ndp > (frwl->fw_flg & IP_FW_F_REDIR ? IP_FW_MAX_PORTS - 1 : IP_FW_MAX_PORTS) )
+	{
+#ifdef DEBUG_IP_FIREWALL
+		printk("ip_fw_ctl: too many ports (%d+%d)\n",
+			frwl->fw_nsp,frwl->fw_ndp);
+#endif
+		return(NULL);
+	}
+
+	return frwl;
+}
+
+
+
+
+#ifdef CONFIG_IP_ACCT
+
+/*
+ * Control entry point for the accounting chain: flush, zero, or
+ * insert/append/delete the rule in m.  Returns 0 on success or a
+ * positive errno value.
+ */
+int ip_acct_ctl(int stage, void *m, int len)
+{
+	if ( stage == IP_ACCT_FLUSH )
+	{
+		free_fw_chain(&ip_acct_chain);
+		return(0);
+	}
+	if ( stage == IP_ACCT_ZERO )
+	{
+		zero_fw_chain(ip_acct_chain);
+		return(0);
+	}
+	if ( stage == IP_ACCT_INSERT || stage == IP_ACCT_APPEND ||
+	  				stage == IP_ACCT_DELETE )
+	{
+		struct ip_fw *frwl;
+
+		if (!(frwl=check_ipfw_struct(m,len)))
+			return (EINVAL);
+
+		switch (stage)
+		{
+			case IP_ACCT_INSERT:
+				return( insert_in_chain(&ip_acct_chain,frwl,len));
+			case IP_ACCT_APPEND:
+				return( append_to_chain(&ip_acct_chain,frwl,len));
+		    	case IP_ACCT_DELETE:
+				return( del_from_chain(&ip_acct_chain,frwl));
+			default:
+				/*
+ 				 *	Should be panic but... (Why ???
- AC) + */ +#ifdef DEBUG_IP_FIREWALL + printk("ip_acct_ctl: unknown request %d\n",stage); +#endif + return(EINVAL); + } + } +#ifdef DEBUG_IP_FIREWALL + printk("ip_acct_ctl: unknown request %d\n",stage); +#endif + return(EINVAL); +} +#endif + +#ifdef CONFIG_IP_FIREWALL +int ip_fw_ctl(int stage, void *m, int len) +{ + int cmd, fwtype; + + cmd = stage & IP_FW_COMMAND; + fwtype = (stage & IP_FW_TYPE) >> IP_FW_SHIFT; + + if ( cmd == IP_FW_FLUSH ) + { + free_fw_chain(chains[fwtype]); + return(0); + } + + if ( cmd == IP_FW_ZERO ) + { + zero_fw_chain(*chains[fwtype]); + return(0); + } + + if ( cmd == IP_FW_POLICY ) + { + int *tmp_policy_ptr; + tmp_policy_ptr=(int *)m; + *policies[fwtype] = *tmp_policy_ptr; + return 0; + } + + if ( cmd == IP_FW_CHECK ) + { + struct net_device *viadev; + struct ip_fwpkt *ipfwp; + struct iphdr *ip; + + if ( len != sizeof(struct ip_fwpkt) ) + { +#ifdef DEBUG_IP_FIREWALL + printk("ip_fw_ctl: length=%d, expected %d\n", + len, sizeof(struct ip_fwpkt)); +#endif + return( EINVAL ); + } + + ipfwp = (struct ip_fwpkt *)m; + ip = &(ipfwp->fwp_iph); + + if ( !(viadev = dev_get_by_name(ipfwp->fwp_vianame)) ) { +#ifdef DEBUG_IP_FIREWALL + printk("ip_fw_ctl: invalid device \"%s\"\n", ipfwp->fwp_vianame); +#endif + return(EINVAL); + } else if ( ip->ihl != sizeof(struct iphdr) / sizeof(int)) { +#ifdef DEBUG_IP_FIREWALL + printk("ip_fw_ctl: ip->ihl=%d, want %d\n",ip->ihl, + sizeof(struct iphdr)/sizeof(int)); +#endif + return(EINVAL); + } + + switch (ip_fw_chk(ip, viadev, NULL, *chains[fwtype], + *policies[fwtype], IP_FW_MODE_CHK)) + { + case FW_ACCEPT: + return(0); + case FW_REDIRECT: + return(ECONNABORTED); + case FW_MASQUERADE: + return(ECONNRESET); + case FW_REJECT: + return(ECONNREFUSED); + default: /* FW_BLOCK */ + return(ETIMEDOUT); + } + } + + if ( cmd == IP_FW_MASQ_TIMEOUTS ) + return ip_fw_masq_timeouts(m, len); + +/* + * Here we really working hard-adding new elements + * to blocking/forwarding chains or deleting 'em + */ + + if ( cmd == 
IP_FW_INSERT || cmd == IP_FW_APPEND || cmd == IP_FW_DELETE ) + { + struct ip_fw *frwl; + int fwtype; + + frwl=check_ipfw_struct(m,len); + if (frwl==NULL) + return (EINVAL); + fwtype = (stage & IP_FW_TYPE) >> IP_FW_SHIFT; + + switch (cmd) + { + case IP_FW_INSERT: + return(insert_in_chain(chains[fwtype],frwl,len)); + case IP_FW_APPEND: + return(append_to_chain(chains[fwtype],frwl,len)); + case IP_FW_DELETE: + return(del_from_chain(chains[fwtype],frwl)); + default: + /* + * Should be panic but... (Why are BSD people panic obsessed ??) + */ +#ifdef DEBUG_IP_FIREWALL + printk("ip_fw_ctl: unknown request %d\n",stage); +#endif + return(EINVAL); + } + } + +#ifdef DEBUG_IP_FIREWALL + printk("ip_fw_ctl: unknown request %d\n",stage); +#endif + return(ENOPROTOOPT); +} +#endif /* CONFIG_IP_FIREWALL */ + +#ifdef CONFIG_PROC_FS +#if defined(CONFIG_IP_FIREWALL) || defined(CONFIG_IP_ACCT) + +static int ip_chain_procinfo(int stage, char *buffer, char **start, + off_t offset, int length, int reset) +{ + off_t pos=0, begin=0; + struct ip_fw *i; + unsigned long flags; + int len, p; + int last_len = 0; + + + switch(stage) + { +#ifdef CONFIG_IP_FIREWALL + case IP_FW_IN: + i = ip_fw_in_chain; + len=sprintf(buffer, "IP firewall input rules, default %d\n", + ip_fw_in_policy); + break; + case IP_FW_OUT: + i = ip_fw_out_chain; + len=sprintf(buffer, "IP firewall output rules, default %d\n", + ip_fw_out_policy); + break; + case IP_FW_FWD: + i = ip_fw_fwd_chain; + len=sprintf(buffer, "IP firewall forward rules, default %d\n", + ip_fw_fwd_policy); + break; +#endif +#ifdef CONFIG_IP_ACCT + case IP_FW_ACCT: + i = ip_acct_chain; + len=sprintf(buffer,"IP accounting rules\n"); + break; +#endif + default: + /* this should never be reached, but safety first... 
*/ + i = NULL; + len=0; + break; + } + + save_flags(flags); + cli(); + + while(i!=NULL) + { + len+=sprintf(buffer+len,"%08X/%08X->%08X/%08X %.16s %08X %X ", + ntohl(i->fw_src.s_addr),ntohl(i->fw_smsk.s_addr), + ntohl(i->fw_dst.s_addr),ntohl(i->fw_dmsk.s_addr), + (i->fw_vianame)[0] ? i->fw_vianame : "-", + ntohl(i->fw_via.s_addr), i->fw_flg); + /* 10 is enough for a 32 bit box but the counters are 64bit on + the Alpha and Ultrapenguin */ + len+=sprintf(buffer+len,"%u %u %-20lu %-20lu", + i->fw_nsp,i->fw_ndp, i->fw_pcnt,i->fw_bcnt); + for (p = 0; p < IP_FW_MAX_PORTS; p++) + len+=sprintf(buffer+len, " %u", i->fw_pts[p]); + len+=sprintf(buffer+len, " A%02X X%02X", i->fw_tosand, i->fw_tosxor); + buffer[len++]='\n'; + buffer[len]='\0'; + pos=begin+len; + if(posoffset+length) + { + len = last_len; + break; + } + else if(reset) + { + /* This needs to be done at this specific place! */ + i->fw_pcnt=0L; + i->fw_bcnt=0L; + } + last_len = len; + i=i->fw_next; + } + restore_flags(flags); + *start=buffer+(offset-begin); + len-=(offset-begin); + if(len>length) + len=length; + return len; +} +#endif + +#ifdef CONFIG_IP_ACCT + +static int ip_acct_procinfo(char *buffer, char **start, off_t offset, + int length +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,3,29) + , int reset +#endif + ) +{ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,3,29) + /* FIXME: No more `atomic' read and reset. Wonderful 8-( --RR */ + int reset = 0; +#endif + return ip_chain_procinfo(IP_FW_ACCT, buffer,start, offset,length, + reset); +} + +#endif + +#ifdef CONFIG_IP_FIREWALL + +static int ip_fw_in_procinfo(char *buffer, char **start, off_t offset, + int length +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,3,29) + , int reset +#endif + ) +{ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,3,29) + /* FIXME: No more `atomic' read and reset. 
Wonderful 8-( --RR */
+	int reset = 0;
+#endif
+	return ip_chain_procinfo(IP_FW_IN, buffer,start,offset,length,
+				 reset);
+}
+
+/* /proc read handler for the output chain. */
+static int ip_fw_out_procinfo(char *buffer, char **start, off_t offset,
+			      int length
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,3,29)
+			    , int reset
+#endif
+	)
+{
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,3,29)
+	/* FIXME: No more `atomic' read and reset.  Wonderful 8-( --RR */
+	int reset = 0;
+#endif
+	return ip_chain_procinfo(IP_FW_OUT, buffer,start,offset,length,
+				 reset);
+}
+
+/* /proc read handler for the forward chain. */
+static int ip_fw_fwd_procinfo(char *buffer, char **start, off_t offset,
+			      int length
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,3,29)
+			    , int reset
+#endif
+	)
+{
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,3,29)
+	/* FIXME: No more `atomic' read and reset.  Wonderful 8-( --RR */
+	int reset = 0;
+#endif
+	return ip_chain_procinfo(IP_FW_FWD, buffer,start,offset,length,
+				 reset);
+}
+#endif
+#endif
+
+
+#ifdef CONFIG_IP_FIREWALL
+/*
+ *	Interface to the generic firewall chains.
+ *
+ *	Each hook below forwards phdr (the IP header), dev and arg (the
+ *	redirect-port slot) to ip_fw_chk() with the chain and default
+ *	policy it stands for; this, pf and pskb are not used.
+ */
+
+int ipfw_input_check(struct firewall_ops *this, int pf,
+		     struct net_device *dev, void *phdr, void *arg,
+		     struct sk_buff **pskb)
+{
+	return ip_fw_chk(phdr, dev, arg, ip_fw_in_chain, ip_fw_in_policy,
+			 IP_FW_MODE_FW);
+}
+
+int ipfw_output_check(struct firewall_ops *this, int pf,
+		      struct net_device *dev, void *phdr, void *arg,
+		      struct sk_buff **pskb)
+{
+	return ip_fw_chk(phdr, dev, arg, ip_fw_out_chain, ip_fw_out_policy,
+			 IP_FW_MODE_FW);
+}
+
+int ipfw_forward_check(struct firewall_ops *this, int pf,
+		       struct net_device *dev, void *phdr, void *arg,
+		       struct sk_buff **pskb)
+{
+	return ip_fw_chk(phdr, dev, arg, ip_fw_fwd_chain, ip_fw_fwd_policy,
+			 IP_FW_MODE_FW);
+}
+
+#ifdef CONFIG_IP_ACCT
+/* Accounting hooks: no redirect port, policy 0, accounting modes. */
+int ipfw_acct_in(struct firewall_ops *this, int pf, struct net_device *dev,
+		 void *phdr, void *arg, struct sk_buff **pskb)
+{
+	return ip_fw_chk(phdr,dev,NULL,ip_acct_chain,0,IP_FW_MODE_ACCT_IN);
+}
+
+int ipfw_acct_out(struct firewall_ops *this, int pf, struct net_device *dev,
+		  void *phdr, void *arg, struct sk_buff **pskb)
+{
+	return ip_fw_chk(phdr,dev,NULL,ip_acct_chain,0,IP_FW_MODE_ACCT_OUT);
+}
+#endif
+
+/*
+ * Positional initializer: after the leading NULL the order is forward,
+ * input, output, then the two accounting hooks (NULL when accounting
+ * is not configured).
+ */
+struct firewall_ops ipfw_ops=
+{
+	NULL,
+	ipfw_forward_check,
+	ipfw_input_check,
+	ipfw_output_check,
+#ifdef CONFIG_IP_ACCT
+	ipfw_acct_in,
+	ipfw_acct_out
+#else
+	NULL,
+	NULL
+#endif
+};
+
+#endif
+
+#if defined(CONFIG_IP_ACCT) || defined(CONFIG_IP_FIREWALL)
+
+/*
+ * Netdevice notifier: when a device comes up, every rule that names it
+ * in fw_vianame gets fw_viadev pointed at it; when it goes down those
+ * rules get the -1 "no such device" marker instead.  Runs with
+ * interrupts disabled and always returns NOTIFY_DONE.
+ */
+int ipfw_device_event(struct notifier_block *this, unsigned long event, void *ptr)
+{
+	struct net_device *dev=ptr;
+	char *devname = dev->name;
+	unsigned long flags;
+	struct ip_fw *fw;
+	int chn;
+
+	save_flags(flags);
+	cli();
+
+	if (event == NETDEV_UP) {
+		for (chn = 0; chn < IP_FW_CHAINS; chn++)
+			for (fw = *chains[chn]; fw; fw = fw->fw_next)
+				if ((fw->fw_vianame)[0] && !strncmp(devname,
+						fw->fw_vianame, IFNAMSIZ))
+					fw->fw_viadev = dev;
+	} else if (event == NETDEV_DOWN) {
+		for (chn = 0; chn < IP_FW_CHAINS; chn++)
+			for (fw = *chains[chn]; fw; fw = fw->fw_next)
+				/* we could compare just the pointers ...
*/ + if ((fw->fw_vianame)[0] && !strncmp(devname, + fw->fw_vianame, IFNAMSIZ)) + fw->fw_viadev = (struct net_device*)-1; + } + + restore_flags(flags); + return NOTIFY_DONE; +} + +static struct notifier_block ipfw_dev_notifier={ + ipfw_device_event, + NULL, + 0 +}; + +#endif + +int ipfw_init_or_cleanup(int init) +{ + int ret = 0; + + if (!init) + goto cleanup; + + ret = register_firewall(PF_INET, &ipfw_ops); + if (ret < 0) + goto cleanup_nothing; + +#ifdef CONFIG_IP_ACCT + proc_net_create("ip_acct", S_IFREG | S_IRUGO | S_IWUSR, ip_acct_procinfo); +#endif + proc_net_create("ip_input", S_IFREG | S_IRUGO | S_IWUSR, ip_fw_in_procinfo); + proc_net_create("ip_output", S_IFREG | S_IRUGO | S_IWUSR, ip_fw_out_procinfo); + proc_net_create("ip_forward", S_IFREG | S_IRUGO | S_IWUSR, ip_fw_fwd_procinfo); + + /* Register for device up/down reports */ + register_netdevice_notifier(&ipfw_dev_notifier); + +#ifdef CONFIG_IP_FIREWALL_NETLINK + ipfwsk = netlink_kernel_create(NETLINK_FIREWALL, NULL); +#endif + return ret; + + cleanup: +#ifdef CONFIG_IP_FIREWALL_NETLINK + sock_release(ipfwsk->socket); +#endif + unregister_netdevice_notifier(&ipfw_dev_notifier); + +#ifdef CONFIG_IP_ACCT + proc_net_remove("ip_acct"); +#endif + proc_net_remove("ip_input"); + proc_net_remove("ip_output"); + proc_net_remove("ip_forward"); + + free_fw_chain(chains[IP_FW_FWD]); + free_fw_chain(chains[IP_FW_IN]); + free_fw_chain(chains[IP_FW_OUT]); + free_fw_chain(chains[IP_FW_ACCT]); + + unregister_firewall(PF_INET, &ipfw_ops); + + cleanup_nothing: + return ret; +} diff -u --recursive --new-file v2.3.99-pre1/linux/net/ipv4/netfilter/ipt_LOG.c linux/net/ipv4/netfilter/ipt_LOG.c --- v2.3.99-pre1/linux/net/ipv4/netfilter/ipt_LOG.c Wed Dec 31 16:00:00 1969 +++ linux/net/ipv4/netfilter/ipt_LOG.c Sat Mar 18 16:51:35 2000 @@ -0,0 +1,368 @@ +/* + * This is a module which is used for logging packets. 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include + +struct in_device; +#include +#include + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) +#endif + +struct esphdr { + __u32 spi; +}; /* FIXME evil kludge */ + +/* Make init and cleanup non-static, so gcc doesn't warn about unused, + but don't export the symbols */ +EXPORT_NO_SYMBOLS; + +/* Use lock to serialize, so printks don't overlap */ +static spinlock_t log_lock = SPIN_LOCK_UNLOCKED; + +/* One level of recursion won't kill us */ +static void dump_packet(const struct ipt_log_info *info, + struct iphdr *iph, unsigned int len, int recurse) +{ + void *protoh = (u_int32_t *)iph + iph->ihl; + unsigned int datalen = len - iph->ihl * 4; + + /* Important fields: + * TOS, len, DF/MF, fragment offset, TTL, src, dst, options. */ + /* Max length: 40 "SRC=255.255.255.255 DST=255.255.255.255 " */ + printk("SRC=%u.%u.%u.%u DST=%u.%u.%u.%u ", + (ntohl(iph->saddr)>>24)&0xFF, + (ntohl(iph->saddr)>>16)&0xFF, + (ntohl(iph->saddr)>>8)&0xFF, + (ntohl(iph->saddr))&0xFF, + (ntohl(iph->daddr)>>24)&0xFF, + (ntohl(iph->daddr)>>16)&0xFF, + (ntohl(iph->daddr)>>8)&0xFF, + (ntohl(iph->daddr))&0xFF); + + /* Max length: 46 "LEN=65535 TOS=0xFF PREC=0xFF TTL=255 ID=65535 " */ + printk("LEN=%u TOS=0x%02X PREC=0x%02X TTL=%u ID=%u ", + ntohs(iph->tot_len), iph->tos & IPTOS_TOS_MASK, + iph->tos & IPTOS_PREC_MASK, iph->ttl, ntohs(iph->id)); + + /* Max length: 6 "CE DF MF " */ + if (ntohs(iph->frag_off) & IP_CE) + printk("CE "); + if (ntohs(iph->frag_off) & IP_DF) + printk("DF "); + if (ntohs(iph->frag_off) & IP_MF) + printk("MF "); + + /* Max length: 11 "FRAG:65535 " */ + if (ntohs(iph->frag_off) & IP_OFFSET) + printk("FRAG:%u ", ntohs(iph->frag_off) & IP_OFFSET); + + if ((info->logflags & IPT_LOG_IPOPT) + && iph->ihl * 4 != sizeof(struct iphdr)) { + unsigned int i; + + /* Max length: 127 "OPT (" 15*4*2chars ") " */ + printk("OPT ("); + for (i = sizeof(struct iphdr); i < iph->ihl * 4; i++) + 
printk("%02X", ((u_int8_t *)iph)[i]); + printk(") "); + } + + switch (iph->protocol) { + case IPPROTO_TCP: { + struct tcphdr *tcph = protoh; + + /* Max length: 10 "PROTO=TCP " */ + printk("PROTO=TCP "); + + if (ntohs(iph->frag_off) & IP_OFFSET) + break; + + /* Max length: 25 "INCOMPLETE [65535 bytes] " */ + if (datalen < sizeof (*tcph)) { + printk("INCOMPLETE [%u bytes] ", datalen); + break; + } + + /* Max length: 20 "SPT=65535 DPT=65535 " */ + printk("SPT=%u DPT=%u ", + ntohs(tcph->source), ntohs(tcph->dest)); + /* Max length: 30 "SEQ=4294967295 ACK=4294967295 " */ + if (info->logflags & IPT_LOG_TCPSEQ) + printk("SEQ=%u ACK=%u ", + ntohl(tcph->seq), ntohl(tcph->ack_seq)); + /* Max length: 13 "WINDOW=65535 " */ + printk("WINDOW=%u ", ntohs(tcph->window)); + /* Max length: 9 "RES=0x3F " */ + printk("RES=0x%02x ", (u_int8_t)(ntohl(tcp_flag_word(tcph) & TCP_RESERVED_BITS) >> 22)); + /* Max length: 36 "URG ACK PSH RST SYN FIN " */ + if (tcph->urg) + printk("URG "); + if (tcph->ack) + printk("ACK "); + if (tcph->psh) + printk("PSH "); + if (tcph->rst) + printk("RST "); + if (tcph->syn) + printk("SYN "); + if (tcph->fin) + printk("FIN "); + /* Max length: 11 "URGP=65535 " */ + printk("URGP=%u ", ntohs(tcph->urg_ptr)); + + if ((info->logflags & IPT_LOG_TCPOPT) + && tcph->doff * 4 != sizeof(struct tcphdr)) { + unsigned int i; + + /* Max length: 127 "OPT (" 15*4*2chars ") " */ + printk("OPT ("); + for (i =sizeof(struct tcphdr); i < tcph->doff * 4; i++) + printk("%02X", ((u_int8_t *)tcph)[i]); + printk(") "); + } + break; + } + case IPPROTO_UDP: { + struct udphdr *udph = protoh; + + /* Max length: 10 "PROTO=UDP " */ + printk("PROTO=UDP "); + + if (ntohs(iph->frag_off) & IP_OFFSET) + break; + + /* Max length: 25 "INCOMPLETE [65535 bytes] " */ + if (datalen < sizeof (*udph)) { + printk("INCOMPLETE [%u bytes] ", datalen); + break; + } + + /* Max length: 20 "SPT=65535 DPT=65535 " */ + printk("SPT=%u DPT=%u LEN=%u ", + ntohs(udph->source), ntohs(udph->dest), + ntohs(udph->len)); 
+ break; + } + case IPPROTO_ICMP: { + struct icmphdr *icmph = protoh; + static size_t required_len[NR_ICMP_TYPES+1] + = { [ICMP_ECHOREPLY] = 4, + [ICMP_DEST_UNREACH] + = 8 + sizeof(struct iphdr) + 8, + [ICMP_SOURCE_QUENCH] + = 8 + sizeof(struct iphdr) + 8, + [ICMP_REDIRECT] + = 8 + sizeof(struct iphdr) + 8, + [ICMP_ECHO] = 4, + [ICMP_TIME_EXCEEDED] + = 8 + sizeof(struct iphdr) + 8, + [ICMP_PARAMETERPROB] + = 8 + sizeof(struct iphdr) + 8, + [ICMP_TIMESTAMP] = 20, + [ICMP_TIMESTAMPREPLY] = 20, + [ICMP_ADDRESS] = 12, + [ICMP_ADDRESSREPLY] = 12 }; + + /* Max length: 11 "PROTO=ICMP " */ + printk("PROTO=ICMP "); + + if (ntohs(iph->frag_off) & IP_OFFSET) + break; + + /* Max length: 25 "INCOMPLETE [65535 bytes] " */ + if (datalen < 4) { + printk("INCOMPLETE [%u bytes] ", datalen); + break; + } + + /* Max length: 18 "TYPE=255 CODE=255 " */ + printk("TYPE=%u CODE=%u ", icmph->type, icmph->code); + + /* Max length: 25 "INCOMPLETE [65535 bytes] " */ + if (icmph->type <= NR_ICMP_TYPES + && required_len[icmph->type] + && datalen < required_len[icmph->type]) { + printk("INCOMPLETE [%u bytes] ", datalen); + break; + } + + switch (icmph->type) { + case ICMP_ECHOREPLY: + case ICMP_ECHO: + /* Max length: 19 "ID=65535 SEQ=65535 " */ + printk("ID=%u SEQ=%u ", + ntohs(icmph->un.echo.id), + ntohs(icmph->un.echo.sequence)); + break; + + case ICMP_PARAMETERPROB: + /* Max length: 14 "PARAMETER=255 " */ + printk("PARAMETER=%u ", + ntohl(icmph->un.gateway) >> 24); + break; + case ICMP_REDIRECT: + /* Max length: 24 "GATEWAY=255.255.255.255 " */ + printk("GATEWAY=%u.%u.%u.%u ", + (ntohl(icmph->un.gateway)>>24)&0xFF, + (ntohl(icmph->un.gateway)>>16)&0xFF, + (ntohl(icmph->un.gateway)>>8)&0xFF, + (ntohl(icmph->un.gateway))&0xFF); + /* Fall through */ + case ICMP_DEST_UNREACH: + case ICMP_SOURCE_QUENCH: + case ICMP_TIME_EXCEEDED: + /* Max length: 3+maxlen */ + if (recurse) { + printk("["); + dump_packet(info, + (struct iphdr *)(icmph + 1), + datalen-sizeof(struct iphdr), + 0); + printk("] "); + } + 
+ /* Max length: 10 "MTU=65535 " */ + if (icmph->type == ICMP_DEST_UNREACH + && icmph->code == ICMP_FRAG_NEEDED) + printk("MTU=%u ", ntohs(icmph->un.frag.mtu)); + } + break; + } + /* Max Length */ + case IPPROTO_AH: + case IPPROTO_ESP: { + struct esphdr *esph = protoh; + int esp= (iph->protocol==IPPROTO_ESP); + + /* Max length: 10 "PROTO=ESP " */ + printk("PROTO=%s ",esp? "ESP" : "AH"); + + if (ntohs(iph->frag_off) & IP_OFFSET) + break; + + /* Max length: 25 "INCOMPLETE [65535 bytes] " */ + if (datalen < sizeof (*esph)) { + printk("INCOMPLETE [%u bytes] ", datalen); + break; + } + + /* Length: 15 "SPI=0xF1234567 " */ + printk("SPI=0x%x ", ntohl(esph->spi) ); + break; + } + /* Max length: 10 "PROTO 255 " */ + default: + printk("PROTO=%u ", iph->protocol); + } + + /* Proto Max log string length */ + /* IP: 40+46+6+11+127 = 230 */ + /* TCP: 10+max(25,20+30+13+9+36+11+127) = 256 */ + /* UDP: 10+max(25,20) = 35 */ + /* ICMP: 11+max(25, 18+25+max(19,14,24+3+n+10,3+n+10)) = 91+n */ + /* ESP: 10+max(25)+15 = 50 */ + /* AH: 9+max(25)+15 = 49 */ + /* unknown: 10 */ + + /* (ICMP allows recursion one level deep) */ + /* maxlen = IP + ICMP + IP + max(TCP,UDP,ICMP,unknown) */ + /* maxlen = 230+ 91 + 230 + 256 = 807 */ +} + +static unsigned int +ipt_log_target(struct sk_buff **pskb, + unsigned int hooknum, + const struct net_device *in, + const struct net_device *out, + const void *targinfo, + void *userinfo) +{ + struct iphdr *iph = (*pskb)->nh.iph; + const struct ipt_log_info *loginfo = targinfo; + char level_string[4] = "< >"; + + level_string[1] = '0' + (loginfo->level % 8); + spin_lock_bh(&log_lock); + printk(level_string); + printk("%sIN=%s OUT=%s ", + loginfo->prefix, + in ? in->name : "", + out ? out->name : ""); + if (in && !out) { + /* MAC logging for input chain only. 
*/ + printk("MAC="); + if ((*pskb)->dev && (*pskb)->dev->hard_header_len) { + int i; + unsigned char *p = (*pskb)->mac.raw; + for (i = 0; i < (*pskb)->dev->hard_header_len; i++,p++) + printk("%02x%c", *p, + i==(*pskb)->dev->hard_header_len - 1 + ? ' ':':'); + } + } + + dump_packet(loginfo, iph, (*pskb)->len, 1); + printk("\n"); + spin_unlock_bh(&log_lock); + + return IPT_CONTINUE; +} + +static int ipt_log_checkentry(const char *tablename, + const struct ipt_entry *e, + void *targinfo, + unsigned int targinfosize, + unsigned int hook_mask) +{ + const struct ipt_log_info *loginfo = targinfo; + + if (targinfosize != IPT_ALIGN(sizeof(struct ipt_log_info))) { + DEBUGP("LOG: targinfosize %u != %u\n", + targinfosize, IPT_ALIGN(sizeof(struct ipt_log_info))); + return 0; + } + + if (loginfo->level >= 8) { + DEBUGP("LOG: level %u >= 8\n", loginfo->level); + return 0; + } + + if (loginfo->prefix[sizeof(loginfo->prefix)-1] != '\0') { + DEBUGP("LOG: prefix term %i\n", + loginfo->prefix[sizeof(loginfo->prefix)-1]); + return 0; + } + + return 1; +} + +static struct ipt_target ipt_log_reg += { { NULL, NULL }, "LOG", ipt_log_target, ipt_log_checkentry, THIS_MODULE }; + +static int __init init(void) +{ + if (ipt_register_target(&ipt_log_reg)) + return -EINVAL; + + return 0; +} + +static void __exit fini(void) +{ + ipt_unregister_target(&ipt_log_reg); +} + +module_init(init); +module_exit(fini); diff -u --recursive --new-file v2.3.99-pre1/linux/net/ipv4/netfilter/ipt_MARK.c linux/net/ipv4/netfilter/ipt_MARK.c --- v2.3.99-pre1/linux/net/ipv4/netfilter/ipt_MARK.c Wed Dec 31 16:00:00 1969 +++ linux/net/ipv4/netfilter/ipt_MARK.c Fri Mar 17 10:56:20 2000 @@ -0,0 +1,68 @@ +/* This is a module which is used for setting the NFMARK field of an skb. 
*/ +#include +#include +#include +#include + +#include +#include + +EXPORT_NO_SYMBOLS; + +static unsigned int +target(struct sk_buff **pskb, + unsigned int hooknum, + const struct net_device *in, + const struct net_device *out, + const void *targinfo, + void *userinfo) +{ + const struct ipt_mark_target_info *markinfo = targinfo; + + if((*pskb)->nfmark != markinfo->mark) { + (*pskb)->nfmark = markinfo->mark; + (*pskb)->nfcache |= NFC_ALTERED; + } + return IPT_CONTINUE; +} + +static int +checkentry(const char *tablename, + const struct ipt_entry *e, + void *targinfo, + unsigned int targinfosize, + unsigned int hook_mask) +{ + if (targinfosize != IPT_ALIGN(sizeof(struct ipt_mark_target_info))) { + printk(KERN_WARNING "MARK: targinfosize %u != %Zu\n", + targinfosize, + IPT_ALIGN(sizeof(struct ipt_mark_target_info))); + return 0; + } + + if (strcmp(tablename, "mangle") != 0) { + printk(KERN_WARNING "MARK: can only be called from \"mangle\" table, not \"%s\"\n", tablename); + return 0; + } + + return 1; +} + +static struct ipt_target ipt_mark_reg += { { NULL, NULL }, "MARK", target, checkentry, THIS_MODULE }; + +static int __init init(void) +{ + if (ipt_register_target(&ipt_mark_reg)) + return -EINVAL; + + return 0; +} + +static void __exit fini(void) +{ + ipt_unregister_target(&ipt_mark_reg); +} + +module_init(init); +module_exit(fini); diff -u --recursive --new-file v2.3.99-pre1/linux/net/ipv4/netfilter/ipt_MASQUERADE.c linux/net/ipv4/netfilter/ipt_MASQUERADE.c --- v2.3.99-pre1/linux/net/ipv4/netfilter/ipt_MASQUERADE.c Wed Dec 31 16:00:00 1969 +++ linux/net/ipv4/netfilter/ipt_MASQUERADE.c Fri Mar 17 10:56:20 2000 @@ -0,0 +1,171 @@ +/* Masquerade. Simple mapping which alters range to a local IP address + (depending on route). */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +EXPORT_NO_SYMBOLS; + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) 
+#endif + +/* Lock protects masq region inside conntrack */ +static DECLARE_RWLOCK(masq_lock); + +/* FIXME: Multiple targets. --RR */ +static int +masquerade_check(const char *tablename, + const struct ipt_entry *e, + void *targinfo, + unsigned int targinfosize, + unsigned int hook_mask) +{ + const struct ip_nat_multi_range *mr = targinfo; + + if (targinfosize != IPT_ALIGN(sizeof(*mr))) { + DEBUGP("masquerade_check: size %u != %u.\n", + targinfosize, sizeof(*mr)); + return 0; + } + if (hook_mask & ~(1 << NF_IP_POST_ROUTING)) { + DEBUGP("masquerade_check: bad hooks %x.\n", hook_mask); + return 0; + } + if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) { + DEBUGP("masquerade_check: bad MAP_IPS.\n"); + return 0; + } + if (mr->rangesize != 1) { + DEBUGP("masquerade_check: bad rangesize %u.\n", mr->rangesize); + return 0; + } + return 1; +} + +static unsigned int +masquerade_target(struct sk_buff **pskb, + unsigned int hooknum, + const struct net_device *in, + const struct net_device *out, + const void *targinfo, + void *userinfo) +{ + struct ip_conntrack *ct; + enum ip_conntrack_info ctinfo; + const struct ip_nat_range *r; + struct ip_nat_multi_range newrange; + u_int32_t newsrc; + struct rtable *rt; + + IP_NF_ASSERT(hooknum == NF_IP_POST_ROUTING); + + /* FIXME: For the moment, don't do local packets, breaks + testsuite for 2.3.49 --RR */ + if ((*pskb)->sk) + return NF_ACCEPT; + + ct = ip_conntrack_get(*pskb, &ctinfo); + IP_NF_ASSERT(ct && (ctinfo == IP_CT_NEW + || ctinfo == IP_CT_RELATED)); + + r = targinfo; + + if (ip_route_output(&rt, (*pskb)->nh.iph->daddr, + 0, + RT_TOS((*pskb)->nh.iph->tos)|RTO_CONN, + out->ifindex) != 0) { + /* Shouldn't happen */ + printk("MASQUERADE: No route: Rusty's brain broke!\n"); + return NF_DROP; + } + + newsrc = rt->rt_src; + DEBUGP("newsrc = %u.%u.%u.%u\n", IP_PARTS(newsrc)); + ip_rt_put(rt); + + WRITE_LOCK(&masq_lock); + ct->nat.masq_index = out->ifindex; + WRITE_UNLOCK(&masq_lock); + + /* Transfer from original range. 
*/ + newrange = ((struct ip_nat_multi_range) + { 1, { { r->flags | IP_NAT_RANGE_MAP_IPS, + newsrc, newsrc, + r->min, r->max } } }); + + /* Hand modified range to generic setup. */ + return ip_nat_setup_info(ct, &newrange, hooknum); +} + +static inline int +device_cmp(const struct ip_conntrack *i, void *ifindex) +{ + int ret; + + READ_LOCK(&masq_lock); + ret = (i->nat.masq_index == (int)(long)ifindex); + READ_UNLOCK(&masq_lock); + + return ret; +} + +int masq_device_event(struct notifier_block *this, + unsigned long event, + void *ptr) +{ + struct net_device *dev = ptr; + + if (event == NETDEV_DOWN) { + /* Device was downed. Search entire table for + conntracks which were associated with that device, + and forget them. */ + IP_NF_ASSERT(dev->ifindex != 0); + + ip_ct_selective_cleanup(device_cmp, (void *)(long)dev->ifindex); + } + + return NOTIFY_DONE; +} + +static struct notifier_block masq_dev_notifier = { + masq_device_event, + NULL, + 0 +}; + +static struct ipt_target masquerade += { { NULL, NULL }, "MASQUERADE", masquerade_target, masquerade_check, + THIS_MODULE }; + +static int __init init(void) +{ + int ret; + + ret = ipt_register_target(&masquerade); + + if (ret == 0) { + /* Register for device down reports */ + register_netdevice_notifier(&masq_dev_notifier); + } + + return ret; +} + +static void __exit fini(void) +{ + ipt_unregister_target(&masquerade); + unregister_netdevice_notifier(&masq_dev_notifier); +} + +module_init(init); +module_exit(fini); diff -u --recursive --new-file v2.3.99-pre1/linux/net/ipv4/netfilter/ipt_MIRROR.c linux/net/ipv4/netfilter/ipt_MIRROR.c --- v2.3.99-pre1/linux/net/ipv4/netfilter/ipt_MIRROR.c Wed Dec 31 16:00:00 1969 +++ linux/net/ipv4/netfilter/ipt_MIRROR.c Fri Mar 17 10:56:20 2000 @@ -0,0 +1,131 @@ +/* + This is a module which is used for resending packets with inverted src and dst. + + Based on code from: ip_nat_dumb.c,v 1.9 1999/08/20 + and various sources. 
+ + Copyright (C) 2000 Emmanuel Roger + + This program is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 2 of the License, or (at your + option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ +#include +#include +#include +#include +#include +#include +#include +struct in_device; +#include +EXPORT_NO_SYMBOLS; + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) +#endif + +static int route_mirror(struct sk_buff *skb) +{ + struct iphdr *iph = skb->nh.iph; + struct rtable *rt; + + if (ip_route_output(&rt, iph->daddr, iph->saddr, + RT_TOS(iph->tos) | RTO_CONN, + 0)) { + return -EINVAL; + } + /* check if the interface we are living by is the same as the one we arrived on */ + + if (skb->rx_dev != rt->u.dst.dev) { + /* Drop old route. 
*/ + dst_release(skb->dst); + skb->dst = &rt->u.dst; + return 0; + } + else return -EINVAL; +} + +static int +ip_rewrite(struct sk_buff *skb) +{ + struct iphdr *iph = skb->nh.iph; + u32 odaddr = iph->saddr; + u32 osaddr = iph->daddr; + + skb->nfcache |= NFC_ALTERED; + + /* Rewrite IP header */ + iph->daddr = odaddr; + iph->saddr = osaddr; + + return 0; +} + + +static unsigned int ipt_mirror_target(struct sk_buff **pskb, + unsigned int hooknum, + const struct net_device *in, + const struct net_device *out, + const void *targinfo, + void *userinfo) +{ + if ((*pskb)->dst != NULL) { + if (!ip_rewrite(*pskb) && !route_mirror(*pskb)) { + ip_send(*pskb); + return NF_STOLEN; + } + } + return NF_DROP; +} + +static int ipt_mirror_checkentry(const char *tablename, + const struct ipt_entry *e, + void *targinfo, + unsigned int targinfosize, + unsigned int hook_mask) +{ + /* Only on INPUT, FORWARD or PRE_ROUTING, otherwise loop danger. */ + if (hook_mask & ~((1 << NF_IP_PRE_ROUTING) + | (1 << NF_IP_FORWARD) + | (1 << NF_IP_LOCAL_IN))) { + DEBUGP("MIRROR: bad hook\n"); + return 0; + } + + if (targinfosize != IPT_ALIGN(0)) { + DEBUGP("MIRROR: targinfosize %u != 0\n", targinfosize); + return 0; + } + + return 1; +} + +static struct ipt_target ipt_mirror_reg += { { NULL, NULL }, "MIRROR", ipt_mirror_target, ipt_mirror_checkentry, + THIS_MODULE }; + +static int __init init(void) +{ + return ipt_register_target(&ipt_mirror_reg); +} + +static void __exit fini(void) +{ + ipt_unregister_target(&ipt_mirror_reg); +} + +module_init(init); +module_exit(fini); diff -u --recursive --new-file v2.3.99-pre1/linux/net/ipv4/netfilter/ipt_REDIRECT.c linux/net/ipv4/netfilter/ipt_REDIRECT.c --- v2.3.99-pre1/linux/net/ipv4/netfilter/ipt_REDIRECT.c Wed Dec 31 16:00:00 1969 +++ linux/net/ipv4/netfilter/ipt_REDIRECT.c Fri Mar 17 10:56:20 2000 @@ -0,0 +1,104 @@ +/* Redirect. Simple mapping which alters dst to a local IP address. 
*/ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +EXPORT_NO_SYMBOLS; + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) +#endif + +/* FIXME: Take multiple ranges --RR */ +static int +redirect_check(const char *tablename, + const struct ipt_entry *e, + void *targinfo, + unsigned int targinfosize, + unsigned int hook_mask) +{ + const struct ip_nat_multi_range *mr = targinfo; + + if (targinfosize != IPT_ALIGN(sizeof(*mr))) { + DEBUGP("redirect_check: size %u.\n", targinfosize); + return 0; + } + if (hook_mask & ~((1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_OUT))) { + DEBUGP("redirect_check: bad hooks %x.\n", hook_mask); + return 0; + } + if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) { + DEBUGP("redirect_check: bad MAP_IPS.\n"); + return 0; + } + if (mr->rangesize != 1) { + DEBUGP("redirect_check: bad rangesize %u.\n", mr->rangesize); + return 0; + } + return 1; +} + +static unsigned int +redirect_target(struct sk_buff **pskb, + unsigned int hooknum, + const struct net_device *in, + const struct net_device *out, + const void *targinfo, + void *userinfo) +{ + struct ip_conntrack *ct; + enum ip_conntrack_info ctinfo; + u_int32_t newdst; + const struct ip_nat_range *r = targinfo; + struct ip_nat_multi_range newrange; + + IP_NF_ASSERT(hooknum == NF_IP_PRE_ROUTING + || hooknum == NF_IP_LOCAL_OUT); + + ct = ip_conntrack_get(*pskb, &ctinfo); + IP_NF_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)); + + /* Local packets: make them go to loopback */ + if (hooknum == NF_IP_LOCAL_OUT) + newdst = htonl(0x7F000001); + else + /* Grab first address on interface. */ + newdst = (((struct in_device *)(*pskb)->dev->ip_ptr) + ->ifa_list->ifa_local); + + /* Transfer from original range. */ + newrange = ((struct ip_nat_multi_range) + { 1, { { r->flags | IP_NAT_RANGE_MAP_IPS, + newdst, newdst, + r->min, r->max } } }); + + /* Hand modified range to generic setup. 
*/ + return ip_nat_setup_info(ct, &newrange, hooknum); +} + +static struct ipt_target redirect_reg += { { NULL, NULL }, "REDIRECT", redirect_target, redirect_check, THIS_MODULE }; + +static int __init init(void) +{ + return ipt_register_target(&redirect_reg); +} + +static void __exit fini(void) +{ + ipt_unregister_target(&redirect_reg); +} + +module_init(init); +module_exit(fini); diff -u --recursive --new-file v2.3.99-pre1/linux/net/ipv4/netfilter/ipt_REJECT.c linux/net/ipv4/netfilter/ipt_REJECT.c --- v2.3.99-pre1/linux/net/ipv4/netfilter/ipt_REJECT.c Wed Dec 31 16:00:00 1969 +++ linux/net/ipv4/netfilter/ipt_REJECT.c Fri Mar 17 10:56:20 2000 @@ -0,0 +1,145 @@ +/* + * This is a module which is used for rejecting packets. + * Added support for customized reject packets (Jozsef Kadlecsik). + */ +#include +#include +#include +#include +#include +struct in_device; +#include +#include +#include +EXPORT_NO_SYMBOLS; + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) +#endif + +static unsigned int reject(struct sk_buff **pskb, + unsigned int hooknum, + const struct net_device *in, + const struct net_device *out, + const void *targinfo, + void *userinfo) +{ + const struct ipt_reject_info *reject = targinfo; + + switch (reject->with) { + case IPT_ICMP_NET_UNREACHABLE: + icmp_send(*pskb, ICMP_DEST_UNREACH, ICMP_NET_UNREACH, 0); + break; + case IPT_ICMP_HOST_UNREACHABLE: + icmp_send(*pskb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); + break; + case IPT_ICMP_PROT_UNREACHABLE: + icmp_send(*pskb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, 0); + break; + case IPT_ICMP_PORT_UNREACHABLE: + icmp_send(*pskb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); + break; + case IPT_ICMP_ECHOREPLY: { + struct icmphdr *icmph = (struct icmphdr *) + ((u_int32_t *)(*pskb)->nh.iph + (*pskb)->nh.iph->ihl); + unsigned int datalen = (*pskb)->len - (*pskb)->nh.iph->ihl * 4; + + /* Not non-head frags, or truncated */ + if (((ntohs((*pskb)->nh.iph->frag_off) & IP_OFFSET) == 0) + && datalen >= 4) 
{ + /* Usually I don't like cut & pasting code, + but dammit, my party is starting in 45 + mins! --RR */ + struct icmp_bxm icmp_param; + + icmp_param.icmph=*icmph; + icmp_param.icmph.type=ICMP_ECHOREPLY; + icmp_param.data_ptr=(icmph+1); + icmp_param.data_len=datalen; + icmp_reply(&icmp_param, *pskb); + } + } + break; + case IPT_TCP_RESET: + tcp_v4_send_reset(*pskb); + break; + } + + return NF_DROP; +} + +static inline int find_ping_match(const struct ipt_entry_match *m) +{ + const struct ipt_icmp *icmpinfo = (const struct ipt_icmp *)m->data; + + if (strcmp(m->u.match->name, "icmp") == 0 + && icmpinfo->type == ICMP_ECHO + && !(icmpinfo->invflags & IPT_ICMP_INV)) + return 1; + + return 0; +} + +static int check(const char *tablename, + const struct ipt_entry *e, + void *targinfo, + unsigned int targinfosize, + unsigned int hook_mask) +{ + const struct ipt_reject_info *rejinfo = targinfo; + + if (targinfosize != IPT_ALIGN(sizeof(struct ipt_icmp))) { + DEBUGP("REJECT: targinfosize %u != 0\n", targinfosize); + return 0; + } + + /* Only allow these for packet filtering. */ + if ((hook_mask & ~((1 << NF_IP_LOCAL_IN) + | (1 << NF_IP_FORWARD) + | (1 << NF_IP_LOCAL_OUT))) != 0) { + DEBUGP("REJECT: bad hook mask %X\n", hook_mask); + return 0; + } + + if (rejinfo->with == IPT_ICMP_ECHOREPLY) { + /* Must specify that it's an ICMP ping packet. */ + if (e->ip.proto != IPPROTO_ICMP + || (e->ip.invflags & IPT_INV_PROTO)) { + DEBUGP("REJECT: ECHOREPLY illegal for non-icmp\n"); + return 0; + } + /* Must contain ICMP match. 
*/ + if (IPT_MATCH_ITERATE(e, find_ping_match) == 0) { + DEBUGP("REJECT: ECHOREPLY illegal for non-ping\n"); + return 0; + } + } else if (rejinfo->with == IPT_TCP_RESET) { + if (e->ip.proto != IPPROTO_TCP + || (e->ip.invflags & IPT_INV_PROTO)) { + DEBUGP("REJECT: TCP_RESET illegal for non-tcp\n"); + return 0; + } + } + + return 1; +} + +static struct ipt_target ipt_reject_reg += { { NULL, NULL }, "REJECT", reject, check, THIS_MODULE }; + +static int __init init(void) +{ + if (ipt_register_target(&ipt_reject_reg)) + return -EINVAL; + return 0; +} + +static void __exit fini(void) +{ + ipt_unregister_target(&ipt_reject_reg); +} + +module_init(init); +module_exit(fini); diff -u --recursive --new-file v2.3.99-pre1/linux/net/ipv4/netfilter/ipt_TOS.c linux/net/ipv4/netfilter/ipt_TOS.c --- v2.3.99-pre1/linux/net/ipv4/netfilter/ipt_TOS.c Wed Dec 31 16:00:00 1969 +++ linux/net/ipv4/netfilter/ipt_TOS.c Fri Mar 17 10:56:20 2000 @@ -0,0 +1,87 @@ +/* This is a module which is used for setting the TOS field of a packet. 
*/ +#include +#include +#include +#include + +#include +#include + +EXPORT_NO_SYMBOLS; + +static unsigned int +target(struct sk_buff **pskb, + unsigned int hooknum, + const struct net_device *in, + const struct net_device *out, + const void *targinfo, + void *userinfo) +{ + struct iphdr *iph = (*pskb)->nh.iph; + const struct ipt_tos_target_info *tosinfo = targinfo; + + if ((iph->tos & IPTOS_TOS_MASK) != tosinfo->tos) { + u_int8_t diffs[2]; + + diffs[0] = iph->tos; + iph->tos = (iph->tos & IPTOS_PREC_MASK) | tosinfo->tos; + diffs[1] = iph->tos; + iph->check = csum_fold(csum_partial((char *)diffs, + sizeof(diffs), + iph->check^0xFFFF)); + (*pskb)->nfcache |= NFC_ALTERED; + } + return IPT_CONTINUE; +} + +static int +checkentry(const char *tablename, + const struct ipt_entry *e, + void *targinfo, + unsigned int targinfosize, + unsigned int hook_mask) +{ + const u_int8_t tos = ((struct ipt_tos_target_info *)targinfo)->tos; + + if (targinfosize != IPT_ALIGN(sizeof(struct ipt_tos_target_info))) { + printk(KERN_WARNING "TOS: targinfosize %u != %Zu\n", + targinfosize, + IPT_ALIGN(sizeof(struct ipt_tos_target_info))); + return 0; + } + + if (strcmp(tablename, "mangle") != 0) { + printk(KERN_WARNING "TOS: can only be called from \"mangle\" table, not \"%s\"\n", tablename); + return 0; + } + + if (tos != IPTOS_LOWDELAY + && tos != IPTOS_THROUGHPUT + && tos != IPTOS_RELIABILITY + && tos != IPTOS_MINCOST + && tos != IPTOS_NORMALSVC) { + printk(KERN_WARNING "TOS: bad tos value %#x\n", tos); + return 0; + } + + return 1; +} + +static struct ipt_target ipt_tos_reg += { { NULL, NULL }, "TOS", target, checkentry, THIS_MODULE }; + +static int __init init(void) +{ + if (ipt_register_target(&ipt_tos_reg)) + return -EINVAL; + + return 0; +} + +static void __exit fini(void) +{ + ipt_unregister_target(&ipt_tos_reg); +} + +module_init(init); +module_exit(fini); diff -u --recursive --new-file v2.3.99-pre1/linux/net/ipv4/netfilter/ipt_limit.c linux/net/ipv4/netfilter/ipt_limit.c --- 
v2.3.99-pre1/linux/net/ipv4/netfilter/ipt_limit.c Wed Dec 31 16:00:00 1969 +++ linux/net/ipv4/netfilter/ipt_limit.c Fri Mar 17 10:56:20 2000 @@ -0,0 +1,144 @@ +/* Kernel module to control the rate + * + * Jérôme de Vivie + * Hervé Eychenne + * + * 2 September 1999: Changed from the target RATE to the match + * `limit', removed logging. Did I mention that + * Alexey is a fucking genius? + * Rusty Russell (rusty@rustcorp.com.au). */ +#include +#include +#include +#include + +#include +#include +EXPORT_NO_SYMBOLS; + +#define IP_PARTS_NATIVE(n) \ +(unsigned int)((n)>>24)&0xFF, \ +(unsigned int)((n)>>16)&0xFF, \ +(unsigned int)((n)>>8)&0xFF, \ +(unsigned int)((n)&0xFF) + +#define IP_PARTS(n) IP_PARTS_NATIVE(ntohl(n)) + +/* The algorithm used is the Simple Token Bucket Filter (TBF) + * see net/sched/sch_tbf.c in the linux source tree + */ + +static spinlock_t limit_lock = SPIN_LOCK_UNLOCKED; + +/* Rusty: This is my (non-mathematically-inclined) understanding of + this algorithm. The `average rate' in jiffies becomes your initial + amount of credit `credit' and the most credit you can ever have + `credit_cap'. The `peak rate' becomes the cost of passing the + test, `cost'. + + `prev' tracks the last packet hit: you gain one credit per jiffy. + If you get credit balance more than this, the extra credit is + discarded. Every time the match passes, you lose `cost' credits; + if you don't have that many, the test fails. + + See Alexey's formal explanation in net/sched/sch_tbf.c. + + To avoid underflow, we multiply by 128 (ie. you get 128 credits per + jiffy). Hence a cost of 2^32-1, means one pass per 32768 seconds + at 1024HZ (or one every 9 hours). A cost of 1 means 12800 passes + per second at 100HZ. 
*/ + +#define CREDITS_PER_JIFFY 128 + +static int +ipt_limit_match(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const void *matchinfo, + int offset, + const void *hdr, + u_int16_t datalen, + int *hotdrop) +{ + struct ipt_rateinfo *r = ((struct ipt_rateinfo *)matchinfo)->master; + unsigned long now = jiffies; + + spin_lock_bh(&limit_lock); + r->credit += (now - xchg(&r->prev, now)) * CREDITS_PER_JIFFY; + if (r->credit > r->credit_cap) + r->credit = r->credit_cap; + + if (r->credit >= r->cost) { + /* We're not limited. */ + r->credit -= r->cost; + spin_unlock_bh(&limit_lock); + return 1; + } + + spin_unlock_bh(&limit_lock); + return 0; +} + +/* Precision saver. */ +static u_int32_t +user2credits(u_int32_t user) +{ + /* If multiplying would overflow... */ + if (user > 0xFFFFFFFF / (HZ*CREDITS_PER_JIFFY)) + /* Divide first. */ + return (user / IPT_LIMIT_SCALE) * HZ * CREDITS_PER_JIFFY; + + return (user * HZ * CREDITS_PER_JIFFY) / IPT_LIMIT_SCALE; +} + +static int +ipt_limit_checkentry(const char *tablename, + const struct ipt_ip *ip, + void *matchinfo, + unsigned int matchsize, + unsigned int hook_mask) +{ + struct ipt_rateinfo *r = matchinfo; + + if (matchsize != IPT_ALIGN(sizeof(struct ipt_rateinfo))) + return 0; + + /* Check for overflow. */ + if (r->burst == 0 + || user2credits(r->avg * r->burst) < user2credits(r->avg)) { + printk("Call rusty: overflow in ipt_limit: %u/%u\n", + r->avg, r->burst); + return 0; + } + + /* User avg in seconds * IPT_LIMIT_SCALE: convert to jiffies * + 128. */ + r->prev = jiffies; + r->credit = user2credits(r->avg * r->burst); /* Credits full. */ + r->credit_cap = user2credits(r->avg * r->burst); /* Credits full. */ + r->cost = user2credits(r->avg); + + /* For SMP, we only want to use one set of counters. 
*/ + r->master = r; + + return 1; +} + +static struct ipt_match ipt_limit_reg += { { NULL, NULL }, "limit", ipt_limit_match, ipt_limit_checkentry, + THIS_MODULE }; + +static int __init init(void) +{ + if (ipt_register_match(&ipt_limit_reg)) + return -EINVAL; + return 0; +} + +static void __exit fini(void) +{ + ipt_unregister_match(&ipt_limit_reg); +} + +module_init(init); +module_exit(fini); diff -u --recursive --new-file v2.3.99-pre1/linux/net/ipv4/netfilter/ipt_mac.c linux/net/ipv4/netfilter/ipt_mac.c --- v2.3.99-pre1/linux/net/ipv4/netfilter/ipt_mac.c Wed Dec 31 16:00:00 1969 +++ linux/net/ipv4/netfilter/ipt_mac.c Fri Mar 17 10:56:20 2000 @@ -0,0 +1,63 @@ +/* Kernel module to match MAC address parameters. */ +#include +#include +#include + +#include +#include +EXPORT_NO_SYMBOLS; + +static int +match(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const void *matchinfo, + int offset, + const void *hdr, + u_int16_t datalen, + int *hotdrop) +{ + const struct ipt_mac_info *info = matchinfo; + + /* Is mac pointer valid? */ + return (skb->mac.raw >= skb->head + && skb->mac.raw < skb->head + skb->len - ETH_HLEN + /* If so, compare... 
*/ + && ((memcmp(skb->mac.ethernet->h_source, info->srcaddr, ETH_ALEN) + == 0) ^ info->invert)); +} + +static int +ipt_mac_checkentry(const char *tablename, + const struct ipt_ip *ip, + void *matchinfo, + unsigned int matchsize, + unsigned int hook_mask) +{ + if (hook_mask + & ~((1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_IN))) { + printk("ipt_mac: only valid for PRE_ROUTING or LOCAL_IN.\n"); + return 0; + } + + if (matchsize != IPT_ALIGN(sizeof(struct ipt_mac_info))) + return 0; + + return 1; +} + +static struct ipt_match mac_match += { { NULL, NULL }, "mac", &match, &ipt_mac_checkentry, THIS_MODULE }; + +static int __init init(void) +{ + return ipt_register_match(&mac_match); +} + +static void __exit fini(void) +{ + ipt_unregister_match(&mac_match); +} + +module_init(init); +module_exit(fini); diff -u --recursive --new-file v2.3.99-pre1/linux/net/ipv4/netfilter/ipt_mark.c linux/net/ipv4/netfilter/ipt_mark.c --- v2.3.99-pre1/linux/net/ipv4/netfilter/ipt_mark.c Wed Dec 31 16:00:00 1969 +++ linux/net/ipv4/netfilter/ipt_mark.c Fri Mar 17 10:56:20 2000 @@ -0,0 +1,52 @@ +/* Kernel module to match NFMARK values. 
*/ +#include +#include + +#include +#include + +EXPORT_NO_SYMBOLS; + +static int +match(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const void *matchinfo, + int offset, + const void *hdr, + u_int16_t datalen, + int *hotdrop) +{ + const struct ipt_mark_info *info = matchinfo; + + return ((skb->nfmark & info->mask) == info->mark) ^ info->invert; +} + +static int +checkentry(const char *tablename, + const struct ipt_ip *ip, + void *matchinfo, + unsigned int matchsize, + unsigned int hook_mask) +{ + if (matchsize != IPT_ALIGN(sizeof(struct ipt_mark_info))) + return 0; + + return 1; +} + +static struct ipt_match mark_match += { { NULL, NULL }, "mark", &match, &checkentry, THIS_MODULE }; + +static int __init init(void) +{ + return ipt_register_match(&mark_match); +} + +static void __exit fini(void) +{ + ipt_unregister_match(&mark_match); +} + +module_init(init); +module_exit(fini); diff -u --recursive --new-file v2.3.99-pre1/linux/net/ipv4/netfilter/ipt_multiport.c linux/net/ipv4/netfilter/ipt_multiport.c --- v2.3.99-pre1/linux/net/ipv4/netfilter/ipt_multiport.c Wed Dec 31 16:00:00 1969 +++ linux/net/ipv4/netfilter/ipt_multiport.c Fri Mar 17 10:56:20 2000 @@ -0,0 +1,102 @@ +/* Kernel module to match one of a list of TCP/UDP ports: ports are in + the same place so we can treat them as equal. */ +#include +#include +#include +#include + +#include +#include + +#if 0 +#define duprintf(format, args...) printk(format , ## args) +#else +#define duprintf(format, args...) +#endif + +EXPORT_NO_SYMBOLS; + +/* Returns 1 if the port is matched by the test, 0 otherwise. */ +static inline int +ports_match(const u_int16_t *portlist, enum ipt_multiport_flags flags, + u_int8_t count, u_int16_t src, u_int16_t dst) +{ + unsigned int i; + for (i=0; iports, + multiinfo->flags, multiinfo->count, + ntohs(udp->source), ntohs(udp->dest)); +} + +/* Called when user tries to insert an entry of this type. 
*/ +static int +checkentry(const char *tablename, + const struct ipt_ip *ip, + void *matchinfo, + unsigned int matchsize, + unsigned int hook_mask) +{ + const struct ipt_multiport *multiinfo = matchinfo; + + /* Must specify proto == TCP/UDP, no unknown flags or bad count */ + return (ip->proto == IPPROTO_TCP || ip->proto == IPPROTO_UDP) + && !(ip->flags & IPT_INV_PROTO) + && matchsize == IPT_ALIGN(sizeof(struct ipt_multiport)) + && (multiinfo->flags == IPT_MULTIPORT_SOURCE + || multiinfo->flags == IPT_MULTIPORT_DESTINATION + || multiinfo->flags == IPT_MULTIPORT_EITHER) + && multiinfo->count <= IPT_MULTI_PORTS; +} + +static struct ipt_match multiport_match += { { NULL, NULL }, "multiport", &match, &checkentry, THIS_MODULE }; + +static int __init init(void) +{ + return ipt_register_match(&multiport_match); +} + +static void __exit fini(void) +{ + ipt_unregister_match(&multiport_match); +} + +module_init(init); +module_exit(fini); diff -u --recursive --new-file v2.3.99-pre1/linux/net/ipv4/netfilter/ipt_owner.c linux/net/ipv4/netfilter/ipt_owner.c --- v2.3.99-pre1/linux/net/ipv4/netfilter/ipt_owner.c Wed Dec 31 16:00:00 1969 +++ linux/net/ipv4/netfilter/ipt_owner.c Fri Mar 17 10:56:20 2000 @@ -0,0 +1,136 @@ +/* Kernel module to match various things tied to sockets associated with + locally generated outgoing packets. 
+ + (C)2000 Marc Boucher + */ +#include +#include +#include +#include + +#include +#include + +EXPORT_NO_SYMBOLS; + +static int +match_pid(const struct sk_buff *skb, pid_t pid) +{ + struct task_struct *p; + int i; + + read_lock(&tasklist_lock); + p = find_task_by_pid(pid); + if(p && p->files) { + for (i=0; i < p->files->max_fds; i++) { + if (fcheck_task(p, i) == skb->sk->socket->file) { + read_unlock(&tasklist_lock); + return 1; + } + } + } + read_unlock(&tasklist_lock); + return 0; +} + +static int +match_sid(const struct sk_buff *skb, pid_t sid) +{ + struct task_struct *p; + int i, found=0; + + read_lock(&tasklist_lock); + for_each_task(p) { + if ((p->session != sid) || !p->files) + continue; + + for (i=0; i < p->files->max_fds; i++) { + if (fcheck_task(p, i) == skb->sk->socket->file) { + found = 1; + break; + } + } + if(found) + break; + } + read_unlock(&tasklist_lock); + + return found; +} + +static int +match(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const void *matchinfo, + int offset, + const void *hdr, + u_int16_t datalen, + int *hotdrop) +{ + const struct ipt_owner_info *info = matchinfo; + + if (!skb->sk || !skb->sk->socket || !skb->sk->socket->file) + return 0; + + if(info->match & IPT_OWNER_UID) { + if((skb->sk->socket->file->f_uid != info->uid) ^ + !!(info->invert & IPT_OWNER_UID)) + return 0; + } + + if(info->match & IPT_OWNER_GID) { + if((skb->sk->socket->file->f_gid != info->gid) ^ + !!(info->invert & IPT_OWNER_GID)) + return 0; + } + + if(info->match & IPT_OWNER_PID) { + if (!match_pid(skb, info->pid) ^ + !!(info->invert & IPT_OWNER_PID)) + return 0; + } + + if(info->match & IPT_OWNER_SID) { + if (!match_sid(skb, info->sid) ^ + !!(info->invert & IPT_OWNER_SID)) + return 0; + } + + return 1; +} + +static int +checkentry(const char *tablename, + const struct ipt_ip *ip, + void *matchinfo, + unsigned int matchsize, + unsigned int hook_mask) +{ + if (hook_mask + & ~((1 << NF_IP_LOCAL_OUT) | (1 << 
NF_IP_POST_ROUTING))) { + printk("ipt_owner: only valid for LOCAL_OUT or POST_ROUTING.\n"); + return 0; + } + + if (matchsize != IPT_ALIGN(sizeof(struct ipt_owner_info))) + return 0; + + return 1; +} + +static struct ipt_match owner_match += { { NULL, NULL }, "owner", &match, &checkentry, THIS_MODULE }; + +static int __init init(void) +{ + return ipt_register_match(&owner_match); +} + +static void __exit fini(void) +{ + ipt_unregister_match(&owner_match); +} + +module_init(init); +module_exit(fini); diff -u --recursive --new-file v2.3.99-pre1/linux/net/ipv4/netfilter/ipt_state.c linux/net/ipv4/netfilter/ipt_state.c --- v2.3.99-pre1/linux/net/ipv4/netfilter/ipt_state.c Wed Dec 31 16:00:00 1969 +++ linux/net/ipv4/netfilter/ipt_state.c Fri Mar 17 10:56:20 2000 @@ -0,0 +1,61 @@ +/* Kernel module to match connection tracking information. + * GPL (C) 1999 Rusty Russell (rusty@rustcorp.com.au). + */ +#include +#include +#include +#include +#include +EXPORT_NO_SYMBOLS; + +static int +match(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const void *matchinfo, + int offset, + const void *hdr, + u_int16_t datalen, + int *hotdrop) +{ + const struct ipt_state_info *sinfo = matchinfo; + enum ip_conntrack_info ctinfo; + unsigned int statebit; + + if (!ip_conntrack_get((struct sk_buff *)skb, &ctinfo)) + statebit = IPT_STATE_INVALID; + else + statebit = IPT_STATE_BIT(ctinfo); + + return (sinfo->statemask & statebit); +} + +static int check(const char *tablename, + const struct ipt_ip *ip, + void *matchinfo, + unsigned int matchsize, + unsigned int hook_mask) +{ + if (matchsize != IPT_ALIGN(sizeof(struct ipt_state_info))) + return 0; + + return 1; +} + +static struct ipt_match state_match += { { NULL, NULL }, "state", &match, &check, THIS_MODULE }; + +static int __init init(void) +{ + __MOD_INC_USE_COUNT(ip_conntrack_module); + return ipt_register_match(&state_match); +} + +static void __exit fini(void) +{ + 
ipt_unregister_match(&state_match); + __MOD_DEC_USE_COUNT(ip_conntrack_module); +} + +module_init(init); +module_exit(fini); diff -u --recursive --new-file v2.3.99-pre1/linux/net/ipv4/netfilter/ipt_tos.c linux/net/ipv4/netfilter/ipt_tos.c --- v2.3.99-pre1/linux/net/ipv4/netfilter/ipt_tos.c Wed Dec 31 16:00:00 1969 +++ linux/net/ipv4/netfilter/ipt_tos.c Fri Mar 17 10:56:20 2000 @@ -0,0 +1,53 @@ +/* Kernel module to match TOS values. */ +#include +#include + +#include +#include + +EXPORT_NO_SYMBOLS; + +static int +match(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const void *matchinfo, + int offset, + const void *hdr, + u_int16_t datalen, + int *hotdrop) +{ + const struct ipt_tos_info *info = matchinfo; + const struct iphdr *iph = skb->nh.iph; + + return (iph->tos == info->tos) ^ info->invert; +} + +static int +checkentry(const char *tablename, + const struct ipt_ip *ip, + void *matchinfo, + unsigned int matchsize, + unsigned int hook_mask) +{ + if (matchsize != IPT_ALIGN(sizeof(struct ipt_tos_info))) + return 0; + + return 1; +} + +static struct ipt_match tos_match += { { NULL, NULL }, "tos", &match, &checkentry, THIS_MODULE }; + +static int __init init(void) +{ + return ipt_register_match(&tos_match); +} + +static void __exit fini(void) +{ + ipt_unregister_match(&tos_match); +} + +module_init(init); +module_exit(fini); diff -u --recursive --new-file v2.3.99-pre1/linux/net/ipv4/netfilter/ipt_unclean.c linux/net/ipv4/netfilter/ipt_unclean.c --- v2.3.99-pre1/linux/net/ipv4/netfilter/ipt_unclean.c Wed Dec 31 16:00:00 1969 +++ linux/net/ipv4/netfilter/ipt_unclean.c Fri Mar 17 10:56:20 2000 @@ -0,0 +1,576 @@ +/* Kernel module to match suspect packets. */ +#include +#include +#include +#include +#include +#include +#include + +#include + +EXPORT_NO_SYMBOLS; + +#define limpk(format, args...) \ +do { \ + if (net_ratelimit()) \ + printk("ipt_unclean: %s" format, \ + embedded ? 
"(embedded packet) " : "" , ## args); \ +} while(0) + +enum icmp_error_status +{ + ICMP_MAY_BE_ERROR, + ICMP_IS_ERROR, + ICMP_NOT_ERROR +}; + +struct icmp_info +{ + size_t min_len, max_len; + enum icmp_error_status err; + u_int8_t min_code, max_code; +}; + +static int +check_ip(struct iphdr *iph, size_t length, int embedded); + +/* ICMP-specific checks. */ +static int +check_icmp(const struct icmphdr *icmph, + u_int16_t datalen, + unsigned int offset, + int more_frags, + int embedded) +{ + static struct icmp_info info[] + = { [ICMP_ECHOREPLY] + = { 8, 65536, ICMP_NOT_ERROR, 0, 0 }, + [ICMP_DEST_UNREACH] + = { 8 + 28, 65536, ICMP_IS_ERROR, 0, 15 }, + [ICMP_SOURCE_QUENCH] + = { 8 + 28, 65536, ICMP_IS_ERROR, 0, 0 }, + [ICMP_REDIRECT] + = { 8 + 28, 65536, ICMP_IS_ERROR, 0, 3 }, + [ICMP_ECHO] + = { 8, 65536, ICMP_NOT_ERROR, 0, 0 }, + /* Router advertisement. */ + [9] + = { 8, 8 + 255 * 8, ICMP_NOT_ERROR, 0, 0 }, + /* Router solicitation. */ + [10] + = { 8, 8, ICMP_NOT_ERROR, 0, 0 }, + [ICMP_TIME_EXCEEDED] + = { 8 + 28, 65536, ICMP_IS_ERROR, 0, 1 }, + [ICMP_PARAMETERPROB] + = { 8 + 28, 65536, ICMP_IS_ERROR, 0, 1 }, + [ICMP_TIMESTAMP] + = { 20, 20, ICMP_NOT_ERROR, 0, 0 }, + [ICMP_TIMESTAMPREPLY] + = { 20, 20, ICMP_NOT_ERROR, 0, 0 }, + [ICMP_INFO_REQUEST] + = { 8, 65536, ICMP_NOT_ERROR, 0, 0 }, + [ICMP_INFO_REPLY] + = { 8, 65536, ICMP_NOT_ERROR, 0, 0 }, + [ICMP_ADDRESS] + = { 12, 12, ICMP_NOT_ERROR, 0, 0 }, + [ICMP_ADDRESSREPLY] + = { 12, 12, ICMP_NOT_ERROR, 0, 0 } }; + + /* Can't do anything if it's a fragment. */ + if (!offset) + return 1; + + /* Must cover type and code. */ + if (datalen < 2) { + limpk("ICMP len=%u too short\n", datalen); + return 0; + } + + /* If not embedded. */ + if (!embedded) { + /* Bad checksum? Don't print, just drop. */ + if (!more_frags + && ip_compute_csum((unsigned char *) icmph, datalen) != 0) + return 0; + + /* CHECK: Truncated ICMP (even if first fragment). 
*/ + if (icmph->type < sizeof(info)/sizeof(struct icmp_info) + && info[icmph->type].min_len != 0 + && datalen < info[icmph->type].min_len) { + limpk("ICMP type %u len %u too short\n", + icmph->type, datalen); + return 0; + } + + /* CHECK: Check within known error ICMPs. */ + if (icmph->type < sizeof(info)/sizeof(struct icmp_info) + && info[icmph->type].err == ICMP_IS_ERROR) { + /* CHECK: Embedded packet must be at least + length of iph + 8 bytes. */ + struct iphdr *inner = (void *)icmph + 8; + + if (datalen - 8 < sizeof(struct iphdr)) { + limpk("ICMP error internal way too short\n"); + return 0; + } + if (datalen - 8 < inner->ihl*4 + 8) { + limpk("ICMP error internal too short\n"); + return 0; + } + if (!check_ip(inner, datalen - 8, 1)) + return 0; + } + } else { + /* CHECK: Can't embed ICMP unless known non-error. */ + if (icmph->type >= sizeof(info)/sizeof(struct icmp_info) + || info[icmph->type].err != ICMP_NOT_ERROR) { + limpk("ICMP type %u not embeddable\n", + icmph->type); + return 0; + } + } + + /* CHECK: Invalid ICMP codes. */ + if (icmph->type < sizeof(info)/sizeof(struct icmp_info) + && (icmph->code < info[icmph->type].min_code + || icmph->code > info[icmph->type].max_code)) { + limpk("ICMP type=%u code=%u\n", + icmph->type, icmph->code); + return 0; + } + + /* CHECK: Above maximum length. */ + if (icmph->type < sizeof(info)/sizeof(struct icmp_info) + && info[icmph->type].max_len != 0 + && datalen > info[icmph->type].max_len) { + limpk("ICMP type=%u too long: %u bytes\n", + icmph->type, datalen); + return 0; + } + + switch (icmph->type) { + case ICMP_PARAMETERPROB: { + /* CHECK: Problem param must be within error packet's + * IP header. */ + struct iphdr *iph = (void *)icmph + 8; + u_int32_t arg = ntohl(icmph->un.gateway); + + if (icmph->code == 0) { + if ((arg >> 24) >= iph->ihl*4) { + limpk("ICMP PARAMETERPROB ptr = %u\n", + ntohl(icmph->un.gateway) >> 24); + return 0; + } + arg &= 0x00FFFFFF; + } + + /* CHECK: Rest must be zero. 
*/ + if (arg) { + limpk("ICMP PARAMETERPROB nonzero arg = %u\n", + arg); + return 0; + } + break; + } + + case ICMP_TIME_EXCEEDED: + case ICMP_SOURCE_QUENCH: + /* CHECK: Unused must be zero. */ + if (icmph->un.gateway != 0) { + limpk("ICMP type=%u unused = %u\n", + icmph->type, ntohl(icmph->un.gateway)); + return 0; + } + break; + } + + return 1; +} + +/* UDP-specific checks. */ +static int +check_udp(const struct iphdr *iph, + const struct udphdr *udph, + u_int16_t datalen, + unsigned int offset, + int more_frags, + int embedded) +{ + /* Can't do anything if it's a fragment. */ + if (!offset) + return 1; + + /* CHECK: Must cover UDP header. */ + if (datalen < sizeof(struct udphdr)) { + limpk("UDP len=%u too short\n", datalen); + return 0; + } + + /* Bad checksum? Don't print, just drop. */ + /* FIXME: SRC ROUTE packets won't match checksum --RR */ + if (!more_frags && !embedded + && csum_tcpudp_magic(iph->saddr, iph->daddr, datalen, IPPROTO_UDP, + csum_partial((char *)udph, datalen, 0)) != 0) + return 0; + + /* CHECK: Ports can't be zero. */ + if (!udph->source || !udph->dest) { + limpk("UDP zero ports %u/%u\n", + ntohs(udph->source), ntohs(udph->dest)); + return 0; + } + + if (!more_frags) { + if (!embedded) { + /* CHECK: UDP length must match. */ + if (ntohs(udph->len) != datalen) { + limpk("UDP len too short %u vs %u\n", + ntohs(udph->len), datalen); + return 0; + } + } else { + /* CHECK: UDP length be >= this truncated pkt. */ + if (ntohs(udph->len) < datalen) { + limpk("UDP len too long %u vs %u\n", + ntohs(udph->len), datalen); + return 0; + } + } + } else { + /* CHECK: UDP length must be > this frag's length. */ + if (ntohs(udph->len) <= datalen) { + limpk("UDP fragment len too short %u vs %u\n", + ntohs(udph->len), datalen); + return 0; + } + } + + return 1; +} + +#define TH_FIN 0x01 +#define TH_SYN 0x02 +#define TH_RST 0x04 +#define TH_PUSH 0x08 +#define TH_ACK 0x10 +#define TH_URG 0x20 + +/* TCP-specific checks. 
*/ +static int +check_tcp(const struct iphdr *iph, + const struct tcphdr *tcph, + u_int16_t datalen, + unsigned int offset, + int more_frags, + int embedded) +{ + u_int8_t *opt = (u_int8_t *)(tcph + 1); + u_int8_t tcpflags; + int end_of_options = 0; + size_t i; + + /* CHECK: Can't have offset=1: used to override TCP syn-checks. */ + /* In fact, this is caught below (offset < 516). */ + + /* Can't do anything if it's a fragment. */ + if (!offset) + return 1; + + /* CHECK: Smaller than minimal TCP hdr. */ + if (datalen < sizeof(struct tcphdr)) { + if (!embedded) { + limpk("Packet length %u < TCP header.\n", datalen); + return 0; + } + /* Must have ports available (datalen >= 8). */ + /* CHECK: TCP ports inside ICMP error */ + if (!tcph->source || !tcph->dest) { + limpk("Zero TCP ports %u/%u.\n", + htons(tcph->source), htons(tcph->dest)); + return 0; + } + return 1; + } + + /* CHECK: Smaller than actual TCP hdr. */ + if (datalen < tcph->doff * 4) { + if (!embedded) { + limpk("Packet length %u < actual TCP header.\n", + datalen); + return 0; + } else + return 1; + } + + /* Bad checksum? Don't print, just drop. */ + /* FIXME: SRC ROUTE packets won't match checksum --RR */ + if (!more_frags && !embedded + && csum_tcpudp_magic(iph->saddr, iph->daddr, datalen, IPPROTO_TCP, + csum_partial((char *)tcph, datalen, 0)) != 0) + return 0; + + /* CHECK: TCP ports non-zero */ + if (!tcph->source || !tcph->dest) { + limpk("Zero TCP ports %u/%u.\n", + htons(tcph->source), htons(tcph->dest)); + return 0; + } + + /* CHECK: TCP reserved bits zero. */ + if(tcp_flag_word(tcph) & TCP_RESERVED_BITS) { + limpk("TCP reserved bits not zero\n"); + return 0; + } + + /* CHECK: TCP flags. 
*/ + tcpflags = ((u_int8_t *)tcph)[13]; + if (tcpflags != TH_SYN + && tcpflags != (TH_SYN|TH_ACK) + && tcpflags != (TH_RST|TH_ACK) + && tcpflags != (TH_RST|TH_ACK|TH_PUSH) + && tcpflags != (TH_FIN|TH_ACK) + && tcpflags != TH_ACK + && tcpflags != (TH_ACK|TH_PUSH) + && tcpflags != (TH_ACK|TH_URG) + && tcpflags != (TH_ACK|TH_URG|TH_PUSH) + && tcpflags != (TH_FIN|TH_ACK|TH_PUSH) + && tcpflags != (TH_FIN|TH_ACK|TH_URG) + && tcpflags != (TH_FIN|TH_ACK|TH_URG|TH_PUSH)) { + limpk("TCP flags bad: %u\n", tcpflags); + return 0; + } + + for (i = sizeof(struct tcphdr); i < tcph->doff * 4; ) { + switch (opt[i]) { + case 0: + end_of_options = 1; + i++; + break; + case 1: + i++; + break; + default: + /* CHECK: options after EOO. */ + if (end_of_options) { + limpk("TCP option %u after end\n", + opt[i]); + return 0; + } + /* CHECK: options at tail. */ + else if (i+1 >= tcph->doff * 4) { + limpk("TCP option %u at tail\n", + opt[i]); + return 0; + } + /* CHECK: zero-length options. */ + else if (opt[i+1] == 0) { + limpk("TCP option %u 0 len\n", + opt[i]); + return 0; + } + /* CHECK: oversize options. */ + else if (opt[i+1] + i >= tcph->doff * 4) { + limpk("TCP option %u at %Zu too long\n", + (unsigned int) opt[i], i); + return 0; + } + } + } + + return 1; +} + +/* Returns 1 if ok */ +/* Standard IP checks. */ +static int +check_ip(struct iphdr *iph, size_t length, int embedded) +{ + u_int8_t *opt = (u_int8_t *)(iph + 1); + int end_of_options = 0; + void *protoh; + size_t datalen; + unsigned int i; + unsigned int offset; + + /* Should only happen for local outgoing raw-socket packets. */ + /* CHECK: length >= ip header. */ + if (length < sizeof(struct iphdr) || length < iph->ihl * 4) { + limpk("Packet length %Zu < IP header.\n", length); + return 0; + } + + offset = ntohs(iph->frag_off) & IP_OFFSET; + protoh = (void *)iph + iph->ihl * 4; + datalen = length - iph->ihl * 4; + + /* CHECK: Embedded fragment. 
*/ + if (embedded && offset) { + limpk("Embedded fragment.\n"); + return 0; + } + + for (i = sizeof(struct iphdr); i < iph->ihl * 4; ) { + switch (opt[i]) { + case 0: + end_of_options = 1; + i++; + break; + case 1: + i++; + break; + default: + /* CHECK: options after EOO. */ + if (end_of_options) { + limpk("IP option %u after end\n", + opt[i]); + return 0; + } + /* CHECK: options at tail. */ + else if (i+1 >= iph->ihl * 4) { + limpk("IP option %u at tail\n", + opt[i]); + return 0; + } + /* CHECK: zero-length options. */ + else if (opt[i+1] == 0) { + limpk("IP option %u 0 len\n", + opt[i]); + return 0; + } + /* CHECK: oversize options. */ + else if (opt[i+1] + i >= iph->ihl * 4) { + limpk("IP option %u at %u too long\n", + opt[i], i); + return 0; + } + } + } + + /* Fragment checks. */ + + /* CHECK: More fragments, but doesn't fill 8-byte boundary. */ + if ((ntohs(iph->frag_off) & IP_MF) + && (ntohs(iph->tot_len) % 8) != 0) { + limpk("Truncated fragment %u long.\n", ntohs(iph->tot_len)); + return 0; + } + + /* CHECK: Oversize fragment a-la Ping of Death. */ + if (offset * 8 + datalen > 65535) { + limpk("Oversize fragment to %u.\n", offset * 8); + return 0; + } + + /* CHECK: DF set and offset or MF set. */ + if ((ntohs(iph->frag_off) & IP_DF) + && (offset || (ntohs(iph->frag_off) & IP_MF))) { + limpk("DF set and offset=%u, MF=%u.\n", + offset, ntohs(iph->frag_off) & IP_MF); + return 0; + } + + /* CHECK: Zero-sized fragments. */ + if ((offset || (ntohs(iph->frag_off) & IP_MF)) + && datalen == 0) { + limpk("Zero size fragment offset=%u\n", offset); + return 0; + } + + /* Note: we can have even middle fragments smaller than this: + consider a large packet passing through a 600MTU then + 576MTU link: this gives a fragment of 24 data bytes. But + everyone packs fragments largest first, hence a fragment + can't START before 576 - MAX_IP_HEADER_LEN. 
*/ + + /* Used to be min-size 576: I recall Alan Cox saying ax25 goes + down to 128 (576 taken from RFC 791: All hosts must be + prepared to accept datagrams of up to 576 octets). Use 128 + here. */ +#define MIN_LIKELY_MTU 128 + /* CHECK: Min size of first frag = 128. */ + if ((ntohs(iph->frag_off) & IP_MF) + && offset == 0 + && ntohs(iph->tot_len) < MIN_LIKELY_MTU) { + limpk("First fragment size %u < %u\n", ntohs(iph->tot_len), + MIN_LIKELY_MTU); + return 0; + } + + /* CHECK: Min offset of frag = 128 - 60 (max IP hdr len). */ + if (offset && offset * 8 < MIN_LIKELY_MTU - 60) { + limpk("Fragment starts at %u < %u\n", offset * 8, + MIN_LIKELY_MTU-60); + return 0; + } + + /* CHECK: Protocol specification non-zero. */ + if (iph->protocol == 0) { + limpk("Zero protocol\n"); + return 0; + } + + /* Per-protocol checks. */ + switch (iph->protocol) { + case IPPROTO_ICMP: + return check_icmp(protoh, datalen, offset, + (ntohs(iph->frag_off) & IP_MF), + embedded); + + case IPPROTO_UDP: + return check_udp(iph, protoh, datalen, offset, + (ntohs(iph->frag_off) & IP_MF), + embedded); + + case IPPROTO_TCP: + return check_tcp(iph, protoh, datalen, offset, + (ntohs(iph->frag_off) & IP_MF), + embedded); + default: + /* Ignorance is bliss. */ + return 1; + } +} + +static int +match(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const void *matchinfo, + int offset, + const void *hdr, + u_int16_t datalen, + int *hotdrop) +{ + return !check_ip(skb->nh.iph, skb->len, 0); +} + +/* Called when user tries to insert an entry of this type. 
*/ +static int +checkentry(const char *tablename, + const struct ipt_ip *ip, + void *matchinfo, + unsigned int matchsize, + unsigned int hook_mask) +{ + if (matchsize != IPT_ALIGN(0)) + return 0; + + return 1; +} + +static struct ipt_match unclean_match += { { NULL, NULL }, "unclean", &match, &checkentry, THIS_MODULE }; + +static int __init init(void) +{ + return ipt_register_match(&unclean_match); +} + +static void __exit fini(void) +{ + ipt_unregister_match(&unclean_match); +} + +module_init(init); +module_exit(fini); diff -u --recursive --new-file v2.3.99-pre1/linux/net/ipv4/netfilter/iptable_filter.c linux/net/ipv4/netfilter/iptable_filter.c --- v2.3.99-pre1/linux/net/ipv4/netfilter/iptable_filter.c Wed Dec 31 16:00:00 1969 +++ linux/net/ipv4/netfilter/iptable_filter.c Sat Mar 18 16:51:35 2000 @@ -0,0 +1,181 @@ +/* + * This is the 1999 rewrite of IP Firewalling, aiming for kernel 2.3.x. + * + * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling + */ +#include +#include + +#define FILTER_VALID_HOOKS ((1 << NF_IP_LOCAL_IN) | (1 << NF_IP_FORWARD) | (1 << NF_IP_LOCAL_OUT)) + +/* Standard entry. 
*/ +struct ipt_standard +{ + struct ipt_entry entry; + struct ipt_standard_target target; +}; + +struct ipt_error_target +{ + struct ipt_entry_target target; + char errorname[IPT_FUNCTION_MAXNAMELEN]; +}; + +struct ipt_error +{ + struct ipt_entry entry; + struct ipt_error_target target; +}; + +static struct +{ + struct ipt_replace repl; + struct ipt_standard entries[3]; + struct ipt_error term; +} initial_table __initdata += { { "filter", FILTER_VALID_HOOKS, 4, + sizeof(struct ipt_standard) * 3 + sizeof(struct ipt_error), + { [NF_IP_LOCAL_IN] 0, + [NF_IP_FORWARD] sizeof(struct ipt_standard), + [NF_IP_LOCAL_OUT] sizeof(struct ipt_standard) * 2 }, + { [NF_IP_LOCAL_IN] 0, + [NF_IP_FORWARD] sizeof(struct ipt_standard), + [NF_IP_LOCAL_OUT] sizeof(struct ipt_standard) * 2 }, + 0, NULL, { } }, + { + /* LOCAL_IN */ + { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 }, + 0, + sizeof(struct ipt_entry), + sizeof(struct ipt_standard), + 0, { 0, 0 }, { } }, + { { sizeof(struct ipt_standard_target), { "" }, { } }, + -NF_ACCEPT - 1 } }, + /* FORWARD */ + { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 }, + 0, + sizeof(struct ipt_entry), + sizeof(struct ipt_standard), + 0, { 0, 0 }, { } }, + { { sizeof(struct ipt_standard_target), { "" }, { } }, + -NF_ACCEPT - 1 } }, + /* LOCAL_OUT */ + { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 }, + 0, + sizeof(struct ipt_entry), + sizeof(struct ipt_standard), + 0, { 0, 0 }, { } }, + { { sizeof(struct ipt_standard_target), { "" }, { } }, + -NF_ACCEPT - 1 } } + }, + /* ERROR */ + { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 }, + 0, + sizeof(struct ipt_entry), + sizeof(struct ipt_error), + 0, { 0, 0 }, { } }, + { { sizeof(struct ipt_error_target), { IPT_ERROR_TARGET }, + { } }, + "ERROR" + } + } +}; + +static struct ipt_table packet_filter += { { NULL, NULL }, "filter", &initial_table.repl, + FILTER_VALID_HOOKS, RW_LOCK_UNLOCKED, NULL }; + +/* The work comes in here from 
netfilter.c. */ +static unsigned int +ipt_hook(unsigned int hook, + struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + return ipt_do_table(pskb, hook, in, out, &packet_filter, NULL); +} + +static unsigned int +ipt_local_out_hook(unsigned int hook, + struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + /* root is playing with raw sockets. */ + if ((*pskb)->len < sizeof(struct iphdr) + || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) { + if (net_ratelimit()) + printk("ipt_hook: happy cracking.\n"); + return NF_ACCEPT; + } + + return ipt_do_table(pskb, hook, in, out, &packet_filter, NULL); +} + +static struct nf_hook_ops ipt_ops[] += { { { NULL, NULL }, ipt_hook, PF_INET, NF_IP_LOCAL_IN, NF_IP_PRI_FILTER }, + { { NULL, NULL }, ipt_hook, PF_INET, NF_IP_FORWARD, NF_IP_PRI_FILTER }, + { { NULL, NULL }, ipt_local_out_hook, PF_INET, NF_IP_LOCAL_OUT, + NF_IP_PRI_FILTER } +}; + +/* Default to no forward for security reasons. 
*/ +static int forward = NF_DROP; +MODULE_PARM(forward, "i"); + +static int __init init(void) +{ + int ret; + + if (forward < 0 || forward > NF_MAX_VERDICT) { + printk("iptables forward must be 0 or 1\n"); + return -EINVAL; + } + + /* Entry 1 is the FORWARD hook */ + initial_table.entries[1].target.verdict = -forward - 1; + + /* Register table */ + ret = ipt_register_table(&packet_filter); + if (ret < 0) + return ret; + + /* Register hooks */ + ret = nf_register_hook(&ipt_ops[0]); + if (ret < 0) + goto cleanup_table; + + ret = nf_register_hook(&ipt_ops[1]); + if (ret < 0) + goto cleanup_hook0; + + ret = nf_register_hook(&ipt_ops[2]); + if (ret < 0) + goto cleanup_hook1; + + return ret; + + cleanup_hook1: + nf_unregister_hook(&ipt_ops[1]); + cleanup_hook0: + nf_unregister_hook(&ipt_ops[0]); + cleanup_table: + ipt_unregister_table(&packet_filter); + + return ret; +} + +static void __exit fini(void) +{ + unsigned int i; + + for (i = 0; i < sizeof(ipt_ops)/sizeof(struct nf_hook_ops); i++) + nf_unregister_hook(&ipt_ops[i]); + + ipt_unregister_table(&packet_filter); +} + +module_init(init); +module_exit(fini); diff -u --recursive --new-file v2.3.99-pre1/linux/net/ipv4/netfilter/iptable_mangle.c linux/net/ipv4/netfilter/iptable_mangle.c --- v2.3.99-pre1/linux/net/ipv4/netfilter/iptable_mangle.c Wed Dec 31 16:00:00 1969 +++ linux/net/ipv4/netfilter/iptable_mangle.c Sat Mar 18 16:51:35 2000 @@ -0,0 +1,152 @@ +/* + * This is the 1999 rewrite of IP Firewalling, aiming for kernel 2.3.x. + * + * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling + */ +#include +#include + +#define MANGLE_VALID_HOOKS ((1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_OUT)) + +/* Standard entry. 
*/ +struct ipt_standard +{ + struct ipt_entry entry; + struct ipt_standard_target target; +}; + +struct ipt_error_target +{ + struct ipt_entry_target target; + char errorname[IPT_FUNCTION_MAXNAMELEN]; +}; + +struct ipt_error +{ + struct ipt_entry entry; + struct ipt_error_target target; +}; + +static struct +{ + struct ipt_replace repl; + struct ipt_standard entries[2]; + struct ipt_error term; +} initial_table __initdata += { { "mangle", MANGLE_VALID_HOOKS, 3, + sizeof(struct ipt_standard) * 2 + sizeof(struct ipt_error), + { [NF_IP_PRE_ROUTING] 0, + [NF_IP_LOCAL_OUT] sizeof(struct ipt_standard) }, + { [NF_IP_PRE_ROUTING] 0, + [NF_IP_LOCAL_OUT] sizeof(struct ipt_standard) }, + 0, NULL, { } }, + { + /* PRE_ROUTING */ + { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 }, + 0, + sizeof(struct ipt_entry), + sizeof(struct ipt_standard), + 0, { 0, 0 }, { } }, + { { sizeof(struct ipt_standard_target), { "" }, { } }, + -NF_ACCEPT - 1 } }, + /* LOCAL_OUT */ + { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 }, + 0, + sizeof(struct ipt_entry), + sizeof(struct ipt_standard), + 0, { 0, 0 }, { } }, + { { sizeof(struct ipt_standard_target), { "" }, { } }, + -NF_ACCEPT - 1 } } + }, + /* ERROR */ + { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 }, + 0, + sizeof(struct ipt_entry), + sizeof(struct ipt_error), + 0, { 0, 0 }, { } }, + { { sizeof(struct ipt_error_target), { IPT_ERROR_TARGET }, + { } }, + "ERROR" + } + } +}; + +static struct ipt_table packet_mangler += { { NULL, NULL }, "mangle", &initial_table.repl, + MANGLE_VALID_HOOKS, RW_LOCK_UNLOCKED, NULL }; + +/* The work comes in here from netfilter.c. 
*/ +static unsigned int +ipt_hook(unsigned int hook, + struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + return ipt_do_table(pskb, hook, in, out, &packet_mangler, NULL); +} + +static unsigned int +ipt_local_out_hook(unsigned int hook, + struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + /* root is playing with raw sockets. */ + if ((*pskb)->len < sizeof(struct iphdr) + || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) { + if (net_ratelimit()) + printk("ipt_hook: happy cracking.\n"); + return NF_ACCEPT; + } + + return ipt_do_table(pskb, hook, in, out, &packet_mangler, NULL); +} + +static struct nf_hook_ops ipt_ops[] += { { { NULL, NULL }, ipt_hook, PF_INET, NF_IP_PRE_ROUTING, NF_IP_PRI_MANGLE }, + { { NULL, NULL }, ipt_local_out_hook, PF_INET, NF_IP_LOCAL_OUT, + NF_IP_PRI_MANGLE } +}; + +static int __init init(void) +{ + int ret; + + /* Register table */ + ret = ipt_register_table(&packet_mangler); + if (ret < 0) + return ret; + + /* Register hooks */ + ret = nf_register_hook(&ipt_ops[0]); + if (ret < 0) + goto cleanup_table; + + ret = nf_register_hook(&ipt_ops[1]); + if (ret < 0) + goto cleanup_hook0; + + return ret; + + cleanup_hook0: + nf_unregister_hook(&ipt_ops[0]); + cleanup_table: + ipt_unregister_table(&packet_mangler); + + return ret; +} + +static void __exit fini(void) +{ + unsigned int i; + + for (i = 0; i < sizeof(ipt_ops)/sizeof(struct nf_hook_ops); i++) + nf_unregister_hook(&ipt_ops[i]); + + ipt_unregister_table(&packet_mangler); +} + +module_init(init); +module_exit(fini); diff -u --recursive --new-file v2.3.99-pre1/linux/net/ipv4/route.c linux/net/ipv4/route.c --- v2.3.99-pre1/linux/net/ipv4/route.c Thu Feb 10 17:11:24 2000 +++ linux/net/ipv4/route.c Fri Mar 17 10:56:20 2000 @@ -5,7 +5,7 @@ * * ROUTE - implementation of the IP router. 
* - * Version: $Id: route.c,v 1.81 2000/02/09 11:16:42 davem Exp $ + * Version: $Id: route.c,v 1.82 2000/03/17 14:41:52 davem Exp $ * * Authors: Ross Biro, * Fred N. van Kempen, @@ -1187,10 +1187,7 @@ rth->rt_dst = daddr; rth->key.tos = tos; #ifdef CONFIG_IP_ROUTE_FWMARK - if (skb->nfreason == NF_REASON_FOR_ROUTING) - rth->key.fwmark = skb->nfmark; - else - rth->key.fwmark = 0; + rth->key.fwmark = skb->nfmark; #endif rth->key.src = saddr; rth->rt_src = saddr; @@ -1269,10 +1266,7 @@ key.src = saddr; key.tos = tos; #ifdef CONFIG_IP_ROUTE_FWMARK - if (skb->nfreason == NF_REASON_FOR_ROUTING) - key.fwmark = skb->nfmark; - else - key.fwmark = 0; + key.fwmark = skb->nfmark; #endif key.iif = dev->ifindex; key.oif = 0; @@ -1395,10 +1389,7 @@ rth->rt_dst = daddr; rth->key.tos = tos; #ifdef CONFIG_IP_ROUTE_FWMARK - if (skb->nfreason == NF_REASON_FOR_ROUTING) - rth->key.fwmark = skb->nfmark; - else - rth->key.fwmark = 0; + rth->key.fwmark = skb->nfmark; #endif rth->key.src = saddr; rth->rt_src = saddr; @@ -1473,10 +1464,7 @@ rth->rt_dst = daddr; rth->key.tos = tos; #ifdef CONFIG_IP_ROUTE_FWMARK - if (skb->nfreason == NF_REASON_FOR_ROUTING) - rth->key.fwmark = skb->nfmark; - else - rth->key.fwmark = 0; + rth->key.fwmark = skb->nfmark; #endif rth->key.src = saddr; rth->rt_src = saddr; @@ -1563,9 +1551,7 @@ rth->key.iif == iif && rth->key.oif == 0 && #ifdef CONFIG_IP_ROUTE_FWMARK - rth->key.fwmark - == (skb->nfreason == NF_REASON_FOR_ROUTING - ? skb->nfmark : 0) && + rth->key.fwmark == skb->nfmark && #endif rth->key.tos == tos) { rth->u.dst.lastuse = jiffies; diff -u --recursive --new-file v2.3.99-pre1/linux/net/ipv4/tcp_ipv4.c linux/net/ipv4/tcp_ipv4.c --- v2.3.99-pre1/linux/net/ipv4/tcp_ipv4.c Fri Mar 10 16:40:51 2000 +++ linux/net/ipv4/tcp_ipv4.c Fri Mar 17 10:56:20 2000 @@ -5,7 +5,7 @@ * * Implementation of the Transmission Control Protocol(TCP). 
* - * Version: $Id: tcp_ipv4.c,v 1.201 2000/03/08 19:36:42 davem Exp $ + * Version: $Id: tcp_ipv4.c,v 1.202 2000/03/17 14:41:53 davem Exp $ * * IPv4 specific functions * @@ -72,8 +72,6 @@ struct inode tcp_inode; struct socket *tcp_socket=&tcp_inode.u.socket_i; -static void tcp_v4_send_reset(struct sk_buff *skb); - void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len, struct sk_buff *skb); @@ -1059,7 +1057,7 @@ * Exception: precedence violation. We do not implement it in any case. */ -static void tcp_v4_send_reset(struct sk_buff *skb) +void tcp_v4_send_reset(struct sk_buff *skb) { struct tcphdr *th = skb->h.th; struct tcphdr rth; diff -u --recursive --new-file v2.3.99-pre1/linux/net/ipv6/sit.c linux/net/ipv6/sit.c --- v2.3.99-pre1/linux/net/ipv6/sit.c Thu Jan 6 12:57:48 2000 +++ linux/net/ipv6/sit.c Sat Mar 18 16:51:36 2000 @@ -6,7 +6,7 @@ * Pedro Roque * Alexey Kuznetsov * - * $Id: sit.c,v 1.35 2000/01/06 00:42:08 davem Exp $ + * $Id: sit.c,v 1.36 2000/03/17 14:42:08 davem Exp $ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -15,6 +15,7 @@ */ #define __NO_VERSION__ +#include #include #include #include @@ -388,6 +389,10 @@ skb->dev = tunnel->dev; dst_release(skb->dst); skb->dst = NULL; +#ifdef CONFIG_NETFILTER + nf_conntrack_put(skb->nfct); + skb->nfct = NULL; +#endif netif_rx(skb); read_unlock(&ipip6_lock); return 0; @@ -546,6 +551,11 @@ iph->tot_len = htons(skb->len); ip_select_ident(iph, &rt->u.dst); ip_send_check(iph); + +#ifdef CONFIG_NETFILTER + nf_conntrack_put(skb->nfct); + skb->nfct = NULL; +#endif stats->tx_bytes += skb->len; stats->tx_packets++; diff -u --recursive --new-file v2.3.99-pre1/linux/net/khttpd/security.c linux/net/khttpd/security.c --- v2.3.99-pre1/linux/net/khttpd/security.c Tue Jan 11 22:31:47 2000 +++ linux/net/khttpd/security.c Thu Mar 16 13:58:59 2000 @@ -115,7 +115,7 @@ - filp = filp_open(Filename,00,O_RDONLY); + filp = filp_open(Filename, 
0, O_RDONLY, NULL); if ((IS_ERR(filp))||(filp==NULL)||(filp->f_dentry==NULL)) diff -u --recursive --new-file v2.3.99-pre1/linux/net/netsyms.c linux/net/netsyms.c --- v2.3.99-pre1/linux/net/netsyms.c Fri Mar 10 16:40:51 2000 +++ linux/net/netsyms.c Fri Mar 17 10:56:20 2000 @@ -229,6 +229,7 @@ EXPORT_SYMBOL(ip_route_output); EXPORT_SYMBOL(ip_route_input); EXPORT_SYMBOL(icmp_send); +EXPORT_SYMBOL(icmp_reply); EXPORT_SYMBOL(ip_options_compile); EXPORT_SYMBOL(ip_options_undo); EXPORT_SYMBOL(arp_send); @@ -339,6 +340,7 @@ EXPORT_SYMBOL(tcp_v4_rebuild_header); EXPORT_SYMBOL(tcp_v4_send_check); EXPORT_SYMBOL(tcp_v4_conn_request); +EXPORT_SYMBOL(tcp_v4_send_reset); EXPORT_SYMBOL(tcp_create_openreq_child); EXPORT_SYMBOL(tcp_bucket_create); EXPORT_SYMBOL(__tcp_put_port); diff -u --recursive --new-file v2.3.99-pre1/linux/net/sched/cls_fw.c linux/net/sched/cls_fw.c --- v2.3.99-pre1/linux/net/sched/cls_fw.c Thu Aug 26 13:05:46 1999 +++ linux/net/sched/cls_fw.c Fri Mar 17 10:56:20 2000 @@ -66,7 +66,7 @@ struct fw_head *head = (struct fw_head*)tp->root; struct fw_filter *f; #ifdef CONFIG_NETFILTER - u32 id = (skb->nfreason == NF_REASON_FOR_CLS_FW ? 
skb->nfmark : 0); + u32 id = skb->nfmark; #else u32 id = 0; #endif diff -u --recursive --new-file v2.3.99-pre1/linux/net/sched/sch_ingress.c linux/net/sched/sch_ingress.c --- v2.3.99-pre1/linux/net/sched/sch_ingress.c Tue Mar 14 19:10:40 2000 +++ linux/net/sched/sch_ingress.c Fri Mar 17 10:56:20 2000 @@ -224,14 +224,14 @@ return fwres; } -/* after iptables */ +/* after ipt_filter */ static struct nf_hook_ops ing_ops = { { NULL, NULL}, ing_hook, PF_INET, NF_IP_PRE_ROUTING, - 1 + NF_IP_PRI_FILTER + 1 }; int ingress_init(struct Qdisc *sch,struct rtattr *opt) diff -u --recursive --new-file v2.3.99-pre1/linux/net/sunrpc/svcsock.c linux/net/sunrpc/svcsock.c --- v2.3.99-pre1/linux/net/sunrpc/svcsock.c Sat Feb 12 11:22:11 2000 +++ linux/net/sunrpc/svcsock.c Thu Mar 16 14:05:45 2000 @@ -636,7 +636,7 @@ dprintk("svc: incomplete TCP record (%d of %d)\n", len, svsk->sk_reclen); svc_sock_received(svsk, ready); - len = -EAGAIN; /* record not complete */ + return -EAGAIN; /* record not complete */ } /* Frob argbuf */ diff -u --recursive --new-file v2.3.99-pre1/linux/net/unix/af_unix.c linux/net/unix/af_unix.c --- v2.3.99-pre1/linux/net/unix/af_unix.c Thu Mar 2 14:36:23 2000 +++ linux/net/unix/af_unix.c Fri Mar 17 10:56:20 2000 @@ -8,7 +8,7 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. * - * Version: $Id: af_unix.c,v 1.89 2000/02/27 19:52:50 davem Exp $ + * Version: $Id: af_unix.c,v 1.90 2000/03/16 20:38:45 davem Exp $ * * Fixes: * Linus Torvalds : Assorted bug cures. 
@@ -569,7 +569,7 @@ /* Do not believe to VFS, grab kernel lock */ lock_kernel(); - dentry = open_namei(sunname->sun_path, 2|O_NOFOLLOW, S_IFSOCK); + dentry = __open_namei(sunname->sun_path, 2|O_NOFOLLOW, S_IFSOCK, NULL); if (IS_ERR(dentry)) { *error = PTR_ERR(dentry); unlock_kernel(); diff -u --recursive --new-file v2.3.99-pre1/linux/scripts/Makefile linux/scripts/Makefile --- v2.3.99-pre1/linux/scripts/Makefile Tue Mar 14 19:10:40 2000 +++ linux/scripts/Makefile Thu Mar 16 11:27:17 2000 @@ -40,6 +40,6 @@ ${HOSTCC} -o docproc docproc.o clean: - rm -f *~ kconfig.tk *.o tkparse mkdep split-include + rm -f *~ kconfig.tk *.o tkparse mkdep split-include docproc include $(TOPDIR)/Rules.make